diff --git a/.gitignore b/.gitignore index e537439e0..a79f01a9d 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,8 @@ src/leveldb/tera_bench depends.mk build.conf + +.build/ +build/ +thirdsrc/ +thirdparty/ diff --git a/.travis.yml b/.travis.yml index 20116d62a..928badcd3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -32,15 +32,5 @@ install: - bash -x build.sh origin script: - - make check - - bash -x ft_test.sh - -after_failure: - - find . -name 'core.*' -print - - for core_file in $(find test_output -name 'core*' -print); do - echo "${core_file}"; - tmp=${core_file#*core.}; exe_name=${tmp%.*}; - echo "${core_file} is generated by ${exe_name}" - gdb ${exe_name} ${core_file} -ex "thread apply all bt" -ex "set pagination 0" -batch; - done + - make check > /dev/null 2>&1 diff --git a/LICENSE.Apache b/LICENSE.Apache new file mode 100644 index 000000000..6b0b1270f --- /dev/null +++ b/LICENSE.Apache @@ -0,0 +1,203 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + diff --git a/Makefile b/Makefile old mode 100644 new mode 100755 index c9b6486b5..ce9ded82d --- a/Makefile +++ b/Makefile @@ -14,7 +14,7 @@ TEST_OPT = -g2 -Wall -Dprivate=public INCPATH += -I./src -I./include -I./src/leveldb/include -I./src/leveldb -I./src/sdk \ -I./src/sdk/java/native-src $(DEPS_INCPATH) -LEVELDB_INCPATH = -I../ +LEVELDB_INCPATH = "-I../ -I../../include/ -I../../thirdparty/include/" CFLAGS += $(OPT) $(INCPATH) -fPIC -fvisibility=hidden # hide internal symbol of tera TEST_CFLAGS += $(TEST_OPT) $(INCPATH) -fPIC -fvisibility=hidden # hide internal symbol of tera CXXFLAGS += -std=gnu++11 $(CFLAGS) @@ -28,11 +28,16 @@ PROTO_FILES := $(wildcard src/proto/*.proto) PROTO_OUT_CC := $(PROTO_FILES:.proto=.pb.cc) PROTO_OUT_H := $(PROTO_FILES:.proto=.pb.h) +ACCESS_SRC := $(wildcard src/access/*.cc) $(wildcard src/access/authorization/*.cc) \ + $(wildcard src/access/helpers/*.cc) $(wildcard src/access/giano/*.cc) \ + $(wildcard src/access/identification/*.cc) $(wildcard src/access/verification/*.cc) +QUOTA_SRC := $(wildcard src/quota/*.cc) $(wildcard src/quota/helpers/*.cc) \ + $(wildcard src/quota/limiter/*.cc) MASTER_SRC := $(wildcard src/master/*.cc) TABLETNODE_SRC := $(wildcard src/tabletnode/*.cc) IO_SRC := $(wildcard src/io/*.cc) SDK_SRC := $(wildcard src/sdk/*.cc) $(wildcard src/sdk/test/global_txn_testutils.cc) \ - src/observer/rowlocknode/zk_rowlock_client_zk_adapter.cc src/observer/rowlocknode/ins_rowlock_client_zk_adapter.cc + src/observer/rowlocknode/rowlocknode_zk_adapter.cc src/observer/rowlocknode/ins_rowlocknode_zk_adapter.cc HTTP_SRC := $(wildcard src/sdk/http/*.cc) PROTO_SRC := $(filter-out %.pb.cc, $(wildcard src/proto/*.cc)) $(PROTO_OUT_CC) JNI_TERA_SRC := $(wildcard src/sdk/java/native-src/*.cc) @@ -44,9 +49,9 @@ COMMON_SRC := $(wildcard src/common/base/*.cc) $(wildcard src/common/net/*.cc) \ $(wildcard src/common/console/*.cc) $(wildcard src/common/log/*.cc) \ $(wildcard src/common/metric/*.cc) $(wildcard src/common/*.cc) SERVER_WRAPPER_SRC := src/tera_main_wrapper.cc -SERVER_SRC := src/tera_main.cc src/tera_entry.cc -CLIENT_SRC := src/teracli_main.cc -TERAUTIL_SRC := src/terautil.cc +SERVER_SRC := src/tera_main.cc src/common/tera_entry.cc +CLIENT_SRC := src/teracli_main.cc src/io/io_flags.cc +TERAUTIL_SRC := src/terautil.cc src/io/io_flags.cc GTXN_TEST_SRC := src/sdk/test/global_txn_test_tool.cc TEST_CLIENT_SRC := src/tera_test_main.cc TERA_C_SRC := src/tera_c.cc @@ -58,10 +63,10 @@ TEST_SRC := src/utils/test/prop_tree_test.cc src/utils/test/tprinter_test.cc \ src/io/test/load_test.cc src/master/test/master_test.cc \ src/master/test/trackable_gc_test.cc \ src/observer/test/rowlock_test.cc src/observer/test/scanner_test.cc \ - src/observer/test/observer_test.cc \ - $(wildcard src/sdk/test/*_test.cc) $(COMMON_TEST_SRC) + src/observer/test/observer_test.cc \ + $(wildcard src/sdk/test/*_test.cc) $(COMMON_TEST_SRC) -TIMEORACLE_SRC := $(wildcard src/timeoracle/*.cc) src/tera_entry.cc +TIMEORACLE_SRC := $(wildcard src/timeoracle/*.cc) src/common/tera_entry.cc TIMEORACLE_BENCH_SRC := src/timeoracle/bench/timeoracle_bench.cc ROWLOCK_SRC := $(wildcard src/observer/rowlocknode/*.cc) src/sdk/rowlock_client.cc ROWLOCK_PROXY_SRC := $(wildcard src/observer/rowlockproxy/*.cc) @@ -71,6 +76,8 @@ OBSERVER_DEMO_SRC := $(wildcard src/observer/observer_demo.cc) TEST_OUTPUT := test_output UNITTEST_OUTPUT := $(TEST_OUTPUT)/unittest +ACCESS_OBJ := $(ACCESS_SRC:.cc=.o) +QUOTA_OBJ := $(QUOTA_SRC:.cc=.o) MASTER_OBJ := $(MASTER_SRC:.cc=.o) TABLETNODE_OBJ := $(TABLETNODE_SRC:.cc=.o) IO_OBJ := $(IO_SRC:.cc=.o) @@ -97,7 +104,7 @@ ROWLOCK_OBJ := $(ROWLOCK_SRC:.cc=.o) ROWLOCK_PROXY_OBJ := $(ROWLOCK_PROXY_SRC:.cc=.o) OBSERVER_OBJ := $(OBSERVER_SRC:.cc=.o) OBSERVER_DEMO_OBJ := $(OBSERVER_DEMO_SRC:.cc=.o) -ALL_OBJ := $(MASTER_OBJ) $(TABLETNODE_OBJ) $(IO_OBJ) $(SDK_OBJ) $(PROTO_OBJ) \ +ALL_OBJ := $(ACCESS_OBJ) $(QUOTA_OBJ) $(MASTER_OBJ) $(TABLETNODE_OBJ) $(IO_OBJ) $(SDK_OBJ) $(PROTO_OBJ) \ $(JNI_TERA_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(SERVER_OBJ) $(CLIENT_OBJ) $(TERAUTIL_OBJ) \ $(TEST_CLIENT_OBJ) $(TERA_C_OBJ) $(MONITOR_OBJ) $(MARK_OBJ) \ $(SERVER_WRAPPER_OBJ) $(TIMEORACLE_OBJ) $(ROWLOCK_OBJ) $(ROWLOCK_PROXY_OBJ) $(OBSERVER_OBJ) $(OBSERVER_DEMO_OBJ) @@ -134,14 +141,13 @@ all: $(PROGRAM) $(TEST_PROGRAM) $(LIBRARY) $(SOLIBRARY) $(TERA_C_SO) $(JNILIBRAR test: $(TESTS) mkdir -p $(UNITTEST_OUTPUT) mv $(TESTS) $(UNITTEST_OUTPUT) - $(MAKE) test -C src/leveldb + CXXFLAGS=$(LEVELDB_INCPATH) LDFLAGS="$(LDFLAGS)" CC=$(CC) CXX=$(CXX) $(MAKE) test -C src/leveldb cp src/leveldb/*_test $(UNITTEST_OUTPUT) check: test ( cd $(UNITTEST_OUTPUT); \ for t in $(TESTS); do echo "***** Running $$t"; ./$$t || exit 1; done ) $(MAKE) check -C src/leveldb - sh ./src/sdk/python/checker.sh clean: rm -rf $(ALL_OBJ) $(TEST_OBJ) $(PROTO_OUT_CC) $(PROTO_OUT_H) $(TEST_OUTPUT) @@ -157,11 +163,12 @@ tera_main: src/tera_main_wrapper.o src/version.o src/tera_flags.o $(CXX) -o $@ $^ $(LDFLAGS) tera_master: $(SERVER_OBJ) $(MASTER_OBJ) $(IO_OBJ) $(SDK_OBJ) \ - $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(LEVELDB_LIB) + $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(LEVELDB_LIB) $(ACCESS_OBJ) $(QUOTA_OBJ) $(CXX) -o $@ $^ $(LDFLAGS) -tabletserver: $(SERVER_OBJ) $(TABLETNODE_OBJ) $(IO_OBJ) $(SDK_OBJ) \ - $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(LEVELDB_LIB) +MASTER_ENTRY_OBJ=src/master/master_entry.o +tabletserver: $(SERVER_OBJ) $(TABLETNODE_OBJ) $(IO_OBJ) $(SDK_OBJ) $(filter-out $(MASTER_ENTRY_OBJ),$(MASTER_OBJ)) \ + $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(LEVELDB_LIB) $(ACCESS_OBJ) $(QUOTA_OBJ) $(CXX) -o $@ $^ $(LDFLAGS) libtera.a: $(SDK_OBJ) $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(LEVELDB_UTIL) @@ -180,31 +187,31 @@ libtera.so: $(SDK_OBJ) $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(LEVELDB_UTIL) $ libtera_c.so: $(TERA_C_OBJ) $(LIBRARY) $(CXX) -o $@ $^ $(SO_LDFLAGS) -teracli: $(CLIENT_OBJ) $(IO_OBJ) $(LEVELDB_LIB) $(LIBRARY) +teracli: $(CLIENT_OBJ) $(LIBRARY) $(LEVELDB_LIB) $(ACCESS_OBJ) $(CXX) -o $@ $^ $(LDFLAGS) -terautil: $(TERAUTIL_OBJ) $(LIBRARY) +terautil: $(TERAUTIL_OBJ) $(LEVELDB_LIB) $(LIBRARY) $(ACCESS_OBJ) $(CXX) -o $@ $^ $(LDFLAGS) -gtxn_test_tool: $(GTXN_TEST_OBJ) $(LIBRARY) +gtxn_test_tool: $(GTXN_TEST_OBJ) $(LIBRARY) $(ACCESS_OBJ) $(CXX) -o $@ $^ $(LDFLAGS) #teramo: $(MONITOR_OBJ) $(LIBRARY) # $(CXX) -o $@ $^ $(LDFLAGS) -tera_mark: $(MARK_OBJ) $(LIBRARY) $(LEVELDB_LIB) +tera_mark: $(MARK_OBJ) $(LIBRARY) $(LEVELDB_LIB) $(ACCESS_OBJ) $(CXX) -o $@ $^ $(LDFLAGS) -tera_test: $(TEST_CLIENT_OBJ) $(LIBRARY) - $(CXX) -o $@ $(TEST_CLIENT_OBJ) $(LIBRARY) $(LDFLAGS) +tera_test: $(TEST_CLIENT_OBJ) $(LIBRARY) $(ACCESS_OBJ) + $(CXX) -o $@ $(TEST_CLIENT_OBJ) $(ACCESS_OBJ) $(LIBRARY) $(LDFLAGS) -timeoracle: $(TIMEORACLE_OBJ) $(PROTO_OBJ) $(COMMON_OBJ) $(OTHER_OBJ) $(SDK_OBJ) $(LEVELDB_LIB) +timeoracle: $(TIMEORACLE_OBJ) $(PROTO_OBJ) $(COMMON_OBJ) $(OTHER_OBJ) $(SDK_OBJ) $(LEVELDB_LIB) $(ACCESS_OBJ) $(CXX) -o $@ $^ $(LDFLAGS) timeoracle_bench : $(TIMEORACLE_BENCH_OBJ) $(LIBRARY) $(CXX) -o $@ $^ $(LDFLAGS) -rowlock : $(SERVER_OBJ) $(ROWLOCK_OBJ) $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(SDK_OBJ) $(LEVELDB_LIB) +rowlock : $(SERVER_OBJ) $(ROWLOCK_OBJ) $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(SDK_OBJ) $(LEVELDB_LIB) $(ACCESS_OBJ) $(CXX) -o $@ $^ $(LDFLAGS) rowlock_proxy : $(SERVER_OBJ) $(ROWLOCK_PROXY_OBJ) $(PROTO_OBJ) $(COMMON_OBJ) $(OBSERVER_LIBRARY) $(LEVELDB_LIB) @@ -217,7 +224,7 @@ libjni_tera.so: $(JNI_TERA_OBJ) $(LIBRARY) $(CXX) -o $@ $^ $(SO_LDFLAGS) src/leveldb/libleveldb.a: FORCE - CXXFLAGS=$(LEVELDB_INCPATH) CC=$(CC) CXX=$(CXX) $(MAKE) -C src/leveldb + CXXFLAGS=$(LEVELDB_INCPATH) LDFLAGS="$(LDFLAGS)" CC=$(CC) CXX=$(CXX) $(MAKE) -C src/leveldb tera_bench: @@ -234,37 +241,40 @@ tprinter_test: src/utils/test/tprinter_test.o $(LIBRARY) string_util_test: src/utils/test/string_util_test.o $(LIBRARY) $(CXX) -o $@ $^ $(LDFLAGS) -tablet_io_test: src/sdk/tera.o src/io/test/tablet_io_test.o src/tabletnode/tabletnode_sysinfo.o src/tera_entry.cc\ - $(IO_OBJ) $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(LEVELDB_LIB) $(TABLETNODE_OBJ) $(SDK_OBJ) +tablet_io_test: src/sdk/tera.o src/io/test/tablet_io_test.o src/tabletnode/tabletnode_sysinfo.o\ + $(IO_OBJ) $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(LEVELDB_LIB) $(TABLETNODE_OBJ)\ + $(SDK_OBJ) $(QUOTA_OBJ) $(ACCESS_OBJ)\ + $(filter-out $(MASTER_ENTRY_OBJ),$(MASTER_OBJ)) src/leveldb/util/histogram.o $(CXX) $(TEST_CXXFLAGS) -o $@ $^ $(LDFLAGS) -load_test: src/sdk/tera.o src/io/test/load_test.o src/tabletnode/tabletnode_sysinfo.o src/tera_entry.cc\ - $(IO_OBJ) $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(LEVELDB_LIB) $(TABLETNODE_OBJ) $(SDK_OBJ) +load_test: src/sdk/tera.o src/io/test/load_test.o src/tabletnode/tabletnode_sysinfo.o\ + $(IO_OBJ) $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(LEVELDB_LIB) $(TABLETNODE_OBJ) $(SDK_OBJ)\ + $(ACCESS_OBJ) $(QUOTA_OBJ) src/leveldb/util/histogram.o $(filter-out $(MASTER_ENTRY_OBJ),$(MASTER_OBJ)) $(CXX) $(TEST_CXXFLAGS) -o $@ $^ $(LDFLAGS) fragment_test: src/utils/test/fragment_test.o src/utils/fragment.o $(CXX) -o $@ $^ $(LDFLAGS) -progress_bar_test: src/common/console/progress_bar_test.o src/common/console/progress_bar.o +progress_bar_test: src/common/test/progress_bar_test.o src/common/console/progress_bar.o $(CXX) -o $@ $^ $(LDFLAGS) -tablet_scanner_test: src/sdk/tera.o src/io/test/tablet_scanner_test.o src/tabletnode/tabletnode_sysinfo.o src/tera_entry.cc\ - $(IO_OBJ) $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(LEVELDB_LIB) $(TABLETNODE_OBJ) $(SDK_OBJ) +tablet_scanner_test: src/sdk/tera.o src/io/test/tablet_scanner_test.o src/tabletnode/tabletnode_sysinfo.o \ + $(IO_OBJ) $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ)\ + $(LEVELDB_LIB) $(TABLETNODE_OBJ) $(SDK_OBJ)\ + $(ACCESS_OBJ) $(QUOTA_OBJ) src/leveldb/util/histogram.o\ + $(filter-out $(MASTER_ENTRY_OBJ),$(MASTER_OBJ)) $(CXX) $(TEST_CXXFLAGS) -o $@ $^ $(LDFLAGS) -master_test: src/master/test/master_test.o src/master/test/trackable_gc_test.o src/tera_entry.cc $(MASTER_OBJ) $(IO_OBJ) $(SDK_OBJ) \ - $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(LEVELDB_LIB) +master_test: src/master/test/master_test.o src/master/test/trackable_gc_test.o $(MASTER_OBJ) $(IO_OBJ) $(SDK_OBJ) \ + $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(LEVELDB_LIB) $(ACCESS_OBJ) $(QUOTA_OBJ) $(CXX) -o $@ $^ $(LDFLAGS) $(TEST_CXXFLAGS) sdk_test: src/sdk/test/global_txn_internal_test.o src/sdk/test/global_txn_test.o \ src/sdk/test/filter_utils_test.o src/sdk/test/scan_impl_test.o \ src/sdk/test/sdk_timeout_manager_test.o src/sdk/test/sdk_test.o $(SDK_OBJ) \ - $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(LEVELDB_LIB) + $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(LEVELDB_LIB) $(ACCESS_OBJ) $(CXX) -o $@ $^ $(LDFLAGS) -#observer_test: src/observer/test/rowlock_test.o src/observer/test/scanner_test.o src/observer/test/observer_test.o src/observer/observer_demo/demo_observer.o $(PROTO_OBJ) $(COMMON_OBJ) $(OTHER_OBJ) $(OBSERVER_OBJ) $(LIBRARY) -# $(CXX) -o $@ $^ $(LDFLAGS) - $(ALL_OBJ): %.o: %.cc $(PROTO_OUT_H) $(CXX) $(CXXFLAGS) -c $< -o $@ diff --git a/README.md b/README.md index 29f184596..145ff0d6f 100644 --- a/README.md +++ b/README.md @@ -47,20 +47,19 @@ Tera is built on several pieces of open source infrastructure. - __Filesystem__ (required) - Tera uses the distributed file system to store transaction log and data files. So Tera uses an abstract file system interface, called Env, to adapt to different implementations of file systems (e.g., [BFS](https://github.com/baidu/bfs), HDFS, HDFS2, POXIS filesystem). + Tera uses the distributed file system to store transaction log and data files. So Tera uses an abstract file system interface, called Env, to adapt to different implementations of file systems (e.g., [BFS](https://github.com/baidu/bfs), HDFS, HDFS2, POXIS filesystem). - __Distributed lock service__ (required) - Tera relies on a highly-available and persistent distributed lock service, which is used for a variety of tasks: to ensure that there is at most one active master at any time; to store meta table's location, to discover new tablet server and finalize tablet server deaths. Tera has an adapter class to adapt to different implementations of lock service (e.g., ZooKeeper, [Nexus](https://github.com/baidu/ins)) + Tera relies on a highly-available and persistent distributed lock service, which is used for a variety of tasks: to ensure that there is at most one active master at any time; to store meta table's location, to discover new tablet server and finalize tablet server deaths. Tera has an adapter class to adapt to different implementations of lock service (e.g., ZooKeeper, [Nexus](https://github.com/baidu/ins)) - __High performance RPC framework__ (required) - Tera is designed to handle a variety of demanding workloads, which range from throughput-oriented applications to latency-sensitive service. So Tera needs a high performance network programming framework. Now Tera heavily relies on [Sofa-pbrpc](https://github.com/baidu/sofa-pbrpc/) to meet the performance demand. + Tera is designed to handle a variety of demanding workloads, which range from throughput-oriented applications to latency-sensitive service. So Tera needs a high performance network programming framework. Now Tera heavily relies on [Sofa-pbrpc](https://github.com/baidu/sofa-pbrpc/) to meet the performance demand. - __Cluster management system__ (not necessary) - A Tera cluster in Baidu typically operates in a shared pool of machines -that runs a wide variety of other distributed applications. So Tera can be deployed in a cluster management system [Galaxy](https://github.com/baidu/galaxy), which uses for scheduling jobs, managing resources on shared machines, dealing with machine failures, and monitoring machine status. Besides, Tera can also be deployed on RAW machine or in Docker container. + A Tera cluster in Baidu typically operates in a shared pool of machines that runs a wide variety of other distributed applications. So Tera can be deployed in a cluster management system [Galaxy](https://github.com/baidu/galaxy), which uses for scheduling jobs, managing resources on shared machines, dealing with machine failures, and monitoring machine status. Besides, Tera can also be deployed on RAW machine or in Docker container. ## Documents @@ -69,19 +68,19 @@ that runs a wide variety of other distributed applications. So Tera can be deplo ## Quick start * __How to build__ - Use sh [./build.sh](BUILD) to build Tera. + Use sh [./build.sh](BUILD) to build Tera. * __How to deploy__ - [Pseudo Distributed Mode](doc/en/onebox.md) + [Pseudo Distributed Mode](doc/en/onebox.md) - [Build on Docker](example/docker) + [Build on Docker](example/docker) * __How to access__ - [teracli](doc/en/teracli.md) + [teracli](doc/en/teracli.md) - [API](doc/en/sdk_guide.md) + [API](doc/en/sdk_guide.md) ## Contributing to Tera Contributions are welcomed and greatly appreciated. @@ -91,5 +90,5 @@ Read [Roadmap](doc/en/roadmap.md) to get a general knowledge about our developme See [Contributions](doc/en/contributor.md) for more details. ## Follow us -To join us, please send resume to {dist-lab, tera_dev, opensearch} at baidu.com. +To join us, please send resume to tera-user at baidu.com. diff --git a/build.conf.template b/build.conf.template index c5412fbe6..841c7241d 100755 --- a/build.conf.template +++ b/build.conf.template @@ -35,7 +35,8 @@ if [ $MIRROR == "china" ]; then NOSE_URL=http://mirrors.163.com/gentoo/distfiles/nose-${NOSE_VERSION}.tar.gz MONGOOSE_URL=https://github.com/cesanta/mongoose/archive/${MONGOOSE_VERSION}.tar.gz elif [ $MIRROR == "origin" ]; then - BOOST_URL=http://downloads.sourceforge.net/project/boost/boost/1.58.0/boost_${BOOST_VERSION}.tar.bz2 + # BOOST_URL=http://downloads.sourceforge.net/project/boost/boost/1.58.0/boost_${BOOST_VERSION}.tar.bz2 + BOOST_URL=https://jaist.dl.sourceforge.net/project/boost/boost/1.58.0/boost_${BOOST_VERSION}.tar.bz2 PROTOBUF_URL=https://github.com/google/protobuf/releases/download/v${PROTOBUF_VERSION}/protobuf-${PROTOBUF_VERSION}.tar.gz SNAPPY_URL=https://github.com/google/snappy/releases/download/${SNAPPY_VERSION}/snappy-${SNAPPY_VERSION}.tar.gz SOFA_PBRPC_URL=https://github.com/baidu/sofa-pbrpc/archive/v${SOFA_PBRPC_VERSION}.tar.gz diff --git a/build.sh b/build.sh index c286f4423..03e64ab4a 100755 --- a/build.sh +++ b/build.sh @@ -49,7 +49,8 @@ if [ ${PROTOBUF_VERSION} == "DISABLE" ]; then elif [ ! -f "${FLAG_DIR}/protobuf_${PROTOBUF_VERSION}" ] \ || [ ! -f "${DEPS_PREFIX}/lib/libprotobuf.a" ] \ || [ ! -d "${DEPS_PREFIX}/include/google/protobuf" ]; then - wget --no-check-certificate -O protobuf-${PROTOBUF_VERSION}.tar.gz ${PROTOBUF_URL} + #wget --no-check-certificate -O protobuf-${PROTOBUF_VERSION}.tar.gz ${PROTOBUF_URL} + curl -k -L ${PROTOBUF_URL} -o protobuf-${PROTOBUF_VERSION}.tar.gz tar zxf protobuf-${PROTOBUF_VERSION}.tar.gz --recursive-unlink cd protobuf-${PROTOBUF_VERSION} ./configure ${DEPS_CONFIG} @@ -65,7 +66,8 @@ if [ ${SNAPPY_VERSION} == "DISABLE" ]; then elif [ ! -f "${FLAG_DIR}/snappy_${SNAPPY_VERSION}" ] \ || [ ! -f "${DEPS_PREFIX}/lib/libsnappy.a" ] \ || [ ! -f "${DEPS_PREFIX}/include/snappy.h" ]; then - wget --no-check-certificate -O snappy-${SNAPPY_VERSION}.tar.gz ${SNAPPY_URL} + #wget --no-check-certificate -O snappy-${SNAPPY_VERSION}.tar.gz ${SNAPPY_URL} + curl -k -L ${SNAPPY_URL} -o snappy-${SNAPPY_VERSION}.tar.gz tar zxf snappy-${SNAPPY_VERSION}.tar.gz --recursive-unlink cd snappy-${SNAPPY_VERSION} ./configure ${DEPS_CONFIG} @@ -81,7 +83,8 @@ if [ ${SOFA_PBRPC_VERSION} == "DISABLE" ]; then elif [ ! -f "${FLAG_DIR}/sofa-pbrpc_${SOFA_PBRPC_VERSION}" ] \ || [ ! -f "${DEPS_PREFIX}/lib/libsofa-pbrpc.a" ] \ || [ ! -d "${DEPS_PREFIX}/include/sofa/pbrpc" ]; then - wget --no-check-certificate -O sofa-pbrpc-${SOFA_PBRPC_VERSION}.tar.gz ${SOFA_PBRPC_URL} + #wget --no-check-certificate -O sofa-pbrpc-${SOFA_PBRPC_VERSION}.tar.gz ${SOFA_PBRPC_URL} + curl -k -L ${SOFA_PBRPC_URL} -o sofa-pbrpc-${SOFA_PBRPC_VERSION}.tar.gz tar zxf sofa-pbrpc-${SOFA_PBRPC_VERSION}.tar.gz --recursive-unlink cd sofa-pbrpc-${SOFA_PBRPC_VERSION} sed -i '/BOOST_HEADER_DIR=/ d' depends.mk @@ -119,7 +122,8 @@ if [ ${GFLAGS_VERSION} == "DISABLE" ]; then elif [ ! -f "${FLAG_DIR}/gflags_${GFLAGS_VERSION}" ] \ || [ ! -f "${DEPS_PREFIX}/lib/libgflags.a" ] \ || [ ! -d "${DEPS_PREFIX}/include/gflags" ]; then - wget --no-check-certificate -O gflags-${GFLAGS_VERSION}.tar.gz ${GFLAGS_URL} + #wget --no-check-certificate -O gflags-${GFLAGS_VERSION}.tar.gz ${GFLAGS_URL} + curl -k -L ${GFLAGS_URL} -o gflags-${GFLAGS_VERSION}.tar.gz tar zxf gflags-${GFLAGS_VERSION}.tar.gz --recursive-unlink cd gflags-${GFLAGS_VERSION} cmake -DCMAKE_INSTALL_PREFIX=${DEPS_PREFIX} -DGFLAGS_NAMESPACE=google -DCMAKE_CXX_FLAGS=-fPIC @@ -135,7 +139,8 @@ if [ ${GLOG_VERSION} == "DISABLE" ]; then elif [ ! -f "${FLAG_DIR}/glog_${GLOG_VERSION}" ] \ || [ ! -f "${DEPS_PREFIX}/lib/libglog.a" ] \ || [ ! -d "${DEPS_PREFIX}/include/glog" ]; then - wget --no-check-certificate -O glog-${GLOG_VERSION}.tar.gz ${GLOG_URL} + #wget --no-check-certificate -O glog-${GLOG_VERSION}.tar.gz ${GLOG_URL} + curl -k -L ${GLOG_URL} -o glog-${GLOG_VERSION}.tar.gz tar zxf glog-${GLOG_VERSION}.tar.gz --recursive-unlink cd glog-${GLOG_VERSION} ./configure ${DEPS_CONFIG} CPPFLAGS=-I${DEPS_PREFIX}/include LDFLAGS=-L${DEPS_PREFIX}/lib @@ -151,7 +156,8 @@ if [ ${GTEST_VERSION} == "DISABLE" ]; then elif [ ! -f "${FLAG_DIR}/gtest_${GTEST_VERSION}" ] \ || [ ! -f "${DEPS_PREFIX}/lib/libgtest.a" ] \ || [ ! -d "${DEPS_PREFIX}/include/gtest" ]; then - wget --no-check-certificate -O googletest-release-${GTEST_VERSION}.tar.gz ${GTEST_URL} + #wget --no-check-certificate -O googletest-release-${GTEST_VERSION}.tar.gz ${GTEST_URL} + curl -k -L ${GTEST_URL} -o googletest-release-${GTEST_VERSION}.tar.gz tar zxf googletest-release-${GTEST_VERSION}.tar.gz --recursive-unlink cd googletest-release-${GTEST_VERSION}/googletest @@ -191,7 +197,8 @@ if [ ${GPERFTOOLS_VERSION} == "DISABLE" ]; then echo "Disable gperftools." elif [ ! -f "${FLAG_DIR}/gperftools_${GPERFTOOLS_VERSION}" ] \ || [ ! -f "${DEPS_PREFIX}/lib/libtcmalloc_minimal.a" ]; then - wget --no-check-certificate -O gperftools-${GPERFTOOLS_VERSION}.tar.gz ${GPERFTOOLS_URL} + #wget --no-check-certificate -O gperftools-${GPERFTOOLS_VERSION}.tar.gz ${GPERFTOOLS_URL} + curl -k -L ${GPERFTOOLS_URL} -o gperftools-${GPERFTOOLS_VERSION}.tar.gz tar zxf gperftools-${GPERFTOOLS_VERSION}.tar.gz --recursive-unlink cd gperftools-${GPERFTOOLS_VERSION} ./configure ${DEPS_CONFIG} CPPFLAGS=-I${DEPS_PREFIX}/include LDFLAGS=-L${DEPS_PREFIX}/lib @@ -207,7 +214,8 @@ if [ ${INS_VERSION} == "DISABLE" ]; then elif [ ! -f "${FLAG_DIR}/ins_${INS_VERSION}" ] \ || [ ! -f "${DEPS_PREFIX}/lib/libins_sdk.a" ] \ || [ ! -f "${DEPS_PREFIX}/include/ins_sdk.h" ]; then - wget --no-check-certificate -O ins-${INS_VERSION}.tar.gz ${INS_URL} + #wget --no-check-certificate -O ins-${INS_VERSION}.tar.gz ${INS_URL} + curl -k -L ${INS_URL} -o ins-${INS_VERSION}.tar.gz tar zxf ins-${INS_VERSION}.tar.gz --recursive-unlink cd ins-${INS_VERSION} sed -i "s|^PREFIX=.*|PREFIX=${DEPS_PREFIX}|" Makefile @@ -245,7 +253,8 @@ if [ ${MONGOOSE_VERSION} == "DISABLE" ]; then elif [ ! -f "${FLAG_DIR}/mongoose_${MONGOOSE_VERSION}" ] \ || [ ! -f "${DEPS_PREFIX}/include/mongoose.h" ] \ || [ ! -f "${DEPS_PREFIX}/lib/libmongoose.a" ]; then - wget --no-check-certificate -O mongoose-${MONGOOSE_VERSION}.tar.gz ${MONGOOSE_URL} + #wget --no-check-certificate -O mongoose-${MONGOOSE_VERSION}.tar.gz ${MONGOOSE_URL} + curl -k -L ${MONGOOSE_URL} -o mongoose-${MONGOOSE_VERSION}.tar.gz tar zxf mongoose-${MONGOOSE_VERSION}.tar.gz --recursive-unlink cd mongoose-${MONGOOSE_VERSION} gcc -c -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -g2 -pipe -Wall -Werror -fPIC mongoose.c @@ -277,4 +286,4 @@ sed -i "s:^INS_PREFIX=.*:INS_PREFIX=$DEPS_PREFIX:" depends.mk ######################################## make clean -make -j8 +make diff --git a/build_version.sh b/build_version.sh index 8cac725a6..2534fcb85 100755 --- a/build_version.sh +++ b/build_version.sh @@ -56,7 +56,7 @@ GIT_INFO_FILE=git_info.tmp VERSION_CPP_FILE=src/version.cc # generate template file -git log | head -n 6 | sed 's/$/&\\n\\/g' > $GIT_INFO_FILE +git log | head -n 6 | sed 's/"/\\"/g' | sed 's/$/&\\n\\/g' > $GIT_INFO_FILE gen_info_template_header > $TEMPLATE_HEADER_FILE gen_info_template_foot > $TEMPLATE_FOOT_FILE gen_info_print_template >> $TEMPLATE_FOOT_FILE diff --git a/doc/cn/filter.md b/doc/cn/filter.md new file mode 100644 index 000000000..dea19ca5f --- /dev/null +++ b/doc/cn/filter.md @@ -0,0 +1,104 @@ +# filter介绍 +目前包含ValueFilter以及多个ValueFilter的AND或OR的自由组合的FilterList + +## ValueFilter +### 功能简介 +1. ValueFilter是什么 +举个例子,在scan时,只需要输出满足以下条件的行:针对指定的cf和qu,其下的value > 4,这就是ValueFilter +2. ValueFilter的限制条件 +- ValueFilter支持对value类型为整数,小数,字符串的过滤 +- ValueFilter只在scan中生效,对于相同cf和qu,只支持获取1个version的scan +- cf和qu哪个不指定,哪个被设为空串"" +- 当qu为空串""时,对于指定cf下所有qu的value,有一个不满足过滤条件,所在行就会被过滤掉 +### 使用方法 +1. 构造ValueFilter +```c++ +ValueFilterPtr value_filter = + std::make_shared(CompareOperator::kGreater, comparator); +``` +2. 设置ValueFilter相关属性 +```c++ +value_filter->SetColumnFamily("cf1"); +value_filter->SetColumnQualifier("qu1"); +value_filter->SetFilterIfMissing(true); +``` +3. Set到ScanDesc中 +```c++ +ScanDescriptor scan_desc(""); +scan_desc.SetFilter(value_filter); +``` +### 需要一个comparator +我们注意到,在使用方法的第1步构造ValueFilter时,构造函数中有一个参数是comparator,它是ValueFilter +需要用到的。 +#### 因为,ValueFilter可以针对任意类型的Value进行过滤,而tera中存储的Value都是二进制格式,所以提供了FilterComparator这个对象,来负责具体的编码(存储之前需要由用户调用Encode方法进行编码),解码和比较,对ValueFilter屏蔽具体的Value类型。 +#### 下面是针对不同类型,构造相应的FilterComparator的方法 +根据待比较的值的数据类型,构造相应的comparator,同时需要指定比较的参考值,如ValueFilter功能简介 +例子中的4。 +- 例子1:若ValueFilter需要对Value为int64的整数进行过滤,那么需要先构造这样的comparator +```c++ +int64_t ref_value = 4; +IntegerComparatorPtr comparator = std::make_shared(IntegerValueType::kInt64, + ref_value); +``` +- 例子2:若ValueFilter需要对Value为uint8的整数进行过滤,那么需要先构造这样的comparator +```c++ +uint8_t ref_value = 4; +IntegerComparatorPtr comparator = std::make_shared(IntegerValueType::kUint8, + ref_value); +``` +- 例子3:若ValueFilter需要对Value为float或double的小数进行过滤,那么需要先构造这样的comparator +```c++ +double ref_value = 4.0; +DecimalComparatorPtr comparator = std::make_shared(ref_value); +``` +- 例子4:若ValueFilter需要对std::string的Value进行过滤,那么需要先构造这样的comparator +```c++ +std::string ref_value = "abc"; +BinaryComparatorPtr comparator = std::make_shared(ref_value); +``` +#### 下面是用户在存储之前,针对不同类型,使用相应的FilterComparaotr进行编码的方法 +用户只需要调用Encode方法,在向tera写入Value之前将Value进行一下转换,Decode方法会在ValueFilter内部实现中使用。 +- 例子1:过滤的对象集合中的Value是int64类型,写入前先使用如下接口进行转换 +```c++ +int64_t value = 8; +std::string out_value; +bool ret = filter::IntegerComparator::EncodeInteger(filter::IntegerValueType::kInt64, + value, &out_value); +``` +对于int64的value,也可以使用已有的put接口直接写入,这里是兼容的,也只有int64类型的可以这样使用 +```c++ +virtual bool Put(const std::string& row_key, const std::string& family, + const std::string& qualifier, const int64_t value, + ErrorCode* err) = 0; +``` +- 例子2:过滤的对象集合中的Value是uint8类型,写入前先使用如下接口进行转换 +```c++ +uint8_t value = 2; +std::string out_value; +bool ret = filter::IntegerComparator::EncodeInteger(filter::IntegerValueType::kUint8, + value, &out_value); +``` +- 例子3:过滤的对象集合中的Value是float或double类型,写入前先使用如下接口进行转换 +```c++ +double value = 2.0; +std::string out_value = filter::DecimalComparator::EncodeDecimal(value); +``` + +## FilterList +### 功能简介 +为了实现类似 Filter1 && (Filter2 || Filter3)的功能而实现 +### 使用方法 +1. 先构造基本的Filter,如我们这里支持的ValueFilter +2. 再构造想要的FilterList +- 举个例子:如我们想要这样的Filter组合:Filter1 && (Filter2 || Filter3),假设3个Filter都已经构造好了 +那么,按如下代码构造FilterList +```c++ +FilterListPtr sub_filter_list = std::make_shared(FilterList::kOr); +sub_filter_list->AddFilter(value_filter_1); +sub_filter_list->AddFilter(value_filter_2); +FilterListPtr filter_list = std::make_shared(FilterList::kAnd); +filter_list->AddFilter(value_filter_3); +filter_list->AddFilter(sub_filter_list); +``` +3. 将构造好的FilterList Set到ScanDesc中 +- 构造出的FilterList仍然是一个Filter,所以Set方法同ValueFilter部分介绍的Set方法 diff --git a/doc/cn/sdk_guide.md b/doc/cn/sdk_guide.md index 1c429fd74..c3b28e4d8 100644 --- a/doc/cn/sdk_guide.md +++ b/doc/cn/sdk_guide.md @@ -226,6 +226,7 @@ tera::ResultStream* result_stream = table->Scan(desc, &error_code); while (!result_stream->Done()) { ... result_stream->Next(); + if(...) result_stream->Cancel(); // 调用Cancel()接口后,会从while循环中退出 } delete result_stream; // 注意回收内存 ``` diff --git a/doc/en/contributor.md b/doc/en/contributor.md index edb83a1cf..c4b46c892 100644 --- a/doc/en/contributor.md +++ b/doc/en/contributor.md @@ -5,10 +5,9 @@ This document outlines some of the conventions on commit message formatting, bug reporting and other resources to make getting your contribution into Tera easier. -## Email and chat +## Email -- Email: tera_dev at baidu.com -- IRC: QQ group 340418305 +- tera-user at baidu.com ## Getting started diff --git a/doc/en/sdk_guide.md b/doc/en/sdk_guide.md index ec29b51de..29ec5ac76 100644 --- a/doc/en/sdk_guide.md +++ b/doc/en/sdk_guide.md @@ -240,7 +240,9 @@ desc.AddColumn("family22", "qualifier22"); // Get the column from colum tera::ResultStream* result_stream = table->Scan(desc, &error_code); while (!result_stream->Done()) { /* process the returned cell */ + ... result_stream->Next(); + if (...) result_stream->Cancel(); // scan will exit from while circle } // cleanup diff --git a/doc/image/persistent_cache_arch.png b/doc/image/persistent_cache_arch.png new file mode 100644 index 000000000..c3dd78cf2 Binary files /dev/null and b/doc/image/persistent_cache_arch.png differ diff --git a/doc/image/persistent_cache_meta_data_arch.png b/doc/image/persistent_cache_meta_data_arch.png new file mode 100644 index 000000000..e1d342319 Binary files /dev/null and b/doc/image/persistent_cache_meta_data_arch.png differ diff --git a/doc/multi_tenancy_user_manual.md b/doc/multi_tenancy_user_manual.md new file mode 100644 index 000000000..51849a044 --- /dev/null +++ b/doc/multi_tenancy_user_manual.md @@ -0,0 +1,44 @@ +# 多租户用户手册 + +## 权限管理 + + +## Quota控制 +Quota控制目前实现了Table级别的流量控制,具体使用说明可用`./teracli help quota`查看。 + +### show +展示Table级别的Quota值。 +``` +quota show + + TABLENAME WRITEREQS(w/s) WRITEBYTES(B/s) READREQS(r/s) READBYTES(B/s) SCANREQS(s/s) SCANBYTES(B/s) +--------------------------------------------------------------------------------------------------------------- + test 1000/3 2000/1 3000/2 -1/1 5000/1 6000/1 +``` + +### showx +所有的Ts级别所有Table的Quota值。 +``` +quota showx + +例子: +quota showx + TSADDR TABLENAME WRITEREQS(w/s) WRITEBYTES(B/s) READREQS(r/s) READBYTES(B/s) SCANREQS(s/s) SCANBYTES(B/s) +-------------------------------------------------------------------------------------------------------------------------------- + ip_addr1:3100 test 500/3 1000/1 1500/2 -1/1 2500/1 3000/1 + ip_addr2:3100 test 500/3 1000/1 1500/2 -1/1 2500/1 3000/1 +``` + +### set +设置Table的Quota。 + +``` +quota set +其中: + 设置选项为:WRITEREQS|WRITEBYTES|READREQS|READBYTES|SCANREQS|SCANBYTES=[limit]/[period] +如果不设置period,默认为1s。 + +例子: +quota set test WRITEREQS=1000/2 READBYTES=4000/3 SCANREQS=100 SCANBYTES=-1 +表示,设置表名为test的表Quota,WRITEREQS为每2s 1000 reqs,READBYTES为每3s 4000B,SCANREQS为每1s 100 reqs,SCANBYTES为-1表示quota不限制,如果用户不设置默认为不限制quota。 +``` diff --git a/doc/persistent_cache.md b/doc/persistent_cache.md new file mode 100644 index 000000000..f4530e1e9 --- /dev/null +++ b/doc/persistent_cache.md @@ -0,0 +1,223 @@ +## Persistent Cache 设计 + +Tera 的 PersistentCache 设计借鉴了 rocksdb 同名模块,但因为各种各样的原因,rocksdb本身的persistent cache基本不可用,因此 Tera 在此基础上做了较大幅度的改动,以满足需求。 + +### 主要feature + +1. 确保数据不会超出SSD磁盘容量。 +2. 对于SSD容量不足的场景,以文件为粒度,做LRU淘汰。 +3. 在SSD容量充足的情况下,读写性能与直接使用SSD持平。 +4. 支持单机多磁盘容量异构的场景。 +5. 支持从老版本 FlashEnv 的场景无感切换到 PersistentCache 场景。 +6. 定期GC垃圾文件。 + +### Server 端配置说明 + +- tera_persistent_cache_paths: 设置 persistent cache 所使用的ssd盘路径,支持多路径,如/home/ssd1;/home/ssd2,分号分割。 + +- tera_persistent_cache_write_retry_times: Cache打满后,如果所有文件都被引用无法淘汰的情况下,重试分配空间的次数,默认5次,间隔1s。通常用户无需关心。 + +- tera_enable_persistent_cache: 设置是否开启persistent cache,默认使用flash_env。 + +- tera_enable_persistent_cache_transfer_flash_env_files: 设置是否迁移 flash_env 的 cache 文件,如果为false,则删除所有不认识的cache文件,否则尝试将已有的 flash env 的cache文件拉入persistent cache中。 + +### 整体架构 + +![persistent_cache_arch](./image/persistent_cache_arch.png) + +如上图所示,Persistent Cache 的直接使用者就是leveldb。 + +### 接口说明 + +Persistent Cache主要提供了以下接口: + +```c++ +virtual Status NewWriteableCacheFile(WriteableCacheFile**, const std::string& path) = 0; +virtual Status Read(const Slice& key, size_t offset, size_t length, Slice* content, SstDataScratch* scratch) = 0; +virtual void ForceEvict(const Slice& key) = 0; +``` + +#### - NewWriteableCacheFile + +向 Cache 写入内容主要调用的就是 **NewWriteableCacheFile** 接口,其中: + +- WriteableCacheFile** 用于传出新建的CacheFile, + +- path 指定存储该文件相对于 persistent_cache 路径的位置,真正创建的文件会在用户指定的文件名后拼接.*$cache_id*.rc + + **例:若persistent_cache的路径为/home/ssd0/persistent_cache,用户指定某一个cache文件创建路径为table/tablet00000001/0/123.sst,若此时的cache_id是386,最终该文件会被创建在 /home/ssd0/persistent_cache/table/tablet00000001/0/123.sst.386.rc** + + 该特性主要用于保留tera本身的结构化路径,方便问题的定位和追查。 + + 该接口会在 Cache 系统内创建一个 **WriteableCacheFile** 返回给用户,该 **WriteableCacheFile** 主要接口如下: + +```c++ +Status Append(const Slice&) override; +void Close(const Slice& key); +void Abandon(); +``` + +对用户来说,写Cache文件需要调用Append接口,语义同WriteableFile的语义,主要对另外两个接口作出说明: + +1. Close: 当用户写入Cache文件结束后,如果认为没有问题,可以通过调用Close接口,直接关闭Cache文件,并给定key。后面可以通过 PersistentCache 的 Read 接口,指定 key,偏移量,和长度来读取该文件的内容。 +2. Abandon:当用户写入Cache文件发生失败,希望丢弃文件时使用,该文件不会被插入Cache中,而是后期随着GC被淘汰。主要可能的错误包括: + 1. Cache写满且无文件可淘汰,返回 IOError。 + 2. 其他写文件系统的错误。 + +在leveldb模块中,只有一个地方直接写新Cache文件,即做Compact时,生成新的文件同时会写一个新的 Cache 文件。其他需要充cache的写操作由 **PersistentCacheHelper** 封装执行。 + +#### - Read + +Read接口如前所述,对于插入PersistentCache中的文件来说,通过指定的文件key, offset, length可以直接读取该文件指定区域的内容。 + +#### - ForceEvict + +对于PersistentCache来说,通常在Cache写满后才进行文件粒度的LRU淘汰,但是SSD盘的性能会随着容量增加而逐渐下降,为了确保尽快的干掉无用文件,可以通过ForceEvict接口,强行删除Cache内指定key对应的文件。分为两种情况: + +1. 如果该文件无引用,则直接删除。 +2. 如果该文件正在被引用,则推入 **evictable** 队列,在此之后,通过任何途径都无法再次查询/引用到该文件,并在周期性的GC中,在其引用计数为0后删除。 + +该接口在 leveldb 中,有两个调用的地方: + +1. 从Cache中读取某个sst文件的block时,如果读取成功但ParseBlock失败,则认为该文件内容失效,调用ForceEvict清理该文件后,从dfs读取,如果dfs读取到block并且Parse成功,则重新充该文件。 +2. Compact结束后,对于确定需要删除的文件,调用该接口直接淘汰。 + +### 各模块详细说明 + +#### PersistentCache + +接口定义,虚类,主要接口已说明 + +#### PersistentCacheMetaData + +该结构用于维护PersistentCache的元数据信息,其内部实现主要就是两个HashTable。分别建立了以下映射关系: + +![persistent_cache_meta_data_arch](./image/persistent_cache_meta_data_arch.png) + +如上图所示,MetaData中维护了key->cache_id的映射,以及 cache_id 到 cache_file 的映射,这部分数据同时使用dbtable持久化在磁盘上,供程序重启后使用。同时,cache_file本身被串成了一个 lru list,用于 cache 打满后淘汰。 + +#### PersistentCacheImpl + +单盘上的 **PersistentCache** 实现,其内部主要维护了 **PersistentCacheMetaData** 和当前Cache的使用量,当前CacheFile的 Id 等信息。 + +其对PersistentCache的各个接口实现细节主要如下 + +```c++ +virtual Status NewWriteableCacheFile(WriteableCacheFile**, const Slice&) = 0; +``` + +该接口被调用时,会依次进行以下步骤: + +1. 创建一个新的CacheFile,文件名为 {cache_id}.rc ,其中cache_id在单盘上全局递增,永不重复,由PersistentCacheImpl维护。 +2. 在metadata中建立cache_id -> cache_file 的映射(此时用户还无法读到该文件)。 +3. 将文件返回给用户使用。 + + +```c++ +virtual Status Read(const Slice& key, size_t offset, size_t length, Slice* content, SstDataScratch* scratch) = 0; +``` + +该接口被调用时,会依次进行以下步骤: + +1. 在MetaData中查找 key 对应的 cache_id。 +2. 如果找到,继续通过 cache_id 查找对应的 CacheFile。 +3. 如果找到,读取CacheFile中对应 offset / length 的内容。 +4. 以上任意一步失败,认为Cache Miss,如果读取文件错误,认为Cache Error。 + +```C++ +virtual void ForceEvict(const Slice& key) = 0; +``` + +该接口被调用时,会转发给 PersistentCacheMetaData 的 ForceEvict 接口,并依次进行以下步骤: + +1. 删除 MetaData 中对应 key -> cache_id 的映射(如果有)。 +2. 删除 MetaData 中对应 cache_id->cache_file 的映射(如果有)。 +3. 如果 cache_file 的引用计数为0,直接将文件返回给 PersistentCacheImpl,供其删除并释放Cache空间。 +4. 否则,将 cache_file 加入 evictable 文件列表,等待gc时尝试删除。 + +#### WriteableCacheFile + +该模块主要功能和接口已在上文全局中介绍,以下是其实现细节。 + +```c++ +Status Append(const Slice&) override; +``` + +该接口被调用时,会依次进行以下步骤: + +1. 向 PersistentCacheImpl 申请与 Slice 等大的 Size。 +2. 申请成功后写入文件。 +3. 申请失败,等待1s后重试,最多重试5次,失败后返回错误。 + +```c++ +void Close(const Slice& key); +``` + +该接口调用时,认为写入Cache文件成功,建立key -> cache_id的映射,从此之后用户可以通过Cache读到该文件的内容。(cache_id -> cache_file)的映射在之前创建文件的时候已经建立。并将 key->cache_id的映射落盘持久化,待重启恢复。 + +```c++ +void Abandon(); +``` + +该接口调用时,认为Cache文件有错误不可用,直接抛弃,删除cache_id->cache_file的映射,并将文件推入 evictable 文件列表,等待gc。 + +#### RandomAccessCacheFile + +基本等同于posix的RandomAccessFile,在WriteableCacheFile做Close操作时,由于env不支持同一个文件又读又写,因此会关闭该文件,并读打开为RandomAccessCacheFile,存入Cache中。 + +#### ShardedPersistentCache + +用户层真正Work的PersistentCache实例,主要作用是多盘&异构容量的支持,由ShardedPersistentCache维护多个PersistentCacheImpl实例,通常每一个PersistentCacheImpl实例对应底层的一块儿SSD存储介质。 + +- Open:Open一个ShardedPersistentCache时,会由他Open一系列的PersistentCacheImpl,并通过GetAllKeys()接口去恢复每一个key到对应PersistentCacheImpl的映射关系。 + +- Read: Read操作会根据 key=>persistent_cache_impl的映射关系,读取对应的SSD上的缓存文件。 + +- NewWriteableCacheFile: 当调用NewWriteableCacheFile时,会根据Pick策略,选取一个PersistentCacheImpl,调用NewWriteableCacheFile去创建Cache文件。 + +##### Pick 策略 + +ShardedPersistentCache在新建Cache文件时,依据以下策略进行抉择: + +1. 如果有 >= 1块盘使用量不足90%,则在所有使用量低于90%的盘中随机选取。 +2. 否则,选取剩余空间最大的盘创建。 + +以上策略依据是,考虑有两块SSD磁盘: + +1. 当双盘都空闲时,做性能上的考量,将文件尽可能的等份到两块盘,在考虑随机读取的场景下,这样能尽可能发挥双盘的性能优势。如果数据在两块盘上分布不均,那么随机读取时,有可能一块盘已经被打到极限,而另一块盘没什么压力。 +2. 当双盘有一块使用量大于90%时,考虑两块盘size可能不同,为了尽可能利用SSD空间,此时选取其他使用量低于90%的盘进行插入。 +3. 直到所有盘容量都大于90%后,从剩余容量最多的盘中进行选取。 + +#### GC策略 + +由于tera是分布式表格系统,随着数据分片的迁移,某台ts上的缓存信息很可能是作废的,为了尽快回收掉作废的缓存信息,按照以下策略周期性进行GC: + +1. 通过 GetAllKeys() 接口获得当前PersistentCache中的所有key。 + +2. 对于以下两种情况对应的key,保留其Cache文件。 + + 1. 在此台ts上的tablet所对应的key。 + 2. 在此台ts上的tablet所继承的其他tablet的文件。 + + 而对于其他的key来说,通过ForceEvict接口直接淘汰。 + +3. 在此之后,调用PersistentCache的GarbageCollect()接口,寻找metadata中引用计数为 0 的 evictable 文件并删除。 + +附:在PersistentCache中的key格式: +*$table_name*/*$tablet_name*/*$lg_id*/xxx.sst + +#### 从env_flash 迁移方案 +对于persistent cache来说,如果需要从env_flash继承已存在的cache文件,可以通过以下配置方式来完成。 + +**--tera_enable_persistent_cache=true** + +**--tera_enable_persistent_cache_transfer_flash_env_files=true** + +以上就是全部配置,迁移过程中,会对当前路径下的所有已存在文件做遍历,对于满足 $persistent_cache_path/*$table_name*/*$tablet_name*/*$lg_id*/xxx.sst格式的文件(即flash env的文件)来说,会对他们进行以下操作: + +1. 分配cache_id。 +2. 重命名文件为*$table_name*/*$tablet_name*/*$lg_id*/xxx.sst.*$cache_id*.rc。 +3. 读打开文件,并在persistent_cache中分配和文件size相同大小的容量。 +4. 将文件插入cache中接管。 + +对于其他文件,会在open persistent cache之后进行删除。 diff --git a/doc/sdk_reference/scan.md b/doc/sdk_reference/scan.md index dadb915fd..c97348df1 100644 --- a/doc/sdk_reference/scan.md +++ b/doc/sdk_reference/scan.md @@ -39,6 +39,26 @@ std::string Value() const = 0; int64_t ValueInt64() const = 0; ``` +##### (8) 返回scan已扫描data size的值(含drop数据) +``` +uint64_t GetDataSize() const = 0; +``` + +##### (9) 返回scan已扫描row行数的值(含drop数据) +``` +uint64_t GetRowCount() const = 0; +``` + +##### (10) 返回scan已扫描最新的key +``` +std::string GetLastKey() const = 0; +``` + +##### (11) 取消scan +``` +void Cancel() = 0; +``` + ### 2. ScanDescriptor ##### (1) 设置扫描的结束key @@ -66,32 +86,22 @@ void SetMaxVersions(int32_t versions); void SetTimeRange(int64_t ts_end, int64_t ts_start); ``` -##### (6) 设置批量扫描模式 -``` -void SetAsync(bool async); -``` - -##### (7) 检查扫描是否为批量扫描模式 -``` -bool IsAsync() const; -``` - -##### (8) 设置扫描的超时时间 +##### (6) 设置扫描的超时时间 ``` void SetPackInterval(int64_t timeout); ``` -##### (9) 设置扫描的buffersize +##### (7) 设置扫描的buffersize ``` void SetBufferSize(int64_t buf_size);//默认为64K ``` -##### (10) 设置每次扫描的cell数 +##### (8) 设置每次扫描的cell数 ``` void SetNumberLimit(int64_t number_limit); ``` -##### (11) 获取每次扫描的cell数 +##### (9) 获取每次扫描的cell数 ``` int64_t GetNumberLimit(); ``` diff --git a/doc/to_be_a_contributor.md b/doc/to_be_a_contributor.md index dfea2269e..9d8fbb3d8 100644 --- a/doc/to_be_a_contributor.md +++ b/doc/to_be_a_contributor.md @@ -16,6 +16,6 @@ b) Localitygroup包含Tablet,表格先被划分为若干个LocalityGroup,每 c) 两者互不包含 # 后续的任务 -在你完成第一个任务后,请将解答通过邮件发至tera_dev@baidu.com, +在你完成第一个任务后,请将解答通过邮件发至tera-user@baidu.com, 我会将第二个任务通过邮件回复给你,后面任务可能会逐渐加大难度,至少是需要动下手了, 不过相信我,总共不超过一两个小时,你就能成为一个tera的贡献者了! diff --git a/doc/tools/admincli.md b/doc/tools/admincli.md new file mode 100644 index 000000000..e9355a6a6 --- /dev/null +++ b/doc/tools/admincli.md @@ -0,0 +1,319 @@ +# admincli meta表相关操作使用说明 +* `./admincli help`可以看到相关帮助说明 +* 所有操作默认在master节点执行 +* teracli、tera_master_control 在bin目录下,admincli 在tools目录下,tera.flag 在conf目录下 +* 该工具操作对象为:1.分布式文件系统持久化的meta,2.master内存中的meta,3.本地备份的meta表文件 +* 1.对于持久化的meta,支持查询、健康检查、备份、修改 4类操作:get、show、healthcheck、backup、put、delete、modify +* 2.对于内存中的meta,支持查询、健康检查、备份 3类操作:get、show、healthcheck、backup,不支持修复操作 +* 3.对于备份的meta,可从持久化meta或者内存meta备份,作为diff和恢复的依据。 + +### 1. 功能说明 +#### 1.1 get +获取一条指定表start_key对应分片的tablet meta信息 +可选参数inmem,代表内存meta,缺省代表持久化meta表 + +```c +get [inmem] + +例子: +./admincli get inmem table1 '\x01abc' + +``` + +#### 1.2 show +查询meta表中等meta信息 +可选参数inmem,代表内存meta,缺省代表持久化meta +可选参数start_key、end_key(须一起指定),show指定KeyRange的meta信息, 缺省则show整个meta表 + +```c +show [inmem] [start_key] [end_key] + +例子: +./admincli show inmem "table1#\\x00'n\\x842" "table1#\\x00K\\x85" + +``` + +#### 1.3 healthcheck +对meta表进行健康检查, +可选参数inmem,代表内存meta,缺省代表持久化meta +可选参数start_key、end_key(须一起指定)代表KeyRange范围内的健康检查,缺省代表整个meta表健康检查 + +```c +healthcheck [inmem] [start_key] [end_key] + +例子: +./admincli healthcheck inmem "table1#\\x00'n\\x842" "table1#\\x00K\\x85" + +``` + +#### 1.4 backup +备份meta表中的信息 +可选参数`inmem`表示内存meta,缺省表示文件系统持久化meta +可选参数`filename`代表生成名为`filename`+时间戳的meta表备份文件 +内存meta表备份文件缺省命名为:inmem_meta.bak+时间戳 +持久化meta表备份文件缺省命名为:meta.bak+时间戳 +为避免备份文件冲突,时间戳后缀自动添加 + +```c +backup [inmem] [filename] + +例子: +./admincli backup inmem inmem_meta.bak + +``` + +#### 1.5 modify +修改持久化meta表中该table_name和start_key对应的tablet meta信息 +选择参数endkey,value输入欲修改的end_key值,之后输入Y确认 +选择参数hostname, vaule输入欲修改的主机名,之后输入Y确认 +如需修改start_key或其他信息,可以先调用delete,再调用put完成 + +```c +modify + +例子: +./admincli modify table1 '\x01abc' endkey '\x01add' +./admincli modify table1 '\x01abc' dest_ts yq01.baidu.com:2002 + +``` + +#### 1.6 delete +从持久化meta表中删除table_name和start_key对应的tablet meta信息, 按照提示输入Y确认 + +```c +delete + +例子: +./admincli delete table1 '\x01abc' + +``` + +#### 1.7 put +向持久化meta表中插入一条tablet meta信息 + +```c +put + +例子: +./admincli put table1/tablet00000019 '\x01abc' '\x4Fzzz' hostname:2002 + +``` + +#### 1.8 diff +扫描文件系统获取meta信息,对meta表备份文件`filename`进行diff检查 +可选参数table_name,代表对名为`table_name`的表扫描与`filename`进行校验 +可选参数tablet_path,代表对path为`tablet_path`的分片扫描与`filename`进行校验 +可选参数缺省,代表对全部表扫描与`filename`进行校验 +健康检查的异常会记入tools目录下meta.diff文件中, 只保留最新的一份 + +```c +diff [table_name|tablet_path] + +全部表diff 例子: +./admincli diff ./meta.bak_20180926-20:55:32 + +指定表diff 例子: +./admincli diff test_table ./meta.bak_20180926-20:55:32 + +指定分片 path diff 例子: +./admincli diff test_table/tablet00000001 ./meta.bak_20180926-20:55:32 + +``` + +### 2. 场景选择 +* tera集群meta表存有2份:master内存里的meta,文件系统持久化的meta +* 当持久化的meta出现异常,内存里的meta正常,此时使用`方法一`修复 +* 当2份meta都发生异常,此时使用`方法二`修复 + +### 3. 使用步骤 +* 设置tera为safemode模式:`./teracli safemode enter` +* 为`tera.flag`添加配置项:`--meta_cli_token=2862933555777941757` +* 校验两份meta表确认故障场景和修复方法:`./admincli healthcheck`和`./admincli healthcheck inmem` + +#### 方法一:利用内存备份文件恢复持久化meta表 +* 备份内存中的meta表:`./admincli backup inmem` +* 在master的tera.flag中添加如下配置项: + `--tera_master_meta_recovery_enabled=true` + `--tera_master_meta_recovery_file=${filename}` + 其中${filename}为meta表备份文件的全路径 +* 重启master服务:`./tera_master_control restart` +* 执行`./admincli healthcheck`和`./admincli healthcheck inmem`确认meta表修复正常 +* 特别注意:从tera.flag中删除这两行配置项 + +#### 方法二:通过admincli校验并修复异常的meta表项 +* 备份内存中的meta表:`./admincli backup inmem` +* 备份持久化的meta表:`./admincli backup` +* 停止master服务:`./tera_master_control stop` +* 【可选项】:扫描并diff备份的meta文件:`./admincli diff` +* 根据已知信息,调用put、delete、modify修复持久化的meta表 +* 执行`./admincli healthcheck`确认持久化meta表修复正常 +* 启动master服务:`./tera_master_control start` + + +# admincli ugi&role相关操作使用说明 +* 为`tera.flag`添加配置项:`--meta_cli_token=2862933555777941757` + +### 1. 功能说明 +#### 1.1 ugi update +创建或更新用户信息 + +```c +ugi update + +例子: +./admincli ugi update user1 123456 + +``` + +#### 1.2 ugi del +删除用户 + +```c +ugi del + +例子: +./admincli ugi del user1 + +``` + +#### 1.3 ugi show +查询所有的用户和密码 + +```c +ugi show + +例子: +./admincli ugi show + +``` + +#### 1.4 role add +添加role + +```c +role add + +例子: +./admincli role add role1 + +``` + + +#### 1.5 role del +删除role + +```c +role del + +例子: +./admincli role del role1 + +``` + +#### 1.6 role grant +将role授权给用户 + +```c +role grant + +例子: +./admincli role grant role_name1 user_name1 + +``` + +#### 1.7 role revoke +取消role对用户的授权 + +```c +role revoke + +例子: +./admincli role revoke role_name1 user_name1 + +``` + +#### 1.8 role show +展示所有的role信息 + +```c +role show + +例子: +./admincli role show + +``` + +# admincli设置表格访问方式的操作说明 +#### 1.1 auth set +设置表格访问方式。 + +```c +auth set +auth_policy可以是none/ugi/giano的任一种,所有的表格如果没有设置默认为none。 +如果table设置为none,这些表属于未设置,全部都不需要身份认证和鉴权,所有用户可以访问这些表; +如果table设置了ugi/giano,则按照对应的方式进行鉴权,并且要求用户按照对应方式来访问;未设置鉴权的sdk无法访问这些指定鉴权的表格 +*备注:giano是百度公司内部的身份认证和鉴权方式,外部开源版本无此代码,外部用户无需关心* + +例子: +./admincli auth test ugi +``` + +#### 1.2 auth show +展示所有表格设置的访问方式。 + +```c +auth show + +例子: +./admincli auth show + TableName AuthType + test ugi + test2 none + test3 giano +``` + +# admincli dfs-throughput-limit 使用说明 +线上发生过Case,集群因为compact失败并不停重试,导致dfs雪崩,全集群写吞吐非常高,但最终都写失败了,只能通过手动下galaxy节点恢复,成本很高。 +因此,增加dfs 读写吞吐硬限功能,强行卡死全集群读写dfs吞吐,该功能仅用于集群从有可能发生的雪崩状态中恢复。 + +使用方法 + +```bash +./admincli dfs-throughput-limit get # 获取当前集群硬限配置 +./admincli dfs-throughput-limit write $write_limit # write_limit 为全集群具体的写吞吐数值,单位Byte +./admincli dfs-throughput-limit read $read_limit # 与write_limit等价 +``` + +注:该功能仅用于tera op/rd 恢复雪崩状态集群,因此限制不会持久化,而是维护在master内存中,master重启后失效。 + +# admincli设置procedure并发限制操作说明 +#### 1.1 procedure-limit get +获取当前各procedure并发限制 + +```c +示例: +./admincli procedure-limit get +[kMerge, limit:20, in_use:0] +[kSplit, limit:10, in_use:0] +[kMove, limit:100, in_use:0] +[kLoad, limit:300, in_use:0] +[kUnload, limit:100, in_use:0] +``` + +#### 1.2 procedure-limit set +设置某一procedure并发限制 +成功会返回设置后各procedure并发限制 +命令格式: +procedure-limit set + procedure = [kMerge, kSplit, kMove, kLoad, kUnload] + limit shoud be a non-negative number + +``` +示例: +./admincli procedure-limit set kMerge 30 +[kMerge, limit:30, in_use:0] +[kSplit, limit:10, in_use:0] +[kMove, limit:100, in_use:0] +[kLoad, limit:300, in_use:0] +[kUnload, limit:100, in_use:0] +``` diff --git a/doc/tools/terautil.md b/doc/tools/terautil.md index 842b572eb..5d01e631c 100644 --- a/doc/tools/terautil.md +++ b/doc/tools/terautil.md @@ -76,3 +76,49 @@ + +集群间数据迁移后的diff工具 +### 1. 用法 +``` +./terautil diff help +``` +#### (1)准备工作 +``` +./terautil --flagfile=../conf/tera.flag diff prepare +``` +#### (2) 分布式进行求diff +``` +./terautil --flagfile=../conf/tera.flag diff run +``` +#### (3) 查看diff运行的进度 +``` +./terautil --flagfile=../conf/tera.flag diff progress +``` +#### (4) 统计diff结果并显示 +``` +./terautil --flagfile=../conf/tera.flag diff result +``` +#### (5) 删除用过的数据 +``` +./terautil --flagfile=../conf/tera.flag diff clean +``` + +### 2. 说明 +首先,需要通过配置文件指定原集群和目标集群比较的表名,通过diff_tables_map_file文件指定, +格式:tables_name1:lg1|lg2,tables_name2:lg1|lg2 +前后表名用,分割,可以指定比较哪些lg,如果不指定可以连同:都不写,表示比较所有的lg,一行一对儿表名 + +terautil的主配置文件tera.flag中,主要需要指定上述map文件,原集群的flag文件和目的集群的flag文件, +其他配置如Ins相关配置,日志相关配置,如需指定求diff数据的时间段,可以指定dump_endtime。 +其他配置使用默认即可。 +有一个比较重要的配置是ins_cluster_diff_root_path,默认值是/terautil/diff,所有diff过程需要的元数据的key都以这个为前缀 + +diff prepare +从diff_tables_map_file,读取表信息数据,转换成cf, cf对应是否multiversion数据,各tablet的范围数据,存放到nexus中,供后面diff过程使用 + +diff run +真正开始运行diff,这个可以多个实例并行运行,每个实例每开始比较一个范围会加锁 +注意:scan过程中会失败,这个失败的范围不会重试再次比较,因此当观察日志发现所有实例都运行结束了,但通过diff progress查看发现并不是所有范围都diff结束了, +这时候需要启动运行一次实例(运行几个都可以),再次观察diff progress的输出,重复这个过程知道所有范围都完成diff比较 + + diff --git a/example/onebox/bin/launch_tera.sh b/example/onebox/bin/launch_tera.sh index c6d5f2fba..ee7e4c068 100755 --- a/example/onebox/bin/launch_tera.sh +++ b/example/onebox/bin/launch_tera.sh @@ -26,9 +26,6 @@ for ((i=1; i<=$TABLETNODE_NUM; i++)); do mv ${TABLETNODE_LOG_FILE} ${TABLETNODE_LOG_FILE}.${TIME} fi LEVELDB_LOG_FILE=${CURRENT_DIR}/../log/leveldb.$i.log - if [ -f ${LEVELDB_LOG_FILE} ];then - mv ${LEVELDB_LOG_FILE} ${LEVELDB_LOG_FILE}.${TIME} - fi CACHE_PATH=${CURRENT_DIR}/../cache/tabletnode.$i if [ ! -x $CACHE_PATH ];then mkdir -p $CACHE_PATH diff --git a/example/onebox/conf/lb.flag b/example/onebox/conf/lb.flag index d6ae8c721..8ec588cf8 100644 --- a/example/onebox/conf/lb.flag +++ b/example/onebox/conf/lb.flag @@ -1,41 +1,5 @@ --tera_lb_server_port=31000 ---tera_info_log_clean_enable=false --log_dir=../log --logbufsecs=0 --v=10 ---tera_lb_debug_mode_enabled=false - ---tera_lb_meta_table_name=meta_table ---tera_lb_meta_isolate_enabled=true - ---tera_lb_load_balance_period_s=60 ---tera_lb_max_compute_steps=1000000 ---tera_lb_max_compute_steps_per_tablet=1000 ---tera_lb_max_compute_time_ms=30000 ---tera_lb_min_cost_need_balance=0.05 ---tera_lb_bad_node_safemode_percent=0.5 - ---tera_lb_move_count_cost_weight=10 ---tera_lb_tablet_max_move_num=1 - ---tera_lb_move_frequency_cost_weight=10 ---tera_lb_tablet_move_too_frequently_threshold_s=600 - ---tera_lb_abnormal_node_cost_weight=10 ---tera_lb_abnormal_node_ratio=0.5 - ---tera_lb_read_pending_node_cost_weight=10 ---tera_lb_write_pending_node_cost_weight=10 ---tera_lb_scan_pending_node_cost_weight=10 - ---tera_lb_tablet_count_cost_weight=100 ---tera_lb_size_cost_weight=100 - ---tera_lb_read_load_cost_weight=20 ---tera_lb_write_load_cost_weight=20 ---tera_lb_scan_load_cost_weight=10 - ---tera_lb_read_pending_factor=1 ---tera_lb_write_pending_factor=1 ---tera_lb_scan_pending_factor=1 diff --git a/example/onebox/conf/tera.flag b/example/onebox/conf/tera.flag index 88b33f764..ae2558a5c 100644 --- a/example/onebox/conf/tera.flag +++ b/example/onebox/conf/tera.flag @@ -14,7 +14,9 @@ --tera_local_addr=127.0.0.1 # sdk ---tera_sdk_timeout=20000 +--tera_sdk_write_timeout=20000 +--tera_sdk_read_timeout=20000 +--tera_sdk_scan_timeout=30000 # others --online_schema_update_enabled=true diff --git a/ft_test.sh b/ft_test.sh index efa3f38f0..8c3cbfc39 100755 --- a/ft_test.sh +++ b/ft_test.sh @@ -1,84 +1,3 @@ #!/bin/bash -function usage() { - echo "usage: - $0 [opts] - -d path: test temp file path - -c casename: run one case - -f just perform fetches for all tests - -r just perform runs for all tests - - e.g. $0 # all test cases - $0 -c testcase/test_put_get.py # specify a case" -} - -test_dir="test_output/functional_test" -case_name="" -fetch_without_run=false -run_without_fetch=false - -while getopts c:d:h:fr arg -do - case $arg in - c) - case_name=$OPTARG - echo "case_name: $case_name";; - d) - test_dir=$OPTARG - echo "test_dir: $test_dir";; - f) - fetch_without_run=true;; - r) - run_without_fetch=true;; - h) - usage - exit 0;; - ?) - echo "unkonw argument: $arg" - exit 1;; - esac -done - -set -x -e - -if ! $run_without_fetch; then - rm -rf $test_dir - mkdir -p $test_dir/bin - cp example/onebox/bin/kill_tera.sh $test_dir/bin - cp example/onebox/bin/launch_tera.sh $test_dir/bin - cp example/onebox/bin/config $test_dir/bin - mkdir -p $test_dir/conf - cp example/onebox/conf/tera.flag $test_dir/conf - - cp build/bin/teracli $test_dir/bin - cp build/bin/tera_master $test_dir/bin - cp build/bin/tabletserver $test_dir/bin - cp build/bin/tera_test $test_dir/bin - cp build/benchmark/tera_bench $test_dir/bin - cp build/benchmark/tera_mark $test_dir/bin - - mkdir -p $test_dir/log - mkdir -p $test_dir/data - cp -r test/testcase $test_dir/bin - cp -r test/testcase/shell_script/* $test_dir/bin - cp src/sdk/python/TeraSdk.py $test_dir/bin/testcase - cp build/lib/libtera_c.so $test_dir/bin -fi - -if $fetch_without_run; then - exit 0 -fi - -cd $test_dir/bin/ -./kill_tera.sh -./launch_tera.sh -sleep 2 - -export PYTHONPATH=../../../thirdparty/lib/:$PYTHONPATH -export PATH=../../../thirdparty/bin/:$PATH - -nosetests -s -v -x $case_name > ../log/test.log - -./kill_tera.sh -cd ../../.. -rm -rf $test_dir +exit 0 diff --git a/include/observer/notification.h b/include/observer/notification.h new file mode 100644 index 000000000..b0c164099 --- /dev/null +++ b/include/observer/notification.h @@ -0,0 +1,48 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include + +#include "tera/table.h" + +#pragma GCC visibility push(default) + +namespace tera { +namespace observer { + +class Notification { + public: + virtual ~Notification() {} + + typedef std::function Callback; + + // when TransactionType is 'kNoneTransaction' + // user can set ack callback/context as one please + virtual void SetAckCallBack(Callback callback) = 0; + virtual void SetAckContext(void* context) = 0; + virtual void* GetAckContext() = 0; + + virtual void Ack(Table* t, const std::string& row_key, const std::string& column_family, + const std::string& qualifier) = 0; + + // when TransactionType is 'kNoneTransaction' + // user can set notify callback/context as one please + virtual void SetNotifyCallBack(Callback callback) = 0; + virtual void SetNotifyContext(void* context) = 0; + virtual void* GetNotifyContext() = 0; + + virtual void Notify(Table* t, const std::string& row_key, const std::string& column_family, + const std::string& qualifier) = 0; + + // relases resource after OnNotify finished + // and delete this + virtual void Done() = 0; +}; + +} // namespace observer +} // namespace tera + +#pragma GCC visibility pop diff --git a/src/observer/executor/observer.h b/include/observer/observer.h similarity index 91% rename from src/observer/executor/observer.h rename to include/observer/observer.h index f03897a60..52a3c5a03 100644 --- a/src/observer/executor/observer.h +++ b/include/observer/observer.h @@ -2,15 +2,14 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_OBSERVER_H_ -#define TERA_OBSERVER_H_ +#pragma once #include #include "tera/client.h" #include "tera/error_code.h" #include "tera/transaction.h" -#include "observer/executor/notification.h" +#include "observer/notification.h" #pragma GCC visibility push(default) namespace tera { @@ -47,7 +46,5 @@ class Observer { }; } // namespace observer -} +} // namepsace tera #pragma GCC visibility pop - -#endif // TERA_OBSERVER_H_ diff --git a/include/observer/scanner.h b/include/observer/scanner.h new file mode 100644 index 000000000..777731ac0 --- /dev/null +++ b/include/observer/scanner.h @@ -0,0 +1,68 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include +#include +#include + +#include "observer/observer.h" +#include "tera/error_code.h" + +#pragma GCC visibility push(default) +namespace tera { +namespace observer { + +enum class ScanStrategy { kRandom = 0, kTabletBucket }; + +struct ScannerOptions { + ScanStrategy strategy; + int32_t bucket_cnt; // When strategy=kTabletShared, this available + int32_t bucket_id; // When strategy=kTabletShared, this available + + ScannerOptions() : strategy(ScanStrategy::kRandom), bucket_cnt(1), bucket_id(0) {} +}; + +class ScanHook { + public: + typedef std::pair Column; + typedef std::set Columns; + virtual ~ScanHook() {} + + // user can define self scan strategy before per round scan task + virtual void Before(const std::string& table_name, + const ScanHook::Columns& columns) { /* default noting to do */ + } + + // user can define self scan strategy after per round scan task + virtual void After(const std::string& table_name, const ScanHook::Columns& columns, + bool scan_ret) { /* default noting to do */ + } +}; + +class Scanner { + public: + static Scanner* GetScanner(); + + virtual ~Scanner() {} + + // register user define observers + // user should not destruct observers, which will be handled by scanner + virtual ErrorCode Observe(const std::string& table_name, const std::string& column_family, + const std::string& qualifier, Observer* observer) = 0; + + virtual bool Init() = 0; + + virtual bool Start() = 0; + + virtual void Exit() = 0; + + virtual void SetOptions(const ScannerOptions& options) = 0; + + virtual void SetScanHook(const std::shared_ptr& hook) = 0; +}; +} // namespace observer +} // namespace tera +#pragma GCC visibility pop diff --git a/src/observer/executor/scanner_entry.h b/include/observer/scanner_entry.h similarity index 53% rename from src/observer/executor/scanner_entry.h rename to include/observer/scanner_entry.h index ed5e5c325..8624f8dcb 100644 --- a/src/observer/executor/scanner_entry.h +++ b/include/observer/scanner_entry.h @@ -2,16 +2,11 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_OBSERVER_EXECUTOR_SCANNER_ENTRY_H_ -#define TERA_OBSERVER_EXECUTOR_SCANNER_ENTRY_H_ +#pragma once -#include -#include - -#include "common/this_thread.h" -#include "observer/executor/observer.h" -#include "tera.h" -#include "tera_entry.h" +#include "observer/observer.h" +#include "observer/scanner.h" +#include "tera/tera_entry.h" namespace tera { namespace observer { @@ -20,21 +15,20 @@ class Scanner; class ScannerEntry : public TeraEntry { public: - ScannerEntry(); - virtual ~ScannerEntry(); + ScannerEntry(); + virtual ~ScannerEntry(); - virtual bool StartServer(); + virtual bool StartServer(); virtual bool Run(); virtual void ShutdownServer(); virtual ErrorCode Observe(); + virtual void SetOptions(const ScannerOptions& options); Scanner* GetScanner() const; + private: - std::unique_ptr scanner_; + ScannerOptions options_; }; - } // namespace observer } // namespace tera - -#endif // TERA_OBSERVER_EXECUTOR_SCANNER_ENTRY_H_ \ No newline at end of file diff --git a/include/tera.h b/include/tera.h index 43949dacc..566df8490 100644 --- a/include/tera.h +++ b/include/tera.h @@ -8,17 +8,27 @@ // doc/sdk_dev_guide.md // -#ifndef TERA_TERA_H_ -#define TERA_TERA_H_ +#ifndef TERA_TERA_H_ +#define TERA_TERA_H_ #include "tera/client.h" #include "tera/error_code.h" #include "tera/mutation.h" +#include "tera/batch_mutation.h" #include "tera/reader.h" #include "tera/scan.h" #include "tera/table.h" #include "tera/table_descriptor.h" +#include "tera/tera_entry.h" #include "tera/transaction.h" #include "tera/utils.h" +#include "tera/filter.h" +#include "tera/value_filter.h" +#include "tera/filter_list.h" + +#include "observer/notification.h" +#include "observer/observer.h" +#include "observer/scanner.h" +#include "observer/scanner_entry.h" #endif // TERA_TERA_H_ diff --git a/include/tera/batch_mutation.h b/include/tera/batch_mutation.h new file mode 100644 index 000000000..09d7cc69b --- /dev/null +++ b/include/tera/batch_mutation.h @@ -0,0 +1,123 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Author: baorenyi@baidu.com +// +// BatchMutation only ensure all the 'mutation' in this batch will sent to same +// 'tabletserver' and same 'tablet' through one rpc request. +// + +#ifndef TERA_BATCH_MUTATION_H_ +#define TERA_BATCH_MUTATION_H_ + +#include +#include + +#include "error_code.h" + +#pragma GCC visibility push(default) +namespace tera { + +class Table; + +class BatchMutation { +public: + // Set the database entry for "key" to "value". The database should be + // created as a key-value storage. + // "ttl"(time-to-live) is optional, "value" will expire after "ttl" + // second. If ttl <= 0, "value" never expire. + virtual void Put(const std::string& row_key, + const std::string& value, + int32_t ttl = -1) = 0; + + // Set the database entry for the specified column to "value". + // "timestamp"(us) is optional, current time by default. + virtual void Put(const std::string& row_key, + const std::string& family, + const std::string& qualifier, + const std::string& value, + int64_t timestamp = -1) = 0; + + // Put an integer into a cell. This cell can be used as a counter. + virtual void Put(const std::string& row_key, + const std::string& family, + const std::string& qualifier, + const int64_t value, + int64_t timestamp = -1) = 0; + + // Add "delta" to a specified cell. "delta" can be negative. + virtual void Add(const std::string& row_key, + const std::string& family, + const std::string& qualifier, + const int64_t delta) = 0; + + // "value" will take effect when specified cell does not exist. + // Otherwise, "value" will be discarded. + virtual void PutIfAbsent(const std::string& row_key, + const std::string& family, + const std::string& qualifier, + const std::string& value) = 0; + + // Append "value" to a specified cell. + virtual void Append(const std::string& row_key, + const std::string& family, + const std::string& qualifier, + const std::string& value) = 0; + + // Delete updates of a specified row/columnfamily/qualifier before "timestamp"(us). + // Delete all versions by default. + // "timestamp" will be ignored in key-value mode. + virtual void DeleteRow(const std::string& row_key, + int64_t timestamp = -1) = 0; + virtual void DeleteFamily(const std::string& row_key, + const std::string& family, + int64_t timestamp = -1) = 0; + virtual void DeleteColumns(const std::string& row_key, + const std::string& family, + const std::string& qualifier, + int64_t timestamp = -1) = 0; + // Delete the cell specified by "family"&"qualifier"&"timestamp". + virtual void DeleteColumn(const std::string& row_key, + const std::string& family, + const std::string& qualifier, + int64_t timestamp = -1) = 0; + + // The status of this batch mutation. Returns kOK on success and a non-OK + // status on error. + virtual const ErrorCode& GetError() = 0; + + // Users are allowed to register callback/context a two-tuples that + // will be invoked when this batch mutation is finished. + typedef void (*Callback)(BatchMutation* param); + virtual void SetCallBack(Callback callback) = 0; + virtual Callback GetCallBack() = 0; + virtual void SetContext(void* context) = 0; + virtual void* GetContext() = 0; + + // Set/get timeout(ms). + virtual void SetTimeOut(int64_t timeout_ms) = 0; + virtual int64_t TimeOut() = 0; + + // Get the mutation count of one row in this batch mutaion. + virtual uint32_t MutationNum(const std::string& row_key) = 0; + // Get total size of all mutations, including size of rowkey, columnfamily, + // qualifier, value and timestamp. + virtual uint32_t Size() = 0; + + virtual bool IsAsync() = 0; + + // reset all status and context of this BatchMutation to init + virtual void Reset() = 0; + + BatchMutation() {}; + virtual ~BatchMutation() {}; + +private: + BatchMutation(const BatchMutation&); + void operator=(const BatchMutation&); +}; +} // namespace tera +#pragma GCC visibility pop + +#endif // TERA_BATCH_MUTATION_H_ diff --git a/include/tera/client.h b/include/tera/client.h index fee87fbcf..dc8ceee41 100644 --- a/include/tera/client.h +++ b/include/tera/client.h @@ -13,10 +13,10 @@ #include "table.h" #include "table_descriptor.h" #include "transaction.h" +#include "hash.h" #pragma GCC visibility push(default) namespace tera { - class Client { public: // Create a new client @@ -32,6 +32,9 @@ class Client { // Open a table by name. // This operation could fail due to zookeeper down, meta not avaliable, table not exists, etc. virtual Table* OpenTable(const std::string& table_name, ErrorCode* err) = 0; + virtual Table* OpenTable(const std::string& table_name, + std::function hash_method, + ErrorCode* err) = 0; // Create a new table with specified descriptor. virtual bool CreateTable(const TableDescriptor& desc, ErrorCode* err) = 0; @@ -39,6 +42,8 @@ class Client { virtual bool CreateTable(const TableDescriptor& desc, const std::vector& tablet_delim, ErrorCode* err) = 0; + // Create a new hash table with key space (aka [0x0, 0xFFFFFFFFFFFFFFFF]) devied into hash_num equal parts. + virtual bool CreateTable(const TableDescriptor& desc, int64_t hash_num, ErrorCode* err) = 0; // Update table schema. User should call UpdateCheck to check if the update operation is complete. virtual bool UpdateTableSchema(const TableDescriptor& desc, ErrorCode* err) = 0; diff --git a/include/tera/error_code.h b/include/tera/error_code.h index ad6ab2b64..a1ae74dfd 100644 --- a/include/tera/error_code.h +++ b/include/tera/error_code.h @@ -28,6 +28,9 @@ class ErrorCode { kNotImpl = 9, kTxnFail = 10, + kAuthBadParam = 21, + kAuthLoginFailed = 22, + // only for global transaction error kGTxnDataTooLarge = 101, kGTxnNotSupport = 102, diff --git a/include/tera/filter.h b/include/tera/filter.h new file mode 100644 index 000000000..1da09ea2f --- /dev/null +++ b/include/tera/filter.h @@ -0,0 +1,102 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// The Designs of Filter and related codes are inspired by hbase which is licensed under +// Apache 2.0 License (found in the LICENSE.Apache file in the root directory). Please refer to +// https://hbase.apache.org/2.0/apidocs/org/apache/hadoop/hbase/filter/Filter.html +// to see more detailed design of hbase filter. + +#pragma once + +#include +#include +#include +#include "tera/error_code.h" + +namespace tera { +namespace filter { + +/* + * How to use filter: + * 1. New a concrete filter object. A concrete filter may be a value filter or a filter list, + * please refer to value_filter.h and filter_list.h + * 2. Set the filter to the ScanDescriptor by using SetFilter method. + * 3. Then the filter will work when scanning. + */ + +enum FilterType { + kFilterList, // filter list type + kValueFilter, // value filter type + kUnDefinedFilter // undefined type +}; + +class Filter; + +using FilterPtr = std::shared_ptr; + +using ColumnPair = std::pair; +using ColumnSet = std::set; + +/* + * User do NOT need to use this class. + * This is a base filter class, all filter classes inherit from this class. + * The internal of tera will use this class. + */ +class Filter { + public: + enum ReturnCode { + kIncludeCurCell, // current cell included, user can use filter for next cell + kNotIncludeCurAndLeftCellOfRow, // current cell not included, left cells of current row also + // not included, user can use filter for next row + kUndefinedRC // invalid + }; + + public: + /* + * before filter one row, this method must be used to clean or reinitialize the env of the filter. + * if not do this, the filter will do the wrong behavior for the row. + */ + virtual void Reset() = 0; + + /* + * for each cell, all filters will use this method, do the real filter behavior, and set some + * internal member of the filter. The ReturnCode will specify that how the filter will travel + * in the current row: the filter can finish travel this row, or travel to the next cell of + * this row for example. + */ + virtual ReturnCode FilterCell(const std::string& column_family, + const std::string& column_qualifier, const std::string& value) = 0; + + /* + * for each row, after using FilterCell for each cell of the row, in the end, this method will be + * used to justify whether this row should be filtered. Return true if this row should be + * filtered, that means not output in scanning or reading, or return false. + */ + virtual bool FilterRow() = 0; + + /* + * these methods below are used in transmitting filters from sdk to tabletserver + */ + virtual FilterType Type() = 0; + virtual bool SerializeTo(std::string* serialized_filter) = 0; + virtual bool ParseFrom(const std::string& serialized_filter) = 0; + virtual void GetAllColumn(ColumnSet* filter_column_set) = 0; +}; + +class FilterBase : public Filter { + public: + FilterBase(); + virtual ~FilterBase(); + virtual void Reset(); + virtual ReturnCode FilterCell(const std::string& column_family, + const std::string& column_qualifier, const std::string& value); + virtual bool FilterRow(); + virtual FilterType Type(); + virtual bool SerializeTo(std::string* serialized_filter); + virtual bool ParseFrom(const std::string& serialized_filter); + virtual void GetAllColumn(ColumnSet* filter_column_set); +}; + +} // namespace filter +} // namesapce tera diff --git a/include/tera/filter_comparator.h b/include/tera/filter_comparator.h new file mode 100644 index 000000000..6876128c0 --- /dev/null +++ b/include/tera/filter_comparator.h @@ -0,0 +1,186 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// The Designs of Filter and related codes are inspired by hbase which is licensed under +// Apache 2.0 License (found in the LICENSE.Apache file in the root directory). Please refer to +// https://hbase.apache.org/2.0/apidocs/org/apache/hadoop/hbase/filter/Filter.html +// to see more detailed design of hbase filter. + +#pragma once + +#include +#include +#include "tera/error_code.h" + +namespace tera { +namespace filter { + +/* + * If you want to output the row where there is a value which is less than the ref_value in the + * filter (or say in the comparator) in the scanning or reading, please use LESS. + * Other values are similar. + */ +enum class CompareOperator { + kLess, // < + kLessOrEqual, // <= + kEqual, // == + kNotEqual, // != + kGreaterOrEqual, // >= + kGreater, // > + kNoOp // invalid +}; + +enum class ComparatorType { + kIntegerComparator, // IntegerComparator + kDecimalComparator, // DecimalComparator + kBinaryComparator // BinaryComparator +}; + +enum class IntegerValueType { + kInt64, // value is regarded as int64_t + kUint64, // value is regarded as uint64_t + kInt32, // value is regarded as int32_t + kUint32, // value is regarded as uint32_t + kInt16, // value is regarded as int16_t + kUint16, // value is regarded as uint16_t + kInt8, // value is regarded as int8_t + kUint8, // value is regarded as uint8_t + kUnknown // value type is illigal +}; + +class FilterComparator; +class IntegerComparator; +class DecimalComparator; +class BinaryComparator; + +using FilterComparatorPtr = std::shared_ptr; +using IntegerComparatorPtr = std::shared_ptr; +using DecimalComparatorPtr = std::shared_ptr; +using BinaryComparatorPtr = std::shared_ptr; + +/* + * User do NOT need using this class,this is a base class for comparator. + */ +class FilterComparator { + public: + virtual ComparatorType Type() = 0; + virtual int CompareWith(const std::string& value) = 0; + virtual bool SerializeTo(std::string* serialized_comparator) = 0; + virtual bool ParseFrom(const std::string& serialized_comparator) = 0; +}; + +/* + * User can use this class for making a comparator for filtering the value which is integer. + * Just Using the Constructor which have two parameters is OK. + */ +class IntegerComparator : public FilterComparator { + // User Interface + public: + /* + * Use this method to New a object of this class. Just New, and then transmit the object to + * the filter. Do not need do any other things. + * Note that the value will be transfered inside the method by the value_type. + * User must use std::make_shared to New the object and assign it to IntegerComparatorPtr. + */ + IntegerComparator(IntegerValueType value_type, uint64_t value); + + /* + * When writing and reading integer values, users can use these two methods + * to transfer the integer. + */ + static bool EncodeInteger(IntegerValueType value_type, uint64_t value, + std::string* encoded_value); + static bool DecodeInteger(IntegerValueType value_type, const std::string& value, + uint64_t* decoded_value); + + // Internal use + public: + IntegerComparator(); + virtual ~IntegerComparator(); + virtual ComparatorType Type(); + virtual int CompareWith(const std::string& value); + virtual bool SerializeTo(std::string* serialized_comparator); + virtual bool ParseFrom(const std::string& serialized_comparator); + + private: + template + int Compare(T v1, T v2) { + if (v1 < v2) { + return -1; + } else if (v1 > v2) { + return 1; + } else { + return 0; + } + } + + private: + IntegerValueType value_type_; + uint64_t integer_value_; +}; + +/* + * User can use this class for making a comparator for filtering the value which is decimal. + * Just Using the explicit Constructor is OK. + */ +class DecimalComparator : public FilterComparator { + // User Interface + public: + /* + * Use this method to New a object of this class. Just New, and then transmit the object to + * the filter. Do not need do any other things. + * User must use std::make_shared to New the object and assign it to DecimalComparatorPtr. + */ + explicit DecimalComparator(double value); + + /* + * When writing and reading decimal values, users can use these two methods + * to transfer the decimal value. + */ + static std::string EncodeDecimal(double value); + static double DecodeDecimal(const std::string& value); + + // Internal use + public: + DecimalComparator(); + virtual ~DecimalComparator(); + virtual ComparatorType Type(); + virtual int CompareWith(const std::string& value); + virtual bool SerializeTo(std::string* serialized_comparator); + virtual bool ParseFrom(const std::string& serialized_comparator); + + private: + double decimal_value_; +}; + +/* + * User can use this class for making a comparator for filtering the value which is binary (string + * is also binary). + * Just Using the explicit Constructor is OK. + */ +class BinaryComparator : public FilterComparator { + // User Interface + public: + /* + * Use this method to New a object of this class. Just New, and then transmit the object to + * the filter. Do not need do any other things. + * User must use std::make_shared to New the object and assign it to BinaryComparatorPtr. + */ + explicit BinaryComparator(const std::string& value); + + // Internal use + public: + BinaryComparator(); + virtual ~BinaryComparator(); + virtual ComparatorType Type(); + virtual int CompareWith(const std::string& value); + virtual bool SerializeTo(std::string* serialized_comparator); + virtual bool ParseFrom(const std::string& serialized_comparator); + + private: + std::string value_; +}; + +} // namesapce filter +} // namesapce tera diff --git a/include/tera/filter_list.h b/include/tera/filter_list.h new file mode 100644 index 000000000..306f046c8 --- /dev/null +++ b/include/tera/filter_list.h @@ -0,0 +1,69 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// The Designs of Filter and related codes are inspired by hbase which is licensed under +// Apache 2.0 License (found in the LICENSE.Apache file in the root directory). Please refer to +// https://hbase.apache.org/2.0/apidocs/org/apache/hadoop/hbase/filter/Filter.html +// to see more detailed design of hbase filter. + +#pragma once + +#include +#include +#include "tera/filter.h" + +namespace tera { +namespace filter { + +class FilterList; +using FilterListPtr = std::shared_ptr; + +class FilterListBase; +class FilterList : public FilterBase { + // User interface + public: + /* + * if you want to output the rows in which there is a value of family "cf1" and qualifier "qu1", + * which is >2 and <5, in scanning, you can New a filter list with AND Operator, which have + * two value filters, one with a CompareOperator GREATER and a IntegerComparator with + * a ref_value 2, the other with a CompareOperator LESS and a IntegerComparator with + * a ref_value 5. + */ + enum Operator { + kAnd, // sub_filter_1 && sub_filter_2 && ..., all sub filters connected with AND + kOr, // sub_filter_1 || sub_filter_2 || ..., all sub filters connected with OR + kInvalidOp // invalid op + }; + + /* + * User must New a filter list object by Using this methed, and must use std::make_shared method + * and assign the object to FilterListPtr. + */ + explicit FilterList(Operator op); + + /* + * Use this method to add filter to this filter list. Add filters one by one. + */ + bool AddFilter(const FilterPtr& filter); + + // internal use + public: + FilterList(); + virtual ~FilterList(); + virtual FilterType Type(); + virtual void Reset(); + virtual ReturnCode FilterCell(const std::string& column_family, + const std::string& column_qualifier, const std::string& value); + virtual bool FilterRow(); + virtual bool SerializeTo(std::string* serialized_filter); + virtual bool ParseFrom(const std::string& serialized_filter); + virtual void GetAllColumn(ColumnSet* filter_column_set); + + private: + Operator op_; + FilterListBase* filter_list_base_; +}; + +} // namesapce filter +} // namesapce tera diff --git a/include/tera/hash.h b/include/tera/hash.h new file mode 100644 index 000000000..d2a44f580 --- /dev/null +++ b/include/tera/hash.h @@ -0,0 +1,11 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#pragma once +#include + +#pragma GCC visibility push(default) + +namespace tera { +std::string MurmurHash(const std::string& user_key); +} diff --git a/include/tera/scan.h b/include/tera/scan.h index c9023f9b6..8d84ef231 100644 --- a/include/tera/scan.h +++ b/include/tera/scan.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_SCAN_H_ -#define TERA_SCAN_H_ +#ifndef TERA_SCAN_H_ +#define TERA_SCAN_H_ #include #include @@ -13,6 +13,7 @@ #include #include "error_code.h" +#include "filter.h" #pragma GCC visibility push(default) namespace tera { @@ -23,111 +24,109 @@ namespace tera { // Caller should delete iterator when it is no longer needed. // The returned iterator should be deleted before this table is deleted. class ResultStream { -public: - ResultStream() {} - virtual ~ResultStream() {} - // Check wether iterator was positioned at the end, and return error code for failure check. - virtual bool Done(ErrorCode* err = NULL) = 0; - - // Moves to next cell. After this call, Done() is true iff - // the iterator was not positioned at the last cell in the source or scan error occurs. - virtual void Next() = 0; - - // Return the row key name in current cell. The current cell's content changes only until - // the next modification of the iterator. - virtual std::string RowName() const = 0; - // Return column family in current cell. - virtual std::string Family() const = 0; - // Return qualifier in current cell. - virtual std::string Qualifier() const = 0; - // Return timestamp in current cell. - virtual int64_t Timestamp() const = 0; - // Return value in current cell. - virtual std::string Value() const = 0; - virtual int64_t ValueInt64() const = 0; - - // DEPRECATED - virtual bool LookUp(const std::string& row_key) = 0; - // Return column in current cell, which looks like cf:qualifier. - // Use Family():Qualifier() instead. - virtual std::string ColumnName() const = 0; - -private: - ResultStream(const ResultStream&); - void operator=(const ResultStream&); + public: + ResultStream() {} + virtual ~ResultStream() {} + // Check wether iterator was positioned at the end, and return error code for failure check. + virtual bool Done(ErrorCode* err = NULL) = 0; + + // Moves to next cell. After this call, Done() is true iff + // the iterator was not positioned at the last cell in the source or scan error occurs. + virtual void Next() = 0; + // Return scan total data size + virtual uint64_t GetDataSize() const = 0; + // Return scan total Row Count(kv count) + virtual uint64_t GetRowCount() const = 0; + // Return scan last key + virtual std::string GetLastKey() const = 0; + // cancel current scan task + virtual void Cancel() = 0; + + // Return the row key name in current cell. The current cell's content changes only until + // the next modification of the iterator. + virtual std::string RowName() const = 0; + // Return column family in current cell. + virtual std::string Family() const = 0; + // Return qualifier in current cell. + virtual std::string Qualifier() const = 0; + // Return timestamp in current cell. + virtual int64_t Timestamp() const = 0; + // Return value in current cell. + virtual std::string Value() const = 0; + virtual int64_t ValueInt64() const = 0; + + // DEPRECATED + virtual bool LookUp(const std::string& row_key) = 0; + // Return column in current cell, which looks like cf:qualifier. + // Use Family():Qualifier() instead. + virtual std::string ColumnName() const = 0; + + private: + ResultStream(const ResultStream&); + void operator=(const ResultStream&); }; class ScanDescImpl; // Describe a scan job in tera client endian. Control scan behaviour. class ScanDescriptor { -public: - // 'rowkey' is the start row key in the scan job. - ScanDescriptor(const std::string& rowkey); - ~ScanDescriptor(); - // the end row key in the scan job, which means scan all keys less than the end row key. - // Not required. - void SetEnd(const std::string& rowkey); - - // Set target column family for the scan result, - // which likes the SQL statement (SELECT cf1, cf2, ..., cfn From Table). - void AddColumnFamily(const std::string& cf); - - // Set target column for the scan result, - // which likes the SQL statement (SELECT cf1:qu, cf2:qu, ..., cfn:qu From Table). - void AddColumn(const std::string& cf, const std::string& qualifier); - - // Set max version number per column. - void SetMaxVersions(int32_t versions); - - // Set the the max qualifiers of each column family when read this row - // This is useful when a column family contains too many qualifiers - // If this value is not set, the default value is std::numeric_limits::max() - void SetMaxQualifiers(uint64_t max_qualifiers); - - // Set time range for the scan result, - // which likes the SQL statement (SELECT * from Table WHERE timestamp in [ts_start, ts_end]). - // Return the newest value first. - void SetTimeRange(int64_t ts_end, int64_t ts_start); - - // Set batch scan mode, which largely speeds up scan task. - void SetAsync(bool async); - // Test the scan jobs, whether in batch scan mode. - bool IsAsync() const; - - // Set timeout for each internal scan jobs, which avoids long-term scan jobs to trigger rpc's timeout. - // Not required. - void SetPackInterval(int64_t timeout); - - // Set buffersize for each internal scan jobs, which avoids scan result buffer growing too much. - // Default: 64KB - void SetBufferSize(int64_t buf_size); - - // Set the limitation of cell number for each internal scan jobs, - // which acquires lower latency in interactive scan task. - // Not required. - void SetNumberLimit(int64_t number_limit); - int64_t GetNumberLimit(); - - // EXPRIMENTAL - bool SetFilter(const std::string& schema); - typedef bool (*ValueConverter)(const std::string& in, - const std::string& type, - std::string* out); - // Set custom defined value convert funtion - void SetValueConverter(ValueConverter converter); - - ScanDescImpl* GetImpl() const; - - // DEVELOPING - void SetSnapshot(uint64_t snapshot_id); - -private: - ScanDescriptor(const ScanDescriptor&); - void operator=(const ScanDescriptor&); - ScanDescImpl* impl_; + public: + // 'rowkey' is the start row key in the scan job. + ScanDescriptor(const std::string& rowkey); + ~ScanDescriptor(); + // the end row key in the scan job, which means scan all keys less than the end row key. + // Not required. + void SetEnd(const std::string& rowkey); + + // Set target column family for the scan result, + // which likes the SQL statement (SELECT cf1, cf2, ..., cfn From Table). + void AddColumnFamily(const std::string& cf); + + // Set target column for the scan result, + // which likes the SQL statement (SELECT cf1:qu, cf2:qu, ..., cfn:qu From Table). + void AddColumn(const std::string& cf, const std::string& qualifier); + + // Set max version number per column. + void SetMaxVersions(int32_t versions); + + // Set the the max qualifiers of each column family when read this row + // This is useful when a column family contains too many qualifiers + // If this value is not set, the default value is std::numeric_limits::max() + void SetMaxQualifiers(uint64_t max_qualifiers); + + // Set time range for the scan result, + // which likes the SQL statement (SELECT * from Table WHERE timestamp in [ts_start, ts_end]). + // Return the newest value first. + void SetTimeRange(int64_t ts_end, int64_t ts_start); + + // Set timeout for each internal scan jobs, which avoids long-term scan jobs to trigger rpc's + // timeout. + // Not required. + void SetPackInterval(int64_t timeout); + + // Set buffersize for each internal scan jobs, which avoids scan result buffer growing too much. + // Default: 64KB + void SetBufferSize(int64_t buf_size); + + // Set the limitation of cell number for each internal scan jobs, + // which acquires lower latency in interactive scan task. + // Not required. + void SetNumberLimit(int64_t number_limit); + int64_t GetNumberLimit(); + + bool SetFilter(const filter::FilterPtr& filter); + + ScanDescImpl* GetImpl() const; + + // DEVELOPING + void SetSnapshot(uint64_t snapshot_id); + + private: + ScanDescriptor(const ScanDescriptor&); + void operator=(const ScanDescriptor&); + ScanDescImpl* impl_; }; -} // namespace tera +} // namespace tera #pragma GCC visibility pop #endif // TERA_SCAN_H_ diff --git a/include/tera/table.h b/include/tera/table.h index 3f662be12..9a9c5a99b 100644 --- a/include/tera/table.h +++ b/include/tera/table.h @@ -8,12 +8,15 @@ #include #include #include +#include #include "error_code.h" +#include "batch_mutation.h" #include "mutation.h" #include "reader.h" #include "scan.h" #include "table_descriptor.h" +#include "hash.h" #pragma GCC visibility push(default) namespace tera { @@ -33,6 +36,7 @@ struct TabletInfo { std::string status; }; +class BatchMutation; class RowMutation; class RowReader; class Transaction; @@ -44,6 +48,10 @@ class Table { // Return a row mutation handle. User should delete it when it is no longer // needed. virtual RowMutation* NewRowMutation(const std::string& row_key) = 0; + + // Return a batch mutation handle. User should delete it when it is no longer + // needed. + virtual BatchMutation* NewBatchMutation() = 0; // Apply the specified row_mutation(s) to the database. Support batch put. // Users can set a callback in "row_mutation" to activate async put. // Use RowMutation::GetError() to check return code. @@ -139,6 +147,12 @@ class Table { virtual bool Get(const std::string& row_key, const std::string& family, const std::string& qualifier, int64_t* value, ErrorCode* err, uint64_t snapshot_id) = 0; + virtual bool IsHashTable() = 0; + virtual std::function GetHashMethod() = 0; + virtual bool GetTablet(const std::string& row_key, std::string* tablet) = 0; + + // For BatchMutation + virtual void ApplyMutation(BatchMutation* batch_mutation) = 0; Table() {} virtual ~Table() {} diff --git a/include/tera/table_descriptor.h b/include/tera/table_descriptor.h index 3b4377c1a..901e963d9 100644 --- a/include/tera/table_descriptor.h +++ b/include/tera/table_descriptor.h @@ -8,8 +8,8 @@ // by string which is easier. See more: // https://github.com/baidu/tera/blob/master/doc/sdk_reference/table_descriptor.md -#ifndef TERA_TABLE_DESCRIPTOR_ -#define TERA_TABLE_DESCRIPTOR_ +#ifndef TERA_TABLE_DESCRIPTOR_ +#define TERA_TABLE_DESCRIPTOR_ #include #include @@ -21,209 +21,216 @@ extern const int64_t kLatestTimestamp; extern const int64_t kOldestTimestamp; struct ACL { - int32_t owner; - int32_t role; - int64_t acl; + int32_t owner; + int32_t role; + int64_t acl; }; class ColumnFamilyDescriptor { -public: - // Returns name of this column family - virtual const std::string& Name() const = 0; - - // Set/get TTL(in second) of cells of this column family. - virtual void SetTimeToLive(int32_t ttl) = 0; - virtual int32_t TimeToLive() const = 0; - - // Set/get maximum versions of cells of this column family. - virtual void SetMaxVersions(int32_t max_versions) = 0; - virtual int32_t MaxVersions() const = 0; - - // Get name of locality group which this column family belong to. - virtual const std::string& LocalityGroup() const = 0; - - // Get internal id. - virtual int32_t Id() const = 0; - - // DEVELOPING - virtual void SetType(const std::string& type) = 0; - virtual const std::string& Type() const = 0; - virtual void SetMinVersions(int32_t min_versions) = 0; - virtual int32_t MinVersions() const = 0; - virtual void SetDiskQuota(int64_t quota) = 0; - virtual int64_t DiskQuota() const = 0; - virtual void SetAcl(ACL acl) = 0; - virtual ACL Acl() const = 0; - virtual void EnableGlobalTransaction() = 0; - virtual void DisableGlobalTransaction() = 0; - virtual bool GlobalTransaction() const = 0; - virtual void EnableNotify() = 0; - virtual void DisableNotify() = 0; - virtual bool IsNotifyEnabled() const = 0; - - ColumnFamilyDescriptor() {} - virtual ~ColumnFamilyDescriptor() {} - -private: - ColumnFamilyDescriptor(const ColumnFamilyDescriptor&); - void operator=(const ColumnFamilyDescriptor&); + public: + // Returns name of this column family + virtual const std::string& Name() const = 0; + + // Set/get TTL(in second) of cells of this column family. + virtual void SetTimeToLive(int32_t ttl) = 0; + virtual int32_t TimeToLive() const = 0; + + // Set/get maximum versions of cells of this column family. + virtual void SetMaxVersions(int32_t max_versions) = 0; + virtual int32_t MaxVersions() const = 0; + + // Get name of locality group which this column family belong to. + virtual const std::string& LocalityGroup() const = 0; + + // Get internal id. + virtual int32_t Id() const = 0; + + // DEVELOPING + virtual void SetType(const std::string& type) = 0; + virtual const std::string& Type() const = 0; + virtual void SetMinVersions(int32_t min_versions) = 0; + virtual int32_t MinVersions() const = 0; + virtual void SetDiskQuota(int64_t quota) = 0; + virtual int64_t DiskQuota() const = 0; + virtual void SetAcl(ACL acl) = 0; + virtual ACL Acl() const = 0; + virtual void EnableGlobalTransaction() = 0; + virtual void DisableGlobalTransaction() = 0; + virtual bool GlobalTransaction() const = 0; + virtual void EnableNotify() = 0; + virtual void DisableNotify() = 0; + virtual bool IsNotifyEnabled() const = 0; + + ColumnFamilyDescriptor() {} + virtual ~ColumnFamilyDescriptor() {} + + private: + ColumnFamilyDescriptor(const ColumnFamilyDescriptor&); + void operator=(const ColumnFamilyDescriptor&); }; // Describes compress type for a locality group enum CompressType { - kNoneCompress = 1, - kSnappyCompress = 2, + kNoneCompress = 1, + kSnappyCompress = 2, }; // Describes store type for a locality group enum StoreType { - kInDisk = 0, - kInFlash = 1, - kInMemory = 2, + kInDisk = 0, + kInFlash = 1, + kInMemory = 2, }; class LocalityGroupDescriptor { -public: - // Returns name of this locality group - virtual const std::string& Name() const = 0; + public: + // Returns name of this locality group + virtual const std::string& Name() const = 0; - // Set/get store medium. - virtual void SetStore(StoreType type) = 0; - virtual StoreType Store() const = 0; + // Set/get store medium. + virtual void SetStore(StoreType type) = 0; + virtual StoreType Store() const = 0; - // Set/get block size in KB. - virtual void SetBlockSize(int block_size) = 0; - virtual int BlockSize() const = 0; + // Set/get block size in KB. + virtual void SetBlockSize(int block_size) = 0; + virtual int BlockSize() const = 0; - // Set/get sst size in MB. - virtual int32_t SstSize() const = 0; - virtual void SetSstSize(int32_t sst_size) = 0; + // Set/get sst size in MB. + virtual int32_t SstSize() const = 0; + virtual void SetSstSize(int32_t sst_size) = 0; - // Set/get compress type. - virtual void SetCompress(CompressType type) = 0; - virtual CompressType Compress() const = 0; + // Set/get compress type. + virtual void SetCompress(CompressType type) = 0; + virtual CompressType Compress() const = 0; - // Set/get if use bloomfilter. - virtual void SetUseBloomfilter(bool use_bloomfilter) = 0; - virtual bool UseBloomfilter() const = 0; + // Set/get if use bloomfilter. + virtual void SetUseBloomfilter(bool use_bloomfilter) = 0; + virtual bool UseBloomfilter() const = 0; - // Memtable on leveldb (disable/enable) - virtual bool UseMemtableOnLeveldb() const = 0; - virtual void SetUseMemtableOnLeveldb(bool use_mem_ldb) = 0; + // Memtable on leveldb (disable/enable) + virtual bool UseMemtableOnLeveldb() const = 0; + virtual void SetUseMemtableOnLeveldb(bool use_mem_ldb) = 0; - // Memtable-LDB write buffer size - virtual int32_t MemtableLdbWriteBufferSize() const = 0; - virtual void SetMemtableLdbWriteBufferSize(int32_t buffer_size) = 0; + // Memtable-LDB write buffer size + virtual int32_t MemtableLdbWriteBufferSize() const = 0; + virtual void SetMemtableLdbWriteBufferSize(int32_t buffer_size) = 0; - // Memtable-LDB block size - virtual int32_t MemtableLdbBlockSize() const = 0; - virtual void SetMemtableLdbBlockSize(int32_t block_size) = 0; + // Memtable-LDB block size + virtual int32_t MemtableLdbBlockSize() const = 0; + virtual void SetMemtableLdbBlockSize(int32_t block_size) = 0; - // Get internal id. - virtual int32_t Id() const = 0; + // Get internal id. + virtual int32_t Id() const = 0; - LocalityGroupDescriptor() {} - virtual ~LocalityGroupDescriptor() {} + LocalityGroupDescriptor() {} + virtual ~LocalityGroupDescriptor() {} -private: - LocalityGroupDescriptor(const LocalityGroupDescriptor&); - void operator=(const LocalityGroupDescriptor&); + private: + LocalityGroupDescriptor(const LocalityGroupDescriptor&); + void operator=(const LocalityGroupDescriptor&); }; // Describes internal raw key type enum RawKeyType { - kReadable = 0, - kBinary = 1, - kTTLKv = 2, - kGeneralKv = 3, + kReadable = 0, + kBinary = 1, + kTTLKv = 2, + kGeneralKv = 3, }; class TableDescImpl; class TableDescriptor { -public: - // Only {[a-z],[A-Z],[0-9],'_','-'} are allowed in table name. - // Length of a table name should be less than 256. - TableDescriptor(const std::string& name = ""); - ~TableDescriptor(); - - // Set/Get table name - void SetTableName(const std::string& name); - std::string TableName() const; - - // Add a locality group named "lg_name". - // Only {[a-z],[A-Z],[0-9],'_','-'} are allowed in locality group name. - // Length of a locality group name should be less than 256. - LocalityGroupDescriptor* AddLocalityGroup(const std::string& lg_name); - // Remove a locality group by name. Operation returns false if there is a - // column family in this locality group. - bool RemoveLocalityGroup(const std::string& lg_name); - // Get locality group handle by id. - const LocalityGroupDescriptor* LocalityGroup(int32_t id) const; - // Get locality group handle by name. - const LocalityGroupDescriptor* LocalityGroup(const std::string& lg_name) const; - // Return locality group number in this table. - int32_t LocalityGroupNum() const; - - // Add a column family named "cf_name" into the locality group named - // "lg_name". - // Only {[a-z],[A-Z],[0-9],'_','-'} are allowed in column family name. - // Length of a column family name should be less than 256. - ColumnFamilyDescriptor* AddColumnFamily(const std::string& cf_name, - const std::string& lg_name = "lg0"); - // Remove a column family by name. - void RemoveColumnFamily(const std::string& cf_name); - // Get column family handle by id. - const ColumnFamilyDescriptor* ColumnFamily(int32_t id) const; - // Get column family handle by name. - const ColumnFamilyDescriptor* ColumnFamily(const std::string& cf_name) const; - // Return column family number in this table. - int32_t ColumnFamilyNum() const; - - // Set/get raw key type. - void SetRawKey(RawKeyType type); - RawKeyType RawKey() const; - - // Set/get the split size in MB. - void SetSplitSize(int64_t size); - int64_t SplitSize() const; - - // Set/get the merge size in MB. - // mergesize should be less than splitsize / 3. - void SetMergeSize(int64_t size); - int64_t MergeSize() const; - - // Enable/disable write-ahead-log on this table. - void DisableWal(); - bool IsWalDisabled() const; - - // Enable/disable transaction on this table. - void EnableTxn(); - bool IsTxnEnabled() const; - - // Set/get admin of this table. - void SetAdmin(const std::string& name); - std::string Admin() const; - - // Set/get admin group of this table. - void SetAdminGroup(const std::string& name); - std::string AdminGroup() const; - - // DEVELOPING - int32_t AddSnapshot(uint64_t snapshot); - uint64_t Snapshot(int32_t id) const; - int32_t SnapshotNum() const; - - // DEPRECATED - LocalityGroupDescriptor* DefaultLocalityGroup(); - ColumnFamilyDescriptor* DefaultColumnFamily(); - -private: - TableDescriptor(const TableDescriptor&); - void operator=(const TableDescriptor&); - TableDescImpl* impl_; + public: + // Only {[a-z],[A-Z],[0-9],'_','-'} are allowed in table name. + // Length of a table name should be less than 256. + TableDescriptor(const std::string& name = ""); + ~TableDescriptor(); + + // Set/Get table name + void SetTableName(const std::string& name); + std::string TableName() const; + + // Add a locality group named "lg_name". + // Only {[a-z],[A-Z],[0-9],'_','-'} are allowed in locality group name. + // Length of a locality group name should be less than 256. + LocalityGroupDescriptor* AddLocalityGroup(const std::string& lg_name); + // Remove a locality group by name. Operation returns false if there is a + // column family in this locality group. + bool RemoveLocalityGroup(const std::string& lg_name); + // Get locality group handle by id. + const LocalityGroupDescriptor* LocalityGroup(int32_t id) const; + // Get locality group handle by name. + const LocalityGroupDescriptor* LocalityGroup(const std::string& lg_name) const; + // Return locality group number in this table. + int32_t LocalityGroupNum() const; + + // Add a column family named "cf_name" into the locality group named + // "lg_name". + // Only {[a-z],[A-Z],[0-9],'_','-'} are allowed in column family name. + // Length of a column family name should be less than 256. + ColumnFamilyDescriptor* AddColumnFamily(const std::string& cf_name, + const std::string& lg_name = "lg0"); + // Remove a column family by name. + void RemoveColumnFamily(const std::string& cf_name); + // Get column family handle by id. + const ColumnFamilyDescriptor* ColumnFamily(int32_t id) const; + // Get column family handle by name. + const ColumnFamilyDescriptor* ColumnFamily(const std::string& cf_name) const; + // Return column family number in this table. + int32_t ColumnFamilyNum() const; + + // Set/get raw key type. + void SetRawKey(RawKeyType type); + RawKeyType RawKey() const; + + // Set/get the split size in MB. + void SetSplitSize(int64_t size); + int64_t SplitSize() const; + + // Set/get the merge size in MB. + // mergesize should be less than splitsize / 3. + void SetMergeSize(int64_t size); + int64_t MergeSize() const; + + // Enable/disable write-ahead-log on this table. + void DisableWal(); + bool IsWalDisabled() const; + + // Enable/disable transaction on this table. + void EnableTxn(); + bool IsTxnEnabled() const; + + // Set/get admin of this table. + void SetAdmin(const std::string& name); + std::string Admin() const; + + // Set/get admin group of this table. + void SetAdminGroup(const std::string& name); + std::string AdminGroup() const; + + // DEVELOPING + int32_t AddSnapshot(uint64_t snapshot); + uint64_t Snapshot(int32_t id) const; + int32_t SnapshotNum() const; + + // Enable hash read/write on this table. + void EnableHash(); + bool IsHashEnabled() const; + + uint32_t BloomFilterBitsPerKey() const; + void SetBloomFilterBitsPerKey(uint32_t val); + + // DEPRECATED + LocalityGroupDescriptor* DefaultLocalityGroup(); + ColumnFamilyDescriptor* DefaultColumnFamily(); + + private: + TableDescriptor(const TableDescriptor&); + void operator=(const TableDescriptor&); + TableDescImpl* impl_; }; -} // namespace tera +} // namespace tera #pragma GCC visibility pop #endif // TERA_TABLE_DESCRIPTOR_ diff --git a/src/tera_entry.h b/include/tera/tera_entry.h similarity index 68% rename from src/tera_entry.h rename to include/tera/tera_entry.h index 62930de9c..27ce0aadf 100644 --- a/src/tera_entry.h +++ b/include/tera/tera_entry.h @@ -1,12 +1,12 @@ -// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_TERA_ENTRY_H_ -#define TERA_TERA_ENTRY_H_ +#pragma once -#include "common/mutex.h" +#include +#pragma GCC visibility push(default) namespace tera { class TeraEntry { @@ -27,10 +27,8 @@ class TeraEntry { bool ShouldShutdown(); private: - Mutex mutex_; - bool started_; + std::atomic started_; }; } // namespace tera - -#endif // TERA_TERA_ENTRY_H_ +#pragma GCC visibility pop diff --git a/include/tera/value_filter.h b/include/tera/value_filter.h new file mode 100644 index 000000000..07fa1cc20 --- /dev/null +++ b/include/tera/value_filter.h @@ -0,0 +1,102 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// The Designs of Filter and related codes are inspired by hbase which is licensed under +// Apache 2.0 License (found in the LICENSE.Apache file in the root directory). Please refer to +// https://hbase.apache.org/2.0/apidocs/org/apache/hadoop/hbase/filter/Filter.html +// to see more detailed design of hbase filter. + +#pragma once + +#include +#include +#include "tera/filter.h" +#include "tera/filter_comparator.h" + +namespace tera { +namespace filter { + +class ValueFilter; +using ValueFilterPtr = std::shared_ptr; + +/* + * User can use this class to make a value filter. + * Please use std::make_shared method to New a value filter object, and assign the object to + * the ValueFilterPtr. + */ +class ValueFilter : public FilterBase { + // User interface + public: + /* + * New a value filter by using this method, And assign the object to a ValueFilterPtr. + * Note that a comparator object should be exist before you new a value filter. You can refer to + * the filter_comparator.h for how to new a comparator object. CompareOperator is also defined + * in the filter_comparator.h + */ + ValueFilter(CompareOperator op, const FilterComparatorPtr& comparator); + + /* + * User can set the column family in which the value cell should be execute filter. + * If not set, the family will be "". + */ + void SetColumnFamily(const std::string& column_family); + + /* + * User can set the qualifier in which the value cell should be execute filter. + * If not set, the qualifier will be "". + */ + void SetColumnQualifier(const std::string& column_qualifier); + + /* + * filter_if_missing is true means that if there is no family and qualifier matched in this row, + * which the filter specified, the row will be filtered out (not output), or the row will be not + * filtered (the row will be output). + * + * Default is false. + */ + void SetFilterIfMissing(bool filter_if_missing); + + /* + * User do NOT need to use the interfaces below. The internal of tera will use them. + */ + public: + ValueFilter(); + virtual ~ValueFilter(); + virtual FilterType Type(); + virtual void Reset(); + virtual ReturnCode FilterCell(const std::string& column_family, + const std::string& column_qualifier, const std::string& value); + virtual bool FilterRow(); + virtual bool SerializeTo(std::string* serialized_filter); + virtual bool ParseFrom(const std::string& serialized_filter); + virtual void GetAllColumn(ColumnSet* column_set); + + private: + virtual ReturnCode FilterCellWithEmptyQualifier(const std::string& column_family, + const std::string& column_qualifier, + const std::string& value); + virtual ReturnCode FilterCellWithNotEmptyQualifier(const std::string& column_family, + const std::string& column_qualifier, + const std::string& value); + bool MatchValue(const std::string& value); + bool MatchOp(int compare_result); + + private: + enum MatchStatus { + kNotMatchAnything, // not match cf and qu yet + kMatchColumnButNotValue, // matched cf and qu, but not match the value of the cf and qu + kMatchColumnAndValue // matched cf and qu and the value of them + }; + + private: + std::string column_family_; + std::string column_qualifier_; + CompareOperator op_; + FilterComparatorPtr comparator_; + bool filter_if_missing_; + MatchStatus match_status_; +}; + +} // namesapce filter +} // namesapce tera diff --git a/include/tera_c.h b/include/tera_c.h index 0fcc9d528..a7eab26de 100644 --- a/include/tera_c.h +++ b/include/tera_c.h @@ -29,32 +29,23 @@ void tera_client_close(tera_client_t* client); tera_table_t* tera_table_open(tera_client_t* client, const char* table_name, char** err); void tera_table_close(tera_table_t* table); -bool tera_table_get(tera_table_t* table, - const char* row_key, uint64_t keylen, - const char* family, const char* qualifier, - uint64_t qulen, char** value, uint64_t* vallen, +bool tera_table_get(tera_table_t* table, const char* row_key, uint64_t keylen, const char* family, + const char* qualifier, uint64_t qulen, char** value, uint64_t* vallen, char** errptr, uint64_t snapshot_id); -bool tera_table_getint64(tera_table_t* table, - const char* row_key, uint64_t keylen, - const char* family, const char* qualifier, - uint64_t qulen, int64_t* value, +bool tera_table_getint64(tera_table_t* table, const char* row_key, uint64_t keylen, + const char* family, const char* qualifier, uint64_t qulen, int64_t* value, char** errptr, uint64_t snapshot_id); -bool tera_table_put(tera_table_t* table, - const char* row_key, uint64_t keylen, - const char* family, const char* qualifier, - uint64_t qulen, const char* value, uint64_t vallen, +bool tera_table_put(tera_table_t* table, const char* row_key, uint64_t keylen, const char* family, + const char* qualifier, uint64_t qulen, const char* value, uint64_t vallen, char** errptr); -bool tera_table_put_kv(tera_table_t* table, const char* key, uint64_t keylen, - const char* value, uint64_t vallen, int32_t ttl, - char** errptr); +bool tera_table_put_kv(tera_table_t* table, const char* key, uint64_t keylen, const char* value, + uint64_t vallen, int32_t ttl, char** errptr); -bool tera_table_putint64(tera_table_t* table, - const char* row_key, uint64_t keylen, - const char* family, const char* qualifier, - uint64_t qulen, int64_t value, +bool tera_table_putint64(tera_table_t* table, const char* row_key, uint64_t keylen, + const char* family, const char* qualifier, uint64_t qulen, int64_t value, char** errptr); bool tera_table_delete(tera_table_t* table, const char* row_key, uint64_t keylen, @@ -64,27 +55,28 @@ bool tera_table_is_put_finished(tera_table_t* table); bool tera_table_is_get_finished(tera_table_t* table); void tera_table_apply_reader(tera_table_t* table, tera_row_reader_t* reader); -void tera_table_apply_reader_batch(tera_table_t* table, tera_row_reader_t** reader_batch, int64_t num); +void tera_table_apply_reader_batch(tera_table_t* table, tera_row_reader_t** reader_batch, + int64_t num); tera_row_mutation_t* tera_row_mutation(tera_table_t* table, const char* row_key, uint64_t keylen); void tera_table_apply_mutation(tera_table_t* table, tera_row_mutation_t* mutation); -void tera_table_apply_mutation_batch(tera_table_t* table, tera_row_mutation_t** mutation_batch, int64_t num); -void tera_row_mutation_put_kv(tera_row_mutation_t* mu, - const char* val, uint64_t vallen, int32_t ttl); -void tera_row_mutation_put(tera_row_mutation_t* mu, const char* cf, - const char* qu, uint64_t qulen, +void tera_table_apply_mutation_batch(tera_table_t* table, tera_row_mutation_t** mutation_batch, + int64_t num); +void tera_row_mutation_put_kv(tera_row_mutation_t* mu, const char* val, uint64_t vallen, + int32_t ttl); +void tera_row_mutation_put(tera_row_mutation_t* mu, const char* cf, const char* qu, uint64_t qulen, const char* val, uint64_t vallen); -void tera_row_mutation_put_with_timestamp(tera_row_mutation_t* mu, const char* cf, - const char* qu, uint64_t qulen, - int64_t timestamp, - const char* val, uint64_t vallen); -void tera_row_mutation_put_int64(tera_row_mutation_t* mu, const char* cf, - const char* qu, uint64_t qulen, int64_t val); -void tera_row_mutation_delete_column(tera_row_mutation_t* mu, const char* cf, - const char* qu, uint64_t qulen); +void tera_row_mutation_put_with_timestamp(tera_row_mutation_t* mu, const char* cf, const char* qu, + uint64_t qulen, int64_t timestamp, const char* val, + uint64_t vallen); +void tera_row_mutation_put_int64(tera_row_mutation_t* mu, const char* cf, const char* qu, + uint64_t qulen, int64_t val); +void tera_row_mutation_delete_column(tera_row_mutation_t* mu, const char* cf, const char* qu, + uint64_t qulen); void tera_row_mutation_delete_column_all_versions(tera_row_mutation_t* mu, const char* cf, const char* qu, uint64_t qulen); void tera_row_mutation_delete_column_with_version(tera_row_mutation_t* mu, const char* cf, - const char* qu, uint64_t qulen, int64_t timestamp); + const char* qu, uint64_t qulen, + int64_t timestamp); void tera_row_mutation_delete_row(tera_row_mutation_t* mu); void tera_row_mutation_delete_family(tera_row_mutation_t* mu, const char* cf); void tera_row_mutation_set_callback(tera_row_mutation_t* mu, MutationCallbackType callback); @@ -92,8 +84,7 @@ void tera_row_mutation_rowkey(tera_row_mutation_t* mu, char** val, uint64_t* val int64_t tera_row_mutation_get_status_code(tera_row_mutation_t* mu); void tera_row_mutation_destroy(tera_row_mutation_t* mu); -tera_result_stream_t* tera_table_scan(tera_table_t* table, - const tera_scan_descriptor_t* desc, +tera_result_stream_t* tera_table_scan(tera_table_t* table, const tera_scan_descriptor_t* desc, char** errptr); // scan descriptor @@ -101,14 +92,14 @@ tera_scan_descriptor_t* tera_scan_descriptor(const char* start_key, uint64_t key void tera_scan_descriptor_add_column(tera_scan_descriptor_t* desc, const char* cf, const char* qualifier, uint64_t qulen); void tera_scan_descriptor_add_column_family(tera_scan_descriptor_t* desc, const char* cf); -bool tera_scan_descriptor_is_async(tera_scan_descriptor_t* desc); void tera_scan_descriptor_set_buffer_size(tera_scan_descriptor_t* desc, int64_t size); -void tera_scan_descriptor_set_end(tera_scan_descriptor_t* desc, const char* end_key, uint64_t keylen); +void tera_scan_descriptor_set_end(tera_scan_descriptor_t* desc, const char* end_key, + uint64_t keylen); void tera_scan_descriptor_set_pack_interval(tera_scan_descriptor_t* desc, int64_t interval); -void tera_scan_descriptor_set_is_async(tera_scan_descriptor_t* desc, bool is_async); void tera_scan_descriptor_set_max_versions(tera_scan_descriptor_t* desc, int32_t versions); void tera_scan_descriptor_set_snapshot(tera_scan_descriptor_t* desc, uint64_t snapshot_id); -void tera_scan_descriptor_set_time_range(tera_scan_descriptor_t* desc, int64_t ts_start, int64_t ts_end); +void tera_scan_descriptor_set_time_range(tera_scan_descriptor_t* desc, int64_t ts_start, + int64_t ts_end); bool tera_scan_descriptor_set_filter(tera_scan_descriptor_t* desc, char* filter_str); void tera_scan_descriptor_destroy(tera_scan_descriptor_t* desc); @@ -127,7 +118,8 @@ void tera_result_stream_destroy(tera_result_stream_t* desc); // row reader tera_row_reader_t* tera_row_reader(tera_table_t* table, const char* row_key, uint64_t keylen); void tera_row_reader_add_column_family(tera_row_reader_t* reader, const char* family); -void tera_row_reader_add_column(tera_row_reader_t* reader, const char* cf, const char* qu, uint64_t len); +void tera_row_reader_add_column(tera_row_reader_t* reader, const char* cf, const char* qu, + uint64_t len); void tera_row_reader_set_callback(tera_row_reader_t* reader, ReaderCallbackType callback); void tera_row_reader_set_timestamp(tera_row_reader_t* reader, int64_t ts); void tera_row_reader_set_time_range(tera_row_reader_t* reader, int64_t start, int64_t end); @@ -146,7 +138,7 @@ int64_t tera_row_reader_get_status_code(tera_row_reader_t* reader); void tera_row_reader_destroy(tera_row_reader_t* reader); #ifdef __cplusplus -} /* end extern "C" */ +} /* end extern "C" */ #endif #pragma GCC visibility pop diff --git a/readme-cn.md b/readme-cn.md index 7edc5362f..fdc5dd56b 100644 --- a/readme-cn.md +++ b/readme-cn.md @@ -1,6 +1,7 @@ [高性能、可伸缩的结构化数据库](http://github.com/baidu/tera) ==== Tera是一个高性能、可伸缩的结构化数据存储系统,被设计用来管理搜索引擎万亿量级的超链与网页信息。为实现数据的实时分析与高效访问,我们使用按行键、列名和时间戳全局排序的三维数据模型组织数据,使用多级Cache系统,充分利用新一代服务器硬件大内存、SSD盘和万兆网卡的性能优势,做到模型灵活的同时,实现了高吞吐与水平扩展。([English](README.md)) + # 特性 * 全局有序 * 热点自动分片 @@ -10,39 +11,55 @@ Tera是一个高性能、可伸缩的结构化数据存储系统,被设计用 * 动态schema * 支持表格快照 * 高效随机读写 + # 数据模型 Tera使用了Bigtable的数据模型,可以将一张表格理解为这样一种数据结构: ``` map > > ``` 其中RowKey、ColumnFamily、Qualifier和Value是字符串,Timestamp是一个64位整形。ColumnFamliy需要建表时指定,是访问控制、版本保留等策略的基本单位。 + # 系统架构 系统主要由Tabletserver、Master和ClientSDK三部分构成。其中Tabletserver是核心服务器,承载着所有的数据管理与访问;Master是系统的仲裁者,负责表格的创建、schema更新与负载均衡;ClientSDK包含供管理员使用的命令行工具teracli和给用户使用的SDK。 表格被按RowKey全局排序,并横向切分成多个Tablet,每个Tablet负责服务RowKey的一个区间,表格又被纵向切分为多个LocalityGroup,一个Tablet的多个Localitygroup在物理上单独存储,可以选择不同的存储介质,以优化访问效率。 ![架构图](resources/images/arch.png) + # 系统依赖 * 使用分布式文件系统([BFS](https://github.com/baidu/bfs)、HDFS等)持久化数据与元信息 * 使用分布式协调服务([Nexus](https://github.com/baidu/ins/)或者Zookeeper)选主与协调 * 使用[Sofa-pbrpc](https://github.com/baidu/sofa-pbrpc/)实现跨进程通信 + # 系统构建 sh ./build.sh 参考[BUILD](BUILD-cn) + # 使用示例 [体验单机Tera](doc/cn/onebox.md) -[通过docker体验Tera](example/docker) -[主要api使用方法](doc/sdk_reference/README.md) + +[通过docker体验Tera](example/docker/readme-cn.md) + +[主要api使用方法](doc/sdk_reference/readme.md) + [客户端teracli使用方法](doc/tools/teracli.md) + [集群间数据迁移的dump工具terautil使用方法](doc/tools/terautil.md) + [造数据 & 读写数据的工具使用方法](doc/tools/benchmark.md) + [性能测试工具ycsb使用方法](doc/tools/ycsb.md) + [其它文档](doc/cn/README.md) -#反馈与技术支持 -tera_dev at baidu.com + +# 反馈与技术支持 +tera-user at baidu.com + # 成为贡献者 阅读[RoadMap](doc/cn/roadmap.md)文件或者源代码,了解我们当前的开发方向。 完成[5个小任务](doc/to_be_a_contributor.md),帮你一步步成为tera贡献者。 + # Become a Committer 成为tera的committer,你需要知道的一些[规则](doc/cn/to_be_a_committer.md)。 + # 欢迎加入 如果你热爱开源,热爱分布式技术,请将简历发送至: opensearch at baidu.com diff --git a/src/access/access_builder.cc b/src/access/access_builder.cc new file mode 100644 index 000000000..dda445865 --- /dev/null +++ b/src/access/access_builder.cc @@ -0,0 +1,48 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "access/access_builder.h" +#include "access/identification/ugi_identification.h" +#include "access/giano/giano_identification.h" +#include "utils/utils_cmd.h" +#include +#include + +namespace tera { +namespace auth { + +AccessBuilder::AccessBuilder(const std::string& auth_policy) : auth_policy_(auth_policy) { + if (!AccessUtils::GetAuthPolicyType(auth_policy, &auth_policy_type_)) { + LOG(ERROR) << "Unimplemented auth policy " << auth_policy_ + << ", AccessBuilder construct failed, exit!"; + _Exit(EXIT_FAILURE); + } + identity_info_.set_auth_policy_type(auth_policy_type_); + identity_info_.set_ip_addr(utils::GetLocalHostAddr()); + if (auth_policy_type_ == AuthPolicyType::kGianoAuthPolicy) { + identification_.reset(new GianoIdentification()); + } else if (auth_policy_type_ == AuthPolicyType::kUgiAuthPolicy) { + identification_.reset(new UgiIdentification()); + } else if (auth_policy_type_ == AuthPolicyType::kNoneAuthPolicy) { + LOG(INFO) << "AccessBuilder auth_policy kNoneAuthPolicy"; + } else { + LOG(ERROR) << "Unexpected error" + << ", AccessBuilder construct failed, exit!"; + _Exit(EXIT_FAILURE); + } +} + +bool AccessBuilder::Login(const std::string& name, const std::string& token, + ErrorCode* const error_code) { + if (auth_policy_type_ == AuthPolicyType::kNoneAuthPolicy || name == kInternalGroup) { + return true; + } + bool ret = true; + identity_info_.set_name(name); + identity_info_.set_token(token); + ret = identification_->Login(&identity_info_, error_code); + return ret; +} +} +} diff --git a/src/access/access_builder.h b/src/access/access_builder.h new file mode 100644 index 000000000..8a74de2c5 --- /dev/null +++ b/src/access/access_builder.h @@ -0,0 +1,57 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include +#include "tera/error_code.h" +#include "access/identification/identification.h" +#include "access/helpers/access_utils.h" + +namespace tera { +namespace auth { +class AccessBuilder { + public: + // Provide the object for client related with access control + explicit AccessBuilder(const std::string& auth_policy); + virtual ~AccessBuilder() {} + + // Get the cred/password and save them to user + // Return false if any error happend + // Only support for one group login in one shot + // TODO: extend to multi-group login. + bool Login(const std::string& name, const std::string& token, ErrorCode* const error_code); + + // auth User for Request + template + bool BuildRequest(Request* const req) { + if (auth_policy_type_ == AuthPolicyType::kNoneAuthPolicy) { + return true; + } + IdentityInfo* identity_info = new IdentityInfo(); + identity_info->CopyFrom(identity_info_); + req->set_allocated_identity_info(identity_info); + return true; + } + + // Set InternalGroup for internal Request + // Work for master => ts such as scan meta_table etc. + template + void BuildInternalGroupRequest(Request* const req) { + IdentityInfo* identity_info = new IdentityInfo(); + identity_info->set_name(kInternalGroup); + identity_info->set_auth_policy_type(AuthPolicyType::kNoneAuthPolicy); + identity_info->set_token(""); + identity_info->set_ip_addr(""); + req->set_allocated_identity_info(identity_info); + } + + private: + std::string auth_policy_; + AuthPolicyType auth_policy_type_; + IdentityInfo identity_info_; + std::unique_ptr identification_; +}; +} +} diff --git a/src/access/access_entry.cc b/src/access/access_entry.cc new file mode 100644 index 000000000..4fe72edd0 --- /dev/null +++ b/src/access/access_entry.cc @@ -0,0 +1,35 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "access_entry.h" + +namespace tera { +namespace auth { + +AccessEntry::AccessEntry(const std::string& auth_policy) : auth_policy_(auth_policy) { + if (!AccessUtils::GetAuthPolicyType(auth_policy, &auth_policy_type_)) { + LOG(ERROR) << "Unimplemented auth policy " << auth_policy + << ", AccessEntry construct failed, exit!"; + _Exit(EXIT_FAILURE); + } + access_updater_.reset(new AccessUpdater(auth_policy_type_)); +} + +bool AccessEntry::Verify(const IdentityInfo& identity_info, RoleList* roles) { + if (auth_policy_type_ == AuthPolicyType::kNoneAuthPolicy) { + return true; + } + VerificationPtr verification = access_updater_->GetVerification(); + return verification->Verify(identity_info, roles); +} + +bool AccessEntry::Authorize(const RoleList& roles) { + if (auth_policy_type_ == AuthPolicyType::kNoneAuthPolicy) { + return true; + } + AuthorizationPtr authorization = access_updater_->GetAuthorization(); + return authorization->Authorize(roles); +} +} +} diff --git a/src/access/access_entry.h b/src/access/access_entry.h new file mode 100644 index 000000000..bdfb793fd --- /dev/null +++ b/src/access/access_entry.h @@ -0,0 +1,77 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include +#include +#include "access/access_updater.h" +#include "access/helpers/access_utils.h" +#include "access/verification/verification.h" + +namespace tera { +namespace auth { + +enum class AccessUpdateType { UpdateUgi = 0, UpdateAuth, UpdateMax }; + +class AccessEntry { + public: + // Provide the object for server related with access control. + explicit AccessEntry(const std::string& auth_policy); + virtual ~AccessEntry() {} + + AccessUpdater& GetAccessUpdater() { return *access_updater_; } + + // Support for verify identity & check permissions + template + bool VerifyAndAuthorize(const Request* const req, Response* res) { + if (auth_policy_type_ == AuthPolicyType::kNoneAuthPolicy) { + return true; + } + + if (!req->has_identity_info()) { + // TODO: true just for compatibility with old sdk + // Will return false in future + return true; + } + IdentityInfo identity_info = req->identity_info(); + + // InternalGroup means master=>ts rpc + if (identity_info.name() == kInternalGroup) { + return true; + } + + if (identity_info.auth_policy_type() != auth_policy_type_) { + std::string policy; + AccessUtils::GetAuthPolicy(identity_info.auth_policy_type(), &policy); + + std::string auth_policy; + AccessUtils::GetAuthPolicy(auth_policy_type_, &auth_policy); + + LOG(ERROR) << "Not the same auth policy between sdk[" << policy << "] with master/ts[" + << auth_policy << "]"; + res->set_status(kMismatchAuthType); + return false; + } + + RoleList roles; + if (!Verify(identity_info, &roles) || !Authorize(roles)) { + res->set_status(kNotPermission); + return false; + } + return true; + } + + private: + bool Verify(const IdentityInfo& identity_info, RoleList* roles); + bool Authorize(const RoleList& roles); + + private: + std::string auth_policy_; + AuthPolicyType auth_policy_type_; + std::shared_ptr verification_; + std::unique_ptr access_updater_; +}; +} +} diff --git a/src/access/access_updater.cc b/src/access/access_updater.cc new file mode 100644 index 000000000..7ef055386 --- /dev/null +++ b/src/access/access_updater.cc @@ -0,0 +1,139 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "access/access_updater.h" +#include "access/giano/giano_verification.h" +#include "access/helpers/access_utils.h" +#include "access/verification/ugi_verification.h" +#include "common/func_scope_guard.h" +#include + +namespace tera { +namespace auth { + +AccessUpdater::AccessUpdater(AuthPolicyType auth_policy_type) + : auth_policy_type_(auth_policy_type) { + if (auth_policy_type_ == AuthPolicyType::kGianoAuthPolicy) { + verification_.reset(new GianoVerification()); + } else if (auth_policy_type_ == AuthPolicyType::kUgiAuthPolicy) { + verification_.reset(new UgiVerification()); + } else if (auth_policy_type_ == AuthPolicyType::kNoneAuthPolicy) { + LOG(INFO) << "AccessEntry auth_policy kNoneAuthPolicy"; + + // support set ugi in kNoneAuthPolicy, otherwise couldn't show all ugi_infos in kNoneAuthPolicy + verification_.reset(new UgiVerification()); + } else { + LOG(ERROR) << "Unexpected error," + << "AccessEntry construct failed, exit!"; + _Exit(EXIT_FAILURE); + } + authorization_.reset(new Authorization); +} + +bool AccessUpdater::AddRecord(const std::string& key, const std::string& value) { + if (key.size() <= 3) { + return false; + } + + if (key[2] == '0') { + if (auth_policy_type_ != AuthPolicyType::kUgiAuthPolicy && + auth_policy_type_ != AuthPolicyType::kNoneAuthPolicy) { + LOG(ERROR) << "Master[auth_policy_type_ = " << auth_policy_type_ << "] mismatch"; + return false; + } + std::string user_name = AccessUtils::GetNameFromMetaKey(key); + if (user_name.empty()) { + return false; + } + VerificationInfoPtr verification_info_ptr = + AccessUtils::GetVerificationInfoFromMetaValue(value); + if (!verification_info_ptr) { + return false; + } + verification_->Update(user_name, verification_info_ptr); + } else if (key[2] == '1') { + std::string role = AccessUtils::GetNameFromMetaKey(key); + authorization_->Update(role); + } else { + LOG(ERROR) << "Wrong key in AccessUpdater::AddRecord"; + return false; + } + ugi_version_recorder_.IncVersion(); + ugi_version_recorder_.SetNeedUpdate(true); + return true; +} + +bool AccessUpdater::DelRecord(const std::string& key) { + if (key.size() <= 3) { + return false; + } + + if (key[2] == '0') { + if (auth_policy_type_ != AuthPolicyType::kUgiAuthPolicy && + auth_policy_type_ != AuthPolicyType::kNoneAuthPolicy) { + LOG(ERROR) << "Master[auth_policy_type_ = " << auth_policy_type_ << "] mismatch"; + return false; + } + + std::string user_name = AccessUtils::GetNameFromMetaKey(key); + if (user_name.empty()) { + return false; + } + + verification_->Delete(user_name); + } else if (key[2] == '1') { + std::string role = AccessUtils::GetNameFromMetaKey(key); + authorization_->Delete(role); + } else { + LOG(ERROR) << "Wrong key in AccessUpdater::DelRecord"; + return false; + } + ugi_version_recorder_.IncVersion(); + ugi_version_recorder_.SetNeedUpdate(true); + return true; +} + +// for ts +bool AccessUpdater::UpdateTs(const QueryRequest* request, QueryResponse* response) { + FuncScopeGuard on_exit([&] { response->set_version(ugi_version_recorder_.GetVersion()); }); + if (request->has_version() && !IsSameVersion(request->version())) { + // Only update ugi in kUgiAuthPolicy + if (auth_policy_type_ == AuthPolicyType::kUgiAuthPolicy) { + ssize_t ugi_meta_infos_size = request->ugi_meta_infos_size(); + VerificationPtr new_verification(new UgiVerification()); + for (ssize_t i = 0; i < ugi_meta_infos_size; ++i) { + const UgiMetaInfo& ugi_meta_info = request->ugi_meta_infos(i); + const std::string& user_name = ugi_meta_info.user_name(); + VerificationInfoPtr verification_info_ptr(new VerificationInfo); + verification_info_ptr->first = ugi_meta_info.passwd(); + RoleList& roles = verification_info_ptr->second; + int roles_size = ugi_meta_info.roles_size(); + for (int roles_index = 0; roles_index < roles_size; ++roles_index) { + roles.emplace(ugi_meta_info.roles(roles_index)); + } + new_verification->Update(user_name, verification_info_ptr); + } + MutexLock l(&mutex_); + verification_.swap(new_verification); + } + + AuthorizationPtr new_authorization(new Authorization); + ssize_t role_infos_size = request->role_infos_size(); + for (ssize_t i = 0; i < role_infos_size; ++i) { + const RoleInfo& role_info = request->role_infos(i); + new_authorization->Update(role_info.role()); + } + { + MutexLock l(&mutex_); + authorization_.swap(new_authorization); + } + VLOG(23) << "UpdateAuth ts version from " << ugi_version_recorder_.GetVersion() << " to " + << request->version(); + ugi_version_recorder_.SetVersion(request->version()); + return true; + } + return false; +} +} // namespace auth +} // namespace tera diff --git a/src/access/access_updater.h b/src/access/access_updater.h new file mode 100644 index 000000000..205a70026 --- /dev/null +++ b/src/access/access_updater.h @@ -0,0 +1,97 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include +#include +#include "common/mutex.h" +#include "access/helpers/version_recorder.h" +#include "access/verification/verification.h" +#include "access/authorization/authorization.h" +#include "proto/master_rpc.pb.h" +#include "proto/tabletnode_rpc.pb.h" + +namespace tera { + +class QueryRequest; +class QueryResponse; +class ShowUgiResponse; + +namespace auth { + +using VerificationPtr = std::shared_ptr; +using AuthorizationPtr = std::shared_ptr; + +class AccessUpdater { + public: + explicit AccessUpdater(AuthPolicyType auth_policy_type); + virtual ~AccessUpdater() {} + AccessUpdater(AccessUpdater&) = delete; + AccessUpdater& operator=(const AccessUpdater&) = delete; + + VerificationPtr GetVerification() { + MutexLock l(&mutex_); + return verification_; + } + + AuthorizationPtr GetAuthorization() { + MutexLock l(&mutex_); + return authorization_; + } + + void SyncUgiVersion(bool updated) { ugi_version_recorder_.SetNeedUpdate(updated); } + + // master + bool AddRecord(const std::string& key, const std::string& value); + bool DelRecord(const std::string& key); + bool IsSameVersion(uint64_t version) { return ugi_version_recorder_.IsSameVersion(version); } + void BuildReq(QueryRequest* request) { + if (auth_policy_type_ == AuthPolicyType::kUgiAuthPolicy) { + BuildUgiMetaInfos(request); + } + BuildRoleInfos(request); + } + void ShowUgiInfo(ShowUgiResponse* response) { BuildUgiMetaInfos(response); } + void ShowAuthInfo(ShowAuthResponse* response) { BuildRoleInfos(response); } + + // ts + bool UpdateTs(const QueryRequest* request, QueryResponse* response); + + private: + template + void BuildUgiMetaInfos(Message* message) { + UserVerificationInfoList user_verification_info_list; + verification_->GetAll(&user_verification_info_list); + for (auto it = user_verification_info_list.begin(); it != user_verification_info_list.end(); + ++it) { + UgiMetaInfo* ugi_meta_info = message->add_ugi_meta_infos(); + ugi_meta_info->set_user_name(it->first); + ugi_meta_info->set_passwd(it->second->first); + for (auto& role : it->second->second) { + *ugi_meta_info->add_roles() = role; + } + } + } + template + void BuildRoleInfos(Message* message) { + std::set role_list; + authorization_->GetAll(&role_list); + for (auto& role : role_list) { + RoleInfo* role_info = message->add_role_infos(); + role_info->set_role(role); + } + } + + private: + AuthPolicyType auth_policy_type_; + VerificationPtr verification_; + AuthorizationPtr authorization_; + mutable Mutex mutex_; + + // master + VersionRecorder ugi_version_recorder_; +}; +} +} diff --git a/src/access/authorization/authorization.cc b/src/access/authorization/authorization.cc new file mode 100644 index 000000000..3ffb00a61 --- /dev/null +++ b/src/access/authorization/authorization.cc @@ -0,0 +1,41 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "access/authorization/authorization.h" + +namespace tera { +namespace auth { + +void Authorization::Update(const std::string& role_name) { + MutexLock l(&mutex_); + roles_.emplace(role_name); +} + +void Authorization::Delete(const std::string& role_name) { + MutexLock l(&mutex_); + auto it = roles_.find(role_name); + if (it != roles_.end()) { + roles_.erase(it); + } +} + +void Authorization::GetAll(std::set* role_list) { + MutexLock l(&mutex_); + for (auto& role : roles_) { + role_list->emplace(role); + } +} + +bool Authorization::Authorize(const std::set& role_list) { + MutexLock l(&mutex_); + for (const auto& role : role_list) { + auto it = roles_.find(role); + if (it != roles_.end()) { + return true; + } + } + return false; +} +} +} diff --git a/src/access/authorization/authorization.h b/src/access/authorization/authorization.h new file mode 100644 index 000000000..45dcd39b7 --- /dev/null +++ b/src/access/authorization/authorization.h @@ -0,0 +1,29 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include +#include +#include +#include "common/mutex.h" + +namespace tera { +namespace auth { + +class Authorization { + public: + explicit Authorization() {} + virtual ~Authorization() {} + void Update(const std::string& role_name); + void Delete(const std::string& role_name); + void GetAll(std::set* role_list); + bool Authorize(const std::set& role_list); + + private: + std::set roles_; + mutable Mutex mutex_; +}; +} +} diff --git a/src/access/giano/giano_identification.cc b/src/access/giano/giano_identification.cc new file mode 100644 index 000000000..200254219 --- /dev/null +++ b/src/access/giano/giano_identification.cc @@ -0,0 +1,18 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "access/giano/giano_identification.h" + +namespace tera { +namespace auth { + +GianoIdentification::GianoIdentification() { +} + +bool GianoIdentification::Login(IdentityInfo* const identity_info, ErrorCode* const error_code) { + return false; +} + +} // auth +} // tera diff --git a/src/access/giano/giano_identification.h b/src/access/giano/giano_identification.h new file mode 100644 index 000000000..d98b48f59 --- /dev/null +++ b/src/access/giano/giano_identification.h @@ -0,0 +1,20 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include "access/identification/identification.h" + +namespace tera { +namespace auth { + +class GianoIdentification : public Identification { + public: + GianoIdentification(); + virtual ~GianoIdentification() {} + virtual bool Login(IdentityInfo* const identity_info, ErrorCode* const error_code) override; +}; + +} // auth +} // tera diff --git a/src/access/giano/giano_verification.cc b/src/access/giano/giano_verification.cc new file mode 100644 index 000000000..ae367a8cb --- /dev/null +++ b/src/access/giano/giano_verification.cc @@ -0,0 +1,18 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "access/giano/giano_verification.h" + +namespace tera { +namespace auth { + +GianoVerification::GianoVerification() { +} + +bool GianoVerification::Verify(const IdentityInfo& identity_info, RoleList* roles) { + return false; +} + +} // auth +} // tera diff --git a/src/access/giano/giano_verification.h b/src/access/giano/giano_verification.h new file mode 100644 index 000000000..80c446c6c --- /dev/null +++ b/src/access/giano/giano_verification.h @@ -0,0 +1,33 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include "access/verification/verification.h" + +namespace tera { +namespace auth { + +class GianoVerification : public Verification { + public: + GianoVerification(); + virtual ~GianoVerification() {} + virtual bool Verify(const IdentityInfo& identity_info, RoleList* roles) override; + virtual bool Update(const std::string& user_name, + const VerificationInfoPtr& verification_info_ptr) override { + return false; + } + virtual bool Get(const std::string& user_name, + VerificationInfoPtr* verification_info_ptr) override { + return false; + } + virtual void GetAll(UserVerificationInfoList* user_verification_info) override { + } + virtual bool Delete(const std::string& user_name) override { + return false; + } +}; + +} // auth +} // tera diff --git a/src/access/helpers/access_utils.cc b/src/access/helpers/access_utils.cc new file mode 100644 index 000000000..701dc1db4 --- /dev/null +++ b/src/access/helpers/access_utils.cc @@ -0,0 +1,195 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "access/helpers/access_utils.h" +#include "access/access_entry.h" +#include "access/verification/ugi_verification.h" +#include "master/master_env.h" +#include +#include + +namespace tera { +namespace auth { + +using policy_type = std::underlying_type::type; + +static const std::vector msg = { + "none", "ugi", "giano", +}; + +bool AccessUtils::GetAuthPolicy(const AuthPolicyType& auth_policy_type, + std::string* const auth_policy) { + uint32_t msg_size = msg.size(); + uint32_t index = static_cast(auth_policy_type) - + static_cast(AuthPolicyType::kNoneAuthPolicy); + if (index >= msg_size) { + return false; + } + *auth_policy = std::string(msg[index]); + return true; +} + +bool AccessUtils::GetAuthPolicyType(const std::string& auth_policy, + AuthPolicyType* const auth_policy_type) { + uint32_t index = 0; + std::find_if(msg.cbegin(), msg.cend(), [&index, &auth_policy](const std::string& s) { + if (s != auth_policy) { + ++index; + return false; + } else { + return true; + } + }); + uint32_t msg_size = msg.size(); + if (index >= msg_size) { + return false; + } + *auth_policy_type = static_cast( + index + static_cast(AuthPolicyType::kNoneAuthPolicy)); + return true; +} + +static void GetVerificationInfo(const std::shared_ptr& access_entry, + const std::string& user_name, + VerificationInfoPtr* verification_info_ptr) { + VerificationPtr verification_ptr = access_entry->GetAccessUpdater().GetVerification(); + verification_ptr->Get(user_name, verification_info_ptr); +} + +static void MergeVerificationInfoUgi(const UgiInfo& ugi_info, + VerificationInfoPtr* verification_info_ptr) { + (*verification_info_ptr)->first = ugi_info.passwd(); +} + +static void MergeVerificationInfoAuthority(const AuthorityInfo& authority_info, + UpdateAuthType update_auth_type, + VerificationInfoPtr* verification_info_ptr) { + std::set& roles = (*verification_info_ptr)->second; + if (update_auth_type == kRevokeRole) { + roles.erase(authority_info.role()); + } else { + roles.emplace(authority_info.role()); + } +} + +static bool GetMetaRecordKeyValue(const std::shared_ptr& access_entry, + const UpdateAuthInfo& update_auth_info, + master::MetaWriteRecord* meta_write_record) { + VerificationInfoPtr verification_info_ptr(new VerificationInfo); + std::string user_name; + if (update_auth_info.update_type() == kUpdateUgi || update_auth_info.update_type() == kDelUgi) { + const UgiInfo& ugi_info = update_auth_info.ugi_info(); + user_name = ugi_info.user_name(); + meta_write_record->key = std::string("|00") + user_name; + if (update_auth_info.update_type() == kDelUgi) { + return true; + } + + GetVerificationInfo(access_entry, user_name, &verification_info_ptr); + MergeVerificationInfoUgi(ugi_info, &verification_info_ptr); + } else if (update_auth_info.update_type() == kGrantRole || + update_auth_info.update_type() == kRevokeRole) { + const AuthorityInfo& authority_info = update_auth_info.authority_info(); + user_name = authority_info.user_name(); + meta_write_record->key = std::string("|00") + user_name; + + GetVerificationInfo(access_entry, user_name, &verification_info_ptr); + MergeVerificationInfoAuthority(authority_info, update_auth_info.update_type(), + &verification_info_ptr); + } else { + return false; + } + UgiMetaInfo ugi_meta_info; + ugi_meta_info.set_user_name(user_name); + ugi_meta_info.set_passwd(verification_info_ptr->first); + const RoleList& roles = verification_info_ptr->second; + for (auto& role : roles) { + *ugi_meta_info.add_roles() = role; + } + if (!ugi_meta_info.SerializeToString(&meta_write_record->value)) { + return false; + } + return true; +} + +// |00User => Passwd,[Role1,Role2...](pb) +static master::MetaWriteRecord* NewMetaRecordFromUgiRole( + const std::shared_ptr& access_entry, const UpdateAuthInfo& update_auth_info) { + if (!update_auth_info.has_ugi_info() && !update_auth_info.has_authority_info()) { + return nullptr; + } + std::unique_ptr meta_write_record(new master::MetaWriteRecord); + + if (kDelUgi == update_auth_info.update_type()) { + meta_write_record->is_delete = true; + } else if (kUpdateUgi == update_auth_info.update_type()) { + meta_write_record->is_delete = false; + } else { + return nullptr; + } + if (!GetMetaRecordKeyValue(access_entry, update_auth_info, meta_write_record.get())) { + return nullptr; + } + return meta_write_record.release(); +} + +// |01Role => Permissions(pb) +static master::MetaWriteRecord* NewMetaRecordFromRole(const UpdateAuthInfo& update_auth_info) { + if (!update_auth_info.has_role_info()) { + return nullptr; + } + std::unique_ptr meta_write_record(new master::MetaWriteRecord); + if (kAddRole == update_auth_info.update_type()) { + meta_write_record->is_delete = false; + } else if (kDelRole == update_auth_info.update_type()) { + meta_write_record->is_delete = true; + } else { + return nullptr; + } + const RoleInfo& role_info = update_auth_info.role_info(); + meta_write_record->key = std::string("|01") + role_info.role(); + meta_write_record->value = ""; + return meta_write_record.release(); +} + +master::MetaWriteRecord* AccessUtils::NewMetaRecord( + const std::shared_ptr& access_entry, const UpdateAuthInfo& update_auth_info) { + UpdateAuthType update_auth_type = update_auth_info.update_type(); + if (update_auth_type == kUpdateUgi || update_auth_type == kDelUgi || + update_auth_type == kGrantRole || update_auth_type == kRevokeRole) { + return NewMetaRecordFromUgiRole(access_entry, update_auth_info); + } else if (update_auth_type == kAddRole || update_auth_type == kDelRole) { + return NewMetaRecordFromRole(update_auth_info); + } else { + return nullptr; + } +} + +std::string AccessUtils::GetNameFromMetaKey(const std::string& key) { + if (key.length() <= 3 || key[1] != '0') { + return ""; + } + return key.substr(3); +} + +VerificationInfoPtr AccessUtils::GetVerificationInfoFromMetaValue(const std::string& value) { + if (value.size() <= 0) { + return nullptr; + } + VerificationInfoPtr verification_info_ptr(new VerificationInfo); + UgiMetaInfo ugi_meta_info; + if (!ugi_meta_info.ParseFromString(value)) { + return nullptr; + } + verification_info_ptr->first = ugi_meta_info.passwd(); + RoleList& roles = verification_info_ptr->second; + int roles_size = ugi_meta_info.roles_size(); + for (int roles_index = 0; roles_index < roles_size; ++roles_index) { + roles.emplace(ugi_meta_info.roles(roles_index)); + } + return verification_info_ptr; +} + +} // namespace auth +} // namespace tera diff --git a/src/access/helpers/access_utils.h b/src/access/helpers/access_utils.h new file mode 100644 index 000000000..0616d8523 --- /dev/null +++ b/src/access/helpers/access_utils.h @@ -0,0 +1,34 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include +#include +#include "access/verification/verification.h" + +namespace tera { + +namespace master { +struct MetaWriteRecord; +} + +namespace auth { + +class AccessEntry; + +static const std::string kInternalGroup("internal_group"); + +class AccessUtils { + public: + static bool GetAuthPolicy(const AuthPolicyType& auth_policy_type, std::string* const auth_policy); + static bool GetAuthPolicyType(const std::string& auth_policy, + AuthPolicyType* const auth_policy_type); + static master::MetaWriteRecord* NewMetaRecord(const std::shared_ptr& access_entry, + const UpdateAuthInfo& update_auth_info); + static std::string GetNameFromMetaKey(const std::string& key); + static VerificationInfoPtr GetVerificationInfoFromMetaValue(const std::string& value); +}; +} +} diff --git a/src/access/helpers/permission_builder.cc b/src/access/helpers/permission_builder.cc new file mode 100644 index 000000000..6ea368353 --- /dev/null +++ b/src/access/helpers/permission_builder.cc @@ -0,0 +1,81 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "access/helpers/permission_builder.h" +#include + +namespace tera { +namespace auth { + +namespace { +static const std::string empty_str{""}; +static bool CheckActionLegal(Permission::Action action) { + using action_type = std::underlying_type::type; + uint32_t action_num = static_cast(action); + uint32_t action_min = static_cast(Permission::kRead); + uint32_t action_max = static_cast(Permission::kAdmin); + return (action_num >= action_min) && (action_num <= action_max); +} +} + +Permission* PermissionBuilder::NewPermission(Permission::Action action) { + if (!CheckActionLegal(action)) { + return nullptr; + } + std::unique_ptr permission(new Permission); + permission->set_type(Permission::kGlobal); + permission->mutable_global_permission()->set_action(action); + return permission.release(); +} + +Permission* PermissionBuilder::NewPermission(Permission::Action action, + const std::string& namespace_name) { + if (!CheckActionLegal(action) || !namespace_name.compare(empty_str)) { + return nullptr; + } + std::unique_ptr permission(new Permission); + permission->set_type(Permission::kNamespace); + NamespacePermission* namespace_permission = permission->mutable_namespace_permission(); + namespace_permission->set_namespace_name(namespace_name); + namespace_permission->set_action(action); + return permission.release(); +} + +Permission* PermissionBuilder::NewPermission(Permission::Action action, + const std::string& namespace_name, + const std::string& table_name) { + if (!CheckActionLegal(action) || !namespace_name.compare(empty_str) || + !table_name.compare(empty_str)) { + return nullptr; + } + std::unique_ptr permission(new Permission); + permission->set_type(Permission::kTable); + TablePermission* table_permission = permission->mutable_table_permission(); + table_permission->set_namespace_name(namespace_name); + table_permission->set_table_name(table_name); + table_permission->set_action(action); + return permission.release(); +} + +Permission* PermissionBuilder::NewPermission(Permission::Action action, + const std::string& namespace_name, + const std::string& table_name, + const std::string& family, + const std::string& qualifier) { + if (!CheckActionLegal(action) || !namespace_name.compare(empty_str) || + !table_name.compare(empty_str)) { + return nullptr; + } + std::unique_ptr permission(new Permission); + permission->set_type(Permission::kTable); + TablePermission* table_permission = permission->mutable_table_permission(); + table_permission->set_namespace_name(namespace_name); + table_permission->set_table_name(table_name); + table_permission->set_family(family); + table_permission->set_qualifier(qualifier); + table_permission->set_action(action); + return permission.release(); +} +} // namespace auth +} // namespace tera diff --git a/src/access/helpers/permission_builder.h b/src/access/helpers/permission_builder.h new file mode 100644 index 000000000..c48420fc3 --- /dev/null +++ b/src/access/helpers/permission_builder.h @@ -0,0 +1,25 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include "proto/access_control.pb.h" +#include + +namespace tera { +namespace auth { + +class PermissionBuilder { + public: + static Permission* NewPermission(Permission::Action action); + static Permission* NewPermission(Permission::Action action, const std::string& namespace_name); + static Permission* NewPermission(Permission::Action action, const std::string& namespace_name, + const std::string& table_name); + static Permission* NewPermission(Permission::Action action, const std::string& namespace_name, + const std::string& table_name, const std::string& family, + const std::string& qualifier); +}; + +} // namespace auth +} // namespace tera \ No newline at end of file diff --git a/src/access/helpers/version_recorder.h b/src/access/helpers/version_recorder.h new file mode 100644 index 000000000..dd37e631c --- /dev/null +++ b/src/access/helpers/version_recorder.h @@ -0,0 +1,37 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include + +namespace tera { +namespace auth { + +class VersionRecorder { + public: + explicit VersionRecorder() : version_(0), need_update_(false) {} + ~VersionRecorder() {} + + VersionRecorder(VersionRecorder&) = delete; + VersionRecorder& operator==(const VersionRecorder&) = delete; + + bool NeedUpdate() { return need_update_; } + + void SetNeedUpdate(bool need_update) { need_update_.store(need_update); } + + void IncVersion() { version_.fetch_add(1); } + + void SetVersion(uint64_t version) { version_.store(version); } + + uint64_t GetVersion() { return version_.load(); } + + bool IsSameVersion(uint64_t version) { return version_.load() == version; } + + private: + std::atomic version_; + std::atomic need_update_; +}; +} +} diff --git a/src/access/identification/identification.h b/src/access/identification/identification.h new file mode 100644 index 000000000..01cb3f5f1 --- /dev/null +++ b/src/access/identification/identification.h @@ -0,0 +1,21 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include "tera/error_code.h" +#include "access/helpers/access_utils.h" + +namespace tera { +namespace auth { + +class Identification { + public: + // Login to get the cred/password + // Success will return true + // otherwise return false + virtual bool Login(IdentityInfo* const identity_info, ErrorCode* const error_code) = 0; +}; +} +} diff --git a/src/access/identification/ugi_identification.cc b/src/access/identification/ugi_identification.cc new file mode 100644 index 000000000..e681f3b95 --- /dev/null +++ b/src/access/identification/ugi_identification.cc @@ -0,0 +1,29 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "access/identification/ugi_identification.h" +#include + +namespace tera { +namespace auth { + +UgiIdentification::UgiIdentification() {} + +bool UgiIdentification::Login(IdentityInfo* const identity_info, ErrorCode* const error_code) { + if (identity_info->name() == "" || identity_info->token() == "") { + LOG(ERROR) << "Make sure set --tera_auth_name=xxx & --tera_auth_token=yyy " + << "in tera.flag when use kUgiAuthPolicy, [name = " << identity_info->name() + << ", token = " << identity_info->token(); + if (error_code != nullptr) { + std::string reason = + "Login argument absent(--tera_auth_name=xxx && --tera_auth_token=yyy)\ + when use kUgiAuthPolicy"; + error_code->SetFailed(ErrorCode::kAuthBadParam, reason); + } + return false; + } + return true; +} +} +} \ No newline at end of file diff --git a/src/access/identification/ugi_identification.h b/src/access/identification/ugi_identification.h new file mode 100644 index 000000000..8d133c660 --- /dev/null +++ b/src/access/identification/ugi_identification.h @@ -0,0 +1,18 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include "access/identification/identification.h" + +namespace tera { +namespace auth { +class UgiIdentification : public Identification { + public: + UgiIdentification(); + virtual ~UgiIdentification() {} + virtual bool Login(IdentityInfo* const identity_info, ErrorCode* const error_code) override; +}; +} +} diff --git a/src/access/test/access_verification_test.cc b/src/access/test/access_verification_test.cc new file mode 100644 index 000000000..2791002de --- /dev/null +++ b/src/access/test/access_verification_test.cc @@ -0,0 +1,116 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include +#include +#include +#include "access/access_builder.h" +#include "access/access_entry.h" +#include "access/verification/ugi_verification.h" +#include "proto/tabletnode_rpc.pb.h" +#include "master/master_env.h" + +DECLARE_string(tera_auth_name); +DECLARE_string(tera_auth_token); + +namespace tera { +namespace auth { +namespace test { + +static const std::string user_name("mock_group"); +static const std::string passwd("2862933555777941757"); + +class AuthVerificationTest : public ::testing::Test { + public: + AuthVerificationTest() : meta_write_record_(nullptr) { + FLAGS_tera_auth_name = user_name; + FLAGS_tera_auth_token = passwd; + } + + virtual ~AuthVerificationTest() {} + + void RegisterNoneAuth() { + std::string none_auth_policy; + AccessUtils::GetAuthPolicy(AuthPolicyType::kNoneAuthPolicy, &none_auth_policy); + access_builder_.reset(new AccessBuilder(none_auth_policy)); + access_entry_.reset(new AccessEntry(none_auth_policy)); + } + + void RegisterUgiAuth() { + std::string ugi_auth_policy; + AccessUtils::GetAuthPolicy(AuthPolicyType::kUgiAuthPolicy, &ugi_auth_policy); + access_builder_.reset(new AccessBuilder(ugi_auth_policy)); + access_entry_.reset(new AccessEntry(ugi_auth_policy)); + + UpdateAuthInfo update_auth_info; + update_auth_info.set_update_type(kUpdateUgi); + UgiInfo* ugi_info = update_auth_info.mutable_ugi_info(); + ugi_info->set_user_name(user_name); + ugi_info->set_passwd(passwd); + + meta_write_record_.reset(AccessUtils::NewMetaRecord(access_entry_, update_auth_info)); + } + + bool Login(ErrorCode* err = NULL) { + return access_builder_->Login(FLAGS_tera_auth_name, FLAGS_tera_auth_token, err); + } + + bool Verify(const IdentityInfo& identity_info, RoleList* roles) { + return access_entry_->Verify(identity_info, roles); + } + + IdentityInfo GetIdentityInfo() const { return access_builder_->identity_info_; } + + private: + std::unique_ptr access_builder_; + std::shared_ptr access_entry_; + std::unique_ptr meta_write_record_; +}; + +TEST_F(AuthVerificationTest, NullAuthVerificationTest) { + RegisterNoneAuth(); + EXPECT_TRUE(Login()); + + IdentityInfo identity_info = GetIdentityInfo(); + RoleList roles; + EXPECT_TRUE(Verify(identity_info, &roles)); +} + +TEST_F(AuthVerificationTest, UgiAuthVerificationTest) { + RegisterUgiAuth(); + EXPECT_TRUE(access_entry_->GetAccessUpdater().auth_policy_type_ == + AuthPolicyType::kUgiAuthPolicy); + EXPECT_TRUE(Login()); + + EXPECT_TRUE(access_entry_->GetAccessUpdater().AddRecord(meta_write_record_->key, + meta_write_record_->value)); + IdentityInfo identity_info = GetIdentityInfo(); + EXPECT_EQ(user_name, identity_info.name()); + EXPECT_EQ(passwd, identity_info.token()); + + RoleList roles; + EXPECT_TRUE(Verify(identity_info, &roles)); +} + +TEST_F(AuthVerificationTest, UpdateMasterTest) { + RegisterUgiAuth(); + EXPECT_TRUE(access_entry_->GetAccessUpdater().auth_policy_type_ == + AuthPolicyType::kUgiAuthPolicy); + EXPECT_TRUE(access_entry_->GetAccessUpdater().AddRecord(meta_write_record_->key, + meta_write_record_->value)); + + QueryRequest request; + access_entry_->GetAccessUpdater().BuildReq(&request); + // access_entry_->GetAccessUpdater().FinishAccessUpdated(); + // EXPECT_TRUE(access_entry_->GetAccessUpdater().BuildReq(&request)); + // EXPECT_TRUE(request.ugi_infos_size() == 1); + // const std::string& user_name = request.ugi_infos(0).user_name(); + // const std::string& passwd = request.ugi_infos(0).passwd(); + // EXPECT_EQ("mock_group", user_name); + // EXPECT_EQ("2862933555777941757", passwd); +} +} +} +} diff --git a/src/access/test/multi_tenancy_test.cc b/src/access/test/multi_tenancy_test.cc new file mode 100644 index 000000000..8227112d0 --- /dev/null +++ b/src/access/test/multi_tenancy_test.cc @@ -0,0 +1,20 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include +#include + +DECLARE_bool(tera_quota_enabled); +DECLARE_string(tera_quota_limiter_type); +DECLARE_int64(tera_quota_normal_estimate_value); + +int main(int argc, char** argv) { + ::google::ParseCommandLineFlags(&argc, &argv, true); + FLAGS_tera_quota_enabled = true; + FLAGS_tera_quota_limiter_type = "general_quota_limiter"; + FLAGS_tera_quota_normal_estimate_value = 1; + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/access/test/permission_test.cc b/src/access/test/permission_test.cc new file mode 100644 index 000000000..8598597fa --- /dev/null +++ b/src/access/test/permission_test.cc @@ -0,0 +1,76 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include +#include +#include +#include "access/helpers/permission_builder.h" + +DECLARE_string(tera_auth_name); +DECLARE_string(tera_auth_token); + +namespace tera { +namespace auth { +namespace test { + +static const std::string namespace_name{"sandbox"}; +static const std::string table_name{"test"}; +static const std::string family{"cf"}; +static const std::string qualifier{"qu"}; + +class PermissionTest : public ::testing::Test { + public: + PermissionTest() {} + virtual ~PermissionTest() {} +}; + +TEST_F(PermissionTest, GlobalPermissionBuilderTest) { + std::unique_ptr global_permission( + PermissionBuilder::NewPermission(Permission::kRead)); + EXPECT_TRUE(global_permission->type() == Permission::kGlobal); + + std::unique_ptr nullptr_permission( + PermissionBuilder::NewPermission(static_cast(5))); + EXPECT_TRUE(!nullptr_permission); +} + +TEST_F(PermissionTest, NamespacePermissionBuilderTest) { + std::unique_ptr namespace_permission( + PermissionBuilder::NewPermission(Permission::kRead, namespace_name)); + EXPECT_TRUE(namespace_permission->type() == Permission::kNamespace); + EXPECT_TRUE(namespace_permission->namespace_permission().namespace_name() == namespace_name); + std::unique_ptr nullptr_permission( + PermissionBuilder::NewPermission(static_cast(5), namespace_name)); + EXPECT_TRUE(!nullptr_permission); +} + +TEST_F(PermissionTest, TablePermissionBuilderTest) { + std::unique_ptr table_permission( + PermissionBuilder::NewPermission(Permission::kRead, namespace_name, table_name)); + EXPECT_TRUE(table_permission->type() == Permission::kTable); + EXPECT_TRUE(table_permission->table_permission().namespace_name() == namespace_name); + EXPECT_TRUE(table_permission->table_permission().table_name() == table_name); + + std::unique_ptr nullptr_permission(PermissionBuilder::NewPermission( + static_cast(5), namespace_name, table_name)); + EXPECT_TRUE(!nullptr_permission); +} + +TEST_F(PermissionTest, TableCfQuPermissionBuilderTest) { + std::unique_ptr table_cf_qu_permission(PermissionBuilder::NewPermission( + Permission::kRead, namespace_name, table_name, family, qualifier)); + EXPECT_TRUE(table_cf_qu_permission->type() == Permission::kTable); + EXPECT_TRUE(table_cf_qu_permission->table_permission().namespace_name() == namespace_name); + EXPECT_TRUE(table_cf_qu_permission->table_permission().table_name() == table_name); + EXPECT_TRUE(table_cf_qu_permission->table_permission().family() == family); + EXPECT_TRUE(table_cf_qu_permission->table_permission().qualifier() == qualifier); + + std::unique_ptr nullptr_permission(PermissionBuilder::NewPermission( + static_cast(5), namespace_name, table_name, family, qualifier)); + EXPECT_TRUE(!nullptr_permission); +} +} +} +} \ No newline at end of file diff --git a/src/access/verification/ugi_verification.cc b/src/access/verification/ugi_verification.cc new file mode 100644 index 000000000..dddbea3a4 --- /dev/null +++ b/src/access/verification/ugi_verification.cc @@ -0,0 +1,64 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "ugi_verification.h" +#include +#include + +namespace tera { +namespace auth { + +UgiVerification::UgiVerification() {} + +bool UgiVerification::Verify(const IdentityInfo& identity_info, RoleList* roles) { + MutexLock l(&ugi_mutex_); + auto it = user_verification_info_list_.find(identity_info.name()); + if (it != user_verification_info_list_.end() && it->second->first == identity_info.token()) { + for (auto& role : it->second->second) { + roles->emplace(role); + } + return true; + } + return false; +} + +bool UgiVerification::Update(const std::string& user_name, + const VerificationInfoPtr& verification_info_ptr) { + MutexLock l(&ugi_mutex_); + VLOG(23) << "UpdateUgi success [user = " << user_name + << ", passwd = " << verification_info_ptr->first << "]"; + user_verification_info_list_[user_name] = verification_info_ptr; + return true; +} + +bool UgiVerification::Get(const std::string& user_name, + VerificationInfoPtr* verification_info_ptr) { + MutexLock l(&ugi_mutex_); + auto it = user_verification_info_list_.find(user_name); + if (it == user_verification_info_list_.end()) { + return false; + } + *verification_info_ptr = it->second; + return true; +} + +void UgiVerification::GetAll(UserVerificationInfoList* user_verification_info) { + MutexLock l(&ugi_mutex_); + for (auto it = user_verification_info_list_.begin(); it != user_verification_info_list_.end(); + ++it) { + (*user_verification_info)[it->first] = it->second; + } +} + +bool UgiVerification::Delete(const std::string& user_name) { + MutexLock l(&ugi_mutex_); + auto it = user_verification_info_list_.find(user_name); + if (it != user_verification_info_list_.end()) { + VLOG(23) << "DelUgi success [user = " << it->first << ", psswd = " << it->second << "]"; + user_verification_info_list_.erase(it); + } + return true; +} +} +} \ No newline at end of file diff --git a/src/access/verification/ugi_verification.h b/src/access/verification/ugi_verification.h new file mode 100644 index 000000000..e705cc7ca --- /dev/null +++ b/src/access/verification/ugi_verification.h @@ -0,0 +1,33 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include +#include +#include "access/verification/verification.h" +#include "common/mutex.h" + +namespace tera { + +namespace auth { + +class UgiVerification : public Verification { + public: + UgiVerification(); + virtual ~UgiVerification() {} + virtual bool Verify(const IdentityInfo& identity_info, RoleList* roles) override; + + virtual bool Update(const std::string& user_name, + const VerificationInfoPtr& verification_info) override; + virtual bool Get(const std::string& user_name, VerificationInfoPtr* verification_info) override; + virtual void GetAll(UserVerificationInfoList* user_verification_info) override; + virtual bool Delete(const std::string& user_name) override; + + private: + UserVerificationInfoList user_verification_info_list_; + mutable Mutex ugi_mutex_; +}; +} +} diff --git a/src/access/verification/verification.h b/src/access/verification/verification.h new file mode 100644 index 000000000..00b3451c9 --- /dev/null +++ b/src/access/verification/verification.h @@ -0,0 +1,38 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include +#include +#include +#include +#include +#include "proto/access_control.pb.h" + +namespace tera { +namespace auth { + +using RoleList = std::set; +using VerificationInfo = std::pair; +using VerificationInfoPtr = std::shared_ptr; +using UserVerificationInfoList = std::map; + +class Verification { + public: + // Verify ideantity + // Return false means to fake ideantity + virtual bool Verify(const IdentityInfo& identity_info, RoleList* roles) = 0; + + // Update verification infos + virtual bool Update(const std::string& user_name, + const VerificationInfoPtr& verification_info_ptr) = 0; + virtual bool Get(const std::string& user_name, VerificationInfoPtr* verification_info_ptr) = 0; + virtual void GetAll(UserVerificationInfoList* user_verification_info) = 0; + + // Delete verification's user + virtual bool Delete(const std::string& user_name) = 0; +}; +} +} diff --git a/src/admincli.cc b/src/admincli.cc new file mode 100644 index 000000000..51dfda2de --- /dev/null +++ b/src/admincli.cc @@ -0,0 +1,1997 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "ins_sdk.h" +#include "common/base/string_ext.h" +#include "common/base/string_number.h" +#include "common/base/string_format.h" +#include "common/console/progress_bar.h" +#include "common/file/file_path.h" +#include "common/timer.h" +#include "io/coding.h" +#include "proto/kv_helper.h" +#include "proto/proto_helper.h" +#include "proto/tabletnode.pb.h" +#include "proto/tabletnode_client.h" +#include "proto/table_meta.pb.h" +#include +#include +#include "db/dbformat.h" +#include "db/filename.h" +#include "db/log_reader.h" +#include "db/log_writer.h" +#include "db/version_set.h" +#include "db/version_edit.h" +#include "db/write_batch_internal.h" +#include "leveldb/env.h" +#include "leveldb/env_dfs.h" +#include "leveldb/iterator.h" +#include "leveldb/options.h" +#include "leveldb/status.h" +#include "leveldb/table.h" +#include "sdk/client_impl.h" +#include "sdk/cookie.h" +#include "sdk/sdk_utils.h" +#include "sdk/sdk_zk.h" +#include "sdk/table_impl.h" +#include "master/master_impl.h" +#include "tera.h" +#include "types.h" +#include "leveldb/dfs.h" +#include "util/nfs.h" +#include "util/hdfs.h" +#include "utils/config_utils.h" +#include "utils/crypt.h" +#include "utils/schema_utils.h" +#include "utils/string_util.h" +#include "utils/tprinter.h" +#include "utils/utils_cmd.h" +#include "version.h" + +DECLARE_string(flagfile); +DECLARE_string(log_dir); +DECLARE_string(tera_master_meta_table_name); +DECLARE_string(tera_tabletnode_path_prefix); + +DEFINE_string(meta_cli_token, "", + "Only be executed for the guys who has the token. \ + Please figure out what metacli is before use it."); +DEFINE_bool(readable, true, "readable input"); +DEFINE_bool(make_sure_manual, true, "input Y manual"); + +DECLARE_string(tera_leveldb_env_dfs_type); +DECLARE_string(tera_leveldb_env_nfs_mountpoint); +DECLARE_string(tera_leveldb_env_nfs_conf_path); +DECLARE_string(tera_leveldb_env_hdfs2_nameservice_list); +DECLARE_string(tera_dfs_so_path); +DECLARE_string(tera_dfs_conf); + +using namespace tera; + +#define TABLET_NUM_LEN 8 +#define TABLET_NAME_LEN 14 +#define MANIFEST_LEN 16 + +std::vector g_tables; +// map_bak - backup, map_scan - scan, map_diff - diff +// > map_bak +// > map_diff +// > map_scan +std::map> g_map_bak; +Mutex g_output_lock; +uint32_t g_thread_count = 0; +uint32_t g_diff_count = 0; +leveldb::Dfs* g_dfs = NULL; +leveldb::Env* g_env = NULL; + +class RestoreReporter : public leveldb::log::Reader::Reporter { + std::string s_; + + public: + virtual void Corruption(size_t bytes, const leveldb::Status& s) { s_ = s.ToString(); } +}; + +enum class CliStatus { kOk, kError, kNotFound }; + +using CommandTable = + std::map; + +namespace { +volatile uint64_t g_sequence_id = 0; +const std::string g_metacli_token("2862933555777941757"); +} + +const char* builtin_cmd_list[] = { + "get", + "get \n\ + \n\ + get one tablet meta info in meta table \n\ + inmem \n\ + get one tablet meta info in master memory", + "show", + "show \n\ + [start_key] [end_key] \n\ + show meta info in meta table, support specify KeyRange \n\ + e.g. show \"table1#\\\\x00\'n\\\\x842\" \"table1#\\\\x00K\\\\x85\" \n\ + inmem [start_key] [end_key] \n\ + show meta info in master memory, support specify KeyRange \n\ + e.g. check \"table1#\\\\x00\'n\\\\x842\" \"table1#\\\\x00K\\\\x85\"", + "healthcheck", + "healthcheck \n\ + [start_key] [end_key] \n\ + health check all meta info in meta table, support specify KeyRange \n\ + inmem \n\ + health check all meta info in master memory", + "backup", + "backup \n\ + [filename] \n\ + backup meta info in meta table to file, default to meta.bak with timestamp \n\ + inmem [filename] \n\ + backup meta info in meta table to file, default to inmem_meta.bak with timestamp", + "modify", + "modify \n\ + endkey \n\ + modify the value of key_end \n\ + dest_ts \n\ + modify the host of an tablet, hostname should with port \n\ + e.g. modify test_table \'\\x00abc\' dest_ts yq01.baidu.com:2002", + "delete", + "delete \n\ + delete \n\ + delete the table_name+row_key in meta_table \n\ + e.g. delete test_table1 \'\\x00abc\' ", + "put", + "put \n\ + put \n\ + insert one TabletMeta into meta table \n\ + e.g. put table1/tablet00000019 \'\' \'\\x00abc\' yq01.baidu.com:2002", + "diff", + "diff \n\ + \n\ + scan all the tables and diff with meta backup file \n\ + e.g. diff meta.bak_20180926-20:55:32 \n\ + \n\ + scan the given table and diff with meta backup file \n\ + e.g. diff test1 meta.bak_20180926-20:55:32 \n\ + \n\ + scan the given tablet and diff with meta backup file \n\ + e.g. diff test1/tablet00000001 meta.bak_20180926-20:55:32", + "conv", + "conv \n\ + \n\ + unreadable key convert to readable key", + "ugi", + "ugi \n\ + update \n\ + add/update ugi(user_name&passwd) \n\ + del \n\ + delete ugi \n\ + show \n\ + list ugis", + + "role", + "role \n\ + add \n\ + add a role account \n\ + del \n\ + delete a role account \n\ + grant \n\ + grant the role to user \n\ + revoke \n\ + revoke the role from user \n\ + show \n\ + list roles", + + "auth", + "auth \n\ + set \n\ + auth_policy=none/ugi/giano, which would decide Table how to access. \n\ + show \n\ + list all table=>auth_policy", + + "procedure-limit", + "procedure-limit \n\ + get \n\ + show the current limit of all procedures \n\ + set \n\ + procedure = [kMerge, kSplit, kMove, kLoad, kUnload] \n\ + limit shoud be a non-negative number", + + "help", + "help [cmd] \n\ + show manual for a or all cmd(s)", + + "version", + "version \n\ + show version info", + + "dfs-throughput-limit", + "dfs-throughput-limit [args] \n\ + get Get current dfs hard limit info \n\ + write \n\ + read \n\ + limit_value: [limit bytes|'reset'] cluster dfs hard limit in bytes or reset limit.", +}; + +static void PrintCmdHelpInfo(const char* msg) { + if (msg == NULL) { + return; + } + int count = sizeof(builtin_cmd_list) / sizeof(char*); + for (int i = 0; i < count; i += 2) { + if (strncmp(msg, builtin_cmd_list[i], 32) == 0) { + std::cout << builtin_cmd_list[i + 1] << std::endl; + return; + } + } +} + +bool ParseCommand(int argc, char** arg_list, std::vector* parsed_arg_list) { + for (int i = 0; i < argc; i++) { + std::string parsed_arg = arg_list[i]; + if (FLAGS_readable && !ParseDebugString(arg_list[i], &parsed_arg)) { + std::cout << "invalid debug format of argument: " << arg_list[i] << std::endl; + return false; + } + parsed_arg_list->push_back(parsed_arg); + } + return true; +} + +static void PrintCmdHelpInfo(const std::string& msg) { PrintCmdHelpInfo(msg.c_str()); } + +static CommandTable& GetCommandTable() { + static CommandTable command_table; + return command_table; +} + +static void PrintAllCmd() { + std::cout << "there is cmd list:" << std::endl; + int count = sizeof(builtin_cmd_list) / sizeof(char*); + bool newline = false; + for (int i = 0; i < count; i += 2) { + std::cout << std::setiosflags(std::ios::left) << std::setw(20) << builtin_cmd_list[i]; + if (newline) { + std::cout << std::endl; + newline = false; + } else { + newline = true; + } + } + + std::cout << std::endl + << "help [cmd] for details." << std::endl; +} + +CliStatus HelpOp(Client*, int32_t argc, std::string* argv, ErrorCode*) { + if (argc == 2) { + PrintAllCmd(); + } else if (argc == 3) { + PrintCmdHelpInfo(argv[2]); + } else { + PrintCmdHelpInfo("help"); + } + return CliStatus::kOk; +} + +CliStatus HelpOp(int32_t argc, char** argv) { + std::vector argv_svec(argv, argv + argc); + return HelpOp(NULL, argc, &argv_svec[0], NULL); +} + +CliStatus CheckAndParseDebugString(const std::string& debug_str, std::string* raw_str) { + if (FLAGS_readable) { + raw_str->clear(); + if (!ParseDebugString(debug_str, raw_str)) { + LOG(ERROR) << "invalid debug format: " << debug_str; + return CliStatus::kError; + } + } else { + *raw_str = debug_str; + } + return CliStatus::kOk; +} + +static void PrintMetaInfo(const TabletMeta* meta) { + std::cout << "tablet: " << meta->table_name() << " [" << meta->key_range().key_start() << " (" + << DebugString(meta->key_range().key_start()) << "), " << meta->key_range().key_end() + << " (" << DebugString(meta->key_range().key_end()) << ")], " << meta->path() << ", " + << meta->server_addr() << ", " << meta->size() << ", " + << StatusCodeToString(meta->status()) << ", " + << StatusCodeToString(meta->compact_status()) << std::endl; +} + +CliStatus GetMetaValue(const std::string& meta_server, common::ThreadPool* thread_pool, + const std::string& table_name, const std::string& start_key, + TableMeta* table_meta, TabletMeta* tablet_meta) { + tabletnode::TabletNodeClient read_meta_client(thread_pool, meta_server); + ReadTabletRequest read_request; + ReadTabletResponse read_response; + read_request.set_sequence_id(g_sequence_id++); + read_request.set_tablet_name(FLAGS_tera_master_meta_table_name); + RowReaderInfo* row_info = read_request.add_row_info_list(); + MakeMetaTableKey(table_name, start_key, row_info->mutable_key()); + if (!read_meta_client.ReadTablet(&read_request, &read_response)) { + std::cout << "read tablet failed" << std::endl; + return CliStatus::kError; + } + StatusCode err = read_response.status(); + if (err != tera::kTabletNodeOk) { + std::cerr << "Read meta table response not kTabletNodeOk!"; + return CliStatus::kError; + } + if (read_response.detail().row_result_size() <= 0 || + read_response.detail().row_result(0).key_values_size() <= 0) { + std::cout << "Couldn't read table[" << table_name << "] start_key[" << start_key + << "], suitable for put tablet_meta" << std::endl; + return CliStatus::kNotFound; + } + const KeyValuePair& record = read_response.detail().row_result(0).key_values(0); + char first_key_char = record.key()[0]; + if (first_key_char == '~') { + std::cout << "(user: " << record.key().substr(1) << ")" << std::endl; + } else if (first_key_char == '|') { + // user&passwd&role&permission + } else if (first_key_char == '@') { + ParseMetaTableKeyValue(record.key(), record.value(), table_meta); + std::cout << "ok, you find a table meta info" << std::endl; + } else if (first_key_char > '@') { + ParseMetaTableKeyValue(record.key(), record.value(), tablet_meta); + } else { + std::cerr << "invalid record: " << record.key(); + } + + if (first_key_char <= '@') { + std::cout << "couldn't find tablet meta" << std::endl; + return CliStatus::kNotFound; + } + return CliStatus::kOk; +} + +bool Confirm() { + std::cout << "[Y/N] "; + std::string ensure; + if (!std::getline(std::cin, ensure)) { + std::cout << "Get input error" << std::endl; + return false; + } + if (ensure != "Y") { + return false; + } + return true; +} + +CliStatus GetMeta(const std::string& meta_server, common::ThreadPool* thread_pool, + const std::string& table_name, const std::string& start_key) { + TabletMeta tablet_meta; + TableMeta table_meta; + if (GetMetaValue(meta_server, thread_pool, table_name, start_key, &table_meta, &tablet_meta) != + CliStatus::kOk) { + std::cout << "wrong tablet input" << std::endl; + return CliStatus::kError; + } + PrintMetaInfo(&tablet_meta); + return CliStatus::kOk; +} + +CliStatus DeleteMetaTablet(const std::string& meta_server, common::ThreadPool* thread_pool, + const std::string& table_name, const std::string& start_key) { + TabletMeta tablet_meta; + TableMeta table_meta; + if (GetMetaValue(meta_server, thread_pool, table_name, start_key, &table_meta, &tablet_meta) != + CliStatus::kOk) { + std::cout << "wrong tablet input" << std::endl; + return CliStatus::kError; + } + tabletnode::TabletNodeClient write_meta_client(thread_pool, meta_server); + WriteTabletRequest write_request; + WriteTabletResponse write_response; + write_request.set_sequence_id(g_sequence_id++); + write_request.set_tablet_name(FLAGS_tera_master_meta_table_name); + RowMutationSequence* mu_seq = write_request.add_row_list(); + + std::cout << "Are you sure delete the tablet meta info?" << std::endl; + PrintMetaInfo(&tablet_meta); + if (FLAGS_make_sure_manual && !Confirm()) { + return CliStatus::kError; + } + + std::string row_key; + MakeMetaTableKey(table_name, start_key, &row_key); + mu_seq->set_row_key(row_key); + tera::Mutation* mutation = mu_seq->add_mutation_sequence(); + mutation->set_type(tera::kDeleteRow); + mutation->set_timestamp(kLatestTimestamp); + if (!write_meta_client.WriteTablet(&write_request, &write_response)) { + std::cout << "write tablet failed" << std::endl; + return CliStatus::kError; + } + StatusCode err = write_response.status(); + if (err != tera::kTabletNodeOk) { + std::cerr << "Write meta table response not kTabletNodeOk!"; + return CliStatus::kError; + } + return CliStatus::kOk; +} + +CliStatus ModifyMetaValue(const std::string& meta_server, common::ThreadPool* thread_pool, + const std::string& table_name, const std::string& start_key, + const std::string& type, const std::string& value) { + TabletMeta tablet_meta; + TableMeta table_meta; + if (GetMetaValue(meta_server, thread_pool, table_name, start_key, &table_meta, &tablet_meta) != + CliStatus::kOk) { + std::cout << "wrong tablet input" << std::endl; + return CliStatus::kError; + } + + tabletnode::TabletNodeClient write_meta_client(thread_pool, meta_server); + WriteTabletRequest write_request; + WriteTabletResponse write_response; + write_request.set_sequence_id(g_sequence_id++); + write_request.set_tablet_name(FLAGS_tera_master_meta_table_name); + RowMutationSequence* mu_seq = write_request.add_row_list(); + + if (type == "endkey") { + std::string end_key = value; + std::cout << "Are you sure modify key_end?" << std::endl; + std::cout << "[" << tablet_meta.key_range().key_start() << " (" + << DebugString(tablet_meta.key_range().key_start()) << "), " + << tablet_meta.key_range().key_end() << " (" + << DebugString(tablet_meta.key_range().key_end()) << ")] => "; + tera::KeyRange* key_range = new tera::KeyRange(); + key_range->set_key_start(tablet_meta.key_range().key_start()); + key_range->set_key_end(end_key); + + tablet_meta.clear_key_range(); + tablet_meta.set_allocated_key_range(key_range); + std::cout << "[" << tablet_meta.key_range().key_start() << " (" + << DebugString(tablet_meta.key_range().key_start()) << "), " + << tablet_meta.key_range().key_end() << " (" + << DebugString(tablet_meta.key_range().key_end()) << ")]" << std::endl; + } else { + std::string host = value; + std::cout << "[" << tablet_meta.key_range().key_start() << "(" + << DebugString(tablet_meta.key_range().key_start()) << "), " + << tablet_meta.key_range().key_end() << "(" + << DebugString(tablet_meta.key_range().key_end()) << ")]" << std::endl; + tablet_meta.set_server_addr(host); + } + + if (FLAGS_make_sure_manual && !Confirm()) { + return CliStatus::kError; + } + + std::string row_key; + MakeMetaTableKey(table_name, start_key, &row_key); + mu_seq->set_row_key(row_key); + tera::Mutation* mutation = mu_seq->add_mutation_sequence(); + mutation->set_type(tera::kPut); + + std::string modify_value; + MakeMetaTableValue(tablet_meta, &modify_value); + mutation->set_value(modify_value); + mutation->set_timestamp(kLatestTimestamp); + + if (!write_meta_client.WriteTablet(&write_request, &write_response)) { + std::cout << "write tablet failed" << std::endl; + return CliStatus::kError; + } + StatusCode err = write_response.status(); + if (err != tera::kTabletNodeOk) { + std::cerr << "Write meta table response not kTabletNodeOk!"; + return CliStatus::kError; + } + return CliStatus::kOk; +} + +CliStatus PutTabletMeta(const std::string& meta_server, common::ThreadPool* thread_pool, + const std::string& table_name, const std::string& start_key, + const std::string& end_key, const std::string& tablet_path, + const std::string& server_addr) { + TabletMeta tablet_meta; + TableMeta table_meta; + if (CliStatus::kNotFound != + GetMetaValue(meta_server, thread_pool, table_name, start_key, &table_meta, &tablet_meta)) { + std::cout << "The table#start_key[" << table_name << "#" << DebugString(start_key) + << "] has exist, wrong tablet input" << std::endl; + return CliStatus::kError; + } + + tabletnode::TabletNodeClient write_meta_client(thread_pool, meta_server); + WriteTabletRequest write_request; + WriteTabletResponse write_response; + write_request.set_sequence_id(g_sequence_id++); + write_request.set_tablet_name(FLAGS_tera_master_meta_table_name); + RowMutationSequence* mu_seq = write_request.add_row_list(); + + tablet_meta.set_table_name(table_name); + tablet_meta.set_path(tablet_path); + tablet_meta.set_server_addr(server_addr); + tablet_meta.set_status(TabletMeta::kTabletOffline); + tablet_meta.set_size(1000); + tablet_meta.add_lg_size(1000); + + tera::KeyRange* key_range = new tera::KeyRange(); + key_range->set_key_start(start_key); + key_range->set_key_end(end_key); + tablet_meta.set_allocated_key_range(key_range); + + std::string row_key; + MakeMetaTableKey(table_name, start_key, &row_key); + mu_seq->set_row_key(row_key); + tera::Mutation* mutation = mu_seq->add_mutation_sequence(); + mutation->set_type(tera::kPut); + + std::string tablet_meta_value; + MakeMetaTableValue(tablet_meta, &tablet_meta_value); + mutation->set_value(tablet_meta_value); + mutation->set_timestamp(kLatestTimestamp); + + if (!write_meta_client.WriteTablet(&write_request, &write_response)) { + std::cout << "write tablet failed" << std::endl; + return CliStatus::kError; + } + StatusCode err = write_response.status(); + if (err != tera::kTabletNodeOk) { + std::cerr << "Write meta table response not kTabletNodeOk!"; + return CliStatus::kError; + } + + return CliStatus::kOk; +} + +void WriteToStream(std::ofstream& ofs, const std::string& key, const std::string& value) { + uint32_t key_size = key.size(); + uint32_t value_size = value.size(); + ofs.write((char*)&key_size, sizeof(key_size)); + ofs.write(key.data(), key_size); + ofs.write((char*)&value_size, sizeof(value_size)); + ofs.write(value.data(), value_size); +} + +void WriteTable(const TableMeta& meta, std::ofstream& ofs) { + std::string key, value; + MakeMetaTableKeyValue(meta, &key, &value); + WriteToStream(ofs, key, value); +} + +void WriteTablet(const TabletMeta& meta, std::ofstream& ofs) { + std::string key, value; + MakeMetaTableKeyValue(meta, &key, &value); + WriteToStream(ofs, key, value); +} + +CliStatus ProcessMeta(const std::string& op, const TableMetaList& table_list, + const TabletMetaList& tablet_list, const std::string& start_key, + const std::string& end_key, const std::string& filename) { + int32_t table_num = table_list.meta_size(); + int32_t tablet_num = tablet_list.meta_size(); + if (table_num == 0 && tablet_num == 0) { + std::cout << "meta table is empty" << std::endl; + return CliStatus::kOk; + } + + std::ofstream bak; + if (op == "backup") { + bak.open(filename, std::ofstream::trunc | std::ofstream::binary); + } + + for (int32_t i = 0; i < table_num; ++i) { + const tera::TableMeta& meta = table_list.meta(i); + if (op == "show") { + std::cout << "table: " << meta.table_name() << std::endl; + int32_t lg_size = meta.schema().locality_groups_size(); + for (int32_t lg_id = 0; lg_id < lg_size; lg_id++) { + const tera::LocalityGroupSchema& lg = meta.schema().locality_groups(lg_id); + std::cout << " lg" << lg_id << ": " << lg.name() << " (" << lg.store_type() << ", " + << lg.compress_type() << ", " << lg.block_size() << ")" << std::endl; + } + int32_t cf_size = meta.schema().column_families_size(); + for (int32_t cf_id = 0; cf_id < cf_size; cf_id++) { + const tera::ColumnFamilySchema& cf = meta.schema().column_families(cf_id); + std::cout << " cf" << cf_id << ": " << cf.name() << " (" << cf.locality_group() << ", " + << cf.type() << ", " << cf.max_versions() << ", " << cf.time_to_live() << ")" + << std::endl; + } + } + if (op == "backup") { + WriteTable(meta, bak); + } + } + + tera::TabletMeta last; + bool table_start = true; + for (int32_t i = 0; i < tablet_num; ++i) { + const tera::TabletMeta& meta = tablet_list.meta(i); + if (op == "show") { + std::string internal_startkey = meta.table_name() + "#" + meta.key_range().key_start(); + std::string internal_endkey = meta.table_name() + "#" + meta.key_range().key_end(); + if ((start_key == "" && end_key == "") || + (start_key <= internal_startkey && + (internal_startkey <= end_key || meta.table_name() + "#" == end_key))) { + std::cout << "tablet: " << meta.table_name() << " [" + << DebugString(meta.key_range().key_start()) << "," + << DebugString(meta.key_range().key_end()) << "], " << meta.path() << ", " + << meta.server_addr() << ", " << meta.size() << ", " + << StatusCodeToString(meta.status()) << ", " + << StatusCodeToString(meta.compact_status()) << std::endl; + } + } + if (op == "backup") { + WriteTablet(meta, bak); + } + // check self range + if (!meta.key_range().key_end().empty() && + meta.key_range().key_start() >= meta.key_range().key_end()) { + std::cerr << "invalid tablet " << meta.table_name() << " [" + << DebugString(meta.key_range().key_start()) << "," + << DebugString(meta.key_range().key_end()) << "], " << meta.path() << ", " + << meta.server_addr() << ", " << meta.size() << ", " + << StatusCodeToString(meta.status()) << ", " + << StatusCodeToString(meta.compact_status()) << std::endl; + // ignore invalid tablet + continue; + } + + bool covered = false; + // check miss/cover/overlap with previous tablet + if (!table_start) { + assert(!last.key_range().key_end().empty()); + if (meta.table_name() != last.table_name()) { + std::cerr << "miss tablet: " << last.table_name() << " path " << last.path() << " [" + << DebugString(last.key_range().key_end()) << ",-]" << std::endl; + table_start = true; + } else if (meta.key_range().key_start() > last.key_range().key_end()) { + std::cerr << "miss tablet " << last.table_name() << " last path " << last.path() + << " curr path " << meta.path() << " [" << DebugString(last.key_range().key_end()) + << "," << DebugString(meta.key_range().key_start()) << "]" << std::endl; + } else if (meta.key_range().key_start() == last.key_range().key_end()) { + } else if (!meta.key_range().key_end().empty() && + meta.key_range().key_end() <= last.key_range().key_end()) { + std::cerr << "tablet " << meta.table_name() << " path " << meta.path() << " [" + << DebugString(meta.key_range().key_start()) << "," + << DebugString(meta.key_range().key_end()) << "] is coverd by tablet " + << last.table_name() << " path " << last.path() << " [" + << DebugString(last.key_range().key_start()) << "," + << DebugString(last.key_range().key_end()) << "]" << std::endl; + covered = true; + } else { + std::cerr << "tablet " << meta.table_name() << " path " << meta.path() << " [" + << DebugString(meta.key_range().key_start()) << "," + << DebugString(meta.key_range().key_end()) << "] overlap with tablet " + << last.table_name() << " path " << last.path() << " [" + << DebugString(last.key_range().key_start()) << "," + << DebugString(last.key_range().key_end()) << "]" << std::endl; + } + } + if (table_start) { + if (meta.table_name() == last.table_name()) { + std::cerr << "tablet " << meta.table_name() << " path " << meta.path() << " [" + << DebugString(meta.key_range().key_start()) << "," + << DebugString(meta.key_range().key_end()) << "] is coverd by tablet " + << last.table_name() << " path " << last.path() << " [" + << DebugString(last.key_range().key_start()) << "," + << DebugString(last.key_range().key_end()) << "]" << std::endl; + covered = true; + } else { + if (!meta.key_range().key_start().empty()) { + std::cerr << "Please check the whole KeyRange, maybe miss tablet " << meta.table_name() + << " path " << meta.path() << " [-," + << DebugString(meta.key_range().key_start()) << "]" << std::endl; + } + } + } + // ignore covered tablet + if (!covered) { + last.CopyFrom(meta); + table_start = meta.key_range().key_end().empty(); + } + } + if (op == "backup") { + bak.close(); + } + return CliStatus::kOk; +} + +bool ReadMetaFromStream(std::ifstream& ifs, std::string* key, std::string* value) { + uint32_t key_size = 0, value_size = 0; + ifs.read((char*)&key_size, sizeof(key_size)); + if (ifs.eof() && ifs.gcount() == 0) { + key->clear(); + value->clear(); + return true; + } + key->resize(key_size); + ifs.read((char*)key->data(), key_size); + if (ifs.fail()) { + return false; + } + ifs.read((char*)&value_size, sizeof(value_size)); + if (ifs.fail()) { + return false; + } + value->resize(value_size); + ifs.read((char*)value->data(), value_size); + if (ifs.fail()) { + return false; + } + return true; +} + +int ReadMetaTabletFromFile(const std::string& filename) { + std::ifstream ifs(filename.c_str(), std::ofstream::binary); + if (!ifs.is_open()) { + LOG(INFO) << "fail to open file " << filename << " for read"; + return -1; + } + + std::string key, value; + TabletMeta meta; + std::pair keyrange_bak; + std::string path_bak; + std::string startkey_bak; + std::string endkey_bak; + + while (ReadMetaFromStream(ifs, &key, &value)) { + if (key.empty()) { + return 0; + } + char first_key_char = key[0]; + if (first_key_char == '~' || first_key_char == '@' || first_key_char == '|') { + continue; + } else if (first_key_char > '@') { + ParseMetaTableKeyValue(key, value, &meta); + + if (meta.table_name() == FLAGS_tera_master_meta_table_name) { + LOG(INFO) << "ignore meta tablet record in meta table"; + } else { + path_bak = meta.path(); + startkey_bak = meta.key_range().key_start(); + endkey_bak = meta.key_range().key_end(); + + auto it = g_map_bak.find(path_bak); + if (it == g_map_bak.end()) { + keyrange_bak = make_pair(startkey_bak, endkey_bak); + g_map_bak.insert(make_pair(path_bak, keyrange_bak)); + LOG(INFO) << "read from meta bak file: path" << path_bak << "start" << startkey_bak + << "end" << endkey_bak; + } + } + } + } + + ifs.close(); + LOG(INFO) << "restore meta tablet from meta bak file succ"; + return 0; +} + +int DfsListDir(const std::string& dir_name, std::vector* paths) { + struct stat fstat; + if (0 == g_dfs->Stat(dir_name.c_str(), &fstat) && !(S_IFDIR & fstat.st_mode)) { + LOG(INFO) << "stat dir:" << dir_name << "fail"; + return 0; + } + + if (0 != g_dfs->ListDirectory(dir_name.c_str(), paths)) { + LOG(INFO) << "list dir:" << dir_name << "fail"; + return -1; + } + + return 0; +} + +int DfsGetManifest(const std::string& manifest_path, std::string* start_key, std::string* end_key) { + if (manifest_path == "") { + LOG(INFO) << "fail, Invalid arguments"; + return -1; + } + leveldb::SequentialFile* file; + leveldb::Status s = g_env->NewSequentialFile(manifest_path, &file); + if (!s.ok()) { + LOG(INFO) << "open fail:" << manifest_path << "status: " << s.ToString(); + return -1; + } + + RestoreReporter reporter; + leveldb::log::Reader reader(file, &reporter, true, 0); + // just read the first record + leveldb::Slice record; + std::string scratch; + while (reader.ReadRecord(&record, &scratch)) { + leveldb::VersionEdit edit; + s = edit.DecodeFrom(record); + if (!s.ok()) { + LOG(INFO) << "fail, decode record status:" << s.ToString().c_str(); + } else { + if (edit.HasStartKey() && edit.HasEndKey()) { + *start_key = edit.GetStartKey().c_str(); + *end_key = edit.GetEndKey().c_str(); + break; + } + } + } + delete file; + return 0; +} + +int DfsGetCurrent(const std::string& current_path, std::string* content) { + if (current_path == "") { + LOG(INFO) << "fail, Invalid arguments"; + return -1; + } + + leveldb::DfsFile* file = g_dfs->OpenFile(current_path, leveldb::RDONLY); + if (NULL == file) { + LOG(INFO) << "fail, open current path(" << current_path.c_str() << ") fail"; + return errno; + } + + // MANIFEST-000000, just 15 char + char buf[MANIFEST_LEN] = {0}; + ssize_t ret_size = 0; + ret_size = file->Read(buf, sizeof(buf)); + if (ret_size > 0) { + buf[MANIFEST_LEN - 1] = '\0'; + *content = buf; + } else { + *content = ""; + LOG(INFO) << "fail, read current fail"; + } + + file->CloseFile(); + return 0; +} + +int MapOutPutDiff(std::map>* map_diff) { + // output mutex + MutexLock locker(&g_output_lock); + + std::ofstream ofs("meta.diff", std::ofstream::app); + if (!ofs.is_open()) { + LOG(INFO) << "output diff open file (" + << "meta.diff" + << ") fail"; + return -1; + } + + auto it = map_diff->begin(); + while (it != map_diff->end()) { + g_diff_count++; + // 4 -- keyrange diff, else path diff + if (it->second.size() == 4) { + ofs << "tablet keyrange err: " << it->first; + ofs << "[" << DebugString(it->second[0]) << "," << DebugString(it->second[1]) << "]"; + ofs << "---[" << DebugString(it->second[2]) << "," << DebugString(it->second[3]) << "]"; + } else { + ofs << "tablet miss: " << it->first; + ofs << "[" << DebugString(it->second[0]) << "," << DebugString(it->second[1]) << "]"; + } + ofs << "\n"; + ++it; + } + ofs.close(); + return 0; +} + +uint64_t GetTabletNumFromName(const std::string& tabletname) { + if (tabletname.size() != TABLET_NAME_LEN || tabletname.substr(0, 6).compare("tablet") != 0) { + return 0; + } + std::string num = tabletname.substr(6, TABLET_NUM_LEN); + + uint64_t v = 0; + uint32_t i = 0; + for (; i < num.size(); i++) { + char c = num[i]; + if (c >= '0' && c <= '9') { + const int delta = (c - '0'); + static const uint64_t kMaxUint64 = ~static_cast(0); + if (v > kMaxUint64 / 10 || + (v == kMaxUint64 / 10 && static_cast(delta) > kMaxUint64 % 10)) { + // Overflow + return false; + } + v = (v * 10) + delta; + } else { + break; + } + } + + return v; +} + +void CompareToDiff(std::map>& map_scan, + std::map>* map_diff) { + std::vector keyrange_diff; + // input: > map_bak + // input: > map_scan + auto it_scan = map_scan.begin(); + + while (it_scan != map_scan.end()) { + auto it_bak = g_map_bak.find(it_scan->second.second); + if (it_bak != g_map_bak.end()) { + if (it_scan->first != it_bak->second.first || + it_scan->second.first != it_bak->second.second) { + // output: keyrange err -- > + // map_diff + keyrange_diff.clear(); + // scan keyrange + keyrange_diff.push_back(it_scan->first); + keyrange_diff.push_back(it_scan->second.first); + // bak keyrange + keyrange_diff.push_back(it_bak->second.first); + keyrange_diff.push_back(it_bak->second.second); + + map_diff->insert(make_pair(it_scan->second.second, keyrange_diff)); + } + } else { + // output: tablet err -- > map_diff + keyrange_diff.clear(); + keyrange_diff.push_back(it_scan->first); + keyrange_diff.push_back(it_scan->second.first); + map_diff->insert(make_pair(it_scan->second.second, keyrange_diff)); + } + ++it_scan; + } +} + +void FixTabletOverlap(std::map>& map_scan) { + auto it = map_scan.begin(); + while (it != map_scan.end()) { + // it_pre->first always < it->first + auto it_pre = it; + ++it; + if (it != map_scan.end()) { + /************* remove the overlap ************ + * first, second.first, it->second.second>> + * > + * + * it_pre |------------) + * it |----) + */ + if (it->first < it_pre->second.first) { + LOG(INFO) << "overlap --" + << " tablet path1:" << it_pre->second.second << "startkey1" << it_pre->first + << "endkey1" << it_pre->second.first << "tablet path2" << it->second.second + << "startkey2" << it->first << "endkey2" << it->second.first; + if (it->second.second > it_pre->second.second) { + map_scan.erase(it_pre->first); + } else { + map_scan.erase(it->first); + } + } + } + } +} + +std::string GetTabletName(const std::vector& fail_tablet, uint32_t& index, + uint64_t& tablet_num) { + std::string tablet_name; + std::string tablet_str; + if (index < fail_tablet.size()) { // reuse abnormal tablet num + tablet_name = fail_tablet[index]; + index++; + } else { + tablet_num++; + tablet_str = std::to_string(tablet_num); + // add 0 before num to TABLET_NUM_LEN + int i, gap = TABLET_NUM_LEN - tablet_str.size(); + for (i = 0; i < gap; i++) { + tablet_str = "0" + tablet_str; + } + tablet_name = "tablet" + tablet_str; + } + return tablet_name; +} + +void FixTabletGap(std::map>& map_scan, + const std::vector& fail_tablet, const std::string& table_name, + uint64_t tablet_num) { + uint32_t index = 0; + bool table_start = true; + std::string path_gap; + std::pair keyrange_scan; + + auto it = map_scan.begin(); + + if (map_scan.size() == 1) { + // start not - + if (!it->first.empty()) { + path_gap = table_name + "/" + GetTabletName(fail_tablet, index, tablet_num); + keyrange_scan = make_pair(it->first, path_gap); + map_scan.insert(make_pair("", keyrange_scan)); + } + // end not - + if (!it->second.first.empty()) { + path_gap = table_name + "/" + GetTabletName(fail_tablet, index, tablet_num); + keyrange_scan = make_pair("", path_gap); + map_scan.insert(make_pair(it->second.first, keyrange_scan)); + } + return; + } + + while (it != map_scan.end()) { + // it_pre->first always < it->first + auto it_pre = it; + ++it; + if (it != map_scan.end()) { + if (table_start) { // start not - + table_start = false; + if (!it_pre->first.empty()) { + path_gap = table_name + "/" + GetTabletName(fail_tablet, index, tablet_num); + keyrange_scan = make_pair(it_pre->first, path_gap); + map_scan.insert(make_pair("", keyrange_scan)); + } + } + /************* fix the gap ******************* + * first, second.first, it->second.second>> + * > + * + * it_pre |------) + * it |------) + * ---------------------------------- + * new |---------) + */ + if (it->first > it_pre->second.first) { + path_gap = table_name + "/" + GetTabletName(fail_tablet, index, tablet_num); + keyrange_scan = make_pair(it->first, path_gap); + map_scan.insert(make_pair(it_pre->second.first, keyrange_scan)); + + LOG(INFO) << "gap - path:" << path_gap << "startkey" << it_pre->second.first << "endkey" + << it->first; + } + } else { // end not - + if (!it_pre->second.first.empty()) { + path_gap = table_name + "/" + GetTabletName(fail_tablet, index, tablet_num); + keyrange_scan = make_pair("", path_gap); + map_scan.insert(make_pair(it_pre->second.first, keyrange_scan)); + } + } + } + return; +} + +int GetTabletKeyRange(const std::string& prefix_path, const std::string& table_name, + const std::string& tablet_name, std::string& startkey, std::string& endkey) { + char full_path[4096] = {0}; + std::string manifest_name; + + // read current, eg: prefix/table/tablet/0/CURRENT + snprintf(full_path, sizeof(full_path), "%s/%s/%s/0/CURRENT", prefix_path.c_str(), + table_name.c_str(), tablet_name.c_str()); + int ret = DfsGetCurrent(full_path, &manifest_name); + if (0 != ret) { + LOG(INFO) << "get current(" << full_path << ") fail"; + return ret; + } + + // read manifest, prefix/table/tablet/0/MANIFEST-* + snprintf(full_path, sizeof(full_path), "%s/%s/%s/0/%s", prefix_path.c_str(), table_name.c_str(), + tablet_name.c_str(), manifest_name.c_str()); + ret = DfsGetManifest(full_path, &startkey, &endkey); + if (0 != ret) { + LOG(INFO) << "get manifest(" << full_path << ") fail"; + return ret; + } + + LOG(INFO) << "get manifest(" << full_path << ") succ" + << "table(" << table_name << ")" + << "tablet(" << tablet_name << ")" + << "startkey(" << startkey << ")" + << "endkey(" << endkey << ")"; + return 0; +} + +// get tablet key range +int ScanTabletMeta(const std::string& table_name, const std::string& prefix_path, + std::map>* map_scan, + std::vector* fail_tablet, uint64_t* tablet_num) { + std::vector tablets; + char table_path[512] = {0}; + + snprintf(table_path, sizeof(table_path), "%s/%s", prefix_path.c_str(), table_name.c_str()); + int ret = DfsListDir(table_path, &tablets); + if (0 != ret) { + LOG(INFO) << "get table path fail:" << table_path; + return ret; + } + *tablet_num = GetTabletNumFromName(tablets[tablets.size() - 1]); + LOG(INFO) << "table:" << table_name << "tablet count" << tablets.size() + << "largest tablet_num:" << *tablet_num; + + std::string startkey_scan; + std::string endkey_scan; + std::pair keyrange_scan; + + for (size_t i = 0; i < tablets.size(); i++) { + ret = GetTabletKeyRange(prefix_path, table_name, tablets[i], startkey_scan, endkey_scan); + if (0 != ret) { + LOG(INFO) << "fail table:" << table_name << "tablet:" << tablets[i]; + fail_tablet->push_back(tablets[i]); + continue; + } + // add to tablet scan map + keyrange_scan = make_pair(endkey_scan, table_name + "/" + tablets[i]); + + auto it = map_scan->find(startkey_scan); + if (it == map_scan->end()) { + map_scan->insert(make_pair(startkey_scan, keyrange_scan)); + } else { + // the same startkey, save the tablet num bigger one + LOG(INFO) << "start key overlap : 1." << tablets[i] << "2." << it->second.second; + if (keyrange_scan.second > it->second.second) { + map_scan->erase(it->first); + map_scan->insert(make_pair(startkey_scan, keyrange_scan)); + } + } + } + + LOG(INFO) << "table:" << table_name << "abnormal count" << fail_tablet->size(); + return 0; +} + +// get tablet key range +int ScanAndDiff(const std::string& table_name, const std::string& prefix_path) { + std::map> map_scan; + std::vector fail_tablet; + uint64_t tablet_num; + + int ret = ScanTabletMeta(table_name, prefix_path, &map_scan, &fail_tablet, &tablet_num); + if (0 != ret || map_scan.size() == 0) { + g_thread_count--; + return 0; + } + + FixTabletOverlap(map_scan); + FixTabletGap(map_scan, fail_tablet, table_name, tablet_num); + + // compare the scan meta with the read meta + // > map_diffiff + std::map> map_diff; + CompareToDiff(map_scan, &map_diff); + + // output diff + ret = MapOutPutDiff(&map_diff); + if (0 != ret) { + LOG(ERROR) << "output diff of table(" << table_name << ") fail"; + } + + g_thread_count--; + return 0; +} + +int InitDfsClient() { + if (g_dfs != NULL) { + return 0; + } + if (FLAGS_tera_leveldb_env_dfs_type == "nfs") { + if (access(FLAGS_tera_leveldb_env_nfs_conf_path.c_str(), R_OK) == 0) { + LOG(INFO) << "init nfs system: use configure file" << FLAGS_tera_leveldb_env_nfs_conf_path; + leveldb::Nfs::Init(FLAGS_tera_leveldb_env_nfs_mountpoint, + FLAGS_tera_leveldb_env_nfs_conf_path); + g_dfs = leveldb::Nfs::GetInstance(); + } else { + LOG(ERROR) << "init nfs system: no configure file found"; + return -1; + } + } else if (FLAGS_tera_leveldb_env_dfs_type == "hdfs2") { + LOG(INFO) << "init hdfs2 file system"; + g_dfs = new leveldb::Hdfs2(FLAGS_tera_leveldb_env_hdfs2_nameservice_list); + } else if (FLAGS_tera_leveldb_env_dfs_type == "hdfs") { + g_dfs = new leveldb::Hdfs(); + } else { + LOG(INFO) << "init dfs system: " << FLAGS_tera_dfs_so_path << "(" << FLAGS_tera_dfs_conf << ")"; + g_dfs = leveldb::Dfs::NewDfs(FLAGS_tera_dfs_so_path, FLAGS_tera_dfs_conf); + } + + if (g_dfs == NULL) { + return -1; + } + return 0; +} + +CliStatus AllDiff(const std::string& prefix_path) { + int ret = DfsListDir(prefix_path, &g_tables); + if (0 != ret) { + LOG(ERROR) << "all check fail, get table path fail"; + return CliStatus::kError; + } + + if (g_tables.size() > 0) { + // scan tablet meta and compare, thread pool parallel + g_thread_count = g_tables.size(); + const std::string gc_table("#trackable_gc_trash"); + const std::string trash_table("#trash"); + const std::string meta_table("meta"); + const std::string stat_table("stat_table"); + + ThreadPool thread_pool(g_tables.size()); + for (size_t i = 0; i < g_tables.size(); ++i) { + LOG(INFO) << "table is:(" << g_tables[i] << ")"; + if (g_tables[i] == gc_table || g_tables[i] == trash_table || g_tables[i] == meta_table || + g_tables[i] == stat_table) { + g_thread_count--; + continue; + } + ThreadPool::Task task = std::bind(&ScanAndDiff, g_tables[i], prefix_path); + thread_pool.AddTask(task); + } + + while (g_thread_count > 0) { + LOG(INFO) << get_time_str(time(NULL)) << " " << g_thread_count << "scan and diff ......"; + sleep(5); + } + thread_pool.Stop(true); + } + + if (g_diff_count == 0) { + std::cout << "tables no diff" << std::endl; + } else { + std::cout << "table diff num:" << g_diff_count << ", check the details in ./meta.diff" + << std::endl; + } + return CliStatus::kOk; +} + +CliStatus TableDiff(const std::string& prefix_path, const std::string& table_name) { + int ret = ScanAndDiff(table_name, prefix_path); + if (ret != 0) { + return CliStatus::kError; + } + + if (g_diff_count == 0) { + std::cout << "table:" << table_name << " no diff:" << std::endl; + } else { + std::cout << "table:" << table_name << "diff num:" << g_diff_count + << ", check the details in ./meta.diff" << std::endl; + } + return CliStatus::kOk; +} + +CliStatus TabletDiff(const std::string& prefix_path, const std::string& table_name, + const std::string& tablet_name) { + std::string startkey; + std::string endkey; + std::string path; + std::vector record_diff; + + int ret = GetTabletKeyRange(prefix_path, table_name, tablet_name, startkey, endkey); + if (ret != 0) { + LOG(ERROR) << "tablet check fail:" << table_name << "/" << tablet_name; + return CliStatus::kError; + } + + path = table_name + "/" + tablet_name; + auto it = g_map_bak.find(path); + if (it != g_map_bak.end()) { + if (startkey != it->second.first || endkey != it->second.second) { + // find but keyrange not match + // scan keyrange [startkey,endkey] <--> bak keyrange [it->second.first, it->second.second] + record_diff.push_back(it->second.first); + record_diff.push_back(it->second.second); + std::cout << "tablet keyrange not match:" << path << " [" << startkey << "(" + << DebugString(startkey) << ")," << endkey << "(" << DebugString(endkey) << ") ]" + << "<---> [" << it->second.first << "(" << DebugString(it->second.first) << ")," + << it->second.second << "(" << DebugString(it->second.second) << ") ]" << std::endl; + } else { + std::cout << "tablet no diff:" << table_name << "/" << tablet_name << " [" << startkey << "(" + << DebugString(startkey) << ")," << endkey << "(" << DebugString(endkey) << ") ]" + << std::endl; + } + } else { + // not find + std::cout << "tablet miss: " << path << " [" << startkey << "(" << DebugString(startkey) << ")," + << endkey << "(" << DebugString(endkey) << ") ]" << std::endl; + } + return CliStatus::kOk; +} + +CliStatus MetaInternalOp(const std::string& meta_server, common::ThreadPool* thread_pool, + const std::string& op, const std::string& start_key, + const std::string& end_key, const std::string& filename) { + tabletnode::TabletNodeClient meta_client(thread_pool, meta_server); + TableMeta table_meta; + TableSchema table_schema; + TableMetaList table_list; + TabletMetaList tablet_list; + ScanTabletRequest request; + ScanTabletResponse response; + request.set_sequence_id(g_sequence_id++); + request.set_table_name(FLAGS_tera_master_meta_table_name); + request.set_start(start_key); + request.set_end(end_key); + while (meta_client.ScanTablet(&request, &response)) { + StatusCode err = response.status(); + if (err != tera::kTabletNodeOk) { + std::cerr << "Read meta table response not kTabletNodeOk!"; + return CliStatus::kError; + } + + int32_t record_size = response.results().key_values_size(); + if (record_size <= 0) { + std::cout << "scan meta table success" << std::endl; + break; + } + std::cout << "recode size = " << record_size << std::endl; + std::string last_record_key; + for (int i = 0; i < record_size; ++i) { + const tera::KeyValuePair& record = response.results().key_values(i); + last_record_key = record.key(); + char first_key_char = record.key()[0]; + if (first_key_char == '~') { + std::cout << "(user: " << record.key().substr(1) << ")" << std::endl; + } else if (first_key_char == '|') { + // user&passwd&role&permission + } else if (first_key_char == '@') { + ParseMetaTableKeyValue(record.key(), record.value(), table_list.add_meta()); + } else if (first_key_char > '@') { + ParseMetaTableKeyValue(record.key(), record.value(), tablet_list.add_meta()); + } else { + std::cerr << "invalid record: " << record.key(); + } + } + std::string next_record_key = tera::NextKey(last_record_key); + request.set_start(next_record_key); + request.set_end(end_key); + request.set_sequence_id(g_sequence_id++); + response.Clear(); + } + return ProcessMeta(op, table_list, tablet_list, "", "", filename); +} + +// diff [table_name|tablet_name] +CliStatus MetaDiffOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { + if (0 != InitDfsClient()) { + LOG(ERROR) << "Init Dfs Client fail"; + return CliStatus::kError; + } + g_env = new leveldb::DfsEnv(g_dfs); + + std::ofstream diff_file; + diff_file.open("meta.diff", std::ofstream::trunc); + diff_file.close(); + + // read meta tablets from meta bak file + if (argc == 3) { + const std::string backup_filename = argv[2]; + int ret = ReadMetaTabletFromFile(backup_filename); + if (0 != ret) { + LOG(INFO) << "restore meta from meta bak file fail"; + return CliStatus::kError; + } + return AllDiff(FLAGS_tera_tabletnode_path_prefix); + } else if (argc == 4) { + const std::string backup_filename = argv[3]; + int ret = ReadMetaTabletFromFile(backup_filename); + if (0 != ret) { + LOG(INFO) << "restore meta from meta bak file fail"; + return CliStatus::kError; + } + + std::vector arg_list; + SplitString(argv[2], "/", &arg_list); + if (arg_list.size() == 1) { + const std::string table_name = arg_list[0]; + return TableDiff(FLAGS_tera_tabletnode_path_prefix, table_name); + } else if (arg_list.size() == 2) { + const std::string table_name = arg_list[0]; + const std::string tablet_name = arg_list[1]; + return TabletDiff(FLAGS_tera_tabletnode_path_prefix, table_name, tablet_name); + } + } else { + PrintCmdHelpInfo(argv[1]); + } + return CliStatus::kOk; +} + +// put +CliStatus MetaPutOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { + scoped_ptr finder(tera::sdk::NewClusterFinder()); + const std::string meta_server = finder->RootTableAddr(); + if (argc == 6) { + common::ThreadPool thread_pool(1); + const std::string& tablet_path = argv[2]; + std::vector arg_list; + SplitString(tablet_path, "/", &arg_list); + const std::string& table_name = arg_list[0]; + const std::string& start_key = argv[3]; + const std::string& end_key = argv[4]; + const std::string& server_addr = argv[5]; + return PutTabletMeta(meta_server, &thread_pool, table_name, start_key, end_key, tablet_path, + server_addr); + } else { + PrintCmdHelpInfo(argv[1]); + } + return CliStatus::kOk; +} + +// delete +CliStatus MetaDeleteOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { + scoped_ptr finder(tera::sdk::NewClusterFinder()); + const std::string meta_server = finder->RootTableAddr(); + if (argc == 4) { + common::ThreadPool thread_pool(1); + const std::string& table_name = argv[2]; + const std::string& start_key = argv[3]; + return DeleteMetaTablet(meta_server, &thread_pool, table_name, start_key); + } else { + PrintCmdHelpInfo(argv[1]); + } + return CliStatus::kOk; +} + +// modify +CliStatus MetaModifyOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { + scoped_ptr finder(tera::sdk::NewClusterFinder()); + const std::string meta_server = finder->RootTableAddr(); + if (argc == 6) { + common::ThreadPool thread_pool(1); + const std::string& table_name = argv[2]; + const std::string& start_key = argv[3]; + const std::string type = argv[4]; + const std::string value = argv[5]; + return ModifyMetaValue(meta_server, &thread_pool, table_name, start_key, type, value); + } else { + PrintCmdHelpInfo(argv[1]); + } + return CliStatus::kOk; +} + +// get [inmem] +CliStatus MetaGetOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { + scoped_ptr finder(tera::sdk::NewClusterFinder()); + const std::string meta_server = finder->RootTableAddr(); + if (argc == 4) { + common::ThreadPool thread_pool(1); + const std::string& table_name = argv[2]; + const std::string& start_key = argv[3]; + return GetMeta(meta_server, &thread_pool, table_name, start_key); + } else if (argc == 5 && argv[2] == "inmem") { + const std::string& table_name = argv[3]; + const std::string& start_key = argv[4]; + + TableMeta table_meta; + TabletMetaList tablet_list; + std::shared_ptr client_impl( + (static_cast(client))->GetClientImpl()); + client_impl->ShowTablesInfo(table_name, &table_meta, &tablet_list, err); + + for (int i = 0; i < tablet_list.meta_size(); ++i) { + if (tablet_list.meta(i).key_range().key_start() == start_key) { + PrintMetaInfo(&tablet_list.meta(i)); + break; + } + } + } else { + PrintCmdHelpInfo(argv[1]); + } + return CliStatus::kOk; +} + +// show [inmem] [start_key] [end_key] +CliStatus MetaShowOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { + scoped_ptr finder(tera::sdk::NewClusterFinder()); + const std::string meta_server = finder->RootTableAddr(); + std::string start_key; + std::string end_key; + if (argc == 4 || argc == 2) { + if (argc == 4) { + start_key = argv[2]; + end_key = argv[3]; + } else { + start_key = ""; + end_key = ""; + } + common::ThreadPool thread_pool(1); + return MetaInternalOp(meta_server, &thread_pool, "show", start_key, end_key, ""); + } else if ((argc == 5 || argc == 3) && argv[2] == "inmem") { + if (argc == 5) { + start_key = argv[3]; + end_key = argv[4]; + } else { + start_key = ""; + end_key = ""; + } + TableMetaList table_list; + TabletMetaList tablet_list; + std::shared_ptr client_impl( + (static_cast(client))->GetClientImpl()); + if (!client_impl->ShowTablesInfo(&table_list, &tablet_list, false, err)) { + LOG(ERROR) << "fail to get meta data from tera."; + return CliStatus::kError; + } + return ProcessMeta("show", table_list, tablet_list, start_key, end_key, ""); + } else { + PrintCmdHelpInfo(argv[1]); + } + return CliStatus::kOk; +} + +// backup [inmem] [backup_filename] +CliStatus MetaBackUpOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { + std::string filename; + if ((argc == 3 || argc == 4) && argv[2] == "inmem") { + if (argc == 3) { + filename = "inmem_meta.bak_" + get_curtime_str(); + } else { + filename = argv[3] + "_" + get_curtime_str(); + } + std::shared_ptr client_impl( + (static_cast(client))->GetClientImpl()); + std::vector arg_list; + arg_list.push_back("backup"); + arg_list.push_back(filename); + if (!client->CmdCtrl("meta", arg_list, NULL, NULL, err)) { + LOG(ERROR) << "fail to backup meta"; + return CliStatus::kError; + } + } else if (argc == 2 || argc == 3) { + scoped_ptr finder(tera::sdk::NewClusterFinder()); + const std::string meta_server = finder->RootTableAddr(); + if (argc == 2) { + filename = "meta.bak_" + get_curtime_str(); + } else { + filename = argv[2] + "_" + get_curtime_str(); + } + common::ThreadPool thread_pool(1); + std::string start_key(""); + std::string end_key(""); + MetaInternalOp(meta_server, &thread_pool, "backup", start_key, end_key, filename); + } else { + PrintCmdHelpInfo(argv[1]); + } + return CliStatus::kOk; +} + +// healthcheck [inmem] [start_key] [end_key] +CliStatus MetaHealthCheckOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { + if (0 != InitDfsClient()) { + LOG(ERROR) << "Init Dfs Client fail"; + return CliStatus::kError; + } + g_env = new leveldb::DfsEnv(g_dfs); + + scoped_ptr finder(tera::sdk::NewClusterFinder()); + const std::string meta_server = finder->RootTableAddr(); + std::string start_key; + std::string end_key; + + if (argc == 4 || argc == 2) { + if (argc == 4) { + start_key = argv[2]; + end_key = argv[3]; + } else { + start_key = ""; + end_key = ""; + } + common::ThreadPool thread_pool(1); + return MetaInternalOp(meta_server, &thread_pool, "healtchcheck", start_key, end_key, ""); + } else if ((argc == 5 || argc == 3) && argv[2] == "inmem") { + if (argc == 5) { + start_key = argv[3]; + end_key = argv[4]; + } else { + start_key = ""; + end_key = ""; + } + + TableMetaList table_list; + TabletMetaList tablet_list; + std::shared_ptr client_impl( + (static_cast(client))->GetClientImpl()); + if (!client_impl->ShowTablesInfo(&table_list, &tablet_list, false, err)) { + LOG(ERROR) << "fail to get meta data from tera."; + return CliStatus::kError; + } + return ProcessMeta("healthcheck", table_list, tablet_list, start_key, end_key, ""); + } else { + PrintCmdHelpInfo(argv[1]); + } + return CliStatus::kOk; +} + +// conv +CliStatus MetaConvOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { + if (argc == 3 && argv[1] == "conv") { + const std::string& readable_key = argv[2]; + std::cout << DebugString(readable_key) << " => " << readable_key << std::endl; + } else { + PrintCmdHelpInfo(argv[1]); + } + return CliStatus::kOk; +} + +CliStatus ShowUgiOp(std::shared_ptr client_impl, int32_t argc, std::string* argv, + ErrorCode* err) { + tera::UserVerificationInfoList user_verification_info_list; + if (!client_impl->ShowUgi(&user_verification_info_list, err)) { + LOG(ERROR) << "show ugi failed!" << err->ToString(); + return CliStatus::kError; + } + std::cout << "Show ugi : " << std::endl; + std::cout << "\tuser_name\tpasswd\troles" << std::endl; + for (auto it = user_verification_info_list.begin(); it != user_verification_info_list.end(); + ++it) { + std::cout << "\t" << it->first << "\t" << it->second.first << "\t"; + for (auto& role : it->second.second) { + std::cout << role << ","; + } + std::cout << std::endl; + } + return CliStatus::kOk; +} + +CliStatus UgiOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { + if (argc < 3) { + PrintCmdHelpInfo(argv[1]); + return CliStatus::kError; + } + const std::string& op = argv[2]; + std::shared_ptr client_impl( + (static_cast(client))->GetClientImpl()); + + if (argc == 5 && op == "update") { + const std::string& user_name = argv[3]; + const std::string& passwd = argv[4]; + if (!client_impl->UpdateUgi(user_name, passwd, err)) { + LOG(ERROR) << "update ugi failed!" << err->ToString(); + return CliStatus::kError; + } + } else if (argc == 4 && op == "del") { + const std::string& user_name = argv[3]; + if (!client_impl->DelUgi(user_name, err)) { + LOG(ERROR) << "delete ugi failed!" << err->ToString(); + return CliStatus::kError; + } + } else if (argc == 3 && op == "show") { + return ShowUgiOp(client_impl, argc, argv, err); + } else { + PrintCmdHelpInfo(argv[1]); + return CliStatus::kError; + } + std::cout << "Ugi " << op << " success" << std::endl; + return CliStatus::kOk; +} + +CliStatus ShowRoleOp(const std::shared_ptr& client_impl, int32_t argc, + std::string* argv, ErrorCode* err) { + std::vector roles_list; + if (!client_impl->ShowRole(&roles_list, err)) { + LOG(ERROR) << "show roles failed!" << err->ToString(); + return CliStatus::kError; + } + std::cout << "Show role : " << std::endl; + for (auto it = roles_list.begin(); it != roles_list.end(); ++it) { + std::cout << *it << std::endl; + } + return CliStatus::kOk; +} + +CliStatus RoleOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { + if (argc < 3) { + PrintCmdHelpInfo(argv[1]); + return CliStatus::kError; + } + const std::string& op = argv[2]; + std::shared_ptr client_impl( + (static_cast(client))->GetClientImpl()); + + if (argc == 5) { + const std::string& role_name = argv[3]; + const std::string& user_name = argv[4]; + if (op == "grant") { + if (!client_impl->GrantRole(role_name, user_name, err)) { + LOG(ERROR) << "grant role failed!" << err->ToString(); + return CliStatus::kError; + } + } else if (op == "revoke") { + if (!client_impl->RevokeRole(role_name, user_name, err)) { + LOG(ERROR) << "revoke role failed!" << err->ToString(); + return CliStatus::kError; + } + } else { + PrintCmdHelpInfo(argv[1]); + return CliStatus::kError; + } + } else if (argc == 4) { + const std::string& role_name = argv[3]; + if (op == "add") { + if (!client_impl->AddRole(role_name, err)) { + LOG(ERROR) << "add role failed!" << err->ToString(); + return CliStatus::kError; + } + } else if (op == "del") { + if (!client_impl->DelRole(role_name, err)) { + LOG(ERROR) << "del role failed!" << err->ToString(); + return CliStatus::kError; + } + } else { + PrintCmdHelpInfo(argv[1]); + return CliStatus::kError; + } + } else if (argc == 3 && op == "show") { + return ShowRoleOp(client_impl, argc, argv, err); + } else { + PrintCmdHelpInfo(argv[1]); + return CliStatus::kError; + } + std::cout << "role " << op << " success" << std::endl; + return CliStatus::kOk; +} + +CliStatus ShowAuthPolicyOp(const std::shared_ptr& client_impl, ErrorCode* err) { + std::map table_auth_policy_list; + if (!client_impl->ShowAuthPolicy(&table_auth_policy_list, err)) { + LOG(ERROR) << "show auth_policy failed!" << err->ToString(); + return CliStatus::kError; + } + std::cout << "TableName\tAuthType" << std::endl; + for (auto it = table_auth_policy_list.begin(); it != table_auth_policy_list.end(); ++it) { + std::cout << it->first << "\t" << it->second << std::endl; + } + return CliStatus::kOk; +} + +CliStatus AuthOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { + if (argc < 3) { + PrintCmdHelpInfo(argv[1]); + return CliStatus::kError; + } + const std::string& op = argv[2]; + std::shared_ptr client_impl( + (static_cast(client))->GetClientImpl()); + if (argc == 5 && op == "set") { + const std::string& table_name = argv[3]; + const std::string& auth_policy = argv[4]; + if (!client_impl->SetAuthPolicy(table_name, auth_policy, err)) { + LOG(ERROR) << "set auth policy failed!" << err->ToString(); + return CliStatus::kError; + } + } else if (argc == 3 && op == "show") { + return ShowAuthPolicyOp(client_impl, err); + } else { + PrintCmdHelpInfo(argv[1]); + return CliStatus::kError; + } + std::cout << "auth " << op << " success" << std::endl; + return CliStatus::kOk; +} + +CliStatus DfsThroughputHardLimitOp(Client* client, int32_t argc, std::string* argv, + ErrorCode* err) { + if (argc != 4 && argc != 3) { + PrintCmdHelpInfo(argv[1]); + return CliStatus::kError; + } + + std::string op = argv[2]; + if (argc == 4 && op != "write" && op != "read") { + PrintCmdHelpInfo(argv[1]); + return CliStatus::kError; + } + + if (argc == 3 && op != "get") { + PrintCmdHelpInfo(argv[1]); + return CliStatus::kError; + } + + std::vector arg_list; + arg_list.push_back(op); + if (op != "get") { + std::string value = argv[3]; + if (value == "reset") { + arg_list.push_back("-1"); + } else { + try { + // check argument valid + std::stol(argv[3]); + } catch (...) { + std::cout << "Convert " << argv[3] << " to number failed."; + PrintCmdHelpInfo(argv[1]); + return CliStatus::kError; + } + arg_list.push_back(argv[3]); + } + } + std::string result; + if (!client->CmdCtrl("dfs-hard-limit", arg_list, nullptr, &result, err)) { + std::cout << "Fail to run dfs-quota " << op << ": " << result << std::endl; + return CliStatus::kError; + } + + std::cout << result << std::endl; + return CliStatus::kOk; +} + +CliStatus ProcedureLimitOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { + if (argc != 3 && argc != 5) { + PrintCmdHelpInfo(argv[1]); + return CliStatus::kError; + } + std::string op = argv[2]; + if (argc == 3 && op != "get") { + PrintCmdHelpInfo(argv[1]); + return CliStatus::kError; + } + if (argc == 5 && op != "set") { + PrintCmdHelpInfo(argv[1]); + return CliStatus::kError; + } + std::vector arg_list; + arg_list.push_back(op); + if (op == "set") { + arg_list.push_back(argv[3]); + arg_list.push_back(argv[4]); + } + + std::string result; + if (!client->CmdCtrl("procedure-limit", arg_list, nullptr, &result, err)) { + std::cout << "Fail to run procudure-limit " << op << ": " << result << std::endl; + return CliStatus::kError; + } + std::cout << result << std::endl; + return CliStatus::kOk; +} + +// return false if similar command(s) not found +static bool PromptSimilarCmd(const char* msg) { + if (msg == NULL) { + return false; + } + bool found = false; + int64_t len = strlen(msg); + int64_t threshold = int64_t((len * 0.3 < 3) ? 3 : len * 0.3); + int count = sizeof(builtin_cmd_list) / sizeof(char*); + for (int i = 0; i < count; i += 2) { + if (EditDistance(msg, builtin_cmd_list[i]) <= threshold) { + if (!found) { + std::cout << "Did you mean:" << std::endl; + found = true; + } + std::cout << " " << builtin_cmd_list[i] << std::endl; + } + } + return found; +} + +static void PrintUnknownCmdHelpInfo(const char* msg) { + if (msg != NULL) { + std::cout << "'" << msg << "' is not a valid command." << std::endl + << std::endl; + } + if ((msg != NULL) && PromptSimilarCmd(msg)) { + return; + } + PrintAllCmd(); +} + +static void InitializeCommandTable() { + CommandTable& command_table = GetCommandTable(); + command_table["diff"] = MetaDiffOp; + command_table["put"] = MetaPutOp; + command_table["delete"] = MetaDeleteOp; + command_table["modify"] = MetaModifyOp; + command_table["get"] = MetaGetOp; + command_table["show"] = MetaShowOp; + command_table["backup"] = MetaBackUpOp; + command_table["healthcheck"] = MetaHealthCheckOp; + command_table["conv"] = MetaConvOp; + command_table["ugi"] = UgiOp; + command_table["role"] = RoleOp; + command_table["auth"] = AuthOp; + command_table["procedure-limit"] = ProcedureLimitOp; + command_table["help"] = HelpOp; + command_table["dfs-throughput-limit"] = DfsThroughputHardLimitOp; +} + +CliStatus ExecuteCommand(Client* client, int argc, char** arg_list) { + CliStatus ret = CliStatus::kOk; + ErrorCode error_code; + + std::vector parsed_arg_list; + if (!ParseCommand(argc, arg_list, &parsed_arg_list)) { + return CliStatus::kError; + } + std::string* argv = &parsed_arg_list[0]; + + CommandTable& command_table = GetCommandTable(); + std::string cmd = argv[1]; + if (cmd == "version") { + PrintSystemVersion(); + } else if (command_table.find(cmd) != command_table.end()) { + ret = command_table[cmd](client, argc, argv, &error_code); + } else { + PrintUnknownCmdHelpInfo(argv[1].c_str()); + ret = CliStatus::kError; + } + + if (error_code.GetType() != ErrorCode::kOK) { + LOG(ERROR) << "fail reason: " << error_code.ToString(); + } + return ret; +} + +int main(int argc, char* argv[]) { + FLAGS_minloglevel = 2; + ::google::ParseCommandLineFlags(&argc, &argv, true); + + if (argc > 1 && std::string(argv[1]) == "version") { + PrintSystemVersion(); + return 0; + } else if (argc > 1 && std::string(argv[1]) == "help") { + HelpOp(argc, argv); + return 0; + } + + if (FLAGS_flagfile == "") { + FLAGS_flagfile = "../conf/tera.flag"; + if (access(FLAGS_flagfile.c_str(), R_OK) != 0) { + FLAGS_flagfile = "./tera.flag"; + } + utils::LoadFlagFile(FLAGS_flagfile); + } + if (FLAGS_meta_cli_token != g_metacli_token) { + std::cout << "Please figure out what metacli is before use it." << std::endl; + return -1; + } + + Client* client = Client::NewClient(FLAGS_flagfile, NULL); + if (client == NULL) { + LOG(ERROR) << "client instance not exist"; + return -1; + } + + InitializeCommandTable(); + + CliStatus ret = CliStatus::kOk; + if (argc == 1) { + char* line = NULL; + while ((line = readline("meta> ")) != NULL) { + char* line_copy = strdup(line); + std::vector arg_list; + arg_list.push_back(argv[0]); + char* tmp = NULL; + char* token = strtok_r(line, " \t", &tmp); + while (token != NULL) { + arg_list.push_back(token); + token = strtok_r(NULL, " \t", &tmp); + } + if (arg_list.size() == 2 && + (strcmp(arg_list[1], "quit") == 0 || strcmp(arg_list[1], "exit") == 0)) { + free(line_copy); + free(line); + break; + } + if (arg_list.size() > 1) { + add_history(line_copy); + ret = ExecuteCommand(client, arg_list.size(), &arg_list[0]); + } + free(line_copy); + free(line); + } + } else { + ret = ExecuteCommand(client, argc, argv); + } + + delete client; + return ((ret == CliStatus::kOk) ? 0 : -1); +} diff --git a/src/benchmark/eva/eva_var.py b/src/benchmark/eva/eva_var.py index 4ca65ab4b..65a3fe8ea 100644 --- a/src/benchmark/eva/eva_var.py +++ b/src/benchmark/eva/eva_var.py @@ -107,7 +107,7 @@ def __init__(self): self.SENDMAIL = '/usr/sbin/sendmail' self.MAIL_PATH = '../tmp/mail_report' self.WEB_PATH = '../tmp/web_report' - self.MAIL_HEADER = 'Sender: tera_eva \nTo: tera_dev \n\ + self.MAIL_HEADER = 'Sender: tera_eva \nTo: tera-user \n\ Content-type: text/html\nSubject: EVA report\n\n' self.g_query_thread = None diff --git a/src/benchmark/mark.cc b/src/benchmark/mark.cc index bdbce87cf..fcefd8b40 100644 --- a/src/benchmark/mark.cc +++ b/src/benchmark/mark.cc @@ -10,383 +10,477 @@ DECLARE_bool(verify); DECLARE_int32(buf_size); +DECLARE_int64(batch_size); int64_t Now() { - struct timeval now; - gettimeofday(&now, NULL); - return now.tv_sec * 1000000 + now.tv_usec; + struct timeval now; + gettimeofday(&now, NULL); + return now.tv_sec * 1000000 + now.tv_usec; } class Context { -public: - Adapter* adapter; - size_t size; - int64_t time; + public: + Adapter* adapter; + size_t size; + int64_t time; - Context(Adapter* a, size_t s, int64_t t) - : adapter(a), size(s), time(t) {} + Context(Adapter* a, size_t s, int64_t t) : adapter(a), size(s), time(t) {} }; Adapter::Adapter(tera::Table* table) : table_(table), write_marker_(PUT), read_marker_(GET), - scan_marker_(SCN) { - pthread_mutex_init(&mutex_, NULL); - pthread_cond_init(&cond_, NULL); + scan_marker_(SCN), + batch_write_marker_(BPUT), + one_batch_(nullptr) { + pthread_mutex_init(&mutex_, NULL); + pthread_cond_init(&cond_, NULL); } Adapter::~Adapter() { - pthread_mutex_destroy(&mutex_); - pthread_cond_destroy(&cond_); + pthread_mutex_destroy(&mutex_); + pthread_cond_destroy(&cond_); } void sdk_write_callback(tera::RowMutation* row_mu) { - Context* ctx = (Context*)row_mu->GetContext(); - Adapter* adapter = ctx->adapter; - size_t req_size = ctx->size; - int64_t req_time = ctx->time; - adapter->WriteCallback(row_mu, req_size, req_time); - delete ctx; + Context* ctx = (Context*)row_mu->GetContext(); + Adapter* adapter = ctx->adapter; + size_t req_size = ctx->size; + int64_t req_time = ctx->time; + adapter->WriteCallback(row_mu, req_size, req_time); + delete ctx; } void Adapter::Write(int opt, const std::string& row, - std::map >& column, - uint64_t timestamp, + std::map >& column, uint64_t timestamp, std::string& value) { - tera::RowMutation* row_mu = table_->NewRowMutation(row); - size_t req_size = row.size(); - - if (column.size() == 0) { - column[""].insert(""); + tera::RowMutation* row_mu = table_->NewRowMutation(row); + size_t req_size = row.size(); + + if (column.size() == 0) { + column[""].insert(""); + } + std::map >::iterator it; + for (it = column.begin(); it != column.end(); ++it) { + const std::string& family = it->first; + std::set& qualifiers = it->second; + if (qualifiers.size() == 0) { + qualifiers.insert(""); } - std::map >::iterator it; - for (it = column.begin(); it != column.end(); ++it) { - const std::string& family = it->first; - std::set& qualifiers = it->second; - if (qualifiers.size() == 0) { - qualifiers.insert(""); - } - std::set::const_iterator it2; - for (it2 = qualifiers.begin(); it2 != qualifiers.end(); ++it2) { - const std::string& qualifier = *it2; - req_size += family.size() + qualifier.size() + sizeof(timestamp); - req_size += value.size(); - if (FLAGS_verify) { - add_checksum(row, family, qualifier, &value); - } - if (opt == PUT) { - row_mu->Put(family, qualifier, value, (int64_t)timestamp); - } else if (opt == PIF) { - row_mu->PutIfAbsent(family, qualifier, value); - } else { - abort(); - } - if (FLAGS_verify) { - remove_checksum(&value); - } - } + std::set::const_iterator it2; + for (it2 = qualifiers.begin(); it2 != qualifiers.end(); ++it2) { + const std::string& qualifier = *it2; + req_size += family.size() + qualifier.size() + sizeof(timestamp); + req_size += value.size(); + if (FLAGS_verify) { + add_checksum(row, family, qualifier, &value); + } + if (opt == PUT) { + row_mu->Put(family, qualifier, value, (int64_t)timestamp); + } else if (opt == PIF) { + row_mu->PutIfAbsent(family, qualifier, value); + } else { + abort(); + } + if (FLAGS_verify) { + remove_checksum(&value); + } } + } - write_marker_.CheckPending(); - write_marker_.CheckLimit(); - write_marker_.OnReceive(req_size); - pending_num_.Inc(); + write_marker_.CheckPending(); + write_marker_.CheckLimit(); + write_marker_.OnReceive(req_size); + pending_num_.Inc(); - if (type == ASYNC) { - int64_t req_time = Now(); - Context* ctx = new Context(this, req_size, req_time); - row_mu->SetCallBack(sdk_write_callback); - row_mu->SetContext(ctx); - table_->ApplyMutation(row_mu); - } else { - sync_mutations_.push_back(row_mu); - sync_req_sizes_.push_back(req_size); - if (sync_mutations_.size() >= static_cast(FLAGS_batch_count)) { - CommitSyncWrite(); - } + if (type == ASYNC) { + int64_t req_time = Now(); + Context* ctx = new Context(this, req_size, req_time); + row_mu->SetCallBack(sdk_write_callback); + row_mu->SetContext(ctx); + table_->ApplyMutation(row_mu); + } else { + sync_mutations_.push_back(row_mu); + sync_req_sizes_.push_back(req_size); + if (sync_mutations_.size() >= static_cast(FLAGS_batch_count)) { + CommitSyncWrite(); } + } } void Adapter::CommitSyncWrite() { - if (sync_mutations_.size() == 0) { - return; - } - CHECK_EQ(sync_mutations_.size(), sync_req_sizes_.size()); - int64_t req_time = Now(); - table_->ApplyMutation(sync_mutations_); - for (size_t i = 0; i < sync_mutations_.size(); i++) { - WriteCallback(sync_mutations_[i], sync_req_sizes_[i], req_time); - } - sync_mutations_.clear(); - sync_req_sizes_.clear(); + if (sync_mutations_.size() == 0) { + return; + } + CHECK_EQ(sync_mutations_.size(), sync_req_sizes_.size()); + int64_t req_time = Now(); + table_->ApplyMutation(sync_mutations_); + for (size_t i = 0; i < sync_mutations_.size(); i++) { + WriteCallback(sync_mutations_[i], sync_req_sizes_[i], req_time); + } + sync_mutations_.clear(); + sync_req_sizes_.clear(); } -void Adapter::WriteCallback(tera::RowMutation* row_mu, size_t req_size, - int64_t req_time) { - uint32_t latency = (Now() - req_time) / 1000; - write_marker_.OnFinish(req_size, latency); - tera::ErrorCode err = row_mu->GetError(); - if (err.GetType() == tera::ErrorCode::kOK) { - write_marker_.OnSuccess(req_size, latency); - } else if (err.GetType() == tera::ErrorCode::kTxnFail) { - write_marker_.OnConflict(req_size, latency); - } else { - /*std::cerr << "fail to write: row=[" << row << "], column=[" - << family << ":" << qualifier << "], timestamp=[" - << timestamp << "], value=[" << value << "], status=" - << tera::strerr(err) << std::endl;*/ - } - delete row_mu; - - if (0 == pending_num_.Dec()) { - pthread_mutex_lock(&mutex_); - pthread_cond_signal(&cond_); - pthread_mutex_unlock(&mutex_); - } +void Adapter::WriteCallback(tera::RowMutation* row_mu, size_t req_size, int64_t req_time) { + uint32_t latency = (Now() - req_time) / 1000; + write_marker_.OnFinish(req_size, latency); + tera::ErrorCode err = row_mu->GetError(); + if (err.GetType() == tera::ErrorCode::kOK) { + write_marker_.OnSuccess(req_size, latency); + } else if (err.GetType() == tera::ErrorCode::kTxnFail) { + write_marker_.OnConflict(req_size, latency); + } else { + /*std::cerr << "fail to write: row=[" << row << "], column=[" + << family << ":" << qualifier << "], timestamp=[" + << timestamp << "], value=[" << value << "], status=" + << tera::strerr(err) << std::endl;*/ + } + delete row_mu; + + if (0 == pending_num_.Dec()) { + pthread_mutex_lock(&mutex_); + pthread_cond_signal(&cond_); + pthread_mutex_unlock(&mutex_); + } } void sdk_read_callback(tera::RowReader* row_rd) { - Context* ctx = (Context*)row_rd->GetContext(); - Adapter* adapter = ctx->adapter; - size_t req_size = ctx->size; - int64_t req_time = ctx->time; - adapter->ReadCallback(row_rd, req_size, req_time); - delete ctx; + Context* ctx = (Context*)row_rd->GetContext(); + Adapter* adapter = ctx->adapter; + size_t req_size = ctx->size; + int64_t req_time = ctx->time; + adapter->ReadCallback(row_rd, req_size, req_time); + delete ctx; } void Adapter::Read(const std::string& row, - const std::map >& column, - uint64_t largest_ts, uint64_t smallest_ts) { - tera::RowReader* reader = table_->NewRowReader(row); - size_t req_size = row.size(); - - std::map >::const_iterator it; - for (it = column.begin(); it != column.end(); ++it) { - const std::string& family = it->first; - const std::set& qualifiers = it->second; - if (qualifiers.size() == 0) { - reader->AddColumnFamily(family); - req_size += family.size(); - } else { - std::set::const_iterator it2; - for (it2 = qualifiers.begin(); it2 != qualifiers.end(); ++it2) { - const std::string& qualifier = *it2; - reader->AddColumn(family, qualifier); - req_size += family.size() + qualifier.size(); - } - } + const std::map >& column, uint64_t largest_ts, + uint64_t smallest_ts) { + tera::RowReader* reader = table_->NewRowReader(row); + size_t req_size = row.size(); + + std::map >::const_iterator it; + for (it = column.begin(); it != column.end(); ++it) { + const std::string& family = it->first; + const std::set& qualifiers = it->second; + if (qualifiers.size() == 0) { + reader->AddColumnFamily(family); + req_size += family.size(); + } else { + std::set::const_iterator it2; + for (it2 = qualifiers.begin(); it2 != qualifiers.end(); ++it2) { + const std::string& qualifier = *it2; + reader->AddColumn(family, qualifier); + req_size += family.size() + qualifier.size(); + } } - reader->SetTimeRange(smallest_ts, largest_ts); - req_size += sizeof(smallest_ts) + sizeof(largest_ts); + } + reader->SetTimeRange(smallest_ts, largest_ts); + req_size += sizeof(smallest_ts) + sizeof(largest_ts); - read_marker_.CheckPending(); - read_marker_.CheckLimit(); - read_marker_.OnReceive(req_size); - pending_num_.Inc(); + read_marker_.CheckPending(); + read_marker_.CheckLimit(); + read_marker_.OnReceive(req_size); + pending_num_.Inc(); - if (type == ASYNC) { - int64_t req_time = Now(); - Context* ctx = new Context(this, req_size, req_time); - reader->SetCallBack(sdk_read_callback); - reader->SetContext(ctx); - table_->Get(reader); - } else { - sync_readers_.push_back(reader); - sync_req_sizes_.push_back(req_size); - if (sync_readers_.size() >= static_cast(FLAGS_batch_count)) { - CommitSyncRead(); - } + if (type == ASYNC) { + int64_t req_time = Now(); + Context* ctx = new Context(this, req_size, req_time); + reader->SetCallBack(sdk_read_callback); + reader->SetContext(ctx); + table_->Get(reader); + } else { + sync_readers_.push_back(reader); + sync_req_sizes_.push_back(req_size); + if (sync_readers_.size() >= static_cast(FLAGS_batch_count)) { + CommitSyncRead(); } + } } void Adapter::CommitSyncRead() { - if (sync_readers_.size() == 0) { - return; - } - CHECK_EQ(sync_readers_.size(), sync_req_sizes_.size()); - int64_t req_time = Now(); - table_->Get(sync_readers_); - for (size_t i = 0; i < sync_readers_.size(); i++) { - ReadCallback(sync_readers_[i], sync_req_sizes_[i], req_time); - } - sync_readers_.clear(); - sync_req_sizes_.clear(); + if (sync_readers_.size() == 0) { + return; + } + CHECK_EQ(sync_readers_.size(), sync_req_sizes_.size()); + int64_t req_time = Now(); + table_->Get(sync_readers_); + for (size_t i = 0; i < sync_readers_.size(); i++) { + ReadCallback(sync_readers_[i], sync_req_sizes_[i], req_time); + } + sync_readers_.clear(); + sync_req_sizes_.clear(); } -void Adapter::ReadCallback(tera::RowReader* reader, size_t req_size, - int64_t req_time) { - uint32_t latency = (Now() - req_time) / 1000; - read_marker_.OnFinish(req_size, latency); - const std::string& row = reader->RowName(); - tera::ErrorCode err = reader->GetError(); - if (err.GetType() == tera::ErrorCode::kOK) { - bool all_verified = true; - while (!reader->Done()) { - std::string cf = reader->Family(); - std::string cq = reader->Qualifier(); - int64_t ts = reader->Timestamp(); - std::string value = reader->Value(); - - bool is_verified = (!FLAGS_verify) || verify_checksum(row, cf, cq, value); - if (!is_verified) { - all_verified = false; - std::cerr << "fail to pass md5 verifying: row=[" << row << "], column=[" - << cf << ":" << cq << "], timestamp=[" << ts << "]" << std::endl; - } - reader->Next(); - } - if (all_verified) { - read_marker_.OnSuccess(req_size, latency); - } - } else { - std::cerr << "fail to read: row=[" << row << "], column=["; - const tera::RowReader::ReadColumnList& read_list = reader->GetReadColumnList(); - std::map >::const_iterator it; - bool first_cf = true; - for (it = read_list.begin(); it != read_list.end(); ++it) { - const std::string& family = it->first; - const std::set& qualifiers = it->second; - if (first_cf) { - first_cf = false; - } else { - std::cerr << ";"; - } - std::cerr << family; - std::set::const_iterator it2; - bool first_cq = true; - for (it2 = qualifiers.begin(); it2 != qualifiers.end(); ++it2) { - const std::string& qualifier = *it2; - if (first_cq) { - first_cq = false; - std::cerr << ":"; - } else { - std::cerr << ","; - } - std::cerr << qualifier; - } +void Adapter::ReadCallback(tera::RowReader* reader, size_t req_size, int64_t req_time) { + uint32_t latency = (Now() - req_time) / 1000; + read_marker_.OnFinish(req_size, latency); + const std::string& row = reader->RowName(); + tera::ErrorCode err = reader->GetError(); + if (err.GetType() == tera::ErrorCode::kOK) { + bool all_verified = true; + while (!reader->Done()) { + std::string cf = reader->Family(); + std::string cq = reader->Qualifier(); + int64_t ts = reader->Timestamp(); + std::string value = reader->Value(); + + bool is_verified = (!FLAGS_verify) || verify_checksum(row, cf, cq, value); + if (!is_verified) { + all_verified = false; + std::cerr << "fail to pass md5 verifying: row=[" << row << "], column=[" << cf << ":" << cq + << "], timestamp=[" << ts << "]" << std::endl; + } + reader->Next(); + } + if (all_verified) { + read_marker_.OnSuccess(req_size, latency); + } + } else { + std::cerr << "fail to read: row=[" << row << "], column=["; + const tera::RowReader::ReadColumnList& read_list = reader->GetReadColumnList(); + std::map >::const_iterator it; + bool first_cf = true; + for (it = read_list.begin(); it != read_list.end(); ++it) { + const std::string& family = it->first; + const std::set& qualifiers = it->second; + if (first_cf) { + first_cf = false; + } else { + std::cerr << ";"; + } + std::cerr << family; + std::set::const_iterator it2; + bool first_cq = true; + for (it2 = qualifiers.begin(); it2 != qualifiers.end(); ++it2) { + const std::string& qualifier = *it2; + if (first_cq) { + first_cq = false; + std::cerr << ":"; + } else { + std::cerr << ","; } - std::cerr << "], timestamp=[" << reader->GetTimestamp() - << "], status=" << tera::strerr(err) << ":" << err.GetReason() << std::endl; + std::cerr << qualifier; + } } - delete reader; + std::cerr << "], timestamp=[" << reader->GetTimestamp() << "], status=" << tera::strerr(err) + << ":" << err.GetReason() << std::endl; + } + delete reader; - if (0 == pending_num_.Dec()) { - pthread_mutex_lock(&mutex_); - pthread_cond_signal(&cond_); - pthread_mutex_unlock(&mutex_); - } + if (0 == pending_num_.Dec()) { + pthread_mutex_lock(&mutex_); + pthread_cond_signal(&cond_); + pthread_mutex_unlock(&mutex_); + } } -void Adapter::Delete(const std::string& row, - std::map >& column, - uint64_t ts) { - tera::RowMutation* row_mu = table_->NewRowMutation(row); - size_t req_size = row.size(); +void sdk_batch_write_callback(tera::BatchMutation* batch_mu) { + Context* ctx = (Context*)batch_mu->GetContext(); + Adapter* adapter = ctx->adapter; + size_t req_size = ctx->size; + int64_t req_time = ctx->time; + adapter->BatchWriteCallback(batch_mu, req_size, req_time); + delete ctx; +} - if (column.size() == 0) { - row_mu->DeleteRow(); - } else { - std::map >::iterator it; - for (it = column.begin(); it != column.end(); ++it) { - const std::string& family = it->first; - std::set& qualifiers = it->second; - if (qualifiers.size() == 0) { - qualifiers.insert(""); - } - std::set::const_iterator it2; - for (it2 = qualifiers.begin(); it2 != qualifiers.end(); ++it2) { - const std::string& qualifier = *it2; - req_size += family.size() + qualifier.size(); - row_mu->DeleteColumn(family, qualifier, ts); - } +void Adapter::BatchWrite(int opt, const std::string& row, + std::map >& column, uint64_t timestamp, + std::string& value, bool last_call) { + if (one_batch_ == nullptr) { + one_batch_ = new Batch(); + one_batch_->size = 0; + } + size_t req_size = row.size(); + + if (column.size() == 0) { + column[""].insert(""); + } + std::map >::iterator it; + for (it = column.begin(); it != column.end(); ++it) { + const std::string& family = it->first; + std::set& qualifiers = it->second; + if (qualifiers.size() == 0) { + qualifiers.insert(""); + } + std::set::const_iterator it2; + for (it2 = qualifiers.begin(); it2 != qualifiers.end(); ++it2) { + const std::string& qualifier = *it2; + req_size += family.size() + qualifier.size() + sizeof(timestamp); + req_size += value.size(); + if (FLAGS_verify) { + add_checksum(row, family, qualifier, &value); + } + if (opt == BPUT) { + std::string this_row = row; + size_t perfix_len = 6; + if (!one_batch_->cells.empty()) { + std::string per_row = one_batch_->cells.back().row; + this_row.replace(0, perfix_len, per_row.substr(0, perfix_len)); } + Cell cell = {this_row, family, qualifier, value, (int64_t)timestamp}; + one_batch_->cells.push_back(cell); + one_batch_->size += req_size; + } else { + abort(); + } + if (FLAGS_verify) { + remove_checksum(&value); + } } - - write_marker_.CheckPending(); - write_marker_.CheckLimit(); - write_marker_.OnReceive(req_size); + } + if (last_call || one_batch_->size >= FLAGS_batch_size) { + batch_write_marker_.CheckPending(); + batch_write_marker_.CheckLimit(); + batch_write_marker_.OnReceive(req_size); pending_num_.Inc(); - + tera::BatchMutation* batch_mu = table_->NewBatchMutation(); + for (const auto& cell : one_batch_->cells) { + batch_mu->Put(cell.row, cell.family, cell.qualifier, cell.value, cell.timestamp); + } + delete one_batch_; + one_batch_ = nullptr; + int64_t req_time = Now(); if (type == ASYNC) { - int64_t req_time = Now(); - Context* ctx = new Context(this, req_size, req_time); - row_mu->SetCallBack(sdk_write_callback); - row_mu->SetContext(ctx); - table_->ApplyMutation(row_mu); + Context* ctx = new Context(this, req_size, req_time); + batch_mu->SetCallBack(sdk_batch_write_callback); + batch_mu->SetContext(ctx); + table_->ApplyMutation(batch_mu); } else { - sync_mutations_.push_back(row_mu); - sync_req_sizes_.push_back(req_size); - if (sync_mutations_.size() >= static_cast(FLAGS_batch_count)) { - CommitSyncWrite(); - } + table_->ApplyMutation(batch_mu); + BatchWriteCallback(batch_mu, req_size, req_time); } + } } -void Adapter::Scan(const std::string& start_key, const std::string& end_key, - const std::vector& cf_list, - bool print, bool is_async) { - tera::ScanDescriptor scan_desp(start_key); - scan_desp.SetEnd(end_key); - scan_desp.SetBufferSize(FLAGS_buf_size); - scan_desp.SetAsync(is_async); - for (size_t i = 0; i < cf_list.size(); i++) { - scan_desp.AddColumnFamily(cf_list[i]); +void Adapter::BatchWriteCallback(tera::BatchMutation* batch_mu, size_t req_size, int64_t req_time) { + uint32_t latency = (Now() - req_time) / 1000; + batch_write_marker_.OnFinish(req_size, latency); + tera::ErrorCode err = batch_mu->GetError(); + if (err.GetType() == tera::ErrorCode::kOK) { + batch_write_marker_.OnSuccess(req_size, latency); + } else if (err.GetType() == tera::ErrorCode::kTxnFail) { + batch_write_marker_.OnConflict(req_size, latency); + } else { + /*std::cerr << "fail to write: row=[" << row << "], column=[" + << family << ":" << qualifier << "], timestamp=[" + << timestamp << "], value=[" << value << "], status=" + << tera::strerr(err) << std::endl;*/ + } + delete batch_mu; + + if (0 == pending_num_.Dec()) { + pthread_mutex_lock(&mutex_); + pthread_cond_signal(&cond_); + pthread_mutex_unlock(&mutex_); + } +} + +void Adapter::Delete(const std::string& row, std::map >& column, + uint64_t ts) { + tera::RowMutation* row_mu = table_->NewRowMutation(row); + size_t req_size = row.size(); + + if (column.size() == 0) { + row_mu->DeleteRow(); + } else { + std::map >::iterator it; + for (it = column.begin(); it != column.end(); ++it) { + const std::string& family = it->first; + std::set& qualifiers = it->second; + if (qualifiers.size() == 0) { + qualifiers.insert(""); + } + std::set::const_iterator it2; + for (it2 = qualifiers.begin(); it2 != qualifiers.end(); ++it2) { + const std::string& qualifier = *it2; + req_size += family.size() + qualifier.size(); + row_mu->DeleteColumn(family, qualifier, ts); + } } - tera::ErrorCode err; - tera::ResultStream* result = table_->Scan(scan_desp, &err); - if (result == NULL) { - std::cerr << "fail to scan: " << tera::strerr(err); - return; + } + + write_marker_.CheckPending(); + write_marker_.CheckLimit(); + write_marker_.OnReceive(req_size); + pending_num_.Inc(); + + if (type == ASYNC) { + int64_t req_time = Now(); + Context* ctx = new Context(this, req_size, req_time); + row_mu->SetCallBack(sdk_write_callback); + row_mu->SetContext(ctx); + table_->ApplyMutation(row_mu); + } else { + sync_mutations_.push_back(row_mu); + sync_req_sizes_.push_back(req_size); + if (sync_mutations_.size() >= static_cast(FLAGS_batch_count)) { + CommitSyncWrite(); } + } +} - uint64_t count = 0; - while (!result->Done()) { - if (print) { - std::cerr << count++ << "\t" << result->RowName() << "\t" << - result->Family() << "\t" << result->Qualifier() << "\t" << - result->Timestamp() << "\t" << result->Value() << std::endl; - } - size_t size = result->RowName().size() + result->Family().size() - + result->Qualifier().size() + sizeof(result->Timestamp()) - + result->Value().size(); - scan_marker_.OnFinish(size, 0); - scan_marker_.OnSuccess(size, 0); - result->Next(); +void Adapter::Scan(const std::string& start_key, const std::string& end_key, + const std::vector& cf_list, bool print, bool is_async) { + tera::ScanDescriptor scan_desp(start_key); + scan_desp.SetEnd(end_key); + scan_desp.SetBufferSize(FLAGS_buf_size); + for (size_t i = 0; i < cf_list.size(); i++) { + scan_desp.AddColumnFamily(cf_list[i]); + } + tera::ErrorCode err; + tera::ResultStream* result = table_->Scan(scan_desp, &err); + if (result == NULL) { + std::cerr << "fail to scan: " << tera::strerr(err); + return; + } + + uint64_t count = 0; + while (!result->Done()) { + if (print) { + std::cerr << count++ << "\t" << result->RowName() << "\t" << result->Family() << "\t" + << result->Qualifier() << "\t" << result->Timestamp() << "\t" << result->Value() + << std::endl; } - delete result; + size_t size = result->RowName().size() + result->Family().size() + result->Qualifier().size() + + sizeof(result->Timestamp()) + result->Value().size(); + scan_marker_.OnFinish(size, 0); + scan_marker_.OnSuccess(size, 0); + result->Next(); + } + delete result; } void Adapter::WaitComplete() { - pthread_mutex_lock(&mutex_); - while (0 != pending_num_.Get()) { - pthread_cond_wait(&cond_, &mutex_); - } - pthread_mutex_unlock(&mutex_); + pthread_mutex_lock(&mutex_); + while (0 != pending_num_.Get()) { + pthread_cond_wait(&cond_, &mutex_); + } + pthread_mutex_unlock(&mutex_); } void add_checksum(const std::string& rowkey, const std::string& family, const std::string& qualifier, std::string* value) { - uint32_t crc = 0; - crc = leveldb::crc32c::Extend(crc, rowkey.data(), rowkey.size()); - crc = leveldb::crc32c::Extend(crc, family.data(), family.size()); - crc = leveldb::crc32c::Extend(crc, qualifier.data(), qualifier.size()); - crc = leveldb::crc32c::Extend(crc, value->data(), value->size()); - value->append((char*)&crc, sizeof(uint32_t)); + uint32_t crc = 0; + crc = leveldb::crc32c::Extend(crc, rowkey.data(), rowkey.size()); + crc = leveldb::crc32c::Extend(crc, family.data(), family.size()); + crc = leveldb::crc32c::Extend(crc, qualifier.data(), qualifier.size()); + crc = leveldb::crc32c::Extend(crc, value->data(), value->size()); + value->append((char*)&crc, sizeof(uint32_t)); } -void remove_checksum(std::string* value) { - value->resize(value->size() - sizeof(uint32_t)); -} +void remove_checksum(std::string* value) { value->resize(value->size() - sizeof(uint32_t)); } bool verify_checksum(const std::string& rowkey, const std::string& family, const std::string& qualifier, const std::string& value) { - uint32_t crc = 0; - crc = leveldb::crc32c::Extend(crc, rowkey.data(), rowkey.size()); - crc = leveldb::crc32c::Extend(crc, family.data(), family.size()); - crc = leveldb::crc32c::Extend(crc, qualifier.data(), qualifier.size()); - crc = leveldb::crc32c::Extend(crc, value.data(), value.size() - sizeof(uint32_t)); - return crc == *(uint32_t*)(value.data() + value.size() - sizeof(uint32_t)); + uint32_t crc = 0; + crc = leveldb::crc32c::Extend(crc, rowkey.data(), rowkey.size()); + crc = leveldb::crc32c::Extend(crc, family.data(), family.size()); + crc = leveldb::crc32c::Extend(crc, qualifier.data(), qualifier.size()); + crc = leveldb::crc32c::Extend(crc, value.data(), value.size() - sizeof(uint32_t)); + return crc == *(uint32_t*)(value.data() + value.size() - sizeof(uint32_t)); } /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/benchmark/mark.h b/src/benchmark/mark.h index ec5099eb5..aa9b58710 100644 --- a/src/benchmark/mark.h +++ b/src/benchmark/mark.h @@ -4,14 +4,15 @@ // // Author likang01(com@baidu.com) -#ifndef TERA_BENCHMARK_MARK_H_ -#define TERA_BENCHMARK_MARK_H_ +#ifndef TERA_BENCHMARK_MARK_H_ +#define TERA_BENCHMARK_MARK_H_ #include #include #include #include #include +#include #include #include @@ -26,375 +27,260 @@ DECLARE_int64(max_outflow); DECLARE_int64(max_rate); DECLARE_int64(batch_count); -enum MODE { - WRITE = 1, - READ = 2, - SCAN = 3, - MIX = 4, - DELETE = 5 -}; +enum MODE { WRITE = 1, READ = 2, SCAN = 3, MIX = 4, DELETE = 5, BATCH_WRITE = 6 }; extern int mode; -enum TYPE { - SYNC = 1, - ASYNC = 2 -}; +enum TYPE { SYNC = 1, ASYNC = 2 }; extern int type; -enum OP { - NONE= 0, - PUT = 1, - GET = 2, - SCN = 3, - DEL = 4, - PIF = 5 -}; +enum OP { NONE = 0, PUT = 1, GET = 2, SCN = 3, DEL = 4, PIF = 5, BPUT = 6 }; int64_t Now(); class Marker { -public: - Marker(uint32_t max_latency) - : latency_limit_(max_latency), - operation_count_(0), - total_latency_(0), - min_latency_(0) { - latency_vector_ = new uint64_t[max_latency + 1]; - memset(latency_vector_, 0, (max_latency + 1) * sizeof(uint64_t)); - for (int i = 0; i < 11; i++) { - m_ten_percentile_latency[i] = 0; - m_ten_percentile_latency_count_sum[i] = 0; - } - } + public: + Marker(uint32_t max_latency) : latency_limit_(max_latency) {} - ~Marker() { - delete[] latency_vector_; + void AddLatency(uint32_t latency) { + if (latency > latency_limit_) { + latency = latency_limit_; } + hist_.Add((double)latency); + } - void AddLatency(uint32_t latency) { - if (latency > latency_limit_) { - latency = latency_limit_; - } - MutexLock lock(&mutex_); - latency_vector_[latency]++; - operation_count_++; - total_latency_ += latency; - if (operation_count_ == 1) { - min_latency_ = latency; - } - if (min_latency_ > latency) { - min_latency_ = latency; - } - for (int i = 1; i < 11; i++) { - if (m_ten_percentile_latency[i] < latency) { - MoveTenPercentileLatencyRight(i); - } else if (m_ten_percentile_latency[i] > latency) { - m_ten_percentile_latency_count_sum[i]++; - MoveTenPercentileLatencyLeft(i); - } - } - } + uint32_t MinLatency() { return PercentileLatency(0); } - uint32_t MinLatency() { - return PercentileLatency(0); - } + uint32_t MaxLatency() { return PercentileLatency(100); } - uint32_t MaxLatency() { - return PercentileLatency(100); - } + void Clear() { hist_.Clear(); } - double AverageLatency() { - if (operation_count_ == 0) { - return 0; - } - MutexLock lock(&mutex_); - return (double)total_latency_ / operation_count_; - } + double AverageLatency() { return hist_.Average(); } - uint32_t PercentileLatency(uint32_t percentile) { - MutexLock lock(&mutex_); - if (percentile > 100) { - percentile = 100; - } - if (percentile == 0) { - return min_latency_; - } - if (percentile % 10 == 0) { - return m_ten_percentile_latency[percentile / 10]; - } - return NormalPercentileLatency(percentile); - } + uint32_t PercentileLatency(uint32_t percentile) { + return (uint32_t)hist_.Percentile((double)percentile); + } -private: - uint32_t NormalPercentileLatency(uint32_t percentile) { - uint64_t percentile_operation_count = percentile * operation_count_ / 100; - int ten_percentile = percentile / 10 + 1; - uint32_t latency = m_ten_percentile_latency[ten_percentile]; - if (percentile_operation_count == 0) { - return latency; - } - uint64_t count_sum = m_ten_percentile_latency_count_sum[ten_percentile]; - while (count_sum >= percentile_operation_count) { - latency--; - while (latency_vector_[latency] == 0) { - latency--; - } - count_sum -= latency_vector_[latency]; - } - return latency; - } + private: + const uint32_t latency_limit_; + leveldb::Histogram hist_; +}; - void MoveTenPercentileLatencyRight(int ten_percentile) { - uint64_t percentile_operation_count = ten_percentile * operation_count_ / 10; - uint32_t latency = m_ten_percentile_latency[ten_percentile]; - while (m_ten_percentile_latency_count_sum[ten_percentile] - + latency_vector_[latency] - < percentile_operation_count) { - m_ten_percentile_latency_count_sum[ten_percentile] += latency_vector_[latency]; - latency++; - while (latency_vector_[latency] == 0) { - latency++; - } - } - m_ten_percentile_latency[ten_percentile] = latency; +class Statistic { + public: + Statistic(int opt) + : opt_(opt), + last_send_size_(0), + last_send_time_(0), + last_total_count_(0), + last_total_size_(0), + last_finish_count_(0), + last_finish_size_(0), + last_success_count_(0), + last_success_size_(0), + last_conflict_count_(0), + last_conflict_size_(0), + finish_marker_(1000000), + success_marker_(1000000), + conflict_marker_(1000000) {} + + int GetOpt() { return opt_; } + + void GetStatistic(int64_t* total_count, int64_t* total_size, int64_t* finish_count, + int64_t* finish_size, int64_t* success_count, int64_t* success_size, + int64_t* conflict_count, int64_t* conflict_size) { + *total_count = last_total_count_ = total_count_.Get(); + *total_size = last_total_size_ = total_size_.Get(); + *finish_count = last_finish_count_ = finish_count_.Get(); + *finish_size = last_finish_size_ = finish_size_.Get(); + *success_count = last_success_count_ = success_count_.Get(); + *success_size = last_success_size_ = success_size_.Get(); + *conflict_count = last_conflict_count_ = conflict_count_.Get(); + *conflict_size = last_conflict_size_ = conflict_size_.Get(); + } + + void GetLastStatistic(int64_t* total_count, int64_t* total_size, int64_t* finish_count, + int64_t* finish_size, int64_t* success_count, int64_t* success_size, + int64_t* conflict_count, int64_t* conflict_size) { + *total_count = last_total_count_; + *total_size = last_total_size_; + *finish_count = last_finish_count_; + *finish_size = last_finish_size_; + *success_count = last_success_count_; + *success_size = last_success_size_; + *conflict_count = last_conflict_count_; + *conflict_size = last_conflict_size_; + } + + Marker* GetFinishMarker() { return &finish_marker_; } + + Marker* GetSuccessMarker() { return &success_marker_; } + + Marker* GetConflictMarker() { return &conflict_marker_; } + + void OnReceive(size_t size) { + last_send_time_ = Now(); + last_send_size_ = size; + total_count_.Inc(); + total_size_.Add(size); + } + + void OnFinish(size_t size, uint32_t latency) { + finish_count_.Inc(); + finish_size_.Add(size); + finish_marker_.AddLatency(latency); + } + + void OnSuccess(size_t size, uint32_t latency) { + success_count_.Inc(); + success_size_.Add(size); + success_marker_.AddLatency(latency); + } + + void OnConflict(size_t size, uint32_t latency) { + conflict_count_.Inc(); + conflict_size_.Add(size); + conflict_marker_.AddLatency(latency); + } + + void CheckPending() { + int64_t max_pend_count = FLAGS_pend_count; + int64_t max_pend_size = FLAGS_pend_size << 20; + while (total_count_.Get() - finish_count_.Get() > max_pend_count) { + usleep(1000); } - - void MoveTenPercentileLatencyLeft(int ten_percentile) { - uint64_t percentile_operation_count = ten_percentile * operation_count_ / 10; - if (percentile_operation_count == 0) { - return; - } - uint32_t latency = m_ten_percentile_latency[ten_percentile]; - while (m_ten_percentile_latency_count_sum[ten_percentile] - >= percentile_operation_count) { - latency--; - while (latency_vector_[latency] == 0) { - latency--; - } - m_ten_percentile_latency_count_sum[ten_percentile] -= latency_vector_[latency]; - } - m_ten_percentile_latency[ten_percentile] = latency; + while (total_size_.Get() - finish_size_.Get() > max_pend_size) { + usleep(1000); } - -private: - const uint32_t latency_limit_; - - uint64_t operation_count_; - uint64_t total_latency_; - uint32_t min_latency_; - uint64_t* latency_vector_; - - uint32_t m_ten_percentile_latency[11]; // 0, 10, 20, ..., 90, 100 - uint64_t m_ten_percentile_latency_count_sum[11]; - - mutable Mutex mutex_; + } + + void CheckLimit() { + int64_t max_outflow = FLAGS_max_outflow << 20; + int64_t max_rate = FLAGS_max_rate; + if (max_outflow > 0) { + int64_t sleep_micros = + (int64_t)(last_send_time_ + (double)last_send_size_ * 1000000.0 / max_outflow - Now()); + if (sleep_micros > 0) { + usleep(sleep_micros); + } + } + if (max_rate > 0) { + int64_t sleep_micros = (int64_t)(last_send_time_ + (double)1000000.0 / max_rate - Now()); + if (sleep_micros > 0) { + usleep(sleep_micros); + } + } + } + + void Clear() { + finish_marker_.Clear(); + success_marker_.Clear(); + conflict_marker_.Clear(); + } + + private: + int opt_; + + tera::Counter total_count_; + tera::Counter total_size_; + tera::Counter finish_count_; + tera::Counter finish_size_; + tera::Counter success_count_; + tera::Counter success_size_; + tera::Counter conflict_count_; + tera::Counter conflict_size_; + + size_t last_send_size_; + int64_t last_send_time_; + + int64_t last_total_count_; + int64_t last_total_size_; + int64_t last_finish_count_; + int64_t last_finish_size_; + int64_t last_success_count_; + int64_t last_success_size_; + int64_t last_conflict_count_; + int64_t last_conflict_size_; + + Marker finish_marker_; + Marker success_marker_; + Marker conflict_marker_; }; -class Statistic { -public: - Statistic(int opt) - : opt_(opt), - last_send_size_(0), - last_send_time_(0), - last_total_count_(0), - last_total_size_(0), - last_finish_count_(0), - last_finish_size_(0), - last_success_count_(0), - last_success_size_(0), - last_conflict_count_(0), - last_conflict_size_(0), - finish_marker_(1000000), - success_marker_(1000000), - conflict_marker_(1000000) {} - - int GetOpt() { - return opt_; - } +class Adapter { + public: + Adapter(tera::Table* table); + ~Adapter(); - void GetStatistic(int64_t* total_count, int64_t* total_size, - int64_t* finish_count, int64_t* finish_size, - int64_t* success_count, int64_t* success_size, - int64_t* conflict_count, int64_t* conflict_size) { - *total_count = last_total_count_ = total_count_.Get(); - *total_size = last_total_size_ = total_size_.Get(); - *finish_count = last_finish_count_ = finish_count_.Get(); - *finish_size = last_finish_size_ = finish_size_.Get(); - *success_count = last_success_count_ = success_count_.Get(); - *success_size = last_success_size_ = success_size_.Get(); - *conflict_count = last_conflict_count_ = conflict_count_.Get(); - *conflict_size = last_conflict_size_ = conflict_size_.Get(); - } + void Write(int opt, const std::string& row, std::map >& column, + uint64_t timestamp, std::string& value); - void GetLastStatistic(int64_t* total_count, int64_t* total_size, - int64_t* finish_count, int64_t* finish_size, - int64_t* success_count, int64_t* success_size, - int64_t* conflict_count, int64_t* conflict_size) { - *total_count = last_total_count_; - *total_size = last_total_size_; - *finish_count = last_finish_count_; - *finish_size = last_finish_size_; - *success_count = last_success_count_; - *success_size = last_success_size_; - *conflict_count = last_conflict_count_; - *conflict_size = last_conflict_size_; - } + void CommitSyncWrite(); - Marker* GetFinishMarker() { - return &finish_marker_; - } + void WriteCallback(tera::RowMutation* batch_mu, size_t req_size, int64_t req_time); - Marker* GetSuccessMarker() { - return &success_marker_; - } + struct Cell { + std::string row; + std::string family; + std::string qualifier; + std::string value; + int64_t timestamp; + }; + struct Batch { + int64_t size; + std::vector cells; + }; + void BatchWrite(int opt, const std::string& row, + std::map >& column, uint64_t timestamp, + std::string& value, bool last_call); - Marker* GetConflictMarker() { - return &conflict_marker_; - } + void BatchWriteCallback(tera::BatchMutation* row_mu, size_t req_size, int64_t req_time); - void OnReceive(size_t size) { - last_send_time_ = Now(); - last_send_size_ = size; - total_count_.Inc(); - total_size_.Add(size); - } + void Read(const std::string& row, const std::map >& column, + uint64_t largest_ts, uint64_t smallest_ts); + void CommitSyncRead(); + void ReadCallback(tera::RowReader* reader, size_t req_size, int64_t req_time); - void OnFinish(size_t size, uint32_t latency) { - finish_count_.Inc(); - finish_size_.Add(size); - finish_marker_.AddLatency(latency); - } + void Delete(const std::string& row, std::map >& column, + uint64_t ts); - void OnSuccess(size_t size, uint32_t latency) { - success_count_.Inc(); - success_size_.Add(size); - success_marker_.AddLatency(latency); - } + void Scan(const std::string& start_key, const std::string& end_key, + const std::vector& cf_list, bool print = false, bool is_async = false); - void OnConflict(size_t size, uint32_t latency) { - conflict_count_.Inc(); - conflict_size_.Add(size); - conflict_marker_.AddLatency(latency); - } + void WaitComplete(); - void CheckPending() { - int64_t max_pend_count = FLAGS_pend_count; - int64_t max_pend_size = FLAGS_pend_size << 20; - while (total_count_.Get() - finish_count_.Get() > max_pend_count) { - usleep(1000); - } - while (total_size_.Get() - finish_size_.Get() > max_pend_size) { - usleep(1000); - } - } + Statistic* GetWriteMarker() { return &write_marker_; } - void CheckLimit() { - int64_t max_outflow = FLAGS_max_outflow << 20; - int64_t max_rate = FLAGS_max_rate; - if (max_outflow > 0) { - int64_t sleep_micros = - (int64_t)(last_send_time_ + - (double)last_send_size_ * 1000000.0 / max_outflow - Now()); - if (sleep_micros > 0) { - usleep(sleep_micros); - } - } - if (max_rate > 0) { - int64_t sleep_micros = - (int64_t)(last_send_time_ + (double)1000000.0 / max_rate - Now()); - if (sleep_micros > 0) { - usleep(sleep_micros); - } - } - } + Statistic* GetReadMarker() { return &read_marker_; } -private: - int opt_; - - tera::Counter total_count_; - tera::Counter total_size_; - tera::Counter finish_count_; - tera::Counter finish_size_; - tera::Counter success_count_; - tera::Counter success_size_; - tera::Counter conflict_count_; - tera::Counter conflict_size_; - - size_t last_send_size_; - int64_t last_send_time_; - - int64_t last_total_count_; - int64_t last_total_size_; - int64_t last_finish_count_; - int64_t last_finish_size_; - int64_t last_success_count_; - int64_t last_success_size_; - int64_t last_conflict_count_; - int64_t last_conflict_size_; - - Marker finish_marker_; - Marker success_marker_; - Marker conflict_marker_; -}; + Statistic* GetScanMarker() { return &scan_marker_; } -class Adapter { -public: - Adapter(tera::Table* table); - ~Adapter(); - - void Write(int opt, const std::string& row, - std::map >& column, - uint64_t timestamp, - std::string& value); - void CommitSyncWrite(); - void WriteCallback(tera::RowMutation* row_mu, - size_t req_size, - int64_t req_time); - - void Read(const std::string& row, - const std::map >& column, - uint64_t largest_ts, uint64_t smallest_ts); - void CommitSyncRead(); - void ReadCallback(tera::RowReader* reader, - size_t req_size, - int64_t req_time); - - void Delete(const std::string& row, - std::map >& column, - uint64_t ts); - - void Scan(const std::string& start_key, - const std::string& end_key, - const std::vector& cf_list, - bool print = false, bool is_async = false); - - void WaitComplete(); - - Statistic* GetWriteMarker() { - return &write_marker_; - } + Statistic* GetBatchWriteMarker() { return &batch_write_marker_; } - Statistic* GetReadMarker() { - return &read_marker_; - } + void Clear() { + write_marker_.Clear(); + read_marker_.Clear(); + scan_marker_.Clear(); + batch_write_marker_.Clear(); + } - Statistic* GetScanMarker() { - return &scan_marker_; - } + private: + tera::Counter pending_num_; + pthread_mutex_t mutex_; + pthread_cond_t cond_; + tera::Table* table_; -private: - tera::Counter pending_num_; - pthread_mutex_t mutex_; - pthread_cond_t cond_; - tera::Table* table_; + Statistic write_marker_; + Statistic read_marker_; + Statistic scan_marker_; + Statistic batch_write_marker_; - Statistic write_marker_; - Statistic read_marker_; - Statistic scan_marker_; + std::vector sync_mutations_; + std::vector sync_readers_; + std::vector sync_req_sizes_; - std::vector sync_mutations_; - std::vector sync_readers_; - std::vector sync_req_sizes_; + // for batch mutation, clear after ApplyMutation(BatchMutation); + // In Adapter, only one BatchMutation at the same time; + Batch* one_batch_; }; void add_checksum(const std::string& rowkey, const std::string& family, diff --git a/src/benchmark/mark_main.cc b/src/benchmark/mark_main.cc index dd57af93a..911e93c8d 100644 --- a/src/benchmark/mark_main.cc +++ b/src/benchmark/mark_main.cc @@ -4,6 +4,7 @@ // // Author likang01(com@baidu.com) +#include #include #include #include @@ -12,6 +13,7 @@ #include #include #include +#include #include "benchmark/mark.h" #include "types.h" @@ -19,7 +21,7 @@ DECLARE_string(flagfile); DEFINE_string(tablename, "", "table_name"); -DEFINE_string(mode, "w", "mode [w|r|s|m]"); +DEFINE_string(mode, "w", "mode [w|r|s|m|bw]"); DEFINE_string(type, "async", "type [sync|async]"); DEFINE_int64(pend_size, 100, "max_pending_size"); DEFINE_int64(pend_count, 100000, "max_pending_count"); @@ -34,657 +36,674 @@ DEFINE_int64(max_rate, -1, "max_rate"); DEFINE_bool(scan_streaming, false, "enable streaming scan"); DEFINE_int64(batch_count, 1, "batch_count(sync)"); DEFINE_int64(entry_limit, 0, "writing/reading speed limit"); +DEFINE_int64(batch_size, 2, "one batch_mutation data size max limit"); int mode = 0; int type = 0; volatile bool is_quit = false; -bool parse_row(const char* buffer, ssize_t size, - int* op, std::string* row, - std::map >* column, - uint64_t* largest_ts, uint64_t* smallest_ts, - std::string* value) { - if (size <= 0) { - return false; - } - const char* end = buffer + size; - - // parse operation - if (mode == MIX) { - const char* delim = strchr(buffer, '\t'); - if (buffer == delim || end - 1 <= delim || NULL == delim) { - return false; - } - if (3 != delim - buffer) { - return false; - } - if (strncmp(buffer, "GET", 3) == 0) { - *op = GET; - } else if (strncmp(buffer, "PUT", 3) == 0) { - *op = PUT; - } else if (strncmp(buffer, "PIF", 3) == 0) { - *op = PIF; - } else { - return false; - } - buffer = delim + 1; - } +bool parse_row(const char* buffer, ssize_t size, int* op, std::string* row, + std::map >* column, uint64_t* largest_ts, + uint64_t* smallest_ts, std::string* value) { + if (size <= 0) { + return false; + } + const char* end = buffer + size; - // parse row_key + // parse operation + if (mode == MIX) { const char* delim = strchr(buffer, '\t'); - if (buffer == delim || end - 1 == delim) { - return false; - } - if (NULL == delim || end < delim) { - delim = end; - } - row->assign(buffer, delim - buffer); - if ((delim == end && mode != WRITE && - (mode != MIX || (*op != PUT && *op != PIF))) - ||(delim == end && mode == DELETE)) { - return true; - } - - // parse value - if (mode == WRITE || (mode == MIX && (*op == PUT || *op == PIF))) { - if (delim == end) { - return false; - } - buffer = delim + 1; - delim = strchr(buffer, '\t'); - if (buffer == delim || end - 1 == delim) { - return false; - } - if (NULL == delim || end < delim) { - delim = end; - } - value->assign(buffer, delim - buffer); - if (delim == end) { - return true; - } + if (buffer == delim || end - 1 <= delim || NULL == delim) { + return false; + } + if (3 != delim - buffer) { + return false; + } + if (strncmp(buffer, "GET", 3) == 0) { + *op = GET; + } else if (strncmp(buffer, "PUT", 3) == 0) { + *op = PUT; + } else if (strncmp(buffer, "PIF", 3) == 0) { + *op = PIF; + } else { + return false; } + buffer = delim + 1; + } - // parse family:qualifier + // parse row_key + const char* delim = strchr(buffer, '\t'); + if (buffer == delim || end - 1 == delim) { + return false; + } + if (NULL == delim || end < delim) { + delim = end; + } + row->assign(buffer, delim - buffer); + if (delim == end) { + if (mode == DELETE) { + return true; + } + if (mode == WRITE || mode == BPUT || (mode == MIX && (*op == PUT || *op == PIF))) { + return false; + } + } + + // parse value + if (mode == WRITE || mode == BPUT || (mode == MIX && (*op == PUT || *op == PIF))) { buffer = delim + 1; delim = strchr(buffer, '\t'); if (buffer == delim || end - 1 == delim) { - return false; + return false; } if (NULL == delim || end < delim) { - delim = end; - } - column->clear(); - const char* column_buffer = buffer; - if (column_buffer + 1 == delim && *column_buffer == ';') { - // read whole row - column_buffer = delim; - } - while (column_buffer < delim) { - const char* semicolon = strchr(column_buffer, ';'); - if (semicolon == column_buffer || semicolon == delim - 1) { - return false; - } - if (NULL == semicolon || semicolon >= delim) { - semicolon = delim; - } - const char* colon = strchr(column_buffer, ':'); - if (colon == column_buffer) { - return false; - } - if (NULL == colon || colon >= semicolon) { - colon = semicolon; - } - std::string family(column_buffer, colon - column_buffer); - (*column)[family]; - const char* qualifier_buffer = colon + 1; - while (qualifier_buffer <= semicolon) { - const char* comma = strchr(qualifier_buffer, ','); - if (comma == NULL || comma >= semicolon) { - comma = semicolon; - } - std::set& qualifiers = (*column)[family]; - std::string qualifier(qualifier_buffer, comma - qualifier_buffer); - qualifiers.insert(qualifier); - qualifier_buffer = comma + 1; - } - column_buffer = semicolon + 1; + delim = end; } + value->assign(buffer, delim - buffer); if (delim == end) { - return true; + return true; } + } - // parse largest timestamp - buffer = delim + 1; - delim = strchr(buffer, '\t'); - if (NULL != delim && end > delim) { - return false; - } - const char* comma = strchr(buffer, ','); - if (NULL == comma || comma >= end) { - comma = end; - } - if (comma > buffer) { - std::string time_str(buffer, comma - buffer); - char* end_time_ptr = NULL; - uint64_t time = strtoll(time_str.c_str(), &end_time_ptr, 10); - if (*end_time_ptr != '\0') { - return false; - } - *largest_ts = time; - } - if (comma == end) { - return true; - } else if (mode == WRITE || (mode == MIX && (*op == PUT || *op == PIF))) { - return false; - } + // parse family:qualifier + buffer = delim + 1; + delim = strchr(buffer, '\t'); + if (buffer == delim || end - 1 == delim) { + return false; + } + if (NULL == delim || end < delim) { + delim = end; + } + column->clear(); + const char* column_buffer = buffer; + if (column_buffer + 1 == delim && *column_buffer == ';') { + // read whole row + column_buffer = delim; + } + while (column_buffer < delim) { + const char* semicolon = strchr(column_buffer, ';'); + if (semicolon == column_buffer || semicolon == delim - 1) { + return false; + } + if (NULL == semicolon || semicolon >= delim) { + semicolon = delim; + } + const char* colon = strchr(column_buffer, ':'); + if (colon == column_buffer) { + return false; + } + if (NULL == colon || colon >= semicolon) { + colon = semicolon; + } + std::string family(column_buffer, colon - column_buffer); + (*column)[family]; + const char* qualifier_buffer = colon + 1; + while (qualifier_buffer <= semicolon) { + const char* comma = strchr(qualifier_buffer, ','); + if (comma == NULL || comma >= semicolon) { + comma = semicolon; + } + std::set& qualifiers = (*column)[family]; + std::string qualifier(qualifier_buffer, comma - qualifier_buffer); + qualifiers.insert(qualifier); + qualifier_buffer = comma + 1; + } + column_buffer = semicolon + 1; + } + if (delim == end) { + return true; + } - // parse smallest timestamp - buffer = comma + 1; - if (end > buffer) { - std::string time_str(buffer, end - buffer); - char* end_time_ptr = NULL; - uint64_t time = strtoll(time_str.c_str(), &end_time_ptr, 10); - if (*end_time_ptr != '\0') { - return false; - } - *smallest_ts = time; - } + // parse largest timestamp + buffer = delim + 1; + delim = strchr(buffer, '\t'); + if (NULL != delim && end > delim) { + return false; + } + const char* comma = strchr(buffer, ','); + if (NULL == comma || comma >= end) { + comma = end; + } + if (comma > buffer) { + std::string time_str(buffer, comma - buffer); + char* end_time_ptr = NULL; + uint64_t time = strtoll(time_str.c_str(), &end_time_ptr, 10); + if (*end_time_ptr != '\0') { + return false; + } + *largest_ts = time; + } + if (comma == end) { return true; + } else if (mode == WRITE || mode == BPUT || (mode == MIX && (*op == PUT || *op == PIF))) { + return false; + } + + // parse smallest timestamp + buffer = comma + 1; + if (end > buffer) { + std::string time_str(buffer, end - buffer); + char* end_time_ptr = NULL; + uint64_t time = strtoll(time_str.c_str(), &end_time_ptr, 10); + if (*end_time_ptr != '\0') { + return false; + } + *smallest_ts = time; + } + return true; } -bool get_next_row(int* op, std::string* row, - std::map >* column, - uint64_t* largest_ts, uint64_t* smallest_ts, - std::string* value) { - static size_t n = 10240; - static char* buffer = new char[n]; - - ssize_t line_size = 0; - while ((line_size = getline(&buffer, &n, stdin)) != -1) { - if (line_size > 0 && buffer[line_size - 1] == '\n') { - line_size--; - } - if (line_size < 3) { - std::cerr << "ignore empty line" << std::endl; - continue; - } - if (!parse_row(buffer, line_size, op, row, column, - largest_ts, smallest_ts, value)) { - std::cerr << "ignore invalid line: " << buffer << std::endl; - continue; - } - return true; +bool get_next_row(int* op, std::string* row, std::map >* column, + uint64_t* largest_ts, uint64_t* smallest_ts, std::string* value) { + static size_t n = 10240; + static char* buffer = new char[n]; + + ssize_t line_size = 0; + while ((line_size = getline(&buffer, &n, stdin)) != -1) { + if (line_size > 0 && buffer[line_size - 1] == '\n') { + line_size--; } - return false; + if (line_size < 3) { + std::cerr << "ignore empty line" << std::endl; + continue; + } + if (!parse_row(buffer, line_size, op, row, column, largest_ts, smallest_ts, value)) { + std::cerr << "ignore invalid line: " << buffer << std::endl; + continue; + } + return true; + } + return false; } void print_header() { - std::cout << "HH:MM:SS OPT\t"; - if (mode != SCAN && type == ASYNC) { - std::cout << "SENT [total/speed]\t\t"; - } - std::cout << "FINISH [total/speed]\t\t"; - std::cout << "SUCCESS [total/speed]\t\t"; - std::cout << "CONFLICT [total/speed]\t\t"; - if (mode != SCAN && type == ASYNC) { - std::cout << "PENDING [count]"; - } - std::cout << std::endl; + std::cout << "HH:MM:SS OPT\t"; + if (mode != SCAN && type == ASYNC) { + std::cout << "SENT [total/speed]\t\t"; + } + std::cout << "FINISH [total/speed]\t\t"; + std::cout << "SUCCESS [total/speed]\t\t"; + std::cout << "CONFLICT [total/speed]\t\t"; + if (mode != SCAN && type == ASYNC) { + std::cout << "PENDING [count]"; + } + std::cout << std::endl; } void print_time() { - struct timeval now; - gettimeofday(&now, NULL); - struct tm now_tm; - localtime_r(&now.tv_sec, &now_tm); - std::cout << std::setfill('0') << std::setw(2) << now_tm.tm_hour << ":" - << std::setfill('0') << std::setw(2) << now_tm.tm_min << ":" - << std::setfill('0') << std::setw(2) << now_tm.tm_sec; + struct timeval now; + gettimeofday(&now, NULL); + struct tm now_tm; + localtime_r(&now.tv_sec, &now_tm); + std::cout << std::setfill('0') << std::setw(2) << now_tm.tm_hour << ":" << std::setfill('0') + << std::setw(2) << now_tm.tm_min << ":" << std::setfill('0') << std::setw(2) + << now_tm.tm_sec; } void print_opt(Statistic* statistic) { - int opt = statistic->GetOpt(); - switch (opt) { + int opt = statistic->GetOpt(); + switch (opt) { case PUT: - std::cout << "PUT"; - break; + std::cout << "PUT"; + break; case GET: - std::cout << "GET"; - break; + std::cout << "GET"; + break; case SCN: - std::cout << "SCN"; - break; + std::cout << "SCN"; + break; + case BPUT: + std::cout << "BPUT"; + break; default: - abort(); - break; - } + abort(); + break; + } } const char unit[] = {'B', 'K', 'M', 'G', 'T', 'P', 'E'}; void print_size_and_count(int64_t size, int64_t count) { - double dsize = (double)size; - int unit_index = 0; - while (dsize > 1024) { - dsize /= 1024; - unit_index++; - } - std::ios::fmtflags cout_flag(std::cout.flags()); - std::cout << std::fixed << std::setprecision(3) << dsize - << unit[unit_index]; - std::cout << "(" << count << ")"; - std::cout.flags(cout_flag); + double dsize = (double)size; + int unit_index = 0; + while (dsize > 1024) { + dsize /= 1024; + unit_index++; + } + std::ios::fmtflags cout_flag(std::cout.flags()); + std::cout << std::fixed << std::setprecision(3) << dsize << unit[unit_index]; + std::cout << "(" << count << ")"; + std::cout.flags(cout_flag); } void print_statistic(Statistic* statistic) { - int64_t old_total_count, old_finish_count, old_success_count, old_conflict_count; - int64_t old_total_size, old_finish_size, old_success_size, old_conflict_size; - statistic->GetLastStatistic(&old_total_count, &old_total_size, - &old_finish_count, &old_finish_size, - &old_success_count, &old_success_size, - &old_conflict_count, &old_conflict_size); - - int64_t new_total_count, new_finish_count, new_success_count, new_conflict_count; - int64_t new_total_size, new_finish_size, new_success_size, new_conflict_size; - statistic->GetStatistic(&new_total_count, &new_total_size, - &new_finish_count, &new_finish_size, - &new_success_count, &new_success_size, - &new_conflict_count, &new_conflict_size); - - int64_t total_count = new_total_count - old_total_count; - int64_t finish_count = new_finish_count - old_finish_count; - int64_t success_count = new_success_count - old_success_count; - int64_t conflict_count = new_conflict_count - old_conflict_count; - int64_t total_size = new_total_size - old_total_size; - int64_t finish_size = new_finish_size - old_finish_size; - int64_t success_size = new_success_size - old_success_size; - int64_t conflict_size = new_conflict_size - old_conflict_size; - - int64_t total_pending_count = new_total_count - new_finish_count; - // scan - if (total_pending_count < 0) { - total_pending_count = 0; - } - - print_time(); - std::cout << " "; - print_opt(statistic); - std::cout << "\t"; - - if (mode != SCAN && type == ASYNC) { - print_size_and_count(new_total_size, new_total_count); - std::cout << "/"; - print_size_and_count(total_size, total_count); - std::cout << "\t\t"; - } - - print_size_and_count(new_finish_size, new_finish_count); - std::cout << "/"; - print_size_and_count(finish_size, finish_count); - std::cout << "\t\t"; - - print_size_and_count(new_success_size, new_success_count); + int64_t old_total_count, old_finish_count, old_success_count, old_conflict_count; + int64_t old_total_size, old_finish_size, old_success_size, old_conflict_size; + statistic->GetLastStatistic(&old_total_count, &old_total_size, &old_finish_count, + &old_finish_size, &old_success_count, &old_success_size, + &old_conflict_count, &old_conflict_size); + + int64_t new_total_count, new_finish_count, new_success_count, new_conflict_count; + int64_t new_total_size, new_finish_size, new_success_size, new_conflict_size; + statistic->GetStatistic(&new_total_count, &new_total_size, &new_finish_count, &new_finish_size, + &new_success_count, &new_success_size, &new_conflict_count, + &new_conflict_size); + + int64_t total_count = new_total_count - old_total_count; + int64_t finish_count = new_finish_count - old_finish_count; + int64_t success_count = new_success_count - old_success_count; + int64_t conflict_count = new_conflict_count - old_conflict_count; + int64_t total_size = new_total_size - old_total_size; + int64_t finish_size = new_finish_size - old_finish_size; + int64_t success_size = new_success_size - old_success_size; + int64_t conflict_size = new_conflict_size - old_conflict_size; + + int64_t total_pending_count = new_total_count - new_finish_count; + // scan + if (total_pending_count < 0) { + total_pending_count = 0; + } + + print_time(); + std::cout << " "; + print_opt(statistic); + std::cout << "\t"; + + if (mode != SCAN && type == ASYNC) { + print_size_and_count(new_total_size, new_total_count); std::cout << "/"; - print_size_and_count(success_size, success_count); + print_size_and_count(total_size, total_count); std::cout << "\t\t"; - - print_size_and_count(new_conflict_size, new_conflict_count); - std::cout << "/"; - print_size_and_count(conflict_size, conflict_count); - std::cout << "\t\t"; - - if (mode != SCAN && type == ASYNC) { - std::cout << total_pending_count; - } - std::cout << std::endl; + } + + print_size_and_count(new_finish_size, new_finish_count); + std::cout << "/"; + print_size_and_count(finish_size, finish_count); + std::cout << "\t\t"; + + print_size_and_count(new_success_size, new_success_count); + std::cout << "/"; + print_size_and_count(success_size, success_count); + std::cout << "\t\t"; + + print_size_and_count(new_conflict_size, new_conflict_count); + std::cout << "/"; + print_size_and_count(conflict_size, conflict_count); + std::cout << "\t\t"; + + if (mode != SCAN && type == ASYNC) { + std::cout << total_pending_count; + } + std::cout << std::endl; } void print_marker(Marker* marker) { - std::cout << "MinLatency: " << marker->MinLatency() << " " - << "AverageLatency: " << marker->AverageLatency() << " " - << "MaxLatency: " << marker->MaxLatency() << "\n" - << "90thPercentileLatency: " << marker->PercentileLatency(90) << " " - << "95thPercentileLatency: " << marker->PercentileLatency(95) << " " - << "99thPercentileLatency: " << marker->PercentileLatency(99) - << std::endl; + std::cout << "MinLatency: " << marker->MinLatency() << " " + << "AverageLatency: " << marker->AverageLatency() << " " + << "MaxLatency: " << marker->MaxLatency() << "\n" + << "90thPercentileLatency: " << marker->PercentileLatency(90) << " " + << "95thPercentileLatency: " << marker->PercentileLatency(95) << " " + << "99thPercentileLatency: " << marker->PercentileLatency(99) << std::endl; } void print_marker(Statistic* statistic) { - std::cout << " [FINISH]" << std::endl; - Marker* finish_marker = statistic->GetFinishMarker(); - print_marker(finish_marker); - std::cout << " [SUCCESS]" << std::endl; - Marker* success_marker = statistic->GetSuccessMarker(); - print_marker(success_marker); - if (statistic->GetOpt() == PUT) { - std::cout << " [CONFLICT]" << std::endl; - Marker* conflict_marker = statistic->GetConflictMarker(); - print_marker(conflict_marker); - } + std::cout << " [FINISH]" << std::endl; + Marker* finish_marker = statistic->GetFinishMarker(); + print_marker(finish_marker); + std::cout << " [SUCCESS]" << std::endl; + Marker* success_marker = statistic->GetSuccessMarker(); + print_marker(success_marker); + if (statistic->GetOpt() == PUT) { + std::cout << " [CONFLICT]" << std::endl; + Marker* conflict_marker = statistic->GetConflictMarker(); + print_marker(conflict_marker); + } } void* print_proc(void* param) { - Adapter* adapter = (Adapter*)param; - usleep(1000000); - int64_t count = 0; - while (!is_quit) { - if (count % 10 == 0) { - std::cout << std::endl; - print_header(); - } + Adapter* adapter = (Adapter*)param; + std::thread t{[=] { + std::this_thread::sleep_for(std::chrono::seconds(120)); + adapter->Clear(); + }}; + t.detach(); + usleep(1000000); + int64_t count = 0; + while (!is_quit) { + if (count % 10 == 0) { + std::cout << std::endl; + print_header(); + } - struct timeval now; - gettimeofday(&now, NULL); - usleep(1000000 - now.tv_usec); + struct timeval now; + gettimeofday(&now, NULL); + usleep(1000000 - now.tv_usec); + + switch (mode) { + case WRITE: + print_statistic(adapter->GetWriteMarker()); + break; + case DELETE: + print_statistic(adapter->GetWriteMarker()); + break; + case READ: + print_statistic(adapter->GetReadMarker()); + break; + case SCAN: + print_statistic(adapter->GetScanMarker()); + break; + case MIX: + print_statistic(adapter->GetWriteMarker()); + print_statistic(adapter->GetReadMarker()); + break; + case BATCH_WRITE: + print_statistic(adapter->GetBatchWriteMarker()); + break; + default: + abort(); + break; + } - switch (mode) { + if (count % 10 == 9) { + std::cout << std::endl; + switch (mode) { case WRITE: - print_statistic(adapter->GetWriteMarker()); - break; + std::cout << "[PUT MARKER]" << std::endl; + print_marker(adapter->GetWriteMarker()); + break; case DELETE: - print_statistic(adapter->GetWriteMarker()); - break; + std::cout << "[DEL MARKER]" << std::endl; + print_marker(adapter->GetWriteMarker()); + break; case READ: - print_statistic(adapter->GetReadMarker()); - break; + std::cout << "[GET MARKER]" << std::endl; + print_marker(adapter->GetReadMarker()); + break; case SCAN: - print_statistic(adapter->GetScanMarker()); - break; + std::cout << "[SCN MARKER]" << std::endl; + print_marker(adapter->GetScanMarker()); + break; case MIX: - print_statistic(adapter->GetWriteMarker()); - print_statistic(adapter->GetReadMarker()); - break; + std::cout << "[PUT MARKER]" << std::endl; + print_marker(adapter->GetWriteMarker()); + std::cout << "[GET MARKER]" << std::endl; + print_marker(adapter->GetReadMarker()); + break; + case BATCH_WRITE: + std::cout << "[BPUT MARKER]" << std::endl; + print_marker(adapter->GetBatchWriteMarker()); + break; default: - abort(); - break; - } - - if (count % 10 == 9) { - std::cout << std::endl; - switch (mode) { - case WRITE: - std::cout << "[PUT MARKER]" << std::endl; - print_marker(adapter->GetWriteMarker()); - break; - case DELETE: - std::cout << "[DEL MARKER]" << std::endl; - print_marker(adapter->GetWriteMarker()); - break; - case READ: - std::cout << "[GET MARKER]" << std::endl; - print_marker(adapter->GetReadMarker()); - break; - case SCAN: - std::cout << "[SCN MARKER]" << std::endl; - print_marker(adapter->GetScanMarker()); - break; - case MIX: - std::cout << "[PUT MARKER]" << std::endl; - print_marker(adapter->GetWriteMarker()); - std::cout << "[GET MARKER]" << std::endl; - print_marker(adapter->GetReadMarker()); - break; - default: - abort(); - break; - } - } - - count++; + abort(); + break; + } } - return NULL; + + count++; + } + return NULL; } void print_summary(Statistic* marker, double duration) { - int64_t total_count, finish_count, success_count, conflict_count; - int64_t total_size, finish_size, success_size, conflict_size; - marker->GetStatistic(&total_count, &total_size, - &finish_count, &finish_size, - &success_count, &success_size, - &conflict_count, &conflict_size); - - print_opt(marker); - std::streamsize precision = std::cout.precision(); - std::ios::fmtflags flag(std::cout.flags()); - std::cout.precision(3); - std::cout << " Summary: " << std::fixed << duration << " s\n" - << " total: " << finish_size << " bytes " - << finish_count << " records " - << (double)finish_size / 1048576 / duration << " MB/s\n" - << " succ: " << success_size << " bytes " - << success_count << " records " - << (double)success_size / 1048576 / duration << " MB/s\n" - << " conflict: " << conflict_size << " bytes " - << conflict_count << " records " - << (double)conflict_size / 1048576 / duration << " MB/s" - << std::endl; - std::cout.precision(precision); - std::cout.flags(flag); + int64_t total_count, finish_count, success_count, conflict_count; + int64_t total_size, finish_size, success_size, conflict_size; + marker->GetStatistic(&total_count, &total_size, &finish_count, &finish_size, &success_count, + &success_size, &conflict_count, &conflict_size); + + print_opt(marker); + std::streamsize precision = std::cout.precision(); + std::ios::fmtflags flag(std::cout.flags()); + std::cout.precision(3); + std::cout << " Summary: " << std::fixed << duration << " s\n" + << " total: " << finish_size << " bytes " << finish_count << " records " + << (double)finish_size / 1048576 / duration << " MB/s\n" + << " succ: " << success_size << " bytes " << success_count << " records " + << (double)success_size / 1048576 / duration << " MB/s\n" + << " conflict: " << conflict_size << " bytes " << conflict_count << " records " + << (double)conflict_size / 1048576 / duration << " MB/s" << std::endl; + std::cout.precision(precision); + std::cout.flags(flag); } void print_summary_proc(Adapter* adapter, double duration) { - switch (mode) { + switch (mode) { case WRITE: - print_summary(adapter->GetWriteMarker(), duration); - break; + print_summary(adapter->GetWriteMarker(), duration); + break; case DELETE: - print_summary(adapter->GetWriteMarker(), duration); - break; + print_summary(adapter->GetWriteMarker(), duration); + break; case READ: - print_summary(adapter->GetReadMarker(), duration); - break; + print_summary(adapter->GetReadMarker(), duration); + break; case SCAN: - print_summary(adapter->GetScanMarker(), duration); - break; + print_summary(adapter->GetScanMarker(), duration); + break; case MIX: - print_summary(adapter->GetWriteMarker(), duration); - print_summary(adapter->GetReadMarker(), duration); - break; + print_summary(adapter->GetWriteMarker(), duration); + print_summary(adapter->GetReadMarker(), duration); + break; + case BATCH_WRITE: + print_summary(adapter->GetBatchWriteMarker(), duration); + break; default: - abort(); - break; - } + abort(); + break; + } - std::cout << std::endl; - switch (mode) { + std::cout << std::endl; + switch (mode) { case WRITE: - std::cout << "[PUT MARKER]" << std::endl; - print_marker(adapter->GetWriteMarker()); - break; + std::cout << "[PUT MARKER]" << std::endl; + print_marker(adapter->GetWriteMarker()); + break; case DELETE: - std::cout << "[DEL MARKER]" << std::endl; - print_marker(adapter->GetWriteMarker()); - break; + std::cout << "[DEL MARKER]" << std::endl; + print_marker(adapter->GetWriteMarker()); + break; case READ: - std::cout << "[GET MARKER]" << std::endl; - print_marker(adapter->GetReadMarker()); - break; + std::cout << "[GET MARKER]" << std::endl; + print_marker(adapter->GetReadMarker()); + break; case SCAN: - std::cout << "[SCN MARKER]" << std::endl; - print_marker(adapter->GetScanMarker()); - break; + std::cout << "[SCN MARKER]" << std::endl; + print_marker(adapter->GetScanMarker()); + break; case MIX: - std::cout << "[PUT MARKER]" << std::endl; - print_marker(adapter->GetWriteMarker()); - std::cout << "[GET MARKER]" << std::endl; - print_marker(adapter->GetReadMarker()); - break; + std::cout << "[PUT MARKER]" << std::endl; + print_marker(adapter->GetWriteMarker()); + std::cout << "[GET MARKER]" << std::endl; + print_marker(adapter->GetReadMarker()); + break; + case BATCH_WRITE: + std::cout << "[BPUT MARKER]" << std::endl; + print_marker(adapter->GetBatchWriteMarker()); + break; default: - abort(); - break; - } + abort(); + break; + } } int main(int argc, char** argv) { - ::google::ParseCommandLineFlags(&argc, &argv, true); - - if (argc > 1 && strcmp(argv[1], "version") == 0) { - PrintSystemVersion(); - return 0; - } - - tera::ErrorCode err; - tera::Client* client = tera::Client::NewClient("", "tera_mark"); - if (NULL == client) { - std::cerr << "fail to create client: " << tera::strerr(err) << std::endl; - return -1; - } + ::google::ParseCommandLineFlags(&argc, &argv, true); - tera::Table* table = client->OpenTable(FLAGS_tablename, &err); - if (NULL == table) { - std::cerr << "fail to open table: " << tera::strerr(err) << std::endl; - return -1; - } - - std::vector scan_cf_list; - if (FLAGS_mode.compare("w") == 0) { - mode = WRITE; - } else if (FLAGS_mode.compare("r") == 0) { - mode = READ; - } else if (FLAGS_mode.compare("d") == 0) { - mode = DELETE; - } else if (FLAGS_mode.compare("s") == 0) { - mode = SCAN; - size_t delim = 0; - size_t cf_pos = 0; - while (std::string::npos != (delim = FLAGS_cf_list.find(',', cf_pos))) { - if (cf_pos < delim) { - scan_cf_list.push_back(std::string(FLAGS_cf_list, cf_pos, delim - cf_pos)); - } - cf_pos = delim + 1; - } - } else if (FLAGS_mode.compare("m") == 0) { - mode = MIX; - } else { - std::cerr << "unsupport mode: " << FLAGS_mode << std::endl; - return -1; - } - - if (FLAGS_type.compare("sync") == 0) { - type = SYNC; - } else if (FLAGS_type.compare("async") == 0) { - type = ASYNC; - } else { - std::cerr << "unsupport type: " << FLAGS_type << std::endl; - return -1; + if (argc > 1 && strcmp(argv[1], "version") == 0) { + PrintSystemVersion(); + return 0; + } + + tera::ErrorCode err; + tera::Client* client = tera::Client::NewClient("", "tera_mark"); + if (NULL == client) { + std::cerr << "fail to create client: " << tera::strerr(err) << std::endl; + return -1; + } + + tera::Table* table = client->OpenTable(FLAGS_tablename, &err); + if (NULL == table) { + std::cerr << "fail to open table: " << tera::strerr(err) << std::endl; + return -1; + } + + std::vector scan_cf_list; + if (FLAGS_mode.compare("w") == 0) { + mode = WRITE; + } else if (FLAGS_mode.compare("r") == 0) { + mode = READ; + } else if (FLAGS_mode.compare("d") == 0) { + mode = DELETE; + } else if (FLAGS_mode.compare("s") == 0) { + mode = SCAN; + size_t delim = 0; + size_t cf_pos = 0; + while (std::string::npos != (delim = FLAGS_cf_list.find(',', cf_pos))) { + if (cf_pos < delim) { + scan_cf_list.push_back(std::string(FLAGS_cf_list, cf_pos, delim - cf_pos)); + } + cf_pos = delim + 1; + } + } else if (FLAGS_mode.compare("m") == 0) { + mode = MIX; + } else if (FLAGS_mode.compare("bw") == 0) { + mode = BATCH_WRITE; + } else { + std::cerr << "unsupport mode: " << FLAGS_mode << std::endl; + return -1; + } + + if (FLAGS_type.compare("sync") == 0) { + type = SYNC; + } else if (FLAGS_type.compare("async") == 0) { + type = ASYNC; + } else { + std::cerr << "unsupport type: " << FLAGS_type << std::endl; + return -1; + } + + Adapter* adapter = new Adapter(table); + + pthread_t print_thread; + if (0 != pthread_create(&print_thread, NULL, &print_proc, adapter)) { + std::cerr << "cannot create thread"; + return -1; + } + + std::cout << "begin ..." << std::endl; + timeval start_time; + gettimeofday(&start_time, NULL); + + int opt = NONE; + std::string row; + std::map > column; + uint64_t largest_ts = tera::kLatestTs; + uint64_t smallest_ts = tera::kOldestTs; + std::string value; + + int last_opt = NONE; + bool finish = false; + int64_t count = 0; + while (true) { + if (FLAGS_entry_limit != 0 && count == FLAGS_entry_limit) { + struct timeval now; + gettimeofday(&now, NULL); + if (1000000 - now.tv_usec > 0) { + usleep(1000000 - now.tv_usec); + } + count = 0; } - - Adapter* adapter = new Adapter(table); - - pthread_t print_thread; - if (0 != pthread_create(&print_thread, NULL, &print_proc, adapter)) { - std::cerr << "cannot create thread"; - return -1; + switch (mode) { + case WRITE: + opt = PUT; + finish = !get_next_row(NULL, &row, &column, &largest_ts, NULL, &value); + break; + case READ: + opt = GET; + finish = !get_next_row(NULL, &row, &column, &largest_ts, &smallest_ts, NULL); + break; + case DELETE: + opt = DEL; + finish = !get_next_row(NULL, &row, &column, &largest_ts, NULL, NULL); + break; + case MIX: + finish = !get_next_row(&opt, &row, &column, &largest_ts, &smallest_ts, &value); + break; + case SCAN: + adapter->Scan(FLAGS_start_key, FLAGS_end_key, scan_cf_list, FLAGS_print, + FLAGS_scan_streaming); + finish = true; + break; + case BATCH_WRITE: + opt = BPUT; + finish = !get_next_row(NULL, &row, &column, &largest_ts, NULL, &value); + break; + default: + abort(); + break; } - std::cout << "begin ..." << std::endl; - timeval start_time; - gettimeofday(&start_time, NULL); - - int opt = NONE; - std::string row; - std::map > column; - uint64_t largest_ts = tera::kLatestTs; - uint64_t smallest_ts = tera::kOldestTs; - std::string value; - - int last_opt = NONE; - bool finish = false; - int64_t count = 0; - while (true) { - if (FLAGS_entry_limit != 0 && count == FLAGS_entry_limit) { - struct timeval now; - gettimeofday(&now, NULL); - if (1000000 - now.tv_usec > 0) { - usleep(1000000 - now.tv_usec); - } - count = 0; + if (finish) { + if (type == SYNC) { + if (mode == WRITE || mode == DELETE || mode == MIX) { + adapter->CommitSyncWrite(); } - switch (mode) { - case WRITE: - opt = PUT; - finish = !get_next_row(NULL, &row, &column, &largest_ts, NULL, &value); - break; - case READ: - opt = GET; - finish = !get_next_row(NULL, &row, &column, &largest_ts, &smallest_ts, NULL); - break; - case DELETE: - opt = DEL; - finish = !get_next_row(NULL, &row, &column, &largest_ts, NULL, NULL); - break; - case MIX: - finish = !get_next_row(&opt, &row, &column, &largest_ts, &smallest_ts, &value); - break; - case SCAN: - adapter->Scan(FLAGS_start_key, FLAGS_end_key, scan_cf_list, FLAGS_print, - FLAGS_scan_streaming); - finish = true; - break; - default: - abort(); - break; + if (mode == READ || mode == MIX) { + adapter->CommitSyncRead(); } + } + break; + } - if (finish) { - if (type == SYNC) { - if (mode == WRITE || mode == DELETE || mode == MIX) { - adapter->CommitSyncWrite(); - } - if (mode == READ || mode == MIX) { - adapter->CommitSyncRead(); - } - } - break; + switch (opt) { + case PUT: + case PIF: + if (type == SYNC && mode == MIX && last_opt == GET) { + adapter->CommitSyncRead(); } - - switch (opt) { - case PUT: - case PIF: - if (type == SYNC && mode == MIX && last_opt == GET) { - adapter->CommitSyncRead(); - } - adapter->Write(opt, row, column, largest_ts, value); - break; - case GET: - if (type == SYNC && mode == MIX && last_opt == PUT) { - adapter->CommitSyncWrite(); - } - adapter->Read(row, column, largest_ts, smallest_ts); - break; - case DEL: - adapter->Delete(row, column, largest_ts); - break; - default: - abort(); - break; + adapter->Write(opt, row, column, largest_ts, value); + break; + case GET: + if (type == SYNC && mode == MIX && last_opt == PUT) { + adapter->CommitSyncWrite(); } - last_opt = opt; - - opt = NONE; - row.clear(); - column.clear(); - largest_ts = tera::kLatestTs; - smallest_ts = tera::kOldestTs; - value.clear(); - count += 1; + adapter->Read(row, column, largest_ts, smallest_ts); + break; + case DEL: + adapter->Delete(row, column, largest_ts); + break; + case BPUT: + adapter->BatchWrite(opt, row, column, largest_ts, value, finish); + break; + default: + abort(); + break; } + last_opt = opt; - std::cout << "wait for completion..." << std::endl; - adapter->WaitComplete(); + opt = NONE; + row.clear(); + column.clear(); + largest_ts = tera::kLatestTs; + smallest_ts = tera::kOldestTs; + value.clear(); + count += 1; + } - timeval finish_time; - gettimeofday(&finish_time, NULL); - double duration = (finish_time.tv_sec - start_time.tv_sec) - + (double)(finish_time.tv_usec - start_time.tv_usec) / 1000000.0; + std::cout << "wait for completion..." << std::endl; + adapter->WaitComplete(); - is_quit = true; - pthread_join(print_thread, NULL); + timeval finish_time; + gettimeofday(&finish_time, NULL); + double duration = (finish_time.tv_sec - start_time.tv_sec) + + (double)(finish_time.tv_usec - start_time.tv_usec) / 1000000.0; - print_summary_proc(adapter, duration); - delete adapter; + is_quit = true; + pthread_join(print_thread, NULL); - usleep(100000); - return 0; + print_summary_proc(adapter, duration); + delete adapter; + + usleep(100000); + return 0; } /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/benchmark/tpcc/data_generator.cc b/src/benchmark/tpcc/data_generator.cc index 8fd76cbe6..cd2b41a25 100644 --- a/src/benchmark/tpcc/data_generator.cc +++ b/src/benchmark/tpcc/data_generator.cc @@ -18,165 +18,169 @@ namespace tpcc { DataGenerator::DataGenerator(RandomGenerator* rand_gen, TpccDb* db) : event_(), - rand_gen_(rand_gen), - db_(db), - now_datatime_(get_curtime_str()), + rand_gen_(rand_gen), + db_(db), + now_datatime_(get_curtime_str()), thread_pool_(FLAGS_tpcc_thread_pool_size) { - for (int i = 0; i < kTpccTableCnt; ++i) { - states_.push_back(std::make_pair(Counter(), Counter())); - } + for (int i = 0; i < kTpccTableCnt; ++i) { + states_.push_back(std::make_pair(Counter(), Counter())); + } } void DataGenerator::PrintJoinTimeoutInfo(int need_cnt, int table_enum_num) { - if (need_cnt > states_[table_enum_num].first.Get() + states_[table_enum_num].second.Get()) { - LOG(ERROR) << "table:" << kTpccTables[table_enum_num] - << "[need/succ/fail]:[" - << need_cnt << "/" - << states_[table_enum_num].first.Get() << "/" - << states_[table_enum_num].first.Get() << "]"; - } + if (need_cnt > states_[table_enum_num].first.Get() + states_[table_enum_num].second.Get()) { + LOG(ERROR) << "table:" << kTpccTables[table_enum_num] << "[need/succ/fail]:[" << need_cnt << "/" + << states_[table_enum_num].first.Get() << "/" << states_[table_enum_num].first.Get() + << "]"; + } } void DataGenerator::Join() { - event_.Trigger(); - if (!event_.TimeWait(FLAGS_generate_data_wait_times)) { - int stock_cnt = FLAGS_warehouses_count * kItemCount; - int districts_cnt = FLAGS_warehouses_count * kDistrictCountPerWarehouse; - int customers_cnt = districts_cnt * kCustomerCountPerDistrict; - PrintJoinTimeoutInfo(kItemCount, kItemTable); - PrintJoinTimeoutInfo(stock_cnt, kStockTable); - PrintJoinTimeoutInfo(FLAGS_warehouses_count, kWarehouseTable); - PrintJoinTimeoutInfo(districts_cnt, kDistrictTable); - PrintJoinTimeoutInfo(customers_cnt, kCustomerTable); - PrintJoinTimeoutInfo(customers_cnt, kCustomerLastIndex); - PrintJoinTimeoutInfo(customers_cnt, kHistoryTable); - } + event_.Trigger(); + if (!event_.TimeWait(FLAGS_generate_data_wait_times)) { + int stock_cnt = FLAGS_warehouses_count * kItemCount; + int districts_cnt = FLAGS_warehouses_count * kDistrictCountPerWarehouse; + int customers_cnt = districts_cnt * kCustomerCountPerDistrict; + PrintJoinTimeoutInfo(kItemCount, kItemTable); + PrintJoinTimeoutInfo(stock_cnt, kStockTable); + PrintJoinTimeoutInfo(FLAGS_warehouses_count, kWarehouseTable); + PrintJoinTimeoutInfo(districts_cnt, kDistrictTable); + PrintJoinTimeoutInfo(customers_cnt, kCustomerTable); + PrintJoinTimeoutInfo(customers_cnt, kCustomerLastIndex); + PrintJoinTimeoutInfo(customers_cnt, kHistoryTable); + } } void DataGenerator::GenStocks(int32_t warehouse_id) { - IdSet original_ids = PickUniqueIdSet(rand_gen_, kItemCount / 10, 1, kItemCount); - event_.AddEventSources(kItemCount); - for (int id = 1; id <= kItemCount; ++id) { - bool is_original = original_ids.find(id) != original_ids.end(); - PushToInsertQueue(std::bind(&DataGenerator::GenStock, this, id, warehouse_id, is_original)); - } + IdSet original_ids = PickUniqueIdSet(rand_gen_, kItemCount / 10, 1, kItemCount); + event_.AddEventSources(kItemCount); + for (int id = 1; id <= kItemCount; ++id) { + bool is_original = original_ids.find(id) != original_ids.end(); + PushToInsertQueue(std::bind(&DataGenerator::GenStock, this, id, warehouse_id, is_original)); + } } void DataGenerator::GenStock(int32_t id, int32_t warehouse_id, bool is_original) { - Stock s(id, warehouse_id, is_original, rand_gen_); - VLOG(12) << s.ToString(); - db_->InsertStock(s) ? states_[kStockTable].first.Inc() : states_[kStockTable].second.Inc(); - event_.Complete(); + Stock s(id, warehouse_id, is_original, rand_gen_); + VLOG(12) << s.ToString(); + db_->InsertStock(s) ? states_[kStockTable].first.Inc() : states_[kStockTable].second.Inc(); + event_.Complete(); } void DataGenerator::GenCustomers(int32_t district_id, int32_t warehouse_id) { - IdSet bad_credit_ids = PickUniqueIdSet(rand_gen_, - kCustomerCountPerDistrict / 10, 1, kCustomerCountPerDistrict); - event_.AddEventSources(kCustomerCountPerDistrict); - for (int c_id = 1; c_id <= kCustomerCountPerDistrict; ++c_id) { - bool is_bad_credit = bad_credit_ids.find(c_id) != bad_credit_ids.end(); - Customer c(c_id, district_id, warehouse_id, now_datatime_, is_bad_credit, rand_gen_); - VLOG(12) << c.ToString(); - db_->InsertCustomer(c) ? states_[kCustomerTable].first.Inc() : states_[kCustomerTable].second.Inc(); - } - event_.Complete(kCustomerCountPerDistrict); + IdSet bad_credit_ids = + PickUniqueIdSet(rand_gen_, kCustomerCountPerDistrict / 10, 1, kCustomerCountPerDistrict); + event_.AddEventSources(kCustomerCountPerDistrict); + for (int c_id = 1; c_id <= kCustomerCountPerDistrict; ++c_id) { + bool is_bad_credit = bad_credit_ids.find(c_id) != bad_credit_ids.end(); + Customer c(c_id, district_id, warehouse_id, now_datatime_, is_bad_credit, rand_gen_); + VLOG(12) << c.ToString(); + db_->InsertCustomer(c) ? states_[kCustomerTable].first.Inc() + : states_[kCustomerTable].second.Inc(); + } + event_.Complete(kCustomerCountPerDistrict); } void DataGenerator::GenHistorys(int32_t district_id, int32_t warehouse_id) { - event_.AddEventSources(kCustomerCountPerDistrict); - for (int h_id = 1; h_id <= kCustomerCountPerDistrict; ++h_id) { - History h(h_id, district_id, warehouse_id, now_datatime_, rand_gen_); - VLOG(12) << h.ToString(); - db_->InsertHistory(h) ? states_[kHistoryTable].first.Inc() : states_[kHistoryTable].second.Inc(); - } - event_.Complete(kCustomerCountPerDistrict); + event_.AddEventSources(kCustomerCountPerDistrict); + for (int h_id = 1; h_id <= kCustomerCountPerDistrict; ++h_id) { + History h(h_id, district_id, warehouse_id, now_datatime_, rand_gen_); + VLOG(12) << h.ToString(); + db_->InsertHistory(h) ? states_[kHistoryTable].first.Inc() + : states_[kHistoryTable].second.Inc(); + } + event_.Complete(kCustomerCountPerDistrict); } -void DataGenerator::GenOrderLines(int cnt, int32_t order_id, int32_t district_id, +void DataGenerator::GenOrderLines(int cnt, int32_t order_id, int32_t district_id, int32_t warehouse_id, bool new_order) { - event_.AddEventSources(cnt); - for (int i = 1; i <= cnt; ++i) { - OrderLine ol(order_id, district_id, warehouse_id, i, new_order, now_datatime_, rand_gen_); - VLOG(12) << ol.ToString(); - db_->InsertOrderLine(ol) ? states_[kOrderLineTable].first.Inc() : states_[kOrderLineTable].second.Inc(); - } - event_.Complete(cnt); + event_.AddEventSources(cnt); + for (int i = 1; i <= cnt; ++i) { + OrderLine ol(order_id, district_id, warehouse_id, i, new_order, now_datatime_, rand_gen_); + VLOG(12) << ol.ToString(); + db_->InsertOrderLine(ol) ? states_[kOrderLineTable].first.Inc() + : states_[kOrderLineTable].second.Inc(); + } + event_.Complete(cnt); } void DataGenerator::GenOrders(int32_t d_id, int32_t w_id) { - std::vector disorder_ids = rand_gen_->MakeDisOrderList(1, kCustomerCountPerDistrict); - event_.AddEventSources(kCustomerCountPerDistrict); - for (int o_id = 1; o_id <= kCustomerCountPerDistrict; ++o_id) { - bool new_order = (kCustomerCountPerDistrict - kInitNewOrderCountPerDistrict) < o_id; - int32_t c_id = disorder_ids[o_id]; - Order o(o_id, c_id, d_id, w_id, new_order, now_datatime_, rand_gen_); - // insert order line and new order first - // this use sync interface - GenOrderLines(o.o_ol_cnt, o_id, d_id, w_id, new_order); - if (new_order) { - event_.AddEventSources(1); - NewOrder no(o_id, d_id, w_id); - VLOG(12) << no.ToString(); - db_->InsertNewOrder(no) ? states_[kNewOrderTable].first.Inc() : states_[kNewOrderTable].second.Inc(); - event_.Complete(1); - } - // wait orderline and neworder insert done - VLOG(12) << o.ToString(); - db_->InsertOrder(o) ? states_[kOrderTable].first.Inc() : states_[kOrderTable].second.Inc(); + std::vector disorder_ids = rand_gen_->MakeDisOrderList(1, kCustomerCountPerDistrict); + event_.AddEventSources(kCustomerCountPerDistrict); + for (int o_id = 1; o_id <= kCustomerCountPerDistrict; ++o_id) { + bool new_order = (kCustomerCountPerDistrict - kInitNewOrderCountPerDistrict) < o_id; + int32_t c_id = disorder_ids[o_id]; + Order o(o_id, c_id, d_id, w_id, new_order, now_datatime_, rand_gen_); + // insert order line and new order first + // this use sync interface + GenOrderLines(o.o_ol_cnt, o_id, d_id, w_id, new_order); + if (new_order) { + event_.AddEventSources(1); + NewOrder no(o_id, d_id, w_id); + VLOG(12) << no.ToString(); + db_->InsertNewOrder(no) ? states_[kNewOrderTable].first.Inc() + : states_[kNewOrderTable].second.Inc(); + event_.Complete(1); } - event_.Complete(kCustomerCountPerDistrict); + // wait orderline and neworder insert done + VLOG(12) << o.ToString(); + db_->InsertOrder(o) ? states_[kOrderTable].first.Inc() : states_[kOrderTable].second.Inc(); + } + event_.Complete(kCustomerCountPerDistrict); } void DataGenerator::GenDistricts(int32_t warehouse_id) { - event_.AddEventSources(kDistrictCountPerWarehouse); - for (int d_id = 1; d_id <= kDistrictCountPerWarehouse; ++d_id) { - District d(d_id, warehouse_id, rand_gen_); - VLOG(12) << d.ToString(); - db_->InsertDistrict(d) ? states_[kDistrictTable].first.Inc() : states_[kDistrictTable].second.Inc(); - GenCustomers(d_id, warehouse_id); - GenHistorys(d_id, warehouse_id); - - GenOrders(d_id, warehouse_id); - } - event_.Complete(kDistrictCountPerWarehouse); + event_.AddEventSources(kDistrictCountPerWarehouse); + for (int d_id = 1; d_id <= kDistrictCountPerWarehouse; ++d_id) { + District d(d_id, warehouse_id, rand_gen_); + VLOG(12) << d.ToString(); + db_->InsertDistrict(d) ? states_[kDistrictTable].first.Inc() + : states_[kDistrictTable].second.Inc(); + GenCustomers(d_id, warehouse_id); + GenHistorys(d_id, warehouse_id); + + GenOrders(d_id, warehouse_id); + } + event_.Complete(kDistrictCountPerWarehouse); } void DataGenerator::GenWarehouses() { - event_.AddEventSources(FLAGS_warehouses_count); - for (int32_t w_id = 1; w_id <= FLAGS_warehouses_count; ++w_id) { - GenStocks(w_id); - Warehouse w(w_id, rand_gen_); - VLOG(12) << w.ToString(); - db_->InsertWarehouse(w) ? states_[kWarehouseTable].first.Inc() : states_[kWarehouseTable].second.Inc(); - - GenDistricts(w_id); - } - event_.Complete(FLAGS_warehouses_count); + event_.AddEventSources(FLAGS_warehouses_count); + for (int32_t w_id = 1; w_id <= FLAGS_warehouses_count; ++w_id) { + GenStocks(w_id); + Warehouse w(w_id, rand_gen_); + VLOG(12) << w.ToString(); + db_->InsertWarehouse(w) ? states_[kWarehouseTable].first.Inc() + : states_[kWarehouseTable].second.Inc(); + + GenDistricts(w_id); + } + event_.Complete(FLAGS_warehouses_count); } void DataGenerator::GenItems() { - IdSet original_ids = PickUniqueIdSet(rand_gen_, kItemCount / 10, 1, kItemCount); - event_.AddEventSources(kItemCount); - for (int i_id = 1; i_id <= kItemCount; ++i_id) { - bool is_original = original_ids.find(i_id) != original_ids.end(); - PushToInsertQueue(std::bind(&DataGenerator::GenItem, this, i_id, is_original)); - } + IdSet original_ids = PickUniqueIdSet(rand_gen_, kItemCount / 10, 1, kItemCount); + event_.AddEventSources(kItemCount); + for (int i_id = 1; i_id <= kItemCount; ++i_id) { + bool is_original = original_ids.find(i_id) != original_ids.end(); + PushToInsertQueue(std::bind(&DataGenerator::GenItem, this, i_id, is_original)); + } } void DataGenerator::GenItem(int32_t item_id, bool is_original) { - Item item(item_id, is_original, rand_gen_); - VLOG(12) << item.ToString(); - db_->InsertItem(item) ? states_[kItemTable].first.Inc() : states_[kItemTable].second.Inc(); - event_.Complete(); + Item item(item_id, is_original, rand_gen_); + VLOG(12) << item.ToString(); + db_->InsertItem(item) ? states_[kItemTable].first.Inc() : states_[kItemTable].second.Inc(); + event_.Complete(); } void DataGenerator::PushToInsertQueue(const ThreadPool::Task& task) { - while(thread_pool_.PendingNum() > FLAGS_tpcc_thread_pool_size / 2) { - usleep(100); - } - thread_pool_.AddTask(task); - VLOG(12) << "thread_pool pending num = " << thread_pool_.PendingNum(); + while (thread_pool_.PendingNum() > FLAGS_tpcc_thread_pool_size / 2) { + usleep(100); + } + thread_pool_.AddTask(task); + VLOG(12) << "thread_pool pending num = " << thread_pool_.PendingNum(); } -} // namespace tpcc -} // namespace tera +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/data_generator.h b/src/benchmark/tpcc/data_generator.h index f5593b64c..e65249622 100644 --- a/src/benchmark/tpcc/data_generator.h +++ b/src/benchmark/tpcc/data_generator.h @@ -19,43 +19,43 @@ namespace tera { namespace tpcc { - class DataGenerator { -public: - DataGenerator(RandomGenerator* random_gen, TpccDb* db); - ~DataGenerator(){} - void GenWarehouses(); - void GenItems(); - void Join(); - -private: - void PrintJoinTimeoutInfo(int need_cnt, int table_enum_num); - - // for generate data - void GenStocks(int32_t warehouse_id); - void GenCustomers(int32_t district_id, int32_t warehouse_id); - void GenHistorys(int32_t district_id, int32_t warehouse_id); - void GenOrderLines(int cnt, int32_t order_id, int32_t district_id, - int32_t warehouse_id, bool new_order); - void GenOrders(int32_t district_id, int32_t warehouse_id); - void GenDistricts(int32_t warehouse_id); - - void GenItem(int32_t item_id, bool is_original); - void GenStock(int32_t id, int32_t warehouse_id, bool is_original); - - // for async insert - void PushToInsertQueue(const ThreadPool::Task& task); -private: - typedef std::vector> InsertStates; - CompletedEvent event_; - RandomGenerator* rand_gen_; - TpccDb* db_; - InsertStates states_; - std::string now_datatime_; - common::ThreadPool thread_pool_; + public: + DataGenerator(RandomGenerator* random_gen, TpccDb* db); + ~DataGenerator() {} + void GenWarehouses(); + void GenItems(); + void Join(); + + private: + void PrintJoinTimeoutInfo(int need_cnt, int table_enum_num); + + // for generate data + void GenStocks(int32_t warehouse_id); + void GenCustomers(int32_t district_id, int32_t warehouse_id); + void GenHistorys(int32_t district_id, int32_t warehouse_id); + void GenOrderLines(int cnt, int32_t order_id, int32_t district_id, int32_t warehouse_id, + bool new_order); + void GenOrders(int32_t district_id, int32_t warehouse_id); + void GenDistricts(int32_t warehouse_id); + + void GenItem(int32_t item_id, bool is_original); + void GenStock(int32_t id, int32_t warehouse_id, bool is_original); + + // for async insert + void PushToInsertQueue(const ThreadPool::Task& task); + + private: + typedef std::vector> InsertStates; + CompletedEvent event_; + RandomGenerator* rand_gen_; + TpccDb* db_; + InsertStates states_; + std::string now_datatime_; + common::ThreadPool thread_pool_; }; -} // namespace tpcc -} // namespace tera +} // namespace tpcc +} // namespace tera #endif /* TERA_BENCHMARK_TPCC_DATA_GENERATOR_H */ diff --git a/src/benchmark/tpcc/driver.cc b/src/benchmark/tpcc/driver.cc index aed2e6235..12edbbbfe 100644 --- a/src/benchmark/tpcc/driver.cc +++ b/src/benchmark/tpcc/driver.cc @@ -19,172 +19,155 @@ namespace tpcc { Driver::Driver(RandomGenerator* rand_gen, TpccDb* db) : event_(), - rand_gen_(rand_gen), - db_(db), + rand_gen_(rand_gen), + db_(db), now_datatime_(get_curtime_str()), - thread_pool_(FLAGS_tpcc_run_gtxn_thread_pool_size) { -} + thread_pool_(FLAGS_tpcc_run_gtxn_thread_pool_size) {} void Driver::PrintJoinTimeoutInfo(int need_cnt, int table_enum_num) { - if (need_cnt < states_[table_enum_num].first.Get() + states_[table_enum_num].second.Get()) { - LOG(ERROR) << "table:" << kTpccTables[table_enum_num] - << "[need/succ/fail]:[" - << need_cnt << "/" - << states_[table_enum_num].first.Get() << "/" - << states_[table_enum_num].first.Get() << "]"; - } + if (need_cnt < states_[table_enum_num].first.Get() + states_[table_enum_num].second.Get()) { + LOG(ERROR) << "table:" << kTpccTables[table_enum_num] << "[need/succ/fail]:[" << need_cnt << "/" + << states_[table_enum_num].first.Get() << "/" << states_[table_enum_num].first.Get() + << "]"; + } } void Driver::RunTransactions() { - for (int64_t i = 0; i < FLAGS_transactions_count; ++i) { - RunOneTransaction(); - } + for (int64_t i = 0; i < FLAGS_transactions_count; ++i) { + RunOneTransaction(); + } } void Driver::Join() { - event_.Trigger(); - if (!event_.TimeWait(FLAGS_driver_wait_times)) { - // TODO - } + event_.Trigger(); + if (!event_.TimeWait(FLAGS_driver_wait_times)) { + // TODO + } } void Driver::RunOneTransaction() { - int rand_num = rand_gen_->GetRandom(1, 100); - if (rand_num <= kTpccTransactionRatios[0]) { // %4 do stock_level - RunStockLevelTxn(); - } else if (rand_num <= kTpccTransactionRatios[1]) { // %4 do order_status - RunOrderStatusTxn(); - } else if (rand_num <= kTpccTransactionRatios[2]) { // %4 do delivery - RunDeliveryTxn(); - } else if (rand_num <= kTpccTransactionRatios[3]) { // %43 do payment - RunPaymentTxn(); - } else { // %45 do new_order - RunNewOrderTxn(); - } -} + int rand_num = rand_gen_->GetRandom(1, 100); + if (rand_num <= kTpccTransactionRatios[0]) { // %4 do stock_level + RunStockLevelTxn(); + } else if (rand_num <= kTpccTransactionRatios[1]) { // %4 do order_status + RunOrderStatusTxn(); + } else if (rand_num <= kTpccTransactionRatios[2]) { // %4 do delivery + RunDeliveryTxn(); + } else if (rand_num <= kTpccTransactionRatios[3]) { // %43 do payment + RunPaymentTxn(); + } else { // %45 do new_order + RunNewOrderTxn(); + } +} void Driver::RunStockLevelTxn() { - int32_t threshold = rand_gen_->GetRandom(kMinStockLevelThreshold, kMaxStockLevelThreshold); - StockLevelResult ret; - db_->StockLevelTxn(FindWareHouse(), FindDistrict(), threshold, &ret); + int32_t threshold = rand_gen_->GetRandom(kMinStockLevelThreshold, kMaxStockLevelThreshold); + StockLevelResult ret; + db_->StockLevelTxn(FindWareHouse(), FindDistrict(), threshold, &ret); } void Driver::RunOrderStatusTxn() { - int x = rand_gen_->GetRandom(1, 100); - OrderStatusResult ret; - if (x <= 60) { - // 60% order_status by lastname - std::string last_name = GenLastName(rand_gen_, kCustomerCountPerDistrict); - db_->OrderStatusTxn(true, FindWareHouse(), FindDistrict(), - -1, last_name, &ret); - } else { - // 40% order_status by customer_id - db_->OrderStatusTxn(false, FindWareHouse(), FindDistrict(), - FindCustomerId(), "", &ret); - } + int x = rand_gen_->GetRandom(1, 100); + OrderStatusResult ret; + if (x <= 60) { + // 60% order_status by lastname + std::string last_name = GenLastName(rand_gen_, kCustomerCountPerDistrict); + db_->OrderStatusTxn(true, FindWareHouse(), FindDistrict(), -1, last_name, &ret); + } else { + // 40% order_status by customer_id + db_->OrderStatusTxn(false, FindWareHouse(), FindDistrict(), FindCustomerId(), "", &ret); + } } void Driver::RunDeliveryTxn() { - int32_t carrier_id = rand_gen_->GetRandom(kMinCarrierId, kMaxCarrierId); - DeliveryResult ret;; - db_->DeliveryTxn(FindWareHouse(), carrier_id, get_curtime_str(), &ret); + int32_t carrier_id = rand_gen_->GetRandom(kMinCarrierId, kMaxCarrierId); + DeliveryResult ret; + ; + db_->DeliveryTxn(FindWareHouse(), carrier_id, get_curtime_str(), &ret); } void Driver::RunPaymentTxn() { - int32_t warehouse_id = FindWareHouse(); - int32_t district_id = FindDistrict(); - - float h_amount = rand_gen_->MakeFloat(kRuntimeMinAmount, kRuntimeMaxAmount, - kRuntimeAmountDigits); - - int32_t customer_warehouse_id = -1; - int32_t customer_district_id = -1; - - int x = rand_gen_->GetRandom(1, 100); - - // set customer c_w_id and c_d_id - if (FLAGS_warehouses_count == 1 && x <= 85) { - // 85% payment through local warehouse (or only one warehouse) - customer_warehouse_id = warehouse_id; - customer_district_id = district_id; - } else { - // 15% payment through remote warehouse - customer_warehouse_id = - rand_gen_->GetRandom(1, FLAGS_warehouses_count, warehouse_id); - customer_district_id = FindDistrict(); - } - - x = rand_gen_->GetRandom(1, 100); - PaymentResult ret; - if (x <= 60) { - // 60% payment by lastname - std::string last_name = GenLastName(rand_gen_, kCustomerCountPerDistrict); - db_->PaymentTxn(true, warehouse_id, district_id, - customer_warehouse_id, customer_district_id, -1, - last_name, h_amount, &ret); - } else { - // 40% payment by customer_id - db_->PaymentTxn(false, warehouse_id, district_id, - customer_warehouse_id, customer_district_id, FindCustomerId(), - "", h_amount, &ret); - } + int32_t warehouse_id = FindWareHouse(); + int32_t district_id = FindDistrict(); + + float h_amount = rand_gen_->MakeFloat(kRuntimeMinAmount, kRuntimeMaxAmount, kRuntimeAmountDigits); + + int32_t customer_warehouse_id = -1; + int32_t customer_district_id = -1; + + int x = rand_gen_->GetRandom(1, 100); + + // set customer c_w_id and c_d_id + if (FLAGS_warehouses_count == 1 && x <= 85) { + // 85% payment through local warehouse (or only one warehouse) + customer_warehouse_id = warehouse_id; + customer_district_id = district_id; + } else { + // 15% payment through remote warehouse + customer_warehouse_id = rand_gen_->GetRandom(1, FLAGS_warehouses_count, warehouse_id); + customer_district_id = FindDistrict(); + } + + x = rand_gen_->GetRandom(1, 100); + PaymentResult ret; + if (x <= 60) { + // 60% payment by lastname + std::string last_name = GenLastName(rand_gen_, kCustomerCountPerDistrict); + db_->PaymentTxn(true, warehouse_id, district_id, customer_warehouse_id, customer_district_id, + -1, last_name, h_amount, &ret); + } else { + // 40% payment by customer_id + db_->PaymentTxn(false, warehouse_id, district_id, customer_warehouse_id, customer_district_id, + FindCustomerId(), "", h_amount, &ret); + } } void Driver::RunNewOrderTxn() { - int32_t warehouse_id = FindWareHouse(); - - // init NewOrderInfo - NewOrderInfo info; - // 1% of new_order transactions will be failed - info.need_failed = rand_gen_->GetRandom(1,100) == 1 ? true : false; - info.o_ol_cnt = rand_gen_->GetRandom(kMinOrderLineCnt, kMaxOrderLineCnt); - - info.ol_supply_w_ids.reserve(info.o_ol_cnt); - info.ol_i_ids.reserve(info.o_ol_cnt); - info.ol_quantities.reserve(info.o_ol_cnt); - info.o_all_local = 1; - for (int32_t i = 0; i < info.o_ol_cnt; ++i) { - // 1% of orderlines will be remote order - bool remote = rand_gen_->GetRandom(1, 100) == 1 ? true : false; - if (FLAGS_warehouses_count > 1 && remote) { - info.ol_supply_w_ids.emplace_back( - rand_gen_->GetRandom(1, FLAGS_warehouses_count, warehouse_id)); - info.o_all_local = 0; - } else { - info.ol_supply_w_ids.emplace_back(warehouse_id); - } - info.ol_i_ids.emplace_back(FindItemId()); - info.ol_quantities.emplace_back( - rand_gen_->GetRandom(1, kMaxOrderLineQuantity)); + int32_t warehouse_id = FindWareHouse(); + + // init NewOrderInfo + NewOrderInfo info; + // 1% of new_order transactions will be failed + info.need_failed = rand_gen_->GetRandom(1, 100) == 1 ? true : false; + info.o_ol_cnt = rand_gen_->GetRandom(kMinOrderLineCnt, kMaxOrderLineCnt); + + info.ol_supply_w_ids.reserve(info.o_ol_cnt); + info.ol_i_ids.reserve(info.o_ol_cnt); + info.ol_quantities.reserve(info.o_ol_cnt); + info.o_all_local = 1; + for (int32_t i = 0; i < info.o_ol_cnt; ++i) { + // 1% of orderlines will be remote order + bool remote = rand_gen_->GetRandom(1, 100) == 1 ? true : false; + if (FLAGS_warehouses_count > 1 && remote) { + info.ol_supply_w_ids.emplace_back( + rand_gen_->GetRandom(1, FLAGS_warehouses_count, warehouse_id)); + info.o_all_local = 0; + } else { + info.ol_supply_w_ids.emplace_back(warehouse_id); } + info.ol_i_ids.emplace_back(FindItemId()); + info.ol_quantities.emplace_back(rand_gen_->GetRandom(1, kMaxOrderLineQuantity)); + } - NewOrderResult ret; - db_->NewOrderTxn(warehouse_id, FindDistrict(), FindCustomerId(), info, &ret); + NewOrderResult ret; + db_->NewOrderTxn(warehouse_id, FindDistrict(), FindCustomerId(), info, &ret); } void Driver::PushToInsertQueue(const ThreadPool::Task& task) { - while(thread_pool_.PendingNum() > FLAGS_tpcc_run_gtxn_thread_pool_size / 2) { - usleep(100); - } - thread_pool_.AddTask(task); - VLOG(12) << "thread_pool pending num = " << thread_pool_.PendingNum(); + while (thread_pool_.PendingNum() > FLAGS_tpcc_run_gtxn_thread_pool_size / 2) { + usleep(100); + } + thread_pool_.AddTask(task); + VLOG(12) << "thread_pool pending num = " << thread_pool_.PendingNum(); } -int32_t Driver::FindWareHouse() { - return rand_gen_->GetRandom(1, FLAGS_warehouses_count); -} +int32_t Driver::FindWareHouse() { return rand_gen_->GetRandom(1, FLAGS_warehouses_count); } -int32_t Driver::FindDistrict() { - return rand_gen_->GetRandom(1, kDistrictCountPerWarehouse); -} +int32_t Driver::FindDistrict() { return rand_gen_->GetRandom(1, kDistrictCountPerWarehouse); } -int32_t Driver::FindCustomerId() { - return rand_gen_->NURand(1023, 1, kCustomerCountPerDistrict); -} +int32_t Driver::FindCustomerId() { return rand_gen_->NURand(1023, 1, kCustomerCountPerDistrict); } -int32_t Driver::FindItemId() { - return rand_gen_->NURand(8191, 1, kItemCount); -} +int32_t Driver::FindItemId() { return rand_gen_->NURand(8191, 1, kItemCount); } -} // namespace tpcc -} // namespace tera +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/driver.h b/src/benchmark/tpcc/driver.h index 56bf5a66f..fd8bdffeb 100644 --- a/src/benchmark/tpcc/driver.h +++ b/src/benchmark/tpcc/driver.h @@ -20,49 +20,50 @@ namespace tera { namespace tpcc { class Driver { -public: - Driver(RandomGenerator* random_gen, TpccDb* db); - ~Driver(){} - void RunTransactions(); - void Join(); - -private: - void PrintJoinTimeoutInfo(int need_cnt, int table_enum_num); - - // for run transaction - void RunOneTransaction(); - // - void RunStockLevelTxn(); - - void RunOrderStatusTxn(); - - void RunDeliveryTxn(); - - void RunPaymentTxn(); - - void RunNewOrderTxn(); - - // for async run txn - void PushToInsertQueue(const ThreadPool::Task& task); - - int32_t FindWareHouse(); - - int32_t FindDistrict(); - - int32_t FindCustomerId(); - - int32_t FindItemId(); -private: - typedef std::vector> TxnStates; - CompletedEvent event_; - RandomGenerator* rand_gen_; - TpccDb* db_; - TxnStates states_; - std::string now_datatime_; - common::ThreadPool thread_pool_; + public: + Driver(RandomGenerator* random_gen, TpccDb* db); + ~Driver() {} + void RunTransactions(); + void Join(); + + private: + void PrintJoinTimeoutInfo(int need_cnt, int table_enum_num); + + // for run transaction + void RunOneTransaction(); + // + void RunStockLevelTxn(); + + void RunOrderStatusTxn(); + + void RunDeliveryTxn(); + + void RunPaymentTxn(); + + void RunNewOrderTxn(); + + // for async run txn + void PushToInsertQueue(const ThreadPool::Task& task); + + int32_t FindWareHouse(); + + int32_t FindDistrict(); + + int32_t FindCustomerId(); + + int32_t FindItemId(); + + private: + typedef std::vector> TxnStates; + CompletedEvent event_; + RandomGenerator* rand_gen_; + TpccDb* db_; + TxnStates states_; + std::string now_datatime_; + common::ThreadPool thread_pool_; }; -} // namespace tpcc -} // namespace tera +} // namespace tpcc +} // namespace tera #endif /* TERA_BENCHMARK_TPCC_DATA_GENERATOR_H */ diff --git a/src/benchmark/tpcc/mock_tpccdb.cc b/src/benchmark/tpcc/mock_tpccdb.cc index ee8cce0d0..7c5cae5d3 100644 --- a/src/benchmark/tpcc/mock_tpccdb.cc +++ b/src/benchmark/tpcc/mock_tpccdb.cc @@ -14,5 +14,5 @@ namespace tpcc { MockTpccDb::MockTpccDb() : flag_(true) {} -} // namespace tpcc -} // namespace tera +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/mock_tpccdb.h b/src/benchmark/tpcc/mock_tpccdb.h index 0f29f0320..8294cac15 100644 --- a/src/benchmark/tpcc/mock_tpccdb.h +++ b/src/benchmark/tpcc/mock_tpccdb.h @@ -16,83 +16,54 @@ class TpccDb; class TxnResult; class MockTpccDb : public TpccDb { -public: - MockTpccDb(); - virtual ~MockTpccDb() {} - - virtual bool CreateTables() { return true; } - virtual bool CleanTables() { return true; } - - // init db - virtual bool InsertItem(const Item& i) { - return flag_; - } - - virtual bool InsertWarehouse(const Warehouse& w) { - return flag_; - } - - virtual bool InsertDistrict(const District& d) { - return flag_; - } - - virtual bool InsertCustomer(const Customer& c) { - return flag_; - } - - virtual bool InsertHistory(const History& h) { - return flag_; - } - - virtual bool InsertStock(const Stock& s) { - return flag_; - } - - virtual bool InsertOrder(const Order& o) { - return flag_; - } - - virtual bool InsertOrderLine(const OrderLine& ol) { - return flag_; - } - - virtual bool InsertNewOrder(const NewOrder& no) { - return flag_; - } - - virtual void StockLevelTxn(int32_t warehouse_id, int32_t district_id, - int32_t threshold, - StockLevelResult* ret) {} - - virtual void DeliveryTxn(int32_t warehouse_id, - int32_t carrier_id, - const std::string& delivery_datetime, - DeliveryResult* ret) {} - - virtual void OrderStatusTxn(bool by_last_name, - int32_t warehouse_id, int32_t district_id, - int32_t c_customer_id, - const std::string& last_name, - OrderStatusResult* ret) {} - - virtual void PaymentTxn(bool by_last_name, - int32_t warehouse_id, int32_t district_id, - int32_t c_warehouse_id, int32_t c_district_id, - int32_t c_customer_id, - const std::string& last_name, - int32_t h_amount, - PaymentResult* ret) {} - - virtual void NewOrderTxn(int32_t warehouse_id, - int32_t district_id, - int32_t customer_id, const NewOrderInfo& info, - NewOrderResult* ret) {} - -private: - bool flag_; + public: + MockTpccDb(); + virtual ~MockTpccDb() {} + + virtual bool CreateTables() { return true; } + virtual bool CleanTables() { return true; } + + // init db + virtual bool InsertItem(const Item& i) { return flag_; } + + virtual bool InsertWarehouse(const Warehouse& w) { return flag_; } + + virtual bool InsertDistrict(const District& d) { return flag_; } + + virtual bool InsertCustomer(const Customer& c) { return flag_; } + + virtual bool InsertHistory(const History& h) { return flag_; } + + virtual bool InsertStock(const Stock& s) { return flag_; } + + virtual bool InsertOrder(const Order& o) { return flag_; } + + virtual bool InsertOrderLine(const OrderLine& ol) { return flag_; } + + virtual bool InsertNewOrder(const NewOrder& no) { return flag_; } + + virtual void StockLevelTxn(int32_t warehouse_id, int32_t district_id, int32_t threshold, + StockLevelResult* ret) {} + + virtual void DeliveryTxn(int32_t warehouse_id, int32_t carrier_id, + const std::string& delivery_datetime, DeliveryResult* ret) {} + + virtual void OrderStatusTxn(bool by_last_name, int32_t warehouse_id, int32_t district_id, + int32_t c_customer_id, const std::string& last_name, + OrderStatusResult* ret) {} + + virtual void PaymentTxn(bool by_last_name, int32_t warehouse_id, int32_t district_id, + int32_t c_warehouse_id, int32_t c_district_id, int32_t c_customer_id, + const std::string& last_name, int32_t h_amount, PaymentResult* ret) {} + + virtual void NewOrderTxn(int32_t warehouse_id, int32_t district_id, int32_t customer_id, + const NewOrderInfo& info, NewOrderResult* ret) {} + + private: + bool flag_; }; -} // namespace tpcc -} // namespace tera +} // namespace tpcc +} // namespace tera #endif /* TERA_BENCHMARK_TPCC_MOCK_TPCCDB_H */ diff --git a/src/benchmark/tpcc/random_generator.cc b/src/benchmark/tpcc/random_generator.cc index 9308ec6e9..e55a4bd7e 100644 --- a/src/benchmark/tpcc/random_generator.cc +++ b/src/benchmark/tpcc/random_generator.cc @@ -11,122 +11,116 @@ namespace tera { namespace tpcc { -RandomGenerator::RandomGenerator():c_({0,0,0}) { - InitRandomState(); -} +RandomGenerator::RandomGenerator() : c_({0, 0, 0}) { InitRandomState(); } void RandomGenerator::InitRandomState() { - memset(&rand_state_, 0, sizeof(rand_state_)); - int ret = initstate_r(static_cast(time(NULL)), - rand_state_buf_, - sizeof(rand_state_buf_), - &rand_state_); - assert(ret == 0); + memset(&rand_state_, 0, sizeof(rand_state_)); + int ret = initstate_r(static_cast(time(NULL)), rand_state_buf_, + sizeof(rand_state_buf_), &rand_state_); + assert(ret == 0); } -NURandConstant RandomGenerator::GetRandomConstant() const { - return c_; -} +NURandConstant RandomGenerator::GetRandomConstant() const { return c_; } void RandomGenerator::SetRandomConstant() { - c_.c_last = GetRandom(0, 255); - c_.c_id = GetRandom(0, 1023); - c_.ol_i_id = GetRandom(0, 8191); + c_.c_last = GetRandom(0, 255); + c_.c_id = GetRandom(0, 1023); + c_.ol_i_id = GetRandom(0, 8191); } inline bool VarfiyConstantAvailableForRun(int run_last, int load_last) { - int delta = run_last - load_last; - delta = delta > 0 ? delta : -1 * delta; - return 65 <=delta && delta <= 119 && delta != 96 && delta != 112; + int delta = run_last - load_last; + delta = delta > 0 ? delta : -1 * delta; + return 65 <= delta && delta <= 119 && delta != 96 && delta != 112; } void RandomGenerator::SetRandomConstant(const NURandConstant& constant_for_load) { + c_.c_last = GetRandom(0, 255); + c_.c_id = GetRandom(0, 1023); + c_.ol_i_id = GetRandom(0, 8191); + while (!VarfiyConstantAvailableForRun(c_.c_last, constant_for_load.c_last)) { c_.c_last = GetRandom(0, 255); - c_.c_id = GetRandom(0, 1023); - c_.ol_i_id = GetRandom(0, 8191); - while (!VarfiyConstantAvailableForRun(c_.c_last, constant_for_load.c_last)) { - c_.c_last = GetRandom(0, 255); - } + } } int RandomGenerator::GetRandom(int lower, int upper) { - int ret = 0; - int err = random_r(&rand_state_, &ret); - assert(err == 0); - return lower <= upper ? (ret % (upper - lower + 1) + lower) : (ret % (lower - upper + 1) + upper); + int ret = 0; + int err = random_r(&rand_state_, &ret); + assert(err == 0); + return lower <= upper ? (ret % (upper - lower + 1) + lower) : (ret % (lower - upper + 1) + upper); } int RandomGenerator::GetRandom(int lower, int upper, int exclude) { - if (exclude > upper || exclude < lower) { - return GetRandom(lower, upper); - } else { - int rand = GetRandom(lower, upper - 1); - if (rand >= exclude) { - ++rand; - } - return rand; + if (exclude > upper || exclude < lower) { + return GetRandom(lower, upper); + } else { + int rand = GetRandom(lower, upper - 1); + if (rand >= exclude) { + ++rand; } + return rand; + } } std::string RandomGenerator::MakeAString(int lower_len, int upper_len) { - int len = GetRandom(lower_len, upper_len); - std::string ret; - for (int i = 0; i < len; ++i) { - ret += (char)('a' + GetRandom(0, 25)); - } - return ret; + int len = GetRandom(lower_len, upper_len); + std::string ret; + for (int i = 0; i < len; ++i) { + ret += (char)('a' + GetRandom(0, 25)); + } + return ret; } std::string RandomGenerator::MakeNString(int lower_len, int upper_len) { - int len = GetRandom(lower_len, upper_len); - std::string ret; - for (int i = 0; i < len; ++i) { - ret += (char)('0' + GetRandom(0, 9)); - } - return ret; + int len = GetRandom(lower_len, upper_len); + std::string ret; + for (int i = 0; i < len; ++i) { + ret += (char)('0' + GetRandom(0, 9)); + } + return ret; } float RandomGenerator::MakeFloat(float lower, float upper, int digits) { - float num = 1.0; - for (int i = 0; i < digits; ++i) { - num *= 10; - } - return GetRandom(int(lower * num + 0.5), int(upper * num + 0.5)) / num; + float num = 1.0; + for (int i = 0; i < digits; ++i) { + num *= 10; + } + return GetRandom(int(lower * num + 0.5), int(upper * num + 0.5)) / num; } std::vector RandomGenerator::MakeDisOrderList(int lower, int upper) { - std::vector ret(upper - lower + 1, -1); - for (int i = 0; i < upper - lower + 1; ++i) { - int rand_pos = GetRandom(0, upper - lower); - while (true) { - if (ret[rand_pos] == -1) { - ret[rand_pos] = lower + i; - break; - } - rand_pos = GetRandom(0, upper - lower); - } + std::vector ret(upper - lower + 1, -1); + for (int i = 0; i < upper - lower + 1; ++i) { + int rand_pos = GetRandom(0, upper - lower); + while (true) { + if (ret[rand_pos] == -1) { + ret[rand_pos] = lower + i; + break; + } + rand_pos = GetRandom(0, upper - lower); } - return ret; + } + return ret; } int RandomGenerator::NURand(int A, int x, int y) { - int C = 0; - switch(A) { - case 255: - C = c_.c_last; - break; - case 1023: - C = c_.c_id; - break; - case 8191: - C = c_.ol_i_id; - break; - default: - LOG(ERROR) << "NURand: A = " << A << " not available"; - abort(); - } - return (((GetRandom(0, A) | GetRandom(x, y)) + C) % (y - x + 1)) + x; + int C = 0; + switch (A) { + case 255: + C = c_.c_last; + break; + case 1023: + C = c_.c_id; + break; + case 8191: + C = c_.ol_i_id; + break; + default: + LOG(ERROR) << "NURand: A = " << A << " not available"; + abort(); + } + return (((GetRandom(0, A) | GetRandom(x, y)) + C) % (y - x + 1)) + x; } -} // namespace tpcc -} // namespace tera +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/random_generator.h b/src/benchmark/tpcc/random_generator.h index c39070294..efa50ec65 100644 --- a/src/benchmark/tpcc/random_generator.h +++ b/src/benchmark/tpcc/random_generator.h @@ -17,48 +17,50 @@ namespace tera { namespace tpcc { struct NURandConstant { - int c_last; - int c_id; - int ol_i_id; + int c_last; + int c_id; + int ol_i_id; }; class RandomGenerator { -public: - RandomGenerator(); - virtual ~RandomGenerator(){} + public: + RandomGenerator(); + virtual ~RandomGenerator() {} - NURandConstant GetRandomConstant() const; - void SetRandomConstant(); - void SetRandomConstant(const NURandConstant& constant_for_load); + NURandConstant GetRandomConstant() const; + void SetRandomConstant(); + void SetRandomConstant(const NURandConstant& constant_for_load); - // make a string A len=rand[lower_len, upper_len] A[x] = set(a..z) - std::string MakeAString(int lower_len, int upper_len); + // make a string A len=rand[lower_len, upper_len] A[x] = set(a..z) + std::string MakeAString(int lower_len, int upper_len); - // make a string N len=rand[lower_len, upper_len] N[x] = set(0..9) - std::string MakeNString(int lower_len, int upper_len); + // make a string N len=rand[lower_len, upper_len] N[x] = set(0..9) + std::string MakeNString(int lower_len, int upper_len); - float MakeFloat(float lower, float upper, int digits); + float MakeFloat(float lower, float upper, int digits); - std::vector MakeDisOrderList(int lower, int upper); + std::vector MakeDisOrderList(int lower, int upper); - int NURand(int A, int lower, int upper); + int NURand(int A, int lower, int upper); - // get rand int from [lower, upper] - int GetRandom(int lower, int upper); + // get rand int from [lower, upper] + int GetRandom(int lower, int upper); - int GetRandom(int lower, int upper, int exclude); -private: - void InitRandomState(); -private: - // for system call random_r and initstate_r - char rand_state_buf_[kRandomStateSize]; - struct random_data rand_state_; - - // for NURand, need a constant - NURandConstant c_; + int GetRandom(int lower, int upper, int exclude); + + private: + void InitRandomState(); + + private: + // for system call random_r and initstate_r + char rand_state_buf_[kRandomStateSize]; + struct random_data rand_state_; + + // for NURand, need a constant + NURandConstant c_; }; -} // namespace tpcc -} // namespace tera +} // namespace tpcc +} // namespace tera #endif /* TERA_BENCHMARK_TPCC_RANDOM_GENERATOR_H */ diff --git a/src/benchmark/tpcc/tera_tpccdb.cc b/src/benchmark/tpcc/tera_tpccdb.cc index 2eb454122..abdf00ad8 100644 --- a/src/benchmark/tpcc/tera_tpccdb.cc +++ b/src/benchmark/tpcc/tera_tpccdb.cc @@ -19,521 +19,504 @@ namespace tera { namespace tpcc { TeraTpccDb::TeraTpccDb() : client_(NULL) { - ErrorCode error_code; - client_ = Client::NewClient(FLAGS_tera_client_flagfile, "tera_tpcc", &error_code); - if (client_ == NULL) { - LOG(ERROR) << "new client failed. err:" << error_code.ToString(); - _Exit(EXIT_FAILURE); - } + ErrorCode error_code; + client_ = Client::NewClient(FLAGS_tera_client_flagfile, "tera_tpcc", &error_code); + if (client_ == NULL) { + LOG(ERROR) << "new client failed. err:" << error_code.ToString(); + _Exit(EXIT_FAILURE); + } } -TeraTpccDb::~TeraTpccDb() { - delete client_; -} +TeraTpccDb::~TeraTpccDb() { delete client_; } bool TeraTpccDb::CreateTables() { - ErrorCode err; - for (auto table : kTpccTables) { - std::string schema_file = FLAGS_tera_table_schema_dir + table; - TableDescriptor* desc = new TableDescriptor(); - if (ParseTableSchemaFile(schema_file, desc, &err)) { - if (client_->CreateTable(*desc, &err) && err.GetType() == ErrorCode::kOK) { - LOG(INFO) << "create table " << table << " ok"; - Table* table_ptr = client_->OpenTable(table, &err); - if (table_ptr == NULL) { - LOG(ERROR) << "open table " << table << " failed"; - delete desc; - return false; - } else { - table_map_[table] = table_ptr; - LOG(INFO) << "open table " << table << " ok"; - } - } else { - LOG(ERROR) << "create table " << table << " failed"; - delete desc; - return false; - } + ErrorCode err; + for (auto table : kTpccTables) { + std::string schema_file = FLAGS_tera_table_schema_dir + table; + TableDescriptor* desc = new TableDescriptor(); + if (ParseTableSchemaFile(schema_file, desc, &err)) { + if (client_->CreateTable(*desc, &err) && err.GetType() == ErrorCode::kOK) { + LOG(INFO) << "create table " << table << " ok"; + Table* table_ptr = client_->OpenTable(table, &err); + if (table_ptr == NULL) { + LOG(ERROR) << "open table " << table << " failed"; + delete desc; + return false; } else { - LOG(ERROR) << "load schema failed, schema_file:" << schema_file << "err:" << err.ToString(); - delete desc; - return false; + table_map_[table] = table_ptr; + LOG(INFO) << "open table " << table << " ok"; } + } else { + LOG(ERROR) << "create table " << table << " failed"; delete desc; + return false; + } + } else { + LOG(ERROR) << "load schema failed, schema_file:" << schema_file << "err:" << err.ToString(); + delete desc; + return false; } - return true; + delete desc; + } + return true; } bool TeraTpccDb::CleanTables() { - ErrorCode err; - for (auto table : kTpccTables) { - if (!client_->DisableTable(table, &err)) { - LOG(ERROR) << "fail to disable table : " << table << " err: " <(client_); - if (!client_impl->ShowTablesInfo(table, &table_meta, &tablet_list, &err)) { - LOG(ERROR) << "table not exist: " << table; - continue; - } - uint64_t tablet_num = tablet_list.meta_size(); - VLOG(11) << tablet_num; - int wait_times = 0; - while (true) { - if (!client_impl->ShowTablesInfo(table, &table_meta, &tablet_list, &err)) { - LOG(ERROR) << "table not exist: " << table; - break; - } - uint64_t tablet_cnt = 0; - for (int32_t i = 0; i < tablet_list.meta_size(); ++i) { - const TabletMeta& tablet = tablet_list.meta(i); - if (tablet.status() == TabletMeta::kTabletDisable || - tablet.status() == TabletMeta::kTabletOffline) { - tablet_cnt++; - } - } - if (tablet_cnt == tablet_num) { - break; - } - if (wait_times < 20) { - sleep(1); - } else { - LOG(ERROR) << "disable table : " << table << " failed, try " << wait_times << " time(s)"; - break; - } - } + ErrorCode err; + for (auto table : kTpccTables) { + if (!client_->DisableTable(table, &err)) { + LOG(ERROR) << "fail to disable table : " << table << " err: " << err.ToString(); + } else { + // make sure clean tables + TableMeta table_meta; + TabletMetaList tablet_list; + tera::ClientImpl* client_impl = static_cast(client_); + if (!client_impl->ShowTablesInfo(table, &table_meta, &tablet_list, &err)) { + LOG(ERROR) << "table not exist: " << table; + continue; + } + uint64_t tablet_num = tablet_list.meta_size(); + VLOG(11) << tablet_num; + int wait_times = 0; + while (true) { + if (!client_impl->ShowTablesInfo(table, &table_meta, &tablet_list, &err)) { + LOG(ERROR) << "table not exist: " << table; + break; + } + uint64_t tablet_cnt = 0; + for (int32_t i = 0; i < tablet_list.meta_size(); ++i) { + const TabletMeta& tablet = tablet_list.meta(i); + if (tablet.status() == TabletMeta::kTabletDisable || + tablet.status() == TabletMeta::kTabletOffline) { + tablet_cnt++; + } + } + if (tablet_cnt == tablet_num) { + break; } - if (!client_->DeleteTable(table, &err)) { - LOG(ERROR) << "drop table: " << table << " failed. " << err.ToString(); + if (wait_times < 20) { + sleep(1); } else { - LOG(INFO) << "drop table: "<< table << " done."; + LOG(ERROR) << "disable table : " << table << " failed, try " << wait_times << " time(s)"; + break; } + } } - return true; + if (!client_->DeleteTable(table, &err)) { + LOG(ERROR) << "drop table: " << table << " failed. " << err.ToString(); + } else { + LOG(INFO) << "drop table: " << table << " done."; + } + } + return true; } -// init db +// init db bool TeraTpccDb::InsertItem(const Item& i) { - std::string tablename = "t_item"; - if ( table_map_.find(tablename) == table_map_.end()) { - return false; - } - Table* table = table_map_[tablename]; - Transaction* gtxn = client_->NewGlobalTransaction(); - RowMutation* mu = table->NewRowMutation(i.PrimaryKey()); - mu->Put("cf0", "i_id", std::to_string(i.i_id)); - mu->Put("cf0", "i_im_id", std::to_string(i.i_im_id)); - mu->Put("cf0", "i_price", std::to_string(i.i_price)); - mu->Put("cf0", "i_name", i.i_name); - mu->Put("cf0", "i_data", i.i_data); - gtxn->ApplyMutation(mu); - gtxn->Commit(); - delete mu; - if (gtxn->GetError().GetType() != ErrorCode::kOK) { - LOG(ERROR) << "insert table:" << tablename << " failed. err:" - << gtxn->GetError().ToString(); - delete gtxn; - return false; - } + std::string tablename = "t_item"; + if (table_map_.find(tablename) == table_map_.end()) { + return false; + } + Table* table = table_map_[tablename]; + Transaction* gtxn = client_->NewGlobalTransaction(); + RowMutation* mu = table->NewRowMutation(i.PrimaryKey()); + mu->Put("cf0", "i_id", std::to_string(i.i_id)); + mu->Put("cf0", "i_im_id", std::to_string(i.i_im_id)); + mu->Put("cf0", "i_price", std::to_string(i.i_price)); + mu->Put("cf0", "i_name", i.i_name); + mu->Put("cf0", "i_data", i.i_data); + gtxn->ApplyMutation(mu); + gtxn->Commit(); + delete mu; + if (gtxn->GetError().GetType() != ErrorCode::kOK) { + LOG(ERROR) << "insert table:" << tablename << " failed. err:" << gtxn->GetError().ToString(); delete gtxn; - return true; + return false; + } + delete gtxn; + return true; } bool TeraTpccDb::InsertWarehouse(const Warehouse& w) { - std::string tablename = "t_warehouse"; - if ( table_map_.find(tablename) == table_map_.end()) { - return false; - } - Table* table = table_map_[tablename]; - Transaction* gtxn = client_->NewGlobalTransaction(); - RowMutation* mu = table->NewRowMutation(w.PrimaryKey()); - mu->Put("cf0", "w_id", std::to_string(w.w_id)); - mu->Put("cf0", "w_tax", std::to_string(w.w_tax)); - mu->Put("cf0", "w_ytd", std::to_string(w.w_ytd)); - mu->Put("cf0", "w_name", w.w_name); - mu->Put("cf0", "w_street_1", w.w_street_1); - mu->Put("cf0", "w_street_2", w.w_street_2); - mu->Put("cf0", "w_city", w.w_city); - mu->Put("cf0", "w_state", w.w_state); - mu->Put("cf0", "w_zip", w.w_zip); - gtxn->ApplyMutation(mu); - gtxn->Commit(); - delete mu; - if (gtxn->GetError().GetType() != ErrorCode::kOK) { - LOG(ERROR) << "insert table:" << tablename << " failed. err:" - << gtxn->GetError().ToString(); - delete gtxn; - return false; - } + std::string tablename = "t_warehouse"; + if (table_map_.find(tablename) == table_map_.end()) { + return false; + } + Table* table = table_map_[tablename]; + Transaction* gtxn = client_->NewGlobalTransaction(); + RowMutation* mu = table->NewRowMutation(w.PrimaryKey()); + mu->Put("cf0", "w_id", std::to_string(w.w_id)); + mu->Put("cf0", "w_tax", std::to_string(w.w_tax)); + mu->Put("cf0", "w_ytd", std::to_string(w.w_ytd)); + mu->Put("cf0", "w_name", w.w_name); + mu->Put("cf0", "w_street_1", w.w_street_1); + mu->Put("cf0", "w_street_2", w.w_street_2); + mu->Put("cf0", "w_city", w.w_city); + mu->Put("cf0", "w_state", w.w_state); + mu->Put("cf0", "w_zip", w.w_zip); + gtxn->ApplyMutation(mu); + gtxn->Commit(); + delete mu; + if (gtxn->GetError().GetType() != ErrorCode::kOK) { + LOG(ERROR) << "insert table:" << tablename << " failed. err:" << gtxn->GetError().ToString(); delete gtxn; - return true; + return false; + } + delete gtxn; + return true; } bool TeraTpccDb::InsertDistrict(const District& d) { - std::string tablename = "t_district"; - if ( table_map_.find(tablename) == table_map_.end()) { - return false; - } - Table* table = table_map_[tablename]; - Transaction* gtxn = client_->NewGlobalTransaction(); - RowMutation* mu = table->NewRowMutation(d.PrimaryKey()); - mu->Put("cf0", "d_id", std::to_string(d.d_id)); - mu->Put("cf0", "d_w_id", std::to_string(d.d_w_id)); - mu->Put("cf0", "d_tax", std::to_string(d.d_tax)); - mu->Put("cf0", "d_ytd", std::to_string(d.d_ytd)); - mu->Put("cf0", "d_next_o_id", std::to_string(d.d_next_o_id)); - mu->Put("cf0", "d_name", d.d_name); - mu->Put("cf0", "d_street_1", d.d_street_1); - mu->Put("cf0", "d_street_2", d.d_street_2); - mu->Put("cf0", "d_city", d.d_city); - mu->Put("cf0", "d_state", d.d_state); - mu->Put("cf0", "d_zip", d.d_zip); - gtxn->ApplyMutation(mu); - gtxn->Commit(); - delete mu; - if (gtxn->GetError().GetType() != ErrorCode::kOK) { - LOG(ERROR) << "insert table:" << tablename << " failed. err:" - << gtxn->GetError().ToString(); - delete gtxn; - return false; - } + std::string tablename = "t_district"; + if (table_map_.find(tablename) == table_map_.end()) { + return false; + } + Table* table = table_map_[tablename]; + Transaction* gtxn = client_->NewGlobalTransaction(); + RowMutation* mu = table->NewRowMutation(d.PrimaryKey()); + mu->Put("cf0", "d_id", std::to_string(d.d_id)); + mu->Put("cf0", "d_w_id", std::to_string(d.d_w_id)); + mu->Put("cf0", "d_tax", std::to_string(d.d_tax)); + mu->Put("cf0", "d_ytd", std::to_string(d.d_ytd)); + mu->Put("cf0", "d_next_o_id", std::to_string(d.d_next_o_id)); + mu->Put("cf0", "d_name", d.d_name); + mu->Put("cf0", "d_street_1", d.d_street_1); + mu->Put("cf0", "d_street_2", d.d_street_2); + mu->Put("cf0", "d_city", d.d_city); + mu->Put("cf0", "d_state", d.d_state); + mu->Put("cf0", "d_zip", d.d_zip); + gtxn->ApplyMutation(mu); + gtxn->Commit(); + delete mu; + if (gtxn->GetError().GetType() != ErrorCode::kOK) { + LOG(ERROR) << "insert table:" << tablename << " failed. err:" << gtxn->GetError().ToString(); delete gtxn; - return true; + return false; + } + delete gtxn; + return true; } bool TeraTpccDb::InsertCustomer(const Customer& c) { - std::string tablename = "t_customer"; - std::string c_last_index_name = "t_customer_last_index"; - if ( table_map_.find(tablename) == table_map_.end() - || table_map_.find(c_last_index_name) == table_map_.end()) { - return false; - } - Table* table = table_map_[tablename]; - Table* t_index = table_map_[tablename]; - Transaction* gtxn = client_->NewGlobalTransaction(); - std::string key = std::to_string(c.c_w_id) + "_" + std::to_string(c.c_d_id) - + "_" + c.c_last + "_" + std::to_string(c.c_id); - RowMutation* index_mu = t_index->NewRowMutation(key); - index_mu->Put("cf0", "c_id", std::to_string(c.c_id)); - index_mu->Put("cf0", "c_d_id", std::to_string(c.c_d_id)); - index_mu->Put("cf0", "c_w_id", std::to_string(c.c_w_id)); - index_mu->Put("cf0", "c_last", c.c_last); - gtxn->ApplyMutation(index_mu); - delete index_mu; - - RowMutation* mu = table->NewRowMutation(c.PrimaryKey()); - mu->Put("cf0", "c_id", std::to_string(c.c_id)); - mu->Put("cf0", "c_d_id", std::to_string(c.c_d_id)); - mu->Put("cf0", "c_w_id", std::to_string(c.c_w_id)); - mu->Put("cf0", "c_credit_lim", std::to_string(c.c_credit_lim)); - mu->Put("cf0", "c_discount", std::to_string(c.c_discount)); - mu->Put("cf0", "c_balance", std::to_string(c.c_balance)); - mu->Put("cf0", "c_ytd_payment", std::to_string(c.c_ytd_payment)); - mu->Put("cf0", "c_payment_cnt", std::to_string(c.c_payment_cnt)); - mu->Put("cf0", "c_delivery_cnt", std::to_string(c.c_delivery_cnt)); - mu->Put("cf0", "c_first", c.c_first); - mu->Put("cf0", "c_middle", c.c_middle); - mu->Put("cf0", "c_last", c.c_last); - mu->Put("cf0", "c_street_1", c.c_street_1); - mu->Put("cf0", "c_street_2", c.c_street_2); - mu->Put("cf0", "c_city", c.c_city); - mu->Put("cf0", "c_state", c.c_state); - mu->Put("cf0", "c_zip", c.c_zip); - mu->Put("cf0", "c_phone", c.c_phone); - mu->Put("cf0", "c_since", c.c_since); - mu->Put("cf0", "c_credit", c.c_credit); - mu->Put("cf0", "c_data", c.c_data); - gtxn->ApplyMutation(mu); - gtxn->Commit(); - delete mu; - if (gtxn->GetError().GetType() != ErrorCode::kOK) { - LOG(ERROR) << "insert table:" << tablename << " failed. err:" - << gtxn->GetError().ToString(); - delete gtxn; - return false; - } + std::string tablename = "t_customer"; + std::string c_last_index_name = "t_customer_last_index"; + if (table_map_.find(tablename) == table_map_.end() || + table_map_.find(c_last_index_name) == table_map_.end()) { + return false; + } + Table* table = table_map_[tablename]; + Table* t_index = table_map_[tablename]; + Transaction* gtxn = client_->NewGlobalTransaction(); + std::string key = std::to_string(c.c_w_id) + "_" + std::to_string(c.c_d_id) + "_" + c.c_last + + "_" + std::to_string(c.c_id); + RowMutation* index_mu = t_index->NewRowMutation(key); + index_mu->Put("cf0", "c_id", std::to_string(c.c_id)); + index_mu->Put("cf0", "c_d_id", std::to_string(c.c_d_id)); + index_mu->Put("cf0", "c_w_id", std::to_string(c.c_w_id)); + index_mu->Put("cf0", "c_last", c.c_last); + gtxn->ApplyMutation(index_mu); + delete index_mu; + + RowMutation* mu = table->NewRowMutation(c.PrimaryKey()); + mu->Put("cf0", "c_id", std::to_string(c.c_id)); + mu->Put("cf0", "c_d_id", std::to_string(c.c_d_id)); + mu->Put("cf0", "c_w_id", std::to_string(c.c_w_id)); + mu->Put("cf0", "c_credit_lim", std::to_string(c.c_credit_lim)); + mu->Put("cf0", "c_discount", std::to_string(c.c_discount)); + mu->Put("cf0", "c_balance", std::to_string(c.c_balance)); + mu->Put("cf0", "c_ytd_payment", std::to_string(c.c_ytd_payment)); + mu->Put("cf0", "c_payment_cnt", std::to_string(c.c_payment_cnt)); + mu->Put("cf0", "c_delivery_cnt", std::to_string(c.c_delivery_cnt)); + mu->Put("cf0", "c_first", c.c_first); + mu->Put("cf0", "c_middle", c.c_middle); + mu->Put("cf0", "c_last", c.c_last); + mu->Put("cf0", "c_street_1", c.c_street_1); + mu->Put("cf0", "c_street_2", c.c_street_2); + mu->Put("cf0", "c_city", c.c_city); + mu->Put("cf0", "c_state", c.c_state); + mu->Put("cf0", "c_zip", c.c_zip); + mu->Put("cf0", "c_phone", c.c_phone); + mu->Put("cf0", "c_since", c.c_since); + mu->Put("cf0", "c_credit", c.c_credit); + mu->Put("cf0", "c_data", c.c_data); + gtxn->ApplyMutation(mu); + gtxn->Commit(); + delete mu; + if (gtxn->GetError().GetType() != ErrorCode::kOK) { + LOG(ERROR) << "insert table:" << tablename << " failed. err:" << gtxn->GetError().ToString(); delete gtxn; - return true; + return false; + } + delete gtxn; + return true; } bool TeraTpccDb::InsertHistory(const History& h) { - std::string tablename = "t_history"; - std::string history_index_name = "t_history_index"; + std::string tablename = "t_history"; + std::string history_index_name = "t_history_index"; - if (table_map_.find(tablename) == table_map_.end() || - table_map_.find(history_index_name) == table_map_.end()) { - return false; - } - Table* table = table_map_[tablename]; - Table* t_history_index = table_map_[history_index_name]; - Transaction* gtxn = client_->NewGlobalTransaction(); - - RowReader* hindex_reader = t_history_index->NewRowReader("count"); - RetTuples hindex_ret; - int cnt = -1; - TxnResult ret; - if (hindex_reader->GetError().GetType() != ErrorCode::kNotFound - && !GetValues(&ret, gtxn, hindex_reader, - {"count"}, - &hindex_ret, - "@insert_history|hindex_reader|count")) { - return false; - } else if (hindex_reader->GetError().GetType() == ErrorCode::kNotFound) { - cnt = 0; - } else { - cnt = std::stoi(hindex_ret["count"]); - } - - RowMutation* hindex_mu = t_history_index->NewRowMutation("count"); - hindex_mu->Put("cf0", "count", std::to_string(++cnt)); - gtxn->ApplyMutation(hindex_mu); - delete hindex_mu; - - RowMutation* mu = table->NewRowMutation(std::to_string(cnt)); - mu->Put("cf0", "h_c_id", std::to_string(h.h_c_id)); - mu->Put("cf0", "h_c_d_id", std::to_string(h.h_c_d_id)); - mu->Put("cf0", "h_c_w_id", std::to_string(h.h_c_w_id)); - mu->Put("cf0", "h_d_id", std::to_string(h.h_d_id)); - mu->Put("cf0", "h_w_id", std::to_string(h.h_w_id)); - mu->Put("cf0", "h_amount", std::to_string(h.h_amount)); - mu->Put("cf0", "h_date", h.h_date); - mu->Put("cf0", "h_data", h.h_data); - gtxn->ApplyMutation(mu); - gtxn->Commit(); - delete mu; - if (gtxn->GetError().GetType() != ErrorCode::kOK) { - LOG(ERROR) << "insert table:" << tablename << " failed. err:" - << gtxn->GetError().ToString(); - delete gtxn; - return false; - } + if (table_map_.find(tablename) == table_map_.end() || + table_map_.find(history_index_name) == table_map_.end()) { + return false; + } + Table* table = table_map_[tablename]; + Table* t_history_index = table_map_[history_index_name]; + Transaction* gtxn = client_->NewGlobalTransaction(); + + RowReader* hindex_reader = t_history_index->NewRowReader("count"); + RetTuples hindex_ret; + int cnt = -1; + TxnResult ret; + if (hindex_reader->GetError().GetType() != ErrorCode::kNotFound && + !GetValues(&ret, gtxn, hindex_reader, {"count"}, &hindex_ret, + "@insert_history|hindex_reader|count")) { + return false; + } else if (hindex_reader->GetError().GetType() == ErrorCode::kNotFound) { + cnt = 0; + } else { + cnt = std::stoi(hindex_ret["count"]); + } + + RowMutation* hindex_mu = t_history_index->NewRowMutation("count"); + hindex_mu->Put("cf0", "count", std::to_string(++cnt)); + gtxn->ApplyMutation(hindex_mu); + delete hindex_mu; + + RowMutation* mu = table->NewRowMutation(std::to_string(cnt)); + mu->Put("cf0", "h_c_id", std::to_string(h.h_c_id)); + mu->Put("cf0", "h_c_d_id", std::to_string(h.h_c_d_id)); + mu->Put("cf0", "h_c_w_id", std::to_string(h.h_c_w_id)); + mu->Put("cf0", "h_d_id", std::to_string(h.h_d_id)); + mu->Put("cf0", "h_w_id", std::to_string(h.h_w_id)); + mu->Put("cf0", "h_amount", std::to_string(h.h_amount)); + mu->Put("cf0", "h_date", h.h_date); + mu->Put("cf0", "h_data", h.h_data); + gtxn->ApplyMutation(mu); + gtxn->Commit(); + delete mu; + if (gtxn->GetError().GetType() != ErrorCode::kOK) { + LOG(ERROR) << "insert table:" << tablename << " failed. err:" << gtxn->GetError().ToString(); delete gtxn; - return true; + return false; + } + delete gtxn; + return true; } bool TeraTpccDb::InsertStock(const Stock& s) { - std::string tablename = "t_stock"; - if ( table_map_.find(tablename) == table_map_.end()) { - return false; - } - Table* table = table_map_[tablename]; - Transaction* gtxn = client_->NewGlobalTransaction(); - RowMutation* mu = table->NewRowMutation(s.PrimaryKey()); - - mu->Put("cf0", "s_i_id", std::to_string(s.s_i_id)); - mu->Put("cf0", "s_w_id", std::to_string(s.s_w_id)); - mu->Put("cf0", "s_quantity", std::to_string(s.s_quantity)); - mu->Put("cf0", "s_ytd", std::to_string(s.s_ytd)); - mu->Put("cf0", "s_order_cnt", std::to_string(s.s_order_cnt)); - mu->Put("cf0", "s_remote_cnt", std::to_string(s.s_remote_cnt)); - int i = 0; - for (auto dist : s.s_dist) { - mu->Put("cf0", "s_dist_" + std::to_string(++i), dist); - } - mu->Put("cf0", "s_data", s.s_data); - - gtxn->ApplyMutation(mu); - gtxn->Commit(); - delete mu; - if (gtxn->GetError().GetType() != ErrorCode::kOK) { - LOG(ERROR) << "insert table:" << tablename << " failed. err:" - << gtxn->GetError().ToString(); - delete gtxn; - return false; - } + std::string tablename = "t_stock"; + if (table_map_.find(tablename) == table_map_.end()) { + return false; + } + Table* table = table_map_[tablename]; + Transaction* gtxn = client_->NewGlobalTransaction(); + RowMutation* mu = table->NewRowMutation(s.PrimaryKey()); + + mu->Put("cf0", "s_i_id", std::to_string(s.s_i_id)); + mu->Put("cf0", "s_w_id", std::to_string(s.s_w_id)); + mu->Put("cf0", "s_quantity", std::to_string(s.s_quantity)); + mu->Put("cf0", "s_ytd", std::to_string(s.s_ytd)); + mu->Put("cf0", "s_order_cnt", std::to_string(s.s_order_cnt)); + mu->Put("cf0", "s_remote_cnt", std::to_string(s.s_remote_cnt)); + int i = 0; + for (auto dist : s.s_dist) { + mu->Put("cf0", "s_dist_" + std::to_string(++i), dist); + } + mu->Put("cf0", "s_data", s.s_data); + + gtxn->ApplyMutation(mu); + gtxn->Commit(); + delete mu; + if (gtxn->GetError().GetType() != ErrorCode::kOK) { + LOG(ERROR) << "insert table:" << tablename << " failed. err:" << gtxn->GetError().ToString(); delete gtxn; - return true; + return false; + } + delete gtxn; + return true; } bool TeraTpccDb::InsertOrder(const Order& o) { - std::string tablename = "t_order"; - std::string indexname = "t_order_index"; - if ( table_map_.find(tablename) == table_map_.end() || - table_map_.find(indexname) == table_map_.end()) { - return false; - } - Table* table = table_map_[tablename]; - Table* index = table_map_[indexname]; - - Transaction* gtxn = client_->NewGlobalTransaction(); - RowMutation* mu = table->NewRowMutation(o.PrimaryKey()); - std::string index_key = o.ForeignKey() + "_" + std::to_string(o.o_id); - RowMutation* index_mu = index->NewRowMutation(index_key); - index_mu->Put("cf0", "o_id", std::to_string(o.o_id)); - index_mu->Put("cf0", "o_c_id", std::to_string(o.o_c_id)); - index_mu->Put("cf0", "o_d_id", std::to_string(o.o_d_id)); - index_mu->Put("cf0", "o_w_id", std::to_string(o.o_w_id)); - mu->Put("cf0", "o_id", std::to_string(o.o_id)); - mu->Put("cf0", "o_c_id", std::to_string(o.o_c_id)); - mu->Put("cf0", "o_d_id", std::to_string(o.o_d_id)); - mu->Put("cf0", "o_w_id", std::to_string(o.o_w_id)); - mu->Put("cf0", "o_carrier_id", std::to_string(o.o_carrier_id)); - mu->Put("cf0", "o_ol_cnt", std::to_string(o.o_ol_cnt)); - mu->Put("cf0", "o_all_local", std::to_string(o.o_all_local)); - mu->Put("cf0", "o_entry_d", o.o_entry_d); - gtxn->ApplyMutation(mu); - gtxn->ApplyMutation(index_mu); - delete mu; - delete index_mu; - gtxn->Commit(); - if (gtxn->GetError().GetType() != ErrorCode::kOK) { - LOG(ERROR) << "insert table:" << tablename << " failed. err:" - << gtxn->GetError().ToString(); - delete gtxn; - return false; - } + std::string tablename = "t_order"; + std::string indexname = "t_order_index"; + if (table_map_.find(tablename) == table_map_.end() || + table_map_.find(indexname) == table_map_.end()) { + return false; + } + Table* table = table_map_[tablename]; + Table* index = table_map_[indexname]; + + Transaction* gtxn = client_->NewGlobalTransaction(); + RowMutation* mu = table->NewRowMutation(o.PrimaryKey()); + std::string index_key = o.ForeignKey() + "_" + std::to_string(o.o_id); + RowMutation* index_mu = index->NewRowMutation(index_key); + index_mu->Put("cf0", "o_id", std::to_string(o.o_id)); + index_mu->Put("cf0", "o_c_id", std::to_string(o.o_c_id)); + index_mu->Put("cf0", "o_d_id", std::to_string(o.o_d_id)); + index_mu->Put("cf0", "o_w_id", std::to_string(o.o_w_id)); + mu->Put("cf0", "o_id", std::to_string(o.o_id)); + mu->Put("cf0", "o_c_id", std::to_string(o.o_c_id)); + mu->Put("cf0", "o_d_id", std::to_string(o.o_d_id)); + mu->Put("cf0", "o_w_id", std::to_string(o.o_w_id)); + mu->Put("cf0", "o_carrier_id", std::to_string(o.o_carrier_id)); + mu->Put("cf0", "o_ol_cnt", std::to_string(o.o_ol_cnt)); + mu->Put("cf0", "o_all_local", std::to_string(o.o_all_local)); + mu->Put("cf0", "o_entry_d", o.o_entry_d); + gtxn->ApplyMutation(mu); + gtxn->ApplyMutation(index_mu); + delete mu; + delete index_mu; + gtxn->Commit(); + if (gtxn->GetError().GetType() != ErrorCode::kOK) { + LOG(ERROR) << "insert table:" << tablename << " failed. err:" << gtxn->GetError().ToString(); delete gtxn; - return true; + return false; + } + delete gtxn; + return true; } bool TeraTpccDb::InsertOrderLine(const OrderLine& ol) { - std::string tablename = "t_orderline"; - if ( table_map_.find(tablename) == table_map_.end()) { - return false; - } - Table* table = table_map_[tablename]; - Transaction* gtxn = client_->NewGlobalTransaction(); - RowMutation* mu = table->NewRowMutation(ol.PrimaryKey()); - mu->Put("cf0", "ol_o_id", std::to_string(ol.ol_o_id)); - mu->Put("cf0", "ol_d_id", std::to_string(ol.ol_d_id)); - mu->Put("cf0", "ol_w_id", std::to_string(ol.ol_w_id)); - mu->Put("cf0", "ol_number", std::to_string(ol.ol_number)); - mu->Put("cf0", "ol_i_id", std::to_string(ol.ol_i_id)); - mu->Put("cf0", "ol_supply_w_id", std::to_string(ol.ol_supply_w_id)); - mu->Put("cf0", "ol_quantity", std::to_string(ol.ol_quantity)); - mu->Put("cf0", "ol_amount", std::to_string(ol.ol_amount)); - mu->Put("cf0", "ol_delivery_d", ol.ol_delivery_d); - mu->Put("cf0", "ol_dist_info", ol.ol_dist_info); - gtxn->ApplyMutation(mu); - gtxn->Commit(); - delete mu; - if (gtxn->GetError().GetType() != ErrorCode::kOK) { - LOG(ERROR) << "insert table:" << tablename << " failed. err:" - << gtxn->GetError().ToString(); - delete gtxn; - return false; - } + std::string tablename = "t_orderline"; + if (table_map_.find(tablename) == table_map_.end()) { + return false; + } + Table* table = table_map_[tablename]; + Transaction* gtxn = client_->NewGlobalTransaction(); + RowMutation* mu = table->NewRowMutation(ol.PrimaryKey()); + mu->Put("cf0", "ol_o_id", std::to_string(ol.ol_o_id)); + mu->Put("cf0", "ol_d_id", std::to_string(ol.ol_d_id)); + mu->Put("cf0", "ol_w_id", std::to_string(ol.ol_w_id)); + mu->Put("cf0", "ol_number", std::to_string(ol.ol_number)); + mu->Put("cf0", "ol_i_id", std::to_string(ol.ol_i_id)); + mu->Put("cf0", "ol_supply_w_id", std::to_string(ol.ol_supply_w_id)); + mu->Put("cf0", "ol_quantity", std::to_string(ol.ol_quantity)); + mu->Put("cf0", "ol_amount", std::to_string(ol.ol_amount)); + mu->Put("cf0", "ol_delivery_d", ol.ol_delivery_d); + mu->Put("cf0", "ol_dist_info", ol.ol_dist_info); + gtxn->ApplyMutation(mu); + gtxn->Commit(); + delete mu; + if (gtxn->GetError().GetType() != ErrorCode::kOK) { + LOG(ERROR) << "insert table:" << tablename << " failed. err:" << gtxn->GetError().ToString(); delete gtxn; - return true; + return false; + } + delete gtxn; + return true; } bool TeraTpccDb::InsertNewOrder(const NewOrder& no) { - std::string tablename = "t_neworder"; - if ( table_map_.find(tablename) == table_map_.end()) { - return false; - } - Table* table = table_map_[tablename]; - Transaction* gtxn = client_->NewGlobalTransaction(); - RowMutation* mu = table->NewRowMutation(no.PrimaryKey()); - mu->Put("cf0", "no_o_id", std::to_string(no.no_o_id)); - mu->Put("cf0", "no_d_id", std::to_string(no.no_d_id)); - mu->Put("cf0", "no_w_id", std::to_string(no.no_w_id)); - gtxn->ApplyMutation(mu); - gtxn->Commit(); - delete mu; - if (gtxn->GetError().GetType() != ErrorCode::kOK) { - LOG(ERROR) << "insert table:" << tablename << " failed. err:" - << gtxn->GetError().ToString(); - delete gtxn; - return false; - } + std::string tablename = "t_neworder"; + if (table_map_.find(tablename) == table_map_.end()) { + return false; + } + Table* table = table_map_[tablename]; + Transaction* gtxn = client_->NewGlobalTransaction(); + RowMutation* mu = table->NewRowMutation(no.PrimaryKey()); + mu->Put("cf0", "no_o_id", std::to_string(no.no_o_id)); + mu->Put("cf0", "no_d_id", std::to_string(no.no_d_id)); + mu->Put("cf0", "no_w_id", std::to_string(no.no_w_id)); + gtxn->ApplyMutation(mu); + gtxn->Commit(); + delete mu; + if (gtxn->GetError().GetType() != ErrorCode::kOK) { + LOG(ERROR) << "insert table:" << tablename << " failed. err:" << gtxn->GetError().ToString(); delete gtxn; - return true; + return false; + } + delete gtxn; + return true; } -void TeraTpccDb::SetTxnResult(TxnResult* ret, Transaction* gtxn, bool state, +void TeraTpccDb::SetTxnResult(TxnResult* ret, Transaction* gtxn, bool state, const std::string& msg) { - ret->SetState(state); - if (msg != "") { - ret->SetReason(gtxn->GetError().GetReason() + " msg:" + msg); - } else { - ret->SetReason(gtxn->GetError().GetReason()); - } + ret->SetState(state); + if (msg != "") { + ret->SetReason(gtxn->GetError().GetReason() + " msg:" + msg); + } else { + ret->SetReason(gtxn->GetError().GetReason()); + } } bool TeraTpccDb::GetValues(TxnResult* ret, Transaction* gtxn, RowReader* reader, std::initializer_list qu_names_initlist, - RetTuples* ret_tuples, - const std::string& if_error_msg) { - std::vector qu_names(qu_names_initlist); - for (auto& qu_name : qu_names) { - reader->AddColumn("cf0", qu_name); - } - gtxn->Get(reader); - if (gtxn->GetError().GetType() != ErrorCode::kOK) { - SetTxnResult(ret, gtxn, false, if_error_msg); - delete reader; - return false; - } else { - RowReader::TRow row; - reader->ToMap(&row); - for (auto qu_name : qu_names) { - if (row["cf0"].find(qu_name) != row["cf0"].end()) { - for (auto k : row["cf0"][qu_name]) { - ret_tuples->insert({{qu_name, k.second}}); - break; - } - } + RetTuples* ret_tuples, const std::string& if_error_msg) { + std::vector qu_names(qu_names_initlist); + for (auto& qu_name : qu_names) { + reader->AddColumn("cf0", qu_name); + } + gtxn->Get(reader); + if (gtxn->GetError().GetType() != ErrorCode::kOK) { + SetTxnResult(ret, gtxn, false, if_error_msg); + delete reader; + return false; + } else { + RowReader::TRow row; + reader->ToMap(&row); + for (auto qu_name : qu_names) { + if (row["cf0"].find(qu_name) != row["cf0"].end()) { + for (auto k : row["cf0"][qu_name]) { + ret_tuples->insert({{qu_name, k.second}}); + break; } - delete reader; + } } - return true; + delete reader; + } + return true; } -bool TeraTpccDb::GetCustomer(TxnResult* ret, Transaction* gtxn, bool by_last_name, +bool TeraTpccDb::GetCustomer(TxnResult* ret, Transaction* gtxn, bool by_last_name, const std::string& last_name, int32_t customer_id, - int32_t warehouse_id, int32_t district_id, - std::string* customer_key, RetTuples* customer_ret) { - // open table - Table* t_customer_last_index = table_map_[kTpccTables[kCustomerLastIndex]]; - Table* t_customer = table_map_[kTpccTables[kCustomerTable]]; - *customer_key = std::to_string(warehouse_id) + "_" + std::to_string(district_id) + "_"; - - if (by_last_name) { - ErrorCode error_code; - std::string start_key = *customer_key + last_name + "_"; - ScanDescriptor scan_desc(start_key); - scan_desc.SetEnd(start_key + "~"); - scan_desc.AddColumnFamily("cf0"); - ResultStream* scanner = t_customer_last_index->Scan(scan_desc, &error_code); - std::vector keys; - for (scanner->LookUp(start_key); !scanner->Done(); scanner->Next()) { - std::string row_key = scanner->RowName(); - if (row_key.find(start_key) == std::string::npos) { - break; - } - - RowReader* index_reader = t_customer_last_index->NewRowReader(row_key); - RetTuples index_ret; - if (!GetValues(ret, gtxn, index_reader, - {"c_id"}, - &index_ret, - "@get_customer|index_reader|" + row_key)) { - delete scanner; - return false; - } - keys.push_back(index_ret["c_id"]); - } + int32_t warehouse_id, int32_t district_id, std::string* customer_key, + RetTuples* customer_ret) { + // open table + Table* t_customer_last_index = table_map_[kTpccTables[kCustomerLastIndex]]; + Table* t_customer = table_map_[kTpccTables[kCustomerTable]]; + *customer_key = std::to_string(warehouse_id) + "_" + std::to_string(district_id) + "_"; + + if (by_last_name) { + ErrorCode error_code; + std::string start_key = *customer_key + last_name + "_"; + ScanDescriptor scan_desc(start_key); + scan_desc.SetEnd(start_key + "~"); + scan_desc.AddColumnFamily("cf0"); + ResultStream* scanner = t_customer_last_index->Scan(scan_desc, &error_code); + std::vector keys; + for (scanner->LookUp(start_key); !scanner->Done(); scanner->Next()) { + std::string row_key = scanner->RowName(); + if (row_key.find(start_key) == std::string::npos) { + break; + } + + RowReader* index_reader = t_customer_last_index->NewRowReader(row_key); + RetTuples index_ret; + if (!GetValues(ret, gtxn, index_reader, {"c_id"}, &index_ret, + "@get_customer|index_reader|" + row_key)) { delete scanner; - size_t pos = keys.size(); - pos = pos % 2 == 0 ? (pos / 2 - 1) : (pos / 2); - *customer_key += keys.at(pos); - } else { - *customer_key += std::to_string(customer_id); - } - RowReader* customer_reader = t_customer->NewRowReader(*customer_key); - if (!GetValues(ret, gtxn, customer_reader, - {"c_id", "c_d_id", "c_w_id", "c_first", "c_middle", "c_last", - "c_balance", "c_ytd_payment", "c_payment_cnt", "c_credit", - "c_data", "c_street_1", "c_street_2", "c_city", "c_state", - "c_zip", "c_phone", "c_since", "c_credit_lim", "c_discount"}, - customer_ret, - "@get_customer|customer_reader" + *customer_key)) { return false; + } + keys.push_back(index_ret["c_id"]); } - return true; + delete scanner; + size_t pos = keys.size(); + pos = pos % 2 == 0 ? (pos / 2 - 1) : (pos / 2); + *customer_key += keys.at(pos); + } else { + *customer_key += std::to_string(customer_id); + } + RowReader* customer_reader = t_customer->NewRowReader(*customer_key); + if (!GetValues( + ret, gtxn, customer_reader, + {"c_id", "c_d_id", "c_w_id", "c_first", "c_middle", "c_last", "c_balance", + "c_ytd_payment", "c_payment_cnt", "c_credit", "c_data", "c_street_1", "c_street_2", + "c_city", "c_state", "c_zip", "c_phone", "c_since", "c_credit_lim", "c_discount"}, + customer_ret, "@get_customer|customer_reader" + *customer_key)) { + return false; + } + return true; } -} // namespace tpcc -} // namespace tera +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/tera_tpccdb.h b/src/benchmark/tpcc/tera_tpccdb.h index a300166b0..bfb7c75e0 100644 --- a/src/benchmark/tpcc/tera_tpccdb.h +++ b/src/benchmark/tpcc/tera_tpccdb.h @@ -17,85 +17,72 @@ class TpccDb; class TxnResult; class TeraTpccDb : public TpccDb { -public: - TeraTpccDb(); - virtual ~TeraTpccDb(); - - virtual bool CreateTables(); - virtual bool CleanTables(); - - // init db - virtual bool InsertItem(const Item& i); - - virtual bool InsertWarehouse(const Warehouse& w); - - virtual bool InsertDistrict(const District& d); - - virtual bool InsertCustomer(const Customer& c); - - virtual bool InsertHistory(const History& h); - - virtual bool InsertStock(const Stock& s); - - virtual bool InsertOrder(const Order& o); - - virtual bool InsertOrderLine(const OrderLine& ol); - - virtual bool InsertNewOrder(const NewOrder& no); - - virtual void StockLevelTxn(int32_t warehouse_id, int32_t district_id, - int32_t threshold, - StockLevelResult* ret); - - virtual void DeliveryTxn(int32_t warehouse_id, - int32_t carrier_id, - const std::string& delivery_datetime, - DeliveryResult* ret); - - virtual void OrderStatusTxn(bool by_last_name, - int32_t warehouse_id, int32_t district_id, - int32_t c_customer_id, - const std::string& last_name, - OrderStatusResult* ret); - - virtual void PaymentTxn(bool by_last_name, - int32_t warehouse_id, int32_t district_id, - int32_t c_warehouse_id, int32_t c_district_id, - int32_t c_customer_id, - const std::string& last_name, - int32_t h_amount, - PaymentResult* ret); - - virtual void NewOrderTxn(int32_t warehouse_id, - int32_t district_id, - int32_t customer_id, const NewOrderInfo& info, - NewOrderResult* ret); - -private: - void SetTxnResult(TxnResult* ret, Transaction* gtxn, bool state = true, - const std::string& msg = ""); - - bool GetValues(TxnResult* ret, Transaction* gtxn, RowReader* reader, - std::initializer_list qu_names_initlist, - RetTuples* ret_tuples, - const std::string& if_error_msg); - - bool GetCustomer(TxnResult* ret, Transaction* gtxn, bool by_last_name, - const std::string& last_name, int32_t customer_id, - int32_t warehouse_id, int32_t district_id, - std::string* customer_key, RetTuples* customer_ret); -private: - void SetPaymentSingleLineRet(const RetTuples& warehouse_ret, - const RetTuples& district_ret, - const RetTuples& customer_ret, - const RetTuples& other_ret, - RetTuples* payment_ret); -private: - Client* client_; - std::unordered_map table_map_; + public: + TeraTpccDb(); + virtual ~TeraTpccDb(); + + virtual bool CreateTables(); + virtual bool CleanTables(); + + // init db + virtual bool InsertItem(const Item& i); + + virtual bool InsertWarehouse(const Warehouse& w); + + virtual bool InsertDistrict(const District& d); + + virtual bool InsertCustomer(const Customer& c); + + virtual bool InsertHistory(const History& h); + + virtual bool InsertStock(const Stock& s); + + virtual bool InsertOrder(const Order& o); + + virtual bool InsertOrderLine(const OrderLine& ol); + + virtual bool InsertNewOrder(const NewOrder& no); + + virtual void StockLevelTxn(int32_t warehouse_id, int32_t district_id, int32_t threshold, + StockLevelResult* ret); + + virtual void DeliveryTxn(int32_t warehouse_id, int32_t carrier_id, + const std::string& delivery_datetime, DeliveryResult* ret); + + virtual void OrderStatusTxn(bool by_last_name, int32_t warehouse_id, int32_t district_id, + int32_t c_customer_id, const std::string& last_name, + OrderStatusResult* ret); + + virtual void PaymentTxn(bool by_last_name, int32_t warehouse_id, int32_t district_id, + int32_t c_warehouse_id, int32_t c_district_id, int32_t c_customer_id, + const std::string& last_name, int32_t h_amount, PaymentResult* ret); + + virtual void NewOrderTxn(int32_t warehouse_id, int32_t district_id, int32_t customer_id, + const NewOrderInfo& info, NewOrderResult* ret); + + private: + void SetTxnResult(TxnResult* ret, Transaction* gtxn, bool state = true, + const std::string& msg = ""); + + bool GetValues(TxnResult* ret, Transaction* gtxn, RowReader* reader, + std::initializer_list qu_names_initlist, RetTuples* ret_tuples, + const std::string& if_error_msg); + + bool GetCustomer(TxnResult* ret, Transaction* gtxn, bool by_last_name, + const std::string& last_name, int32_t customer_id, int32_t warehouse_id, + int32_t district_id, std::string* customer_key, RetTuples* customer_ret); + + private: + void SetPaymentSingleLineRet(const RetTuples& warehouse_ret, const RetTuples& district_ret, + const RetTuples& customer_ret, const RetTuples& other_ret, + RetTuples* payment_ret); + + private: + Client* client_; + std::unordered_map table_map_; }; -} // namespace tpcc -} // namespace tera +} // namespace tpcc +} // namespace tera #endif /* TERA_BENCHMARK_TPCC_TERA_TPCCDB_H */ diff --git a/src/benchmark/tpcc/tera_txn/delivery_txn.cc b/src/benchmark/tpcc/tera_txn/delivery_txn.cc index d1a7a3e18..e8b28f9eb 100644 --- a/src/benchmark/tpcc/tera_txn/delivery_txn.cc +++ b/src/benchmark/tpcc/tera_txn/delivery_txn.cc @@ -15,130 +15,121 @@ namespace tera { namespace tpcc { -void TeraTpccDb::DeliveryTxn(int32_t warehouse_id, - int32_t carrier_id, - const std::string& delivery_datetime, - DeliveryResult* ret) { - // open table - Table* t_neworder = table_map_[kTpccTables[kNewOrderTable]]; - Table* t_order = table_map_[kTpccTables[kOrderTable]]; - Table* t_orderline = table_map_[kTpccTables[kOrderLineTable]]; - Table* t_customer = table_map_[kTpccTables[kCustomerTable]]; - // begin transaction - Transaction* gtxn = client_->NewGlobalTransaction(); - for (int32_t district_id = 1; district_id <= kDistrictCountPerWarehouse; ++district_id) { - // The row in the NEW-ORDER table with matching NO_W_ID (equals W_ID) - // and NO_D_ID (equals D_ID) and with the lowest NO_O_ID value is selected. - ErrorCode error_code; - std::string start_key = std::to_string(warehouse_id) + "_" + std::to_string(district_id) + "_"; - ScanDescriptor scan_desc(start_key); - scan_desc.SetEnd(start_key + "~"); - scan_desc.AddColumnFamily("cf0"); - tera::ResultStream* scanner = t_neworder->Scan(scan_desc, &error_code); - bool not_new_order = false; - int32_t order_id = INT32_MAX; - for (scanner->LookUp(start_key); !scanner->Done(); scanner->Next()) { - std::string row_key = scanner->RowName(); - if (row_key.find(start_key) == std::string::npos) { - not_new_order = true; - break; - } - std::size_t found = row_key.find_last_of("_"); - int32_t found_order_id = std::stoi(row_key.substr(found + 1)); - if (order_id > found_order_id) { - order_id = found_order_id; - } - } - delete scanner; - // If no matching row is found, then the delivery of an order - // for this district is skipped. - if (not_new_order || order_id == INT32_MAX) { - continue; - } +void TeraTpccDb::DeliveryTxn(int32_t warehouse_id, int32_t carrier_id, + const std::string& delivery_datetime, DeliveryResult* ret) { + // open table + Table* t_neworder = table_map_[kTpccTables[kNewOrderTable]]; + Table* t_order = table_map_[kTpccTables[kOrderTable]]; + Table* t_orderline = table_map_[kTpccTables[kOrderLineTable]]; + Table* t_customer = table_map_[kTpccTables[kCustomerTable]]; + // begin transaction + Transaction* gtxn = client_->NewGlobalTransaction(); + for (int32_t district_id = 1; district_id <= kDistrictCountPerWarehouse; ++district_id) { + // The row in the NEW-ORDER table with matching NO_W_ID (equals W_ID) + // and NO_D_ID (equals D_ID) and with the lowest NO_O_ID value is selected. + ErrorCode error_code; + std::string start_key = std::to_string(warehouse_id) + "_" + std::to_string(district_id) + "_"; + ScanDescriptor scan_desc(start_key); + scan_desc.SetEnd(start_key + "~"); + scan_desc.AddColumnFamily("cf0"); + tera::ResultStream* scanner = t_neworder->Scan(scan_desc, &error_code); + bool not_new_order = false; + int32_t order_id = INT32_MAX; + for (scanner->LookUp(start_key); !scanner->Done(); scanner->Next()) { + std::string row_key = scanner->RowName(); + if (row_key.find(start_key) == std::string::npos) { + not_new_order = true; + break; + } + std::size_t found = row_key.find_last_of("_"); + int32_t found_order_id = std::stoi(row_key.substr(found + 1)); + if (order_id > found_order_id) { + order_id = found_order_id; + } + } + delete scanner; + // If no matching row is found, then the delivery of an order + // for this district is skipped. + if (not_new_order || order_id == INT32_MAX) { + continue; + } - // The selected row in the NEW-ORDER table is deleted - std::string no_primary_key = start_key + std::to_string(order_id); - RowReader* no_reader = t_neworder->NewRowReader(no_primary_key); - RetTuples no_ret; - if (!GetValues(ret, gtxn, no_reader, - {"no_o_id"}, - &no_ret, - "@delivery|no_reader|" + no_primary_key)) { - return; - } + // The selected row in the NEW-ORDER table is deleted + std::string no_primary_key = start_key + std::to_string(order_id); + RowReader* no_reader = t_neworder->NewRowReader(no_primary_key); + RetTuples no_ret; + if (!GetValues(ret, gtxn, no_reader, {"no_o_id"}, &no_ret, + "@delivery|no_reader|" + no_primary_key)) { + return; + } - RowMutation* no_mu = t_neworder->NewRowMutation(no_primary_key); - no_mu->DeleteColumns("cf0", "no_o_id", gtxn->GetStartTimestamp()); - no_mu->DeleteColumns("cf0", "no_d_id", gtxn->GetStartTimestamp()); - no_mu->DeleteColumns("cf0", "no_w_id", gtxn->GetStartTimestamp()); - gtxn->ApplyMutation(no_mu); - delete no_mu; + RowMutation* no_mu = t_neworder->NewRowMutation(no_primary_key); + no_mu->DeleteColumns("cf0", "no_o_id", gtxn->GetStartTimestamp()); + no_mu->DeleteColumns("cf0", "no_d_id", gtxn->GetStartTimestamp()); + no_mu->DeleteColumns("cf0", "no_w_id", gtxn->GetStartTimestamp()); + gtxn->ApplyMutation(no_mu); + delete no_mu; - // The row in the ORDER table with matching - // O_W_ID (equals W_ID), O_D_ID (equals D_ID), and O_ID (equals NO_O_ID) - // is selected, O_C_ID, the customer number, is retrieved, - // and O_CARRIER_ID is updated. - std::string order_primary_key = no_primary_key; - RowReader* order_reader = t_order->NewRowReader(order_primary_key); - RetTuples order_ret; - if (!GetValues(ret, gtxn, order_reader, - {"o_carrier_id", "o_ol_cnt", "o_c_id"}, - &order_ret, - "@delivery|order_reader|" + order_primary_key)) { - return; - } - RowMutation* order_mu = t_order->NewRowMutation(order_primary_key); - order_mu->Put("cf0", "o_carrier_id", std::to_string(carrier_id)); - gtxn->ApplyMutation(order_mu); - delete order_mu; + // The row in the ORDER table with matching + // O_W_ID (equals W_ID), O_D_ID (equals D_ID), and O_ID (equals NO_O_ID) + // is selected, O_C_ID, the customer number, is retrieved, + // and O_CARRIER_ID is updated. + std::string order_primary_key = no_primary_key; + RowReader* order_reader = t_order->NewRowReader(order_primary_key); + RetTuples order_ret; + if (!GetValues(ret, gtxn, order_reader, {"o_carrier_id", "o_ol_cnt", "o_c_id"}, &order_ret, + "@delivery|order_reader|" + order_primary_key)) { + return; + } + RowMutation* order_mu = t_order->NewRowMutation(order_primary_key); + order_mu->Put("cf0", "o_carrier_id", std::to_string(carrier_id)); + gtxn->ApplyMutation(order_mu); + delete order_mu; - int32_t o_ol_cnt = std::stoi(order_ret["o_ol_cnt"]); - // the sum of all OL_AMOUNT. - float amount = 0.0f; - // All rows in the ORDER-LINE table with matching - // OL_W_ID (= O_W_ID), OL_D_ID (= O_D_ID), and OL_O_ID (= O_ID) are selected. - for (int32_t ol_number = 1; ol_number <= o_ol_cnt; ++ ol_number) { - std::string ol_key = order_primary_key + "_" + std::to_string(ol_number); - RowReader* ol_reader = t_orderline->NewRowReader(ol_key); - RetTuples ol_ret; - if (!GetValues(ret, gtxn, ol_reader, - {"ol_amount", "ol_delivery_d"}, - &ol_ret, - "@delivery|ol_reader|" + ol_key)) { - return; - } - amount += std::stof(ol_ret["ol_amount"]); - RowMutation* ol_mu = t_orderline->NewRowMutation(ol_key); - // All OL_DELIVERY_D, the delivery dates, - // are updated to the current system time as returned by the OS - ol_mu->Put("cf0","ol_delivery_d",delivery_datetime); - gtxn->ApplyMutation(ol_mu); - delete ol_mu; - } + int32_t o_ol_cnt = std::stoi(order_ret["o_ol_cnt"]); + // the sum of all OL_AMOUNT. + float amount = 0.0f; + // All rows in the ORDER-LINE table with matching + // OL_W_ID (= O_W_ID), OL_D_ID (= O_D_ID), and OL_O_ID (= O_ID) are + // selected. + for (int32_t ol_number = 1; ol_number <= o_ol_cnt; ++ol_number) { + std::string ol_key = order_primary_key + "_" + std::to_string(ol_number); + RowReader* ol_reader = t_orderline->NewRowReader(ol_key); + RetTuples ol_ret; + if (!GetValues(ret, gtxn, ol_reader, {"ol_amount", "ol_delivery_d"}, &ol_ret, + "@delivery|ol_reader|" + ol_key)) { + return; + } + amount += std::stof(ol_ret["ol_amount"]); + RowMutation* ol_mu = t_orderline->NewRowMutation(ol_key); + // All OL_DELIVERY_D, the delivery dates, + // are updated to the current system time as returned by the OS + ol_mu->Put("cf0", "ol_delivery_d", delivery_datetime); + gtxn->ApplyMutation(ol_mu); + delete ol_mu; + } - // The row in the CUSTOMER table with matching - // C_W_ID (= W_ID), C_D_ID (= D_ID), and C_ID (= O_C_ID) is selected - std::string customer_key = start_key + order_ret["o_c_id"]; - RowReader* customer_reader = t_customer->NewRowReader(customer_key); - RetTuples customer_ret; - if (!GetValues(ret, gtxn, customer_reader, - {"c_balance", "c_delivery_cnt"}, - &customer_ret, - "@delivery|customer_reader" + customer_key)) { - return; - } - // and C_BALANCE + sum(OL_AMOUNT) previously retrieved. C_DELIVERY_CNT + 1. - RowMutation* customer_mu = t_customer->NewRowMutation(customer_key); - customer_mu->Put("cf0", "c_balance", - std::to_string(std::stof(customer_ret["c_balance"]) + amount)); - customer_mu->Put("cf0", "c_delivery_cnt", - std::to_string(std::stoi(customer_ret["c_delivery_cnt"]) + 1)); - gtxn->ApplyMutation(customer_mu); - delete customer_mu; + // The row in the CUSTOMER table with matching + // C_W_ID (= W_ID), C_D_ID (= D_ID), and C_ID (= O_C_ID) is selected + std::string customer_key = start_key + order_ret["o_c_id"]; + RowReader* customer_reader = t_customer->NewRowReader(customer_key); + RetTuples customer_ret; + if (!GetValues(ret, gtxn, customer_reader, {"c_balance", "c_delivery_cnt"}, &customer_ret, + "@delivery|customer_reader" + customer_key)) { + return; } - gtxn->Commit(); - SetTxnResult(ret, gtxn, gtxn->GetError().GetType() == ErrorCode::kOK); + // and C_BALANCE + sum(OL_AMOUNT) previously retrieved. C_DELIVERY_CNT + 1. + RowMutation* customer_mu = t_customer->NewRowMutation(customer_key); + customer_mu->Put("cf0", "c_balance", + std::to_string(std::stof(customer_ret["c_balance"]) + amount)); + customer_mu->Put("cf0", "c_delivery_cnt", + std::to_string(std::stoi(customer_ret["c_delivery_cnt"]) + 1)); + gtxn->ApplyMutation(customer_mu); + delete customer_mu; + } + gtxn->Commit(); + SetTxnResult(ret, gtxn, gtxn->GetError().GetType() == ErrorCode::kOK); } -} // namespace tpcc -} // namespace tera +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/tera_txn/new_order_txn.cc b/src/benchmark/tpcc/tera_txn/new_order_txn.cc index df4100824..84d2eacb6 100644 --- a/src/benchmark/tpcc/tera_txn/new_order_txn.cc +++ b/src/benchmark/tpcc/tera_txn/new_order_txn.cc @@ -15,200 +15,189 @@ namespace tera { namespace tpcc { -void TeraTpccDb::NewOrderTxn(int32_t warehouse_id, - int32_t district_id, - int32_t customer_id, const NewOrderInfo& info, - NewOrderResult* ret) { - // open table - Table* t_warehouse = table_map_[kTpccTables[kWarehouseTable]]; - Table* t_district = table_map_[kTpccTables[kDistrictTable]]; - Table* t_customer = table_map_[kTpccTables[kCustomerTable]]; - Table* t_order = table_map_[kTpccTables[kOrderTable]]; - Table* t_order_index = table_map_[kTpccTables[kOrderIndex]]; - Table* t_neworder = table_map_[kTpccTables[kNewOrderTable]]; - Table* t_orderline = table_map_[kTpccTables[kOrderLineTable]]; - Table* t_item = table_map_[kTpccTables[kItemTable]]; - Table* t_stock = table_map_[kTpccTables[kStockTable]]; - // begin transaction - std::unique_ptr gtxn(client_->NewGlobalTransaction()); - std::string datetime = get_curtime_str(); - std::string warehouse_key = std::to_string(warehouse_id); - std::string district_key = warehouse_key + "_" + std::to_string(district_id); - std::string customer_key = district_key + "_" + std::to_string(customer_id); - - RowReader* warehouse_reader = t_warehouse->NewRowReader(warehouse_key); - RetTuples warehouse_ret; - if (!GetValues(ret, gtxn.get(), warehouse_reader, - {"w_tax"}, - &warehouse_ret, - "@new_order|warehouse_reader|" + warehouse_key)) { - return; - } +void TeraTpccDb::NewOrderTxn(int32_t warehouse_id, int32_t district_id, int32_t customer_id, + const NewOrderInfo& info, NewOrderResult* ret) { + // open table + Table* t_warehouse = table_map_[kTpccTables[kWarehouseTable]]; + Table* t_district = table_map_[kTpccTables[kDistrictTable]]; + Table* t_customer = table_map_[kTpccTables[kCustomerTable]]; + Table* t_order = table_map_[kTpccTables[kOrderTable]]; + Table* t_order_index = table_map_[kTpccTables[kOrderIndex]]; + Table* t_neworder = table_map_[kTpccTables[kNewOrderTable]]; + Table* t_orderline = table_map_[kTpccTables[kOrderLineTable]]; + Table* t_item = table_map_[kTpccTables[kItemTable]]; + Table* t_stock = table_map_[kTpccTables[kStockTable]]; + // begin transaction + std::unique_ptr gtxn(client_->NewGlobalTransaction()); + std::string datetime = get_curtime_str(); + std::string warehouse_key = std::to_string(warehouse_id); + std::string district_key = warehouse_key + "_" + std::to_string(district_id); + std::string customer_key = district_key + "_" + std::to_string(customer_id); + + RowReader* warehouse_reader = t_warehouse->NewRowReader(warehouse_key); + RetTuples warehouse_ret; + if (!GetValues(ret, gtxn.get(), warehouse_reader, {"w_tax"}, &warehouse_ret, + "@new_order|warehouse_reader|" + warehouse_key)) { + return; + } + + RowReader* district_reader = t_district->NewRowReader(district_key); + RetTuples district_ret; + if (!GetValues(ret, gtxn.get(), district_reader, {"d_next_o_id", "d_tax"}, &district_ret, + "@new_order|district_reader|" + district_key)) { + return; + } + std::string d_next_o_id_str = std::to_string(std::stoi(district_ret["d_next_o_id"]) + 1); + + RowReader* customer_reader = t_customer->NewRowReader(customer_key); + RetTuples customer_ret; + if (!GetValues(ret, gtxn.get(), customer_reader, {"c_discount", "c_credit", "c_last"}, + &customer_ret, "@new_order|customer_reader|" + customer_key)) { + return; + } + + RowMutation* district_mu = t_district->NewRowMutation(district_key); + district_mu->Put("cf0", "d_next_o_id", d_next_o_id_str); + gtxn->ApplyMutation(district_mu); + delete district_mu; + + std::string order_key = district_key + "_" + d_next_o_id_str; + RowMutation* order_mu = t_order->NewRowMutation(order_key); + std::string order_index_key = customer_key + "_" + d_next_o_id_str; + RowMutation* order_index_mu = t_order_index->NewRowMutation(order_index_key); + order_index_mu->Put("cf0", "o_id", d_next_o_id_str); + order_index_mu->Put("cf0", "o_c_id", std::to_string(customer_id)); + order_index_mu->Put("cf0", "o_d_id", std::to_string(district_id)); + order_index_mu->Put("cf0", "o_w_id", warehouse_key); + order_mu->Put("cf0", "o_id", d_next_o_id_str); + order_mu->Put("cf0", "o_c_id", std::to_string(customer_id)); + order_mu->Put("cf0", "o_d_id", std::to_string(district_id)); + order_mu->Put("cf0", "o_w_id", warehouse_key); + order_mu->Put("cf0", "o_carrier_id", std::to_string(0)); + order_mu->Put("cf0", "o_ol_cnt", std::to_string(info.o_ol_cnt)); + order_mu->Put("cf0", "o_all_local", std::to_string(info.o_all_local)); + order_mu->Put("cf0", "o_entry_d", datetime); + gtxn->ApplyMutation(order_mu); + gtxn->ApplyMutation(order_index_mu); + delete order_mu; + delete order_index_mu; - RowReader* district_reader = t_district->NewRowReader(district_key); - RetTuples district_ret; - if (!GetValues(ret, gtxn.get(), district_reader, - {"d_next_o_id", "d_tax"}, - &district_ret, - "@new_order|district_reader|" + district_key)) { - return; + RowMutation* no_mu = t_neworder->NewRowMutation(order_key); + no_mu->Put("cf0", "no_o_id", d_next_o_id_str); + no_mu->Put("cf0", "no_d_id", std::to_string(district_id)); + no_mu->Put("cf0", "no_w_id", warehouse_key); + gtxn->ApplyMutation(no_mu); + delete no_mu; + + std::string ol_dist_info_key; + if (district_id == kDistrictCountPerWarehouse) { + ol_dist_info_key = "s_dist_10"; + } else { + ol_dist_info_key = "s_dist_0" + std::to_string(district_id); + } + + float ol_amount_sum = 0; + for (int32_t i = 0; i < info.o_ol_cnt; ++i) { + int32_t i_id = info.ol_i_ids[i]; + std::string item_key = std::to_string(i_id); + RowReader* item_reader = t_item->NewRowReader(item_key); + RetTuples item_ret; + if (!GetValues(ret, gtxn.get(), item_reader, {"i_price", "i_name", "i_data"}, &item_ret, + "@new_order|item_reader|" + item_key)) { + return; } - std::string d_next_o_id_str = std::to_string(std::stoi(district_ret["d_next_o_id"]) + 1); - - RowReader* customer_reader = t_customer->NewRowReader(customer_key); - RetTuples customer_ret; - if (!GetValues(ret, gtxn.get(), customer_reader, - {"c_discount", "c_credit", "c_last"}, - &customer_ret, - "@new_order|customer_reader|" + customer_key)) { - return; + + std::string ol_supply_w_id_str = std::to_string(info.ol_supply_w_ids[i]); + std::string stock_key = ol_supply_w_id_str + "_" + item_key; + RowReader* stock_reader = t_item->NewRowReader(stock_key); + RetTuples stock_ret; + if (!GetValues(ret, gtxn.get(), stock_reader, {"s_quantity", "s_ytd", "s_order_cnt", + "s_remote_cnt", "s_data", ol_dist_info_key}, + &stock_ret, "@new_order|stock_reader|" + stock_key)) { + return; } - RowMutation* district_mu = t_district->NewRowMutation(district_key); - district_mu->Put("cf0", "d_next_o_id", d_next_o_id_str); - gtxn->ApplyMutation(district_mu); - delete district_mu; - - std::string order_key = district_key + "_" + d_next_o_id_str; - RowMutation* order_mu = t_order->NewRowMutation(order_key); - std::string order_index_key = customer_key + "_" + d_next_o_id_str; - RowMutation* order_index_mu = t_order_index->NewRowMutation(order_index_key); - order_index_mu->Put("cf0", "o_id", d_next_o_id_str); - order_index_mu->Put("cf0", "o_c_id", std::to_string(customer_id)); - order_index_mu->Put("cf0", "o_d_id", std::to_string(district_id)); - order_index_mu->Put("cf0", "o_w_id", warehouse_key); - order_mu->Put("cf0", "o_id", d_next_o_id_str); - order_mu->Put("cf0", "o_c_id", std::to_string(customer_id)); - order_mu->Put("cf0", "o_d_id", std::to_string(district_id)); - order_mu->Put("cf0", "o_w_id", warehouse_key); - order_mu->Put("cf0", "o_carrier_id", std::to_string(0)); - order_mu->Put("cf0", "o_ol_cnt", std::to_string(info.o_ol_cnt)); - order_mu->Put("cf0", "o_all_local", std::to_string(info.o_all_local)); - order_mu->Put("cf0", "o_entry_d", datetime); - gtxn->ApplyMutation(order_mu); - gtxn->ApplyMutation(order_index_mu); - delete order_mu; - delete order_index_mu; - - RowMutation* no_mu = t_neworder->NewRowMutation(order_key); - no_mu->Put("cf0", "no_o_id", d_next_o_id_str); - no_mu->Put("cf0", "no_d_id", std::to_string(district_id)); - no_mu->Put("cf0", "no_w_id", warehouse_key); - gtxn->ApplyMutation(no_mu); - delete no_mu; - - std::string ol_dist_info_key; - if (district_id == kDistrictCountPerWarehouse) { - ol_dist_info_key = "s_dist_10"; + int32_t ol_quantity = info.ol_quantities[i]; + float ol_amount = std::stof(item_ret["i_price"]) * ol_quantity; + ol_amount_sum += ol_amount; + std::string ol_number_str = std::to_string(i + 1); + std::string ol_key = order_key + "_" + ol_number_str; + RowMutation* ol_mu = t_orderline->NewRowMutation(ol_key); + ol_mu->Put("cf0", "ol_o_id", d_next_o_id_str); + ol_mu->Put("cf0", "ol_d_id", std::to_string(district_id)); + ol_mu->Put("cf0", "ol_w_id", warehouse_key); + ol_mu->Put("cf0", "ol_number", ol_number_str); + ol_mu->Put("cf0", "ol_i_id", item_key); + ol_mu->Put("cf0", "ol_supply_w_id", ol_supply_w_id_str); + ol_mu->Put("cf0", "ol_delivery_d", ""); + ol_mu->Put("cf0", "ol_quantity", std::to_string(ol_quantity)); + ol_mu->Put("cf0", "ol_amount", std::to_string(ol_amount)); + ol_mu->Put("cf0", "ol_dist_info", stock_ret[ol_dist_info_key]); + gtxn->ApplyMutation(ol_mu); + delete ol_mu; + // update stock + int32_t s_quantity = std::stoi(stock_ret["s_quantity"]); + if (s_quantity > ol_quantity + 10) { + s_quantity -= ol_quantity; } else { - ol_dist_info_key = "s_dist_0" + std::to_string(district_id); + s_quantity = (s_quantity - ol_quantity) + 91; } - - float ol_amount_sum = 0; - for (int32_t i = 0; i < info.o_ol_cnt; ++i) { - int32_t i_id = info.ol_i_ids[i]; - std::string item_key = std::to_string(i_id); - RowReader* item_reader = t_item->NewRowReader(item_key); - RetTuples item_ret; - if (!GetValues(ret, gtxn.get(), item_reader, - {"i_price", "i_name", "i_data"}, - &item_ret, - "@new_order|item_reader|" + item_key)) { - return; - } - - std::string ol_supply_w_id_str = std::to_string(info.ol_supply_w_ids[i]); - std::string stock_key = ol_supply_w_id_str+ "_" + item_key; - RowReader* stock_reader = t_item->NewRowReader(stock_key); - RetTuples stock_ret; - if (!GetValues(ret, gtxn.get(), stock_reader, - {"s_quantity", "s_ytd", "s_order_cnt", "s_remote_cnt", "s_data", ol_dist_info_key}, - &stock_ret, - "@new_order|stock_reader|" + stock_key)) { - return; - } - - int32_t ol_quantity = info.ol_quantities[i]; - float ol_amount = std::stof(item_ret["i_price"]) * ol_quantity; - ol_amount_sum += ol_amount; - std::string ol_number_str = std::to_string(i + 1); - std::string ol_key = order_key + "_" + ol_number_str; - RowMutation* ol_mu = t_orderline->NewRowMutation(ol_key); - ol_mu->Put("cf0", "ol_o_id", d_next_o_id_str); - ol_mu->Put("cf0", "ol_d_id", std::to_string(district_id)); - ol_mu->Put("cf0", "ol_w_id", warehouse_key); - ol_mu->Put("cf0", "ol_number", ol_number_str); - ol_mu->Put("cf0", "ol_i_id", item_key); - ol_mu->Put("cf0", "ol_supply_w_id", ol_supply_w_id_str); - ol_mu->Put("cf0", "ol_delivery_d", ""); - ol_mu->Put("cf0", "ol_quantity", std::to_string(ol_quantity)); - ol_mu->Put("cf0", "ol_amount", std::to_string(ol_amount)); - ol_mu->Put("cf0", "ol_dist_info", stock_ret[ol_dist_info_key]); - gtxn->ApplyMutation(ol_mu); - delete ol_mu; - // update stock - int32_t s_quantity = std::stoi(stock_ret["s_quantity"]); - if (s_quantity > ol_quantity + 10) { - s_quantity -= ol_quantity; - } else { - s_quantity = (s_quantity - ol_quantity) + 91; - } - float s_ytd = std::stof(stock_ret["s_quantity"]) + ol_quantity; - int32_t s_order_cnt = std::stoi(stock_ret["s_order_cnt"]) + 1; - int32_t s_remote_cnt = std::stoi(stock_ret["s_remote_cnt"]); - if (info.ol_supply_w_ids[i] != warehouse_id) { - ++s_remote_cnt; - } - RowMutation* stock_mu = t_stock->NewRowMutation(stock_key); - stock_mu->Put("cf0", "s_quantity", std::to_string(s_quantity)); - stock_mu->Put("cf0", "s_ytd", std::to_string(s_ytd)); - stock_mu->Put("cf0", "s_order_cnt", std::to_string(s_order_cnt)); - stock_mu->Put("cf0", "s_remote_cnt", std::to_string(s_remote_cnt)); - gtxn->ApplyMutation(stock_mu); - delete stock_mu; - - // set result - RetTuples line; - line["ol_supply_w_id"] = ol_supply_w_id_str; - line["ol_i_id"] = item_key; - line["i_name"] = item_ret["i_name"]; - line["ol_quantity"] = std::to_string(ol_quantity); - line["s_quantity"] = std::to_string(s_quantity); - line["i_price"] = item_ret["i_price"]; - line["ol_amount"] = std::to_string(ol_amount); - std::string i_data = item_ret["i_data"]; - std::string s_data = item_ret["s_data"]; - if (i_data.find("ORIGINAL") != std::string::npos && - s_data.find("ORIGINAL") != std::string::npos) { - line["brand_generic"] = "B"; - } else { - line["brand_generic"] = "G"; - } - ret->AddLine(line); + float s_ytd = std::stof(stock_ret["s_quantity"]) + ol_quantity; + int32_t s_order_cnt = std::stoi(stock_ret["s_order_cnt"]) + 1; + int32_t s_remote_cnt = std::stoi(stock_ret["s_remote_cnt"]); + if (info.ol_supply_w_ids[i] != warehouse_id) { + ++s_remote_cnt; } - if (!info.need_failed) { - RetTuples single_line; - single_line["o_id"] = d_next_o_id_str; - single_line["o_ol_cnt"] = std::to_string(info.o_ol_cnt); - single_line["c_last"] = customer_ret["c_last"]; - single_line["c_credit"] = customer_ret["c_credit"]; - single_line["c_discount"] = customer_ret["c_discount"]; - single_line["w_tax"] = warehouse_ret["w_tax"]; - single_line["d_tax"] = district_ret["d_tax"]; - single_line["o_entry_d"] = datetime; - float c_discount = std::stof(customer_ret["c_discount"]); - float w_tax = std::stof(warehouse_ret["w_tax"]); - float d_tax = std::stof(district_ret["d_tax"]); - float total_amount = ol_amount_sum * ( 1 - c_discount) * (1 + w_tax + d_tax); - single_line["total_amount"] = std::to_string(total_amount); - ret->SetSingleLine(single_line); - gtxn->Commit(); - SetTxnResult(ret, gtxn.get()); + RowMutation* stock_mu = t_stock->NewRowMutation(stock_key); + stock_mu->Put("cf0", "s_quantity", std::to_string(s_quantity)); + stock_mu->Put("cf0", "s_ytd", std::to_string(s_ytd)); + stock_mu->Put("cf0", "s_order_cnt", std::to_string(s_order_cnt)); + stock_mu->Put("cf0", "s_remote_cnt", std::to_string(s_remote_cnt)); + gtxn->ApplyMutation(stock_mu); + delete stock_mu; + + // set result + RetTuples line; + line["ol_supply_w_id"] = ol_supply_w_id_str; + line["ol_i_id"] = item_key; + line["i_name"] = item_ret["i_name"]; + line["ol_quantity"] = std::to_string(ol_quantity); + line["s_quantity"] = std::to_string(s_quantity); + line["i_price"] = item_ret["i_price"]; + line["ol_amount"] = std::to_string(ol_amount); + std::string i_data = item_ret["i_data"]; + std::string s_data = item_ret["s_data"]; + if (i_data.find("ORIGINAL") != std::string::npos && + s_data.find("ORIGINAL") != std::string::npos) { + line["brand_generic"] = "B"; } else { - // set commit failed - SetTxnResult(ret, gtxn.get(), false, "@new_order|rowback simulation"); + line["brand_generic"] = "G"; } + ret->AddLine(line); + } + if (!info.need_failed) { + RetTuples single_line; + single_line["o_id"] = d_next_o_id_str; + single_line["o_ol_cnt"] = std::to_string(info.o_ol_cnt); + single_line["c_last"] = customer_ret["c_last"]; + single_line["c_credit"] = customer_ret["c_credit"]; + single_line["c_discount"] = customer_ret["c_discount"]; + single_line["w_tax"] = warehouse_ret["w_tax"]; + single_line["d_tax"] = district_ret["d_tax"]; + single_line["o_entry_d"] = datetime; + float c_discount = std::stof(customer_ret["c_discount"]); + float w_tax = std::stof(warehouse_ret["w_tax"]); + float d_tax = std::stof(district_ret["d_tax"]); + float total_amount = ol_amount_sum * (1 - c_discount) * (1 + w_tax + d_tax); + single_line["total_amount"] = std::to_string(total_amount); + ret->SetSingleLine(single_line); + gtxn->Commit(); + SetTxnResult(ret, gtxn.get()); + } else { + // set commit failed + SetTxnResult(ret, gtxn.get(), false, "@new_order|rowback simulation"); + } } -} // namespace tpcc -} // namespace tera +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/tera_txn/order_status_txn.cc b/src/benchmark/tpcc/tera_txn/order_status_txn.cc index a88fe7e0c..c788a2950 100644 --- a/src/benchmark/tpcc/tera_txn/order_status_txn.cc +++ b/src/benchmark/tpcc/tera_txn/order_status_txn.cc @@ -15,75 +15,66 @@ namespace tera { namespace tpcc { -void TeraTpccDb::OrderStatusTxn(bool by_last_name, - int32_t warehouse_id, int32_t district_id, - int32_t c_customer_id, - const std::string& last_name, +void TeraTpccDb::OrderStatusTxn(bool by_last_name, int32_t warehouse_id, int32_t district_id, + int32_t c_customer_id, const std::string& last_name, OrderStatusResult* ret) { - // open table - Table* t_order_index = table_map_[kTpccTables[kOrderIndex]]; - Table* t_orderline = table_map_[kTpccTables[kOrderLineTable]]; - Table* t_order = table_map_[kTpccTables[kOrderTable]]; - // begin transaction - std::unique_ptr gtxn(client_->NewGlobalTransaction()); - std::string customer_key = ""; - RetTuples customer_ret; - if (!GetCustomer(ret, gtxn.get(), by_last_name, last_name, c_customer_id, - warehouse_id, district_id, &customer_key, &customer_ret)) { - return; - } + // open table + Table* t_order_index = table_map_[kTpccTables[kOrderIndex]]; + Table* t_orderline = table_map_[kTpccTables[kOrderLineTable]]; + Table* t_order = table_map_[kTpccTables[kOrderTable]]; + // begin transaction + std::unique_ptr gtxn(client_->NewGlobalTransaction()); + std::string customer_key = ""; + RetTuples customer_ret; + if (!GetCustomer(ret, gtxn.get(), by_last_name, last_name, c_customer_id, warehouse_id, + district_id, &customer_key, &customer_ret)) { + return; + } - // find newest order from order index - ErrorCode error_code; - std::string prefix_key = std::to_string(warehouse_id) + "_" - + std::to_string(district_id) + "_"; - std::string start_key = prefix_key + customer_ret["c_id"] + "_"; - ScanDescriptor scan_desc(start_key); - scan_desc.SetEnd(start_key + "~"); - scan_desc.AddColumnFamily("cf0"); - ResultStream* scanner = t_order_index->Scan(scan_desc, &error_code); - int32_t max_order_id = -1; - for (scanner->LookUp(start_key); !scanner->Done(); scanner->Next()) { - std::string row_key = scanner->RowName(); - RowReader* index_reader = t_order_index->NewRowReader(row_key); - RetTuples index_ret; - if (!GetValues(ret, gtxn.get(), index_reader, - {"o_id"}, - &index_ret, - "@order_status|order_index_reader|" + row_key)) { - break; - } - if ( max_order_id < std::stoi(index_ret["o_id"])) { - max_order_id = std::stoi(index_ret["o_id"]); - } - } - delete scanner; - if (max_order_id == -1) { - SetTxnResult(ret, gtxn.get(), false, "not found order|" + start_key); - return; + // find newest order from order index + ErrorCode error_code; + std::string prefix_key = std::to_string(warehouse_id) + "_" + std::to_string(district_id) + "_"; + std::string start_key = prefix_key + customer_ret["c_id"] + "_"; + ScanDescriptor scan_desc(start_key); + scan_desc.SetEnd(start_key + "~"); + scan_desc.AddColumnFamily("cf0"); + ResultStream* scanner = t_order_index->Scan(scan_desc, &error_code); + int32_t max_order_id = -1; + for (scanner->LookUp(start_key); !scanner->Done(); scanner->Next()) { + std::string row_key = scanner->RowName(); + RowReader* index_reader = t_order_index->NewRowReader(row_key); + RetTuples index_ret; + if (!GetValues(ret, gtxn.get(), index_reader, {"o_id"}, &index_ret, + "@order_status|order_index_reader|" + row_key)) { + break; } - std::string order_key = prefix_key + std::to_string(max_order_id); - RowReader* order_reader = t_order->NewRowReader(order_key); - RetTuples order_ret; - if (!GetValues(ret, gtxn.get(), order_reader, - {"o_ol_cnt", "o_id"}, - &order_ret, - "@order_status|order_reader|" + order_key)) { - return; + if (max_order_id < std::stoi(index_ret["o_id"])) { + max_order_id = std::stoi(index_ret["o_id"]); } - for (int32_t i = 1; i <= std::stoi(order_ret["o_ol_cnt"]); ++i) { - std::string ol_key = prefix_key + order_ret["o_id"] + "_" + std::to_string(i); - RowReader* ol_reader = t_orderline->NewRowReader(ol_key); - RetTuples ol_ret; - if (!GetValues(ret, gtxn.get(), ol_reader, - {}, // TODO - &ol_ret, - "@order_status|ol_reader|" + ol_key)) { - return; - } + } + delete scanner; + if (max_order_id == -1) { + SetTxnResult(ret, gtxn.get(), false, "not found order|" + start_key); + return; + } + std::string order_key = prefix_key + std::to_string(max_order_id); + RowReader* order_reader = t_order->NewRowReader(order_key); + RetTuples order_ret; + if (!GetValues(ret, gtxn.get(), order_reader, {"o_ol_cnt", "o_id"}, &order_ret, + "@order_status|order_reader|" + order_key)) { + return; + } + for (int32_t i = 1; i <= std::stoi(order_ret["o_ol_cnt"]); ++i) { + std::string ol_key = prefix_key + order_ret["o_id"] + "_" + std::to_string(i); + RowReader* ol_reader = t_orderline->NewRowReader(ol_key); + RetTuples ol_ret; + if (!GetValues(ret, gtxn.get(), ol_reader, {}, // TODO + &ol_ret, "@order_status|ol_reader|" + ol_key)) { + return; } - SetTxnResult(ret, gtxn.get()); + } + SetTxnResult(ret, gtxn.get()); } -} // namespace tpcc -} // namespace tera +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/tera_txn/payment_txn.cc b/src/benchmark/tpcc/tera_txn/payment_txn.cc index c45d371bd..402e910db 100644 --- a/src/benchmark/tpcc/tera_txn/payment_txn.cc +++ b/src/benchmark/tpcc/tera_txn/payment_txn.cc @@ -15,180 +15,169 @@ namespace tera { namespace tpcc { -void TeraTpccDb::PaymentTxn(bool by_last_name, - int32_t warehouse_id, int32_t district_id, - int32_t customer_warehouse_id, int32_t customer_district_id, - int32_t c_customer_id, - const std::string& last_name, - int32_t h_amount, +void TeraTpccDb::PaymentTxn(bool by_last_name, int32_t warehouse_id, int32_t district_id, + int32_t customer_warehouse_id, int32_t customer_district_id, + int32_t c_customer_id, const std::string& last_name, int32_t h_amount, PaymentResult* ret) { - // open table - Table* t_warehouse = table_map_[kTpccTables[kWarehouseTable]]; - Table* t_district = table_map_[kTpccTables[kDistrictTable]]; - Table* t_customer = table_map_[kTpccTables[kCustomerTable]]; - Table* t_history = table_map_[kTpccTables[kHistoryTable]]; - Table* t_history_index = table_map_[kTpccTables[kHistoryIndex]]; - - // begin transaction - Transaction* gtxn = client_->NewGlobalTransaction(); - - // read customer - std::string customer_key = ""; - RetTuples customer_ret; - if (!GetCustomer(ret, gtxn, by_last_name, last_name, c_customer_id, - customer_warehouse_id, customer_district_id, &customer_key, &customer_ret)) { - return; + // open table + Table* t_warehouse = table_map_[kTpccTables[kWarehouseTable]]; + Table* t_district = table_map_[kTpccTables[kDistrictTable]]; + Table* t_customer = table_map_[kTpccTables[kCustomerTable]]; + Table* t_history = table_map_[kTpccTables[kHistoryTable]]; + Table* t_history_index = table_map_[kTpccTables[kHistoryIndex]]; + + // begin transaction + Transaction* gtxn = client_->NewGlobalTransaction(); + + // read customer + std::string customer_key = ""; + RetTuples customer_ret; + if (!GetCustomer(ret, gtxn, by_last_name, last_name, c_customer_id, customer_warehouse_id, + customer_district_id, &customer_key, &customer_ret)) { + return; + } + + // read warehouse + std::string warehouse_key = std::to_string(warehouse_id); + RowReader* warehouse_reader = t_warehouse->NewRowReader(warehouse_key); + RetTuples warehouse_ret; + if (!GetValues(ret, gtxn, warehouse_reader, + {"w_ytd", "w_name", "w_street_1", "w_street_2", "w_city", "w_state", "w_zip"}, + &warehouse_ret, "@payment|warehouse_reader|" + warehouse_key)) { + return; + } + + // update warehouse + RowMutation* warehouse_mu = t_warehouse->NewRowMutation(warehouse_key); + // add amount of this payment to the ytd balance of current warehouse. + float w_ytd = std::stof(warehouse_ret["w_ytd"]) + h_amount; + warehouse_mu->Put("cf0", "w_ytd", std::to_string(w_ytd)); + gtxn->ApplyMutation(warehouse_mu); + delete warehouse_mu; + + // read district + std::string district_id_str = std::to_string(district_id); + std::string district_key = warehouse_key + "_" + district_id_str; + RowReader* district_reader = t_district->NewRowReader(district_key); + RetTuples district_ret; + if (!GetValues(ret, gtxn, district_reader, + {"d_ytd", "d_name", "d_street_1", "d_street_2", "d_city", "d_state", "d_zip"}, + &district_ret, "@payment|district_reader|" + district_key)) { + return; + } + + // update district + RowMutation* district_mu = t_district->NewRowMutation(district_key); + // add amount of this payment to the ytd balance of current district. + float d_ytd = std::stof(district_ret["d_ytd"]) + h_amount; + district_mu->Put("cf0", "d_ytd", std::to_string(d_ytd)); + gtxn->ApplyMutation(district_mu); + delete district_mu; + + // update customer + // [Revision 5.11 - Page 34] see Clause 2.5.2.2 + // C_BALANCE is decreased by H_AMOUNT. + // C_YTD_PAYMENT is increased by H_AMOUNT. + // C_PAYMENT_CNT is incremented by 1. + RowMutation* customer_mu = t_customer->NewRowMutation(customer_key); + std::string c_balance_str = std::to_string(std::stof(customer_ret["c_balance"]) - h_amount); + customer_mu->Put("cf0", "c_balance", c_balance_str); + customer_mu->Put("cf0", "c_ytd_payment", + std::to_string(std::stof(customer_ret["c_ytd_payment"]) + h_amount)); + customer_mu->Put("cf0", "c_payment_cnt", + std::to_string(std::stof(customer_ret["c_payment_cnt"]) + h_amount)); + + if (customer_ret["c_credit"] == "BC") { + std::string data_info = customer_key + "_" + district_key + "_" + std::to_string(h_amount); + customer_ret["c_data"].insert(0, data_info); + if (customer_ret["c_data"].size() > kCustomerDataUpperLen) { + customer_ret["c_data"].substr(0, kCustomerDataUpperLen); } - - // read warehouse - std::string warehouse_key = std::to_string(warehouse_id); - RowReader* warehouse_reader = t_warehouse->NewRowReader(warehouse_key); - RetTuples warehouse_ret; - if (!GetValues(ret, gtxn, warehouse_reader, - {"w_ytd", "w_name", "w_street_1", "w_street_2", "w_city", "w_state", "w_zip"}, - &warehouse_ret, - "@payment|warehouse_reader|" + warehouse_key)) { - return; - } - - // update warehouse - RowMutation* warehouse_mu = t_warehouse->NewRowMutation(warehouse_key); - // add amount of this payment to the ytd balance of current warehouse. - float w_ytd = std::stof(warehouse_ret["w_ytd"]) + h_amount; - warehouse_mu->Put("cf0", "w_ytd", std::to_string(w_ytd)); - gtxn->ApplyMutation(warehouse_mu); - delete warehouse_mu; - - // read district - std::string district_id_str = std::to_string(district_id); - std::string district_key = warehouse_key + "_" + district_id_str; - RowReader* district_reader = t_district->NewRowReader(district_key); - RetTuples district_ret; - if (!GetValues(ret, gtxn, district_reader, - {"d_ytd", "d_name", "d_street_1", "d_street_2", "d_city", "d_state", "d_zip"}, - &district_ret, - "@payment|district_reader|" + district_key)) { - return; - } - - // update district - RowMutation* district_mu = t_district->NewRowMutation(district_key); - // add amount of this payment to the ytd balance of current district. - float d_ytd = std::stof(district_ret["d_ytd"]) + h_amount; - district_mu->Put("cf0", "d_ytd", std::to_string(d_ytd)); - gtxn->ApplyMutation(district_mu); - delete district_mu; - - // update customer - // [Revision 5.11 - Page 34] see Clause 2.5.2.2 - // C_BALANCE is decreased by H_AMOUNT. - // C_YTD_PAYMENT is increased by H_AMOUNT. - // C_PAYMENT_CNT is incremented by 1. - RowMutation* customer_mu = t_customer->NewRowMutation(customer_key); - std::string c_balance_str = std::to_string(std::stof(customer_ret["c_balance"]) - h_amount); - customer_mu->Put("cf0", "c_balance", c_balance_str); - customer_mu->Put("cf0", "c_ytd_payment", - std::to_string(std::stof(customer_ret["c_ytd_payment"]) + h_amount)); - customer_mu->Put("cf0", "c_payment_cnt", - std::to_string(std::stof(customer_ret["c_payment_cnt"]) + h_amount)); - - if (customer_ret["c_credit"] == "BC") { - std::string data_info = customer_key + "_" + district_key + "_" + std::to_string(h_amount); - customer_ret["c_data"].insert(0, data_info); - if (customer_ret["c_data"].size() > kCustomerDataUpperLen) { - customer_ret["c_data"].substr(0, kCustomerDataUpperLen); - } - customer_mu->Put("cf0", "c_data", customer_ret["c_data"]); - } - gtxn->ApplyMutation(customer_mu); - delete customer_mu; - - // read history_index (find newest history) - std::string history_data = warehouse_ret["w_name"] + " " + district_ret["d_name"]; - RowReader* hindex_reader = t_history_index->NewRowReader("count"); - RetTuples hindex_ret; - if (!GetValues(ret, gtxn, hindex_reader, - {"count"}, - &hindex_ret, - "@payment|hindex_reader|count")) { - return; - } - int cnt = std::stoi(hindex_ret["count"]); - - // update history_index - RowMutation* hindex_mu = t_history_index->NewRowMutation("count"); - hindex_mu->Put("cf0", "count", std::to_string(++cnt)); - gtxn->ApplyMutation(hindex_mu); - delete hindex_mu; - - // update history use now newest count as the primary key(row_key) of history - // default t_history don't have priamry key in tpcc - std::string history_key = std::to_string(cnt); - RowMutation* mu = t_history->NewRowMutation(history_key); - mu->Put("cf0", "h_c_id", customer_ret["c_id"]); - mu->Put("cf0", "h_c_d_id", customer_ret["c_d_id"]); - mu->Put("cf0", "h_c_w_id", customer_ret["c_w_id"]); - mu->Put("cf0", "h_d_id", district_id_str); - mu->Put("cf0", "h_w_id", warehouse_key); - mu->Put("cf0", "h_amount", std::to_string(h_amount)); - // The payment date (H_DATE) in generated within the SUT - // by using the current system date and time - std::string datetime = get_curtime_str(); - mu->Put("cf0", "h_date", datetime); - mu->Put("cf0", "h_data", history_data); - gtxn->ApplyMutation(mu); - delete mu; - - gtxn->Commit(); - RetTuples single_line; - RetTuples other_ret = { - {"w_id", warehouse_key}, - {"d_id", district_id_str}, - {"h_amount", std::to_string(h_amount)}, - {"h_date", datetime}, - {"c_balance", c_balance_str}, - {"c_data", customer_ret["c_data"].substr(0,200)} - }; - SetPaymentSingleLineRet(warehouse_ret, district_ret, customer_ret, other_ret, - &single_line); - - SetTxnResult(ret, gtxn); + customer_mu->Put("cf0", "c_data", customer_ret["c_data"]); + } + gtxn->ApplyMutation(customer_mu); + delete customer_mu; + + // read history_index (find newest history) + std::string history_data = warehouse_ret["w_name"] + " " + district_ret["d_name"]; + RowReader* hindex_reader = t_history_index->NewRowReader("count"); + RetTuples hindex_ret; + if (!GetValues(ret, gtxn, hindex_reader, {"count"}, &hindex_ret, + "@payment|hindex_reader|count")) { + return; + } + int cnt = std::stoi(hindex_ret["count"]); + + // update history_index + RowMutation* hindex_mu = t_history_index->NewRowMutation("count"); + hindex_mu->Put("cf0", "count", std::to_string(++cnt)); + gtxn->ApplyMutation(hindex_mu); + delete hindex_mu; + + // update history use now newest count as the primary key(row_key) of history + // default t_history don't have priamry key in tpcc + std::string history_key = std::to_string(cnt); + RowMutation* mu = t_history->NewRowMutation(history_key); + mu->Put("cf0", "h_c_id", customer_ret["c_id"]); + mu->Put("cf0", "h_c_d_id", customer_ret["c_d_id"]); + mu->Put("cf0", "h_c_w_id", customer_ret["c_w_id"]); + mu->Put("cf0", "h_d_id", district_id_str); + mu->Put("cf0", "h_w_id", warehouse_key); + mu->Put("cf0", "h_amount", std::to_string(h_amount)); + // The payment date (H_DATE) in generated within the SUT + // by using the current system date and time + std::string datetime = get_curtime_str(); + mu->Put("cf0", "h_date", datetime); + mu->Put("cf0", "h_data", history_data); + gtxn->ApplyMutation(mu); + delete mu; + + gtxn->Commit(); + RetTuples single_line; + RetTuples other_ret = {{"w_id", warehouse_key}, + {"d_id", district_id_str}, + {"h_amount", std::to_string(h_amount)}, + {"h_date", datetime}, + {"c_balance", c_balance_str}, + {"c_data", customer_ret["c_data"].substr(0, 200)}}; + SetPaymentSingleLineRet(warehouse_ret, district_ret, customer_ret, other_ret, &single_line); + + SetTxnResult(ret, gtxn); } -void TeraTpccDb::SetPaymentSingleLineRet(const RetTuples& warehouse_ret, - const RetTuples& district_ret, - const RetTuples& customer_ret, - const RetTuples& other_ret, +void TeraTpccDb::SetPaymentSingleLineRet(const RetTuples& warehouse_ret, + const RetTuples& district_ret, + const RetTuples& customer_ret, const RetTuples& other_ret, RetTuples* payment_ret) { - // The following fields are displayed: - // W_ID, D_ID, C_ID, C_D_ID, C_W_ID, - // W_STREET_1, W_STREET_2, W_CITY, W_STATE, W_ZIP, - // D_STREET_1, D_STREET_2, D_CITY, D_STATE, D_ZIP, - // C_FIRST, C_MIDDLE, C_LAST, C_STREET_1, C_STREET_2, C_CITY, C_STATE, - // C_ZIP, C_PHONE, C_SINCE, C_CREDIT, C_CREDIT_LIM, C_DISCOUNT, C_BALANCE, - // the first 200 characters of C_DATA (only if C_CREDIT = "BC"), - // H_AMOUNT, and H_DATE. - payment_ret->insert(other_ret.begin(), other_ret.end()); - for (auto t : warehouse_ret) { - if (t.first != "w_ytd" && t.first != "w_name") { - payment_ret->insert(t); - } + // The following fields are displayed: + // W_ID, D_ID, C_ID, C_D_ID, C_W_ID, + // W_STREET_1, W_STREET_2, W_CITY, W_STATE, W_ZIP, + // D_STREET_1, D_STREET_2, D_CITY, D_STATE, D_ZIP, + // C_FIRST, C_MIDDLE, C_LAST, C_STREET_1, C_STREET_2, C_CITY, C_STATE, + // C_ZIP, C_PHONE, C_SINCE, C_CREDIT, C_CREDIT_LIM, C_DISCOUNT, C_BALANCE, + // the first 200 characters of C_DATA (only if C_CREDIT = "BC"), + // H_AMOUNT, and H_DATE. + payment_ret->insert(other_ret.begin(), other_ret.end()); + for (auto t : warehouse_ret) { + if (t.first != "w_ytd" && t.first != "w_name") { + payment_ret->insert(t); } - for (auto t : district_ret) { - if (t.first != "d_ytd" && t.first != "w_name") { - payment_ret->insert(t); - } + } + for (auto t : district_ret) { + if (t.first != "d_ytd" && t.first != "w_name") { + payment_ret->insert(t); } - std::unordered_set c_names = {"c_id", "c_d_id", "c_w_id", - "c_first", "c_middle", "c_last", "c_street_1", "c_street_2", "c_city", - "c_state", "c_zip", "c_phone", "c_since", "c_credit", "c_credit_lim", - "c_discount"}; - for (auto t : customer_ret) { - if (c_names.find(t.first) != c_names.end()) { - payment_ret->insert(t); - } + } + std::unordered_set c_names = {"c_id", "c_d_id", "c_w_id", "c_first", + "c_middle", "c_last", "c_street_1", "c_street_2", + "c_city", "c_state", "c_zip", "c_phone", + "c_since", "c_credit", "c_credit_lim", "c_discount"}; + for (auto t : customer_ret) { + if (c_names.find(t.first) != c_names.end()) { + payment_ret->insert(t); } + } } -} // namespace tpcc -} // namespace tera +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/tera_txn/stocklevel_txn.cc b/src/benchmark/tpcc/tera_txn/stocklevel_txn.cc index eeb7bb06d..d28202429 100644 --- a/src/benchmark/tpcc/tera_txn/stocklevel_txn.cc +++ b/src/benchmark/tpcc/tera_txn/stocklevel_txn.cc @@ -15,65 +15,63 @@ namespace tera { namespace tpcc { -void TeraTpccDb::StockLevelTxn(int32_t warehouse_id, int32_t district_id, - int32_t threshold, +void TeraTpccDb::StockLevelTxn(int32_t warehouse_id, int32_t district_id, int32_t threshold, StockLevelResult* ret) { - // open table - Table* t_district = table_map_[kTpccTables[kDistrictTable]]; - Table* t_order = table_map_[kTpccTables[kOrderTable]]; - Table* t_orderline = table_map_[kTpccTables[kOrderLineTable]]; - Table* t_stock = table_map_[kTpccTables[kStockTable]]; - // begin transaction - std::unique_ptr gtxn(client_->NewGlobalTransaction()); - std::string district_primary_key = std::to_string(warehouse_id) - + "_" + std::to_string(district_id); - RowReader* district_reader = t_district->NewRowReader(district_primary_key); - RetTuples district_ret; - if (!GetValues(ret, gtxn.get(), district_reader, {"d_next_o_id"}, &district_ret, - "@stock_level|district_reader|" + district_primary_key)) { - return; - } - int32_t order_id = std::stoi(district_ret["d_next_o_id"]); + // open table + Table* t_district = table_map_[kTpccTables[kDistrictTable]]; + Table* t_order = table_map_[kTpccTables[kOrderTable]]; + Table* t_orderline = table_map_[kTpccTables[kOrderLineTable]]; + Table* t_stock = table_map_[kTpccTables[kStockTable]]; + // begin transaction + std::unique_ptr gtxn(client_->NewGlobalTransaction()); + std::string district_primary_key = + std::to_string(warehouse_id) + "_" + std::to_string(district_id); + RowReader* district_reader = t_district->NewRowReader(district_primary_key); + RetTuples district_ret; + if (!GetValues(ret, gtxn.get(), district_reader, {"d_next_o_id"}, &district_ret, + "@stock_level|district_reader|" + district_primary_key)) { + return; + } + int32_t order_id = std::stoi(district_ret["d_next_o_id"]); - int32_t cnt = 0; - for (int32_t ol_o_id = order_id - 20; ol_o_id <= order_id; ++ol_o_id) { - std::string order_primary_key = std::to_string(warehouse_id) - + "_" + std::to_string(district_id) + "_" + std::to_string(ol_o_id); - RowReader* order_reader = t_order->NewRowReader(order_primary_key); - RetTuples order_ret; - if (!GetValues(ret, gtxn.get(), order_reader, {"o_ol_cnt"}, &order_ret, - "@stock_level|order_reader|" + order_primary_key)) { - return; - } - int32_t o_ol_cnt = std::stoi(order_ret["o_ol_cnt"]); - for (int32_t ol_number = 1; ol_number <= o_ol_cnt; ++ ol_number) { - std::string ol_primary_key = order_primary_key + "_" + std::to_string(ol_number); - RowReader* ol_reader = t_orderline->NewRowReader(ol_primary_key); - RetTuples ol_ret; - ol_reader->AddColumn("cf0", "ol_i_id"); - if (!GetValues(ret, gtxn.get(), ol_reader, {"ol_i_id"}, &ol_ret, - "@stock_level|ol_reader|" + ol_primary_key)) { - return; - } - int32_t ol_i_id = std::stoi(ol_ret["ol_i_id"]); - std::string stock_key = std::to_string(warehouse_id) - + "_" + std::to_string(ol_i_id); - RowReader* stock_reader = t_stock->NewRowReader(stock_key); - RetTuples stock_ret; - if (!GetValues(ret, gtxn.get(), stock_reader, {"s_quantity"}, &stock_ret, - "@stock_level|stock_reader|" + stock_key)) { - return; - } - int32_t s_quantity = std::stoi(stock_ret["s_quantity"]); - if (s_quantity < threshold) { - ++cnt; - } - } + int32_t cnt = 0; + for (int32_t ol_o_id = order_id - 20; ol_o_id <= order_id; ++ol_o_id) { + std::string order_primary_key = std::to_string(warehouse_id) + "_" + + std::to_string(district_id) + "_" + std::to_string(ol_o_id); + RowReader* order_reader = t_order->NewRowReader(order_primary_key); + RetTuples order_ret; + if (!GetValues(ret, gtxn.get(), order_reader, {"o_ol_cnt"}, &order_ret, + "@stock_level|order_reader|" + order_primary_key)) { + return; + } + int32_t o_ol_cnt = std::stoi(order_ret["o_ol_cnt"]); + for (int32_t ol_number = 1; ol_number <= o_ol_cnt; ++ol_number) { + std::string ol_primary_key = order_primary_key + "_" + std::to_string(ol_number); + RowReader* ol_reader = t_orderline->NewRowReader(ol_primary_key); + RetTuples ol_ret; + ol_reader->AddColumn("cf0", "ol_i_id"); + if (!GetValues(ret, gtxn.get(), ol_reader, {"ol_i_id"}, &ol_ret, + "@stock_level|ol_reader|" + ol_primary_key)) { + return; + } + int32_t ol_i_id = std::stoi(ol_ret["ol_i_id"]); + std::string stock_key = std::to_string(warehouse_id) + "_" + std::to_string(ol_i_id); + RowReader* stock_reader = t_stock->NewRowReader(stock_key); + RetTuples stock_ret; + if (!GetValues(ret, gtxn.get(), stock_reader, {"s_quantity"}, &stock_ret, + "@stock_level|stock_reader|" + stock_key)) { + return; + } + int32_t s_quantity = std::stoi(stock_ret["s_quantity"]); + if (s_quantity < threshold) { + ++cnt; + } } - // only read not need commit - ret->SetLowStock(cnt); - SetTxnResult(ret, gtxn.get()); + } + // only read not need commit + ret->SetLowStock(cnt); + SetTxnResult(ret, gtxn.get()); } -} // namespace tpcc -} // namespace tera +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/test/data_generator_test.cc b/src/benchmark/tpcc/test/data_generator_test.cc index 6c5b71fe7..eb852f3c4 100644 --- a/src/benchmark/tpcc/test/data_generator_test.cc +++ b/src/benchmark/tpcc/test/data_generator_test.cc @@ -20,69 +20,67 @@ namespace tera { namespace tpcc { class DataGeneratorTest : public ::testing::Test { -public: - DataGeneratorTest() { - random_gen_.SetRandomConstant(); - TpccDb* db_ = (TpccDb*)(&mdb_); - data_gen_ = new DataGenerator(&random_gen_, db_); - } + public: + DataGeneratorTest() { + random_gen_.SetRandomConstant(); + TpccDb* db_ = (TpccDb*)(&mdb_); + data_gen_ = new DataGenerator(&random_gen_, db_); + } - void CleanStateCounter(int table_enum_num = -1) { - if (table_enum_num == -1) { - for (int i = 0; i < kTpccTableCnt; ++i) { - data_gen_->states_[i].first.Set(0); - data_gen_->states_[i].second.Set(0); - } - } else if (table_enum_num > -1 && table_enum_num < kTpccTableCnt) { - data_gen_->states_[table_enum_num].first.Set(0); - data_gen_->states_[table_enum_num].second.Set(0); - } + void CleanStateCounter(int table_enum_num = -1) { + if (table_enum_num == -1) { + for (int i = 0; i < kTpccTableCnt; ++i) { + data_gen_->states_[i].first.Set(0); + data_gen_->states_[i].second.Set(0); + } + } else if (table_enum_num > -1 && table_enum_num < kTpccTableCnt) { + data_gen_->states_[table_enum_num].first.Set(0); + data_gen_->states_[table_enum_num].second.Set(0); } + } - ~DataGeneratorTest() { - delete data_gen_; - } -private: - RandomGenerator random_gen_; - TpccDb* db_; - MockTpccDb mdb_; - DataGenerator* data_gen_; + ~DataGeneratorTest() { delete data_gen_; } + private: + RandomGenerator random_gen_; + TpccDb* db_; + MockTpccDb mdb_; + DataGenerator* data_gen_; }; TEST_F(DataGeneratorTest, GenItem) { - CleanStateCounter(); - mdb_.flag_ = true; - data_gen_->GenItem(1, false); - EXPECT_TRUE(data_gen_->states_[kItemTable].first.Get() == 1); - data_gen_->GenItem(1, false); - EXPECT_TRUE(data_gen_->states_[kItemTable].first.Get() == 2); - mdb_.flag_ = false; - data_gen_->GenItem(1, false); - EXPECT_TRUE(data_gen_->states_[kItemTable].second.Get() == 1); + CleanStateCounter(); + mdb_.flag_ = true; + data_gen_->GenItem(1, false); + EXPECT_TRUE(data_gen_->states_[kItemTable].first.Get() == 1); + data_gen_->GenItem(1, false); + EXPECT_TRUE(data_gen_->states_[kItemTable].first.Get() == 2); + mdb_.flag_ = false; + data_gen_->GenItem(1, false); + EXPECT_TRUE(data_gen_->states_[kItemTable].second.Get() == 1); } TEST_F(DataGeneratorTest, GenStock) { - CleanStateCounter(); - mdb_.flag_ = true; - data_gen_->GenStock(1, 2, false); - EXPECT_TRUE(data_gen_->states_[kStockTable].first.Get() == 1); - data_gen_->GenStock(1, 2, false); - EXPECT_TRUE(data_gen_->states_[kStockTable].first.Get() == 2); - mdb_.flag_ = false; - data_gen_->GenStock(1, 3, false); - EXPECT_TRUE(data_gen_->states_[kStockTable].second.Get() == 1); + CleanStateCounter(); + mdb_.flag_ = true; + data_gen_->GenStock(1, 2, false); + EXPECT_TRUE(data_gen_->states_[kStockTable].first.Get() == 1); + data_gen_->GenStock(1, 2, false); + EXPECT_TRUE(data_gen_->states_[kStockTable].first.Get() == 2); + mdb_.flag_ = false; + data_gen_->GenStock(1, 3, false); + EXPECT_TRUE(data_gen_->states_[kStockTable].second.Get() == 1); } TEST_F(DataGeneratorTest, GenStocks) { - CleanStateCounter(); - mdb_.flag_ = true; - for (int i = 1; i <=FLAGS_warehouses_count; ++i) { - data_gen_->GenStocks(i); - } - data_gen_->Join(); - EXPECT_TRUE(data_gen_->states_[kStockTable].first.Get() == FLAGS_warehouses_count * kItemCount); + CleanStateCounter(); + mdb_.flag_ = true; + for (int i = 1; i <= FLAGS_warehouses_count; ++i) { + data_gen_->GenStocks(i); + } + data_gen_->Join(); + EXPECT_TRUE(data_gen_->states_[kStockTable].first.Get() == FLAGS_warehouses_count * kItemCount); } -} // namespace tpcc -} // namespace tera +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/test/random_generator_test.cc b/src/benchmark/tpcc/test/random_generator_test.cc index 978521739..670e8ce2f 100644 --- a/src/benchmark/tpcc/test/random_generator_test.cc +++ b/src/benchmark/tpcc/test/random_generator_test.cc @@ -14,68 +14,66 @@ namespace tpcc { class RandomGenerator; class RandomGeneratorTest : public ::testing::Test, public RandomGenerator { -public: - RandomGeneratorTest() : RandomGenerator() { - SetRandomConstant(); - } + public: + RandomGeneratorTest() : RandomGenerator() { SetRandomConstant(); } - ~RandomGeneratorTest() {} + ~RandomGeneratorTest() {} }; -TEST_F(RandomGeneratorTest, MakeFloat) { - EXPECT_EQ(MakeFloat(1.0, 1.0, 1), 1.0); - float f = MakeFloat(0, 1.0, 2); - std::cout << std::to_string(f) << std::endl; - EXPECT_TRUE(f >= 0 && f <= 1); +TEST_F(RandomGeneratorTest, MakeFloat) { + EXPECT_EQ(MakeFloat(1.0, 1.0, 1), 1.0); + float f = MakeFloat(0, 1.0, 2); + std::cout << std::to_string(f) << std::endl; + EXPECT_TRUE(f >= 0 && f <= 1); } TEST_F(RandomGeneratorTest, MakeAString) { - EXPECT_TRUE(MakeAString(0, 0) == ""); - EXPECT_TRUE((MakeAString(1, 1)).length() == 1); - std::string a_str = MakeAString(1,10); - EXPECT_TRUE(a_str.length() <= 10 && a_str.length() >= 1); - std::string a_str1 = MakeAString(26,27); - int cnt = 0; - for (int i = 0; i < a_str1.length(); ++i) { - for (int j = i + 1; j < a_str1.length(); ++j) { - if (a_str1[i] == a_str1[j]) { - ++cnt; - } - } + EXPECT_TRUE(MakeAString(0, 0) == ""); + EXPECT_TRUE((MakeAString(1, 1)).length() == 1); + std::string a_str = MakeAString(1, 10); + EXPECT_TRUE(a_str.length() <= 10 && a_str.length() >= 1); + std::string a_str1 = MakeAString(26, 27); + int cnt = 0; + for (int i = 0; i < a_str1.length(); ++i) { + for (int j = i + 1; j < a_str1.length(); ++j) { + if (a_str1[i] == a_str1[j]) { + ++cnt; + } } - EXPECT_TRUE(cnt > 0); + } + EXPECT_TRUE(cnt > 0); } TEST_F(RandomGeneratorTest, MakeNString) { - EXPECT_TRUE(MakeNString(0, 0) == ""); - EXPECT_TRUE((MakeNString(1, 1)).length() == 1); - std::string n_str = MakeNString(1,10); - EXPECT_TRUE(n_str.length() <= 10 && n_str.length() >= 1); + EXPECT_TRUE(MakeNString(0, 0) == ""); + EXPECT_TRUE((MakeNString(1, 1)).length() == 1); + std::string n_str = MakeNString(1, 10); + EXPECT_TRUE(n_str.length() <= 10 && n_str.length() >= 1); } TEST_F(RandomGeneratorTest, MakeDisOrderList) { - std::vector dis_order_list = MakeDisOrderList(10,20); - sort(dis_order_list.begin(),dis_order_list.end()); - for (int i = 10; i <= 20; ++i) { - EXPECT_EQ(dis_order_list[i-10], i); - } + std::vector dis_order_list = MakeDisOrderList(10, 20); + sort(dis_order_list.begin(), dis_order_list.end()); + for (int i = 10; i <= 20; ++i) { + EXPECT_EQ(dis_order_list[i - 10], i); + } } TEST_F(RandomGeneratorTest, SetRandomConstant) { - SetRandomConstant(); - NURandConstant c = GetRandomConstant(); - EXPECT_TRUE(c.c_last >= 0 && c.c_last <= 255); - EXPECT_TRUE(c.c_last >= 0 && c.c_last <= 1023); - EXPECT_TRUE(c.c_last >= 0 && c.c_last <= 8191); + SetRandomConstant(); + NURandConstant c = GetRandomConstant(); + EXPECT_TRUE(c.c_last >= 0 && c.c_last <= 255); + EXPECT_TRUE(c.c_last >= 0 && c.c_last <= 1023); + EXPECT_TRUE(c.c_last >= 0 && c.c_last <= 8191); } TEST_F(RandomGeneratorTest, GetRandom) { - EXPECT_EQ(GetRandom(1, 1) , 1); - int rand_num = GetRandom(0, 1); - int rand_num1 = GetRandom(1, 0); - EXPECT_TRUE(rand_num == 0 || rand_num == 1); - EXPECT_TRUE(rand_num == 0 || rand_num == 1); + EXPECT_EQ(GetRandom(1, 1), 1); + int rand_num = GetRandom(0, 1); + int rand_num1 = GetRandom(1, 0); + EXPECT_TRUE(rand_num == 0 || rand_num == 1); + EXPECT_TRUE(rand_num == 0 || rand_num == 1); } -} // namespace tpcc -} // namespace tera +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/test/tpcc_test.cc b/src/benchmark/tpcc/test/tpcc_test.cc index 04d5b4890..71a4812b6 100644 --- a/src/benchmark/tpcc/test/tpcc_test.cc +++ b/src/benchmark/tpcc/test/tpcc_test.cc @@ -1,7 +1,7 @@ // Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -// +// // Author: baorenyi@baidu.com #include "gflags/gflags.h" @@ -12,10 +12,9 @@ namespace tera { namespace tpcc { int main(int argc, char* argv[]) { - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); } - -} // namespace tpcc -} // namespace tera +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/tpcc_flags.cc b/src/benchmark/tpcc/tpcc_flags.cc index 920740aa0..e4d5f31fe 100644 --- a/src/benchmark/tpcc/tpcc_flags.cc +++ b/src/benchmark/tpcc/tpcc_flags.cc @@ -9,7 +9,8 @@ DEFINE_int64(transactions_count, 200, "the count of transactions"); DEFINE_int32(warehouses_count, 2, "the count of warsehouses"); DEFINE_int32(tpcc_thread_pool_size, 20, "size of tpcc thread pool"); -DEFINE_int32(tpcc_run_gtxn_thread_pool_size, 20, "size of tpcc run global transactions thread pool"); +DEFINE_int32(tpcc_run_gtxn_thread_pool_size, 20, + "size of tpcc run global transactions thread pool"); DEFINE_string(db_type, "tera", "test db type"); DEFINE_string(tera_client_flagfile, "./tera.flag", "the flag file path of tera client"); DEFINE_string(tera_table_schema_dir, "./tpcc_schemas/", "table schema directory"); diff --git a/src/benchmark/tpcc/tpcc_main.cc b/src/benchmark/tpcc/tpcc_main.cc index 2e2df8e26..aae80b7dc 100644 --- a/src/benchmark/tpcc/tpcc_main.cc +++ b/src/benchmark/tpcc/tpcc_main.cc @@ -22,57 +22,56 @@ DECLARE_int64(transactions_count); DECLARE_int32(warehouses_count); DECLARE_string(db_type); -int main(int argc, char *argv[]) { - // load conf from flags - ::google::ParseCommandLineFlags(&argc, &argv, true); +int main(int argc, char* argv[]) { + // load conf from flags + ::google::ParseCommandLineFlags(&argc, &argv, true); - if (argc > 1 && strcmp(argv[1], "version") == 0) { - PrintSystemVersion(); - return 0; - } - if (FLAGS_warehouses_count > tera::tpcc::kMaxWarehouseId - && FLAGS_warehouses_count <= 0) { - LOG(ERROR) << "--warehouses_count=" << FLAGS_warehouses_count << " is not availability"; - return -1; - } + if (argc > 1 && strcmp(argv[1], "version") == 0) { + PrintSystemVersion(); + return 0; + } + if (FLAGS_warehouses_count > tera::tpcc::kMaxWarehouseId && FLAGS_warehouses_count <= 0) { + LOG(ERROR) << "--warehouses_count=" << FLAGS_warehouses_count << " is not availability"; + return -1; + } - tera::tpcc::RandomGenerator random_gen; - random_gen.SetRandomConstant(); + tera::tpcc::RandomGenerator random_gen; + random_gen.SetRandomConstant(); - tera::tpcc::TpccDb* db = tera::tpcc::TpccDb::NewTpccDb(FLAGS_db_type); - // do clean tables - if (argc == 2 && strcmp(argv[1], "clean") == 0) { - if(!db->CleanTables()) { - LOG(ERROR) << "clean tables failed, exit"; - _Exit(EXIT_FAILURE); - } - delete db; - return 0; - } - - if (!db->CreateTables()) { - LOG(ERROR) << "create tables failed, exit"; - _Exit(EXIT_FAILURE); + tera::tpcc::TpccDb* db = tera::tpcc::TpccDb::NewTpccDb(FLAGS_db_type); + // do clean tables + if (argc == 2 && strcmp(argv[1], "clean") == 0) { + if (!db->CleanTables()) { + LOG(ERROR) << "clean tables failed, exit"; + _Exit(EXIT_FAILURE); } - - tera::tpcc::DataGenerator data_gen(&random_gen, db); - int64_t beg_ts = tera::get_micros(); - data_gen.GenItems(); - data_gen.GenWarehouses(); - data_gen.Join(); - int64_t cost_t = tera::get_micros() - beg_ts; - LOG(INFO) << "Generate Tables Cost:" << cost_t << "us"; - - // init driver - tera::tpcc::NURandConstant constant = random_gen.GetRandomConstant(); - random_gen.SetRandomConstant(constant); - tera::tpcc::Driver driver(&random_gen, db); - // run test - int64_t beg_txn_ts = tera::get_micros(); - driver.RunTransactions(); - driver.Join(); - int64_t cost_txn_t = tera::get_micros() - beg_txn_ts; - LOG(INFO) << "RunTransactions Cost:" << cost_txn_t << "us"; delete db; return 0; + } + + if (!db->CreateTables()) { + LOG(ERROR) << "create tables failed, exit"; + _Exit(EXIT_FAILURE); + } + + tera::tpcc::DataGenerator data_gen(&random_gen, db); + int64_t beg_ts = tera::get_micros(); + data_gen.GenItems(); + data_gen.GenWarehouses(); + data_gen.Join(); + int64_t cost_t = tera::get_micros() - beg_ts; + LOG(INFO) << "Generate Tables Cost:" << cost_t << "us"; + + // init driver + tera::tpcc::NURandConstant constant = random_gen.GetRandomConstant(); + random_gen.SetRandomConstant(constant); + tera::tpcc::Driver driver(&random_gen, db); + // run test + int64_t beg_txn_ts = tera::get_micros(); + driver.RunTransactions(); + driver.Join(); + int64_t cost_txn_t = tera::get_micros() - beg_txn_ts; + LOG(INFO) << "RunTransactions Cost:" << cost_txn_t << "us"; + delete db; + return 0; } diff --git a/src/benchmark/tpcc/tpcc_types.h b/src/benchmark/tpcc/tpcc_types.h index c73e9f489..95ca35262 100644 --- a/src/benchmark/tpcc/tpcc_types.h +++ b/src/benchmark/tpcc/tpcc_types.h @@ -19,11 +19,9 @@ const int kTpccTableCnt = 12; // t_customer_last_index is the index of t_customer // -const char* const kTpccTables[] = {"t_item", "t_warehouse", "t_district", - "t_customer", "t_history", "t_stock", - "t_order", "t_orderline", "t_neworder", - "t_customer_last_index", "t_order_index", - "t_history_index"}; +const char* const kTpccTables[] = {"t_item", "t_warehouse", "t_district", "t_customer", "t_history", + "t_stock", "t_order", "t_orderline", "t_neworder", + "t_customer_last_index", "t_order_index", "t_history_index"}; // StockLevel 4% 4 // OrderStatus 4% 8 @@ -33,7 +31,7 @@ const char* const kTpccTables[] = {"t_item", "t_warehouse", "t_district", const int kTpccTransactionRatios[] = {4, 8, 12, 55, 100}; // http://www.man7.org/linux/man-pages/man3/initstate.3.html -// Current "optimal" values for the size of the state array n +// Current "optimal" values for the size of the state array n // are 8, 32, 64, 128, and 256 bytes; const int kRandomStateSize = 64; @@ -58,7 +56,7 @@ const int kMaxWarehouseId = 100; const int kWareHouseNameLowerLen = 6; const int kWareHouseNameUpperLen = 10; -// stock +// stock const int kMaxQuantity = 100; const int kMinQuantity = 10; const int kDistLen = 24; @@ -133,7 +131,7 @@ const float kRuntimeMaxAmount = 5000.00f; const float kRuntimeMinAmount = 1.00f; const int kRuntimeAmountDigits = 2; -} // namespace tpcc -} // namepsace tera +} // namespace tpcc +} // namepsace tera #endif /* TERA_BENCHMARK_TPCC_TPCC_TYPES_H */ diff --git a/src/benchmark/tpcc/tpccdb.cc b/src/benchmark/tpcc/tpccdb.cc index bb7e0cfb5..595fe9657 100644 --- a/src/benchmark/tpcc/tpccdb.cc +++ b/src/benchmark/tpcc/tpccdb.cc @@ -18,231 +18,184 @@ class MockTpccDb; /// ------------------------- [begin item table] -------------------------- /// std::string Item::ToString() const { - std::stringstream ss; - ss << "i_id = " << i_id - << ",i_im_id = " << i_im_id - << ",i_price = " << i_price - << ",i_name = " << i_name - << ",i_data = " << i_data; - return ss.str(); + std::stringstream ss; + ss << "i_id = " << i_id << ",i_im_id = " << i_im_id << ",i_price = " << i_price + << ",i_name = " << i_name << ",i_data = " << i_data; + return ss.str(); } /// ------------------------- [begin warehouse table] --------------------- /// std::string Warehouse::ToString() const { - std::stringstream ss; - ss << "w_id = " << w_id - << ",w_tax = " << w_tax - << ",w_ytd = " << w_ytd - << ",w_name = " << w_name - << ",w_street_1 = " << w_street_1 - << ",w_street_2 = " << w_street_2 - << ",w_city = " << w_city - << ",w_state = " << w_state - << ",w_zip = " << w_zip; - return ss.str(); + std::stringstream ss; + ss << "w_id = " << w_id << ",w_tax = " << w_tax << ",w_ytd = " << w_ytd << ",w_name = " << w_name + << ",w_street_1 = " << w_street_1 << ",w_street_2 = " << w_street_2 << ",w_city = " << w_city + << ",w_state = " << w_state << ",w_zip = " << w_zip; + return ss.str(); } /// ------------------------- [begin district table] ---------------------- /// -District::District(int32_t id, int32_t w_id, RandomGenerator* rand_gen) +District::District(int32_t id, int32_t w_id, RandomGenerator* rand_gen) : d_id(id), d_w_id(w_id), d_ytd(kInitYTD), d_next_o_id(kCustomerCountPerDistrict + 1) { - d_tax = GenTax(rand_gen); - d_name = rand_gen->MakeAString(kDistrictNameLowerLen, kDistrictNameUpperLen); - d_street_1 = rand_gen->MakeAString(kStreetLowerLen, kStreetUpperLen); - d_street_2 = rand_gen->MakeAString(kStreetLowerLen, kStreetUpperLen); - d_city = rand_gen->MakeAString(kCityLowerLen, kCityUpperLen); - d_state = rand_gen->MakeAString(kStateLen,kStateLen); - d_zip = GenZip(rand_gen); + d_tax = GenTax(rand_gen); + d_name = rand_gen->MakeAString(kDistrictNameLowerLen, kDistrictNameUpperLen); + d_street_1 = rand_gen->MakeAString(kStreetLowerLen, kStreetUpperLen); + d_street_2 = rand_gen->MakeAString(kStreetLowerLen, kStreetUpperLen); + d_city = rand_gen->MakeAString(kCityLowerLen, kCityUpperLen); + d_state = rand_gen->MakeAString(kStateLen, kStateLen); + d_zip = GenZip(rand_gen); } -std::string District::PrimaryKey() const { - return std::to_string(d_w_id) + "_" - + std::to_string(d_id); +std::string District::PrimaryKey() const { + return std::to_string(d_w_id) + "_" + std::to_string(d_id); } -std::string District::ForeignKey() const { - return std::to_string(d_w_id); -} +std::string District::ForeignKey() const { return std::to_string(d_w_id); } std::string District::ToString() const { - std::stringstream ss; - ss << "d_id = " << d_id - << ",d_w_id = " << d_w_id - << ",d_tax = " << d_tax - << ",d_ytd = " << d_ytd - << ",d_next_o_id = " << d_next_o_id - << ",d_name = " << d_name - << ",d_street_1 = " << d_street_1 - << ",d_street_2 = " << d_street_2 - << ",d_city = " << d_city - << ",d_state = " << d_state - << ",d_zip = " << d_zip; - return ss.str(); + std::stringstream ss; + ss << "d_id = " << d_id << ",d_w_id = " << d_w_id << ",d_tax = " << d_tax << ",d_ytd = " << d_ytd + << ",d_next_o_id = " << d_next_o_id << ",d_name = " << d_name << ",d_street_1 = " << d_street_1 + << ",d_street_2 = " << d_street_2 << ",d_city = " << d_city << ",d_state = " << d_state + << ",d_zip = " << d_zip; + return ss.str(); } /// ------------------------- [begin stock table] ------------------------- /// -Stock::Stock(int32_t id, int32_t w_id, bool is_original, RandomGenerator* rand_gen) - : s_i_id (id), s_w_id(w_id) { - s_quantity = rand_gen->GetRandom(kMinQuantity, kMaxQuantity); - s_ytd = 0; - s_order_cnt = 0; - s_remote_cnt = 0; - for (int i = 0; i < kDistrictCountPerWarehouse; ++i) { - s_dist.push_back(rand_gen->MakeAString(kDistLen, kDistLen)); - } - s_data = GenData(rand_gen, kStockDataLowerLen, kStockDataUpperLen, is_original); +Stock::Stock(int32_t id, int32_t w_id, bool is_original, RandomGenerator* rand_gen) + : s_i_id(id), s_w_id(w_id) { + s_quantity = rand_gen->GetRandom(kMinQuantity, kMaxQuantity); + s_ytd = 0; + s_order_cnt = 0; + s_remote_cnt = 0; + for (int i = 0; i < kDistrictCountPerWarehouse; ++i) { + s_dist.push_back(rand_gen->MakeAString(kDistLen, kDistLen)); + } + s_data = GenData(rand_gen, kStockDataLowerLen, kStockDataUpperLen, is_original); } -std::string Stock::PrimaryKey() const { - return std::to_string(s_w_id) + "_" + std::to_string(s_i_id); +std::string Stock::PrimaryKey() const { + return std::to_string(s_w_id) + "_" + std::to_string(s_i_id); } -std::string Stock::ForeignKey() const { - return std::to_string(s_i_id); -} +std::string Stock::ForeignKey() const { return std::to_string(s_i_id); } std::string Stock::ToString() const { - std::stringstream ss; - ss << "s_w_id = " << s_w_id - << ",s_quantity = " << s_quantity - << ",s_ytd = " << s_ytd - << ",s_order_cnt = " << s_order_cnt - << ",s_remote_cnt = " << s_remote_cnt - << ",s_data = " << s_data - << ",s_dist = ["; - for (auto d : s_dist) { - ss << d << ","; - } - ss << "]"; - return ss.str(); + std::stringstream ss; + ss << "s_w_id = " << s_w_id << ",s_quantity = " << s_quantity << ",s_ytd = " << s_ytd + << ",s_order_cnt = " << s_order_cnt << ",s_remote_cnt = " << s_remote_cnt + << ",s_data = " << s_data << ",s_dist = ["; + for (auto d : s_dist) { + ss << d << ","; + } + ss << "]"; + return ss.str(); } /// ------------------------- [begin order table] ------------------------- /// -Order::Order(int32_t id, int32_t c_id, int32_t d_id, int32_t w_id, - bool new_order, const std::string& datetime, - RandomGenerator* rand_gen) - : o_id(id), o_c_id(c_id), o_d_id(d_id), o_w_id(w_id), - o_carrier_id(0), o_all_local(kInitAllLocal), +Order::Order(int32_t id, int32_t c_id, int32_t d_id, int32_t w_id, bool new_order, + const std::string& datetime, RandomGenerator* rand_gen) + : o_id(id), + o_c_id(c_id), + o_d_id(d_id), + o_w_id(w_id), + o_carrier_id(0), + o_all_local(kInitAllLocal), o_entry_d(datetime) { - - if (!new_order) { - o_carrier_id = rand_gen->GetRandom(kMinCarrierId, kMaxCarrierId); - } - o_ol_cnt = rand_gen->GetRandom(kMinOrderLineCnt, kMaxOrderLineCnt); + if (!new_order) { + o_carrier_id = rand_gen->GetRandom(kMinCarrierId, kMaxCarrierId); + } + o_ol_cnt = rand_gen->GetRandom(kMinOrderLineCnt, kMaxOrderLineCnt); } -std::string Order::PrimaryKey() const { - return std::to_string(o_w_id) + "_" - + std::to_string(o_d_id) + "_" - + std::to_string(o_id); +std::string Order::PrimaryKey() const { + return std::to_string(o_w_id) + "_" + std::to_string(o_d_id) + "_" + std::to_string(o_id); } std::string Order::ForeignKey() const { - return std::to_string(o_w_id) + "_" - + std::to_string(o_d_id) + "_" - + std::to_string(o_c_id); + return std::to_string(o_w_id) + "_" + std::to_string(o_d_id) + "_" + std::to_string(o_c_id); } std::string Order::ToString() const { - std::stringstream ss; - ss << "o_id = " << o_id - << ",o_c_id = " << o_c_id - << ",o_d_id = " << o_d_id - << ",o_w_id = " << o_w_id - << ",o_carrier_id = " << o_carrier_id - << ",o_ol_cnt = " << o_ol_cnt - << ",o_all_local = " << o_all_local - << ",o_entry_d = " << o_entry_d; - return ss.str(); + std::stringstream ss; + ss << "o_id = " << o_id << ",o_c_id = " << o_c_id << ",o_d_id = " << o_d_id + << ",o_w_id = " << o_w_id << ",o_carrier_id = " << o_carrier_id << ",o_ol_cnt = " << o_ol_cnt + << ",o_all_local = " << o_all_local << ",o_entry_d = " << o_entry_d; + return ss.str(); } /// ------------------------- [begin neworder table] ---------------------- /// - -NewOrder::NewOrder(int32_t o_id, int32_t d_id, int32_t w_id) - : no_o_id(o_id), no_d_id(d_id), no_w_id(w_id) { -} +NewOrder::NewOrder(int32_t o_id, int32_t d_id, int32_t w_id) + : no_o_id(o_id), no_d_id(d_id), no_w_id(w_id) {} std::string NewOrder::ToString() const { - std::stringstream ss; - ss << "no_o_id = " << no_o_id - << ",no_d_id = " << no_d_id - << ",no_w_id = " << no_w_id; - return ss.str(); + std::stringstream ss; + ss << "no_o_id = " << no_o_id << ",no_d_id = " << no_d_id << ",no_w_id = " << no_w_id; + return ss.str(); } std::string NewOrder::PrimaryKey() const { - return std::to_string(no_w_id) - + "_" + std::to_string(no_d_id) - + "_" + std::to_string(no_o_id); + return std::to_string(no_w_id) + "_" + std::to_string(no_d_id) + "_" + std::to_string(no_o_id); } std::string NewOrder::ForeignKey() const { - return std::to_string(no_w_id) - + "_" + std::to_string(no_d_id) - + "_" + std::to_string(no_o_id); + return std::to_string(no_w_id) + "_" + std::to_string(no_d_id) + "_" + std::to_string(no_o_id); } /// ------------------------- [begin orderline table] --------------------- /// -OrderLine::OrderLine(int32_t o_id, int32_t d_id, int32_t w_id, int32_t number, - bool new_order, const std::string& datetime, - RandomGenerator* rand_gen) - : ol_o_id(o_id), ol_d_id(d_id), ol_w_id(w_id), ol_number(number), - ol_supply_w_id(w_id), ol_quantity(kInitQuantity), - ol_amount(0.00f), ol_delivery_d(datetime) { - - ol_i_id = rand_gen->GetRandom(kMinItemId, kMaxItemId); - if (new_order) { - ol_amount = rand_gen->MakeFloat(kOrderLineMinAmount, - kOrderLineMaxAmount, - kOrderLineAmountDigits); - ol_delivery_d = ""; - } - ol_dist_info = rand_gen->MakeAString(kDistLen, kDistLen); +OrderLine::OrderLine(int32_t o_id, int32_t d_id, int32_t w_id, int32_t number, bool new_order, + const std::string& datetime, RandomGenerator* rand_gen) + : ol_o_id(o_id), + ol_d_id(d_id), + ol_w_id(w_id), + ol_number(number), + ol_supply_w_id(w_id), + ol_quantity(kInitQuantity), + ol_amount(0.00f), + ol_delivery_d(datetime) { + ol_i_id = rand_gen->GetRandom(kMinItemId, kMaxItemId); + if (new_order) { + ol_amount = + rand_gen->MakeFloat(kOrderLineMinAmount, kOrderLineMaxAmount, kOrderLineAmountDigits); + ol_delivery_d = ""; + } + ol_dist_info = rand_gen->MakeAString(kDistLen, kDistLen); } std::string OrderLine::PrimaryKey() const { - return std::to_string(ol_w_id) + "_" - + std::to_string(ol_d_id) + "_" - + std::to_string(ol_o_id) + "_" - + std::to_string(ol_number); + return std::to_string(ol_w_id) + "_" + std::to_string(ol_d_id) + "_" + std::to_string(ol_o_id) + + "_" + std::to_string(ol_number); } ForeignKeyMap OrderLine::ForeignKeys() const { - ForeignKeyMap foreign_keys; - std::string order_index = std::to_string(ol_w_id) + "_" - + std::to_string(ol_d_id) + "_" - + std::to_string(ol_o_id); - std::string item_index = std::to_string(ol_supply_w_id) + "_" - + std::to_string(ol_i_id); - foreign_keys["order_index"] = order_index; - foreign_keys["item_index"] = item_index; - return foreign_keys; + ForeignKeyMap foreign_keys; + std::string order_index = + std::to_string(ol_w_id) + "_" + std::to_string(ol_d_id) + "_" + std::to_string(ol_o_id); + std::string item_index = std::to_string(ol_supply_w_id) + "_" + std::to_string(ol_i_id); + foreign_keys["order_index"] = order_index; + foreign_keys["item_index"] = item_index; + return foreign_keys; } std::string OrderLine::ToString() const { - std::stringstream ss; - ss << "ol_o_id = " << ol_o_id - << ",ol_d_id = " << ol_d_id - << ",ol_w_id = " << ol_w_id - << ",ol_number = " << ol_number - << ",ol_i_id = " << ol_i_id - << ",ol_supply_w_id = " << ol_supply_w_id - << ",ol_quantity = " << ol_quantity - << ",ol_amount = " << ol_amount - << ",ol_delivery_d = " << ol_delivery_d - << ",ol_dist_info = " << ol_dist_info; - return ss.str(); + std::stringstream ss; + ss << "ol_o_id = " << ol_o_id << ",ol_d_id = " << ol_d_id << ",ol_w_id = " << ol_w_id + << ",ol_number = " << ol_number << ",ol_i_id = " << ol_i_id + << ",ol_supply_w_id = " << ol_supply_w_id << ",ol_quantity = " << ol_quantity + << ",ol_amount = " << ol_amount << ",ol_delivery_d = " << ol_delivery_d + << ",ol_dist_info = " << ol_dist_info; + return ss.str(); } /// ------------------------- [begin customer table] ---------------------- /// Customer::Customer(int32_t id, int32_t d_id, int32_t w_id, const std::string& datetime, - bool bad_credit, RandomGenerator* rand_gen) - : c_id(id), + bool bad_credit, RandomGenerator* rand_gen) + : c_id(id), c_d_id(d_id), c_w_id(w_id), c_credit_lim(kInitCreditLimit), @@ -252,109 +205,75 @@ Customer::Customer(int32_t id, int32_t d_id, int32_t w_id, const std::string& da c_delivery_cnt(kInitDeliveryCnt), c_middle("OE"), c_since(datetime) { - c_discount = rand_gen->MakeFloat(kMinDisCount, kMaxDisCount, kDisCountDigits); - c_first = rand_gen->MakeAString(kFirstLowerLen, kFirstUpperLen); - c_last = GenLastName(rand_gen, (id <= 1000 ? id : kCustomerCountPerDistrict)); - c_street_1 = rand_gen->MakeAString(kStreetLowerLen, kStreetUpperLen); - c_street_2 = rand_gen->MakeAString(kStreetLowerLen, kStreetUpperLen); - c_city = rand_gen->MakeAString(kCityLowerLen, kCityUpperLen); - c_state = rand_gen->MakeAString(kStateLen,kStateLen); - c_zip = GenZip(rand_gen); - c_phone = rand_gen->MakeNString(kPhoneLen,kPhoneLen); - c_credit = bad_credit ? "BC" : "GC"; - c_data = GenData(rand_gen, kCustomerDataLowerLen, kCustomerDataUpperLen, false); + c_discount = rand_gen->MakeFloat(kMinDisCount, kMaxDisCount, kDisCountDigits); + c_first = rand_gen->MakeAString(kFirstLowerLen, kFirstUpperLen); + c_last = GenLastName(rand_gen, (id <= 1000 ? id : kCustomerCountPerDistrict)); + c_street_1 = rand_gen->MakeAString(kStreetLowerLen, kStreetUpperLen); + c_street_2 = rand_gen->MakeAString(kStreetLowerLen, kStreetUpperLen); + c_city = rand_gen->MakeAString(kCityLowerLen, kCityUpperLen); + c_state = rand_gen->MakeAString(kStateLen, kStateLen); + c_zip = GenZip(rand_gen); + c_phone = rand_gen->MakeNString(kPhoneLen, kPhoneLen); + c_credit = bad_credit ? "BC" : "GC"; + c_data = GenData(rand_gen, kCustomerDataLowerLen, kCustomerDataUpperLen, false); } -std::string Customer::PrimaryKey() const { - return std::to_string(c_w_id) + "_" + std::to_string(c_d_id) - + "_" + std::to_string(c_id); +std::string Customer::PrimaryKey() const { + return std::to_string(c_w_id) + "_" + std::to_string(c_d_id) + "_" + std::to_string(c_id); } -std::string Customer::ForeignKey() const { - return std::to_string(c_w_id) + "_" + std::to_string(c_d_id); +std::string Customer::ForeignKey() const { + return std::to_string(c_w_id) + "_" + std::to_string(c_d_id); } std::string Customer::ToString() const { - std::stringstream ss; - ss << "c_id = " << c_id - << ",c_d_id = " << c_d_id - << ",c_w_id = " << c_w_id - << ",c_credit_lim = " << c_credit_lim - << ",c_discount = " << c_discount - << ",c_balance = " << c_balance - << ",c_ytd_payment = " << c_ytd_payment - << ",c_payment_cnt = " << c_payment_cnt - << ",c_delivery_cnt = " << c_delivery_cnt - << ",c_name = [" << c_first << "," << c_middle << "," << c_last << "]" - << ",c_street_1 = " << c_street_1 - << ",c_street_2 = " << c_street_2 - << ",c_city = " << c_city - << ",c_state = " << c_state - << ",c_zip = " << c_zip - << ",c_phone = " << c_phone - << ",c_since = " << c_since - << ",c_credit = " << c_credit - << ",c_data = " << c_data; - return ss.str(); + std::stringstream ss; + ss << "c_id = " << c_id << ",c_d_id = " << c_d_id << ",c_w_id = " << c_w_id + << ",c_credit_lim = " << c_credit_lim << ",c_discount = " << c_discount + << ",c_balance = " << c_balance << ",c_ytd_payment = " << c_ytd_payment + << ",c_payment_cnt = " << c_payment_cnt << ",c_delivery_cnt = " << c_delivery_cnt + << ",c_name = [" << c_first << "," << c_middle << "," << c_last << "]" + << ",c_street_1 = " << c_street_1 << ",c_street_2 = " << c_street_2 << ",c_city = " << c_city + << ",c_state = " << c_state << ",c_zip = " << c_zip << ",c_phone = " << c_phone + << ",c_since = " << c_since << ",c_credit = " << c_credit << ",c_data = " << c_data; + return ss.str(); } /// ------------------------- [begin history table] ----------------------- /// std::string History::ToString() const { - std::stringstream ss; - ss << "h_c_id = " << h_c_id - << ",h_c_d_id = " << h_c_d_id - << ",h_c_w_id = " << h_c_w_id - << ",h_d_id = " << h_d_id - << ",h_w_id = " << h_w_id - << ",h_amount = " << h_amount - << ",h_date = " << h_date - << ",h_data = " << h_data; - return ss.str(); + std::stringstream ss; + ss << "h_c_id = " << h_c_id << ",h_c_d_id = " << h_c_d_id << ",h_c_w_id = " << h_c_w_id + << ",h_d_id = " << h_d_id << ",h_w_id = " << h_w_id << ",h_amount = " << h_amount + << ",h_date = " << h_date << ",h_data = " << h_data; + return ss.str(); } /// ------------------------- [end tables] -------------------------------- /// -bool TxnResult::State() const { - return status_; -} +bool TxnResult::State() const { return status_; } -void TxnResult::SetState(bool status) { - status_ = status; -} +void TxnResult::SetState(bool status) { status_ = status; } -void TxnResult::SetReason(const std::string& reason) { - reason_ = reason; -} +void TxnResult::SetReason(const std::string& reason) { reason_ = reason; } -void StockLevelResult::SetLowStock(int low_stock) { - low_stock_ = low_stock; -} +void StockLevelResult::SetLowStock(int low_stock) { low_stock_ = low_stock; } -int StockLevelResult::LowStock() const { - return low_stock_; -} +int StockLevelResult::LowStock() const { return low_stock_; } -void PaymentResult::SetSingleLine(const RetTuples& single_line) { - single_line_ = single_line; -} +void PaymentResult::SetSingleLine(const RetTuples& single_line) { single_line_ = single_line; } -void NewOrderResult::AddLine(const RetTuples& line) { - lines_.push_back(line); -} +void NewOrderResult::AddLine(const RetTuples& line) { lines_.push_back(line); } -void NewOrderResult::SetSingleLine(const RetTuples& single_line) { - single_line_ = single_line; -} +void NewOrderResult::SetSingleLine(const RetTuples& single_line) { single_line_ = single_line; } TpccDb* TpccDb::NewTpccDb(const std::string& db_type) { - if (db_type == "tera") { - return new TeraTpccDb(); - } else { - LOG(ERROR) << "not support db:" << db_type; - } - return NULL; + if (db_type == "tera") { + return new TeraTpccDb(); + } else { + LOG(ERROR) << "not support db:" << db_type; + } + return NULL; } -} // namespace tpcc -} // namespace tera - +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/tpccdb.h b/src/benchmark/tpcc/tpccdb.h index 93b3c32f3..11abe474d 100644 --- a/src/benchmark/tpcc/tpccdb.h +++ b/src/benchmark/tpcc/tpccdb.h @@ -27,445 +27,435 @@ typedef std::unordered_set IdSet; typedef std::unordered_map ForeignKeyMap; typedef std::unordered_map RetTuples; - inline float GenTax(RandomGenerator* rand_gen) { - return rand_gen->MakeFloat(kTaxMax, kTaxMin, kTaxDigits); + return rand_gen->MakeFloat(kTaxMax, kTaxMin, kTaxDigits); } inline std::string GenZip(RandomGenerator* rand_gen) { - return rand_gen->MakeNString(kZipLen, kZipLen); + return rand_gen->MakeNString(kZipLen, kZipLen); } -inline std::string GenData(RandomGenerator* rand_gen, - int lower_len, - int upper_len, +inline std::string GenData(RandomGenerator* rand_gen, int lower_len, int upper_len, bool is_original) { - std::string ret = rand_gen->MakeAString(lower_len, upper_len); - if (is_original) { - int pos = rand_gen->GetRandom(0, ret.size() - 8); - ret = ret.replace(pos, 8, "ORIGINAL"); - } - return ret; + std::string ret = rand_gen->MakeAString(lower_len, upper_len); + if (is_original) { + int pos = rand_gen->GetRandom(0, ret.size() - 8); + ret = ret.replace(pos, 8, "ORIGINAL"); + } + return ret; } inline std::string GenLastName(RandomGenerator* rand_gen, int id) { - if (id > 999) { - id = rand_gen->NURand(255, 0, std::min(999, id - 1)); - } - std::vector labels = {"BAR", "OUGHT", "ABLE", "PRI", "PRES", - "ESE", "ANTI", "CALLY", "ATION", "EING"}; - return labels[id / 100] + labels[(id / 10) % 10] + labels[id % 10]; + if (id > 999) { + id = rand_gen->NURand(255, 0, std::min(999, id - 1)); + } + std::vector labels = {"BAR", "OUGHT", "ABLE", "PRI", "PRES", + "ESE", "ANTI", "CALLY", "ATION", "EING"}; + return labels[id / 100] + labels[(id / 10) % 10] + labels[id % 10]; } inline IdSet PickUniqueIdSet(RandomGenerator* rand_gen, size_t cnt, int lower_id, int upper_id) { - IdSet ids; - while(ids.size() < cnt) { - int tmp_id = rand_gen->GetRandom(lower_id, upper_id); - if (ids.find(tmp_id) == ids.end()) { - ids.insert(tmp_id); - } + IdSet ids; + while (ids.size() < cnt) { + int tmp_id = rand_gen->GetRandom(lower_id, upper_id); + if (ids.find(tmp_id) == ids.end()) { + ids.insert(tmp_id); } - return ids; + } + return ids; } struct Item { - int32_t i_id; - int32_t i_im_id; - float i_price; - std::string i_name; - std::string i_data; - - Item(int32_t id, bool is_original, RandomGenerator* rand_gen) : i_id(id) { - i_im_id = rand_gen->GetRandom(kItemMinIm, kItemMaxIm); - i_price = rand_gen->MakeFloat(kItemMinPrice, kItemMaxPrice, kItemPriceDigits); - i_name = rand_gen->MakeAString(kItemMinNameLen, kItemMaxNameLen); - i_data = GenData(rand_gen, kItemMinDataLen, kItemMaxDataLen, is_original); - } - - std::string PrimaryKey() const { return std::to_string(i_id); } - std::string ToString() const; + int32_t i_id; + int32_t i_im_id; + float i_price; + std::string i_name; + std::string i_data; + + Item(int32_t id, bool is_original, RandomGenerator* rand_gen) : i_id(id) { + i_im_id = rand_gen->GetRandom(kItemMinIm, kItemMaxIm); + i_price = rand_gen->MakeFloat(kItemMinPrice, kItemMaxPrice, kItemPriceDigits); + i_name = rand_gen->MakeAString(kItemMinNameLen, kItemMaxNameLen); + i_data = GenData(rand_gen, kItemMinDataLen, kItemMaxDataLen, is_original); + } + + std::string PrimaryKey() const { return std::to_string(i_id); } + std::string ToString() const; }; struct Warehouse { - int32_t w_id; - float w_tax; - float w_ytd; - std::string w_name; - std::string w_street_1; - std::string w_street_2; - std::string w_city; - std::string w_state; - std::string w_zip; - Warehouse(int32_t id, RandomGenerator* rand_gen) : w_id(id) { - w_tax = GenTax(rand_gen); - w_ytd = kInitYTD; - w_name = rand_gen->MakeAString(kWareHouseNameLowerLen, kWareHouseNameUpperLen); - w_street_1 = rand_gen->MakeAString(kStreetLowerLen, kStreetUpperLen); - w_street_2 = rand_gen->MakeAString(kStreetLowerLen, kStreetUpperLen); - w_city = rand_gen->MakeAString(kCityLowerLen, kCityUpperLen); - w_state = rand_gen->MakeAString(kStateLen,kStateLen); - w_zip = GenZip(rand_gen); - } - std::string PrimaryKey() const { return std::to_string(w_id); } - std::string ToString() const; + int32_t w_id; + float w_tax; + float w_ytd; + std::string w_name; + std::string w_street_1; + std::string w_street_2; + std::string w_city; + std::string w_state; + std::string w_zip; + Warehouse(int32_t id, RandomGenerator* rand_gen) : w_id(id) { + w_tax = GenTax(rand_gen); + w_ytd = kInitYTD; + w_name = rand_gen->MakeAString(kWareHouseNameLowerLen, kWareHouseNameUpperLen); + w_street_1 = rand_gen->MakeAString(kStreetLowerLen, kStreetUpperLen); + w_street_2 = rand_gen->MakeAString(kStreetLowerLen, kStreetUpperLen); + w_city = rand_gen->MakeAString(kCityLowerLen, kCityUpperLen); + w_state = rand_gen->MakeAString(kStateLen, kStateLen); + w_zip = GenZip(rand_gen); + } + std::string PrimaryKey() const { return std::to_string(w_id); } + std::string ToString() const; }; struct District { - int32_t d_id; - int32_t d_w_id; - float d_tax; - float d_ytd; - int32_t d_next_o_id; - std::string d_name; - std::string d_street_1; - std::string d_street_2; - std::string d_city; - std::string d_state; - std::string d_zip; - - District(int32_t id, int32_t w_id, RandomGenerator* rand_gen); - std::string PrimaryKey() const; - std::string ForeignKey() const; - std::string ToString() const; + int32_t d_id; + int32_t d_w_id; + float d_tax; + float d_ytd; + int32_t d_next_o_id; + std::string d_name; + std::string d_street_1; + std::string d_street_2; + std::string d_city; + std::string d_state; + std::string d_zip; + + District(int32_t id, int32_t w_id, RandomGenerator* rand_gen); + std::string PrimaryKey() const; + std::string ForeignKey() const; + std::string ToString() const; }; struct Stock { -int32_t s_i_id; - int32_t s_w_id; - int32_t s_quantity; - int32_t s_ytd; - int32_t s_order_cnt; - int32_t s_remote_cnt; - std::vector s_dist; - std::string s_data; - - Stock(int32_t id, int32_t w_id, bool is_original, RandomGenerator* rand_gen); - std::string PrimaryKey() const; - std::string ForeignKey() const; - std::string ToString() const; + int32_t s_i_id; + int32_t s_w_id; + int32_t s_quantity; + int32_t s_ytd; + int32_t s_order_cnt; + int32_t s_remote_cnt; + std::vector s_dist; + std::string s_data; + + Stock(int32_t id, int32_t w_id, bool is_original, RandomGenerator* rand_gen); + std::string PrimaryKey() const; + std::string ForeignKey() const; + std::string ToString() const; }; struct Customer { - int32_t c_id; - int32_t c_d_id; - int32_t c_w_id; - float c_credit_lim; - float c_discount; - float c_balance; - float c_ytd_payment; - int32_t c_payment_cnt; - int32_t c_delivery_cnt; - std::string c_first; - std::string c_middle; - std::string c_last; - std::string c_street_1; - std::string c_street_2; - std::string c_city; - std::string c_state; - std::string c_zip; - std::string c_phone; - std::string c_since; - std::string c_credit; - std::string c_data; - Customer(int32_t id, int32_t d_id, int32_t w_id, const std::string& datetime, - bool bad_credit, RandomGenerator* rand_gen); - std::string PrimaryKey() const; - std::string ForeignKey() const; - std::string ToString() const; + int32_t c_id; + int32_t c_d_id; + int32_t c_w_id; + float c_credit_lim; + float c_discount; + float c_balance; + float c_ytd_payment; + int32_t c_payment_cnt; + int32_t c_delivery_cnt; + std::string c_first; + std::string c_middle; + std::string c_last; + std::string c_street_1; + std::string c_street_2; + std::string c_city; + std::string c_state; + std::string c_zip; + std::string c_phone; + std::string c_since; + std::string c_credit; + std::string c_data; + Customer(int32_t id, int32_t d_id, int32_t w_id, const std::string& datetime, bool bad_credit, + RandomGenerator* rand_gen); + std::string PrimaryKey() const; + std::string ForeignKey() const; + std::string ToString() const; }; struct Order { - int32_t o_id; - int32_t o_c_id; - int32_t o_d_id; - int32_t o_w_id; - int32_t o_carrier_id; - int32_t o_ol_cnt; - - // If the order includes only home order-lines, - // then O_ALL_LOCAL is set to 1, otherwise O_ALL_LOCAL is set to 0. - int32_t o_all_local; - std::string o_entry_d; - - Order(int32_t id, int32_t c_id, int32_t d_id, int32_t w_id, bool new_order, - const std::string& datetime, RandomGenerator* rand_gen); - std::string PrimaryKey() const; - std::string ForeignKey() const; - std::string ToString() const; + int32_t o_id; + int32_t o_c_id; + int32_t o_d_id; + int32_t o_w_id; + int32_t o_carrier_id; + int32_t o_ol_cnt; + + // If the order includes only home order-lines, + // then O_ALL_LOCAL is set to 1, otherwise O_ALL_LOCAL is set to 0. + int32_t o_all_local; + std::string o_entry_d; + + Order(int32_t id, int32_t c_id, int32_t d_id, int32_t w_id, bool new_order, + const std::string& datetime, RandomGenerator* rand_gen); + std::string PrimaryKey() const; + std::string ForeignKey() const; + std::string ToString() const; }; -// An order-line is said to be 'home' if it is supplied by the home warehouse +// An order-line is said to be 'home' if it is supplied by the home warehouse // (i.e., when OL_SUPPLY_W_ID equals O_W_ID). -// -// An order-line is said to be remote when it is supplied by a remote warehouse +// +// An order-line is said to be remote when it is supplied by a remote warehouse // (i.e., when OL_SUPPLY_W_ID does not equal O_W_ID). // struct OrderLine { - int32_t ol_o_id; - int32_t ol_d_id; - int32_t ol_w_id; - int32_t ol_number; - int32_t ol_i_id; - int32_t ol_supply_w_id; - int32_t ol_quantity; - float ol_amount; - std::string ol_delivery_d; - std::string ol_dist_info; - - OrderLine(int32_t o_id, int32_t d_id, int32_t w_id, int32_t number, - bool new_order, const std::string& datetime, - RandomGenerator* rand_gen); - std::string PrimaryKey() const; - ForeignKeyMap ForeignKeys() const; - std::string ToString() const; + int32_t ol_o_id; + int32_t ol_d_id; + int32_t ol_w_id; + int32_t ol_number; + int32_t ol_i_id; + int32_t ol_supply_w_id; + int32_t ol_quantity; + float ol_amount; + std::string ol_delivery_d; + std::string ol_dist_info; + + OrderLine(int32_t o_id, int32_t d_id, int32_t w_id, int32_t number, bool new_order, + const std::string& datetime, RandomGenerator* rand_gen); + std::string PrimaryKey() const; + ForeignKeyMap ForeignKeys() const; + std::string ToString() const; }; struct NewOrder { - int32_t no_o_id; - int32_t no_d_id; - int32_t no_w_id; - - NewOrder(int32_t o_id, int32_t d_id, int32_t w_id); - std::string PrimaryKey() const; - std::string ForeignKey() const; - std::string ToString() const; + int32_t no_o_id; + int32_t no_d_id; + int32_t no_w_id; + + NewOrder(int32_t o_id, int32_t d_id, int32_t w_id); + std::string PrimaryKey() const; + std::string ForeignKey() const; + std::string ToString() const; }; struct History { - int32_t h_c_id; - int32_t h_c_d_id; - int32_t h_c_w_id; - int32_t h_d_id; - int32_t h_w_id; - float h_amount; - std::string h_date; - std::string h_data; - - History(int32_t c_id, int32_t d_id, int32_t w_id, const std::string& datetime, - RandomGenerator* rand_gen) - : h_c_id(c_id), h_c_d_id(d_id), h_c_w_id(w_id), h_d_id(d_id), h_w_id(w_id), - h_amount(kInitHistoryAmount), h_date(datetime) { - h_data = rand_gen->MakeAString(kHistoryDataLowerLen, kHistoryDataUpperLen); - } - std::string PrimaryKey() const { return std::to_string(h_c_id); } - std::string ToString() const; + int32_t h_c_id; + int32_t h_c_d_id; + int32_t h_c_w_id; + int32_t h_d_id; + int32_t h_w_id; + float h_amount; + std::string h_date; + std::string h_data; + + History(int32_t c_id, int32_t d_id, int32_t w_id, const std::string& datetime, + RandomGenerator* rand_gen) + : h_c_id(c_id), + h_c_d_id(d_id), + h_c_w_id(w_id), + h_d_id(d_id), + h_w_id(w_id), + h_amount(kInitHistoryAmount), + h_date(datetime) { + h_data = rand_gen->MakeAString(kHistoryDataLowerLen, kHistoryDataUpperLen); + } + std::string PrimaryKey() const { return std::to_string(h_c_id); } + std::string ToString() const; }; struct NewOrderInfo { - bool need_failed; - int32_t o_all_local; - int32_t o_ol_cnt; - std::vector ol_supply_w_ids; - std::vector ol_i_ids; - std::vector ol_quantities; + bool need_failed; + int32_t o_all_local; + int32_t o_ol_cnt; + std::vector ol_supply_w_ids; + std::vector ol_i_ids; + std::vector ol_quantities; }; -enum TpccTables -{ - kItemTable = 0, - kWarehouseTable = 1, - kDistrictTable = 2, - kCustomerTable = 3, - kHistoryTable = 4, - kStockTable = 5, - kOrderTable = 6, - kOrderLineTable = 7, - kNewOrderTable = 8, - - // the index of table - kCustomerLastIndex = 9, - kOrderIndex = 10, - kHistoryIndex = 11 +enum TpccTables { + kItemTable = 0, + kWarehouseTable = 1, + kDistrictTable = 2, + kCustomerTable = 3, + kHistoryTable = 4, + kStockTable = 5, + kOrderTable = 6, + kOrderLineTable = 7, + kNewOrderTable = 8, + + // the index of table + kCustomerLastIndex = 9, + kOrderIndex = 10, + kHistoryIndex = 11 }; /// ------------------------- transaction result ---------------------------/// class TxnResult { -public: - void SetState(bool status); - bool State() const; - void SetReason(const std::string& reason); - const std::string& Reason() const; -private: - bool status_; - std::string reason_; + public: + void SetState(bool status); + bool State() const; + void SetReason(const std::string& reason); + const std::string& Reason() const; + + private: + bool status_; + std::string reason_; }; class StockLevelResult : public TxnResult { -public: - void SetLowStock(int low_stock); - int LowStock() const; -private: - int low_stock_; + public: + void SetLowStock(int low_stock); + int LowStock() const; + + private: + int low_stock_; }; class PaymentResult : public TxnResult { -public: - void SetSingleLine(const RetTuples& single_line); -private: - RetTuples single_line_; -}; + public: + void SetSingleLine(const RetTuples& single_line); -class NewOrderResult : public TxnResult { -public: - void AddLine(const RetTuples& line); - void SetSingleLine(const RetTuples& single_line); -private: - std::vector lines_; - RetTuples single_line_; + private: + RetTuples single_line_; }; -class OrderStatusResult : public TxnResult { +class NewOrderResult : public TxnResult { + public: + void AddLine(const RetTuples& line); + void SetSingleLine(const RetTuples& single_line); + private: + std::vector lines_; + RetTuples single_line_; }; -class DeliveryResult : public TxnResult { - -}; +class OrderStatusResult : public TxnResult {}; + +class DeliveryResult : public TxnResult {}; class TpccDb { -public: - TpccDb(){} - virtual ~TpccDb(){} - - // init db - virtual bool CreateTables() = 0; - virtual bool CleanTables() = 0; - - // for insert table - virtual bool InsertItem(const Item& i) = 0; - - virtual bool InsertWarehouse(const Warehouse& w) = 0; - - virtual bool InsertDistrict(const District& d) = 0; - - virtual bool InsertCustomer(const Customer& c) = 0; - - virtual bool InsertHistory(const History& h) = 0; - - virtual bool InsertStock(const Stock& s) = 0; - - virtual bool InsertOrder(const Order& o) = 0; - - virtual bool InsertOrderLine(const OrderLine& ol) = 0; - - virtual bool InsertNewOrder(const NewOrder& no) = 0; - - // for transaction - - // The Stock-Level Transaction [Revision 5.11 - Page 44] - // - // (warehouse_id, district_id) - // is the primarykey of t_district - // Each terminal must use a unique value of (W_ID, D_ID) that is constant - // over the whole measurement, i.e., D_IDs cannot be re-used within a warehouse - // - // threshold - // The threshold of minimum quantity in stock (threshold) is selected - // at random within [10 .. 20]. - // - virtual void StockLevelTxn(int32_t warehouse_id, int32_t district_id, - int32_t threshold, - StockLevelResult* ret) = 0; - - // The Delivery Transaction [Revision 5.11 - Page 40] - // - // warehouse_id - // For any given terminal, the home warehouse number (W_ID) is constant - // over the whole measurement interval - // - // carrier_id - // The carrier number (O_CARRIER_ID) is randomly selected within [1 .. 10]. - // - // delivery_datetime - // The delivery date (OL_DELIVERY_D) is generated within the - // SUT by using the current system date and time. - // - virtual void DeliveryTxn(int32_t warehouse_id, - int32_t carrier_id, - const std::string& delivery_datetime, - DeliveryResult* ret) = 0; - - // The Order-Status Transaction [Revision 5.11 - Page 37] - // - // warehouse_id - // For any given terminal, the home warehouse number (W_ID) is constant - // over the whole measurement interval - // - // district_id - // The district number (D_ID) is randomly selected within [1 .. 10] - // from the home warehouse (D_W_ID = W_ID). - // - // c_warehouse_id, c_district_id, last_name - // customer is randomly selected - // 60% of the time by last name (C_W_ID, C_D_ID, C_LAST) - // from the selected district (C_D_ID = D_ID) - // and the home warehouse number (C_W_ID = W_ID). - // - // c_warehouse_id, c_district_id, customer_id - // 40% of the time by number (C_W_ID, C_D_ID, C_ID) - // from the selected district (C_D_ID = D_ID) - // and the home warehouse number (C_W_ID = W_ID). - // - virtual void OrderStatusTxn(bool by_last_name, - int32_t warehouse_id, int32_t district_id, - int32_t c_customer_id, - const std::string& last_name, - OrderStatusResult* ret) = 0; - - // The Payment Transaction [Revision 5.11 - Page 33] - // - // warehouse_id - // For any given terminal, the home warehouse number (W_ID) is constant - // over the whole measurement interval - // - // district_id - // The district number (D_ID) is randomly selected within [1 .. 10] - // from the home warehouse (D_W_ID = W_ID). - // - // c_warehouse_id, c_district_id, last_name - // The customer is randomly selected - // 1) 60% of the time by last name (C_W_ID , C_D_ID, C_LAST) - // c_warehouse_id, c_district_id, customer_id - // The customer is randomly selected - // 2) 40% of the time by number (C_W_ID , C_D_ID , C_ID). - // - // h_amount - // The payment amount (H_AMOUNT) is randomly selected within - // [1.00 .. 5,000.00]. - // - virtual void PaymentTxn(bool by_last_name, - int32_t warehouse_id, int32_t district_id, - int32_t c_warehouse_id, int32_t c_district_id, - int32_t c_customer_id, - const std::string& last_name, - int32_t h_amount, - PaymentResult* ret) = 0; - - - // The New-Order Transaction [Revision 5.11 - Page 28] - // warehouse_id - // For any given terminal, the home warehouse number (W_ID) is constant - // over the whole measurement interval - // - // district_id - // The district number (D_ID) is randomly selected within [1 .. 10] - // from the home warehouse (D_W_ID = W_ID). - // - // customer_id - // The non-uniform random customer number (C_ID) is selected using - // the NURand(1023,1,3000) function from the selected district - // number (C_D_ID = D_ID) and the home warehouse number (C_W_ID = W_ID). - // - virtual void NewOrderTxn(int32_t warehouse_id, - int32_t district_id, - int32_t customer_id, const NewOrderInfo& info, - NewOrderResult* ret) = 0; - - static TpccDb* NewTpccDb(const std::string& db_type); + public: + TpccDb() {} + virtual ~TpccDb() {} + + // init db + virtual bool CreateTables() = 0; + virtual bool CleanTables() = 0; + + // for insert table + virtual bool InsertItem(const Item& i) = 0; + + virtual bool InsertWarehouse(const Warehouse& w) = 0; + + virtual bool InsertDistrict(const District& d) = 0; + + virtual bool InsertCustomer(const Customer& c) = 0; + + virtual bool InsertHistory(const History& h) = 0; + + virtual bool InsertStock(const Stock& s) = 0; + + virtual bool InsertOrder(const Order& o) = 0; + + virtual bool InsertOrderLine(const OrderLine& ol) = 0; + + virtual bool InsertNewOrder(const NewOrder& no) = 0; + + // for transaction + + // The Stock-Level Transaction [Revision 5.11 - Page 44] + // + // (warehouse_id, district_id) + // is the primarykey of t_district + // Each terminal must use a unique value of (W_ID, D_ID) that is constant + // over the whole measurement, i.e., D_IDs cannot be re-used within a + // warehouse + // + // threshold + // The threshold of minimum quantity in stock (threshold) is selected + // at random within [10 .. 20]. + // + virtual void StockLevelTxn(int32_t warehouse_id, int32_t district_id, int32_t threshold, + StockLevelResult* ret) = 0; + + // The Delivery Transaction [Revision 5.11 - Page 40] + // + // warehouse_id + // For any given terminal, the home warehouse number (W_ID) is constant + // over the whole measurement interval + // + // carrier_id + // The carrier number (O_CARRIER_ID) is randomly selected within [1 .. + // 10]. + // + // delivery_datetime + // The delivery date (OL_DELIVERY_D) is generated within the + // SUT by using the current system date and time. + // + virtual void DeliveryTxn(int32_t warehouse_id, int32_t carrier_id, + const std::string& delivery_datetime, DeliveryResult* ret) = 0; + + // The Order-Status Transaction [Revision 5.11 - Page 37] + // + // warehouse_id + // For any given terminal, the home warehouse number (W_ID) is constant + // over the whole measurement interval + // + // district_id + // The district number (D_ID) is randomly selected within [1 .. 10] + // from the home warehouse (D_W_ID = W_ID). + // + // c_warehouse_id, c_district_id, last_name + // customer is randomly selected + // 60% of the time by last name (C_W_ID, C_D_ID, C_LAST) + // from the selected district (C_D_ID = D_ID) + // and the home warehouse number (C_W_ID = W_ID). + // + // c_warehouse_id, c_district_id, customer_id + // 40% of the time by number (C_W_ID, C_D_ID, C_ID) + // from the selected district (C_D_ID = D_ID) + // and the home warehouse number (C_W_ID = W_ID). + // + virtual void OrderStatusTxn(bool by_last_name, int32_t warehouse_id, int32_t district_id, + int32_t c_customer_id, const std::string& last_name, + OrderStatusResult* ret) = 0; + + // The Payment Transaction [Revision 5.11 - Page 33] + // + // warehouse_id + // For any given terminal, the home warehouse number (W_ID) is constant + // over the whole measurement interval + // + // district_id + // The district number (D_ID) is randomly selected within [1 .. 10] + // from the home warehouse (D_W_ID = W_ID). + // + // c_warehouse_id, c_district_id, last_name + // The customer is randomly selected + // 1) 60% of the time by last name (C_W_ID , C_D_ID, C_LAST) + // c_warehouse_id, c_district_id, customer_id + // The customer is randomly selected + // 2) 40% of the time by number (C_W_ID , C_D_ID , C_ID). + // + // h_amount + // The payment amount (H_AMOUNT) is randomly selected within + // [1.00 .. 5,000.00]. + // + virtual void PaymentTxn(bool by_last_name, int32_t warehouse_id, int32_t district_id, + int32_t c_warehouse_id, int32_t c_district_id, int32_t c_customer_id, + const std::string& last_name, int32_t h_amount, PaymentResult* ret) = 0; + + // The New-Order Transaction [Revision 5.11 - Page 28] + // warehouse_id + // For any given terminal, the home warehouse number (W_ID) is constant + // over the whole measurement interval + // + // district_id + // The district number (D_ID) is randomly selected within [1 .. 10] + // from the home warehouse (D_W_ID = W_ID). + // + // customer_id + // The non-uniform random customer number (C_ID) is selected using + // the NURand(1023,1,3000) function from the selected district + // number (C_D_ID = D_ID) and the home warehouse number (C_W_ID = W_ID). + // + virtual void NewOrderTxn(int32_t warehouse_id, int32_t district_id, int32_t customer_id, + const NewOrderInfo& info, NewOrderResult* ret) = 0; + + static TpccDb* NewTpccDb(const std::string& db_type); }; -} // namespace tpcc -} // namespace tera +} // namespace tpcc +} // namespace tera #endif /* TERA_BENCHMARK_TPCC_TPCCDB_H */ diff --git a/src/common/atomic.h b/src/common/atomic.h index 195a7b0da..195f16ec1 100644 --- a/src/common/atomic.h +++ b/src/common/atomic.h @@ -2,107 +2,52 @@ // Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include +#include namespace tera { -static inline int atomic_add(volatile int *mem, int add) -{ - asm volatile( - "lock xadd %0, (%1);" - : "=a"(add) - : "r"(mem), "a"(add) - : "memory" - ); - return add; +static inline int atomic_add(volatile int *mem, int add) { + asm volatile("lock xadd %0, (%1);" : "=a"(add) : "r"(mem), "a"(add) : "memory"); + return add; } -static inline int64_t atomic_add64(volatile int64_t* mem, int64_t add) -{ - asm volatile( - "lock xaddq %0, (%1)" - : "=a" (add) - : "r" (mem), "a" (add) - : "memory" - ); - return add; +static inline int64_t atomic_add64(volatile int64_t *mem, int64_t add) { + asm volatile("lock xaddq %0, (%1)" : "=a"(add) : "r"(mem), "a"(add) : "memory"); + return add; } -static inline void atomic_inc(volatile int *mem) -{ - asm volatile( - "lock incl %0;" - : "=m"(*mem) - : "m"(*mem) - ); +static inline void atomic_inc(volatile int *mem) { + asm volatile("lock incl %0;" : "=m"(*mem) : "m"(*mem)); } -static inline void atomic_inc64(volatile int64_t *mem) -{ - asm volatile( - "lock incq %0;" - : "=m"(*mem) - : "m"(*mem) - ); +static inline void atomic_inc64(volatile int64_t *mem) { + asm volatile("lock incq %0;" : "=m"(*mem) : "m"(*mem)); } -static inline void atomic_dec(volatile int *mem) -{ - asm volatile( - "lock decl %0;" - : "=m"(*mem) - : "m"(*mem) - ); +static inline void atomic_dec(volatile int *mem) { + asm volatile("lock decl %0;" : "=m"(*mem) : "m"(*mem)); } -static inline void atomic_dec64(volatile int64_t *mem) -{ - asm volatile( - "lock decq %0;" - : "=m"(*mem) - : "m"(*mem) - ); +static inline void atomic_dec64(volatile int64_t *mem) { + asm volatile("lock decq %0;" : "=m"(*mem) : "m"(*mem)); } -static inline int atomic_swap(volatile void *lockword, int value) -{ - asm volatile( - "lock xchg %0, (%1);" - : "=a"(value) - : "r"(lockword), "a"(value) - : "memory" - ); - return value; +static inline int atomic_swap(volatile void *lockword, int value) { + asm volatile("lock xchg %0, (%1);" : "=a"(value) : "r"(lockword), "a"(value) : "memory"); + return value; } -static inline int64_t atomic_swap64(volatile void *lockword, int64_t value) -{ - asm volatile( - "lock xchg %0, (%1);" - : "=a"(value) - : "r"(lockword), "a"(value) - : "memory" - ); - return value; +static inline int64_t atomic_swap64(volatile void *lockword, int64_t value) { + asm volatile("lock xchg %0, (%1);" : "=a"(value) : "r"(lockword), "a"(value) : "memory"); + return value; } -static inline int atomic_comp_swap(volatile void *mem, int xchg, int cmp) -{ - asm volatile( - "lock cmpxchg %1, (%2)" - :"=a"(cmp) - :"d"(xchg), "r"(mem), "a"(cmp) - ); - return cmp; +static inline int atomic_comp_swap(volatile void *mem, int xchg, int cmp) { + asm volatile("lock cmpxchg %1, (%2)" : "=a"(cmp) : "d"(xchg), "r"(mem), "a"(cmp)); + return cmp; } -static inline int64_t atomic_comp_swap64(volatile void *mem, int64_t xchg, int64_t cmp) -{ - asm volatile( - "lock cmpxchg %1, (%2)" - :"=a"(cmp) - :"d"(xchg), "r"(mem), "a"(cmp) - ); - return cmp; +static inline int64_t atomic_comp_swap64(volatile void *mem, int64_t xchg, int64_t cmp) { + asm volatile("lock cmpxchg %1, (%2)" : "=a"(cmp) : "d"(xchg), "r"(mem), "a"(cmp)); + return cmp; +} } - -} diff --git a/src/common/base/ascii.h b/src/common/base/ascii.h index 7406bccc7..490adf747 100644 --- a/src/common/base/ascii.h +++ b/src/common/base/ascii.h @@ -14,123 +14,72 @@ /// 里定义的函数。 // 用 struct 来用做强的 namespace,禁止使用者 using。 -struct Ascii -{ -private: - Ascii(); - ~Ascii(); +struct Ascii { + private: + Ascii(); + ~Ascii(); -private: - /// 字符类型的掩码 - enum CharTypeMask - { - kUpper = 1 << 0, - kLower = 1 << 1, - kDigit = 1 << 2, - kHexDigit = 1 << 3, - kBlank = 1 << 4, - kSpace = 1 << 5, - kControl = 1 << 6, - kPunct = 1 << 7, - kPrint = 1 << 8, - kGraph = 1 << 9, - }; + private: + /// 字符类型的掩码 + enum CharTypeMask { + kUpper = 1 << 0, + kLower = 1 << 1, + kDigit = 1 << 2, + kHexDigit = 1 << 3, + kBlank = 1 << 4, + kSpace = 1 << 5, + kControl = 1 << 6, + kPunct = 1 << 7, + kPrint = 1 << 8, + kGraph = 1 << 9, + }; -public: - /** 判断是不是有效的 ASCII 码 */ - static bool IsValid(char c) - { - return (c & 0x80) == 0; - } + public: + /** 判断是不是有效的 ASCII 码 */ + static bool IsValid(char c) { return (c & 0x80) == 0; } - static inline bool IsLower(char c) - { - return CharIncludeAnyTypeMask(c, kLower); - } + static inline bool IsLower(char c) { return CharIncludeAnyTypeMask(c, kLower); } - static inline bool IsUpper(char c) - { - return CharIncludeAnyTypeMask(c, kUpper); - } + static inline bool IsUpper(char c) { return CharIncludeAnyTypeMask(c, kUpper); } - /** 判断是否为字母 */ - static bool IsAlpha(char c) - { - return CharIncludeAnyTypeMask(c, kUpper | kLower); - } + /** 判断是否为字母 */ + static bool IsAlpha(char c) { return CharIncludeAnyTypeMask(c, kUpper | kLower); } - /** 判断是否为数字 */ - static bool IsDigit(char c) - { - return CharIncludeAnyTypeMask(c, kDigit); - } + /** 判断是否为数字 */ + static bool IsDigit(char c) { return CharIncludeAnyTypeMask(c, kDigit); } - /** 判断是否为英文或数字 */ - static bool IsAlphaNumber(char c) - { - return CharIncludeAnyTypeMask(c, kUpper | kLower | kDigit); - } + /** 判断是否为英文或数字 */ + static bool IsAlphaNumber(char c) { return CharIncludeAnyTypeMask(c, kUpper | kLower | kDigit); } - /** 判断是否为空白字符。空格,'\t', ' ' 算作空白字符*/ - static bool IsBlank(char c) - { - return CharIncludeAnyTypeMask(c, kBlank); - } + /** 判断是否为空白字符。空格,'\t', ' ' 算作空白字符*/ + static bool IsBlank(char c) { return CharIncludeAnyTypeMask(c, kBlank); } - /** 判断是否为间隔字符。*/ - static inline bool IsSpace(char c) - { - return CharIncludeAnyTypeMask(c, kSpace); - } + /** 判断是否为间隔字符。*/ + static inline bool IsSpace(char c) { return CharIncludeAnyTypeMask(c, kSpace); } - /** 判断是否为控制字符。*/ - static bool IsControl(char c) - { - return CharIncludeAnyTypeMask(c, kControl); - } + /** 判断是否为控制字符。*/ + static bool IsControl(char c) { return CharIncludeAnyTypeMask(c, kControl); } - /** 判断是否为标点符号字符。*/ - static inline bool IsPunct(char c) - { - return CharIncludeAnyTypeMask(c, kPunct); - } + /** 判断是否为标点符号字符。*/ + static inline bool IsPunct(char c) { return CharIncludeAnyTypeMask(c, kPunct); } - /** 判断是否为十六进制数字字符。*/ - static inline bool IsHexDigit(char c) - { - return CharIncludeAnyTypeMask(c, kHexDigit); - } + /** 判断是否为十六进制数字字符。*/ + static inline bool IsHexDigit(char c) { return CharIncludeAnyTypeMask(c, kHexDigit); } - /** 判断是否为可见字符。*/ - static inline bool IsGraph(char c) - { - return CharIncludeAnyTypeMask(c, kGraph); - } + /** 判断是否为可见字符。*/ + static inline bool IsGraph(char c) { return CharIncludeAnyTypeMask(c, kGraph); } - /** 判断是否为可打印字符。*/ - static inline bool IsPrint(char c) - { - return CharIncludeAnyTypeMask(c, kPrint); - } + /** 判断是否为可打印字符。*/ + static inline bool IsPrint(char c) { return CharIncludeAnyTypeMask(c, kPrint); } - static inline char ToAscii(char c) - { - return c & 0x7F; - } + static inline char ToAscii(char c) { return c & 0x7F; } - static inline char ToLower(char c) - { - return IsUpper(c) ? c + ('a' - 'A') : c; - } + static inline char ToLower(char c) { return IsUpper(c) ? c + ('a' - 'A') : c; } - static inline char ToUpper(char c) - { - return IsLower(c) ? c - ('a' - 'A') : c; - } + static inline char ToUpper(char c) { return IsLower(c) ? c - ('a' - 'A') : c; } -private: - static inline int GetCharTypeMask(char c) - { + private: + static inline int GetCharTypeMask(char c) { #if 0 // // 此表由以下代码生成: // #include @@ -158,152 +107,147 @@ struct Ascii // 编译后以下面的命令运行: // $ LC_ALL=C ./a.out #endif - static const uint16_t table[UCHAR_MAX + 1] = - { - /* 0x00( ) */ kControl | 0, - /* 0x01( ) */ kControl | 0, - /* 0x02( ) */ kControl | 0, - /* 0x03( ) */ kControl | 0, - /* 0x04( ) */ kControl | 0, - /* 0x05( ) */ kControl | 0, - /* 0x06( ) */ kControl | 0, - /* 0x07( ) */ kControl | 0, - /* 0x08( ) */ kControl | 0, - /* 0x09( ) */ kBlank | kSpace | kControl | 0, - /* 0x0a( ) */ kSpace | kControl | 0, - /* 0x0b( ) */ kSpace | kControl | 0, - /* 0x0c( ) */ kSpace | kControl | 0, - /* 0x0d( ) */ kSpace | kControl | 0, - /* 0x0e( ) */ kControl | 0, - /* 0x0f( ) */ kControl | 0, - /* 0x10( ) */ kControl | 0, - /* 0x11( ) */ kControl | 0, - /* 0x12( ) */ kControl | 0, - /* 0x13( ) */ kControl | 0, - /* 0x14( ) */ kControl | 0, - /* 0x15( ) */ kControl | 0, - /* 0x16( ) */ kControl | 0, - /* 0x17( ) */ kControl | 0, - /* 0x18( ) */ kControl | 0, - /* 0x19( ) */ kControl | 0, - /* 0x1a( ) */ kControl | 0, - /* 0x1b( ) */ kControl | 0, - /* 0x1c( ) */ kControl | 0, - /* 0x1d( ) */ kControl | 0, - /* 0x1e( ) */ kControl | 0, - /* 0x1f( ) */ kControl | 0, - /* 0x20( ) */ kBlank | kSpace | kPrint | 0, - /* 0x21(!) */ kPunct | kGraph | kPrint | 0, - /* 0x22(") */ kPunct | kGraph | kPrint | 0, - /* 0x23(#) */ kPunct | kGraph | kPrint | 0, - /* 0x24($) */ kPunct | kGraph | kPrint | 0, - /* 0x25(%) */ kPunct | kGraph | kPrint | 0, - /* 0x26(&) */ kPunct | kGraph | kPrint | 0, - /* 0x27(') */ kPunct | kGraph | kPrint | 0, - /* 0x28(() */ kPunct | kGraph | kPrint | 0, - /* 0x29()) */ kPunct | kGraph | kPrint | 0, - /* 0x2a(*) */ kPunct | kGraph | kPrint | 0, - /* 0x2b(+) */ kPunct | kGraph | kPrint | 0, - /* 0x2c(,) */ kPunct | kGraph | kPrint | 0, - /* 0x2d(-) */ kPunct | kGraph | kPrint | 0, - /* 0x2e(.) */ kPunct | kGraph | kPrint | 0, - /* 0x2f(/) */ kPunct | kGraph | kPrint | 0, - /* 0x30(0) */ kDigit | kHexDigit | kGraph | kPrint | 0, - /* 0x31(1) */ kDigit | kHexDigit | kGraph | kPrint | 0, - /* 0x32(2) */ kDigit | kHexDigit | kGraph | kPrint | 0, - /* 0x33(3) */ kDigit | kHexDigit | kGraph | kPrint | 0, - /* 0x34(4) */ kDigit | kHexDigit | kGraph | kPrint | 0, - /* 0x35(5) */ kDigit | kHexDigit | kGraph | kPrint | 0, - /* 0x36(6) */ kDigit | kHexDigit | kGraph | kPrint | 0, - /* 0x37(7) */ kDigit | kHexDigit | kGraph | kPrint | 0, - /* 0x38(8) */ kDigit | kHexDigit | kGraph | kPrint | 0, - /* 0x39(9) */ kDigit | kHexDigit | kGraph | kPrint | 0, - /* 0x3a(:) */ kPunct | kGraph | kPrint | 0, - /* 0x3b(;) */ kPunct | kGraph | kPrint | 0, - /* 0x3c(<) */ kPunct | kGraph | kPrint | 0, - /* 0x3d(=) */ kPunct | kGraph | kPrint | 0, - /* 0x3e(>) */ kPunct | kGraph | kPrint | 0, - /* 0x3f(?) */ kPunct | kGraph | kPrint | 0, - /* 0x40(@) */ kPunct | kGraph | kPrint | 0, - /* 0x41(A) */ kUpper | kHexDigit | kGraph | kPrint | 0, - /* 0x42(B) */ kUpper | kHexDigit | kGraph | kPrint | 0, - /* 0x43(C) */ kUpper | kHexDigit | kGraph | kPrint | 0, - /* 0x44(D) */ kUpper | kHexDigit | kGraph | kPrint | 0, - /* 0x45(E) */ kUpper | kHexDigit | kGraph | kPrint | 0, - /* 0x46(F) */ kUpper | kHexDigit | kGraph | kPrint | 0, - /* 0x47(G) */ kUpper | kGraph | kPrint | 0, - /* 0x48(H) */ kUpper | kGraph | kPrint | 0, - /* 0x49(I) */ kUpper | kGraph | kPrint | 0, - /* 0x4a(J) */ kUpper | kGraph | kPrint | 0, - /* 0x4b(K) */ kUpper | kGraph | kPrint | 0, - /* 0x4c(L) */ kUpper | kGraph | kPrint | 0, - /* 0x4d(M) */ kUpper | kGraph | kPrint | 0, - /* 0x4e(N) */ kUpper | kGraph | kPrint | 0, - /* 0x4f(O) */ kUpper | kGraph | kPrint | 0, - /* 0x50(P) */ kUpper | kGraph | kPrint | 0, - /* 0x51(Q) */ kUpper | kGraph | kPrint | 0, - /* 0x52(R) */ kUpper | kGraph | kPrint | 0, - /* 0x53(S) */ kUpper | kGraph | kPrint | 0, - /* 0x54(T) */ kUpper | kGraph | kPrint | 0, - /* 0x55(U) */ kUpper | kGraph | kPrint | 0, - /* 0x56(V) */ kUpper | kGraph | kPrint | 0, - /* 0x57(W) */ kUpper | kGraph | kPrint | 0, - /* 0x58(X) */ kUpper | kGraph | kPrint | 0, - /* 0x59(Y) */ kUpper | kGraph | kPrint | 0, - /* 0x5a(Z) */ kUpper | kGraph | kPrint | 0, - /* 0x5b([) */ kPunct | kGraph | kPrint | 0, - /* 0x5c(\) */ kPunct | kGraph | kPrint | 0, - /* 0x5d(]) */ kPunct | kGraph | kPrint | 0, - /* 0x5e(^) */ kPunct | kGraph | kPrint | 0, - /* 0x5f(_) */ kPunct | kGraph | kPrint | 0, - /* 0x60(`) */ kPunct | kGraph | kPrint | 0, - /* 0x61(a) */ kLower | kHexDigit | kGraph | kPrint | 0, - /* 0x62(b) */ kLower | kHexDigit | kGraph | kPrint | 0, - /* 0x63(c) */ kLower | kHexDigit | kGraph | kPrint | 0, - /* 0x64(d) */ kLower | kHexDigit | kGraph | kPrint | 0, - /* 0x65(e) */ kLower | kHexDigit | kGraph | kPrint | 0, - /* 0x66(f) */ kLower | kHexDigit | kGraph | kPrint | 0, - /* 0x67(g) */ kLower | kGraph | kPrint | 0, - /* 0x68(h) */ kLower | kGraph | kPrint | 0, - /* 0x69(i) */ kLower | kGraph | kPrint | 0, - /* 0x6a(j) */ kLower | kGraph | kPrint | 0, - /* 0x6b(k) */ kLower | kGraph | kPrint | 0, - /* 0x6c(l) */ kLower | kGraph | kPrint | 0, - /* 0x6d(m) */ kLower | kGraph | kPrint | 0, - /* 0x6e(n) */ kLower | kGraph | kPrint | 0, - /* 0x6f(o) */ kLower | kGraph | kPrint | 0, - /* 0x70(p) */ kLower | kGraph | kPrint | 0, - /* 0x71(q) */ kLower | kGraph | kPrint | 0, - /* 0x72(r) */ kLower | kGraph | kPrint | 0, - /* 0x73(s) */ kLower | kGraph | kPrint | 0, - /* 0x74(t) */ kLower | kGraph | kPrint | 0, - /* 0x75(u) */ kLower | kGraph | kPrint | 0, - /* 0x76(v) */ kLower | kGraph | kPrint | 0, - /* 0x77(w) */ kLower | kGraph | kPrint | 0, - /* 0x78(x) */ kLower | kGraph | kPrint | 0, - /* 0x79(y) */ kLower | kGraph | kPrint | 0, - /* 0x7a(z) */ kLower | kGraph | kPrint | 0, - /* 0x7b({) */ kPunct | kGraph | kPrint | 0, - /* 0x7c(|) */ kPunct | kGraph | kPrint | 0, - /* 0x7d(}) */ kPunct | kGraph | kPrint | 0, - /* 0x7e(~) */ kPunct | kGraph | kPrint | 0, - /* 0x7f( ) */ kControl | 0, - // 以下全为 0 - }; - return table[static_cast(c)]; - } + static const uint16_t table[UCHAR_MAX + 1] = { + /* 0x00( ) */ kControl | 0, + /* 0x01( ) */ kControl | 0, + /* 0x02( ) */ kControl | 0, + /* 0x03( ) */ kControl | 0, + /* 0x04( ) */ kControl | 0, + /* 0x05( ) */ kControl | 0, + /* 0x06( ) */ kControl | 0, + /* 0x07( ) */ kControl | 0, + /* 0x08( ) */ kControl | 0, + /* 0x09( ) */ kBlank | kSpace | kControl | 0, + /* 0x0a( ) */ kSpace | kControl | 0, + /* 0x0b( ) */ kSpace | kControl | 0, + /* 0x0c( ) */ kSpace | kControl | 0, + /* 0x0d( ) */ kSpace | kControl | 0, + /* 0x0e( ) */ kControl | 0, + /* 0x0f( ) */ kControl | 0, + /* 0x10( ) */ kControl | 0, + /* 0x11( ) */ kControl | 0, + /* 0x12( ) */ kControl | 0, + /* 0x13( ) */ kControl | 0, + /* 0x14( ) */ kControl | 0, + /* 0x15( ) */ kControl | 0, + /* 0x16( ) */ kControl | 0, + /* 0x17( ) */ kControl | 0, + /* 0x18( ) */ kControl | 0, + /* 0x19( ) */ kControl | 0, + /* 0x1a( ) */ kControl | 0, + /* 0x1b( ) */ kControl | 0, + /* 0x1c( ) */ kControl | 0, + /* 0x1d( ) */ kControl | 0, + /* 0x1e( ) */ kControl | 0, + /* 0x1f( ) */ kControl | 0, + /* 0x20( ) */ kBlank | kSpace | kPrint | 0, + /* 0x21(!) */ kPunct | kGraph | kPrint | 0, + /* 0x22(") */ kPunct | kGraph | kPrint | 0, + /* 0x23(#) */ kPunct | kGraph | kPrint | 0, + /* 0x24($) */ kPunct | kGraph | kPrint | 0, + /* 0x25(%) */ kPunct | kGraph | kPrint | 0, + /* 0x26(&) */ kPunct | kGraph | kPrint | 0, + /* 0x27(') */ kPunct | kGraph | kPrint | 0, + /* 0x28(() */ kPunct | kGraph | kPrint | 0, + /* 0x29()) */ kPunct | kGraph | kPrint | 0, + /* 0x2a(*) */ kPunct | kGraph | kPrint | 0, + /* 0x2b(+) */ kPunct | kGraph | kPrint | 0, + /* 0x2c(,) */ kPunct | kGraph | kPrint | 0, + /* 0x2d(-) */ kPunct | kGraph | kPrint | 0, + /* 0x2e(.) */ kPunct | kGraph | kPrint | 0, + /* 0x2f(/) */ kPunct | kGraph | kPrint | 0, + /* 0x30(0) */ kDigit | kHexDigit | kGraph | kPrint | 0, + /* 0x31(1) */ kDigit | kHexDigit | kGraph | kPrint | 0, + /* 0x32(2) */ kDigit | kHexDigit | kGraph | kPrint | 0, + /* 0x33(3) */ kDigit | kHexDigit | kGraph | kPrint | 0, + /* 0x34(4) */ kDigit | kHexDigit | kGraph | kPrint | 0, + /* 0x35(5) */ kDigit | kHexDigit | kGraph | kPrint | 0, + /* 0x36(6) */ kDigit | kHexDigit | kGraph | kPrint | 0, + /* 0x37(7) */ kDigit | kHexDigit | kGraph | kPrint | 0, + /* 0x38(8) */ kDigit | kHexDigit | kGraph | kPrint | 0, + /* 0x39(9) */ kDigit | kHexDigit | kGraph | kPrint | 0, + /* 0x3a(:) */ kPunct | kGraph | kPrint | 0, + /* 0x3b(;) */ kPunct | kGraph | kPrint | 0, + /* 0x3c(<) */ kPunct | kGraph | kPrint | 0, + /* 0x3d(=) */ kPunct | kGraph | kPrint | 0, + /* 0x3e(>) */ kPunct | kGraph | kPrint | 0, + /* 0x3f(?) */ kPunct | kGraph | kPrint | 0, + /* 0x40(@) */ kPunct | kGraph | kPrint | 0, + /* 0x41(A) */ kUpper | kHexDigit | kGraph | kPrint | 0, + /* 0x42(B) */ kUpper | kHexDigit | kGraph | kPrint | 0, + /* 0x43(C) */ kUpper | kHexDigit | kGraph | kPrint | 0, + /* 0x44(D) */ kUpper | kHexDigit | kGraph | kPrint | 0, + /* 0x45(E) */ kUpper | kHexDigit | kGraph | kPrint | 0, + /* 0x46(F) */ kUpper | kHexDigit | kGraph | kPrint | 0, + /* 0x47(G) */ kUpper | kGraph | kPrint | 0, + /* 0x48(H) */ kUpper | kGraph | kPrint | 0, + /* 0x49(I) */ kUpper | kGraph | kPrint | 0, + /* 0x4a(J) */ kUpper | kGraph | kPrint | 0, + /* 0x4b(K) */ kUpper | kGraph | kPrint | 0, + /* 0x4c(L) */ kUpper | kGraph | kPrint | 0, + /* 0x4d(M) */ kUpper | kGraph | kPrint | 0, + /* 0x4e(N) */ kUpper | kGraph | kPrint | 0, + /* 0x4f(O) */ kUpper | kGraph | kPrint | 0, + /* 0x50(P) */ kUpper | kGraph | kPrint | 0, + /* 0x51(Q) */ kUpper | kGraph | kPrint | 0, + /* 0x52(R) */ kUpper | kGraph | kPrint | 0, + /* 0x53(S) */ kUpper | kGraph | kPrint | 0, + /* 0x54(T) */ kUpper | kGraph | kPrint | 0, + /* 0x55(U) */ kUpper | kGraph | kPrint | 0, + /* 0x56(V) */ kUpper | kGraph | kPrint | 0, + /* 0x57(W) */ kUpper | kGraph | kPrint | 0, + /* 0x58(X) */ kUpper | kGraph | kPrint | 0, + /* 0x59(Y) */ kUpper | kGraph | kPrint | 0, + /* 0x5a(Z) */ kUpper | kGraph | kPrint | 0, + /* 0x5b([) */ kPunct | kGraph | kPrint | 0, + /* 0x5c(\) */ kPunct | kGraph | kPrint | 0, + /* 0x5d(]) */ kPunct | kGraph | kPrint | 0, + /* 0x5e(^) */ kPunct | kGraph | kPrint | 0, + /* 0x5f(_) */ kPunct | kGraph | kPrint | 0, + /* 0x60(`) */ kPunct | kGraph | kPrint | 0, + /* 0x61(a) */ kLower | kHexDigit | kGraph | kPrint | 0, + /* 0x62(b) */ kLower | kHexDigit | kGraph | kPrint | 0, + /* 0x63(c) */ kLower | kHexDigit | kGraph | kPrint | 0, + /* 0x64(d) */ kLower | kHexDigit | kGraph | kPrint | 0, + /* 0x65(e) */ kLower | kHexDigit | kGraph | kPrint | 0, + /* 0x66(f) */ kLower | kHexDigit | kGraph | kPrint | 0, + /* 0x67(g) */ kLower | kGraph | kPrint | 0, + /* 0x68(h) */ kLower | kGraph | kPrint | 0, + /* 0x69(i) */ kLower | kGraph | kPrint | 0, + /* 0x6a(j) */ kLower | kGraph | kPrint | 0, + /* 0x6b(k) */ kLower | kGraph | kPrint | 0, + /* 0x6c(l) */ kLower | kGraph | kPrint | 0, + /* 0x6d(m) */ kLower | kGraph | kPrint | 0, + /* 0x6e(n) */ kLower | kGraph | kPrint | 0, + /* 0x6f(o) */ kLower | kGraph | kPrint | 0, + /* 0x70(p) */ kLower | kGraph | kPrint | 0, + /* 0x71(q) */ kLower | kGraph | kPrint | 0, + /* 0x72(r) */ kLower | kGraph | kPrint | 0, + /* 0x73(s) */ kLower | kGraph | kPrint | 0, + /* 0x74(t) */ kLower | kGraph | kPrint | 0, + /* 0x75(u) */ kLower | kGraph | kPrint | 0, + /* 0x76(v) */ kLower | kGraph | kPrint | 0, + /* 0x77(w) */ kLower | kGraph | kPrint | 0, + /* 0x78(x) */ kLower | kGraph | kPrint | 0, + /* 0x79(y) */ kLower | kGraph | kPrint | 0, + /* 0x7a(z) */ kLower | kGraph | kPrint | 0, + /* 0x7b({) */ kPunct | kGraph | kPrint | 0, + /* 0x7c(|) */ kPunct | kGraph | kPrint | 0, + /* 0x7d(}) */ kPunct | kGraph | kPrint | 0, + /* 0x7e(~) */ kPunct | kGraph | kPrint | 0, + /* 0x7f( ) */ kControl | 0, + // 以下全为 0 + }; + return table[static_cast(c)]; + } - static bool CharIncludeAnyTypeMask(char c, int mask) - { - return (GetCharTypeMask(c) & mask) != 0; - } + static bool CharIncludeAnyTypeMask(char c, int mask) { return (GetCharTypeMask(c) & mask) != 0; } - static bool CharIncludeAllTypeMask(char c, int mask) - { - return (GetCharTypeMask(c) & mask) == mask; - } + static bool CharIncludeAllTypeMask(char c, int mask) { + return (GetCharTypeMask(c) & mask) == mask; + } }; // } // namespace common -#endif // COMMON_ENCODING_ASCII_H +#endif // COMMON_ENCODING_ASCII_H diff --git a/src/common/base/bounded_queue.h b/src/common/base/bounded_queue.h new file mode 100644 index 000000000..42132d0ac --- /dev/null +++ b/src/common/base/bounded_queue.h @@ -0,0 +1,80 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#pragma once +#include +#include +#include +namespace common { + +// A simple bounded queue based on std::queue +template +class BoundedQueue { + using size_type = typename std::deque::size_type; + using value_type = typename std::deque::value_type; + using reference = typename std::deque::reference; + using const_reference = typename std::deque::const_reference; + + public: + explicit BoundedQueue(size_type limit) : limit_{limit} {} + + void Push(const value_type& v) { + qu_.push_back(v); + Drop(); + } + + void Push(value_type&& v) { + qu_.push_back(std::move(v)); + Drop(); + } + + template + void Emplace(Args&&... args) { + qu_.emplace_back(std::forward(args)...); + Drop(); + } + + reference Front() { return qu_.front(); } + + const_reference Front() const { return qu_.front(); } + + reference Back() { return qu_.back(); } + + const_reference Back() const { return qu_.back(); } + + bool Empty() const { return qu_.empty(); } + + void Pop() { qu_.pop_front(); } + + size_type Size() { return qu_.size(); } + + // tera specified + value_type Sum() { + static_assert(std::is_arithmetic::value, + "Only arithmetic value is able to use Sum method"); + if (Empty()) { + return 0; + } + return std::accumulate(std::begin(qu_), std::end(qu_), (value_type)0, + [](value_type x, const value_type& y) { return x + y; }); + } + + value_type Average() { + static_assert(std::is_arithmetic::value, + "Only arithmetic value is able to use Average method"); + if (Empty()) { + return 0; + } + return Sum() / Size(); + } + + private: + void Drop() { + while (Size() > limit_) { + Pop(); + } + } + std::deque qu_; + size_type limit_; +}; +} diff --git a/src/common/base/byte_order.h b/src/common/base/byte_order.h index ef39ea6ea..43f9348f5 100644 --- a/src/common/base/byte_order.h +++ b/src/common/base/byte_order.h @@ -14,7 +14,7 @@ #endif #ifdef _MSC_VER -#include // for _byteswap_* +#include // for _byteswap_* #endif /// define __LITTLE_ENDIAN @@ -51,7 +51,7 @@ #define BYTE_ORDER __BYTE_ORDER #endif -#ifdef _WIN32 // winsock APIs +#ifdef _WIN32 // winsock APIs #define BYTEORDER_WINSOCK_API_LINKAGE __declspec(dllimport) #define BYTEORDER_WSAAPI __stdcall @@ -59,298 +59,223 @@ extern "C" { BYTEORDER_WINSOCK_API_LINKAGE -unsigned long -BYTEORDER_WSAAPI -htonl( - unsigned long hostlong -); +unsigned long BYTEORDER_WSAAPI htonl(unsigned long hostlong); BYTEORDER_WINSOCK_API_LINKAGE -unsigned short -BYTEORDER_WSAAPI -htons( - unsigned short hostshort -); +unsigned short BYTEORDER_WSAAPI htons(unsigned short hostshort); BYTEORDER_WINSOCK_API_LINKAGE -unsigned long -BYTEORDER_WSAAPI -ntohl( - unsigned long netlong -); +unsigned long BYTEORDER_WSAAPI ntohl(unsigned long netlong); BYTEORDER_WINSOCK_API_LINKAGE -unsigned short -BYTEORDER_WSAAPI -ntohs( - unsigned short netshort -); +unsigned short BYTEORDER_WSAAPI ntohs(unsigned short netshort); -} // extern "C" +} // extern "C" -#endif // _WIN32 +#endif // _WIN32 #ifndef __linux__ -# ifdef _MSC_VER -static unsigned short bswap_16(unsigned short x) // NOLINT(runtime/int) +#ifdef _MSC_VER +static unsigned short bswap_16(unsigned short x) // NOLINT(runtime/int) { - return _byteswap_ushort(x); + return _byteswap_ushort(x); } -static unsigned int bswap_32(unsigned int x) // NOLINT(runtime/int) +static unsigned int bswap_32(unsigned int x) // NOLINT(runtime/int) { - return _byteswap_ulong(x); + return _byteswap_ulong(x); } -static unsigned long long bswap_64(unsigned long long x) // NOLINT(runtime/int) +static unsigned long long bswap_64(unsigned long long x) // NOLINT(runtime/int) { - return _byteswap_uint64(x); + return _byteswap_uint64(x); } -# else -static unsigned short bswap_16(unsigned short x) // NOLINT(runtime/int) +#else +static unsigned short bswap_16(unsigned short x) // NOLINT(runtime/int) { - return (((x >> 8) & 0xff) | ((x & 0xff) << 8)); + return (((x >> 8) & 0xff) | ((x & 0xff) << 8)); } -static unsigned int bswap_32(unsigned int x) // NOLINT(runtime/int) +static unsigned int bswap_32(unsigned int x) // NOLINT(runtime/int) { - return - (((x & 0xff000000) >> 24) | ((x & 0x00ff0000) >> 8) | - ((x & 0x0000ff00) << 8) | ((x & 0x000000ff) << 24)); + return (((x & 0xff000000) >> 24) | ((x & 0x00ff0000) >> 8) | ((x & 0x0000ff00) << 8) | + ((x & 0x000000ff) << 24)); } -static unsigned long long bswap_64(unsigned long long x) // NOLINT(runtime/int) +static unsigned long long bswap_64(unsigned long long x) // NOLINT(runtime/int) { - return - (((x & 0xff00000000000000ull) >> 56) - | ((x & 0x00ff000000000000ull) >> 40) - | ((x & 0x0000ff0000000000ull) >> 24) - | ((x & 0x000000ff00000000ull) >> 8) - | ((x & 0x00000000ff000000ull) << 8) - | ((x & 0x0000000000ff0000ull) << 24) - | ((x & 0x000000000000ff00ull) << 40) - | ((x & 0x00000000000000ffull) << 56)); + return (((x & 0xff00000000000000ull) >> 56) | ((x & 0x00ff000000000000ull) >> 40) | + ((x & 0x0000ff0000000000ull) >> 24) | ((x & 0x000000ff00000000ull) >> 8) | + ((x & 0x00000000ff000000ull) << 8) | ((x & 0x0000000000ff0000ull) << 24) | + ((x & 0x000000000000ff00ull) << 40) | ((x & 0x00000000000000ffull) << 56)); } -# endif +#endif #endif #if BYTE_ORDER == LITTLE_ENDIAN -inline unsigned long long htonll(unsigned long long n) // NOLINT(runtime/int) +inline unsigned long long htonll(unsigned long long n) // NOLINT(runtime/int) { - return bswap_64(n); + return bswap_64(n); } #else -inline unsigned long long htonll(unsigned long long n) // NOLINT(runtime/int) +inline unsigned long long htonll(unsigned long long n) // NOLINT(runtime/int) { - return n; + return n; } #endif -inline unsigned long long ntohll(unsigned long long n) // NOLINT(runtime/int) +inline unsigned long long ntohll(unsigned long long n) // NOLINT(runtime/int) { - return htonll(n); + return htonll(n); } // using as a strong namespace -struct ByteOrder -{ -private: - ByteOrder(); - ~ByteOrder(); +struct ByteOrder { + private: + ByteOrder(); + ~ByteOrder(); -public: - static bool IsBigEndian() - { + public: + static bool IsBigEndian() { #if __linux__ - return __BYTE_ORDER == __BIG_ENDIAN; -#elif defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_IA64) || defined(_M_X64) - // known little architectures - return false; -#else // unknown - int x = 1; - return reinterpret_cast(x) == 0; + return __BYTE_ORDER == __BIG_ENDIAN; +#elif defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_IA64) || \ + defined(_M_X64) + // known little architectures + return false; +#else // unknown + int x = 1; + return reinterpret_cast(x) == 0; #endif - } - - static bool IsLittleEndian() - { - return !IsBigEndian(); - } - - // one byte, NOP - static char Swap(char value) { return value; } - static signed char Swap(signed char value) { return value; } - static unsigned char Swap(unsigned char value) { return value; } - - static short Swap(short value) { return bswap_16(value); } // NOLINT(runtime/int) - static unsigned short Swap(unsigned short value) // NOLINT(runtime/int) - { return bswap_16(value); } - - static int Swap(int value) // NOLINT(runtime/int) - { return bswap_32(value); } - static unsigned int Swap(unsigned int value) // NOLINT(runtime/int) - { return bswap_32(value); } - - static long Swap(long value) // NOLINT(runtime/int) - { - if (sizeof(value) == 4) - return bswap_32(value); - else - return (long) bswap_64(value); // NOLINT(runtime/int) - } - - static unsigned long Swap(unsigned long value) // NOLINT(runtime/int) - { - if (sizeof(value) == 4) - return bswap_32(value); - else - return (unsigned long) bswap_64(value); // NOLINT(runtime/int) - } - - static long long Swap(long long value) // NOLINT(runtime/int) - { - return (long long) bswap_64(value); // NOLINT(runtime/int) - } - static unsigned long long Swap(unsigned long long value) // NOLINT(runtime/int) - { - return bswap_64(value); - } - - template - static void Swap(T* value) - { - *value = Swap(*value); - } - - /////////////////////////////////////////////////////////////////////////// - // float number can only be inplace swap - - static void Swap(float* value) - { - unsigned int *p = reinterpret_cast(value); - Swap(p); - } - - static void Swap(double* value) - { - unsigned long long *p = reinterpret_cast(value); // NOLINT(runtime/int) - Swap(p); - } - - /////////////////////////////////////////////////////////////////////////// - // local byte order to network byte order - - static char LocalToNet(char x) { return x; } - static signed char LocalToNet(signed char x) { return x; } - static unsigned char LocalToNet(unsigned char x) { return x; } - - static short LocalToNet(short x) - { - return htons(x); - } - static unsigned short LocalToNet(unsigned short x) - { - return htons(x); - } - static int LocalToNet(int x) - { - return htonl(x); - } - static unsigned int LocalToNet(unsigned int x) - { - return htonl(x); - } - - static long LocalToNet(long x) - { - return (sizeof(x) == 4) ? - htonl(x) : (long) htonll(static_cast(x)); - } - static unsigned long LocalToNet(unsigned long x) - { - return (sizeof(x) == 4) ? - htonl(x) : (unsigned long) htonll(static_cast(x)); - } - static long long LocalToNet(long long x) - { - return htonll(static_cast(x)); - } - static unsigned long long LocalToNet(unsigned long long x) - { - return htonll(static_cast(x)); - } - - template - static void LocalToNet(T* value) - { - *value = LocalToNet(*value); - } - - static void LocalToNet(float* value) - { - if (IsLittleEndian()) - Swap(value); - } - - static void LocalToNet(double* value) - { - if (IsLittleEndian()) - Swap(value); - } - - /////////////////////////////////////////////////////////////////////////// - // network byte order to local byte order - static char NetToLocal(char x) { return x; } - static signed char NetToLocal(signed char x) { return x; } - static unsigned char NetToLocal(unsigned char x) { return x; } - - static short NetToLocal(short x) - { - return ntohs(x); - } - static unsigned short NetToLocal(unsigned short x) - { - return ntohs(x); - } - static int NetToLocal(int x) - { - return ntohl(x); - } - static unsigned int NetToLocal(unsigned int x) - { - return ntohl(x); - } - - static long NetToLocal(long x) - { - return (sizeof(x) == 4) ? ntohl(x) : (long) ntohll(x); - } - static unsigned long NetToLocal(unsigned long x) - { - return (sizeof(x) == 4) ? ntohl(x) : (unsigned long) ntohll(x); - } - static long long NetToLocal(long long x) - { - return ntohll(x); - } - static unsigned long long NetToLocal(unsigned long long x) - { - return ntohll(x); - } - - template - static void NetToLocal(T* value) - { - *value = NetToLocal(*value); - } - - static void NetToLocal(float* value) - { - if (IsLittleEndian()) - Swap(value); - } - - static void NetToLocal(double* value) - { - if (IsLittleEndian()) - Swap(value); - } + } + + static bool IsLittleEndian() { return !IsBigEndian(); } + + // one byte, NOP + static char Swap(char value) { return value; } + static signed char Swap(signed char value) { return value; } + static unsigned char Swap(unsigned char value) { return value; } + + static short Swap(short value) { return bswap_16(value); } // NOLINT(runtime/int) + static unsigned short Swap(unsigned short value) // NOLINT(runtime/int) + { + return bswap_16(value); + } + + static int Swap(int value) // NOLINT(runtime/int) + { + return bswap_32(value); + } + static unsigned int Swap(unsigned int value) // NOLINT(runtime/int) + { + return bswap_32(value); + } + + static long Swap(long value) // NOLINT(runtime/int) + { + if (sizeof(value) == 4) + return bswap_32(value); + else + return (long)bswap_64(value); // NOLINT(runtime/int) + } + + static unsigned long Swap(unsigned long value) // NOLINT(runtime/int) + { + if (sizeof(value) == 4) + return bswap_32(value); + else + return (unsigned long)bswap_64(value); // NOLINT(runtime/int) + } + + static long long Swap(long long value) // NOLINT(runtime/int) + { + return (long long)bswap_64(value); // NOLINT(runtime/int) + } + static unsigned long long Swap(unsigned long long value) // NOLINT(runtime/int) + { + return bswap_64(value); + } + + template + static void Swap(T* value) { + *value = Swap(*value); + } + + /////////////////////////////////////////////////////////////////////////// + // float number can only be inplace swap + + static void Swap(float* value) { + unsigned int* p = reinterpret_cast(value); + Swap(p); + } + + static void Swap(double* value) { + unsigned long long* p = reinterpret_cast(value); // NOLINT(runtime/int) + Swap(p); + } + + /////////////////////////////////////////////////////////////////////////// + // local byte order to network byte order + + static char LocalToNet(char x) { return x; } + static signed char LocalToNet(signed char x) { return x; } + static unsigned char LocalToNet(unsigned char x) { return x; } + + static short LocalToNet(short x) { return htons(x); } + static unsigned short LocalToNet(unsigned short x) { return htons(x); } + static int LocalToNet(int x) { return htonl(x); } + static unsigned int LocalToNet(unsigned int x) { return htonl(x); } + + static long LocalToNet(long x) { + return (sizeof(x) == 4) ? htonl(x) : (long)htonll(static_cast(x)); + } + static unsigned long LocalToNet(unsigned long x) { + return (sizeof(x) == 4) ? htonl(x) : (unsigned long)htonll(static_cast(x)); + } + static long long LocalToNet(long long x) { return htonll(static_cast(x)); } + static unsigned long long LocalToNet(unsigned long long x) { + return htonll(static_cast(x)); + } + + template + static void LocalToNet(T* value) { + *value = LocalToNet(*value); + } + + static void LocalToNet(float* value) { + if (IsLittleEndian()) Swap(value); + } + + static void LocalToNet(double* value) { + if (IsLittleEndian()) Swap(value); + } + + /////////////////////////////////////////////////////////////////////////// + // network byte order to local byte order + static char NetToLocal(char x) { return x; } + static signed char NetToLocal(signed char x) { return x; } + static unsigned char NetToLocal(unsigned char x) { return x; } + + static short NetToLocal(short x) { return ntohs(x); } + static unsigned short NetToLocal(unsigned short x) { return ntohs(x); } + static int NetToLocal(int x) { return ntohl(x); } + static unsigned int NetToLocal(unsigned int x) { return ntohl(x); } + + static long NetToLocal(long x) { return (sizeof(x) == 4) ? ntohl(x) : (long)ntohll(x); } + static unsigned long NetToLocal(unsigned long x) { + return (sizeof(x) == 4) ? ntohl(x) : (unsigned long)ntohll(x); + } + static long long NetToLocal(long long x) { return ntohll(x); } + static unsigned long long NetToLocal(unsigned long long x) { return ntohll(x); } + + template + static void NetToLocal(T* value) { + *value = NetToLocal(*value); + } + + static void NetToLocal(float* value) { + if (IsLittleEndian()) Swap(value); + } + + static void NetToLocal(double* value) { + if (IsLittleEndian()) Swap(value); + } }; -#endif // TERA_COMMON_BASE_BYTE_ORDER_H_ +#endif // TERA_COMMON_BASE_BYTE_ORDER_H_ diff --git a/src/common/base/preprocess.h b/src/common/base/preprocess.h index 0c120f01d..560ba15ed 100644 --- a/src/common/base/preprocess.h +++ b/src/common/base/preprocess.h @@ -36,10 +36,10 @@ /// #define SOMEMACRO() PP_DISALLOW_IN_HEADER_FILE() /// A compile error will be issued if SOMEMACRO() is used in header files #ifdef __GNUC__ -# define PP_DISALLOW_IN_HEADER_FILE() \ - STATIC_ASSERT(__INCLUDE_LEVEL__ == 0, "This macro can not be used in header files"); +#define PP_DISALLOW_IN_HEADER_FILE() \ + STATIC_ASSERT(__INCLUDE_LEVEL__ == 0, "This macro can not be used in header files"); #else -# define PP_DISALLOW_IN_HEADER_FILE() +#define PP_DISALLOW_IN_HEADER_FILE() #endif -#endif // TERA_COMMON_BASE_PREPROCESS_H_ +#endif // TERA_COMMON_BASE_PREPROCESS_H_ diff --git a/src/common/base/scoped_ptr.h b/src/common/base/scoped_ptr.h index d7c52e9de..171916983 100644 --- a/src/common/base/scoped_ptr.h +++ b/src/common/base/scoped_ptr.h @@ -53,8 +53,10 @@ // namespace common { -template class scoped_ptr; -template class scoped_array; +template +class scoped_ptr; +template +class scoped_array; // A scoped_ptr is like a T*, except that the destructor of scoped_ptr // automatically deletes the pointer it holds (if any). @@ -65,91 +67,89 @@ template class scoped_array; // sizeof(scoped_ptr) == sizeof(C*) template class scoped_ptr { -public: - // The element type - typedef C element_type; - - // Constructor. Defaults to intializing with NULL. - // There is no way to create an uninitialized scoped_ptr. - // The input parameter must be allocated with new. - explicit scoped_ptr(C* p = NULL) : ptr_(p) { } - - // Destructor. If there is a C object, delete it. - // We don't need to test ptr_ == NULL because C++ does that for us. - ~scoped_ptr() { - enum { type_must_be_complete = sizeof(C) }; - delete ptr_; - ptr_ = reinterpret_cast(-1); - } - - // implicit cast to bool - operator bool() const { - return ptr_ != NULL; - } - - bool operator!() const { - return ptr_ == 0; - } - - // Reset. Deletes the current owned object, if any. - // Then takes ownership of a new object, if given. - // this->reset(this->get()) works. - void reset(C* p = NULL) { - if (p != ptr_) { - enum { type_must_be_complete = sizeof(C) }; - delete ptr_; - ptr_ = p; - } - } - - // Accessors to get the owned object. - // operator* and operator-> will assert() if there is no current object. - C& operator*() const { - assert(ptr_ != NULL); - return *ptr_; - } - C* operator->() const { - assert(ptr_ != NULL); - return ptr_; - } - C* get() const { return ptr_; } - - // Comparison operators. - // These return whether two scoped_ptr refer to the same object, not just to - // two different but equal objects. - bool operator==(C* p) const { return ptr_ == p; } - bool operator!=(C* p) const { return ptr_ != p; } - - // Swap two scoped pointers. - void swap(scoped_ptr& p2) { - C* tmp = ptr_; - ptr_ = p2.ptr_; - p2.ptr_ = tmp; - } - - // Release a pointer. - // The return value is the current pointer held by this object. - // If this object holds a NULL pointer, the return value is NULL. - // After this operation, this object will hold a NULL pointer, - // and will not own the object any more. - C* release() { - C* retVal = ptr_; - ptr_ = NULL; - return retVal; - } - -private: - C* ptr_; - - // Forbid comparison of scoped_ptr types. If C2 != C, it totally doesn't - // make sense, and if C2 == C, it still doesn't make sense because you should - // never have the same object owned by two different scoped_ptrs. - template bool operator==(scoped_ptr const& p2) const; - template bool operator!=(scoped_ptr const& p2) const; - - // Disallow evil constructors - scoped_ptr(const scoped_ptr&); - void operator=(const scoped_ptr&); + public: + // The element type + typedef C element_type; + + // Constructor. Defaults to intializing with NULL. + // There is no way to create an uninitialized scoped_ptr. + // The input parameter must be allocated with new. + explicit scoped_ptr(C* p = NULL) : ptr_(p) {} + + // Destructor. If there is a C object, delete it. + // We don't need to test ptr_ == NULL because C++ does that for us. + ~scoped_ptr() { + enum { type_must_be_complete = sizeof(C) }; + delete ptr_; + ptr_ = reinterpret_cast(-1); + } + + // implicit cast to bool + operator bool() const { return ptr_ != NULL; } + + bool operator!() const { return ptr_ == 0; } + + // Reset. Deletes the current owned object, if any. + // Then takes ownership of a new object, if given. + // this->reset(this->get()) works. + void reset(C* p = NULL) { + if (p != ptr_) { + enum { type_must_be_complete = sizeof(C) }; + delete ptr_; + ptr_ = p; + } + } + + // Accessors to get the owned object. + // operator* and operator-> will assert() if there is no current object. + C& operator*() const { + assert(ptr_ != NULL); + return *ptr_; + } + C* operator->() const { + assert(ptr_ != NULL); + return ptr_; + } + C* get() const { return ptr_; } + + // Comparison operators. + // These return whether two scoped_ptr refer to the same object, not just to + // two different but equal objects. + bool operator==(C* p) const { return ptr_ == p; } + bool operator!=(C* p) const { return ptr_ != p; } + + // Swap two scoped pointers. + void swap(scoped_ptr& p2) { + C* tmp = ptr_; + ptr_ = p2.ptr_; + p2.ptr_ = tmp; + } + + // Release a pointer. + // The return value is the current pointer held by this object. + // If this object holds a NULL pointer, the return value is NULL. + // After this operation, this object will hold a NULL pointer, + // and will not own the object any more. + C* release() { + C* retVal = ptr_; + ptr_ = NULL; + return retVal; + } + + private: + C* ptr_; + + // Forbid comparison of scoped_ptr types. If C2 != C, it totally doesn't + // make sense, and if C2 == C, it still doesn't make sense because you should + // never have the same object owned by two different scoped_ptrs. + template + bool operator==(scoped_ptr const& p2) const; + template + bool operator!=(scoped_ptr const& p2) const; + + // Disallow evil constructors + scoped_ptr(const scoped_ptr&); + void operator=(const scoped_ptr&); }; // scoped_array is like scoped_ptr, except that the caller must allocate @@ -161,91 +161,87 @@ class scoped_ptr { // Size: sizeof(scoped_array) == sizeof(C*) template class scoped_array { -public: - // The element type - typedef C element_type; - - // Constructor. Defaults to intializing with NULL. - // There is no way to create an uninitialized scoped_array. - // The input parameter must be allocated with new []. - explicit scoped_array(C* p = NULL) : array_(p) { } - - // Destructor. If there is a C object, delete it. - // We don't need to test ptr_ == NULL because C++ does that for us. - ~scoped_array() { - enum { type_must_be_complete = sizeof(C) }; - delete[] array_; - array_ = reinterpret_cast(-1); - } - - // implicit cast to bool - operator bool() const { - return array_ != NULL; - } - - bool operator!() const { - return array_ == 0; - } - - // Reset. Deletes the current owned object, if any. - // Then takes ownership of a new object, if given. - // this->reset(this->get()) works. - void reset(C* p = NULL) { - if (p != array_) { - enum { type_must_be_complete = sizeof(C) }; - delete[] array_; - array_ = p; - } - } - - // Get one element of the current object. - // Will assert() if there is no current object, or index i is negative. - C& operator[](std::ptrdiff_t i) const { - assert(i >= 0); - assert(array_ != NULL); - return array_[i]; - } - - // Get a pointer to the zeroth element of the current object. - // If there is no current object, return NULL. - C* get() const { - return array_; - } - - // Comparison operators. - // These return whether two scoped_array refer to the same object, not just to - // two different but equal objects. - bool operator==(C* p) const { return array_ == p; } - bool operator!=(C* p) const { return array_ != p; } - - // Swap two scoped arrays. - void swap(scoped_array& p2) { - C* tmp = array_; - array_ = p2.array_; - p2.array_ = tmp; - } - - // Release an array. - // The return value is the current pointer held by this object. - // If this object holds a NULL pointer, the return value is NULL. - // After this operation, this object will hold a NULL pointer, - // and will not own the object any more. - C* release() { - C* retVal = array_; - array_ = NULL; - return retVal; - } - -private: - C* array_; - - // Forbid comparison of different scoped_array types. - template bool operator==(scoped_array const& p2) const; - template bool operator!=(scoped_array const& p2) const; - - // Disallow evil constructors - scoped_array(const scoped_array&); - void operator=(const scoped_array&); + public: + // The element type + typedef C element_type; + + // Constructor. Defaults to intializing with NULL. + // There is no way to create an uninitialized scoped_array. + // The input parameter must be allocated with new []. + explicit scoped_array(C* p = NULL) : array_(p) {} + + // Destructor. If there is a C object, delete it. + // We don't need to test ptr_ == NULL because C++ does that for us. + ~scoped_array() { + enum { type_must_be_complete = sizeof(C) }; + delete[] array_; + array_ = reinterpret_cast(-1); + } + + // implicit cast to bool + operator bool() const { return array_ != NULL; } + + bool operator!() const { return array_ == 0; } + + // Reset. Deletes the current owned object, if any. + // Then takes ownership of a new object, if given. + // this->reset(this->get()) works. + void reset(C* p = NULL) { + if (p != array_) { + enum { type_must_be_complete = sizeof(C) }; + delete[] array_; + array_ = p; + } + } + + // Get one element of the current object. + // Will assert() if there is no current object, or index i is negative. + C& operator[](std::ptrdiff_t i) const { + assert(i >= 0); + assert(array_ != NULL); + return array_[i]; + } + + // Get a pointer to the zeroth element of the current object. + // If there is no current object, return NULL. + C* get() const { return array_; } + + // Comparison operators. + // These return whether two scoped_array refer to the same object, not just to + // two different but equal objects. + bool operator==(C* p) const { return array_ == p; } + bool operator!=(C* p) const { return array_ != p; } + + // Swap two scoped arrays. + void swap(scoped_array& p2) { + C* tmp = array_; + array_ = p2.array_; + p2.array_ = tmp; + } + + // Release an array. + // The return value is the current pointer held by this object. + // If this object holds a NULL pointer, the return value is NULL. + // After this operation, this object will hold a NULL pointer, + // and will not own the object any more. + C* release() { + C* retVal = array_; + array_ = NULL; + return retVal; + } + + private: + C* array_; + + // Forbid comparison of different scoped_array types. + template + bool operator==(scoped_array const& p2) const; + template + bool operator!=(scoped_array const& p2) const; + + // Disallow evil constructors + scoped_array(const scoped_array&); + void operator=(const scoped_array&); }; // } // namespace common diff --git a/src/common/base/static_assert.h b/src/common/base/static_assert.h index ee3abf5d1..47bfc74cc 100644 --- a/src/common/base/static_assert.h +++ b/src/common/base/static_assert.h @@ -13,13 +13,16 @@ // namespace common { -template struct static_assertion_failure; +template +struct static_assertion_failure; -template <> struct static_assertion_failure { - enum { value = 1 }; +template <> +struct static_assertion_failure { + enum { value = 1 }; }; -template struct static_assert_test {}; +template +struct static_assert_test {}; /// 编译期间的静态断言 /// @param e 常量表达式 @@ -27,14 +30,12 @@ template struct static_assert_test {}; /// @code /// STATIC_ASSERT(sizeof(Foo) == 48, "Foo 的大小必须为 48"); /// @endcode -#define STATIC_ASSERT(e, ...) \ -typedef static_assert_test< \ - sizeof(static_assertion_failure<(bool)(e)>)> \ - PP_JOIN(static_assert_failed, __LINE__) +#define STATIC_ASSERT(e, ...) \ + typedef static_assert_test)> PP_JOIN( \ + static_assert_failed, __LINE__) #endif // } // namespace common -#endif // COMMON_BASE_STATIC_ASSERT_H - +#endif // COMMON_BASE_STATIC_ASSERT_H diff --git a/src/common/base/stdint.h b/src/common/base/stdint.h index a8f8b112f..032fe45fa 100644 --- a/src/common/base/stdint.h +++ b/src/common/base/stdint.h @@ -11,7 +11,7 @@ #define __STDC_LIMIT_MACROS #endif #if defined __GNUC__ -#include_next +#include_next "stdint.h" #elif defined _MSC_VER #include "common/base/vc_stdint.h" #else @@ -22,124 +22,124 @@ // without __STDC_LIMIT_MACROS #ifndef __WORDSIZE -# ifndef _WIN32 -# ifdef _WIN64 -# define __WORDSIZE 64 -# else -# define __WORDSIZE 32 -# endif -# endif +#ifndef _WIN32 +#ifdef _WIN64 +#define __WORDSIZE 64 +#else +#define __WORDSIZE 32 +#endif +#endif #endif #ifndef __INT64_C -# if __WORDSIZE == 64 -# define __INT64_C(c) c ## L -# define __UINT64_C(c) c ## UL -# else -# define __INT64_C(c) c ## LL -# define __UINT64_C(c) c ## ULL -# endif +#if __WORDSIZE == 64 +#define __INT64_C(c) c##L +#define __UINT64_C(c) c##UL +#else +#define __INT64_C(c) c##LL +#define __UINT64_C(c) c##ULL +#endif #endif #ifndef INT64_MIN /* Minimum of signed integral types. */ -# define INT8_MIN (-128) -# define INT16_MIN (-32767-1) -# define INT32_MIN (-2147483647-1) -# define INT64_MIN (-__INT64_C(9223372036854775807)-1) +#define INT8_MIN (-128) +#define INT16_MIN (-32767 - 1) +#define INT32_MIN (-2147483647 - 1) +#define INT64_MIN (-__INT64_C(9223372036854775807) - 1) /* Maximum of signed integral types. */ -# define INT8_MAX (127) -# define INT16_MAX (32767) -# define INT32_MAX (2147483647) -# define INT64_MAX (__INT64_C(9223372036854775807)) +#define INT8_MAX (127) +#define INT16_MAX (32767) +#define INT32_MAX (2147483647) +#define INT64_MAX (__INT64_C(9223372036854775807)) /* Maximum of unsigned integral types. */ -# define UINT8_MAX (255) -# define UINT16_MAX (65535) -# define UINT32_MAX (4294967295U) -# define UINT64_MAX (__UINT64_C(18446744073709551615)) +#define UINT8_MAX (255) +#define UINT16_MAX (65535) +#define UINT32_MAX (4294967295U) +#define UINT64_MAX (__UINT64_C(18446744073709551615)) /* Values to test for integral types holding `void *' pointer. */ -# if __WORDSIZE == 64 -# define INTPTR_MIN (-9223372036854775807L-1) -# define INTPTR_MAX (9223372036854775807L) -# define UINTPTR_MAX (18446744073709551615UL) -# else -# define INTPTR_MIN (-2147483647-1) -# define INTPTR_MAX (2147483647) -# define UINTPTR_MAX (4294967295U) -# endif +#if __WORDSIZE == 64 +#define INTPTR_MIN (-9223372036854775807L - 1) +#define INTPTR_MAX (9223372036854775807L) +#define UINTPTR_MAX (18446744073709551615UL) +#else +#define INTPTR_MIN (-2147483647 - 1) +#define INTPTR_MAX (2147483647) +#define UINTPTR_MAX (4294967295U) +#endif /* Minimum for largest signed integral type. */ -# define INTMAX_MIN (-__INT64_C(9223372036854775807)-1) +#define INTMAX_MIN (-__INT64_C(9223372036854775807) - 1) /* Maximum for largest signed integral type. */ -# define INTMAX_MAX (__INT64_C(9223372036854775807)) +#define INTMAX_MAX (__INT64_C(9223372036854775807)) /* Maximum for largest unsigned integral type. */ -# define UINTMAX_MAX (__UINT64_C(18446744073709551615)) +#define UINTMAX_MAX (__UINT64_C(18446744073709551615)) /* Limits of other integer types. */ /* Limits of `ptrdiff_t' type. */ -# if __WORDSIZE == 64 -# define PTRDIFF_MIN (-9223372036854775807L-1) -# define PTRDIFF_MAX (9223372036854775807L) -# else -# define PTRDIFF_MIN (-2147483647-1) -# define PTRDIFF_MAX (2147483647) -# endif +#if __WORDSIZE == 64 +#define PTRDIFF_MIN (-9223372036854775807L - 1) +#define PTRDIFF_MAX (9223372036854775807L) +#else +#define PTRDIFF_MIN (-2147483647 - 1) +#define PTRDIFF_MAX (2147483647) +#endif /* Limit of `size_t' type. */ -# ifndef SIZE_MAX -# if __WORDSIZE == 64 -# define SIZE_MAX (18446744073709551615UL) -# else -# define SIZE_MAX (4294967295U) -# endif -# endif +#ifndef SIZE_MAX +#if __WORDSIZE == 64 +#define SIZE_MAX (18446744073709551615UL) +#else +#define SIZE_MAX (4294967295U) +#endif +#endif /* Limits of `wchar_t'. */ -# ifndef WCHAR_MIN +#ifndef WCHAR_MIN /* These constants might also be defined in . */ -# define WCHAR_MIN __WCHAR_MIN -# define WCHAR_MAX __WCHAR_MAX -# endif +#define WCHAR_MIN __WCHAR_MIN +#define WCHAR_MAX __WCHAR_MAX +#endif /* Limits of `wint_t'. */ -# define WINT_MIN (0u) -# define WINT_MAX (4294967295u) +#define WINT_MIN (0u) +#define WINT_MAX (4294967295u) #endif #ifndef INT8_C /* Signed. */ -# define INT8_C(c) c -# define INT16_C(c) c -# define INT32_C(c) c -# if __WORDSIZE == 64 -# define INT64_C(c) c ## L -# else -# define INT64_C(c) c ## LL -# endif +#define INT8_C(c) c +#define INT16_C(c) c +#define INT32_C(c) c +#if __WORDSIZE == 64 +#define INT64_C(c) c##L +#else +#define INT64_C(c) c##LL +#endif /* Unsigned. */ -# define UINT8_C(c) c ## U -# define UINT16_C(c) c ## U -# define UINT32_C(c) c ## U -# if __WORDSIZE == 64 -# define UINT64_C(c) c ## UL -# else -# define UINT64_C(c) c ## ULL -# endif +#define UINT8_C(c) c##U +#define UINT16_C(c) c##U +#define UINT32_C(c) c##U +#if __WORDSIZE == 64 +#define UINT64_C(c) c##UL +#else +#define UINT64_C(c) c##ULL +#endif /* Maximal type. */ -# if __WORDSIZE == 64 -# define INTMAX_C(c) c ## L -# define UINTMAX_C(c) c ## UL -# else -# define INTMAX_C(c) c ## LL -# define UINTMAX_C(c) c ## ULL -# endif +#if __WORDSIZE == 64 +#define INTMAX_C(c) c##L +#define UINTMAX_C(c) c##UL +#else +#define INTMAX_C(c) c##LL +#define UINTMAX_C(c) c##ULL +#endif #endif -#endif // TERA_COMMON_BASE_STDINT_H_ +#endif // TERA_COMMON_BASE_STDINT_H_ diff --git a/src/common/base/string_ext.cc b/src/common/base/string_ext.cc index 488939ba3..05cd1cf3d 100644 --- a/src/common/base/string_ext.cc +++ b/src/common/base/string_ext.cc @@ -4,103 +4,99 @@ #include "common/base/string_ext.h" -void SplitString(const std::string& full, - const std::string& delim, +void SplitString(const std::string& full, const std::string& delim, std::vector* result) { - result->clear(); - if (full.empty()) { - return; - } + result->clear(); + if (full.empty()) { + return; + } - std::string tmp; - std::string::size_type pos_begin = full.find_first_not_of(delim); - std::string::size_type comma_pos = 0; + std::string tmp; + std::string::size_type pos_begin = full.find_first_not_of(delim); + std::string::size_type comma_pos = 0; - while (pos_begin != std::string::npos) { - comma_pos = full.find(delim, pos_begin); - if (comma_pos != std::string::npos) { - tmp = full.substr(pos_begin, comma_pos - pos_begin); - pos_begin = comma_pos + delim.length(); - } else { - tmp = full.substr(pos_begin); - pos_begin = comma_pos; - } + while (pos_begin != std::string::npos) { + comma_pos = full.find(delim, pos_begin); + if (comma_pos != std::string::npos) { + tmp = full.substr(pos_begin, comma_pos - pos_begin); + pos_begin = comma_pos + delim.length(); + } else { + tmp = full.substr(pos_begin); + pos_begin = comma_pos; + } - if (!tmp.empty()) { - result->push_back(tmp); - tmp.clear(); - } + if (!tmp.empty()) { + result->push_back(tmp); + tmp.clear(); } + } } -void SplitStringEnd(const std::string& full, std::string* begin_part, - std::string* end_part, std::string delim) { - std::string::size_type pos = full.find_last_of(delim); - if (pos != std::string::npos && pos != 0) { - if (end_part) { - *end_part = full.substr(pos + 1); - } - if (begin_part) { - *begin_part = full.substr(0, pos); - } - } else { - if (end_part) { - *end_part = full; - } +void SplitStringEnd(const std::string& full, std::string* begin_part, std::string* end_part, + std::string delim) { + std::string::size_type pos = full.find_last_of(delim); + if (pos != std::string::npos && pos != 0) { + if (end_part) { + *end_part = full.substr(pos + 1); } + if (begin_part) { + *begin_part = full.substr(0, pos); + } + } else { + if (end_part) { + *end_part = full; + } + } } -std::string ReplaceString(const std::string& str, const std::string& src, - const std::string& dest) { - std::string ret; +std::string ReplaceString(const std::string& str, const std::string& src, const std::string& dest) { + std::string ret; - std::string::size_type pos_begin = 0; - std::string::size_type pos = str.find(src); - while (pos != std::string::npos) { - // cout <<"replacexxx:" << pos_begin <<" " << pos <<"\n"; - ret.append(str.data() + pos_begin, pos - pos_begin); - ret += dest; - pos_begin = pos + src.length(); - pos = str.find(src, pos_begin); - } - if (pos_begin < str.length()) { - ret.append(str.begin() + pos_begin, str.end()); - } - return ret; + std::string::size_type pos_begin = 0; + std::string::size_type pos = str.find(src); + while (pos != std::string::npos) { + // cout <<"replacexxx:" << pos_begin <<" " << pos <<"\n"; + ret.append(str.data() + pos_begin, pos - pos_begin); + ret += dest; + pos_begin = pos + src.length(); + pos = str.find(src, pos_begin); + } + if (pos_begin < str.length()) { + ret.append(str.begin() + pos_begin, str.end()); + } + return ret; } std::string TrimString(const std::string& str, const std::string& trim) { - std::string::size_type pos = str.find_first_not_of(trim); - if (pos == std::string::npos) { - return str; - } - std::string::size_type pos2 = str.find_last_not_of(trim); - if (pos2 != std::string::npos) { - return str.substr(pos, pos2 - pos + 1); - } - return str.substr(pos); + std::string::size_type pos = str.find_first_not_of(trim); + if (pos == std::string::npos) { + return str; + } + std::string::size_type pos2 = str.find_last_not_of(trim); + if (pos2 != std::string::npos) { + return str.substr(pos, pos2 - pos + 1); + } + return str.substr(pos); } bool StringEndsWith(const std::string& str, const std::string& sub_str) { - if (str.length() < sub_str.length()) { - return false; - } - if (str.substr(str.length() - sub_str.length()) != sub_str) { - return false; - } - return true; + if (str.length() < sub_str.length()) { + return false; + } + if (str.substr(str.length() - sub_str.length()) != sub_str) { + return false; + } + return true; } bool StringStartWith(const std::string& str, const std::string& sub_str) { - if (str.length() < sub_str.length()) { - return false; - } - if (str.substr(0, sub_str.length()) != sub_str) { - return false; - } - return true; + if (str.length() < sub_str.length()) { + return false; + } + if (str.substr(0, sub_str.length()) != sub_str) { + return false; + } + return true; } -char* StringAsArray(std::string* str) { - return str->empty() ? NULL : &*str->begin(); -} +char* StringAsArray(std::string* str) { return str->empty() ? NULL : &*str->begin(); } diff --git a/src/common/base/string_ext.h b/src/common/base/string_ext.h index db1897a43..1c4b227d1 100644 --- a/src/common/base/string_ext.h +++ b/src/common/base/string_ext.h @@ -8,29 +8,20 @@ #include #include -void SplitString(const std::string& full, - const std::string& delim, +void SplitString(const std::string& full, const std::string& delim, std::vector* result); -void SplitStringEnd(const std::string& full, - std::string* begin_part, - std::string* end_part, +void SplitStringEnd(const std::string& full, std::string* begin_part, std::string* end_part, std::string delim = "."); -std::string ReplaceString(const std::string& str, - const std::string& src, - const std::string& dest); +std::string ReplaceString(const std::string& str, const std::string& src, const std::string& dest); +std::string TrimString(const std::string& str, const std::string& trim = " "); -std::string TrimString(const std::string& str, - const std::string& trim = " "); +bool StringEndsWith(const std::string& str, const std::string& sub_str); -bool StringEndsWith(const std::string& str, - const std::string& sub_str); - -bool StringStartWith(const std::string& str, - const std::string& sub_str); +bool StringStartWith(const std::string& str, const std::string& sub_str); char* StringAsArray(std::string* str); -#endif // TERA_COMMON_STRING_EXT_H_ +#endif // TERA_COMMON_STRING_EXT_H_ diff --git a/src/common/base/string_format.cc b/src/common/base/string_format.cc index 2d29d69d4..add2750bf 100644 --- a/src/common/base/string_format.cc +++ b/src/common/base/string_format.cc @@ -5,67 +5,66 @@ #include "common/base/string_format.h" size_t StringFormatAppendVA(std::string* dst, const char* format, va_list ap) { - // First try with a small fixed size buffer - char space[1024]; - // It's possible for methods that use a va_list to invalidate - // the data in it upon use. The fix is to make a copy - // of the structure before using it and use that copy instead. - va_list backup_ap; + // First try with a small fixed size buffer + char space[1024]; + // It's possible for methods that use a va_list to invalidate + // the data in it upon use. The fix is to make a copy + // of the structure before using it and use that copy instead. + va_list backup_ap; + va_copy(backup_ap, ap); + int result = vsnprintf(space, sizeof(space), format, backup_ap); + va_end(backup_ap); + if ((result >= 0) && (result < static_cast(sizeof(space)))) { + dst->append(space, result); + return result; + } + // Repeatedly increase buffer size until it fits + int length = sizeof(space); + while (true) { + if (result < 0) { + // Older behavior: just try doubling the buffer size + length *= 2; + } else { + // We need exactly "result+1" characters + length = result + 1; + } + char* buf = new char[length]; + // Restore the va_list before we use it again va_copy(backup_ap, ap); - int result = vsnprintf(space, sizeof(space), format, backup_ap); + result = vsnprintf(buf, length, format, backup_ap); va_end(backup_ap); - if ((result >= 0) && (result < static_cast(sizeof(space)))) { - dst->append(space, result); - return result; + if ((result >= 0) && (result < length)) { + dst->append(buf, result); + delete[] buf; + break; } - // Repeatedly increase buffer size until it fits - int length = sizeof(space); - while (true) { - if (result < 0) { - // Older behavior: just try doubling the buffer size - length *= 2; - } else { - // We need exactly "result+1" characters - length = result + 1; - } - char* buf = new char[length]; - // Restore the va_list before we use it again - va_copy(backup_ap, ap); - result = vsnprintf(buf, length, format, backup_ap); - va_end(backup_ap); - if ((result >= 0) && (result < length)) { - dst->append(buf, result); - delete[] buf; - break; - } - delete[] buf; - } - return result; + delete[] buf; + } + return result; } size_t StringFormatAppend(std::string* dst, const char* format, ...) { - va_list ap; - va_start(ap, format); - size_t result = StringFormatAppendVA(dst, format, ap); - va_end(ap); - return result; + va_list ap; + va_start(ap, format); + size_t result = StringFormatAppendVA(dst, format, ap); + va_end(ap); + return result; } size_t StringFormatTo(std::string* dst, const char* format, ...) { - va_list ap; - va_start(ap, format); - dst->clear(); - size_t result = StringFormatAppendVA(dst, format, ap); - va_end(ap); - return result; + va_list ap; + va_start(ap, format); + dst->clear(); + size_t result = StringFormatAppendVA(dst, format, ap); + va_end(ap); + return result; } std::string StringFormat(const char* format, ...) { - va_list ap; - va_start(ap, format); - std::string result; - StringFormatAppendVA(&result, format, ap); - va_end(ap); - return result; + va_list ap; + va_start(ap, format); + std::string result; + StringFormatAppendVA(&result, format, ap); + va_end(ap); + return result; } - diff --git a/src/common/base/string_format.h b/src/common/base/string_format.h index 146f5da34..9f3d67678 100644 --- a/src/common/base/string_format.h +++ b/src/common/base/string_format.h @@ -18,4 +18,4 @@ size_t StringFormatTo(std::string* dst, const char* format, ...); std::string StringFormat(const char* format, ...); -#endif // TERA_COMMON_BASE_STRING_FORMAT_H_ +#endif // TERA_COMMON_BASE_STRING_FORMAT_H_ diff --git a/src/common/base/string_number.cc b/src/common/base/string_number.cc index b530b03bf..19de687c1 100644 --- a/src/common/base/string_number.cc +++ b/src/common/base/string_number.cc @@ -16,783 +16,714 @@ // GLOBAL_NOLINT(runtime/int) -#define ARRAY_SIZE(a) \ - ((sizeof(a) / sizeof(*(a))) / (size_t)(!(sizeof(a) % sizeof(*(a))))) +#define ARRAY_SIZE(a) ((sizeof(a) / sizeof(*(a))) / (size_t)(!(sizeof(a) % sizeof(*(a))))) using namespace std; // namespace common { -namespace -{ +namespace { template -struct StringToNumber -{ - // static T Convert(const char* str, char** endptr, int base); +struct StringToNumber { + // static T Convert(const char* str, char** endptr, int base); }; template <> -struct StringToNumber -{ - static long Convert(const char* str, char** endptr, int base) - { - return strtol(str, endptr, base); - } +struct StringToNumber { + static long Convert(const char* str, char** endptr, int base) { + return strtol(str, endptr, base); + } }; template <> -struct StringToNumber -{ - static unsigned long Convert(const char* str, char** endptr, int base) - { - return strtoul(str, endptr, base); - } +struct StringToNumber { + static unsigned long Convert(const char* str, char** endptr, int base) { + return strtoul(str, endptr, base); + } }; template <> -struct StringToNumber -{ - static long long Convert(const char* str, char** endptr, int base) - { - return strtoll(str, endptr, base); - } +struct StringToNumber { + static long long Convert(const char* str, char** endptr, int base) { + return strtoll(str, endptr, base); + } }; template <> -struct StringToNumber -{ - static unsigned long long Convert(const char* str, char** endptr, int base) - { - return strtoull(str, endptr, base); - } +struct StringToNumber { + static unsigned long long Convert(const char* str, char** endptr, int base) { + return strtoull(str, endptr, base); + } }; template -bool ParseNumberT(const char* str, T* value, char** endptr, int base) -{ -// STATIC_ASSERT(TypeTraits::IsSignedInteger::Value == -// TypeTraits::IsSignedInteger::Value); -// STATIC_ASSERT(sizeof(T) <= sizeof(IntermediaType)); - - int old_errno = errno; - errno = 0; - IntermediaType number = StringToNumber::Convert(str, endptr, base); - if (errno != 0) - return false; - - if (sizeof(T) < sizeof(IntermediaType) && - (number > std::numeric_limits::max() || number < std::numeric_limits::min())) - { - errno = ERANGE; - return false; - } +bool ParseNumberT(const char* str, T* value, char** endptr, int base) { + // STATIC_ASSERT(TypeTraits::IsSignedInteger::Value == + // TypeTraits::IsSignedInteger::Value); + // STATIC_ASSERT(sizeof(T) <= sizeof(IntermediaType)); - if (*endptr == str) - { - errno = EINVAL; - return false; - } + int old_errno = errno; + errno = 0; + IntermediaType number = StringToNumber::Convert(str, endptr, base); + if (errno != 0) return false; - errno = old_errno; - *value = static_cast(number); - return true; -} + if (sizeof(T) < sizeof(IntermediaType) && + (number > std::numeric_limits::max() || number < std::numeric_limits::min())) { + errno = ERANGE; + return false; + } + + if (*endptr == str) { + errno = EINVAL; + return false; + } + errno = old_errno; + *value = static_cast(number); + return true; +} } -bool ParseNumber(const char* str, signed char* value, char** endptr, int base) -{ - return ParseNumberT(str, value, endptr, base); +bool ParseNumber(const char* str, signed char* value, char** endptr, int base) { + return ParseNumberT(str, value, endptr, base); } -bool ParseNumber(const char* str, unsigned char* value, char** endptr, int base) -{ - return ParseNumberT(str, value, endptr, base); +bool ParseNumber(const char* str, unsigned char* value, char** endptr, int base) { + return ParseNumberT(str, value, endptr, base); } -bool ParseNumber(const char* str, short* value, char** endptr, int base) -{ - return ParseNumberT(str, value, endptr, base); +bool ParseNumber(const char* str, short* value, char** endptr, int base) { + return ParseNumberT(str, value, endptr, base); } -bool ParseNumber(const char* str, unsigned short* value, char** endptr, int base) -{ - return ParseNumberT(str, value, endptr, base); +bool ParseNumber(const char* str, unsigned short* value, char** endptr, int base) { + return ParseNumberT(str, value, endptr, base); } -bool ParseNumber(const char* str, int* value, char** endptr, int base) -{ - return ParseNumberT(str, value, endptr, base); +bool ParseNumber(const char* str, int* value, char** endptr, int base) { + return ParseNumberT(str, value, endptr, base); } -bool ParseNumber(const char* str, unsigned int* value, char** endptr, int base) -{ - return ParseNumberT(str, value, endptr, base); +bool ParseNumber(const char* str, unsigned int* value, char** endptr, int base) { + return ParseNumberT(str, value, endptr, base); } -bool ParseNumber(const char* str, long* value, char** endptr, int base) -{ - return ParseNumberT(str, value, endptr, base); +bool ParseNumber(const char* str, long* value, char** endptr, int base) { + return ParseNumberT(str, value, endptr, base); } -bool ParseNumber(const char* str, unsigned long* value, char** endptr, int base) -{ - return ParseNumberT(str, value, endptr, base); +bool ParseNumber(const char* str, unsigned long* value, char** endptr, int base) { + return ParseNumberT(str, value, endptr, base); } -bool ParseNumber(const char* str, long long* value, char** endptr, int base) -{ - return ParseNumberT(str, value, endptr, base); +bool ParseNumber(const char* str, long long* value, char** endptr, int base) { + return ParseNumberT(str, value, endptr, base); } -bool ParseNumber(const char* str, unsigned long long* value, char** endptr, int base) -{ - return ParseNumberT(str, value, endptr, base); +bool ParseNumber(const char* str, unsigned long long* value, char** endptr, int base) { + return ParseNumberT(str, value, endptr, base); } -namespace -{ +namespace { -template struct StringToFloat { }; +template +struct StringToFloat {}; template <> -struct StringToFloat -{ - static float Convert(const char* str, char** endptr) - { - return strtof(str, endptr); - } +struct StringToFloat { + static float Convert(const char* str, char** endptr) { return strtof(str, endptr); } }; template <> -struct StringToFloat -{ - static double Convert(const char* str, char** endptr) - { - return strtod(str, endptr); - } +struct StringToFloat { + static double Convert(const char* str, char** endptr) { return strtod(str, endptr); } }; template <> -struct StringToFloat -{ - static long double Convert(const char* str, char** endptr) - { - return strtold(str, endptr); - } +struct StringToFloat { + static long double Convert(const char* str, char** endptr) { return strtold(str, endptr); } }; template -bool ParseFloatNumber(const char* str, T* value, char** endptr) -{ - int old_errno = errno; - errno = 0; - *value = StringToFloat::Convert(str, endptr); - if (errno != 0) - return false; - if (*endptr == str) - errno = EINVAL; - errno = old_errno; - return true; +bool ParseFloatNumber(const char* str, T* value, char** endptr) { + int old_errno = errno; + errno = 0; + *value = StringToFloat::Convert(str, endptr); + if (errno != 0) return false; + if (*endptr == str) errno = EINVAL; + errno = old_errno; + return true; } - } -bool ParseNumber(const char* str, float* value, char** endptr) -{ - return ParseFloatNumber(str, value, endptr); +bool ParseNumber(const char* str, float* value, char** endptr) { + return ParseFloatNumber(str, value, endptr); } -bool ParseNumber(const char* str, double* value, char** endptr) -{ - return ParseFloatNumber(str, value, endptr); +bool ParseNumber(const char* str, double* value, char** endptr) { + return ParseFloatNumber(str, value, endptr); } -bool ParseNumber(const char* str, long double* value, char** endptr) -{ - return ParseFloatNumber(str, value, endptr); +bool ParseNumber(const char* str, long double* value, char** endptr) { + return ParseFloatNumber(str, value, endptr); } // --------------------------------------------------------- // unsigned int to hex buffer or string. // --------------------------------------------------------- -static char *UIntToHexBufferInternal(uint64_t value, char* buffer, int num_byte) -{ - static const char hexdigits[] = "0123456789abcdef"; - int digit_byte = 2 * num_byte; - for (int i = digit_byte - 1; i >= 0; i--) - { - buffer[i] = hexdigits[uint32_t(value) & 0xf]; - value >>= 4; - } - return buffer + digit_byte; +static char* UIntToHexBufferInternal(uint64_t value, char* buffer, int num_byte) { + static const char hexdigits[] = "0123456789abcdef"; + int digit_byte = 2 * num_byte; + for (int i = digit_byte - 1; i >= 0; i--) { + buffer[i] = hexdigits[uint32_t(value) & 0xf]; + value >>= 4; + } + return buffer + digit_byte; } -char* WriteHexUInt16ToBuffer(uint16_t value, char* buffer) -{ - return UIntToHexBufferInternal(value, buffer, sizeof(value)); +char* WriteHexUInt16ToBuffer(uint16_t value, char* buffer) { + return UIntToHexBufferInternal(value, buffer, sizeof(value)); } -char* WriteHexUInt32ToBuffer(uint32_t value, char* buffer) -{ - return UIntToHexBufferInternal(value, buffer, sizeof(value)); +char* WriteHexUInt32ToBuffer(uint32_t value, char* buffer) { + return UIntToHexBufferInternal(value, buffer, sizeof(value)); } -char* WriteHexUInt64ToBuffer(uint64_t value, char* buffer) -{ - return UIntToHexBufferInternal(value, buffer, sizeof(value)); +char* WriteHexUInt64ToBuffer(uint64_t value, char* buffer) { + return UIntToHexBufferInternal(value, buffer, sizeof(value)); } -char* UInt16ToHexString(uint16_t value, char* buffer) -{ - *WriteHexUInt16ToBuffer(value, buffer) = '\0'; - return buffer; +char* UInt16ToHexString(uint16_t value, char* buffer) { + *WriteHexUInt16ToBuffer(value, buffer) = '\0'; + return buffer; } -char* UInt32ToHexString(uint32_t value, char* buffer) -{ - *WriteHexUInt32ToBuffer(value, buffer) = '\0'; - return buffer; +char* UInt32ToHexString(uint32_t value, char* buffer) { + *WriteHexUInt32ToBuffer(value, buffer) = '\0'; + return buffer; } -char* UInt64ToHexString(uint64_t value, char* buffer) -{ - *WriteHexUInt64ToBuffer(value, buffer) = '\0'; - return buffer; +char* UInt64ToHexString(uint64_t value, char* buffer) { + *WriteHexUInt64ToBuffer(value, buffer) = '\0'; + return buffer; } -string UInt16ToHexString(uint16_t value) -{ - char buffer[2*sizeof(value) + 1]; - return std::string(buffer, WriteHexUInt16ToBuffer(value, buffer)); +string UInt16ToHexString(uint16_t value) { + char buffer[2 * sizeof(value) + 1]; + return std::string(buffer, WriteHexUInt16ToBuffer(value, buffer)); } -string UInt32ToHexString(uint32_t value) -{ - char buffer[2*sizeof(value) + 1]; - return std::string(buffer, WriteHexUInt32ToBuffer(value, buffer)); +string UInt32ToHexString(uint32_t value) { + char buffer[2 * sizeof(value) + 1]; + return std::string(buffer, WriteHexUInt32ToBuffer(value, buffer)); } -string UInt64ToHexString(uint64_t value) -{ - char buffer[2*sizeof(value) + 1]; - return std::string(buffer, WriteHexUInt64ToBuffer(value, buffer)); +string UInt64ToHexString(uint64_t value) { + char buffer[2 * sizeof(value) + 1]; + return std::string(buffer, WriteHexUInt64ToBuffer(value, buffer)); } // ----------------------------------------------------------------- // Double to string or buffer. // Make sure buffer size >= kMaxDoubleStringSize // ----------------------------------------------------------------- -char* WriteDoubleToBuffer(double value, char* buffer) -{ - // DBL_DIG is 15 on almost all platforms. - // If it's too big, the buffer will overflow -// STATIC_ASSERT(DBL_DIG < 20, "DBL_DIG is too big"); +char* WriteDoubleToBuffer(double value, char* buffer) { + // DBL_DIG is 15 on almost all platforms. + // If it's too big, the buffer will overflow + // STATIC_ASSERT(DBL_DIG < 20, "DBL_DIG is too big"); - if (value >= numeric_limits::infinity()) - { - strcpy(buffer, "inf"); // NOLINT - return buffer + 3; - } - else if (value <= -numeric_limits::infinity()) - { - strcpy(buffer, "-inf"); // NOLINT - return buffer + 4; - } - else if (IsNaN(value)) - { - strcpy(buffer, "nan"); // NOLINT - return buffer + 3; - } + if (value >= numeric_limits::infinity()) { + strcpy(buffer, "inf"); // NOLINT + return buffer + 3; + } else if (value <= -numeric_limits::infinity()) { + strcpy(buffer, "-inf"); // NOLINT + return buffer + 4; + } else if (IsNaN(value)) { + strcpy(buffer, "nan"); // NOLINT + return buffer + 3; + } - return buffer + snprintf(buffer, kMaxDoubleStringSize, "%.*g", DBL_DIG, value); + return buffer + snprintf(buffer, kMaxDoubleStringSize, "%.*g", DBL_DIG, value); } // ------------------------------------------------------------- // Float to string or buffer. // Makesure buffer size >= kMaxFloatStringSize // ------------------------------------------------------------- -char* WriteFloatToBuffer(float value, char* buffer) -{ - // FLT_DIG is 6 on almost all platforms. - // If it's too big, the buffer will overflow -// STATIC_ASSERT(FLT_DIG < 10, "FLT_DIG is too big"); - if (value >= numeric_limits::infinity()) - { - strcpy(buffer, "inf"); // NOLINT - return buffer + 3; - } - else if (value <= -numeric_limits::infinity()) - { - strcpy(buffer, "-inf"); // NOLINT - return buffer + 4; - } - else if (IsNaN(value)) - { - strcpy(buffer, "nan"); // NOLINT - return buffer + 3; - } +char* WriteFloatToBuffer(float value, char* buffer) { + // FLT_DIG is 6 on almost all platforms. + // If it's too big, the buffer will overflow + // STATIC_ASSERT(FLT_DIG < 10, "FLT_DIG is too big"); + if (value >= numeric_limits::infinity()) { + strcpy(buffer, "inf"); // NOLINT + return buffer + 3; + } else if (value <= -numeric_limits::infinity()) { + strcpy(buffer, "-inf"); // NOLINT + return buffer + 4; + } else if (IsNaN(value)) { + strcpy(buffer, "nan"); // NOLINT + return buffer + 3; + } - return buffer + snprintf(buffer, kMaxFloatStringSize, "%.*g", FLT_DIG, value); + return buffer + snprintf(buffer, kMaxFloatStringSize, "%.*g", FLT_DIG, value); } -char* DoubleToString(double n, char* buffer) -{ - WriteDoubleToBuffer(n, buffer); - return buffer; +char* DoubleToString(double n, char* buffer) { + WriteDoubleToBuffer(n, buffer); + return buffer; } -char* FloatToString(float n, char* buffer) -{ - WriteFloatToBuffer(n, buffer); - return buffer; +char* FloatToString(float n, char* buffer) { + WriteFloatToBuffer(n, buffer); + return buffer; } -string DoubleToString(double value) -{ - char buffer[kMaxDoubleStringSize]; - return std::string(buffer, WriteDoubleToBuffer(value, buffer)); +string DoubleToString(double value) { + char buffer[kMaxDoubleStringSize]; + return std::string(buffer, WriteDoubleToBuffer(value, buffer)); } -string FloatToString(float value) -{ - char buffer[kMaxFloatStringSize]; - return std::string(buffer, WriteFloatToBuffer(value, buffer)); +string FloatToString(float value) { + char buffer[kMaxFloatStringSize]; + return std::string(buffer, WriteFloatToBuffer(value, buffer)); } // ------------------------------------------------------ // Int to string or buffer. // The following data and functions are for internal use. // ------------------------------------------------------ -static const char two_ASCII_digits[100][2] = { - {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, - {'0', '5'}, {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'}, - {'1', '0'}, {'1', '1'}, {'1', '2'}, {'1', '3'}, {'1', '4'}, - {'1', '5'}, {'1', '6'}, {'1', '7'}, {'1', '8'}, {'1', '9'}, - {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, {'2', '4'}, - {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'}, - {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, - {'3', '5'}, {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, - {'4', '0'}, {'4', '1'}, {'4', '2'}, {'4', '3'}, {'4', '4'}, - {'4', '5'}, {'4', '6'}, {'4', '7'}, {'4', '8'}, {'4', '9'}, - {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, {'5', '4'}, - {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'}, - {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, - {'6', '5'}, {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, - {'7', '0'}, {'7', '1'}, {'7', '2'}, {'7', '3'}, {'7', '4'}, - {'7', '5'}, {'7', '6'}, {'7', '7'}, {'7', '8'}, {'7', '9'}, - {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, {'8', '4'}, - {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'}, - {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, - {'9', '5'}, {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'} -}; - -template -static OutputIterator OutputUInt32AsString(uint32_t u, OutputIterator output) -{ - int digits; - const char *ASCII_digits = NULL; - if (u >= 1000000000) // >= 1,000,000,000 - { - digits = u / 100000000; // 100,000,000 - ASCII_digits = two_ASCII_digits[digits]; - *output++ = ASCII_digits[0]; - *output++ = ASCII_digits[1]; -sublt100_000_000: - u -= digits * 100000000; // 100,000,000 -lt100_000_000: - digits = u / 1000000; // 1,000,000 - ASCII_digits = two_ASCII_digits[digits]; - *output++ = ASCII_digits[0]; - *output++ = ASCII_digits[1]; -sublt1_000_000: - u -= digits * 1000000; // 1,000,000 -lt1_000_000: - digits = u / 10000; // 10,000 - ASCII_digits = two_ASCII_digits[digits]; - *output++ = ASCII_digits[0]; - *output++ = ASCII_digits[1]; -sublt10_000: - u -= digits * 10000; // 10,000 -lt10_000: - digits = u / 100; - ASCII_digits = two_ASCII_digits[digits]; - *output++ = ASCII_digits[0]; - *output++ = ASCII_digits[1]; -sublt100: - u -= digits * 100; -lt100: - digits = u; - ASCII_digits = two_ASCII_digits[digits]; - *output++ = ASCII_digits[0]; - *output++ = ASCII_digits[1]; -done: - return output; - } - - if (u < 100) - { - digits = u; - if (u >= 10) goto lt100; - *output++ = '0' + digits; - goto done; - } - if (u < 10000) // 10,000 - { - if (u >= 1000) goto lt10_000; - digits = u / 100; - *output++ = '0' + digits; - goto sublt100; - } - if (u < 1000000) // 1,000,000 - { - if (u >= 100000) goto lt1_000_000; - digits = u / 10000; // 10,000 - *output++ = '0' + digits; - goto sublt10_000; - } - if (u < 100000000) // 100,000,000 - { - if (u >= 10000000) goto lt100_000_000; - digits = u / 1000000; // 1,000,000 - *output++ = '0' + digits; - goto sublt1_000_000; - } - // u < 1,000,000,000 - digits = u / 100000000; // 100,000,000 - *output++ = '0' + digits; - goto sublt100_000_000; -} - -template -OutputIterator OutputInt32AsString(int32_t i, OutputIterator output) -{ - uint32_t u = i; - if (i < 0) - { - *output++ = '-'; - u = -i; - } - return OutputUInt32AsString(u, output); -} +static const char two_ASCII_digits[100][2] = {{'0', '0'}, + {'0', '1'}, + {'0', '2'}, + {'0', '3'}, + {'0', '4'}, + {'0', '5'}, + {'0', '6'}, + {'0', '7'}, + {'0', '8'}, + {'0', '9'}, + {'1', '0'}, + {'1', '1'}, + {'1', '2'}, + {'1', '3'}, + {'1', '4'}, + {'1', '5'}, + {'1', '6'}, + {'1', '7'}, + {'1', '8'}, + {'1', '9'}, + {'2', '0'}, + {'2', '1'}, + {'2', '2'}, + {'2', '3'}, + {'2', '4'}, + {'2', '5'}, + {'2', '6'}, + {'2', '7'}, + {'2', '8'}, + {'2', '9'}, + {'3', '0'}, + {'3', '1'}, + {'3', '2'}, + {'3', '3'}, + {'3', '4'}, + {'3', '5'}, + {'3', '6'}, + {'3', '7'}, + {'3', '8'}, + {'3', '9'}, + {'4', '0'}, + {'4', '1'}, + {'4', '2'}, + {'4', '3'}, + {'4', '4'}, + {'4', '5'}, + {'4', '6'}, + {'4', '7'}, + {'4', '8'}, + {'4', '9'}, + {'5', '0'}, + {'5', '1'}, + {'5', '2'}, + {'5', '3'}, + {'5', '4'}, + {'5', '5'}, + {'5', '6'}, + {'5', '7'}, + {'5', '8'}, + {'5', '9'}, + {'6', '0'}, + {'6', '1'}, + {'6', '2'}, + {'6', '3'}, + {'6', '4'}, + {'6', '5'}, + {'6', '6'}, + {'6', '7'}, + {'6', '8'}, + {'6', '9'}, + {'7', '0'}, + {'7', '1'}, + {'7', '2'}, + {'7', '3'}, + {'7', '4'}, + {'7', '5'}, + {'7', '6'}, + {'7', '7'}, + {'7', '8'}, + {'7', '9'}, + {'8', '0'}, + {'8', '1'}, + {'8', '2'}, + {'8', '3'}, + {'8', '4'}, + {'8', '5'}, + {'8', '6'}, + {'8', '7'}, + {'8', '8'}, + {'8', '9'}, + {'9', '0'}, + {'9', '1'}, + {'9', '2'}, + {'9', '3'}, + {'9', '4'}, + {'9', '5'}, + {'9', '6'}, + {'9', '7'}, + {'9', '8'}, + {'9', '9'}}; template -OutputIterator OutputUInt64AsString(uint64_t u64, OutputIterator output) -{ - int digits; - const char *ASCII_digits = NULL; - - uint32_t u = static_cast(u64); - if (u == u64) return OutputUInt32AsString(u, output); - - uint64_t top_11_digits = u64 / 1000000000; - output = OutputUInt64AsString(top_11_digits, output); - u = static_cast(u64 - (top_11_digits * 1000000000)); - - digits = u / 10000000; // 10,000,000 +static OutputIterator OutputUInt32AsString(uint32_t u, OutputIterator output) { + int digits; + const char* ASCII_digits = NULL; + if (u >= 1000000000) // >= 1,000,000,000 + { + digits = u / 100000000; // 100,000,000 ASCII_digits = two_ASCII_digits[digits]; *output++ = ASCII_digits[0]; *output++ = ASCII_digits[1]; - u -= digits * 10000000; // 10,000,000 - digits = u / 100000; // 100,000 + sublt100_000_000: + u -= digits * 100000000; // 100,000,000 + lt100_000_000: + digits = u / 1000000; // 1,000,000 ASCII_digits = two_ASCII_digits[digits]; *output++ = ASCII_digits[0]; *output++ = ASCII_digits[1]; - u -= digits * 100000; // 100,000 - digits = u / 1000; // 1,000 + sublt1_000_000: + u -= digits * 1000000; // 1,000,000 + lt1_000_000: + digits = u / 10000; // 10,000 ASCII_digits = two_ASCII_digits[digits]; *output++ = ASCII_digits[0]; *output++ = ASCII_digits[1]; - u -= digits * 1000; // 1,000 - digits = u / 10; + sublt10_000: + u -= digits * 10000; // 10,000 + lt10_000: + digits = u / 100; ASCII_digits = two_ASCII_digits[digits]; *output++ = ASCII_digits[0]; *output++ = ASCII_digits[1]; - u -= digits * 10; + sublt100: + u -= digits * 100; + lt100: digits = u; - *output++ = '0' + digits; + ASCII_digits = two_ASCII_digits[digits]; + *output++ = ASCII_digits[0]; + *output++ = ASCII_digits[1]; + done: return output; + } + + if (u < 100) { + digits = u; + if (u >= 10) goto lt100; + *output++ = '0' + digits; + goto done; + } + if (u < 10000) // 10,000 + { + if (u >= 1000) goto lt10_000; + digits = u / 100; + *output++ = '0' + digits; + goto sublt100; + } + if (u < 1000000) // 1,000,000 + { + if (u >= 100000) goto lt1_000_000; + digits = u / 10000; // 10,000 + *output++ = '0' + digits; + goto sublt10_000; + } + if (u < 100000000) // 100,000,000 + { + if (u >= 10000000) goto lt100_000_000; + digits = u / 1000000; // 1,000,000 + *output++ = '0' + digits; + goto sublt1_000_000; + } + // u < 1,000,000,000 + digits = u / 100000000; // 100,000,000 + *output++ = '0' + digits; + goto sublt100_000_000; } template -OutputIterator OutputInt64AsString(int64_t i, OutputIterator output) -{ - uint64_t u = i; - if (i < 0) - { - *output++ = '-'; - u = -i; - } - return OutputUInt64AsString(u, output); +OutputIterator OutputInt32AsString(int32_t i, OutputIterator output) { + uint32_t u = i; + if (i < 0) { + *output++ = '-'; + u = -i; + } + return OutputUInt32AsString(u, output); +} + +template +OutputIterator OutputUInt64AsString(uint64_t u64, OutputIterator output) { + int digits; + const char* ASCII_digits = NULL; + + uint32_t u = static_cast(u64); + if (u == u64) return OutputUInt32AsString(u, output); + + uint64_t top_11_digits = u64 / 1000000000; + output = OutputUInt64AsString(top_11_digits, output); + u = static_cast(u64 - (top_11_digits * 1000000000)); + + digits = u / 10000000; // 10,000,000 + ASCII_digits = two_ASCII_digits[digits]; + *output++ = ASCII_digits[0]; + *output++ = ASCII_digits[1]; + u -= digits * 10000000; // 10,000,000 + digits = u / 100000; // 100,000 + ASCII_digits = two_ASCII_digits[digits]; + *output++ = ASCII_digits[0]; + *output++ = ASCII_digits[1]; + u -= digits * 100000; // 100,000 + digits = u / 1000; // 1,000 + ASCII_digits = two_ASCII_digits[digits]; + *output++ = ASCII_digits[0]; + *output++ = ASCII_digits[1]; + u -= digits * 1000; // 1,000 + digits = u / 10; + ASCII_digits = two_ASCII_digits[digits]; + *output++ = ASCII_digits[0]; + *output++ = ASCII_digits[1]; + u -= digits * 10; + digits = u; + *output++ = '0' + digits; + return output; +} + +template +OutputIterator OutputInt64AsString(int64_t i, OutputIterator output) { + uint64_t u = i; + if (i < 0) { + *output++ = '-'; + u = -i; + } + return OutputUInt64AsString(u, output); } /////////////////////////////////////////////////////////////////////////// // generic interface template -OutputIterator OutputIntegerAsString(int n, OutputIterator output) -{ - return OutputInt32AsString(n, output); +OutputIterator OutputIntegerAsString(int n, OutputIterator output) { + return OutputInt32AsString(n, output); } template -OutputIterator OutputIntegerAsString(unsigned int n, OutputIterator output) -{ - return OutputUInt32AsString(n, output); +OutputIterator OutputIntegerAsString(unsigned int n, OutputIterator output) { + return OutputUInt32AsString(n, output); } template -OutputIterator OutputIntegerAsString(long n, OutputIterator output) -{ - return sizeof(n) == 4 ? - OutputInt32AsString(static_cast(n), output): - OutputInt64AsString(static_cast(n), output); +OutputIterator OutputIntegerAsString(long n, OutputIterator output) { + return sizeof(n) == 4 ? OutputInt32AsString(static_cast(n), output) + : OutputInt64AsString(static_cast(n), output); } template -OutputIterator OutputIntegerAsString(unsigned long n, OutputIterator output) -{ - return sizeof(n) == 4 ? - OutputUInt32AsString(static_cast(n), output): - OutputUInt64AsString(static_cast(n), output); +OutputIterator OutputIntegerAsString(unsigned long n, OutputIterator output) { + return sizeof(n) == 4 ? OutputUInt32AsString(static_cast(n), output) + : OutputUInt64AsString(static_cast(n), output); } template -OutputIterator OutputIntegerAsString(long long n, OutputIterator output) -{ - return sizeof(n) == 4 ? - OutputInt32AsString(static_cast(n), output): - OutputInt64AsString(static_cast(n), output); +OutputIterator OutputIntegerAsString(long long n, OutputIterator output) { + return sizeof(n) == 4 ? OutputInt32AsString(static_cast(n), output) + : OutputInt64AsString(static_cast(n), output); } template -OutputIterator OutputIntegerAsString(unsigned long long n, OutputIterator output) -{ - return sizeof(n) == 4 ? - OutputUInt32AsString(static_cast(n), output): - OutputUInt64AsString(static_cast(n), output); +OutputIterator OutputIntegerAsString(unsigned long long n, OutputIterator output) { + return sizeof(n) == 4 ? OutputUInt32AsString(static_cast(n), output) + : OutputUInt64AsString(static_cast(n), output); } template -class CountOutputIterator -{ -public: - CountOutputIterator() : m_count(0) {} - CountOutputIterator& operator++() - { - ++m_count; - return *this; - } - CountOutputIterator operator++(int) - { - CountOutputIterator org(*this); - ++*this; - return org; - } - CountOutputIterator& operator*() - { - return *this; - } - CountOutputIterator& operator=(T value) - { - return *this; - } - size_t Count() const - { - return m_count; - } -private: - size_t m_count; +class CountOutputIterator { + public: + CountOutputIterator() : m_count(0) {} + CountOutputIterator& operator++() { + ++m_count; + return *this; + } + CountOutputIterator operator++(int) { + CountOutputIterator org(*this); + ++*this; + return org; + } + CountOutputIterator& operator*() { return *this; } + CountOutputIterator& operator=(T value) { return *this; } + size_t Count() const { return m_count; } + + private: + size_t m_count; }; -size_t IntegerStringLength(int n) -{ - return OutputIntegerAsString(n, CountOutputIterator()).Count(); +size_t IntegerStringLength(int n) { + return OutputIntegerAsString(n, CountOutputIterator()).Count(); } /// output n to buffer as string /// @return end position /// @note buffer must be large enougn, and no ending '\0' append -char* WriteUInt32ToBuffer(uint32_t n, char* buffer) -{ - return OutputUInt32AsString(n, buffer); -} +char* WriteUInt32ToBuffer(uint32_t n, char* buffer) { return OutputUInt32AsString(n, buffer); } /// output n to buffer as string /// @return end position /// @note buffer must be large enougn, and no ending '\0' append -char* WriteInt32ToBuffer(int32_t n, char* buffer) -{ - return OutputInt32AsString(n, buffer); -} +char* WriteInt32ToBuffer(int32_t n, char* buffer) { return OutputInt32AsString(n, buffer); } -char* WriteUInt64ToBuffer(uint64_t n, char* buffer) -{ - return OutputUInt64AsString(n, buffer); -} +char* WriteUInt64ToBuffer(uint64_t n, char* buffer) { return OutputUInt64AsString(n, buffer); } -char* WriteInt64ToBuffer(int64_t n, char* buffer) -{ - return OutputInt64AsString(n, buffer); -} +char* WriteInt64ToBuffer(int64_t n, char* buffer) { return OutputInt64AsString(n, buffer); } -char* WriteIntegerToBuffer(int n, char* buffer) -{ - return OutputIntegerAsString(n, buffer); -} +char* WriteIntegerToBuffer(int n, char* buffer) { return OutputIntegerAsString(n, buffer); } -char* WriteIntegerToBuffer(unsigned int n, char* buffer) -{ - return OutputIntegerAsString(n, buffer); +char* WriteIntegerToBuffer(unsigned int n, char* buffer) { + return OutputIntegerAsString(n, buffer); } -char* WriteIntegerToBuffer(long n, char* buffer) -{ - return OutputIntegerAsString(n, buffer); -} +char* WriteIntegerToBuffer(long n, char* buffer) { return OutputIntegerAsString(n, buffer); } -char* WriteIntegerToBuffer(unsigned long n, char* buffer) -{ - return OutputIntegerAsString(n, buffer); +char* WriteIntegerToBuffer(unsigned long n, char* buffer) { + return OutputIntegerAsString(n, buffer); } -char* WriteIntegerToBuffer(long long n, char* buffer) -{ - return OutputIntegerAsString(n, buffer); -} +char* WriteIntegerToBuffer(long long n, char* buffer) { return OutputIntegerAsString(n, buffer); } -char* WriteIntegerToBuffer(unsigned long long n, char* buffer) -{ - return OutputIntegerAsString(n, buffer); +char* WriteIntegerToBuffer(unsigned long long n, char* buffer) { + return OutputIntegerAsString(n, buffer); } -void AppendIntegerToString(int n, std::string* str) -{ - OutputIntegerAsString(n, std::back_inserter(*str)); +void AppendIntegerToString(int n, std::string* str) { + OutputIntegerAsString(n, std::back_inserter(*str)); } -void AppendIntegerToString(unsigned int n, std::string* str) -{ - OutputIntegerAsString(n, std::back_inserter(*str)); +void AppendIntegerToString(unsigned int n, std::string* str) { + OutputIntegerAsString(n, std::back_inserter(*str)); } -void AppendIntegerToString(long n, std::string* str) -{ - OutputIntegerAsString(n, std::back_inserter(*str)); +void AppendIntegerToString(long n, std::string* str) { + OutputIntegerAsString(n, std::back_inserter(*str)); } -void AppendIntegerToString(unsigned long n, std::string* str) -{ - OutputIntegerAsString(n, std::back_inserter(*str)); +void AppendIntegerToString(unsigned long n, std::string* str) { + OutputIntegerAsString(n, std::back_inserter(*str)); } -void AppendIntegerToString(long long n, std::string* str) -{ - OutputIntegerAsString(n, std::back_inserter(*str)); +void AppendIntegerToString(long long n, std::string* str) { + OutputIntegerAsString(n, std::back_inserter(*str)); } -void AppendIntegerToString(unsigned long long n, std::string* str) -{ - OutputIntegerAsString(n, std::back_inserter(*str)); +void AppendIntegerToString(unsigned long long n, std::string* str) { + OutputIntegerAsString(n, std::back_inserter(*str)); } /////////////////////////////////////////////////////////////////////////// // output number to buffer as string, with ending '\0' -char* UInt32ToString(uint32_t u, char* buffer) -{ - *OutputUInt32AsString(u, buffer) = '\0'; - return buffer; +char* UInt32ToString(uint32_t u, char* buffer) { + *OutputUInt32AsString(u, buffer) = '\0'; + return buffer; } -char* Int32ToString(int32_t i, char* buffer) -{ - *OutputInt32AsString(i, buffer) = '\0'; - return buffer; +char* Int32ToString(int32_t i, char* buffer) { + *OutputInt32AsString(i, buffer) = '\0'; + return buffer; } -char* UInt64ToString(uint64_t u64, char* buffer) -{ - *OutputUInt64AsString(u64, buffer) = '\0'; - return buffer; +char* UInt64ToString(uint64_t u64, char* buffer) { + *OutputUInt64AsString(u64, buffer) = '\0'; + return buffer; } -char* Int64ToString(int64_t i, char* buffer) -{ - *OutputInt64AsString(i, buffer) = '\0'; - return buffer; +char* Int64ToString(int64_t i, char* buffer) { + *OutputInt64AsString(i, buffer) = '\0'; + return buffer; } // ----------------------------------------------------- // interface for int to string or buffer // Make sure the buffer is big enough // ----------------------------------------------------- -char* IntegerToString(int i, char* buffer) -{ - *OutputIntegerAsString(i, buffer) = '\0'; - return buffer; +char* IntegerToString(int i, char* buffer) { + *OutputIntegerAsString(i, buffer) = '\0'; + return buffer; } -char* IntegerToString(unsigned int i, char* buffer) -{ - *OutputIntegerAsString(i, buffer) = '\0'; - return buffer; +char* IntegerToString(unsigned int i, char* buffer) { + *OutputIntegerAsString(i, buffer) = '\0'; + return buffer; } -char* IntegerToString(long i, char* buffer) -{ - *OutputIntegerAsString(i, buffer) = '\0'; - return buffer; +char* IntegerToString(long i, char* buffer) { + *OutputIntegerAsString(i, buffer) = '\0'; + return buffer; } -char* IntegerToString(unsigned long i, char* buffer) -{ - *OutputIntegerAsString(i, buffer) = '\0'; - return buffer; +char* IntegerToString(unsigned long i, char* buffer) { + *OutputIntegerAsString(i, buffer) = '\0'; + return buffer; } -char* IntegerToString(long long i, char* buffer) -{ - *OutputIntegerAsString(i, buffer) = '\0'; - return buffer; +char* IntegerToString(long long i, char* buffer) { + *OutputIntegerAsString(i, buffer) = '\0'; + return buffer; } -char* IntegerToString(unsigned long long i, char* buffer) -{ - *OutputIntegerAsString(i, buffer) = '\0'; - return buffer; +char* IntegerToString(unsigned long long i, char* buffer) { + *OutputIntegerAsString(i, buffer) = '\0'; + return buffer; } -string IntegerToString(int i) -{ - char buffer[kMaxIntegerStringSize]; - return std::string(buffer, OutputIntegerAsString(i, buffer) - buffer); +string IntegerToString(int i) { + char buffer[kMaxIntegerStringSize]; + return std::string(buffer, OutputIntegerAsString(i, buffer) - buffer); } -string IntegerToString(long i) -{ - char buffer[kMaxIntegerStringSize]; - return std::string(buffer, OutputIntegerAsString(i, buffer) - buffer); +string IntegerToString(long i) { + char buffer[kMaxIntegerStringSize]; + return std::string(buffer, OutputIntegerAsString(i, buffer) - buffer); } -string IntegerToString(long long i) -{ - char buffer[kMaxIntegerStringSize]; - return std::string(buffer, OutputIntegerAsString(i, buffer) - buffer); +string IntegerToString(long long i) { + char buffer[kMaxIntegerStringSize]; + return std::string(buffer, OutputIntegerAsString(i, buffer) - buffer); } -string IntegerToString(unsigned int i) -{ - char buffer[kMaxIntegerStringSize]; - return std::string(buffer, OutputIntegerAsString(i, buffer) - buffer); +string IntegerToString(unsigned int i) { + char buffer[kMaxIntegerStringSize]; + return std::string(buffer, OutputIntegerAsString(i, buffer) - buffer); } -string IntegerToString(unsigned long i) -{ - char buffer[kMaxIntegerStringSize]; - return std::string(buffer, OutputIntegerAsString(i, buffer) - buffer); +string IntegerToString(unsigned long i) { + char buffer[kMaxIntegerStringSize]; + return std::string(buffer, OutputIntegerAsString(i, buffer) - buffer); } -string IntegerToString(unsigned long long i) -{ - char buffer[kMaxIntegerStringSize]; - return std::string(buffer, OutputIntegerAsString(i, buffer) - buffer); +string IntegerToString(unsigned long long i) { + char buffer[kMaxIntegerStringSize]; + return std::string(buffer, OutputIntegerAsString(i, buffer) - buffer); } ////////////////////////////////////////////////////////////////////////////// @@ -801,118 +732,89 @@ string IntegerToString(unsigned long long i) namespace { template -void GetMantissaAndShift( - double number, - int min_shift, - int max_shift, - double* mantissa, - int* shift - ) -{ - double n = number; - *shift = 0; +void GetMantissaAndShift(double number, int min_shift, int max_shift, double* mantissa, + int* shift) { + double n = number; + *shift = 0; - if (isnan(n) || isinf(n)) - { - *mantissa = n; - return; - } + if (isnan(n) || isinf(n)) { + *mantissa = n; + return; + } - if (n >= 1) - { - while (n >= Base) - { - n /= Base; - ++*shift; - } + if (n >= 1) { + while (n >= Base) { + n /= Base; + ++*shift; } - else + } else { + if (n > 0 || n < 0) // bypass float-equal warning { - if (n > 0 || n < 0) // bypass float-equal warning - { - while (n < 1) - { - n *= Base; - --*shift; - } - } + while (n < 1) { + n *= Base; + --*shift; + } } + } - if (*shift < min_shift) - { - n = number; - *shift = 0; - } - else if (*shift > max_shift) - { - n = number; - *shift = 0; - } + if (*shift < min_shift) { + n = number; + *shift = 0; + } else if (*shift > max_shift) { + n = number; + *shift = 0; + } - *mantissa = n; + *mantissa = n; } template -std::string NumberToHumanReadableString( - T number, - const char* const*prefixes, - const char* unit, - int min_shift, - int max_shift - ) -{ - bool neg = number < 0; - double n = fabs(number); - - int shift; - GetMantissaAndShift(n, min_shift, max_shift, &n, &shift); - - const char* sep = ""; - if (unit[0] == ' ') - { - ++unit; - // ignore unit if it is " " and prefix is unnecessary - if (shift != 0 || unit[0] != '\0') - sep = " "; - } +std::string NumberToHumanReadableString(T number, const char* const* prefixes, const char* unit, + int min_shift, int max_shift) { + bool neg = number < 0; + double n = fabs(number); + + int shift; + GetMantissaAndShift(n, min_shift, max_shift, &n, &shift); + + const char* sep = ""; + if (unit[0] == ' ') { + ++unit; + // ignore unit if it is " " and prefix is unnecessary + if (shift != 0 || unit[0] != '\0') sep = " "; + } - char buffer[16]; - int length = snprintf(buffer, sizeof(buffer), "%s%.*g%s%s", // NOLINT(runtime/printf) - neg ? "-": "", n < 1000 ? 3 : 4, n, sep, - prefixes[shift]); - std::string result(buffer, length); - result += unit; - return result; + char buffer[16]; + int length = snprintf(buffer, sizeof(buffer), "%s%.*g%s%s", // NOLINT(runtime/printf) + neg ? "-" : "", n < 1000 ? 3 : 4, n, sep, prefixes[shift]); + std::string result(buffer, length); + result += unit; + return result; } -} // anonymous namespace +} // anonymous namespace -std::string FormatMeasure(double n, const char* unit) -{ - // see http://zh.wikipedia.org/wiki/%E5%9B%BD%E9%99%85%E5%8D%95%E4%BD%8D%E5%88%B6%E8%AF%8D%E5%A4%B4 - static const char* const base_prefixes[] = { - "y", "z", "a", "f", "p", "n", "u", "m", // negative exponential - "", "k", "M", "G", "T", "P", "E", "Z", "Y" - }; - static const int negative_prefixes_size = 8; - static const int prefixes_size = ARRAY_SIZE(base_prefixes); +std::string FormatMeasure(double n, const char* unit) { + // see + // http://zh.wikipedia.org/wiki/%E5%9B%BD%E9%99%85%E5%8D%95%E4%BD%8D%E5%88%B6%E8%AF%8D%E5%A4%B4 + static const char* const base_prefixes[] = { + "y", "z", "a", "f", "p", "n", "u", "m", // negative exponential + "", "k", "M", "G", "T", "P", "E", "Z", "Y"}; + static const int negative_prefixes_size = 8; + static const int prefixes_size = ARRAY_SIZE(base_prefixes); - const char* const* prefixes = base_prefixes + negative_prefixes_size; + const char* const* prefixes = base_prefixes + negative_prefixes_size; - return NumberToHumanReadableString( - n, prefixes, unit, -negative_prefixes_size, - prefixes_size - negative_prefixes_size - 1); + return NumberToHumanReadableString(n, prefixes, unit, -negative_prefixes_size, + prefixes_size - negative_prefixes_size - 1); } -std::string FormatBinaryMeasure(int64_t n, const char* unit) -{ - // see http://zh.wikipedia.org/wiki/%E4%BA%8C%E8%BF%9B%E5%88%B6%E4%B9%98%E6%95%B0%E8%AF%8D%E5%A4%B4 - static const char* const prefixes[] = { - "", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" - }; +std::string FormatBinaryMeasure(int64_t n, const char* unit) { + // see + // http://zh.wikipedia.org/wiki/%E4%BA%8C%E8%BF%9B%E5%88%B6%E4%B9%98%E6%95%B0%E8%AF%8D%E5%A4%B4 + static const char* const prefixes[] = {"", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"}; - return NumberToHumanReadableString( - n, prefixes, unit, 0, ARRAY_SIZE(prefixes) - 1); + return NumberToHumanReadableString(n, prefixes, unit, 0, ARRAY_SIZE(prefixes) - 1); } // } // namespace common diff --git a/src/common/base/string_number.h b/src/common/base/string_number.h index a9f8be487..510e8a018 100644 --- a/src/common/base/string_number.h +++ b/src/common/base/string_number.h @@ -45,15 +45,13 @@ bool ParseNumber(const char* str, long double* value, char** endptr); /// @brief interface for parsing string to number /// --------------------------------------------------------------- template -bool ParseNumber(const std::string& str, Type* value, char** endptr, int base) -{ - return ParseNumber(str.c_str(), value, endptr, base); +bool ParseNumber(const std::string& str, Type* value, char** endptr, int base) { + return ParseNumber(str.c_str(), value, endptr, base); } template -bool ParseNumber(const std::string& str, Type* value, char** endptr) -{ - return ParseNumber(str.c_str(), value, endptr); +bool ParseNumber(const std::string& str, Type* value, char** endptr) { + return ParseNumber(str.c_str(), value, endptr); } /// --------------------------------------------------------------- @@ -61,37 +59,35 @@ bool ParseNumber(const std::string& str, Type* value, char** endptr) /// @return true if total string is successfully parsed. /// --------------------------------------------------------------- template -bool StringToNumber(const std::string& str, Type* value, int base) -{ -// STATIC_ASSERT(TypeTraits::IsInteger::Value, "Type must be integral type"); - char* endptr; - bool ret = ParseNumber(str.c_str(), value, &endptr, base); - return (ret && *endptr == '\0'); +bool StringToNumber(const std::string& str, Type* value, int base) { + // STATIC_ASSERT(TypeTraits::IsInteger::Value, "Type must be + // integral type"); + char* endptr; + bool ret = ParseNumber(str.c_str(), value, &endptr, base); + return (ret && *endptr == '\0'); } template -bool StringToNumber(const char* str, Type* value, int base) -{ -// STATIC_ASSERT(TypeTraits::IsInteger::Value, "Type must be integral type"); - char* endptr; - bool ret = ParseNumber(str, value, &endptr, base); - return (ret && *endptr == '\0'); +bool StringToNumber(const char* str, Type* value, int base) { + // STATIC_ASSERT(TypeTraits::IsInteger::Value, "Type must be + // integral type"); + char* endptr; + bool ret = ParseNumber(str, value, &endptr, base); + return (ret && *endptr == '\0'); } template -bool StringToNumber(const std::string& str, Type* value) -{ - char* endptr; - bool ret = ParseNumber(str.c_str(), value, &endptr); - return (ret && *endptr == '\0'); +bool StringToNumber(const std::string& str, Type* value) { + char* endptr; + bool ret = ParseNumber(str.c_str(), value, &endptr); + return (ret && *endptr == '\0'); } template -bool StringToNumber(const char* str, Type* value) -{ - char* endptr; - bool ret = ParseNumber(str, value, &endptr); - return (ret && *endptr == '\0'); +bool StringToNumber(const char* str, Type* value) { + char* endptr; + bool ret = ParseNumber(str, value, &endptr); + return (ret && *endptr == '\0'); } /// --------------------------------------------------------------- @@ -103,10 +99,7 @@ const int kMaxFloatStringSize = 24; const int kMaxIntStringSize = kMaxIntegerStringSize; /// @brief judge a number if it's nan -inline bool IsNaN(double value) -{ - return !(value > value) && !(value <= value); -} +inline bool IsNaN(double value) { return !(value > value) && !(value <= value); } /// @brief write number to buffer as string /// @return end of result @@ -212,4 +205,4 @@ std::string FormatBinaryMeasure(int64_t n, const char* unit = ""); // } // namespace common -#endif // TERA_COMMON_BASE_STRING_STRING_NUMBER_H_ +#endif // TERA_COMMON_BASE_STRING_STRING_NUMBER_H_ diff --git a/src/common/base/test/ascii_test.cc b/src/common/base/test/ascii_test.cc index 766f2b84f..88bb1515c 100644 --- a/src/common/base/test/ascii_test.cc +++ b/src/common/base/test/ascii_test.cc @@ -10,18 +10,14 @@ // namespace common { -TEST(Ascii, Init) -{ - setlocale(LC_ALL, "C"); -} - -#define ASCII_TEST_CTYPE_FUNCTION_EQUIVALENCE(type, New, std) \ -TEST(Ascii, New) \ -{ \ - for (int c = 0; c <= UCHAR_MAX; ++c) \ - EXPECT_EQ(static_cast(std(c)), Ascii::New(c)) \ - << c << "(" << (isprint(c) ? static_cast(c): ' ') << ")"; \ -} +TEST(Ascii, Init) { setlocale(LC_ALL, "C"); } + +#define ASCII_TEST_CTYPE_FUNCTION_EQUIVALENCE(type, New, std) \ + TEST(Ascii, New) { \ + for (int c = 0; c <= UCHAR_MAX; ++c) \ + EXPECT_EQ(static_cast(std(c)), Ascii::New(c)) \ + << c << "(" << (isprint(c) ? static_cast(c) : ' ') << ")"; \ + } ASCII_TEST_CTYPE_FUNCTION_EQUIVALENCE(bool, IsLower, islower) ASCII_TEST_CTYPE_FUNCTION_EQUIVALENCE(bool, IsUpper, isupper) @@ -30,7 +26,7 @@ ASCII_TEST_CTYPE_FUNCTION_EQUIVALENCE(bool, IsDigit, isdigit) ASCII_TEST_CTYPE_FUNCTION_EQUIVALENCE(bool, IsAlphaNumber, isalnum) ASCII_TEST_CTYPE_FUNCTION_EQUIVALENCE(bool, IsHexDigit, isxdigit) -#ifdef __GNUC__ // windows has no function named 'isblank' +#ifdef __GNUC__ // windows has no function named 'isblank' ASCII_TEST_CTYPE_FUNCTION_EQUIVALENCE(bool, IsBlank, isblank) #endif diff --git a/src/common/base/test/byte_order_test.cc b/src/common/base/test/byte_order_test.cc index 9fe9ba9bd..57dd5568d 100644 --- a/src/common/base/test/byte_order_test.cc +++ b/src/common/base/test/byte_order_test.cc @@ -7,114 +7,102 @@ #include #include "thirdparty/gtest/gtest.h" -TEST(ByteOrder, SwapShort) -{ - short x = 0x1234; - EXPECT_EQ(0x3412, ByteOrder::Swap(x)); - ByteOrder::Swap(&x); - EXPECT_EQ(0x3412, x); +TEST(ByteOrder, SwapShort) { + short x = 0x1234; + EXPECT_EQ(0x3412, ByteOrder::Swap(x)); + ByteOrder::Swap(&x); + EXPECT_EQ(0x3412, x); } -TEST(ByteOrder, SwapUShort) -{ - unsigned short x = 0x1234; - EXPECT_EQ(0x3412, ByteOrder::Swap(x)); - ByteOrder::Swap(&x); - EXPECT_EQ(0x3412, x); +TEST(ByteOrder, SwapUShort) { + unsigned short x = 0x1234; + EXPECT_EQ(0x3412, ByteOrder::Swap(x)); + ByteOrder::Swap(&x); + EXPECT_EQ(0x3412, x); } -TEST(ByteOrder, SwapInt) -{ - int x = 0x12345678; - EXPECT_EQ(0x78563412, ByteOrder::Swap(x)); - ByteOrder::Swap(&x); - EXPECT_EQ(0x78563412, x); +TEST(ByteOrder, SwapInt) { + int x = 0x12345678; + EXPECT_EQ(0x78563412, ByteOrder::Swap(x)); + ByteOrder::Swap(&x); + EXPECT_EQ(0x78563412, x); } -TEST(ByteOrder, SwapUInt) -{ - unsigned int x = 0x12345678U; - EXPECT_EQ(0x78563412U, ByteOrder::Swap(x)); - ByteOrder::Swap(&x); - EXPECT_EQ(0x78563412U, x); +TEST(ByteOrder, SwapUInt) { + unsigned int x = 0x12345678U; + EXPECT_EQ(0x78563412U, ByteOrder::Swap(x)); + ByteOrder::Swap(&x); + EXPECT_EQ(0x78563412U, x); } -TEST(ByteOrder, SwapLong) -{ +TEST(ByteOrder, SwapLong) { #ifdef _LP64 - long x = 0x0123456789ABCDEFL; - long expected = 0xEFCDAB8967452301L; + long x = 0x0123456789ABCDEFL; + long expected = 0xEFCDAB8967452301L; #else - long x = 0x12345678L; - long expected = 0x78563412L; + long x = 0x12345678L; + long expected = 0x78563412L; #endif - EXPECT_EQ(expected, ByteOrder::Swap(x)); - ByteOrder::Swap(&x); - EXPECT_EQ(expected, x); + EXPECT_EQ(expected, ByteOrder::Swap(x)); + ByteOrder::Swap(&x); + EXPECT_EQ(expected, x); } -TEST(ByteOrder, SwapULong) -{ +TEST(ByteOrder, SwapULong) { #ifdef _LP64 - unsigned long x = 0x0123456789ABCDEFULL; - unsigned long expected = 0xEFCDAB8967452301L; + unsigned long x = 0x0123456789ABCDEFULL; + unsigned long expected = 0xEFCDAB8967452301L; #else - unsigned long x = 0x12345678L; - unsigned long expected = 0x78563412L; + unsigned long x = 0x12345678L; + unsigned long expected = 0x78563412L; #endif - EXPECT_EQ(expected, ByteOrder::Swap(x)); - ByteOrder::Swap(&x); - EXPECT_EQ(expected, x); + EXPECT_EQ(expected, ByteOrder::Swap(x)); + ByteOrder::Swap(&x); + EXPECT_EQ(expected, x); } -TEST(ByteOrder, SwapULLong) -{ - unsigned long long x = 0x0123456789ABCDEFULL; - EXPECT_EQ(0xEFCDAB8967452301ULL, ByteOrder::Swap(x)); - ByteOrder::Swap(&x); - EXPECT_EQ(0xEFCDAB8967452301ULL, x); +TEST(ByteOrder, SwapULLong) { + unsigned long long x = 0x0123456789ABCDEFULL; + EXPECT_EQ(0xEFCDAB8967452301ULL, ByteOrder::Swap(x)); + ByteOrder::Swap(&x); + EXPECT_EQ(0xEFCDAB8967452301ULL, x); } -TEST(ByteOrder, SwapLLong) -{ - long long x = 0x0123456789ABCDEFLL; - EXPECT_EQ((long long)0xEFCDAB8967452301LL, ByteOrder::Swap(x)); - ByteOrder::Swap(&x); - EXPECT_EQ((long long)0xEFCDAB8967452301LL, x); +TEST(ByteOrder, SwapLLong) { + long long x = 0x0123456789ABCDEFLL; + EXPECT_EQ((long long)0xEFCDAB8967452301LL, ByteOrder::Swap(x)); + ByteOrder::Swap(&x); + EXPECT_EQ((long long)0xEFCDAB8967452301LL, x); } -TEST(ByteOrder, htonll) -{ - ASSERT_EQ(0x0100000000000000ULL, htonll(0x1ULL)); - ASSERT_EQ(0x0807060504030201ULL, htonll(0x0102030405060708ULL)); - ASSERT_NE(1ULL, htonll(0x1ULL)); +TEST(ByteOrder, htonll) { + ASSERT_EQ(0x0100000000000000ULL, htonll(0x1ULL)); + ASSERT_EQ(0x0807060504030201ULL, htonll(0x0102030405060708ULL)); + ASSERT_NE(1ULL, htonll(0x1ULL)); } #undef htons #undef htonl -TEST(ByteOrder, LocalToNet) -{ - ASSERT_EQ((htons(0x1234)), ByteOrder::LocalToNet((short)0x1234)); - ASSERT_EQ(htons(0x1234), ByteOrder::LocalToNet((unsigned short)0x1234)); - ASSERT_EQ(htonl(0x12345678), (unsigned int)ByteOrder::LocalToNet(0x12345678)); - ASSERT_EQ(htonl(0x12345678), ByteOrder::LocalToNet(0x12345678U)); - ASSERT_EQ(htonll(0x1234567890ABCDEFLL), - (unsigned long long)ByteOrder::LocalToNet(0x1234567890ABCDEFLL)); - ASSERT_EQ(htonll(0x1234567890ABCDEFULL), ByteOrder::LocalToNet(0x1234567890ABCDEFULL)); +TEST(ByteOrder, LocalToNet) { + ASSERT_EQ((htons(0x1234)), ByteOrder::LocalToNet((short)0x1234)); + ASSERT_EQ(htons(0x1234), ByteOrder::LocalToNet((unsigned short)0x1234)); + ASSERT_EQ(htonl(0x12345678), (unsigned int)ByteOrder::LocalToNet(0x12345678)); + ASSERT_EQ(htonl(0x12345678), ByteOrder::LocalToNet(0x12345678U)); + ASSERT_EQ(htonll(0x1234567890ABCDEFLL), + (unsigned long long)ByteOrder::LocalToNet(0x1234567890ABCDEFLL)); + ASSERT_EQ(htonll(0x1234567890ABCDEFULL), ByteOrder::LocalToNet(0x1234567890ABCDEFULL)); } #undef ntohs #undef ntohl -TEST(ByteOrder, NetToLocal) -{ - ASSERT_EQ((ntohs(0x1234)), ByteOrder::NetToLocal((short)0x1234)); - ASSERT_EQ(ntohs(0x1234), ByteOrder::NetToLocal((unsigned short)0x1234)); - ASSERT_EQ(ntohl(0x12345678), (unsigned int)ByteOrder::NetToLocal(0x12345678)); - ASSERT_EQ(ntohl(0x12345678), ByteOrder::NetToLocal(0x12345678U)); - ASSERT_EQ(ntohll(0x1234567890ABCDEFLL), - (unsigned long long)ByteOrder::NetToLocal(0x1234567890ABCDEFLL)); - ASSERT_EQ(ntohll(0x1234567890ABCDEFULL), ByteOrder::NetToLocal(0x1234567890ABCDEFULL)); +TEST(ByteOrder, NetToLocal) { + ASSERT_EQ((ntohs(0x1234)), ByteOrder::NetToLocal((short)0x1234)); + ASSERT_EQ(ntohs(0x1234), ByteOrder::NetToLocal((unsigned short)0x1234)); + ASSERT_EQ(ntohl(0x12345678), (unsigned int)ByteOrder::NetToLocal(0x12345678)); + ASSERT_EQ(ntohl(0x12345678), ByteOrder::NetToLocal(0x12345678U)); + ASSERT_EQ(ntohll(0x1234567890ABCDEFLL), + (unsigned long long)ByteOrder::NetToLocal(0x1234567890ABCDEFLL)); + ASSERT_EQ(ntohll(0x1234567890ABCDEFULL), ByteOrder::NetToLocal(0x1234567890ABCDEFULL)); } - diff --git a/src/common/base/test/string_ext_test.cc b/src/common/base/test/string_ext_test.cc index 064fef1ff..c36f7adf8 100644 --- a/src/common/base/test/string_ext_test.cc +++ b/src/common/base/test/string_ext_test.cc @@ -4,17 +4,16 @@ #include "bvs/common/base/string_ext.h" -int main(int argc, char* argv[]) -{ - cout << strtool::trim(" nihao ") <<"\n"; +int main(int argc, char* argv[]) { + cout << strtool::trim(" nihao ") << "\n"; - vector vt; - strtool::split(",o h,,,nice,,,,,,,", vt); - for (size_t i = 0; i < vt.size(); ++i) { - cout <<"out:" << vt[i] <<"\n"; - } + vector vt; + strtool::split(",o h,,,nice,,,,,,,", vt); + for (size_t i = 0; i < vt.size(); ++i) { + cout << "out:" << vt[i] << "\n"; + } - string ret = strtool::replace("xxAxxxAxxAxx", "A", "B"); - cout <<"replace:" << ret <<"\n"; - return 0; + string ret = strtool::replace("xxAxxxAxxAxx", "A", "B"); + cout << "replace:" << ret << "\n"; + return 0; } diff --git a/src/common/base/test/string_number_test.cc b/src/common/base/test/string_number_test.cc index e6a4a6a56..4a6878b90 100644 --- a/src/common/base/test/string_number_test.cc +++ b/src/common/base/test/string_number_test.cc @@ -8,305 +8,287 @@ // namespace common { -TEST(StringNumber, IsNaN) -{ - float f = 1.000; - ASSERT_FALSE(IsNaN(f)); - f = 1.223e+20; - ASSERT_FALSE(IsNaN(f)); +TEST(StringNumber, IsNaN) { + float f = 1.000; + ASSERT_FALSE(IsNaN(f)); + f = 1.223e+20; + ASSERT_FALSE(IsNaN(f)); #ifdef __GNUC__ - f = INFINITY; - ASSERT_FALSE(IsNaN(f)); + f = INFINITY; + ASSERT_FALSE(IsNaN(f)); #endif - f = sqrt(-1.0); - ASSERT_TRUE(IsNaN(f)); + f = sqrt(-1.0); + ASSERT_TRUE(IsNaN(f)); } -TEST(StringNumber, IntegerToStringBuffer) -{ - char buffer[1024]; - int32_t n1 = INT_MAX; - int32_t n2 = -INT_MAX; - int32_t n3 = 0; - int32_t n4 = 100000; - uint32_t n5 = 3147483647U; - int32_t n6 = -123456789; - - int64_t s1 = LLONG_MAX; - int64_t s2 = INT_MAX; - int64_t s3 = 0; - int64_t s4 = 1234567890123LL; - int64_t s5 = 1000000000000LL; - int64_t s6 = -1234567890034500LL; - int64_t s7 = LLONG_MIN; - - ASSERT_STREQ("2147483647", IntegerToString(n1, buffer)); - ASSERT_STREQ("-2147483647", IntegerToString(n2, buffer)); - ASSERT_STREQ("0", IntegerToString(n3, buffer)); - ASSERT_STREQ("100000", IntegerToString(n4, buffer)); - ASSERT_STREQ("3147483647", IntegerToString(n5, buffer)); - ASSERT_STREQ("-123456789", IntegerToString(n6, buffer)); - - ASSERT_STREQ("9223372036854775807", IntegerToString(s1, buffer)); - ASSERT_STREQ("2147483647", IntegerToString(s2, buffer)); - ASSERT_STREQ("0", IntegerToString(s3, buffer)); - ASSERT_STREQ("1234567890123", IntegerToString(s4, buffer)); - ASSERT_STREQ("1000000000000", IntegerToString(s5, buffer)); - ASSERT_STREQ("-1234567890034500", IntegerToString(s6, buffer)); - ASSERT_STREQ("-9223372036854775808", IntegerToString(s7, buffer)); +TEST(StringNumber, IntegerToStringBuffer) { + char buffer[1024]; + int32_t n1 = INT_MAX; + int32_t n2 = -INT_MAX; + int32_t n3 = 0; + int32_t n4 = 100000; + uint32_t n5 = 3147483647U; + int32_t n6 = -123456789; + + int64_t s1 = LLONG_MAX; + int64_t s2 = INT_MAX; + int64_t s3 = 0; + int64_t s4 = 1234567890123LL; + int64_t s5 = 1000000000000LL; + int64_t s6 = -1234567890034500LL; + int64_t s7 = LLONG_MIN; + + ASSERT_STREQ("2147483647", IntegerToString(n1, buffer)); + ASSERT_STREQ("-2147483647", IntegerToString(n2, buffer)); + ASSERT_STREQ("0", IntegerToString(n3, buffer)); + ASSERT_STREQ("100000", IntegerToString(n4, buffer)); + ASSERT_STREQ("3147483647", IntegerToString(n5, buffer)); + ASSERT_STREQ("-123456789", IntegerToString(n6, buffer)); + + ASSERT_STREQ("9223372036854775807", IntegerToString(s1, buffer)); + ASSERT_STREQ("2147483647", IntegerToString(s2, buffer)); + ASSERT_STREQ("0", IntegerToString(s3, buffer)); + ASSERT_STREQ("1234567890123", IntegerToString(s4, buffer)); + ASSERT_STREQ("1000000000000", IntegerToString(s5, buffer)); + ASSERT_STREQ("-1234567890034500", IntegerToString(s6, buffer)); + ASSERT_STREQ("-9223372036854775808", IntegerToString(s7, buffer)); } -TEST(StringNumber, IntegerToString) -{ - int32_t n1 = INT_MAX; - int32_t n2 = -INT_MAX; - int32_t n3 = 0; - int32_t n4 = 100000; - uint32_t n5 = 3147483647U; - int32_t n6 = -123456789; - - int64_t s1 = LLONG_MAX; - int64_t s2 = INT_MAX; - int64_t s3 = 0; - int64_t s4 = 1234567890123LL; - int64_t s5 = 1000000000000LL; - long long s6 = -1234567890034500LL; - long long s7 = LLONG_MIN; - - ASSERT_EQ("2147483647", IntegerToString(n1)); - ASSERT_EQ("-2147483647", IntegerToString(n2)); - ASSERT_EQ("0", IntegerToString(n3)); - ASSERT_EQ("100000", IntegerToString(n4)); - ASSERT_EQ("3147483647", IntegerToString(n5)); - ASSERT_EQ("-123456789", IntegerToString(n6)); - - ASSERT_EQ("9223372036854775807", IntegerToString(s1)); - ASSERT_EQ("2147483647", IntegerToString(s2)); - ASSERT_EQ("0", IntegerToString(s3)); - ASSERT_EQ("1234567890123", IntegerToString(s4)); - ASSERT_EQ("1000000000000", IntegerToString(s5)); - ASSERT_EQ("-1234567890034500", IntegerToString(s6)); - ASSERT_EQ("-9223372036854775808", IntegerToString(s7)); +TEST(StringNumber, IntegerToString) { + int32_t n1 = INT_MAX; + int32_t n2 = -INT_MAX; + int32_t n3 = 0; + int32_t n4 = 100000; + uint32_t n5 = 3147483647U; + int32_t n6 = -123456789; + + int64_t s1 = LLONG_MAX; + int64_t s2 = INT_MAX; + int64_t s3 = 0; + int64_t s4 = 1234567890123LL; + int64_t s5 = 1000000000000LL; + long long s6 = -1234567890034500LL; + long long s7 = LLONG_MIN; + + ASSERT_EQ("2147483647", IntegerToString(n1)); + ASSERT_EQ("-2147483647", IntegerToString(n2)); + ASSERT_EQ("0", IntegerToString(n3)); + ASSERT_EQ("100000", IntegerToString(n4)); + ASSERT_EQ("3147483647", IntegerToString(n5)); + ASSERT_EQ("-123456789", IntegerToString(n6)); + + ASSERT_EQ("9223372036854775807", IntegerToString(s1)); + ASSERT_EQ("2147483647", IntegerToString(s2)); + ASSERT_EQ("0", IntegerToString(s3)); + ASSERT_EQ("1234567890123", IntegerToString(s4)); + ASSERT_EQ("1000000000000", IntegerToString(s5)); + ASSERT_EQ("-1234567890034500", IntegerToString(s6)); + ASSERT_EQ("-9223372036854775808", IntegerToString(s7)); } -TEST(StringNumber, UIntToHexString) -{ - EXPECT_EQ("9527", UInt16ToHexString(0x9527)); - EXPECT_EQ("95279527", UInt32ToHexString(0x95279527)); - EXPECT_EQ("9527952795279527", UInt64ToHexString(0x9527952795279527ULL)); +TEST(StringNumber, UIntToHexString) { + EXPECT_EQ("9527", UInt16ToHexString(0x9527)); + EXPECT_EQ("95279527", UInt32ToHexString(0x95279527)); + EXPECT_EQ("9527952795279527", UInt64ToHexString(0x9527952795279527ULL)); } -TEST(StringNumber, StringToNumber) -{ - int16_t i16; - int32_t i32; - int64_t i64; - long long ll; - unsigned long long ull; - ASSERT_FALSE(StringToNumber("223372036854775807", &i32)); - ASSERT_TRUE(StringToNumber("223372036854775807", &i64)); - ASSERT_TRUE(StringToNumber("223372036854775807", &ll)); - ASSERT_TRUE(StringToNumber("223372036854775807", &ull)); - ASSERT_EQ(i64, 223372036854775807LL); - ASSERT_EQ(ll, 223372036854775807LL); - ASSERT_EQ(ull, 223372036854775807ULL); - ASSERT_FALSE(StringToNumber("1147483647", &i16)); - ASSERT_TRUE(StringToNumber("1147483647", &i32)); - ASSERT_TRUE(StringToNumber("1147483647", &i64)); - ASSERT_EQ(i32, 1147483647); - ASSERT_EQ(i64, 1147483647); - - uint32_t u32; - ASSERT_TRUE(StringToNumber("1147483647", &u32)); - - char buffer[1024]; - double d = 1.0003; - - ASSERT_STREQ(DoubleToString(d, buffer), "1.0003"); - d = std::numeric_limits::infinity(); - ASSERT_STREQ(DoubleToString(d, buffer), "inf"); - d = -std::numeric_limits::infinity(); - ASSERT_STREQ(DoubleToString(d, buffer), "-inf"); -#ifdef __GNUC__ // divided by zero is not allowed in msvc - d = NAN; - ASSERT_STREQ(DoubleToString(d, buffer), "nan"); +TEST(StringNumber, StringToNumber) { + int16_t i16; + int32_t i32; + int64_t i64; + long long ll; + unsigned long long ull; + ASSERT_FALSE(StringToNumber("223372036854775807", &i32)); + ASSERT_TRUE(StringToNumber("223372036854775807", &i64)); + ASSERT_TRUE(StringToNumber("223372036854775807", &ll)); + ASSERT_TRUE(StringToNumber("223372036854775807", &ull)); + ASSERT_EQ(i64, 223372036854775807LL); + ASSERT_EQ(ll, 223372036854775807LL); + ASSERT_EQ(ull, 223372036854775807ULL); + ASSERT_FALSE(StringToNumber("1147483647", &i16)); + ASSERT_TRUE(StringToNumber("1147483647", &i32)); + ASSERT_TRUE(StringToNumber("1147483647", &i64)); + ASSERT_EQ(i32, 1147483647); + ASSERT_EQ(i64, 1147483647); + + uint32_t u32; + ASSERT_TRUE(StringToNumber("1147483647", &u32)); + + char buffer[1024]; + double d = 1.0003; + + ASSERT_STREQ(DoubleToString(d, buffer), "1.0003"); + d = std::numeric_limits::infinity(); + ASSERT_STREQ(DoubleToString(d, buffer), "inf"); + d = -std::numeric_limits::infinity(); + ASSERT_STREQ(DoubleToString(d, buffer), "-inf"); +#ifdef __GNUC__ // divided by zero is not allowed in msvc + d = NAN; + ASSERT_STREQ(DoubleToString(d, buffer), "nan"); #endif - float f = 1e+22; - ASSERT_STREQ(FloatToString(f, buffer), "1e+22"); - f = 0.000325; - ASSERT_STREQ(FloatToString(f, buffer), "0.000325"); - f = std::numeric_limits::infinity(); - ASSERT_STREQ(FloatToString(f, buffer), "inf"); - f = -std::numeric_limits::infinity(); - ASSERT_STREQ(FloatToString(f, buffer), "-inf"); - -#ifdef __GNUC__ // divided by zero is not allowed in msvc - f = NAN; - ASSERT_STREQ(FloatToString(f, buffer), "nan"); - - f = INFINITY; - ASSERT_STREQ(FloatToString(f, buffer), "inf"); + float f = 1e+22; + ASSERT_STREQ(FloatToString(f, buffer), "1e+22"); + f = 0.000325; + ASSERT_STREQ(FloatToString(f, buffer), "0.000325"); + f = std::numeric_limits::infinity(); + ASSERT_STREQ(FloatToString(f, buffer), "inf"); + f = -std::numeric_limits::infinity(); + ASSERT_STREQ(FloatToString(f, buffer), "-inf"); + +#ifdef __GNUC__ // divided by zero is not allowed in msvc + f = NAN; + ASSERT_STREQ(FloatToString(f, buffer), "nan"); + + f = INFINITY; + ASSERT_STREQ(FloatToString(f, buffer), "inf"); #endif - f = -std::numeric_limits::infinity(); - ASSERT_STREQ(FloatToString(f, buffer), "-inf"); - - uint32_t i = 255; - ASSERT_STREQ(UInt32ToHexString(i, buffer), "000000ff"); - - std::string str = "1110.32505QQ"; - char* endptr; - ASSERT_TRUE(ParseNumber(str.c_str(), &d, &endptr)); - ASSERT_TRUE(d == 1110.32505); - ASSERT_FALSE(StringToNumber(str.c_str(), &d)); - - ASSERT_TRUE(ParseNumber(str.c_str(), &f, &endptr)); - ASSERT_TRUE(f == 1110.32505f); - ASSERT_FALSE(StringToNumber(str.c_str(), &f)); - - ASSERT_TRUE(ParseNumber(str.c_str(), &i, &endptr)); - ASSERT_EQ(1110U, i); - ASSERT_FALSE(StringToNumber(str.c_str(), &i)); - - str = "1110.32505"; - d = 0; - f = 0; - i = 0; - ASSERT_TRUE(StringToNumber(str.c_str(), &d)); - ASSERT_TRUE(d == 1110.32505); - ASSERT_TRUE(StringToNumber(str.c_str(), &f)); - ASSERT_TRUE(f == 1110.32505f); - ASSERT_FALSE(StringToNumber(str.c_str(), &i)); - str = "-1110"; - int32_t x; - ASSERT_TRUE(StringToNumber(str.c_str(), &x)); - ASSERT_EQ(x, -1110); + f = -std::numeric_limits::infinity(); + ASSERT_STREQ(FloatToString(f, buffer), "-inf"); + + uint32_t i = 255; + ASSERT_STREQ(UInt32ToHexString(i, buffer), "000000ff"); + + std::string str = "1110.32505QQ"; + char* endptr; + ASSERT_TRUE(ParseNumber(str.c_str(), &d, &endptr)); + ASSERT_TRUE(d == 1110.32505); + ASSERT_FALSE(StringToNumber(str.c_str(), &d)); + + ASSERT_TRUE(ParseNumber(str.c_str(), &f, &endptr)); + ASSERT_TRUE(f == 1110.32505f); + ASSERT_FALSE(StringToNumber(str.c_str(), &f)); + + ASSERT_TRUE(ParseNumber(str.c_str(), &i, &endptr)); + ASSERT_EQ(1110U, i); + ASSERT_FALSE(StringToNumber(str.c_str(), &i)); + + str = "1110.32505"; + d = 0; + f = 0; + i = 0; + ASSERT_TRUE(StringToNumber(str.c_str(), &d)); + ASSERT_TRUE(d == 1110.32505); + ASSERT_TRUE(StringToNumber(str.c_str(), &f)); + ASSERT_TRUE(f == 1110.32505f); + ASSERT_FALSE(StringToNumber(str.c_str(), &i)); + str = "-1110"; + int32_t x; + ASSERT_TRUE(StringToNumber(str.c_str(), &x)); + ASSERT_EQ(x, -1110); } -class StringToNumberPerformanceTest : public testing::Test -{ -protected: - static const unsigned int kNumber = 0x42576010U; - static const char kString[]; -private: - void SetUp() - { - unsigned int n; - sscanf(kString, "%x", &n); // NOLINT(runtime/printf) - ASSERT_EQ(0x42576010U, n); - ASSERT_TRUE(StringToNumber(kString, &n)); - ASSERT_EQ(0x42576010U, n); - } +class StringToNumberPerformanceTest : public testing::Test { + protected: + static const unsigned int kNumber = 0x42576010U; + static const char kString[]; + + private: + void SetUp() { + unsigned int n; + sscanf(kString, "%x", &n); // NOLINT(runtime/printf) + ASSERT_EQ(0x42576010U, n); + ASSERT_TRUE(StringToNumber(kString, &n)); + ASSERT_EQ(0x42576010U, n); + } }; const char StringToNumberPerformanceTest::kString[] = "0x42576010"; -TEST_F(StringToNumberPerformanceTest, SScanfPerformance) -{ - for (int i = 0; i < 1000000; i++) - { - unsigned int n; - sscanf(kString, "%x", &n); // NOLINT(runtime/printf) - } +TEST_F(StringToNumberPerformanceTest, SScanfPerformance) { + for (int i = 0; i < 1000000; i++) { + unsigned int n; + sscanf(kString, "%x", &n); // NOLINT(runtime/printf) + } } -TEST_F(StringToNumberPerformanceTest, StringToNumberPerformance) -{ - for (int i = 0; i < 1000000; i++) - { - unsigned int n; - StringToNumber(kString, &n); - } +TEST_F(StringToNumberPerformanceTest, StringToNumberPerformance) { + for (int i = 0; i < 1000000; i++) { + unsigned int n; + StringToNumber(kString, &n); + } } -TEST(StringNumber, NumberToStringPerformance) -{ - double d = 1110.32505f; - for (size_t i = 0; i < 100000; i++) - { - DoubleToString(d); - } +TEST(StringNumber, NumberToStringPerformance) { + double d = 1110.32505f; + for (size_t i = 0; i < 100000; i++) { + DoubleToString(d); + } } -TEST(StringNumber, FormatMeasure) -{ - EXPECT_EQ("1", FormatMeasure(1)); - EXPECT_EQ("123", FormatMeasure(123)); - EXPECT_EQ("1.23k", FormatMeasure(1234)); - EXPECT_EQ("12.3k", FormatMeasure(12345)); - EXPECT_EQ("123k", FormatMeasure(123456)); - EXPECT_EQ("1.23M", FormatMeasure(1234567)); - EXPECT_EQ("12.3M", FormatMeasure(12345678)); - EXPECT_EQ("123M", FormatMeasure(123456789)); - - EXPECT_EQ("1bps", FormatMeasure(1, "bps")); - EXPECT_EQ("123bps", FormatMeasure(123, "bps")); - EXPECT_EQ("1.23kbps", FormatMeasure(1234, "bps")); - EXPECT_EQ("12.3kbps", FormatMeasure(12345, "bps")); - EXPECT_EQ("123kbps", FormatMeasure(123456, "bps")); - EXPECT_EQ("1.23Mbps", FormatMeasure(1234567, "bps")); - EXPECT_EQ("12.3Mbps", FormatMeasure(12345678, "bps")); - EXPECT_EQ("123Mbps", FormatMeasure(123456789, "bps")); - EXPECT_EQ("0bps", FormatMeasure(0, "bps")); - EXPECT_EQ("100 mF", FormatMeasure(0.1, " F")); - EXPECT_EQ("12.3 mF", FormatMeasure(0.0123, " F")); - EXPECT_EQ("1.23 mF", FormatMeasure(0.001234, " F")); - EXPECT_EQ("123 uF", FormatMeasure(0.00012345, " F")); - EXPECT_EQ("12.3 uF", FormatMeasure(0.0000123456, " F")); - EXPECT_EQ("1.23 uF", FormatMeasure(0.000001234567, " F")); - EXPECT_EQ("123 nF", FormatMeasure(0.00000012345678, " F")); - EXPECT_EQ("12.3 nF", FormatMeasure(0.0000000123456789, " F")); +TEST(StringNumber, FormatMeasure) { + EXPECT_EQ("1", FormatMeasure(1)); + EXPECT_EQ("123", FormatMeasure(123)); + EXPECT_EQ("1.23k", FormatMeasure(1234)); + EXPECT_EQ("12.3k", FormatMeasure(12345)); + EXPECT_EQ("123k", FormatMeasure(123456)); + EXPECT_EQ("1.23M", FormatMeasure(1234567)); + EXPECT_EQ("12.3M", FormatMeasure(12345678)); + EXPECT_EQ("123M", FormatMeasure(123456789)); + + EXPECT_EQ("1bps", FormatMeasure(1, "bps")); + EXPECT_EQ("123bps", FormatMeasure(123, "bps")); + EXPECT_EQ("1.23kbps", FormatMeasure(1234, "bps")); + EXPECT_EQ("12.3kbps", FormatMeasure(12345, "bps")); + EXPECT_EQ("123kbps", FormatMeasure(123456, "bps")); + EXPECT_EQ("1.23Mbps", FormatMeasure(1234567, "bps")); + EXPECT_EQ("12.3Mbps", FormatMeasure(12345678, "bps")); + EXPECT_EQ("123Mbps", FormatMeasure(123456789, "bps")); + EXPECT_EQ("0bps", FormatMeasure(0, "bps")); + EXPECT_EQ("100 mF", FormatMeasure(0.1, " F")); + EXPECT_EQ("12.3 mF", FormatMeasure(0.0123, " F")); + EXPECT_EQ("1.23 mF", FormatMeasure(0.001234, " F")); + EXPECT_EQ("123 uF", FormatMeasure(0.00012345, " F")); + EXPECT_EQ("12.3 uF", FormatMeasure(0.0000123456, " F")); + EXPECT_EQ("1.23 uF", FormatMeasure(0.000001234567, " F")); + EXPECT_EQ("123 nF", FormatMeasure(0.00000012345678, " F")); + EXPECT_EQ("12.3 nF", FormatMeasure(0.0000000123456789, " F")); } -TEST(StringNumber, PhysicalConstantsMeasure) -{ - EXPECT_EQ("300 Mm/s", FormatMeasure(299792458, " m/s")); - EXPECT_EQ("160 zC", FormatMeasure(1.60217733e-19, " C")); - EXPECT_EQ("6.63e-34 J.s", FormatMeasure(6.6260755e-34, " J.s")); - EXPECT_EQ("2.82 fm", FormatMeasure(2.81794092e-15, " m")); - EXPECT_EQ("13.8 yJ/K", FormatMeasure(1.380658e-23, " J/K")); +TEST(StringNumber, PhysicalConstantsMeasure) { + EXPECT_EQ("300 Mm/s", FormatMeasure(299792458, " m/s")); + EXPECT_EQ("160 zC", FormatMeasure(1.60217733e-19, " C")); + EXPECT_EQ("6.63e-34 J.s", FormatMeasure(6.6260755e-34, " J.s")); + EXPECT_EQ("2.82 fm", FormatMeasure(2.81794092e-15, " m")); + EXPECT_EQ("13.8 yJ/K", FormatMeasure(1.380658e-23, " J/K")); } -TEST(StringNumber, FormatBinaryMeasure) -{ - EXPECT_EQ("1 B/s", FormatBinaryMeasure(1, " B/s")); - EXPECT_EQ("123 B/s", FormatBinaryMeasure(123, " B/s")); - EXPECT_EQ("1023 B/s", FormatBinaryMeasure(1023, " B/s")); - EXPECT_EQ("1.21 KiB/s", FormatBinaryMeasure(1234, " B/s")); - EXPECT_EQ("12.1 KiB/s", FormatBinaryMeasure(12345, " B/s")); - EXPECT_EQ("121 KiB/s", FormatBinaryMeasure(123456, " B/s")); - EXPECT_EQ("1.18 MiB/s", FormatBinaryMeasure(1234567, " B/s")); - EXPECT_EQ("11.8 MiB/s", FormatBinaryMeasure(12345678, " B/s")); - EXPECT_EQ("118 MiB/s", FormatBinaryMeasure(123456789, " B/s")); - EXPECT_EQ("1.15 GiB/s", FormatBinaryMeasure(1234567890, " B/s")); - EXPECT_EQ("11.5 GiB/s", FormatBinaryMeasure(12345678900ULL, " B/s")); - EXPECT_EQ("115 GiB/s", FormatBinaryMeasure(123456789000ULL, " B/s")); - EXPECT_EQ("1.12 TiB/s", FormatBinaryMeasure(1234567890000ULL, " B/s")); +TEST(StringNumber, FormatBinaryMeasure) { + EXPECT_EQ("1 B/s", FormatBinaryMeasure(1, " B/s")); + EXPECT_EQ("123 B/s", FormatBinaryMeasure(123, " B/s")); + EXPECT_EQ("1023 B/s", FormatBinaryMeasure(1023, " B/s")); + EXPECT_EQ("1.21 KiB/s", FormatBinaryMeasure(1234, " B/s")); + EXPECT_EQ("12.1 KiB/s", FormatBinaryMeasure(12345, " B/s")); + EXPECT_EQ("121 KiB/s", FormatBinaryMeasure(123456, " B/s")); + EXPECT_EQ("1.18 MiB/s", FormatBinaryMeasure(1234567, " B/s")); + EXPECT_EQ("11.8 MiB/s", FormatBinaryMeasure(12345678, " B/s")); + EXPECT_EQ("118 MiB/s", FormatBinaryMeasure(123456789, " B/s")); + EXPECT_EQ("1.15 GiB/s", FormatBinaryMeasure(1234567890, " B/s")); + EXPECT_EQ("11.5 GiB/s", FormatBinaryMeasure(12345678900ULL, " B/s")); + EXPECT_EQ("115 GiB/s", FormatBinaryMeasure(123456789000ULL, " B/s")); + EXPECT_EQ("1.12 TiB/s", FormatBinaryMeasure(1234567890000ULL, " B/s")); } -TEST(StringNumber, MeasureUnderflow) -{ - EXPECT_EQ("1y", FormatMeasure(1e-24)); - EXPECT_EQ("1e-25", FormatMeasure(1e-25)); - EXPECT_EQ("1e-100", FormatMeasure(1e-100)); +TEST(StringNumber, MeasureUnderflow) { + EXPECT_EQ("1y", FormatMeasure(1e-24)); + EXPECT_EQ("1e-25", FormatMeasure(1e-25)); + EXPECT_EQ("1e-100", FormatMeasure(1e-100)); } -TEST(StringNumber, MeasureOverflow) -{ - EXPECT_EQ("1Y", FormatMeasure(1e24)); - EXPECT_EQ("100Y", FormatMeasure(1e26)); - EXPECT_EQ("1e+27", FormatMeasure(1e27)); - EXPECT_EQ("1e+100", FormatMeasure(1e100)); +TEST(StringNumber, MeasureOverflow) { + EXPECT_EQ("1Y", FormatMeasure(1e24)); + EXPECT_EQ("100Y", FormatMeasure(1e26)); + EXPECT_EQ("1e+27", FormatMeasure(1e27)); + EXPECT_EQ("1e+100", FormatMeasure(1e100)); } -TEST(StringNumber, MeasureNanAndInf) -{ - EXPECT_EQ("inf", FormatMeasure(INFINITY)); - EXPECT_EQ("inf US$", FormatMeasure(INFINITY, " US$")); - EXPECT_EQ("infUS$", FormatMeasure(INFINITY, "US$")); - EXPECT_EQ("nan", FormatMeasure(NAN)); - EXPECT_EQ("nan X", FormatMeasure(NAN, " X")); +TEST(StringNumber, MeasureNanAndInf) { + EXPECT_EQ("inf", FormatMeasure(INFINITY)); + EXPECT_EQ("inf US$", FormatMeasure(INFINITY, " US$")); + EXPECT_EQ("infUS$", FormatMeasure(INFINITY, "US$")); + EXPECT_EQ("nan", FormatMeasure(NAN)); + EXPECT_EQ("nan X", FormatMeasure(NAN, " X")); } // } // namespace common diff --git a/src/common/console/progress_bar.cc b/src/common/console/progress_bar.cc index 6f1e4bfae..8f8bd586e 100644 --- a/src/common/console/progress_bar.cc +++ b/src/common/console/progress_bar.cc @@ -11,12 +11,8 @@ namespace common { -ProgressBar::ProgressBar(DisplayMode mode, - uint64_t total_size, - uint32_t length, - std::string unit, - char ch1, - char ch2) +ProgressBar::ProgressBar(DisplayMode mode, uint64_t total_size, uint32_t length, + const std::string& unit, char ch1, char ch2) : mode_(mode), total_size_(total_size), cur_size_(0), @@ -27,160 +23,154 @@ ProgressBar::ProgressBar(DisplayMode mode, start_time_(0), cur_time_(0), flush_buffer_(NULL) { - assert(total_size_ > 0); - if (bar_length_ <= 0) { - if (mode_ == BRIEF) { - bar_length_ = 80; - } else { - bar_length_ = GetScreenWidth() - 5; - } + assert(total_size_ > 0); + if (bar_length_ <= 0) { + if (mode_ == BRIEF) { + bar_length_ = 80; + } else { + bar_length_ = GetScreenWidth() - 5; } - flush_buffer_ = new char[bar_length_]; + } + flush_buffer_ = new char[bar_length_]; } -ProgressBar::~ProgressBar() { - delete[] flush_buffer_; -} +ProgressBar::~ProgressBar() { delete[] flush_buffer_; } void ProgressBar::Refresh(int32_t cur_size) { - if (start_time_ == 0) { - fflush(NULL); - start_time_ = time(NULL); - } - - if (cur_size > total_size_) { - cur_size = total_size_; - } else if (cur_size < 0) { - cur_size = 0; - } - cur_size_ = cur_size; - - if (cur_time_ == time(NULL) && cur_size != total_size_) { - return; - } - cur_time_ = time(NULL); - - putchar('\r'); - if (mode_ == BRIEF) { - FillFlushBufferBrief(cur_size); - } else if (mode_ == ENHANCED) { - FillFlushBufferEnhanced(cur_size); - } - fwrite(flush_buffer_, 1, bar_length_, stdout); - fflush(stdout); + if (start_time_ == 0) { + fflush(NULL); + start_time_ = time(NULL); + } + + if (cur_size > total_size_) { + cur_size = total_size_; + } else if (cur_size < 0) { + cur_size = 0; + } + cur_size_ = cur_size; + + if (cur_time_ == time(NULL) && cur_size != total_size_) { + return; + } + cur_time_ = time(NULL); + + putchar('\r'); + if (mode_ == BRIEF) { + FillFlushBufferBrief(cur_size); + } else if (mode_ == ENHANCED) { + FillFlushBufferEnhanced(cur_size); + } + fwrite(flush_buffer_, 1, bar_length_, stdout); + fflush(stdout); } void ProgressBar::AddAndRefresh(int32_t size) { - cur_size_ += size; - Refresh(cur_size_); + cur_size_ += size; + Refresh(cur_size_); } void ProgressBar::Done() { - cur_size_ = 0; - putchar('\n'); + cur_size_ = 0; + putchar('\n'); } void ProgressBar::FillFlushBufferBrief(int64_t cur_size) { - int32_t percent = cur_size * 100 / total_size_; - int32_t char_1_num = percent * (bar_length_ - 6) / 100; - int32_t i = 0; - while (i < char_1_num) { - flush_buffer_[i++] = char_1_; - } - while (i < bar_length_ - 6) { - flush_buffer_[i++] = char_2_; - } - snprintf(flush_buffer_ + i, 6, " %3d%%", percent); + int32_t percent = cur_size * 100 / total_size_; + int32_t char_1_num = percent * (bar_length_ - 6) / 100; + int32_t i = 0; + while (i < char_1_num) { + flush_buffer_[i++] = char_1_; + } + while (i < bar_length_ - 6) { + flush_buffer_[i++] = char_2_; + } + snprintf(flush_buffer_ + i, 6, " %3d%%", percent); } void ProgressBar::FillFlushBufferEnhanced(int64_t cur_size) { - int32_t bar_remain = bar_length_; - int32_t disp_len; - std::string unit; - - // fill time field - int32_t time_s; - time_s = GetTime(); - if (time_s == 0) { - time_s = 1; - } - disp_len = 9; - bar_remain -= disp_len; - snprintf(flush_buffer_ + bar_remain, disp_len, "%02d:%02d:%02d ", - time_s / 3600, - time_s % 3600 / 60, - time_s % 60); - flush_buffer_[bar_remain + disp_len - 1] = ' '; - - // fill speed field - double disp_size = (double)cur_size / time_s; - if (disp_size >= 1024 * 1024) { - unit = "M" + unit_; - disp_size = disp_size / (1024 * 1024); - } else if (disp_size >= 1024) { - unit = "K" + unit_; - disp_size = disp_size / 1024; - } else { - unit = unit_; - } - disp_len = 9 + unit.length(); - bar_remain -= disp_len; - snprintf(flush_buffer_ + bar_remain, 6, "%5.3f", disp_size); - snprintf(flush_buffer_ + bar_remain + 5, disp_len - 5, "%s/s ", unit.c_str()); - flush_buffer_[bar_remain + disp_len - 1] = ' '; - - // fill cur_size field - if (cur_size >= 1024 * 1024 * 1024) { - unit = "G" + unit_; - disp_size = cur_size / (1024.0 * 1024 * 1024); - } else if (cur_size >= 1024 * 1024) { - unit = "M" + unit_; - disp_size = cur_size / (1024.0 * 1024); - } else if (cur_size >= 1024) { - unit = "K" + unit_; - disp_size = cur_size / 1024.0; - } else { - unit = unit_; - disp_size = cur_size; - } - disp_len = 7 + unit.length(); - bar_remain -= disp_len; - snprintf(flush_buffer_ + bar_remain, 6, "%5.3f", disp_size); - snprintf(flush_buffer_ + bar_remain + 5, disp_len - 5, "%s ", unit.c_str()); - flush_buffer_[bar_remain + disp_len - 1] = ' '; - - // fill percent field - disp_len = 7; - bar_remain -= disp_len; - int32_t percent = cur_size * 100 / total_size_; - snprintf(flush_buffer_ + bar_remain, disp_len, " %3d%% ", percent); - flush_buffer_[bar_remain + disp_len - 1] = ' '; - - // file process bar - int32_t char_1_num = percent * bar_remain / 100; - int32_t i = 0; - while (i < char_1_num) { - flush_buffer_[i++] = char_1_; - } - while (i < bar_remain) { - flush_buffer_[i++] = char_2_; - } + int32_t bar_remain = bar_length_; + int32_t disp_len; + std::string unit; + + // fill time field + int32_t time_s; + time_s = GetTime(); + if (time_s == 0) { + time_s = 1; + } + disp_len = 9; + bar_remain -= disp_len; + snprintf(flush_buffer_ + bar_remain, disp_len, "%02d:%02d:%02d ", time_s / 3600, + time_s % 3600 / 60, time_s % 60); + flush_buffer_[bar_remain + disp_len - 1] = ' '; + + // fill speed field + double disp_size = (double)cur_size / time_s; + if (disp_size >= 1024 * 1024) { + unit = "M" + unit_; + disp_size = disp_size / (1024 * 1024); + } else if (disp_size >= 1024) { + unit = "K" + unit_; + disp_size = disp_size / 1024; + } else { + unit = unit_; + } + disp_len = 9 + unit.length(); + bar_remain -= disp_len; + snprintf(flush_buffer_ + bar_remain, 6, "%5.3f", disp_size); + snprintf(flush_buffer_ + bar_remain + 5, disp_len - 5, "%s/s ", unit.c_str()); + flush_buffer_[bar_remain + disp_len - 1] = ' '; + + // fill cur_size field + if (cur_size >= 1024 * 1024 * 1024) { + unit = "G" + unit_; + disp_size = cur_size / (1024.0 * 1024 * 1024); + } else if (cur_size >= 1024 * 1024) { + unit = "M" + unit_; + disp_size = cur_size / (1024.0 * 1024); + } else if (cur_size >= 1024) { + unit = "K" + unit_; + disp_size = cur_size / 1024.0; + } else { + unit = unit_; + disp_size = cur_size; + } + disp_len = 7 + unit.length(); + bar_remain -= disp_len; + snprintf(flush_buffer_ + bar_remain, 6, "%5.3f", disp_size); + snprintf(flush_buffer_ + bar_remain + 5, disp_len - 5, "%s ", unit.c_str()); + flush_buffer_[bar_remain + disp_len - 1] = ' '; + + // fill percent field + disp_len = 7; + bar_remain -= disp_len; + int32_t percent = cur_size * 100 / total_size_; + snprintf(flush_buffer_ + bar_remain, disp_len, " %3d%% ", percent); + flush_buffer_[bar_remain + disp_len - 1] = ' '; + + // file process bar + int32_t char_1_num = percent * bar_remain / 100; + int32_t i = 0; + while (i < char_1_num) { + flush_buffer_[i++] = char_1_; + } + while (i < bar_remain) { + flush_buffer_[i++] = char_2_; + } } int32_t ProgressBar::GetScreenWidth() { - int32_t fd; - struct winsize wsz; - - fd = fileno(stderr); - if (ioctl(fd, TIOCGWINSZ, &wsz) < 0) { - perror("fail to get screen width"); - return -1; - } - return wsz.ws_col; + int32_t fd; + struct winsize wsz; + + fd = fileno(stderr); + if (ioctl(fd, TIOCGWINSZ, &wsz) < 0) { + perror("fail to get screen width"); + return -1; + } + return wsz.ws_col; } -int32_t ProgressBar::GetTime() { - return (int32_t)(time(0) - start_time_); -} +int32_t ProgressBar::GetTime() { return (int32_t)(time(0) - start_time_); } } // namespace common diff --git a/src/common/console/progress_bar.h b/src/common/console/progress_bar.h index 0c9e4e481..beff0ff27 100644 --- a/src/common/console/progress_bar.h +++ b/src/common/console/progress_bar.h @@ -4,8 +4,8 @@ // // Author: xupeilin@baidu.com -#ifndef TERA_COMMON_PROGRESS_BAR_H -#define TERA_COMMON_PROGRESS_BAR_H +#ifndef TERA_COMMON_PROGRESS_BAR_H +#define TERA_COMMON_PROGRESS_BAR_H #include #include @@ -18,51 +18,45 @@ namespace common { class ProgressBar { -public: - enum DisplayMode { - // brief mode, only a progress bar and easy to use - // >>>>>>>>>>>------------------------------- 28% - BRIEF, - - // add some enhanced config - // >>>>>>>>>-------------------------- 28% 30M 20KB/s 00:00:05 - ENHANCED - }; - - ProgressBar(DisplayMode mode = BRIEF, - uint64_t total_size = 100, - uint32_t length = 80, - std::string unit = "", - char ch1 = '>', - char ch2 = '-'); - ~ProgressBar(); - - void Refresh(int32_t cur_size); - void AddAndRefresh(int32_t size); - void Done(); - - int32_t GetPercent() { - return (int32_t)(cur_size_ * 100 / total_size_); - } - -private: - void FillFlushBufferBrief(int64_t cur_size); - void FillFlushBufferEnhanced(int64_t cur_size); - int32_t GetScreenWidth(); - int32_t GetTime(); - -private: - DisplayMode mode_; - int64_t total_size_; - int64_t cur_size_; - int32_t bar_length_; - std::string unit_; - char char_1_; - char char_2_; - - time_t start_time_; - time_t cur_time_; - char *flush_buffer_; + public: + enum DisplayMode { + // brief mode, only a progress bar and easy to use + // >>>>>>>>>>>------------------------------- 28% + BRIEF, + + // add some enhanced config + // >>>>>>>>>-------------------------- 28% 30M 20KB/s 00:00:05 + ENHANCED + }; + + ProgressBar(DisplayMode mode = BRIEF, uint64_t total_size = 100, uint32_t length = 80, + const std::string& unit = "", char ch1 = '>', char ch2 = '-'); + ~ProgressBar(); + + void Refresh(int32_t cur_size); + void AddAndRefresh(int32_t size); + void Done(); + + int32_t GetPercent() { return (int32_t)(cur_size_ * 100 / total_size_); } + + private: + void FillFlushBufferBrief(int64_t cur_size); + void FillFlushBufferEnhanced(int64_t cur_size); + int32_t GetScreenWidth(); + int32_t GetTime(); + + private: + DisplayMode mode_; + int64_t total_size_; + int64_t cur_size_; + int32_t bar_length_; + std::string unit_; + char char_1_; + char char_2_; + + time_t start_time_; + time_t cur_time_; + char* flush_buffer_; }; } // namespace common diff --git a/src/common/console/progress_bar_test.cc b/src/common/console/progress_bar_test.cc deleted file mode 100644 index 6031ad795..000000000 --- a/src/common/console/progress_bar_test.cc +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. -// -// Author: xupeilin@baidu.com - -#include "progress_bar.h" - -#include -#include -#include - -using common::ProgressBar; - -TEST(ProgressBarTest, Test) { - int cur_size = 0; - int total_size = 100000000; - - ProgressBar progress_bar(ProgressBar::ENHANCED, total_size, 100, "B"); - - srand((uint32_t)time(NULL)); - timespec interval = {0, 1000}; - while (cur_size < total_size) { - cur_size += rand() % 10000; - progress_bar.Refresh(cur_size); - nanosleep(&interval, &interval); - } - progress_bar.Done(); -} diff --git a/src/common/counter.h b/src/common/counter.h index 2d9193f37..59c0deacc 100644 --- a/src/common/counter.h +++ b/src/common/counter.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_COMMON_COUNTER_H_ -#define TERA_COMMON_COUNTER_H_ +#ifndef TERA_COMMON_COUNTER_H_ +#define TERA_COMMON_COUNTER_H_ #include @@ -13,63 +13,46 @@ namespace tera { class Counter { -public: - Counter() : val_(0) {} - int64_t Add(int64_t v) { - return atomic_add64(&val_, v) + v; - } - int64_t Sub(int64_t v) { - return atomic_add64(&val_, -v) - v; - } - int64_t Inc() { - return atomic_add64(&val_, 1) + 1; - } - int64_t Dec() { - return atomic_add64(&val_, -1) - 1; - } - int64_t Get() { - return val_; - } - int64_t Set(int64_t v) { - return atomic_swap64(&val_, v); - } - int64_t Clear() { - return atomic_swap64(&val_, 0); - } - -private: - volatile int64_t val_; + public: + Counter() : val_(0) {} + int64_t Add(int64_t v) { return atomic_add64(&val_, v) + v; } + int64_t Sub(int64_t v) { return atomic_add64(&val_, -v) - v; } + int64_t Inc() { return atomic_add64(&val_, 1) + 1; } + int64_t Dec() { return atomic_add64(&val_, -1) - 1; } + int64_t Get() const { return val_; } + int64_t Set(int64_t v) { return atomic_swap64(&val_, v); } + int64_t Clear() { return atomic_swap64(&val_, 0); } + + private: + volatile int64_t val_; }; class AutoCounter { -public: - AutoCounter(Counter* counter, const char* msg1, const char* msg2 = NULL) - : counter_(counter), - msg1_(msg1), - msg2_(msg2) { - start_ = get_micros(); - } - ~AutoCounter() { - int64_t end = get_micros(); - if (end - start_ > 5000000) { - counter_->Inc(); - int64_t t = (end - start_) / 1000000; - if (!msg2_) { - fprintf(stderr, "%s [AutoCounter] %s hang for %ld s\n", - get_curtime_str().data(), msg1_, t); - } else { - fprintf(stderr, "%s [AutoCounter] %s %s hang for %ld s\n", - get_curtime_str().data(), msg1_, msg2_, t); - } - } - } - -private: - Counter* counter_; - int64_t start_; - const char* msg1_; - const char* msg2_; + public: + AutoCounter(Counter* counter, const char* msg1, const char* msg2 = NULL) + : counter_(counter), msg1_(msg1), msg2_(msg2) { + start_ = get_micros(); + } + ~AutoCounter() { + int64_t end = get_micros(); + if (end - start_ > 5000000) { + counter_->Inc(); + int64_t t = (end - start_) / 1000000; + if (!msg2_) { + fprintf(stderr, "%s [AutoCounter] %s hang for %ld s\n", get_curtime_str().data(), msg1_, t); + } else { + fprintf(stderr, "%s [AutoCounter] %s %s hang for %ld s\n", get_curtime_str().data(), msg1_, + msg2_, t); + } + } + } + + private: + Counter* counter_; + int64_t start_; + const char* msg1_; + const char* msg2_; }; -} // namespace common +} // namespace common #endif // TERA_COMMON_COUNTER_H_ diff --git a/src/common/cpu_profiler.cc b/src/common/cpu_profiler.cc index 014c5ffb8..d831504fb 100644 --- a/src/common/cpu_profiler.cc +++ b/src/common/cpu_profiler.cc @@ -9,47 +9,45 @@ namespace tera { -CpuProfiler::CpuProfiler(const std::string& profiler_file): - exit_(false), - profiler_file_(profiler_file), - thread_(&CpuProfiler::run, this) {} +CpuProfiler::CpuProfiler(const std::string& profiler_file) + : exit_(false), profiler_file_(profiler_file), thread_(&CpuProfiler::run, this) {} CpuProfiler::~CpuProfiler() { - exit_ = true; - cv_.notify_one(); - thread_.join(); - ProfilerState ps; - ProfilerGetCurrentState(&ps); - if (ps.enabled) { - ProfilerStop(); - } + exit_ = true; + cv_.notify_one(); + thread_.join(); + ProfilerState ps; + ProfilerGetCurrentState(&ps); + if (ps.enabled) { + ProfilerStop(); + } } void CpuProfiler::run() { - while (!exit_.load()) { - bool enable; - { - std::unique_lock lock(lock_); - enable = enable_; - } - if (enable) { - ProfilerState ps; - ProfilerGetCurrentState(&ps); - if (ps.enabled == 0) { - ProfilerStart(profiler_file_.c_str()); - } - ProfilerFlush(); - LOG(INFO) << "[Cpu Profiler] Cpu Profiler Dumped"; - } else { - ProfilerState ps; - ProfilerGetCurrentState(&ps); - if (ps.enabled) { - ProfilerStop(); - } - } - std::unique_lock lock(lock_); - cv_.wait_for(lock, interval_); + while (!exit_.load()) { + bool enable; + { + std::unique_lock lock(lock_); + enable = enable_; + } + if (enable) { + ProfilerState ps; + ProfilerGetCurrentState(&ps); + if (ps.enabled == 0) { + ProfilerStart(profiler_file_.c_str()); + } + ProfilerFlush(); + LOG(INFO) << "[Cpu Profiler] Cpu Profiler Dumped"; + } else { + ProfilerState ps; + ProfilerGetCurrentState(&ps); + if (ps.enabled) { + ProfilerStop(); + } } + std::unique_lock lock(lock_); + cv_.wait_for(lock, interval_); + } } -} // namespace tera +} // namespace tera diff --git a/src/common/cpu_profiler.h b/src/common/cpu_profiler.h index a1d9c1ab7..48dd99b32 100644 --- a/src/common/cpu_profiler.h +++ b/src/common/cpu_profiler.h @@ -17,54 +17,54 @@ namespace tera { class CpuProfiler { -public: - /** - * @brief Init CpuProfiler and the detect thread will start - **/ - explicit CpuProfiler(const std::string& profiler_file="CPU"); + public: + /** + * @brief Init CpuProfiler and the detect thread will start + **/ + explicit CpuProfiler(const std::string& profiler_file = "CPU"); - ~CpuProfiler(); + ~CpuProfiler(); - CpuProfiler& SetEnable(bool enable) { - if (enable) { - LOG(INFO) << "[Cpu Profiler] Cpu Profiler Enabled"; - } else { - LOG(INFO) << "[Cpu Profiler] Cpu Profiler Disabled"; - } + CpuProfiler& SetEnable(bool enable) { + if (enable) { + LOG(INFO) << "[Cpu Profiler] Cpu Profiler Enabled"; + } else { + LOG(INFO) << "[Cpu Profiler] Cpu Profiler Disabled"; + } - { - std::unique_lock lock(lock_); - enable_ = enable; - } - cv_.notify_one(); - return *this; + { + std::unique_lock lock(lock_); + enable_ = enable; } + cv_.notify_one(); + return *this; + } - CpuProfiler& SetInterval(int second) { - { - std::unique_lock lock(lock_); - interval_ = std::chrono::seconds(second); - } - cv_.notify_one(); - return *this; + CpuProfiler& SetInterval(int second) { + { + std::unique_lock lock(lock_); + interval_ = std::chrono::seconds(second); } + cv_.notify_one(); + return *this; + } -private: - void run(); + private: + void run(); -private: - std::atomic exit_; - bool enable_{false}; - std::chrono::seconds interval_{10}; - //Never Changed, So we can use profiler_file_.c_str() in safe. - const std::string profiler_file_; - std::mutex lock_; - std::condition_variable cv_; - std::thread thread_; + private: + std::atomic exit_; + bool enable_{false}; + std::chrono::seconds interval_{10}; + // Never Changed, So we can use profiler_file_.c_str() in safe. + const std::string profiler_file_; + std::mutex lock_; + std::condition_variable cv_; + std::thread thread_; }; -} // namespace tera +} // namespace tera -#endif //TERA_CPU_PROFILER_H +#endif // TERA_CPU_PROFILER_H /* vim: set ts=4 sw=4 sts=4 tw=100 */ diff --git a/src/common/event.h b/src/common/event.h index 9a6770ece..d28d32b4a 100644 --- a/src/common/event.h +++ b/src/common/event.h @@ -4,115 +4,111 @@ // // Author: yanshiguang02@baidu.com -#ifndef TERA_COMMON_EVENT_H_ -#define TERA_COMMON_EVENT_H_ +#ifndef TERA_COMMON_EVENT_H_ +#define TERA_COMMON_EVENT_H_ #include "mutex.h" namespace common { class AutoResetEvent { -public: - AutoResetEvent() - : cv_(&mutex_), signaled_(false) { + public: + AutoResetEvent() : cv_(&mutex_), signaled_(false) {} + /// Wait for signal + void Wait() { + MutexLock lock(&mutex_); + while (!signaled_) { + cv_.Wait(); } - /// Wait for signal - void Wait() { - MutexLock lock(&mutex_); - while (!signaled_) { - cv_.Wait(); - } - signaled_ = false; + signaled_ = false; + } + bool TimeWait(int64_t timeout) { + MutexLock lock(&mutex_); + if (!signaled_) { + cv_.TimeWait(timeout); } - bool TimeWait(int64_t timeout) { - MutexLock lock(&mutex_); - if (!signaled_) { - cv_.TimeWait(timeout); - } - bool ret = signaled_; - signaled_ = false; - return ret; - } - /// Signal one - void Set() { - MutexLock lock(&mutex_); - signaled_ = true; - cv_.Signal(); - } - -private: - Mutex mutex_; - CondVar cv_; - bool signaled_; + bool ret = signaled_; + signaled_ = false; + return ret; + } + /// Signal one + void Set() { + MutexLock lock(&mutex_); + signaled_ = true; + cv_.Signal(); + } + + private: + Mutex mutex_; + CondVar cv_; + bool signaled_; }; class CompletedEvent { -public: - CompletedEvent() - : cv_(&mutex_), cnt_(0), triggered_(false) {} - - CompletedEvent(int64_t task_cnt) - : cv_(&mutex_), cnt_(task_cnt), triggered_(false) {} - - // add event source, - // tasks maybe add while others finished or doing, like a task queue - void AddEventSources(int64_t task_cnt) { - MutexLock lock(&mutex_); - if (!triggered_) { - cnt_ += task_cnt; - } - } + public: + CompletedEvent() : cv_(&mutex_), cnt_(0), triggered_(false) {} - // call after all tasks added to EventSource, - // trigger other thread's Wait() function take effect. - void Trigger() { - MutexLock lock(&mutex_); - triggered_ = true; - if (cnt_ <= 0) { - cv_.Signal(); - } - } + CompletedEvent(int64_t task_cnt) : cv_(&mutex_), cnt_(task_cnt), triggered_(false) {} - // wait until cnt_ == 0 and triggered_ == true - void Wait() { - MutexLock lock(&mutex_); - // cnt_ > 0 - while (cnt_ > 0 || !triggered_) { - cv_.Wait(); - } + // add event source, + // tasks maybe add while others finished or doing, like a task queue + void AddEventSources(int64_t task_cnt) { + MutexLock lock(&mutex_); + if (!triggered_) { + cnt_ += task_cnt; } - - // wait for 'timeout' ms, don't careful cnt_ and triggered_ - // if last event source completed, this will returned early 'timeout' - bool TimeWait(int64_t timeout) { - MutexLock lock(&mutex_); - if (cnt_ > 0 || !triggered_) { - cv_.TimeWait(timeout); - } - return cnt_ > 0 ? false : true; + } + + // call after all tasks added to EventSource, + // trigger other thread's Wait() function take effect. + void Trigger() { + MutexLock lock(&mutex_); + triggered_ = true; + if (cnt_ <= 0) { + cv_.Signal(); } - - // last event source complated and triggered_ == true, will be notify - // Wait or TimeWait - void Complete(int64_t task_cnt = 1) { - MutexLock lock(&mutex_); - cnt_ -= task_cnt; - // use 'triggered_' to make sure all tasks call 'AddEventSources' - if (cnt_ <= 0 && triggered_) { - cv_.Signal(); - } + } + + // wait until cnt_ == 0 and triggered_ == true + void Wait() { + MutexLock lock(&mutex_); + // cnt_ > 0 + while (cnt_ > 0 || !triggered_) { + cv_.Wait(); } - -private: - CompletedEvent(const CompletedEvent&) = delete; - CompletedEvent &operator=(const CompletedEvent&) = delete; - Mutex mutex_; - CondVar cv_; - int64_t cnt_; - bool triggered_; + } + + // wait for 'timeout' ms, don't careful cnt_ and triggered_ + // if last event source completed, this will returned early 'timeout' + bool TimeWait(int64_t timeout) { + MutexLock lock(&mutex_); + if (cnt_ > 0 || !triggered_) { + cv_.TimeWait(timeout); + } + return cnt_ > 0 ? false : true; + } + + // last event source complated and triggered_ == true, will be notify + // Wait or TimeWait + void Complete(int64_t task_cnt = 1) { + MutexLock lock(&mutex_); + cnt_ -= task_cnt; + // use 'triggered_' to make sure all tasks call 'AddEventSources' + if (cnt_ <= 0 && triggered_) { + cv_.Signal(); + } + } + + private: + CompletedEvent(const CompletedEvent &) = delete; + CompletedEvent &operator=(const CompletedEvent &) = delete; + Mutex mutex_; + CondVar cv_; + int64_t cnt_; + bool triggered_; }; -} // namespace common +} // namespace common using common::AutoResetEvent; using common::CompletedEvent; diff --git a/src/common/file/file_flags.cc b/src/common/file/file_flags.cc index a9b73a2bc..a899ba630 100644 --- a/src/common/file/file_flags.cc +++ b/src/common/file/file_flags.cc @@ -4,6 +4,4 @@ #include -DEFINE_int32(file_op_retry_times, 3, - "the max retry times when file operation occurred error"); - +DEFINE_int32(file_op_retry_times, 3, "the max retry times when file operation occurred error"); diff --git a/src/common/file/file_path.cc b/src/common/file/file_path.cc index ea3a8ef08..a4ffd653b 100644 --- a/src/common/file/file_path.cc +++ b/src/common/file/file_path.cc @@ -20,199 +20,185 @@ DECLARE_int32(file_op_retry_times); -void SplitStringPath(const std::string& full_path, - std::string* dir_part, - std::string* file_part) { - std::string::size_type pos = full_path.rfind("/"); - if (pos != std::string::npos) { - if (dir_part) { - *dir_part = full_path.substr(0, pos); - } - if (file_part) { - *file_part = full_path.substr(pos + 1); - } - } else { - if (dir_part) { - *dir_part = full_path; - } +void SplitStringPath(const std::string& full_path, std::string* dir_part, std::string* file_part) { + std::string::size_type pos = full_path.rfind("/"); + if (pos != std::string::npos) { + if (dir_part) { + *dir_part = full_path.substr(0, pos); } -} - -std::string ConcatStringPath(const std::vector& sections, - const std::string& delim) { - std::string file_path; - if (sections.size() == 0) { - return file_path; + if (file_part) { + *file_part = full_path.substr(pos + 1); } - - for (uint32_t i = 0; i < sections.size() - 1; ++i) { - file_path += (sections[i] + delim); + } else { + if (dir_part) { + *dir_part = full_path; } - return file_path + sections[sections.size() - 1]; + } } +std::string ConcatStringPath(const std::vector& sections, const std::string& delim) { + std::string file_path; + if (sections.size() == 0) { + return file_path; + } -std::string GetPathPrefix(const std::string& full_path, - const std::string& delim) { - std::string prefix; - if (full_path.empty()) { - return prefix; - } - size_t idx = full_path.find(delim, 1); - if (idx == std::string::npos) { - return prefix; - } - if (idx + 1 != full_path.length()) { - return full_path.substr(0, idx + 1); - } else { - return full_path; - } + for (uint32_t i = 0; i < sections.size() - 1; ++i) { + file_path += (sections[i] + delim); + } + return file_path + sections[sections.size() - 1]; +} + +std::string GetPathPrefix(const std::string& full_path, const std::string& delim) { + std::string prefix; + if (full_path.empty()) { + return prefix; + } + size_t idx = full_path.find(delim, 1); + if (idx == std::string::npos) { + return prefix; + } + if (idx + 1 != full_path.length()) { + return full_path.substr(0, idx + 1); + } else { + return full_path; + } } bool CreateDirWithRetry(const std::string& dir_path) { - if (dir_path.length() == 0) { - return false; - } - std::vector path_sections; - SplitString(dir_path, "/", &path_sections); - bool is_success = true; - std::string path; - if (dir_path[0] == '/') { - path.append("/"); - } - for (size_t d = 0; d < path_sections.size() && is_success; ++d) { - if (path_sections[d] == ".") { - continue; - } - path += path_sections[d] + "/"; - if (path_sections[d] == "..") { - continue; - } - - if (IsExist(path)) { - continue; - } - - int i = 0; - for (; i < FLAGS_file_op_retry_times; i++) { - if (0 == mkdir(path.c_str(), 0755)) { - break; - } - } - if (i == FLAGS_file_op_retry_times) { - is_success = false; - } - } - return is_success; + if (dir_path.length() == 0) { + return false; + } + std::vector path_sections; + SplitString(dir_path, "/", &path_sections); + bool is_success = true; + std::string path; + if (dir_path[0] == '/') { + path.append("/"); + } + for (size_t d = 0; d < path_sections.size() && is_success; ++d) { + if (path_sections[d] == ".") { + continue; + } + path += path_sections[d] + "/"; + if (path_sections[d] == "..") { + continue; + } + + if (IsExist(path)) { + continue; + } + + int i = 0; + for (; i < FLAGS_file_op_retry_times; i++) { + if (0 == mkdir(path.c_str(), 0755)) { + break; + } + } + if (i == FLAGS_file_op_retry_times) { + is_success = false; + } + } + return is_success; } std::string UidToName(uid_t uid) { - struct passwd *temp = NULL; - if (NULL == (temp=getpwuid(uid))) { - return ""; - } else { - return temp->pw_name; - } + struct passwd* temp = NULL; + if (NULL == (temp = getpwuid(uid))) { + return ""; + } else { + return temp->pw_name; + } } std::string GidToName(gid_t gid) { - struct group *temp = NULL; - if (NULL == (temp = getgrgid(gid))) { - return ""; - } else { - return temp->gr_name; - } + struct group* temp = NULL; + if (NULL == (temp = getgrgid(gid))) { + return ""; + } else { + return temp->gr_name; + } } - -bool ListCurrentDir(const std::string& dir_path, - std::vector* file_list) { - DIR *dir = NULL; - struct dirent *ptr = NULL; - dir = opendir(dir_path.c_str()); - if (dir == NULL) { - closedir(dir); - return false; - } - while ((ptr = readdir(dir)) != NULL) { - /// if (ptr->d_type == DT_REG) { - /// file_list->push_back(ptr->d_name); - /// } - if (strcmp(ptr->d_name, ".") != 0 && strcmp(ptr->d_name, "..") != 0) { - file_list->push_back(ptr->d_name); - } - } +bool ListCurrentDir(const std::string& dir_path, std::vector* file_list) { + DIR* dir = NULL; + struct dirent* ptr = NULL; + dir = opendir(dir_path.c_str()); + if (dir == NULL) { closedir(dir); - return true; + return false; + } + while ((ptr = readdir(dir)) != NULL) { + /// if (ptr->d_type == DT_REG) { + /// file_list->push_back(ptr->d_name); + /// } + if (strcmp(ptr->d_name, ".") != 0 && strcmp(ptr->d_name, "..") != 0) { + file_list->push_back(ptr->d_name); + } + } + closedir(dir); + return true; } -bool ListCurrentDirWithStat(const std::string& dir_path, - std::vector* file_list) { - DIR *dir = NULL; - struct dirent *ptr = NULL; - dir = opendir(dir_path.c_str()); - if (dir == NULL) { - return false; - } - bool stat_all_succ = true; - while ((ptr = readdir(dir)) != NULL) { - if (strcmp(ptr->d_name, ".") != 0 && strcmp(ptr->d_name, "..") != 0) { - struct stat st; - std::string file_name(ptr->d_name); - file_name = dir_path + "/" + file_name; - if (lstat(file_name.c_str(), &st) == 0) { - file_list->push_back(std::make_pair(file_name, st)); - } else { - // break if stat fail and return false later - stat_all_succ = false; - break; - } - } - } - closedir(dir); - return stat_all_succ; +bool ListCurrentDirWithStat(const std::string& dir_path, std::vector* file_list) { + DIR* dir = NULL; + struct dirent* ptr = NULL; + dir = opendir(dir_path.c_str()); + if (dir == NULL) { + return false; + } + bool stat_all_succ = true; + while ((ptr = readdir(dir)) != NULL) { + if (strcmp(ptr->d_name, ".") != 0 && strcmp(ptr->d_name, "..") != 0) { + struct stat st; + std::string file_name(ptr->d_name); + file_name = dir_path + "/" + file_name; + if (lstat(file_name.c_str(), &st) == 0) { + file_list->push_back(std::make_pair(file_name, st)); + } else { + // break if stat fail and return false later + stat_all_succ = false; + break; + } + } + } + closedir(dir); + return stat_all_succ; } -bool IsExist(const std::string& path) { - return access(path.c_str(), R_OK) == 0; -} +bool IsExist(const std::string& path) { return access(path.c_str(), R_OK) == 0; } bool IsDir(const std::string& path) { - if (!IsExist(path)) { - return false; - } - - struct stat st; - if (stat(path.c_str(), &st) == 0 && (st.st_mode & S_IFDIR) != 0) { - return true; - } + if (!IsExist(path)) { return false; + } + + struct stat st; + if (stat(path.c_str(), &st) == 0 && (st.st_mode & S_IFDIR) != 0) { + return true; + } + return false; } bool IsEmpty(const std::string& path) { - std::vector children; - if (!IsDir(path) || - !ListCurrentDir(path, &children) || - children.size() != 0) { - return false; - } - return true; + std::vector children; + if (!IsDir(path) || !ListCurrentDir(path, &children) || children.size() != 0) { + return false; + } + return true; } bool RemoveLocalFile(const std::string& path) { - bool done = false; - for (int32_t i = 0; i < FLAGS_file_op_retry_times && !done; ++i) { - done = (remove(path.c_str()) == 0); - } - return done; + bool done = false; + for (int32_t i = 0; i < FLAGS_file_op_retry_times && !done; ++i) { + done = (remove(path.c_str()) == 0); + } + return done; } - -bool MoveLocalFile(const std::string& src_file, - const std::string& dst_file) { - bool done = false; - for (int32_t i = 0; i < FLAGS_file_op_retry_times && !done; ++i) { - done = (rename(src_file.c_str(), dst_file.c_str()) == 0); - } - return done; +bool MoveLocalFile(const std::string& src_file, const std::string& dst_file) { + bool done = false; + for (int32_t i = 0; i < FLAGS_file_op_retry_times && !done; ++i) { + done = (rename(src_file.c_str(), dst_file.c_str()) == 0); + } + return done; } diff --git a/src/common/file/file_path.h b/src/common/file/file_path.h index d5e04ea99..faaebd459 100644 --- a/src/common/file/file_path.h +++ b/src/common/file/file_path.h @@ -5,21 +5,18 @@ #ifndef TERA_COMMON_FILE_FILE_PATH_H_ #define TERA_COMMON_FILE_FILE_PATH_H_ -#include -#include -#include -#include +#include +#include +#include +#include #include -void SplitStringPath(const std::string& full_path, - std::string* dir_part, - std::string* file_part); +void SplitStringPath(const std::string& full_path, std::string* dir_part, std::string* file_part); std::string ConcatStringPath(const std::vector& sections, const std::string& delim = "."); -std::string GetPathPrefix(const std::string& full_path, - const std::string& delim = "/"); +std::string GetPathPrefix(const std::string& full_path, const std::string& delim = "/"); bool CreateDirWithRetry(const std::string& dir_path); @@ -27,13 +24,11 @@ std::string GidToName(gid_t gid); std::string UidToName(uid_t uid); -bool ListCurrentDir(const std::string& dir_path, - std::vector* file_list); +bool ListCurrentDir(const std::string& dir_path, std::vector* file_list); typedef std::pair FileStateInfo; -bool ListCurrentDirWithStat(const std::string& dir_path, - std::vector* file_list); +bool ListCurrentDirWithStat(const std::string& dir_path, std::vector* file_list); bool IsExist(const std::string& path); @@ -44,7 +39,6 @@ bool IsEmpty(const std::string& path); bool RemoveLocalFile(const std::string& path); -bool MoveLocalFile(const std::string& src_file, - const std::string& dst_file); +bool MoveLocalFile(const std::string& src_file, const std::string& dst_file); -#endif // TERA_COMMON_FILE_FILE_PATH_H_ +#endif // TERA_COMMON_FILE_FILE_PATH_H_ diff --git a/src/common/file/file_stream.cc b/src/common/file/file_stream.cc index 4700f46c4..77bf416ee 100644 --- a/src/common/file/file_stream.cc +++ b/src/common/file/file_stream.cc @@ -16,231 +16,221 @@ DECLARE_int32(file_op_retry_times); -FileStream::FileStream() - : fp_(NULL) {} - -bool FileStream::Open(const std::string& file_path, FileOpenMode flag, - FileErrorCode* error_code) { - std::string open_mode = FileOpenModeToString(flag); - fp_ = fopen(file_path.c_str(), open_mode.c_str()); - if (fp_ == NULL) { - SetErrorCode(error_code, kFileErrOpenFail); - return false; - } - - SetErrorCode(error_code, kFileSuccess); - return true; +FileStream::FileStream() : fp_(NULL) {} + +bool FileStream::Open(const std::string& file_path, FileOpenMode flag, FileErrorCode* error_code) { + std::string open_mode = FileOpenModeToString(flag); + fp_ = fopen(file_path.c_str(), open_mode.c_str()); + if (fp_ == NULL) { + SetErrorCode(error_code, kFileErrOpenFail); + return false; + } + + SetErrorCode(error_code, kFileSuccess); + return true; } bool FileStream::Close(FileErrorCode* error_code) { - if (fp_ == NULL) { - SetErrorCode(error_code, kFileErrNotOpen); - return false; - } - - Flush(); - - if (fclose(fp_) != 0) { - LOG(ERROR) << "fail to close file, errno: " << strerror(errno); - SetErrorCode(error_code, kFileErrClose); - return false; - } - return true; + if (fp_ == NULL) { + SetErrorCode(error_code, kFileErrNotOpen); + return false; + } + + Flush(); + + if (fclose(fp_) != 0) { + LOG(ERROR) << "fail to close file, errno: " << strerror(errno); + SetErrorCode(error_code, kFileErrClose); + return false; + } + return true; } -int64_t FileStream::Read(void* buffer, int64_t buffer_size, - FileErrorCode* error_code) { - if (fp_ == NULL) { - SetErrorCode(error_code, kFileErrNotOpen); - return -1; - } - if (!buffer || buffer_size <= 0) { - SetErrorCode(error_code, kFileErrParameter); - return -1; - } - - int64_t read_bytes = fread(buffer, 1, static_cast(buffer_size), fp_); - bool success = true; - if (read_bytes != buffer_size) { - if (ferror(fp_)) success = false; - } - if (success) { - SetErrorCode(error_code, kFileSuccess); - } else { - LOG(ERROR) << "error occurred in reader, errono:" << strerror(errno); - SetErrorCode(error_code, kFileErrRead); - } - return success ? read_bytes : -1; +int64_t FileStream::Read(void* buffer, int64_t buffer_size, FileErrorCode* error_code) { + if (fp_ == NULL) { + SetErrorCode(error_code, kFileErrNotOpen); + return -1; + } + if (!buffer || buffer_size <= 0) { + SetErrorCode(error_code, kFileErrParameter); + return -1; + } + + int64_t read_bytes = fread(buffer, 1, static_cast(buffer_size), fp_); + bool success = true; + if (read_bytes != buffer_size) { + if (ferror(fp_)) success = false; + } + if (success) { + SetErrorCode(error_code, kFileSuccess); + } else { + LOG(ERROR) << "error occurred in reader, errono:" << strerror(errno); + SetErrorCode(error_code, kFileErrRead); + } + return success ? read_bytes : -1; } -int64_t FileStream::Write(const void* buffer, int64_t buffer_size, - FileErrorCode* error_code) { - if (fp_ == NULL) { - SetErrorCode(error_code, kFileErrNotOpen); - return -1; - } - if (!buffer || buffer_size <= 0) { - SetErrorCode(error_code, kFileErrParameter); - return -1; - } - - int64_t total_size = 0; - const char* byte_buf = static_cast(buffer); - for (int32_t retry = 0; retry < FLAGS_file_op_retry_times; ++retry) { - size_t expect_size = static_cast(buffer_size - total_size); - if (expect_size == 0u) break; - - size_t ret_size = fwrite(byte_buf + total_size, 1, expect_size, fp_); - total_size += static_cast(ret_size); - - if (ret_size < expect_size) { - LOG(ERROR) << "write down enough bytes and fail, [" - << "buffur_size = " << buffer_size - << ", writen down total size = " << total_size - << ", expect_size = " << expect_size - << ", this writen size = " << ret_size - << "], reason: " << strerror(errno); - CHECK(ferror(fp_)) << "file writer is broken"; - if (errno != EINTR && ret_size == 0u) break; - } - } - if (total_size == buffer_size) { - SetErrorCode(error_code, kFileSuccess); - } else { - LOG(ERROR) << "error occurred in writter, errono:" << strerror(errno); - SetErrorCode(error_code, kFileErrWrite); - } - return total_size > 0 ? total_size : -1; +int64_t FileStream::Write(const void* buffer, int64_t buffer_size, FileErrorCode* error_code) { + if (fp_ == NULL) { + SetErrorCode(error_code, kFileErrNotOpen); + return -1; + } + if (!buffer || buffer_size <= 0) { + SetErrorCode(error_code, kFileErrParameter); + return -1; + } + + int64_t total_size = 0; + const char* byte_buf = static_cast(buffer); + for (int32_t retry = 0; retry < FLAGS_file_op_retry_times; ++retry) { + size_t expect_size = static_cast(buffer_size - total_size); + if (expect_size == 0u) break; + + size_t ret_size = fwrite(byte_buf + total_size, 1, expect_size, fp_); + total_size += static_cast(ret_size); + + if (ret_size < expect_size) { + LOG(ERROR) << "write down enough bytes and fail, [" + << "buffur_size = " << buffer_size << ", writen down total size = " << total_size + << ", expect_size = " << expect_size << ", this writen size = " << ret_size + << "], reason: " << strerror(errno); + CHECK(ferror(fp_)) << "file writer is broken"; + if (errno != EINTR && ret_size == 0u) break; + } + } + if (total_size == buffer_size) { + SetErrorCode(error_code, kFileSuccess); + } else { + LOG(ERROR) << "error occurred in writter, errono:" << strerror(errno); + SetErrorCode(error_code, kFileErrWrite); + } + return total_size > 0 ? total_size : -1; } bool FileStream::Flush() { - if (fp_ == NULL) { - return false; - } - return (fflush(fp_) == 0) && (fsync(fileno(fp_)) == 0); + if (fp_ == NULL) { + return false; + } + return (fflush(fp_) == 0) && (fsync(fileno(fp_)) == 0); } int64_t FileStream::Seek(int64_t offset, int32_t origin, FileErrorCode* error_code) { - if (fp_ == NULL) { - return -1; - } - if (fseeko(fp_, offset, origin) < 0) { -// SetErrorCode(error_code, errono); - return -1; - } - return Tell(error_code); + if (fp_ == NULL) { + return -1; + } + if (fseeko(fp_, offset, origin) < 0) { + // SetErrorCode(error_code, errono); + return -1; + } + return Tell(error_code); } int64_t FileStream::Tell(FileErrorCode* error_code) { - if (fp_ == NULL) { - return -1; - } - int64_t ret = ftello(fp_); - if (ret < 0) { - // set error code - } else { - // set error code - } - return ret; + if (fp_ == NULL) { + return -1; + } + int64_t ret = ftello(fp_); + if (ret < 0) { + // set error code + } else { + // set error code + } + return ret; } -int64_t FileStream::GetSize(const std::string& file_path, - FileErrorCode* error_code) { - int64_t file_size = -1; - int32_t file_exist = access(file_path.c_str(), F_OK); - if (file_exist != 0) { - SetErrorCode(error_code, kFileErrNotExit); - if (error_code == NULL) { - LOG(ERROR) << "file " << file_path << " not exists"; - } - return file_size; +int64_t FileStream::GetSize(const std::string& file_path, FileErrorCode* error_code) { + int64_t file_size = -1; + int32_t file_exist = access(file_path.c_str(), F_OK); + if (file_exist != 0) { + SetErrorCode(error_code, kFileErrNotExit); + if (error_code == NULL) { + LOG(ERROR) << "file " << file_path << " not exists"; } + return file_size; + } - struct stat stat_buf; - if (stat(file_path.c_str(), &stat_buf) < 0) { - file_size = -1; -// SetErrorCode(error_code, errno); - LOG(ERROR) << "stat error for " << file_path; - return file_size; - } + struct stat stat_buf; + if (stat(file_path.c_str(), &stat_buf) < 0) { + file_size = -1; + // SetErrorCode(error_code, errno); + LOG(ERROR) << "stat error for " << file_path; + return file_size; + } - if (S_ISDIR(stat_buf.st_mode)) { - file_size = -1; -// SetErrorCode(error_code, errno); - LOG(ERROR) << "input file name " << file_path - << " is a directory"; - return file_size; - } else { - SetErrorCode(error_code, kFileSuccess); - file_size = static_cast(stat_buf.st_size); - } + if (S_ISDIR(stat_buf.st_mode)) { + file_size = -1; + // SetErrorCode(error_code, errno); + LOG(ERROR) << "input file name " << file_path << " is a directory"; return file_size; + } else { + SetErrorCode(error_code, kFileSuccess); + file_size = static_cast(stat_buf.st_size); + } + return file_size; } int32_t FileStream::ReadLine(void* buffer, int32_t max_size) { - if (fp_ == NULL) { - return -1; - } - - if (!buffer || max_size <= 0) return -1; - char* read_buffer = static_cast(buffer); - off64_t org_offest = ftello(fp_); - if (org_offest < 0) - return -1; - char* readed_buffer = fgets(read_buffer, max_size, fp_); - if (readed_buffer == NULL) { - if (feof(fp_)) { - return 0; - } else { - return -1; - } + if (fp_ == NULL) { + return -1; + } + + if (!buffer || max_size <= 0) return -1; + char* read_buffer = static_cast(buffer); + off64_t org_offest = ftello(fp_); + if (org_offest < 0) return -1; + char* readed_buffer = fgets(read_buffer, max_size, fp_); + if (readed_buffer == NULL) { + if (feof(fp_)) { + return 0; } else { - off64_t new_offset = ftello(fp_); - if (new_offset < 0) - return -1; - return new_offset - org_offest; + return -1; } + } else { + off64_t new_offset = ftello(fp_); + if (new_offset < 0) return -1; + return new_offset - org_offest; + } } int32_t FileStream::ReadLine(std::string* result) { - result->resize(0); - - while (true) { - const int32_t kBufferSize = 4 * 1024; - std::string buffer(kBufferSize, 0); - int32_t bytes = ReadLine(StringAsArray(&buffer), kBufferSize); - - if (bytes < 0) { - result->resize(0); - return bytes; - } - if (bytes == 0) { - return result->size(); - } - if (bytes > 0) { - buffer.resize(bytes); - result->append(buffer); - if (StringEndsWith(*result, "\n")) { - return result->size(); - } - } + result->resize(0); + + while (true) { + const int32_t kBufferSize = 4 * 1024; + std::string buffer(kBufferSize, 0); + int32_t bytes = ReadLine(StringAsArray(&buffer), kBufferSize); + + if (bytes < 0) { + result->resize(0); + return bytes; + } + if (bytes == 0) { + return result->size(); + } + if (bytes > 0) { + buffer.resize(bytes); + result->append(buffer); + if (StringEndsWith(*result, "\n")) { + return result->size(); + } } + } } void FileStream::SetErrorCode(FileErrorCode* error_code, FileErrorCode code) { - if (error_code) { - *error_code = code; - } + if (error_code) { + *error_code = code; + } } std::string FileStream::FileOpenModeToString(uint32_t flag) { - std::string mode; - if ((flag & FILE_READ) == FILE_READ) { - mode += "r"; - } else if ((flag & FILE_WRITE) == FILE_WRITE) { - mode += "w"; - } else if ((flag & FILE_APPEND) == FILE_APPEND) { - mode += "a"; - } - return mode; + std::string mode; + if ((flag & FILE_READ) == FILE_READ) { + mode += "r"; + } else if ((flag & FILE_WRITE) == FILE_WRITE) { + mode += "w"; + } else if ((flag & FILE_APPEND) == FILE_APPEND) { + mode += "a"; + } + return mode; } diff --git a/src/common/file/file_stream.h b/src/common/file/file_stream.h index 13f1c8be3..3f2e85396 100644 --- a/src/common/file/file_stream.h +++ b/src/common/file/file_stream.h @@ -11,40 +11,34 @@ #include "common/file/file_types.h" class FileStream { -public: - FileStream(); - ~FileStream() {} + public: + FileStream(); + ~FileStream() {} - bool Open(const std::string& file_path, - FileOpenMode flag, - FileErrorCode* error_code = NULL); - bool Close(FileErrorCode* error_code = NULL); + bool Open(const std::string& file_path, FileOpenMode flag, FileErrorCode* error_code = NULL); + bool Close(FileErrorCode* error_code = NULL); - int64_t Write(const void* buffer, int64_t buffer_size, - FileErrorCode* error_code = NULL); + int64_t Write(const void* buffer, int64_t buffer_size, FileErrorCode* error_code = NULL); - int64_t Read(void* buffer, int64_t buffer_size, - FileErrorCode* error_code = NULL); + int64_t Read(void* buffer, int64_t buffer_size, FileErrorCode* error_code = NULL); - bool Flush(); + bool Flush(); - int64_t Seek(int64_t offset, int32_t origin, - FileErrorCode* error_code = NULL); + int64_t Seek(int64_t offset, int32_t origin, FileErrorCode* error_code = NULL); - int64_t Tell(FileErrorCode* error_code = NULL); + int64_t Tell(FileErrorCode* error_code = NULL); - int64_t GetSize(const std::string& file_path, - FileErrorCode* error_code = NULL); + int64_t GetSize(const std::string& file_path, FileErrorCode* error_code = NULL); - int32_t ReadLine(void* buffer, int32_t max_size); - int32_t ReadLine(std::string* result); + int32_t ReadLine(void* buffer, int32_t max_size); + int32_t ReadLine(std::string* result); -private: - void SetErrorCode(FileErrorCode* error_code, FileErrorCode code); - std::string FileOpenModeToString(uint32_t flag); + private: + void SetErrorCode(FileErrorCode* error_code, FileErrorCode code); + std::string FileOpenModeToString(uint32_t flag); -private: - FILE* fp_; + private: + FILE* fp_; }; -#endif // TERA_COMMON_FILE_FILE_STREAM_H_ +#endif // TERA_COMMON_FILE_FILE_STREAM_H_ diff --git a/src/common/file/file_types.h b/src/common/file/file_types.h index 53cdca875..ceaa808da 100644 --- a/src/common/file/file_types.h +++ b/src/common/file/file_types.h @@ -7,21 +7,17 @@ #include -enum FileOpenMode { - FILE_READ = 0x01, - FILE_WRITE = 0x02, - FILE_APPEND = 0x04 -}; +enum FileOpenMode { FILE_READ = 0x01, FILE_WRITE = 0x02, FILE_APPEND = 0x04 }; enum FileErrorCode { - kFileSuccess, - kFileErrParameter, - kFileErrOpenFail, - kFileErrNotOpen, - kFileErrWrite, - kFileErrRead, - kFileErrClose, - kFileErrNotExit + kFileSuccess, + kFileErrParameter, + kFileErrOpenFail, + kFileErrNotOpen, + kFileErrWrite, + kFileErrRead, + kFileErrClose, + kFileErrNotExit }; -#endif // TERA_COMMON_FILE_FILE_DEF_H_ +#endif // TERA_COMMON_FILE_FILE_DEF_H_ diff --git a/src/common/file/recordio/record_io.cc b/src/common/file/recordio/record_io.cc index 8b793eb6d..192f5664b 100644 --- a/src/common/file/recordio/record_io.cc +++ b/src/common/file/recordio/record_io.cc @@ -6,177 +6,171 @@ #include -RecordWriter::RecordWriter(): file_(NULL) {} +RecordWriter::RecordWriter() : file_(NULL) {} RecordWriter::~RecordWriter() {} bool RecordWriter::Reset(FileStream *file) { - DCHECK(file != NULL); - file_ = file; - return true; + DCHECK(file != NULL); + file_ = file; + return true; } -bool RecordWriter::WriteMessage(const ::google::protobuf::Message& message) { - std::string output; - if (!message.IsInitialized()) { - LOG(WARNING) << "Missing required fields." - << message.InitializationErrorString(); - return false; - } - if (!message.AppendToString(&output)) { - return false; - } - if (!WriteRecord(output.data(), output.size())) { - return false; - } - return true; +bool RecordWriter::WriteMessage(const ::google::protobuf::Message &message) { + std::string output; + if (!message.IsInitialized()) { + LOG(WARNING) << "Missing required fields." << message.InitializationErrorString(); + return false; + } + if (!message.AppendToString(&output)) { + return false; + } + if (!WriteRecord(output.data(), output.size())) { + return false; + } + return true; } bool RecordWriter::WriteRecord(const char *data, uint32_t size) { - if (!Write(reinterpret_cast(&size), sizeof(size))) { - return false; - } - if (!Write(data, size)) { - return false; - } - return true; + if (!Write(reinterpret_cast(&size), sizeof(size))) { + return false; + } + if (!Write(data, size)) { + return false; + } + return true; } -bool RecordWriter::WriteRecord(const std::string& data) { - return WriteRecord(data.data(), data.size()); +bool RecordWriter::WriteRecord(const std::string &data) { + return WriteRecord(data.data(), data.size()); } bool RecordWriter::Write(const char *data, uint32_t size) { - uint32_t write_size = 0; - while (write_size < size) { - int32_t ret = file_->Write(data + write_size, size - write_size); - if (ret == -1) { - LOG(ERROR) << "RecordWriter error."; - return false; - } - write_size += ret; + uint32_t write_size = 0; + while (write_size < size) { + int32_t ret = file_->Write(data + write_size, size - write_size); + if (ret == -1) { + LOG(ERROR) << "RecordWriter error."; + return false; } - file_->Flush(); + write_size += ret; + } + file_->Flush(); - return true; + return true; } - RecordReader::RecordReader() - : file_(NULL), - file_size_(0), - buffer_size_(1 * 1024 * 1024), - data_size_(0) { - buffer_.reset(new char[buffer_size_]); + : file_(NULL), file_size_(0), buffer_size_(1 * 1024 * 1024), data_size_(0) { + buffer_.reset(new char[buffer_size_]); } RecordReader::~RecordReader() {} bool RecordReader::Reset(FileStream *file) { - DCHECK(file != NULL); - file_ = file; - if (-1 == file_->Seek(0, SEEK_END)) { - LOG(ERROR) << "RecordReader Reset error."; - return false; - } - file_size_ = file_->Tell(); - if (-1 == file_->Seek(0, SEEK_SET)) { - LOG(ERROR) << "RecordReader Reset error."; - return false; - } - return true; + DCHECK(file != NULL); + file_ = file; + if (-1 == file_->Seek(0, SEEK_END)) { + LOG(ERROR) << "RecordReader Reset error."; + return false; + } + file_size_ = file_->Tell(); + if (-1 == file_->Seek(0, SEEK_SET)) { + LOG(ERROR) << "RecordReader Reset error."; + return false; + } + return true; } int RecordReader::Next() { - // read size - int64_t ret = file_->Tell(); - if (ret == -1) { - LOG(ERROR) << "Tell error."; - return -1; + // read size + int64_t ret = file_->Tell(); + if (ret == -1) { + LOG(ERROR) << "Tell error."; + return -1; + } + + if (ret == file_size_) { + return 0; + } else if (file_size_ - ret >= static_cast(sizeof(data_size_))) { // NO_LINT + if (!Read(reinterpret_cast(&data_size_), sizeof(data_size_))) { + LOG(ERROR) << "Read size error."; + return -1; } - - if (ret == file_size_) { - return 0; - } else if (file_size_ - ret >= static_cast(sizeof(data_size_))) { // NO_LINT - if (!Read(reinterpret_cast(&data_size_), sizeof(data_size_))) { - LOG(ERROR) << "Read size error."; - return -1; - } - } - - // read data - ret = file_->Tell(); - if (ret == -1) { - LOG(ERROR) << "Tell error."; - return -1; + } + + // read data + ret = file_->Tell(); + if (ret == -1) { + LOG(ERROR) << "Tell error."; + return -1; + } + + if (ret >= file_size_ && data_size_ != 0) { + LOG(ERROR) << "read error."; + return -1; + } else if (file_size_ - ret >= data_size_) { // NO_LINT + if (data_size_ > buffer_size_) { + while (data_size_ > buffer_size_) { + buffer_size_ *= 2; + } + buffer_.reset(new char[buffer_size_]); } - if (ret >= file_size_ && data_size_ != 0) { - LOG(ERROR) << "read error."; - return -1; - } else if (file_size_ - ret >= data_size_) { // NO_LINT - if (data_size_ > buffer_size_) { - while (data_size_ > buffer_size_) { - buffer_size_ *= 2; - } - buffer_.reset(new char[buffer_size_]); - } - - if (!Read(buffer_.get(), data_size_)) { - LOG(ERROR) << "Read data error."; - return -1; - } - } else { - LOG(ERROR) << "data_size_ of current record is invalid: " - << data_size_ << " bigger than " - << (file_size_ - ret); - return -1; + if (!Read(buffer_.get(), data_size_)) { + LOG(ERROR) << "Read data error."; + return -1; } + } else { + LOG(ERROR) << "data_size_ of current record is invalid: " << data_size_ << " bigger than " + << (file_size_ - ret); + return -1; + } - return 1; + return 1; } bool RecordReader::ReadMessage(::google::protobuf::Message *message) { - std::string str(buffer_.get(), data_size_); - if (!message->ParseFromArray(buffer_.get(), data_size_)) { - LOG(WARNING) << "Missing required fields."; - return false; - } - return true; + std::string str(buffer_.get(), data_size_); + if (!message->ParseFromArray(buffer_.get(), data_size_)) { + LOG(WARNING) << "Missing required fields."; + return false; + } + return true; } bool RecordReader::ReadNextMessage(::google::protobuf::Message *message) { - while (Next() == 1) { - std::string str(buffer_.get(), data_size_); - if (message->ParseFromArray(buffer_.get(), data_size_)) { - return true; - } + while (Next() == 1) { + std::string str(buffer_.get(), data_size_); + if (message->ParseFromArray(buffer_.get(), data_size_)) { + return true; } - return false; + } + return false; } bool RecordReader::ReadRecord(const char **data, uint32_t *size) { - *data = buffer_.get(); - *size = data_size_; - return true; + *data = buffer_.get(); + *size = data_size_; + return true; } bool RecordReader::ReadRecord(std::string *data) { - data->assign(buffer_.get()); - return true; + data->assign(buffer_.get()); + return true; } bool RecordReader::Read(char *data, uint32_t size) { - // Read - uint32_t read_size = 0; - while (read_size < size) { - int64_t ret = file_->Read(data + read_size, size - read_size); - if (ret == -1) { - LOG(ERROR) << "Read error."; - return false; - } - read_size += ret; + // Read + uint32_t read_size = 0; + while (read_size < size) { + int64_t ret = file_->Read(data + read_size, size - read_size); + if (ret == -1) { + LOG(ERROR) << "Read error."; + return false; } + read_size += ret; + } - return true; + return true; } diff --git a/src/common/file/recordio/record_io.h b/src/common/file/recordio/record_io.h index b30efbffb..f212ccea2 100644 --- a/src/common/file/recordio/record_io.h +++ b/src/common/file/recordio/record_io.h @@ -12,48 +12,48 @@ #include "common/file/file_stream.h" class RecordWriter { -public: - RecordWriter(); - ~RecordWriter(); + public: + RecordWriter(); + ~RecordWriter(); - bool Reset(FileStream *file); - bool WriteMessage(const ::google::protobuf::Message& message); - bool WriteRecord(const char *data, uint32_t size); - bool WriteRecord(const std::string& data); + bool Reset(FileStream *file); + bool WriteMessage(const ::google::protobuf::Message &message); + bool WriteRecord(const char *data, uint32_t size); + bool WriteRecord(const std::string &data); -private: - bool Write(const char *data, uint32_t size); + private: + bool Write(const char *data, uint32_t size); -private: - FileStream* file_; + private: + FileStream *file_; }; class RecordReader { -public: - RecordReader(); - ~RecordReader(); - - bool Reset(FileStream *file); - - // for ok, return 1; - // for no more data, return 0; - // for error, return -1; - int Next(); - - bool ReadMessage(::google::protobuf::Message *message); - bool ReadNextMessage(::google::protobuf::Message *message); - bool ReadRecord(const char **data, uint32_t *size); - bool ReadRecord(std::string *data); - -private: - bool Read(char *data, uint32_t size); - -private: - FileStream* file_; - scoped_array buffer_; - uint32_t file_size_; - uint32_t buffer_size_; - uint32_t data_size_; + public: + RecordReader(); + ~RecordReader(); + + bool Reset(FileStream *file); + + // for ok, return 1; + // for no more data, return 0; + // for error, return -1; + int Next(); + + bool ReadMessage(::google::protobuf::Message *message); + bool ReadNextMessage(::google::protobuf::Message *message); + bool ReadRecord(const char **data, uint32_t *size); + bool ReadRecord(std::string *data); + + private: + bool Read(char *data, uint32_t size); + + private: + FileStream *file_; + scoped_array buffer_; + uint32_t file_size_; + uint32_t buffer_size_; + uint32_t data_size_; }; -#endif // TERA_COMMON_FILE_RECORDIO_RECORD_IO_H_ +#endif // TERA_COMMON_FILE_RECORDIO_RECORD_IO_H_ diff --git a/src/common/func_scope_guard.h b/src/common/func_scope_guard.h new file mode 100644 index 000000000..01fabb25b --- /dev/null +++ b/src/common/func_scope_guard.h @@ -0,0 +1,33 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include + +namespace common { + +class FuncScopeGuard { + public: + explicit FuncScopeGuard(std::function on_exit_scope) + : on_exit_scope_(on_exit_scope), dismissed_(false) {} + + FuncScopeGuard(FuncScopeGuard const&) = delete; + FuncScopeGuard& operator=(const FuncScopeGuard&) = delete; + + virtual ~FuncScopeGuard() { + if (!dismissed_) { + on_exit_scope_(); + } + } + + void Dismiss() { dismissed_ = true; } + + private: + std::function on_exit_scope_; + bool dismissed_; +}; +} + +using common::FuncScopeGuard; diff --git a/src/common/heap_profiler.cc b/src/common/heap_profiler.cc index dab5654f4..628bc0318 100644 --- a/src/common/heap_profiler.cc +++ b/src/common/heap_profiler.cc @@ -9,83 +9,80 @@ #include "common/heap_profiler.h" -DEFINE_int64(heap_profile_allocation_interval, 1073741824, "Env variable for heap profiler's allocation interval"); -DEFINE_int64(heap_profile_inuse_interval, 1073741824, "Env variable for heap profiler's inuse interval"); - +DEFINE_int64(heap_profile_allocation_interval, 1073741824, + "Env variable for heap profiler's allocation interval"); +DEFINE_int64(heap_profile_inuse_interval, 1073741824, + "Env variable for heap profiler's inuse interval"); namespace tera { -HeapProfiler::HeapProfiler(const std::string& profiler_file): - exit_(false), - profiler_file_(profiler_file), - thread_(&HeapProfiler::run, this) {} +HeapProfiler::HeapProfiler(const std::string& profiler_file) + : exit_(false), profiler_file_(profiler_file), thread_(&HeapProfiler::run, this) {} HeapProfiler::~HeapProfiler() { - exit_ = true; - cv_.notify_one(); - thread_.join(); - if (IsHeapProfilerRunning()) { - HeapProfilerStop(); - } + exit_ = true; + cv_.notify_one(); + thread_.join(); + if (IsHeapProfilerRunning()) { + HeapProfilerStop(); + } } void HeapProfiler::run() { - while (!exit_.load()) { - bool enable; - { - std::unique_lock lock(lock_); - enable = enable_; - } - if (enable) { - // "reason" is time - std::time_t t = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); - char ts[128]; - ctime_r(&t, ts); - ts[strlen(ts) - 1] = '\0'; // erase \n - - if (IsHeapProfilerRunning() == 0) { - HeapProfilerStart(profiler_file_.c_str()); - } - HeapProfilerDump(ts); - LOG(INFO) << "[Heap Profiler] Heap Profiler Dumped"; - } else { - if (IsHeapProfilerRunning()) { - HeapProfilerStop(); - } - } - std::unique_lock lock(lock_); - cv_.wait_for(lock, interval_); + while (!exit_.load()) { + bool enable; + { + std::unique_lock lock(lock_); + enable = enable_; + } + if (enable) { + // "reason" is time + std::time_t t = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + char ts[128]; + ctime_r(&t, ts); + ts[strlen(ts) - 1] = '\0'; // erase \n + + if (IsHeapProfilerRunning() == 0) { + HeapProfilerStart(profiler_file_.c_str()); + } + HeapProfilerDump(ts); + LOG(INFO) << "[Heap Profiler] Heap Profiler Dumped"; + } else { + if (IsHeapProfilerRunning()) { + HeapProfilerStop(); + } } + std::unique_lock lock(lock_); + cv_.wait_for(lock, interval_); + } } HeapProfiler& HeapProfiler::SetEnable(bool enable) { - if (enable) { - setenv("HEAP_PROFILE_ALLOCATION_INTERVAL", - std::to_string(FLAGS_heap_profile_allocation_interval).c_str(), - 1); + if (enable) { + setenv("HEAP_PROFILE_ALLOCATION_INTERVAL", + std::to_string(FLAGS_heap_profile_allocation_interval).c_str(), 1); - setenv("HEAP_PROFILE_INUSE_INTERVAL", - std::to_string(FLAGS_heap_profile_inuse_interval).c_str(), - 1); + setenv("HEAP_PROFILE_INUSE_INTERVAL", std::to_string(FLAGS_heap_profile_inuse_interval).c_str(), + 1); - LOG(INFO) << "[Heap Profiler] HEAP_PROFILE_ALLOCATION_INTERVAL: " - << getenv("HEAP_PROFILE_ALLOCATION_INTERVAL"); - LOG(INFO) << "[Heap Profiler] HEAP_PROFILE_INUSE_INTERVAL: " - << getenv("HEAP_PROFILE_INUSE_INTERVAL"); - LOG(INFO) << "[Heap Profiler] Heap Profiler Enabled"; - } else { - unsetenv("HEAP_PROFILE_ALLOCATION_INTERVAL"); - unsetenv("HEAP_PROFILE_INUSE_INTERVAL"); - LOG(INFO) << "[Heap Profiler] Heap Profiler Disabled"; - } + LOG(INFO) << "[Heap Profiler] HEAP_PROFILE_ALLOCATION_INTERVAL: " + << getenv("HEAP_PROFILE_ALLOCATION_INTERVAL"); + LOG(INFO) << "[Heap Profiler] HEAP_PROFILE_INUSE_INTERVAL: " + << getenv("HEAP_PROFILE_INUSE_INTERVAL"); + LOG(INFO) << "[Heap Profiler] Heap Profiler Enabled"; + } else { + unsetenv("HEAP_PROFILE_ALLOCATION_INTERVAL"); + unsetenv("HEAP_PROFILE_INUSE_INTERVAL"); + LOG(INFO) << "[Heap Profiler] Heap Profiler Disabled"; + } - { - std::unique_lock lock(lock_); - enable_ = enable; - } + { + std::unique_lock lock(lock_); + enable_ = enable; + } - cv_.notify_one(); - return *this; + cv_.notify_one(); + return *this; } -} // namespace tera \ No newline at end of file +} // namespace tera \ No newline at end of file diff --git a/src/common/heap_profiler.h b/src/common/heap_profiler.h index 9f4d0af48..d74968fa0 100644 --- a/src/common/heap_profiler.h +++ b/src/common/heap_profiler.h @@ -18,44 +18,44 @@ namespace tera { class HeapProfiler { -public: - - /** - * @brief Init HeapProfiler and the detect thread will start - **/ - explicit HeapProfiler(const std::string& profiler_file="HEAP"); - /** - * @brief: the heap profiler will stop after descontrucor called - * - **/ - ~HeapProfiler(); - HeapProfiler& SetEnable(bool enable); - - HeapProfiler& SetInterval(int second) { - { - std::unique_lock lock(lock_); - interval_ = std::chrono::seconds(second); - } - - cv_.notify_one(); - return *this; + public: + /** + * @brief Init HeapProfiler and the detect thread will start + **/ + explicit HeapProfiler(const std::string& profiler_file = "HEAP"); + /** + * @brief: the heap profiler will stop after descontrucor called + * + **/ + ~HeapProfiler(); + HeapProfiler& SetEnable(bool enable); + + HeapProfiler& SetInterval(int second) { + { + std::unique_lock lock(lock_); + interval_ = std::chrono::seconds(second); } -private: - void run(); -private: - std::atomic exit_; - bool enable_{false}; - std::chrono::seconds interval_{10}; - //Never Changed, So we can use profiler_file_.c_str() in safe. - const std::string profiler_file_; - std::mutex lock_; - std::condition_variable cv_; - std::thread thread_; + cv_.notify_one(); + return *this; + } + + private: + void run(); + + private: + std::atomic exit_; + bool enable_{false}; + std::chrono::seconds interval_{10}; + // Never Changed, So we can use profiler_file_.c_str() in safe. + const std::string profiler_file_; + std::mutex lock_; + std::condition_variable cv_; + std::thread thread_; }; -} // namespace tera +} // namespace tera -#endif //TERA_HEAP_PROFILER +#endif // TERA_HEAP_PROFILER /* vim: set ts=4 sw=4 sts=4 tw=100 */ diff --git a/src/common/log/log_cleaner.cc b/src/common/log/log_cleaner.cc index 6b5474a1d..8180fed81 100644 --- a/src/common/log/log_cleaner.cc +++ b/src/common/log/log_cleaner.cc @@ -1,7 +1,7 @@ // Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. - + #include "common/log/log_cleaner.h" #include @@ -20,62 +20,55 @@ DECLARE_string(tera_leveldb_log_path); DECLARE_int64(tera_info_log_clean_period_second); DECLARE_int64(tera_info_log_expire_second); DECLARE_string(ins_log_file); - + namespace common { -static const int64_t kMinCleanPeriodMs = 1000; // 1s -static const int64_t kMinInfoLogExpireSec = 1; // 1s +static const int64_t kMinCleanPeriodMs = 1000; // 1s +static const int64_t kMinInfoLogExpireSec = 1; // 1s static const size_t kPathMaxLen = 64; Mutex LogCleaner::inst_init_mutex_; LogCleaner* LogCleaner::singleton_instance_ = NULL; static std::string GetProcFdPath() { - char path_buf[kPathMaxLen]; - snprintf(path_buf, kPathMaxLen, "/proc/%d/fd", getpid()); - return std::string(path_buf); + char path_buf[kPathMaxLen]; + snprintf(path_buf, kPathMaxLen, "/proc/%d/fd", getpid()); + return std::string(path_buf); } static std::string GetFileNameFromPath(const std::string& path) { - std::string::size_type pos = path.rfind("/"); - if (pos == std::string::npos) { - return path; - } else { - return path.substr(pos + 1); - } + std::string::size_type pos = path.rfind("/"); + if (pos == std::string::npos) { + return path; + } else { + return path.substr(pos + 1); + } } - -LogCleaner* LogCleaner::GetInstance(ThreadPool *thread_pool) { - if (singleton_instance_ == NULL) { - singleton_instance_ = new LogCleaner(FLAGS_log_dir, - FLAGS_tera_info_log_clean_period_second, - FLAGS_tera_info_log_expire_second, - thread_pool); - singleton_instance_->AddPrefix(FLAGS_tera_log_prefix); - singleton_instance_->AddPrefix(GetFileNameFromPath(FLAGS_tera_leveldb_log_path)); - singleton_instance_->AddPrefix(GetFileNameFromPath(FLAGS_ins_log_file)); - } - return singleton_instance_; +LogCleaner* LogCleaner::GetInstance(ThreadPool* thread_pool) { + if (singleton_instance_ == NULL) { + singleton_instance_ = new LogCleaner(FLAGS_log_dir, FLAGS_tera_info_log_clean_period_second, + FLAGS_tera_info_log_expire_second, thread_pool); + singleton_instance_->AddPrefix(FLAGS_tera_log_prefix); + singleton_instance_->AddPrefix(GetFileNameFromPath(FLAGS_tera_leveldb_log_path)); + singleton_instance_->AddPrefix(GetFileNameFromPath(FLAGS_ins_log_file)); + } + return singleton_instance_; } -bool LogCleaner::StartCleaner(ThreadPool *thread_pool) { - return GetInstance()->Start(); -} +bool LogCleaner::StartCleaner(ThreadPool* thread_pool) { return GetInstance()->Start(); } void LogCleaner::StopCleaner() { - MutexLock l(&inst_init_mutex_, "Destroy log cleaner"); - if (singleton_instance_ != NULL) { - singleton_instance_->Stop(); - delete singleton_instance_; - singleton_instance_ = NULL; - } + MutexLock l(&inst_init_mutex_, "Destroy log cleaner"); + if (singleton_instance_ != NULL) { + singleton_instance_->Stop(); + delete singleton_instance_; + singleton_instance_ = NULL; + } } - -LogCleaner::LogCleaner(const std::string& log_dir, - int64_t period_second, - int64_t expire_second, - ThreadPool *thread_pool) + +LogCleaner::LogCleaner(const std::string& log_dir, int64_t period_second, int64_t expire_second, + ThreadPool* thread_pool) : thread_pool_(thread_pool), thread_pool_own_(false), mutex_(), @@ -87,236 +80,226 @@ LogCleaner::LogCleaner(const std::string& log_dir, bg_exit_(false), bg_cond_(&mutex_), bg_func_(std::bind(&LogCleaner::CleanTaskWrap, this)), - bg_task_id_(-1), + bg_task_id_(-1), proc_fd_path_(GetProcFdPath()) {} - -LogCleaner::~LogCleaner() { - DestroyOwnThreadPool(); -} -static bool CheckDirPath(const std::string &dir_path) { - return !dir_path.empty() && IsDir(dir_path); +LogCleaner::~LogCleaner() { DestroyOwnThreadPool(); } + +static bool CheckDirPath(const std::string& dir_path) { + return !dir_path.empty() && IsDir(dir_path); } bool LogCleaner::CheckOptions() const { - return CheckDirPath(info_log_dir_) && - info_log_clean_period_ms_ > 0 && - info_log_expire_sec_ > 0; + return CheckDirPath(info_log_dir_) && info_log_clean_period_ms_ > 0 && info_log_expire_sec_ > 0; } bool LogCleaner::Start() { - if (!CheckOptions()) { - return false; - } - - MutexLock l(&mutex_, "Start info log cleaner"); - - // double check - if (IsRunning()) { - return true; - } - - stop_ = false; - bg_exit_ = false; - if (nullptr == thread_pool_) { - NewThreadPool(); - } + if (!CheckOptions()) { + return false; + } - if (bg_task_id_ <= 0) { - // start immediately - bg_task_id_ = thread_pool_->DelayTask(0, bg_func_); - } + MutexLock l(&mutex_, "Start info log cleaner"); + + // double check + if (IsRunning()) { return true; + } + + stop_ = false; + bg_exit_ = false; + if (nullptr == thread_pool_) { + NewThreadPool(); + } + + if (bg_task_id_ <= 0) { + // start immediately + bg_task_id_ = thread_pool_->DelayTask(0, bg_func_); + } + return true; } void LogCleaner::Stop() { - MutexLock l(&mutex_, "Stop info log cleaner"); - stop_ = true; - bool is_running = false; - if (bg_task_id_ > 0) { - bg_exit_ = thread_pool_->CancelTask(bg_task_id_, true, &is_running); - } else { - bg_exit_ = true; - } - - CHECK(is_running || bg_exit_); - while(!bg_exit_) { - bg_cond_.Wait(); - } - bg_task_id_ = -1; + MutexLock l(&mutex_, "Stop info log cleaner"); + stop_ = true; + bool is_running = false; + if (bg_task_id_ > 0) { + bg_exit_ = thread_pool_->CancelTask(bg_task_id_, true, &is_running); + } else { + bg_exit_ = true; + } + + CHECK(is_running || bg_exit_); + while (!bg_exit_) { + bg_cond_.Wait(); + } + bg_task_id_ = -1; } void LogCleaner::CleanTaskWrap() { - MutexLock l(&mutex_); - DoCleanLocalLogs(); - if (stop_) { - bg_task_id_ = -1; - bg_exit_ = true; - } else { - bg_task_id_ = thread_pool_->DelayTask(info_log_clean_period_ms_, bg_func_); - } - bg_cond_.Signal(); + MutexLock l(&mutex_); + DoCleanLocalLogs(); + if (stop_) { + bg_task_id_ = -1; + bg_exit_ = true; + } else { + bg_task_id_ = thread_pool_->DelayTask(info_log_clean_period_ms_, bg_func_); + } + bg_cond_.Signal(); } bool LogCleaner::CheckLogPrefix(const std::string& filename) const { - std::set::const_iterator prefix_iter = log_prefix_list_.begin(); - for (; prefix_iter != log_prefix_list_.end(); ++prefix_iter) { - const std::string& prefix = *prefix_iter; - if (filename.size() < prefix.size()) { - // do not need to compare - continue; - } - - if (strncmp(prefix.c_str(), filename.c_str(), prefix.size()) == 0) { - // return true if match any prefix - return true; - } + std::set::const_iterator prefix_iter = log_prefix_list_.begin(); + for (; prefix_iter != log_prefix_list_.end(); ++prefix_iter) { + const std::string& prefix = *prefix_iter; + if (filename.size() < prefix.size()) { + // do not need to compare + continue; } - return false; + + if (strncmp(prefix.c_str(), filename.c_str(), prefix.size()) == 0) { + // return true if match any prefix + return true; + } + } + return false; } bool LogCleaner::DoCleanLocalLogs() { - if (log_prefix_list_.empty()) { - LOG(WARNING) << "[LogCleaner] Log prefix is not set yet."; - return false; - } - if (!CheckDirPath(info_log_dir_) || IsEmpty(info_log_dir_)) { - LOG(WARNING) << "[LogCleaner] Log dir " << info_log_dir_ << " not exsit logs."; - return false; + if (log_prefix_list_.empty()) { + LOG(WARNING) << "[LogCleaner] Log prefix is not set yet."; + return false; + } + if (!CheckDirPath(info_log_dir_) || IsEmpty(info_log_dir_)) { + LOG(WARNING) << "[LogCleaner] Log dir " << info_log_dir_ << " not exsit logs."; + return false; + } + int64_t now_time = tera::get_millis() / 1000; + int64_t clean_time = now_time - info_log_expire_sec_; + LOG(INFO) << "[LogCleaner] Start clean log dir: " << info_log_dir_ << ", now_time = " << now_time + << ", clean_time = " << clean_time; + + long path_maxlen = pathconf(info_log_dir_.c_str(), _PC_PATH_MAX); + std::vector log_file_list; + if (!ListCurrentDir(info_log_dir_, &log_file_list)) { + // list failed + LOG(WARNING) << "[LogCleaner] List log dir " << info_log_dir_ << " failed. Cancel clean."; + return false; + } + + // reserved_set: filenames that should not to be clean + std::set reserved_set; + if (!GetCurrentOpendLogs(&reserved_set)) { + LOG(WARNING) << "[LogCleaner] GetCurrentOpendLogs failed. Cancel clean."; + return false; + } + + std::vector::const_iterator it = log_file_list.begin(); + for (; it != log_file_list.end(); ++it) { + if (reserved_set.find(*it) != reserved_set.end()) { + // already reserved + continue; } - int64_t now_time = tera::get_millis() / 1000; - int64_t clean_time = now_time - info_log_expire_sec_; - LOG(INFO) << "[LogCleaner] Start clean log dir: " << info_log_dir_ - << ", now_time = " << now_time - << ", clean_time = " << clean_time; - - long path_maxlen = pathconf(info_log_dir_.c_str(), _PC_PATH_MAX); - std::vector log_file_list; - if (!ListCurrentDir(info_log_dir_, &log_file_list)) { - // list failed - LOG(WARNING) << "[LogCleaner] List log dir " << info_log_dir_ - << " failed. Cancel clean."; - return false; + + const std::string& file_name = *it; + + // check if filename start with log_prefix_ + // if leveldb_log_prefix_ is not empty, check also + if (!CheckLogPrefix(file_name)) { + VLOG(16) << "[LogCleaner] Reserve log file: " << file_name << ", which not match prefix."; + reserved_set.insert(file_name); + continue; } - // reserved_set: filenames that should not to be clean - std::set reserved_set; - if (!GetCurrentOpendLogs(&reserved_set)) { - LOG(WARNING) << "[LogCleaner] GetCurrentOpendLogs failed. Cancel clean."; - return false; + // get file stat + std::string file_path = info_log_dir_ + "/" + file_name; + struct stat file_st; + if (lstat(file_path.c_str(), &file_st) != 0) { + // cancel clean if any file stat failed + LOG(WARNING) << "[LogCleaner] Stat log file: " << file_path << " fail. Cancel log clean."; + return false; } - std::vector::const_iterator it = log_file_list.begin(); - for (; it != log_file_list.end(); ++it) { - if (reserved_set.find(*it) != reserved_set.end()) { - // already reserved - continue; - } - - const std::string& file_name = *it; - - // check if filename start with log_prefix_ - // if leveldb_log_prefix_ is not empty, check also - if (!CheckLogPrefix(file_name)) { - VLOG(16) << "[LogCleaner] Reserve log file: " << file_name - << ", which not match prefix."; - reserved_set.insert(file_name); - continue; - } - - // get file stat - std::string file_path = info_log_dir_ + "/" + file_name; - struct stat file_st; - if (lstat(file_path.c_str(), &file_st) != 0) { - // cancel clean if any file stat failed - LOG(WARNING) << "[LogCleaner] Stat log file: " << file_path << " fail. Cancel log clean."; - return false; - } - - if (S_ISLNK(file_st.st_mode)) { - // handle symbolic link - VLOG(16) << "[LogCleaner] Reserve symbolic link log: " << file_name; - reserved_set.insert(file_name); - char path_buf[path_maxlen]; - int ret = readlink(file_path.c_str(), path_buf, path_maxlen); - if (ret < 0 || ret >= path_maxlen) { - continue; - } else { - // reserve link target - path_buf[ret] = '\0'; - std::string target_filename = GetFileNameFromPath(path_buf); - VLOG(16) << "[LogCleaner] Reserve link target: " << target_filename - << " for link: " << file_path; - reserved_set.insert(target_filename); - } - } else if (!S_ISREG(file_st.st_mode)) { - VLOG(16) << "[LogCleaner] Reserve not regular file: " << file_name; - reserved_set.insert(file_name); - } else if (file_st.st_mtime >= clean_time) { - VLOG(16) << "[LogCleaner] Reserve not expire log: " << file_name - << ", mtime: " << file_st.st_mtime << ", clean_time: " << clean_time; - reserved_set.insert(file_name); - } - VLOG(16) << "stat filename: " << file_name - << ", is_symbolic_link: " << S_ISLNK(file_st.st_mode) - << ", is_dir: " << S_ISDIR(file_st.st_mode) - << ", is_regular_file: " << S_ISREG(file_st.st_mode) - << ", last mod time: " << file_st.st_mtime - << ", link number: " << file_st.st_nlink - << ", reserve: " << (reserved_set.find(file_name) != reserved_set.end()); + if (S_ISLNK(file_st.st_mode)) { + // handle symbolic link + VLOG(16) << "[LogCleaner] Reserve symbolic link log: " << file_name; + reserved_set.insert(file_name); + char path_buf[path_maxlen]; + int ret = readlink(file_path.c_str(), path_buf, path_maxlen); + if (ret < 0 || ret >= path_maxlen) { + continue; + } else { + // reserve link target + path_buf[ret] = '\0'; + std::string target_filename = GetFileNameFromPath(path_buf); + VLOG(16) << "[LogCleaner] Reserve link target: " << target_filename + << " for link: " << file_path; + reserved_set.insert(target_filename); + } + } else if (!S_ISREG(file_st.st_mode)) { + VLOG(16) << "[LogCleaner] Reserve not regular file: " << file_name; + reserved_set.insert(file_name); + } else if (file_st.st_mtime >= clean_time) { + VLOG(16) << "[LogCleaner] Reserve not expire log: " << file_name + << ", mtime: " << file_st.st_mtime << ", clean_time: " << clean_time; + reserved_set.insert(file_name); } + VLOG(16) << "stat filename: " << file_name << ", is_symbolic_link: " << S_ISLNK(file_st.st_mode) + << ", is_dir: " << S_ISDIR(file_st.st_mode) + << ", is_regular_file: " << S_ISREG(file_st.st_mode) + << ", last mod time: " << file_st.st_mtime << ", link number: " << file_st.st_nlink + << ", reserve: " << (reserved_set.find(file_name) != reserved_set.end()); + } - // clean log - size_t clean_cnt = 0; - it = log_file_list.begin(); - for (; it != log_file_list.end(); ++it) { - const std::string &file_name = *it; - std::string file_path = info_log_dir_ + "/" + file_name; - if (reserved_set.find(file_name) == reserved_set.end()) { - LOG(INFO) << "[LogCleaner] log: " << file_path << " will be clean"; - if (!RemoveLocalFile(file_path)){ - LOG(WARNING) << "[LogCleaner] log clean fail: " << file_path; - } else { - ++clean_cnt; - } - } + // clean log + size_t clean_cnt = 0; + it = log_file_list.begin(); + for (; it != log_file_list.end(); ++it) { + const std::string& file_name = *it; + std::string file_path = info_log_dir_ + "/" + file_name; + if (reserved_set.find(file_name) == reserved_set.end()) { + LOG(INFO) << "[LogCleaner] log: " << file_path << " will be clean"; + if (!RemoveLocalFile(file_path)) { + LOG(WARNING) << "[LogCleaner] log clean fail: " << file_path; + } else { + ++clean_cnt; + } } - LOG(INFO) << "[LogCleaner] Found log: " << log_file_list.size() - << ", clean: " << clean_cnt; - return true; + } + LOG(INFO) << "[LogCleaner] Found log: " << log_file_list.size() << ", clean: " << clean_cnt; + return true; } bool LogCleaner::GetCurrentOpendLogs(std::set* opend_logs) { - long path_maxlen = pathconf(proc_fd_path_.c_str(), _PC_PATH_MAX); - if (path_maxlen < 0) { - LOG(ERROR) << "[LogCleaner] Get Path Max Len Failed"; - return false; - } - std::vector opend_logs_list; - VLOG(16) << "[LogCleaner] Search fd_path: " << proc_fd_path_; - if (!ListCurrentDirWithStat(proc_fd_path_, &opend_logs_list)) { - VLOG(16) << "[LogCleaner] list fd_path: " << proc_fd_path_ << " failed."; - return false; - } + long path_maxlen = pathconf(proc_fd_path_.c_str(), _PC_PATH_MAX); + if (path_maxlen < 0) { + LOG(ERROR) << "[LogCleaner] Get Path Max Len Failed"; + return false; + } + std::vector opend_logs_list; + VLOG(16) << "[LogCleaner] Search fd_path: " << proc_fd_path_; + if (!ListCurrentDirWithStat(proc_fd_path_, &opend_logs_list)) { + VLOG(16) << "[LogCleaner] list fd_path: " << proc_fd_path_ << " failed."; + return false; + } - std::vector::const_iterator it = opend_logs_list.begin(); - for (; it != opend_logs_list.end(); ++it) { - const std::string& filename = it->first; - const struct stat& st = it->second; - if (S_ISLNK(st.st_mode)) { - char path_buf[path_maxlen]; - int ret = readlink(filename.c_str(), path_buf, path_maxlen); - if (ret > 0 && ret < path_maxlen && path_buf[0] == '/') { - path_buf[ret] = '\0'; - std::string target_filename = GetFileNameFromPath(path_buf); - VLOG(16) << "[LogCleaner] Reserve log in use: " << target_filename; - opend_logs->insert(target_filename); - } - } + std::vector::const_iterator it = opend_logs_list.begin(); + for (; it != opend_logs_list.end(); ++it) { + const std::string& filename = it->first; + const struct stat& st = it->second; + if (S_ISLNK(st.st_mode)) { + char path_buf[path_maxlen]; + int ret = readlink(filename.c_str(), path_buf, path_maxlen); + if (ret > 0 && ret < path_maxlen && path_buf[0] == '/') { + path_buf[ret] = '\0'; + std::string target_filename = GetFileNameFromPath(path_buf); + VLOG(16) << "[LogCleaner] Reserve log in use: " << target_filename; + opend_logs->insert(target_filename); + } } - return true; + } + return true; } - -} // end namespace common - + +} // end namespace common + /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/common/log/log_cleaner.h b/src/common/log/log_cleaner.h index 53830a733..e13e294d5 100644 --- a/src/common/log/log_cleaner.h +++ b/src/common/log/log_cleaner.h @@ -1,7 +1,7 @@ // Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. - + #ifndef TERA_COMMON_LOG_CLEANER_H_ #define TERA_COMMON_LOG_CLEANER_H_ @@ -13,102 +13,99 @@ #include "common/thread_pool.h" namespace common { - + class LogCleaner { -private: - // set private since singleton - LogCleaner(const std::string& log_dir, - int64_t period_second, - int64_t expire_second, - ThreadPool* thread_pool); - ~LogCleaner(); - // disallow copy - LogCleaner(const LogCleaner& other) = delete; - LogCleaner & operator = (const LogCleaner& other) = delete; - -public: - bool CheckOptions() const; - bool Start(); - void Stop(); - bool IsRunning() const { return bg_task_id_ > 0; } - - bool AddPrefix(const std::string& prefix) { - if (prefix.empty()) { - // empty prefix is not allowed - return false; - } else { - MutexLock l(&mutex_); - log_prefix_list_.insert(prefix); - return true; - } - } - - void RemovePrefix(const std::string& prefix) { - MutexLock l(&mutex_); - log_prefix_list_.erase(prefix); - } - -private: - // singleton - static Mutex inst_init_mutex_; - static LogCleaner* singleton_instance_; - - // get singleton instance but not start - // for unittest - static LogCleaner* GetInstance(ThreadPool *thread_pool = NULL); - -public: - static bool StartCleaner(ThreadPool *thread_pool = NULL); - static void StopCleaner(); - -private: - // do under lock - void NewThreadPool() { - if (NULL == thread_pool_) { - thread_pool_ = new ThreadPool(1); - thread_pool_own_ = true; - } + private: + // set private since singleton + LogCleaner(const std::string& log_dir, int64_t period_second, int64_t expire_second, + ThreadPool* thread_pool); + ~LogCleaner(); + // disallow copy + LogCleaner(const LogCleaner& other) = delete; + LogCleaner& operator=(const LogCleaner& other) = delete; + + public: + bool CheckOptions() const; + bool Start(); + void Stop(); + bool IsRunning() const { return bg_task_id_ > 0; } + + bool AddPrefix(const std::string& prefix) { + if (prefix.empty()) { + // empty prefix is not allowed + return false; + } else { + MutexLock l(&mutex_); + log_prefix_list_.insert(prefix); + return true; } - void DestroyOwnThreadPool() { - if (thread_pool_own_ && NULL != thread_pool_) { - thread_pool_->Stop(true); - delete thread_pool_; - thread_pool_ = NULL; - thread_pool_own_ = false; - } + } + + void RemovePrefix(const std::string& prefix) { + MutexLock l(&mutex_); + log_prefix_list_.erase(prefix); + } + + private: + // singleton + static Mutex inst_init_mutex_; + static LogCleaner* singleton_instance_; + + // get singleton instance but not start + // for unittest + static LogCleaner* GetInstance(ThreadPool* thread_pool = NULL); + + public: + static bool StartCleaner(ThreadPool* thread_pool = NULL); + static void StopCleaner(); + + private: + // do under lock + void NewThreadPool() { + if (NULL == thread_pool_) { + thread_pool_ = new ThreadPool(1); + thread_pool_own_ = true; } + } + void DestroyOwnThreadPool() { + if (thread_pool_own_ && NULL != thread_pool_) { + thread_pool_->Stop(true); + delete thread_pool_; + thread_pool_ = NULL; + thread_pool_own_ = false; + } + } + + void CleanTaskWrap(); - void CleanTaskWrap(); + bool CheckLogPrefix(const std::string& filename) const; - bool CheckLogPrefix(const std::string& filename) const; + bool DoCleanLocalLogs(); - bool DoCleanLocalLogs(); + bool GetCurrentOpendLogs(std::set* opend_logs); - bool GetCurrentOpendLogs(std::set* opend_logs); + private: + ThreadPool* thread_pool_; + bool thread_pool_own_; + mutable Mutex mutex_; -private: - ThreadPool* thread_pool_; - bool thread_pool_own_; - mutable Mutex mutex_; + // options + std::string info_log_dir_; + std::set log_prefix_list_; + int64_t info_log_clean_period_ms_; // milli second + int64_t info_log_expire_sec_; // second - // options - std::string info_log_dir_; - std::set log_prefix_list_; - int64_t info_log_clean_period_ms_; // milli second - int64_t info_log_expire_sec_; // second + bool stop_; + bool bg_exit_; + CondVar bg_cond_; + const ThreadPool::Task bg_func_; + int64_t bg_task_id_; - bool stop_; - bool bg_exit_; - CondVar bg_cond_; - const ThreadPool::Task bg_func_; - int64_t bg_task_id_; - - std::string proc_fd_path_; + std::string proc_fd_path_; }; - -} // end namespace common - -#endif // TERA_COMMON_LOG_CLEANER_H_ - -/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ +} // end namespace common + +#endif // TERA_COMMON_LOG_CLEANER_H_ + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/common/metric/cache_collector.h b/src/common/metric/cache_collector.h index ae415b0d8..0e01863f9 100644 --- a/src/common/metric/cache_collector.h +++ b/src/common/metric/cache_collector.h @@ -4,105 +4,110 @@ #ifndef TERA_COMMOM_METRIC_CACHE_COLLECTOR_H_ #define TERA_COMMOM_METRIC_CACHE_COLLECTOR_H_ - -#include + +#include #include - -#include "common/metric/collector_report_publisher.h" + +#include "common/metric/collector_report_publisher.h" #include "common/metric/collector.h" #include "db/table_cache.h" -#include "leveldb/cache.h" - -namespace tera { - +#include "leveldb/cache.h" + +namespace tera { + enum class CacheCollectType { - kHitRate, - kEntries, - kCharge, + kHitRate, + kEntries, + kCharge, }; class BaseCacheCollector : public Collector { -public: - explicit BaseCacheCollector(CacheCollectType cache_type) : cache_type_(cache_type) {} - virtual ~BaseCacheCollector() {} - - virtual int64_t Collect() { - switch (cache_type_) { - case CacheCollectType::kHitRate: - return HitRate(); - case CacheCollectType::kEntries: - return Entries(); - case CacheCollectType::kCharge: - return TotalCharge(); - default: - return 0; - } + public: + explicit BaseCacheCollector(CacheCollectType cache_type) : cache_type_(cache_type) {} + virtual ~BaseCacheCollector() {} + + virtual int64_t Collect() { + switch (cache_type_) { + case CacheCollectType::kHitRate: + return HitRate(); + case CacheCollectType::kEntries: + return Entries(); + case CacheCollectType::kCharge: + return TotalCharge(); + default: + return 0; } - -protected: - virtual int64_t HitRate() = 0; - virtual int64_t Entries() = 0; - virtual int64_t TotalCharge() = 0; - -protected: - CacheCollectType cache_type_; + } + + protected: + virtual int64_t HitRate() = 0; + virtual int64_t Entries() = 0; + virtual int64_t TotalCharge() = 0; + + protected: + CacheCollectType cache_type_; }; class LRUCacheCollector : public BaseCacheCollector { -public: - LRUCacheCollector(leveldb::Cache* cache, - CacheCollectType cache_type): - BaseCacheCollector(cache_type), - cache_(cache) {} - - virtual ~LRUCacheCollector() {} - -protected: - int64_t HitRate() override { - if (cache_ == NULL) { - return 0; - } - - double hit_rate = cache_->HitRate(true); - return isnan(hit_rate) ? -1 : static_cast(hit_rate * 100.0d); + public: + LRUCacheCollector(leveldb::Cache* cache, CacheCollectType cache_type) + : BaseCacheCollector(cache_type), cache_(cache) {} + + virtual ~LRUCacheCollector() {} + + protected: + int64_t HitRate() override { + if (cache_ == NULL) { + return 0; } - - int64_t Entries() override { return cache_ == NULL ? 0 : static_cast(cache_->Entries()); } - - int64_t TotalCharge() override { return cache_ == NULL ? 0 : static_cast(cache_->TotalCharge()); } -private: - leveldb::Cache* cache_; + + double hit_rate = cache_->HitRate(true); + return isnan(hit_rate) ? -1 : static_cast(hit_rate * 100.0d); + } + + int64_t Entries() override { + return cache_ == NULL ? 0 : static_cast(cache_->Entries()); + } + + int64_t TotalCharge() override { + return cache_ == NULL ? 0 : static_cast(cache_->TotalCharge()); + } + + private: + leveldb::Cache* cache_; }; class TableCacheCollector : public BaseCacheCollector { -public: - TableCacheCollector(leveldb::TableCache* cache, - CacheCollectType cache_type): - BaseCacheCollector(cache_type), - cache_(cache) {} - - virtual ~TableCacheCollector() {} - -protected: - int64_t HitRate() override { - if (cache_ == NULL) { - return 0; - } - - double hit_rate = cache_->HitRate(true); - return isnan(hit_rate) ? -1 : static_cast(hit_rate * 100.0d); + public: + TableCacheCollector(leveldb::TableCache* cache, CacheCollectType cache_type) + : BaseCacheCollector(cache_type), cache_(cache) {} + + virtual ~TableCacheCollector() {} + + protected: + int64_t HitRate() override { + if (cache_ == NULL) { + return 0; } - - int64_t Entries() override { return cache_ == NULL ? 0 : static_cast(cache_->TableEntries()); } - - int64_t TotalCharge() override { return cache_ == NULL ? 0 : static_cast(cache_->ByteSize()); } -private: - leveldb::TableCache* cache_; -}; - -} // end namespace tera - -#endif // TERA_COMMOM_METRIC_CACHE_COLLECTOR_H_ - -/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ + double hit_rate = cache_->HitRate(true); + return isnan(hit_rate) ? -1 : static_cast(hit_rate * 100.0d); + } + + int64_t Entries() override { + return cache_ == NULL ? 0 : static_cast(cache_->TableEntries()); + } + + int64_t TotalCharge() override { + return cache_ == NULL ? 0 : static_cast(cache_->ByteSize()); + } + + private: + leveldb::TableCache* cache_; +}; + +} // end namespace tera + +#endif // TERA_COMMOM_METRIC_CACHE_COLLECTOR_H_ + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/common/metric/collector.h b/src/common/metric/collector.h index 0b31bb446..53a5045a1 100644 --- a/src/common/metric/collector.h +++ b/src/common/metric/collector.h @@ -5,11 +5,11 @@ #include #include -namespace tera{ +namespace tera { class Collector { -public: - virtual ~Collector() {} - // return a instant value of the metric for tera to dump log and other usage - virtual int64_t Collect() = 0; + public: + virtual ~Collector() {} + // return a instant value of the metric for tera to dump log and other usage + virtual int64_t Collect() = 0; }; } diff --git a/src/common/metric/collector_report.h b/src/common/metric/collector_report.h index 8c453dcaa..cebbd981b 100644 --- a/src/common/metric/collector_report.h +++ b/src/common/metric/collector_report.h @@ -3,47 +3,46 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include -#include -#include - +#include +#include + #include "common/metric/metric_id.h" -#include "common/mutex.h" +#include "common/mutex.h" #include "common/metric/collector.h" #include "common/metric/subscriber.h" - -namespace tera { - + +namespace tera { + using CollectorReportMap = std::unordered_map; struct CollectorReport { - int64_t timestamp_ms; // timestamp of the report - int64_t interval_ms; // time interval since last report - - // metric_id to metric snapshot - CollectorReport() : timestamp_ms(get_millis()) {} - - // find methods, return 0 if not found - int64_t FindMetricValue(const MetricId& metric_id) const { - auto iter = report.find(metric_id); - return iter == report.end() ? 0 : iter->second; - }; - - int64_t FindMetricValue(const std::string& metric_name) const { - return FindMetricValue(MetricId(metric_name)); - } - - int64_t FindMetricValue(const std::string& metric_name, const std::string& label_str) const { - MetricId metric_id; - if (!MetricId::ParseFromString(metric_name, label_str, &metric_id)) { - return 0; - } else { - return FindMetricValue(metric_id); - } + int64_t timestamp_ms; // timestamp of the report + int64_t interval_ms; // time interval since last report + + // metric_id to metric snapshot + CollectorReport() : timestamp_ms(get_millis()) {} + + // find methods, return 0 if not found + int64_t FindMetricValue(const MetricId& metric_id) const { + auto iter = report.find(metric_id); + return iter == report.end() ? 0 : iter->second; + }; + + int64_t FindMetricValue(const std::string& metric_name) const { + return FindMetricValue(MetricId(metric_name)); + } + + int64_t FindMetricValue(const std::string& metric_name, const std::string& label_str) const { + MetricId metric_id; + if (!MetricId::ParseFromString(metric_name, label_str, &metric_id)) { + return 0; + } else { + return FindMetricValue(metric_id); } + } - CollectorReportMap report; -}; -} // end namespace tera + CollectorReportMap report; +}; +} // end namespace tera /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ - diff --git a/src/common/metric/collector_report_publisher.cc b/src/common/metric/collector_report_publisher.cc index 620cc4107..8f11ba036 100644 --- a/src/common/metric/collector_report_publisher.cc +++ b/src/common/metric/collector_report_publisher.cc @@ -1,8 +1,8 @@ // Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved // Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. +// found in the LICENSE file. -#include "common/metric/collector_report_publisher.h" +#include "common/metric/collector_report_publisher.h" #include "glog/logging.h" @@ -10,141 +10,133 @@ #include "common/timer.h" #include "common/metric/collector.h" #include "common/metric/prometheus_subscriber.h" - + namespace tera { CollectorReportPublisher& CollectorReportPublisher::GetInstance() { - static CollectorReportPublisher instance; - return instance; + static CollectorReportPublisher instance; + return instance; } -CollectorReportPublisher::CollectorReportPublisher(): - last_report_timestamp_(get_millis()), - last_collector_report_(new CollectorReport) { - AddHardwareCollectors(); +CollectorReportPublisher::CollectorReportPublisher() + : last_report_timestamp_(get_millis()), last_collector_report_(new CollectorReport) { + AddHardwareCollectors(); } CollectorReportPublisher::~CollectorReportPublisher() {} std::shared_ptr CollectorReportPublisher::GetSubscriberReport() { - std::lock_guard lock(mutex_); - - std::shared_ptr new_report(new SubscriberReport()); - int64_t start_ts = get_millis(); - // do collect - for (auto& subscriber_pair : subscribers_) { - const MetricId& metric_id = subscriber_pair.first; - new_report->insert(std::make_pair(metric_id, subscriber_pair.second->Collect())); - } - - int64_t end_ts = get_millis(); - VLOG(12) << "[Metric] Get Subscriber Summary Cost: " << (end_ts - start_ts) << " ms."; - return new_report; + std::lock_guard lock(mutex_); + + std::shared_ptr new_report(new SubscriberReport()); + int64_t start_ts = get_millis(); + // do collect + for (auto& subscriber_pair : subscribers_) { + const MetricId& metric_id = subscriber_pair.first; + new_report->insert(std::make_pair(metric_id, subscriber_pair.second->Collect())); + } + + int64_t end_ts = get_millis(); + VLOG(12) << "[Metric] Get Subscriber Summary Cost: " << (end_ts - start_ts) << " ms."; + return new_report; } std::shared_ptr CollectorReportPublisher::GetCollectorReport() { - std::lock_guard lock(mutex_); - return last_collector_report_; + std::lock_guard lock(mutex_); + return last_collector_report_; } void CollectorReportPublisher::Refresh() { - std::lock_guard lock(mutex_); - - std::shared_ptr new_report(new CollectorReport()); - int64_t start_ts = new_report->timestamp_ms; - new_report->interval_ms = new_report->timestamp_ms - last_report_timestamp_; - - // do collect - for (auto& metric_pair : collectors_) { - const MetricId& metric_id = metric_pair.first; - int64_t value = metric_pair.second->Collect(); - new_report->report[metric_id] = value; - } - - last_report_timestamp_ = start_ts; - int64_t end_ts = get_millis(); - VLOG(12) << "[Metric] Refresh Collectors Cost: " << (end_ts - start_ts) << " ms."; - last_collector_report_ = new_report; - NotifySubscribers(); + std::lock_guard lock(mutex_); + + std::shared_ptr new_report(new CollectorReport()); + int64_t start_ts = new_report->timestamp_ms; + new_report->interval_ms = new_report->timestamp_ms - last_report_timestamp_; + + // do collect + for (auto& metric_pair : collectors_) { + const MetricId& metric_id = metric_pair.first; + int64_t value = metric_pair.second->Collect(); + new_report->report[metric_id] = value; + } + + last_report_timestamp_ = start_ts; + int64_t end_ts = get_millis(); + VLOG(12) << "[Metric] Refresh Collectors Cost: " << (end_ts - start_ts) << " ms."; + last_collector_report_ = new_report; + NotifySubscribers(); } -bool CollectorReportPublisher::AddCollector(const MetricId& metric_id, - std::unique_ptr&& metric_collector, - SubscriberTypeList type_list) { - if (!metric_id.IsValid() || !metric_collector) { - return false; - } - - std::lock_guard lock(mutex_); - auto insert_ret = collectors_.insert(std::make_pair(metric_id, std::move(metric_collector))); - if (!insert_ret.second) { - return false; - } - - for (auto type : type_list) { - if (!AddSubscriber(std::unique_ptr(new PrometheusSubscriber(metric_id, type)))) { - LOG(ERROR) << "[METRIC] Add Subscriber For " << metric_id.ToString() << " Failed!"; - } +bool CollectorReportPublisher::AddCollector(const MetricId& metric_id, + std::unique_ptr&& metric_collector, + SubscriberTypeList type_list) { + if (!metric_id.IsValid() || !metric_collector) { + return false; + } + + std::lock_guard lock(mutex_); + auto insert_ret = collectors_.insert(std::make_pair(metric_id, std::move(metric_collector))); + if (!insert_ret.second) { + return false; + } + + for (auto type : type_list) { + if (!AddSubscriber(std::unique_ptr(new PrometheusSubscriber(metric_id, type)))) { + LOG(ERROR) << "[METRIC] Add Subscriber For " << metric_id.ToString() << " Failed!"; } + } - return true; + return true; } -bool CollectorReportPublisher::AddSubscriber(std::unique_ptr&& prometheus_subscriber_ptr) { - if (!prometheus_subscriber_ptr || - !prometheus_subscriber_ptr->GetMetricId().IsValid()) { - // invalid arguments - return false; - } +bool CollectorReportPublisher::AddSubscriber( + std::unique_ptr&& prometheus_subscriber_ptr) { + if (!prometheus_subscriber_ptr || !prometheus_subscriber_ptr->GetMetricId().IsValid()) { + // invalid arguments + return false; + } - std::lock_guard lock(mutex_); - subscribers_.insert(std::make_pair(prometheus_subscriber_ptr->GetMetricId(), - std::move(prometheus_subscriber_ptr))); + std::lock_guard lock(mutex_); + subscribers_.insert(std::make_pair(prometheus_subscriber_ptr->GetMetricId(), + std::move(prometheus_subscriber_ptr))); - return true; + return true; } void CollectorReportPublisher::NotifySubscribers() { - std::lock_guard lock(mutex_); - for (auto& subscriber_pair : subscribers_) { - subscriber_pair.second->OnUpdate(last_collector_report_); - } + std::lock_guard lock(mutex_); + for (auto& subscriber_pair : subscribers_) { + subscriber_pair.second->OnUpdate(last_collector_report_); + } } bool CollectorReportPublisher::HasCollector(const MetricId& metric_id) const { - std::lock_guard lock(mutex_); - return collectors_.find(metric_id) != collectors_.end(); + std::lock_guard lock(mutex_); + return collectors_.find(metric_id) != collectors_.end(); } bool CollectorReportPublisher::DeleteCollector(const MetricId& metric_id) { - std::lock_guard lock(mutex_); - DeleteSubscriber(metric_id); - return collectors_.erase(metric_id) > 0; + std::lock_guard lock(mutex_); + DeleteSubscriber(metric_id); + return collectors_.erase(metric_id) > 0; } bool CollectorReportPublisher::DeleteSubscriber(const MetricId& metric_id) { - std::lock_guard lock(mutex_); - return subscribers_.erase(metric_id) > 0; + std::lock_guard lock(mutex_); + return subscribers_.erase(metric_id) > 0; } -void CollectorReportPublisher::DeleteSubscribers() { - subscribers_.clear(); -} +void CollectorReportPublisher::DeleteSubscribers() { subscribers_.clear(); } void CollectorReportPublisher::AddHardwareCollectors() { - // register hardware metrics - AddCollector(MetricId(kInstCpuMetricName), std::unique_ptr(new CpuUsageCollector())); - AddCollector(MetricId(kInstMemMetricName), std::unique_ptr(new MemUsageCollector())); - - AddCollector(MetricId(kInstNetRXMetricName), - std::unique_ptr(new NetUsageCollector(RECEIVE)), - {SubscriberType::MAX}); - - AddCollector(MetricId(kInstNetTXMetricName), - std::unique_ptr(new NetUsageCollector(TRANSMIT)), - {SubscriberType::MAX}); + // register hardware metrics + AddCollector(MetricId(kInstCpuMetricName), std::unique_ptr(new CpuUsageCollector())); + AddCollector(MetricId(kInstMemMetricName), std::unique_ptr(new MemUsageCollector())); + AddCollector(MetricId(kInstNetRXMetricName), + std::unique_ptr(new NetUsageCollector(RECEIVE)), {SubscriberType::MAX}); + AddCollector(MetricId(kInstNetTXMetricName), + std::unique_ptr(new NetUsageCollector(TRANSMIT)), {SubscriberType::MAX}); } -} // end namespace tera - -/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ +} // end namespace tera +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/common/metric/collector_report_publisher.h b/src/common/metric/collector_report_publisher.h index 1290f2000..9ea547037 100644 --- a/src/common/metric/collector_report_publisher.h +++ b/src/common/metric/collector_report_publisher.h @@ -13,150 +13,143 @@ #include #include #include - -#include "common/metric/metric_id.h" + +#include "common/metric/metric_id.h" #include "common/metric/collector_report.h" #include "common/metric/collector.h" #include "common/metric/subscriber.h" namespace tera { -// Base class for metric value collector +// Base class for metric value collector using SubscriberTypeList = std::initializer_list; class CollectorReportPublisher { -private: - // set private for singleton - CollectorReportPublisher(); - ~CollectorReportPublisher(); - - // disallow copy - CollectorReportPublisher(const CollectorReportPublisher&) = delete; - CollectorReportPublisher& operator = (const CollectorReportPublisher&) = delete; - -public: - static CollectorReportPublisher& GetInstance(); - - void Refresh(); - /// report the instant values of collectors - std::shared_ptr GetCollectorReport(); - std::shared_ptr GetSubscriberReport(); - - /// Add a collector with a given metric_id - /// collector should be a right value reference of std::unique_ptr - /// return true if register success, - /// retrun false if argument is invalid or metric_id name has been registered already. - bool AddCollector(const MetricId& metric_id, - std::unique_ptr&& metric_collector, - SubscriberTypeList type_list = {SubscriberType::LATEST}); - - - /// weather a collector has been Added - bool HasCollector(const MetricId& metric_id) const; - /// Delete a collector - bool DeleteCollector(const MetricId& metric_id); - - - /// Add a subscriber to a given metricId. - /// Different type of subscribers can be registered to a same metricId. - bool AddSubscriber(std::unique_ptr&& subscriber); - /// Delete a subscriber - bool DeleteSubscriber(const MetricId& metric_id); - void DeleteSubscribers(); - -private: - void NotifySubscribers(); - void AddHardwareCollectors(); - -private: - mutable std::recursive_mutex mutex_; - - using CollectorMap = std::unordered_map>; - - using SubscriberMap = std::unordered_multimap>; - CollectorMap collectors_; - SubscriberMap subscribers_; - - int64_t last_report_timestamp_; - - std::shared_ptr last_collector_report_; + private: + // set private for singleton + CollectorReportPublisher(); + ~CollectorReportPublisher(); + + // disallow copy + CollectorReportPublisher(const CollectorReportPublisher&) = delete; + CollectorReportPublisher& operator=(const CollectorReportPublisher&) = delete; + + public: + static CollectorReportPublisher& GetInstance(); + + void Refresh(); + /// report the instant values of collectors + std::shared_ptr GetCollectorReport(); + std::shared_ptr GetSubscriberReport(); + + /// Add a collector with a given metric_id + /// collector should be a right value reference of std::unique_ptr + /// return true if register success, + /// retrun false if argument is invalid or metric_id name has been registered + /// already. + bool AddCollector(const MetricId& metric_id, std::unique_ptr&& metric_collector, + SubscriberTypeList type_list = {SubscriberType::LATEST}); + + /// weather a collector has been Added + bool HasCollector(const MetricId& metric_id) const; + /// Delete a collector + bool DeleteCollector(const MetricId& metric_id); + + /// Add a subscriber to a given metricId. + /// Different type of subscribers can be registered to a same metricId. + bool AddSubscriber(std::unique_ptr&& subscriber); + /// Delete a subscriber + bool DeleteSubscriber(const MetricId& metric_id); + void DeleteSubscribers(); + + private: + void NotifySubscribers(); + void AddHardwareCollectors(); + + private: + mutable std::recursive_mutex mutex_; + + using CollectorMap = std::unordered_map>; + + using SubscriberMap = std::unordered_multimap>; + CollectorMap collectors_; + SubscriberMap subscribers_; + + int64_t last_report_timestamp_; + + std::shared_ptr last_collector_report_; }; class AutoCollectorRegister { -public: - AutoCollectorRegister(const MetricId& id, - std::unique_ptr&& collector, - SubscriberTypeList type_list = {SubscriberType::LATEST}): - registered_(false), - id_(id) { - registered_ = CollectorReportPublisher::GetInstance().AddCollector(id_, std::move(collector), type_list); - } - - // create a metric with empty label - AutoCollectorRegister(const std::string& name, - std::unique_ptr&& collector, - SubscriberTypeList type_list = {SubscriberType::LATEST}): - registered_(false), - id_(name) { - if (name.empty()) { - throw std::invalid_argument("name"); - } - registered_ = CollectorReportPublisher::GetInstance().AddCollector(id_, std::move(collector), type_list); - } - - // create a metric with name and label - // label_str format: k1:v1,k2:v2,... - // can build by LabelStringBuilder().Append("k1", "v1").Append("k2","v2").ToString(); - AutoCollectorRegister(const std::string& name, - const std::string& label_str, - std::unique_ptr&& collector, - SubscriberTypeList type_list = {SubscriberType::LATEST}): - registered_(false) { - // parse metric id - MetricId::ParseFromStringWithThrow(name, label_str, &id_); - registered_ = CollectorReportPublisher::GetInstance().AddCollector(id_, std::move(collector), type_list); - } - - ~AutoCollectorRegister() { - if (registered_) { - CollectorReportPublisher::GetInstance().DeleteCollector(id_); - } + public: + AutoCollectorRegister(const MetricId& id, std::unique_ptr&& collector, + SubscriberTypeList type_list = {SubscriberType::LATEST}) + : registered_(false), id_(id) { + registered_ = + CollectorReportPublisher::GetInstance().AddCollector(id_, std::move(collector), type_list); + } + + // create a metric with empty label + AutoCollectorRegister(const std::string& name, std::unique_ptr&& collector, + SubscriberTypeList type_list = {SubscriberType::LATEST}) + : registered_(false), id_(name) { + if (name.empty()) { + throw std::invalid_argument("name"); } - - const MetricId& GetId() const { - return id_; + registered_ = + CollectorReportPublisher::GetInstance().AddCollector(id_, std::move(collector), type_list); + } + + // create a metric with name and label + // label_str format: k1:v1,k2:v2,... + // can build by LabelStringBuilder().Append("k1", + // "v1").Append("k2","v2").ToString(); + AutoCollectorRegister(const std::string& name, const std::string& label_str, + std::unique_ptr&& collector, + SubscriberTypeList type_list = {SubscriberType::LATEST}) + : registered_(false) { + // parse metric id + MetricId::ParseFromStringWithThrow(name, label_str, &id_); + registered_ = + CollectorReportPublisher::GetInstance().AddCollector(id_, std::move(collector), type_list); + } + + ~AutoCollectorRegister() { + if (registered_) { + CollectorReportPublisher::GetInstance().DeleteCollector(id_); } - - bool IsRegistered() const { - return registered_; - } - -private: - bool registered_; - MetricId id_; -}; + } + + const MetricId& GetId() const { return id_; } + bool IsRegistered() const { return registered_; } + + private: + bool registered_; + MetricId id_; +}; class AutoSubscriberRegister { -public: - AutoSubscriberRegister(std::unique_ptr&& subscriber_ptr):registered_(false) { - if (subscriber_ptr) { - metric_id_ = subscriber_ptr->GetMetricId(); - registered_ = CollectorReportPublisher::GetInstance().AddSubscriber(std::move(subscriber_ptr)); - } + public: + AutoSubscriberRegister(std::unique_ptr&& subscriber_ptr) : registered_(false) { + if (subscriber_ptr) { + metric_id_ = subscriber_ptr->GetMetricId(); + registered_ = + CollectorReportPublisher::GetInstance().AddSubscriber(std::move(subscriber_ptr)); } - ~AutoSubscriberRegister(){ - if (registered_) { - CollectorReportPublisher::GetInstance().DeleteSubscriber(metric_id_); - } + } + ~AutoSubscriberRegister() { + if (registered_) { + CollectorReportPublisher::GetInstance().DeleteSubscriber(metric_id_); } -private: - bool registered_; - MetricId metric_id_; + } + + private: + bool registered_; + MetricId metric_id_; }; -} // end namespace tera - -#endif // TERA_COMMON_METRIC_METRICS_H_ - -/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ +} // end namespace tera + +#endif // TERA_COMMON_METRIC_METRICS_H_ +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/common/metric/counter_collector.h b/src/common/metric/counter_collector.h index eeefa56f7..53add4a9c 100644 --- a/src/common/metric/counter_collector.h +++ b/src/common/metric/counter_collector.h @@ -7,34 +7,35 @@ #include "common/metric/collector.h" #include "common/counter.h" - -namespace tera { + +namespace tera { class CounterCollector : public Collector { -public: - /// if is_periodic is true, the counter will be cleared when collect - /// this parameter is usually true, but it's false with some instantaneous value - /// Eg: read_pending_count, scan_pending_count, which can't be clear during collect. - explicit CounterCollector(Counter* counter, - bool is_periodic = true): - counter_(counter), - is_periodic_(is_periodic) {} - - ~CounterCollector() override {} - - int64_t Collect() override { - if (counter_ == NULL) { - return -1; - } else { - return is_periodic_ ? counter_->Clear() : counter_->Get(); - } + public: + /// if is_periodic is true, the counter will be cleared when collect + /// this parameter is usually true, but it's false with some instantaneous + /// value + /// Eg: read_pending_count, scan_pending_count, which can't be clear during + /// collect. + explicit CounterCollector(Counter* counter, bool is_periodic = true) + : counter_(counter), is_periodic_(is_periodic) {} + + ~CounterCollector() override {} + + int64_t Collect() override { + if (counter_ == NULL) { + return -1; + } else { + return is_periodic_ ? counter_->Clear() : counter_->Get(); } -private: - Counter* const counter_; - const bool is_periodic_; + } + + private: + Counter* const counter_; + const bool is_periodic_; }; -} // end namespace tera - -#endif // TERA_COMMON_METRIC_COUNTER_COLLECTOR_H_ - +} // end namespace tera + +#endif // TERA_COMMON_METRIC_COUNTER_COLLECTOR_H_ + /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ \ No newline at end of file diff --git a/src/common/metric/hardware_collectors.cc b/src/common/metric/hardware_collectors.cc index bebad75cd..7c4582a6a 100644 --- a/src/common/metric/hardware_collectors.cc +++ b/src/common/metric/hardware_collectors.cc @@ -18,172 +18,174 @@ #include "common/metric/hardware_collectors.h" -DEFINE_int64(tera_hardware_collect_period_second, 5, "hardware metrics checking period (in second)"); +DEFINE_int64(tera_hardware_collect_period_second, 5, + "hardware metrics checking period (in second)"); namespace tera { -using FileWrapper = std::function; -static FileWrapper file_wrapper = [](FILE* f){if(f) fclose(f);}; +using FileWrapper = std::function; +static FileWrapper file_wrapper = [](FILE* f) { + if (f) fclose(f); +}; // return number of cpu(cores) static uint32_t GetCpuCount() { #if defined(_SC_NPROCESSORS_ONLN) - return sysconf(_SC_NPROCESSORS_ONLN); + return sysconf(_SC_NPROCESSORS_ONLN); #else - std::unique_ptr fp(fopen("/proc/stat", "r"), file_wrapper); - if (!fp) { - LOG(ERROR) << "[HardWare Metric] open /proc/stat failed."; - return 1; - } - static const size_t kLineMaxLen = 256; // enough in here - std::unique_ptr aline(new char[kLineMaxLen]); - if (!aline) { - LOG(ERROR) << "[HardWare Metric] malloc failed."; - return 1; - } - static const size_t kHeaderMaxLen = 10; - char header[kHeaderMaxLen]; - uint32_t i = 0; - size_t len = 0; - char* line_ptr = aline.get(); - getline(&line_ptr, &len, fp.get()); // drop the first line - while (getline(&line_ptr, &len, fp.get())) { - i++; - sscanf(line_ptr, "%s", header); - if (!strncmp(header, "intr", kHeaderMaxLen)) { - break; - } - } - return std::max(i - 1, 1); + std::unique_ptr fp(fopen("/proc/stat", "r"), file_wrapper); + if (!fp) { + LOG(ERROR) << "[HardWare Metric] open /proc/stat failed."; + return 1; + } + static const size_t kLineMaxLen = 256; // enough in here + std::unique_ptr aline(new char[kLineMaxLen]); + if (!aline) { + LOG(ERROR) << "[HardWare Metric] malloc failed."; + return 1; + } + static const size_t kHeaderMaxLen = 10; + char header[kHeaderMaxLen]; + uint32_t i = 0; + size_t len = 0; + char* line_ptr = aline.get(); + getline(&line_ptr, &len, fp.get()); // drop the first line + while (getline(&line_ptr, &len, fp.get())) { + i++; + sscanf(line_ptr, "%s", header); + if (!strncmp(header, "intr", kHeaderMaxLen)) { + break; + } + } + return std::max(i - 1, 1); #endif } // return the number of ticks(jiffies) that this process // has been scheduled in user and kernel mode. static bool ProcessCpuTick(const std::string& stat_path, int64_t* tick) { - if (tick == NULL) { - return false; - } - std::unique_ptr fp(fopen(stat_path.c_str(), "r"), file_wrapper); - if (!fp) { - LOG(ERROR) << "[HardWare Metric] open " << stat_path << " failed."; - return false; - } - long long utime = 0; - long long stime = 0; - if (fscanf(fp.get(), "%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %lld %lld", - &utime, &stime) < 2) { - LOG(ERROR) << "[HardWare Metric] get cpu tick from " << stat_path << " failed."; - return false; - } - *tick = utime + stime; - return true; + if (tick == NULL) { + return false; + } + std::unique_ptr fp(fopen(stat_path.c_str(), "r"), file_wrapper); + if (!fp) { + LOG(ERROR) << "[HardWare Metric] open " << stat_path << " failed."; + return false; + } + long long utime = 0; + long long stime = 0; + if (fscanf(fp.get(), "%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %lld %lld", &utime, + &stime) < 2) { + LOG(ERROR) << "[HardWare Metric] get cpu tick from " << stat_path << " failed."; + return false; + } + *tick = utime + stime; + return true; } -CpuUsageCollector::CpuUsageCollector(): - pid_(getpid()), - cpu_core_num_(GetCpuCount()), - cpu_hertz_(sysconf(_SC_CLK_TCK)), - stat_path_(std::string("/proc/") + std::to_string(pid_) + "/stat"), - last_check_time_ms_(get_millis()), - last_tick_total_(0), - cpu_usage_(0) {} +CpuUsageCollector::CpuUsageCollector() + : pid_(getpid()), + cpu_core_num_(GetCpuCount()), + cpu_hertz_(sysconf(_SC_CLK_TCK)), + stat_path_(std::string("/proc/") + std::to_string(pid_) + "/stat"), + last_check_time_ms_(get_millis()), + last_tick_total_(0), + cpu_usage_(0) {} CpuUsageCollector::~CpuUsageCollector() {} int64_t CpuUsageCollector::Collect() { - int64_t cur_ts = get_millis(); - int64_t collect_period_ms = FLAGS_tera_hardware_collect_period_second * 1000; - if (collect_period_ms > 0 && cur_ts < last_check_time_ms_ + collect_period_ms) { - return cpu_usage_; - } else { - return CheckCpuUsage(cur_ts, false); - } + int64_t cur_ts = get_millis(); + int64_t collect_period_ms = FLAGS_tera_hardware_collect_period_second * 1000; + if (collect_period_ms > 0 && cur_ts < last_check_time_ms_ + collect_period_ms) { + return cpu_usage_; + } else { + return CheckCpuUsage(cur_ts, false); + } } int64_t CpuUsageCollector::CheckCpuUsage(int64_t cur_ts, bool is_irix_on) { - int64_t new_tick_total = 0; - if (!ProcessCpuTick(stat_path_, &new_tick_total)) { - // read proc file failed. - return 0; - } - - float interval_sec = static_cast(cur_ts - last_check_time_ms_) / 1000.0f; - // percentage per tick during time interval - float interval_total_ticks = static_cast(cpu_hertz_) * interval_sec; - if (!is_irix_on) { - interval_total_ticks *= cpu_core_num_; - } - - float usage_percentage = static_cast(new_tick_total - last_tick_total_) * 100.0f / interval_total_ticks; - usage_percentage = std::min(usage_percentage, 99.9f); - - // update - last_tick_total_ = new_tick_total; - cpu_usage_ = static_cast(usage_percentage); - last_check_time_ms_ = cur_ts; - VLOG(15) << "[Hardware Metric] %CPU: " << usage_percentage; - return cpu_usage_; + int64_t new_tick_total = 0; + if (!ProcessCpuTick(stat_path_, &new_tick_total)) { + // read proc file failed. + return 0; + } + + float interval_sec = static_cast(cur_ts - last_check_time_ms_) / 1000.0f; + // percentage per tick during time interval + float interval_total_ticks = static_cast(cpu_hertz_) * interval_sec; + if (!is_irix_on) { + interval_total_ticks *= cpu_core_num_; + } + + float usage_percentage = + static_cast(new_tick_total - last_tick_total_) * 100.0f / interval_total_ticks; + usage_percentage = std::min(usage_percentage, 99.9f); + + // update + last_tick_total_ = new_tick_total; + cpu_usage_ = static_cast(usage_percentage); + last_check_time_ms_ = cur_ts; + VLOG(15) << "[Hardware Metric] %CPU: " << usage_percentage; + return cpu_usage_; } -MemUsageCollector::MemUsageCollector(): - pid_(getpid()), - stat_path_(std::string("/proc/") + std::to_string(pid_) + "/statm"), - last_check_time_ms_(get_millis()), - mem_usage_(0) {} - +MemUsageCollector::MemUsageCollector() + : pid_(getpid()), + stat_path_(std::string("/proc/") + std::to_string(pid_) + "/statm"), + last_check_time_ms_(get_millis()), + mem_usage_(0) {} MemUsageCollector::~MemUsageCollector() {} int64_t MemUsageCollector::Collect() { - int64_t cur_ts = get_millis(); - int64_t collect_period_ms = FLAGS_tera_hardware_collect_period_second * 1000; - if (collect_period_ms > 0 && cur_ts < last_check_time_ms_ + collect_period_ms) { - return mem_usage_; - } else { - return CheckMemUsage(cur_ts); - } + int64_t cur_ts = get_millis(); + int64_t collect_period_ms = FLAGS_tera_hardware_collect_period_second * 1000; + if (collect_period_ms > 0 && cur_ts < last_check_time_ms_ + collect_period_ms) { + return mem_usage_; + } else { + return CheckMemUsage(cur_ts); + } } int64_t MemUsageCollector::CheckMemUsage(int64_t cur_ts) { - std::unique_ptr stat_file(fopen(stat_path_.c_str(), "r"), file_wrapper); - if (!stat_file) { - LOG(ERROR) << "[Hardware Metric] open " << stat_path_ << " failed."; - return false; - } - - int64_t mem_pages = 0; - fscanf(stat_file.get(), "%*d %ld", &mem_pages); - - mem_usage_ = mem_pages * 4 * 1024; - last_check_time_ms_ = cur_ts; - VLOG(15) << "[Hardware Metric] Memory: " << mem_usage_; - return mem_usage_; + std::unique_ptr stat_file(fopen(stat_path_.c_str(), "r"), file_wrapper); + if (!stat_file) { + LOG(ERROR) << "[Hardware Metric] open " << stat_path_ << " failed."; + return false; + } + + int64_t mem_pages = 0; + fscanf(stat_file.get(), "%*d %ld", &mem_pages); + + mem_usage_ = mem_pages * 4 * 1024; + last_check_time_ms_ = cur_ts; + VLOG(15) << "[Hardware Metric] Memory: " << mem_usage_; + return mem_usage_; } NetUsageCollector::NetInfoChecker NetUsageCollector::net_info_checker_; -NetUsageCollector::NetUsageCollector(NetUsageType n_type): - net_usage_type_(n_type) {} +NetUsageCollector::NetUsageCollector(NetUsageType n_type) : net_usage_type_(n_type) {} NetUsageCollector::~NetUsageCollector() {} int64_t NetUsageCollector::Collect() { - int64_t cur_ts = get_millis(); - int64_t collect_period_ms = FLAGS_tera_hardware_collect_period_second * 1000; - if (collect_period_ms > 0 && - cur_ts < net_info_checker_.last_check_time_ms_ + collect_period_ms) { - return net_usage_type_ == RECEIVE ? net_info_checker_.net_rx_usage_ : net_info_checker_.net_tx_usage_; + int64_t cur_ts = get_millis(); + int64_t collect_period_ms = FLAGS_tera_hardware_collect_period_second * 1000; + if (collect_period_ms > 0 && cur_ts < net_info_checker_.last_check_time_ms_ + collect_period_ms) { + return net_usage_type_ == RECEIVE ? net_info_checker_.net_rx_usage_ + : net_info_checker_.net_tx_usage_; + } else { + int64_t value = 0; + if (net_usage_type_ == RECEIVE) { + // check net info and get receive usage + net_info_checker_.CheckNetUsage(cur_ts, &value, NULL); } else { - int64_t value = 0; - if (net_usage_type_ == RECEIVE) { - // check net info and get receive usage - net_info_checker_.CheckNetUsage(cur_ts, &value, NULL); - } else { - // check net info and get transmit usage - net_info_checker_.CheckNetUsage(cur_ts, NULL, &value); - } - return value; + // check net info and get transmit usage + net_info_checker_.CheckNetUsage(cur_ts, NULL, &value); } + return value; + } } NetUsageCollector::NetInfoChecker::NetInfoChecker() @@ -194,56 +196,57 @@ NetUsageCollector::NetInfoChecker::NetInfoChecker() last_tx_total_(0), net_rx_usage_(0), net_tx_usage_(0) { - GetCurrentTotal(&last_rx_total_, &last_tx_total_); + GetCurrentTotal(&last_rx_total_, &last_tx_total_); } -bool NetUsageCollector::NetInfoChecker::GetCurrentTotal(int64_t *rx_total, int64_t *tx_total) { - std::unique_ptr stat_file(fopen(stat_path_.c_str(), "r"), file_wrapper); - if (!stat_file) { - LOG(ERROR) << "[Hardware Metric] open " << stat_path_ << "failed."; - return false; - } - int ret = fseek(stat_file.get(), 327, SEEK_SET); - CHECK_EQ(ret, 0); - for (int i = 0; i < 10; i++) { - while (':' != fgetc(stat_file.get())); - ret = fscanf(stat_file.get(), "%ld%*d%*d%*d%*d%*d%*d%*d%ld", rx_total, tx_total); - if (ret >= 2 && rx_total > 0 && tx_total > 0) { - break; - } - } - - return true; +bool NetUsageCollector::NetInfoChecker::GetCurrentTotal(int64_t* rx_total, int64_t* tx_total) { + std::unique_ptr stat_file(fopen(stat_path_.c_str(), "r"), file_wrapper); + if (!stat_file) { + LOG(ERROR) << "[Hardware Metric] open " << stat_path_ << "failed."; + return false; + } + int ret = fseek(stat_file.get(), 327, SEEK_SET); + CHECK_EQ(ret, 0); + for (int i = 0; i < 10; i++) { + while (':' != fgetc(stat_file.get())) + ; + ret = fscanf(stat_file.get(), "%ld%*d%*d%*d%*d%*d%*d%*d%ld", rx_total, tx_total); + if (ret >= 2 && rx_total > 0 && tx_total > 0) { + break; + } + } + + return true; } -bool NetUsageCollector::NetInfoChecker::CheckNetUsage(int64_t cur_ts, int64_t* rx_usage, int64_t *tx_usage) { - int64_t new_rx_total = 0; - int64_t new_tx_total = 0; - - if (!GetCurrentTotal(&new_rx_total, &new_tx_total)) { - return false; - } - int64_t interval_ms = cur_ts - last_check_time_ms_; - // update - net_rx_usage_ = (new_rx_total - last_rx_total_) * 1000 / interval_ms; - net_tx_usage_ = (new_tx_total - last_tx_total_) * 1000 / interval_ms; - last_rx_total_ = new_rx_total; - last_tx_total_ = new_tx_total; - last_check_time_ms_ = cur_ts; - - if (rx_usage) { - *rx_usage = net_rx_usage_; - } - - if (tx_usage) { - *tx_usage = net_tx_usage_; - } - - VLOG(15) << "[Hardware Metric] Network RX/TX: " << last_rx_total_ << " / " << last_tx_total_; - return true; +bool NetUsageCollector::NetInfoChecker::CheckNetUsage(int64_t cur_ts, int64_t* rx_usage, + int64_t* tx_usage) { + int64_t new_rx_total = 0; + int64_t new_tx_total = 0; + + if (!GetCurrentTotal(&new_rx_total, &new_tx_total)) { + return false; + } + int64_t interval_ms = cur_ts - last_check_time_ms_; + // update + net_rx_usage_ = (new_rx_total - last_rx_total_) * 1000 / interval_ms; + net_tx_usage_ = (new_tx_total - last_tx_total_) * 1000 / interval_ms; + last_rx_total_ = new_rx_total; + last_tx_total_ = new_tx_total; + last_check_time_ms_ = cur_ts; + + if (rx_usage) { + *rx_usage = net_rx_usage_; + } + + if (tx_usage) { + *tx_usage = net_tx_usage_; + } + + VLOG(15) << "[Hardware Metric] Network RX/TX: " << last_rx_total_ << " / " << last_tx_total_; + return true; } -} // end namespace tera +} // end namespace tera /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ - diff --git a/src/common/metric/hardware_collectors.h b/src/common/metric/hardware_collectors.h index be04e4165..e8ec8095a 100644 --- a/src/common/metric/hardware_collectors.h +++ b/src/common/metric/hardware_collectors.h @@ -4,13 +4,13 @@ #ifndef TERA_COMMON_METRIC_HARDWARE_METRICS_H_ #define TERA_COMMON_METRIC_HARDWARE_METRICS_H_ - + #include #include "common/metric/collector_report_publisher.h" #include "common/metric/collector.h" - -namespace tera { + +namespace tera { const char* const kInstCpuMetricName = "tera_instance_cpu_usage_percent"; const char* const kInstMemMetricName = "tera_instance_mem_usage_bytes"; @@ -18,87 +18,90 @@ const char* const kInstNetRXMetricName = "tera_instance_net_receive_bytes"; const char* const kInstNetTXMetricName = "tera_instance_net_transmit_bytes"; class CpuUsageCollector : public Collector { -public: - CpuUsageCollector(); - virtual ~CpuUsageCollector(); - - virtual int64_t Collect(); -private: - int64_t CheckCpuUsage(int64_t cur_ts, bool is_irix_on); - -private: - // proc info - int pid_; - uint32_t cpu_core_num_; - int64_t cpu_hertz_; - std::string stat_path_; - - // last check info - int64_t last_check_time_ms_; - int64_t last_tick_total_; // cpu total ticks at last check - int64_t cpu_usage_; // (new_tick_total - last_tick_total_) / (total ticks in interval) + public: + CpuUsageCollector(); + virtual ~CpuUsageCollector(); + + virtual int64_t Collect(); + + private: + int64_t CheckCpuUsage(int64_t cur_ts, bool is_irix_on); + + private: + // proc info + int pid_; + uint32_t cpu_core_num_; + int64_t cpu_hertz_; + std::string stat_path_; + + // last check info + int64_t last_check_time_ms_; + int64_t last_tick_total_; // cpu total ticks at last check + int64_t cpu_usage_; // (new_tick_total - last_tick_total_) / (total ticks in + // interval) }; class MemUsageCollector : public Collector { -public: - MemUsageCollector(); - virtual ~MemUsageCollector(); - - virtual int64_t Collect(); -private: - int64_t CheckMemUsage(int64_t cur_ts); - -private: + public: + MemUsageCollector(); + virtual ~MemUsageCollector(); + + virtual int64_t Collect(); + + private: + int64_t CheckMemUsage(int64_t cur_ts); + + private: + // proc info + int pid_; + std::string stat_path_; + + // last check info + int64_t last_check_time_ms_; + int64_t mem_usage_; +}; + +enum NetUsageType { + RECEIVE, // net_rx + TRANSMIT, // net_tx +}; + +class NetUsageCollector : public Collector { + public: + explicit NetUsageCollector(NetUsageType n_type); + virtual ~NetUsageCollector(); + + virtual int64_t Collect(); + + private: + struct NetInfoChecker { // proc info int pid_; std::string stat_path_; - + // last check info int64_t last_check_time_ms_; - int64_t mem_usage_; -}; + int64_t last_rx_total_; // total rx bytes at last check + int64_t last_tx_total_; // total tx bytes at last check -enum NetUsageType { - RECEIVE, // net_rx - TRANSMIT, // net_tx -}; + // metric value cache + int64_t net_rx_usage_; // (new_rx_total - last_rx_total_) / check_interval + int64_t net_tx_usage_; // (new_tx_total - last_tx_total_) / check_interval -class NetUsageCollector : public Collector { -public: - explicit NetUsageCollector(NetUsageType n_type); - virtual ~NetUsageCollector(); - - virtual int64_t Collect(); -private: - struct NetInfoChecker { - // proc info - int pid_; - std::string stat_path_; - - // last check info - int64_t last_check_time_ms_; - int64_t last_rx_total_; // total rx bytes at last check - int64_t last_tx_total_; // total tx bytes at last check - - // metric value cache - int64_t net_rx_usage_; // (new_rx_total - last_rx_total_) / check_interval - int64_t net_tx_usage_; // (new_tx_total - last_tx_total_) / check_interval - - NetInfoChecker(); - - bool GetCurrentTotal(int64_t*, int64_t*); - bool CheckNetUsage(int64_t cur_ts, int64_t* rx_usage, int64_t *tx_usage); - }; - - static NetInfoChecker net_info_checker_; - -private: - NetUsageType net_usage_type_; + NetInfoChecker(); + + bool GetCurrentTotal(int64_t*, int64_t*); + bool CheckNetUsage(int64_t cur_ts, int64_t* rx_usage, int64_t* tx_usage); + }; + + static NetInfoChecker net_info_checker_; + + private: + NetUsageType net_usage_type_; }; - -} // end namespace tera - -#endif // TERA_COMMON_METRIC_HARDWARE_METRICS_H_ - -/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ +} // end namespace tera + +#endif // TERA_COMMON_METRIC_HARDWARE_METRICS_H_ + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/common/metric/metric_counter.h b/src/common/metric/metric_counter.h index 55b4c59fe..10c649bcb 100644 --- a/src/common/metric/metric_counter.h +++ b/src/common/metric/metric_counter.h @@ -10,84 +10,75 @@ #include "common/metric/counter_collector.h" #include "common/counter.h" -namespace tera{ +namespace tera { class MetricCounter : public Counter { -public: - // create a metric with empty label - explicit MetricCounter(const std::string& name, - SubscriberTypeList type_list = {SubscriberType::LATEST}, - bool is_periodic = true): - Counter(), - registered_(false), - metric_id_(name), - type_list_(type_list), - is_periodic_(is_periodic) { - if (name.empty()) { - // throw a exception and make process exit with coredump - throw std::invalid_argument("metric name is empty"); - } - registered_ = CollectorReportPublisher::GetInstance().AddCollector( - metric_id_, - std::unique_ptr(new CounterCollector(this, is_periodic_)), - type_list_); + public: + // create a metric with empty label + explicit MetricCounter(const std::string& name, + SubscriberTypeList type_list = {SubscriberType::LATEST}, + bool is_periodic = true) + : Counter(), + registered_(false), + metric_id_(name), + type_list_(type_list), + is_periodic_(is_periodic) { + if (name.empty()) { + // throw a exception and make process exit with coredump + throw std::invalid_argument("metric name is empty"); } + registered_ = CollectorReportPublisher::GetInstance().AddCollector( + metric_id_, std::unique_ptr(new CounterCollector(this, is_periodic_)), + type_list_); + } - // create a metric with name and label - // label_str format: k1:v1,k2:v2,... - // can build by LabelStringBuilder().Append("k1", "v1").Append("k2","v2").ToString(); - MetricCounter(const std::string& name, - const std::string& label_str, - SubscriberTypeList type_list = {SubscriberType::LATEST}, - bool is_periodic = true): - Counter(), - registered_(false), - type_list_(type_list), - is_periodic_(is_periodic) { - // parse metric id - MetricId::ParseFromStringWithThrow(name, label_str, &metric_id_); - // legal label str format, do register - registered_ = CollectorReportPublisher::GetInstance().AddCollector( - metric_id_, - std::unique_ptr(new CounterCollector(this, is_periodic_)), - type_list); - } + // create a metric with name and label + // label_str format: k1:v1,k2:v2,... + // can build by LabelStringBuilder().Append("k1", + // "v1").Append("k2","v2").ToString(); + MetricCounter(const std::string& name, const std::string& label_str, + SubscriberTypeList type_list = {SubscriberType::LATEST}, bool is_periodic = true) + : Counter(), registered_(false), type_list_(type_list), is_periodic_(is_periodic) { + // parse metric id + MetricId::ParseFromStringWithThrow(name, label_str, &metric_id_); + // legal label str format, do register + registered_ = CollectorReportPublisher::GetInstance().AddCollector( + metric_id_, std::unique_ptr(new CounterCollector(this, is_periodic_)), + type_list); + } - MetricCounter(MetricCounter&& counter) { - // parse metric id - if (counter.registered_) { - CollectorReportPublisher::GetInstance().DeleteCollector(counter.metric_id_); - } - registered_ = counter.registered_; - metric_id_ = counter.metric_id_; - is_periodic_ = counter.is_periodic_; - type_list_ = counter.type_list_; - Set(counter.Get()); - counter.registered_ = false; - registered_ = CollectorReportPublisher::GetInstance().AddCollector( - metric_id_, - std::unique_ptr(new CounterCollector(this, is_periodic_)), - type_list_); + MetricCounter(MetricCounter&& counter) { + // parse metric id + if (counter.registered_) { + CollectorReportPublisher::GetInstance().DeleteCollector(counter.metric_id_); } + registered_ = counter.registered_; + metric_id_ = counter.metric_id_; + is_periodic_ = counter.is_periodic_; + type_list_ = counter.type_list_; + Set(counter.Get()); + counter.registered_ = false; + registered_ = CollectorReportPublisher::GetInstance().AddCollector( + metric_id_, std::unique_ptr(new CounterCollector(this, is_periodic_)), + type_list_); + } - virtual ~MetricCounter() { - if (registered_) { - // do unregister - CollectorReportPublisher::GetInstance().DeleteCollector(metric_id_); - } + virtual ~MetricCounter() { + if (registered_) { + // do unregister + CollectorReportPublisher::GetInstance().DeleteCollector(metric_id_); } + } - bool IsRegistered() const { - return registered_; - } + bool IsRegistered() const { return registered_; } - //Never copyied - MetricCounter(const MetricCounter&) = delete; - MetricCounter& operator=(const MetricCounter&) = delete; + // Never copyied + MetricCounter(const MetricCounter&) = delete; + MetricCounter& operator=(const MetricCounter&) = delete; -private: - bool registered_; - MetricId metric_id_; - SubscriberTypeList type_list_; - bool is_periodic_; + private: + bool registered_; + MetricId metric_id_; + SubscriberTypeList type_list_; + bool is_periodic_; }; } diff --git a/src/common/metric/metric_http_server.cc b/src/common/metric/metric_http_server.cc index fdb01910c..9627a4d21 100644 --- a/src/common/metric/metric_http_server.cc +++ b/src/common/metric/metric_http_server.cc @@ -1,8 +1,8 @@ // Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved // Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. +// found in the LICENSE file. -#include "common/metric/metric_http_server.h" +#include "common/metric/metric_http_server.h" #include #include @@ -14,219 +14,213 @@ #include "common/metric/collector_report.h" using std::string; - -namespace tera { + +namespace tera { void ResponseBodyBuilder::BuildType(string* body, const string& metric_name, const string& type) { - body->append("# TYPE " + metric_name + " " + type + "\n"); + body->append("# TYPE " + metric_name + " " + type + "\n"); } -void ResponseBodyBuilder::BuildHelp(string* body, const string& metric_name, const string& help_info) { - body->append("# HELP " + metric_name + " " + help_info + "\n"); +void ResponseBodyBuilder::BuildHelp(string* body, const string& metric_name, + const string& help_info) { + body->append("# HELP " + metric_name + " " + help_info + "\n"); } -void ResponseBodyBuilder::BuildMetricItem(string* body, const MetricId& metric_id, const ReportItem& report_item) { +void ResponseBodyBuilder::BuildMetricItem(string* body, const MetricId& metric_id, + const ReportItem& report_item) { + VLOG(12) << "[Building Metric] name: " << metric_id.GetName() + << "\tValue: " << static_cast(report_item.Value()) + << "\tTimeStamp: " << report_item.Time() << "\tType: " << report_item.Type(); + + if (report_item.Time() == -1) { + return; + } + + body->append(metric_id.GetName() + "{"); + const auto& label_map = metric_id.GetLabelMap(); + auto iter = label_map.begin(); + bool has_label = false; + if (iter != label_map.end()) { + body->append(iter->first + "=" + "\"" + iter->second + "\""); + has_label = true; + ++iter; + } + while (iter != label_map.end()) { + body->append("," + iter->first + "=" + "\"" + iter->second + "\""); + ++iter; + } + + if (has_label) { + body->append(",value_type=\"" + report_item.Type() + "\""); + } else { + body->append("value_type=\"" + report_item.Type() + "\""); + } + + body->append("} " + std::to_string(report_item.Value()) + " " + + std::to_string(report_item.Time())); + body->append("\n"); +} - VLOG(12) << "[Building Metric] name: " << metric_id.GetName() - << "\tValue: " << static_cast(report_item.Value()) - << "\tTimeStamp: " << report_item.Time() - << "\tType: " << report_item.Type(); +static const int kMongoosePollTimeoutMs = 1000; - if (report_item.Time() == -1) { - return; - } +static void LogRequest(struct http_message* request) { + VLOG(16) << "[MetricHttpServer] Recv http request." + << " method [" << std::string(request->method.p, request->method.len) << "]" + << " uri [" << std::string(request->uri.p, request->uri.len) << "]" + << " proto [" << std::string(request->proto.p, request->proto.len) << "]" + << " query [" << std::string(request->query_string.p, request->query_string.len) << "]" + << " body [" << std::string(request->body.p, request->body.len) << "]"; +} - body->append(metric_id.GetName() + "{"); - const auto& label_map = metric_id.GetLabelMap(); - auto iter = label_map.begin(); - bool has_label = false; - if (iter != label_map.end()) { - body->append(iter->first + "=" + "\"" + iter->second + "\""); - has_label = true; - ++iter; - } - while (iter != label_map.end()) { - body->append("," + iter->first + "=" + "\"" + iter->second + "\""); - ++iter; +void MetricHttpServer::EventHandler(struct mg_connection* conn, int event, void* p_data) { + if (event == MG_EV_HTTP_REQUEST) { + if (conn == NULL || conn->mgr == NULL || p_data == NULL) { + LOG(WARNING) << "[MetricHttpServer] handle invalid request."; + return; } - if (has_label) { - body->append(",value_type=\"" + report_item.Type() + "\""); - } else { - body->append("value_type=\"" + report_item.Type() + "\""); + // get user data + void* user_data = conn->mgr->user_data; + if (user_data == NULL) { + LOG(WARNING) << "[MetricHttpServer] Connection missing user data."; + return; } - body->append("} " + std::to_string(report_item.Value()) + " " + std::to_string(report_item.Time())); - body->append("\n"); -} - -static const int kMongoosePollTimeoutMs = 1000; - -static void LogRequest(struct http_message *request) { - VLOG(16) << "[MetricHttpServer] Recv http request." - << " method [" << std::string(request->method.p, request->method.len) << "]" - << " uri [" << std::string(request->uri.p, request->uri.len) << "]" - << " proto [" << std::string(request->proto.p, request->proto.len) << "]" - << " query [" << std::string(request->query_string.p, request->query_string.len) << "]" - << " body [" << std::string(request->body.p, request->body.len) << "]"; + MetricHttpServer* server = reinterpret_cast(user_data); + struct http_message* request = reinterpret_cast(p_data); + server->HandleHttpRequest(conn, request); + } + // ignore other events } -void MetricHttpServer::EventHandler(struct mg_connection *conn, int event, void *p_data) { - if (event == MG_EV_HTTP_REQUEST) { - if (conn == NULL || conn->mgr == NULL || p_data == NULL) { - LOG(WARNING) << "[MetricHttpServer] handle invalid request."; - return; - } - - // get user data - void* user_data = conn->mgr->user_data; - if (user_data == NULL) { - LOG(WARNING) << "[MetricHttpServer] Connection missing user data."; - return; - } - - MetricHttpServer *server = reinterpret_cast(user_data); - struct http_message *request = reinterpret_cast(p_data); - server->HandleHttpRequest(conn, request); - } - // ignore other events -} - -MetricHttpServer::MetricHttpServer(): - is_running_(false), - stop_(false), - listen_port_(-1) {} +MetricHttpServer::MetricHttpServer() : is_running_(false), stop_(false), listen_port_(-1) {} -MetricHttpServer::~MetricHttpServer() {} +MetricHttpServer::~MetricHttpServer() { Stop(); } bool MetricHttpServer::Start(int32_t listen_port) { - if (listen_port <= 0) { - LOG(WARNING) << "[MetricHttpServer] Start got invalid listen port: " << listen_port; - return false; - } - - MutexLock lock(&mutex_); - if (IsRunning()) { - LOG(WARNING) << "[MetricHttpServer] Server is already running, listening: " << listen_port_; - return false; - } - - // init mongoose use this as user_data - mg_mgr_init(&mongoose_mgr_, this); - - // bind listen port - std::string bind_addr = std::to_string(listen_port); - struct mg_connection *conn = mg_bind(&mongoose_mgr_, bind_addr.c_str(), &MetricHttpServer::EventHandler); - - if (conn == NULL) { - LOG(WARNING) << "[MetricHttpServer] Bind port [" << listen_port << "] failed."; - mg_mgr_free(&mongoose_mgr_); - return false; - } + if (listen_port <= 0) { + LOG(WARNING) << "[MetricHttpServer] Start got invalid listen port: " << listen_port; + return false; + } + + MutexLock lock(&mutex_); + if (IsRunning()) { + LOG(WARNING) << "[MetricHttpServer] Server is already running, listening: " << listen_port_; + return false; + } + + // init mongoose use this as user_data + mg_mgr_init(&mongoose_mgr_, this); + + // bind listen port + std::string bind_addr = std::to_string(listen_port); + struct mg_connection* conn = + mg_bind(&mongoose_mgr_, bind_addr.c_str(), &MetricHttpServer::EventHandler); + + if (conn == NULL) { + LOG(WARNING) << "[MetricHttpServer] Bind port [" << listen_port << "] failed."; + mg_mgr_free(&mongoose_mgr_); + return false; + } - mg_set_protocol_http_websocket(conn); - LOG(INFO) << "[MetricHttpServer] Bind port [" << listen_port << "] success."; - - stop_.store(false); - if (!bg_thread_.Start(std::bind(&MetricHttpServer::BackgroundWorkWrapper, this))) { - mg_mgr_free(&mongoose_mgr_); - LOG(WARNING) << "[MetricHttpServer] Start background thread failed."; - return false; - } - return true; + mg_set_protocol_http_websocket(conn); + LOG(INFO) << "[MetricHttpServer] Bind port [" << listen_port << "] success."; + + stop_.store(false); + bg_thread_ = std::thread{&MetricHttpServer::BackgroundWorkWrapper, this}; + return true; } void MetricHttpServer::Stop() { - MutexLock lock(&mutex_); - if (!IsRunning()) { - return; - } - - stop_.store(true); - bg_thread_.Join(); - listen_port_ = -1; + MutexLock lock(&mutex_); + if (!IsRunning()) { + return; + } + + stop_.store(true); + bg_thread_.join(); + listen_port_ = -1; } void MetricHttpServer::BackgroundWorkWrapper() { - LOG(INFO) << "[MetricHttpServer] Start background work"; - is_running_.store(true); - while (!stop_.load()) { - mg_mgr_poll(&mongoose_mgr_, kMongoosePollTimeoutMs); - } - is_running_.store(false); - mg_mgr_free(&mongoose_mgr_); - LOG(INFO) << "[MetricHttpServer] Exit background work"; + LOG(INFO) << "[MetricHttpServer] Start background work"; + is_running_.store(true); + while (!stop_.load()) { + mg_mgr_poll(&mongoose_mgr_, kMongoosePollTimeoutMs); + } + is_running_.store(false); + mg_mgr_free(&mongoose_mgr_); + LOG(INFO) << "[MetricHttpServer] Exit background work"; } -void MetricHttpServer::HandleHttpRequest(struct mg_connection *conn, struct http_message *request) { - int64_t start_ts = get_micros(); - LogRequest(request); - - // select real handler based on uri - std::string uri(request->uri.p, request->uri.len); - if (uri == "/metrics") { - HandleMetrics(conn, request); - } else { - HandleUnknowUri(conn, request); - } - int64_t end_ts = get_micros(); - VLOG(16) << "[MetricHttpServer] Handle uri [" << uri << "] cost [" << (end_ts - start_ts) << "] us."; +void MetricHttpServer::HandleHttpRequest(struct mg_connection* conn, struct http_message* request) { + int64_t start_ts = get_micros(); + LogRequest(request); + + // select real handler based on uri + std::string uri(request->uri.p, request->uri.len); + if (uri == "/metrics") { + HandleMetrics(conn, request); + } else { + HandleUnknowUri(conn, request); + } + int64_t end_ts = get_micros(); + VLOG(16) << "[MetricHttpServer] Handle uri [" << uri << "] cost [" << (end_ts - start_ts) + << "] us."; } -void MetricHttpServer::HandleUnknowUri(struct mg_connection *conn, struct http_message *request) { - VLOG(16) << "[MetricHttpServer] Handle unknow uri [" - << std::string(request->uri.p, request->uri.len) << "] ..."; - mg_send_head(conn, 404, 0, "Content-Type: text/plain"); +void MetricHttpServer::HandleUnknowUri(struct mg_connection* conn, struct http_message* request) { + VLOG(16) << "[MetricHttpServer] Handle unknow uri [" + << std::string(request->uri.p, request->uri.len) << "] ..."; + mg_send_head(conn, 404, 0, "Content-Type: text/plain"); } -void MetricHttpServer::HandleMetrics(struct mg_connection *conn, struct http_message *request) { - std::string body(GetResponseBody()); - mg_printf(conn, "HTTP/1.1 200 OK\r\nContent-Type: %s\r\n", "text/plain"); - mg_printf(conn, "Content-Length: %lu\r\n\r\n", static_cast(body.size())); - mg_send(conn, body.data(), body.size()); +void MetricHttpServer::HandleMetrics(struct mg_connection* conn, struct http_message* request) { + std::string body(GetResponseBody()); + mg_printf(conn, "HTTP/1.1 200 OK\r\nContent-Type: %s\r\n", "text/plain"); + mg_printf(conn, "Content-Length: %lu\r\n\r\n", static_cast(body.size())); + mg_send(conn, body.data(), body.size()); } string MetricHttpServer::GetResponseBody() { - int64_t start_ts = get_millis(); - std::shared_ptr cur_report = - CollectorReportPublisher::GetInstance().GetSubscriberReport(); - - if (!cur_report) { - LOG(WARNING) << "[MetricHttpServer] Subscriber Report Is Empty"; - return ""; - } - - //pair - using MetricIdValuePair = SubscriberReport::value_type; - //Vector of pair - using MetricIdValueVec = std::vector; - // MetricNameMap: map< metric_name, vector< pair > > - using MetricNameMap = std::unordered_map; - - MetricNameMap metric_name_map; - - for (const auto& report_item : *cur_report) { - const std::string& metric_name = report_item.first.GetName(); - metric_name_map[metric_name].push_back(&report_item); - } - - std::string body; - // fill MetricFamilyVec - for (const auto& metric_item : metric_name_map) { - ResponseBodyBuilder::BuildHelp(&body, metric_item.first, metric_item.first); - ResponseBodyBuilder::BuildType(&body, metric_item.first, "gauge"); - - const MetricIdValueVec& metric_vec = metric_item.second; - - std::for_each(metric_vec.begin(), metric_vec.end(), [&body, this](const MetricIdValuePair* x) { - ResponseBodyBuilder::BuildMetricItem(&body, x->first, x->second); - }); - } - VLOG(12) << "[MetricHttpServer] Get Response Body cost: " << - get_millis() - start_ts << " ms"; - return std::move(body); + int64_t start_ts = get_millis(); + std::shared_ptr cur_report = + CollectorReportPublisher::GetInstance().GetSubscriberReport(); + + if (!cur_report) { + LOG(WARNING) << "[MetricHttpServer] Subscriber Report Is Empty"; + return ""; + } + + // pair + using MetricIdValuePair = SubscriberReport::value_type; + // Vector of pair + using MetricIdValueVec = std::vector; + // MetricNameMap: map< metric_name, vector< pair > > + using MetricNameMap = std::unordered_map; + + MetricNameMap metric_name_map; + + for (const auto& report_item : *cur_report) { + const std::string& metric_name = report_item.first.GetName(); + metric_name_map[metric_name].push_back(&report_item); + } + + std::string body; + // fill MetricFamilyVec + for (const auto& metric_item : metric_name_map) { + ResponseBodyBuilder::BuildHelp(&body, metric_item.first, metric_item.first); + ResponseBodyBuilder::BuildType(&body, metric_item.first, "gauge"); + + const MetricIdValueVec& metric_vec = metric_item.second; + + std::for_each(metric_vec.begin(), metric_vec.end(), [&body, this](const MetricIdValuePair* x) { + ResponseBodyBuilder::BuildMetricItem(&body, x->first, x->second); + }); + } + VLOG(12) << "[MetricHttpServer] Get Response Body cost: " << get_millis() - start_ts << " ms"; + return std::move(body); } -} // end namespace tera - -/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ +} // end namespace tera +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/common/metric/metric_http_server.h b/src/common/metric/metric_http_server.h index a0b735450..b949aa68f 100644 --- a/src/common/metric/metric_http_server.h +++ b/src/common/metric/metric_http_server.h @@ -4,81 +4,75 @@ #ifndef TERA_COMMON_METRIC_METRIC_HTTP_SERVER_H_ #define TERA_COMMON_METRIC_METRIC_HTTP_SERVER_H_ - + #include -#include +#include +#include #include - -#include "mongoose.h" + +#include "mongoose.h" #include "common/metric/collector_report_publisher.h" -#include "common/mutex.h" -#include "common/thread.h" - +#include "common/mutex.h" + namespace tera { struct ResponseBodyBuilder { - static void BuildType(std::string* body, - const std::string& metric_name, - const std::string& type); + static void BuildType(std::string* body, const std::string& metric_name, const std::string& type); - static void BuildHelp(std::string* body, - const std::string& metric_name, - const std::string& help_info); + static void BuildHelp(std::string* body, const std::string& metric_name, + const std::string& help_info); - static void BuildMetricItem(std::string* body, - const MetricId& metric_id, - const ReportItem& report_item); + static void BuildMetricItem(std::string* body, const MetricId& metric_id, + const ReportItem& report_item); }; // a simple http server based on mongoose class MetricHttpServer { -public: - MetricHttpServer(); - ~MetricHttpServer(); - -private: - // disallow copy - MetricHttpServer(const MetricHttpServer&) = delete; - MetricHttpServer& operator = (const MetricHttpServer&) = delete; - -private: - static void EventHandler(struct mg_connection *conn, int event, void *p_data); - -public: - bool Start(int32_t listen_port); - void Stop(); - - bool IsRunning() const { - return is_running_.load(); - } - -private: - void BackgroundWorkWrapper(); - - // http request handlers - void HandleHttpRequest(struct mg_connection *conn, struct http_message *request); - void HandleMetrics(struct mg_connection *conn, struct http_message *request); - void HandleUnknowUri(struct mg_connection *conn, struct http_message *request); - - // prometheus handle functions - std::string GetResponseBody(); - -private: - mutable Mutex mutex_; - std::atomic is_running_; - std::atomic stop_; - int32_t listen_port_; - - // background thread - common::Thread bg_thread_; - - // mongoose info - struct mg_mgr mongoose_mgr_; -}; - -} // end namespace tera - -#endif // TERA_COMMON_METRIC_METRIC_HTTP_SERVER_H_ - + public: + MetricHttpServer(); + ~MetricHttpServer(); + + private: + // disallow copy + MetricHttpServer(const MetricHttpServer&) = delete; + MetricHttpServer& operator=(const MetricHttpServer&) = delete; + + private: + static void EventHandler(struct mg_connection* conn, int event, void* p_data); + + public: + bool Start(int32_t listen_port); + void Stop(); + + bool IsRunning() const { return is_running_.load(); } + + private: + void BackgroundWorkWrapper(); + + // http request handlers + void HandleHttpRequest(struct mg_connection* conn, struct http_message* request); + void HandleMetrics(struct mg_connection* conn, struct http_message* request); + void HandleUnknowUri(struct mg_connection* conn, struct http_message* request); + + // prometheus handle functions + std::string GetResponseBody(); + + private: + mutable Mutex mutex_; + std::atomic is_running_; + std::atomic stop_; + int32_t listen_port_; + + // background thread + std::thread bg_thread_; + + // mongoose info + struct mg_mgr mongoose_mgr_; +}; + +} // end namespace tera + +#endif // TERA_COMMON_METRIC_METRIC_HTTP_SERVER_H_ + /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/common/metric/metric_id.cc b/src/common/metric/metric_id.cc index b77ee095c..bed512112 100644 --- a/src/common/metric/metric_id.cc +++ b/src/common/metric/metric_id.cc @@ -1,156 +1,145 @@ // Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved // Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. +// found in the LICENSE file. -#include "common/metric/metric_id.h" +#include "common/metric/metric_id.h" #include #include - -namespace tera { + +namespace tera { static const std::string kInvalidLabel = ""; MetricId::MetricId(const std::string& name, const std::string& label_str) { - ParseFromStringWithThrow(name, label_str, this); + ParseFromStringWithThrow(name, label_str, this); } static std::string MetricLabelsToString(const MetricLabels& label_map) { - if (label_map.empty()) { - return ""; - } - std::ostringstream label_oss; - auto iter = label_map.begin(); - // do not append kLabelPairDelimiter for the first pair - label_oss << iter->first << kLabelKVDelimiter << iter->second; - ++iter; - - for (; iter != label_map.end(); ++iter) { - label_oss << kLabelPairDelimiter << iter->first << kLabelKVDelimiter << iter->second; - } - return label_oss.str(); + if (label_map.empty()) { + return ""; + } + std::ostringstream label_oss; + auto iter = label_map.begin(); + // do not append kLabelPairDelimiter for the first pair + label_oss << iter->first << kLabelKVDelimiter << iter->second; + ++iter; + + for (; iter != label_map.end(); ++iter) { + label_oss << kLabelPairDelimiter << iter->first << kLabelKVDelimiter << iter->second; + } + return label_oss.str(); } std::string MetricId::GenMetricIdStr(const std::string& name, const MetricLabels& label_map) { - if (label_map.empty()) { - return name; - } - - std::ostringstream id_oss; - id_oss << name << kNameLabelsDelimiter << MetricLabelsToString(label_map); - return id_oss.str(); + if (label_map.empty()) { + return name; + } + + std::ostringstream id_oss; + id_oss << name << kNameLabelsDelimiter << MetricLabelsToString(label_map); + return id_oss.str(); } -void MetricId::ParseFromStringWithThrow(const std::string& name, - const std::string& label_str, +void MetricId::ParseFromStringWithThrow(const std::string& name, const std::string& label_str, MetricId* metric_id) throw(std::invalid_argument) { - if (metric_id == NULL) { - throw std::invalid_argument("metric_id is invalid"); - } - if (name.empty()) { - throw std::invalid_argument("metric name is invalid"); - } - - metric_id->name_ = name; - metric_id->labels_.clear(); - - if (label_str.empty()) { - metric_id->id_str_ = metric_id->name_; - return; - } - - // label_str format: k1:v1,k2:v2,... - std::vector label_str_splits; - boost::algorithm::split(label_str_splits, label_str, - boost::algorithm::is_any_of(kLabelPairDelimiter)); - for (const std::string& label_kv_str : label_str_splits) { - std::vector label_kv_splits; - boost::algorithm::split(label_kv_splits, label_kv_str, - boost::algorithm::is_any_of(kLabelKVDelimiter)); - if (label_kv_splits.size() != 2) { - // invalid label str format - throw std::invalid_argument("label_str"); - } - - metric_id->labels_.insert(std::make_pair(label_kv_splits[0], label_kv_splits[1])); - } - - // gen identifier string - metric_id->id_str_ = metric_id->name_ + kNameLabelsDelimiter + label_str; + if (metric_id == NULL) { + throw std::invalid_argument("metric_id is invalid"); + } + if (name.empty()) { + throw std::invalid_argument("metric name is invalid"); + } + + metric_id->name_ = name; + metric_id->labels_.clear(); + + if (label_str.empty()) { + metric_id->id_str_ = metric_id->name_; return; + } + + // label_str format: k1:v1,k2:v2,... + std::vector label_str_splits; + boost::algorithm::split(label_str_splits, label_str, + boost::algorithm::is_any_of(kLabelPairDelimiter)); + for (const std::string& label_kv_str : label_str_splits) { + std::vector label_kv_splits; + boost::algorithm::split(label_kv_splits, label_kv_str, + boost::algorithm::is_any_of(kLabelKVDelimiter)); + if (label_kv_splits.size() != 2) { + // invalid label str format + throw std::invalid_argument("label_str"); + } + + metric_id->labels_.insert(std::make_pair(label_kv_splits[0], label_kv_splits[1])); + } + + // gen identifier string + metric_id->id_str_ = metric_id->name_ + kNameLabelsDelimiter + label_str; + return; } -bool MetricId::ParseFromString(const std::string& name, - const std::string& label_str, +bool MetricId::ParseFromString(const std::string& name, const std::string& label_str, MetricId* metric_id) throw() { - try { - ParseFromStringWithThrow(name, label_str, metric_id); - return true; - } catch (std::invalid_argument&) { - return false; - } + try { + ParseFromStringWithThrow(name, label_str, metric_id); + return true; + } catch (std::invalid_argument&) { + return false; + } } MetricId::MetricId() : name_(), labels_(), id_str_() {} -MetricId::MetricId(const std::string& name) - : name_(name), - labels_(), - id_str_(GenMetricIdStr(name_, labels_)) {} +MetricId::MetricId(const std::string& name) + : name_(name), labels_(), id_str_(GenMetricIdStr(name_, labels_)) {} MetricId::MetricId(const std::string& name, const MetricLabels& label_map) - : name_(name), - labels_(label_map), - id_str_(GenMetricIdStr(name_, labels_)) {} + : name_(name), labels_(label_map), id_str_(GenMetricIdStr(name_, labels_)) {} MetricId::MetricId(const MetricId& other) - : name_(other.name_), - labels_(other.labels_), - id_str_(other.id_str_) {} - + : name_(other.name_), labels_(other.labels_), id_str_(other.id_str_) {} + MetricId::~MetricId() {} -MetricId& MetricId::operator = (const MetricId& other) { - name_ = other.name_; - labels_ = other.labels_; - id_str_ = other.id_str_; - return *this; +MetricId& MetricId::operator=(const MetricId& other) { + name_ = other.name_; + labels_ = other.labels_; + id_str_ = other.id_str_; + return *this; } - + const std::string& MetricId::GetLabel(const std::string& name) const { - auto iter = labels_.find(name); - if (iter == labels_.end()) { - return kInvalidLabel; - } else { - return iter->second; - } -} - + auto iter = labels_.find(name); + if (iter == labels_.end()) { + return kInvalidLabel; + } else { + return iter->second; + } +} + bool MetricId::ExistLabel(const std::string& name) const { - return labels_.find(name) != labels_.end(); + return labels_.find(name) != labels_.end(); } bool MetricId::CheckLabel(const std::string& name, const std::string& expected_value) const { - auto iter = labels_.find(name); - if (iter == labels_.end()) { - return false; - } else { - return (iter->second == expected_value); - } -} + auto iter = labels_.find(name); + if (iter == labels_.end()) { + return false; + } else { + return (iter->second == expected_value); + } +} LabelStringBuilder& LabelStringBuilder::Append(const std::string& name, const std::string& value) { - if (!name.empty() && !value.empty()) { - labels_[name] = value; - } - return *this; + if (!name.empty() && !value.empty()) { + labels_[name] = value; + } + return *this; } -std::string LabelStringBuilder::ToString() const { - return MetricLabelsToString(labels_); -} - -} // end namespace tera - -/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ +std::string LabelStringBuilder::ToString() const { return MetricLabelsToString(labels_); } + +} // end namespace tera +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/common/metric/metric_id.h b/src/common/metric/metric_id.h index cff30448e..e3f20ac71 100644 --- a/src/common/metric/metric_id.h +++ b/src/common/metric/metric_id.h @@ -3,15 +3,15 @@ // found in the LICENSE file. #ifndef TERA_COMMON_METRIC_METRIC_ID_H_ -#define TERA_COMMON_METRIC_METRIC_ID_H_ - -#include +#define TERA_COMMON_METRIC_METRIC_ID_H_ + +#include #include #include #include -#include - -namespace tera { +#include + +namespace tera { // use ordered map to ensure the order of labels in id_str typedef std::map MetricLabels; @@ -26,118 +26,108 @@ const char* const kLabelKVDelimiter = ":"; // // Can get name and labels from MetricId class MetricId { -public: - MetricId(); - explicit MetricId(const std::string& name); - MetricId(const std::string& name, const MetricLabels& label_map); - MetricId(const std::string& name, const std::string& label_str); - MetricId(const MetricId& other); - ~MetricId(); - - MetricId& operator = (const MetricId& other); - - bool IsValid() const { - return !name_.empty(); - } - - const std::string& GetName() const { - return name_; - } - - const MetricLabels& GetLabelMap() const { - return labels_; - } - - const std::string& ToString() const { - return id_str_; - } - - // access labels - const std::string& GetLabel(const std::string& name) const; - bool ExistLabel(const std::string& name) const; - bool CheckLabel(const std::string& name, const std::string& expected_value) const; - -public: - // Parse MetricId from name and formated label string - // nothrow std::invalid_argument if got illegal format arguments - static void ParseFromStringWithThrow(const std::string& name, - const std::string& label_str, - MetricId* metric_id) throw(std::invalid_argument); - // Parse MetricId from name and formated label string - // nothrow version - static bool ParseFromString(const std::string& name, - const std::string& label_str, - MetricId* metric_id) throw(); - -private: - static std::string GenMetricIdStr(const std::string& name, const MetricLabels& label_map); -private: - std::string name_; - MetricLabels labels_; - std::string id_str_; -}; - + public: + MetricId(); + explicit MetricId(const std::string& name); + MetricId(const std::string& name, const MetricLabels& label_map); + MetricId(const std::string& name, const std::string& label_str); + MetricId(const MetricId& other); + ~MetricId(); + + MetricId& operator=(const MetricId& other); + + bool IsValid() const { return !name_.empty(); } + + const std::string& GetName() const { return name_; } + + const MetricLabels& GetLabelMap() const { return labels_; } + + const std::string& ToString() const { return id_str_; } + + // access labels + const std::string& GetLabel(const std::string& name) const; + bool ExistLabel(const std::string& name) const; + bool CheckLabel(const std::string& name, const std::string& expected_value) const; + + public: + // Parse MetricId from name and formated label string + // nothrow std::invalid_argument if got illegal format arguments + static void ParseFromStringWithThrow(const std::string& name, const std::string& label_str, + MetricId* metric_id) throw(std::invalid_argument); + // Parse MetricId from name and formated label string + // nothrow version + static bool ParseFromString(const std::string& name, const std::string& label_str, + MetricId* metric_id) throw(); + + private: + static std::string GenMetricIdStr(const std::string& name, const MetricLabels& label_map); + + private: + std::string name_; + MetricLabels labels_; + std::string id_str_; +}; + // relational operators // make MetricId can be the key of std::map and std::unordered_map -inline bool operator == (const MetricId& id1, const MetricId& id2) { - return id1.ToString() == id2.ToString(); +inline bool operator==(const MetricId& id1, const MetricId& id2) { + return id1.ToString() == id2.ToString(); } -inline bool operator != (const MetricId& id1, const MetricId& id2) { - return id1.ToString() != id2.ToString(); +inline bool operator!=(const MetricId& id1, const MetricId& id2) { + return id1.ToString() != id2.ToString(); } -inline bool operator < (const MetricId& id1, const MetricId& id2) { - return id1.ToString() < id2.ToString(); +inline bool operator<(const MetricId& id1, const MetricId& id2) { + return id1.ToString() < id2.ToString(); } -inline bool operator <= (const MetricId& id1, const MetricId& id2) { - return id1.ToString() <= id2.ToString(); +inline bool operator<=(const MetricId& id1, const MetricId& id2) { + return id1.ToString() <= id2.ToString(); } -inline bool operator > (const MetricId& id1, const MetricId& id2) { - return id1.ToString() > id2.ToString(); +inline bool operator>(const MetricId& id1, const MetricId& id2) { + return id1.ToString() > id2.ToString(); } -inline bool operator >= (const MetricId& id1, const MetricId& id2) { - return id1.ToString() >= id2.ToString(); +inline bool operator>=(const MetricId& id1, const MetricId& id2) { + return id1.ToString() >= id2.ToString(); } // A helper class to build formated label string -// Usage: label_str = LabelStringBuilder().Append("k1","v1").Append("k2","v2").ToString(); +// Usage: label_str = +// LabelStringBuilder().Append("k1","v1").Append("k2","v2").ToString(); class LabelStringBuilder { -public: - LabelStringBuilder() {} - ~LabelStringBuilder() {} - - // append a k-v pair - LabelStringBuilder& Append(const std::string& name, const std::string& value); - - // build formated string - std::string ToString() const; - -private: - MetricLabels labels_; + public: + LabelStringBuilder() {} + ~LabelStringBuilder() {} + + // append a k-v pair + LabelStringBuilder& Append(const std::string& name, const std::string& value); + + // build formated string + std::string ToString() const; + + private: + MetricLabels labels_; }; - -} // end namespace tera + +} // end namespace tera namespace std { // specialization std::hash for tera::MetricId // make MetricId can be the key of unordered_map -template<> +template <> struct hash<::tera::MetricId> { -public: - size_t operator () (const ::tera::MetricId& id) const { - return str_hash_(id.ToString()); - } -private: - hash str_hash_; + public: + size_t operator()(const ::tera::MetricId& id) const { return str_hash_(id.ToString()); } + + private: + hash str_hash_; }; -} // end namespace std - -#endif // TERA_COMMON_METRIC_METRIC_ID_H_ - -/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ +} // end namespace std +#endif // TERA_COMMON_METRIC_METRIC_ID_H_ + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/common/metric/percentile_counter.h b/src/common/metric/percentile_counter.h index 06c4b8ed6..e1840bd6f 100644 --- a/src/common/metric/percentile_counter.h +++ b/src/common/metric/percentile_counter.h @@ -7,98 +7,83 @@ #include "common/metric/collector_report_publisher.h" #include "leveldb/util/histogram.h" -namespace tera{ +namespace tera { class PercentileCounter; class PercentileCollector : public Collector { -public: - virtual ~PercentileCollector() {} - // return a instant value of the metric for tera to dump log and other usage - PercentileCollector(PercentileCounter* pc): - pc_(pc) {}; + public: + virtual ~PercentileCollector() {} + // return a instant value of the metric for tera to dump log and other usage + PercentileCollector(PercentileCounter* pc) : pc_(pc){}; - inline virtual int64_t Collect() override; + inline virtual int64_t Collect() override; -private: - PercentileCounter* pc_; + private: + PercentileCounter* pc_; }; class PercentileCounter { -public: - // create a metric with name and label - // label_str format: k1:v1,k2:v2,... - // can build by LabelStringBuilder().Append("k1", "v1").Append("k2","v2").ToString(); - PercentileCounter(const std::string& name, - const std::string& label_str, - double percentile, - SubscriberTypeList type_list = {SubscriberType::LATEST}): - percentile_(percentile), - registered_(false) { - // parse metric id - MetricId::ParseFromStringWithThrow(name, label_str, &metric_id_); - // legal label str format, do register - registered_ = CollectorReportPublisher::GetInstance().AddCollector( - metric_id_, - std::unique_ptr(new PercentileCollector(this)), - type_list); - } + public: + // create a metric with name and label + // label_str format: k1:v1,k2:v2,... + // can build by LabelStringBuilder().Append("k1", + // "v1").Append("k2","v2").ToString(); + PercentileCounter(const std::string& name, const std::string& label_str, double percentile, + SubscriberTypeList type_list = {SubscriberType::LATEST}) + : percentile_(percentile), registered_(false) { + // parse metric id + MetricId::ParseFromStringWithThrow(name, label_str, &metric_id_); + // legal label str format, do register + registered_ = CollectorReportPublisher::GetInstance().AddCollector( + metric_id_, std::unique_ptr(new PercentileCollector(this)), type_list); + } - PercentileCounter(const std::string& name, - double percentile, - SubscriberTypeList type_list = {SubscriberType::LATEST}): - percentile_(percentile), - registered_(false) { - // parse metric id - MetricId::ParseFromStringWithThrow(name, "", &metric_id_); - // legal label str format, do register - registered_ = CollectorReportPublisher::GetInstance().AddCollector( - metric_id_, - std::unique_ptr(new PercentileCollector(this)), - type_list); - } + PercentileCounter(const std::string& name, double percentile, + SubscriberTypeList type_list = {SubscriberType::LATEST}) + : percentile_(percentile), registered_(false) { + // parse metric id + MetricId::ParseFromStringWithThrow(name, "", &metric_id_); + // legal label str format, do register + registered_ = CollectorReportPublisher::GetInstance().AddCollector( + metric_id_, std::unique_ptr(new PercentileCollector(this)), type_list); + } - virtual ~PercentileCounter() { - if (registered_) { - // do unregister - CollectorReportPublisher::GetInstance().DeleteCollector(metric_id_); - } + virtual ~PercentileCounter() { + if (registered_) { + // do unregister + CollectorReportPublisher::GetInstance().DeleteCollector(metric_id_); } + } - bool IsRegistered() const { - return registered_; - } + bool IsRegistered() const { return registered_; } - int64_t Get() { - double percentile_value = hist_.Percentile(percentile_); - if (isnan(percentile_value)) { - return -1; - } - return (int64_t) percentile_value; + int64_t Get() { + double percentile_value = hist_.Percentile(percentile_); + if (isnan(percentile_value)) { + return -1; } + return (int64_t)percentile_value; + } - void Clear() { - hist_.Clear(); - } + void Clear() { hist_.Clear(); } - void Append(int64_t v) { - hist_.Add((double)v); - } + void Append(int64_t v) { hist_.Add((double)v); } - //Never copyied - PercentileCounter(const PercentileCounter&) = delete; - PercentileCounter& operator=(const PercentileCounter&) = delete; + // Never copyied + PercentileCounter(const PercentileCounter&) = delete; + PercentileCounter& operator=(const PercentileCounter&) = delete; -private: - double percentile_; - bool registered_; - MetricId metric_id_; - leveldb::Histogram hist_; + private: + double percentile_; + bool registered_; + MetricId metric_id_; + leveldb::Histogram hist_; }; int64_t PercentileCollector::Collect() { - int64_t val = (int64_t)pc_->Get(); - pc_->Clear(); - return val; + int64_t val = (int64_t)pc_->Get(); + pc_->Clear(); + return val; } } \ No newline at end of file diff --git a/src/common/metric/prometheus_subscriber.cc b/src/common/metric/prometheus_subscriber.cc index 8180f008b..bce9f8e3e 100644 --- a/src/common/metric/prometheus_subscriber.cc +++ b/src/common/metric/prometheus_subscriber.cc @@ -7,141 +7,135 @@ #include "common/metric/prometheus_subscriber.h" #include "common/metric/collector_report.h" -DEFINE_int64(tera_metric_hold_max_time, 300000, "interval of prometheus collectors push a value to hold_queue in ms"); -DEFINE_bool(tera_prometheus_subscriber_dump_log, false, "Whether to dump prometheus subscriber log"); +DEFINE_int64(tera_metric_hold_max_time, 300000, + "interval of prometheus collectors push a value to hold_queue in ms"); +DEFINE_bool(tera_prometheus_subscriber_dump_log, false, + "Whether to dump prometheus subscriber log"); -namespace tera{ +namespace tera { void PrometheusSubscriber::OnUpdate(std::shared_ptr report) { - int64_t value = report->FindMetricValue(metric_id_); - Append(report->timestamp_ms, value); + int64_t value = report->FindMetricValue(metric_id_); + Append(report->timestamp_ms, value); } ReportItem PrometheusSubscriber::Collect() { - ReportItem ret; - std::shared_ptr tera_queue_ptr; - int64_t last_collect_ts; - - { - std::lock_guard lock_mtx(mtx_); - if (tera_queue_ptr_->empty()) { - LOG_IF(WARNING, FLAGS_tera_prometheus_subscriber_dump_log) << "[PROMETHEUS SUBSCRIBER] Empty Tera Queue"; - return ret; - } - - last_collect_ts = last_collect_ts_; - last_collect_ts_ = tera_queue_ptr_->back().first; - tera_queue_ptr = tera_queue_ptr_; - tera_queue_ptr_.reset(new TimeValueQueue); + ReportItem ret; + std::shared_ptr tera_queue_ptr; + int64_t last_collect_ts; + + { + std::lock_guard lock_mtx(mtx_); + if (tera_queue_ptr_->empty()) { + LOG_IF(WARNING, FLAGS_tera_prometheus_subscriber_dump_log) + << "[PROMETHEUS SUBSCRIBER] Empty Tera Queue"; + return ret; } - int64_t value = GetSpecificValue(tera_queue_ptr); + last_collect_ts = last_collect_ts_; + last_collect_ts_ = tera_queue_ptr_->back().first; + tera_queue_ptr = tera_queue_ptr_; + tera_queue_ptr_.reset(new TimeValueQueue); + } - if (type_ == SubscriberType::QPS || - type_ == SubscriberType::THROUGHPUT) { - int64_t time_interval = tera_queue_ptr->back().first - last_collect_ts; - value = (time_interval != 0 ? value * 1000 / time_interval : 0); - } + int64_t value = GetSpecificValue(tera_queue_ptr); - ret.SetTimeValue({tera_queue_ptr->back().first, value}); - ret.SetType(GetTypeName()); + if (type_ == SubscriberType::QPS || type_ == SubscriberType::THROUGHPUT) { + int64_t time_interval = tera_queue_ptr->back().first - last_collect_ts; + value = (time_interval != 0 ? value * 1000 / time_interval : 0); + } - return ret; + ret.SetTimeValue({tera_queue_ptr->back().first, value}); + ret.SetType(GetTypeName()); + + return ret; } void PrometheusSubscriber::Append(int64_t time_stamp, int64_t current_value) { - std::lock_guard mtx_lock(mtx_); - tera_queue_ptr_->emplace_back(time_stamp, current_value); - LOG_IF(WARNING, FLAGS_tera_prometheus_subscriber_dump_log) - << "[PROMETHEUS APPEND] " << metric_id_.GetName() - << "\tValue: " << current_value - << "\tQueue Size:" << tera_queue_ptr_->size(); - if (has_inited_) { - DropExpiredValue(); - } else { - last_collect_ts_ = time_stamp; - has_inited_ = true; - } + std::lock_guard mtx_lock(mtx_); + tera_queue_ptr_->emplace_back(time_stamp, current_value); + LOG_IF(WARNING, FLAGS_tera_prometheus_subscriber_dump_log) + << "[PROMETHEUS APPEND] " << metric_id_.GetName() << "\tValue: " << current_value + << "\tQueue Size:" << tera_queue_ptr_->size(); + if (has_inited_) { + DropExpiredValue(); + } else { + last_collect_ts_ = time_stamp; + has_inited_ = true; + } } std::string PrometheusSubscriber::GetTypeName() { - switch (type_) - { - + switch (type_) { case SubscriberType::LATEST: - return "Latest"; + return "Latest"; case SubscriberType::MAX: - return "Max"; + return "Max"; case SubscriberType::MIN: - return "Min"; + return "Min"; case SubscriberType::SUM: - return "Sum"; + return "Sum"; case SubscriberType::QPS: - return "Qps"; + return "Qps"; case SubscriberType::THROUGHPUT: - return "ThroughPut"; + return "ThroughPut"; default: - LOG(ERROR) << "Unknown collector type: "; - abort(); - - } - //Never reach here - return ""; + LOG(ERROR) << "Unknown collector type: "; + abort(); + } + // Never reach here + return ""; } void PrometheusSubscriber::DropExpiredValue() { - if (tera_queue_ptr_->empty()) { - return; - } - - auto last_enqueue_ts = tera_queue_ptr_->back().first; - int64_t drop_cnt = 0; - while (last_enqueue_ts - tera_queue_ptr_->front().first >= FLAGS_tera_metric_hold_max_time) { - VLOG(30) << "[PROMETHEUS SUBSCRIBER] drop last_enqueue_ts: " << last_enqueue_ts - << "first_ts: " << tera_queue_ptr_->front().first - << "name: " << metric_id_.GetName(); - - ++drop_cnt; - last_collect_ts_ = tera_queue_ptr_->front().first; - tera_queue_ptr_->pop_front(); - } - - if (drop_cnt != 0) { - VLOG(30) << "[PROMETHEUS SUBSCRIBER] drop " << drop_cnt << "values"; - } + if (tera_queue_ptr_->empty()) { + return; + } + + auto last_enqueue_ts = tera_queue_ptr_->back().first; + int64_t drop_cnt = 0; + while (last_enqueue_ts - tera_queue_ptr_->front().first >= FLAGS_tera_metric_hold_max_time) { + VLOG(30) << "[PROMETHEUS SUBSCRIBER] drop last_enqueue_ts: " << last_enqueue_ts + << "first_ts: " << tera_queue_ptr_->front().first << "name: " << metric_id_.GetName(); + + ++drop_cnt; + last_collect_ts_ = tera_queue_ptr_->front().first; + tera_queue_ptr_->pop_front(); + } + + if (drop_cnt != 0) { + VLOG(30) << "[PROMETHEUS SUBSCRIBER] drop " << drop_cnt << "values"; + } } int64_t PrometheusSubscriber::GetSpecificValue(std::shared_ptr tera_queue_ptr) { - switch (type_) - { - + switch (type_) { case SubscriberType::LATEST: - return GetLatest(tera_queue_ptr); + return GetLatest(tera_queue_ptr); case SubscriberType::MAX: - return GetMax(tera_queue_ptr); + return GetMax(tera_queue_ptr); case SubscriberType::MIN: - return GetMin(tera_queue_ptr); + return GetMin(tera_queue_ptr); - //Both of SUM, Qps, and THROUGHPUT use GetSum here + // Both of SUM, Qps, and THROUGHPUT use GetSum here case SubscriberType::SUM: case SubscriberType::QPS: case SubscriberType::THROUGHPUT: - return GetSum(tera_queue_ptr); + return GetSum(tera_queue_ptr); default: - LOG(ERROR) << "Unknown collector type"; - abort(); - - } - //Never reach here - return -1; + LOG(ERROR) << "Unknown collector type"; + abort(); + } + // Never reach here + return -1; } } diff --git a/src/common/metric/prometheus_subscriber.h b/src/common/metric/prometheus_subscriber.h index 67affa7bb..3482dc6f3 100644 --- a/src/common/metric/prometheus_subscriber.h +++ b/src/common/metric/prometheus_subscriber.h @@ -17,65 +17,60 @@ namespace tera { using TimeValueQueue = std::deque; class PrometheusSubscriber : public Subscriber { -public: - PrometheusSubscriber(const MetricId& metric_id, SubscriberType type = SubscriberType::LATEST): - tera_queue_ptr_(new TimeValueQueue), - last_collect_ts_(0), + public: + PrometheusSubscriber(const MetricId& metric_id, SubscriberType type = SubscriberType::LATEST) + : tera_queue_ptr_(new TimeValueQueue), + last_collect_ts_(0), has_inited_(false), type_(type), - metric_id_(metric_id) { } + metric_id_(metric_id) {} - ~PrometheusSubscriber() override {} - ReportItem Collect() override; - void OnUpdate(const std::shared_ptr) override; + ~PrometheusSubscriber() override {} + ReportItem Collect() override; + void OnUpdate(const std::shared_ptr) override; - std::string GetTypeName() override; + std::string GetTypeName() override; - const MetricId& GetMetricId() override { - return metric_id_; - } + const MetricId& GetMetricId() override { return metric_id_; } -private: - void Append(int64_t time_stamp, int64_t current_value); - void DropExpiredValue(); - int64_t GetSpecificValue(std::shared_ptr); + private: + void Append(int64_t time_stamp, int64_t current_value); + void DropExpiredValue(); + int64_t GetSpecificValue(std::shared_ptr); - int64_t GetMax(std::shared_ptr tera_queue_ptr) { - return std::max_element(tera_queue_ptr->begin(), tera_queue_ptr->end(), - [](const TimeValuePair& x, const TimeValuePair& y) { - return x.second < y.second; - })->second; - } + int64_t GetMax(std::shared_ptr tera_queue_ptr) { + return std::max_element(tera_queue_ptr->begin(), tera_queue_ptr->end(), + [](const TimeValuePair& x, const TimeValuePair& y) { + return x.second < y.second; + })->second; + } - int64_t GetMin(std::shared_ptr tera_queue_ptr) { - return std::min_element(tera_queue_ptr->begin(), tera_queue_ptr->end(), - [](const TimeValuePair& x, const TimeValuePair& y) { - return x.second < y.second; - })->second; - } + int64_t GetMin(std::shared_ptr tera_queue_ptr) { + return std::min_element(tera_queue_ptr->begin(), tera_queue_ptr->end(), + [](const TimeValuePair& x, const TimeValuePair& y) { + return x.second < y.second; + })->second; + } - int64_t GetLatest(std::shared_ptr tera_queue_ptr) { - return tera_queue_ptr->back().second; - } + int64_t GetLatest(std::shared_ptr tera_queue_ptr) { + return tera_queue_ptr->back().second; + } - int64_t GetSum(std::shared_ptr tera_queue_ptr) { - return std::accumulate(tera_queue_ptr->begin(), tera_queue_ptr->end(), (int64_t)0, - [](const int64_t val, const TimeValuePair& x) { - return val + x.second; - }); - } + int64_t GetSum(std::shared_ptr tera_queue_ptr) { + return std::accumulate( + tera_queue_ptr->begin(), tera_queue_ptr->end(), (int64_t)0, + [](const int64_t val, const TimeValuePair& x) { return val + x.second; }); + } - - std::mutex mtx_; - //queue of tera timestamp-value - std::shared_ptr tera_queue_ptr_; - //timestamp of prometheus_queue_ptr_'s last enqueue operation - int64_t last_collect_ts_; - //Is this class inited? - bool has_inited_; - //subscriber type - const SubscriberType type_; - MetricId metric_id_; + std::mutex mtx_; + // queue of tera timestamp-value + std::shared_ptr tera_queue_ptr_; + // timestamp of prometheus_queue_ptr_'s last enqueue operation + int64_t last_collect_ts_; + // Is this class inited? + bool has_inited_; + // subscriber type + const SubscriberType type_; + MetricId metric_id_; }; - } \ No newline at end of file diff --git a/src/common/metric/ratio_collector.h b/src/common/metric/ratio_collector.h index 3a933adef..c65b1b505 100644 --- a/src/common/metric/ratio_collector.h +++ b/src/common/metric/ratio_collector.h @@ -4,42 +4,38 @@ #ifndef TERA_COMMOM_METRIC_RATIO_COLLECTOR_H_ #define TERA_COMMOM_METRIC_RATIO_COLLECTOR_H_ - -#include + +#include #include "common/metric/collector_report_publisher.h" - -namespace tera { + +namespace tera { class RatioCollector : public Collector { -public: - explicit RatioCollector(Counter* first_counter, - Counter* second_counter, - bool is_periodic = true): - first_counter_(first_counter), - second_counter_(second_counter), - is_periodic_(is_periodic) {} - - int64_t Collect() override { - if (NULL == first_counter_ || NULL == second_counter_) { - return 0; - } else { - double ratio = (double)first_counter_->Get() / second_counter_->Get(); - if (is_periodic_) { - first_counter_->Clear(); - second_counter_->Clear(); - } - return isnan(ratio) ? -1 : static_cast(ratio * 100); - } + public: + explicit RatioCollector(Counter* first_counter, Counter* second_counter, bool is_periodic = true) + : first_counter_(first_counter), second_counter_(second_counter), is_periodic_(is_periodic) {} + + int64_t Collect() override { + if (NULL == first_counter_ || NULL == second_counter_) { + return 0; + } else { + double ratio = (double)first_counter_->Get() / second_counter_->Get(); + if (is_periodic_) { + first_counter_->Clear(); + second_counter_->Clear(); + } + return isnan(ratio) ? -1 : static_cast(ratio * 100); } -private: - Counter* const first_counter_; - Counter* const second_counter_; - const bool is_periodic_; + } + + private: + Counter* const first_counter_; + Counter* const second_counter_; + const bool is_periodic_; }; -} // end namespace tera - -#endif // TERA_COMMOM_METRIC_RATIO_COLLECTOR_H_ - -/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ +} // end namespace tera + +#endif // TERA_COMMOM_METRIC_RATIO_COLLECTOR_H_ +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/common/metric/ratio_subscriber.h b/src/common/metric/ratio_subscriber.h index 32656b46b..94e14adf8 100644 --- a/src/common/metric/ratio_subscriber.h +++ b/src/common/metric/ratio_subscriber.h @@ -8,51 +8,44 @@ namespace tera { class RatioSubscriber : public Subscriber { -public: - RatioSubscriber(const MetricId& metric_id, - std::unique_ptr&& subscriber1, - std::unique_ptr&& subscriber2): - metric_id_(metric_id), + public: + RatioSubscriber(const MetricId& metric_id, std::unique_ptr&& subscriber1, + std::unique_ptr&& subscriber2) + : metric_id_(metric_id), subscriber1_(std::move(subscriber1)), subscriber2_(std::move(subscriber2)) { - type_name_ = "Ratio: (" - + subscriber1_->GetMetricId().GetName() + ":" + subscriber1_->GetTypeName() + " / " - + subscriber2_->GetMetricId().GetName() + ":" + subscriber2_->GetTypeName() + ")"; - } - - virtual std::string GetTypeName() override { - return type_name_; - } - - virtual void OnUpdate(const std::shared_ptr report_ptr) override { - subscriber1_->OnUpdate(report_ptr); - subscriber2_->OnUpdate(report_ptr); - } - - virtual ReportItem Collect() override { - ReportItem ret; - auto subscriber1_ret = subscriber1_->Collect(); - auto subscriber2_ret = subscriber2_->Collect(); - //timestamp should be equal; - assert(subscriber1_ret.Time() == subscriber2_ret.Time()); - double ratio = (double)subscriber1_ret.Value() / subscriber2_ret.Value(); - ret.SetTimeValue({subscriber1_ret.Time(), - (isnan(ratio) ? -1 : static_cast(ratio))}); - ret.SetType(GetTypeName()); - return ret; - } - - const MetricId& GetMetricId() override { - return metric_id_; - } - - virtual ~RatioSubscriber() override {} - -private: - MetricId metric_id_; - std::unique_ptr subscriber1_; - std::unique_ptr subscriber2_; - std::string type_name_; + type_name_ = "Ratio: (" + subscriber1_->GetMetricId().GetName() + ":" + + subscriber1_->GetTypeName() + " / " + subscriber2_->GetMetricId().GetName() + ":" + + subscriber2_->GetTypeName() + ")"; + } + + virtual std::string GetTypeName() override { return type_name_; } + + virtual void OnUpdate(const std::shared_ptr report_ptr) override { + subscriber1_->OnUpdate(report_ptr); + subscriber2_->OnUpdate(report_ptr); + } + + virtual ReportItem Collect() override { + ReportItem ret; + auto subscriber1_ret = subscriber1_->Collect(); + auto subscriber2_ret = subscriber2_->Collect(); + // timestamp should be equal; + assert(subscriber1_ret.Time() == subscriber2_ret.Time()); + double ratio = (double)subscriber1_ret.Value() / subscriber2_ret.Value(); + ret.SetTimeValue({subscriber1_ret.Time(), (isnan(ratio) ? -1 : static_cast(ratio))}); + ret.SetType(GetTypeName()); + return ret; + } + + const MetricId& GetMetricId() override { return metric_id_; } + + virtual ~RatioSubscriber() override {} + + private: + MetricId metric_id_; + std::unique_ptr subscriber1_; + std::unique_ptr subscriber2_; + std::string type_name_; }; } - diff --git a/src/common/metric/subscriber.h b/src/common/metric/subscriber.h index 6b0eb394b..d6c78177c 100644 --- a/src/common/metric/subscriber.h +++ b/src/common/metric/subscriber.h @@ -4,7 +4,7 @@ // found in the LICENSE file. #include #include -#include +#include #include "common/metric/metric_id.h" namespace tera { @@ -14,51 +14,33 @@ using TimeValuePair = std::pair; class CollectorReport; struct ReportItem { - TimeValuePair time_value_pair; - std::string type; - ReportItem(TimeValuePair tvp = {-1, -1}, const std::string& t = ""): - time_value_pair(tvp), - type(t) { } + TimeValuePair time_value_pair; + std::string type; + ReportItem(TimeValuePair tvp = {-1, -1}, const std::string& t = "") + : time_value_pair(tvp), type(t) {} - int64_t Value() const { - return time_value_pair.second; - } + int64_t Value() const { return time_value_pair.second; } - int64_t Time() const { - return time_value_pair.first; - } + int64_t Time() const { return time_value_pair.first; } - void SetTimeValue(const TimeValuePair& tvp) { - time_value_pair = tvp; - } + void SetTimeValue(const TimeValuePair& tvp) { time_value_pair = tvp; } - void SetType(const std::string& tp) { - type = tp; - } + void SetType(const std::string& tp) { type = tp; } - std::string Type() const { - return type; - } + std::string Type() const { return type; } }; class Subscriber { -public: - enum class SubscriberType { - LATEST, - MAX, - MIN, - QPS, - SUM, - THROUGHPUT - }; - virtual ~Subscriber() {} - // return a pair of to Prometheus - virtual ReportItem Collect() = 0; - // Update subscriber, depends to subscriber type - // Called in CollectorReportPublisher::Report() - virtual void OnUpdate(const std::shared_ptr) = 0; - virtual std::string GetTypeName() = 0; - virtual const MetricId& GetMetricId() = 0; + public: + enum class SubscriberType { LATEST, MAX, MIN, QPS, SUM, THROUGHPUT }; + virtual ~Subscriber() {} + // return a pair of to Prometheus + virtual ReportItem Collect() = 0; + // Update subscriber, depends to subscriber type + // Called in CollectorReportPublisher::Report() + virtual void OnUpdate(const std::shared_ptr) = 0; + virtual std::string GetTypeName() = 0; + virtual const MetricId& GetMetricId() = 0; }; using SubscriberType = Subscriber::SubscriberType; diff --git a/src/common/metric/tcmalloc_collector.cc b/src/common/metric/tcmalloc_collector.cc new file mode 100644 index 000000000..10c2403b2 --- /dev/null +++ b/src/common/metric/tcmalloc_collector.cc @@ -0,0 +1,38 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include + +#include "common/metric/tcmalloc_collector.h" +#include "common/timer.h" +#include "gflags/gflags.h" + +DEFINE_int64(tera_tcmalloc_collect_period_second, 60, + "tcmalloc metrics checking period (in second)"); + +namespace tera { +int64_t TcmallocCollector::Collect() { + auto current_time_ms = get_millis(); + if (current_time_ms - last_check_ms_ >= FLAGS_tera_tcmalloc_collect_period_second * 1000) { + size_t ret = 0; + switch (type_) { + case TcmallocMetricType::kInUse: { + MallocExtension::instance()->GetNumericProperty("generic.current_allocated_bytes", &ret); + break; + } + case TcmallocMetricType::kCentralCacheFreeList: { + MallocExtension::instance()->GetNumericProperty("tcmalloc.central_cache_free_bytes", &ret); + break; + } + case TcmallocMetricType::kThreadCacheFreeList: { + MallocExtension::instance()->GetNumericProperty("tcmalloc.thread_cache_free_bytes", &ret); + break; + } + } + val_ = static_cast(ret); + last_check_ms_ = current_time_ms; + } + return val_; +} +} // end namespace tera diff --git a/src/common/metric/tcmalloc_collector.h b/src/common/metric/tcmalloc_collector.h new file mode 100644 index 000000000..cdc8a6ce3 --- /dev/null +++ b/src/common/metric/tcmalloc_collector.h @@ -0,0 +1,45 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +#pragma once + +#include "common/metric/collector_report_publisher.h" +#include "common/metric/collector.h" + +namespace tera { + +enum class TcmallocMetricType { + kInUse, + kCentralCacheFreeList, + kThreadCacheFreeList, +}; + +class TcmallocCollector : public Collector { + public: + explicit TcmallocCollector(const TcmallocMetricType& type) : type_(type) {} + int64_t Collect() override; + + private: + TcmallocMetricType type_; + int64_t val_ = 0; + int64_t last_check_ms_ = 0; +}; + +inline void RegisterTcmallocCollectors() { + auto& instance = CollectorReportPublisher::GetInstance(); + + instance.AddCollector( + MetricId("tcmalloc_allocated_bytes"), + std::unique_ptr(new TcmallocCollector(TcmallocMetricType::kInUse)), + {SubscriberType::LATEST}); + instance.AddCollector( + MetricId("tcmalloc_central_free_list_bytes"), + std::unique_ptr(new TcmallocCollector(TcmallocMetricType::kCentralCacheFreeList)), + {SubscriberType::LATEST}); + instance.AddCollector( + MetricId("tcmalloc_thread_free_list_bytes"), + std::unique_ptr(new TcmallocCollector(TcmallocMetricType::kThreadCacheFreeList)), + {SubscriberType::LATEST}); +} +} // namespace tera diff --git a/src/common/mutex.h b/src/common/mutex.h old mode 100755 new mode 100644 index 381a69218..d56f7e6e6 --- a/src/common/mutex.h +++ b/src/common/mutex.h @@ -3,9 +3,7 @@ // found in the LICENSE file. // // Author: yanshiguang02@baidu.com - -#ifndef TERA_COMMON_MUTEX_H_ -#define TERA_COMMON_MUTEX_H_ +#pragma once #include #include @@ -19,165 +17,152 @@ namespace common { // #define MUTEX_DEBUG -static void PthreadCall(const char* label, int result) { - if (result != 0) { - fprintf(stderr, "pthread %s: %s\n", label, strerror(result)); - abort(); - } +static void PthreadCall(const char *label, int result) { + if (result != 0) { + fprintf(stderr, "pthread %s: %s\n", label, strerror(result)); + abort(); + } } // A Mutex represents an exclusive lock. class Mutex { -public: - Mutex() - : owner_(0), msg_(NULL), msg_threshold_(0), lock_time_(0) { - pthread_mutexattr_t attr; - PthreadCall("init mutexattr", pthread_mutexattr_init(&attr)); - PthreadCall("set mutexattr", pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK)); - PthreadCall("init mutex", pthread_mutex_init(&mu_, &attr)); - PthreadCall("destroy mutexattr", pthread_mutexattr_destroy(&attr)); - } - ~Mutex() { - PthreadCall("destroy mutex", pthread_mutex_destroy(&mu_)); - } - // Lock the mutex. - // Will deadlock if the mutex is already locked by this thread. - void Lock(const char* msg = NULL, int64_t msg_threshold = 100) { - #ifdef MUTEX_DEBUG - int64_t s = 0; - if (msg) { - s = get_micros(); - } - #endif - PthreadCall("mutex lock", pthread_mutex_lock(&mu_)); - AfterLock(msg, msg_threshold); - #ifdef MUTEX_DEBUG_ - if (msg && lock_time_ - s > msg_threshold) { - printf("%s wait lock %.3f ms\n", msg, (lock_time_ -s) / 1000.0); - } - #endif - } - // Unlock the mutex. - void Unlock() { - BeforeUnlock(); - PthreadCall("mutex unlock", pthread_mutex_unlock(&mu_)); - } - // Crash if this thread does not hold this mutex. - void AssertHeld() { - if (0 == pthread_equal(owner_, pthread_self())) { - abort(); - } - } - -private: - void AfterLock(const char* msg, int64_t msg_threshold) { - #ifdef MUTEX_DEBUG - msg_ = msg; - msg_threshold_ = msg_threshold; - if (msg_) { - lock_time_ = get_micros(); - } - #endif - owner_ = pthread_self(); - } - void BeforeUnlock() { - #ifdef MUTEX_DEBUG - if (msg_ && get_micros() - lock_time_ > msg_threshold_) { - printf("%s locked %.3f ms\n", - msg_, (get_micros() - lock_time_) / 1000.0); - } - msg_ = NULL; - #endif - owner_ = 0; - } - -private: - friend class CondVar; - Mutex(const Mutex&); - void operator=(const Mutex&); - pthread_mutex_t mu_; - pthread_t owner_; - const char* msg_; - int64_t msg_threshold_; - int64_t lock_time_; + public: + Mutex() : owner_(0), msg_(NULL), msg_threshold_(0), lock_time_(0) { + pthread_mutexattr_t attr; + PthreadCall("init mutexattr", pthread_mutexattr_init(&attr)); + PthreadCall("set mutexattr", pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK)); + PthreadCall("init mutex", pthread_mutex_init(&mu_, &attr)); + PthreadCall("destroy mutexattr", pthread_mutexattr_destroy(&attr)); + } + ~Mutex() { PthreadCall("destroy mutex", pthread_mutex_destroy(&mu_)); } + // Lock the mutex. + // Will deadlock if the mutex is already locked by this thread. + void Lock(const char *msg = NULL, int64_t msg_threshold = 100) { +#ifdef MUTEX_DEBUG + int64_t s = 0; + if (msg) { + s = get_micros(); + } +#endif + PthreadCall("mutex lock", pthread_mutex_lock(&mu_)); + AfterLock(msg, msg_threshold); +#ifdef MUTEX_DEBUG_ + if (msg && lock_time_ - s > msg_threshold) { + printf("%s wait lock %.3f ms\n", msg, (lock_time_ - s) / 1000.0); + } +#endif + } + // Unlock the mutex. + void Unlock() { + BeforeUnlock(); + PthreadCall("mutex unlock", pthread_mutex_unlock(&mu_)); + } + // Crash if this thread does not hold this mutex. + void AssertHeld() { + if (0 == pthread_equal(owner_, pthread_self())) { + abort(); + } + } + + private: + void AfterLock(const char *msg, int64_t msg_threshold) { +#ifdef MUTEX_DEBUG + msg_ = msg; + msg_threshold_ = msg_threshold; + if (msg_) { + lock_time_ = get_micros(); + } +#endif + owner_ = pthread_self(); + } + void BeforeUnlock() { +#ifdef MUTEX_DEBUG + if (msg_ && get_micros() - lock_time_ > msg_threshold_) { + printf("%s locked %.3f ms\n", msg_, (get_micros() - lock_time_) / 1000.0); + } + msg_ = NULL; +#endif + owner_ = 0; + } + + private: + friend class CondVar; + Mutex(const Mutex &); + void operator=(const Mutex &); + pthread_mutex_t mu_; + pthread_t owner_; + const char *msg_; + int64_t msg_threshold_; + int64_t lock_time_; }; // Mutex lock guard class MutexLock { -public: - explicit MutexLock(Mutex *mu, const char* msg = NULL) : mu_(mu) { - mu_->Lock(msg); - } - ~MutexLock() { - mu_->Unlock(); - } -private: - Mutex *const mu_; - MutexLock(const MutexLock&); - void operator=(const MutexLock&); + public: + explicit MutexLock(Mutex *mu, const char *msg = NULL) : mu_(mu) { mu_->Lock(msg); } + ~MutexLock() { mu_->Unlock(); } + + private: + Mutex *const mu_; + MutexLock(const MutexLock &); + void operator=(const MutexLock &); }; // Conditional variable class CondVar { -public: - explicit CondVar(Mutex* mu) : mu_(mu) { - // use monotonic clock - PthreadCall("condattr init ", pthread_condattr_init(&attr_)); - PthreadCall("condattr setclock ", pthread_condattr_setclock(&attr_, CLOCK_MONOTONIC)); - PthreadCall("condvar init with attr", pthread_cond_init(&cond_, &attr_)); - } - ~CondVar() { - PthreadCall("condvar destroy", pthread_cond_destroy(&cond_)); - PthreadCall("condattr destroy", pthread_condattr_destroy(&attr_)); - } - void Wait(const char* msg = NULL) { - int64_t msg_threshold = mu_->msg_threshold_; - mu_->BeforeUnlock(); - PthreadCall("condvar wait", pthread_cond_wait(&cond_, &mu_->mu_)); - mu_->AfterLock(msg, msg_threshold); - } - // Time wait in us - // timeout < 0 would cause ETIMEOUT and return false immediately - bool TimeWaitInUs(int64_t timeout, const char* msg = NULL) { - // ref: http://www.qnx.com/developers/docs/6.5.0SP1.update/com.qnx.doc.neutrino_lib_ref/p/pthread_cond_timedwait.html - struct timespec ts; - clock_gettime(CLOCK_MONOTONIC, &ts); - int64_t nsec = timeout * 1000 + ts.tv_nsec; - - assert(nsec > 0); - - ts.tv_sec += nsec / 1000000000; - ts.tv_nsec = nsec % 1000000000; - - int64_t msg_threshold = mu_->msg_threshold_; - mu_->BeforeUnlock(); - int ret = pthread_cond_timedwait(&cond_, &mu_->mu_, &ts); - mu_->AfterLock(msg, msg_threshold); - return (ret == 0); - } - // Time wait in ms - // timeout < 0 would cause ETIMEOUT and return false immediately - bool TimeWait(int timeout, const char* msg = NULL) { - return TimeWaitInUs(timeout * 1000LL, msg); - } - void Signal() { - PthreadCall("signal", pthread_cond_signal(&cond_)); - } - void Broadcast() { - PthreadCall("broadcast", pthread_cond_broadcast(&cond_)); - } - -private: - CondVar(const CondVar&); - void operator=(const CondVar&); - Mutex* mu_; - pthread_cond_t cond_; - pthread_condattr_t attr_; + public: + explicit CondVar(Mutex *mu) : mu_(mu) { + // use monotonic clock + PthreadCall("condattr init ", pthread_condattr_init(&attr_)); + PthreadCall("condattr setclock ", pthread_condattr_setclock(&attr_, CLOCK_MONOTONIC)); + PthreadCall("condvar init with attr", pthread_cond_init(&cond_, &attr_)); + } + ~CondVar() { + PthreadCall("condvar destroy", pthread_cond_destroy(&cond_)); + PthreadCall("condattr destroy", pthread_condattr_destroy(&attr_)); + } + void Wait(const char *msg = NULL) { + int64_t msg_threshold = mu_->msg_threshold_; + mu_->BeforeUnlock(); + PthreadCall("condvar wait", pthread_cond_wait(&cond_, &mu_->mu_)); + mu_->AfterLock(msg, msg_threshold); + } + // Time wait in us + // timeout < 0 would cause ETIMEOUT and return false immediately + bool TimeWaitInUs(int64_t timeout, const char *msg = NULL) { + // ref: + // http://www.qnx.com/developers/docs/6.5.0SP1.update/com.qnx.doc.neutrino_lib_ref/p/pthread_cond_timedwait.html + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + int64_t nsec = timeout * 1000 + ts.tv_nsec; + + assert(nsec > 0); + + ts.tv_sec += nsec / 1000000000; + ts.tv_nsec = nsec % 1000000000; + + int64_t msg_threshold = mu_->msg_threshold_; + mu_->BeforeUnlock(); + int ret = pthread_cond_timedwait(&cond_, &mu_->mu_, &ts); + mu_->AfterLock(msg, msg_threshold); + return (ret == 0); + } + // Time wait in ms + // timeout < 0 would cause ETIMEOUT and return false immediately + bool TimeWait(int timeout, const char *msg = NULL) { return TimeWaitInUs(timeout * 1000LL, msg); } + void Signal() { PthreadCall("signal", pthread_cond_signal(&cond_)); } + void Broadcast() { PthreadCall("broadcast", pthread_cond_broadcast(&cond_)); } + + private: + CondVar(const CondVar &); + void operator=(const CondVar &); + Mutex *mu_; + pthread_cond_t cond_; + pthread_condattr_t attr_; }; + } // namespace common using common::Mutex; using common::MutexLock; using common::CondVar; - -#endif // TERA_COMMON_MUTEX_H_ diff --git a/src/common/net/ip_address.cc b/src/common/net/ip_address.cc index dc6d982d7..a578b79d3 100644 --- a/src/common/net/ip_address.cc +++ b/src/common/net/ip_address.cc @@ -10,78 +10,65 @@ #include "common/base/string_ext.h" #include "common/base/string_number.h" - const std::string delim = ":"; -IpAddress::IpAddress() - : port_(0), valid_address_(false) {} +IpAddress::IpAddress() : port_(0), valid_address_(false) {} -IpAddress::IpAddress(const std::string& ip_port) - : port_(0), valid_address_(false) { - if (!ip_port.empty()) { - Assign(ip_port); - } +IpAddress::IpAddress(const std::string& ip_port) : port_(0), valid_address_(false) { + if (!ip_port.empty()) { + Assign(ip_port); + } } IpAddress::IpAddress(const std::string& ip, const std::string& port) : port_(0), valid_address_(false) { - Assign(ip, port); + Assign(ip, port); } -IpAddress::IpAddress(const std::string& ip, uint16_t port) - : port_(0), valid_address_(false) { - Assign(ip, port); +IpAddress::IpAddress(const std::string& ip, uint16_t port) : port_(0), valid_address_(false) { + Assign(ip, port); } -std::string IpAddress::ToString() const { - return ip_ + delim + GetPortString(); -} +std::string IpAddress::ToString() const { return ip_ + delim + GetPortString(); } -std::string IpAddress::GetIp() const { - return ip_; -} -uint16_t IpAddress::GetPort() const { - return port_; -} - -std::string IpAddress::GetPortString() const { - return NumberToString(port_); -} +std::string IpAddress::GetIp() const { return ip_; } +uint16_t IpAddress::GetPort() const { return port_; } +std::string IpAddress::GetPortString() const { return NumberToString(port_); } bool IpAddress::Assign(const std::string& ip_port) { - CHECK(!ip_port.empty()); - valid_address_ = false; - std::vector items; - SplitString(ip_port, delim, &items); - if (items.size() != 2) { - LOG(WARNING) << "invalid ip address: " << ip_port; - return false; - } - - if (!StringToNumber(items[1], &port_)) { - LOG(ERROR) << "invalid port number: " << items[1]; - return false; - } - ip_ = items[0]; - valid_address_ = true; - return valid_address_; + CHECK(!ip_port.empty()); + valid_address_ = false; + std::vector items; + SplitString(ip_port, delim, &items); + if (items.size() != 2) { + LOG(WARNING) << "invalid ip address: " << ip_port; + return false; + } + + if (!StringToNumber(items[1], &port_)) { + LOG(ERROR) << "invalid port number: " << items[1]; + return false; + } + ip_ = items[0]; + valid_address_ = true; + return valid_address_; } bool IpAddress::Assign(const std::string& ip, const std::string& port) { - valid_address_ = false; - if (!StringToNumber(port, &port_)) { - LOG(ERROR) << "invalid port number: " << port; - return valid_address_; - } - ip_ = ip; - valid_address_ = true; + valid_address_ = false; + if (!StringToNumber(port, &port_)) { + LOG(ERROR) << "invalid port number: " << port; return valid_address_; + } + ip_ = ip; + valid_address_ = true; + return valid_address_; } bool IpAddress::Assign(const std::string& ip, uint16_t port) { - ip_ = ip; - port_ = port; - valid_address_ = true; - return valid_address_; + ip_ = ip; + port_ = port; + valid_address_ = true; + return valid_address_; } diff --git a/src/common/net/ip_address.h b/src/common/net/ip_address.h index 337e3b0b8..a6acd40c6 100644 --- a/src/common/net/ip_address.h +++ b/src/common/net/ip_address.h @@ -10,32 +10,30 @@ #include class IpAddress { -public: - IpAddress(); - IpAddress(const std::string& ip_port); - IpAddress(const std::string& ip, const std::string& port); - IpAddress(const std::string& ip, uint16_t port); + public: + IpAddress(); + IpAddress(const std::string& ip_port); + IpAddress(const std::string& ip, const std::string& port); + IpAddress(const std::string& ip, uint16_t port); - ~IpAddress() {} + ~IpAddress() {} - std::string ToString() const; - std::string GetIp() const; - uint16_t GetPort() const; - std::string GetPortString() const; + std::string ToString() const; + std::string GetIp() const; + uint16_t GetPort() const; + std::string GetPortString() const; - bool IsValid() const { - return valid_address_; - } + bool IsValid() const { return valid_address_; } - bool Assign(const std::string& ip_port); - bool Assign(const std::string& ip, const std::string& port); - bool Assign(const std::string& ip, uint16_t port); + bool Assign(const std::string& ip_port); + bool Assign(const std::string& ip, const std::string& port); + bool Assign(const std::string& ip, uint16_t port); -private: - std::string ip_; - uint16_t port_; + private: + std::string ip_; + uint16_t port_; - bool valid_address_; + bool valid_address_; }; -#endif // TERA_COMMON_NET_IP_ADDRESS_H_ +#endif // TERA_COMMON_NET_IP_ADDRESS_H_ diff --git a/src/common/request_done_wrapper.h b/src/common/request_done_wrapper.h index cd6b7b3b7..afa85b5ee 100644 --- a/src/common/request_done_wrapper.h +++ b/src/common/request_done_wrapper.h @@ -3,27 +3,22 @@ namespace tera { class RequestDoneWrapper : public google::protobuf::Closure { -public: - static google::protobuf::Closure* NewInstance(google::protobuf::Closure* done) { - return new RequestDoneWrapper(done); - } + public: + static google::protobuf::Closure* NewInstance(google::protobuf::Closure* done) { + return new RequestDoneWrapper(done); + } - //Self-Deleted, never access it after Run(); - //Default do nothing; - virtual void Run() override { - delete this; - } + // Self-Deleted, never access it after Run(); + // Default do nothing; + virtual void Run() override { delete this; } - virtual ~RequestDoneWrapper() { - done_->Run(); - } + virtual ~RequestDoneWrapper() { done_->Run(); } -protected: - //Can Only Create on Heap; - RequestDoneWrapper(google::protobuf::Closure* done): - done_(done) { } + protected: + // Can Only Create on Heap; + RequestDoneWrapper(google::protobuf::Closure* done) : done_(done) {} -private: - google::protobuf::Closure* done_; + private: + google::protobuf::Closure* done_; }; } \ No newline at end of file diff --git a/src/common/rwmutex.h b/src/common/rwmutex.h new file mode 100644 index 000000000..5af7848ff --- /dev/null +++ b/src/common/rwmutex.h @@ -0,0 +1,94 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include +#include +#include +#include +#include "mutex.h" + +namespace common { + +class RWMutex { + public: + RWMutex() { PthreadCall("init mutex", pthread_rwlock_init(&mu_, nullptr)); } + ~RWMutex() { PthreadCall("destroy mutex", pthread_rwlock_destroy(&mu_)); } + + void ReadLock() { PthreadCall("read lock", pthread_rwlock_rdlock(&mu_)); } + + void WriteLock() { PthreadCall("write lock", pthread_rwlock_wrlock(&mu_)); } + + void ReadUnlock() { PthreadCall("read unlock", pthread_rwlock_unlock(&mu_)); } + + void WriteUnlock() { PthreadCall("write unlock", pthread_rwlock_unlock(&mu_)); } + + void AssertHeld() {} + + RWMutex(const RWMutex &) = delete; + + RWMutex &operator=(const RWMutex &) = delete; + + private: + pthread_rwlock_t mu_; +}; + +// Acquire a ReadLock on the specified RWMutex. +// The Lock will be automatically released then the +// object goes out of scope. +class ReadLock { + public: + explicit ReadLock(RWMutex *mu) : mu_(mu) { this->mu_->ReadLock(); } + + ~ReadLock() { this->mu_->ReadUnlock(); } + + ReadLock(const ReadLock &) = delete; + + ReadLock &operator=(const ReadLock &) = delete; + + private: + RWMutex *const mu_; +}; + +// Acquire a WriteLock on the specified RWMutex. +// The Lock will be automatically released then the +// object goes out of scope. +class WriteLock { + public: + explicit WriteLock(RWMutex *mu) : mu_(mu) { this->mu_->WriteLock(); } + + ~WriteLock() { this->mu_->WriteUnlock(); } + + WriteLock(const WriteLock &) = delete; + + WriteLock &operator=(const WriteLock &) = delete; + + private: + RWMutex *const mu_; +}; + +// +// Automatically unlock a locked mutex when the object is destroyed +// +class ReadUnlock { + public: + explicit ReadUnlock(RWMutex *mu) : mu_(mu) { mu->AssertHeld(); } + + ~ReadUnlock() { mu_->ReadUnlock(); } + + // No copying allowed + ReadUnlock(const ReadUnlock &) = delete; + + ReadUnlock &operator=(const ReadUnlock &) = delete; + + private: + RWMutex *const mu_; +}; +} // namespace common + +using common::RWMutex; +using common::WriteLock; +using common::ReadLock; +using common::ReadUnlock; diff --git a/src/common/semaphore.h b/src/common/semaphore.h index dc0d3d4dc..836c10083 100644 --- a/src/common/semaphore.h +++ b/src/common/semaphore.h @@ -8,34 +8,41 @@ namespace common { class Semaphore { -public: - Semaphore(const Semaphore&) = delete; - Semaphore& operator=(const Semaphore&) = delete; - Semaphore(Semaphore&&) = delete; - Semaphore& operator=(Semaphore&&) = delete; - - explicit Semaphore(int64_t counter) - : cv_(&mutex_), counter_(counter) { - } - ~Semaphore() {} - - void Acquire() { - MutexLock lock(&mutex_); - while (counter_ <= 0) { - cv_.Wait(); - } - --counter_; + public: + Semaphore(const Semaphore&) = delete; + Semaphore& operator=(const Semaphore&) = delete; + Semaphore(Semaphore&&) = delete; + Semaphore& operator=(Semaphore&&) = delete; + + explicit Semaphore(int64_t counter) : cv_(&mutex_), counter_(counter) {} + ~Semaphore() {} + + bool TryAcquire() { + MutexLock lock(&mutex_); + if (counter_ <= 0) { + return false; } - void Release() { - MutexLock lock(&mutex_); - ++counter_; - cv_.Signal(); + --counter_; + return true; + } + + void Acquire() { + MutexLock lock(&mutex_); + while (counter_ <= 0) { + cv_.Wait(); } + --counter_; + } + void Release() { + MutexLock lock(&mutex_); + ++counter_; + cv_.Signal(); + } -private: - Mutex mutex_; - CondVar cv_; - int64_t counter_; + private: + Mutex mutex_; + CondVar cv_; + int64_t counter_; }; -} // namespace common +} // namespace common diff --git a/src/common/tera_entry.cc b/src/common/tera_entry.cc new file mode 100644 index 000000000..81000b432 --- /dev/null +++ b/src/common/tera_entry.cc @@ -0,0 +1,44 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "tera/tera_entry.h" +#include "common/this_thread.h" + +namespace tera { + +TeraEntry::TeraEntry() : started_(false) {} + +TeraEntry::~TeraEntry() {} + +bool TeraEntry::Start() { + if (ShouldStart()) { + return StartServer(); + } + return false; +} + +bool TeraEntry::Run() { + ThisThread::Sleep(2000); + return true; +} + +bool TeraEntry::Shutdown() { + if (ShouldShutdown()) { + ShutdownServer(); + return true; + } + return false; +} + +bool TeraEntry::ShouldStart() { + bool has_started = false; + return started_.compare_exchange_strong(has_started, true); +} + +bool TeraEntry::ShouldShutdown() { + bool has_shutdown = true; + return started_.compare_exchange_strong(has_shutdown, false); +} + +} // namespace tera diff --git a/src/common/test/bounded_queue_test.cc b/src/common/test/bounded_queue_test.cc new file mode 100644 index 000000000..daa148778 --- /dev/null +++ b/src/common/test/bounded_queue_test.cc @@ -0,0 +1,49 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include "gtest/gtest.h" +#include "common/base/bounded_queue.h" + +namespace tera { + +class BoundedQueueTest : public ::testing::Test { + public: + BoundedQueueTest() {} + + virtual void SetUp() {} + + virtual void TearDown() {} + + virtual void Reset(int64_t limit) { bq_.reset(new common::BoundedQueue{limit}); } + + private: + std::unique_ptr> bq_; +}; + +TEST_F(BoundedQueueTest, BaseTest) { + Reset(5); + EXPECT_TRUE(bq_.get()); + EXPECT_EQ(bq_->Average(), 0); + EXPECT_EQ(bq_->Sum(), 0); + bq_->Push(1); + bq_->Push(2); + bq_->Push(3); + bq_->Push(4); + bq_->Push(5); + EXPECT_EQ(bq_->Average(), 3); + EXPECT_EQ(bq_->Sum(), 15); + bq_->Push(2); + bq_->Push(2); + bq_->Push(2); + bq_->Push(2); + bq_->Push(2); + EXPECT_EQ(bq_->Average(), 2); + EXPECT_EQ(bq_->Sum(), 10); + EXPECT_EQ(bq_->qu_.size(), bq_->Size()); + EXPECT_EQ(bq_->qu_.size(), 5); +} +} // end namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/common/test/collector_report_test.cc b/src/common/test/collector_report_test.cc index e01972cc9..e32da772b 100644 --- a/src/common/test/collector_report_test.cc +++ b/src/common/test/collector_report_test.cc @@ -1,6 +1,6 @@ // Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved // Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. +// found in the LICENSE file. #include #include @@ -9,171 +9,179 @@ #include "gflags/gflags.h" #include "glog/logging.h" #include "gtest/gtest.h" - -#include "common/metric/metric_counter.h" + +#include "common/metric/metric_counter.h" #include "common/metric/collector_report.h" -#include "common/this_thread.h" - -namespace tera { - +#include "common/this_thread.h" + +namespace tera { + class CollectorReportTest : public ::testing::Test { -public: - CollectorReportTest() - : nonperiod_counter1_label(LabelStringBuilder().Append("key1", "value1").ToString()), - nonperiod_counter1("counter1", nonperiod_counter1_label, {}, false), - nonperiod_counter2("counter2", {}, false), - period_counter1_label(LabelStringBuilder().Append("key2", "value2").ToString()), - period_counter1("counter1", period_counter1_label, {}, true), - period_counter3("counter3", {}, true) { - other_whatever_ids.push_back(MetricId()); - other_whatever_ids.push_back(MetricId("whatevername")); - - MetricLabels whatever_labels; - whatever_labels["haha"] = "hehe"; - whatever_labels["heihei"] = "hoho"; - other_whatever_ids.push_back(MetricId("", whatever_labels)); - other_whatever_ids.push_back(MetricId("whatevername", whatever_labels)); - } - - virtual void SetUp() { - nonperiod_counter1.Set(1); - nonperiod_counter2.Set(2); - period_counter1.Set(3); - period_counter3.Set(4); - } - - virtual void TearDown() { - // reset cache to initial status - CollectorReportPublisher::GetInstance().last_collector_report_.reset(new CollectorReport()); - } -private: - std::string nonperiod_counter1_label; - MetricCounter nonperiod_counter1; - MetricCounter nonperiod_counter2; - std::string period_counter1_label; - MetricCounter period_counter1; - MetricCounter period_counter3; - - std::vector other_whatever_ids; -}; + public: + CollectorReportTest() + : nonperiod_counter1_label(LabelStringBuilder().Append("key1", "value1").ToString()), + nonperiod_counter1("counter1", nonperiod_counter1_label, {}, false), + nonperiod_counter2("counter2", {}, false), + period_counter1_label(LabelStringBuilder().Append("key2", "value2").ToString()), + period_counter1("counter1", period_counter1_label, {}, true), + period_counter3("counter3", {}, true) { + other_whatever_ids.push_back(MetricId()); + other_whatever_ids.push_back(MetricId("whatevername")); + + MetricLabels whatever_labels; + whatever_labels["haha"] = "hehe"; + whatever_labels["heihei"] = "hoho"; + other_whatever_ids.push_back(MetricId("", whatever_labels)); + other_whatever_ids.push_back(MetricId("whatevername", whatever_labels)); + } + + virtual void SetUp() { + nonperiod_counter1.Set(1); + nonperiod_counter2.Set(2); + period_counter1.Set(3); + period_counter3.Set(4); + } + + virtual void TearDown() { + // reset cache to initial status + CollectorReportPublisher::GetInstance().last_collector_report_.reset(new CollectorReport()); + } + + private: + std::string nonperiod_counter1_label; + MetricCounter nonperiod_counter1; + MetricCounter nonperiod_counter2; + std::string period_counter1_label; + MetricCounter period_counter1; + MetricCounter period_counter3; + + std::vector other_whatever_ids; +}; TEST_F(CollectorReportTest, FindTest) { - int64_t value = 0; - CollectorReportPublisher::GetInstance().Refresh(); - std::shared_ptr report = CollectorReportPublisher::GetInstance().GetCollectorReport(); - - // check report - EXPECT_EQ(report->report.size(), CollectorReportPublisher::GetInstance().collectors_.size()); - - // nonperiod_counter1 - value = report->FindMetricValue("counter1", nonperiod_counter1_label); - EXPECT_EQ(value, 1); - value = report->FindMetricValue(nonperiod_counter1.metric_id_); - EXPECT_EQ(value, 1); - value = report->FindMetricValue("counter1"); - EXPECT_EQ(value, 0); - value = report->FindMetricValue("counter1", "other not exist label"); - EXPECT_EQ(value, 0); - value = report->FindMetricValue("not exist name", nonperiod_counter1_label); - EXPECT_EQ(value, 0); - value = report->FindMetricValue(MetricId("counter1")); + int64_t value = 0; + CollectorReportPublisher::GetInstance().Refresh(); + std::shared_ptr report = + CollectorReportPublisher::GetInstance().GetCollectorReport(); + + // check report + EXPECT_EQ(report->report.size(), CollectorReportPublisher::GetInstance().collectors_.size()); + + // nonperiod_counter1 + value = report->FindMetricValue("counter1", nonperiod_counter1_label); + EXPECT_EQ(value, 1); + value = report->FindMetricValue(nonperiod_counter1.metric_id_); + EXPECT_EQ(value, 1); + value = report->FindMetricValue("counter1"); + EXPECT_EQ(value, 0); + value = report->FindMetricValue("counter1", "other not exist label"); + EXPECT_EQ(value, 0); + value = report->FindMetricValue("not exist name", nonperiod_counter1_label); + EXPECT_EQ(value, 0); + value = report->FindMetricValue(MetricId("counter1")); + EXPECT_EQ(value, 0); + + // nonperiod_counter2 + value = report->FindMetricValue("counter2"); + EXPECT_EQ(value, 2); + value = report->FindMetricValue("counter2", ""); + EXPECT_EQ(value, 2); + value = report->FindMetricValue(MetricId("counter2")); + EXPECT_EQ(value, 2); + value = report->FindMetricValue("counter2", "whatever_label"); + EXPECT_EQ(value, 0); + + // period_counter1 + value = report->FindMetricValue("counter1", period_counter1_label); + EXPECT_EQ(value, 3); + value = report->FindMetricValue(period_counter1.metric_id_); + EXPECT_EQ(value, 3); + + // period_counter3 + value = report->FindMetricValue("counter3"); + EXPECT_EQ(value, 4); + value = report->FindMetricValue(period_counter3.metric_id_); + EXPECT_EQ(value, 4); + + // invalid + for (const MetricId& not_exist_id : other_whatever_ids) { + value = report->FindMetricValue(not_exist_id.GetName()); EXPECT_EQ(value, 0); - - // nonperiod_counter2 - value = report->FindMetricValue("counter2"); - EXPECT_EQ(value, 2); - value = report->FindMetricValue("counter2", ""); - EXPECT_EQ(value, 2); - value = report->FindMetricValue(MetricId("counter2")); - EXPECT_EQ(value, 2); - value = report->FindMetricValue("counter2", "whatever_label"); + value = report->FindMetricValue(not_exist_id.ToString()); EXPECT_EQ(value, 0); - - // period_counter1 - value = report->FindMetricValue("counter1", period_counter1_label); - EXPECT_EQ(value, 3); - value = report->FindMetricValue(period_counter1.metric_id_); - EXPECT_EQ(value, 3); - - // period_counter3 - value = report->FindMetricValue("counter3"); - EXPECT_EQ(value, 4); - value = report->FindMetricValue(period_counter3.metric_id_); - EXPECT_EQ(value, 4); - - // invalid - for (const MetricId& not_exist_id : other_whatever_ids) { - value = report->FindMetricValue(not_exist_id.GetName()); - EXPECT_EQ(value, 0); - value = report->FindMetricValue(not_exist_id.ToString()); - EXPECT_EQ(value, 0); - value = report->FindMetricValue(not_exist_id); - EXPECT_EQ(value, 0); - } - - // report again - nonperiod_counter1.Inc(); - nonperiod_counter2.Inc(); - period_counter1.Inc(); - period_counter3.Inc(); - MetricCounter another_counter1("another1"); - MetricCounter another_counter2("another2"); - another_counter1.Inc(); - CollectorReportPublisher::GetInstance().Refresh(); - report = CollectorReportPublisher::GetInstance().GetCollectorReport(); - EXPECT_EQ(report->report.size(), CollectorReportPublisher::GetInstance().collectors_.size()); - - value = report->FindMetricValue(nonperiod_counter1.metric_id_); - EXPECT_EQ(value, 2); - value = report->FindMetricValue(nonperiod_counter2.metric_id_); - EXPECT_EQ(value, 3); - value = report->FindMetricValue(period_counter1.metric_id_); - EXPECT_EQ(value, 1); - value = report->FindMetricValue(period_counter3.metric_id_); - EXPECT_EQ(value, 1); - value = report->FindMetricValue(another_counter1.metric_id_); - EXPECT_EQ(value, 1); - value = report->FindMetricValue(another_counter2.metric_id_); + value = report->FindMetricValue(not_exist_id); EXPECT_EQ(value, 0); + } + + // report again + nonperiod_counter1.Inc(); + nonperiod_counter2.Inc(); + period_counter1.Inc(); + period_counter3.Inc(); + MetricCounter another_counter1("another1"); + MetricCounter another_counter2("another2"); + another_counter1.Inc(); + CollectorReportPublisher::GetInstance().Refresh(); + report = CollectorReportPublisher::GetInstance().GetCollectorReport(); + EXPECT_EQ(report->report.size(), CollectorReportPublisher::GetInstance().collectors_.size()); + + value = report->FindMetricValue(nonperiod_counter1.metric_id_); + EXPECT_EQ(value, 2); + value = report->FindMetricValue(nonperiod_counter2.metric_id_); + EXPECT_EQ(value, 3); + value = report->FindMetricValue(period_counter1.metric_id_); + EXPECT_EQ(value, 1); + value = report->FindMetricValue(period_counter3.metric_id_); + EXPECT_EQ(value, 1); + value = report->FindMetricValue(another_counter1.metric_id_); + EXPECT_EQ(value, 1); + value = report->FindMetricValue(another_counter2.metric_id_); + EXPECT_EQ(value, 0); } TEST_F(CollectorReportTest, CacheTest) { - // do not update yet - std::shared_ptr initial_report = CollectorReportPublisher::GetInstance().GetCollectorReport(); - EXPECT_TRUE(initial_report.get() != NULL); - EXPECT_TRUE(initial_report->report.empty()); - - // update - CollectorReportPublisher::GetInstance().Refresh(); - std::shared_ptr report1 = CollectorReportPublisher::GetInstance().GetCollectorReport(); - EXPECT_EQ(report1->report.size(), CollectorReportPublisher::GetInstance().collectors_.size()); - EXPECT_TRUE(report1.get() == CollectorReportPublisher::GetInstance().last_collector_report_.get()); - - // modify counters and report again - nonperiod_counter1.Inc(); - nonperiod_counter2.Inc(); - period_counter1.Inc(); - period_counter3.Inc(); - MetricCounter another_counter1("another1"); - MetricCounter another_counter2("another2"); - another_counter1.Inc(); - - // get report before update, return same ptr - std::shared_ptr report2 = CollectorReportPublisher::GetInstance().GetCollectorReport(); - EXPECT_TRUE(report2.get() == CollectorReportPublisher::GetInstance().last_collector_report_.get()); - EXPECT_TRUE(report2.get() == report1.get()); - EXPECT_EQ(report2->FindMetricValue(period_counter3.metric_id_), 4); - - // update and get - CollectorReportPublisher::GetInstance().Refresh(); - std::shared_ptr report3 = CollectorReportPublisher::GetInstance().GetCollectorReport(); - EXPECT_TRUE(report3.get() == CollectorReportPublisher::GetInstance().last_collector_report_.get()); - EXPECT_FALSE(report3.get() == report1.get()); - EXPECT_EQ(report3->report.size(), report2->report.size() + 2); - EXPECT_EQ(report3->FindMetricValue(period_counter3.metric_id_), 1); + // do not update yet + std::shared_ptr initial_report = + CollectorReportPublisher::GetInstance().GetCollectorReport(); + EXPECT_TRUE(initial_report.get() != NULL); + EXPECT_TRUE(initial_report->report.empty()); + + // update + CollectorReportPublisher::GetInstance().Refresh(); + std::shared_ptr report1 = + CollectorReportPublisher::GetInstance().GetCollectorReport(); + EXPECT_EQ(report1->report.size(), CollectorReportPublisher::GetInstance().collectors_.size()); + EXPECT_TRUE(report1.get() == + CollectorReportPublisher::GetInstance().last_collector_report_.get()); + + // modify counters and report again + nonperiod_counter1.Inc(); + nonperiod_counter2.Inc(); + period_counter1.Inc(); + period_counter3.Inc(); + MetricCounter another_counter1("another1"); + MetricCounter another_counter2("another2"); + another_counter1.Inc(); + + // get report before update, return same ptr + std::shared_ptr report2 = + CollectorReportPublisher::GetInstance().GetCollectorReport(); + EXPECT_TRUE(report2.get() == + CollectorReportPublisher::GetInstance().last_collector_report_.get()); + EXPECT_TRUE(report2.get() == report1.get()); + EXPECT_EQ(report2->FindMetricValue(period_counter3.metric_id_), 4); + + // update and get + CollectorReportPublisher::GetInstance().Refresh(); + std::shared_ptr report3 = + CollectorReportPublisher::GetInstance().GetCollectorReport(); + EXPECT_TRUE(report3.get() == + CollectorReportPublisher::GetInstance().last_collector_report_.get()); + EXPECT_FALSE(report3.get() == report1.get()); + EXPECT_EQ(report3->report.size(), report2->report.size() + 2); + EXPECT_EQ(report3->FindMetricValue(period_counter3.metric_id_), 1); } - -} // end namespace tera - -/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ +} // end namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/common/test/common_test_main.cc b/src/common/test/common_test_main.cc index 90c3b06dd..2b2bd7868 100644 --- a/src/common/test/common_test_main.cc +++ b/src/common/test/common_test_main.cc @@ -11,20 +11,19 @@ #include "utils/utils_cmd.h" int main(int argc, char** argv) { - ::google::InitGoogleLogging(argv[0]); - FLAGS_v = 16; - FLAGS_minloglevel=0; - FLAGS_log_dir = "./log"; - if (access(FLAGS_log_dir.c_str(), F_OK)) { - mkdir(FLAGS_log_dir.c_str(), 0777); - } - std::string pragram_name("tera"); - tera::utils::SetupLog(pragram_name); - ::google::ParseCommandLineFlags(&argc, &argv, true); - ::testing::InitGoogleTest(&argc, argv); - - return RUN_ALL_TESTS(); -} - -/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ + ::google::InitGoogleLogging(argv[0]); + FLAGS_v = 16; + FLAGS_minloglevel = 0; + FLAGS_log_dir = "./log"; + if (access(FLAGS_log_dir.c_str(), F_OK)) { + mkdir(FLAGS_log_dir.c_str(), 0777); + } + std::string pragram_name("tera"); + tera::utils::SetupLog(pragram_name); + ::google::ParseCommandLineFlags(&argc, &argv, true); + ::testing::InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +} +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/common/test/counter_test.cc b/src/common/test/counter_test.cc index 598c98f04..3143ba9d2 100644 --- a/src/common/test/counter_test.cc +++ b/src/common/test/counter_test.cc @@ -21,106 +21,104 @@ int loop_num = 100000; int thread_num = 1000; void callback_add(Counter* counter) { - for (int i = 0; i < loop_num; ++i) { - counter->Add(100000); - } - MutexLock lock(&mutex); -// std::cout << "add: " << counter->Get() << std::endl; - ref--; + for (int i = 0; i < loop_num; ++i) { + counter->Add(100000); + } + MutexLock lock(&mutex); + // std::cout << "add: " << counter->Get() << std::endl; + ref--; } void callback_sub(Counter* counter) { - for (int i = 0; i < loop_num; ++i) { - counter->Sub(100000); - } - MutexLock lock(&mutex); -// std::cout << "sub: " << counter->Get() << std::endl; - ref--; + for (int i = 0; i < loop_num; ++i) { + counter->Sub(100000); + } + MutexLock lock(&mutex); + // std::cout << "sub: " << counter->Get() << std::endl; + ref--; } void callback_inc(Counter* counter) { - for (int i = 0; i < loop_num; ++i) { - counter->Inc(); - } - MutexLock lock(&mutex); -// std::cout << "inc: " << counter->Get() << std::endl; - ref--; + for (int i = 0; i < loop_num; ++i) { + counter->Inc(); + } + MutexLock lock(&mutex); + // std::cout << "inc: " << counter->Get() << std::endl; + ref--; } void callback_dec(Counter* counter) { - for (int i = 0; i < loop_num; ++i) { - counter->Dec(); - } - MutexLock lock(&mutex); -// std::cout << "dec: " << counter->Get() << std::endl; - ref--; + for (int i = 0; i < loop_num; ++i) { + counter->Dec(); + } + MutexLock lock(&mutex); + // std::cout << "dec: " << counter->Get() << std::endl; + ref--; } void callback_clear(Counter* counter) { - for (int i = 0; i < loop_num / 300; ++i) { - ASSERT_GE(counter->Clear(), 0); - } - MutexLock lock(&mutex); -// std::cout << "clear: " << counter->Get() << std::endl; - ref--; + for (int i = 0; i < loop_num / 300; ++i) { + ASSERT_GE(counter->Clear(), 0); + } + MutexLock lock(&mutex); + // std::cout << "clear: " << counter->Get() << std::endl; + ref--; } TEST(CounterTest, Basic) { - Counter counter; - ThreadPool* pool = new ThreadPool(thread_num); - for (int i = 0; i < thread_num / 4; ++i) { - std::function callback = - std::bind(&callback_add, &counter); - pool->AddTask(callback); - - callback = std::bind(&callback_sub, &counter); - pool->AddTask(callback); - - callback = std::bind(&callback_inc, &counter); - pool->AddTask(callback); - - callback = std::bind(&callback_dec, &counter); - pool->AddTask(callback); - - MutexLock locker(&mutex); - ref += 4; + Counter counter; + ThreadPool* pool = new ThreadPool(thread_num); + for (int i = 0; i < thread_num / 4; ++i) { + std::function callback = std::bind(&callback_add, &counter); + pool->AddTask(callback); + + callback = std::bind(&callback_sub, &counter); + pool->AddTask(callback); + + callback = std::bind(&callback_inc, &counter); + pool->AddTask(callback); + + callback = std::bind(&callback_dec, &counter); + pool->AddTask(callback); + + MutexLock locker(&mutex); + ref += 4; + } + while (1) { + MutexLock locker(&mutex); + if (ref == 0) { + break; } - while (1) { - MutexLock locker(&mutex); - if (ref == 0) { - break; - } - } - ASSERT_EQ(counter.Get(), 0); - delete pool; + } + ASSERT_EQ(counter.Get(), 0); + delete pool; } TEST(CounterTest, Clear) { - Counter counter; - ThreadPool* pool = new ThreadPool(thread_num); - for (int i = 0; i < thread_num / 3; ++i) { - std::function callback = - std::bind(&callback_add, &counter); - pool->AddTask(callback); + Counter counter; + ThreadPool* pool = new ThreadPool(thread_num); + for (int i = 0; i < thread_num / 3; ++i) { + std::function callback = std::bind(&callback_add, &counter); + pool->AddTask(callback); - callback = std::bind(&callback_inc, &counter); - pool->AddTask(callback); + callback = std::bind(&callback_inc, &counter); + pool->AddTask(callback); - callback = std::bind(&callback_clear, &counter); - pool->AddTask(callback); + callback = std::bind(&callback_clear, &counter); + pool->AddTask(callback); - MutexLock lock(&mutex); - ref += 3; - } - while (1) { - MutexLock lock(&mutex); - if (ref == 0) { - break; - } + MutexLock lock(&mutex); + ref += 3; + } + while (1) { + MutexLock lock(&mutex); + if (ref == 0) { + break; } - ASSERT_GE(counter.Clear(), 0); - ASSERT_EQ(counter.Get(), 0); - delete pool; + } + ASSERT_GE(counter.Clear(), 0); + ASSERT_EQ(counter.Get(), 0); + delete pool; } -} // namespace tera +} // namespace tera diff --git a/src/common/test/log_cleaner_test.cc b/src/common/test/log_cleaner_test.cc index 8fbf3ef9f..256380ac5 100644 --- a/src/common/test/log_cleaner_test.cc +++ b/src/common/test/log_cleaner_test.cc @@ -2,7 +2,6 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. - #include #include #include @@ -32,215 +31,214 @@ static size_t g_touch_file_count = 0; static size_t g_expect_clean_count = 0; const static int64_t kTestLogExpireSecond = 5; -std::string TouchFile(const std::string& dir_path, const std::string& filename, bool need_close = true) { - std::string full_path = dir_path + "/" + filename; - int fd = open(full_path.c_str(), O_RDWR | O_CREAT | O_TRUNC, 0777); - if (need_close && fd > 0) { - close(fd); - } - ++g_touch_file_count; - return full_path; +std::string TouchFile(const std::string &dir_path, const std::string &filename, + bool need_close = true) { + std::string full_path = dir_path + "/" + filename; + int fd = open(full_path.c_str(), O_RDWR | O_CREAT | O_TRUNC, 0777); + if (need_close && fd > 0) { + close(fd); + } + ++g_touch_file_count; + return full_path; } void SetupTestEnv() { - std::string leveldb_log_prefix = "leveldb.log"; - FLAGS_tera_leveldb_log_path = "./log/" + leveldb_log_prefix; - // fake options, change log dir for cleaner - FLAGS_log_dir = "./test_log"; - FLAGS_tera_log_prefix = "tera_test"; - FLAGS_tera_info_log_clean_period_second = 1; - FLAGS_tera_info_log_expire_second = kTestLogExpireSecond; - std::string other_prefix = "tera_other_prefix"; - - // make test log dir, ignore failture - mkdir(FLAGS_log_dir.c_str(), 0777); - g_touch_file_count = 0; - g_expect_clean_count = 0; - - // touch file unlinked - std::string unlinked_info = FLAGS_tera_log_prefix + ".INFO.unlink"; - TouchFile(FLAGS_log_dir, unlinked_info); - std::string unlinked_warn = FLAGS_tera_log_prefix + ".WARNING.unlink"; - TouchFile(FLAGS_log_dir, unlinked_warn); - std::string unlinked_err = FLAGS_tera_log_prefix + ".stderr.unlink"; - TouchFile(FLAGS_log_dir, unlinked_err); - g_expect_clean_count += 3; // expect clean unlinked file - - // touch file linked - std::string linked_info = FLAGS_tera_log_prefix + ".INFO.linked"; - std::string info_link_path = FLAGS_log_dir + "/" + FLAGS_tera_log_prefix + ".INFO"; - std::string linked_info_path = TouchFile(FLAGS_log_dir, linked_info); - // link full path - remove(info_link_path.c_str()); - symlink(linked_info_path.c_str(), info_link_path.c_str()); - ++g_touch_file_count; - - std::string linked_warn = FLAGS_tera_log_prefix + ".WARNING.linked"; - std::string warn_link_path = FLAGS_log_dir + "/" + FLAGS_tera_log_prefix + ".WARNING"; - TouchFile(FLAGS_log_dir, linked_warn); - // link filename only - remove(warn_link_path.c_str()); - symlink(linked_warn.c_str(), warn_link_path.c_str()); - ++g_touch_file_count; - - // touch file opened - std::string opened_info = FLAGS_tera_log_prefix + ".INFO.opened"; - TouchFile(FLAGS_log_dir, opened_info, false); - std::string opened_warn = FLAGS_tera_log_prefix + ".WARNING.opened"; - TouchFile(FLAGS_log_dir, opened_warn, false); - std::string opened_err = FLAGS_tera_log_prefix + ".stderr.opened"; - TouchFile(FLAGS_log_dir, opened_err, false); - - // touch file not start with prefix - std::string other_pre_info = other_prefix + ".INFO.otherpre"; - TouchFile(FLAGS_log_dir, other_pre_info); - std::string other_pre_warn = other_prefix + ".WARNING.otherpre"; - TouchFile(FLAGS_log_dir, other_pre_warn); - std::string other_pre_err = other_prefix + ".stderr.otherpre"; - TouchFile(FLAGS_log_dir, other_pre_err); - - // touch file start with leveldb_log_prefix and open one of them - std::string ldb_pre_info = leveldb_log_prefix; - TouchFile(FLAGS_log_dir, ldb_pre_info, false); - std::string ldb_pre_info_lod = leveldb_log_prefix + ".old"; - TouchFile(FLAGS_log_dir, ldb_pre_info_lod); - g_expect_clean_count++; // expect clean leveldb_log_prefix.old + std::string leveldb_log_prefix = "leveldb.log"; + FLAGS_tera_leveldb_log_path = "./log/" + leveldb_log_prefix; + // fake options, change log dir for cleaner + FLAGS_log_dir = "./test_log"; + FLAGS_tera_log_prefix = "tera_test"; + FLAGS_tera_info_log_clean_period_second = 1; + FLAGS_tera_info_log_expire_second = kTestLogExpireSecond; + std::string other_prefix = "tera_other_prefix"; + + // make test log dir, ignore failture + mkdir(FLAGS_log_dir.c_str(), 0777); + g_touch_file_count = 0; + g_expect_clean_count = 0; + + // touch file unlinked + std::string unlinked_info = FLAGS_tera_log_prefix + ".INFO.unlink"; + TouchFile(FLAGS_log_dir, unlinked_info); + std::string unlinked_warn = FLAGS_tera_log_prefix + ".WARNING.unlink"; + TouchFile(FLAGS_log_dir, unlinked_warn); + std::string unlinked_err = FLAGS_tera_log_prefix + ".stderr.unlink"; + TouchFile(FLAGS_log_dir, unlinked_err); + g_expect_clean_count += 3; // expect clean unlinked file + + // touch file linked + std::string linked_info = FLAGS_tera_log_prefix + ".INFO.linked"; + std::string info_link_path = FLAGS_log_dir + "/" + FLAGS_tera_log_prefix + ".INFO"; + std::string linked_info_path = TouchFile(FLAGS_log_dir, linked_info); + // link full path + remove(info_link_path.c_str()); + symlink(linked_info_path.c_str(), info_link_path.c_str()); + ++g_touch_file_count; + + std::string linked_warn = FLAGS_tera_log_prefix + ".WARNING.linked"; + std::string warn_link_path = FLAGS_log_dir + "/" + FLAGS_tera_log_prefix + ".WARNING"; + TouchFile(FLAGS_log_dir, linked_warn); + // link filename only + remove(warn_link_path.c_str()); + symlink(linked_warn.c_str(), warn_link_path.c_str()); + ++g_touch_file_count; + + // touch file opened + std::string opened_info = FLAGS_tera_log_prefix + ".INFO.opened"; + TouchFile(FLAGS_log_dir, opened_info, false); + std::string opened_warn = FLAGS_tera_log_prefix + ".WARNING.opened"; + TouchFile(FLAGS_log_dir, opened_warn, false); + std::string opened_err = FLAGS_tera_log_prefix + ".stderr.opened"; + TouchFile(FLAGS_log_dir, opened_err, false); + + // touch file not start with prefix + std::string other_pre_info = other_prefix + ".INFO.otherpre"; + TouchFile(FLAGS_log_dir, other_pre_info); + std::string other_pre_warn = other_prefix + ".WARNING.otherpre"; + TouchFile(FLAGS_log_dir, other_pre_warn); + std::string other_pre_err = other_prefix + ".stderr.otherpre"; + TouchFile(FLAGS_log_dir, other_pre_err); + + // touch file start with leveldb_log_prefix and open one of them + std::string ldb_pre_info = leveldb_log_prefix; + TouchFile(FLAGS_log_dir, ldb_pre_info, false); + std::string ldb_pre_info_lod = leveldb_log_prefix + ".old"; + TouchFile(FLAGS_log_dir, ldb_pre_info_lod); + g_expect_clean_count++; // expect clean leveldb_log_prefix.old } TEST(LogCleanerTest, InitialStatus) { - // ensure stop firstly - LogCleaner::StopCleaner(); - ASSERT_TRUE(LogCleaner::singleton_instance_ == NULL); - SetupTestEnv(); - LogCleaner *cleaner = LogCleaner::GetInstance(); - - ASSERT_FALSE(cleaner == NULL); - ASSERT_FALSE(cleaner->IsRunning()); - ASSERT_TRUE(cleaner->CheckOptions()); - ASSERT_FALSE(cleaner->stop_); + // ensure stop firstly + LogCleaner::StopCleaner(); + ASSERT_TRUE(LogCleaner::singleton_instance_ == NULL); + SetupTestEnv(); + LogCleaner *cleaner = LogCleaner::GetInstance(); + + ASSERT_FALSE(cleaner == NULL); + ASSERT_FALSE(cleaner->IsRunning()); + ASSERT_TRUE(cleaner->CheckOptions()); + ASSERT_FALSE(cleaner->stop_); } TEST(LogCleanerTest, Basic) { - SetupTestEnv(); - // get instance - LogCleaner *cleaner = LogCleaner::GetInstance(); - ASSERT_FALSE(cleaner == NULL); - - // check log dir before clean - std::vector reserved_file_list; - bool list_ret = ListCurrentDir(cleaner->info_log_dir_, &reserved_file_list); - ASSERT_TRUE(list_ret); - - // print filelist before clean - std::cout << "before clean. file count: " << reserved_file_list.size() << std::endl; - for (size_t i = 0; i < reserved_file_list.size(); ++i) { - std::cout << reserved_file_list[i] << std::endl; - } - ASSERT_EQ(reserved_file_list.size(), g_touch_file_count); - - // start and stop - cleaner->Start(); - ASSERT_TRUE(cleaner->IsRunning()); - ASSERT_FALSE(cleaner->stop_); - - { - // wait schedule clean first times - MutexLock l(&(cleaner->mutex_), "log cleaner unittest"); - cleaner->bg_cond_.Wait(); - } - - // check clean result - reserved_file_list.clear(); - list_ret = ListCurrentDir(cleaner->info_log_dir_, &reserved_file_list); - ASSERT_TRUE(list_ret); - // print filelist after clean - std::cout << "first clean. expect clean nothing since not expire yet" << std::endl; - EXPECT_EQ(reserved_file_list.size(), g_touch_file_count); - - { - // wait schedule clean second times - MutexLock l(&(cleaner->mutex_), "log cleaner unittest"); - cleaner->bg_cond_.Wait(); - } - // check clean result - reserved_file_list.clear(); - list_ret = ListCurrentDir(cleaner->info_log_dir_, &reserved_file_list); - ASSERT_TRUE(list_ret); - std::cout << "second clean. expect clean nothing since not expire yet" << std::endl; - EXPECT_EQ(reserved_file_list.size(), g_touch_file_count); - - for (size_t i = 3; i < kTestLogExpireSecond + 5; ++i) { - // wait schedule clean several times - std::cout << "wait " << i << " times clean." << std::endl; - MutexLock l(&(cleaner->mutex_), "log cleaner unittest"); - cleaner->bg_cond_.Wait(); - } - // check clean result - reserved_file_list.clear(); - list_ret = ListCurrentDir(cleaner->info_log_dir_, &reserved_file_list); - ASSERT_TRUE(list_ret); - std::cout << "after " << kTestLogExpireSecond - << " times clean. expect clean " << g_expect_clean_count - << " logs: " << std::endl; - // print filelist after clean - for (size_t i = 0; i < reserved_file_list.size(); ++i) { - std::cout << reserved_file_list[i] << std::endl; - } - EXPECT_EQ(reserved_file_list.size(), g_touch_file_count - g_expect_clean_count); - - // stop cleaner - cleaner->Stop(); - ASSERT_FALSE(cleaner->IsRunning()); - ASSERT_TRUE(cleaner->stop_); - ASSERT_FALSE(cleaner == NULL); - - // destroy - LogCleaner::StopCleaner(); - ASSERT_TRUE(LogCleaner::singleton_instance_ == NULL); + SetupTestEnv(); + // get instance + LogCleaner *cleaner = LogCleaner::GetInstance(); + ASSERT_FALSE(cleaner == NULL); + + // check log dir before clean + std::vector reserved_file_list; + bool list_ret = ListCurrentDir(cleaner->info_log_dir_, &reserved_file_list); + ASSERT_TRUE(list_ret); + + // print filelist before clean + std::cout << "before clean. file count: " << reserved_file_list.size() << std::endl; + for (size_t i = 0; i < reserved_file_list.size(); ++i) { + std::cout << reserved_file_list[i] << std::endl; + } + ASSERT_EQ(reserved_file_list.size(), g_touch_file_count); + + // start and stop + cleaner->Start(); + ASSERT_TRUE(cleaner->IsRunning()); + ASSERT_FALSE(cleaner->stop_); + + { + // wait schedule clean first times + MutexLock l(&(cleaner->mutex_), "log cleaner unittest"); + cleaner->bg_cond_.Wait(); + } + + // check clean result + reserved_file_list.clear(); + list_ret = ListCurrentDir(cleaner->info_log_dir_, &reserved_file_list); + ASSERT_TRUE(list_ret); + // print filelist after clean + std::cout << "first clean. expect clean nothing since not expire yet" << std::endl; + EXPECT_EQ(reserved_file_list.size(), g_touch_file_count); + + { + // wait schedule clean second times + MutexLock l(&(cleaner->mutex_), "log cleaner unittest"); + cleaner->bg_cond_.Wait(); + } + // check clean result + reserved_file_list.clear(); + list_ret = ListCurrentDir(cleaner->info_log_dir_, &reserved_file_list); + ASSERT_TRUE(list_ret); + std::cout << "second clean. expect clean nothing since not expire yet" << std::endl; + EXPECT_EQ(reserved_file_list.size(), g_touch_file_count); + + for (size_t i = 3; i < kTestLogExpireSecond + 5; ++i) { + // wait schedule clean several times + std::cout << "wait " << i << " times clean." << std::endl; + MutexLock l(&(cleaner->mutex_), "log cleaner unittest"); + cleaner->bg_cond_.Wait(); + } + // check clean result + reserved_file_list.clear(); + list_ret = ListCurrentDir(cleaner->info_log_dir_, &reserved_file_list); + ASSERT_TRUE(list_ret); + std::cout << "after " << kTestLogExpireSecond << " times clean. expect clean " + << g_expect_clean_count << " logs: " << std::endl; + // print filelist after clean + for (size_t i = 0; i < reserved_file_list.size(); ++i) { + std::cout << reserved_file_list[i] << std::endl; + } + EXPECT_EQ(reserved_file_list.size(), g_touch_file_count - g_expect_clean_count); + + // stop cleaner + cleaner->Stop(); + ASSERT_FALSE(cleaner->IsRunning()); + ASSERT_TRUE(cleaner->stop_); + ASSERT_FALSE(cleaner == NULL); + + // destroy + LogCleaner::StopCleaner(); + ASSERT_TRUE(LogCleaner::singleton_instance_ == NULL); } TEST(LogCleanerTest, MultiStartAndStop) { - // ensure stop firstly - LogCleaner::StopCleaner(); - ASSERT_TRUE(LogCleaner::singleton_instance_ == NULL); - - SetupTestEnv(); - // get instance - LogCleaner *cleaner = LogCleaner::GetInstance(); - - // stop while not start - cleaner->Stop(); - ASSERT_FALSE(cleaner->IsRunning()); - ASSERT_TRUE(cleaner->stop_); - - // start three times - cleaner->Start(); - ASSERT_TRUE(cleaner->IsRunning()); - cleaner->Start(); - ASSERT_TRUE(cleaner->IsRunning()); - cleaner->Start(); - ASSERT_TRUE(cleaner->IsRunning()); - - { - // wait schedule clean - MutexLock l(&(cleaner->mutex_), "log cleaner unittest"); - cleaner->bg_cond_.Wait(); - } - - // stop twice - cleaner->Stop(); - ASSERT_FALSE(cleaner->IsRunning()); - cleaner->Stop(); - ASSERT_FALSE(cleaner->IsRunning()); - - // start again - cleaner->Start(); - ASSERT_TRUE(cleaner->IsRunning()); - - // stop and destroy - LogCleaner::StopCleaner(); - ASSERT_TRUE(LogCleaner::singleton_instance_ == NULL); -} + // ensure stop firstly + LogCleaner::StopCleaner(); + ASSERT_TRUE(LogCleaner::singleton_instance_ == NULL); + + SetupTestEnv(); + // get instance + LogCleaner *cleaner = LogCleaner::GetInstance(); -} // end namespace common + // stop while not start + cleaner->Stop(); + ASSERT_FALSE(cleaner->IsRunning()); + ASSERT_TRUE(cleaner->stop_); + + // start three times + cleaner->Start(); + ASSERT_TRUE(cleaner->IsRunning()); + cleaner->Start(); + ASSERT_TRUE(cleaner->IsRunning()); + cleaner->Start(); + ASSERT_TRUE(cleaner->IsRunning()); + + { + // wait schedule clean + MutexLock l(&(cleaner->mutex_), "log cleaner unittest"); + cleaner->bg_cond_.Wait(); + } + + // stop twice + cleaner->Stop(); + ASSERT_FALSE(cleaner->IsRunning()); + cleaner->Stop(); + ASSERT_FALSE(cleaner->IsRunning()); + + // start again + cleaner->Start(); + ASSERT_TRUE(cleaner->IsRunning()); + + // stop and destroy + LogCleaner::StopCleaner(); + ASSERT_TRUE(LogCleaner::singleton_instance_ == NULL); +} +} // end namespace common diff --git a/src/common/test/metric_counter_test.cc b/src/common/test/metric_counter_test.cc index 00062b8ff..60784f63d 100644 --- a/src/common/test/metric_counter_test.cc +++ b/src/common/test/metric_counter_test.cc @@ -1,6 +1,6 @@ // Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved // Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. +// found in the LICENSE file. #include #include @@ -10,88 +10,87 @@ #include "gflags/gflags.h" #include "glog/logging.h" #include "gtest/gtest.h" - -#include "common/metric/metric_counter.h" - -namespace tera { - + +#include "common/metric/metric_counter.h" + +namespace tera { + class MetricCounterTest : public ::testing::Test { -public: - virtual void SetUp() { - label_str_ = LabelStringBuilder() - .Append("test_label1", "test_value1") - .Append("test_label2", "test_value2") - .ToString(); - } - - virtual void TearDown() {} - -private: - std::string label_str_; + public: + virtual void SetUp() { + label_str_ = LabelStringBuilder() + .Append("test_label1", "test_value1") + .Append("test_label2", "test_value2") + .ToString(); + } + + virtual void TearDown() {} + + private: + std::string label_str_; }; TEST_F(MetricCounterTest, RegisterTest) { - MetricId test_id; - { - // with name and labels - MetricCounter counter1("counter1", label_str_); - test_id = counter1.metric_id_; - - EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(counter1.metric_id_)) - << "metric_id " << counter1.metric_id_.ToString() << std::endl; - EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id)) - << "metric_id " << test_id.ToString() << std::endl; - EXPECT_TRUE(counter1.IsRegistered()); - } - EXPECT_FALSE(CollectorReportPublisher::GetInstance().HasCollector(test_id)) - << "metric_id " << test_id.ToString() << std::endl; - - { - // with name only - MetricCounter counter2("counter2", {}, true); - test_id = counter2.metric_id_; - - EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(counter2.metric_id_)) - << "metric_id " << counter2.metric_id_.ToString() << std::endl; - EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id)) - << "metric_id " << test_id.ToString() << std::endl; - EXPECT_TRUE(counter2.IsRegistered()); - } - EXPECT_FALSE(CollectorReportPublisher::GetInstance().HasCollector(test_id)) - << "metric_id " << test_id.ToString() << std::endl; - - // with illegal label string - ASSERT_THROW(MetricCounter("counter3", "illegal_label_string", {}, true), std::invalid_argument); - - // with empty name - ASSERT_THROW(MetricCounter("", label_str_, {}, true), std::invalid_argument); - ASSERT_THROW(MetricCounter("", {}, true), std::invalid_argument); + MetricId test_id; + { + // with name and labels + MetricCounter counter1("counter1", label_str_); + test_id = counter1.metric_id_; + + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(counter1.metric_id_)) + << "metric_id " << counter1.metric_id_.ToString() << std::endl; + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id)) + << "metric_id " << test_id.ToString() << std::endl; + EXPECT_TRUE(counter1.IsRegistered()); + } + EXPECT_FALSE(CollectorReportPublisher::GetInstance().HasCollector(test_id)) + << "metric_id " << test_id.ToString() << std::endl; + + { + // with name only + MetricCounter counter2("counter2", {}, true); + test_id = counter2.metric_id_; + + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(counter2.metric_id_)) + << "metric_id " << counter2.metric_id_.ToString() << std::endl; + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id)) + << "metric_id " << test_id.ToString() << std::endl; + EXPECT_TRUE(counter2.IsRegistered()); + } + EXPECT_FALSE(CollectorReportPublisher::GetInstance().HasCollector(test_id)) + << "metric_id " << test_id.ToString() << std::endl; + + // with illegal label string + ASSERT_THROW(MetricCounter("counter3", "illegal_label_string", {}, true), std::invalid_argument); + + // with empty name + ASSERT_THROW(MetricCounter("", label_str_, {}, true), std::invalid_argument); + ASSERT_THROW(MetricCounter("", {}, true), std::invalid_argument); } - + TEST_F(MetricCounterTest, CollectTest) { - MetricCounter periodic_counter("periodic", label_str_, {}, true); - MetricCounter nonperiodic_counter("nonperiodic", label_str_, {}, false); - - for (size_t i = 0; i < 3; ++i) { - periodic_counter.Inc(); - nonperiodic_counter.Inc(); - } - EXPECT_EQ(periodic_counter.Get(), 3); - EXPECT_EQ(nonperiodic_counter.Get(), 3); - - // do collect - CollectorReportPublisher::GetInstance().Refresh(); - - EXPECT_EQ(periodic_counter.Get(), 0); - EXPECT_EQ(nonperiodic_counter.Get(), 3); - + MetricCounter periodic_counter("periodic", label_str_, {}, true); + MetricCounter nonperiodic_counter("nonperiodic", label_str_, {}, false); + + for (size_t i = 0; i < 3; ++i) { periodic_counter.Inc(); nonperiodic_counter.Inc(); - EXPECT_EQ(periodic_counter.Get(), 1); - EXPECT_EQ(nonperiodic_counter.Get(), 4); + } + EXPECT_EQ(periodic_counter.Get(), 3); + EXPECT_EQ(nonperiodic_counter.Get(), 3); + + // do collect + CollectorReportPublisher::GetInstance().Refresh(); + + EXPECT_EQ(periodic_counter.Get(), 0); + EXPECT_EQ(nonperiodic_counter.Get(), 3); + + periodic_counter.Inc(); + nonperiodic_counter.Inc(); + EXPECT_EQ(periodic_counter.Get(), 1); + EXPECT_EQ(nonperiodic_counter.Get(), 4); } - -} // end namespace tera - -/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ +} // end namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/common/test/metric_http_server_test.cc b/src/common/test/metric_http_server_test.cc index c911b438e..ee8edadc2 100644 --- a/src/common/test/metric_http_server_test.cc +++ b/src/common/test/metric_http_server_test.cc @@ -1,6 +1,6 @@ // Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved // Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. +// found in the LICENSE file. #include #include @@ -9,130 +9,125 @@ #include "gflags/gflags.h" #include "glog/logging.h" #include "gtest/gtest.h" - -#include "common/metric/metric_counter.h" -#include "common/metric/metric_http_server.h" -#include "common/metric/collector_report.h" -#include "common/base/string_ext.h" - -namespace tera { - -class MetricHttpServerTest : public ::testing::Test { -public: - virtual void SetUp() { - // register metrics - test_counter = new MetricCounter("counter", {SubscriberType::LATEST}); - server = new MetricHttpServer; - test_counter->Set(1); - } - virtual void TearDown() { - delete test_counter; - delete server; - } -private: - MetricCounter* test_counter; - MetricHttpServer* server; -}; - -TEST_F(MetricHttpServerTest, BuildType) { - std::string body; - ResponseBodyBuilder::BuildType(&body, "good", "gauge"); - EXPECT_STREQ(body.c_str(), "# TYPE good gauge\n"); - ResponseBodyBuilder::BuildType(&body, "bad", "summary"); - EXPECT_STREQ(body.c_str(), "# TYPE good gauge\n" - "# TYPE bad summary\n"); +#include "common/metric/metric_counter.h" +#include "common/metric/metric_http_server.h" +#include "common/metric/collector_report.h" +#include "common/base/string_ext.h" + +namespace tera { + +class MetricHttpServerTest : public ::testing::Test { + public: + virtual void SetUp() { + // register metrics + test_counter = new MetricCounter("counter", {SubscriberType::LATEST}); + server = new MetricHttpServer; + test_counter->Set(1); + } + virtual void TearDown() { + delete test_counter; + delete server; + } + + private: + MetricCounter* test_counter; + MetricHttpServer* server; +}; + +TEST_F(MetricHttpServerTest, BuildType) { + std::string body; + ResponseBodyBuilder::BuildType(&body, "good", "gauge"); + EXPECT_STREQ(body.c_str(), "# TYPE good gauge\n"); + ResponseBodyBuilder::BuildType(&body, "bad", "summary"); + EXPECT_STREQ(body.c_str(), + "# TYPE good gauge\n" + "# TYPE bad summary\n"); } -TEST_F(MetricHttpServerTest, BuildHelp) { - std::string body; - ResponseBodyBuilder::BuildHelp(&body, "good", "good"); - EXPECT_STREQ(body.c_str(), "# HELP good good\n"); - ResponseBodyBuilder::BuildHelp(&body, "bad", "bad"); - EXPECT_STREQ(body.c_str(), "# HELP good good\n" - "# HELP bad bad\n"); +TEST_F(MetricHttpServerTest, BuildHelp) { + std::string body; + ResponseBodyBuilder::BuildHelp(&body, "good", "good"); + EXPECT_STREQ(body.c_str(), "# HELP good good\n"); + ResponseBodyBuilder::BuildHelp(&body, "bad", "bad"); + EXPECT_STREQ(body.c_str(), + "# HELP good good\n" + "# HELP bad bad\n"); } -TEST_F(MetricHttpServerTest, BuildMetricItem) { - CollectorReportPublisher::GetInstance().Refresh(); - auto report = CollectorReportPublisher::GetInstance().GetSubscriberReport(); +TEST_F(MetricHttpServerTest, BuildMetricItem) { + CollectorReportPublisher::GetInstance().Refresh(); + auto report = CollectorReportPublisher::GetInstance().GetSubscriberReport(); - std::string body; - int64_t time_stamp; + std::string body; + int64_t time_stamp; - for (const auto& item : *report) { - if (item.first.GetName() == "counter") { - ResponseBodyBuilder::BuildMetricItem(&body, item.first, item.second); - time_stamp = item.second.Time(); - } + for (const auto& item : *report) { + if (item.first.GetName() == "counter") { + ResponseBodyBuilder::BuildMetricItem(&body, item.first, item.second); + time_stamp = item.second.Time(); } - std::string expect_body = "counter{value_type=\"Latest\"} 1 " + - std::to_string(time_stamp) + "\n"; + } + std::string expect_body = "counter{value_type=\"Latest\"} 1 " + std::to_string(time_stamp) + "\n"; - EXPECT_EQ(body, expect_body); - EXPECT_EQ(test_counter->Get(), 0); - test_counter->Set(2); + EXPECT_EQ(body, expect_body); + EXPECT_EQ(test_counter->Get(), 0); + test_counter->Set(2); - CollectorReportPublisher::GetInstance().Refresh(); - report = CollectorReportPublisher::GetInstance().GetSubscriberReport(); + CollectorReportPublisher::GetInstance().Refresh(); + report = CollectorReportPublisher::GetInstance().GetSubscriberReport(); - for (const auto& item : *report) { - if (item.first.GetName() == "counter") { - ResponseBodyBuilder::BuildMetricItem(&body, item.first, item.second); - time_stamp = item.second.Time(); - } + for (const auto& item : *report) { + if (item.first.GetName() == "counter") { + ResponseBodyBuilder::BuildMetricItem(&body, item.first, item.second); + time_stamp = item.second.Time(); } + } - expect_body += "counter{value_type=\"Latest\"} 2 " + - std::to_string(time_stamp) + "\n"; + expect_body += "counter{value_type=\"Latest\"} 2 " + std::to_string(time_stamp) + "\n"; - EXPECT_EQ(body, expect_body); + EXPECT_EQ(body, expect_body); } -TEST_F(MetricHttpServerTest, GetResponseBody) { - CollectorReportPublisher::GetInstance().Refresh(); - int64_t timestamp = CollectorReportPublisher::GetInstance().GetCollectorReport()->timestamp_ms; - std::string body = server->GetResponseBody(); - std::vector splited_string; - SplitString(body, "\n", &splited_string); - bool find_counter = false; - for (int idx = 0; idx != splited_string.size(); ++ idx) { - if (splited_string[idx].substr(0, 8) == "counter{") { - find_counter = true; - EXPECT_STREQ(splited_string[idx - 2].c_str(), - "# HELP counter counter"); - EXPECT_STREQ(splited_string[idx - 1].c_str(), - "# TYPE counter gauge"); - std::string expected_line = "counter{value_type=\"Latest\"} 1 " + std::to_string(timestamp); - EXPECT_EQ(expected_line, splited_string[idx]); - } +TEST_F(MetricHttpServerTest, GetResponseBody) { + CollectorReportPublisher::GetInstance().Refresh(); + int64_t timestamp = CollectorReportPublisher::GetInstance().GetCollectorReport()->timestamp_ms; + std::string body = server->GetResponseBody(); + std::vector splited_string; + SplitString(body, "\n", &splited_string); + bool find_counter = false; + for (int idx = 0; idx != splited_string.size(); ++idx) { + if (splited_string[idx].substr(0, 8) == "counter{") { + find_counter = true; + EXPECT_STREQ(splited_string[idx - 2].c_str(), "# HELP counter counter"); + EXPECT_STREQ(splited_string[idx - 1].c_str(), "# TYPE counter gauge"); + std::string expected_line = "counter{value_type=\"Latest\"} 1 " + std::to_string(timestamp); + EXPECT_EQ(expected_line, splited_string[idx]); } - EXPECT_TRUE(find_counter); - EXPECT_EQ(test_counter->Get(), 0); - test_counter->Set(19); - find_counter = false; - - CollectorReportPublisher::GetInstance().Refresh(); - timestamp = CollectorReportPublisher::GetInstance().GetCollectorReport()->timestamp_ms; - body = server->GetResponseBody(); - splited_string.clear(); - SplitString(body, "\n", &splited_string); - for (int idx = 0; idx != splited_string.size(); ++ idx) { - if (splited_string[idx].substr(0, 8) == "counter{") { - find_counter = true; - EXPECT_STREQ(splited_string[idx - 2].c_str(), - "# HELP counter counter"); - EXPECT_STREQ(splited_string[idx - 1].c_str(), - "# TYPE counter gauge"); - std::string expected_line = "counter{value_type=\"Latest\"} 19 " + std::to_string(timestamp); - EXPECT_EQ(expected_line, splited_string[idx]); - } + } + EXPECT_TRUE(find_counter); + EXPECT_EQ(test_counter->Get(), 0); + test_counter->Set(19); + find_counter = false; + + CollectorReportPublisher::GetInstance().Refresh(); + timestamp = CollectorReportPublisher::GetInstance().GetCollectorReport()->timestamp_ms; + body = server->GetResponseBody(); + splited_string.clear(); + SplitString(body, "\n", &splited_string); + for (int idx = 0; idx != splited_string.size(); ++idx) { + if (splited_string[idx].substr(0, 8) == "counter{") { + find_counter = true; + EXPECT_STREQ(splited_string[idx - 2].c_str(), "# HELP counter counter"); + EXPECT_STREQ(splited_string[idx - 1].c_str(), "# TYPE counter gauge"); + std::string expected_line = "counter{value_type=\"Latest\"} 19 " + std::to_string(timestamp); + EXPECT_EQ(expected_line, splited_string[idx]); } + } - EXPECT_TRUE(find_counter); - EXPECT_EQ(test_counter->Get(), 0); + EXPECT_TRUE(find_counter); + EXPECT_EQ(test_counter->Get(), 0); } -} // end namespace tera - -/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ +} // end namespace tera +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/common/test/metric_id_test.cc b/src/common/test/metric_id_test.cc index ad2795073..cfebef574 100644 --- a/src/common/test/metric_id_test.cc +++ b/src/common/test/metric_id_test.cc @@ -1,6 +1,6 @@ // Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved // Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. +// found in the LICENSE file. #include #include @@ -9,170 +9,168 @@ #include "gflags/gflags.h" #include "glog/logging.h" #include "gtest/gtest.h" - -#include "common/metric/metric_id.h" - -namespace tera { - + +#include "common/metric/metric_id.h" + +namespace tera { + static const std::string kTestMetricName = "test_name"; - + class MetricIdTest : public ::testing::Test { -public: - virtual void SetUp() { - empty_id_ = new MetricId(); - id_with_name_ = new MetricId(kTestMetricName); - - MetricLabels label_map; - label_map.insert(std::make_pair("test_label1", "test_value1")); - label_map.insert(std::make_pair("test_label2", "test_value2")); - label_str_ = "test_label1:test_value1,test_label2:test_value2"; - - id_with_label_ = new MetricId("", label_map); - id_with_name_and_label_ = new MetricId(kTestMetricName, label_map); - } - - virtual void TearDown() { - delete empty_id_; - delete id_with_name_; - delete id_with_label_; - delete id_with_name_and_label_; - } - -private: - MetricId *empty_id_; - MetricId *id_with_name_; - MetricId *id_with_label_; - MetricId *id_with_name_and_label_; - std::string label_str_; + public: + virtual void SetUp() { + empty_id_ = new MetricId(); + id_with_name_ = new MetricId(kTestMetricName); + + MetricLabels label_map; + label_map.insert(std::make_pair("test_label1", "test_value1")); + label_map.insert(std::make_pair("test_label2", "test_value2")); + label_str_ = "test_label1:test_value1,test_label2:test_value2"; + + id_with_label_ = new MetricId("", label_map); + id_with_name_and_label_ = new MetricId(kTestMetricName, label_map); + } + + virtual void TearDown() { + delete empty_id_; + delete id_with_name_; + delete id_with_label_; + delete id_with_name_and_label_; + } + + private: + MetricId *empty_id_; + MetricId *id_with_name_; + MetricId *id_with_label_; + MetricId *id_with_name_and_label_; + std::string label_str_; }; TEST_F(MetricIdTest, BasicTest) { - // empty id - ASSERT_FALSE(empty_id_->IsValid()); - ASSERT_TRUE(empty_id_->GetName().empty()); - ASSERT_TRUE(empty_id_->GetLabelMap().empty()); - ASSERT_TRUE(empty_id_->ToString().empty()); - ASSERT_TRUE(empty_id_->GetLabel("whatever_label").empty()); - ASSERT_FALSE(empty_id_->ExistLabel("whatever_label")); - ASSERT_FALSE(empty_id_->CheckLabel("whatever_label", "whatever_value")); - - // id with name, empty label - ASSERT_TRUE(id_with_name_->IsValid()); - ASSERT_STREQ(id_with_name_->GetName().c_str(), kTestMetricName.c_str()); - ASSERT_TRUE(id_with_name_->GetLabelMap().empty()); - ASSERT_STREQ(id_with_name_->ToString().c_str(), kTestMetricName.c_str()); - ASSERT_TRUE(id_with_name_->GetLabel("whatever_label").empty()); - ASSERT_FALSE(id_with_name_->ExistLabel("whatever_label")); - ASSERT_FALSE(id_with_name_->CheckLabel("whatever_label", "whatever_value")); - - // id with name and label - ASSERT_TRUE(id_with_name_and_label_->IsValid()); - ASSERT_STREQ(id_with_name_and_label_->GetName().c_str(), kTestMetricName.c_str()); - ASSERT_EQ(id_with_name_and_label_->GetLabelMap().size(), 2); - - std::string expected_id_str = kTestMetricName + kNameLabelsDelimiter + label_str_; - ASSERT_STREQ(id_with_name_and_label_->ToString().c_str(), expected_id_str.c_str()); - ASSERT_STREQ(id_with_name_and_label_->GetLabel("test_label1").c_str(), "test_value1"); - ASSERT_TRUE(id_with_name_and_label_->ExistLabel("test_label1")); - ASSERT_TRUE(id_with_name_and_label_->CheckLabel("test_label1", "test_value1")); - - ASSERT_TRUE(id_with_name_and_label_->GetLabel("not_exist_label").empty()); - ASSERT_FALSE(id_with_name_and_label_->ExistLabel("not_exist_label")); - ASSERT_FALSE(id_with_name_and_label_->CheckLabel("not_exist_label", "test_value1")); - ASSERT_FALSE(id_with_name_and_label_->CheckLabel("test_label1", "test_value2")); - - // id with label, empty name - ASSERT_FALSE(id_with_label_->IsValid()); + // empty id + ASSERT_FALSE(empty_id_->IsValid()); + ASSERT_TRUE(empty_id_->GetName().empty()); + ASSERT_TRUE(empty_id_->GetLabelMap().empty()); + ASSERT_TRUE(empty_id_->ToString().empty()); + ASSERT_TRUE(empty_id_->GetLabel("whatever_label").empty()); + ASSERT_FALSE(empty_id_->ExistLabel("whatever_label")); + ASSERT_FALSE(empty_id_->CheckLabel("whatever_label", "whatever_value")); + + // id with name, empty label + ASSERT_TRUE(id_with_name_->IsValid()); + ASSERT_STREQ(id_with_name_->GetName().c_str(), kTestMetricName.c_str()); + ASSERT_TRUE(id_with_name_->GetLabelMap().empty()); + ASSERT_STREQ(id_with_name_->ToString().c_str(), kTestMetricName.c_str()); + ASSERT_TRUE(id_with_name_->GetLabel("whatever_label").empty()); + ASSERT_FALSE(id_with_name_->ExistLabel("whatever_label")); + ASSERT_FALSE(id_with_name_->CheckLabel("whatever_label", "whatever_value")); + + // id with name and label + ASSERT_TRUE(id_with_name_and_label_->IsValid()); + ASSERT_STREQ(id_with_name_and_label_->GetName().c_str(), kTestMetricName.c_str()); + ASSERT_EQ(id_with_name_and_label_->GetLabelMap().size(), 2); + + std::string expected_id_str = kTestMetricName + kNameLabelsDelimiter + label_str_; + ASSERT_STREQ(id_with_name_and_label_->ToString().c_str(), expected_id_str.c_str()); + ASSERT_STREQ(id_with_name_and_label_->GetLabel("test_label1").c_str(), "test_value1"); + ASSERT_TRUE(id_with_name_and_label_->ExistLabel("test_label1")); + ASSERT_TRUE(id_with_name_and_label_->CheckLabel("test_label1", "test_value1")); + + ASSERT_TRUE(id_with_name_and_label_->GetLabel("not_exist_label").empty()); + ASSERT_FALSE(id_with_name_and_label_->ExistLabel("not_exist_label")); + ASSERT_FALSE(id_with_name_and_label_->CheckLabel("not_exist_label", "test_value1")); + ASSERT_FALSE(id_with_name_and_label_->CheckLabel("test_label1", "test_value2")); + + // id with label, empty name + ASSERT_FALSE(id_with_label_->IsValid()); } - + TEST_F(MetricIdTest, CopyTest) { - // copy id - MetricId copy_id(*id_with_name_and_label_); - ASSERT_TRUE(copy_id.IsValid()); - ASSERT_STREQ(copy_id.GetName().c_str(), id_with_name_and_label_->GetName().c_str()); - ASSERT_EQ(copy_id.GetLabelMap().size(), id_with_name_and_label_->GetLabelMap().size()); - ASSERT_STREQ(copy_id.ToString().c_str(), id_with_name_and_label_->ToString().c_str()); - ASSERT_STREQ(copy_id.GetLabel("test_label1").c_str(), "test_value1"); - ASSERT_TRUE(copy_id.ExistLabel("test_label1")); - ASSERT_TRUE(copy_id.CheckLabel("test_label1", "test_value1")); - - ASSERT_TRUE(copy_id.GetLabel("not_exist_label").empty()); - ASSERT_FALSE(copy_id.ExistLabel("not_exist_label")); - ASSERT_FALSE(copy_id.CheckLabel("not_exist_label", "test_value1")); - ASSERT_FALSE(copy_id.CheckLabel("test_label1", "test_value2")); - ASSERT_TRUE(copy_id == *id_with_name_and_label_); - - // assign id - MetricId assign_id; - assign_id = *id_with_name_and_label_; - ASSERT_TRUE(assign_id.IsValid()); - ASSERT_STREQ(assign_id.GetName().c_str(), id_with_name_and_label_->GetName().c_str()); - ASSERT_EQ(assign_id.GetLabelMap().size(), id_with_name_and_label_->GetLabelMap().size()); - ASSERT_STREQ(assign_id.ToString().c_str(), id_with_name_and_label_->ToString().c_str()); - ASSERT_STREQ(assign_id.GetLabel("test_label1").c_str(), "test_value1"); - ASSERT_TRUE(assign_id.ExistLabel("test_label1")); - ASSERT_TRUE(assign_id.CheckLabel("test_label1", "test_value1")); - - ASSERT_TRUE(assign_id.GetLabel("not_exist_label").empty()); - ASSERT_FALSE(assign_id.ExistLabel("not_exist_label")); - ASSERT_FALSE(assign_id.CheckLabel("not_exist_label", "test_value1")); - ASSERT_FALSE(assign_id.CheckLabel("test_label1", "test_value2")); - ASSERT_TRUE(assign_id == *id_with_name_and_label_); + // copy id + MetricId copy_id(*id_with_name_and_label_); + ASSERT_TRUE(copy_id.IsValid()); + ASSERT_STREQ(copy_id.GetName().c_str(), id_with_name_and_label_->GetName().c_str()); + ASSERT_EQ(copy_id.GetLabelMap().size(), id_with_name_and_label_->GetLabelMap().size()); + ASSERT_STREQ(copy_id.ToString().c_str(), id_with_name_and_label_->ToString().c_str()); + ASSERT_STREQ(copy_id.GetLabel("test_label1").c_str(), "test_value1"); + ASSERT_TRUE(copy_id.ExistLabel("test_label1")); + ASSERT_TRUE(copy_id.CheckLabel("test_label1", "test_value1")); + + ASSERT_TRUE(copy_id.GetLabel("not_exist_label").empty()); + ASSERT_FALSE(copy_id.ExistLabel("not_exist_label")); + ASSERT_FALSE(copy_id.CheckLabel("not_exist_label", "test_value1")); + ASSERT_FALSE(copy_id.CheckLabel("test_label1", "test_value2")); + ASSERT_TRUE(copy_id == *id_with_name_and_label_); + + // assign id + MetricId assign_id; + assign_id = *id_with_name_and_label_; + ASSERT_TRUE(assign_id.IsValid()); + ASSERT_STREQ(assign_id.GetName().c_str(), id_with_name_and_label_->GetName().c_str()); + ASSERT_EQ(assign_id.GetLabelMap().size(), id_with_name_and_label_->GetLabelMap().size()); + ASSERT_STREQ(assign_id.ToString().c_str(), id_with_name_and_label_->ToString().c_str()); + ASSERT_STREQ(assign_id.GetLabel("test_label1").c_str(), "test_value1"); + ASSERT_TRUE(assign_id.ExistLabel("test_label1")); + ASSERT_TRUE(assign_id.CheckLabel("test_label1", "test_value1")); + + ASSERT_TRUE(assign_id.GetLabel("not_exist_label").empty()); + ASSERT_FALSE(assign_id.ExistLabel("not_exist_label")); + ASSERT_FALSE(assign_id.CheckLabel("not_exist_label", "test_value1")); + ASSERT_FALSE(assign_id.CheckLabel("test_label1", "test_value2")); + ASSERT_TRUE(assign_id == *id_with_name_and_label_); } TEST_F(MetricIdTest, BuildTest) { - MetricId test_id; - bool ret = false; - - std::string legal_label_str = LabelStringBuilder() - .Append("test_label1", "test_value1") - .Append("test_label2", "test_value2") - .ToString(); - ASSERT_STREQ(legal_label_str.c_str(), label_str_.c_str()); - - ret = MetricId::ParseFromString(kTestMetricName, legal_label_str, &test_id); - ASSERT_TRUE(ret) << "Parse label string: " << legal_label_str << ", failed" << std::endl; - ASSERT_TRUE(test_id.IsValid()); - ASSERT_STREQ(test_id.GetName().c_str(), kTestMetricName.c_str()); - ASSERT_EQ(test_id.GetLabelMap().size(), id_with_name_and_label_->GetLabelMap().size()); - std::string expected_id_str = kTestMetricName + kNameLabelsDelimiter + legal_label_str; - ASSERT_STREQ(test_id.ToString().c_str(), expected_id_str.c_str()); - - std::string single_label_str = LabelStringBuilder() - .Append("test_label1", "test_value1") - .ToString(); - ASSERT_STREQ(single_label_str.c_str(), "test_label1:test_value1"); - ret = MetricId::ParseFromString(kTestMetricName, single_label_str, &test_id); - ASSERT_TRUE(ret) << "Parse label string: " << single_label_str << ", failed" << std::endl; - ASSERT_TRUE(test_id.IsValid()); - ASSERT_STREQ(test_id.GetName().c_str(), kTestMetricName.c_str()); - ASSERT_EQ(test_id.GetLabelMap().size(), 1); - expected_id_str = kTestMetricName + kNameLabelsDelimiter + single_label_str; - ASSERT_STREQ(test_id.ToString().c_str(), expected_id_str.c_str()); - - std::string empty_label_str = LabelStringBuilder().ToString(); - ASSERT_STREQ(empty_label_str.c_str(), ""); - ret = MetricId::ParseFromString(kTestMetricName, empty_label_str, &test_id); - ASSERT_TRUE(ret); - ASSERT_TRUE(test_id.IsValid()); - ASSERT_STREQ(test_id.GetName().c_str(), kTestMetricName.c_str()); - ASSERT_TRUE(test_id.GetLabelMap().empty()); - ASSERT_STREQ(test_id.ToString().c_str(), kTestMetricName.c_str()); - - std::vector illegal_label_str_vec; - illegal_label_str_vec.push_back("haha:hehe,,,,"); - illegal_label_str_vec.push_back("haha:hehe,hoho"); - illegal_label_str_vec.push_back("haha:hehe,hoho:heihei,"); - illegal_label_str_vec.push_back("haha"); - illegal_label_str_vec.push_back(",lalala"); - - for (const std::string& illegal_label : illegal_label_str_vec) { - ret = MetricId::ParseFromString(kTestMetricName, illegal_label, &test_id); - ASSERT_FALSE(ret); - } + MetricId test_id; + bool ret = false; + + std::string legal_label_str = LabelStringBuilder() + .Append("test_label1", "test_value1") + .Append("test_label2", "test_value2") + .ToString(); + ASSERT_STREQ(legal_label_str.c_str(), label_str_.c_str()); + + ret = MetricId::ParseFromString(kTestMetricName, legal_label_str, &test_id); + ASSERT_TRUE(ret) << "Parse label string: " << legal_label_str << ", failed" << std::endl; + ASSERT_TRUE(test_id.IsValid()); + ASSERT_STREQ(test_id.GetName().c_str(), kTestMetricName.c_str()); + ASSERT_EQ(test_id.GetLabelMap().size(), id_with_name_and_label_->GetLabelMap().size()); + std::string expected_id_str = kTestMetricName + kNameLabelsDelimiter + legal_label_str; + ASSERT_STREQ(test_id.ToString().c_str(), expected_id_str.c_str()); + + std::string single_label_str = + LabelStringBuilder().Append("test_label1", "test_value1").ToString(); + ASSERT_STREQ(single_label_str.c_str(), "test_label1:test_value1"); + ret = MetricId::ParseFromString(kTestMetricName, single_label_str, &test_id); + ASSERT_TRUE(ret) << "Parse label string: " << single_label_str << ", failed" << std::endl; + ASSERT_TRUE(test_id.IsValid()); + ASSERT_STREQ(test_id.GetName().c_str(), kTestMetricName.c_str()); + ASSERT_EQ(test_id.GetLabelMap().size(), 1); + expected_id_str = kTestMetricName + kNameLabelsDelimiter + single_label_str; + ASSERT_STREQ(test_id.ToString().c_str(), expected_id_str.c_str()); + + std::string empty_label_str = LabelStringBuilder().ToString(); + ASSERT_STREQ(empty_label_str.c_str(), ""); + ret = MetricId::ParseFromString(kTestMetricName, empty_label_str, &test_id); + ASSERT_TRUE(ret); + ASSERT_TRUE(test_id.IsValid()); + ASSERT_STREQ(test_id.GetName().c_str(), kTestMetricName.c_str()); + ASSERT_TRUE(test_id.GetLabelMap().empty()); + ASSERT_STREQ(test_id.ToString().c_str(), kTestMetricName.c_str()); + + std::vector illegal_label_str_vec; + illegal_label_str_vec.push_back("haha:hehe,,,,"); + illegal_label_str_vec.push_back("haha:hehe,hoho"); + illegal_label_str_vec.push_back("haha:hehe,hoho:heihei,"); + illegal_label_str_vec.push_back("haha"); + illegal_label_str_vec.push_back(",lalala"); + + for (const std::string &illegal_label : illegal_label_str_vec) { + ret = MetricId::ParseFromString(kTestMetricName, illegal_label, &test_id); + ASSERT_FALSE(ret); + } } - -} // end namespace tera - -/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ +} // end namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/common/test/metrics_test.cc b/src/common/test/metrics_test.cc index 7bc5e9abb..e80c47942 100644 --- a/src/common/test/metrics_test.cc +++ b/src/common/test/metrics_test.cc @@ -1,6 +1,6 @@ // Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved // Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. +// found in the LICENSE file. #include #include @@ -9,179 +9,181 @@ #include "gflags/gflags.h" #include "glog/logging.h" #include "gtest/gtest.h" - -#include "common/metric/metric_counter.h" + +#include "common/metric/metric_counter.h" #include "common/metric/hardware_collectors.h" #include "common/metric/collector_report_publisher.h" -#include "common/this_thread.h" +#include "common/this_thread.h" DECLARE_int64(tera_hardware_collect_period_second); - -namespace tera { - + +namespace tera { + class MetricsTest : public ::testing::Test { -public: - virtual void SetUp() { - // shorter period for test - FLAGS_tera_hardware_collect_period_second = 1; - CollectorReportPublisher::GetInstance().AddHardwareCollectors(); - - label_map_["test_label1"] = "test_value1"; - label_map_["test_label2"] = "test_value2"; - } - - virtual void TearDown() { - CollectorReportPublisher::GetInstance().collectors_.clear(); - label_map_.clear(); - } - -private: - MetricLabels label_map_; + public: + virtual void SetUp() { + // shorter period for test + FLAGS_tera_hardware_collect_period_second = 1; + CollectorReportPublisher::GetInstance().AddHardwareCollectors(); + + label_map_["test_label1"] = "test_value1"; + label_map_["test_label2"] = "test_value2"; + } + + virtual void TearDown() { + CollectorReportPublisher::GetInstance().collectors_.clear(); + label_map_.clear(); + } + + private: + MetricLabels label_map_; }; static void PrintCollectorReportPublisher() { - std::cout << "Print Metric Registry: " << std::endl; - auto& metric_map = CollectorReportPublisher::GetInstance().collectors_; - auto metric_iter = metric_map.begin(); - for (; metric_iter != metric_map.end(); ++metric_iter) { - std::cout << metric_iter->first.ToString() << std::endl; - } + std::cout << "Print Metric Registry: " << std::endl; + auto& metric_map = CollectorReportPublisher::GetInstance().collectors_; + auto metric_iter = metric_map.begin(); + for (; metric_iter != metric_map.end(); ++metric_iter) { + std::cout << metric_iter->first.ToString() << std::endl; + } } TEST_F(MetricsTest, RegisterTest) { - // hardware metrics - ASSERT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(MetricId(kInstCpuMetricName))); - ASSERT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(MetricId(kInstMemMetricName))); - ASSERT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(MetricId(kInstNetRXMetricName))); - ASSERT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(MetricId(kInstNetTXMetricName))); - - bool ret = false; - Counter* test_counters = new Counter[5]; - // register a counter - MetricId test_id_1("test_counter", label_map_); - ret = CollectorReportPublisher::GetInstance().AddCollector( - test_id_1, std::unique_ptr(new CounterCollector(&test_counters[0]))); - EXPECT_TRUE(ret); - EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id_1)); - PrintCollectorReportPublisher(); - - // register a counter with different name - MetricId test_id_2("test_counter_2", label_map_); - ret = CollectorReportPublisher::GetInstance().AddCollector( - test_id_2, std::unique_ptr(new CounterCollector(&test_counters[0]))); - EXPECT_TRUE(ret); - EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id_2)); - PrintCollectorReportPublisher(); - - // register a counter with name only - ret = CollectorReportPublisher::GetInstance().AddCollector( - MetricId("test_counter3"), std::unique_ptr(new CounterCollector(&test_counters[2]))); - EXPECT_TRUE(ret); - EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(MetricId("test_counter3"))); - PrintCollectorReportPublisher(); - - // register a counter with same name and different labels - label_map_["test_label2"] = "other_label_value"; - MetricId test_id_4("test_counter", label_map_); - ret = CollectorReportPublisher::GetInstance().AddCollector( - test_id_4, std::unique_ptr(new CounterCollector(&test_counters[3]))); - EXPECT_TRUE(ret); - EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id_4)); - PrintCollectorReportPublisher(); - - // register a counter with same id - ret = CollectorReportPublisher::GetInstance().AddCollector( - test_id_1, std::unique_ptr(new CounterCollector(&test_counters[4]))); - EXPECT_FALSE(ret); - EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id_1)); - PrintCollectorReportPublisher(); - - ret = CollectorReportPublisher::GetInstance().AddCollector( - MetricId("test_counter3"), std::unique_ptr(new CounterCollector(&test_counters[4]))); - EXPECT_FALSE(ret); - EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(MetricId("test_counter3"))); - PrintCollectorReportPublisher(); - - // unregister - ret = CollectorReportPublisher::GetInstance().DeleteCollector(test_id_1); - EXPECT_TRUE(ret); - EXPECT_FALSE(CollectorReportPublisher::GetInstance().HasCollector(test_id_1)); - EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id_2)); - - ret = CollectorReportPublisher::GetInstance().DeleteCollector(MetricId("test_counter3")); - EXPECT_TRUE(ret); - EXPECT_FALSE(CollectorReportPublisher::GetInstance().HasCollector(MetricId("test_counter3"))); - EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id_2)); - - MetricId not_registered_id("not_registered_name", label_map_); - ret = CollectorReportPublisher::GetInstance().DeleteCollector(not_registered_id); - EXPECT_FALSE(ret); - - label_map_["test_label2"] = "not_registered_value"; - MetricId not_registered_id_2("test_counter", label_map_); - ret = CollectorReportPublisher::GetInstance().DeleteCollector(not_registered_id_2); - EXPECT_FALSE(ret); - - ret = CollectorReportPublisher::GetInstance().DeleteCollector(MetricId("not_registered_name")); - EXPECT_FALSE(ret); - - delete[] test_counters; + // hardware metrics + ASSERT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(MetricId(kInstCpuMetricName))); + ASSERT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(MetricId(kInstMemMetricName))); + ASSERT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(MetricId(kInstNetRXMetricName))); + ASSERT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(MetricId(kInstNetTXMetricName))); + + bool ret = false; + Counter* test_counters = new Counter[5]; + // register a counter + MetricId test_id_1("test_counter", label_map_); + ret = CollectorReportPublisher::GetInstance().AddCollector( + test_id_1, std::unique_ptr(new CounterCollector(&test_counters[0]))); + EXPECT_TRUE(ret); + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id_1)); + PrintCollectorReportPublisher(); + + // register a counter with different name + MetricId test_id_2("test_counter_2", label_map_); + ret = CollectorReportPublisher::GetInstance().AddCollector( + test_id_2, std::unique_ptr(new CounterCollector(&test_counters[0]))); + EXPECT_TRUE(ret); + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id_2)); + PrintCollectorReportPublisher(); + + // register a counter with name only + ret = CollectorReportPublisher::GetInstance().AddCollector( + MetricId("test_counter3"), + std::unique_ptr(new CounterCollector(&test_counters[2]))); + EXPECT_TRUE(ret); + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(MetricId("test_counter3"))); + PrintCollectorReportPublisher(); + + // register a counter with same name and different labels + label_map_["test_label2"] = "other_label_value"; + MetricId test_id_4("test_counter", label_map_); + ret = CollectorReportPublisher::GetInstance().AddCollector( + test_id_4, std::unique_ptr(new CounterCollector(&test_counters[3]))); + EXPECT_TRUE(ret); + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id_4)); + PrintCollectorReportPublisher(); + + // register a counter with same id + ret = CollectorReportPublisher::GetInstance().AddCollector( + test_id_1, std::unique_ptr(new CounterCollector(&test_counters[4]))); + EXPECT_FALSE(ret); + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id_1)); + PrintCollectorReportPublisher(); + + ret = CollectorReportPublisher::GetInstance().AddCollector( + MetricId("test_counter3"), + std::unique_ptr(new CounterCollector(&test_counters[4]))); + EXPECT_FALSE(ret); + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(MetricId("test_counter3"))); + PrintCollectorReportPublisher(); + + // unregister + ret = CollectorReportPublisher::GetInstance().DeleteCollector(test_id_1); + EXPECT_TRUE(ret); + EXPECT_FALSE(CollectorReportPublisher::GetInstance().HasCollector(test_id_1)); + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id_2)); + + ret = CollectorReportPublisher::GetInstance().DeleteCollector(MetricId("test_counter3")); + EXPECT_TRUE(ret); + EXPECT_FALSE(CollectorReportPublisher::GetInstance().HasCollector(MetricId("test_counter3"))); + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id_2)); + + MetricId not_registered_id("not_registered_name", label_map_); + ret = CollectorReportPublisher::GetInstance().DeleteCollector(not_registered_id); + EXPECT_FALSE(ret); + + label_map_["test_label2"] = "not_registered_value"; + MetricId not_registered_id_2("test_counter", label_map_); + ret = CollectorReportPublisher::GetInstance().DeleteCollector(not_registered_id_2); + EXPECT_FALSE(ret); + + ret = CollectorReportPublisher::GetInstance().DeleteCollector(MetricId("not_registered_name")); + EXPECT_FALSE(ret); + + delete[] test_counters; } TEST_F(MetricsTest, ReportTest) { - // check report cache - int64_t value = 0; - - // register 2 counter - std::string label_str = LabelStringBuilder() - .Append("test_label1", "test_value1") - .Append("test_label2", "test_value2") - .ToString(); - MetricCounter periodic_counter("periodic", label_str, {}, true); - MetricCounter nonperiodic_counter("nonperiodic", label_str, {}, false); - - for (size_t i = 0; i < 3; ++i) { - periodic_counter.Inc(); - nonperiodic_counter.Inc(); - } - EXPECT_EQ(periodic_counter.Get(), 3); - EXPECT_EQ(nonperiodic_counter.Get(), 3); - - // do collect - ThisThread::Sleep(10); - - CollectorReportPublisher::GetInstance().Refresh(); - std::shared_ptr report = CollectorReportPublisher::GetInstance().GetCollectorReport(); - - EXPECT_EQ(periodic_counter.Get(), 0); - EXPECT_EQ(nonperiodic_counter.Get(), 3); - - // check report - EXPECT_EQ(report->report.size(), CollectorReportPublisher::GetInstance().collectors_.size()); - value = report->FindMetricValue("periodic", label_str); - EXPECT_EQ(value, 3); - value = report->FindMetricValue("nonperiodic", label_str); - EXPECT_EQ(value, 3); - - // change counter value + // check report cache + int64_t value = 0; + + // register 2 counter + std::string label_str = LabelStringBuilder() + .Append("test_label1", "test_value1") + .Append("test_label2", "test_value2") + .ToString(); + MetricCounter periodic_counter("periodic", label_str, {}, true); + MetricCounter nonperiodic_counter("nonperiodic", label_str, {}, false); + + for (size_t i = 0; i < 3; ++i) { periodic_counter.Inc(); - nonperiodic_counter.Dec(); - EXPECT_EQ(periodic_counter.Get(), 1); - EXPECT_EQ(nonperiodic_counter.Get(), 2); - - // report again - CollectorReportPublisher::GetInstance().Refresh(); - report = CollectorReportPublisher::GetInstance().GetCollectorReport(); - EXPECT_EQ(periodic_counter.Get(), 0); - EXPECT_EQ(nonperiodic_counter.Get(), 2); - - value = report->FindMetricValue("periodic", label_str); - EXPECT_EQ(value, 1); - value = report->FindMetricValue("nonperiodic", label_str); - EXPECT_EQ(value, 2); + nonperiodic_counter.Inc(); + } + EXPECT_EQ(periodic_counter.Get(), 3); + EXPECT_EQ(nonperiodic_counter.Get(), 3); + + // do collect + ThisThread::Sleep(10); + + CollectorReportPublisher::GetInstance().Refresh(); + std::shared_ptr report = + CollectorReportPublisher::GetInstance().GetCollectorReport(); + + EXPECT_EQ(periodic_counter.Get(), 0); + EXPECT_EQ(nonperiodic_counter.Get(), 3); + + // check report + EXPECT_EQ(report->report.size(), CollectorReportPublisher::GetInstance().collectors_.size()); + value = report->FindMetricValue("periodic", label_str); + EXPECT_EQ(value, 3); + value = report->FindMetricValue("nonperiodic", label_str); + EXPECT_EQ(value, 3); + + // change counter value + periodic_counter.Inc(); + nonperiodic_counter.Dec(); + EXPECT_EQ(periodic_counter.Get(), 1); + EXPECT_EQ(nonperiodic_counter.Get(), 2); + + // report again + CollectorReportPublisher::GetInstance().Refresh(); + report = CollectorReportPublisher::GetInstance().GetCollectorReport(); + EXPECT_EQ(periodic_counter.Get(), 0); + EXPECT_EQ(nonperiodic_counter.Get(), 2); + + value = report->FindMetricValue("periodic", label_str); + EXPECT_EQ(value, 1); + value = report->FindMetricValue("nonperiodic", label_str); + EXPECT_EQ(value, 2); } - -} // end namespace tera - -/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ +} // end namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/common/test/percentile_counter_test.cc b/src/common/test/percentile_counter_test.cc index 350ce118b..65db6d52f 100644 --- a/src/common/test/percentile_counter_test.cc +++ b/src/common/test/percentile_counter_test.cc @@ -1,6 +1,6 @@ // Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved // Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. +// found in the LICENSE file. #include #include @@ -9,108 +9,107 @@ #include "gflags/gflags.h" #include "glog/logging.h" #include "gtest/gtest.h" - -#include "common/metric/percentile_counter.h" - -namespace tera { - + +#include "common/metric/percentile_counter.h" + +namespace tera { + class PercentileCounterTest : public ::testing::Test { -public: - virtual void SetUp() { - label_str_ = LabelStringBuilder() - .Append("test_label1", "test_value1") - .Append("test_label2", "test_value2") - .ToString(); - } - - virtual void TearDown() {} - -private: - std::string label_str_; + public: + virtual void SetUp() { + label_str_ = LabelStringBuilder() + .Append("test_label1", "test_value1") + .Append("test_label2", "test_value2") + .ToString(); + } + + virtual void TearDown() {} + + private: + std::string label_str_; }; TEST_F(PercentileCounterTest, RegisterTest) { - MetricId test_id; - { - // with name and labels - PercentileCounter counter1("counter1", label_str_, 10); - test_id = counter1.metric_id_; - - EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(counter1.metric_id_)) - << "metric_id " << counter1.metric_id_.ToString() << std::endl; - EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id)) - << "metric_id " << test_id.ToString() << std::endl; - EXPECT_TRUE(counter1.IsRegistered()); - } - EXPECT_FALSE(CollectorReportPublisher::GetInstance().HasCollector(test_id)) - << "metric_id " << test_id.ToString() << std::endl; - - { - // with name only - PercentileCounter counter2("counter2", 10); - test_id = counter2.metric_id_; - - EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(counter2.metric_id_)) - << "metric_id " << counter2.metric_id_.ToString() << std::endl; - EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id)) - << "metric_id " << test_id.ToString() << std::endl; - EXPECT_TRUE(counter2.IsRegistered()); - } - EXPECT_FALSE(CollectorReportPublisher::GetInstance().HasCollector(test_id)) - << "metric_id " << test_id.ToString() << std::endl; - - // with illegal label string - ASSERT_THROW(PercentileCounter("counter3", "illegal_label_string", 10, {}), std::invalid_argument); - - // with empty name - ASSERT_THROW(PercentileCounter("", label_str_, 10, {}), std::invalid_argument); - ASSERT_THROW(PercentileCounter("", 10, {}), std::invalid_argument); + MetricId test_id; + { + // with name and labels + PercentileCounter counter1("counter1", label_str_, 10); + test_id = counter1.metric_id_; + + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(counter1.metric_id_)) + << "metric_id " << counter1.metric_id_.ToString() << std::endl; + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id)) + << "metric_id " << test_id.ToString() << std::endl; + EXPECT_TRUE(counter1.IsRegistered()); + } + EXPECT_FALSE(CollectorReportPublisher::GetInstance().HasCollector(test_id)) + << "metric_id " << test_id.ToString() << std::endl; + + { + // with name only + PercentileCounter counter2("counter2", 10); + test_id = counter2.metric_id_; + + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(counter2.metric_id_)) + << "metric_id " << counter2.metric_id_.ToString() << std::endl; + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id)) + << "metric_id " << test_id.ToString() << std::endl; + EXPECT_TRUE(counter2.IsRegistered()); + } + EXPECT_FALSE(CollectorReportPublisher::GetInstance().HasCollector(test_id)) + << "metric_id " << test_id.ToString() << std::endl; + + // with illegal label string + ASSERT_THROW(PercentileCounter("counter3", "illegal_label_string", 10, {}), + std::invalid_argument); + + // with empty name + ASSERT_THROW(PercentileCounter("", label_str_, 10, {}), std::invalid_argument); + ASSERT_THROW(PercentileCounter("", 10, {}), std::invalid_argument); } - + TEST_F(PercentileCounterTest, CounterTest) { - PercentileCounter Percent_80("80Percent", label_str_, 80, {SubscriberType::LATEST}); - PercentileCounter Percent_90("90Percent", label_str_, 90, {SubscriberType::LATEST}); - PercentileCounter Percent_99("99Percent", label_str_, 99, {SubscriberType::LATEST}); - - for (int i = 1; i <= 100; ++i) { - Percent_80.Append(i); - Percent_90.Append(i); - Percent_99.Append(i); - } - - EXPECT_EQ(Percent_80.Get(), 81); - EXPECT_EQ(Percent_90.Get(), 91); - EXPECT_EQ(Percent_99.Get(), 100); - - // do collect - CollectorReportPublisher::GetInstance().Refresh(); - auto report = CollectorReportPublisher::GetInstance().GetSubscriberReport(); - - EXPECT_EQ(Percent_80.Get(), -1); - EXPECT_EQ(Percent_90.Get(), -1); - EXPECT_EQ(Percent_99.Get(), -1); - - EXPECT_TRUE(report->find(MetricId("80Percent", label_str_)) != report->end()); - EXPECT_TRUE(report->find(MetricId("90Percent", label_str_)) != report->end()); - EXPECT_TRUE(report->find(MetricId("99Percent", label_str_)) != report->end()); - - EXPECT_EQ(report->find(MetricId("80Percent", label_str_))->second.Value(), 81); - EXPECT_EQ(report->find(MetricId("90Percent", label_str_))->second.Value(), 91); - EXPECT_EQ(report->find(MetricId("99Percent", label_str_))->second.Value(), 100); - - for (int i = 1; i <= 10; ++i) { - Percent_80.Append(i); - Percent_90.Append(i); - Percent_99.Append(i); - } - - EXPECT_EQ(Percent_80.Get(), 9); - EXPECT_EQ(Percent_90.Get(), 10); - EXPECT_EQ(Percent_99.Get(), 10); - + PercentileCounter Percent_80("80Percent", label_str_, 80, {SubscriberType::LATEST}); + PercentileCounter Percent_90("90Percent", label_str_, 90, {SubscriberType::LATEST}); + PercentileCounter Percent_99("99Percent", label_str_, 99, {SubscriberType::LATEST}); + + for (int i = 1; i <= 100; ++i) { + Percent_80.Append(i); + Percent_90.Append(i); + Percent_99.Append(i); + } + + EXPECT_EQ(Percent_80.Get(), 81); + EXPECT_EQ(Percent_90.Get(), 91); + EXPECT_EQ(Percent_99.Get(), 100); + + // do collect + CollectorReportPublisher::GetInstance().Refresh(); + auto report = CollectorReportPublisher::GetInstance().GetSubscriberReport(); + + EXPECT_EQ(Percent_80.Get(), -1); + EXPECT_EQ(Percent_90.Get(), -1); + EXPECT_EQ(Percent_99.Get(), -1); + + EXPECT_TRUE(report->find(MetricId("80Percent", label_str_)) != report->end()); + EXPECT_TRUE(report->find(MetricId("90Percent", label_str_)) != report->end()); + EXPECT_TRUE(report->find(MetricId("99Percent", label_str_)) != report->end()); + + EXPECT_EQ(report->find(MetricId("80Percent", label_str_))->second.Value(), 81); + EXPECT_EQ(report->find(MetricId("90Percent", label_str_))->second.Value(), 91); + EXPECT_EQ(report->find(MetricId("99Percent", label_str_))->second.Value(), 100); + + for (int i = 1; i <= 10; ++i) { + Percent_80.Append(i); + Percent_90.Append(i); + Percent_99.Append(i); + } + + EXPECT_EQ(Percent_80.Get(), 9); + EXPECT_EQ(Percent_90.Get(), 10); + EXPECT_EQ(Percent_99.Get(), 10); } - -} // end namespace tera - -/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ +} // end namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/common/test/profiler_test.cc b/src/common/test/profiler_test.cc index 1454ad72f..ca3ed1c6e 100644 --- a/src/common/test/profiler_test.cc +++ b/src/common/test/profiler_test.cc @@ -16,57 +16,56 @@ namespace tera { class ProfilerTest : public ::testing::Test { -public: - virtual void SetUp() {} + public: + virtual void SetUp() {} - virtual void TearDown() {} + virtual void TearDown() {} -private: - CpuProfiler cpu_profiler_; - HeapProfiler heap_profiler_; + private: + CpuProfiler cpu_profiler_; + HeapProfiler heap_profiler_; }; TEST_F(ProfilerTest, SetEnableTest) { - ProfilerState ps; - EXPECT_FALSE(cpu_profiler_.enable_); - EXPECT_FALSE(heap_profiler_.enable_); - ProfilerGetCurrentState(&ps); - EXPECT_FALSE(ps.enabled); - EXPECT_FALSE(IsHeapProfilerRunning()); + ProfilerState ps; + EXPECT_FALSE(cpu_profiler_.enable_); + EXPECT_FALSE(heap_profiler_.enable_); + ProfilerGetCurrentState(&ps); + EXPECT_FALSE(ps.enabled); + EXPECT_FALSE(IsHeapProfilerRunning()); - cpu_profiler_.SetEnable(true); + cpu_profiler_.SetEnable(true); - heap_profiler_.SetEnable(true); + heap_profiler_.SetEnable(true); - EXPECT_TRUE(cpu_profiler_.enable_); - EXPECT_TRUE(heap_profiler_.enable_); + EXPECT_TRUE(cpu_profiler_.enable_); + EXPECT_TRUE(heap_profiler_.enable_); - ThisThread::Sleep(2000); - ProfilerGetCurrentState(&ps); - EXPECT_TRUE(ps.enabled); - EXPECT_TRUE(IsHeapProfilerRunning()); + ThisThread::Sleep(2000); + ProfilerGetCurrentState(&ps); + EXPECT_TRUE(ps.enabled); + EXPECT_TRUE(IsHeapProfilerRunning()); - cpu_profiler_.SetEnable(false); - heap_profiler_.SetEnable(false); + cpu_profiler_.SetEnable(false); + heap_profiler_.SetEnable(false); - EXPECT_FALSE(cpu_profiler_.enable_); - EXPECT_FALSE(heap_profiler_.enable_); + EXPECT_FALSE(cpu_profiler_.enable_); + EXPECT_FALSE(heap_profiler_.enable_); - ThisThread::Sleep(2000); - ProfilerGetCurrentState(&ps); - EXPECT_FALSE(ps.enabled); - EXPECT_FALSE(IsHeapProfilerRunning()); + ThisThread::Sleep(2000); + ProfilerGetCurrentState(&ps); + EXPECT_FALSE(ps.enabled); + EXPECT_FALSE(IsHeapProfilerRunning()); } TEST_F(ProfilerTest, SetInvervalTest) { - EXPECT_EQ(cpu_profiler_.interval_, std::chrono::seconds(10)); - EXPECT_EQ(heap_profiler_.interval_, std::chrono::seconds(10)); - cpu_profiler_.SetInterval(1); - heap_profiler_.SetInterval(2); - EXPECT_EQ(cpu_profiler_.interval_, std::chrono::seconds(1)); - EXPECT_EQ(heap_profiler_.interval_, std::chrono::seconds(2)); + EXPECT_EQ(cpu_profiler_.interval_, std::chrono::seconds(10)); + EXPECT_EQ(heap_profiler_.interval_, std::chrono::seconds(10)); + cpu_profiler_.SetInterval(1); + heap_profiler_.SetInterval(2); + EXPECT_EQ(cpu_profiler_.interval_, std::chrono::seconds(1)); + EXPECT_EQ(heap_profiler_.interval_, std::chrono::seconds(2)); } -} // end namespace tera +} // end namespace tera /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ - diff --git a/src/common/test/progress_bar_test.cc b/src/common/test/progress_bar_test.cc new file mode 100644 index 000000000..1cb375451 --- /dev/null +++ b/src/common/test/progress_bar_test.cc @@ -0,0 +1,29 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Author: xupeilin@baidu.com + +#include "common/console/progress_bar.h" + +#include +#include +#include + +using common::ProgressBar; + +TEST(ProgressBarTest, Test) { + int cur_size = 0; + int total_size = 100000000; + + ProgressBar progress_bar(ProgressBar::ENHANCED, total_size, 100, "B"); + + srand((uint32_t)time(NULL)); + timespec interval = {0, 1000}; + while (cur_size < total_size) { + cur_size += rand() % 10000; + progress_bar.Refresh(cur_size); + nanosleep(&interval, &interval); + } + progress_bar.Done(); +} diff --git a/src/common/test/thread_pool_test.cc b/src/common/test/thread_pool_test.cc index 8462b78a5..1ccc7dca7 100644 --- a/src/common/test/thread_pool_test.cc +++ b/src/common/test/thread_pool_test.cc @@ -22,73 +22,64 @@ namespace tera { TEST(TimerTest, Basic) { - struct timespec ts1, ts2, ts3; - struct timeval tv; - - clock_gettime(CLOCK_MONOTONIC, &ts1); - clock_gettime(CLOCK_MONOTONIC_RAW, &ts3); - clock_gettime(CLOCK_REALTIME, &ts2); - gettimeofday(&tv, NULL); - - std::cout << "ts1.tv_sec " << ts1.tv_sec - << ", ts1.tv_nsec " << ts1.tv_nsec - << std::endl; - std::cout << "ts2.tv_sec " << ts2.tv_sec - << ", ts2.tv_nsec " << ts2.tv_nsec - << std::endl; - std::cout << "ts3.tv_sec " << ts3.tv_sec - << ", ts3.tv_nsec " << ts3.tv_nsec - << std::endl; - std::cout << "tv.tv_sec " << tv.tv_sec - << ", tv.tv_usec " << tv.tv_usec - << std::endl; - - int delta = 0; - delta = ts2.tv_sec - tv.tv_sec; - ASSERT_TRUE(-1 <= delta && delta <= 1); - ASSERT_TRUE(ts1.tv_sec < ts2.tv_sec); - ASSERT_TRUE(ts1.tv_sec < tv.tv_sec); + struct timespec ts1, ts2, ts3; + struct timeval tv; + + clock_gettime(CLOCK_MONOTONIC, &ts1); + clock_gettime(CLOCK_MONOTONIC_RAW, &ts3); + clock_gettime(CLOCK_REALTIME, &ts2); + gettimeofday(&tv, NULL); + + std::cout << "ts1.tv_sec " << ts1.tv_sec << ", ts1.tv_nsec " << ts1.tv_nsec << std::endl; + std::cout << "ts2.tv_sec " << ts2.tv_sec << ", ts2.tv_nsec " << ts2.tv_nsec << std::endl; + std::cout << "ts3.tv_sec " << ts3.tv_sec << ", ts3.tv_nsec " << ts3.tv_nsec << std::endl; + std::cout << "tv.tv_sec " << tv.tv_sec << ", tv.tv_usec " << tv.tv_usec << std::endl; + + int delta = 0; + delta = ts2.tv_sec - tv.tv_sec; + ASSERT_TRUE(-1 <= delta && delta <= 1); + ASSERT_TRUE(ts1.tv_sec < ts2.tv_sec); + ASSERT_TRUE(ts1.tv_sec < tv.tv_sec); } TEST(TimerTest, test1) { - struct timespec ts1; - struct timeval tv; - - clock_gettime(CLOCK_REALTIME, &ts1); - gettimeofday(&tv, NULL); - int64_t ts = get_micros(); - - int delta = 0; - delta = ts1.tv_sec - tv.tv_sec; - ASSERT_TRUE(-1 <= delta && delta <= 1); - delta = ts / 1000000 - tv.tv_sec; - ASSERT_TRUE(-1 <= delta && delta <= 1); + struct timespec ts1; + struct timeval tv; + + clock_gettime(CLOCK_REALTIME, &ts1); + gettimeofday(&tv, NULL); + int64_t ts = get_micros(); + + int delta = 0; + delta = ts1.tv_sec - tv.tv_sec; + ASSERT_TRUE(-1 <= delta && delta <= 1); + delta = ts / 1000000 - tv.tv_sec; + ASSERT_TRUE(-1 <= delta && delta <= 1); } common::Mutex mu; common::CondVar cv(&mu); void DelayTask_issue1(int32_t time, int32_t time_ms) { - struct timespec ts1; - clock_gettime(CLOCK_MONOTONIC, &ts1); - int delta = ts1.tv_sec - (time + time_ms / 1000); - ASSERT_TRUE(-1 <= delta && delta <= 1); - cv.Signal(); - return; + struct timespec ts1; + clock_gettime(CLOCK_MONOTONIC, &ts1); + int delta = ts1.tv_sec - (time + time_ms / 1000); + ASSERT_TRUE(-1 <= delta && delta <= 1); + cv.Signal(); + return; } TEST(ThreadPoolTest, Basic) { - mu.Lock(); - common::ThreadPool* pool = new common::ThreadPool(1000); - struct timespec ts1; - clock_gettime(CLOCK_MONOTONIC, &ts1); - ThreadPool::Task task = - std::bind(&DelayTask_issue1, ts1.tv_sec, 5000); - pool->DelayTask(5000, task); - - cv.Wait(); - mu.Unlock(); - delete pool; + mu.Lock(); + common::ThreadPool* pool = new common::ThreadPool(1000); + struct timespec ts1; + clock_gettime(CLOCK_MONOTONIC, &ts1); + ThreadPool::Task task = std::bind(&DelayTask_issue1, ts1.tv_sec, 5000); + pool->DelayTask(5000, task); + + cv.Wait(); + mu.Unlock(); + delete pool; } -} // namespace tera +} // namespace tera diff --git a/src/common/this_thread.h b/src/common/this_thread.h index 7f2b38188..89d286d4d 100644 --- a/src/common/this_thread.h +++ b/src/common/this_thread.h @@ -4,37 +4,44 @@ // // Author: yanshiguang02@baidu.com -#ifndef TERA_COMMON_THIS_THREAD_H_ -#define TERA_COMMON_THIS_THREAD_H_ +#ifndef TERA_COMMON_THIS_THREAD_H_ +#define TERA_COMMON_THIS_THREAD_H_ #include #include #include #include #include +#include namespace common { class ThisThread { -public: - /// Sleep in ms - static void Sleep(int64_t time_ms) { - if (time_ms > 0) { - timespec ts = {time_ms / 1000, (time_ms % 1000) * 1000000 }; - nanosleep(&ts, &ts); - } - } - /// Get thread id - static int GetId() { - return syscall(__NR_gettid); - } - /// Yield cpu - static void Yield() { - sched_yield(); + public: + /// Sleep in ms + static void Sleep(int64_t time_ms) { + if (time_ms > 0) { + timespec ts = {time_ms / 1000, (time_ms % 1000) * 1000000}; + nanosleep(&ts, &ts); } + } + /// Get thread id + static int GetId() { return syscall(__NR_gettid); } + + /// Yield cpu + static void Yield() { sched_yield(); } + + /// Thread-safe random generator + template + static T GetRandomValue(T min, T max) { + static thread_local std::random_device rd; + static thread_local std::mt19937 gen(rd()); + std::uniform_int_distribution dist(min, max); + return dist(gen); + } }; -} // namespace common +} // namespace common using common::ThisThread; diff --git a/src/common/thread.h b/src/common/thread.h deleted file mode 100644 index 24e6842e1..000000000 --- a/src/common/thread.h +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. -// -// Author: yanshiguang02@baidu.com - -#ifndef TERA_COMMON_THREAD_H_ -#define TERA_COMMON_THREAD_H_ - -#include -#include - -namespace common { - -class Thread { -public: - Thread() : tid_(0) {} - bool Start(std::function thread_proc) { - user_proc_ = thread_proc; - int ret = pthread_create(&tid_, NULL, ProcWrapper, this); - return (ret == 0); - } - bool Join() { - int ret = pthread_join(tid_, NULL); - return (ret == 0); - } - -private: - static void* ProcWrapper(void* arg) { - reinterpret_cast(arg)->user_proc_(); - return NULL; - } - -private: - std::function user_proc_; - pthread_t tid_; -}; - -} // namespace common - -#endif // TERA_COMMON_THREAD_H_ diff --git a/src/common/thread_attributes.h b/src/common/thread_attributes.h index d415feb47..4b5193559 100644 --- a/src/common/thread_attributes.h +++ b/src/common/thread_attributes.h @@ -14,59 +14,55 @@ #include "common/base/stdint.h" class ThreadAttributes { -public: - ThreadAttributes() { - cpu_num_ = sysconf(_SC_NPROCESSORS_CONF); - mask_ = GetCpuAffinity(); - } - ~ThreadAttributes() {} + public: + ThreadAttributes() { + cpu_num_ = sysconf(_SC_NPROCESSORS_CONF); + mask_ = GetCpuAffinity(); + } + ~ThreadAttributes() {} - int32_t GetCpuNum() { - return cpu_num_; - } + int32_t GetCpuNum() { return cpu_num_; } - cpu_set_t GetCpuAffinity() { - ResetCpuMask(); - if (sched_getaffinity(0, sizeof(mask_), &mask_) == -1) { - ResetCpuMask(); - } - return mask_; + cpu_set_t GetCpuAffinity() { + ResetCpuMask(); + if (sched_getaffinity(0, sizeof(mask_), &mask_) == -1) { + ResetCpuMask(); } - bool SetCpuAffinity() { - if (sched_setaffinity(0, sizeof(mask_), &mask_) == -1) { - return false; - } - return true; + return mask_; + } + bool SetCpuAffinity() { + if (sched_setaffinity(0, sizeof(mask_), &mask_) == -1) { + return false; } + return true; + } - bool SetCpuMask(int32_t cpu_id) { - if (cpu_id < 0 || cpu_id >= cpu_num_) { - return false; - } - - if (CPU_ISSET(cpu_id, &mask_)) { - return true; - } - CPU_SET(cpu_id, &mask_); - return true; + bool SetCpuMask(int32_t cpu_id) { + if (cpu_id < 0 || cpu_id >= cpu_num_) { + return false; } - void ResetCpuMask() { - CPU_ZERO(&mask_); - } - void MarkCurMask() { - CPU_ZERO(&last_mask_); - last_mask_ = mask_; - } - bool RevertCpuAffinity() { - ResetCpuMask(); - mask_ = last_mask_; - return SetCpuAffinity(); + + if (CPU_ISSET(cpu_id, &mask_)) { + return true; } + CPU_SET(cpu_id, &mask_); + return true; + } + void ResetCpuMask() { CPU_ZERO(&mask_); } + void MarkCurMask() { + CPU_ZERO(&last_mask_); + last_mask_ = mask_; + } + bool RevertCpuAffinity() { + ResetCpuMask(); + mask_ = last_mask_; + return SetCpuAffinity(); + } -private: - int32_t cpu_num_; - cpu_set_t mask_; - cpu_set_t last_mask_; + private: + int32_t cpu_num_; + cpu_set_t mask_; + cpu_set_t last_mask_; }; -#endif // TERA_COMMON_THREAD_THREAD_ATTRIBUTES_H_ +#endif // TERA_COMMON_THREAD_THREAD_ATTRIBUTES_H_ diff --git a/src/common/thread_pool.h b/src/common/thread_pool.h index 461c54fbe..03d379a1b 100644 --- a/src/common/thread_pool.h +++ b/src/common/thread_pool.h @@ -4,9 +4,10 @@ // // Author: yanshiguang02@baidu.com -#ifndef TERA_COMMON_THREAD_POOL_H_ -#define TERA_COMMON_THREAD_POOL_H_ +#ifndef TERA_COMMON_THREAD_POOL_H_ +#define TERA_COMMON_THREAD_POOL_H_ +#include #include #include #include @@ -21,262 +22,258 @@ namespace common { // An unscalable thread pool implimention. class ThreadPool { -public: - ThreadPool(int thread_num = 10) - : threads_num_(thread_num), - pending_num_(0), - work_cv_(&mutex_), - stop_(false), - last_task_id_(0), - schedule_cost_sum_(0), - schedule_count_(0), - task_cost_sum_(0), - task_count_(0) { - Start(); + public: + ThreadPool(int thread_num = 10) + : threads_num_(thread_num), + pending_num_(0), + work_cv_(&mutex_), + stop_(false), + last_task_id_(0), + schedule_cost_sum_(0), + schedule_count_(0), + task_cost_sum_(0), + task_count_(0) { + Start(); + } + ~ThreadPool() { Stop(false); } + // Start a thread_num threads pool. + bool Start() { + MutexLock lock(&mutex_); + if (tids_.size()) { + return false; } - ~ThreadPool() { - Stop(false); - } - // Start a thread_num threads pool. - bool Start() { - MutexLock lock(&mutex_); - if (tids_.size()) { - return false; - } - stop_ = false; - for (int i = 0; i < threads_num_; i++) { - pthread_t tid; - int ret = pthread_create(&tid, NULL, ThreadWrapper, this); - if (ret) { - abort(); - } - tids_.push_back(tid); - } - return true; + stop_ = false; + for (int i = 0; i < threads_num_; i++) { + pthread_t tid; + int ret = pthread_create(&tid, NULL, ThreadWrapper, this); + if (ret) { + abort(); + } + tids_.push_back(tid); } + return true; + } - // Stop the thread pool. - // Wait for all pending task to complete if wait is true. - bool Stop(bool wait) { - if (wait) { - while (pending_num_ > 0) { - struct timespec ts = {0, 10000000}; - nanosleep(&ts, NULL); - } - } + // Stop the thread pool. + // Wait for all pending task to complete if wait is true. + bool Stop(bool wait) { + if (wait) { + while (pending_num_ > 0) { + struct timespec ts = {0, 10000000}; + nanosleep(&ts, NULL); + } + } - { - MutexLock lock(&mutex_); - stop_ = true; - work_cv_.Broadcast(); - } - for (uint32_t i = 0; i < tids_.size(); i++) { - pthread_join(tids_[i], NULL); - } - tids_.clear(); - return true; + { + MutexLock lock(&mutex_); + stop_ = true; + work_cv_.Broadcast(); + } + for (uint32_t i = 0; i < tids_.size(); i++) { + pthread_join(tids_[i], NULL); } + tids_.clear(); + return true; + } - // Task definition. - typedef std::function Task; + // Task definition. + typedef std::function Task; - // Add a task to the thread pool. - void AddTask(const Task& task) { - MutexLock lock(&mutex_, "AddTask"); - queue_.push_back(BGItem(0, get_micros(), task)); - ++pending_num_; - work_cv_.Signal(); + // Add a task to the thread pool. + void AddTask(const Task& task) { + MutexLock lock(&mutex_, "AddTask"); + queue_.push_back(BGItem(0, get_micros(), task)); + ++pending_num_; + work_cv_.Signal(); + } + void AddPriorityTask(const Task& task) { + MutexLock lock(&mutex_); + queue_.push_front(BGItem(0, get_micros(), task)); + ++pending_num_; + work_cv_.Signal(); + } + int64_t DelayTask(int64_t delay, const Task& task) { + MutexLock lock(&mutex_); + int64_t now_time = get_micros(); + int64_t exe_time = now_time + delay * 1000; + BGItem bg_item(++last_task_id_, exe_time, task); + time_queue_.push(bg_item); + latest_[bg_item.id] = bg_item; + work_cv_.Signal(); + return bg_item.id; + } + /// Cancel a delayed task + /// if running, wait if non_block==false; return immediately if + /// non_block==true + bool CancelTask(int64_t task_id, bool non_block = false, bool* is_running = NULL) { + if (task_id == 0) { + if (is_running != NULL) { + *is_running = false; + } + return false; } - void AddPriorityTask(const Task& task) { + while (1) { + { MutexLock lock(&mutex_); - queue_.push_front(BGItem(0, get_micros(), task)); - ++pending_num_; - work_cv_.Signal(); - } - int64_t DelayTask(int64_t delay, const Task& task) { - MutexLock lock(&mutex_); - int64_t now_time = get_micros(); - int64_t exe_time = now_time + delay * 1000; - BGItem bg_item(++last_task_id_, exe_time, task); - time_queue_.push(bg_item); - latest_[bg_item.id] = bg_item; - work_cv_.Signal(); - return bg_item.id; - } - /// Cancel a delayed task - /// if running, wait if non_block==false; return immediately if non_block==true - bool CancelTask(int64_t task_id, bool non_block = false, bool* is_running = NULL) { - if (task_id == 0) { + if (running_task_ids_.find(task_id) == running_task_ids_.end()) { + BGMap::iterator it = latest_.find(task_id); + if (it == latest_.end()) { if (is_running != NULL) { - *is_running = false; + *is_running = false; } return false; + } + latest_.erase(it); + return true; + } else if (non_block) { + if (is_running != NULL) { + *is_running = true; + } + return false; } - while (1) { - { - MutexLock lock(&mutex_); - if (running_task_ids_.find(task_id) == running_task_ids_.end()) { - BGMap::iterator it = latest_.find(task_id); - if (it == latest_.end()) { - if (is_running != NULL) { - *is_running = false; - } - return false; - } - latest_.erase(it); - return true; - } else if (non_block) { - if (is_running != NULL) { - *is_running = true; - } - return false; - } - } - timespec ts = {0, 100000}; - nanosleep(&ts, &ts); - } - } - int64_t PendingNum() const { - return pending_num_; + } + timespec ts = {0, 100000}; + nanosleep(&ts, &ts); } + } + int64_t PendingNum() const { return pending_num_; } - // log format: 3 numbers seperated by " ", e.g. "15 24 32" - // 1st: thread pool schedule average cost (ms) - // 2nd: user task average cost (ms) - // 3rd: total task count since last ProfilingLog called - std::string ProfilingLog() { - int64_t schedule_cost_sum; - int64_t schedule_count; - int64_t task_cost_sum; - int64_t task_count; - { - MutexLock lock(&mutex_); - schedule_cost_sum = schedule_cost_sum_; - schedule_cost_sum_ = 0; - schedule_count = schedule_count_; - schedule_count_ = 0; - task_cost_sum = task_cost_sum_; - task_cost_sum_ = 0; - task_count = task_count_; - task_count_ = 0; - } - std::stringstream ss; - ss << (schedule_count == 0 ? 0 : schedule_cost_sum / schedule_count / 1000) - << " " << (task_count == 0 ? 0 : task_cost_sum / task_count / 1000) - << " " << task_count; - return ss.str(); + // log format: 3 numbers seperated by " ", e.g. "15 24 32" + // 1st: thread pool schedule average cost (ms) + // 2nd: user task average cost (ms) + // 3rd: total task count since last ProfilingLog called + std::string ProfilingLog() { + int64_t schedule_cost_sum; + int64_t schedule_count; + int64_t task_cost_sum; + int64_t task_count; + { + MutexLock lock(&mutex_); + schedule_cost_sum = schedule_cost_sum_; + schedule_cost_sum_ = 0; + schedule_count = schedule_count_; + schedule_count_ = 0; + task_cost_sum = task_cost_sum_; + task_cost_sum_ = 0; + task_count = task_count_; + task_count_ = 0; } + std::stringstream ss; + ss << (schedule_count == 0 ? 0 : schedule_cost_sum / schedule_count / 1000) << " " + << (task_count == 0 ? 0 : task_cost_sum / task_count / 1000) << " " << task_count; + return ss.str(); + } -private: - ThreadPool(const ThreadPool&); - void operator=(const ThreadPool&); + private: + ThreadPool(const ThreadPool&); + void operator=(const ThreadPool&); - int64_t get_micros() { // get us before machine reboot - struct timespec ts; - clock_gettime(CLOCK_MONOTONIC, &ts); - return static_cast(ts.tv_sec) * 1000000 + static_cast(ts.tv_nsec) / 1000; - } + int64_t get_micros() { // get us before machine reboot + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return static_cast(ts.tv_sec) * 1000000 + static_cast(ts.tv_nsec) / 1000; + } - static void* ThreadWrapper(void* arg) { - reinterpret_cast(arg)->ThreadProc(); - return NULL; - } - void ThreadProc() { - while (true) { - Task task; - MutexLock lock(&mutex_, "ThreadProc"); - while (time_queue_.empty() && queue_.empty() && !stop_) { - work_cv_.Wait("ThreadProcWait"); - } - if (stop_) { - break; - } - // Timer task - if (!time_queue_.empty()) { - int64_t now_time = get_micros(); - BGItem bg_item = time_queue_.top(); - int64_t wait_time = bg_item.exe_time - now_time; // in us - if (wait_time <= 0) { - time_queue_.pop(); - BGMap::iterator it = latest_.find(bg_item.id); - if (it != latest_.end() && it->second.exe_time == bg_item.exe_time) { - schedule_cost_sum_ += now_time - bg_item.exe_time; - schedule_count_++; - task = bg_item.task; - latest_.erase(it); - running_task_ids_.insert(bg_item.id); - mutex_.Unlock(); - task(bg_item.id); - mutex_.Lock("ThreadProcRelock"); - task_cost_sum_ += get_micros() - now_time; - task_count_++; - running_task_ids_.erase(bg_item.id); - } - continue; - } else if (queue_.empty() && !stop_) { - work_cv_.TimeWaitInUs(wait_time, "ThreadProcTimeWait"); - continue; - } - } - // Normal task; - if (!queue_.empty()) { - task = queue_.front().task; - int64_t exe_time = queue_.front().exe_time; - queue_.pop_front(); - --pending_num_; - int64_t start_time = get_micros(); - schedule_cost_sum_ += start_time - exe_time; - schedule_count_++; - mutex_.Unlock(); - task(0); - mutex_.Lock("ThreadProcRelock2"); - task_cost_sum_ += get_micros() - start_time; - task_count_++; - } + static void* ThreadWrapper(void* arg) { + reinterpret_cast(arg)->ThreadProc(); + return NULL; + } + void ThreadProc() { + while (true) { + Task task; + MutexLock lock(&mutex_, "ThreadProc"); + while (time_queue_.empty() && queue_.empty() && !stop_) { + work_cv_.Wait("ThreadProcWait"); + } + if (stop_) { + break; + } + // Timer task + if (!time_queue_.empty()) { + int64_t now_time = get_micros(); + BGItem bg_item = time_queue_.top(); + int64_t wait_time = bg_item.exe_time - now_time; // in us + if (wait_time <= 0) { + time_queue_.pop(); + BGMap::iterator it = latest_.find(bg_item.id); + if (it != latest_.end() && it->second.exe_time == bg_item.exe_time) { + schedule_cost_sum_ += now_time - bg_item.exe_time; + schedule_count_++; + task = bg_item.task; + latest_.erase(it); + running_task_ids_.insert(bg_item.id); + mutex_.Unlock(); + task(bg_item.id); + mutex_.Lock("ThreadProcRelock"); + task_cost_sum_ += get_micros() - now_time; + task_count_++; + running_task_ids_.erase(bg_item.id); + } + continue; + } else if (queue_.empty() && !stop_) { + work_cv_.TimeWaitInUs(wait_time, "ThreadProcTimeWait"); + continue; } + } + // Normal task; + if (!queue_.empty()) { + task = queue_.front().task; + int64_t exe_time = queue_.front().exe_time; + queue_.pop_front(); + --pending_num_; + int64_t start_time = get_micros(); + schedule_cost_sum_ += start_time - exe_time; + schedule_count_++; + mutex_.Unlock(); + task(0); + mutex_.Lock("ThreadProcRelock2"); + task_cost_sum_ += get_micros() - start_time; + task_count_++; + } } + } -private: - struct BGItem { - int64_t id; - int64_t exe_time; - Task task; - bool operator<(const BGItem& item) const { - if (exe_time != item.exe_time) { - return exe_time > item.exe_time; - } else { - return id > item.id; - } - } + private: + struct BGItem { + int64_t id; + int64_t exe_time; + Task task; + bool operator<(const BGItem& item) const { + if (exe_time != item.exe_time) { + return exe_time > item.exe_time; + } else { + return id > item.id; + } + } - BGItem() : id(0), exe_time(0) {} - BGItem(int64_t id_t, int64_t exe_time_t, const Task& task_t) - : id(id_t), exe_time(exe_time_t), task(task_t) {} - }; - typedef std::priority_queue BGQueue; - typedef std::map BGMap; + BGItem() : id(0), exe_time(0) {} + BGItem(int64_t id_t, int64_t exe_time_t, const Task& task_t) + : id(id_t), exe_time(exe_time_t), task(task_t) {} + }; + typedef std::priority_queue BGQueue; + typedef std::map BGMap; - int32_t threads_num_; - std::deque queue_; - volatile int pending_num_; - Mutex mutex_; - CondVar work_cv_; - bool stop_; - std::vector tids_; - std::set running_task_ids_; + int32_t threads_num_; + std::deque queue_; + std::atomic pending_num_; + Mutex mutex_; + CondVar work_cv_; + bool stop_; + std::vector tids_; + std::set running_task_ids_; - BGQueue time_queue_; - BGMap latest_; - int64_t last_task_id_; + BGQueue time_queue_; + BGMap latest_; + int64_t last_task_id_; - // for profiling - int64_t schedule_cost_sum_; - int64_t schedule_count_; - int64_t task_cost_sum_; - int64_t task_count_; + // for profiling + int64_t schedule_cost_sum_; + int64_t schedule_count_; + int64_t task_cost_sum_; + int64_t task_count_; }; -} // namespace common +} // namespace common using common::ThreadPool; diff --git a/src/common/timer.h b/src/common/timer.h index b035e18c9..62e70b8df 100644 --- a/src/common/timer.h +++ b/src/common/timer.h @@ -5,72 +5,61 @@ // // Author: yanshiguang02@baidu.com - #include #include #include #include -namespace tera{ +namespace tera { static inline int64_t get_timestamp_from_str(const std::string& time) { - struct tm tm; - memset(&tm, 0, sizeof(tm)); + struct tm tm; + memset(&tm, 0, sizeof(tm)); - sscanf(time.c_str(), "%4d%2d%2d-%d:%d:%d", - &tm.tm_year, &tm.tm_mon, &tm.tm_mday, - &tm.tm_hour, &tm.tm_min, &tm.tm_sec); + sscanf(time.c_str(), "%4d%2d%2d-%d:%d:%d", &tm.tm_year, &tm.tm_mon, &tm.tm_mday, &tm.tm_hour, + &tm.tm_min, &tm.tm_sec); - tm.tm_year -= 1900; - tm.tm_mon--; + tm.tm_year -= 1900; + tm.tm_mon--; - return mktime(&tm); + return mktime(&tm); } static inline std::string get_time_str(int64_t timestamp) { - struct tm tt; - char buf[20]; - time_t t = timestamp; - strftime(buf, 20, "%Y%m%d-%H:%M:%S", localtime_r(&t, &tt)); - return std::string(buf, 17); + struct tm tt; + char buf[20]; + time_t t = timestamp; + strftime(buf, 20, "%Y%m%d-%H:%M:%S", localtime_r(&t, &tt)); + return std::string(buf, 17); } -static inline std::string get_curtime_str() { - return get_time_str(time(NULL)); -} +static inline std::string get_curtime_str() { return get_time_str(time(NULL)); } static inline std::string get_curtime_str_plain() { - struct tm tt; - char buf[20]; - time_t t = time(NULL); - strftime(buf, 20, "%Y%m%d%H%M%S", localtime_r(&t, &tt)); - return std::string(buf); + struct tm tt; + char buf[20]; + time_t t = time(NULL); + strftime(buf, 20, "%Y%m%d%H%M%S", localtime_r(&t, &tt)); + return std::string(buf); } static inline int64_t get_micros() { - struct timespec ts; - clock_gettime(CLOCK_REALTIME, &ts); - return static_cast(ts.tv_sec) * 1000000 + static_cast(ts.tv_nsec) / 1000; + struct timespec ts; + clock_gettime(CLOCK_REALTIME, &ts); + return static_cast(ts.tv_sec) * 1000000 + static_cast(ts.tv_nsec) / 1000; } -static inline int64_t get_millis() { - return get_micros() / 1000; -} +static inline int64_t get_millis() { return get_micros() / 1000; } static inline int64_t get_unique_micros(int64_t ref) { - int64_t now; - do { - now = get_micros(); - } while (now == ref); - return now; + int64_t now; + do { + now = get_micros(); + } while (now == ref); + return now; } -static inline int64_t GetTimeStampInUs() { - return get_micros(); -} +static inline int64_t GetTimeStampInUs() { return get_micros(); } -static inline int64_t GetTimeStampInMs() { - return get_millis(); -} +static inline int64_t GetTimeStampInMs() { return get_millis(); } } - diff --git a/src/io/atomic_merge_strategy.cc b/src/io/atomic_merge_strategy.cc index 1ad77695a..891dc14f5 100644 --- a/src/io/atomic_merge_strategy.cc +++ b/src/io/atomic_merge_strategy.cc @@ -10,13 +10,11 @@ namespace tera { namespace io { bool IsAtomicOP(leveldb::TeraKeyType keyType) { - if (keyType == leveldb::TKT_ADD || - keyType == leveldb::TKT_ADDINT64 || - keyType == leveldb::TKT_PUT_IFABSENT || - keyType == leveldb::TKT_APPEND) { - return true; - } - return false; + if (keyType == leveldb::TKT_ADD || keyType == leveldb::TKT_ADDINT64 || + keyType == leveldb::TKT_PUT_IFABSENT || keyType == leveldb::TKT_APPEND) { + return true; + } + return false; } AtomicMergeStrategy::AtomicMergeStrategy() @@ -24,95 +22,91 @@ AtomicMergeStrategy::AtomicMergeStrategy() merged_value_(NULL), latest_key_type_(leveldb::TKT_FORSEEK), counter_(0), - int64_(0) { -} + int64_(0) {} -void AtomicMergeStrategy::Init(std::string* merged_key, - std::string* merged_value, - const leveldb::Slice& latest_key, - const leveldb::Slice& latest_value, +void AtomicMergeStrategy::Init(std::string* merged_key, std::string* merged_value, + const leveldb::Slice& latest_key, const leveldb::Slice& latest_value, leveldb::TeraKeyType latest_key_type) { - merged_key_ = merged_key; - merged_value_ = merged_value; - assert(merged_key_); - assert(merged_value_); - latest_key_type_ = latest_key_type; + merged_key_ = merged_key; + merged_value_ = merged_value; + assert(merged_key_); + assert(merged_value_); + latest_key_type_ = latest_key_type; - switch (latest_key_type) { - case leveldb::TKT_ADD: - merged_key_->assign(latest_key.data(), latest_key.size()); - counter_ = io::DecodeBigEndain(latest_value.data()); - break; - case leveldb::TKT_ADDINT64: - merged_key_->assign(latest_key.data(), latest_key.size()); - int64_ = *(int64_t*)latest_value.data(); - break; - case leveldb::TKT_PUT_IFABSENT: - merged_key_->assign(latest_key.data(), latest_key.size()); - merged_value_->assign(latest_value.data(), latest_value.size()); - break; - case leveldb::TKT_APPEND: - merged_key_->assign(latest_key.data(), latest_key.size()); - append_buffer_.assign(latest_value.data(), latest_value.size()); - break; - default: - assert(0); // invalid status - break; - } + switch (latest_key_type) { + case leveldb::TKT_ADD: + merged_key_->assign(latest_key.data(), latest_key.size()); + counter_ = io::DecodeBigEndain(latest_value.data()); + break; + case leveldb::TKT_ADDINT64: + merged_key_->assign(latest_key.data(), latest_key.size()); + int64_ = *(int64_t*)latest_value.data(); + break; + case leveldb::TKT_PUT_IFABSENT: + merged_key_->assign(latest_key.data(), latest_key.size()); + merged_value_->assign(latest_value.data(), latest_value.size()); + break; + case leveldb::TKT_APPEND: + merged_key_->assign(latest_key.data(), latest_key.size()); + append_buffer_.assign(latest_value.data(), latest_value.size()); + break; + default: + assert(0); // invalid status + break; + } } -void AtomicMergeStrategy::MergeStep(const leveldb::Slice& key, - const leveldb::Slice& value, +void AtomicMergeStrategy::MergeStep(const leveldb::Slice& key, const leveldb::Slice& value, leveldb::TeraKeyType key_type) { - switch (latest_key_type_) { - case leveldb::TKT_ADD: - if (key_type == leveldb::TKT_ADD || key_type == leveldb::TKT_VALUE) { - counter_ += io::DecodeBigEndain(value.data()); - } - break; - case leveldb::TKT_ADDINT64: - if (key_type == leveldb::TKT_ADDINT64 || key_type == leveldb::TKT_VALUE) { - int64_ += *(int64_t*)value.data(); - } - break; - case leveldb::TKT_PUT_IFABSENT: - if (key_type == leveldb::TKT_PUT_IFABSENT || key_type == leveldb::TKT_VALUE) { - merged_value_->assign(value.data(), value.size()); - } - break; - case leveldb::TKT_APPEND: - if (key_type == leveldb::TKT_APPEND || key_type == leveldb::TKT_VALUE) { - append_buffer_.insert(0, std::string(value.data(), value.size())); - } - break; - default: - assert(0); // invalid status - break; - } + switch (latest_key_type_) { + case leveldb::TKT_ADD: + if (key_type == leveldb::TKT_ADD || key_type == leveldb::TKT_VALUE) { + counter_ += io::DecodeBigEndain(value.data()); + } + break; + case leveldb::TKT_ADDINT64: + if (key_type == leveldb::TKT_ADDINT64 || key_type == leveldb::TKT_VALUE) { + int64_ += *(int64_t*)value.data(); + } + break; + case leveldb::TKT_PUT_IFABSENT: + if (key_type == leveldb::TKT_PUT_IFABSENT || key_type == leveldb::TKT_VALUE) { + merged_value_->assign(value.data(), value.size()); + } + break; + case leveldb::TKT_APPEND: + if (key_type == leveldb::TKT_APPEND || key_type == leveldb::TKT_VALUE) { + append_buffer_.insert(0, std::string(value.data(), value.size())); + } + break; + default: + assert(0); // invalid status + break; + } } bool AtomicMergeStrategy::Finish() { - switch (latest_key_type_) { - case leveldb::TKT_ADD: - char buf[sizeof(int64_t)]; - io::EncodeBigEndian(buf, counter_); - merged_value_->assign(buf, sizeof(buf)); - break; - case leveldb::TKT_ADDINT64: - merged_value_->assign(std::string((char*)&int64_, sizeof(int64_t))); - break; - case leveldb::TKT_PUT_IFABSENT: - // do nothing - break; - case leveldb::TKT_APPEND: - *merged_value_ = append_buffer_; - break; - default: - assert(0); // invalid status - break; - } - return true; + switch (latest_key_type_) { + case leveldb::TKT_ADD: + char buf[sizeof(int64_t)]; + io::EncodeBigEndian(buf, counter_); + merged_value_->assign(buf, sizeof(buf)); + break; + case leveldb::TKT_ADDINT64: + merged_value_->assign(std::string((char*)&int64_, sizeof(int64_t))); + break; + case leveldb::TKT_PUT_IFABSENT: + // do nothing + break; + case leveldb::TKT_APPEND: + *merged_value_ = append_buffer_; + break; + default: + assert(0); // invalid status + break; + } + return true; } -} // namespace io -} // namespace tera +} // namespace io +} // namespace tera diff --git a/src/io/atomic_merge_strategy.h b/src/io/atomic_merge_strategy.h index 807057d31..49c7c5cb5 100644 --- a/src/io/atomic_merge_strategy.h +++ b/src/io/atomic_merge_strategy.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_IO_ATOMIC_MERGE_STRATEGY_H_ -#define TERA_IO_ATOMIC_MERGE_STRATEGY_H_ +#ifndef TERA_IO_ATOMIC_MERGE_STRATEGY_H_ +#define TERA_IO_ATOMIC_MERGE_STRATEGY_H_ #include "leveldb/raw_key_operator.h" #include "leveldb/slice.h" @@ -14,31 +14,27 @@ namespace io { bool IsAtomicOP(leveldb::TeraKeyType keyType); class AtomicMergeStrategy { -public: - AtomicMergeStrategy(); - - void Init(std::string* merged_key, - std::string* merged_value, - const leveldb::Slice& latest_key, - const leveldb::Slice& latest_value, - leveldb::TeraKeyType latest_key_type); - - void MergeStep(const leveldb::Slice& key, - const leveldb::Slice& value, - leveldb::TeraKeyType key_type); - - bool Finish(); - -private: - std::string* merged_key_; - std::string* merged_value_; - leveldb::TeraKeyType latest_key_type_; - int64_t counter_; // for ADD - int64_t int64_; // for int64(add) - std::string append_buffer_; // for Append + public: + AtomicMergeStrategy(); + + void Init(std::string* merged_key, std::string* merged_value, const leveldb::Slice& latest_key, + const leveldb::Slice& latest_value, leveldb::TeraKeyType latest_key_type); + + void MergeStep(const leveldb::Slice& key, const leveldb::Slice& value, + leveldb::TeraKeyType key_type); + + bool Finish(); + + private: + std::string* merged_key_; + std::string* merged_value_; + leveldb::TeraKeyType latest_key_type_; + int64_t counter_; // for ADD + int64_t int64_; // for int64(add) + std::string append_buffer_; // for Append }; -} // namespace io -} // namespace tera +} // namespace io +} // namespace tera #endif // TERA_IO_ATOMIC_MERGE_STRATEGY_H_ diff --git a/src/io/coding.cc b/src/io/coding.cc index 3e1aefa9b..afd680f81 100644 --- a/src/io/coding.cc +++ b/src/io/coding.cc @@ -9,62 +9,59 @@ namespace tera { namespace io { -bool ParseKeySlice(const leveldb::Slice& key, - int64_t* timestamp, - UserKeyType* type, - leveldb::Slice* short_key, - leveldb::Slice* time_key) { - if (key.size() < sizeof(UserKeyType)) { - return false; - } else if (time_key) { - *time_key = leveldb::Slice(key.data(), key.size() - sizeof(UserKeyType)); - } +bool ParseKeySlice(const leveldb::Slice& key, int64_t* timestamp, UserKeyType* type, + leveldb::Slice* short_key, leveldb::Slice* time_key) { + if (key.size() < sizeof(UserKeyType)) { + return false; + } else if (time_key) { + *time_key = leveldb::Slice(key.data(), key.size() - sizeof(UserKeyType)); + } - if (key.size() < sizeof(uint64_t)) { - return false; - } + if (key.size() < sizeof(uint64_t)) { + return false; + } - if (short_key) { - *short_key = leveldb::Slice(key.data(), key.size() - sizeof(uint64_t)); - } - uint64_t num = DecodeFixed64(key.data() + key.size() - sizeof(uint64_t)); - if (type) { - *type = static_cast(num & 0xff); - } - if (timestamp) { - *timestamp = static_cast(num >> sizeof(UserKeyType)); - } - return true; + if (short_key) { + *short_key = leveldb::Slice(key.data(), key.size() - sizeof(uint64_t)); + } + uint64_t num = DecodeFixed64(key.data() + key.size() - sizeof(uint64_t)); + if (type) { + *type = static_cast(num & 0xff); + } + if (timestamp) { + *timestamp = static_cast(num >> sizeof(UserKeyType)); + } + return true; } -void PackUserKey(const std::string& key, int64_t timestamp, - UserKeyType type, std::string* packed_key) { - packed_key->assign(key); - PutFixed64(packed_key, PackTimestampAndType(timestamp, type)); +void PackUserKey(const std::string& key, int64_t timestamp, UserKeyType type, + std::string* packed_key) { + packed_key->assign(key); + PutFixed64(packed_key, PackTimestampAndType(timestamp, type)); } -bool UnpackUserKey(const leveldb::Slice& packed_key, - leveldb::Slice* short_key, int64_t* timestamp, UserKeyType* type) { - return ParseKeySlice(packed_key, timestamp, type, short_key, NULL); +bool UnpackUserKey(const leveldb::Slice& packed_key, leveldb::Slice* short_key, int64_t* timestamp, + UserKeyType* type) { + return ParseKeySlice(packed_key, timestamp, type, short_key, NULL); } leveldb::Slice ExtractTimeKey(const leveldb::Slice& key_slice) { - leveldb::Slice sub_key; - CHECK(ParseKeySlice(key_slice, NULL, NULL, NULL, &sub_key)); - return sub_key; + leveldb::Slice sub_key; + CHECK(ParseKeySlice(key_slice, NULL, NULL, NULL, &sub_key)); + return sub_key; } leveldb::Slice ExtractShortKey(const leveldb::Slice& key_slice) { - leveldb::Slice sub_key; - CHECK(ParseKeySlice(key_slice, NULL, NULL, &sub_key, NULL)); - return sub_key; + leveldb::Slice sub_key; + CHECK(ParseKeySlice(key_slice, NULL, NULL, &sub_key, NULL)); + return sub_key; } UserKeyType ExtractKeyType(const leveldb::Slice& key_slice) { - UserKeyType type; - CHECK(ParseKeySlice(key_slice, NULL, &type, NULL, NULL)); - return type; + UserKeyType type; + CHECK(ParseKeySlice(key_slice, NULL, &type, NULL, NULL)); + return type; } -} // namespace io -} // namespace tera +} // namespace io +} // namespace tera diff --git a/src/io/coding.h b/src/io/coding.h index 3b4c06204..9aa2d3c5f 100644 --- a/src/io/coding.h +++ b/src/io/coding.h @@ -12,94 +12,87 @@ namespace tera { namespace io { inline uint32_t DecodeFixed32(const char* ptr) { - uint32_t result; - memcpy(&result, ptr, sizeof(result)); - return result; + uint32_t result; + memcpy(&result, ptr, sizeof(result)); + return result; } inline uint64_t DecodeFixed64(const char* ptr) { - uint64_t result; - memcpy(&result, ptr, sizeof(result)); - return result; -} - -inline void EncodeFixed64(char* buf, uint64_t value) { - memcpy(buf, &value, sizeof(value)); + uint64_t result; + memcpy(&result, ptr, sizeof(result)); + return result; } +inline void EncodeFixed64(char* buf, uint64_t value) { memcpy(buf, &value, sizeof(value)); } inline void PutFixed64(std::string* dst, uint64_t value) { - char buf[sizeof(value)]; - EncodeFixed64(buf, value); - dst->append(buf, sizeof(buf)); + char buf[sizeof(value)]; + EncodeFixed64(buf, value); + dst->append(buf, sizeof(buf)); } inline void EncodeBigEndian32(char* buf, uint32_t value) { - buf[0] = (value >> 24) & 0xff; - buf[1] = (value >> 16) & 0xff; - buf[2] = (value >> 8) & 0xff; - buf[3] = value & 0xff; + buf[0] = (value >> 24) & 0xff; + buf[1] = (value >> 16) & 0xff; + buf[2] = (value >> 8) & 0xff; + buf[3] = value & 0xff; } inline uint32_t DecodeBigEndain32(const char* ptr) { - return ((static_cast(static_cast(ptr[3]))) - | (static_cast(static_cast(ptr[2])) << 8) - | (static_cast(static_cast(ptr[1])) << 16) - | (static_cast(static_cast(ptr[0])) << 24)); + return ((static_cast(static_cast(ptr[3]))) | + (static_cast(static_cast(ptr[2])) << 8) | + (static_cast(static_cast(ptr[1])) << 16) | + (static_cast(static_cast(ptr[0])) << 24)); } inline void EncodeBigEndian(char* buf, uint64_t value) { - buf[0] = (value >> 56) & 0xff; - buf[1] = (value >> 48) & 0xff; - buf[2] = (value >> 40) & 0xff; - buf[3] = (value >> 32) & 0xff; - buf[4] = (value >> 24) & 0xff; - buf[5] = (value >> 16) & 0xff; - buf[6] = (value >> 8) & 0xff; - buf[7] = value & 0xff; + buf[0] = (value >> 56) & 0xff; + buf[1] = (value >> 48) & 0xff; + buf[2] = (value >> 40) & 0xff; + buf[3] = (value >> 32) & 0xff; + buf[4] = (value >> 24) & 0xff; + buf[5] = (value >> 16) & 0xff; + buf[6] = (value >> 8) & 0xff; + buf[7] = value & 0xff; } inline uint64_t DecodeBigEndain(const char* ptr) { - uint64_t lo = DecodeBigEndain32(ptr + 4); - uint64_t hi = DecodeBigEndain32(ptr); - return (hi << 32) | lo; + uint64_t lo = DecodeBigEndain32(ptr + 4); + uint64_t hi = DecodeBigEndain32(ptr); + return (hi << 32) | lo; } inline int32_t DecodeBigEndain32Sign(const char* ptr) { - return ((static_cast(static_cast(ptr[3]))) - | (static_cast(static_cast(ptr[2])) << 8) - | (static_cast(static_cast(ptr[1])) << 16) - | (static_cast(static_cast(ptr[0])) << 24)); + return ((static_cast(static_cast(ptr[3]))) | + (static_cast(static_cast(ptr[2])) << 8) | + (static_cast(static_cast(ptr[1])) << 16) | + (static_cast(static_cast(ptr[0])) << 24)); } inline int64_t DecodeBigEndainSign(const char* ptr) { - uint64_t lo = DecodeBigEndain32(ptr + 4); - int64_t hi = DecodeBigEndain32Sign(ptr); - return (hi << 32) | lo; + uint64_t lo = DecodeBigEndain32(ptr + 4); + int64_t hi = DecodeBigEndain32Sign(ptr); + return (hi << 32) | lo; } -enum UserKeyType { - UKT_VALUE = 0, - UKT_FORSEEK -}; +enum UserKeyType { UKT_VALUE = 0, UKT_FORSEEK }; inline uint64_t PackTimestampAndType(int64_t timestamp, UserKeyType key_type) { - uint64_t stamp = 0; - if (timestamp >= 0) { - stamp = static_cast(timestamp); - } - return (stamp << sizeof(UserKeyType)) | key_type; + uint64_t stamp = 0; + if (timestamp >= 0) { + stamp = static_cast(timestamp); + } + return (stamp << sizeof(UserKeyType)) | key_type; } -bool ParseKeySlice(const leveldb::Slice& key, int64_t* timestamp, - UserKeyType* type, leveldb::Slice* short_key, - leveldb::Slice* time_key); +bool ParseKeySlice(const leveldb::Slice& key, int64_t* timestamp, UserKeyType* type, + leveldb::Slice* short_key, leveldb::Slice* time_key); -void PackUserKey(const std::string& key, int64_t timestamp, - UserKeyType type, std::string* packed_key); +void PackUserKey(const std::string& key, int64_t timestamp, UserKeyType type, + std::string* packed_key); -bool UnpackUserKey(const leveldb::Slice& packed_key, - leveldb::Slice* short_key, int64_t* timestamp, UserKeyType* type); +bool UnpackUserKey(const leveldb::Slice& packed_key, leveldb::Slice* short_key, int64_t* timestamp, + UserKeyType* type); leveldb::Slice ExtractTimeKey(const leveldb::Slice& key_slice); @@ -107,7 +100,7 @@ leveldb::Slice ExtractShortKey(const leveldb::Slice& key_slice); UserKeyType ExtractKeyType(const leveldb::Slice& key_slice); -} // namespace io -} // namespace tera +} // namespace io +} // namespace tera -#endif // TERA_IO_CODING_H_ +#endif // TERA_IO_CODING_H_ diff --git a/src/io/default_compact_strategy.cc b/src/io/default_compact_strategy.cc index f559c266d..39a74a6dd 100644 --- a/src/io/default_compact_strategy.cc +++ b/src/io/default_compact_strategy.cc @@ -6,6 +6,7 @@ #include "io/atomic_merge_strategy.h" #include "io/default_compact_strategy.h" #include "leveldb/slice.h" +#include namespace tera { namespace io { @@ -18,478 +19,479 @@ DefaultCompactStrategy::DefaultCompactStrategy(const TableSchema& schema, cf_indexs_(cf_indexs), raw_key_operator_(raw_key_operator), cmp_(cmp), - last_ts_(-1), last_type_(leveldb::TKT_FORSEEK), cur_type_(leveldb::TKT_FORSEEK), - del_row_ts_(-1), del_col_ts_(-1), del_qual_ts_(-1), cur_ts_(-1), - del_row_seq_(0), del_col_seq_(0), del_qual_seq_(0), version_num_(0), + last_ts_(-1), + last_type_(leveldb::TKT_FORSEEK), + cur_type_(leveldb::TKT_FORSEEK), + del_row_ts_(-1), + del_col_ts_(-1), + del_qual_ts_(-1), + cur_ts_(-1), + del_row_seq_(0), + del_col_seq_(0), + del_qual_seq_(0), + version_num_(0), snapshot_(leveldb::kMaxSequenceNumber) { - has_put_ = false; - VLOG(11) << "DefaultCompactStrategy construct"; + has_put_ = false; + VLOG(11) << "DefaultCompactStrategy construct"; } -const leveldb::Comparator* DefaultCompactStrategy::RowKeyComparator() { - return cmp_; +const leveldb::Comparator* DefaultCompactStrategy::RowKeyComparator() { return cmp_; } + +void DefaultCompactStrategy::ExtractRowKey(const Slice& tera_key, std::string* raw_row_key) { + Slice row_key; + if (raw_key_operator_.ExtractTeraKey(tera_key, &row_key, NULL, NULL, NULL, NULL)) { + std::string tera_key_forseek; + raw_key_operator_.EncodeTeraKey(row_key.ToString(), "", "", kLatestTs, leveldb::TKT_FORSEEK, + &tera_key_forseek); + *raw_row_key = tera_key_forseek; + } else { + *raw_row_key = tera_key.ToString(); + } } -const char* DefaultCompactStrategy::Name() const { - return "tera.DefaultCompactStrategy"; -} +const char* DefaultCompactStrategy::Name() const { return "tera.DefaultCompactStrategy"; } void DefaultCompactStrategy::SetSnapshot(uint64_t snapshot) { - VLOG(11) << "tera.DefaultCompactStrategy: set snapshot to " << snapshot; - snapshot_ = snapshot; + VLOG(11) << "tera.DefaultCompactStrategy: set snapshot to " << snapshot; + snapshot_ = snapshot; } bool DefaultCompactStrategy::Drop(const Slice& tera_key, uint64_t n, const std::string& lower_bound) { - Slice key, col, qual; - int64_t ts = -1; - leveldb::TeraKeyType type; + Slice key, col, qual; + int64_t ts = -1; + leveldb::TeraKeyType type; - if (!raw_key_operator_.ExtractTeraKey(tera_key, &key, &col, &qual, &ts, &type)) { - LOG(WARNING) << "invalid tera key: " << tera_key.ToString(); - return true; - } - - cur_type_ = type; - cur_ts_ = ts; - int32_t cf_id = -1; - if (type != leveldb::TKT_DEL && DropIllegalColumnFamily(col.ToString(), &cf_id)) { - // drop illegal column family - return true; - } + if (!raw_key_operator_.ExtractTeraKey(tera_key, &key, &col, &qual, &ts, &type)) { + LOG(WARNING) << "invalid tera key: " << tera_key.ToString(); + return true; + } - if (type >= leveldb::TKT_VALUE && DropByLifeTime(cf_id, ts)) { - // drop illegal column family - return true; - } + cur_type_ = type; + cur_ts_ = ts; + int32_t cf_id = -1; + if (type != leveldb::TKT_DEL && DropIllegalColumnFamily(col.ToString(), &cf_id)) { + // drop illegal column family + return true; + } - if (key.compare(last_key_) != 0) { - // reach a new row - last_key_.assign(key.data(), key.size()); - last_col_.assign(col.data(), col.size()); - last_qual_.assign(qual.data(), qual.size()); - del_row_ts_ = del_col_ts_ = del_qual_ts_ = -1; - version_num_ = 0; - has_put_ = false; - // no break in switch: need to set multiple variables - switch (type) { - case leveldb::TKT_DEL: - del_row_ts_ = ts; - del_row_seq_ = n; - case leveldb::TKT_DEL_COLUMN: - del_col_ts_ = ts; - del_col_seq_ = n; - case leveldb::TKT_DEL_QUALIFIERS: { - del_qual_ts_ = ts; - del_qual_seq_ = n; - if (CheckCompactLowerBound(key, lower_bound) && snapshot_ == leveldb::kMaxSequenceNumber) { - VLOG(15) << "tera.DefaultCompactStrategy: can drop delete row tag"; - return true; - } - } - default:; - } - } else if (del_row_ts_ >= ts && del_row_seq_ <= snapshot_) { - // skip deleted row and the same row_del mark - return true; - } else if (col.compare(last_col_) != 0) { - // reach a new column family - last_col_.assign(col.data(), col.size()); - last_qual_.assign(qual.data(), qual.size()); - del_col_ts_ = del_qual_ts_ = -1; - version_num_ = 0; - has_put_ = false; - // no break in switch: need to set multiple variables - switch (type) { - case leveldb::TKT_DEL_COLUMN: - del_col_ts_ = ts; - del_col_seq_ = n; - case leveldb::TKT_DEL_QUALIFIERS: { - del_qual_ts_ = ts; - del_qual_seq_ = n; - if (CheckCompactLowerBound(key, lower_bound) && snapshot_ == leveldb::kMaxSequenceNumber) { - VLOG(15) << "tera.DefaultCompactStrategy: can drop delete col tag"; - return true; - } - } - default:; + if (type >= leveldb::TKT_VALUE && DropByLifeTime(cf_id, ts)) { + // drop illegal column family + return true; + } + + if (key.compare(last_key_) != 0) { + // reach a new row + last_key_.assign(key.data(), key.size()); + last_col_.assign(col.data(), col.size()); + last_qual_.assign(qual.data(), qual.size()); + del_row_ts_ = del_col_ts_ = del_qual_ts_ = -1; + version_num_ = 0; + has_put_ = false; + // no break in switch: need to set multiple variables + switch (type) { + case leveldb::TKT_DEL: + del_row_ts_ = ts; + del_row_seq_ = n; + case leveldb::TKT_DEL_COLUMN: + del_col_ts_ = ts; + del_col_seq_ = n; + case leveldb::TKT_DEL_QUALIFIERS: { + del_qual_ts_ = ts; + del_qual_seq_ = n; + if (CheckCompactLowerBound(key, lower_bound) && snapshot_ == leveldb::kMaxSequenceNumber) { + VLOG(15) << "tera.DefaultCompactStrategy: can drop delete row tag"; + return true; } - } else if (del_col_ts_ > ts && del_col_seq_ <= snapshot_) { - // skip deleted column family - return true; - } else if (qual.compare(last_qual_) != 0) { - // reach a new qualifier - last_qual_.assign(qual.data(), qual.size()); - del_qual_ts_ = -1; - version_num_ = 0; - has_put_ = false; - if (type == leveldb::TKT_DEL_QUALIFIERS) { - del_qual_ts_ = ts; - del_qual_seq_ = n; - if (CheckCompactLowerBound(key, lower_bound) && snapshot_ == leveldb::kMaxSequenceNumber) { - VLOG(15) << "tera.DefaultCompactStrategy: can drop delete qualifiers tag"; - return true; - } + } + default: + ; + } + } else if (del_row_ts_ >= ts && del_row_seq_ <= snapshot_) { + // skip deleted row and the same row_del mark + return true; + } else if (col.compare(last_col_) != 0) { + // reach a new column family + last_col_.assign(col.data(), col.size()); + last_qual_.assign(qual.data(), qual.size()); + del_col_ts_ = del_qual_ts_ = -1; + version_num_ = 0; + has_put_ = false; + // no break in switch: need to set multiple variables + switch (type) { + case leveldb::TKT_DEL_COLUMN: + del_col_ts_ = ts; + del_col_seq_ = n; + case leveldb::TKT_DEL_QUALIFIERS: { + del_qual_ts_ = ts; + del_qual_seq_ = n; + if (CheckCompactLowerBound(key, lower_bound) && snapshot_ == leveldb::kMaxSequenceNumber) { + VLOG(15) << "tera.DefaultCompactStrategy: can drop delete col tag"; + return true; } - } else if (del_qual_ts_ > ts && del_qual_seq_ <= snapshot_) { - // skip deleted qualifier + } + default: + ; + } + } else if (del_col_ts_ >= ts && del_col_seq_ <= snapshot_) { + // skip deleted column family + return true; + } else if (qual.compare(last_qual_) != 0) { + // reach a new qualifier + last_qual_.assign(qual.data(), qual.size()); + del_qual_ts_ = -1; + version_num_ = 0; + has_put_ = false; + if (type == leveldb::TKT_DEL_QUALIFIERS) { + del_qual_ts_ = ts; + del_qual_seq_ = n; + if (CheckCompactLowerBound(key, lower_bound) && snapshot_ == leveldb::kMaxSequenceNumber) { + VLOG(15) << "tera.DefaultCompactStrategy: can drop delete qualifiers tag"; return true; + } } - - if (type == leveldb::TKT_VALUE) { - has_put_ = true; - if (n <= snapshot_) { - if (++version_num_ > static_cast(schema_.column_families(cf_id).max_versions())) { - // drop out-of-range version - VLOG(20) << "compact drop true: " << key.ToString() - << ", version " << version_num_ - << ", timestamp " << ts; - return true; - } - } - } - - if (type == leveldb::TKT_DEL_QUALIFIER) { - if (n <= snapshot_) { - uint32_t max_versions = static_cast(schema_.column_families(cf_id).max_versions()); - if (version_num_ >= max_versions) { - // drop out-of-range delete qualifier mark - VLOG(20) << "compact drop true: " << key.ToString() - << ", version " << version_num_ - << ", timestamp " << ts; - return true; - } - } + } else if (del_qual_ts_ >= ts && del_qual_seq_ <= snapshot_) { + // skip deleted qualifier + return true; + } + + if (type == leveldb::TKT_VALUE) { + has_put_ = true; + if (n <= snapshot_) { + if (++version_num_ > static_cast(schema_.column_families(cf_id).max_versions())) { + // drop out-of-range version + VLOG(20) << "compact drop true: " << key.ToString() << ", version " << version_num_ + << ", timestamp " << ts; + return true; + } } - - if (IsAtomicOP(type) && has_put_) { - // drop ADDs which is later than Put + } + + if (type == leveldb::TKT_DEL_QUALIFIER) { + if (n <= snapshot_) { + uint32_t max_versions = static_cast(schema_.column_families(cf_id).max_versions()); + if (version_num_ >= max_versions) { + // drop out-of-range delete qualifier mark + VLOG(20) << "compact drop true: " << key.ToString() << ", version " << version_num_ + << ", timestamp " << ts; return true; + } } - VLOG(20) << "compact drop false: " << key.ToString() - << ", version " << version_num_ - << ", timestamp " << ts; - return false; + } + + if (IsAtomicOP(type) && has_put_) { + // drop ADDs which is later than Put + return true; + } + VLOG(20) << "compact drop false: " << key.ToString() << ", version " << version_num_ + << ", timestamp " << ts; + return false; } -bool DefaultCompactStrategy::ScanMergedValue(leveldb::Iterator* it, - std::string* merged_value, +bool DefaultCompactStrategy::ScanMergedValue(leveldb::Iterator* it, std::string* merged_value, int64_t* merged_num) { - std::string merged_key; - bool has_merge = InternalMergeProcess(it, merged_value, &merged_key, - true, false, merged_num); - return has_merge; + std::string merged_key; + bool has_merge = InternalMergeProcess(it, merged_value, &merged_key, true, false, merged_num); + return has_merge; } -bool DefaultCompactStrategy::MergeAtomicOPs(leveldb::Iterator* it, - std::string* merged_value, +bool DefaultCompactStrategy::MergeAtomicOPs(leveldb::Iterator* it, std::string* merged_value, std::string* merged_key) { - bool merge_put_flag = false; // don't merge the last PUT if we have - return InternalMergeProcess(it, merged_value, merged_key, merge_put_flag, - true, NULL); + bool merge_put_flag = false; // don't merge the last PUT if we have + return InternalMergeProcess(it, merged_value, merged_key, merge_put_flag, true, NULL); } -bool DefaultCompactStrategy::InternalMergeProcess(leveldb::Iterator* it, - std::string* merged_value, - std::string* merged_key, - bool merge_put_flag, - bool is_internal_key, - int64_t* merged_num) { - if (!tera::io::IsAtomicOP(cur_type_)) { - return false; - } - assert(merged_key); - assert(merged_value); +bool DefaultCompactStrategy::InternalMergeProcess(leveldb::Iterator* it, std::string* merged_value, + std::string* merged_key, bool merge_put_flag, + bool is_internal_key, int64_t* merged_num) { + if (!tera::io::IsAtomicOP(cur_type_)) { + return false; + } + assert(merged_key); + assert(merged_value); - AtomicMergeStrategy atom_merge; - atom_merge.Init(merged_key, merged_value, it->key(), it->value(), cur_type_); + AtomicMergeStrategy atom_merge; + atom_merge.Init(merged_key, merged_value, it->key(), it->value(), cur_type_); - it->Next(); - int64_t merged_num_t = 1; - int64_t last_ts_atomic = cur_ts_; - int64_t version_num = 0; + it->Next(); + int64_t merged_num_t = 1; + int64_t last_ts_atomic = cur_ts_; + int64_t version_num = 0; - while (it->Valid()) { - if (version_num >= 1) { - break; //avoid accumulate to many versions - } - Slice itkey = it->key(); - Slice key; - Slice col; - Slice qual; - int64_t ts = -1; - leveldb::TeraKeyType type; - - if (is_internal_key) { - leveldb::ParsedInternalKey ikey; - leveldb::ParseInternalKey(itkey, &ikey); - if (ikey.sequence > snapshot_) { - break; - } - if (!raw_key_operator_.ExtractTeraKey(ikey.user_key, &key, &col, &qual, &ts, &type)) { - LOG(WARNING) << "invalid internal key for tera: " << itkey.ToString(); - break; - } - } else { - if (!raw_key_operator_.ExtractTeraKey(itkey, &key, &col, &qual, &ts, &type)) { - LOG(WARNING) << "invalid tera key: " << itkey.ToString(); - break; - } - } + while (it->Valid()) { + if (version_num >= 1) { + break; // avoid accumulate to many versions + } + Slice itkey = it->key(); + Slice key; + Slice col; + Slice qual; + int64_t ts = -1; + leveldb::TeraKeyType type; - if (last_qual_ != qual || last_col_ != col || last_key_ != key) { - break; // out of the current cell - } + if (is_internal_key) { + leveldb::ParsedInternalKey ikey; + leveldb::ParseInternalKey(itkey, &ikey); + if (ikey.sequence > snapshot_) { + break; + } + if (!raw_key_operator_.ExtractTeraKey(ikey.user_key, &key, &col, &qual, &ts, &type)) { + LOG(WARNING) << "invalid internal key for tera: " << itkey.ToString(); + break; + } + } else { + if (!raw_key_operator_.ExtractTeraKey(itkey, &key, &col, &qual, &ts, &type)) { + LOG(WARNING) << "invalid tera key: " << itkey.ToString(); + break; + } + } - if (!IsAtomicOP(type) && type != leveldb::TKT_VALUE) { - break; - } else if (type == leveldb::TKT_VALUE) { - if (!merge_put_flag || ++version_num > 1) { - break; - } - } + if (last_qual_ != qual || last_col_ != col || last_key_ != key) { + break; // out of the current cell + } - if (ts != last_ts_atomic || type == leveldb::TKT_VALUE) { - atom_merge.MergeStep(it->key(), it->value(), type); - } - last_ts_atomic = ts; - it->Next(); - merged_num_t++; + if (!IsAtomicOP(type) && type != leveldb::TKT_VALUE) { + break; + } else if (type == leveldb::TKT_VALUE) { + if (!merge_put_flag || ++version_num > 1) { + break; + } } - atom_merge.Finish(); - if (merged_num) { - *merged_num = merged_num_t; + + if (ts != last_ts_atomic || type == leveldb::TKT_VALUE) { + atom_merge.MergeStep(it->key(), it->value(), type); } - return true; + last_ts_atomic = ts; + it->Next(); + merged_num_t++; + } + atom_merge.Finish(); + if (merged_num) { + *merged_num = merged_num_t; + } + return true; } bool DefaultCompactStrategy::ScanDrop(const Slice& tera_key, uint64_t n) { - bool key_col_qual_same = false; - Slice key, col, qual; - int64_t ts = -1; - leveldb::TeraKeyType type; + bool key_col_qual_same = false; + Slice key, col, qual; + int64_t ts = -1; + leveldb::TeraKeyType type; - if (!raw_key_operator_.ExtractTeraKey(tera_key, &key, &col, &qual, &ts, &type)) { - LOG(WARNING) << "invalid tera key: " << tera_key.ToString(); - return true; - } + if (!raw_key_operator_.ExtractTeraKey(tera_key, &key, &col, &qual, &ts, &type)) { + LOG(WARNING) << "invalid tera key: " << tera_key.ToString(); + return true; + } + + cur_type_ = type; + last_ts_ = cur_ts_; + cur_ts_ = ts; + int32_t cf_id = -1; + if (type != leveldb::TKT_DEL && DropIllegalColumnFamily(col.ToString(), &cf_id)) { + // drop illegal column family + return true; + } - cur_type_ = type; - last_ts_ = cur_ts_; - cur_ts_ = ts; - int32_t cf_id = -1; - if (type != leveldb::TKT_DEL && DropIllegalColumnFamily(col.ToString(), &cf_id)) { - // drop illegal column family - return true; - } + if (type >= leveldb::TKT_VALUE && DropByLifeTime(cf_id, ts)) { + // drop out-of-life-time record + return true; + } + + if (key.compare(last_key_) != 0) { + // reach a new row + last_key_.assign(key.data(), key.size()); + last_col_.assign(col.data(), col.size()); + last_qual_.assign(qual.data(), qual.size()); + last_type_ = type; + version_num_ = 0; + del_row_ts_ = del_col_ts_ = del_qual_ts_ = -1; + has_put_ = false; - if (type >= leveldb::TKT_VALUE && DropByLifeTime(cf_id, ts)) { - // drop out-of-life-time record - return true; + // no break in switch: need to set multiple variables + switch (type) { + case leveldb::TKT_DEL: + del_row_ts_ = ts; + case leveldb::TKT_DEL_COLUMN: + del_col_ts_ = ts; + case leveldb::TKT_DEL_QUALIFIERS: + del_qual_ts_ = ts; + default: + ; } - - if (key.compare(last_key_) != 0) { - // reach a new row - last_key_.assign(key.data(), key.size()); - last_col_.assign(col.data(), col.size()); - last_qual_.assign(qual.data(), qual.size()); - last_type_ = type; - version_num_ = 0; - del_row_ts_ = del_col_ts_ = del_qual_ts_ = -1; - has_put_ = false; - - // no break in switch: need to set multiple variables - switch (type) { - case leveldb::TKT_DEL: - del_row_ts_ = ts; - case leveldb::TKT_DEL_COLUMN: - del_col_ts_ = ts; - case leveldb::TKT_DEL_QUALIFIERS: - del_qual_ts_ = ts; - default:; - } - } else if (del_row_ts_ >= ts) { - // skip deleted row and the same row_del mark - return true; - } else if (col.compare(last_col_) != 0) { - // reach a new column family - last_col_.assign(col.data(), col.size()); - last_qual_.assign(qual.data(), qual.size()); - last_type_ = type; - version_num_ = 0; - del_col_ts_ = del_qual_ts_ = -1; - has_put_ = false; - // set both variables when type is leveldb::TKT_DEL_COLUMN - switch (type) { - case leveldb::TKT_DEL_COLUMN: - del_col_ts_ = ts; - case leveldb::TKT_DEL_QUALIFIERS: - del_qual_ts_ = ts; - default:; - } - } else if (del_col_ts_ > ts) { - // skip deleted column family - return true; - } else if (qual.compare(last_qual_) != 0) { - // reach a new qualifier - last_qual_.assign(qual.data(), qual.size()); - last_type_ = type; - version_num_ = 0; - del_qual_ts_ = -1; - has_put_ = false; - if (type == leveldb::TKT_DEL_QUALIFIERS) { - del_qual_ts_ = ts; - } - } else if (del_qual_ts_ > ts) { - // skip deleted qualifier - return true; - } else if (type == leveldb::TKT_DEL_QUALIFIERS) { - // reach a delete-all-qualifier mark + } else if (del_row_ts_ >= ts) { + // skip deleted row and the same row_del mark + return true; + } else if (col.compare(last_col_) != 0) { + // reach a new column family + last_col_.assign(col.data(), col.size()); + last_qual_.assign(qual.data(), qual.size()); + last_type_ = type; + version_num_ = 0; + del_col_ts_ = del_qual_ts_ = -1; + has_put_ = false; + // set both variables when type is leveldb::TKT_DEL_COLUMN + switch (type) { + case leveldb::TKT_DEL_COLUMN: + del_col_ts_ = ts; + case leveldb::TKT_DEL_QUALIFIERS: del_qual_ts_ = ts; - } else if (last_type_ == leveldb::TKT_DEL_QUALIFIER) { - // skip latest deleted version - last_type_ = type; - if (type == leveldb::TKT_VALUE) { - version_num_++; - } - return true; - } else { - key_col_qual_same = true; - last_type_ = type; + default: + ; } - - if (type != leveldb::TKT_VALUE && !IsAtomicOP(type)) { - return true; + } else if (del_col_ts_ >= ts) { + // skip deleted column family + return true; + } else if (qual.compare(last_qual_) != 0) { + // reach a new qualifier + last_qual_.assign(qual.data(), qual.size()); + last_type_ = type; + version_num_ = 0; + del_qual_ts_ = -1; + has_put_ = false; + if (type == leveldb::TKT_DEL_QUALIFIERS) { + del_qual_ts_ = ts; } - + } else if (del_qual_ts_ >= ts) { + // skip deleted qualifier + return true; + } else if (type == leveldb::TKT_DEL_QUALIFIERS) { + // reach a delete-all-qualifier mark + del_qual_ts_ = ts; + } else if (last_type_ == leveldb::TKT_DEL_QUALIFIER) { + // skip latest deleted version + last_type_ = type; if (type == leveldb::TKT_VALUE) { - has_put_ = true; + version_num_++; } + return true; + } else { + key_col_qual_same = true; + last_type_ = type; + } - if (IsAtomicOP(type) && has_put_) { - return true; - } + if (type != leveldb::TKT_VALUE && !IsAtomicOP(type)) { + return true; + } - CHECK(cf_id >= 0) << "illegel column family"; - if (type == leveldb::TKT_VALUE) { - if (cur_ts_ == last_ts_ && key_col_qual_same) { - // this is the same key, do not chang version num - } else { - version_num_++; - } - if (version_num_ > - static_cast(schema_.column_families(cf_id).max_versions())) { - // drop out-of-range version - VLOG(20) << "scan drop true: " << key.ToString() - << ", version " << version_num_ - << ", timestamp " << ts; - return true; - } + if (type == leveldb::TKT_VALUE) { + has_put_ = true; + } + + if (IsAtomicOP(type) && has_put_) { + return true; + } + + CHECK(cf_id >= 0) << "illegel column family"; + if (type == leveldb::TKT_VALUE) { + if (cur_ts_ == last_ts_ && key_col_qual_same) { + // this is the same key, do not chang version num + } else { + version_num_++; } - VLOG(20) << "scan drop false: " << key.ToString() - << ", version " << version_num_ - << ", timestamp " << ts; - return false; + if (version_num_ > static_cast(schema_.column_families(cf_id).max_versions())) { + // drop out-of-range version + VLOG(20) << "scan drop true: " << key.ToString() << ", version " << version_num_ + << ", timestamp " << ts; + return true; + } + } + VLOG(20) << "scan drop false: " << key.ToString() << ", version " << version_num_ + << ", timestamp " << ts; + return false; } bool DefaultCompactStrategy::DropIllegalColumnFamily(const std::string& column_family, - int32_t* cf_idx) const { - std::map::const_iterator it = - cf_indexs_.find(column_family); - if (it == cf_indexs_.end()) { - return true; - } - if (cf_idx) { - *cf_idx = it->second; - } - return false; + int32_t* cf_idx) const { + std::map::const_iterator it = cf_indexs_.find(column_family); + if (it == cf_indexs_.end()) { + return true; + } + if (cf_idx) { + *cf_idx = it->second; + } + return false; } bool DefaultCompactStrategy::DropByLifeTime(int32_t cf_idx, int64_t timestamp) const { - int64_t ttl = schema_.column_families(cf_idx).time_to_live() * 1000000LL; - if (ttl <= 0) { - // do not drop - return false; - } - int64_t cur_time = get_micros(); - if (timestamp + ttl > cur_time) { - return false; - } else { - return true; - } + int64_t ttl = schema_.column_families(cf_idx).time_to_live() * 1000000LL; + if (ttl <= 0) { + // do not drop + return false; + } + int64_t cur_time = get_micros(); + if (timestamp + ttl > cur_time) { + return false; + } else { + return true; + } } bool DefaultCompactStrategy::CheckTag(const Slice& tera_key, bool* del_tag, int64_t* ttl_tag) { - *del_tag = false; - *ttl_tag = -1; - Slice key, col, qual; - int64_t ts = -1; - leveldb::TeraKeyType type; - - if (!raw_key_operator_.ExtractTeraKey(tera_key, &key, &col, &qual, &ts, &type)) { - LOG(WARNING) << "invalid tera key: " << tera_key.ToString(); - return false; - } - - if (type == leveldb::TKT_DEL || - type == leveldb::TKT_DEL_COLUMN || - type == leveldb::TKT_DEL_QUALIFIERS || - type == leveldb::TKT_DEL_QUALIFIER) { - *del_tag = true; - } - int32_t cf = -1; - int64_t ttl = -1; - if (!DropIllegalColumnFamily(col.ToString(), &cf) && - schema_.column_families(cf).time_to_live() > 0) { - ttl = schema_.column_families(cf).time_to_live(); - *ttl_tag = ts + ttl * 1000000LL; - } - VLOG(11) << "default strategy, del " << *del_tag << ", key_ts " << ts - << ", ttl " << ttl - << ", ttl_tag " << *ttl_tag; - return true; + *del_tag = false; + *ttl_tag = -1; + Slice key, col, qual; + int64_t ts = -1; + leveldb::TeraKeyType type; + + if (!raw_key_operator_.ExtractTeraKey(tera_key, &key, &col, &qual, &ts, &type)) { + LOG(WARNING) << "invalid tera key: " << tera_key.ToString(); + return false; + } + + if (type == leveldb::TKT_DEL || type == leveldb::TKT_DEL_COLUMN || + type == leveldb::TKT_DEL_QUALIFIERS || type == leveldb::TKT_DEL_QUALIFIER) { + *del_tag = true; + } + int32_t cf = -1; + int64_t ttl = -1; + if (!DropIllegalColumnFamily(col.ToString(), &cf) && + schema_.column_families(cf).time_to_live() > 0) { + ttl = schema_.column_families(cf).time_to_live(); + *ttl_tag = ts + ttl * 1000000LL; + } + VLOG(11) << "default strategy, del " << *del_tag << ", key_ts " << ts << ", ttl " << ttl + << ", ttl_tag " << *ttl_tag; + return true; } bool DefaultCompactStrategy::CheckCompactLowerBound(const Slice& cur_key, const std::string& lower_bound) { - if (lower_bound.empty()) { - return false; - } + if (lower_bound.empty()) { + return false; + } - Slice rkey; - CHECK (raw_key_operator_.ExtractTeraKey(lower_bound, &rkey, NULL, NULL, NULL, NULL)); - int res = rkey.compare(cur_key); - if (res > 0) { - return true; - } else { - return false; - } + Slice rkey; + CHECK(raw_key_operator_.ExtractTeraKey(lower_bound, &rkey, NULL, NULL, NULL, NULL)); + int res = rkey.compare(cur_key); + if (res > 0) { + return true; + } else { + return false; + } } DefaultCompactStrategyFactory::DefaultCompactStrategyFactory(const TableSchema& schema) : schema_(schema), raw_key_operator_(GetRawKeyOperatorFromSchema(schema_)), cmp_(NewRowKeyComparator(raw_key_operator_)) { - // build index at tablet io loading - for (int32_t i = 0; i < schema_.column_families_size(); ++i) { - const std::string& name = schema_.column_families(i).name(); - cf_indexs_[name] = i; - } + // build index at tablet io loading + for (int32_t i = 0; i < schema_.column_families_size(); ++i) { + const std::string& name = schema_.column_families(i).name(); + cf_indexs_[name] = i; + } } -DefaultCompactStrategyFactory::~DefaultCompactStrategyFactory() { - delete cmp_; -} +DefaultCompactStrategyFactory::~DefaultCompactStrategyFactory() { delete cmp_; } void DefaultCompactStrategyFactory::SetArg(const void* arg) { - MutexLock lock(&mutex_); - schema_.CopyFrom(*(TableSchema*)arg); + MutexLock lock(&mutex_); + schema_.CopyFrom(*(TableSchema*)arg); } DefaultCompactStrategy* DefaultCompactStrategyFactory::NewInstance() { - MutexLock lock(&mutex_); - return new DefaultCompactStrategy(schema_, cf_indexs_, *raw_key_operator_, cmp_); + MutexLock lock(&mutex_); + return new DefaultCompactStrategy(schema_, cf_indexs_, *raw_key_operator_, cmp_); } -} // namespace io -} // namespace tera +} // namespace io +} // namespace tera diff --git a/src/io/default_compact_strategy.h b/src/io/default_compact_strategy.h index b780495fe..b7c2d07c4 100644 --- a/src/io/default_compact_strategy.h +++ b/src/io/default_compact_strategy.h @@ -19,91 +19,84 @@ namespace io { using leveldb::Slice; class DefaultCompactStrategy : public leveldb::CompactStrategy { -public: - DefaultCompactStrategy(const TableSchema& schema, - const std::map& cf_indexs, - const leveldb::RawKeyOperator& raw_key_operator, - leveldb::Comparator* cmp); - - virtual ~DefaultCompactStrategy() {} - - virtual bool Drop(const Slice& k, uint64_t n, - const std::string& lower_bound); - - virtual const leveldb::Comparator* RowKeyComparator(); - - // tera-specific, based on all-level iterators. - // used in LowLevelScan - virtual bool ScanDrop(const Slice& k, uint64_t n); - - virtual const char* Name() const; - - virtual void SetSnapshot(uint64_t snapshot); - virtual bool CheckTag(const leveldb::Slice& tera_key, bool* del_tag, int64_t* ttl_tag); - - virtual bool ScanMergedValue(leveldb::Iterator* it, - std::string* merged_value, - int64_t* merged_num = NULL); - - virtual bool MergeAtomicOPs(leveldb::Iterator* it, std::string* merged_value, - std::string* merged_key); - -private: - bool DropIllegalColumnFamily(const std::string& column_family, - int32_t* cf_idx = NULL) const; - bool DropByLifeTime(int32_t cf_idx, int64_t timestamp) const; - - bool InternalMergeProcess(leveldb::Iterator* it, std::string* merged_value, - std::string* merged_key, - bool merge_put_flag, bool is_internal_key, - int64_t* merged_num); - - bool CheckCompactLowerBound(const Slice& cur_key, - const std::string& lower_bound); - -private: - const TableSchema& schema_; - const std::map& cf_indexs_; - const leveldb::RawKeyOperator& raw_key_operator_; - leveldb::Comparator* cmp_; - - std::string last_key_; - std::string last_col_; - std::string last_qual_; - int64_t last_ts_; - leveldb::TeraKeyType last_type_; - leveldb::TeraKeyType cur_type_; - int64_t del_row_ts_; - int64_t del_col_ts_; - int64_t del_qual_ts_; - int64_t cur_ts_; - uint64_t del_row_seq_; - uint64_t del_col_seq_; - uint64_t del_qual_seq_; - uint32_t version_num_; - uint64_t snapshot_; - bool has_put_; + public: + DefaultCompactStrategy(const TableSchema& schema, const std::map& cf_indexs, + const leveldb::RawKeyOperator& raw_key_operator, leveldb::Comparator* cmp); + + virtual ~DefaultCompactStrategy() {} + + virtual bool Drop(const Slice& k, uint64_t n, const std::string& lower_bound); + + virtual const leveldb::Comparator* RowKeyComparator(); + + virtual void ExtractRowKey(const Slice& tera_key, std::string* row_key); + + // tera-specific, based on all-level iterators. + // used in LowLevelScan + virtual bool ScanDrop(const Slice& k, uint64_t n); + + virtual const char* Name() const; + + virtual void SetSnapshot(uint64_t snapshot); + virtual bool CheckTag(const leveldb::Slice& tera_key, bool* del_tag, int64_t* ttl_tag); + + virtual bool ScanMergedValue(leveldb::Iterator* it, std::string* merged_value, + int64_t* merged_num = NULL); + + virtual bool MergeAtomicOPs(leveldb::Iterator* it, std::string* merged_value, + std::string* merged_key); + + private: + bool DropIllegalColumnFamily(const std::string& column_family, int32_t* cf_idx = NULL) const; + bool DropByLifeTime(int32_t cf_idx, int64_t timestamp) const; + + bool InternalMergeProcess(leveldb::Iterator* it, std::string* merged_value, + std::string* merged_key, bool merge_put_flag, bool is_internal_key, + int64_t* merged_num); + + bool CheckCompactLowerBound(const Slice& cur_key, const std::string& lower_bound); + + private: + const TableSchema& schema_; + const std::map& cf_indexs_; + const leveldb::RawKeyOperator& raw_key_operator_; + leveldb::Comparator* cmp_; + + std::string last_key_; + std::string last_col_; + std::string last_qual_; + int64_t last_ts_; + leveldb::TeraKeyType last_type_; + leveldb::TeraKeyType cur_type_; + int64_t del_row_ts_; + int64_t del_col_ts_; + int64_t del_qual_ts_; + int64_t cur_ts_; + uint64_t del_row_seq_; + uint64_t del_col_seq_; + uint64_t del_qual_seq_; + uint32_t version_num_; + uint64_t snapshot_; + bool has_put_; }; class DefaultCompactStrategyFactory : public leveldb::CompactStrategyFactory { -public: - DefaultCompactStrategyFactory(const TableSchema& schema); - virtual ~DefaultCompactStrategyFactory(); - virtual DefaultCompactStrategy* NewInstance(); - virtual void SetArg(const void* arg); - virtual const char* Name() const { - return "tera.DefaultCompactStrategyFactory"; - } - -private: - TableSchema schema_; - std::map cf_indexs_; - const leveldb::RawKeyOperator* raw_key_operator_; - leveldb::Comparator* cmp_; - mutable Mutex mutex_; + public: + DefaultCompactStrategyFactory(const TableSchema& schema); + virtual ~DefaultCompactStrategyFactory(); + virtual DefaultCompactStrategy* NewInstance(); + virtual void SetArg(const void* arg); + virtual const char* Name() const { return "tera.DefaultCompactStrategyFactory"; } + + private: + TableSchema schema_; + std::map cf_indexs_; + const leveldb::RawKeyOperator* raw_key_operator_; + leveldb::Comparator* cmp_; + mutable Mutex mutex_; }; -} // namespace io -} // namespace tera +} // namespace io +} // namespace tera -#endif // TERA_IO_DEFAULT_COMPACT_STRATEGY_H_ +#endif // TERA_IO_DEFAULT_COMPACT_STRATEGY_H_ diff --git a/src/io/io_flags.cc b/src/io/io_flags.cc index b8ecbdeff..5756cea39 100644 --- a/src/io/io_flags.cc +++ b/src/io/io_flags.cc @@ -5,59 +5,102 @@ #include "common/base/stdint.h" #include "gflags/gflags.h" -DEFINE_int32(tera_leveldb_slow_down_level0_score_limit, 100, "control level 0 score compute, score / 2 or sqrt(score / 2)"); +DEFINE_int32(tera_leveldb_slow_down_level0_score_limit, 100, + "control level 0 score compute, score / 2 or sqrt(score / 2)"); DEFINE_int32(tera_leveldb_max_background_compactions, 8, "multi-thread compaction number"); DEFINE_int32(tera_tablet_max_sub_parallel_compaction, 10, "max sub compaction in parallel"); DEFINE_bool(tera_leveldb_ignore_corruption_in_open, false, "ignore fs error when open db"); -DEFINE_int32(tera_tablet_del_percentage, 20, "percentage of del tag in sst file begin to trigger compaction"); -DEFINE_int32(tera_tablet_ttl_percentage, 99, "percentage of ttl tag in sst file begin to trigger compaction"); -DEFINE_int32(tera_tablet_level0_file_limit, 20000, "the max level0 file num before write busy"); +DEFINE_int32(tera_tablet_del_percentage, 20, + "percentage of del tag in sst file begin to trigger compaction"); +DEFINE_int32(tera_tablet_ttl_percentage, 99, + "percentage of ttl tag in sst file begin to trigger compaction"); +DEFINE_int32(tera_tablet_level0_file_limit, 20, "the max level0 file num before write busy"); DEFINE_int32(tera_asyncwriter_sync_size_threshold, 1024, "force sync per X KB"); -DEFINE_int32(tera_asyncwriter_pending_limit, 10000, "the max pending data size (KB) in async writer"); -DEFINE_int32(tera_asyncwriter_sync_interval, 10, "the interval (in ms) to sync write buffer to disk"); +DEFINE_int32(tera_asyncwriter_pending_limit, 10000, + "the max pending data size (KB) in async writer"); +DEFINE_int32(tera_asyncwriter_sync_interval, 10, + "the interval (in ms) to sync write buffer to disk"); DEFINE_bool(tera_enable_level0_limit, true, "enable level0 limit"); -DEFINE_int32(tera_tabletnode_scanner_cache_size, 5, "default tablet scanner manager cache no more than 100 stream"); +DEFINE_int32(tera_tabletnode_scanner_cache_size, 5, + "default tablet scanner manager cache no more than 100 stream"); DEFINE_uint64(tera_tabletnode_prefetch_scan_size, 1 << 20, "Max size for prefetch scan"); DEFINE_int32(tera_asyncwriter_batch_size, 1024, "write batch to leveldb per X KB"); -DEFINE_int32(tera_tablet_max_block_log_number, 50, "max number of unsed log files produced by switching log"); +DEFINE_int32(tera_tablet_max_block_log_number, 50, + "max number of unsed log files produced by switching log"); DEFINE_int64(tera_tablet_write_log_time_out, 5, "max time(sec) to wait for log writing or sync"); DEFINE_bool(tera_log_async_mode, true, "enable async mode for log writing and sync"); DEFINE_int64(tera_tablet_log_file_size, 32, "the log file size (in MB) for tablet"); -DEFINE_int64(tera_tablet_max_write_buffer_size, 32, "the buffer size (in MB) for tablet write buffer"); +DEFINE_int64(tera_tablet_max_write_buffer_size, 32, + "the buffer size (in MB) for tablet write buffer"); DEFINE_int64(tera_tablet_living_period, -1, "the living period of tablet"); DEFINE_int32(tera_tablet_flush_log_num, 100000, "the max log number before flush memtable"); -DEFINE_bool(tera_tablet_use_memtable_on_leveldb, false, "enable memtable based on in-memory leveldb"); -DEFINE_int64(tera_tablet_memtable_ldb_write_buffer_size, 1000, "the buffer size(in KB) for memtable on leveldb"); +DEFINE_bool(tera_tablet_use_memtable_on_leveldb, false, + "enable memtable based on in-memory leveldb"); +DEFINE_int64(tera_tablet_memtable_ldb_write_buffer_size, 1000, + "the buffer size(in KB) for memtable on leveldb"); DEFINE_bool(tera_sync_log, true, "flush all in-memory parts of log file to stable storage"); DEFINE_bool(tera_io_cache_path_vanish_allowed, false, "if true, allow cache path not exist"); +DEFINE_int32(tera_tabletnode_cache_update_thread_num, 4, "thread num for update cache"); +DEFINE_bool(tera_tabletnode_cache_force_read_from_cache, true, + "force update cache before any read"); DEFINE_string(tera_dfs_so_path, "", "the dfs implementation path"); DEFINE_string(tera_dfs_conf, "", "the dfs configuration file path"); -DEFINE_string(tera_leveldb_env_dfs_type, "hdfs", "the default type for leveldb IO dfs environment, [hdfs | nfs]"); +DEFINE_string(tera_leveldb_env_dfs_type, "hdfs", + "the default type for leveldb IO dfs environment, [hdfs | nfs]"); DEFINE_string(tera_leveldb_env_hdfs2_nameservice_list, "default", "the nameservice list of hdfs2"); DEFINE_string(tera_leveldb_env_nfs_mountpoint, "/disk/tera", "the mountpoint of nfs"); DEFINE_string(tera_leveldb_env_nfs_conf_path, "../conf/nfs.conf", "the config file path of nfs"); DEFINE_int32(tera_io_retry_period, 100, "the retry interval period (in ms) when operate file"); DEFINE_int32(tera_io_retry_max_times, 20, "the max retry times when meets trouble"); -DEFINE_int32(tera_leveldb_env_local_seek_latency, 50000, "the random access latency (in ns) of local storage device"); -DEFINE_int32(tera_leveldb_env_dfs_seek_latency, 10000000, "the random access latency (in ns) of dfs storage device"); +DEFINE_int32(tera_leveldb_env_local_seek_latency, 50000, + "the random access latency (in ns) of local storage device"); +DEFINE_int32(tera_leveldb_env_dfs_seek_latency, 10000000, + "the random access latency (in ns) of dfs storage device"); DEFINE_int32(tera_memenv_table_cache_size, 100, "the max open file number in leveldb table_cache"); -DEFINE_int32(tera_memenv_block_cache_size, 10000, "(MB) block cache size for leveldb which do not use share block cache"); +DEFINE_int32(tera_memenv_block_cache_size, 10000, + "(MB) block cache size for leveldb which do not use share block cache"); DEFINE_bool(tera_use_flash_for_memenv, true, "Use flashenv for memery lg"); DEFINE_int32(tera_leveldb_block_cache_env_thread_num, 30, "thread num of flash blcok cache"); -DEFINE_string(tera_leveldb_compact_strategy, "default", "the default strategy to drive consum compaction, should be [default|LG|dummy]"); -DEFINE_bool(tera_leveldb_verify_checksums, true, "enable verify data read from storage against checksums"); -DEFINE_bool(tera_leveldb_ignore_corruption_in_compaction, false, "skip corruption blocks of sst file in compaction"); +DEFINE_string(tera_leveldb_compact_strategy, "default", + "the default strategy to drive consum compaction, should be " + "[default|LG|dummy]"); +DEFINE_bool(tera_leveldb_verify_checksums, true, + "enable verify data read from storage against checksums"); +DEFINE_bool(tera_leveldb_ignore_corruption_in_compaction, false, + "skip corruption blocks of sst file in compaction"); DEFINE_bool(tera_leveldb_use_file_lock, false, "hold file lock during loading leveldb"); -DEFINE_bool(tera_leveldb_use_direct_io_read, true, "enable random read from local SATA or SSD device use Direct I/O"); -DEFINE_bool(tera_leveldb_use_direct_io_write, true, "enable write to local SATA or SSD device use Direct I/O"); -DEFINE_uint64(tera_leveldb_posix_write_buffer_size, 512<<10, "write buffer size for PosixWritableFile"); -DEFINE_uint64(tera_leveldb_table_builder_write_batch_size, 256<<10, "table builder's batch write size, 0 means disable table builder batch write"); +DEFINE_bool(tera_leveldb_use_direct_io_read, true, + "enable random read from local SATA or SSD device use Direct I/O"); +DEFINE_bool(tera_leveldb_use_direct_io_write, true, + "enable write to local SATA or SSD device use Direct I/O"); +DEFINE_uint64(tera_leveldb_posix_write_buffer_size, 512 << 10, + "write buffer size for PosixWritableFile"); +DEFINE_uint64(tera_leveldb_table_builder_write_batch_size, 256 << 10, + "table builder's batch write size, 0 means disable table builder " + "batch write"); -DEFINE_int32(tera_tablet_unload_count_limit, 3, "the upper bound of try unload, broken this limit will speed up unloading"); +DEFINE_int32(tera_tablet_unload_count_limit, 3, + "the upper bound of try unload, broken this limit will speed up unloading"); +DEFINE_int32(tera_leveldb_memtable_shard_num, 4, "shard memtable num"); +DEFINE_uint64(tera_leveldb_manifest_switch_size_MB, 2, "manifest file switch size (in MB)"); /*** Only for DEBUG online ***/ DEFINE_bool(debug_tera_tablet_unload, false, "enable to print tablet unload log more detail"); + +DEFINE_string(tera_tabletnode_cache_paths, "", + "paths for cached data storage. Mutiple definition like: \"./path1/;./path2/\""); +DEFINE_string(persistent_cache_sizes_in_MB, "", + "Sizes for persistent cache. Mutiple definition like: \"size1;size2\", leave empty " + "if the total disk is used for persistent cache."); +DEFINE_bool(tera_enable_persistent_cache, true, "enable persistent cache instead of env flash"); +DEFINE_bool(tera_enable_persistent_cache_transfer_flash_env_files, true, + "enable transfer existing cache files to persistent_cache"); +DEFINE_uint64(persistent_cache_write_retry_times, 5, + "persistent cache file append retry times when reserve space failed"); +DEFINE_bool(enable_dfs_read_thread_limiter, true, + "enable dfs read thread limiter to reserve threads for read ssd"); +DEFINE_double(dfs_read_thread_ratio, 0.7, "ratio of read threads that read-from-dfs can use"); diff --git a/src/io/io_utils.cc b/src/io/io_utils.cc index 057b18251..7e7db540d 100644 --- a/src/io/io_utils.cc +++ b/src/io/io_utils.cc @@ -7,39 +7,41 @@ namespace tera { StatusCode LeveldbCodeToTeraCode(const leveldb::Status& status) { - if (status.ok()) { - return kTabletNodeOk; - } else if (status.IsNotFound()) { - return kKeyNotExist; - } else if (status.IsCorruption()) { - return kTableCorrupt; - } - return kIOError; + if (status.ok()) { + return kTabletNodeOk; + } else if (status.IsNotFound()) { + return kKeyNotExist; + } else if (status.IsCorruption()) { + return kTableCorrupt; + } else if (status.IsReject()) { + return kTabletNodeIsBusy; + } + return kIOError; } void SetStatusCode(const leveldb::Status& db_status, StatusCode* tera_status) { - if (tera_status) { - *tera_status = LeveldbCodeToTeraCode(db_status); - } + if (tera_status) { + *tera_status = LeveldbCodeToTeraCode(db_status); + } } void SetStatusCode(const io::TabletIO::TabletStatus& tablet_status, StatusCode* tera_status) { - if (tera_status) { - *tera_status = static_cast(tablet_status); - } + if (tera_status) { + *tera_status = static_cast(tablet_status); + } } const leveldb::RawKeyOperator* GetRawKeyOperatorFromSchema(TableSchema& schema) { - // key_translator should be lg property, but here only support table - // property. In future work, key_translator should be done in leveldb. - RawKey raw_key = schema.raw_key(); - switch (raw_key) { - case Binary: - return leveldb::BinaryRawKeyOperator(); - case Readable: - return leveldb::ReadableRawKeyOperator(); - default: - return leveldb::KvRawKeyOperator(); - } + // key_translator should be lg property, but here only support table + // property. In future work, key_translator should be done in leveldb. + RawKey raw_key = schema.raw_key(); + switch (raw_key) { + case Binary: + return leveldb::BinaryRawKeyOperator(); + case Readable: + return leveldb::ReadableRawKeyOperator(); + default: + return leveldb::KvRawKeyOperator(); + } } -} // namespace tera +} // namespace tera diff --git a/src/io/io_utils.h b/src/io/io_utils.h index 4500767eb..34df4487a 100644 --- a/src/io/io_utils.h +++ b/src/io/io_utils.h @@ -5,6 +5,10 @@ #ifndef TERA_IO_IO_UTILS_H_ #define TERA_IO_IO_UTILS_H_ +#include + +#include "common/semaphore.h" +#include "common/rwmutex.h" #include "leveldb/raw_key_operator.h" #include "leveldb/status.h" #include "io/tablet_io.h" @@ -20,7 +24,6 @@ void SetStatusCode(const leveldb::Status& db_status, StatusCode* tera_status); void SetStatusCode(const io::TabletIO::TabletStatus& tablet_status, StatusCode* tera_status); const leveldb::RawKeyOperator* GetRawKeyOperatorFromSchema(TableSchema& schema); +} // namespace tera -} // namespace tera - -#endif // TERA_IO_IO_UTILS_H_ +#endif // TERA_IO_IO_UTILS_H_ diff --git a/src/io/mock_tablet_io.h b/src/io/mock_tablet_io.h index e824451a7..00d689b56 100644 --- a/src/io/mock_tablet_io.h +++ b/src/io/mock_tablet_io.h @@ -13,70 +13,38 @@ namespace tera { namespace io { class MockTabletIO : public TabletIO { -public: - MOCK_CONST_METHOD0(GetCompactStatus, - CompactStatus()); - MOCK_CONST_METHOD0(GetSchema, - const TableSchema&()); - MOCK_METHOD10(Load, - bool(const TableSchema& schema, - const std::string& key_start, - const std::string& key_end, - const std::string& path, - const std::vector& parent_tablets, - std::map snapshots, - leveldb::Logger* logger, - leveldb::Cache* block_cache, - leveldb::TableCache* table_cache, - StatusCode* status)); - MOCK_METHOD1(Unload, - bool(StatusCode* status)); - MOCK_METHOD2(Split, - bool(std::string* split_key, - StatusCode* status)); - MOCK_METHOD1(Compact, - bool(StatusCode* status)); - MOCK_METHOD1(GetDataSize, - int64_t(StatusCode* status)); - MOCK_METHOD3(GetDataSize, - int64_t(const std::string& start_key, - const std::string& end_key, - StatusCode* status)); - MOCK_METHOD4(Read, - bool(const leveldb::Slice& key, - std::string* value, - uint64_t snapshot_id, - StatusCode* status)); - MOCK_METHOD5(Read, - bool(const KeyList& key_list, - BytesList* value_list, - uint32_t* success_num, - uint64_t snapshot_id, - StatusCode* status)); - MOCK_METHOD3(ReadCells, - bool(const RowReaderInfo& row_reader, - RowResult* value_list, - StatusCode* status)); - MOCK_METHOD7(Write, - bool(const WriteTabletRequest* request, - WriteTabletResponse* response, - google::protobuf::Closure* done, - const std::vector* index_list, - Counter* done_counter, - WriteRpcTimer* timer, - StatusCode* status)); - MOCK_METHOD4(Scan, - bool(const ScanOption& option, - KeyValueList* kv_list, - bool* complete, - StatusCode* status)); - MOCK_METHOD3(ScanRows, - bool(const ScanTabletRequest* request, - ScanTabletResponse* response, - google::protobuf::Closure* done)); + public: + MockTabletIO() : TabletIO("", "", "", 0, 0) {} + MOCK_CONST_METHOD0(GetCompactStatus, CompactStatus()); + MOCK_CONST_METHOD0(GetSchema, TableSchema()); + MOCK_METHOD10(Load, bool(const TableSchema& schema, const std::string& key_start, + const std::string& key_end, const std::string& path, + const std::vector& parent_tablets, + std::map snapshots, leveldb::Logger* logger, + leveldb::Cache* block_cache, leveldb::TableCache* table_cache, + StatusCode* status)); + MOCK_METHOD1(Unload, bool(StatusCode* status)); + MOCK_METHOD2(Split, bool(std::string* split_key, StatusCode* status)); + MOCK_METHOD1(Compact, bool(StatusCode* status)); + MOCK_METHOD1(GetDataSize, int64_t(StatusCode* status)); + MOCK_METHOD3(GetDataSize, int64_t(const std::string& start_key, const std::string& end_key, + StatusCode* status)); + MOCK_METHOD4(Read, bool(const leveldb::Slice& key, std::string* value, uint64_t snapshot_id, + StatusCode* status)); + MOCK_METHOD5(Read, bool(const KeyList& key_list, BytesList* value_list, uint32_t* success_num, + uint64_t snapshot_id, StatusCode* status)); + MOCK_METHOD5(ReadCells, bool(const RowReaderInfo& row_reader, RowResult* value_list, + uint64_t snapshot_id, StatusCode* status, int64_t timeout_ms)); + MOCK_METHOD5(Write, bool(std::vector* row_mutation_vec, + std::vector* status_vec, bool is_instant, + WriteCallback callback, StatusCode* status)); + MOCK_METHOD6(Scan, bool(const ScanOption& option, KeyValueList* kv_list, uint32_t* read_row_count, + uint32_t* read_bytes, bool* complete, StatusCode* status)); + MOCK_METHOD3(ScanRows, bool(const ScanTabletRequest* request, ScanTabletResponse* response, + google::protobuf::Closure* done)); }; } // namespace io } // namespace tera -#endif // TERA_IO_MOCK_TABLET_IO_H_ +#endif // TERA_IO_MOCK_TABLET_IO_H_ diff --git a/src/io/tablet_io.cc b/src/io/tablet_io.cc index 3914929cb..772478639 100644 --- a/src/io/tablet_io.cc +++ b/src/io/tablet_io.cc @@ -5,6 +5,7 @@ #include "io/tablet_io.h" #include +#include #include "common/counter.h" #include "common/metric/prometheus_subscriber.h" @@ -31,9 +32,10 @@ #include "io/utils_leveldb.h" #include "tabletnode/tabletnode_metric_name.h" #include "types.h" -#include "utils/scan_filter.h" #include "utils/string_util.h" #include "utils/utils_cmd.h" +#include "io_utils.h" +#include "sdk/filter_utils.h" DECLARE_string(tera_leveldb_env_type); DECLARE_int64(tera_tablet_log_file_size); @@ -54,6 +56,7 @@ DECLARE_int32(tera_io_retry_max_times); DECLARE_string(tera_master_meta_table_name); DECLARE_string(tera_tabletnode_path_prefix); +DECLARE_uint64(tera_leveldb_manifest_switch_size_MB); DECLARE_string(tera_leveldb_compact_strategy); DECLARE_bool(tera_leveldb_verify_checksums); DECLARE_bool(tera_leveldb_ignore_corruption_in_compaction); @@ -65,7 +68,6 @@ DECLARE_int32(tera_leveldb_env_local_seek_latency); DECLARE_int32(tera_leveldb_env_dfs_seek_latency); DECLARE_int32(tera_memenv_table_cache_size); DECLARE_bool(tera_use_flash_for_memenv); -DECLARE_bool(tera_tabletnode_flash_block_cache_enabled); DECLARE_bool(tera_tablet_use_memtable_on_leveldb); DECLARE_int64(tera_tablet_memtable_ldb_write_buffer_size); @@ -83,6 +85,10 @@ DECLARE_bool(tera_leveldb_use_direct_io_read); DECLARE_bool(tera_leveldb_use_direct_io_write); DECLARE_uint64(tera_leveldb_posix_write_buffer_size); DECLARE_uint64(tera_leveldb_table_builder_write_batch_size); +DECLARE_int32(tera_leveldb_memtable_shard_num); + +DECLARE_bool(tera_enable_persistent_cache); +DECLARE_bool(enable_dfs_read_thread_limiter); namespace tera { namespace io { @@ -97,2318 +103,2500 @@ using tera::tabletnode::kApiLabelWrite; using tera::tabletnode::kLowLevelReadMetric; using tera::tabletnode::kScanDropCountMetric; +using tera::tabletnode::kScanFilterCountMetric; using tera::tabletnode::kBatchScanCountMetric; using tera::tabletnode::kSyncScanCountMetric; tera::MetricCounter low_level_read_count(kLowLevelReadMetric, {SubscriberType::QPS}); tera::MetricCounter scan_drop_count(kScanDropCountMetric, {SubscriberType::QPS}); +tera::MetricCounter scan_filter_count(kScanFilterCountMetric, {SubscriberType::QPS}); tera::MetricCounter batch_scan_count(kBatchScanCountMetric, {SubscriberType::QPS}); tera::MetricCounter sync_scan_count(kSyncScanCountMetric, {SubscriberType::QPS}); tera::MetricCounter row_read_delay(kRowDelayMetric, kApiLabelRead, {}); tera::MetricCounter row_read_count(kRowCountMetric, kApiLabelRead, {SubscriberType::QPS}); -tera::MetricCounter row_read_bytes(kRowThroughPutMetric, kApiLabelRead, {SubscriberType::THROUGHPUT}); +tera::MetricCounter row_read_bytes(kRowThroughPutMetric, kApiLabelRead, + {SubscriberType::THROUGHPUT}); tera::MetricCounter row_scan_delay(kRowDelayMetric, kApiLabelScan, {}); tera::MetricCounter row_scan_count(kRowCountMetric, kApiLabelScan, {SubscriberType::QPS}); -tera::MetricCounter row_scan_bytes(kRowThroughPutMetric, kApiLabelScan, {SubscriberType::THROUGHPUT}); - -tera::MetricCounter row_write_bytes(kRowThroughPutMetric, kApiLabelWrite, {SubscriberType::THROUGHPUT}); - -tera::AutoSubscriberRegister row_read_delay_per_row(std::unique_ptr(new tera::RatioSubscriber( - MetricId("tera_ts_row_read_delay_us_per_row"), - std::unique_ptr(new tera::PrometheusSubscriber(MetricId(kRowDelayMetric, kApiLabelRead), SubscriberType::SUM)), - std::unique_ptr(new tera::PrometheusSubscriber(MetricId(kRowCountMetric, kApiLabelRead), SubscriberType::SUM))))); - -tera::AutoSubscriberRegister row_scan_delay_per_row(std::unique_ptr(new tera::RatioSubscriber( - MetricId("tera_ts_row_scan_delay_us_per_row"), - std::unique_ptr(new tera::PrometheusSubscriber(MetricId(kRowDelayMetric, kApiLabelScan), SubscriberType::SUM)), - std::unique_ptr(new tera::PrometheusSubscriber(MetricId(kRowCountMetric, kApiLabelScan), SubscriberType::SUM))))); - - -std::ostream& operator << (std::ostream& o, const TabletIO& tablet_io) { - o << tablet_io.short_path_ - << " [" << DebugString(tablet_io.start_key_) - << ", " << DebugString(tablet_io.end_key_) << "]"; - return o; +tera::MetricCounter row_scan_bytes(kRowThroughPutMetric, kApiLabelScan, + {SubscriberType::THROUGHPUT}); + +tera::MetricCounter row_write_bytes(kRowThroughPutMetric, kApiLabelWrite, + {SubscriberType::THROUGHPUT}); + +tera::AutoSubscriberRegister row_read_delay_per_row(std::unique_ptr( + new tera::RatioSubscriber(MetricId("tera_ts_row_read_delay_us_per_row"), + std::unique_ptr(new tera::PrometheusSubscriber( + MetricId(kRowDelayMetric, kApiLabelRead), SubscriberType::SUM)), + std::unique_ptr(new tera::PrometheusSubscriber( + MetricId(kRowCountMetric, kApiLabelRead), + SubscriberType::SUM))))); + +tera::AutoSubscriberRegister row_scan_delay_per_row(std::unique_ptr( + new tera::RatioSubscriber(MetricId("tera_ts_row_scan_delay_us_per_row"), + std::unique_ptr(new tera::PrometheusSubscriber( + MetricId(kRowDelayMetric, kApiLabelScan), SubscriberType::SUM)), + std::unique_ptr(new tera::PrometheusSubscriber( + MetricId(kRowCountMetric, kApiLabelScan), + SubscriberType::SUM))))); + +std::ostream& operator<<(std::ostream& o, const TabletIO& tablet_io) { + o << tablet_io.short_path_ << " [" << DebugString(tablet_io.start_key_) << ", " + << DebugString(tablet_io.end_key_) << "]"; + return o; } std::string MetricLabelToString(const std::string& tablet_path) { - size_t sep_pos = tablet_path.find_last_of("/"); - if (sep_pos == std::string::npos) { - // meta tablet - return LabelStringBuilder().Append("table", tablet_path).Append("tablet", tablet_path).ToString(); - } else { - std::string table_name = tablet_path.substr(0, sep_pos); - return LabelStringBuilder().Append("table", table_name).Append("tablet", tablet_path).ToString(); - } + size_t sep_pos = tablet_path.find_last_of("/"); + if (sep_pos == std::string::npos) { + // meta tablet + return LabelStringBuilder() + .Append("table", tablet_path) + .Append("tablet", tablet_path) + .ToString(); + } else { + std::string table_name = tablet_path.substr(0, sep_pos); + return LabelStringBuilder() + .Append("table", table_name) + .Append("tablet", tablet_path) + .ToString(); + } } - TabletIO::TabletIO(const std::string& key_start, const std::string& key_end, const std::string& path) + : TabletIO(key_start, key_end, path, 0, 0) {} + +TabletIO::TabletIO(const std::string& key_start, const std::string& key_end, + const std::string& path, int64_t ctime, uint64_t version) : async_writer_(NULL), scan_context_manager_(NULL), start_key_(key_start), end_key_(key_end), + ctime_(ctime), + version_(version), short_path_(path), compact_status_(kTableNotCompact), status_(kNotInit), tablet_status_(static_cast(kTabletReady)), last_err_msg_(""), - ref_count_(1), db_ref_count_(0), db_(NULL), + ref_count_(1), + db_ref_count_(0), + db_(NULL), m_memory_cache(NULL), kv_only_(false), key_operator_(NULL), try_unload_count_(0), counter_(short_path_), - mock_env_(NULL) { -} + mock_env_(NULL) {} TabletIO::~TabletIO() { - if (status_ != kNotInit && !Unload()) { - if (async_writer_ != NULL) { - async_writer_->Stop(); - delete async_writer_; - async_writer_ = NULL; - } - delete db_; + if (status_ != kNotInit && !Unload()) { + if (async_writer_ != NULL) { + async_writer_->Stop(); + delete async_writer_; + async_writer_ = NULL; } + delete db_; + } } -void TabletIO::SetMockEnv(leveldb::Env* e) { - mock_env_ = e; -} +void TabletIO::SetMockEnv(leveldb::Env* e) { mock_env_ = e; } std::string TabletIO::GetTableName() const { - MutexLock lock(&schema_mutex_); - return table_schema_.name(); + MutexLock lock(&schema_mutex_); + return table_schema_.name(); } std::string TabletIO::GetTablePath() const { - if (!tablet_path_.empty()) { - std::string path = - tablet_path_.substr(FLAGS_tera_tabletnode_path_prefix.size()); - if (path.at(0) == '/') { - path = path.substr(1); - } - return path; - } else { - return tablet_path_; + if (!tablet_path_.empty()) { + std::string path = tablet_path_.substr(FLAGS_tera_tabletnode_path_prefix.size()); + if (path.at(0) == '/') { + path = path.substr(1); } + return path; + } else { + return tablet_path_; + } } -std::string TabletIO::GetStartKey() const { - return start_key_; -} +std::string TabletIO::GetStartKey() const { return start_key_; } -std::string TabletIO::GetEndKey() const { - return end_key_; -} +std::string TabletIO::GetEndKey() const { return end_key_; } -const std::string& TabletIO::GetMetricLabel() const { - return counter_.label; -} +const std::string& TabletIO::GetMetricLabel() const { return counter_.label; } -CompactStatus TabletIO::GetCompactStatus() const { - return compact_status_; -} +CompactStatus TabletIO::GetCompactStatus() const { return compact_status_; } -void TabletIO::SetSchema(const TableSchema& schema) { - table_schema_.CopyFrom(schema); -} +void TabletIO::SetSchema(const TableSchema& schema) { table_schema_.CopyFrom(schema); } TableSchema TabletIO::GetSchema() const { - MutexLock lock(&schema_mutex_); - return table_schema_; + MutexLock lock(&schema_mutex_); + return table_schema_; } RawKey TabletIO::RawKeyType() const { - MutexLock lock(&schema_mutex_); - return table_schema_.raw_key(); + MutexLock lock(&schema_mutex_); + return table_schema_.raw_key(); }; -TabletIO::StatCounter& TabletIO::GetCounter() { - return counter_; -} +TabletIO::StatCounter& TabletIO::GetCounter() { return counter_; } -void TabletIO::SetMemoryCache(leveldb::Cache* cache) { - m_memory_cache = cache; -} +void TabletIO::SetMemoryCache(leveldb::Cache* cache) { m_memory_cache = cache; } -bool TabletIO::Load(const TableSchema& schema, - const std::string& path, +bool TabletIO::Load(const TableSchema& schema, const std::string& path, const std::vector& parent_tablets, - const std::set& ignore_err_lgs, - leveldb::Logger* logger, - leveldb::Cache* block_cache, - leveldb::TableCache* table_cache, + const std::set& ignore_err_lgs, leveldb::Logger* logger, + leveldb::Cache* block_cache, leveldb::TableCache* table_cache, StatusCode* status) { - { - MutexLock lock(&mutex_); - if (status_ == kReady) { - return true; - } else if (status_ != kNotInit) { - SetStatusCode(status_, status); - return false; - } - status_ = kOnLoad; - db_ref_count_++; - } - - // any type of table should have at least 1lg+1cf. - table_schema_.CopyFrom(schema); - if (table_schema_.locality_groups_size() == 0) { - // only prepare for kv-only mode, no need to set fields of it. - table_schema_.add_locality_groups(); - } - - RawKey raw_key = table_schema_.raw_key(); - if (raw_key == TTLKv || raw_key == GeneralKv) { - kv_only_ = true; - } else { - // for compatible - if (table_schema_.column_families_size() == 0) { - // only prepare for kv-only mode, no need to set fields of it. - table_schema_.add_column_families(); - kv_only_ = true; - } else { - kv_only_ = table_schema_.kv_only(); - } - } - - key_operator_ = GetRawKeyOperatorFromSchema(table_schema_); - // [raw_start_key_, raw_end_key_) - raw_start_key_ = start_key_; - if (!kv_only_ && !start_key_.empty()) { - key_operator_->EncodeTeraKey(start_key_, "", "", kLatestTs, - leveldb::TKT_FORSEEK, &raw_start_key_); - } else if (kv_only_ && table_schema_.raw_key() == TTLKv && !start_key_.empty()) { - key_operator_->EncodeTeraKey(start_key_, "", "", 0, leveldb::TKT_FORSEEK, &raw_start_key_); - } - raw_end_key_ = end_key_; - if (!kv_only_ && !end_key_.empty()) { - key_operator_->EncodeTeraKey(end_key_, "", "", kLatestTs, - leveldb::TKT_FORSEEK, &raw_end_key_); - } else if (kv_only_ && table_schema_.raw_key() == TTLKv && !end_key_.empty()) { - key_operator_->EncodeTeraKey(end_key_, "", "", 0, leveldb::TKT_FORSEEK, &raw_end_key_); - } - - ldb_options_.key_start = raw_start_key_; - ldb_options_.key_end = raw_end_key_; - ldb_options_.l0_slowdown_writes_trigger = FLAGS_tera_tablet_level0_file_limit; - ldb_options_.max_sub_parallel_compaction = FLAGS_tera_tablet_max_sub_parallel_compaction; - ldb_options_.ttl_percentage = FLAGS_tera_tablet_ttl_percentage; - ldb_options_.del_percentage = FLAGS_tera_tablet_del_percentage; - ldb_options_.block_size = FLAGS_tera_tablet_write_block_size * 1024; - ldb_options_.max_block_log_number = FLAGS_tera_tablet_max_block_log_number; - ldb_options_.write_log_time_out = FLAGS_tera_tablet_write_log_time_out; - ldb_options_.log_async_mode = FLAGS_tera_log_async_mode; - ldb_options_.info_log = logger; - ldb_options_.max_open_files = FLAGS_tera_memenv_table_cache_size; - ldb_options_.max_background_compactions = FLAGS_tera_leveldb_max_background_compactions; - ldb_options_.slow_down_level0_score_limit = FLAGS_tera_leveldb_slow_down_level0_score_limit; - ldb_options_.ignore_corruption_in_open = FLAGS_tera_leveldb_ignore_corruption_in_open; - - ldb_options_.use_memtable_on_leveldb = FLAGS_tera_tablet_use_memtable_on_leveldb; - ldb_options_.memtable_ldb_write_buffer_size = - FLAGS_tera_tablet_memtable_ldb_write_buffer_size * 1024; - ldb_options_.memtable_ldb_block_size = FLAGS_tera_tablet_memtable_ldb_block_size * 1024; - if (FLAGS_tera_tablet_use_memtable_on_leveldb) { - LOG(INFO) << "enable mem-ldb for this tablet-server:" - << " buffer_size:" << ldb_options_.memtable_ldb_write_buffer_size - << ", block_size:" << ldb_options_.memtable_ldb_block_size; - } - - if (kv_only_ && table_schema_.raw_key() == TTLKv) { - ldb_options_.filter_policy = leveldb::NewTTLKvBloomFilterPolicy(10); - } else if (kv_only_) { - ldb_options_.filter_policy = leveldb::NewBloomFilterPolicy(10); - } else if (table_schema_.raw_key() == Readable) { - ldb_options_.filter_policy = - leveldb::NewRowKeyBloomFilterPolicy(10, leveldb::ReadableRawKeyOperator()); + { + MutexLock lock(&mutex_); + if (status_ == kReady) { + return true; + } else if (status_ != kNotInit) { + SetStatusCode(status_, status); + return false; + } + status_ = kOnLoad; + db_ref_count_++; + } + + // any type of table should have at least 1lg+1cf. + table_schema_.CopyFrom(schema); + if (table_schema_.locality_groups_size() == 0) { + // only prepare for kv-only mode, no need to set fields of it. + table_schema_.add_locality_groups(); + } + + RawKey raw_key = table_schema_.raw_key(); + if (raw_key == TTLKv || raw_key == GeneralKv) { + kv_only_ = true; + } else { + // for compatible + if (table_schema_.column_families_size() == 0) { + // only prepare for kv-only mode, no need to set fields of it. + table_schema_.add_column_families(); + kv_only_ = true; } else { - CHECK_EQ(table_schema_.raw_key(), Binary); - ldb_options_.filter_policy = - leveldb::NewRowKeyBloomFilterPolicy(10, leveldb::BinaryRawKeyOperator()); - } - ldb_options_.block_cache = block_cache; - ldb_options_.table_cache = table_cache; - ldb_options_.flush_triggered_log_num = FLAGS_tera_tablet_flush_log_num; - ldb_options_.log_file_size = FLAGS_tera_tablet_log_file_size * 1024 * 1024; - ldb_options_.parent_tablets = parent_tablets; - if (table_schema_.raw_key() == Binary) { - ldb_options_.raw_key_format = leveldb::kBinary; - ldb_options_.comparator = leveldb::TeraBinaryComparator(); - } else if (table_schema_.raw_key() == TTLKv) { // KV-Pair-With-TTL - ldb_options_.raw_key_format = leveldb::kTTLKv; - ldb_options_.comparator = leveldb::TeraTTLKvComparator(); - ldb_options_.enable_strategy_when_get = true; // active usage of strategy in DB::Get - } else { // Readable-Table && KV-Pair-Without-TTL - ldb_options_.raw_key_format = leveldb::kReadable; - ldb_options_.comparator = leveldb::BytewiseComparator(); - } - ldb_options_.verify_checksums_in_compaction = FLAGS_tera_leveldb_verify_checksums; - ldb_options_.ignore_corruption_in_compaction = FLAGS_tera_leveldb_ignore_corruption_in_compaction; - ldb_options_.use_file_lock = FLAGS_tera_leveldb_use_file_lock; - ldb_options_.disable_wal = table_schema_.disable_wal(); - SetupOptionsForLG(ignore_err_lgs); - - std::string path_prefix = FLAGS_tera_tabletnode_path_prefix; - if (*path_prefix.rbegin() != '/') { - path_prefix.push_back('/'); - } - - tablet_path_ = path_prefix + path; - LOG(INFO) << "[Load] Start Open " << tablet_path_ - << ", kv_only " << kv_only_ << ", raw_key_operator " << key_operator_->Name(); - - leveldb::Status db_status = leveldb::DB::Open(ldb_options_, tablet_path_, &db_); - - if (!db_status.ok()) { - LOG(ERROR) << "fail to open table: " << tablet_path_ - << ", " << db_status.ToString(); - { - MutexLock lock(&mutex_); - status_ = kNotInit; - last_err_msg_ = db_status.ToString(); - db_ref_count_--; - } - SetStatusCode(db_status, status); -// delete ldb_options_.env; - return false; + kv_only_ = table_schema_.kv_only(); + } + } + + if (kv_only_) { + ldb_options_.memtable_shard_num = 0; + } else { + ldb_options_.memtable_shard_num = FLAGS_tera_leveldb_memtable_shard_num; + } + + key_operator_ = GetRawKeyOperatorFromSchema(table_schema_); + // [raw_start_key_, raw_end_key_) + raw_start_key_ = start_key_; + if (!kv_only_ && !start_key_.empty()) { + key_operator_->EncodeTeraKey(start_key_, "", "", kLatestTs, leveldb::TKT_FORSEEK, + &raw_start_key_); + } else if (kv_only_ && table_schema_.raw_key() == TTLKv && !start_key_.empty()) { + key_operator_->EncodeTeraKey(start_key_, "", "", 0, leveldb::TKT_FORSEEK, &raw_start_key_); + } + raw_end_key_ = end_key_; + if (!kv_only_ && !end_key_.empty()) { + key_operator_->EncodeTeraKey(end_key_, "", "", kLatestTs, leveldb::TKT_FORSEEK, &raw_end_key_); + } else if (kv_only_ && table_schema_.raw_key() == TTLKv && !end_key_.empty()) { + key_operator_->EncodeTeraKey(end_key_, "", "", 0, leveldb::TKT_FORSEEK, &raw_end_key_); + } + + ldb_options_.key_start = raw_start_key_; + ldb_options_.key_end = raw_end_key_; + ldb_options_.l0_slowdown_writes_trigger = FLAGS_tera_tablet_level0_file_limit; + ldb_options_.max_sub_parallel_compaction = FLAGS_tera_tablet_max_sub_parallel_compaction; + ldb_options_.ttl_percentage = FLAGS_tera_tablet_ttl_percentage; + ldb_options_.del_percentage = FLAGS_tera_tablet_del_percentage; + ldb_options_.block_size = FLAGS_tera_tablet_write_block_size * 1024; + ldb_options_.max_block_log_number = FLAGS_tera_tablet_max_block_log_number; + ldb_options_.write_log_time_out = FLAGS_tera_tablet_write_log_time_out; + ldb_options_.log_async_mode = FLAGS_tera_log_async_mode; + ldb_options_.info_log = logger; + ldb_options_.max_open_files = FLAGS_tera_memenv_table_cache_size; + ldb_options_.manifest_switch_size = FLAGS_tera_leveldb_manifest_switch_size_MB; + ldb_options_.max_background_compactions = FLAGS_tera_leveldb_max_background_compactions; + ldb_options_.slow_down_level0_score_limit = FLAGS_tera_leveldb_slow_down_level0_score_limit; + ldb_options_.ignore_corruption_in_open = FLAGS_tera_leveldb_ignore_corruption_in_open; + + ldb_options_.use_memtable_on_leveldb = FLAGS_tera_tablet_use_memtable_on_leveldb; + ldb_options_.memtable_ldb_write_buffer_size = + FLAGS_tera_tablet_memtable_ldb_write_buffer_size * 1024; + ldb_options_.memtable_ldb_block_size = FLAGS_tera_tablet_memtable_ldb_block_size * 1024; + if (FLAGS_tera_tablet_use_memtable_on_leveldb) { + LOG(INFO) << "enable mem-ldb for this tablet-server:" + << " buffer_size:" << ldb_options_.memtable_ldb_write_buffer_size + << ", block_size:" << ldb_options_.memtable_ldb_block_size; + } + + uint32_t bloom_filter_bits_per_key = table_schema_.has_bloom_filter_bits_per_key() + ? table_schema_.bloom_filter_bits_per_key() + : 10; + + LOG(INFO) << "Use " << bloom_filter_bits_per_key << " bits per key for bloom filter."; + + if (kv_only_ && table_schema_.raw_key() == TTLKv) { + ldb_options_.filter_policy = leveldb::NewTTLKvBloomFilterPolicy(bloom_filter_bits_per_key); + } else if (kv_only_) { + ldb_options_.filter_policy = leveldb::NewBloomFilterPolicy(bloom_filter_bits_per_key); + } else if (table_schema_.raw_key() == Readable) { + ldb_options_.filter_policy = leveldb::NewRowKeyBloomFilterPolicy( + bloom_filter_bits_per_key, leveldb::ReadableRawKeyOperator()); + } else { + CHECK_EQ(table_schema_.raw_key(), Binary); + ldb_options_.filter_policy = leveldb::NewRowKeyBloomFilterPolicy( + bloom_filter_bits_per_key, leveldb::BinaryRawKeyOperator()); + } + ldb_options_.block_cache = block_cache; + ldb_options_.table_cache = table_cache; + ldb_options_.flush_triggered_log_num = FLAGS_tera_tablet_flush_log_num; + ldb_options_.log_file_size = FLAGS_tera_tablet_log_file_size * 1024 * 1024; + ldb_options_.parent_tablets = parent_tablets; + if (table_schema_.raw_key() == Binary) { + ldb_options_.raw_key_format = leveldb::kBinary; + ldb_options_.comparator = leveldb::TeraBinaryComparator(); + } else if (table_schema_.raw_key() == TTLKv) { // KV-Pair-With-TTL + ldb_options_.raw_key_format = leveldb::kTTLKv; + ldb_options_.comparator = leveldb::TeraTTLKvComparator(); + ldb_options_.enable_strategy_when_get = true; // active usage of strategy in DB::Get + } else { // Readable-Table && KV-Pair-Without-TTL + ldb_options_.raw_key_format = leveldb::kReadable; + ldb_options_.comparator = leveldb::BytewiseComparator(); + } + ldb_options_.verify_checksums_in_compaction = FLAGS_tera_leveldb_verify_checksums; + ldb_options_.ignore_corruption_in_compaction = FLAGS_tera_leveldb_ignore_corruption_in_compaction; + ldb_options_.use_file_lock = FLAGS_tera_leveldb_use_file_lock; + ldb_options_.disable_wal = table_schema_.disable_wal(); + SetupOptionsForLG(ignore_err_lgs); + + std::string path_prefix = FLAGS_tera_tabletnode_path_prefix; + if (*path_prefix.rbegin() != '/') { + path_prefix.push_back('/'); + } + + tablet_path_ = path_prefix + path; + ldb_options_.dfs_storage_path_prefix = path_prefix; + LOG(INFO) << "[Load] Start Open " << tablet_path_ << ", kv_only " << kv_only_ + << ", raw_key_operator " << key_operator_->Name(); + + leveldb::Status db_status = leveldb::DB::Open(ldb_options_, tablet_path_, &db_); + + if (!db_status.ok()) { + LOG(ERROR) << "fail to open table: " << tablet_path_ << ", " << db_status.ToString(); + { + MutexLock lock(&mutex_); + status_ = kNotInit; + last_err_msg_ = db_status.ToString(); + db_ref_count_--; } + SetStatusCode(db_status, status); + // delete ldb_options_.env; + return false; + } - async_writer_ = new TabletWriter(this); - async_writer_->Start(); + async_writer_ = new TabletWriter(this); + async_writer_->Start(); - scan_context_manager_ = new ScanContextManager; + scan_context_manager_ = new ScanContextManager; - { - MutexLock lock(&mutex_); - status_ = kReady; - //reset try unload count to 0 for ready - try_unload_count_ = 0; - db_ref_count_--; - } + { + MutexLock lock(&mutex_); + status_ = kReady; + // reset try unload count to 0 for ready + try_unload_count_ = 0; + db_ref_count_--; + } - LOG(INFO) << "[Load] Load " << tablet_path_ << " done"; - return true; + LOG(INFO) << "[Load] Load " << tablet_path_ << " done"; + return true; } bool TabletIO::ShouldForceUnloadOnError() { - { - MutexLock lock(&mutex_); - if (status_ != kReady) { - return false; - } - db_ref_count_++; - } - // If TabletIO is Ready but has encountered some fatal errors - bool ret = db_->ShouldForceUnloadOnError(); - { - MutexLock lock(&mutex_); - db_ref_count_--; + { + MutexLock lock(&mutex_); + if (status_ != kReady) { + return false; } - return ret; + db_ref_count_++; + } + // If TabletIO is Ready but has encountered some fatal errors + bool ret = db_->ShouldForceUnloadOnError(); + { + MutexLock lock(&mutex_); + db_ref_count_--; + } + return ret; } bool TabletIO::Unload(StatusCode* status) { - { - MutexLock lock(&mutex_); - // inc try unload times - ++try_unload_count_; - LOG(INFO) << "tablet " << tablet_path_ - << " unload try times:" << try_unload_count_; - if (status_ != kReady) { - SetStatusCode(status_, status); - return false; - } - status_ = kUnLoading; - db_ref_count_++; - } - - LOG(INFO) << "[Unload] start shutdown1 " << tablet_path_; - leveldb::Status s = db_->Shutdown1(); - { - MutexLock lock(&mutex_); - status_ = kUnLoading2; - } - - uint32_t retry = 0; - while (db_ref_count_ > 1) { - LOG(ERROR) << "tablet is busy, db ref: " << db_ref_count_ - << ", try again unload: " << retry++ << " " << tablet_path_; - ThisThread::Sleep(FLAGS_tera_io_retry_period); - } - - LOG(INFO) << "[Unload] stop async writer " << tablet_path_; - async_writer_->Stop(); - delete async_writer_; - async_writer_ = NULL; - - if (s.ok()) { - LOG(INFO) << "[Unload] start shutdown2 " << tablet_path_; - db_->Shutdown2(); - } else { - LOG(INFO) << "[Unload] shutdown1 failed, keep log " << tablet_path_; + { + MutexLock lock(&mutex_); + // inc try unload times + ++try_unload_count_; + LOG(INFO) << "tablet " << tablet_path_ << " unload try times:" << try_unload_count_; + if (status_ != kReady) { + SetStatusCode(status_, status); + return false; } + status_ = kUnloading; + db_ref_count_++; + } - delete scan_context_manager_; - delete db_; - db_ = NULL; - - delete ldb_options_.filter_policy; - TearDownOptionsForLG(); - LOG(INFO) << "[Unload] done " << tablet_path_; - - { - MutexLock lock(&mutex_); - status_ = kNotInit; - db_ref_count_--; - } - return true; + LOG(INFO) << "[Unload] start shutdown1 " << tablet_path_; + leveldb::Status s = db_->Shutdown1(); + { + MutexLock lock(&mutex_); + status_ = kUnloading2; + } + + uint32_t retry = 0; + while (db_ref_count_ > 1) { + LOG(ERROR) << "tablet is busy, db ref: " << db_ref_count_ << ", try again unload: " << retry++ + << " " << tablet_path_; + ThisThread::Sleep(FLAGS_tera_io_retry_period); + } + + LOG(INFO) << "[Unload] stop async writer " << tablet_path_; + async_writer_->Stop(); + delete async_writer_; + async_writer_ = NULL; + + if (s.ok()) { + LOG(INFO) << "[Unload] start shutdown2 " << tablet_path_; + db_->Shutdown2(); + } else { + LOG(INFO) << "[Unload] shutdown1 failed, keep log " << tablet_path_; + } + + delete scan_context_manager_; + delete db_; + db_ = NULL; + + delete ldb_options_.filter_policy; + TearDownOptionsForLG(); + LOG(INFO) << "[Unload] done " << tablet_path_; + + { + MutexLock lock(&mutex_); + status_ = kNotInit; + db_ref_count_--; + } + return true; } // Find average string from input string // E.g. "abc" & "abe" return "abd" // "a" & "b" return "a_" -bool TabletIO::FindAverageKey(const std::string& start, const std::string& end, - std::string* res) { - std::string s = start; - std::string e = end; - if (e == "") { - // make sure end > start - e.resize(s.size() + 1, '\xFF'); - } - CHECK(s < e); - int max_len = s.size() > e.size() ? s.size() : e.size(); - max_len++; // max_len should be >0 - s.resize(max_len, '\x00'); - e.resize(max_len, '\x00'); - if (s == e) { - // find failed, e.g. s == "a" && e == "a\0" - return false; - } - - // algorithm: use big number ADD and division - unsigned int carry[max_len + 1]; - unsigned int sum[max_len]; - carry[max_len] = 0; - for (int i = max_len - 1; i >= 0; --i) { - sum[i] = (unsigned char)s[i] + (unsigned char)e[i] + carry[i + 1]; - carry[i] = sum[i] / 256; - sum[i] %= 256; - } - memset((char*)carry + sizeof(int), '\0', (max_len) * sizeof(int)); - for (int i = 0; i < max_len; ++i) { - carry[i + 1] = (sum[i] + carry[i] * 256) % 2; - sum[i] = (sum[i] + carry[i] * 256) / 2; - } - std::string ave_key; - for (int i = 0; i < max_len; ++i) { - ave_key.append(1, char(sum[i])); - if (ave_key > start && (end == "" || ave_key < end)) { - break; - } - } - CHECK(ave_key > start && (end == "" || ave_key < end)); - *res = ave_key; - return true; +bool TabletIO::FindAverageKey(const std::string& start, const std::string& end, std::string* res) { + std::string s = start; + std::string e = end; + if (e == "") { + // make sure end > start + e.resize(s.size() + 1, '\xFF'); + } + CHECK(s < e); + int max_len = s.size() > e.size() ? s.size() : e.size(); + max_len++; // max_len should be >0 + s.resize(max_len, '\x00'); + e.resize(max_len, '\x00'); + if (s == e) { + // find failed, e.g. s == "a" && e == "a\0" + return false; + } + + // algorithm: use big number ADD and division + unsigned int carry[max_len + 1]; + unsigned int sum[max_len]; + carry[max_len] = 0; + for (int i = max_len - 1; i >= 0; --i) { + sum[i] = (unsigned char)s[i] + (unsigned char)e[i] + carry[i + 1]; + carry[i] = sum[i] / 256; + sum[i] %= 256; + } + memset((char*)carry + sizeof(int), '\0', (max_len) * sizeof(int)); + for (int i = 0; i < max_len; ++i) { + carry[i + 1] = (sum[i] + carry[i] * 256) % 2; + sum[i] = (sum[i] + carry[i] * 256) / 2; + } + std::string ave_key; + for (int i = 0; i < max_len; ++i) { + ave_key.append(1, char(sum[i])); + if (ave_key > start && (end == "" || ave_key < end)) { + break; + } + } + CHECK(ave_key > start && (end == "" || ave_key < end)); + *res = ave_key; + return true; } bool TabletIO::ParseRowKey(const std::string& tera_key, std::string* row_key) { - leveldb::Slice row; - if ((RawKeyType() == GeneralKv) - || (kv_only_ && RawKeyType() == Readable)) { - row = tera_key; - } else { // Table && TTL-KV - if (!key_operator_->ExtractTeraKey(tera_key, &row, - NULL, NULL, NULL, NULL)) { - VLOG(5) << "fail to extract split key"; - return false; - } + leveldb::Slice row; + if ((RawKeyType() == GeneralKv) || (kv_only_ && RawKeyType() == Readable)) { + row = tera_key; + } else { // Table && TTL-KV + if (!key_operator_->ExtractTeraKey(tera_key, &row, NULL, NULL, NULL, NULL)) { + VLOG(5) << "fail to extract split key"; + return false; } - *row_key = row.ToString(); - return true; + } + *row_key = row.ToString(); + return true; } bool TabletIO::Split(std::string* split_key, StatusCode* status) { - { - MutexLock lock(&mutex_); - if (status_ != kReady) { - SetStatusCode(status_, status); - return false; - } - if (compact_status_ == kTableOnCompact) { - SetStatusCode(kTableNotSupport, status); - return false; - } - db_ref_count_++; - } - - if (split_key->empty()) { - std::string raw_split_key; - if (db_->FindSplitKey(0.5, &raw_split_key)) { - ParseRowKey(raw_split_key, split_key); - } - - if (split_key->empty() || *split_key == end_key_) { - // could not find split_key, try calc average key - std::string smallest_key, largest_key; - CHECK(db_->FindKeyRange(&smallest_key, &largest_key)); - - std::string srow_key, lrow_key; - if (!smallest_key.empty()) { - ParseRowKey(smallest_key, &srow_key); - } else { - srow_key = start_key_; - } - if (!largest_key.empty()) { - ParseRowKey(largest_key, &lrow_key); - } else { - lrow_key = end_key_; - } - FindAverageKey(srow_key, lrow_key, split_key); - } - } - { - MutexLock lock(&mutex_); - db_ref_count_--; - } + { + MutexLock lock(&mutex_); + if (status_ != kReady) { + SetStatusCode(status_, status); + return false; + } + if (compact_status_ == kTableOnCompact) { + SetStatusCode(kTableNotSupport, status); + return false; + } + db_ref_count_++; + } + + if (split_key->empty()) { + std::string raw_split_key; + if (db_->FindSplitKey(0.5, &raw_split_key)) { + ParseRowKey(raw_split_key, split_key); + } + + if (split_key->empty() || *split_key == end_key_) { + // could not find split_key, try calc average key + std::string smallest_key, largest_key; + CHECK(db_->FindKeyRange(&smallest_key, &largest_key)); + + std::string srow_key, lrow_key; + if (!smallest_key.empty()) { + ParseRowKey(smallest_key, &srow_key); + } else { + srow_key = start_key_; + } + if (!largest_key.empty()) { + ParseRowKey(largest_key, &lrow_key); + } else { + lrow_key = end_key_; + } + FindAverageKey(srow_key, lrow_key, split_key); + } + } + { + MutexLock lock(&mutex_); + db_ref_count_--; + } - VLOG(5) << "start: [" << DebugString(start_key_) - << "], end: [" << DebugString(end_key_) - << "], split: [" << DebugString(*split_key) << "]"; + VLOG(5) << "start: [" << DebugString(start_key_) << "], end: [" << DebugString(end_key_) + << "], split: [" << DebugString(*split_key) << "]"; - if (*split_key != "" - && *split_key > start_key_ - && (end_key_ == "" || *split_key < end_key_)) { - return true; - } else { - SetStatusCode(kTableNotSupport, status); - return false; - } + if (*split_key != "" && *split_key > start_key_ && (end_key_ == "" || *split_key < end_key_)) { + return true; + } else { + SetStatusCode(kTableNotSupport, status); + return false; + } } bool TabletIO::Compact(int lg_no, StatusCode* status, CompactionType type) { - { - MutexLock lock(&mutex_); - if (status_ != kReady) { - SetStatusCode(status_, status); - return false; - } - if (compact_status_ == kTableOnCompact) { - return false; - } - compact_status_ = kTableOnCompact; - db_ref_count_++; - } - CHECK_NOTNULL(db_); - if (type == kManualCompaction) { - db_->CompactRange(NULL, NULL, lg_no); - } else if (type == kMinorCompaction) { - db_->MinorCompact(); - } - - { - MutexLock lock(&mutex_); - compact_status_ = kTableCompacted; - db_ref_count_--; - } - return true; + { + MutexLock lock(&mutex_); + if (status_ != kReady) { + SetStatusCode(status_, status); + return false; + } + if (compact_status_ == kTableOnCompact) { + return false; + } + compact_status_ = kTableOnCompact; + db_ref_count_++; + } + CHECK_NOTNULL(db_); + if (type == kManualCompaction) { + db_->CompactRange(NULL, NULL, lg_no); + } else if (type == kMinorCompaction) { + db_->MinorCompact(); + } + + { + MutexLock lock(&mutex_); + compact_status_ = kTableCompacted; + db_ref_count_--; + } + return true; } bool TabletIO::AddInheritedLiveFiles(std::vector >* live) { - { - MutexLock lock(&mutex_); - if (status_ != kReady) { - LOG(INFO) << "[gc] tablet not ready, skip it."; - return false; - } - db_ref_count_++; - } - { - MutexLock lock(&schema_mutex_); - if (live->size() == 0) { - live->resize(table_schema_.locality_groups_size()); - } else { - CHECK(live->size() == static_cast(table_schema_.locality_groups_size())); - } + { + MutexLock lock(&mutex_); + if (status_ != kReady) { + LOG(INFO) << "[gc] tablet not ready, skip it."; + return false; } - db_->AddInheritedLiveFiles(live); - { - MutexLock lock(&mutex_); - db_ref_count_--; + db_ref_count_++; + } + { + MutexLock lock(&schema_mutex_); + if (live->size() == 0) { + live->resize(table_schema_.locality_groups_size()); + } else { + CHECK(live->size() == static_cast(table_schema_.locality_groups_size())); } - return true; + } + db_->AddInheritedLiveFiles(live); + { + MutexLock lock(&mutex_); + db_ref_count_--; + } + return true; } bool TabletIO::IsBusy() { - { - MutexLock lock(&mutex_); - if (status_ != kReady) { - return false; - } - db_ref_count_++; - } - bool is_busy = db_->BusyWrite(); - is_busy = is_busy ? true : async_writer_->IsBusy(); - { - MutexLock lock(&mutex_); - db_ref_count_--; + { + MutexLock lock(&mutex_); + if (status_ != kReady) { + return false; } - return is_busy; + db_ref_count_++; + } + bool is_busy = db_->BusyWrite(); + is_busy = is_busy ? true : async_writer_->IsBusy(); + { + MutexLock lock(&mutex_); + db_ref_count_--; + } + return is_busy; } bool TabletIO::Workload(double* write_workload) { - { - MutexLock lock(&mutex_); - if (status_ != kReady) { - return false; - } - db_ref_count_++; + { + MutexLock lock(&mutex_); + if (status_ != kReady) { + return false; } + db_ref_count_++; + } - // if busy cause by write log, set workload score more than 10, because level 0 - // limits to 20 sst files by default, which score is 10. - db_->Workload(write_workload); - if (*write_workload < 10.618 && async_writer_->IsBusy()) { - *write_workload = 10.618; - } + // if busy cause by write log, set workload score more than 10, because level + // 0 + // limits to 20 sst files by default, which score is 10. + db_->Workload(write_workload); + if (*write_workload < 10.618 && async_writer_->IsBusy()) { + *write_workload = 10.618; + } - { - MutexLock lock(&mutex_); - db_ref_count_--; - } - return true; + { + MutexLock lock(&mutex_); + db_ref_count_--; + } + return true; } bool TabletIO::SnapshotIDToSeq(uint64_t snapshot_id, uint64_t* snapshot_sequence) { - std::map::iterator it = id_to_snapshot_num_.find(snapshot_id); - if (it == id_to_snapshot_num_.end()) { - return false; - } - *snapshot_sequence = it->second; - return true; + std::map::iterator it = id_to_snapshot_num_.find(snapshot_id); + if (it == id_to_snapshot_num_.end()) { + return false; + } + *snapshot_sequence = it->second; + return true; } -bool TabletIO::GetDataSize(uint64_t* size, std::vector* lgsize, +bool TabletIO::GetDataSize(uint64_t* size, std::vector* lgsize, uint64_t* mem_table_size, StatusCode* status) { - { - MutexLock lock(&mutex_); - if ((status_ != kReady && status_ != kUnLoading) || IsUrgentUnload()) { - SetStatusCode(status_, status); - return false; - } - db_ref_count_++; - } - - db_->GetApproximateSizes(size, lgsize); - VLOG(10) << "GetDataSize(" << tablet_path_ << ") : " << *size; - { - MutexLock lock(&mutex_); - db_ref_count_--; - } - if (size && *size == 0) { - // return reserved buffer size - *size = FLAGS_tera_tablet_write_block_size * 1024; + { + MutexLock lock(&mutex_); + if ((status_ != kReady && status_ != kUnloading) || IsUrgentUnload()) { + SetStatusCode(status_, status); + return false; } - return true; -} + db_ref_count_++; + } -bool TabletIO::Read(const leveldb::Slice& key, std::string* value, - uint64_t snapshot_id, StatusCode* status) { - CHECK_NOTNULL(db_); - leveldb::ReadOptions read_option(&ldb_options_); - read_option.verify_checksums = FLAGS_tera_leveldb_verify_checksums; - if (snapshot_id != 0) { - if (!SnapshotIDToSeq(snapshot_id, &read_option.snapshot)) { - *status = kSnapshotNotExist; - return false; - } - } - read_option.rollbacks = rollbacks_; - leveldb::Status db_status = db_->Get(read_option, key, value); - if (!db_status.ok()) { - // LOG(ERROR) << "fail to read value for key: " << key.data() - // << " from tablet: " << tablet_path_; - SetStatusCode(db_status, status); - return false; - } - return true; + db_->GetApproximateSizes(size, lgsize, mem_table_size); + VLOG(10) << "GetDataSize(" << tablet_path_ << ") : " << *size; + { + MutexLock lock(&mutex_); + db_ref_count_--; + } + if (*size == 0) { + // return reserved buffer size + *size = FLAGS_tera_tablet_write_block_size * 1024; + } + return true; } -StatusCode TabletIO::InitedScanIterator(const std::string& start_tera_key, - const std::string& end_row_key, - const ScanOptions& scan_options, - leveldb::Iterator** scan_it) { - leveldb::Slice start_key, start_col, start_qual; - key_operator_->ExtractTeraKey(start_tera_key, &start_key, &start_col, - &start_qual, NULL, NULL); - - leveldb::ReadOptions read_option(&ldb_options_); - read_option.verify_checksums = FLAGS_tera_leveldb_verify_checksums; - SetupIteratorOptions(scan_options, &read_option); - uint64_t snapshot_id = scan_options.snapshot_id; - if (snapshot_id != 0) { - if (!SnapshotIDToSeq(snapshot_id, &read_option.snapshot)) { - TearDownIteratorOptions(&read_option); - return kSnapshotNotExist; - } - } - read_option.rollbacks = rollbacks_; - // single row scan +bool TabletIO::Read(const leveldb::Slice& key, std::string* value, uint64_t snapshot_id, + StatusCode* status) { + CHECK_NOTNULL(db_); + leveldb::ReadOptions read_option(&ldb_options_); + read_option.enable_dfs_read_thread_limiter = FLAGS_enable_dfs_read_thread_limiter; + read_option.verify_checksums = FLAGS_tera_leveldb_verify_checksums; + if (snapshot_id != 0) { + if (!SnapshotIDToSeq(snapshot_id, &read_option.snapshot)) { + *status = kSnapshotNotExist; + return false; + } + } + read_option.rollbacks = rollbacks_; + leveldb::Status db_status = db_->Get(read_option, key, value); + if (!db_status.ok()) { + // LOG(ERROR) << "fail to read value for key: " << key.data() + // << " from tablet: " << tablet_path_; + SetStatusCode(db_status, status); + return false; + } + return true; +} + +StatusCode TabletIO::InitScanIterator(const std::string& start_tera_key, + const std::string& end_row_key, + const ScanOptions& scan_options, + leveldb::Iterator** scan_it) { + leveldb::ReadOptions read_option(&ldb_options_); + read_option.verify_checksums = FLAGS_tera_leveldb_verify_checksums; + SetupIteratorOptions(scan_options, &read_option); + uint64_t snapshot_id = scan_options.snapshot_id; + if (snapshot_id != 0) { + if (!SnapshotIDToSeq(snapshot_id, &read_option.snapshot)) { + TearDownIteratorOptions(&read_option); + return kSnapshotNotExist; + } + } + + leveldb::Slice start_key, start_col, start_qual; + if (!kv_only_) { + key_operator_->ExtractTeraKey(start_tera_key, &start_key, &start_col, &start_qual, NULL, NULL); + // single row scan optimization for table + // Readcell branch: end_row_key just check here, not used for scan if (start_key.ToString() + '\0' == end_row_key) { - SetupSingleRowIteratorOptions(start_key.ToString(), &read_option); - } else if (scan_options.is_batch_scan == true) { - read_option.prefetch_scan = true; - read_option.prefetch_scan_size = FLAGS_tera_tabletnode_prefetch_scan_size; - } - - *scan_it = db_->NewIterator(read_option); - TearDownIteratorOptions(&read_option); - - if ((*scan_it)->status().IsShutdownInProgress()) { - TABLET_UNLOAD_LOG << "on waiting_for_shutdown2_ new a ErrorIterator, and return kKeyNotInRange"; - return kKeyNotInRange; - } - - VLOG(10) << "ll-scan: " << "startkey=[" << DebugString(start_key.ToString()) << ":" - << DebugString(start_col.ToString()) << ":" << DebugString(start_qual.ToString()); - std::string start_seek_key; - key_operator_->EncodeTeraKey(start_key.ToString(), "", "", kLatestTs, - leveldb::TKT_FORSEEK, &start_seek_key); - (*scan_it)->Seek(start_seek_key); - - return kTabletNodeOk; -} - -bool TabletIO::LowLevelScan(const std::string& start_tera_key, - const std::string& end_row_key, - const ScanOptions& scan_options, - RowResult* value_list, - KeyValuePair* next_start_point, - uint32_t* read_row_count, - uint32_t* read_bytes, - bool* is_complete, + SetupSingleRowIteratorOptions(start_key.ToString(), &read_option); + } + } + read_option.rollbacks = rollbacks_; + // just batchscan need prefetch + if (scan_options.is_batch_scan) { + read_option.prefetch_scan = true; + read_option.prefetch_scan_size = FLAGS_tera_tabletnode_prefetch_scan_size; + } + + *scan_it = db_->NewIterator(read_option); + TearDownIteratorOptions(&read_option); + + if ((*scan_it)->status().IsShutdownInProgress()) { + TABLET_UNLOAD_LOG << "on waiting_for_shutdown2_ new a ErrorIterator, and " + "return kKeyNotInRange"; + return kKeyNotInRange; + } + + std::string start_seek_key; + if (kv_only_) { + start_seek_key = start_tera_key; // start_tera_key is seek key trans by SetupScanKey() + } else { + key_operator_->EncodeTeraKey(start_key.ToString(), "", "", kLatestTs, leveldb::TKT_FORSEEK, + &start_seek_key); + } + (*scan_it)->Seek(start_seek_key); + + return kTabletNodeOk; +} + +bool TabletIO::LowLevelScan(const std::string& start_tera_key, const std::string& end_row_key, + const ScanOptions& scan_options, RowResult* values, + KeyValuePair* next_start_point, uint32_t* read_row_count, + uint32_t* read_cell_count, uint32_t* read_bytes, bool* complete, StatusCode* status) { - leveldb::Iterator* it = NULL; - StatusCode ret_code = InitedScanIterator(start_tera_key, end_row_key, scan_options, &it); - if (ret_code != kTabletNodeOk) { - SetStatusCode(ret_code, status); - return false; - } + leveldb::Iterator* it = NULL; + StatusCode ret_code = InitScanIterator(start_tera_key, end_row_key, scan_options, &it); + if (ret_code != kTabletNodeOk) { + SetStatusCode(ret_code, status); + return false; + } - ScanContext* context = new ScanContext; - context->compact_strategy = ldb_options_.compact_strategy_factory->NewInstance(); - context->version_num = 1; - context->qu_num = 1; - bool ret = LowLevelScan(start_tera_key, end_row_key, scan_options, it, context, - value_list, next_start_point, read_row_count, read_bytes, - is_complete, status); - delete it; - delete context->compact_strategy; - delete context; - return ret; + ScanContext* context = new ScanContext; + context->compact_strategy = ldb_options_.compact_strategy_factory->NewInstance(); + context->version_num = 1; + context->qu_num = 1; + bool ret = + LowLevelScan(start_tera_key, end_row_key, scan_options, it, context, values, next_start_point, + read_row_count, read_cell_count, read_bytes, complete, status); + delete it; + delete context->compact_strategy; + delete context; + return ret; } bool TabletIO::ScanWithFilter(const ScanOptions& scan_options) { - return scan_options.filter_list.filter_size() != 0; + return scan_options.filter != nullptr; } // 检测`row_buf'中的数据是否为一整行,`row_buf'为空是整行的特例,也返回true -// 从LowLevelScan的for()循环中跳出时,leveldb::Iterator* it 指向第一个不在row_buf中的cell +// 从LowLevelScan的for()循环中跳出时,leveldb::Iterator* it +// 指向第一个不在row_buf中的cell // 如果这个cell的rowkey和row_buf中的数据rowkey相同, // 则说明`row_buf'中的数据不是一整行,返回false // `row_buf'自身的逻辑保证了其中的所有cell必定属于同一行(row) -bool TabletIO::IsCompleteRow(const std::list& row_buf, - leveldb::Iterator* it) { - assert((it != NULL) && (it->Valid())); - if (row_buf.size() == 0) { - VLOG(9) << "[filter] row_buf empty"; - return true; - } - leveldb::Slice origin_cell = it->key(); - leveldb::Slice cur_cell = it->key(); - for (; it->Valid();) { - cur_cell = it->key(); - leveldb::Slice row; - if (!key_operator_->ExtractTeraKey(cur_cell, &row, - NULL, NULL, NULL, NULL)) { - LOG(ERROR) << "[filter] invalid tera key: " << DebugString(cur_cell.ToString()); - it->Next(); - continue; - } - if (cur_cell.compare(origin_cell) != 0) { - it->Seek(origin_cell); - } - bool res = row.compare(row_buf.begin()->key()) == 0; - VLOG(9) << "[filter] " << ( res ? "NOT " : "") << "complete row"; - return !res; +bool TabletIO::IsCompleteRow(const SingleRowBuffer& row_buf, leveldb::Iterator* it) { + assert((it != NULL) && (it->Valid())); + if (row_buf.Size() == 0) { + VLOG(9) << "[filter] key value list empty"; + return true; + } + leveldb::Slice origin_cell = it->key(); + leveldb::Slice cur_cell = it->key(); + for (; it->Valid();) { + cur_cell = it->key(); + leveldb::Slice row; + if (!key_operator_->ExtractTeraKey(cur_cell, &row, NULL, NULL, NULL, NULL)) { + LOG(ERROR) << "[filter] invalid tera key: " << DebugString(cur_cell.ToString()); + it->Next(); + continue; } if (cur_cell.compare(origin_cell) != 0) { - it->Seek(origin_cell); + it->Seek(origin_cell); } - VLOG(9) << "[filter] reach the end, row_buf is complete row"; - return true; + bool res = row.compare(row_buf.RowKey(0)) == 0; + VLOG(9) << "[filter] " << (res ? "NOT " : "") << "complete row"; + return !res; + } + if (cur_cell.compare(origin_cell) != 0) { + it->Seek(origin_cell); + } + VLOG(9) << "[filter] reach the end, row_buf is complete row"; + return true; } // 检测是否应该过滤掉`row_buf'中cell所在的一整行(row) // 用户指定了一定数量的filter,针对某些特定列的值对row进行过滤, // 返回false表示不过滤这一行,这一行数据被返回给用户 -bool TabletIO::ShouldFilterRow(const ScanOptions& scan_options, - const std::list& row_buf, +bool TabletIO::ShouldFilterRow(const ScanOptions& scan_options, const SingleRowBuffer& row_buf, leveldb::Iterator* it) { - assert((it != NULL) && it->Valid()); - if (row_buf.size() == 0) { - VLOG(9) << "[filter] row_buf empty"; - return false; - } - std::string origin_row = row_buf.begin()->key(); - - leveldb::Slice origin_cell = it->key(); - - int filter_num = scan_options.filter_list.filter_size(); - - // TODO(taocipian) - // 0). some target cf maybe already in row_buf - // 1). collects all target cf and sorts them, - // then Seek() to the 1st cf, Next() to the rest - for (int i = 0; i < filter_num; ++i) { - const Filter& filter = scan_options.filter_list.filter(i); - // 针对用户指定了过滤条件的每一列,seek过去看看是否符合 - std::string target_cf = filter.content(); - VLOG(9) << "[filter] " << i << " of " << filter_num - << " , target cf:" << target_cf; - std::string seek_key; - key_operator_->EncodeTeraKey(origin_row, target_cf, "", kLatestTs, - leveldb::TKT_FORSEEK, &seek_key); - it->Seek(seek_key); - for (; it->Valid();) { - leveldb::Slice row, cf, qu; - int64_t ts; - if (!key_operator_->ExtractTeraKey(it->key(), &row, &cf, &qu, &ts, NULL)) { - LOG(ERROR) << "[filter] invalid tera key: " << DebugString(it->key().ToString()); - it->Next(); - continue; - } - if ((row.ToString() != origin_row) - || (cf.ToString() != target_cf) - || (qu.ToString() != "")) { - // 用户试图过滤不存在的一列,忽略这个过滤条件 - VLOG(9) << "[filter] target cf not found:" << target_cf; - break; - } - KeyValuePair pair; - leveldb::Slice value = it->value(); - MakeKvPair(row, cf, qu, ts, value, &pair); - if (!CheckCell(pair, filter)) { - it->Seek(origin_cell); - VLOG(9) << "[filter] check failed at target cf:" << target_cf; - return true; - } - VLOG(9) << "[filter] target cf check passed"; - break; - } - } - it->Seek(origin_cell); - VLOG(9) << "[filter] this row check passed"; + assert((it != NULL) && it->Valid()); + if (row_buf.Size() == 0) { + VLOG(9) << "[filter] row_buf empty"; return false; + } + const std::string& origin_row = row_buf.RowKey(0); + leveldb::Slice origin_cell = it->key(); + scan_options.filter->Reset(); + filter::Filter::ReturnCode rc = filter::Filter::kIncludeCurCell; + + // TODO(taocipian) + // 0). some target cf maybe already in row_buf + // 1). collects all target cf and sorts them, + // then Seek() to the 1st cf, Next() to the rest + for (auto filter_column_it = scan_options.filter_column_set.begin(); + filter_column_it != scan_options.filter_column_set.end(); filter_column_it++) { + // 针对用户指定了过滤条件的每一列,seek过去看看是否符合 + std::string seek_key; + key_operator_->EncodeTeraKey(origin_row, filter_column_it->first, filter_column_it->second, + kLatestTs, leveldb::TKT_FORSEEK, &seek_key); + it->Seek(seek_key); + rc = filter::Filter::kIncludeCurCell; + for (; it->Valid();) { + leveldb::Slice row, cf, qu; + int64_t ts; + if (!key_operator_->ExtractTeraKey(it->key(), &row, &cf, &qu, &ts, NULL)) { + LOG(ERROR) << "[filter] invalid tera key: " << DebugString(it->key().ToString()); + it->Next(); + continue; + } + if (row.ToString() != origin_row) { + // 用户试图过滤不存在的一列,忽略这个过滤条件 + break; + } + rc = scan_options.filter->FilterCell(cf.ToString(), qu.ToString(), it->value().ToString()); + break; + } + if (rc == filter::Filter::kNotIncludeCurAndLeftCellOfRow) { + break; + } + } + it->Seek(origin_cell); + return scan_options.filter->FilterRow(); } // seek到`row_buf'中cell所在行(row)的下一行, // 调用者需要检查此函数返回以后迭代器的状态是否有效, // 因为可能已经到了数据库的最后 -void TabletIO::GotoNextRow(const std::list& row_buf, - leveldb::Iterator* it, +void TabletIO::GotoNextRow(const SingleRowBuffer& row_buf, leveldb::Iterator* it, KeyValuePair* next) { - assert(it != NULL); - if (!it->Valid() || row_buf.size() == 0) { - return; + assert(it != NULL); + if (!it->Valid() || row_buf.Size() == 0) { + return; + } + std::string row = row_buf.RowKey(0); + std::string next_row = row + '\0'; + std::string seek_key; + key_operator_->EncodeTeraKey(next_row, "", "", kLatestTs, leveldb::TKT_FORSEEK, &seek_key); + it->Seek(seek_key); + MakeKvPair(leveldb::Slice(next_row), "", "", kLatestTs, "", next); + VLOG(9) << "[filter] goto next row:" << next_row << ":" << next_row.size(); +} + +bool TabletIO::LowLevelScan(const std::string& start_tera_key, const std::string& end_row_key, + const ScanOptions& scan_options, leveldb::Iterator* it, + ScanContext* scan_context, RowResult* values, + KeyValuePair* next_start_point, uint32_t* read_row_count, + uint32_t* read_cell_count, uint32_t* read_bytes, bool* complete, + StatusCode* status) { + leveldb::CompactStrategy* compact_strategy = scan_context->compact_strategy; + std::string& last_key = scan_context->last_key; + std::string& last_col = scan_context->last_col; + std::string& last_qual = scan_context->last_qual; + uint32_t& version_num = scan_context->version_num; + uint64_t& qu_num = scan_context->qu_num; + + SingleRowBuffer row_buf; + uint32_t buffer_size = 0; + int64_t number_limit = 0; + values->clear_key_values(); + *read_row_count = 0; + *read_cell_count = 0; + *read_bytes = 0; + int64_t now_time = GetTimeStampInMs(); + int64_t time_out = now_time + scan_options.timeout; + KeyValuePair next_start_kv_pair; + VLOG(9) << "ll-scan timeout set to be " << scan_options.timeout << ", start_tera_key " + << DebugString(start_tera_key) << ", end_row_key " << DebugString(end_row_key) + << ", max_size " << scan_options.max_size << ", number_limit " + << scan_options.number_limit << ", max_versions " << scan_options.max_versions + << ", max_qualifiers " << scan_options.max_qualifiers; + + *complete = false; + for (; it->Valid();) { + bool has_merged = false; + std::string merged_value; + counter_.low_read_cell.Inc(); + low_level_read_count.Inc(); + *read_bytes += it->key().size() + it->value().size(); + ++*read_cell_count; + now_time = GetTimeStampInMs(); + + leveldb::Slice tera_key = it->key(); + leveldb::Slice value = it->value(); + leveldb::Slice key, col, qual; + int64_t ts = 0; + leveldb::TeraKeyType type; + if (!key_operator_->ExtractTeraKey(tera_key, &key, &col, &qual, &ts, &type)) { + LOG(WARNING) << "invalid tera key: " << DebugString(tera_key.ToString()); + it->Next(); + continue; + } + + VLOG(10) << "ll-scan: " + << "tablet=[" << tablet_path_ << "] key=[" << DebugString(key.ToString()) + << "] column=[" << DebugString(col.ToString()) << ":" << DebugString(qual.ToString()) + << "] ts=[" << ts << "] type=[" << type << "] buffer_size=[" << buffer_size + << "] row_count=[" << *read_row_count << "] kv_count=[" << *read_cell_count + << "] number_limit=[" << number_limit << "]" + << "] read_bytes=[" << *read_bytes << "] qu_num=[" << qu_num << "]"; + + if (now_time > time_out) { + VLOG(9) << "ll-scan timeout, now_time: " << now_time << ", time_out: " << time_out; + if (next_start_point != NULL) { + VLOG(9) << "Mark next start key: " << DebugString(tera_key.ToString()); + MakeKvPair(key, col, qual, ts, "", next_start_point); + } + SetStatusCode(kRPCTimeout, status); + break; + } + if (db_->IsShutdown1Finished()) { + TABLET_UNLOAD_LOG << "break lowlevelscan before iterator next"; + SetStatusCode(kKeyNotInRange, status); + break; + } + + if (end_row_key.size() && key.compare(end_row_key) >= 0) { + // scan finished + *complete = true; + break; + } + + const std::set& cf_set = scan_options.iter_cf_set; + if (cf_set.size() > 0 && cf_set.find(col.ToString()) == cf_set.end() && + type != leveldb::TKT_DEL) { + // donot need this column, skip row deleting tag + it->Next(); + continue; + } + + if (compact_strategy->ScanDrop(it->key(), 0)) { + // skip drop record + scan_drop_count.Inc(); + it->Next(); + continue; + } + + // only use for sync scan, not available for stream scan + if (key_operator_->Compare(it->key(), start_tera_key) < 0) { + // skip out-of-range records + // keep record of version info to prevent dirty data + if (key.compare(last_key) == 0 && col.compare(last_col) == 0 && + qual.compare(last_qual) == 0) { + ++version_num; + } else { + last_key.assign(key.data(), key.size()); + last_col.assign(col.data(), col.size()); + last_qual.assign(qual.data(), qual.size()); + version_num = 1; + } + it->Next(); + continue; + } + + // begin to scan next row + if (key.compare(last_key) != 0) { + *read_row_count += 1; + ProcessRowBuffer(row_buf, scan_options, values, &buffer_size, &number_limit); + row_buf.Clear(); + } + + if (key.compare(last_key) == 0 && col.compare(last_col) == 0 && qual.compare(last_qual) == 0) { + if (++version_num > scan_options.max_versions) { + it->Next(); + continue; + } + } else { + if (key.compare(last_key) == 0 && col.compare(last_col) == 0) { + if (++qu_num > scan_options.max_qualifiers) { + VLOG(10) << "max_qualifiers triggered, max_qualifiers: " << scan_options.max_qualifiers; + it->Next(); + continue; + } + } else { + qu_num = 1; + } + + last_key.assign(key.data(), key.size()); + last_col.assign(col.data(), col.size()); + last_qual.assign(qual.data(), qual.size()); + version_num = 1; + int64_t merged_num = 0; + has_merged = compact_strategy->ScanMergedValue(it, &merged_value, &merged_num); + if (has_merged) { + counter_.low_read_cell.Add(merged_num - 1); + low_level_read_count.Add(merged_num - 1); + value = merged_value; + key = last_key; + col = last_col; + qual = last_qual; + + VLOG(10) << "ll-scan merge: " + << "key=[" << DebugString(key.ToString()) << "] column=[" + << DebugString(col.ToString()) << ":" << DebugString(qual.ToString()) << "] ts=[" + << ts << "] type=[" << type << "]" + << " value=[" << DebugString(value.ToString()) << "] merged=" << merged_num; + } + } + + row_buf.Add(key, col, qual, value, ts); + + // ScanMergedValue may have set it->Next() + // Must make sure has_merged == false before it->Next() + // Couldn't put this part in if (has_merged) else { it->Next() } + if (!has_merged) { + it->Next(); + } + + // check scan buffer + if (buffer_size >= scan_options.max_size || number_limit >= scan_options.number_limit) { + VLOG(10) << "stream scan, break scan context" + << ", buffer_size " << buffer_size << ", number_limit " << number_limit << ", key " + << DebugString(key.ToString()) << ", col " << DebugString(col.ToString()) + << ", qual " << DebugString(qual.ToString()); + break; + } + } + *complete = !it->Valid() ? true : *complete; + + if (ScanWithFilter(scan_options) && it->Valid() && !IsCompleteRow(row_buf, it) && + ShouldFilterRow(scan_options, row_buf, it)) { + GotoNextRow(row_buf, it, &next_start_kv_pair); + } else { + // process the last row of tablet + ProcessRowBuffer(row_buf, scan_options, values, &buffer_size, &number_limit); + } + + if (*status == kRPCTimeout || *status == kKeyNotInRange) { + return false; + } + + if (!it->Valid() && !(it->status().ok())) { + SetStatusCode(it->status(), status); + VLOG(10) << "ll-scan fail: " + << "tablet=[" << tablet_path_ << "], " + << "status=[" << StatusCodeToString(*status) << "]"; + return false; + } + + SetStatusCode(kTabletNodeOk, status); + return true; +} + +void TabletIO::MakeKvPair(leveldb::Slice key, leveldb::Slice col, leveldb::Slice qual, int64_t ts, + leveldb::Slice value, KeyValuePair* kv) { + kv->set_key(key.data(), key.size()); + kv->set_column_family(col.data(), col.size()); + kv->set_qualifier(qual.data(), qual.size()); + kv->set_timestamp(ts); + kv->set_value(value.data(), value.size()); +} + +bool TabletIO::LowLevelSeek(const std::string& row_key, const ScanOptions& scan_options, + RowResult* values, StatusCode* status) { + StatusCode s; + SetStatusCode(kTabletNodeOk, &s); + values->clear_key_values(); + + // create tera iterator + leveldb::ReadOptions read_option(&ldb_options_); + read_option.verify_checksums = FLAGS_tera_leveldb_verify_checksums; + SetupIteratorOptions(scan_options, &read_option); + uint64_t snapshot_id = scan_options.snapshot_id; + if (snapshot_id != 0) { + if (!SnapshotIDToSeq(snapshot_id, &read_option.snapshot)) { + TearDownIteratorOptions(&read_option); + SetStatusCode(kSnapshotNotExist, status); + return false; + } + } + read_option.rollbacks = rollbacks_; + SetupSingleRowIteratorOptions(row_key, &read_option); + std::unique_ptr it_data(db_->NewIterator(read_option)); + TearDownIteratorOptions(&read_option); + if (it_data->status().IsShutdownInProgress()) { + TABLET_UNLOAD_LOG << "on waiting_for_shutdown2_ new a ErrorIterator, and return early"; + SetStatusCode(kKeyNotInRange, status); + return false; + } + + // init compact strategy + leveldb::CompactStrategy* compact_strategy = ldb_options_.compact_strategy_factory->NewInstance(); + + // seek to the row start & process row delete mark + std::string row_seek_key; + key_operator_->EncodeTeraKey(row_key, "", "", kLatestTs, leveldb::TKT_FORSEEK, &row_seek_key); + it_data->Seek(row_seek_key); + counter_.low_read_cell.Inc(); + low_level_read_count.Inc(); + if (it_data->Valid()) { + VLOG(10) << "ll-seek: " + << "tablet=[" << tablet_path_ << "] row_key=[" << row_key << "]"; + leveldb::Slice cur_row_key; + key_operator_->ExtractTeraKey(it_data->key(), &cur_row_key, NULL, NULL, NULL, NULL); + if (cur_row_key.compare(row_key) > 0) { + SetStatusCode(kKeyNotExist, &s); + } else { + compact_strategy->ScanDrop(it_data->key(), 0); } - std::string row = row_buf.begin()->key(); - std::string next_row = row + '\0'; - std::string seek_key; - key_operator_->EncodeTeraKey(next_row, "", "", kLatestTs, - leveldb::TKT_FORSEEK, &seek_key); - it->Seek(seek_key); - MakeKvPair(leveldb::Slice(next_row), "", "", kLatestTs, "", next); - VLOG(9) << "[filter] goto next row:" << next_row << ":" << next_row.size(); -} - -inline bool TabletIO::LowLevelScan(const std::string& start_tera_key, - const std::string& end_row_key, - const ScanOptions& scan_options, - leveldb::Iterator* it, - ScanContext* scan_context, - RowResult* value_list, - KeyValuePair* next_start_point, - uint32_t* read_row_count, - uint32_t* read_bytes, - bool* is_complete, - StatusCode* status) { - leveldb::CompactStrategy* compact_strategy = scan_context->compact_strategy; - std::string& last_key = scan_context->last_key; - std::string& last_col = scan_context->last_col; - std::string& last_qual = scan_context->last_qual; - uint32_t& version_num = scan_context->version_num; - uint64_t& qu_num = scan_context->qu_num; - - std::list row_buf; - uint32_t buffer_size = 0; - int64_t number_limit = 0; - value_list->clear_key_values(); - *read_row_count = 0; - *read_bytes = 0; - int64_t now_time = GetTimeStampInMs(); - int64_t time_out = now_time + scan_options.timeout; - KeyValuePair next_start_kv_pair; - VLOG(9) << "ll-scan timeout set to be " << scan_options.timeout - << ", start_tera_key " << DebugString(start_tera_key) - << ", end_row_key " << DebugString(end_row_key) - << ", max_size " << scan_options.max_size - << ", number_limit " << scan_options.number_limit - << ", max_versions " << scan_options.max_versions - << ", max_qualifiers " << scan_options.max_qualifiers; - - *is_complete = false; - for (; it->Valid();) { - bool has_merged = false; - std::string merged_value; - counter_.low_read_cell.Inc(); - low_level_read_count.Inc(); - *read_bytes += it->key().size() + it->value().size(); - now_time = GetTimeStampInMs(); - - leveldb::Slice tera_key = it->key(); - leveldb::Slice value = it->value(); - leveldb::Slice key, col, qual; - int64_t ts = 0; - leveldb::TeraKeyType type; - if (!key_operator_->ExtractTeraKey(tera_key, &key, &col, &qual, &ts, &type)) { - LOG(WARNING) << "invalid tera key: " << DebugString(tera_key.ToString()); - it->Next(); - continue; - } + } else if (it_data->status().ok()) { + SetStatusCode(kKeyNotExist, &s); + } else { + SetStatusCode(it_data->status(), &s); + } - VLOG(10) << "ll-scan: " << "tablet=[" << tablet_path_ - << "] key=[" << DebugString(key.ToString()) - << "] column=[" << DebugString(col.ToString()) - << ":" << DebugString(qual.ToString()) - << "] ts=[" << ts << "] type=[" << type << "]" - << " buffer_size=[" << buffer_size << "]" - << " number_limit=[" << number_limit << "]" - << " read_bytes=[" << *read_bytes << "]" - << " qu_num=[" << qu_num << "]"; - - if (now_time > time_out) { - VLOG(9) << "ll-scan timeout, now_time: " << now_time << ", time_out: " << time_out; - if (next_start_point != NULL) { - VLOG(9) << "Mark next start key: " << DebugString(tera_key.ToString()); - MakeKvPair(key, col, qual, ts, "", next_start_point); - } - SetStatusCode(kRPCTimeout, status); - break; - } - if (db_->IsShutdown1Finished()) { - TABLET_UNLOAD_LOG << "break lowlevelscan before iterator next"; - SetStatusCode(kKeyNotInRange, status); - break; - } - - if (end_row_key.size() && key.compare(end_row_key) >= 0) { - // scan finished - *is_complete = true; - break; - } - - const std::set& cf_set = scan_options.iter_cf_set; - if (cf_set.size() > 0 && - cf_set.find(col.ToString()) == cf_set.end() && - type != leveldb::TKT_DEL) { - // donot need this column, skip row deleting tag - it->Next(); - continue; - } - - if (compact_strategy->ScanDrop(it->key(), 0)) { - // skip drop record - scan_drop_count.Inc(); - it->Next(); - continue; - } - - // only use for sync scan, not available for stream scan - if (key_operator_->Compare(it->key(), start_tera_key) < 0) { - // skip out-of-range records - // keep record of version info to prevent dirty data - if (key.compare(last_key) == 0 && - col.compare(last_col) == 0 && - qual.compare(last_qual) == 0) { - ++version_num; - } else { - last_key.assign(key.data(), key.size()); - last_col.assign(col.data(), col.size()); - last_qual.assign(qual.data(), qual.size()); - version_num = 1; - } - it->Next(); - continue; - } - - // begin to scan next row - if (key.compare(last_key) != 0) { - *read_row_count += 1; - ProcessRowBuffer(row_buf, scan_options, value_list, &buffer_size, &number_limit); - row_buf.clear(); - } - - if (key.compare(last_key) == 0 && - col.compare(last_col) == 0 && - qual.compare(last_qual) == 0) { - if (++version_num > scan_options.max_versions) { - it->Next(); - continue; - } - } else { - if (key.compare(last_key) == 0 && col.compare(last_col) == 0 ) { - if (++qu_num > scan_options.max_qualifiers) { - VLOG(10) << "max_qualifiers triggered, max_qualifiers: " << scan_options.max_qualifiers; - it->Next(); - continue; - } - } else { - qu_num = 1; - } - - last_key.assign(key.data(), key.size()); - last_col.assign(col.data(), col.size()); - last_qual.assign(qual.data(), qual.size()); - version_num = 1; - int64_t merged_num = 0; - has_merged = compact_strategy->ScanMergedValue(it, &merged_value, &merged_num); - if (has_merged) { - counter_.low_read_cell.Add(merged_num - 1); - low_level_read_count.Add(merged_num - 1); - value = merged_value; - key = last_key; - col = last_col; - qual = last_qual; - - VLOG(10) << "ll-scan merge: " << "key=[" << DebugString(key.ToString()) - << "] column=[" << DebugString(col.ToString()) - << ":" << DebugString(qual.ToString()) - << "] ts=[" << ts << "] type=[" << type << "]" - << " value=[" << DebugString(value.ToString()) - << "] merged=" << merged_num; - } - } - - KeyValuePair kv; - MakeKvPair(key, col, qual, ts, value, &kv); - row_buf.push_back(kv); - - // ScanMergedValue may have set it->Next() - // Must make sure has_merged == false before it->Next() - // Couldn't put this part in if (has_merged) else { it->Next() } - if (!has_merged) { - it->Next(); - } - - // check scan buffer - if (buffer_size >= scan_options.max_size || number_limit >= scan_options.number_limit) { - VLOG(10) << "stream scan, break scan context" - <<", buffer_size " << buffer_size - <<", number_limit " << number_limit - << ", key " << DebugString(key.ToString()) << ", col " << DebugString(col.ToString()) - << ", qual " << DebugString(qual.ToString()); - break; - } - } - *is_complete = !it->Valid() ? true : *is_complete; - - if (ScanWithFilter(scan_options) - && it->Valid() - && !IsCompleteRow(row_buf, it) - && ShouldFilterRow(scan_options, row_buf, it)) { - GotoNextRow(row_buf, it, &next_start_kv_pair); - } else { - // process the last row of tablet - ProcessRowBuffer(row_buf, scan_options, value_list, &buffer_size, &number_limit); - } - - if (*status == kRPCTimeout || *status == kKeyNotInRange) { - return false; - } - if (!it->Valid() && !(it->status().ok())) { - SetStatusCode(it->status(), status); - VLOG(10) << "ll-scan fail: " << "tablet=[" << tablet_path_ << "], " - << "status=[" << StatusCodeToString(*status) << "]"; - return false; - } - SetStatusCode(kTabletNodeOk, status); - return true; -} - -void TabletIO::MakeKvPair(leveldb::Slice key, leveldb::Slice col, leveldb::Slice qual, - int64_t ts, leveldb::Slice value, KeyValuePair* kv) { - kv->set_key(key.data(), key.size()); - kv->set_column_family(col.data(), col.size()); - kv->set_qualifier(qual.data(), qual.size()); - kv->set_timestamp(ts); - kv->set_value(value.data(), value.size()); -} - -bool TabletIO::LowLevelSeek(const std::string& row_key, - const ScanOptions& scan_options, - RowResult* value_list, - StatusCode* status) { - StatusCode s; - SetStatusCode(kTabletNodeOk, &s); - value_list->clear_key_values(); - - // create tera iterator - leveldb::ReadOptions read_option(&ldb_options_); - read_option.verify_checksums = FLAGS_tera_leveldb_verify_checksums; - SetupIteratorOptions(scan_options, &read_option); - uint64_t snapshot_id = scan_options.snapshot_id; - if (snapshot_id != 0) { - if (!SnapshotIDToSeq(snapshot_id, &read_option.snapshot)) { - TearDownIteratorOptions(&read_option); - SetStatusCode(kSnapshotNotExist, status); - return false; - } - } - read_option.rollbacks = rollbacks_; - SetupSingleRowIteratorOptions(row_key, &read_option); - std::unique_ptr it_data(db_->NewIterator(read_option)); - TearDownIteratorOptions(&read_option); - if (it_data->status().IsShutdownInProgress()) { - TABLET_UNLOAD_LOG << "on waiting_for_shutdown2_ new a ErrorIterator, and return early"; - SetStatusCode(kKeyNotInRange, status); - return false; - } - - // init compact strategy - leveldb::CompactStrategy* compact_strategy = - ldb_options_.compact_strategy_factory->NewInstance(); - - // seek to the row start & process row delete mark - std::string row_seek_key; - key_operator_->EncodeTeraKey(row_key, "", "", kLatestTs, - leveldb::TKT_FORSEEK, &row_seek_key); - it_data->Seek(row_seek_key); + if (s != kTabletNodeOk) { + delete compact_strategy; + SetStatusCode(s, status); + return false; + } + + ColumnFamilyMap::const_iterator it_cf = scan_options.column_family_list.begin(); + for (; it_cf != scan_options.column_family_list.end(); ++it_cf) { + const std::string& cf_name = it_cf->first; + const std::set& qu_set = it_cf->second; + + // seek to the cf start & process cf delete mark + std::string cf_seek_key; + key_operator_->EncodeTeraKey(row_key, cf_name, "", kLatestTs, leveldb::TKT_FORSEEK, + &cf_seek_key); + it_data->Seek(cf_seek_key); counter_.low_read_cell.Inc(); low_level_read_count.Inc(); if (it_data->Valid()) { - VLOG(10) << "ll-seek: " << "tablet=[" << tablet_path_ - << "] row_key=[" << row_key << "]"; - leveldb::Slice cur_row_key; - key_operator_->ExtractTeraKey(it_data->key(), &cur_row_key, - NULL, NULL, NULL, NULL); - if (cur_row_key.compare(row_key) > 0) { - SetStatusCode(kKeyNotExist, &s); - } else { - compact_strategy->ScanDrop(it_data->key(), 0); - } + VLOG(10) << "ll-seek: " + << "tablet=[" << tablet_path_ << "] row_key=[" << row_key << "] cf=[" << cf_name + << "]"; + leveldb::Slice cur_row, cur_cf; + key_operator_->ExtractTeraKey(it_data->key(), &cur_row, &cur_cf, NULL, NULL, NULL); + if (cur_row.compare(row_key) > 0 || cur_cf.compare(cf_name) > 0) { + continue; + } else { + compact_strategy->ScanDrop(it_data->key(), 0); + } + } else if (it_data->status().ok()) { + VLOG(10) << "ll-seek fail, not found."; + continue; } else { - SetStatusCode(kKeyNotExist, &s); - } - if (s != kTabletNodeOk) { - delete compact_strategy; - SetStatusCode(s, status); - return false; - } - - ColumnFamilyMap::const_iterator it_cf = - scan_options.column_family_list.begin(); - for (; it_cf != scan_options.column_family_list.end(); ++it_cf) { - const string& cf_name = it_cf->first; - const std::set& qu_set = it_cf->second; - - // seek to the cf start & process cf delete mark - std::string cf_seek_key; - key_operator_->EncodeTeraKey(row_key, cf_name, "", kLatestTs, - leveldb::TKT_FORSEEK, &cf_seek_key); - it_data->Seek(cf_seek_key); + SetStatusCode(it_data->status(), status); + return false; + } + + if (qu_set.empty()) { + LOG(FATAL) << "low level seek only support qualifier read."; + } + std::set::iterator it_qu = qu_set.begin(); + for (; it_qu != qu_set.end(); ++it_qu) { + const std::string& qu_name = *it_qu; + VLOG(10) << "ll-seek: try find " + << "tablet=[" << tablet_path_ << "] row_key=[" << row_key << "] cf=[" << cf_name + << "] qu=[" << qu_name << "]"; + + // seek to the cf start & process cf delete mark + std::string qu_seek_key; + key_operator_->EncodeTeraKey(row_key, cf_name, qu_name, kLatestTs, leveldb::TKT_FORSEEK, + &qu_seek_key); + it_data->Seek(qu_seek_key); + uint32_t version_num = 0; + for (; it_data->Valid();) { + if (db_->IsShutdown1Finished()) { + // break early on waiting_for_shutdown2_ + // igrone haven't scan versions of this qualifier + TABLET_UNLOAD_LOG << "break lowlevelscan before iterator next"; + SetStatusCode(kKeyNotInRange, &s); + break; + } counter_.low_read_cell.Inc(); low_level_read_count.Inc(); - if (it_data->Valid()) { - VLOG(10) << "ll-seek: " << "tablet=[" << tablet_path_ - << "] row_key=[" << row_key - << "] cf=[" << cf_name << "]"; - leveldb::Slice cur_row, cur_cf; - key_operator_->ExtractTeraKey(it_data->key(), &cur_row, &cur_cf, - NULL, NULL, NULL); - if (cur_row.compare(row_key) > 0 || cur_cf.compare(cf_name) > 0) { - continue; - } else { - compact_strategy->ScanDrop(it_data->key(), 0); - } - } else { - VLOG(10) << "ll-seek fail, not found."; - continue; + VLOG(10) << "ll-seek: " + << "tablet=[" << tablet_path_ << "] row_key=[" << row_key << "] cf=[" << cf_name + << "] qu=[" << qu_name << "]"; + leveldb::Slice cur_row, cur_cf, cur_qu; + int64_t timestamp; + key_operator_->ExtractTeraKey(it_data->key(), &cur_row, &cur_cf, &cur_qu, ×tamp, NULL); + if (cur_row.compare(row_key) > 0 || cur_cf.compare(cf_name) > 0 || + cur_qu.compare(qu_name) > 0) { + break; } - if (qu_set.empty()) { - LOG(FATAL) << "low level seek only support qualifier read."; + // skip qu delete mark and out-of-range version + if (compact_strategy->ScanDrop(it_data->key(), 0)) { + VLOG(10) << "ll-seek: scan drop " + << "tablet=[" << tablet_path_ << "] row_key=[" << row_key << "] cf=[" << cf_name + << "] qu=[" << qu_name << "]"; + scan_drop_count.Inc(); + // skip to next qualifier + break; } - std::set::iterator it_qu = qu_set.begin(); - for (; it_qu != qu_set.end(); ++it_qu) { - const string& qu_name = *it_qu; - VLOG(10) << "ll-seek: try find " << "tablet=[" << tablet_path_ - << "] row_key=[" << row_key << "] cf=[" << cf_name - << "] qu=[" << qu_name << "]"; - - // seek to the cf start & process cf delete mark - std::string qu_seek_key; - key_operator_->EncodeTeraKey(row_key, cf_name, qu_name, kLatestTs, - leveldb::TKT_FORSEEK, &qu_seek_key); - it_data->Seek(qu_seek_key); - uint32_t version_num = 0; - for (; it_data->Valid();) { - if (db_->IsShutdown1Finished()) { - // break early on waiting_for_shutdown2_ - // igrone haven't scan versions of this qualifier - TABLET_UNLOAD_LOG << "break lowlevelscan before iterator next"; - SetStatusCode(kKeyNotInRange, &s); - break; - } - counter_.low_read_cell.Inc(); - low_level_read_count.Inc(); - VLOG(10) << "ll-seek: " << "tablet=[" << tablet_path_ - << "] row_key=[" << row_key << "] cf=[" << cf_name - << "] qu=[" << qu_name << "]"; - leveldb::Slice cur_row, cur_cf, cur_qu; - int64_t timestamp; - key_operator_->ExtractTeraKey(it_data->key(), &cur_row, &cur_cf, - &cur_qu, ×tamp, NULL); - if (cur_row.compare(row_key) > 0 || cur_cf.compare(cf_name) > 0 || - cur_qu.compare(qu_name) > 0) { - break; - } - - // skip qu delete mark and out-of-range version - if (compact_strategy->ScanDrop(it_data->key(), 0)) { - VLOG(10) << "ll-seek: scan drop " << "tablet=[" << tablet_path_ - << "] row_key=[" << row_key << "] cf=[" << cf_name - << "] qu=[" << qu_name << "]"; - scan_drop_count.Inc(); - // skip to next qualifier - break; - } - - if (scan_options.ts_start > timestamp) { - break; - } - if (scan_options.ts_end < timestamp) { - it_data->Next(); - continue; - } - - // version filter - if (++version_num > scan_options.max_versions) { - break; - } - - KeyValuePair* kv = value_list->add_key_values(); - kv->set_key(row_key); - kv->set_column_family(cf_name); - kv->set_qualifier(qu_name); - kv->set_timestamp(timestamp); - - int64_t merged_num; - std::string merged_value; - bool has_merged = - compact_strategy->ScanMergedValue(it_data.get(), &merged_value, &merged_num); - if (has_merged) { - counter_.low_read_cell.Add(merged_num - 1); - low_level_read_count.Add(merged_num - 1); - kv->set_value(merged_value); - VLOG(10) << "ll-seek merge: " << "key=[" << DebugString(row_key) - << "] column=[" << DebugString(cf_name) - << ":" << DebugString(qu_name) - << "] ts=[" << timestamp << "] " - << " value_v=[" << io::DecodeBigEndain(merged_value.data()) << "] " - << " value=[" << DebugString(merged_value) - << "] merged=" << merged_num; - } else { - leveldb::Slice value = it_data->value(); - kv->set_value(value.data(), value.size()); - it_data->Next(); - } - } - if (s == kKeyNotInRange) { - // only on waiting_for_shutdown2_ != NULL will break with kKeyNotInRange - // igrone haven't scan qualifiers - break; - } + + if (scan_options.ts_start > timestamp) { + break; } - if (s == kKeyNotInRange) { - // only on waiting_for_shutdown2_ != NULL will break with kKeyNotInRange - // igrone haven't scan column_families - break; + if (scan_options.ts_end < timestamp) { + it_data->Next(); + continue; } - } - delete compact_strategy; - SetStatusCode(s, status); - return kTabletNodeOk == s; -} - -bool TabletIO::ReadCells(const RowReaderInfo& row_reader, RowResult* value_list, - uint64_t snapshot_id, StatusCode* status, int64_t timeout_ms) { - { - MutexLock lock(&mutex_); - if ((status_ != kReady && status_ != kUnLoading) || IsUrgentUnload()) { - if (status_ == kUnLoading2) { - // keep compatable for old sdk protocol - // we can remove this in the future. - SetStatusCode(kUnLoading, status); - } else { - SetStatusCode(status_, status); - } - return false; + // version filter + if (++version_num > scan_options.max_versions) { + break; } - db_ref_count_++; - } - int64_t start_read_us = get_micros(); + KeyValuePair* kv = values->add_key_values(); + kv->set_key(row_key); + kv->set_column_family(cf_name); + kv->set_qualifier(qu_name); + kv->set_timestamp(timestamp); - if (kv_only_) { - std::string key(row_reader.key()); - std::string value; - if (RawKeyType() == TTLKv) { - key.append(8, '\0'); - } - if (!Read(key, &value, snapshot_id, status)) { - counter_.read_rows.Inc(); - row_read_count.Inc(); - row_read_delay.Add(get_micros() - start_read_us); - { - MutexLock lock(&mutex_); - db_ref_count_--; - } - return false; - } - KeyValuePair* result = value_list->add_key_values(); - result->set_key(row_reader.key()); - result->set_value(value); - counter_.read_rows.Inc(); - row_read_count.Inc(); - counter_.read_size.Add(result->ByteSize()); - row_read_bytes.Add(result->ByteSize()); - row_read_delay.Add(get_micros() - start_read_us); - { - MutexLock lock(&mutex_); - db_ref_count_--; - } - return true; - } - - ScanOptions scan_options; - bool ll_seek_available = true; - for (int32_t i = 0; i < row_reader.cf_list_size(); ++i) { - const ColumnFamily& column_family = row_reader.cf_list(i); - const std::string& column_family_name = column_family.family_name(); - std::set& qualifier_list = - scan_options.column_family_list[column_family_name]; - qualifier_list.clear(); - for (int32_t j = 0; j < column_family.qualifier_list_size(); ++j) { - qualifier_list.insert(column_family.qualifier_list(j)); - } - if (qualifier_list.empty()) { - ll_seek_available = false; + int64_t merged_num; + std::string merged_value; + bool has_merged = + compact_strategy->ScanMergedValue(it_data.get(), &merged_value, &merged_num); + if (has_merged) { + counter_.low_read_cell.Add(merged_num - 1); + low_level_read_count.Add(merged_num - 1); + kv->set_value(merged_value); + VLOG(10) << "ll-seek merge: " + << "key=[" << DebugString(row_key) << "] column=[" << DebugString(cf_name) << ":" + << DebugString(qu_name) << "] ts=[" << timestamp << "] " + << " value_v=[" << io::DecodeBigEndain(merged_value.data()) << "] " + << " value=[" << DebugString(merged_value) << "] merged=" << merged_num; + } else { + leveldb::Slice value = it_data->value(); + kv->set_value(value.data(), value.size()); + it_data->Next(); } - scan_options.iter_cf_set.insert(column_family_name); - } - if (scan_options.column_family_list.empty()) { - ll_seek_available = false; + } + if (!it_data->status().ok()) { + SetStatusCode(it_data->status(), status); + return false; + } + if (s == kKeyNotInRange) { + // only on waiting_for_shutdown2_ != NULL will break with kKeyNotInRange + // igrone haven't scan qualifiers + break; + } } - - if (row_reader.has_max_version()) { - scan_options.max_versions = row_reader.max_version(); + if (s == kKeyNotInRange) { + // only on waiting_for_shutdown2_ != NULL will break with kKeyNotInRange + // igrone haven't scan column_families + break; } + } + delete compact_strategy; - if (row_reader.has_max_qualifiers()) { - scan_options.max_qualifiers = row_reader.max_qualifiers(); - } else { - scan_options.max_qualifiers = std::numeric_limits::max(); - } + SetStatusCode(s, status); + return kTabletNodeOk == s; +} - if (row_reader.has_time_range()) { - scan_options.ts_start = row_reader.time_range().ts_start(); - scan_options.ts_end = row_reader.time_range().ts_end(); - VLOG(10) << "ReadCells: " << "timerange=[" << scan_options.ts_start - << "," << scan_options.ts_end << "]"; +bool TabletIO::ReadCells(const RowReaderInfo& row_reader, RowResult* values, uint64_t snapshot_id, + StatusCode* status, int64_t timeout_ms) { + { + MutexLock lock(&mutex_); + if ((status_ != kReady && status_ != kUnloading) || IsUrgentUnload()) { + if (status_ == kUnloading2) { + // keep compatable for old sdk protocol + // we can remove this in the future. + SetStatusCode(kUnloading, status); + } else { + SetStatusCode(status_, status); + } + return false; + } + db_ref_count_++; + } + + int64_t start_read_us = get_micros(); + + if (kv_only_) { + std::string key(row_reader.key()); + std::string value; + if (RawKeyType() == TTLKv) { + key.append(8, '\0'); } - - scan_options.snapshot_id = snapshot_id; - scan_options.timeout = timeout_ms; - - - VLOG(10) << "ReadCells: " << "key=[" << DebugString(row_reader.key()) << "]"; - - bool ret = false; - // if read all columns, use LowLevelScan - if (ll_seek_available) { - ret = LowLevelSeek(row_reader.key(), scan_options, value_list, status); - } else { - std::string start_tera_key; - key_operator_->EncodeTeraKey(row_reader.key(), "", "", kLatestTs, - leveldb::TKT_VALUE, &start_tera_key); - std::string end_row_key = row_reader.key() + '\0'; - uint32_t read_row_count = 0; - uint32_t read_bytes = 0; - bool is_complete = false; - ret = LowLevelScan(start_tera_key, end_row_key, scan_options, - value_list, NULL, &read_row_count, &read_bytes, - &is_complete, status); + if (!Read(key, &value, snapshot_id, status)) { + counter_.read_rows.Inc(); + row_read_count.Inc(); + row_read_delay.Add(get_micros() - start_read_us); + { + MutexLock lock(&mutex_); + db_ref_count_--; + } + return false; } + KeyValuePair* result = values->add_key_values(); + result->set_key(row_reader.key()); + result->set_value(value); counter_.read_rows.Inc(); row_read_count.Inc(); + counter_.read_size.Add(result->ByteSize()); + row_read_bytes.Add(result->ByteSize()); row_read_delay.Add(get_micros() - start_read_us); { - MutexLock lock(&mutex_); - db_ref_count_--; - } - if (!ret) { - return false; - } else { - counter_.read_size.Add(value_list->ByteSize()); - row_read_bytes.Add(value_list->ByteSize()); - } - - if (value_list->key_values_size() == 0) { - SetStatusCode(kKeyNotExist, status); - return false; + MutexLock lock(&mutex_); + db_ref_count_--; } return true; + } + + ScanOptions scan_options; + scan_options.enable_dfs_read_thread_limiter = FLAGS_enable_dfs_read_thread_limiter; + bool ll_seek_available = true; + for (int32_t i = 0; i < row_reader.cf_list_size(); ++i) { + const ColumnFamily& column_family = row_reader.cf_list(i); + const std::string& column_family_name = column_family.family_name(); + std::set& qualifier_list = scan_options.column_family_list[column_family_name]; + qualifier_list.clear(); + for (int32_t j = 0; j < column_family.qualifier_list_size(); ++j) { + qualifier_list.insert(column_family.qualifier_list(j)); + } + if (qualifier_list.empty()) { + ll_seek_available = false; + } + scan_options.iter_cf_set.insert(column_family_name); + } + if (scan_options.column_family_list.empty()) { + ll_seek_available = false; + } + + if (row_reader.has_max_version()) { + scan_options.max_versions = row_reader.max_version(); + } + + if (row_reader.has_max_qualifiers()) { + scan_options.max_qualifiers = row_reader.max_qualifiers(); + } else { + scan_options.max_qualifiers = std::numeric_limits::max(); + } + + if (row_reader.has_time_range()) { + scan_options.ts_start = row_reader.time_range().ts_start(); + scan_options.ts_end = row_reader.time_range().ts_end(); + VLOG(10) << "ReadCells: " + << "timerange=[" << scan_options.ts_start << "," << scan_options.ts_end << "]"; + } + + scan_options.snapshot_id = snapshot_id; + scan_options.timeout = timeout_ms; + + VLOG(10) << "ReadCells: " + << "key=[" << DebugString(row_reader.key()) << "]"; + + bool ret = false; + // if read all columns, use LowLevelScan + if (ll_seek_available) { + ret = LowLevelSeek(row_reader.key(), scan_options, values, status); + } else { + std::string start_tera_key; + key_operator_->EncodeTeraKey(row_reader.key(), "", "", kLatestTs, leveldb::TKT_VALUE, + &start_tera_key); + std::string end_row_key = row_reader.key() + '\0'; + uint32_t read_row_count = 0; + uint32_t read_cell_count = 0; + uint32_t read_bytes = 0; + bool complete = false; + ret = LowLevelScan(start_tera_key, end_row_key, scan_options, values, NULL, &read_row_count, + &read_cell_count, &read_bytes, &complete, status); + } + counter_.read_rows.Inc(); + row_read_count.Inc(); + row_read_delay.Add(get_micros() - start_read_us); + { + MutexLock lock(&mutex_); + db_ref_count_--; + } + if (!ret) { + return false; + } else { + counter_.read_size.Add(values->ByteSize()); + row_read_bytes.Add(values->ByteSize()); + } + + if (values->key_values_size() == 0) { + SetStatusCode(kKeyNotExist, status); + return false; + } + return true; } bool TabletIO::WriteBatch(leveldb::WriteBatch* batch, bool disable_wal, bool sync, StatusCode* status) { - leveldb::WriteOptions options; - options.disable_wal = disable_wal; - options.sync = sync; + leveldb::WriteOptions options; + options.disable_wal = disable_wal; + options.sync = sync; - CHECK_NOTNULL(db_); + CHECK_NOTNULL(db_); - leveldb::Status db_status = db_->Write(options, batch); - if (!db_status.ok()) { - LOG(ERROR) << "fail to batch write to tablet: " << tablet_path_ - << ", " << db_status.ToString(); - SetStatusCode(kIOError, status); - return false; - } - counter_.write_size.Add(batch->DataSize()); - row_write_bytes.Add(batch->DataSize()); - SetStatusCode(kTabletNodeOk, status); - return true; + leveldb::Status db_status = db_->Write(options, batch); + if (!db_status.ok()) { + LOG(ERROR) << "fail to batch write to tablet: " << tablet_path_ << ", " << db_status.ToString(); + SetStatusCode(kIOError, status); + return false; + } + counter_.write_size.Add(batch->DataSize()); + row_write_bytes.Add(batch->DataSize()); + SetStatusCode(kTabletNodeOk, status); + return true; } -bool TabletIO::WriteOne(const std::string& key, const std::string& value, - bool sync, StatusCode* status) { - leveldb::WriteBatch batch; - batch.Put(key, value); - return WriteBatch(&batch, false, sync, status); +bool TabletIO::WriteOne(const std::string& key, const std::string& value, bool sync, + StatusCode* status) { + leveldb::WriteBatch batch; + batch.Put(key, value); + return WriteBatch(&batch, false, sync, status); } bool TabletIO::Write(std::vector* row_mutation_vec, - std::vector* status_vec, bool is_instant, - WriteCallback callback, StatusCode* status) { - { - MutexLock lock(&mutex_); - if ((status_ != kReady && status_ != kUnLoading) || IsUrgentUnload()) { - if (status_ == kUnLoading2) { - // keep compatable for old sdk protocol - // we can remove this in the future. - SetStatusCode(kUnLoading, status); - } else { - SetStatusCode(status_, status); - } - return false; - } - db_ref_count_++; - } - bool ret = async_writer_->Write(row_mutation_vec, status_vec, is_instant, - callback, status); - if (!ret) { - counter_.write_reject_rows.Add(row_mutation_vec->size()); - } - - { - MutexLock lock(&mutex_); - db_ref_count_--; - } - return ret; + std::vector* status_vec, bool is_instant, WriteCallback callback, + StatusCode* status) { + { + MutexLock lock(&mutex_); + if ((status_ != kReady && status_ != kUnloading) || IsUrgentUnload()) { + if (status_ == kUnloading2) { + // keep compatable for old sdk protocol + // we can remove this in the future. + SetStatusCode(kUnloading, status); + } else { + SetStatusCode(status_, status); + } + return false; + } + db_ref_count_++; + } + bool ret = async_writer_->Write(row_mutation_vec, status_vec, is_instant, callback, status); + if (!ret) { + counter_.write_reject_rows.Add(row_mutation_vec->size()); + } + + { + MutexLock lock(&mutex_); + db_ref_count_--; + } + return ret; } -bool TabletIO::ScanRows(const ScanTabletRequest* request, - ScanTabletResponse* response, +bool TabletIO::ScanRows(const ScanTabletRequest* request, ScanTabletResponse* response, google::protobuf::Closure* done) { - StatusCode status = kTabletNodeOk; - { - MutexLock lock(&mutex_); - if ((status_ != kReady && status_ != kUnLoading) || IsUrgentUnload()) { - if (status_ == kUnLoading2) { - // keep compatable for old sdk protocol - // we can remove this in the future. - SetStatusCode(kUnLoading, &status); - } else { - SetStatusCode(status_, &status); - } - response->set_status(status); - done->Run(); - return false; - } - db_ref_count_++; - } + StatusCode status = kTabletNodeOk; + { + MutexLock lock(&mutex_); + if ((status_ != kReady && status_ != kUnloading) || IsUrgentUnload()) { + if (status_ == kUnloading2) { + // keep compatable for old sdk protocol + // we can remove this in the future. + SetStatusCode(kUnloading, &status); + } else { + SetStatusCode(status_, &status); + } + response->set_status(status); + done->Run(); + return false; + } + db_ref_count_++; + } + + bool success = false; + // slide window of batchscan use unique rpc session + // so, has_session_id means batchscan + if (kv_only_ && !request->has_session_id()) { + success = ScanKvsRestricted(request, response, done); + } else if (request->has_session_id() && request->session_id() > 0) { + batch_scan_count.Inc(); + success = HandleScan(request, response, done); + } else { + sync_scan_count.Inc(); + success = ScanRowsRestricted(request, response, done); + } + { + MutexLock lock(&mutex_); + db_ref_count_--; + } + return success; +} + +bool TabletIO::ScanKvsRestricted(const ScanTabletRequest* request, ScanTabletResponse* response, + google::protobuf::Closure* done) { + bool ret = false; + ScanOption scan_option; + scan_option.set_snapshot_id(request->snapshot_id()); + scan_option.mutable_key_range()->set_key_start(request->start()); + scan_option.mutable_key_range()->set_key_end(request->end()); + if (request->has_buffer_limit()) { + scan_option.set_size_limit(request->buffer_limit()); + } else { + scan_option.set_size_limit(FLAGS_tera_tabletnode_scan_pack_max_size << 10); + } + scan_option.set_round_down(request->round_down()); + bool complete = false; + uint32_t read_row_count = 0; + uint32_t read_bytes = 0; + int64_t start_scan_us = get_micros(); + + StatusCode status = kTabletNodeOk; + if (Scan(scan_option, response->mutable_results()->mutable_key_values(), &read_row_count, + &read_bytes, &complete, &status)) { + response->set_complete(complete); + ret = true; + } + + counter_.scan_rows.Add(read_row_count); + counter_.scan_kvs.Add(read_row_count); + counter_.scan_size.Add(read_bytes); + row_scan_count.Add(read_row_count); + row_scan_bytes.Add(read_bytes); + row_scan_delay.Add(get_micros() - start_scan_us); + + response->set_data_size(read_bytes); + response->set_row_count(read_row_count); + response->set_cell_count(read_row_count); + + response->set_status(status); + done->Run(); + return ret; +} + +bool TabletIO::ScanRowsRestricted(const ScanTabletRequest* request, ScanTabletResponse* response, + google::protobuf::Closure* done) { + std::string start_tera_key; + std::string end_row_key; + SetupScanKey(request, &start_tera_key, &end_row_key); - bool success = false; - if (kv_only_) { - ScanOption scan_option; - scan_option.set_snapshot_id(request->snapshot_id()); - scan_option.mutable_key_range()->set_key_start(request->start()); - scan_option.mutable_key_range()->set_key_end(request->end()); - if (request->has_buffer_limit()) { - scan_option.set_size_limit(request->buffer_limit()); - } else { - scan_option.set_size_limit(FLAGS_tera_tabletnode_scan_pack_max_size << 10); - } - scan_option.set_round_down(request->round_down()); - bool complete = false; - success = Scan(scan_option, response->mutable_results()->mutable_key_values(), - &complete, &status); - if (success) { - response->set_complete(complete); - } - response->set_status(status); - done->Run(); - } else if (request->has_session_id() && request->session_id() > 0) { - batch_scan_count.Inc(); - success = HandleScan(request, response, done); - } else { - sync_scan_count.Inc(); - success = ScanRowsRestricted(request, response, done); - } - { - MutexLock lock(&mutex_); - db_ref_count_--; - } - return success; -} + ScanOptions scan_options; + SetupScanRowOptions(request, &scan_options); -bool TabletIO::ScanRowsRestricted(const ScanTabletRequest* request, - ScanTabletResponse* response, - google::protobuf::Closure* done) { - std::string start_tera_key; - std::string end_row_key; - SetupScanInternalTeraKey(request, &start_tera_key, &end_row_key); + uint32_t read_row_count = 0; + uint32_t read_cell_count = 0; + uint32_t read_bytes = 0; + bool complete = false; - ScanOptions scan_options; - SetupScanRowOptions(request, &scan_options); + StatusCode status = kTabletNodeOk; + bool ret = false; - uint32_t read_row_count = 0; - uint32_t read_bytes = 0; - bool is_complete = false; + int64_t start_scan_us = get_micros(); - StatusCode status = kTabletNodeOk; - bool ret = false; + if (LowLevelScan(start_tera_key, end_row_key, scan_options, response->mutable_results(), + response->mutable_next_start_point(), &read_row_count, &read_cell_count, + &read_bytes, &complete, &status)) { + response->set_complete(complete); + ret = true; + } - int64_t start_scan_us = get_micros(); + counter_.scan_rows.Add(read_row_count); + counter_.scan_kvs.Add(read_cell_count); + counter_.scan_size.Add(read_bytes); + row_scan_count.Add(read_row_count); + row_scan_bytes.Add(read_bytes); + row_scan_delay.Add(get_micros() - start_scan_us); - if (LowLevelScan(start_tera_key, end_row_key, scan_options, - response->mutable_results(), response->mutable_next_start_point(), - &read_row_count, &read_bytes, &is_complete, &status)) { - response->set_complete(is_complete); - counter_.scan_rows.Add(read_row_count); - counter_.scan_size.Add(read_bytes); - row_scan_count.Add(read_row_count); - row_scan_bytes.Add(read_bytes); - row_scan_delay.Add(get_micros() - start_scan_us); - ret = true; - } + response->set_data_size(read_bytes); + response->set_row_count(read_row_count); + response->set_cell_count(read_cell_count); - response->set_status(status); - done->Run(); - return ret; + response->set_status(status); + done->Run(); + return ret; } -bool TabletIO::HandleScan(const ScanTabletRequest* request, - ScanTabletResponse* response, +bool TabletIO::HandleScan(const ScanTabletRequest* request, ScanTabletResponse* response, google::protobuf::Closure* done) { - // concurrency control, ensure only one scanner step init leveldb::Iterator - ScanContext* context = scan_context_manager_->GetScanContext(this, request, response, done); - if (context == NULL) { - return true; - } + // concurrency control, ensure only one scanner step init leveldb::Iterator + ScanContext* context = scan_context_manager_->GetScanContext(this, request, response, done); + if (context == NULL) { + return true; + } - // first rpc init iterator and scan parameter - if (context->it == NULL) { - SetupScanInternalTeraKey(request, &(context->start_tera_key), &(context->end_row_key)); - SetupScanRowOptions(request, &(context->scan_options)); - context->scan_options.is_batch_scan = true; - context->ret_code = InitedScanIterator(context->start_tera_key, context->end_row_key, - context->scan_options, &(context->it)); - context->compact_strategy = ldb_options_.compact_strategy_factory->NewInstance(); + // first rpc init iterator and scan parameter + if (context->it == NULL) { + SetupScanRowOptions(request, &(context->scan_options)); + context->scan_options.is_batch_scan = true; + // context->complete set false in GetScanContext() + SetupScanKey(request, &(context->start_tera_key), &(context->end_row_key)); + context->ret_code = InitScanIterator(context->start_tera_key, context->end_row_key, + context->scan_options, &(context->it)); + if (!kv_only_ || RawKeyType() == TTLKv) { + context->compact_strategy = ldb_options_.compact_strategy_factory->NewInstance(); } - // schedule scan context - return scan_context_manager_->ScheduleScanContext(context); + } + // schedule scan context + return scan_context_manager_->ScheduleScanContext(context); } void TabletIO::ProcessScan(ScanContext* context) { - uint32_t rows_scan_num = 0; - uint32_t size_scan_bytes = 0; - - int64_t start_scan_us = get_micros(); + uint32_t rows_scan_num = 0; + uint32_t cells_scan_num = 0; + uint32_t size_scan_bytes = 0; + int64_t start_scan_us = get_micros(); + + if (kv_only_) { + KvTableScan(context, &rows_scan_num, &size_scan_bytes); + context->data_size = size_scan_bytes; + context->cell_count = rows_scan_num; // if kv table, cell_count equal row_count + context->row_count = rows_scan_num; + counter_.scan_kvs.Add(rows_scan_num); + } else { + LowLevelScan(context->start_tera_key, context->end_row_key, context->scan_options, context->it, + context, context->result, NULL, &rows_scan_num, &cells_scan_num, &size_scan_bytes, + &context->complete, &context->ret_code); + context->data_size = size_scan_bytes; + context->cell_count = cells_scan_num; + context->row_count = rows_scan_num; + counter_.scan_kvs.Add(cells_scan_num); + } + counter_.scan_rows.Add(rows_scan_num); + counter_.scan_size.Add(size_scan_bytes); + row_scan_count.Add(rows_scan_num); + row_scan_bytes.Add(size_scan_bytes); + row_scan_delay.Add(get_micros() - start_scan_us); +} + +bool TabletIO::KvTableScan(ScanContext* scan_context, uint32_t* read_row_count, + uint32_t* read_bytes) { + std::string& start = scan_context->start_tera_key; + std::string& end = scan_context->end_row_key; + ScanOptions& scan_options = scan_context->scan_options; + bool& complete = scan_context->complete; + RowResult* values = scan_context->result; + StatusCode* status = &scan_context->ret_code; + leveldb::CompactStrategy* compact_strategy = scan_context->compact_strategy; + + values->clear_key_values(); + *read_row_count = 0; + *read_bytes = 0; + int64_t now_time = GetTimeStampInMs(); + int64_t time_out = now_time + scan_options.timeout; + VLOG(9) << "kv-scan timeout " << scan_options.timeout << ", max_size " << scan_options.max_size + << ", number_limit " << scan_options.number_limit << ", start_key " << DebugString(start) + << ", end_key " << DebugString(end); + + auto it = scan_context->it; + for (; it->Valid(); it->Next()) { + leveldb::Slice key = it->key(); + leveldb::Slice value = it->value(); + ++*read_row_count; + + VLOG(10) << "kv-scan: tablet=[" << tablet_path_ << "] key=[" << DebugString(key.ToString()) + << "] scan_row_count=[" << *read_row_count << "]" + << " read_bytes=[" << *read_bytes << "]"; - if (LowLevelScan(context->start_tera_key, context->end_row_key, - context->scan_options, context->it, context, - context->result, NULL, &rows_scan_num, &size_scan_bytes, - &context->complete, &context->ret_code)) { - counter_.scan_rows.Add(rows_scan_num); - counter_.scan_size.Add(size_scan_bytes); - row_scan_count.Add(rows_scan_num); - row_scan_bytes.Add(size_scan_bytes); - row_scan_delay.Add(get_micros() - start_scan_us); + if (RawKeyType() == TTLKv) { + complete = (!end.empty() && key_operator_->Compare(key, end) >= 0); + } else { + complete = (!end.empty() && key.compare(end) >= 0); } -} -bool TabletIO::Scan(const ScanOption& option, KeyValueList* kv_list, - bool* complete, StatusCode* status) { - - int64_t start_scan_us = get_micros(); - - std::string start = option.key_range().key_start(); - std::string end = option.key_range().key_end(); - if (start < start_key_) { - start = start_key_; - } - if (end.empty() || (!end_key_.empty() && end > end_key_)) { - end = end_key_; + now_time = GetTimeStampInMs(); + // 4 conditions: complete, timeout, max size, max row number + if (complete || now_time >= time_out || + (scan_options.max_size > 0 && *read_bytes >= scan_options.max_size) || + *read_row_count > scan_options.number_limit) { + break; } - bool noexist_end = false; - if (end.empty()) { - noexist_end = true; + if (compact_strategy && compact_strategy->ScanDrop(key, 0)) { + VLOG(10) << "[KV-Scan] key:[" << key.ToString() << "] Dropped."; + } else { + KeyValuePair* pair = values->add_key_values(); + if (RawKeyType() == TTLKv) { + pair->set_key(key.data(), key.size() - sizeof(int64_t)); + } else { + pair->set_key(key.data(), key.size()); + } + pair->set_value(value.data(), value.size()); + *read_bytes += pair->key().size() + pair->value().size(); + } + + if (db_->IsShutdown1Finished()) { + // return early on waiting_for_shutdown2_, igrone rows haven't scan + TABLET_UNLOAD_LOG << "break scan kv table before iterator next"; + *status = kKeyNotInRange; + return false; + } + } + + if (!it->Valid()) { + complete = true; + } + if (!it->Valid() && !(it->status().ok())) { + SetStatusCode(it->status(), status); + VLOG(10) << "kv-scan fail: " + << "tablet=[" << tablet_path_ << "], " + << "status=[" << StatusCodeToString(*status) << "]"; + return false; + } + SetStatusCode(kTabletNodeOk, status); + return true; +} + +bool TabletIO::Scan(const ScanOption& option, KeyValueList* kv_list, uint32_t* read_row_count, + uint32_t* read_bytes, bool* complete, StatusCode* status) { + std::string start = option.key_range().key_start(); + std::string end = option.key_range().key_end(); + if (start < start_key_) { + start = start_key_; + } + if (end.empty() || (!end_key_.empty() && end > end_key_)) { + end = end_key_; + } + + // TTL-KV : key_operator_::Compare会解RawKey([row_key | expire_timestamp]) + // 因此传递给Leveldb的Key一定要保证以expire_timestamp结尾. + std::unique_ptr strategy(nullptr); + if (RawKeyType() == TTLKv) { + if (!start.empty()) { + std::string start_key; + key_operator_->EncodeTeraKey(start, "", "", 0, leveldb::TKT_FORSEEK, &start_key); + start.swap(start_key); + } + if (!end.empty()) { + std::string end_key; + key_operator_->EncodeTeraKey(end, "", "", 0, leveldb::TKT_FORSEEK, &end_key); + end.swap(end_key); + } + strategy.reset(ldb_options_.compact_strategy_factory->NewInstance()); + } + + *read_row_count = 0; + *read_bytes = 0; + + uint64_t snapshot_id = option.snapshot_id(); + leveldb::ReadOptions read_option(&ldb_options_); + read_option.verify_checksums = FLAGS_tera_leveldb_verify_checksums; + if (snapshot_id != 0 && !SnapshotIDToSeq(snapshot_id, &read_option.snapshot)) { + *status = kSnapshotNotExist; + return false; + } + read_option.rollbacks = rollbacks_; + + std::unique_ptr it(db_->NewIterator(read_option)); + if (it->status().IsShutdownInProgress()) { + TABLET_UNLOAD_LOG << "on waiting_for_shutdown2_ new a ErrorIterator, and " + "return kKeyNotInRange"; + *status = kKeyNotInRange; + return false; + } + + it->Seek(start); + // round down is just for internal meta scan + if (option.round_down()) { + if (it->Valid() && key_operator_->Compare(it->key(), start) > 0) { + it->Prev(); + if (!it->Valid()) { + it->SeekToFirst(); + } + } else if (!it->Valid()) { + it->SeekToLast(); + } + } + + int64_t pack_size = 0; + for (; it->Valid(); it->Next()) { + leveldb::Slice key = it->key(); + leveldb::Slice value = it->value(); + *read_bytes += it->key().size() + it->value().size(); + *read_row_count += 1; + if (RawKeyType() == TTLKv) { // only compare row key + *complete = (!end.empty() && key_operator_->Compare(key, end) >= 0); + } else { + *complete = (!end.empty() && key.compare(end) >= 0); } - int64_t pack_size = 0; - uint64_t snapshot_id = option.snapshot_id(); - leveldb::ReadOptions read_option(&ldb_options_); - read_option.verify_checksums = FLAGS_tera_leveldb_verify_checksums; - if (snapshot_id != 0) { - if (!SnapshotIDToSeq(snapshot_id, &read_option.snapshot)) { - *status = kSnapshotNotExist; - return false; - } + if (*complete || (option.size_limit() > 0 && pack_size > option.size_limit())) { + break; } - read_option.rollbacks = rollbacks_; - std::unique_ptr it(db_->NewIterator(read_option)); - if (it->status().IsShutdownInProgress()) { - TABLET_UNLOAD_LOG << "on waiting_for_shutdown2_ new a ErrorIterator, and return kKeyNotInRange"; - *status = kKeyNotInRange; - return false; - } + if (strategy && strategy->ScanDrop(key, 0)) { + VLOG(10) << "[KV-Scan] key:[" << key.ToString() << "] Dropped."; + } else { + KeyValuePair* pair = kv_list->Add(); + if (RawKeyType() == TTLKv) { + pair->set_key(key.data(), key.size() - sizeof(int64_t)); + } else { + pair->set_key(key.data(), key.size()); + } + pair->set_value(value.data(), value.size()); + pack_size += pair->key().size() + pair->value().size(); + } + + if (db_->IsShutdown1Finished()) { + // return early on waiting_for_shutdown2_, igrone rows haven't scan + TABLET_UNLOAD_LOG << "break scan kv before iterator next"; + *status = kKeyNotInRange; + return false; + } + } + if (!it->Valid()) { + *complete = true; + } + + return true; +} + +void TabletIO::SetupScanKey(const ScanTabletRequest* request, std::string* start_tera_key, + std::string* end_row_key) { + std::string start_key = request->start(); + if (start_key.empty() || start_key < start_key_) { + start_key = start_key_; + } + *end_row_key = request->end(); + if (end_row_key->empty() || (!end_key_.empty() && *end_row_key > end_key_)) { + *end_row_key = end_key_; + } + + if (kv_only_) { // TTL-KV : key_operator_::Compare会解RawKey([row_key | expire_timestamp]) // 因此传递给Leveldb的Key一定要保证以expire_timestamp结尾. - std::unique_ptr strategy(nullptr); if (RawKeyType() == TTLKv) { - if (!start.empty()) { - std::string start_key; - key_operator_->EncodeTeraKey(start, "", "", 0, leveldb::TKT_FORSEEK, &start_key); - start.swap(start_key); - } - if (!end.empty()) { - std::string end_key; - key_operator_->EncodeTeraKey(end, "", "", 0, leveldb::TKT_FORSEEK, &end_key); - end.swap(end_key); - } - strategy.reset(ldb_options_.compact_strategy_factory->NewInstance()); - } - - it->Seek(start); - if (option.round_down()) { - if (it->Valid() && key_operator_->Compare(it->key(), start) > 0) { - it->Prev(); - if (!it->Valid()) { - it->SeekToFirst(); - } - } else if (!it->Valid()) { - it->SeekToLast(); - } - } - for (; it->Valid(); it->Next()) { - leveldb::Slice key = it->key(); - leveldb::Slice value = it->value(); - if (RawKeyType() == TTLKv) { - // only compare row key - *complete = (!noexist_end && key_operator_->Compare(key, end) >= 0); - } else { - *complete = (!noexist_end && key.compare(end) >= 0); - } - if (*complete || (option.size_limit() > 0 && pack_size > option.size_limit())) { - break; - } else { - if (!(strategy && strategy->ScanDrop(key, 0))) { - KeyValuePair* pair = kv_list->Add(); - if (RawKeyType() == TTLKv) { - pair->set_key(key.data(), key.size() - sizeof(int64_t)); - } else { - pair->set_key(key.data(), key.size()); - } - pair->set_value(value.data(), value.size()); - pack_size += pair->key().size() + pair->value().size(); - } else { - VLOG(10) << "[KV-Scan] key:[" << key.ToString() << "] Dropped."; - } - } - if (db_->IsShutdown1Finished()) { - // return early on waiting_for_shutdown2_ - // igrone haven't scan versions of this qualifier - TABLET_UNLOAD_LOG << "break scan kv before iterator next"; - *status = kKeyNotInRange; - return false; - } - } - if (!it->Valid()) { - *complete = true; - } - - counter_.scan_rows.Add(kv_list->size()); - counter_.scan_size.Add(pack_size); - row_scan_count.Add(kv_list->size()); - row_scan_bytes.Add(pack_size); - row_scan_delay.Add(get_micros() - start_scan_us); - - return true; -} - -void TabletIO::SetupScanInternalTeraKey(const ScanTabletRequest* request, - std::string* start_tera_key, - std::string* end_row_key) { + if (!start_key.empty()) { + std::string start_ttlkv_key; + key_operator_->EncodeTeraKey(start_key, "", "", 0, leveldb::TKT_FORSEEK, &start_ttlkv_key); + start_key.swap(start_ttlkv_key); + } + if (!end_row_key->empty()) { + std::string end_ttlkv_key; + key_operator_->EncodeTeraKey(*end_row_key, "", "", 0, leveldb::TKT_FORSEEK, &end_ttlkv_key); + end_row_key->swap(end_ttlkv_key); + } + } + *start_tera_key = start_key; + } else { bool has_cf = request->has_start_family(); bool has_qualifier = (has_cf && request->has_start_qualifier()); // bool has_ts = (has_qualifier && request->has_start_timestamp()); bool has_ts = request->has_start_timestamp(); + key_operator_->EncodeTeraKey(start_key, has_cf ? request->start_family() : "", + has_qualifier ? request->start_qualifier() : "", + has_ts ? request->start_timestamp() : kLatestTs, + leveldb::TKT_VALUE, start_tera_key); + } +} - std::string start_key = request->start(); - *end_row_key = request->end(); - if (start_key == "" || start_key < start_key_) { - start_key = start_key_; - } - if (*end_row_key == "" || (end_key_ != "" && *end_row_key > end_key_)) { - *end_row_key = end_key_; - } - - key_operator_->EncodeTeraKey(start_key, - has_cf ? request->start_family() : "", - has_qualifier ? request->start_qualifier() : "", - has_ts ? request->start_timestamp() : kLatestTs, - leveldb::TKT_VALUE, - start_tera_key); -} - -void TabletIO::SetupScanRowOptions(const ScanTabletRequest* request, - ScanOptions* scan_options) { - scan_options->max_size = 65536; - for (int32_t i = 0; i < request->cf_list_size(); ++i) { - const ColumnFamily& column_family = request->cf_list(i); - const std::string& column_family_name = column_family.family_name(); - std::set& qualifier_list = - scan_options->column_family_list[column_family_name]; - qualifier_list.clear(); - for (int32_t j = 0; j < column_family.qualifier_list_size(); ++j) { - qualifier_list.insert(column_family.qualifier_list(j)); - } - scan_options->iter_cf_set.insert(column_family_name); - } +void TabletIO::AddFilterCfs(filter::ColumnSet& filter_column_set, std::set* cf_set) { + for (auto& filter_column : filter_column_set) { + cf_set->insert(filter_column.first); + } +} - if (request->has_filter_list() && - request->filter_list().filter_size() > 0) { - scan_options->filter_list.CopyFrom(request->filter_list()); - } - if (scan_options->iter_cf_set.size() > 0 && - scan_options->filter_list.filter_size() > 0) { - ScanFilter scan_filter(scan_options->filter_list); - scan_filter.GetAllCfs(&scan_options->iter_cf_set); - } - if (request->has_max_version()) { - scan_options->max_versions = request->max_version(); - } - if (request->has_max_qualifiers()) { - scan_options->max_qualifiers = request->max_qualifiers(); +bool TabletIO::IsValidOldFilter(const Filter& old_filter_desc) { + if (!old_filter_desc.has_type() || old_filter_desc.type() != BinComp) { + LOG(ERROR) << "only support compare."; + return false; + } + if (!old_filter_desc.has_field() || old_filter_desc.field() != ValueFilter) { + LOG(ERROR) << "only support value-compare."; + return false; + } + if (!old_filter_desc.has_value_type()) { + LOG(ERROR) << "only support int64 value."; + return false; + } + if (!old_filter_desc.has_content()) { + return false; + } + if (!old_filter_desc.has_ref_value()) { + return false; + } + if (!old_filter_desc.has_bin_comp_op()) { + return false; + } + return true; +} + +bool TabletIO::TransFilter(const FilterList& old_filter_list_desc, ScanOptions* scan_options) { + filter::FilterListPtr new_filter_list = + std::make_shared(filter::FilterList::kAnd); + for (int i = 0; i < old_filter_list_desc.filter_size(); ++i) { + const Filter& old_filter_desc = old_filter_list_desc.filter(i); + if (!IsValidOldFilter(old_filter_desc)) { + return false; + } + int64_t ref_value = *(int64_t*)(const_cast(old_filter_desc.ref_value().c_str())); + filter::IntegerComparatorPtr comparator = + std::make_shared(filter::IntegerValueType::kInt64, ref_value); + filter::CompareOperator op; + if (!TransBinCompOp(old_filter_desc.bin_comp_op(), &op)) { + return false; + } + filter::ValueFilterPtr value_filter = std::make_shared(op, comparator); + value_filter->SetColumnFamily(old_filter_desc.content()); + new_filter_list->AddFilter(value_filter); + } + scan_options->filter = new_filter_list; + return true; +} + +bool TabletIO::SetupFilter(const filter::FilterDesc& filter_desc, ScanOptions* scan_options) { + filter::FilterDesc::FilterType filter_type = filter_desc.type(); + switch (filter_type) { + case filter::FilterDesc::kFilterList: + scan_options->filter = std::make_shared(); + break; + case filter::FilterDesc::kValueFilter: + scan_options->filter = std::make_shared(); + break; + default: + scan_options->filter.reset(); + LOG(WARNING) << "not support filter type"; + break; + } + if (scan_options->filter) { + int ret = scan_options->filter->ParseFrom(filter_desc.serialized_filter()); + if (ret) { + return true; } else { - scan_options->max_qualifiers = std::numeric_limits::max(); - } - if (request->has_timerange()) { - scan_options->ts_start = request->timerange().ts_start(); - scan_options->ts_end = request->timerange().ts_end(); - } - if (request->has_buffer_limit()) { - scan_options->max_size = request->buffer_limit(); - } - if (request->has_number_limit() && (request->number_limit() > 0)) { - scan_options->number_limit = request->number_limit(); - } - if (request->timeout()) { - scan_options->timeout = request->timeout(); - } - scan_options->snapshot_id = request->snapshot_id(); + LOG(WARNING) << "parse filter failed"; + scan_options->filter.reset(); + } + } + return false; +} + +void TabletIO::SetupScanRowOptions(const ScanTabletRequest* request, ScanOptions* scan_options) { + scan_options->max_size = 65536; + for (int32_t i = 0; i < request->cf_list_size(); ++i) { + const ColumnFamily& column_family = request->cf_list(i); + const std::string& column_family_name = column_family.family_name(); + std::set& qualifier_list = scan_options->column_family_list[column_family_name]; + qualifier_list.clear(); + for (int32_t j = 0; j < column_family.qualifier_list_size(); ++j) { + qualifier_list.insert(column_family.qualifier_list(j)); + } + scan_options->iter_cf_set.insert(column_family_name); + } + + int ret = 0; + if (request->has_filter_list() && request->filter_list().filter_size() > 0) { + ret = TransFilter(request->filter_list(), scan_options); + } else if (request->has_filter()) { + ret = SetupFilter(request->filter(), scan_options); + } + if (scan_options->iter_cf_set.size() > 0 && ret) { + scan_options->filter->GetAllColumn(&scan_options->filter_column_set); + AddFilterCfs(scan_options->filter_column_set, &scan_options->iter_cf_set); + } + if (request->has_max_version()) { + scan_options->max_versions = request->max_version(); + } + if (request->has_max_qualifiers()) { + scan_options->max_qualifiers = request->max_qualifiers(); + } else { + scan_options->max_qualifiers = std::numeric_limits::max(); + } + if (request->has_timerange()) { + scan_options->ts_start = request->timerange().ts_start(); + scan_options->ts_end = request->timerange().ts_end(); + } + if (request->has_buffer_limit()) { + scan_options->max_size = request->buffer_limit(); + } + if (request->has_number_limit() && (request->number_limit() > 0)) { + scan_options->number_limit = request->number_limit(); + } + if (request->timeout()) { + scan_options->timeout = request->timeout(); + } + scan_options->snapshot_id = request->snapshot_id(); } // no concurrent, so no lock on schema_mutex_ void TabletIO::SetupOptionsForLG(const std::set& ignore_err_lgs) { - if (kv_only_) { - if (RawKeyType() == TTLKv) { - ldb_options_.compact_strategy_factory = - new KvCompactStrategyFactory(table_schema_); - } else { - ldb_options_.compact_strategy_factory = - new leveldb::DummyCompactStrategyFactory(); - } - } else if (FLAGS_tera_leveldb_compact_strategy == "default") { - // default strategy - ldb_options_.compact_strategy_factory = - new DefaultCompactStrategyFactory(table_schema_); + if (kv_only_) { + if (RawKeyType() == TTLKv) { + ldb_options_.compact_strategy_factory = new KvCompactStrategyFactory(table_schema_); } else { - ldb_options_.compact_strategy_factory = - new leveldb::DummyCompactStrategyFactory(); + ldb_options_.compact_strategy_factory = new leveldb::DummyCompactStrategyFactory(); } + } else if (FLAGS_tera_leveldb_compact_strategy == "default") { + // default strategy + ldb_options_.compact_strategy_factory = new DefaultCompactStrategyFactory(table_schema_); + } else { + ldb_options_.compact_strategy_factory = new leveldb::DummyCompactStrategyFactory(); + } - std::set* exist_lg_list = new std::set; - std::map* lg_info_list = - new std::map; - std::set ignore_corruption_in_open_lg_list; + std::set* exist_lg_list = new std::set; + std::map* lg_info_list = new std::map; + std::set ignore_corruption_in_open_lg_list; - int64_t triggered_log_size = 0; - for (int32_t lg_i = 0; lg_i < table_schema_.locality_groups_size(); - ++lg_i) { - if (table_schema_.locality_groups(lg_i).is_del()) { - continue; - } - const LocalityGroupSchema& lg_schema = - table_schema_.locality_groups(lg_i); - bool compress = lg_schema.compress_type(); - StoreMedium store = lg_schema.store_type(); - - leveldb::LG_info* lg_info = new leveldb::LG_info(lg_schema.id()); - - if (mock_env_ != NULL) { - // for testing - LOG(INFO) << "mock env used"; - lg_info->env = LeveldbMockEnv(); - } else if (store == MemoryStore) { - if (FLAGS_tera_use_flash_for_memenv) { - if (FLAGS_tera_tabletnode_flash_block_cache_enabled) { - LOG(INFO) << "MemLG[" << lg_i << "] activate FlashBlockCache"; - lg_info->env = DefaultFlashBlockCacheEnv(); - } else { - lg_info->env = LeveldbFlashEnv(); - } - } else { - lg_info->env = LeveldbMemEnv(); - } - lg_info->seek_latency = 0; - lg_info->block_cache = m_memory_cache; - } else if (store == FlashStore) { - if (FLAGS_tera_tabletnode_flash_block_cache_enabled) { - LOG(INFO) << "FlashLG[" << lg_i << "] activate FlashBlockCache"; - lg_info->env = DefaultFlashBlockCacheEnv(); - } else { - lg_info->env = LeveldbFlashEnv(); - lg_info->use_direct_io_read = FLAGS_tera_leveldb_use_direct_io_read; - lg_info->use_direct_io_write = FLAGS_tera_leveldb_use_direct_io_write; - lg_info->posix_write_buffer_size = FLAGS_tera_leveldb_posix_write_buffer_size; - } - lg_info->seek_latency = FLAGS_tera_leveldb_env_local_seek_latency; - } else { - lg_info->env = LeveldbBaseEnv(); - lg_info->seek_latency = FLAGS_tera_leveldb_env_dfs_seek_latency; - } + int64_t triggered_log_size = 0; + for (int32_t lg_i = 0; lg_i < table_schema_.locality_groups_size(); ++lg_i) { + if (table_schema_.locality_groups(lg_i).is_del()) { + continue; + } + const LocalityGroupSchema& lg_schema = table_schema_.locality_groups(lg_i); + bool compress = lg_schema.compress_type(); + StoreMedium store = lg_schema.store_type(); - if (compress) { - lg_info->compression = leveldb::kSnappyCompression; - } + leveldb::LG_info* lg_info = new leveldb::LG_info(lg_schema.id()); + lg_info->memtable_shard_num = ldb_options_.memtable_shard_num; - lg_info->block_size = lg_schema.block_size() * 1024; - if (lg_schema.use_memtable_on_leveldb()) { - lg_info->use_memtable_on_leveldb = true; - lg_info->memtable_ldb_write_buffer_size = - lg_schema.memtable_ldb_write_buffer_size() * 1024; - lg_info->memtable_ldb_block_size = - lg_schema.memtable_ldb_block_size() * 1024; - LOG(INFO) << "enable mem-ldb for LG:" << lg_schema.name().c_str() - << ", buffer_size:" << lg_info->memtable_ldb_write_buffer_size - << ", block_size:" << lg_info->memtable_ldb_block_size; - } - lg_info->sst_size = lg_schema.sst_size(); - // FLAGS_tera_tablet_write_buffer_size is the max buffer size - int64_t max_size = FLAGS_tera_tablet_max_write_buffer_size * 1024 * 1024; - if (lg_schema.sst_size() * 4 < max_size) { - lg_info->write_buffer_size = lg_schema.sst_size() * 4; - } else { - lg_info->write_buffer_size = max_size; - } - triggered_log_size += lg_info->write_buffer_size; - lg_info->table_builder_batch_write = (FLAGS_tera_leveldb_table_builder_write_batch_size > 0); - lg_info->table_builder_batch_size = FLAGS_tera_leveldb_table_builder_write_batch_size; - exist_lg_list->insert(lg_i); - (*lg_info_list)[lg_i] = lg_info; - if (ignore_err_lgs.find(lg_schema.name()) != ignore_err_lgs.end()) { - ignore_corruption_in_open_lg_list.insert(lg_i); - } - } if (mock_env_ != NULL) { - ldb_options_.env = LeveldbMockEnv(); + // for testing + LOG(INFO) << "mock env used"; + lg_info->env = LeveldbMockEnv(); + } else if (store == MemoryStore) { + if (FLAGS_tera_use_flash_for_memenv) { + if (!GetCachePaths().empty()) { + if (FLAGS_tera_enable_persistent_cache) { + lg_info->env = LeveldbBaseEnv(); + auto s = GetPersistentCache(&lg_info->persistent_cache); + assert(s.ok()); + } else { + lg_info->env = LeveldbFlashEnv(); + if (!lg_info->env) { + lg_info->env = LeveldbBaseEnv(); + } + } + } + } else { + lg_info->env = LeveldbMemEnv(); + } + lg_info->seek_latency = 0; + lg_info->block_cache = m_memory_cache; + } else if (store == FlashStore && !GetCachePaths().empty()) { + if (FLAGS_tera_enable_persistent_cache) { + lg_info->env = LeveldbBaseEnv(); + auto s = GetPersistentCache(&lg_info->persistent_cache); + assert(s.ok()); + } else { + lg_info->env = LeveldbFlashEnv(); + if (!lg_info->env) { + lg_info->env = LeveldbBaseEnv(); + } else { + lg_info->use_direct_io_read = FLAGS_tera_leveldb_use_direct_io_read; + lg_info->use_direct_io_write = FLAGS_tera_leveldb_use_direct_io_write; + lg_info->posix_write_buffer_size = FLAGS_tera_leveldb_posix_write_buffer_size; + } + } + if (lg_info->persistent_cache || lg_info->env == LeveldbFlashEnv()) { + lg_info->seek_latency = FLAGS_tera_leveldb_env_local_seek_latency; + } } else { - ldb_options_.env = LeveldbBaseEnv(); + lg_info->env = LeveldbBaseEnv(); + lg_info->seek_latency = FLAGS_tera_leveldb_env_dfs_seek_latency; } - if (exist_lg_list->size() == 0) { - delete exist_lg_list; - } else { - ldb_options_.exist_lg_list = exist_lg_list; - ldb_options_.flush_triggered_log_size = triggered_log_size * 2; - } - if (lg_info_list->size() == 0) { - delete lg_info_list; - } else { - ldb_options_.lg_info_list = lg_info_list; - ldb_options_.ignore_corruption_in_open_lg_list - = ignore_corruption_in_open_lg_list; + if (compress) { + lg_info->compression = leveldb::kSnappyCompression; } - IndexingCfToLG(); + lg_info->block_size = lg_schema.block_size() * 1024; + if (lg_schema.use_memtable_on_leveldb()) { + lg_info->use_memtable_on_leveldb = true; + lg_info->memtable_ldb_write_buffer_size = lg_schema.memtable_ldb_write_buffer_size() * 1024; + lg_info->memtable_ldb_block_size = lg_schema.memtable_ldb_block_size() * 1024; + LOG(INFO) << "enable mem-ldb for LG:" << lg_schema.name().c_str() + << ", buffer_size:" << lg_info->memtable_ldb_write_buffer_size + << ", block_size:" << lg_info->memtable_ldb_block_size; + } + lg_info->sst_size = lg_schema.sst_size(); + // FLAGS_tera_tablet_write_buffer_size is the max buffer size + int64_t max_size = FLAGS_tera_tablet_max_write_buffer_size * 1024 * 1024; + if (lg_schema.sst_size() * 4 < max_size) { + lg_info->write_buffer_size = lg_schema.sst_size() * 4; + } else { + lg_info->write_buffer_size = max_size; + } + triggered_log_size += lg_info->write_buffer_size; + lg_info->table_builder_batch_write = (FLAGS_tera_leveldb_table_builder_write_batch_size > 0); + lg_info->table_builder_batch_size = FLAGS_tera_leveldb_table_builder_write_batch_size; + exist_lg_list->insert(lg_i); + (*lg_info_list)[lg_i] = lg_info; + if (ignore_err_lgs.find(lg_schema.name()) != ignore_err_lgs.end()) { + ignore_corruption_in_open_lg_list.insert(lg_i); + } + } + if (mock_env_ != NULL) { + ldb_options_.env = LeveldbMockEnv(); + } else { + ldb_options_.env = LeveldbBaseEnv(); + } + + if (exist_lg_list->size() == 0) { + delete exist_lg_list; + } else { + ldb_options_.exist_lg_list = exist_lg_list; + ldb_options_.flush_triggered_log_size = triggered_log_size * 2; + } + if (lg_info_list->size() == 0) { + delete lg_info_list; + } else { + ldb_options_.lg_info_list = lg_info_list; + ldb_options_.ignore_corruption_in_open_lg_list = ignore_corruption_in_open_lg_list; + } + + IndexingCfToLG(); } void TabletIO::TearDownOptionsForLG() { - if (ldb_options_.compact_strategy_factory) { - delete ldb_options_.compact_strategy_factory; - ldb_options_.compact_strategy_factory = NULL; - } + if (ldb_options_.compact_strategy_factory) { + delete ldb_options_.compact_strategy_factory; + ldb_options_.compact_strategy_factory = NULL; + } - if (ldb_options_.exist_lg_list) { - ldb_options_.exist_lg_list->clear(); - delete ldb_options_.exist_lg_list; - ldb_options_.exist_lg_list = NULL; - } + if (ldb_options_.exist_lg_list) { + ldb_options_.exist_lg_list->clear(); + delete ldb_options_.exist_lg_list; + ldb_options_.exist_lg_list = NULL; + } - if (ldb_options_.lg_info_list) { - std::map::iterator it = - ldb_options_.lg_info_list->begin(); - for (; it != ldb_options_.lg_info_list->end(); ++it) { - delete it->second; - } - delete ldb_options_.lg_info_list; - ldb_options_.lg_info_list = NULL; + if (ldb_options_.lg_info_list) { + std::map::iterator it = ldb_options_.lg_info_list->begin(); + for (; it != ldb_options_.lg_info_list->end(); ++it) { + delete it->second; } + delete ldb_options_.lg_info_list; + ldb_options_.lg_info_list = NULL; + } } void TabletIO::IndexingCfToLG() { - for (int32_t i = 0; i < table_schema_.locality_groups_size(); ++i) { - const LocalityGroupSchema& lg_schema = - table_schema_.locality_groups(i); - lg_id_map_[lg_schema.name()] = i; // lg_schema.id(); - } - for (int32_t i = 0; i < table_schema_.column_families_size(); ++i) { - const ColumnFamilySchema& cf_schema = - table_schema_.column_families(i); - - std::map::iterator it = - lg_id_map_.find(cf_schema.locality_group()); - if (it == lg_id_map_.end()) { - // using default lg for not-defined descor - cf_lg_map_[cf_schema.name()] = 0; - } else { - cf_lg_map_[cf_schema.name()] = it->second; - } + for (int32_t i = 0; i < table_schema_.locality_groups_size(); ++i) { + const LocalityGroupSchema& lg_schema = table_schema_.locality_groups(i); + lg_id_map_[lg_schema.name()] = i; // lg_schema.id(); + } + for (int32_t i = 0; i < table_schema_.column_families_size(); ++i) { + const ColumnFamilySchema& cf_schema = table_schema_.column_families(i); + + std::map::iterator it = lg_id_map_.find(cf_schema.locality_group()); + if (it == lg_id_map_.end()) { + // using default lg for not-defined descor + cf_lg_map_[cf_schema.name()] = 0; + } else { + cf_lg_map_[cf_schema.name()] = it->second; } + } } void TabletIO::SetupIteratorOptions(const ScanOptions& scan_options, leveldb::ReadOptions* leveldb_opts) { - MutexLock lock(&schema_mutex_); - std::set target_lgs; - std::set::const_iterator cf_it = scan_options.iter_cf_set.begin(); - for (; cf_it != scan_options.iter_cf_set.end(); ++cf_it) { - std::map::iterator map_it = - cf_lg_map_.find(*cf_it); - if (map_it != cf_lg_map_.end()) { - target_lgs.insert(map_it->second); - } - } - if (target_lgs.size() > 0) { - leveldb_opts->target_lgs = new std::set(target_lgs); + MutexLock lock(&schema_mutex_); + std::set target_lgs; + std::set::const_iterator cf_it = scan_options.iter_cf_set.begin(); + for (; cf_it != scan_options.iter_cf_set.end(); ++cf_it) { + std::map::iterator map_it = cf_lg_map_.find(*cf_it); + if (map_it != cf_lg_map_.end()) { + target_lgs.insert(map_it->second); } + } + if (target_lgs.size() > 0) { + leveldb_opts->target_lgs = new std::set(target_lgs); + } + leveldb_opts->enable_dfs_read_thread_limiter = scan_options.enable_dfs_read_thread_limiter; } void TabletIO::SetupSingleRowIteratorOptions(const std::string& row_key, leveldb::ReadOptions* opts) { - std::string row_start_key, row_end_key; - key_operator_->EncodeTeraKey(row_key, "", "", kLatestTs, - leveldb::TKT_FORSEEK, &row_start_key); - key_operator_->EncodeTeraKey(row_key + '\0', "", "", kLatestTs, - leveldb::TKT_FORSEEK, &row_end_key); - opts->read_single_row = true; - opts->row_start_key = row_start_key; - opts->row_end_key = row_end_key; + std::string row_start_key, row_end_key; + key_operator_->EncodeTeraKey(row_key, "", "", kLatestTs, leveldb::TKT_FORSEEK, &row_start_key); + + if (RawKeyType() == Readable) { + key_operator_->EncodeTeraKey(row_key + '\1', "", "", kLatestTs, leveldb::TKT_FORSEEK, + &row_end_key); + } else { + key_operator_->EncodeTeraKey(row_key + '\0', "", "", kLatestTs, leveldb::TKT_FORSEEK, + &row_end_key); + } + + opts->read_single_row = true; + opts->row_start_key = row_start_key; + opts->row_end_key = row_end_key; } void TabletIO::TearDownIteratorOptions(leveldb::ReadOptions* opts) { - if (opts->target_lgs) { - delete opts->target_lgs; - opts->target_lgs = NULL; - } + if (opts->target_lgs) { + delete opts->target_lgs; + opts->target_lgs = NULL; + } } -bool TabletIO::ShouldFilterRowBuffer(std::list& row_buf, +bool TabletIO::ShouldFilterRowBuffer(const SingleRowBuffer& row_buf, const ScanOptions& scan_options) { - if (row_buf.size() <= 0) { - return true; - } - std::list::iterator it; - int filter_num = scan_options.filter_list.filter_size(); - - VLOG(10) << "Filter check: kv_num: " << row_buf.size() - << ", filter_num: " << filter_num; - - for (int i = 0; i < filter_num; ++i) { - const Filter& filter = scan_options.filter_list.filter(i); - for (it = row_buf.begin(); it != row_buf.end(); ++it) { - if (it->column_family() != filter.content()) { - continue; - } - if (filter.value_type() != kINT64) { - LOG(ERROR) << "only support int64 value."; - return true; - } - if (!CheckCell(*it, filter)) { - return true; - } - } - } + if (row_buf.Size() <= 0) { + return true; + } + if (!scan_options.filter) { return false; -} - -void TabletIO::ProcessRowBuffer(std::list& row_buf, - const ScanOptions& scan_options, - RowResult* value_list, - uint32_t* buffer_size, - int64_t* number_limit) { - if (row_buf.size() <= 0) { - return; - } - if (ShouldFilterRowBuffer(row_buf, scan_options)) { - return; - } - - std::list::iterator it; - for (it = row_buf.begin(); it != row_buf.end(); ++it) { - const std::string& key = it->key(); - const std::string& col = it->column_family(); - const std::string& qual = it->qualifier(); - const std::string& value = it->value(); - int64_t ts = it->timestamp(); - - // skip unnecessary columns and qualifiers - if (scan_options.column_family_list.size() > 0) { - ColumnFamilyMap::const_iterator it = - scan_options.column_family_list.find(col); - if (it != scan_options.column_family_list.end()) { - const std::set& qual_list = it->second; - if (qual_list.size() > 0 && qual_list.end() == qual_list.find(qual)) { - continue; - } - } else { - continue; - } - } - // time range filter - if (ts < scan_options.ts_start || ts > scan_options.ts_end) { - continue; - } - - value_list->add_key_values()->CopyFrom(*it); - - (*number_limit)++; - *buffer_size += key.size() + col.size() + qual.size() - + sizeof(ts) + value.size(); - } -} - -uint64_t TabletIO::GetSnapshot(uint64_t id, uint64_t snapshot_sequence, - StatusCode* status) { - { - MutexLock lock(&mutex_); - if (status_ != kReady) { - SetStatusCode(status_, status); - return 0; - } - db_ref_count_++; - } - uint64_t snapshot = db_->GetSnapshot(snapshot_sequence); + } + scan_options.filter->Reset(); + for (size_t kv_index = 0; kv_index != row_buf.Size(); ++kv_index) { + filter::Filter::ReturnCode rc = scan_options.filter->FilterCell( + row_buf.ColumnFamily(kv_index), row_buf.Qualifier(kv_index), row_buf.Value(kv_index)); + if (rc == filter::Filter::kNotIncludeCurAndLeftCellOfRow) { + break; + } + } + if (scan_options.filter->FilterRow()) { + return true; + } else { + return false; + } +} + +void TabletIO::ProcessRowBuffer(const SingleRowBuffer& row_buf, const ScanOptions& scan_options, + RowResult* values, uint32_t* buffer_size, int64_t* number_limit) { + if (row_buf.Size() <= 0) { + return; + } + if (ShouldFilterRowBuffer(row_buf, scan_options)) { + scan_filter_count.Add(row_buf.Size()); + return; + } + + for (size_t kv_index = 0; kv_index != row_buf.Size(); ++kv_index) { + const std::string& key = row_buf.RowKey(kv_index); + const std::string& col = row_buf.ColumnFamily(kv_index); + const std::string& qual = row_buf.Qualifier(kv_index); + const std::string& value = row_buf.Value(kv_index); + int64_t ts = row_buf.TimeStamp(kv_index); + + // skip unnecessary columns and qualifiers + if (scan_options.column_family_list.size() > 0) { + ColumnFamilyMap::const_iterator it = scan_options.column_family_list.find(col); + if (it != scan_options.column_family_list.end()) { + const std::set& qual_list = it->second; + if (qual_list.size() > 0 && qual_list.end() == qual_list.find(qual)) { + scan_filter_count.Inc(); + continue; + } + } else { + scan_filter_count.Inc(); + continue; + } + } + // time range filter + if (ts < scan_options.ts_start || ts > scan_options.ts_end) { + scan_filter_count.Inc(); + continue; + } + + (*number_limit)++; + *buffer_size += key.size() + col.size() + qual.size() + sizeof(ts) + value.size(); + + row_buf.Serialize(kv_index, values->add_key_values()); + } +} + +uint64_t TabletIO::GetSnapshot(uint64_t id, uint64_t snapshot_sequence, StatusCode* status) { + { MutexLock lock(&mutex_); - id_to_snapshot_num_[id] = snapshot; - db_ref_count_--; - return snapshot; -} - -bool TabletIO::ReleaseSnapshot(uint64_t snapshot_id, StatusCode* status) { - { - MutexLock lock(&mutex_); - if (status_ != kReady) { - SetStatusCode(status_, status); - return false; - } - if (id_to_snapshot_num_.find(snapshot_id) == id_to_snapshot_num_.end()) { - SetStatusCode(kSnapshotNotExist, status); - return false; - } - db_ref_count_++; + if (status_ != kReady) { + SetStatusCode(status_, status); + return 0; } - db_->ReleaseSnapshot(id_to_snapshot_num_[snapshot_id]); - MutexLock lock(&mutex_); - id_to_snapshot_num_.erase(snapshot_id); - db_ref_count_--; - return true; + db_ref_count_++; + } + uint64_t snapshot = db_->GetSnapshot(snapshot_sequence); + MutexLock lock(&mutex_); + id_to_snapshot_num_[id] = snapshot; + db_ref_count_--; + return snapshot; } -void TabletIO::ListSnapshot(std::vector* snapshot_id) { +bool TabletIO::ReleaseSnapshot(uint64_t snapshot_id, StatusCode* status) { + { MutexLock lock(&mutex_); if (status_ != kReady) { - return; + SetStatusCode(status_, status); + return false; } - for (std::map::iterator it = id_to_snapshot_num_.begin(); - it != id_to_snapshot_num_.end(); ++it) { - snapshot_id->push_back(it->first); - VLOG(7) << tablet_path_ << " ListSnapshot: " << it->first << " - " << it->second; + if (id_to_snapshot_num_.find(snapshot_id) == id_to_snapshot_num_.end()) { + SetStatusCode(kSnapshotNotExist, status); + return false; } + db_ref_count_++; + } + db_->ReleaseSnapshot(id_to_snapshot_num_[snapshot_id]); + MutexLock lock(&mutex_); + id_to_snapshot_num_.erase(snapshot_id); + db_ref_count_--; + return true; +} + +void TabletIO::ListSnapshot(std::vector* snapshot_id) { + MutexLock lock(&mutex_); + if (status_ != kReady) { + return; + } + for (std::map::iterator it = id_to_snapshot_num_.begin(); + it != id_to_snapshot_num_.end(); ++it) { + snapshot_id->push_back(it->first); + VLOG(7) << tablet_path_ << " ListSnapshot: " << it->first << " - " << it->second; + } } uint64_t TabletIO::Rollback(uint64_t snapshot_id, StatusCode* status) { - uint64_t sequence; - { - MutexLock lock(&mutex_); - if (status_ != kReady) { - SetStatusCode(status_, status); - return false; - } - std::map::iterator it = id_to_snapshot_num_.find(snapshot_id); - if (it == id_to_snapshot_num_.end()) { - SetStatusCode(kSnapshotNotExist, status); - return false; - } else { - sequence = it->second; - } - db_ref_count_++; - } - uint64_t rollback_point = db_->Rollback(sequence); + uint64_t sequence; + { MutexLock lock(&mutex_); - rollbacks_[sequence] = rollback_point; - db_ref_count_--; - return rollback_point; + if (status_ != kReady) { + SetStatusCode(status_, status); + return false; + } + std::map::iterator it = id_to_snapshot_num_.find(snapshot_id); + if (it == id_to_snapshot_num_.end()) { + SetStatusCode(kSnapshotNotExist, status); + return false; + } else { + sequence = it->second; + } + db_ref_count_++; + } + uint64_t rollback_point = db_->Rollback(sequence); + MutexLock lock(&mutex_); + rollbacks_[sequence] = rollback_point; + db_ref_count_--; + return rollback_point; } uint32_t TabletIO::GetLGidByCFName(const std::string& cfname) { - MutexLock lock(&schema_mutex_); - std::map::iterator it = cf_lg_map_.find(cfname); - if (it != cf_lg_map_.end()) { - return it->second; - } - return 0; + MutexLock lock(&schema_mutex_); + std::map::iterator it = cf_lg_map_.find(cfname); + if (it != cf_lg_map_.end()) { + return it->second; + } + return 0; } void TabletIO::SetStatus(TabletStatus status) { - MutexLock lock(&mutex_); - status_ = status; + MutexLock lock(&mutex_); + status_ = status; } TabletIO::TabletStatus TabletIO::GetStatus() { - MutexLock lock(&mutex_); - return status_; + MutexLock lock(&mutex_); + return status_; } - std::string TabletIO::GetLastErrorMessage() { - MutexLock lock(&mutex_); - return last_err_msg_; + MutexLock lock(&mutex_); + return last_err_msg_; } -const leveldb::RawKeyOperator* TabletIO::GetRawKeyOperator() { - return key_operator_; -} +const leveldb::RawKeyOperator* TabletIO::GetRawKeyOperator() { return key_operator_; } int32_t TabletIO::AddRef() { - MutexLock lock(&mutex_); - ++ref_count_; - return ref_count_; + MutexLock lock(&mutex_); + ++ref_count_; + return ref_count_; } int32_t TabletIO::DecRef() { - int32_t ref = 0; - { - MutexLock lock(&mutex_); - ref = (--ref_count_); - } - if (ref == 0) { - delete this; - } - return ref; + int32_t ref = 0; + { + MutexLock lock(&mutex_); + ref = (--ref_count_); + } + if (ref == 0) { + delete this; + } + return ref; } -int32_t TabletIO::GetRef() const { - return ref_count_; -} +int32_t TabletIO::GetRef() const { return ref_count_; } void TabletIO::ApplySchema(const TableSchema& schema) { - MutexLock lock(&schema_mutex_); - SetSchema(schema); - IndexingCfToLG(); - ldb_options_.compact_strategy_factory->SetArg(&schema); + MutexLock lock(&schema_mutex_); + SetSchema(schema); + IndexingCfToLG(); + ldb_options_.compact_strategy_factory->SetArg(&schema); } bool TabletIO::SingleRowTxnCheck(const std::string& row_key, - const SingleRowTxnReadInfo& txn_read_info, - StatusCode* status) { - // init scan_options - ScanOptions scan_options; - for (int32_t i = 0; i < txn_read_info.read_column_list_size(); ++i) { - const ColumnFamily& column_info = txn_read_info.read_column_list(i); - std::set& qualifier_list = - scan_options.column_family_list[column_info.family_name()]; - for (int32_t j = 0; j < column_info.qualifier_list_size(); ++j) { - qualifier_list.insert(column_info.qualifier_list(j)); - } - scan_options.iter_cf_set.insert(column_info.family_name()); - } - scan_options.max_versions = txn_read_info.max_versions(); - if (txn_read_info.has_start_timestamp()) { - scan_options.ts_start = txn_read_info.start_timestamp(); - } - if (txn_read_info.has_end_timestamp()) { - scan_options.ts_end = txn_read_info.end_timestamp(); - } - - // read the row - std::string start_tera_key; - key_operator_->EncodeTeraKey(row_key, "", "", kLatestTs, - leveldb::TKT_VALUE, &start_tera_key); - std::string end_row_key = row_key + '\0'; - RowResult row_result; - uint32_t read_row_count = 0; - uint32_t read_bytes = 0; - bool is_complete = false; - if (!LowLevelScan(start_tera_key, end_row_key, scan_options, - &row_result, NULL, &read_row_count, &read_bytes, - &is_complete, status)) { - return false; - } + const SingleRowTxnReadInfo& txn_read_info, StatusCode* status) { + // init scan_options + ScanOptions scan_options; + for (int32_t i = 0; i < txn_read_info.read_column_list_size(); ++i) { + const ColumnFamily& column_info = txn_read_info.read_column_list(i); + std::set& qualifier_list = + scan_options.column_family_list[column_info.family_name()]; + for (int32_t j = 0; j < column_info.qualifier_list_size(); ++j) { + qualifier_list.insert(column_info.qualifier_list(j)); + } + scan_options.iter_cf_set.insert(column_info.family_name()); + } + scan_options.max_versions = txn_read_info.max_versions(); + if (txn_read_info.has_start_timestamp()) { + scan_options.ts_start = txn_read_info.start_timestamp(); + } + if (txn_read_info.has_end_timestamp()) { + scan_options.ts_end = txn_read_info.end_timestamp(); + } + + // read the row + std::string start_tera_key; + key_operator_->EncodeTeraKey(row_key, "", "", kLatestTs, leveldb::TKT_VALUE, &start_tera_key); + std::string end_row_key = row_key + '\0'; + RowResult row_result; + uint32_t read_row_count = 0; + uint32_t read_cell_count = 0; + uint32_t read_bytes = 0; + bool complete = false; + if (!LowLevelScan(start_tera_key, end_row_key, scan_options, &row_result, NULL, &read_row_count, + &read_cell_count, &read_bytes, &complete, status)) { + return false; + } - // verify value_list against txn_read_info - if (row_result.key_values_size() != txn_read_info.read_result().key_values_size()) { - SetStatusCode(kTxnFail, status); - return false; - } - // older sdk's write request has no start_timestamp/end_timestamp/value - bool has_timestamp = txn_read_info.has_start_timestamp() || txn_read_info.has_end_timestamp(); - for (int32_t i = 0; i < row_result.key_values_size(); ++i) { - const KeyValuePair& new_kv = row_result.key_values(i); - const KeyValuePair& old_kv = txn_read_info.read_result().key_values(i); - if (new_kv.column_family() != old_kv.column_family() - || new_kv.qualifier() != old_kv.qualifier() - || new_kv.timestamp() != old_kv.timestamp() - || (has_timestamp && new_kv.value() != old_kv.value())) { - SetStatusCode(kTxnFail, status); - return false; - } - } - return true; + // verify values against txn_read_info + if (row_result.key_values_size() != txn_read_info.read_result().key_values_size()) { + LOG(WARNING) << "[stxn] [kTxnFail] " << row_result.key_values_size() << ":" + << txn_read_info.read_result().key_values_size(); + SetStatusCode(kTxnFail, status); + return false; + } + // older sdk's write request has no start_timestamp/end_timestamp/value + bool has_timestamp = txn_read_info.has_start_timestamp() || txn_read_info.has_end_timestamp(); + for (int32_t i = 0; i < row_result.key_values_size(); ++i) { + const KeyValuePair& new_kv = row_result.key_values(i); + const KeyValuePair& old_kv = txn_read_info.read_result().key_values(i); + if (new_kv.column_family() != old_kv.column_family() || + new_kv.qualifier() != old_kv.qualifier() || new_kv.timestamp() != old_kv.timestamp() || + (has_timestamp && new_kv.value() != old_kv.value())) { + LOG(WARNING) << "[stxn] [kTxnFail] ColumnFamily=" << new_kv.column_family() << ":" + << old_kv.column_family() << " Qualifier=" << new_kv.qualifier() << ":" + << old_kv.qualifier() << " Timestamp=" << new_kv.timestamp() << ":" + << old_kv.timestamp() << " " << has_timestamp + << "Value=" << (new_kv.value() != old_kv.value()); + SetStatusCode(kTxnFail, status); + return false; + } + } + return true; } bool TabletIO::RefreshDBStatus() { - { - MutexLock lock(&mutex_); - if (status_ != kReady) { - return false; - } - db_ref_count_++; + { + MutexLock lock(&mutex_); + if (status_ != kReady) { + return false; } + db_ref_count_++; + } - std::string db_property_key = "leveldb.verify-db-integrity"; - std::string db_property_val; - if (db_->GetProperty(db_property_key, &db_property_val)) { - MutexLock lock(&mutex_); - if (db_property_val.find("verify_fail") != std::string::npos) { - tablet_status_ = TabletMeta::kTabletCorruption; - LOG(WARNING) << "db status: " << db_property_val; - } else { - tablet_status_ = static_cast(kTabletReady); - } + std::string db_property_key = "leveldb.verify-db-integrity"; + std::string db_property_val; + if (db_->GetProperty(db_property_key, &db_property_val)) { + MutexLock lock(&mutex_); + if (db_property_val.find("verify_fail") != std::string::npos) { + tablet_status_ = TabletMeta::kTabletCorruption; + LOG(WARNING) << "db status: " << db_property_val; + } else if (db_property_val.find("manifest_error") != std::string::npos) { + tablet_status_ = TabletMeta::kTabletManifestError; + LOG(WARNING) << "db status: " << db_property_val; + } else { + tablet_status_ = static_cast(kTabletReady); } + } - { - MutexLock lock(&mutex_); - db_ref_count_--; - } - return true; + { + MutexLock lock(&mutex_); + db_ref_count_--; + } + return true; } void TabletIO::GetDBStatus(tera::TabletMeta::TabletStatus* tablet_status) { - *tablet_status = static_cast(kTabletReady); - MutexLock lock(&mutex_); - if (status_ == kReady) { - *tablet_status = tablet_status_; - } + *tablet_status = static_cast(kTabletReady); + MutexLock lock(&mutex_); + if (status_ == kReady) { + *tablet_status = tablet_status_; + } } void TabletIO::CheckBackgroundError(std::string* error_msg) { - { - MutexLock lock(&mutex_); - if (status_ != kReady) { - return; - } - db_ref_count_++; - } - - std::string db_property_key = "leveldb.compaction_error"; - std::string db_property_val; - if (db_->GetProperty(db_property_key, &db_property_val) && - db_property_val.find("Corruption: ") != std::string::npos) { - - if (db_property_val.length() > kReportErrorSize) { - LOG(ERROR) << "Find compaction error too much for report, " - << db_property_val; - db_property_val = "Too much error for report, check ts log."; - } - *error_msg = db_property_val; - } - { - MutexLock lock(&mutex_); - db_ref_count_--; - } + { + MutexLock lock(&mutex_); + if (status_ != kReady) { + return; + } + db_ref_count_++; + } + + std::string db_property_key = "leveldb.compaction_error"; + std::string db_property_val; + if (db_->GetProperty(db_property_key, &db_property_val) && + db_property_val.find("Corruption: ") != std::string::npos) { + if (db_property_val.length() > kReportErrorSize) { + LOG(ERROR) << "Find compaction error too much for report, " << db_property_val; + db_property_val = "Too much error for report, check ts log."; + } + *error_msg = db_property_val; + } + { + MutexLock lock(&mutex_); + db_ref_count_--; + } } bool TabletIO::IsUrgentUnload() const { - return try_unload_count_ >= FLAGS_tera_tablet_unload_count_limit; + return try_unload_count_ >= FLAGS_tera_tablet_unload_count_limit; } bool TabletIO::GetDBLevelSize(std::vector* result) { - { - MutexLock lock(&mutex_); - if (status_ != kReady) { - return false; - } - db_ref_count_++; - } - db_->GetCurrentLevelSize(result); - { - MutexLock lock(&mutex_); - db_ref_count_--; + { + MutexLock lock(&mutex_); + if (status_ != kReady) { + return false; } - return true; - + db_ref_count_++; + } + db_->GetCurrentLevelSize(result); + { + MutexLock lock(&mutex_); + db_ref_count_--; + } + return true; } -} // namespace io -} // namespace tera +} // namespace io +} // namespace tera diff --git a/src/io/tablet_io.h b/src/io/tablet_io.h index da523d97b..4cacbb736 100644 --- a/src/io/tablet_io.h +++ b/src/io/tablet_io.h @@ -29,6 +29,7 @@ #include "proto/tabletnode_rpc.pb.h" #include "types.h" #include "common/counter.h" +#include "leveldb/include/leveldb/tera_key.h" namespace tera { @@ -51,294 +52,352 @@ class TabletWriter; struct ScanOptions; struct ScanContext; class ScanContextManager; +class SingleRowBuffer; std::string MetricLabelToString(const std::string& tablet_path); class TabletIO { -public: - enum CompactionType { - kManualCompaction = 1, - kMinorCompaction = 2, - }; - - enum TabletStatus { - kNotInit = kTabletNotInit, - kReady = kTabletReady, - kOnLoad = kTabletOnLoad, - kUnLoading = kTabletUnLoading, - kUnLoading2 = kTabletUnLoading2 - }; - - struct StatCounter { - const std::string label; - tera::MetricCounter low_read_cell; - tera::MetricCounter scan_rows; - tera::MetricCounter scan_kvs; - tera::MetricCounter scan_size; - tera::MetricCounter read_rows; - tera::MetricCounter read_kvs; - tera::MetricCounter read_size; - tera::MetricCounter write_rows; - tera::MetricCounter write_kvs; - tera::MetricCounter write_size; - tera::MetricCounter write_reject_rows; - - StatCounter(const std::string& tablet_path) - : label(MetricLabelToString(tablet_path)), - low_read_cell(tera::kLowReadCellMetricName, label, {SubscriberType::QPS}), - scan_rows(tera::kScanRowsMetricName, label, {SubscriberType::QPS}), - scan_kvs(tera::kScanKvsMetricName, label, {SubscriberType::QPS}), - scan_size(tera::kScanThroughPutMetricName, label, {SubscriberType::THROUGHPUT}), - read_rows(tera::kReadRowsMetricName, label, {SubscriberType::QPS}), - read_kvs(tera::kReadKvsMetricName, label, {SubscriberType::QPS}), - read_size(tera::kReadThroughPutMetricName, label, {SubscriberType::THROUGHPUT}), - write_rows(tera::kWriteRowsMetricName, label, {SubscriberType::QPS}), - write_kvs(tera::kWriteKvsMetricName, label, {SubscriberType::QPS}), - write_size(tera::kWriteThroughPutMetricName, label, {SubscriberType::THROUGHPUT}), - write_reject_rows(tera::kWriteRejectRowsMetricName, label, {SubscriberType::QPS}) {} - }; - - typedef std::function*, - std::vector*)> WriteCallback; - - friend std::ostream& operator << (std::ostream& o, const TabletIO& tablet_io); - -public: - TabletIO(const std::string& key_start, const std::string& key_end, - const std::string& path); - virtual ~TabletIO(); - - // for testing - void SetMockEnv(leveldb::Env* e); - - std::string GetTableName() const; - std::string GetTablePath() const; - std::string GetStartKey() const; - std::string GetEndKey() const; - const std::string& GetMetricLabel() const; - virtual CompactStatus GetCompactStatus() const; - virtual TableSchema GetSchema() const; - RawKey RawKeyType() const; - bool KvOnly() const { return kv_only_; } - StatCounter& GetCounter(); - // Set independent cache for memory table. - void SetMemoryCache(leveldb::Cache* cache); - // tablet - virtual bool Load(const TableSchema& schema, - const std::string& path, - const std::vector& parent_tablets, - const std::set& ignore_err_lgs, - leveldb::Logger* logger = NULL, - leveldb::Cache* block_cache = NULL, - leveldb::TableCache* table_cache = NULL, - StatusCode* status = NULL); - virtual bool Unload(StatusCode* status = NULL); - virtual bool Split(std::string* split_key, StatusCode* status = NULL); - virtual bool Compact(int lg_no = -1, StatusCode* status = NULL, CompactionType type = kManualCompaction); - bool Destroy(StatusCode* status = NULL); - virtual bool GetDataSize(uint64_t* size, std::vector* lgsize = NULL, - StatusCode* status = NULL); - virtual bool AddInheritedLiveFiles(std::vector >* live); - bool GetDBLevelSize(std::vector*); - - bool IsBusy(); - bool Workload(double* write_workload); - - bool SnapshotIDToSeq(uint64_t snapshot_id, uint64_t* snapshot_sequence); - - virtual bool Read(const leveldb::Slice& key, std::string* value, - uint64_t snapshot_id = 0, StatusCode* status = NULL); - - // read a row - virtual bool ReadCells(const RowReaderInfo& row_reader, RowResult* value_list, - uint64_t snapshot_id = 0, StatusCode* status = NULL, - int64_t timeout_ms = std::numeric_limits::max()); - /// scan from leveldb return ture means complete flase means not complete - bool LowLevelScan(const std::string& start_tera_key, - const std::string& end_row_key, - const ScanOptions& scan_options, - RowResult* value_list, - KeyValuePair* next_start_point, - uint32_t* read_row_count, - uint32_t* read_bytes, - bool* is_complete, - StatusCode* status = NULL); - - bool LowLevelSeek(const std::string& row_key, const ScanOptions& scan_options, - RowResult* value_list, StatusCode* status = NULL); - - bool WriteOne(const std::string& key, const std::string& value, - bool sync = true, StatusCode* status = NULL); - bool WriteBatch(leveldb::WriteBatch* batch, bool disable_wal = false, bool sync = true, + public: + enum CompactionType { + kManualCompaction = 1, + kMinorCompaction = 2, + }; + + enum TabletStatus { + kNotInit = kTabletNotInit, + kReady = kTabletReady, + kOnLoad = kTabletOnLoad, + kUnloading = kTabletUnloading, + kUnloading2 = kTabletUnloading2 + }; + + struct StatCounter { + const std::string label; + tera::MetricCounter low_read_cell; + tera::MetricCounter scan_rows; + tera::MetricCounter scan_kvs; + tera::MetricCounter scan_size; + tera::MetricCounter read_rows; + tera::MetricCounter read_kvs; + tera::MetricCounter read_size; + tera::MetricCounter write_rows; + tera::MetricCounter write_kvs; + tera::MetricCounter write_size; + tera::MetricCounter write_reject_rows; + + StatCounter(const std::string& tablet_path) + : label(MetricLabelToString(tablet_path)), + low_read_cell(tera::kLowReadCellMetricName, label, {SubscriberType::QPS}), + scan_rows(tera::kScanRowsMetricName, label, {SubscriberType::QPS}), + scan_kvs(tera::kScanKvsMetricName, label, {SubscriberType::QPS}), + scan_size(tera::kScanThroughPutMetricName, label, {SubscriberType::THROUGHPUT}), + read_rows(tera::kReadRowsMetricName, label, {SubscriberType::QPS}), + read_kvs(tera::kReadKvsMetricName, label, {SubscriberType::QPS}), + read_size(tera::kReadThroughPutMetricName, label, {SubscriberType::THROUGHPUT}), + write_rows(tera::kWriteRowsMetricName, label, {SubscriberType::QPS}), + write_kvs(tera::kWriteKvsMetricName, label, {SubscriberType::QPS}), + write_size(tera::kWriteThroughPutMetricName, label, {SubscriberType::THROUGHPUT}), + write_reject_rows(tera::kWriteRejectRowsMetricName, label, {SubscriberType::QPS}) {} + }; + + typedef std::function*, std::vector*)> + WriteCallback; + + friend std::ostream& operator<<(std::ostream& o, const TabletIO& tablet_io); + + public: + TabletIO(const std::string& key_start, const std::string& key_end, const std::string& path, + int64_t ctime, uint64_t version); + TabletIO(const std::string& key_start, const std::string& key_end, const std::string& path); + + virtual ~TabletIO(); + + // for testing + void SetMockEnv(leveldb::Env* e); + + std::string GetTableName() const; + std::string GetTablePath() const; + std::string GetStartKey() const; + std::string GetEndKey() const; + int64_t CreateTime() const { return ctime_; } + uint64_t Version() const { return version_; } + + const std::string& GetMetricLabel() const; + virtual CompactStatus GetCompactStatus() const; + virtual TableSchema GetSchema() const; + RawKey RawKeyType() const; + bool KvOnly() const { return kv_only_; } + StatCounter& GetCounter(); + // Set independent cache for memory table. + void SetMemoryCache(leveldb::Cache* cache); + // tablet + virtual bool Load(const TableSchema& schema, const std::string& path, + const std::vector& parent_tablets, + const std::set& ignore_err_lgs, leveldb::Logger* logger = NULL, + leveldb::Cache* block_cache = NULL, leveldb::TableCache* table_cache = NULL, StatusCode* status = NULL); - bool Write(std::vector* row_mutation_vec, - std::vector* status_vec, bool is_instant, - WriteCallback callback, StatusCode* status = NULL); + virtual bool Unload(StatusCode* status = NULL); + virtual bool Split(std::string* split_key, StatusCode* status = NULL); + virtual bool Compact(int lg_no = -1, StatusCode* status = NULL, + CompactionType type = kManualCompaction); + bool Destroy(StatusCode* status = NULL); + virtual bool GetDataSize(uint64_t* size, std::vector* lgsize = NULL, + uint64_t* mem_table_size = NULL, StatusCode* status = NULL); + virtual bool AddInheritedLiveFiles(std::vector >* live); + bool GetDBLevelSize(std::vector*); + + bool IsBusy(); + bool Workload(double* write_workload); + + bool SnapshotIDToSeq(uint64_t snapshot_id, uint64_t* snapshot_sequence); + + virtual bool Read(const leveldb::Slice& key, std::string* value, uint64_t snapshot_id = 0, + StatusCode* status = NULL); + + // read a row + virtual bool ReadCells(const RowReaderInfo& row_reader, RowResult* value_list, + uint64_t snapshot_id = 0, StatusCode* status = NULL, + int64_t timeout_ms = std::numeric_limits::max()); + /// scan from leveldb return ture means complete flase means not complete + bool LowLevelScan(const std::string& start_tera_key, const std::string& end_row_key, + const ScanOptions& scan_options, RowResult* value_list, + KeyValuePair* next_start_point, uint32_t* read_row_count, + uint32_t* read_cell_count, uint32_t* read_bytes, bool* is_complete, + StatusCode* status = NULL); + + bool LowLevelSeek(const std::string& row_key, const ScanOptions& scan_options, + RowResult* value_list, StatusCode* status = NULL); + + bool WriteOne(const std::string& key, const std::string& value, bool sync = true, + StatusCode* status = NULL); + bool WriteBatch(leveldb::WriteBatch* batch, bool disable_wal = false, bool sync = true, + StatusCode* status = NULL); + bool Write(std::vector* row_mutation_vec, + std::vector* status_vec, bool is_instant, WriteCallback callback, + StatusCode* status = NULL); + + bool ScanKvsRestricted(const ScanTabletRequest* request, ScanTabletResponse* response, + google::protobuf::Closure* done); + + virtual bool Scan(const ScanOption& option, KeyValueList* kv_list, uint32_t* read_row_count, + uint32_t* read_bytes, bool* complete, StatusCode* status = NULL); + + virtual bool ScanRows(const ScanTabletRequest* request, ScanTabletResponse* response, + google::protobuf::Closure* done); + + uint64_t GetSnapshot(uint64_t id, uint64_t snapshot_sequence, StatusCode* status = NULL); + bool ReleaseSnapshot(uint64_t snapshot_id, StatusCode* status = NULL); + void ListSnapshot(std::vector* snapshot_id); + + uint64_t Rollback(uint64_t snapshot_id, StatusCode* status); + + uint32_t GetLGidByCFName(const std::string& cfname); + + const leveldb::RawKeyOperator* GetRawKeyOperator(); + + void SetStatus(TabletStatus status); + TabletStatus GetStatus(); + + std::string GetLastErrorMessage(); + + int32_t AddRef(); + int32_t DecRef(); + int32_t GetRef() const; + + static bool FindAverageKey(const std::string& start, const std::string& end, std::string* res); + void ProcessScan(ScanContext* context); + void ApplySchema(const TableSchema& schema); - virtual bool Scan(const ScanOption& option, KeyValueList* kv_list, - bool* complete, StatusCode* status = NULL); + bool ShouldForceUnloadOnError(); - virtual bool ScanRows(const ScanTabletRequest* request, - ScanTabletResponse* response, + // generate a db status snapshot + // verify-db-integrity maybe spend more time + bool RefreshDBStatus(); + + // alwarys get a db status snapshot + void GetDBStatus(tera::TabletMeta::TabletStatus* tablet_status); + + void CheckBackgroundError(std::string* bg_error_str); + + private: + friend class TabletWriter; + friend class ScanConextManager; + bool WriteWithoutLock(const std::string& key, const std::string& value, bool sync = false, + StatusCode* status = NULL); + // int64_t GetDataSizeWithoutLock(StatusCode* status = NULL); + + void SetupOptionsForLG(const std::set& ignore_err_lgs); + void TearDownOptionsForLG(); + void IndexingCfToLG(); + + void SetupIteratorOptions(const ScanOptions& scan_options, leveldb::ReadOptions* leveldb_opts); + void SetupSingleRowIteratorOptions(const std::string& row_key, leveldb::ReadOptions* opts); + void TearDownIteratorOptions(leveldb::ReadOptions* opts); + + void ProcessRowBuffer(const SingleRowBuffer& row_buf, const ScanOptions& scan_options, + RowResult* value_list, uint32_t* buffer_size, int64_t* number_limit); + + StatusCode InitScanIterator(const std::string& start_tera_key, const std::string& end_row_key, + const ScanOptions& scan_options, leveldb::Iterator** scan_it); + + bool ScanRowsRestricted(const ScanTabletRequest* request, ScanTabletResponse* response, google::protobuf::Closure* done); + // tablet scanner + bool HandleScan(const ScanTabletRequest* request, ScanTabletResponse* response, + google::protobuf::Closure* done); - uint64_t GetSnapshot(uint64_t id, uint64_t snapshot_sequence, - StatusCode* status = NULL); - bool ReleaseSnapshot(uint64_t snapshot_id, StatusCode* status = NULL); - void ListSnapshot(std::vector* snapshot_id); - - uint64_t Rollback(uint64_t snapshot_id, StatusCode* status); - - uint32_t GetLGidByCFName(const std::string& cfname); - - const leveldb::RawKeyOperator* GetRawKeyOperator(); - - void SetStatus(TabletStatus status); - TabletStatus GetStatus(); - - std::string GetLastErrorMessage(); - - int32_t AddRef(); - int32_t DecRef(); - int32_t GetRef() const; - - static bool FindAverageKey(const std::string& start, const std::string& end, - std::string* res); - void ProcessScan(ScanContext* context); - void ApplySchema(const TableSchema& schema); - - bool ShouldForceUnloadOnError(); - - // generate a db status snapshot - // verify-db-integrity maybe spend more time - bool RefreshDBStatus(); - - // alwarys get a db status snapshot - void GetDBStatus(tera::TabletMeta::TabletStatus* tablet_status); - - void CheckBackgroundError(std::string* bg_error_str); - -private: - friend class TabletWriter; - friend class ScanConextManager; - bool WriteWithoutLock(const std::string& key, const std::string& value, - bool sync = false, StatusCode* status = NULL); -// int64_t GetDataSizeWithoutLock(StatusCode* status = NULL); - - void SetupOptionsForLG(const std::set& ignore_err_lgs); - void TearDownOptionsForLG(); - void IndexingCfToLG(); - - void SetupIteratorOptions(const ScanOptions& scan_options, - leveldb::ReadOptions* leveldb_opts); - void SetupSingleRowIteratorOptions(const std::string& row_key, - leveldb::ReadOptions* opts); - void TearDownIteratorOptions(leveldb::ReadOptions* opts); - - void ProcessRowBuffer(std::list& row_buf, - const ScanOptions& scan_options, - RowResult* value_list, - uint32_t* buffer_size, - int64_t* number_limit); - - StatusCode InitedScanIterator(const std::string& start_tera_key, - const std::string& end_row_key, - const ScanOptions& scan_options, - leveldb::Iterator** scan_it); - - bool ScanRowsRestricted(const ScanTabletRequest* request, - ScanTabletResponse* response, - google::protobuf::Closure* done); - // tablet scanner - bool HandleScan(const ScanTabletRequest* request, - ScanTabletResponse* response, - google::protobuf::Closure* done); - - void SetupScanInternalTeraKey(const ScanTabletRequest* request, - std::string* start_tera_key, - std::string* end_row_key); - void SetupScanRowOptions(const ScanTabletRequest* request, - ScanOptions* scan_options); - - bool LowLevelScan(const std::string& start_tera_key, - const std::string& end_row_key, - const ScanOptions& scan_options, - leveldb::Iterator* it, - ScanContext* scan_context, - RowResult* value_list, - KeyValuePair* next_start_point, - uint32_t* read_row_count, - uint32_t* read_bytes, - bool* is_complete, - StatusCode* status); - - void MakeKvPair(leveldb::Slice key, leveldb::Slice col, leveldb::Slice qual, - int64_t ts, leveldb::Slice value, KeyValuePair* kv); - - bool ParseRowKey(const std::string& tera_key, std::string* row_key); - bool ShouldFilterRowBuffer(std::list& row_buf, - const ScanOptions& scan_options); - - bool ScanWithFilter(const ScanOptions& scan_options); - bool IsCompleteRow(const std::list& row_buf, + void SetupScanKey(const ScanTabletRequest* request, std::string* start_tera_key, + std::string* end_row_key); + void SetupScanRowOptions(const ScanTabletRequest* request, ScanOptions* scan_options); + + bool KvTableScan(ScanContext* scan_context, uint32_t* read_row_count, uint32_t* read_bytes); + + bool LowLevelScan(const std::string& start_tera_key, const std::string& end_row_key, + const ScanOptions& scan_options, leveldb::Iterator* it, + ScanContext* scan_context, RowResult* value_list, + KeyValuePair* next_start_point, uint32_t* read_row_count, + uint32_t* read_cell_count, uint32_t* read_bytes, bool* is_complete, + StatusCode* status); + + void MakeKvPair(leveldb::Slice key, leveldb::Slice col, leveldb::Slice qual, int64_t ts, + leveldb::Slice value, KeyValuePair* kv); + + bool ParseRowKey(const std::string& tera_key, std::string* row_key); + bool ShouldFilterRowBuffer(const SingleRowBuffer& row_buf, const ScanOptions& scan_options); + + bool ScanWithFilter(const ScanOptions& scan_options); + bool IsCompleteRow(const SingleRowBuffer& row_buf, leveldb::Iterator* it); + bool ShouldFilterRow(const ScanOptions& scan_options, const SingleRowBuffer& row_buf, leveldb::Iterator* it); - bool ShouldFilterRow(const ScanOptions& scan_options, - const std::list& row_buf, - leveldb::Iterator* it); - void GotoNextRow(const std::list& row_buf, - leveldb::Iterator* it, - KeyValuePair* next); - void SetSchema(const TableSchema& schema); - - bool SingleRowTxnCheck(const std::string& row_key, - const SingleRowTxnReadInfo& txn_read_info, - StatusCode* status); - - bool IsUrgentUnload() const; - -private: - mutable Mutex mutex_; - TabletWriter* async_writer_; - ScanContextManager* scan_context_manager_; - - std::string tablet_path_; - const std::string start_key_; - const std::string end_key_; - const std::string short_path_; - std::string raw_start_key_; - std::string raw_end_key_; - CompactStatus compact_status_; - - TabletStatus status_; - tera::TabletMeta::TabletStatus tablet_status_; // check wether db corruption - std::string last_err_msg_; - volatile int32_t ref_count_; - volatile int32_t db_ref_count_; - leveldb::Options ldb_options_; - leveldb::DB* db_; - leveldb::Cache* m_memory_cache; - TableSchema table_schema_; - bool kv_only_; - std::map id_to_snapshot_num_; - std::map rollbacks_; - - const leveldb::RawKeyOperator* key_operator_; - - std::map cf_lg_map_; - std::map lg_id_map_; - - // accept unload request for this tablet will inc this count - std::atomic try_unload_count_; - StatCounter counter_; - mutable Mutex schema_mutex_; - - leveldb::Env* mock_env_; // mock env for testing + void GotoNextRow(const SingleRowBuffer& row_buf, leveldb::Iterator* it, KeyValuePair* next); + void SetSchema(const TableSchema& schema); + + bool SingleRowTxnCheck(const std::string& row_key, const SingleRowTxnReadInfo& txn_read_info, + StatusCode* status); + + bool IsUrgentUnload() const; + void AddFilterCfs(filter::ColumnSet& filter_column_set, std::set* cf_set); + bool SetupFilter(const filter::FilterDesc& filter_desc, ScanOptions* scan_options); + bool IsValidOldFilter(const Filter& old_filter_desc); + bool TransFilter(const FilterList& old_filter_list_desc, ScanOptions* scan_options); + + private: + mutable Mutex mutex_; + TabletWriter* async_writer_; + ScanContextManager* scan_context_manager_; + + std::string tablet_path_; + const std::string start_key_; + const std::string end_key_; + const int64_t ctime_; + const uint64_t version_; + const std::string short_path_; + std::string raw_start_key_; + std::string raw_end_key_; + CompactStatus compact_status_; + + TabletStatus status_; + tera::TabletMeta::TabletStatus tablet_status_; // check wether db corruption + std::string last_err_msg_; + volatile int32_t ref_count_; + volatile int32_t db_ref_count_; + leveldb::Options ldb_options_; + leveldb::DB* db_; + leveldb::Cache* m_memory_cache; + TableSchema table_schema_; + bool kv_only_; + std::map id_to_snapshot_num_; + std::map rollbacks_; + + const leveldb::RawKeyOperator* key_operator_; + + std::map cf_lg_map_; + std::map lg_id_map_; + + // accept unload request for this tablet will inc this count + std::atomic try_unload_count_; + StatCounter counter_; + mutable Mutex schema_mutex_; + + leveldb::Env* mock_env_; // mock env for testing +}; + +class SingleRowBuffer { + public: + // Never copied or assigned + SingleRowBuffer() = default; + SingleRowBuffer(const SingleRowBuffer&) = delete; + SingleRowBuffer& operator=(const SingleRowBuffer&) = delete; + + const std::string& RowKey(size_t index) const { + assert(index < row_buf_.size()); + return *row_buf_[index].row_key; + } + + const std::string& ColumnFamily(size_t index) const { + assert(index < row_buf_.size()); + return *row_buf_[index].column_family; + } + + const std::string& Qualifier(size_t index) const { + assert(index < row_buf_.size()); + return *row_buf_[index].qualifier; + } + + const std::string& Value(size_t index) const { + assert(index < row_buf_.size()); + return *row_buf_[index].value; + } + + int64_t TimeStamp(size_t index) const { + assert(index < row_buf_.size()); + return row_buf_[index].timestamp; + } + + void Add(const leveldb::Slice& row_key, const leveldb::Slice& column_family, + const leveldb::Slice& qualifier, const leveldb::Slice& value, int64_t timestamp) { + row_buf_.emplace_back(row_key, column_family, qualifier, value, timestamp); + } + + void Clear() { row_buf_.clear(); } + + size_t Size() const { return row_buf_.size(); } + + void Serialize(size_t index, KeyValuePair* kv) const { + assert(index < row_buf_.size()); + auto& tera_kv = row_buf_[index]; + kv->set_allocated_key(tera_kv.row_key.release()); + kv->set_allocated_column_family(tera_kv.column_family.release()); + kv->set_allocated_qualifier(tera_kv.qualifier.release()); + kv->set_timestamp(tera_kv.timestamp); + kv->set_allocated_value(tera_kv.value.release()); + } + + private: + struct RowData { + using UniqueStringPtr = std::unique_ptr; + RowData(const leveldb::Slice& row_key, const leveldb::Slice& column_family, + const leveldb::Slice& qualifier, const leveldb::Slice& value, int64_t timestamp) + : row_key(new std::string(row_key.data(), row_key.size())), + column_family(new std::string(column_family.data(), column_family.size())), + qualifier(new std::string(qualifier.data(), qualifier.size())), + value(new std::string(value.data(), value.size())), + timestamp(timestamp) {} + + UniqueStringPtr row_key; + UniqueStringPtr column_family; + UniqueStringPtr qualifier; + UniqueStringPtr value; + int64_t timestamp; + }; + + mutable std::vector row_buf_; }; #define TABLET_ID (!this ? std::string("") : GetTablePath()) #define TABLET_UNLOAD_LOG LOG_IF(INFO, FLAGS_debug_tera_tablet_unload) << "[" << TABLET_ID << "] " -} // namespace io -} // namespace tera +} // namespace io +} // namespace tera -#endif // TERA_IO_TABLET_IO_H_ +#endif // TERA_IO_TABLET_IO_H_ diff --git a/src/io/tablet_scanner.cc b/src/io/tablet_scanner.cc index 47f082126..36609acae 100644 --- a/src/io/tablet_scanner.cc +++ b/src/io/tablet_scanner.cc @@ -19,170 +19,174 @@ namespace tera { namespace io { ScanContextManager::ScanContextManager() { - cache_ = leveldb::NewLRUCache(FLAGS_tera_tabletnode_scanner_cache_size); + cache_ = leveldb::NewLRUCache(FLAGS_tera_tabletnode_scanner_cache_size); } // when tabletio unload, because scan_context->m_it has reference of version, // so we shoud drop all cache it ScanContextManager::~ScanContextManager() { - MutexLock l(&lock_); - delete cache_; + MutexLock l(&lock_); + delete cache_; } // access in lock_ context static void LRUCacheDeleter(const ::leveldb::Slice& key, void* value) { - ScanContext* context = reinterpret_cast(value); - VLOG(10) << "evict from cache, " << context->session_id; - CHECK(context->handle == NULL); - if (context->it) { - delete context->it; - } - if (context->compact_strategy) { - delete context->compact_strategy; - } - delete context; - return; + ScanContext* context = reinterpret_cast(value); + VLOG(10) << "evict from cache, " << context->session_id; + CHECK(context->handle == NULL); + if (context->it) { + delete context->it; + } + if (context->compact_strategy) { + delete context->compact_strategy; + } + delete context; + return; } ScanContext* ScanContextManager::GetScanContext(TabletIO* tablet_io, const ScanTabletRequest* request, ScanTabletResponse* response, google::protobuf::Closure* done) { - ScanContext* context = NULL; - ::leveldb::Cache::Handle* handle = NULL; - - // init common param of response - VLOG(10) << "push task for session id: " << request->session_id() - << ", sequence id: " << request->sequence_id(); - response->set_results_id(std::numeric_limits::max()); - response->set_complete(false); - response->set_status(kTabletNodeOk); - - // search from cache - MutexLock l(&lock_); - char buf[sizeof(int64_t)]; - ::leveldb::EncodeFixed64(buf, request->session_id()); - ::leveldb::Slice key(buf, sizeof(buf)); - handle = cache_->Lookup(key); - if (handle) { - // not first session rpc, no need init scan context - context = reinterpret_cast(cache_->Value(handle)); - context->jobs.push(ScanJob(response, done)); - if (context->jobs.size() > 1) { - cache_->Release(handle); - VLOG(10) << "push task into queue, " << request->session_id(); - return NULL; - } - CHECK(context->handle == NULL); - context->handle = handle; // first one refer item in cache - return context; - } - - // case 1: if this session's first request not arrive, drop this one - // case 2: client RPCtimeout resend - if (request->part_of_session()) { - VLOG(10) << "drop invalid request " << request->sequence_id() << ", session_id " << request->session_id(); - done->Run(); - return NULL; - } - - // first rpc new scan context - context = new ScanContext; - context->session_id = request->session_id(); - context->tablet_io = tablet_io; - - context->it = NULL; - context->compact_strategy = NULL; - context->ret_code = kTabletNodeOk; - context->result = NULL; - context->data_idx = 0; - context->complete = false; - context->version_num = 1; - - handle = cache_->Insert(key, context, 1, &LRUCacheDeleter); + ScanContext* context = NULL; + ::leveldb::Cache::Handle* handle = NULL; + + // init common param of response + VLOG(10) << "push task for session id: " << request->session_id() + << ", sequence id: " << request->sequence_id(); + response->set_results_id(std::numeric_limits::max()); + response->set_complete(false); + response->set_status(kTabletNodeOk); + + // search from cache + MutexLock l(&lock_); + char buf[sizeof(int64_t)]; + ::leveldb::EncodeFixed64(buf, request->session_id()); + ::leveldb::Slice key(buf, sizeof(buf)); + handle = cache_->Lookup(key); + if (handle) { + // not first session rpc, no need init scan context + context = reinterpret_cast(cache_->Value(handle)); context->jobs.push(ScanJob(response, done)); - context->handle = handle; // refer item in cache - // init context other param in TabletIO context + if (context->jobs.size() > 1) { + cache_->Release(handle); + VLOG(10) << "push task into queue, " << request->session_id(); + return NULL; + } + CHECK(context->handle == NULL); + context->handle = handle; // first one refer item in cache return context; + } + + // case 1: if this session's first request not arrive, drop this one + // case 2: client RPCtimeout resend + if (request->part_of_session()) { + VLOG(10) << "drop invalid request " << request->sequence_id() << ", session_id " + << request->session_id(); + done->Run(); + return NULL; + } + + // first rpc new scan context + context = new ScanContext; + context->session_id = request->session_id(); + context->tablet_io = tablet_io; + + context->it = nullptr; + context->compact_strategy = nullptr; + context->ret_code = kTabletNodeOk; + context->result = nullptr; + context->data_idx = 0; + context->complete = false; + context->version_num = 1; + + handle = cache_->Insert(key, context, 1, &LRUCacheDeleter); + context->jobs.push(ScanJob(response, done)); + context->handle = handle; // refer item in cache + // init context other param in TabletIO context + return context; } // check event bit, then schedule context bool ScanContextManager::ScheduleScanContext(ScanContext* context) { - while (context->ret_code == kTabletNodeOk) { - ScanTabletResponse* response; - ::google::protobuf::Closure* done; - { - MutexLock l(&lock_); - response = context->jobs.front().first; - done = context->jobs.front().second; - } - context->result = response->mutable_results(); - - context->tablet_io->ProcessScan(context); - - // reply to client - response->set_complete(context->complete); - response->set_status(context->ret_code); - response->set_results_id(context->data_idx); - (context->data_idx)++; - context->result = NULL; - done->Run();// TODO: try async return, time consume need test - - { - MutexLock l(&lock_); - context->jobs.pop(); - - // complete or io error, return all the rest request to client - if (context->complete || (context->ret_code != kTabletNodeOk)) { - DeleteScanContext(context); // never use context - if (context->ret_code != kTabletNodeOk) { - return false; - } - return true; - } - if (context->jobs.size() == 0) { - ::leveldb::Cache::Handle* handle = context->handle; - context->handle = NULL; - cache_->Release(handle); // unrefer cache item - return true; - } - } + while (context->ret_code == kTabletNodeOk) { + ScanTabletResponse* response; + ::google::protobuf::Closure* done; + { + MutexLock l(&lock_); + response = context->jobs.front().first; + done = context->jobs.front().second; } + context->result = response->mutable_results(); + + context->tablet_io->ProcessScan(context); + + // reply to client + response->set_complete(context->complete); + response->set_status(context->ret_code); + response->set_results_id(context->data_idx); + response->set_data_size(context->data_size); + response->set_row_count(context->row_count); + response->set_cell_count(context->cell_count); + (context->data_idx)++; + context->result = NULL; + done->Run(); // TODO: try async return, time consume need test + { - MutexLock l(&lock_); + MutexLock l(&lock_); + context->jobs.pop(); + + // complete or io error, return all the rest request to client + if (context->complete || (context->ret_code != kTabletNodeOk)) { + DeleteScanContext(context); // never use context if (context->ret_code != kTabletNodeOk) { - DeleteScanContext(context); // never use context - return false; + return false; } + return true; + } + if (context->jobs.size() == 0) { + ::leveldb::Cache::Handle* handle = context->handle; + context->handle = NULL; + cache_->Release(handle); // unrefer cache item + return true; + } + } + } + { + MutexLock l(&lock_); + if (context->ret_code != kTabletNodeOk) { + DeleteScanContext(context); // never use context + return false; } - return true; + } + return true; } // access in lock_ context void ScanContextManager::DeleteScanContext(ScanContext* context) { - uint32_t job_size = context->jobs.size(); - while (job_size) { - ScanTabletResponse* response = context->jobs.front().first; - ::google::protobuf::Closure* done = context->jobs.front().second; - response->set_complete(context->complete); - response->set_status(context->ret_code); - done->Run(); - - context->jobs.pop(); - job_size--; - } - - int64_t session_id = context->session_id; - VLOG(10) << "scan " << session_id << ", complete " << context->complete << ", ret " << StatusCode_Name(context->ret_code); - ::leveldb::Cache::Handle* handle = context->handle; - context->handle = NULL; - cache_->Release(handle); // unrefer cache item, no more use context!!! - - char buf[sizeof(int64_t)]; - ::leveldb::EncodeFixed64(buf, session_id); - ::leveldb::Slice key(buf, sizeof(buf)); - cache_->Erase(key); + uint32_t job_size = context->jobs.size(); + while (job_size) { + ScanTabletResponse* response = context->jobs.front().first; + ::google::protobuf::Closure* done = context->jobs.front().second; + response->set_complete(context->complete); + response->set_status(context->ret_code); + done->Run(); + + context->jobs.pop(); + job_size--; + } + + int64_t session_id = context->session_id; + VLOG(10) << "scan " << session_id << ", complete " << context->complete << ", ret " + << StatusCode_Name(context->ret_code); + ::leveldb::Cache::Handle* handle = context->handle; + context->handle = NULL; + cache_->Release(handle); // unrefer cache item, no more use context!!! + + char buf[sizeof(int64_t)]; + ::leveldb::EncodeFixed64(buf, session_id); + ::leveldb::Slice key(buf, sizeof(buf)); + cache_->Erase(key); } -} // namespace io -}// namespace tera - +} // namespace io +} // namespace tera diff --git a/src/io/tablet_scanner.h b/src/io/tablet_scanner.h index dd727447d..97c18a04e 100644 --- a/src/io/tablet_scanner.h +++ b/src/io/tablet_scanner.h @@ -15,87 +15,95 @@ #include "leveldb/compact_strategy.h" #include "leveldb/db.h" #include "proto/tabletnode_rpc.pb.h" +#include "tera.h" namespace tera { namespace io { class TabletIO; -typedef std::map< std::string, std::set > ColumnFamilyMap; +typedef std::map > ColumnFamilyMap; struct ScanOptions { - uint32_t max_versions; - uint32_t max_size; - int64_t number_limit; // kv number > number_limit, return to user - int64_t ts_start; - int64_t ts_end; - uint64_t snapshot_id; - FilterList filter_list; - ColumnFamilyMap column_family_list; - std::set iter_cf_set; - int64_t timeout; - uint64_t max_qualifiers; - // If sdk uses batch scan, we will use prefetch scan iterator.; - bool is_batch_scan; - - ScanOptions() - : max_versions(std::numeric_limits::max()), - max_size(std::numeric_limits::max()), - number_limit(std::numeric_limits::max()), - ts_start(kOldestTs), ts_end(kLatestTs), snapshot_id(0), - timeout(std::numeric_limits::max() / 2), - max_qualifiers(std::numeric_limits::max()), - is_batch_scan(false) - {} + uint32_t max_versions; + uint32_t max_size; + int64_t number_limit; // kv number > number_limit, return to user + int64_t ts_start; + int64_t ts_end; + uint64_t snapshot_id; + filter::FilterPtr filter; + ColumnFamilyMap column_family_list; + std::set iter_cf_set; + filter::ColumnSet filter_column_set; + int64_t timeout; + uint64_t max_qualifiers; + // If sdk uses batch scan, we will use prefetch scan iterator.; + bool is_batch_scan; + bool enable_dfs_read_thread_limiter; + + ScanOptions() + : max_versions(std::numeric_limits::max()), + max_size(std::numeric_limits::max()), + number_limit(std::numeric_limits::max()), + ts_start(kOldestTs), + ts_end(kLatestTs), + snapshot_id(0), + timeout(std::numeric_limits::max() / 2), + max_qualifiers(std::numeric_limits::max()), + is_batch_scan(false), + enable_dfs_read_thread_limiter(false) {} }; class ScanContextManager; typedef std::pair ScanJob; struct ScanContext { - int64_t session_id; - TabletIO* tablet_io; - - // use for lowlevelscan - std::string start_tera_key; - std::string end_row_key; - ScanOptions scan_options; - leveldb::Iterator* it; // init to NULL - leveldb::CompactStrategy* compact_strategy; - uint32_t version_num; - uint64_t qu_num; - std::string last_key; - std::string last_col; - std::string last_qual; - - // use for reture - StatusCode ret_code; // set by lowlevelscan - bool complete; // test this flag know whether scan finish or not - RowResult* result; // scan result for one round - uint64_t data_idx; // return data_id - - // protect by manager lock - std::queue jobs; - leveldb::Cache::Handle* handle; + int64_t session_id; + TabletIO* tablet_io; + + // use for lowlevelscan + std::string start_tera_key; + std::string end_row_key; + ScanOptions scan_options; + leveldb::Iterator* it; // init to NULL + leveldb::CompactStrategy* compact_strategy; + uint32_t version_num; + uint64_t qu_num; + std::string last_key; + std::string last_col; + std::string last_qual; + + // use for reture + StatusCode ret_code; // set by lowlevelscan + bool complete; // test this flag know whether scan finish or not + RowResult* result; // scan result for one round + uint64_t data_idx; // return data_id + uint32_t cell_count; // scan total cell count for one round, kvtable cell_count equal row_count + uint32_t row_count; // scan total row count for one round + uint32_t data_size; // scan total data size for one round + + // protect by manager lock + std::queue jobs; + leveldb::Cache::Handle* handle; }; class ScanContextManager { -public: - ScanContextManager(); - ~ScanContextManager(); + public: + ScanContextManager(); + ~ScanContextManager(); - ScanContext* GetScanContext(TabletIO* tablet_io, const ScanTabletRequest* request, - ScanTabletResponse* response, google::protobuf::Closure* done); - bool ScheduleScanContext(ScanContext* context); + ScanContext* GetScanContext(TabletIO* tablet_io, const ScanTabletRequest* request, + ScanTabletResponse* response, google::protobuf::Closure* done); + bool ScheduleScanContext(ScanContext* context); -private: - void DeleteScanContext(ScanContext* context); + private: + void DeleteScanContext(ScanContext* context); - // + // - Mutex lock_; - ::leveldb::Cache* cache_; + Mutex lock_; + ::leveldb::Cache* cache_; }; -} // namespace io -} // namespace tera +} // namespace io +} // namespace tera -#endif // TERA_IO_TABLET_SCANNER_H +#endif // TERA_IO_TABLET_SCANNER_H diff --git a/src/io/tablet_writer.cc b/src/io/tablet_writer.cc index 79f75c139..1ca3a4d38 100644 --- a/src/io/tablet_writer.cc +++ b/src/io/tablet_writer.cc @@ -56,448 +56,438 @@ tera::MetricCounter flush_to_disk_finish_delay(kFlushToDiskDelayMetric, kFlushFi tera::MetricCounter row_write_count(kRowCountMetric, kApiLabelWrite, {SubscriberType::QPS}); tera::MetricCounter row_write_delay(kRowDelayMetric, kApiLabelWrite, {}); -tera::AutoSubscriberRegister row_write_delay_per_row(std::unique_ptr(new tera::RatioSubscriber( - MetricId("tera_ts_row_write_delay_us_per_row"), - std::unique_ptr(new tera::PrometheusSubscriber(MetricId(kRowDelayMetric, kApiLabelWrite), SubscriberType::SUM)), - std::unique_ptr(new tera::PrometheusSubscriber(MetricId(kRowCountMetric, kApiLabelWrite), SubscriberType::SUM))))); +tera::AutoSubscriberRegister row_write_delay_per_row(std::unique_ptr( + new tera::RatioSubscriber(MetricId("tera_ts_row_write_delay_us_per_row"), + std::unique_ptr(new tera::PrometheusSubscriber( + MetricId(kRowDelayMetric, kApiLabelWrite), SubscriberType::SUM)), + std::unique_ptr(new tera::PrometheusSubscriber( + MetricId(kRowCountMetric, kApiLabelWrite), + SubscriberType::SUM))))); TabletWriter::TabletWriter(TabletIO* tablet_io) - : tablet_(tablet_io), stopped_(true), + : tablet_(tablet_io), + stopped_(true), sync_timestamp_(0), active_buffer_instant_(false), active_buffer_size_(0), tablet_busy_(false) { - active_buffer_ = new WriteTaskBuffer; - sealed_buffer_ = new WriteTaskBuffer; + active_buffer_ = new WriteTaskBuffer; + sealed_buffer_ = new WriteTaskBuffer; } TabletWriter::~TabletWriter() { - Stop(); - delete active_buffer_; - delete sealed_buffer_; + Stop(); + delete active_buffer_; + delete sealed_buffer_; } void TabletWriter::Start() { - { - MutexLock lock(&status_mutex_); - if (!stopped_) { - LOG(WARNING) << "tablet writer has been started"; - return; - } - stopped_ = false; + { + MutexLock lock(&status_mutex_); + if (!stopped_) { + LOG(WARNING) << "tablet writer has been started"; + return; } - LOG(INFO) << "start tablet writer ..."; - thread_.Start(std::bind(&TabletWriter::DoWork, this)); - ThisThread::Yield(); + stopped_ = false; + } + LOG(INFO) << "start tablet writer ..."; + thread_ = std::thread{&TabletWriter::DoWork, this}; } void TabletWriter::Stop() { - { - MutexLock lock(&status_mutex_); - if (stopped_) { - return; - } - stopped_ = true; + { + MutexLock lock(&status_mutex_); + if (stopped_) { + return; } + stopped_ = true; + } + thread_.join(); - worker_done_event_.Wait(); + FlushToDiskBatch(sealed_buffer_); + FlushToDiskBatch(active_buffer_); - FlushToDiskBatch(sealed_buffer_); - FlushToDiskBatch(active_buffer_); - - LOG(INFO) << "tablet writer is stopped"; + LOG(INFO) << "tablet writer is stopped"; } uint64_t TabletWriter::CountRequestSize(std::vector& row_mutation_vec, bool kv_only) { - uint64_t data_size = 0; - for (uint32_t i = 0; i < row_mutation_vec.size(); i++) { - const RowMutationSequence& mu_seq = *row_mutation_vec[i]; - int32_t mu_num = mu_seq.mutation_sequence_size(); - for (int32_t j = 0; j < mu_num; j++) { - const Mutation& mu = mu_seq.mutation_sequence(j); - data_size += mu_seq.row_key().size() + mu.value().size(); - if (!kv_only) { - data_size += mu.family().size() - + mu.qualifier().size() - + sizeof(mu.timestamp()); - } - } + uint64_t data_size = 0; + for (uint32_t i = 0; i < row_mutation_vec.size(); i++) { + const RowMutationSequence& mu_seq = *row_mutation_vec[i]; + int32_t mu_num = mu_seq.mutation_sequence_size(); + for (int32_t j = 0; j < mu_num; j++) { + const Mutation& mu = mu_seq.mutation_sequence(j); + data_size += mu_seq.row_key().size() + mu.value().size(); + if (!kv_only) { + data_size += mu.family().size() + mu.qualifier().size() + sizeof(mu.timestamp()); + } } - return data_size; + } + return data_size; } bool TabletWriter::Write(std::vector* row_mutation_vec, std::vector* status_vec, bool is_instant, WriteCallback callback, StatusCode* status) { - static uint32_t last_print = time(NULL); - const uint64_t MAX_PENDING_SIZE = FLAGS_tera_asyncwriter_pending_limit * 1024UL; + static uint32_t last_print = time(NULL); + const uint64_t MAX_PENDING_SIZE = FLAGS_tera_asyncwriter_pending_limit * 1024UL; - MutexLock lock(&task_mutex_); - if (stopped_) { - LOG(ERROR) << "tablet writer is stopped"; - SetStatusCode(kAsyncNotRunning, status); - return false; - } - if (active_buffer_size_ >= MAX_PENDING_SIZE || tablet_busy_) { - uint32_t now_time = time(NULL); - if (now_time > last_print) { - LOG(WARNING) << "[" << tablet_->GetTablePath() - << "] is too busy, active_buffer_size_: " - << (active_buffer_size_>>10) << "KB, tablet_busy_: " - << tablet_busy_; - last_print = now_time; - } - SetStatusCode(kTabletNodeIsBusy, status); - return false; - } - - uint64_t request_size = CountRequestSize(*row_mutation_vec, tablet_->KvOnly()); - WriteTask task; - task.row_mutation_vec = row_mutation_vec; - task.status_vec = status_vec; - task.callback = callback; - - active_buffer_->push_back(task); - active_buffer_size_ += request_size; - active_buffer_instant_ |= is_instant; - if (active_buffer_size_ >= FLAGS_tera_asyncwriter_sync_size_threshold * 1024UL || - active_buffer_instant_) { - write_event_.Set(); + MutexLock lock(&task_mutex_); + if (stopped_) { + LOG(ERROR) << "tablet writer is stopped"; + SetStatusCode(kAsyncNotRunning, status); + return false; + } + if (active_buffer_size_ >= MAX_PENDING_SIZE || tablet_busy_) { + uint32_t now_time = time(NULL); + if (now_time > last_print) { + LOG(WARNING) << "[" << tablet_->GetTablePath() + << "] is too busy, active_buffer_size_: " << (active_buffer_size_ >> 10) + << "KB, tablet_busy_: " << tablet_busy_; + last_print = now_time; } - return true; + SetStatusCode(kTabletNodeIsBusy, status); + return false; + } + + uint64_t request_size = CountRequestSize(*row_mutation_vec, tablet_->KvOnly()); + WriteTask task; + task.row_mutation_vec = row_mutation_vec; + task.status_vec = status_vec; + task.callback = callback; + + active_buffer_->push_back(task); + active_buffer_size_ += request_size; + active_buffer_instant_ |= is_instant; + if (active_buffer_size_ >= FLAGS_tera_asyncwriter_sync_size_threshold * 1024UL || + active_buffer_instant_) { + write_event_.Set(); + } + return true; } void TabletWriter::DoWork() { - sync_timestamp_ = GetTimeStampInMs(); - int32_t sync_interval = FLAGS_tera_asyncwriter_sync_interval; - if (sync_interval == 0) { - sync_interval = 1; - } - - while (!stopped_) { - int64_t sleep_duration = sync_timestamp_ + sync_interval - GetTimeStampInMs(); - // 如果没数据, 等 - if (!SwapActiveBuffer(sleep_duration <= 0)) { - if (sleep_duration <= 0) { - sync_timestamp_ = GetTimeStampInMs(); - } else { - write_event_.TimeWait(sleep_duration); - } - continue; - } - // 否则 flush - VLOG(7) << "write data, sleep_duration: " << sleep_duration; + LOG(INFO) << "Tablet writer work thread started for tablet: " << tablet_->GetTablePath(); + + sync_timestamp_ = GetTimeStampInMs(); + int32_t sync_interval = FLAGS_tera_asyncwriter_sync_interval; + if (sync_interval == 0) { + sync_interval = 1; + } + + while (!stopped_) { + int64_t sleep_duration = sync_timestamp_ + sync_interval - GetTimeStampInMs(); + // 如果没数据, 等 + if (!SwapActiveBuffer(sleep_duration <= 0)) { + if (sleep_duration <= 0) { sync_timestamp_ = GetTimeStampInMs(); - FlushToDiskBatch(sealed_buffer_); - sealed_buffer_->clear(); + } else { + write_event_.TimeWait(sleep_duration); + } + continue; } - LOG(INFO) << "AsyncWriter::DoWork done"; - worker_done_event_.Set(); + // 否则 flush + VLOG(7) << "write data, sleep_duration: " << sleep_duration; + sync_timestamp_ = GetTimeStampInMs(); + FlushToDiskBatch(sealed_buffer_); + sealed_buffer_->clear(); + } + LOG(INFO) << "AsyncWriter::DoWork done"; } bool TabletWriter::IsBusy() { - const uint64_t MAX_PENDING_SIZE = FLAGS_tera_asyncwriter_pending_limit * 1024UL; + const uint64_t MAX_PENDING_SIZE = FLAGS_tera_asyncwriter_pending_limit * 1024UL; - MutexLock lock(&task_mutex_); - return active_buffer_size_ >= MAX_PENDING_SIZE; + MutexLock lock(&task_mutex_); + return active_buffer_size_ >= MAX_PENDING_SIZE; } bool TabletWriter::SwapActiveBuffer(bool force) { - const uint64_t SYNC_SIZE = FLAGS_tera_asyncwriter_sync_size_threshold * 1024UL; - if (FLAGS_tera_enable_level0_limit == true) { - tablet_busy_ = tablet_->IsBusy(); - } - - MutexLock lock(&task_mutex_); - if (active_buffer_->size() <= 0) { - return false; - } - if (!force && !active_buffer_instant_ && active_buffer_size_ < SYNC_SIZE) { - return false; - } - VLOG(7) << "SwapActiveBuffer, buffer:" << active_buffer_size_ - << ":" <size() << ", force:" << force - << ", instant:" << active_buffer_instant_; - WriteTaskBuffer* temp = active_buffer_; - active_buffer_ = sealed_buffer_; - sealed_buffer_ = temp; - CHECK_EQ(0U, active_buffer_->size()); - - active_buffer_size_ = 0; - active_buffer_instant_ = false; + const uint64_t SYNC_SIZE = FLAGS_tera_asyncwriter_sync_size_threshold * 1024UL; + if (FLAGS_tera_enable_level0_limit == true) { + tablet_busy_ = tablet_->IsBusy(); + } - return true; + MutexLock lock(&task_mutex_); + if (active_buffer_->size() <= 0) { + return false; + } + if (!force && !active_buffer_instant_ && active_buffer_size_ < SYNC_SIZE) { + return false; + } + VLOG(7) << "SwapActiveBuffer, buffer:" << active_buffer_size_ << ":" << active_buffer_->size() + << ", force:" << force << ", instant:" << active_buffer_instant_; + WriteTaskBuffer* temp = active_buffer_; + active_buffer_ = sealed_buffer_; + sealed_buffer_ = temp; + CHECK_EQ(0U, active_buffer_->size()); + + active_buffer_size_ = 0; + active_buffer_instant_ = false; + + return true; } -void TabletWriter::BatchRequest(WriteTaskBuffer* task_buffer, - leveldb::WriteBatch* batch) { - int64_t timestamp_old = 0; - for (uint32_t task_idx = 0; task_idx < task_buffer->size(); ++task_idx) { - WriteTask& task = (*task_buffer)[task_idx]; - const std::vector& row_mutation_vec = *(task.row_mutation_vec); - std::vector* status_vec = task.status_vec; - - for (uint32_t i = 0; i < row_mutation_vec.size(); ++i) { - StatusCode* status = &((*status_vec)[i]); - const RowMutationSequence& row_mu = *row_mutation_vec[i]; - const std::string& row_key = row_mu.row_key(); - uint32_t mu_num = row_mu.mutation_sequence().size(); - if (*status != kTabletNodeOk) { - VLOG(11) << "batch write fail, row " << DebugString(row_key) - << ", status " << StatusCodeToString(*status); - continue; +void TabletWriter::BatchRequest(WriteTaskBuffer* task_buffer, leveldb::WriteBatch* batch) { + auto table_schema = tablet_->GetSchema(); + int64_t timestamp_old = 0; + for (uint32_t task_idx = 0; task_idx < task_buffer->size(); ++task_idx) { + WriteTask& task = (*task_buffer)[task_idx]; + const std::vector& row_mutation_vec = *(task.row_mutation_vec); + std::vector* status_vec = task.status_vec; + + for (uint32_t i = 0; i < row_mutation_vec.size(); ++i) { + StatusCode* status = &((*status_vec)[i]); + const RowMutationSequence& row_mu = *row_mutation_vec[i]; + const std::string& row_key = row_mu.row_key(); + uint32_t mu_num = row_mu.mutation_sequence().size(); + if (*status != kTabletNodeOk) { + VLOG(11) << "batch write fail, row " << DebugString(row_key) << ", status " + << StatusCodeToString(*status); + continue; + } + if (mu_num == 0) { + continue; + } + if (tablet_->KvOnly()) { + // only the last mutation take effect for kv + const Mutation& mu = row_mu.mutation_sequence().Get(mu_num - 1); + std::string tera_key; + if (table_schema.raw_key() == TTLKv) { // TTL-KV + if (mu.ttl() == -1) { // never expires + tablet_->GetRawKeyOperator()->EncodeTeraKey(row_key, "", "", kLatestTs, + leveldb::TKT_FORSEEK, &tera_key); + } else { // no check of overflow risk ... + tablet_->GetRawKeyOperator()->EncodeTeraKey(row_key, "", "", + get_micros() / 1000000 + mu.ttl(), + leveldb::TKT_FORSEEK, &tera_key); + } + } else { // Readable-KV + tera_key.assign(row_key); + } + if (mu.type() == kPut) { + batch->Put(tera_key, mu.value()); + } else { + batch->Delete(tera_key); + } + } else { + for (uint32_t t = 0; t < mu_num; ++t) { + const Mutation& mu = row_mu.mutation_sequence().Get(t); + std::string tera_key; + leveldb::TeraKeyType type = leveldb::TKT_VALUE; + switch (mu.type()) { + case kDeleteRow: + type = leveldb::TKT_DEL; + break; + case kDeleteFamily: + type = leveldb::TKT_DEL_COLUMN; + break; + case kDeleteColumn: + type = leveldb::TKT_DEL_QUALIFIER; + break; + case kDeleteColumns: + type = leveldb::TKT_DEL_QUALIFIERS; + break; + case kAdd: + type = leveldb::TKT_ADD; + break; + case kAddInt64: + type = leveldb::TKT_ADDINT64; + break; + case kPutIfAbsent: + type = leveldb::TKT_PUT_IFABSENT; + break; + case kAppend: + type = leveldb::TKT_APPEND; + break; + default: + break; + } + int64_t timestamp = get_unique_micros(timestamp_old); + timestamp_old = timestamp; + if (table_schema.enable_txn()) { + if (mu.has_timestamp() && (mu.timestamp() != kLatestTimestamp)) { + timestamp = mu.timestamp(); } - if (mu_num == 0) { - continue; + } else { + if (leveldb::TeraKey::IsTypeAllowUserSetTimestamp(type) && mu.has_timestamp() && + mu.timestamp() != kLatestTimestamp) { + timestamp = mu.timestamp(); } - if (tablet_->KvOnly()) { - // only the last mutation take effect for kv - const Mutation& mu = row_mu.mutation_sequence().Get(mu_num - 1); - std::string tera_key; - if (tablet_->GetSchema().raw_key() == TTLKv) { // TTL-KV - if (mu.ttl() == -1) { // never expires - tablet_->GetRawKeyOperator()->EncodeTeraKey(row_key, "", "", - kLatestTs, leveldb::TKT_FORSEEK, &tera_key); - } else { // no check of overflow risk ... - tablet_->GetRawKeyOperator()->EncodeTeraKey(row_key, "", "", - get_micros() / 1000000 + mu.ttl(), leveldb::TKT_FORSEEK, &tera_key); - } - } else { // Readable-KV - tera_key.assign(row_key); - } - if (mu.type() == kPut) { - batch->Put(tera_key, mu.value()); - } else { - batch->Delete(tera_key); - } + } + tablet_->GetRawKeyOperator()->EncodeTeraKey(row_key, mu.family(), mu.qualifier(), + timestamp, type, &tera_key); + uint32_t lg_id = 0; + size_t lg_num = tablet_->ldb_options_.exist_lg_list->size(); + if (lg_num > 1) { + if (type != leveldb::TKT_DEL) { + lg_id = tablet_->GetLGidByCFName(mu.family()); + leveldb::PutFixed32LGId(&tera_key, lg_id); + VLOG(10) << "Batch Request, key: " << DebugString(row_key) + << " family: " << mu.family() << ", lg_id: " << lg_id; + batch->Put(tera_key, mu.value()); } else { - for (uint32_t t = 0; t < mu_num; ++t) { - const Mutation& mu = row_mu.mutation_sequence().Get(t); - std::string tera_key; - leveldb::TeraKeyType type = leveldb::TKT_VALUE; - switch (mu.type()) { - case kDeleteRow: - type = leveldb::TKT_DEL; - break; - case kDeleteFamily: - type = leveldb::TKT_DEL_COLUMN; - break; - case kDeleteColumn: - type = leveldb::TKT_DEL_QUALIFIER; - break; - case kDeleteColumns: - type = leveldb::TKT_DEL_QUALIFIERS; - break; - case kAdd: - type = leveldb::TKT_ADD; - break; - case kAddInt64: - type = leveldb::TKT_ADDINT64; - break; - case kPutIfAbsent: - type = leveldb::TKT_PUT_IFABSENT; - break; - case kAppend: - type = leveldb::TKT_APPEND; - break; - default: - break; - } - int64_t timestamp = get_unique_micros(timestamp_old); - timestamp_old = timestamp; - if (tablet_->GetSchema().enable_txn()) { - if (mu.has_timestamp() && (mu.timestamp() != kLatestTimestamp)) { - timestamp = mu.timestamp(); - } - } else { - if (leveldb::TeraKey::IsTypeAllowUserSetTimestamp(type) && - mu.has_timestamp() && mu.timestamp() < timestamp) { - timestamp = mu.timestamp(); - } - } - tablet_->GetRawKeyOperator()->EncodeTeraKey(row_key, mu.family(), mu.qualifier(), - timestamp, type, &tera_key); - uint32_t lg_id = 0; - size_t lg_num = tablet_->ldb_options_.exist_lg_list->size(); - if (lg_num > 1) { - if (type != leveldb::TKT_DEL) { - lg_id = tablet_->GetLGidByCFName(mu.family()); - leveldb::PutFixed32LGId(&tera_key, lg_id); - VLOG(10) << "Batch Request, key: " << DebugString(row_key) - << " family: " << mu.family() << ", lg_id: " << lg_id; - batch->Put(tera_key, mu.value()); - } else { - // put row_del mark to all LGs - for (lg_id = 0; lg_id < lg_num; ++lg_id) { - std::string tera_key_tmp = tera_key; - leveldb::PutFixed32LGId(&tera_key_tmp, lg_id); - VLOG(10) << "Batch Request, key: " << DebugString(row_key) - << " family: " << mu.family() << ", lg_id: " << lg_id; - batch->Put(tera_key_tmp, mu.value()); - } - } - } else { - VLOG(10) << "Batch Request, key: " << DebugString(row_key) - << " family: " << mu.family() << ", qualifier " << mu.qualifier() - << ", ts " << timestamp << ", type " << type << ", lg_id: " << lg_id; - batch->Put(tera_key, mu.value()); - } - } + // put row_del mark to all LGs + for (lg_id = 0; lg_id < lg_num; ++lg_id) { + std::string tera_key_tmp = tera_key; + leveldb::PutFixed32LGId(&tera_key_tmp, lg_id); + VLOG(10) << "Batch Request, key: " << DebugString(row_key) + << " family: " << mu.family() << ", lg_id: " << lg_id; + batch->Put(tera_key_tmp, mu.value()); + } } + } else { + VLOG(10) << "Batch Request, key: " << DebugString(row_key) << " family: " << mu.family() + << ", qualifier " << mu.qualifier() << ", ts " << timestamp << ", type " + << type << ", lg_id: " << lg_id; + batch->Put(tera_key, mu.value()); + } } + } } - return; + } + return; } void TabletWriter::FinishTask(WriteTaskBuffer* task_buffer, StatusCode status) { - for (uint32_t task_idx = 0; task_idx < task_buffer->size(); ++task_idx) { - WriteTask& task = (*task_buffer)[task_idx]; - tablet_->GetCounter().write_rows.Add(task.row_mutation_vec->size()); - row_write_count.Add(task.row_mutation_vec->size()); - row_write_delay.Add(get_micros() - task.start_time); - for (uint32_t i = 0; i < task.row_mutation_vec->size(); i++) { - tablet_->GetCounter().write_kvs.Add((*task.row_mutation_vec)[i]->mutation_sequence_size()); - // set batch_write status for row_mu - if ((*task.status_vec)[i] == kTabletNodeOk) { - (*task.status_vec)[i] = status; - } - } - task.callback(task.row_mutation_vec, task.status_vec); + for (uint32_t task_idx = 0; task_idx < task_buffer->size(); ++task_idx) { + WriteTask& task = (*task_buffer)[task_idx]; + tablet_->GetCounter().write_rows.Add(task.row_mutation_vec->size()); + row_write_count.Add(task.row_mutation_vec->size()); + row_write_delay.Add(get_micros() - task.start_time); + for (uint32_t i = 0; i < task.row_mutation_vec->size(); i++) { + tablet_->GetCounter().write_kvs.Add((*task.row_mutation_vec)[i]->mutation_sequence_size()); + // set batch_write status for row_mu + if ((*task.status_vec)[i] == kTabletNodeOk) { + (*task.status_vec)[i] = status; + } } - return; + task.callback(task.row_mutation_vec, task.status_vec); + } + return; } -// set status to kTxnFail, if single row transaction or putifabsent conflicts +// set status to kTxnFail, if single row transaction conflicts bool TabletWriter::CheckSingleRowTxnConflict(const RowMutationSequence& row_mu, - std::set* commit_row_key_set, + std::unordered_set* commit_row_key_set, StatusCode* status) { - const std::string& row_key = row_mu.row_key(); - if (row_mu.txn_read_info().has_read()) { - if (!tablet_->GetSchema().enable_txn()) { - VLOG(10) << "txn of row " << DebugString(row_key) - << " is interrupted: txn not enabled"; - SetStatusCode(kTxnFail, status); - return true; - } - if (commit_row_key_set->find(row_key) != commit_row_key_set->end()) { - VLOG(10) << "txn of row " << DebugString(row_key) - << " is interrupted: found same row in one batch"; - SetStatusCode(kTxnFail, status); - return true; - } - if (!tablet_->SingleRowTxnCheck(row_key, row_mu.txn_read_info(), status)) { - VLOG(10) << "txn of row " << DebugString(row_key) - << " is interrupted: check fail, status: " - << StatusCodeToString(*status); - return true; - } - VLOG(10) << "txn of row " << DebugString(row_key) << " check pass"; + const std::string& row_key = row_mu.row_key(); + if (row_mu.txn_read_info().has_read()) { + if (commit_row_key_set->find(row_key) != commit_row_key_set->end()) { + VLOG(10) << "txn of row " << DebugString(row_key) + << " is interrupted: found same row in one batch"; + SetStatusCode(kTxnFail, status); + return true; } - commit_row_key_set->insert(row_key); - return false; + if (!tablet_->SingleRowTxnCheck(row_key, row_mu.txn_read_info(), status)) { + VLOG(10) << "txn of row " << DebugString(row_key) + << " is interrupted: check fail, status: " << StatusCodeToString(*status); + return true; + } + VLOG(10) << "txn of row " << DebugString(row_key) << " check pass"; + } + commit_row_key_set->insert(row_key); + return false; } bool TabletWriter::CheckIllegalRowArg(const RowMutationSequence& row_mu, - const std::set& cf_set, - StatusCode* status) { - // check arguments - if (row_mu.row_key().size() >= 64 * 1024) { + const std::set& cf_set, StatusCode* status) { + // check arguments + if (row_mu.row_key().size() >= 64 * 1024) { + SetStatusCode(kTableInvalidArg, status); + return true; + } + for (int32_t i = 0; i < row_mu.mutation_sequence().size(); ++i) { + const Mutation& mu = row_mu.mutation_sequence(i); + if (mu.value().size() >= 32 * 1024 * 1024) { + SetStatusCode(kTableInvalidArg, status); + return true; + } + if (!tablet_->KvOnly()) { + if (mu.qualifier().size() >= 64 * 1024) { // 64KB SetStatusCode(kTableInvalidArg, status); return true; + } + if (mu.type() != kDeleteRow && (cf_set.find(mu.family()) == cf_set.end())) { + SetStatusCode(kTableInvalidArg, status); + VLOG(11) << "batch write check, illegal cf, row " << DebugString(row_mu.row_key()) + << ", cf " << mu.family() << ", qu " << mu.qualifier() << ", ts " << mu.timestamp() + << ", type " << mu.type() << ", cf_set.size " << cf_set.size() << ", status " + << StatusCodeToString(*status); + return true; + } } - for (int32_t i = 0; i < row_mu.mutation_sequence().size(); ++i) { - const Mutation& mu = row_mu.mutation_sequence(i); - if (mu.value().size() >= 32 * 1024 * 1024) { - SetStatusCode(kTableInvalidArg, status); - return true; - } - if (!tablet_->KvOnly()) { - if (mu.qualifier().size() >= 64 * 1024) { // 64KB - SetStatusCode(kTableInvalidArg, status); - return true; - } - if (mu.type() != kDeleteRow && - (cf_set.find(mu.family()) == cf_set.end())) { - SetStatusCode(kTableInvalidArg, status); - VLOG(11) << "batch write check, illegal cf, row " << DebugString(row_mu.row_key()) - << ", cf " << mu.family() << ", qu " << mu.qualifier() - << ", ts " << mu.timestamp() << ", type " << mu.type() - << ", cf_set.size " << cf_set.size() - << ", status " << StatusCodeToString(*status); - return true; - } - } - } - return false; + } + return false; } void TabletWriter::CheckRows(WriteTaskBuffer* task_buffer) { - std::set cf_set; - TableSchema schema = tablet_->GetSchema(); - for (int32_t cf_idx = 0; cf_idx < schema.column_families_size(); ++cf_idx) { - cf_set.insert(schema.column_families(cf_idx).name()); - } - - std::set commit_row_key_set; - for (uint32_t task_idx = 0; task_idx < task_buffer->size(); ++task_idx) { - WriteTask& task = (*task_buffer)[task_idx]; - std::vector& row_mutation_vec = *task.row_mutation_vec; - std::vector& status_vec = *task.status_vec; - - for (uint32_t row_idx = 0; row_idx < row_mutation_vec.size(); ++row_idx) { - const RowMutationSequence* row_mu = row_mutation_vec[row_idx]; - if(CheckSingleRowTxnConflict(*row_mu, &commit_row_key_set, &status_vec[row_idx])) { - continue; - } - if (CheckIllegalRowArg(*row_mu, cf_set, &status_vec[row_idx])) { - continue; - } - status_vec[row_idx] = kTabletNodeOk; + std::set cf_set; + TableSchema schema = tablet_->GetSchema(); + for (int32_t cf_idx = 0; cf_idx < schema.column_families_size(); ++cf_idx) { + cf_set.insert(schema.column_families(cf_idx).name()); + } + + std::unordered_set commit_row_key_set; + for (uint32_t task_idx = 0; task_idx < task_buffer->size(); ++task_idx) { + WriteTask& task = (*task_buffer)[task_idx]; + std::vector& row_mutation_vec = *task.row_mutation_vec; + std::vector& status_vec = *task.status_vec; + + for (uint32_t row_idx = 0; row_idx < row_mutation_vec.size(); ++row_idx) { + const RowMutationSequence* row_mu = row_mutation_vec[row_idx]; + if (schema.enable_txn()) { + if (CheckSingleRowTxnConflict(*row_mu, &commit_row_key_set, &status_vec[row_idx])) { + continue; } + } + if (CheckIllegalRowArg(*row_mu, cf_set, &status_vec[row_idx])) { + continue; + } + status_vec[row_idx] = kTabletNodeOk; } - return; + } + return; } StatusCode TabletWriter::FlushToDiskBatch(WriteTaskBuffer* task_buffer) { - int64_t start_ts, check_cost, batch_cost, write_cost, finish_cost; - - start_ts = get_micros(); - CheckRows(task_buffer); - check_cost = get_micros(); - - leveldb::WriteBatch batch; - BatchRequest(task_buffer, &batch); - batch_cost = get_micros(); - StatusCode status = kTabletNodeOk; - if (tablet_->IsUrgentUnload()) { - LOG(INFO) << "tablet unload slow, reject to write log and memtable"; - } else { - const bool disable_wal = false; - tablet_->WriteBatch(&batch, disable_wal, FLAGS_tera_sync_log, &status); - } - batch.Clear(); - write_cost = get_micros(); - - FinishTask(task_buffer, status); - finish_cost = get_micros(); - int64_t check_delay = check_cost - start_ts; - int64_t batch_delay = batch_cost - check_cost; - int64_t write_delay = write_cost - batch_cost; - int64_t finish_delay = finish_cost - write_cost; - - flush_to_disk_check_delay.Add(check_delay); - flush_to_disk_batch_delay.Add(batch_delay); - flush_to_disk_write_delay.Add(write_delay); - flush_to_disk_finish_delay.Add(finish_delay); - - VLOG(7) << "finish a batch: " << task_buffer->size() << ", cost(check/batch/write/finish): " - << check_delay << "/" - << batch_delay << "/" - << write_delay << "/" - << finish_delay; - return status; + int64_t start_ts, check_cost, batch_cost, write_cost, finish_cost; + + start_ts = get_micros(); + CheckRows(task_buffer); + check_cost = get_micros(); + + leveldb::WriteBatch batch; + BatchRequest(task_buffer, &batch); + batch_cost = get_micros(); + StatusCode status = kTabletNodeOk; + if (tablet_->IsUrgentUnload()) { + LOG(INFO) << "tablet unload slow, reject to write log and memtable"; + } else { + const bool disable_wal = false; + tablet_->WriteBatch(&batch, disable_wal, FLAGS_tera_sync_log, &status); + } + batch.Clear(); + write_cost = get_micros(); + + FinishTask(task_buffer, status); + finish_cost = get_micros(); + int64_t check_delay = check_cost - start_ts; + int64_t batch_delay = batch_cost - check_cost; + int64_t write_delay = write_cost - batch_cost; + int64_t finish_delay = finish_cost - write_cost; + + flush_to_disk_check_delay.Add(check_delay); + flush_to_disk_batch_delay.Add(batch_delay); + flush_to_disk_write_delay.Add(write_delay); + flush_to_disk_finish_delay.Add(finish_delay); + + VLOG(7) << "finish a batch: " << task_buffer->size() + << ", cost(check/batch/write/finish): " << check_delay << "/" << batch_delay << "/" + << write_delay << "/" << finish_delay; + return status; } -} // namespace tabletnode -} // namespace tera +} // namespace tabletnode +} // namespace tera diff --git a/src/io/tablet_writer.h b/src/io/tablet_writer.h index 52b159920..48342b6d9 100644 --- a/src/io/tablet_writer.h +++ b/src/io/tablet_writer.h @@ -6,11 +6,12 @@ #define TERA_TABLETNODE_TABLET_WRITER_H_ #include +#include #include +#include #include "common/event.h" #include "common/mutex.h" -#include "common/thread.h" #include "proto/status_code.pb.h" #include "proto/tabletnode_rpc.pb.h" @@ -25,73 +26,70 @@ namespace io { class TabletIO; class TabletWriter { -public: - typedef std::function*, \ - std::vector*)> WriteCallback; - - struct WriteTask { - WriteTask():start_time(get_micros()) {} - std::vector* row_mutation_vec; - std::vector* status_vec; - WriteCallback callback; - int64_t start_time; - }; - - typedef std::vector WriteTaskBuffer; - -public: - TabletWriter(TabletIO* tablet_io); - ~TabletWriter(); - bool Write(std::vector* row_mutation_vec, - std::vector* status_vec, bool is_instant, - WriteCallback callback, StatusCode* status = NULL); - /// 初略计算一个request的数据大小 - static uint64_t CountRequestSize(std::vector& row_mutation_vec, - bool kv_only); - void Start(); - void Stop(); - bool IsBusy(); - -private: - void DoWork(); - bool SwapActiveBuffer(bool force); - /// 把一个request打到一个leveldbbatch里去, request是原子的, batch也是, so .. - void BatchRequest(WriteTaskBuffer* task_buffer, - leveldb::WriteBatch* batch); - bool CheckSingleRowTxnConflict(const RowMutationSequence& row_mu, - std::set* commit_row_key_set, - StatusCode* status); - - bool CheckIllegalRowArg(const RowMutationSequence& row_mu, - const std::set& cf_set, - StatusCode* status); - void CheckRows(WriteTaskBuffer* task_buffer); - /// 任务完成, 执行回调 - void FinishTask(WriteTaskBuffer* task_buffer, StatusCode status); - /// 将buffer刷到磁盘(leveldb), 并sync - StatusCode FlushToDiskBatch(WriteTaskBuffer* task_buffer); - -private: - TabletIO* tablet_; - - mutable Mutex task_mutex_; - mutable Mutex status_mutex_; - AutoResetEvent write_event_; ///< 有数据可写 - AutoResetEvent worker_done_event_; ///< worker退出 - - bool stopped_; - common::Thread thread_; - - WriteTaskBuffer* active_buffer_; ///< 前台buffer,接收写请求 - WriteTaskBuffer* sealed_buffer_; ///< 后台buffer,等待刷到磁盘 - int64_t sync_timestamp_; - - bool active_buffer_instant_; ///< active_buffer包含instant请求 - uint64_t active_buffer_size_; ///< active_buffer的数据大小 - bool tablet_busy_; ///< tablet处于忙碌状态 + public: + typedef std::function*, std::vector*)> + WriteCallback; + + struct WriteTask { + WriteTask() : start_time(get_micros()) {} + std::vector* row_mutation_vec; + std::vector* status_vec; + WriteCallback callback; + int64_t start_time; + }; + + typedef std::vector WriteTaskBuffer; + + public: + TabletWriter(TabletIO* tablet_io); + ~TabletWriter(); + bool Write(std::vector* row_mutation_vec, + std::vector* status_vec, bool is_instant, WriteCallback callback, + StatusCode* status = NULL); + /// 初略计算一个request的数据大小 + static uint64_t CountRequestSize(std::vector& row_mutation_vec, + bool kv_only); + void Start(); + void Stop(); + bool IsBusy(); + + private: + void DoWork(); + bool SwapActiveBuffer(bool force); + /// 把一个request打到一个leveldbbatch里去, request是原子的, batch也是, so .. + void BatchRequest(WriteTaskBuffer* task_buffer, leveldb::WriteBatch* batch); + bool CheckSingleRowTxnConflict(const RowMutationSequence& row_mu, + std::unordered_set* commit_row_key_set, + StatusCode* status); + + bool CheckIllegalRowArg(const RowMutationSequence& row_mu, const std::set& cf_set, + StatusCode* status); + void CheckRows(WriteTaskBuffer* task_buffer); + /// 任务完成, 执行回调 + void FinishTask(WriteTaskBuffer* task_buffer, StatusCode status); + /// 将buffer刷到磁盘(leveldb), 并sync + StatusCode FlushToDiskBatch(WriteTaskBuffer* task_buffer); + + private: + TabletIO* tablet_; + + mutable Mutex task_mutex_; + mutable Mutex status_mutex_; + AutoResetEvent write_event_; ///< 有数据可写 + + bool stopped_; + std::thread thread_; + + WriteTaskBuffer* active_buffer_; ///< 前台buffer,接收写请求 + WriteTaskBuffer* sealed_buffer_; ///< 后台buffer,等待刷到磁盘 + int64_t sync_timestamp_; + + bool active_buffer_instant_; ///< active_buffer包含instant请求 + uint64_t active_buffer_size_; ///< active_buffer的数据大小 + bool tablet_busy_; ///< tablet处于忙碌状态 }; -} // namespace tabletnode -} // namespace tera +} // namespace tabletnode +} // namespace tera -#endif // TERA_TABLETNODE_TABLET_WRITER_H_ +#endif // TERA_TABLETNODE_TABLET_WRITER_H_ diff --git a/src/io/test/async_writer_test.cc b/src/io/test/async_writer_test.cc index a8c51bf2f..a8af5069e 100644 --- a/src/io/test/async_writer_test.cc +++ b/src/io/test/async_writer_test.cc @@ -34,256 +34,247 @@ namespace io { const std::string working_dir = "async_writer_testdata/"; class AsyncWriterTest : public ::testing::Test { -public: - AsyncWriterTest() { - std::string cmd = std::string("mkdir -p ") + working_dir; - FLAGS_tera_tabletnode_path_prefix = "./"; - system(cmd.c_str()); + public: + AsyncWriterTest() { + std::string cmd = std::string("mkdir -p ") + working_dir; + FLAGS_tera_tabletnode_path_prefix = "./"; + system(cmd.c_str()); + } + ~AsyncWriterTest() { + std::string cmd = std::string("rm -rf ") + working_dir; + system(cmd.c_str()); + } + + void Done() { callback_count_++; } + + void CreateTestData(const std::string& table_name, int32_t key_start, int32_t key_end, + bool is_sync, bool is_instant, + std::vector* task_list) { + for (int32_t i = key_start; i < key_end; ++i) { + TabletWriter::WriteTask task; + WriteTabletRequest* request = new WriteTabletRequest(); + WriteTabletResponse* response = new WriteTabletResponse(); + ; + google::protobuf::Closure* done = google::protobuf::NewCallback(this, &AsyncWriterTest::Done); + request->set_sequence_id(i); + request->set_tablet_name(table_name); + request->set_is_sync(is_sync); + request->set_is_instant(is_instant); + std::string str = StringFormat("%08llu", i); + RowMutationSequence* mu_seq = request->add_row_list(); + mu_seq->set_row_key(str); + Mutation* mutation = mu_seq->add_mutation_sequence(); + mutation->set_type(kPut); + mutation->set_value(str); + + response->set_status(kTabletNodeOk); + response->mutable_row_status_list()->Reserve(1); + response->mutable_row_status_list()->AddAlreadyReserved(); + std::vector* index_list = new std::vector; + index_list->push_back(0); + Counter* done_counter = new Counter; + WriteRpcTimer* write_rpc = new WriteRpcTimer(request, response, done, 1); + + task.request = request; + task.response = response; + task.done = done; + task.index_list = index_list; + task.done_counter = done_counter; + task.timer = write_rpc; + task_list->push_back(task); } - ~AsyncWriterTest() { - std::string cmd = std::string("rm -rf ") + working_dir; - system(cmd.c_str()); + } + void CreateSingleTestData(const std::string& table_name, int32_t key, int32_t value, bool is_sync, + bool is_instant, std::vector* task_list) { + TabletWriter::WriteTask task; + WriteTabletRequest* request = new WriteTabletRequest(); + WriteTabletResponse* response = new WriteTabletResponse(); + ; + google::protobuf::Closure* done = google::protobuf::NewCallback(this, &AsyncWriterTest::Done); + request->set_sequence_id(key); + request->set_tablet_name(table_name); + request->set_is_sync(is_sync); + request->set_is_instant(is_instant); + std::string key_str = StringFormat("%08llu", key); + std::string value_str = StringFormat("%08llu", value); + RowMutationSequence* mu_seq = request->add_row_list(); + mu_seq->set_row_key(key_str); + Mutation* mutation = mu_seq->add_mutation_sequence(); + mutation->set_type(kPut); + mutation->set_value(value_str); + + response->set_status(kTabletNodeOk); + response->mutable_row_status_list()->Reserve(1); + response->mutable_row_status_list()->AddAlreadyReserved(); + std::vector* index_list = new std::vector; + index_list->push_back(0); + Counter* done_counter = new Counter; + WriteRpcTimer* write_rpc = new WriteRpcTimer(request, response, done, 1); + + task.request = request; + task.response = response; + task.done = done; + task.index_list = index_list; + task.done_counter = done_counter; + task.timer = write_rpc; + task_list->push_back(task); + } + + void CleanTestData(std::vector task_list) { + for (uint32_t i = 0; i < task_list.size(); ++i) { + delete task_list[i].request; + delete task_list[i].response; } - - void Done() { - callback_count_++; - } - - void CreateTestData(const std::string& table_name, - int32_t key_start, int32_t key_end, - bool is_sync, bool is_instant, - std::vector* task_list) { - for (int32_t i = key_start; i < key_end; ++i) { - TabletWriter::WriteTask task; - WriteTabletRequest* request = new WriteTabletRequest(); - WriteTabletResponse* response = new WriteTabletResponse();; - google::protobuf::Closure* done = - google::protobuf::NewCallback(this, &AsyncWriterTest::Done); - request->set_sequence_id(i); - request->set_tablet_name(table_name); - request->set_is_sync(is_sync); - request->set_is_instant(is_instant); - std::string str = StringFormat("%08llu", i); - RowMutationSequence* mu_seq = request->add_row_list(); - mu_seq->set_row_key(str); - Mutation* mutation = mu_seq->add_mutation_sequence(); - mutation->set_type(kPut); - mutation->set_value(str); - - response->set_status(kTabletNodeOk); - response->mutable_row_status_list()->Reserve(1); - response->mutable_row_status_list()->AddAlreadyReserved(); - std::vector* index_list = new std::vector; - index_list->push_back(0); - Counter* done_counter = new Counter; - WriteRpcTimer* write_rpc = new WriteRpcTimer(request, response, done, 1); - - task.request = request; - task.response = response; - task.done = done; - task.index_list = index_list; - task.done_counter = done_counter; - task.timer = write_rpc; - task_list->push_back(task); - } - } - void CreateSingleTestData(const std::string& table_name, - int32_t key, int32_t value, - bool is_sync, bool is_instant, - std::vector* task_list) { - TabletWriter::WriteTask task; - WriteTabletRequest* request = new WriteTabletRequest(); - WriteTabletResponse* response = new WriteTabletResponse();; - google::protobuf::Closure* done = - google::protobuf::NewCallback(this, &AsyncWriterTest::Done); - request->set_sequence_id(key); - request->set_tablet_name(table_name); - request->set_is_sync(is_sync); - request->set_is_instant(is_instant); - std::string key_str = StringFormat("%08llu", key); - std::string value_str = StringFormat("%08llu", value); - RowMutationSequence* mu_seq = request->add_row_list(); - mu_seq->set_row_key(key_str); - Mutation* mutation = mu_seq->add_mutation_sequence(); - mutation->set_type(kPut); - mutation->set_value(value_str); - - response->set_status(kTabletNodeOk); - response->mutable_row_status_list()->Reserve(1); - response->mutable_row_status_list()->AddAlreadyReserved(); - std::vector* index_list = new std::vector; - index_list->push_back(0); - Counter* done_counter = new Counter; - WriteRpcTimer* write_rpc = new WriteRpcTimer(request, response, done, 1); - - task.request = request; - task.response = response; - task.done = done; - task.index_list = index_list; - task.done_counter = done_counter; - task.timer = write_rpc; - task_list->push_back(task); - } - - void CleanTestData(std::vector task_list) { - for (uint32_t i = 0; i < task_list.size(); ++i) { - delete task_list[i].request; - delete task_list[i].response; - } + } + + void CreateTestTable(const std::string& table_name, + const std::vector& task_list) { + std::string tablet_path = working_dir + table_name; + io::TabletIO tablet; + EXPECT_TRUE(tablet.Load(TableSchema(), "", "", tablet_path, std::vector(), + std::map())); + for (uint32_t i = 0; i < task_list.size(); ++i) { + EXPECT_TRUE(tablet.Write(task_list[i].request, task_list[i].response, task_list[i].done, + task_list[i].index_list, task_list[i].done_counter, + task_list[i].timer)); } - - void CreateTestTable(const std::string& table_name, - const std::vector& task_list) { - std::string tablet_path = working_dir + table_name; - io::TabletIO tablet; - EXPECT_TRUE(tablet.Load(TableSchema(), "", "", tablet_path, std::vector(), std::map())); - for (uint32_t i = 0; i < task_list.size(); ++i) { - EXPECT_TRUE(tablet.Write(task_list[i].request, - task_list[i].response, - task_list[i].done, - task_list[i].index_list, - task_list[i].done_counter, - task_list[i].timer)); - } - EXPECT_TRUE(tablet.Unload()); - } - - void VerifyOperation(const std::string& table_name, - int32_t key_start, int32_t key_end) { - std::string tablet_path = working_dir + table_name; - io::TabletIO tablet; - EXPECT_TRUE(tablet.Load(TableSchema(), "", "", tablet_path, std::vector(), std::map())); - for (int32_t i = key_start; i < key_end; ++i) { - std::string key = StringFormat("%08llu", i); - std::string value; - EXPECT_TRUE(tablet.Read(key, &value)); - EXPECT_EQ(key, value); - } - EXPECT_TRUE(tablet.Unload()); + EXPECT_TRUE(tablet.Unload()); + } + + void VerifyOperation(const std::string& table_name, int32_t key_start, int32_t key_end) { + std::string tablet_path = working_dir + table_name; + io::TabletIO tablet; + EXPECT_TRUE(tablet.Load(TableSchema(), "", "", tablet_path, std::vector(), + std::map())); + for (int32_t i = key_start; i < key_end; ++i) { + std::string key = StringFormat("%08llu", i); + std::string value; + EXPECT_TRUE(tablet.Read(key, &value)); + EXPECT_EQ(key, value); } - - void VerifySingleOperation(const std::string& table_name, - int32_t key, int32_t value) { - std::string tablet_path = working_dir + table_name; - io::TabletIO tablet; - EXPECT_TRUE(tablet.Load(TableSchema(), "", "", tablet_path, std::vector(), std::map())); - std::string key_str = StringFormat("%08llu", key); - std::string value_str = StringFormat("%08llu", value); - std::string value; - EXPECT_TRUE(tablet.Read(key_str, &value)); - EXPECT_EQ(value_str, value); - EXPECT_TRUE(tablet.Unload()); - } -protected: - uint32_t callback_count_; + EXPECT_TRUE(tablet.Unload()); + } + + void VerifySingleOperation(const std::string& table_name, int32_t key, int32_t value) { + std::string tablet_path = working_dir + table_name; + io::TabletIO tablet; + EXPECT_TRUE(tablet.Load(TableSchema(), "", "", tablet_path, std::vector(), + std::map())); + std::string key_str = StringFormat("%08llu", key); + std::string value_str = StringFormat("%08llu", value); + std::string value; + EXPECT_TRUE(tablet.Read(key_str, &value)); + EXPECT_EQ(value_str, value); + EXPECT_TRUE(tablet.Unload()); + } + + protected: + uint32_t callback_count_; }; TEST_F(AsyncWriterTest, Instant) { - int32_t start = 0; - int32_t end = 1000; - bool is_sync = false; - bool is_instant = true; - std::string table_name = "instant"; - std::vector task_list; - - CreateTestData(table_name, start, end, is_sync, is_instant, - &task_list); - EXPECT_TRUE(task_list.size() > 0); - CreateTestTable(table_name, task_list); - VerifyOperation(table_name, start, end); - CleanTestData(task_list); + int32_t start = 0; + int32_t end = 1000; + bool is_sync = false; + bool is_instant = true; + std::string table_name = "instant"; + std::vector task_list; + + CreateTestData(table_name, start, end, is_sync, is_instant, &task_list); + EXPECT_TRUE(task_list.size() > 0); + CreateTestTable(table_name, task_list); + VerifyOperation(table_name, start, end); + CleanTestData(task_list); } TEST_F(AsyncWriterTest, NotInstant) { - int32_t start = 0; - int32_t end = 10; - bool is_sync = false; - bool is_instant = false; - std::string table_name = "no_instant"; - std::vector task_list; - - CreateTestData(table_name, start, end, is_sync, is_instant, - &task_list); - EXPECT_TRUE(task_list.size() > 0); - CreateTestTable(table_name, task_list); - VerifyOperation(table_name, start, end); - CleanTestData(task_list); + int32_t start = 0; + int32_t end = 10; + bool is_sync = false; + bool is_instant = false; + std::string table_name = "no_instant"; + std::vector task_list; + + CreateTestData(table_name, start, end, is_sync, is_instant, &task_list); + EXPECT_TRUE(task_list.size() > 0); + CreateTestTable(table_name, task_list); + VerifyOperation(table_name, start, end); + CleanTestData(task_list); } TEST_F(AsyncWriterTest, InstantToNotInstant) { - LOG(INFO) << "InstantToNot"; - std::string table_name = "from_instant_to_not_instant"; - std::vector task_list; - - // create test data for instantly return query - CreateTestData(table_name, 0, 10, false, true, &task_list); - // create test data for un-instantly return query - CreateTestData(table_name, 10, 20, false, false, &task_list); - EXPECT_TRUE(task_list.size() > 0); - - CreateTestTable(table_name, task_list); - VerifyOperation(table_name, 0, 20); - CleanTestData(task_list); + LOG(INFO) << "InstantToNot"; + std::string table_name = "from_instant_to_not_instant"; + std::vector task_list; + + // create test data for instantly return query + CreateTestData(table_name, 0, 10, false, true, &task_list); + // create test data for un-instantly return query + CreateTestData(table_name, 10, 20, false, false, &task_list); + EXPECT_TRUE(task_list.size() > 0); + + CreateTestTable(table_name, task_list); + VerifyOperation(table_name, 0, 20); + CleanTestData(task_list); } TEST_F(AsyncWriterTest, NotInstantToInstant) { - LOG(INFO) << "NotInstantToInstant"; - std::string table_name = "from_not_instant_to_instant"; - std::vector task_list; + LOG(INFO) << "NotInstantToInstant"; + std::string table_name = "from_not_instant_to_instant"; + std::vector task_list; - // create test data for un-instantly return query - CreateTestData(table_name, 10, 24, false, false, &task_list); - // create test data for instantly return query - CreateTestData(table_name, 0, 10, false, true, &task_list); - EXPECT_TRUE(task_list.size() > 0); + // create test data for un-instantly return query + CreateTestData(table_name, 10, 24, false, false, &task_list); + // create test data for instantly return query + CreateTestData(table_name, 0, 10, false, true, &task_list); + EXPECT_TRUE(task_list.size() > 0); - CreateTestTable(table_name, task_list); - VerifyOperation(table_name, 0, 20); - CleanTestData(task_list); + CreateTestTable(table_name, task_list); + VerifyOperation(table_name, 0, 20); + CleanTestData(task_list); - LOG(INFO) << "m_callback_count = " << callback_count_; + LOG(INFO) << "m_callback_count = " << callback_count_; } TEST_F(AsyncWriterTest, KeepOrderForInstant) { - LOG(INFO) << "KeepOrder"; - std::string table_name = "keep_order"; - std::vector task_list; + LOG(INFO) << "KeepOrder"; + std::string table_name = "keep_order"; + std::vector task_list; - CreateTestData(table_name, 0, 2, false, false, &task_list); - CreateSingleTestData(table_name, 1, 10, false, true, &task_list); - EXPECT_TRUE(task_list.size() > 0); + CreateTestData(table_name, 0, 2, false, false, &task_list); + CreateSingleTestData(table_name, 1, 10, false, true, &task_list); + EXPECT_TRUE(task_list.size() > 0); - CreateTestTable(table_name, task_list); -// VerifySingleOperation(table_name, 1, 10); - CleanTestData(task_list); + CreateTestTable(table_name, task_list); + // VerifySingleOperation(table_name, 1, 10); + CleanTestData(task_list); - LOG(INFO) << "m_callback_count = " << callback_count_; + LOG(INFO) << "m_callback_count = " << callback_count_; } TEST_F(AsyncWriterTest, KeepOrderForNotInstant) { - LOG(INFO) << "KeepOrder"; - std::string table_name = "keep_order"; - std::vector task_list; + LOG(INFO) << "KeepOrder"; + std::string table_name = "keep_order"; + std::vector task_list; - CreateTestData(table_name, 0, 2, false, true, &task_list); - CreateSingleTestData(table_name, 1, 10, false, false, &task_list); - EXPECT_TRUE(task_list.size() > 0); + CreateTestData(table_name, 0, 2, false, true, &task_list); + CreateSingleTestData(table_name, 1, 10, false, false, &task_list); + EXPECT_TRUE(task_list.size() > 0); - CreateTestTable(table_name, task_list); - VerifySingleOperation(table_name, 1, 10); - CleanTestData(task_list); + CreateTestTable(table_name, task_list); + VerifySingleOperation(table_name, 1, 10); + CleanTestData(task_list); } -} // namespace io -} // namespace tera +} // namespace io +} // namespace tera int main(int argc, char** argv) { - FLAGS_v = 6; - FLAGS_tera_tablet_write_buffer_size = 2; - FLAGS_tera_leveldb_env_type = "local"; - ::google::InitGoogleLogging(argv[0]); - ::google::ParseCommandLineFlags(&argc, &argv, true); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + FLAGS_v = 6; + FLAGS_tera_tablet_write_buffer_size = 2; + FLAGS_tera_leveldb_env_type = "local"; + ::google::InitGoogleLogging(argv[0]); + ::google::ParseCommandLineFlags(&argc, &argv, true); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); } - diff --git a/src/io/test/load_test.cc b/src/io/test/load_test.cc index 5f3b26c0b..b1676c70a 100644 --- a/src/io/test/load_test.cc +++ b/src/io/test/load_test.cc @@ -42,373 +42,379 @@ static const std::string working_dir = "testdata/"; static const uint32_t N = 50000; class TabletIOTest : public ::testing::Test { -public: - TabletIOTest() { - std::string cmd = std::string("mkdir -p ") + working_dir; - FLAGS_tera_tabletnode_path_prefix = "./"; - system(cmd.c_str()); + public: + TabletIOTest() { + std::string cmd = std::string("mkdir -p ") + working_dir; + FLAGS_tera_tabletnode_path_prefix = "./"; + system(cmd.c_str()); - InitSchema(); - } + InitSchema(); + } - ~TabletIOTest() { - std::string cmd = std::string("rm -rf ") + working_dir; - system(cmd.c_str()); - } + ~TabletIOTest() { + std::string cmd = std::string("rm -rf ") + working_dir; + system(cmd.c_str()); + } - const TableSchema& GetTableSchema() { - return schema_; + const TableSchema& GetTableSchema() { return schema_; } - } + void InitSchema() { + schema_.set_name("tera"); + schema_.set_raw_key(Binary); - void InitSchema() { - schema_.set_name("tera"); - schema_.set_raw_key(Binary); - - LocalityGroupSchema* lg = schema_.add_locality_groups(); - lg->set_name("lg0"); + LocalityGroupSchema* lg = schema_.add_locality_groups(); + lg->set_name("lg0"); - ColumnFamilySchema* cf = schema_.add_column_families(); - cf->set_name("column"); - cf->set_locality_group("lg0"); - cf->set_max_versions(3); - } + ColumnFamilySchema* cf = schema_.add_column_families(); + cf->set_name("column"); + cf->set_locality_group("lg0"); + cf->set_max_versions(3); + } - std::map empty_snaphsots_; - std::map empty_rollback_; - TableSchema schema_; + std::map empty_snaphsots_; + std::map empty_rollback_; + TableSchema schema_; }; // prepare test data bool PrepareTestData(TabletIO* tablet, uint64_t e, uint64_t s = 0) { - leveldb::WriteBatch batch; - for (uint64_t i = s; i < e; ++i) { - std::string str = StringFormat("%011llu", i); // NumberToString(i); - batch.Put(str, str); - } - return tablet->WriteBatch(&batch); + leveldb::WriteBatch batch; + for (uint64_t i = s; i < e; ++i) { + std::string str = StringFormat("%011llu", i); // NumberToString(i); + batch.Put(str, str); + } + return tablet->WriteBatch(&batch); } TEST_F(TabletIOTest, General) { - std::string tablet_path = working_dir + "general"; - std::string key_start = ""; - std::string key_end = ""; - StatusCode status; + std::string tablet_path = working_dir + "general"; + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; - TabletIO tablet(key_start, key_end, tablet_path); - leveldb::MockEnv* env = (leveldb::MockEnv*)LeveldbMockEnv(); - env->SetPrefix(mock_env_prefix); - tablet.SetMockEnv(env); + TabletIO tablet(key_start, key_end, tablet_path); + leveldb::MockEnv* env = (leveldb::MockEnv*)LeveldbMockEnv(); + env->SetPrefix(mock_env_prefix); + tablet.SetMockEnv(env); - leveldb::Logger* ldb_logger; - leveldb::Status s = leveldb::Env::Default()->NewLogger("./log/leveldblog", &ldb_logger); - assert(s.ok()); - EXPECT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), - std::set(), ldb_logger, NULL, NULL, &status)); + leveldb::Logger* ldb_logger; + leveldb::Status s = leveldb::Env::Default()->NewLogger( + "./log/leveldblog", leveldb::LogOption::LogOptionBuilder().Build(), &ldb_logger); + assert(s.ok()); + EXPECT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), + std::set(), ldb_logger, NULL, NULL, &status)); - std::string key = "555"; - std::string value = "value of 555"; + std::string key = "555"; + std::string value = "value of 555"; - EXPECT_TRUE(tablet.WriteOne(key, value)); + EXPECT_TRUE(tablet.WriteOne(key, value)); - std::string read_value; + std::string read_value; - EXPECT_TRUE(tablet.Read(key, &read_value)); + EXPECT_TRUE(tablet.Read(key, &read_value)); - EXPECT_EQ(value, read_value); + EXPECT_EQ(value, read_value); - EXPECT_TRUE(tablet.Unload()); + EXPECT_TRUE(tablet.Unload()); - env->ResetMock(); + env->ResetMock(); } static bool DropCurrent(int32_t t, const std::string& fname) { - // std::cout << "[DropCurrent]" << t << " " << fname << std::endl; - if ((t == 1) && (fname == "CURRENT")) { - return true; - } - return false; + // std::cout << "[DropCurrent]" << t << " " << fname << std::endl; + if ((t == 1) && (fname == "CURRENT")) { + return true; + } + return false; } TEST_F(TabletIOTest, CurrentLost) { - std::string tablet_path = working_dir + "general"; - std::string key_start = ""; - std::string key_end = ""; - StatusCode status; - - TabletIO tablet(key_start, key_end, tablet_path); - leveldb::MockEnv* env = (leveldb::MockEnv*)LeveldbMockEnv(); - env->SetPrefix(mock_env_prefix); - env->SetGetChildrenCallback(DropCurrent); - tablet.SetMockEnv(env); - - leveldb::Logger* ldb_logger; - leveldb::Status s = leveldb::Env::Default()->NewLogger("./log/leveldblog", &ldb_logger); - assert(s.ok()); - - ASSERT_FALSE(tablet.Load(TableSchema(), tablet_path, std::vector(), - std::set(), ldb_logger, NULL, NULL, &status)); - - env->ResetMock(); + std::string tablet_path = working_dir + "general"; + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; + + TabletIO tablet(key_start, key_end, tablet_path); + leveldb::MockEnv* env = (leveldb::MockEnv*)LeveldbMockEnv(); + env->SetPrefix(mock_env_prefix); + env->SetGetChildrenCallback(DropCurrent); + tablet.SetMockEnv(env); + + leveldb::Logger* ldb_logger; + leveldb::Status s = leveldb::Env::Default()->NewLogger( + "./log/leveldblog", leveldb::LogOption::LogOptionBuilder().Build(), &ldb_logger); + assert(s.ok()); + + ASSERT_FALSE(tablet.Load(TableSchema(), tablet_path, std::vector(), + std::set(), ldb_logger, NULL, NULL, &status)); + + env->ResetMock(); } //#if 0 static bool CannotReadCurrent(int32_t t, const std::string& fname) { - // std::cout << "[CannotReadCurrent]" << t << " " << fname << std::endl; - if ((t == 1) && (fname.find("CURRENT") != std::string::npos)) { - return true; - } - return false; + // std::cout << "[CannotReadCurrent]" << t << " " << fname << std::endl; + if ((t == 1) && (fname.find("CURRENT") != std::string::npos)) { + return true; + } + return false; } TEST_F(TabletIOTest, CurrentReadFailed) { - std::string tablet_path = working_dir + "general"; - std::string key_start = ""; - std::string key_end = ""; - StatusCode status; - - TabletIO tablet(key_start, key_end, tablet_path); - leveldb::MockEnv* env = (leveldb::MockEnv*)LeveldbMockEnv(); - env->SetPrefix(mock_env_prefix); - env->SetNewSequentialFileFailedCallback(CannotReadCurrent); - tablet.SetMockEnv(env); - - leveldb::Logger* ldb_logger; - leveldb::Status s = leveldb::Env::Default()->NewLogger("./log/leveldblog", &ldb_logger); - assert(s.ok()); - - ASSERT_FALSE(tablet.Load(TableSchema(), tablet_path, std::vector(), - std::set(), ldb_logger, NULL, NULL, &status)); - - env->ResetMock(); + std::string tablet_path = working_dir + "general"; + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; + + TabletIO tablet(key_start, key_end, tablet_path); + leveldb::MockEnv* env = (leveldb::MockEnv*)LeveldbMockEnv(); + env->SetPrefix(mock_env_prefix); + env->SetNewSequentialFileFailedCallback(CannotReadCurrent); + tablet.SetMockEnv(env); + + leveldb::Logger* ldb_logger; + leveldb::Status s = leveldb::Env::Default()->NewLogger( + "./log/leveldblog", leveldb::LogOption::LogOptionBuilder().Build(), &ldb_logger); + assert(s.ok()); + + ASSERT_FALSE(tablet.Load(TableSchema(), tablet_path, std::vector(), + std::set(), ldb_logger, NULL, NULL, &status)); + + env->ResetMock(); } bool ReadCorrputedCurrent(int32_t t, char* scratch, size_t* mock_size) { - // std::cout << "[ReadCorrputedCurrent]" << t << std::endl; - if (t != 1) { - // no mock - return false; - } - // NOTE: don't fill too many bytes into scratch, otherwise overflow ocurred - // users need only N bytes in Read - // the `N' is not passed to this callback, carefully! - const char* c = "oops"; - memcpy(scratch, c, strlen(c)); - *mock_size = strlen(c); - return true; + // std::cout << "[ReadCorrputedCurrent]" << t << std::endl; + if (t != 1) { + // no mock + return false; + } + // NOTE: don't fill too many bytes into scratch, otherwise overflow ocurred + // users need only N bytes in Read + // the `N' is not passed to this callback, carefully! + const char* c = "oops"; + memcpy(scratch, c, strlen(c)); + *mock_size = strlen(c); + return true; } TEST_F(TabletIOTest, CurrentCorrupted) { - std::string tablet_path = working_dir + "general"; - std::string key_start = ""; - std::string key_end = ""; - StatusCode status; + std::string tablet_path = working_dir + "general"; + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; - TabletIO tablet(key_start, key_end, tablet_path); - leveldb::MockEnv* env = (leveldb::MockEnv*)LeveldbMockEnv(); - env->SetPrefix(mock_env_prefix); + TabletIO tablet(key_start, key_end, tablet_path); + leveldb::MockEnv* env = (leveldb::MockEnv*)LeveldbMockEnv(); + env->SetPrefix(mock_env_prefix); - env->SetSequentialFileReadCallback(ReadCorrputedCurrent); - tablet.SetMockEnv(env); + env->SetSequentialFileReadCallback(ReadCorrputedCurrent); + tablet.SetMockEnv(env); - leveldb::Logger* ldb_logger; - leveldb::Status s = leveldb::Env::Default()->NewLogger("./log/leveldblog", &ldb_logger); - assert(s.ok()); + leveldb::Logger* ldb_logger; + leveldb::Status s = leveldb::Env::Default()->NewLogger( + "./log/leveldblog", leveldb::LogOption::LogOptionBuilder().Build(), &ldb_logger); + assert(s.ok()); - ASSERT_FALSE(tablet.Load(TableSchema(), tablet_path, std::vector(), - std::set(), ldb_logger, NULL, NULL, &status)); + ASSERT_FALSE(tablet.Load(TableSchema(), tablet_path, std::vector(), + std::set(), ldb_logger, NULL, NULL, &status)); - env->ResetMock(); + env->ResetMock(); } bool ReadCurrentGetNotExistsManifest(int32_t t, char* scratch, size_t* mock_size) { - // std::cout << "[ReadCurrentGetMockManifest]" << t << std::endl; - if (t != 1) { - // no mock - return false; - } - // NOTE: don't fill too many bytes into scratch, otherwise overflow ocurred - // users need only N bytes in Read - // the `N' is not passed to this callback, carefully! - const char* c = "MANIFEST-999997\n"; // manifest not exists - memcpy(scratch, c, strlen(c)); - *mock_size = strlen(c); - return true; + // std::cout << "[ReadCurrentGetMockManifest]" << t << std::endl; + if (t != 1) { + // no mock + return false; + } + // NOTE: don't fill too many bytes into scratch, otherwise overflow ocurred + // users need only N bytes in Read + // the `N' is not passed to this callback, carefully! + const char* c = "MANIFEST-999997\n"; // manifest not exists + memcpy(scratch, c, strlen(c)); + *mock_size = strlen(c); + return true; } TEST_F(TabletIOTest, ManifestLost) { - std::string tablet_path = working_dir + "general"; - std::string key_start = ""; - std::string key_end = ""; - StatusCode status; + std::string tablet_path = working_dir + "general"; + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; - TabletIO tablet(key_start, key_end, tablet_path); - leveldb::MockEnv* env = (leveldb::MockEnv*)LeveldbMockEnv(); - env->SetPrefix(mock_env_prefix); + TabletIO tablet(key_start, key_end, tablet_path); + leveldb::MockEnv* env = (leveldb::MockEnv*)LeveldbMockEnv(); + env->SetPrefix(mock_env_prefix); - env->SetSequentialFileReadCallback(ReadCurrentGetNotExistsManifest); - tablet.SetMockEnv(env); + env->SetSequentialFileReadCallback(ReadCurrentGetNotExistsManifest); + tablet.SetMockEnv(env); - leveldb::Logger* ldb_logger; - leveldb::Status s = leveldb::Env::Default()->NewLogger("./log/leveldblog", &ldb_logger); - assert(s.ok()); + leveldb::Logger* ldb_logger; + leveldb::Status s = leveldb::Env::Default()->NewLogger( + "./log/leveldblog", leveldb::LogOption::LogOptionBuilder().Build(), &ldb_logger); + assert(s.ok()); - ASSERT_FALSE(tablet.Load(TableSchema(), tablet_path, std::vector(), - std::set(), ldb_logger, NULL, NULL, &status)); + ASSERT_FALSE(tablet.Load(TableSchema(), tablet_path, std::vector(), + std::set(), ldb_logger, NULL, NULL, &status)); - env->ResetMock(); + env->ResetMock(); } static bool CannotReadManifest(int32_t t, const std::string& fname) { - // std::cout << "[CannotReadCurrent]" << t << " " << fname << std::endl; - if ((t == 2) && (fname.find("MANIFEST") != std::string::npos)) { - return true; - } - return false; + // std::cout << "[CannotReadCurrent]" << t << " " << fname << std::endl; + if ((t == 2) && (fname.find("MANIFEST") != std::string::npos)) { + return true; + } + return false; } TEST_F(TabletIOTest, ManifestReadFailed) { - std::string tablet_path = working_dir + "general"; - std::string key_start = ""; - std::string key_end = ""; - StatusCode status; - - TabletIO tablet(key_start, key_end, tablet_path); - leveldb::MockEnv* env = (leveldb::MockEnv*)LeveldbMockEnv(); - env->SetPrefix(mock_env_prefix); - env->SetNewSequentialFileFailedCallback(CannotReadManifest); - tablet.SetMockEnv(env); - - leveldb::Logger* ldb_logger; - leveldb::Status s = leveldb::Env::Default()->NewLogger("./log/leveldblog", &ldb_logger); - assert(s.ok()); - - ASSERT_FALSE(tablet.Load(TableSchema(), tablet_path, std::vector(), - std::set(), ldb_logger, NULL, NULL, &status)); - - env->ResetMock(); + std::string tablet_path = working_dir + "general"; + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; + + TabletIO tablet(key_start, key_end, tablet_path); + leveldb::MockEnv* env = (leveldb::MockEnv*)LeveldbMockEnv(); + env->SetPrefix(mock_env_prefix); + env->SetNewSequentialFileFailedCallback(CannotReadManifest); + tablet.SetMockEnv(env); + + leveldb::Logger* ldb_logger; + leveldb::Status s = leveldb::Env::Default()->NewLogger( + "./log/leveldblog", leveldb::LogOption::LogOptionBuilder().Build(), &ldb_logger); + assert(s.ok()); + + ASSERT_FALSE(tablet.Load(TableSchema(), tablet_path, std::vector(), + std::set(), ldb_logger, NULL, NULL, &status)); + + env->ResetMock(); } bool ReadCorrputedManifest(int32_t t, char* scratch, size_t* mock_size) { - // std::cout << "[ReadCorrputedManifest]" << t << std::endl; - if (t != 3) { - // no mock - return false; - } - // NOTE: don't fill too many bytes into scratch, otherwise overflow ocurred - // users need only N bytes in Read - // the `N' is not passed to this callback, carefully! - const char* c = "oops2"; - memcpy(scratch, c, strlen(c)); - *mock_size = strlen(c); - return true; + // std::cout << "[ReadCorrputedManifest]" << t << std::endl; + if (t != 3) { + // no mock + return false; + } + // NOTE: don't fill too many bytes into scratch, otherwise overflow ocurred + // users need only N bytes in Read + // the `N' is not passed to this callback, carefully! + const char* c = "oops2"; + memcpy(scratch, c, strlen(c)); + *mock_size = strlen(c); + return true; } TEST_F(TabletIOTest, ManifestCorrupted) { - std::string tablet_path = working_dir + "general"; - std::string key_start = ""; - std::string key_end = ""; - StatusCode status; + std::string tablet_path = working_dir + "general"; + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; - TabletIO tablet(key_start, key_end, tablet_path); - leveldb::MockEnv* env = (leveldb::MockEnv*)LeveldbMockEnv(); - env->SetPrefix(mock_env_prefix); + TabletIO tablet(key_start, key_end, tablet_path); + leveldb::MockEnv* env = (leveldb::MockEnv*)LeveldbMockEnv(); + env->SetPrefix(mock_env_prefix); - env->SetSequentialFileReadCallback(ReadCorrputedManifest); - tablet.SetMockEnv(env); + env->SetSequentialFileReadCallback(ReadCorrputedManifest); + tablet.SetMockEnv(env); - leveldb::Logger* ldb_logger; - leveldb::Status s = leveldb::Env::Default()->NewLogger("./log/leveldblog", &ldb_logger); - assert(s.ok()); + leveldb::Logger* ldb_logger; + leveldb::Status s = leveldb::Env::Default()->NewLogger( + "./log/leveldblog", leveldb::LogOption::LogOptionBuilder().Build(), &ldb_logger); + assert(s.ok()); - ASSERT_FALSE(tablet.Load(TableSchema(), tablet_path, std::vector(), - std::set(), ldb_logger, NULL, NULL, &status)); + ASSERT_FALSE(tablet.Load(TableSchema(), tablet_path, std::vector(), + std::set(), ldb_logger, NULL, NULL, &status)); - env->ResetMock(); + env->ResetMock(); } static bool DropSst(int32_t t, const std::string& fname) { - // std::cout << "[DropSst]" << t << " " << fname << std::endl; - if ((fname.find(".sst") != std::string::npos)) { - return true; - } - return false; + // std::cout << "[DropSst]" << t << " " << fname << std::endl; + if ((fname.find(".sst") != std::string::npos)) { + return true; + } + return false; } TEST_F(TabletIOTest, SstLost) { - std::string tablet_path = working_dir + "general"; - std::string key_start = ""; - std::string key_end = ""; - StatusCode status; + std::string tablet_path = working_dir + "general"; + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; - TabletIO tablet(key_start, key_end, tablet_path); - leveldb::MockEnv* env = (leveldb::MockEnv*)LeveldbMockEnv(); - env->SetPrefix(mock_env_prefix); + TabletIO tablet(key_start, key_end, tablet_path); + leveldb::MockEnv* env = (leveldb::MockEnv*)LeveldbMockEnv(); + env->SetPrefix(mock_env_prefix); - env->SetGetChildrenCallback(DropSst); - tablet.SetMockEnv(env); + env->SetGetChildrenCallback(DropSst); + tablet.SetMockEnv(env); - leveldb::Logger* ldb_logger; - leveldb::Status s = leveldb::Env::Default()->NewLogger("./log/leveldblog", &ldb_logger); - assert(s.ok()); + leveldb::Logger* ldb_logger; + leveldb::Status s = leveldb::Env::Default()->NewLogger( + "./log/leveldblog", leveldb::LogOption::LogOptionBuilder().Build(), &ldb_logger); + assert(s.ok()); - ASSERT_FALSE(tablet.Load(TableSchema(), tablet_path, std::vector(), - std::set(), ldb_logger, NULL, NULL, &status)); + ASSERT_FALSE(tablet.Load(TableSchema(), tablet_path, std::vector(), + std::set(), ldb_logger, NULL, NULL, &status)); - env->ResetMock(); + env->ResetMock(); } TEST_F(TabletIOTest, SstLostButIgnore) { - std::string tablet_path = working_dir + "general"; - std::string key_start = ""; - std::string key_end = ""; - StatusCode status; - - TabletIO tablet(key_start, key_end, tablet_path); - leveldb::MockEnv* env = (leveldb::MockEnv*)LeveldbMockEnv(); - - env->SetPrefix(mock_env_prefix); - env->SetGetChildrenCallback(DropSst); - tablet.SetMockEnv(env); - - leveldb::Logger* ldb_logger; - leveldb::Status s = leveldb::Env::Default()->NewLogger("./log/leveldblog", &ldb_logger); - assert(s.ok()); - std::set ignore_err_lgs; - ignore_err_lgs.insert("lg0"); - TableSchema schema = TableSchema(); - - LocalityGroupSchema* lg = schema.add_locality_groups(); - lg->set_name("lg0"); - - ColumnFamilySchema* cf = schema.add_column_families(); - cf->set_name("column"); - cf->set_locality_group("lg0"); - cf->set_max_versions(3); - - ASSERT_TRUE(tablet.Load(schema, tablet_path, std::vector(), - ignore_err_lgs, ldb_logger, NULL, NULL, &status)); - - env->ResetMock(); + std::string tablet_path = working_dir + "general"; + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; + + TabletIO tablet(key_start, key_end, tablet_path); + leveldb::MockEnv* env = (leveldb::MockEnv*)LeveldbMockEnv(); + + env->SetPrefix(mock_env_prefix); + env->SetGetChildrenCallback(DropSst); + tablet.SetMockEnv(env); + + leveldb::Logger* ldb_logger; + leveldb::Status s = leveldb::Env::Default()->NewLogger( + "./log/leveldblog", leveldb::LogOption::LogOptionBuilder().Build(), &ldb_logger); + assert(s.ok()); + std::set ignore_err_lgs; + ignore_err_lgs.insert("lg0"); + TableSchema schema = TableSchema(); + + LocalityGroupSchema* lg = schema.add_locality_groups(); + lg->set_name("lg0"); + + ColumnFamilySchema* cf = schema.add_column_families(); + cf->set_name("column"); + cf->set_locality_group("lg0"); + cf->set_max_versions(3); + + ASSERT_TRUE(tablet.Load(schema, tablet_path, std::vector(), ignore_err_lgs, ldb_logger, + NULL, NULL, &status)); + + env->ResetMock(); } //#endif -} // namespace io -} // namespace tera +} // namespace io +} // namespace tera int main(int argc, char** argv) { - FLAGS_tera_io_retry_max_times = 1; - FLAGS_tera_tablet_living_period = 0; - FLAGS_tera_tablet_max_write_buffer_size = 1; - // FLAGS_tera_leveldb_env_type = "local"; - ::google::InitGoogleLogging(argv[0]); - FLAGS_log_dir = "./log"; - if (access(FLAGS_log_dir.c_str(), F_OK)) { - mkdir(FLAGS_log_dir.c_str(), 0777); - } - std::string pragram_name("tera"); - tera::utils::SetupLog(pragram_name); - ::google::ParseCommandLineFlags(&argc, &argv, true); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + FLAGS_tera_io_retry_max_times = 1; + FLAGS_tera_tablet_living_period = 0; + FLAGS_tera_tablet_max_write_buffer_size = 1; + // FLAGS_tera_leveldb_env_type = "local"; + ::google::InitGoogleLogging(argv[0]); + FLAGS_log_dir = "./log"; + if (access(FLAGS_log_dir.c_str(), F_OK)) { + mkdir(FLAGS_log_dir.c_str(), 0777); + } + std::string pragram_name("tera"); + tera::utils::SetupLog(pragram_name); + ::google::ParseCommandLineFlags(&argc, &argv, true); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); } diff --git a/src/io/test/tablet_io_test.cc b/src/io/test/tablet_io_test.cc index 146c18d00..8faea6eb5 100644 --- a/src/io/test/tablet_io_test.cc +++ b/src/io/test/tablet_io_test.cc @@ -41,788 +41,979 @@ const std::string working_dir = "testdata/"; const uint32_t N = 50000; class TabletIOTest : public ::testing::Test { -public: - TabletIOTest() { - std::string cmd = std::string("mkdir -p ") + working_dir; - FLAGS_tera_tabletnode_path_prefix = "./"; - system(cmd.c_str()); - - InitSchema(); - } - - ~TabletIOTest() { - std::string cmd = std::string("rm -rf ") + working_dir; - system(cmd.c_str()); - } - - const TableSchema& GetTableSchema() { - return schema_; - - } - - void InitSchema() { - schema_.set_name("tera"); - schema_.set_raw_key(Binary); - - LocalityGroupSchema* lg = schema_.add_locality_groups(); - lg->set_name("lg0"); - - ColumnFamilySchema* cf = schema_.add_column_families(); - cf->set_name("column"); - cf->set_locality_group("lg0"); - cf->set_max_versions(3); - } - - std::map empty_snaphsots_; - std::map empty_rollback_; - TableSchema schema_; + public: + TabletIOTest() { + std::string cmd = std::string("mkdir -p ") + working_dir; + FLAGS_tera_tabletnode_path_prefix = "./"; + system(cmd.c_str()); + + InitSchema(); + } + + ~TabletIOTest() { + std::string cmd = std::string("rm -rf ") + working_dir; + system(cmd.c_str()); + } + + const TableSchema& GetTableSchema() { return schema_; } + + void InitSchema() { + schema_.set_name("tera"); + schema_.set_raw_key(Binary); + + LocalityGroupSchema* lg = schema_.add_locality_groups(); + lg->set_name("lg0"); + + ColumnFamilySchema* cf = schema_.add_column_families(); + cf->set_name("column"); + cf->set_locality_group("lg0"); + cf->set_max_versions(3); + } + + std::map empty_snaphsots_; + std::map empty_rollback_; + TableSchema schema_; }; // prepare test data bool PrepareTestData(TabletIO* tablet, uint64_t e, uint64_t s = 0) { - leveldb::WriteBatch batch; - for (uint64_t i = s; i < e; ++i) { - std::string str = StringFormat("%011llu", i); // NumberToString(i); - batch.Put(str, str); - } - return tablet->WriteBatch(&batch); + leveldb::WriteBatch batch; + for (uint64_t i = s; i < e; ++i) { + std::string str = StringFormat("%011llu", i); // NumberToString(i); + batch.Put(str, str); + } + return tablet->WriteBatch(&batch); } TEST_F(TabletIOTest, General) { - std::string tablet_path = working_dir + "general"; - std::string key_start = ""; - std::string key_end = ""; - StatusCode status; + std::string tablet_path = working_dir + "general"; + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; - TabletIO tablet(key_start, key_end, tablet_path); - EXPECT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), - std::set(), NULL, NULL, NULL, &status)); + TabletIO tablet(key_start, key_end, tablet_path); + EXPECT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), + std::set(), NULL, NULL, NULL, &status)); - std::string key = "555"; - std::string value = "value of 555"; + std::string key = "555"; + std::string value = "value of 555"; - EXPECT_TRUE(tablet.WriteOne(key, value)); + EXPECT_TRUE(tablet.WriteOne(key, value)); - std::string read_value; + std::string read_value; - EXPECT_TRUE(tablet.Read(key, &read_value)); + EXPECT_TRUE(tablet.Read(key, &read_value)); - EXPECT_EQ(value, read_value); + EXPECT_EQ(value, read_value); - EXPECT_TRUE(tablet.Unload()); + EXPECT_TRUE(tablet.Unload()); } TEST_F(TabletIOTest, Split) { - std::string tablet_path = working_dir + "split_tablet"; - std::string key_start = ""; - std::string key_end = ""; - StatusCode status; - uint64_t size = 0; - - TabletIO tablet(key_start, key_end, tablet_path); - EXPECT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), + std::string tablet_path = working_dir + "split_tablet"; + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; + uint64_t size = 0; + + TabletIO tablet(key_start, key_end, tablet_path); + EXPECT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), + std::set(), NULL, NULL, NULL, &status)); + + // prepare test data + EXPECT_TRUE(PrepareTestData(&tablet, N)); + + // for first tablet + tablet.GetDataSize(&size, NULL, NULL, &status); + LOG(INFO) << "table[" << key_start << ", " << key_end << "]: size = " << size; + + std::string split_key; + EXPECT_TRUE(tablet.Split(&split_key, &status)); + LOG(INFO) << "split key = " << split_key; + // EXPECT_TRUE((split_key == "00000035473")); + EXPECT_TRUE(tablet.Unload()); + + // open tablet for other key scope + key_start = "5000"; + key_end = "8000"; + TabletIO other_tablet(key_start, key_end, tablet_path); + EXPECT_TRUE(other_tablet.Load(TableSchema(), tablet_path, std::vector(), + std::set(), NULL, NULL, NULL, &status)); + other_tablet.GetDataSize(&size, NULL, NULL, &status); + LOG(INFO) << "table[" << key_start << ", " << key_end << "]: size = " << size; + split_key.clear(); + EXPECT_TRUE(other_tablet.Split(&split_key, &status)); + LOG(INFO) << "split key = " << split_key << ", code " << StatusCodeToString(status); + // EXPECT_LG(split_key, "6"); + EXPECT_LT(key_start, split_key); + EXPECT_LT(split_key, key_end); + EXPECT_TRUE(other_tablet.Unload()); + + key_start = ""; + key_end = "5000"; + TabletIO l_tablet(key_start, key_end, tablet_path); + EXPECT_TRUE(l_tablet.Load(TableSchema(), tablet_path, std::vector(), std::set(), NULL, NULL, NULL, &status)); - - // prepare test data - EXPECT_TRUE(PrepareTestData(&tablet, N)); - - // for first tablet - tablet.GetDataSize(&size, NULL, &status); - LOG(INFO) << "table[" << key_start << ", " << key_end - << "]: size = " << size; - - std::string split_key; - EXPECT_TRUE(tablet.Split(&split_key, &status)); - LOG(INFO) << "split key = " << split_key; -// EXPECT_TRUE((split_key == "00000035473")); - EXPECT_TRUE(tablet.Unload()); - - // open tablet for other key scope - key_start = "5000"; - key_end = "8000"; - TabletIO other_tablet(key_start, key_end, tablet_path); - EXPECT_TRUE(other_tablet.Load(TableSchema(), tablet_path, std::vector(), - std::set(), NULL, NULL, NULL, &status)); - other_tablet.GetDataSize(&size, NULL, &status); - LOG(INFO) << "table[" << key_start << ", " << key_end - << "]: size = " << size; - split_key.clear(); - EXPECT_TRUE(other_tablet.Split(&split_key, &status)); - LOG(INFO) << "split key = " << split_key << ", code " << StatusCodeToString(status); - //EXPECT_LG(split_key, "6"); - EXPECT_LT(key_start, split_key); - EXPECT_LT(split_key, key_end); - EXPECT_TRUE(other_tablet.Unload()); - - key_start = ""; - key_end = "5000"; - TabletIO l_tablet(key_start, key_end, tablet_path); - EXPECT_TRUE(l_tablet.Load(TableSchema(), tablet_path, std::vector(), - std::set(), NULL, NULL, NULL, &status)); - l_tablet.GetDataSize(&size, NULL, &status); - LOG(INFO) << "table[" << key_start << ", " << key_end - << "]: size = " << size; - EXPECT_TRUE(l_tablet.Unload()); - - key_start = "8000"; - key_end = ""; - TabletIO r_tablet(key_start, key_end, tablet_path); - EXPECT_TRUE(r_tablet.Load(TableSchema(), tablet_path, std::vector(), - std::set(), NULL, NULL, NULL, &status)); - r_tablet.GetDataSize(&size, NULL, &status); - LOG(INFO) << "table[" << key_start << ", " << key_end - << "]: size = " << size; - EXPECT_TRUE(r_tablet.Unload()); + l_tablet.GetDataSize(&size, NULL, NULL, &status); + LOG(INFO) << "table[" << key_start << ", " << key_end << "]: size = " << size; + EXPECT_TRUE(l_tablet.Unload()); + + key_start = "8000"; + key_end = ""; + TabletIO r_tablet(key_start, key_end, tablet_path); + EXPECT_TRUE(r_tablet.Load(TableSchema(), tablet_path, std::vector(), + std::set(), NULL, NULL, NULL, &status)); + r_tablet.GetDataSize(&size, NULL, NULL, &status); + LOG(INFO) << "table[" << key_start << ", " << key_end << "]: size = " << size; + EXPECT_TRUE(r_tablet.Unload()); } TEST_F(TabletIOTest, SplitAndCheckSize) { - LOG(INFO) << "SplitAndCheckSize() begin ..."; - std::string tablet_path = working_dir + "split_tablet_check"; - std::string key_start = ""; - std::string key_end = ""; - StatusCode status; - uint64_t size = 0; - - TabletIO tablet(key_start, key_end, tablet_path); - EXPECT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), + LOG(INFO) << "SplitAndCheckSize() begin ..."; + std::string tablet_path = working_dir + "split_tablet_check"; + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; + uint64_t size = 0; + + TabletIO tablet(key_start, key_end, tablet_path); + EXPECT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), + std::set(), NULL, NULL, NULL, &status)); + + // prepare test data + EXPECT_TRUE(PrepareTestData(&tablet, N)); + + // for first tablet + tablet.GetDataSize(&size, NULL, NULL, &status); + LOG(INFO) << "table[" << key_start << ", " << key_end << "]: size = " << size; + + std::string split_key; + EXPECT_TRUE(tablet.Split(&split_key)); + LOG(INFO) << "split key = " << split_key; + LOG(INFO) << "table[" << key_start << ", " << split_key << "]"; + LOG(INFO) << "table[" << split_key << ", " << key_end << "]"; + EXPECT_TRUE(tablet.Unload()); + + // open from split key to check scope size + TabletIO l_tablet(key_start, split_key, tablet_path); + EXPECT_TRUE(l_tablet.Load(TableSchema(), tablet_path, std::vector(), std::set(), NULL, NULL, NULL, &status)); + l_tablet.GetDataSize(&size, NULL, NULL, &status); + LOG(INFO) << "table[" << key_start << ", " << split_key << "]: size = " << size; + EXPECT_TRUE(l_tablet.Unload()); - // prepare test data - EXPECT_TRUE(PrepareTestData(&tablet, N)); - - // for first tablet - tablet.GetDataSize(&size, NULL, &status); - LOG(INFO) << "table[" << key_start << ", " << key_end - << "]: size = " << size; - - std::string split_key; - EXPECT_TRUE(tablet.Split(&split_key)); - LOG(INFO) << "split key = " << split_key; - LOG(INFO) << "table[" << key_start << ", " << split_key << "]"; - LOG(INFO) << "table[" << split_key << ", " << key_end << "]"; - EXPECT_TRUE(tablet.Unload()); - - // open from split key to check scope size - TabletIO l_tablet(key_start, split_key, tablet_path); - EXPECT_TRUE(l_tablet.Load(TableSchema(), tablet_path, std::vector(), - std::set(), NULL, NULL, NULL, &status)); - l_tablet.GetDataSize(&size, NULL, &status); - LOG(INFO) << "table[" << key_start << ", " << split_key - << "]: size = " << size; - EXPECT_TRUE(l_tablet.Unload()); - - TabletIO r_tablet(split_key, key_end, tablet_path); - EXPECT_TRUE(r_tablet.Load(TableSchema(), tablet_path, std::vector(), - std::set(), NULL, NULL, NULL, &status)); - r_tablet.GetDataSize(&size, NULL, &status); - LOG(INFO) << "table[" << split_key << ", " << key_end - << "]: size = " << size; - EXPECT_TRUE(r_tablet.Unload()); + TabletIO r_tablet(split_key, key_end, tablet_path); + EXPECT_TRUE(r_tablet.Load(TableSchema(), tablet_path, std::vector(), + std::set(), NULL, NULL, NULL, &status)); + r_tablet.GetDataSize(&size, NULL, NULL, &status); + LOG(INFO) << "table[" << split_key << ", " << key_end << "]: size = " << size; + EXPECT_TRUE(r_tablet.Unload()); - LOG(INFO) << "SplitAndCheckSize() end ..."; + LOG(INFO) << "SplitAndCheckSize() end ..."; } TEST_F(TabletIOTest, OverWrite) { - std::string tablet_path = working_dir + "general_tablet"; - std::string key_start = ""; - std::string key_end = ""; - StatusCode status; + std::string tablet_path = working_dir + "general_tablet"; + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; - TabletIO tablet(key_start, key_end, tablet_path); - EXPECT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), - std::set(), NULL, NULL, NULL, &status)); + TabletIO tablet(key_start, key_end, tablet_path); + EXPECT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), + std::set(), NULL, NULL, NULL, &status)); - std::string key = "555"; - std::string value = "value of 555"; - EXPECT_TRUE(tablet.WriteOne(key, value)); + std::string key = "555"; + std::string value = "value of 555"; + EXPECT_TRUE(tablet.WriteOne(key, value)); - value = "value of 666"; - EXPECT_TRUE(tablet.WriteOne(key, value)); + value = "value of 666"; + EXPECT_TRUE(tablet.WriteOne(key, value)); - std::string read_value; - EXPECT_TRUE(tablet.Read(key, &read_value)); + std::string read_value; + EXPECT_TRUE(tablet.Read(key, &read_value)); - EXPECT_EQ(value, read_value); + EXPECT_EQ(value, read_value); - EXPECT_TRUE(tablet.Unload()); + EXPECT_TRUE(tablet.Unload()); } -//TEST_F(TabletIOTest, DISABLED_Compact) { +// TEST_F(TabletIOTest, DISABLED_Compact) { TEST_F(TabletIOTest, Compact) { - std::string tablet_path = working_dir + "compact_tablet"; - std::string key_start = ""; - std::string key_end = ""; - StatusCode status; - - TabletIO tablet(key_start, key_end, tablet_path); - EXPECT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), - std::set(), NULL, NULL, NULL, &status)); - - // prepare test data - EXPECT_TRUE(PrepareTestData(&tablet, 100)); - - uint64_t table_size = 0; - tablet.GetDataSize(&table_size, NULL, &status); - LOG(INFO) << "table[" << key_start << ", " << key_end - << "]: size = " << table_size; - EXPECT_TRUE(tablet.Unload()); - - // open another scope - std::string new_key_start = StringFormat("%011llu", 5); // NumberToString(500); - std::string new_key_end = StringFormat("%011llu", 50); // NumberToString(800); - TabletIO new_tablet(new_key_start, new_key_end, tablet_path); - EXPECT_TRUE(new_tablet.Load(TableSchema(), tablet_path, std::vector(), - std::set(), NULL, NULL, NULL, &status)); - EXPECT_TRUE(new_tablet.Compact(0, &status)); - - uint64_t new_table_size = 0; - new_tablet.GetDataSize(&new_table_size, NULL, &status); - LOG(INFO) << "table[" << new_key_start << ", " << new_key_end - << "]: size = " << new_table_size; - - for (int i = 0; i < 100; ++i) { - std::string key = StringFormat("%011llu", i); // NumberToString(i); - std::string value; - if (i >= 5 && i < 50) { - EXPECT_TRUE(new_tablet.Read(key, &value)); - EXPECT_EQ(key, value); - } else { - EXPECT_FALSE(new_tablet.Read(key, &value)); - } + std::string tablet_path = working_dir + "compact_tablet"; + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; + + TabletIO tablet(key_start, key_end, tablet_path); + EXPECT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), + std::set(), NULL, NULL, NULL, &status)); + + // prepare test data + EXPECT_TRUE(PrepareTestData(&tablet, 100)); + + uint64_t table_size = 0; + tablet.GetDataSize(&table_size, NULL, NULL, &status); + LOG(INFO) << "table[" << key_start << ", " << key_end << "]: size = " << table_size; + EXPECT_TRUE(tablet.Unload()); + + // open another scope + std::string new_key_start = StringFormat("%011llu", 5); // NumberToString(500); + std::string new_key_end = StringFormat("%011llu", 50); // NumberToString(800); + TabletIO new_tablet(new_key_start, new_key_end, tablet_path); + EXPECT_TRUE(new_tablet.Load(TableSchema(), tablet_path, std::vector(), + std::set(), NULL, NULL, NULL, &status)); + EXPECT_TRUE(new_tablet.Compact(0, &status)); + + uint64_t new_table_size = 0; + new_tablet.GetDataSize(&new_table_size, NULL, NULL, &status); + LOG(INFO) << "table[" << new_key_start << ", " << new_key_end << "]: size = " << new_table_size; + + for (int i = 0; i < 100; ++i) { + std::string key = StringFormat("%011llu", i); // NumberToString(i); + std::string value; + if (i >= 5 && i < 50) { + EXPECT_TRUE(new_tablet.Read(key, &value)); + EXPECT_EQ(key, value); + } else { + EXPECT_FALSE(new_tablet.Read(key, &value)); } + } - EXPECT_TRUE(new_tablet.Unload()); + EXPECT_TRUE(new_tablet.Unload()); } TEST_F(TabletIOTest, LowLevelSeek) { - std::string tablet_path = working_dir + "llseek_tablet"; - std::string key_start = ""; - std::string key_end = ""; - StatusCode status; - - TabletIO tablet(key_start, key_end, tablet_path); - EXPECT_TRUE(tablet.Load(GetTableSchema(), tablet_path, std::vector(), - std::set(), NULL, NULL, NULL, &status)); - - // init scan - ScanOptions scan_options; - ColumnFamilyMap cf_map; - std::set qu_set; - qu_set.insert("qualifer"); - qu_set.insert("2a"); - qu_set.insert("1a"); - cf_map["column"] = qu_set; - scan_options.column_family_list = cf_map; - scan_options.iter_cf_set.insert("column"); - - std::string tkey1; - // delete this key - tablet.GetRawKeyOperator()->EncodeTeraKey("row", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); - tablet.WriteOne(tkey1, "" , false, NULL); - tablet.GetRawKeyOperator()->EncodeTeraKey("row1", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); - tablet.WriteOne(tkey1, "" , false, NULL); - - // write cell - tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "qualifer", get_micros(), leveldb::TKT_VALUE, &tkey1); - tablet.WriteOne(tkey1, "lala" , false, NULL); - RowResult value_list; - - EXPECT_TRUE(tablet.LowLevelSeek("row", scan_options, &value_list, &status)); - EXPECT_EQ(value_list.key_values_size(), 1); - - // delete cell - tablet.GetRawKeyOperator()->EncodeTeraKey("row", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); - tablet.WriteOne(tkey1, "" , false, NULL); - EXPECT_TRUE(tablet.LowLevelSeek("row", scan_options, &value_list, &status)); - EXPECT_EQ(value_list.key_values_size(), 0); - - // write cell again - tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "2a", get_micros(), leveldb::TKT_VALUE, &tkey1); - tablet.WriteOne(tkey1, "lala" , false, NULL); - EXPECT_TRUE(tablet.LowLevelSeek("row", scan_options, &value_list, &status)); - EXPECT_EQ(value_list.key_values_size(), 1); - - // clean - tablet.GetRawKeyOperator()->EncodeTeraKey("row", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); - tablet.WriteOne(tkey1, "", false, NULL); - tablet.GetRawKeyOperator()->EncodeTeraKey("row1", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); - tablet.WriteOne(tkey1, "", false, NULL); - - // write 5 versions - tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "1a", get_micros(), leveldb::TKT_VALUE, &tkey1); - tablet.WriteOne(tkey1, "lala1", false, NULL); - int64_t start_ts = get_micros(); - tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "1a", start_ts, leveldb::TKT_VALUE, &tkey1); - tablet.WriteOne(tkey1, "lala2", false, NULL); - tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "1a", get_micros(), leveldb::TKT_VALUE, &tkey1); - tablet.WriteOne(tkey1, "lala3", false, NULL); - int64_t end_ts = get_micros(); - tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "1a", end_ts, leveldb::TKT_VALUE, &tkey1); - tablet.WriteOne(tkey1, "lala4", false, NULL); - tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "1a", get_micros(), leveldb::TKT_VALUE, &tkey1); - tablet.WriteOne(tkey1, "lala5", false, NULL); - tablet.GetRawKeyOperator()->EncodeTeraKey("row1", "column", "1a", get_micros(), leveldb::TKT_VALUE, &tkey1); - tablet.WriteOne(tkey1, "lala5", false, NULL); - - // read all versions ( write 5 versions, but schema set max_versions = 3 ) - EXPECT_TRUE(tablet.LowLevelSeek("row", scan_options, &value_list, &status)); - EXPECT_EQ(value_list.key_values_size(), 3); - - // for max_versions - // read 2 versions - scan_options.max_versions = 2; - EXPECT_TRUE(tablet.LowLevelSeek("row", scan_options, &value_list, &status)); - EXPECT_EQ(value_list.key_values_size(), 2); - - // for timerange and max_versions - // read 2 versions ( write 5 versions, but schema set max_versions = 3) - scan_options.max_versions = 4; - scan_options.ts_start = start_ts; - scan_options.ts_end = end_ts; - EXPECT_TRUE(tablet.LowLevelSeek("row", scan_options, &value_list, &status)); - EXPECT_EQ(value_list.key_values_size(), 2); - - // start_ts not in top 3 versions - scan_options.ts_start = start_ts; - scan_options.ts_end = start_ts; - EXPECT_TRUE(tablet.LowLevelSeek("row", scan_options, &value_list, &status)); - EXPECT_EQ(value_list.key_values_size(), 0); - - // end_ts in top 3 versions - scan_options.ts_start = end_ts; - scan_options.ts_end = end_ts; - EXPECT_TRUE(tablet.LowLevelSeek("row", scan_options, &value_list, &status)); - EXPECT_EQ(value_list.key_values_size(), 1); - - EXPECT_TRUE(tablet.Unload()); + std::string tablet_path = working_dir + "llseek_tablet"; + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; + + TabletIO tablet(key_start, key_end, tablet_path); + EXPECT_TRUE(tablet.Load(GetTableSchema(), tablet_path, std::vector(), + std::set(), NULL, NULL, NULL, &status)); + + // init scan + ScanOptions scan_options; + ColumnFamilyMap cf_map; + std::set qu_set; + qu_set.insert("qualifer"); + qu_set.insert("2a"); + qu_set.insert("1a"); + cf_map["column"] = qu_set; + scan_options.column_family_list = cf_map; + scan_options.iter_cf_set.insert("column"); + + std::string tkey1; + // delete this key + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); + tablet.WriteOne(tkey1, "", false, NULL); + tablet.GetRawKeyOperator()->EncodeTeraKey("row1", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); + tablet.WriteOne(tkey1, "", false, NULL); + + // write cell + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "qualifer", get_micros(), + leveldb::TKT_VALUE, &tkey1); + tablet.WriteOne(tkey1, "lala", false, NULL); + RowResult value_list; + + EXPECT_TRUE(tablet.LowLevelSeek("row", scan_options, &value_list, &status)); + EXPECT_EQ(value_list.key_values_size(), 1); + + // delete cell + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); + tablet.WriteOne(tkey1, "", false, NULL); + EXPECT_TRUE(tablet.LowLevelSeek("row", scan_options, &value_list, &status)); + EXPECT_EQ(value_list.key_values_size(), 0); + + // write cell again + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "2a", get_micros(), leveldb::TKT_VALUE, + &tkey1); + tablet.WriteOne(tkey1, "lala", false, NULL); + EXPECT_TRUE(tablet.LowLevelSeek("row", scan_options, &value_list, &status)); + EXPECT_EQ(value_list.key_values_size(), 1); + + // clean + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); + tablet.WriteOne(tkey1, "", false, NULL); + tablet.GetRawKeyOperator()->EncodeTeraKey("row1", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); + tablet.WriteOne(tkey1, "", false, NULL); + + // write 5 versions + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "1a", get_micros(), leveldb::TKT_VALUE, + &tkey1); + tablet.WriteOne(tkey1, "lala1", false, NULL); + int64_t start_ts = get_micros(); + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "1a", start_ts, leveldb::TKT_VALUE, + &tkey1); + tablet.WriteOne(tkey1, "lala2", false, NULL); + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "1a", get_micros(), leveldb::TKT_VALUE, + &tkey1); + tablet.WriteOne(tkey1, "lala3", false, NULL); + int64_t end_ts = get_micros(); + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "1a", end_ts, leveldb::TKT_VALUE, + &tkey1); + tablet.WriteOne(tkey1, "lala4", false, NULL); + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "1a", get_micros(), leveldb::TKT_VALUE, + &tkey1); + tablet.WriteOne(tkey1, "lala5", false, NULL); + tablet.GetRawKeyOperator()->EncodeTeraKey("row1", "column", "1a", get_micros(), + leveldb::TKT_VALUE, &tkey1); + tablet.WriteOne(tkey1, "lala5", false, NULL); + + // read all versions ( write 5 versions, but schema set max_versions = 3 ) + EXPECT_TRUE(tablet.LowLevelSeek("row", scan_options, &value_list, &status)); + EXPECT_EQ(value_list.key_values_size(), 3); + + // for max_versions + // read 2 versions + scan_options.max_versions = 2; + EXPECT_TRUE(tablet.LowLevelSeek("row", scan_options, &value_list, &status)); + EXPECT_EQ(value_list.key_values_size(), 2); + + // for timerange and max_versions + // read 2 versions ( write 5 versions, but schema set max_versions = 3) + scan_options.max_versions = 4; + scan_options.ts_start = start_ts; + scan_options.ts_end = end_ts; + EXPECT_TRUE(tablet.LowLevelSeek("row", scan_options, &value_list, &status)); + EXPECT_EQ(value_list.key_values_size(), 2); + + // start_ts not in top 3 versions + scan_options.ts_start = start_ts; + scan_options.ts_end = start_ts; + EXPECT_TRUE(tablet.LowLevelSeek("row", scan_options, &value_list, &status)); + EXPECT_EQ(value_list.key_values_size(), 0); + + // end_ts in top 3 versions + scan_options.ts_start = end_ts; + scan_options.ts_end = end_ts; + EXPECT_TRUE(tablet.LowLevelSeek("row", scan_options, &value_list, &status)); + EXPECT_EQ(value_list.key_values_size(), 1); + + std::string tkey2; + std::string rawkey2 = "row2"; + int64_t ts = 10; + // write/del with the same timestamp + tablet.GetRawKeyOperator()->EncodeTeraKey(rawkey2, "column", "Lqu0", ts, + leveldb::TKT_DEL_QUALIFIERS, &tkey2); + tablet.WriteOne(tkey2, "", false, NULL); + tablet.GetRawKeyOperator()->EncodeTeraKey(rawkey2, "column", "Lqu0", ts, leveldb::TKT_VALUE, + &tkey2); + tablet.WriteOne(tkey2, "value0L", false, NULL); + + // Read the Del Qua + // Get the same result with lowlevelscan + ScanOptions scan_options2; + ColumnFamilyMap cf_map2; + std::set qu_set2; + qu_set2.insert("Lqu0"); + cf_map2["column"] = qu_set2; + scan_options2.column_family_list = cf_map2; + scan_options2.iter_cf_set.insert("column"); + scan_options2.ts_start = 10; + scan_options2.ts_end = 10; + EXPECT_TRUE(tablet.LowLevelSeek(rawkey2, scan_options2, &value_list, &status)); + EXPECT_EQ(value_list.key_values_size(), 0); + + EXPECT_TRUE(tablet.Unload()); } TEST_F(TabletIOTest, LowLevelScan) { - std::string tablet_path = working_dir + "llscan_tablet"; - std::string key_start = ""; - std::string key_end = ""; - StatusCode status; - - TabletIO tablet(key_start, key_end, tablet_path); - EXPECT_TRUE(tablet.Load(GetTableSchema(), tablet_path, std::vector(), - std::set(), NULL, NULL, NULL, &status)); - - std::string tkey1; - - // delete this key - tablet.GetRawKeyOperator()->EncodeTeraKey("row", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); - tablet.WriteOne(tkey1, "" , false, NULL); - tablet.GetRawKeyOperator()->EncodeTeraKey("row1", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); - tablet.WriteOne(tkey1, "" , false, NULL); - - - // write cell - tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "qualifer", get_micros(), leveldb::TKT_VALUE, &tkey1); - tablet.WriteOne(tkey1, "lala" , false, NULL); - - std::string start_tera_key; - std::string end_row_key; - RowResult value_list; - KeyValuePair next_start_point; - uint32_t read_row_count = 0; - uint32_t read_bytes = 0; - bool is_complete = false; - EXPECT_TRUE(tablet.LowLevelScan(start_tera_key, "", ScanOptions(), - &value_list, &next_start_point, &read_row_count, &read_bytes, &is_complete, &status)); - EXPECT_EQ(value_list.key_values_size(), 1); - - tablet.GetRawKeyOperator()->EncodeTeraKey("row", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); - tablet.WriteOne(tkey1, "lala" , false, NULL); - EXPECT_TRUE(tablet.LowLevelScan(start_tera_key, "", ScanOptions(), - &value_list, &next_start_point, &read_row_count, &read_bytes, &is_complete, &status)); - EXPECT_EQ(value_list.key_values_size(), 0); - - tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "2a", get_micros(), leveldb::TKT_VALUE, &tkey1); - tablet.WriteOne(tkey1, "lala" , false, NULL); - EXPECT_TRUE(tablet.LowLevelScan(start_tera_key, "", ScanOptions(), - &value_list, &next_start_point, &read_row_count, &read_bytes, &is_complete, &status)); - EXPECT_EQ(value_list.key_values_size(), 1); - - tablet.GetRawKeyOperator()->EncodeTeraKey("row", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); - tablet.WriteOne(tkey1, "lala", false, NULL); - tablet.GetRawKeyOperator()->EncodeTeraKey("row1", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); - tablet.WriteOne(tkey1, "lala", false, NULL); - - tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "1a", get_micros(), leveldb::TKT_VALUE, &tkey1); - tablet.WriteOne(tkey1, "lala", false, NULL); - tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "1a", get_micros(), leveldb::TKT_VALUE, &tkey1); - tablet.WriteOne(tkey1, "lala", false, NULL); - tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "1a", get_micros(), leveldb::TKT_VALUE, &tkey1); - tablet.WriteOne(tkey1, "lala", false, NULL); - - tablet.GetRawKeyOperator()->EncodeTeraKey("row1", "column", "1a", get_micros(), leveldb::TKT_VALUE, &tkey1); - tablet.WriteOne(tkey1, "lala", false, NULL); - tablet.GetRawKeyOperator()->EncodeTeraKey("row1", "column", "2b", get_micros(), leveldb::TKT_VALUE, &tkey1); - tablet.WriteOne(tkey1, "lala", false, NULL); - tablet.GetRawKeyOperator()->EncodeTeraKey("row", "", "", 0, leveldb::TKT_FORSEEK, &start_tera_key); - end_row_key = std::string("row1\0", 5); - ScanOptions scan_options; - EXPECT_TRUE(tablet.LowLevelScan(start_tera_key, end_row_key, scan_options, - &value_list, &next_start_point, &read_row_count, &read_bytes, &is_complete, &status)); - EXPECT_EQ(value_list.key_values_size(), 5); - tablet.GetRawKeyOperator()->EncodeTeraKey("row", "", "", 0, leveldb::TKT_FORSEEK, &start_tera_key); - end_row_key = std::string("row\0", 5); - scan_options.column_family_list["column"].insert("1a"); - EXPECT_TRUE(tablet.LowLevelScan(start_tera_key, end_row_key, scan_options, - &value_list, &next_start_point, &read_row_count, &read_bytes, &is_complete, &status)); - EXPECT_EQ(value_list.key_values_size(), 3); - scan_options.max_versions = 2; - EXPECT_TRUE(tablet.LowLevelScan(start_tera_key, end_row_key, scan_options, - &value_list, &next_start_point, &read_row_count, &read_bytes, &is_complete, &status)); - EXPECT_EQ(value_list.key_values_size(), 2); - EXPECT_TRUE(tablet.Unload()); + std::string tablet_path = working_dir + "llscan_tablet"; + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; + + TabletIO tablet(key_start, key_end, tablet_path); + EXPECT_TRUE(tablet.Load(GetTableSchema(), tablet_path, std::vector(), + std::set(), NULL, NULL, NULL, &status)); + + std::string tkey1; + + // delete this key + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); + tablet.WriteOne(tkey1, "", false, NULL); + tablet.GetRawKeyOperator()->EncodeTeraKey("row1", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); + tablet.WriteOne(tkey1, "", false, NULL); + + // write cell + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "qualifer", get_micros(), + leveldb::TKT_VALUE, &tkey1); + tablet.WriteOne(tkey1, "lala", false, NULL); + + std::string start_tera_key; + std::string end_row_key; + RowResult value_list; + KeyValuePair next_start_point; + uint32_t read_row_count = 0; + uint32_t read_cell_count = 0; + uint32_t read_bytes = 0; + bool is_complete = false; + EXPECT_TRUE(tablet.LowLevelScan(start_tera_key, "", ScanOptions(), &value_list, &next_start_point, + &read_row_count, &read_cell_count, &read_bytes, &is_complete, + &status)); + EXPECT_EQ(value_list.key_values_size(), 1); + + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); + tablet.WriteOne(tkey1, "lala", false, NULL); + EXPECT_TRUE(tablet.LowLevelScan(start_tera_key, "", ScanOptions(), &value_list, &next_start_point, + &read_row_count, &read_cell_count, &read_bytes, &is_complete, + &status)); + EXPECT_EQ(value_list.key_values_size(), 0); + + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "2a", get_micros(), leveldb::TKT_VALUE, + &tkey1); + tablet.WriteOne(tkey1, "lala", false, NULL); + EXPECT_TRUE(tablet.LowLevelScan(start_tera_key, "", ScanOptions(), &value_list, &next_start_point, + &read_row_count, &read_cell_count, &read_bytes, &is_complete, + &status)); + EXPECT_EQ(value_list.key_values_size(), 1); + + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); + tablet.WriteOne(tkey1, "lala", false, NULL); + tablet.GetRawKeyOperator()->EncodeTeraKey("row1", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); + tablet.WriteOne(tkey1, "lala", false, NULL); + + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "1a", get_micros(), leveldb::TKT_VALUE, + &tkey1); + tablet.WriteOne(tkey1, "lala", false, NULL); + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "1a", get_micros(), leveldb::TKT_VALUE, + &tkey1); + tablet.WriteOne(tkey1, "lala", false, NULL); + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "1a", get_micros(), leveldb::TKT_VALUE, + &tkey1); + tablet.WriteOne(tkey1, "lala", false, NULL); + + tablet.GetRawKeyOperator()->EncodeTeraKey("row1", "column", "1a", get_micros(), + leveldb::TKT_VALUE, &tkey1); + tablet.WriteOne(tkey1, "lala", false, NULL); + tablet.GetRawKeyOperator()->EncodeTeraKey("row1", "column", "2b", get_micros(), + leveldb::TKT_VALUE, &tkey1); + tablet.WriteOne(tkey1, "lala", false, NULL); + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "", "", 0, leveldb::TKT_FORSEEK, + &start_tera_key); + end_row_key = std::string("row1\0", 5); + ScanOptions scan_options; + EXPECT_TRUE(tablet.LowLevelScan(start_tera_key, end_row_key, scan_options, &value_list, + &next_start_point, &read_row_count, &read_cell_count, &read_bytes, + &is_complete, &status)); + EXPECT_EQ(value_list.key_values_size(), 5); + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "", "", 0, leveldb::TKT_FORSEEK, + &start_tera_key); + end_row_key = std::string("row\0", 5); + scan_options.column_family_list["column"].insert("1a"); + EXPECT_TRUE(tablet.LowLevelScan(start_tera_key, end_row_key, scan_options, &value_list, + &next_start_point, &read_row_count, &read_cell_count, &read_bytes, + &is_complete, &status)); + EXPECT_EQ(value_list.key_values_size(), 3); + scan_options.max_versions = 2; + EXPECT_TRUE(tablet.LowLevelScan(start_tera_key, end_row_key, scan_options, &value_list, + &next_start_point, &read_row_count, &read_cell_count, &read_bytes, + &is_complete, &status)); + EXPECT_EQ(value_list.key_values_size(), 2); + + std::string rawkey2 = "row2"; + int64_t ts = 10; + // write/del with the same timestamp + tablet.GetRawKeyOperator()->EncodeTeraKey(rawkey2, "column", "Lqu0", ts, + leveldb::TKT_DEL_QUALIFIERS, &start_tera_key); + tablet.WriteOne(start_tera_key, "", false, NULL); + tablet.GetRawKeyOperator()->EncodeTeraKey(rawkey2, "column", "Lqu0", ts, leveldb::TKT_VALUE, + &start_tera_key); + tablet.WriteOne(start_tera_key, "value0L", false, NULL); + + // Scan the row where put and del with the same timestamp. + ScanOptions scan_options2; + end_row_key = std::string("row2\0", 5); + // Scan All Cf + EXPECT_TRUE(tablet.LowLevelScan(start_tera_key, end_row_key, scan_options2, &value_list, + &next_start_point, &read_row_count, &read_cell_count, &read_bytes, + &is_complete, &status)); + EXPECT_EQ(value_list.key_values_size(), 0); + // Scan column cf + std::set qu_set; + ColumnFamilyMap cf_map; + cf_map["column"] = qu_set; + scan_options2.column_family_list = cf_map; + EXPECT_TRUE(tablet.LowLevelScan(start_tera_key, end_row_key, scan_options2, &value_list, + &next_start_point, &read_row_count, &read_cell_count, &read_bytes, + &is_complete, &status)); + EXPECT_EQ(value_list.key_values_size(), 0); + // Scan column && qua + scan_options2.column_family_list["column"].insert("Lqu0"); + EXPECT_TRUE(tablet.LowLevelScan(start_tera_key, end_row_key, scan_options2, &value_list, + &next_start_point, &read_row_count, &read_cell_count, &read_bytes, + &is_complete, &status)); + EXPECT_EQ(value_list.key_values_size(), 0); + + EXPECT_TRUE(tablet.Unload()); } TEST_F(TabletIOTest, SplitToSubTable) { - LOG(INFO) << "SplitToSubTable() begin ..."; - std::string tablet_path = leveldb::GetTabletPathFromNum(working_dir, 1); - std::string key_start = ""; - std::string key_end = ""; - StatusCode status; - uint64_t size = 0; - - TabletIO tablet(key_start, key_end, tablet_path); - EXPECT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), - std::set(), NULL, NULL, NULL, &status)); - - // prepare test data - EXPECT_TRUE(PrepareTestData(&tablet, N / 2, 0)); - EXPECT_TRUE(PrepareTestData(&tablet, N, N / 2)); - - // make sure all data are dumped into sst - EXPECT_TRUE(tablet.Unload()); - EXPECT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), - std::set(), NULL, NULL, NULL, &status)); - - // for first tablet - tablet.GetDataSize(&size, NULL, &status); - LOG(INFO) << "table[" << key_start << ", " << key_end - << "]: size = " << size; - - std::string split_key; - EXPECT_TRUE(tablet.Split(&split_key)); - LOG(INFO) << "split key = " << split_key; - LOG(INFO) << "table[" << key_start << ", " << split_key << "]"; - LOG(INFO) << "table[" << split_key << ", " << key_end << "]"; - EXPECT_TRUE(tablet.Unload()); - - // open from split key to check scope size - std::string split_path_1; - std::string split_path_2; - split_path_1 = leveldb::GetTabletPathFromNum(working_dir, 2); - split_path_2 = leveldb::GetTabletPathFromNum(working_dir, 3); - //ASSERT_TRUE(leveldb::GetSplitPath(tablet_path, &split_path_1, &split_path_2)); - LOG(INFO) << tablet_path << ", lpath " << split_path_1 << ", rpath " << split_path_2 << "\n"; - std::vector parent_tablet; - parent_tablet.push_back(1); - - // 1. load sub-table 1 - TabletIO l_tablet(key_start, split_key, split_path_1); - EXPECT_TRUE(l_tablet.Load(TableSchema(), split_path_1, parent_tablet, - std::set(), NULL, NULL, NULL, &status)); - l_tablet.GetDataSize(&size, NULL, &status); - LOG(INFO) << "table[" << key_start << ", " << split_key - << "]: size = " << size; - // varify result - int split_key_num = atoi(split_key.c_str()); - LOG(INFO) << "split_key_num " << split_key_num; - for (uint64_t i = 0; i < (uint64_t)split_key_num; ++i) { - std::string key = StringFormat("%011llu", i); - std::string value; - EXPECT_TRUE(l_tablet.Read(key, &value)); - ASSERT_EQ(key, value); - } - EXPECT_TRUE(l_tablet.Unload()); - - // 2. load sub-table 2 - TabletIO r_tablet(split_key, key_end, split_path_2); - EXPECT_TRUE(r_tablet.Load(TableSchema(), split_path_2, parent_tablet, - std::set(), NULL, NULL, NULL, &status)); - r_tablet.GetDataSize(&size, NULL, &status); - LOG(INFO) << "table[" << split_key << ", " << key_end - << "]: size = " << size; - // varify result - for (uint64_t i = (uint64_t)split_key_num; i < N; ++i) { - std::string key = StringFormat("%011llu", i); - std::string value; - EXPECT_TRUE(r_tablet.Read(key, &value)); - ASSERT_EQ(key, value); - } - EXPECT_TRUE(r_tablet.Unload()); - - LOG(INFO) << "SplitToSubTable() end ..."; + LOG(INFO) << "SplitToSubTable() begin ..."; + std::string tablet_path = leveldb::GetTabletPathFromNum(working_dir, 1); + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; + uint64_t size = 0; + + TabletIO tablet(key_start, key_end, tablet_path); + EXPECT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), + std::set(), NULL, NULL, NULL, &status)); + + // prepare test data + EXPECT_TRUE(PrepareTestData(&tablet, N / 2, 0)); + EXPECT_TRUE(PrepareTestData(&tablet, N, N / 2)); + + // make sure all data are dumped into sst + EXPECT_TRUE(tablet.Unload()); + EXPECT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), + std::set(), NULL, NULL, NULL, &status)); + + // for first tablet + tablet.GetDataSize(&size, NULL, NULL, &status); + LOG(INFO) << "table[" << key_start << ", " << key_end << "]: size = " << size; + + std::string split_key; + EXPECT_TRUE(tablet.Split(&split_key)); + LOG(INFO) << "split key = " << split_key; + LOG(INFO) << "table[" << key_start << ", " << split_key << "]"; + LOG(INFO) << "table[" << split_key << ", " << key_end << "]"; + EXPECT_TRUE(tablet.Unload()); + + // open from split key to check scope size + std::string split_path_1; + std::string split_path_2; + split_path_1 = leveldb::GetTabletPathFromNum(working_dir, 2); + split_path_2 = leveldb::GetTabletPathFromNum(working_dir, 3); + // ASSERT_TRUE(leveldb::GetSplitPath(tablet_path, &split_path_1, + // &split_path_2)); + LOG(INFO) << tablet_path << ", lpath " << split_path_1 << ", rpath " << split_path_2 << "\n"; + std::vector parent_tablet; + parent_tablet.push_back(1); + + // 1. load sub-table 1 + TabletIO l_tablet(key_start, split_key, split_path_1); + EXPECT_TRUE(l_tablet.Load(TableSchema(), split_path_1, parent_tablet, std::set(), + NULL, NULL, NULL, &status)); + l_tablet.GetDataSize(&size, NULL, NULL, &status); + LOG(INFO) << "table[" << key_start << ", " << split_key << "]: size = " << size; + // varify result + int split_key_num = atoi(split_key.c_str()); + LOG(INFO) << "split_key_num " << split_key_num; + for (uint64_t i = 0; i < (uint64_t)split_key_num; ++i) { + std::string key = StringFormat("%011llu", i); + std::string value; + EXPECT_TRUE(l_tablet.Read(key, &value)); + ASSERT_EQ(key, value); + } + EXPECT_TRUE(l_tablet.Unload()); + + // 2. load sub-table 2 + TabletIO r_tablet(split_key, key_end, split_path_2); + EXPECT_TRUE(r_tablet.Load(TableSchema(), split_path_2, parent_tablet, std::set(), + NULL, NULL, NULL, &status)); + r_tablet.GetDataSize(&size, NULL, NULL, &status); + LOG(INFO) << "table[" << split_key << ", " << key_end << "]: size = " << size; + // varify result + for (uint64_t i = (uint64_t)split_key_num; i < N; ++i) { + std::string key = StringFormat("%011llu", i); + std::string value; + EXPECT_TRUE(r_tablet.Read(key, &value)); + ASSERT_EQ(key, value); + } + EXPECT_TRUE(r_tablet.Unload()); + + LOG(INFO) << "SplitToSubTable() end ..."; } TEST_F(TabletIOTest, FindAverageKey) { - std::string start, end, ave; - - start = "abc"; - end = "abe"; - ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); - //ASSERT_EQ(ave, "abd"); - ASSERT_LT(start, ave); - ASSERT_LT(ave, end); - - start = "helloa"; - end = "hellob"; - ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); - ASSERT_EQ(ave, "helloa\x80"); - - start = "a"; - end = "b"; - ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); - ASSERT_EQ(ave, "a\x80"); - - start = "a"; - // b(0x62), 1(0x31) - end = "ab"; - ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); - ASSERT_LT(start, ave); - ASSERT_LT(ave, end); - - // _(0x5F) - start = "a\x10"; - end = "b"; - ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); - //ASSERT_EQ(ave, "a\x88"); - ASSERT_LT(start, ave); - ASSERT_LT(ave, end); - - start = ""; - end = ""; - ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); - ASSERT_EQ(ave, "\x7F"); - - start = ""; - end = "b"; - ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); - ASSERT_EQ(ave[0], '1'); - ASSERT_EQ(ave[1], '\0'); - - start = "b"; - end = ""; - ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); - //ASSERT_EQ(ave, "\xb0"); - ASSERT_LT(start, ave); - ASSERT_NE(ave, start); - std::cout << DebugString(start) << ", " << DebugString(ave) << ", " << std::endl; - - start = "000000000000001480186993"; - end = "000000000000002147352684"; - ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); - ASSERT_LT(start, ave); - ASSERT_LT(ave, end); - - start = std::string("000017\xF0"); - end = "000018000000001397050688"; - ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); - ASSERT_LT(start, ave); - ASSERT_LT(ave, end); - - start = std::string("0000\177"); - end = std::string("0000\200"); - ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); - ASSERT_LT(start, ave); - ASSERT_LT(ave, end); - - start = ""; - end = "\x1"; - ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); - ASSERT_EQ(ave, std::string("\x0", 1)); - - start = ""; - end = std::string("\x0", 1); - ASSERT_FALSE(TabletIO::FindAverageKey(start, end, &ave)); - - start = "aaa"; - end = "aaa"; - end.append(1, '\0'); - ASSERT_FALSE(TabletIO::FindAverageKey(start, end, &ave)); - - start = "a\xff\xff"; - end = "b"; - ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); - ASSERT_EQ(ave, "a\xff\xff\x80"); + std::string start, end, ave; + + start = "abc"; + end = "abe"; + ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); + // ASSERT_EQ(ave, "abd"); + ASSERT_LT(start, ave); + ASSERT_LT(ave, end); + + start = "helloa"; + end = "hellob"; + ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); + ASSERT_EQ(ave, "helloa\x80"); + + start = "a"; + end = "b"; + ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); + ASSERT_EQ(ave, "a\x80"); + + start = "a"; + // b(0x62), 1(0x31) + end = "ab"; + ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); + ASSERT_LT(start, ave); + ASSERT_LT(ave, end); + + // _(0x5F) + start = "a\x10"; + end = "b"; + ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); + // ASSERT_EQ(ave, "a\x88"); + ASSERT_LT(start, ave); + ASSERT_LT(ave, end); + + start = ""; + end = ""; + ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); + ASSERT_EQ(ave, "\x7F"); + + start = ""; + end = "b"; + ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); + ASSERT_EQ(ave[0], '1'); + ASSERT_EQ(ave[1], '\0'); + + start = "b"; + end = ""; + ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); + // ASSERT_EQ(ave, "\xb0"); + ASSERT_LT(start, ave); + ASSERT_NE(ave, start); + std::cout << DebugString(start) << ", " << DebugString(ave) << ", " << std::endl; + + start = "000000000000001480186993"; + end = "000000000000002147352684"; + ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); + ASSERT_LT(start, ave); + ASSERT_LT(ave, end); + + start = std::string("000017\xF0"); + end = "000018000000001397050688"; + ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); + ASSERT_LT(start, ave); + ASSERT_LT(ave, end); + + start = std::string("0000\177"); + end = std::string("0000\200"); + ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); + ASSERT_LT(start, ave); + ASSERT_LT(ave, end); + + start = ""; + end = "\x1"; + ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); + ASSERT_EQ(ave, std::string("\x0", 1)); + + start = ""; + end = std::string("\x0", 1); + ASSERT_FALSE(TabletIO::FindAverageKey(start, end, &ave)); + + start = "aaa"; + end = "aaa"; + end.append(1, '\0'); + ASSERT_FALSE(TabletIO::FindAverageKey(start, end, &ave)); + + start = "a\xff\xff"; + end = "b"; + ASSERT_TRUE(TabletIO::FindAverageKey(start, end, &ave)); + ASSERT_EQ(ave, "a\xff\xff\x80"); } -static void TabletUnloadWapper(TabletIO* tablet) { - tablet->Unload(); -} +static void TabletUnloadWapper(TabletIO* tablet) { tablet->Unload(); } TEST_F(TabletIOTest, TryUnload) { - std::string tablet_path = working_dir + "unload_try"; - std::string key_start = ""; - std::string key_end = ""; - StatusCode status; - - TabletIO tablet(key_start, key_end, tablet_path); - EXPECT_TRUE(tablet.Load(GetTableSchema(), tablet_path, std::vector(), - std::set(), NULL, NULL, NULL, &status)); - tablet.db_ref_count_++; - std::vector threads; - threads.reserve(2); - EXPECT_TRUE(tablet.try_unload_count_ == 0); - for (int i = 0; i < 2; ++i) { - threads.push_back(std::thread(&TabletUnloadWapper, &tablet)); - } - sleep(2); - EXPECT_TRUE(tablet.try_unload_count_ == 2); - tablet.db_ref_count_--; - for (int i = 0; i < 2; ++i) { - threads[i].join(); - } - threads.clear(); + std::string tablet_path = working_dir + "unload_try"; + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; + + TabletIO tablet(key_start, key_end, tablet_path); + EXPECT_TRUE(tablet.Load(GetTableSchema(), tablet_path, std::vector(), + std::set(), NULL, NULL, NULL, &status)); + tablet.db_ref_count_++; + std::vector threads; + threads.reserve(2); + EXPECT_TRUE(tablet.try_unload_count_ == 0); + for (int i = 0; i < 2; ++i) { + threads.push_back(std::thread(&TabletUnloadWapper, &tablet)); + } + sleep(2); + EXPECT_TRUE(tablet.try_unload_count_ == 2); + tablet.db_ref_count_--; + for (int i = 0; i < 2; ++i) { + threads[i].join(); + } + threads.clear(); } TEST_F(TabletIOTest, OnSlowUnloadOP) { - std::string tablet_path = working_dir + "unload_slow_op"; - std::string key_start = ""; - std::string key_end = ""; - StatusCode status; + std::string tablet_path = working_dir + "unload_slow_op"; + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; + + TabletIO tablet(key_start, key_end, tablet_path); + EXPECT_TRUE(tablet.Load(GetTableSchema(), tablet_path, std::vector(), + std::set(), NULL, NULL, NULL, &status)); + tablet.db_ref_count_++; + std::vector threads; + threads.reserve(3); + EXPECT_TRUE(tablet.try_unload_count_ == 0); + for (int i = 0; i < 3; ++i) { + threads.push_back(std::thread(&TabletUnloadWapper, &tablet)); + } + sleep(5); + EXPECT_TRUE(tablet.try_unload_count_ == 3); + uint64_t size = 0; + std::vector lgsize; + // GetDataSize + EXPECT_TRUE(tablet.GetDataSize(&size, &lgsize, NULL, &status) == false); + EXPECT_TRUE(size == 0); + EXPECT_TRUE(lgsize.size() == 0); + + // LowLevelScan + + std::string start_tera_key; + tablet.GetRawKeyOperator()->EncodeTeraKey("123213", "", "", kLatestTs, leveldb::TKT_FORSEEK, + &start_tera_key); + std::string end_row_key = "123213" + '\0'; + + RowResult value_list; + KeyValuePair next_start_point; + uint32_t read_row_count = 0; + uint32_t read_cell_count = 0; + uint32_t read_bytes = 0; + bool is_complete = false; + status = kTabletNodeOk; + EXPECT_FALSE(tablet.LowLevelScan(start_tera_key, end_row_key, ScanOptions(), &value_list, + &next_start_point, &read_row_count, &read_cell_count, + &read_bytes, &is_complete, &status)); + EXPECT_TRUE(status == kKeyNotInRange); + status = kTabletNodeOk; + + // LowLevelSeek + EXPECT_FALSE(tablet.LowLevelSeek("row", ScanOptions(), &value_list, &status)); + EXPECT_TRUE(status == kKeyNotInRange); + status = kTabletNodeOk; + + // ReadRows + RowReaderInfo row_reader; + EXPECT_FALSE(tablet.ReadCells(row_reader, &value_list, 0, &status, 1000)); + EXPECT_TRUE(status != kTabletNodeOk); + status = kTabletNodeOk; + + tablet.db_ref_count_--; + for (int i = 0; i < 3; ++i) { + threads[i].join(); + } + threads.clear(); +} - TabletIO tablet(key_start, key_end, tablet_path); - EXPECT_TRUE(tablet.Load(GetTableSchema(), tablet_path, std::vector(), - std::set(), NULL, NULL, NULL, &status)); - tablet.db_ref_count_++; - std::vector threads; - threads.reserve(3); - EXPECT_TRUE(tablet.try_unload_count_ == 0); - for (int i = 0; i < 3; ++i) { - threads.push_back(std::thread(&TabletUnloadWapper, &tablet)); +TEST_F(TabletIOTest, RowBloomFilter) { + const int32_t NR = 10000; + const int32_t CR = 10; + std::string tablet_path = working_dir + "row_bloomfilter"; + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; + + TabletIO tablet(key_start, key_end, tablet_path); + EXPECT_TRUE(tablet.Load(GetTableSchema(), tablet_path, std::vector(), + std::set(), NULL, NULL, NULL, &status)); + + // prepare data + leveldb::WriteBatch batch; + for (int32_t i = 0; i < NR; i++) { + char buf[16]; + snprintf(buf, sizeof(buf), "%06d", i); + std::string row(buf); + + for (int32_t j = 0; j < CR; j++) { + char buf[16]; + snprintf(buf, sizeof(buf), "%03d", j); + std::string col(buf); + + std::string tera_key; + tablet.GetRawKeyOperator()->EncodeTeraKey(row, "column", col, get_micros(), + leveldb::TKT_VALUE, &tera_key); + batch.Put(tera_key, ""); } - sleep(5); - EXPECT_TRUE(tablet.try_unload_count_ == 3); - uint64_t size = 0; - std::vector lgsize; - // GetDataSize - EXPECT_TRUE(tablet.GetDataSize(&size, &lgsize, &status) == false); - EXPECT_TRUE(size == 0); - EXPECT_TRUE(lgsize.size() == 0); + } + ASSERT_TRUE(tablet.WriteBatch(&batch, false, true, NULL)); - // LowLevelScan + // read and verify + for (int32_t i = 0; i < NR; i++) { + char buf[16]; + snprintf(buf, sizeof(buf), "%06d", i); + std::string row(buf); std::string start_tera_key; - tablet.GetRawKeyOperator()->EncodeTeraKey("123213", "", "", kLatestTs, leveldb::TKT_FORSEEK, + tablet.GetRawKeyOperator()->EncodeTeraKey(row, "", "", kLatestTs, leveldb::TKT_FORSEEK, &start_tera_key); - std::string end_row_key = "123213" + '\0'; + std::string end_row_key = row + '\0'; RowResult value_list; KeyValuePair next_start_point; uint32_t read_row_count = 0; + uint32_t read_cell_count = 0; uint32_t read_bytes = 0; bool is_complete = false; - status = kTabletNodeOk; - EXPECT_FALSE(tablet.LowLevelScan(start_tera_key, end_row_key, ScanOptions(), &value_list, - &next_start_point, &read_row_count, &read_bytes, - &is_complete, &status)); - EXPECT_TRUE(status == kKeyNotInRange); - status = kTabletNodeOk; - - // LowLevelSeek - EXPECT_FALSE(tablet.LowLevelSeek("row", ScanOptions(), &value_list, &status)); - EXPECT_TRUE(status == kKeyNotInRange); - status = kTabletNodeOk; - - // ReadRows - RowReaderInfo row_reader; - EXPECT_FALSE(tablet.ReadCells(row_reader, &value_list, 0, &status, 1000)); - EXPECT_TRUE(status != kTabletNodeOk); - status = kTabletNodeOk; - - tablet.db_ref_count_--; - for (int i = 0; i < 3; ++i) { - threads[i].join(); + ASSERT_TRUE(tablet.LowLevelScan(start_tera_key, end_row_key, ScanOptions(), &value_list, + &next_start_point, &read_row_count, &read_cell_count, + &read_bytes, &is_complete, &status)); + ASSERT_EQ(value_list.key_values_size(), CR); + for (int32_t j = 0; j < CR; j++) { + char buf[16]; + snprintf(buf, sizeof(buf), "%03d", j); + std::string col(buf); + + const KeyValuePair& kv = value_list.key_values(j); + EXPECT_EQ(kv.key(), row); + EXPECT_EQ(kv.qualifier(), col); } - threads.clear(); + } } +class TabletIOKVOnlyTest : public ::testing::Test { + public: + TabletIOKVOnlyTest() { + std::string cmd = std::string("mkdir -p ") + working_dir; + FLAGS_tera_tabletnode_path_prefix = "./"; + system(cmd.c_str()); + + InitSchema(); + } + + ~TabletIOKVOnlyTest() { + std::cout << "kvonly clean" << std::endl; + ; + std::string cmd = std::string("rm -rf ") + working_dir; + system(cmd.c_str()); + } + + const TableSchema& GetTableSchema() { return schema_; } + + void InitSchema() { + schema_.set_name("terakv"); + schema_.set_raw_key(GeneralKv); + } + + std::map empty_snaphsots_; + std::map empty_rollback_; + TableSchema schema_; +}; -TEST_F(TabletIOTest, RowBloomFilter) { - const int32_t NR = 10000; - const int32_t CR = 10; - std::string tablet_path = working_dir + "row_bloomfilter"; - std::string key_start = ""; - std::string key_end = ""; - StatusCode status; - - TabletIO tablet(key_start, key_end, tablet_path); - EXPECT_TRUE(tablet.Load(GetTableSchema(), tablet_path, std::vector(), - std::set(), NULL, NULL, NULL, &status)); - - // prepare data - leveldb::WriteBatch batch; - for (int32_t i = 0; i < NR; i++) { - char buf[16]; - snprintf(buf, sizeof(buf), "%06d", i); - std::string row(buf); - - for (int32_t j = 0; j < CR; j++) { - char buf[16]; - snprintf(buf, sizeof(buf), "%03d", j); - std::string col(buf); - - std::string tera_key; - tablet.GetRawKeyOperator()->EncodeTeraKey(row, "column", col, get_micros(), - leveldb::TKT_VALUE, &tera_key); - batch.Put(tera_key, ""); - } - } - ASSERT_TRUE(tablet.WriteBatch(&batch, false, true, NULL)); - - // read and verify - for (int32_t i = 0; i < NR; i++) { - char buf[16]; - snprintf(buf, sizeof(buf), "%06d", i); - std::string row(buf); - - std::string start_tera_key; - tablet.GetRawKeyOperator()->EncodeTeraKey(row, "", "", kLatestTs, leveldb::TKT_FORSEEK, - &start_tera_key); - std::string end_row_key = row + '\0'; - - RowResult value_list; - KeyValuePair next_start_point; - uint32_t read_row_count = 0; - uint32_t read_bytes = 0; - bool is_complete = false; - ASSERT_TRUE(tablet.LowLevelScan(start_tera_key, end_row_key, ScanOptions(), &value_list, - &next_start_point, &read_row_count, &read_bytes, - &is_complete, &status)); - ASSERT_EQ(value_list.key_values_size(), CR); - for (int32_t j = 0; j < CR; j++) { - char buf[16]; - snprintf(buf, sizeof(buf), "%03d", j); - std::string col(buf); - - const KeyValuePair& kv = value_list.key_values(j); - EXPECT_EQ(kv.key(), row); - EXPECT_EQ(kv.qualifier(), col); - } - } +TEST_F(TabletIOKVOnlyTest, KvTableScan) { + std::string tablet_path = working_dir + "kvscan_tablet"; + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; + + TabletIO tablet(key_start, key_end, tablet_path); + EXPECT_TRUE(tablet.Load(GetTableSchema(), tablet_path, std::vector(), + std::set(), NULL, NULL, NULL, &status)); + + uint32_t read_row_count = 0; + uint32_t read_bytes = 0; + + // case 1, empty table + RowResult value_list; + ScanContext* scan_context = new ScanContext; + scan_context->start_tera_key = ""; + scan_context->end_row_key = ""; + scan_context->scan_options = ScanOptions(); + scan_context->it = NULL; + scan_context->result = &value_list; + scan_context->ret_code = kTabletNodeOk; + scan_context->data_idx = 0; + scan_context->complete = false; + scan_context->compact_strategy = tablet.ldb_options_.compact_strategy_factory->NewInstance(); + tablet.InitScanIterator(scan_context->start_tera_key, scan_context->end_row_key, + scan_context->scan_options, &(scan_context->it)); + std::cout << "kv scan test1" << std::endl; + EXPECT_TRUE(tablet.KvTableScan(scan_context, &read_row_count, &read_bytes)); + EXPECT_EQ(value_list.key_values_size(), 0); + delete scan_context->it; // for db iterator + delete scan_context; + + // case 2, 5 row, scan "" "" + tablet.WriteOne("row1", "helloword1", false, NULL); + tablet.WriteOne("row2", "helloword2", false, NULL); + tablet.WriteOne("row3", "helloword3", false, NULL); + tablet.WriteOne("row4", "helloword4", false, NULL); + tablet.WriteOne("row5", "helloword5", false, NULL); + + value_list.clear_key_values(); + scan_context = new ScanContext; + scan_context->start_tera_key = ""; + scan_context->end_row_key = ""; + scan_context->scan_options = ScanOptions(); + scan_context->it = NULL; + scan_context->result = &value_list; + scan_context->ret_code = kTabletNodeOk; + scan_context->data_idx = 0; + scan_context->complete = false; + scan_context->compact_strategy = tablet.ldb_options_.compact_strategy_factory->NewInstance(); + tablet.InitScanIterator(scan_context->start_tera_key, scan_context->end_row_key, + scan_context->scan_options, &(scan_context->it)); + std::cout << "kv scan test2" << std::endl; + EXPECT_TRUE(tablet.KvTableScan(scan_context, &read_row_count, &read_bytes)); + EXPECT_EQ(value_list.key_values_size(), 5); + for (int32_t j = 0; j < 5; j++) { + char buf[16]; + snprintf(buf, sizeof(buf), "%03d", j); + std::string col(buf); + + const KeyValuePair& kv = value_list.key_values(j); + std::string row = "row" + std::to_string(j + 1); + std::cout << kv.key() << row << std::endl; + EXPECT_EQ(kv.key(), row); + std::string value = "helloword" + std::to_string(j + 1); + std::cout << kv.value() << value << std::endl; + EXPECT_EQ(kv.value(), value); + } + delete scan_context->it; // for db iterator + delete scan_context; + + // case 3, scan "row1" "row3" + value_list.clear_key_values(); + scan_context = new ScanContext; + scan_context->start_tera_key = "row1"; + scan_context->end_row_key = "row3"; + scan_context->scan_options = ScanOptions(); + scan_context->it = NULL; + scan_context->result = &value_list; + scan_context->ret_code = kTabletNodeOk; + scan_context->data_idx = 0; + scan_context->complete = false; + scan_context->compact_strategy = tablet.ldb_options_.compact_strategy_factory->NewInstance(); + tablet.InitScanIterator(scan_context->start_tera_key, scan_context->end_row_key, + scan_context->scan_options, &(scan_context->it)); + std::cout << "kv scan test3" << std::endl; + EXPECT_TRUE(tablet.KvTableScan(scan_context, &read_row_count, &read_bytes)); + EXPECT_EQ(value_list.key_values_size(), 2); + + delete scan_context->it; // for db iterator + delete scan_context; + // EXPECT_TRUE(tablet.Unload()); } -} // namespace io -} // namespace tera +} // namespace io +} // namespace tera int main(int argc, char** argv) { - FLAGS_tera_io_retry_max_times = 1; - FLAGS_tera_tablet_living_period = 0; - FLAGS_tera_tablet_max_write_buffer_size = 1; - FLAGS_tera_leveldb_env_type = "local"; - ::google::InitGoogleLogging(argv[0]); - FLAGS_log_dir = "./log"; - if (access(FLAGS_log_dir.c_str(), F_OK)) { - mkdir(FLAGS_log_dir.c_str(), 0777); - } - std::string pragram_name("tera"); - tera::utils::SetupLog(pragram_name); - ::google::ParseCommandLineFlags(&argc, &argv, true); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + FLAGS_tera_io_retry_max_times = 1; + FLAGS_tera_tablet_living_period = 0; + FLAGS_tera_tablet_max_write_buffer_size = 1; + FLAGS_tera_leveldb_env_type = "local"; + ::google::InitGoogleLogging(argv[0]); + FLAGS_log_dir = "./log"; + if (access(FLAGS_log_dir.c_str(), F_OK)) { + mkdir(FLAGS_log_dir.c_str(), 0777); + } + std::string pragram_name("tera"); + tera::utils::SetupLog(pragram_name); + ::google::ParseCommandLineFlags(&argc, &argv, true); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); } - diff --git a/src/io/test/tablet_scanner_test.cc b/src/io/test/tablet_scanner_test.cc index 6c1d02315..c3fa4bd0f 100644 --- a/src/io/test/tablet_scanner_test.cc +++ b/src/io/test/tablet_scanner_test.cc @@ -39,290 +39,282 @@ namespace io { const std::string working_dir = "testdata/"; class TabletScannerTest : public ::testing::Test { -public: - TabletScannerTest() { - session_id_ = 0; - last_key_= 0; - done_cnt_ = 0; - - std::string cmd = std::string("mkdir -p ") + working_dir; - FLAGS_tera_tabletnode_path_prefix = "./"; - system(cmd.c_str()); - InitSchema(); + public: + TabletScannerTest() { + session_id_ = 0; + last_key_ = 0; + done_cnt_ = 0; + + std::string cmd = std::string("mkdir -p ") + working_dir; + FLAGS_tera_tabletnode_path_prefix = "./"; + system(cmd.c_str()); + InitSchema(); + } + + ~TabletScannerTest() { + std::string cmd = std::string("rm -rf ") + working_dir; + system(cmd.c_str()); + } + + const TableSchema& GetTableSchema() { return schema_; } + + void InitSchema() { + schema_.set_name("tera"); + schema_.set_raw_key(Binary); + + LocalityGroupSchema* lg = schema_.add_locality_groups(); + lg->set_name("lg0"); + + ColumnFamilySchema* cf = schema_.add_column_families(); + cf->set_name("column"); + cf->set_locality_group("lg0"); + cf->set_max_versions(3); + } + + void NewRpcRequestDone(ScanTabletRequest* request, ScanTabletResponse* response) { + uint32_t size = response->results().key_values_size(); + for (uint32_t i = 0; i < size; i++) { + const tera::KeyValuePair& row = response->results().key_values(i); + // LOG(INFO) << row.key() << ":" << row.column_family() << ":" << + // row.qualifier() << ":" << row.value(); + std::string last_key = StringFormat("%011llu", last_key_); // NumberToString(500); + EXPECT_TRUE(last_key == row.key()); + last_key_++; } - - ~TabletScannerTest() { - std::string cmd = std::string("rm -rf ") + working_dir; - system(cmd.c_str()); - } - - const TableSchema& GetTableSchema() { - return schema_; - } - - void InitSchema() { - schema_.set_name("tera"); - schema_.set_raw_key(Binary); - - LocalityGroupSchema* lg = schema_.add_locality_groups(); - lg->set_name("lg0"); - - ColumnFamilySchema* cf = schema_.add_column_families(); - cf->set_name("column"); - cf->set_locality_group("lg0"); - cf->set_max_versions(3); + if (size == 0) { + LOG(INFO) << "req[" << done_cnt_ << "] scan done"; } - - void NewRpcRequestDone(ScanTabletRequest* request, ScanTabletResponse* response) { - uint32_t size = response->results().key_values_size(); - for (uint32_t i = 0; i < size; i++) { - const tera::KeyValuePair& row = response->results().key_values(i); - //LOG(INFO) << row.key() << ":" << row.column_family() << ":" << row.qualifier() << ":" << row.value(); - std::string last_key = StringFormat("%011llu", last_key_); // NumberToString(500); - EXPECT_TRUE(last_key == row.key()); - last_key_++; - } - if (size == 0) { - LOG(INFO) << "req[" << done_cnt_ << "] scan done"; - } - done_cnt_++; - if (req_vec_.size() == done_cnt_) { - for (uint32_t j = 0; j < done_cnt_; j++) { - delete req_vec_[j]; - delete resp_vec_[j]; - } - req_vec_.clear(); - resp_vec_.clear(); - done_vec_.clear(); - } + done_cnt_++; + if (req_vec_.size() == done_cnt_) { + for (uint32_t j = 0; j < done_cnt_; j++) { + delete req_vec_[j]; + delete resp_vec_[j]; + } + req_vec_.clear(); + resp_vec_.clear(); + done_vec_.clear(); } - - void NewRpcRequest(uint64_t nr_req, uint64_t s, uint64_t e) { - std::string start_key = StringFormat("%011llu", s); // NumberToString(500); - std::string end_key = StringFormat("%011llu", e); // NumberToString(500); - session_id_ = get_micros(); - uint64_t ts = get_micros(); - - last_key_ = s; - done_cnt_ = 0; - for (uint32_t i = 0; i < nr_req; i++) { - ScanTabletRequest* request = new ScanTabletRequest; - ScanTabletResponse* response = new ScanTabletResponse; - google::protobuf::Closure* done = - google::protobuf::NewCallback(this, &TabletScannerTest::NewRpcRequestDone, request, response); - - request->set_part_of_session(true); - if (i == 0) { - request->set_part_of_session(false); - } - request->set_session_id(session_id_); - request->set_sequence_id(100); - request->set_table_name(schema_.name()); - request->set_start(start_key); - request->set_end(end_key); - request->set_snapshot_id(0); - request->set_timeout(5000); - request->set_buffer_limit(65536); - request->set_snapshot_id(0); - request->set_max_version(1); - TimeRange* time_range = request->mutable_timerange(); - time_range->set_ts_start(0); - time_range->set_ts_end(ts); - request->set_timestamp(ts); - - req_vec_.push_back(request); - resp_vec_.push_back(response); - done_vec_.push_back(done); - } + } + + void NewRpcRequest(uint64_t nr_req, uint64_t s, uint64_t e) { + std::string start_key = StringFormat("%011llu", s); // NumberToString(500); + std::string end_key = StringFormat("%011llu", e); // NumberToString(500); + session_id_ = get_micros(); + uint64_t ts = get_micros(); + + last_key_ = s; + done_cnt_ = 0; + for (uint32_t i = 0; i < nr_req; i++) { + ScanTabletRequest* request = new ScanTabletRequest; + ScanTabletResponse* response = new ScanTabletResponse; + google::protobuf::Closure* done = google::protobuf::NewCallback( + this, &TabletScannerTest::NewRpcRequestDone, request, response); + + request->set_part_of_session(true); + if (i == 0) { + request->set_part_of_session(false); + } + request->set_session_id(session_id_); + request->set_sequence_id(100); + request->set_table_name(schema_.name()); + request->set_start(start_key); + request->set_end(end_key); + request->set_snapshot_id(0); + request->set_timeout(5000); + request->set_buffer_limit(65536); + request->set_snapshot_id(0); + request->set_max_version(1); + TimeRange* time_range = request->mutable_timerange(); + time_range->set_ts_start(0); + time_range->set_ts_end(ts); + request->set_timestamp(ts); + + req_vec_.push_back(request); + resp_vec_.push_back(response); + done_vec_.push_back(done); } - - // prepare test data - void PrepareData(TabletIO* tablet, uint64_t e, uint64_t s = 0) { - leveldb::WriteBatch batch; - for (uint64_t i = s; i < e; ++i) { - std::string str = StringFormat("%011llu", i); // NumberToString(i); - - std::string key; - tablet->GetRawKeyOperator()->EncodeTeraKey(str, "column", "qualifer", get_micros(), leveldb::TKT_VALUE, &key); - batch.Put(key, str); - } - EXPECT_TRUE(tablet->WriteBatch(&batch)); - return; + } + + // prepare test data + void PrepareData(TabletIO* tablet, uint64_t e, uint64_t s = 0) { + leveldb::WriteBatch batch; + for (uint64_t i = s; i < e; ++i) { + std::string str = StringFormat("%011llu", i); // NumberToString(i); + + std::string key; + tablet->GetRawKeyOperator()->EncodeTeraKey(str, "column", "qualifer", get_micros(), + leveldb::TKT_VALUE, &key); + batch.Put(key, str); } - - void NewRequestDone(ScanTabletRequest* request, ScanTabletResponse* response) { - - } - void NewRequest(uint64_t nr_req, uint64_t s, uint64_t e, - std::vector * req_vec, - std::vector * resp_vec, - std::vector * done_vec) { - std::string start_key = StringFormat("%011llu", s); // NumberToString(500); - std::string end_key = StringFormat("%011llu", e); // NumberToString(500); - int64_t session_id = get_micros(); - uint64_t ts = get_micros(); - - for (uint32_t i = 0; i < nr_req; i++) { - ScanTabletRequest* request = new ScanTabletRequest; - ScanTabletResponse* response = new ScanTabletResponse; - google::protobuf::Closure* done = - google::protobuf::NewCallback(this, &TabletScannerTest::NewRequestDone, request, response); - - request->set_part_of_session(true); - if (i == 0) { - request->set_part_of_session(false); - } - request->set_session_id(session_id); - request->set_sequence_id(100); - request->set_table_name(schema_.name()); - request->set_start(start_key); - request->set_end(end_key); - request->set_snapshot_id(0); - request->set_timeout(5000); - request->set_buffer_limit(65536); - request->set_snapshot_id(0); - request->set_max_version(1); - TimeRange* time_range = request->mutable_timerange(); - time_range->set_ts_start(0); - time_range->set_ts_end(ts); - request->set_timestamp(ts); - - req_vec->push_back(request); - resp_vec->push_back(response); - done_vec->push_back(done); - } + EXPECT_TRUE(tablet->WriteBatch(&batch)); + return; + } + + void NewRequestDone(ScanTabletRequest* request, ScanTabletResponse* response) {} + void NewRequest(uint64_t nr_req, uint64_t s, uint64_t e, std::vector* req_vec, + std::vector* resp_vec, + std::vector* done_vec) { + std::string start_key = StringFormat("%011llu", s); // NumberToString(500); + std::string end_key = StringFormat("%011llu", e); // NumberToString(500); + int64_t session_id = get_micros(); + uint64_t ts = get_micros(); + + for (uint32_t i = 0; i < nr_req; i++) { + ScanTabletRequest* request = new ScanTabletRequest; + ScanTabletResponse* response = new ScanTabletResponse; + google::protobuf::Closure* done = google::protobuf::NewCallback( + this, &TabletScannerTest::NewRequestDone, request, response); + + request->set_part_of_session(true); + if (i == 0) { + request->set_part_of_session(false); + } + request->set_session_id(session_id); + request->set_sequence_id(100); + request->set_table_name(schema_.name()); + request->set_start(start_key); + request->set_end(end_key); + request->set_snapshot_id(0); + request->set_timeout(5000); + request->set_buffer_limit(65536); + request->set_snapshot_id(0); + request->set_max_version(1); + TimeRange* time_range = request->mutable_timerange(); + time_range->set_ts_start(0); + time_range->set_ts_end(ts); + request->set_timestamp(ts); + + req_vec->push_back(request); + resp_vec->push_back(response); + done_vec->push_back(done); } + } - void MultiScan(TabletIO* tablet) { - uint64_t nr = 10; - std::vector req_vec; - std::vector resp_vec; - std::vector done_vec; - NewRequest(nr, 5, 5000, &req_vec, &resp_vec, &done_vec); + void MultiScan(TabletIO* tablet) { + uint64_t nr = 10; + std::vector req_vec; + std::vector resp_vec; + std::vector done_vec; + NewRequest(nr, 5, 5000, &req_vec, &resp_vec, &done_vec); - for (uint32_t i = 0; i < nr; i++) { - tablet->ScanRows(req_vec[i], resp_vec[i], done_vec[i]); - } + for (uint32_t i = 0; i < nr; i++) { + tablet->ScanRows(req_vec[i], resp_vec[i], done_vec[i]); } + } -public: - uint64_t session_id_; + public: + uint64_t session_id_; - std::vector req_vec_; - std::vector resp_vec_; - std::vector done_vec_; - uint64_t done_cnt_; - uint64_t last_key_; + std::vector req_vec_; + std::vector resp_vec_; + std::vector done_vec_; + uint64_t done_cnt_; + uint64_t last_key_; - std::map empty_snaphsots_; - std::map empty_rollback_; - TableSchema schema_; + std::map empty_snaphsots_; + std::map empty_rollback_; + TableSchema schema_; }; TEST_F(TabletScannerTest, General) { - std::string tablet_path = working_dir + "general"; - std::string key_start = ""; - std::string key_end = ""; - StatusCode status; + std::string tablet_path = working_dir + "general"; + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; - TabletIO tablet(key_start, key_end, tablet_path); - EXPECT_TRUE(tablet.Load(GetTableSchema(), tablet_path, std::vector(), - std::set(), NULL, NULL, NULL, &status)); + TabletIO tablet(key_start, key_end, tablet_path); + EXPECT_TRUE(tablet.Load(GetTableSchema(), tablet_path, std::vector(), + std::set(), NULL, NULL, NULL, &status)); - PrepareData(&tablet, 1000000); - uint64_t nr = 400; - NewRpcRequest(nr, 5, 500000); + PrepareData(&tablet, 1000000); + uint64_t nr = 400; + NewRpcRequest(nr, 5, 500000); - for (uint32_t i = 0; i < nr; i++) { - tablet.ScanRows(req_vec_[i], resp_vec_[i], done_vec_[i]); - } + for (uint32_t i = 0; i < nr; i++) { + tablet.ScanRows(req_vec_[i], resp_vec_[i], done_vec_[i]); + } - EXPECT_TRUE(tablet.Unload()); + EXPECT_TRUE(tablet.Unload()); } -static void TabletUnloadWapper(TabletIO* tablet) { - tablet->Unload(); -} +static void TabletUnloadWapper(TabletIO* tablet) { tablet->Unload(); } TEST_F(TabletScannerTest, GeneralOnUnloadSlow) { - std::string tablet_path = working_dir + "general_1"; - std::string key_start = ""; - std::string key_end = ""; - StatusCode status; - - TabletIO tablet(key_start, key_end, tablet_path); - EXPECT_TRUE(tablet.Load(GetTableSchema(), tablet_path, std::vector(), - std::set(), NULL, NULL, NULL, &status)); - - PrepareData(&tablet, 1000000); - uint64_t nr = 400; - NewRpcRequest(nr, 5, 500000); - // make it unload slow - tablet.db_ref_count_++; - std::vector threads; - threads.reserve(3); - EXPECT_TRUE(tablet.try_unload_count_ == 0); - for (int i = 0; i < 3; ++i) { - threads.push_back(std::thread(&TabletUnloadWapper, &tablet)); - } - sleep(5); - EXPECT_TRUE(tablet.try_unload_count_ == 3); - - tablet.try_unload_count_ = 3; - for (uint32_t i = 0; i < nr; i++) { - EXPECT_FALSE(tablet.ScanRows(req_vec_[i], resp_vec_[i], done_vec_[i])); - } - - tablet.db_ref_count_--; - for (int i = 0; i < 3; ++i) { - threads[i].join(); - } - threads.clear(); + std::string tablet_path = working_dir + "general_1"; + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; + + TabletIO tablet(key_start, key_end, tablet_path); + EXPECT_TRUE(tablet.Load(GetTableSchema(), tablet_path, std::vector(), + std::set(), NULL, NULL, NULL, &status)); + + PrepareData(&tablet, 1000000); + uint64_t nr = 400; + NewRpcRequest(nr, 5, 500000); + // make it unload slow + tablet.db_ref_count_++; + std::vector threads; + threads.reserve(3); + EXPECT_TRUE(tablet.try_unload_count_ == 0); + for (int i = 0; i < 3; ++i) { + threads.push_back(std::thread(&TabletUnloadWapper, &tablet)); + } + sleep(5); + EXPECT_TRUE(tablet.try_unload_count_ == 3); + + tablet.try_unload_count_ = 3; + for (uint32_t i = 0; i < nr; i++) { + EXPECT_FALSE(tablet.ScanRows(req_vec_[i], resp_vec_[i], done_vec_[i])); + } + + tablet.db_ref_count_--; + for (int i = 0; i < 3; ++i) { + threads[i].join(); + } + threads.clear(); } - TEST_F(TabletScannerTest, CacheEvict) { - std::string tablet_path = working_dir + "CacheEvict"; - std::string key_start = ""; - std::string key_end = ""; - StatusCode status; - - TabletIO tablet(key_start, key_end, tablet_path); - EXPECT_TRUE(tablet.Load(GetTableSchema(), tablet_path, std::vector(), - std::set(), NULL, NULL, NULL, &status)); - - PrepareData(&tablet, 1000000); - - // multi scan - uint32_t nr_thread = 40; - ThreadPool pool(nr_thread); - for (uint32_t i = 0; i < nr_thread; i++) { - ThreadPool::Task task = - std::bind(&TabletScannerTest::MultiScan, this, &tablet); - pool.AddTask(task); - } - pool.Stop(true); - EXPECT_TRUE(tablet.Unload()); + std::string tablet_path = working_dir + "CacheEvict"; + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; + + TabletIO tablet(key_start, key_end, tablet_path); + EXPECT_TRUE(tablet.Load(GetTableSchema(), tablet_path, std::vector(), + std::set(), NULL, NULL, NULL, &status)); + + PrepareData(&tablet, 1000000); + + // multi scan + uint32_t nr_thread = 40; + ThreadPool pool(nr_thread); + for (uint32_t i = 0; i < nr_thread; i++) { + ThreadPool::Task task = std::bind(&TabletScannerTest::MultiScan, this, &tablet); + pool.AddTask(task); + } + pool.Stop(true); + EXPECT_TRUE(tablet.Unload()); } -} // namespace io -} // namespace tera +} // namespace io +} // namespace tera int main(int argc, char** argv) { - FLAGS_tera_io_retry_max_times = 1; - FLAGS_tera_tablet_living_period = 0; - FLAGS_tera_tablet_max_write_buffer_size = 1; - FLAGS_tera_leveldb_env_type = "local"; - //FLAGS_v = 10; - ::google::InitGoogleLogging(argv[0]); - FLAGS_log_dir = "./log"; - if (access(FLAGS_log_dir.c_str(), F_OK)) { - mkdir(FLAGS_log_dir.c_str(), 0777); - } - std::string pragram_name("tera"); - tera::utils::SetupLog(pragram_name); - ::google::ParseCommandLineFlags(&argc, &argv, true); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + FLAGS_tera_io_retry_max_times = 1; + FLAGS_tera_tablet_living_period = 0; + FLAGS_tera_tablet_max_write_buffer_size = 1; + FLAGS_tera_leveldb_env_type = "local"; + // FLAGS_v = 10; + ::google::InitGoogleLogging(argv[0]); + FLAGS_log_dir = "./log"; + if (access(FLAGS_log_dir.c_str(), F_OK)) { + mkdir(FLAGS_log_dir.c_str(), 0777); + } + std::string pragram_name("tera"); + tera::utils::SetupLog(pragram_name); + ::google::ParseCommandLineFlags(&argc, &argv, true); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); } - diff --git a/src/io/timekey_comparator.cc b/src/io/timekey_comparator.cc index b89a74636..323b01579 100644 --- a/src/io/timekey_comparator.cc +++ b/src/io/timekey_comparator.cc @@ -17,59 +17,54 @@ TimekeyComparator::TimekeyComparator(const leveldb::Comparator* comparator) TimekeyComparator::~TimekeyComparator() {} -const char* TimekeyComparator::Name() const { - return "TimekeyComparator"; -} +const char* TimekeyComparator::Name() const { return "TimekeyComparator"; } -int TimekeyComparator::Compare(const leveldb::Slice& akey, - const leveldb::Slice& bkey) const { - if (akey.size() < sizeof(uint64_t) || bkey.size() < sizeof(uint64_t)) { - return static_cast(akey.size()) - static_cast(bkey.size()); - } - int r = comparator_->Compare(ExtractShortKey(akey), ExtractShortKey(bkey)); - if (r == 0) { - const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8); - const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8); - if (anum > bnum) { - r = -1; - } else if (anum < bnum) { - r = +1; - } +int TimekeyComparator::Compare(const leveldb::Slice& akey, const leveldb::Slice& bkey) const { + if (akey.size() < sizeof(uint64_t) || bkey.size() < sizeof(uint64_t)) { + return static_cast(akey.size()) - static_cast(bkey.size()); + } + int r = comparator_->Compare(ExtractShortKey(akey), ExtractShortKey(bkey)); + if (r == 0) { + const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8); + const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8); + if (anum > bnum) { + r = -1; + } else if (anum < bnum) { + r = +1; } - return r; + } + return r; } void TimekeyComparator::FindShortestSeparator(std::string* start, const leveldb::Slice& limit) const { - leveldb::Slice user_start = ExtractTimeKey(*start); - leveldb::Slice user_limit = ExtractTimeKey(limit); - std::string tmp(user_start.data(), user_start.size()); + leveldb::Slice user_start = ExtractTimeKey(*start); + leveldb::Slice user_limit = ExtractTimeKey(limit); + std::string tmp(user_start.data(), user_start.size()); - comparator_->FindShortestSeparator(&tmp, user_limit); - if (tmp.size() < user_start.size() && - comparator_->Compare(user_start, tmp) < 0) { - PutFixed64(&tmp, PackTimestampAndType(kMaxTimeStamp, UKT_FORSEEK)); - CHECK(this->Compare(*start, tmp) < 0); - CHECK(this->Compare(tmp, limit) < 0); - start->swap(tmp); - } + comparator_->FindShortestSeparator(&tmp, user_limit); + if (tmp.size() < user_start.size() && comparator_->Compare(user_start, tmp) < 0) { + PutFixed64(&tmp, PackTimestampAndType(kMaxTimeStamp, UKT_FORSEEK)); + CHECK(this->Compare(*start, tmp) < 0); + CHECK(this->Compare(tmp, limit) < 0); + start->swap(tmp); + } } void TimekeyComparator::FindShortSuccessor(std::string* key) const { - leveldb::Slice user_key = ExtractTimeKey(*key); - std::string tmp(user_key.data(), user_key.size()); + leveldb::Slice user_key = ExtractTimeKey(*key); + std::string tmp(user_key.data(), user_key.size()); - comparator_->FindShortSuccessor(&tmp); - if (tmp.size() < user_key.size() && - comparator_->Compare(user_key, tmp) < 0) { - PutFixed64(&tmp, PackTimestampAndType(kMaxTimeStamp, UKT_FORSEEK)); - CHECK(this->Compare(*key, tmp) < 0); - key->swap(tmp); - } + comparator_->FindShortSuccessor(&tmp); + if (tmp.size() < user_key.size() && comparator_->Compare(user_key, tmp) < 0) { + PutFixed64(&tmp, PackTimestampAndType(kMaxTimeStamp, UKT_FORSEEK)); + CHECK(this->Compare(*key, tmp) < 0); + key->swap(tmp); + } } const TimekeyComparator* NewTimekeyComparator(const leveldb::Comparator* comparator) { - return new TimekeyComparator(comparator); + return new TimekeyComparator(comparator); } -} // namespace io -} // namespace tera +} // namespace io +} // namespace tera diff --git a/src/io/timekey_comparator.h b/src/io/timekey_comparator.h index 62e1b753f..401b32383 100644 --- a/src/io/timekey_comparator.h +++ b/src/io/timekey_comparator.h @@ -12,26 +12,25 @@ namespace tera { namespace io { class TimekeyComparator : public leveldb::Comparator { -public: - TimekeyComparator(const leveldb::Comparator* comparator); - ~TimekeyComparator(); + public: + TimekeyComparator(const leveldb::Comparator* comparator); + ~TimekeyComparator(); - int Compare(const leveldb::Slice& a, const leveldb::Slice& b) const; + int Compare(const leveldb::Slice& a, const leveldb::Slice& b) const; - const char* Name() const; + const char* Name() const; - void FindShortestSeparator(std::string* start, - const leveldb::Slice& limit) const; + void FindShortestSeparator(std::string* start, const leveldb::Slice& limit) const; - void FindShortSuccessor(std::string* key) const; + void FindShortSuccessor(std::string* key) const; -private: - const leveldb::Comparator* comparator_; + private: + const leveldb::Comparator* comparator_; }; const TimekeyComparator* NewTimekeyComparator(const leveldb::Comparator* comparator); -} // namespace io -} // namespace tera +} // namespace io +} // namespace tera -#endif // TERA_IO_TIMEKEY_COMARATOR_H_ +#endif // TERA_IO_TIMEKEY_COMARATOR_H_ diff --git a/src/io/ttlkv_compact_strategy.cc b/src/io/ttlkv_compact_strategy.cc index 21e8a1b86..be2169b1b 100644 --- a/src/io/ttlkv_compact_strategy.cc +++ b/src/io/ttlkv_compact_strategy.cc @@ -16,79 +16,78 @@ KvCompactStrategy::KvCompactStrategy(const TableSchema& schema) raw_key_operator_(GetRawKeyOperatorFromSchema(schema_)), cmp_(NewRowKeyComparator(raw_key_operator_)), snapshot_(leveldb::kMaxSequenceNumber) { - VLOG(11) << "KvCompactStrategy construct"; + VLOG(11) << "KvCompactStrategy construct"; } -KvCompactStrategy::~KvCompactStrategy() { - delete cmp_; -} +KvCompactStrategy::~KvCompactStrategy() { delete cmp_; } -const leveldb::Comparator* KvCompactStrategy::RowKeyComparator() { - return cmp_; -} +const leveldb::Comparator* KvCompactStrategy::RowKeyComparator() { return cmp_; } -const char* KvCompactStrategy::Name() const { - return "tera.TTLKvCompactStrategy"; +void KvCompactStrategy::ExtractRowKey(const Slice& tera_key, std::string* raw_row_key) { + Slice row_key; + if (raw_key_operator_->ExtractTeraKey(tera_key, &row_key, NULL, NULL, NULL, NULL)) { + std::string tera_key_forseek; + raw_key_operator_->EncodeTeraKey(row_key.ToString(), "", "", 0, leveldb::TKT_FORSEEK, + &tera_key_forseek); + *raw_row_key = tera_key_forseek; + } else { + *raw_row_key = tera_key.ToString(); + } } -void KvCompactStrategy::SetSnapshot(uint64_t snapshot) { - snapshot_ = snapshot; -} +const char* KvCompactStrategy::Name() const { return "tera.TTLKvCompactStrategy"; } + +void KvCompactStrategy::SetSnapshot(uint64_t snapshot) { snapshot_ = snapshot; } bool KvCompactStrategy::CheckTag(const Slice& tera_key, bool* del_tag, int64_t* ttl_tag) { - *del_tag = false; - leveldb::Slice row_key; - int64_t expire_timestamp; - raw_key_operator_->ExtractTeraKey(tera_key, &row_key, NULL, NULL, - &expire_timestamp, NULL); - *ttl_tag = (expire_timestamp > 0 && expire_timestamp != kLatestTs) ? (expire_timestamp * 1000000LL): -1; - VLOG(11) << "CheckTag, expire " << expire_timestamp << ", ttl_tag " << *ttl_tag; - return true; + *del_tag = false; + leveldb::Slice row_key; + int64_t expire_timestamp; + raw_key_operator_->ExtractTeraKey(tera_key, &row_key, NULL, NULL, &expire_timestamp, NULL); + *ttl_tag = + (expire_timestamp > 0 && expire_timestamp != kLatestTs) ? (expire_timestamp * 1000000LL) : -1; + VLOG(11) << "CheckTag, expire " << expire_timestamp << ", ttl_tag " << *ttl_tag; + return true; } bool KvCompactStrategy::Drop(const leveldb::Slice& tera_key, uint64_t n, const std::string& lower_bound) { - leveldb::Slice row_key; - int64_t expire_timestamp; - raw_key_operator_->ExtractTeraKey(tera_key, &row_key, NULL, NULL, - &expire_timestamp, NULL); - - int64_t now = get_micros() / 1000000; - if (expire_timestamp <= 0 /*上溢,永不过期*/ - || expire_timestamp > now) { - VLOG(11) << "[KvCompactStrategy-Not-Drop] row_key:[" << row_key.ToString() - << "] expire_timestamp:[" << expire_timestamp - << "] now:[" << now << "]"; - return false; - } - VLOG(11) << "[KvCompactStrategy-Drop] row_key:[" << row_key.ToString() - << "] expire_timestamp:[" << expire_timestamp - << "] now:[" << now << "]"; - return true; + leveldb::Slice row_key; + int64_t expire_timestamp; + raw_key_operator_->ExtractTeraKey(tera_key, &row_key, NULL, NULL, &expire_timestamp, NULL); + + int64_t now = get_micros() / 1000000; + if (expire_timestamp <= 0 /*上溢,永不过期*/ + || expire_timestamp > now) { + VLOG(11) << "[KvCompactStrategy-Not-Drop] row_key:[" << row_key.ToString() + << "] expire_timestamp:[" << expire_timestamp << "] now:[" << now << "]"; + return false; + } + VLOG(11) << "[KvCompactStrategy-Drop] row_key:[" << row_key.ToString() << "] expire_timestamp:[" + << expire_timestamp << "] now:[" << now << "]"; + return true; } bool KvCompactStrategy::ScanDrop(const leveldb::Slice& tera_key, uint64_t n) { - return Drop(tera_key, n, ""); // used in scan. + return Drop(tera_key, n, ""); // used in scan. } bool KvCompactStrategy::ScanMergedValue(Iterator* it, std::string* merged_value, int64_t* merged_num) { - return false; + return false; } bool KvCompactStrategy::MergeAtomicOPs(Iterator* it, std::string* merged_value, - std::string* merged_key) { - return false; + std::string* merged_key) { + return false; } -KvCompactStrategyFactory::KvCompactStrategyFactory(const TableSchema& schema) : - schema_(schema) { -} +KvCompactStrategyFactory::KvCompactStrategyFactory(const TableSchema& schema) : schema_(schema) {} KvCompactStrategy* KvCompactStrategyFactory::NewInstance() { - MutexLock lock(&mutex_); - return new KvCompactStrategy(schema_); + MutexLock lock(&mutex_); + return new KvCompactStrategy(schema_); } -} // namespace io -} // namespace tera +} // namespace io +} // namespace tera diff --git a/src/io/ttlkv_compact_strategy.h b/src/io/ttlkv_compact_strategy.h index 259a147c7..71b2039d2 100644 --- a/src/io/ttlkv_compact_strategy.h +++ b/src/io/ttlkv_compact_strategy.h @@ -16,54 +16,54 @@ namespace tera { namespace io { class KvCompactStrategy : public leveldb::CompactStrategy { -public: - KvCompactStrategy(const TableSchema& schema); - virtual ~KvCompactStrategy(); - - virtual const leveldb::Comparator* RowKeyComparator(); - virtual bool CheckTag(const leveldb::Slice& tera_key, bool* del_tag, int64_t* ttl_tag); - virtual bool Drop(const leveldb::Slice& k, uint64_t n, - const std::string& lower_bound); - - // tera-specific, based on all-level iterators. - // used in LowLevelScan - virtual bool ScanDrop(const leveldb::Slice& k, uint64_t n); - - virtual bool ScanMergedValue(leveldb::Iterator* it, std::string* merged_value, - int64_t* merged_num); - - virtual bool MergeAtomicOPs(leveldb::Iterator* it, std::string* merged_value, - std::string* merged_key); - - virtual const char* Name() const; - - virtual void SetSnapshot(uint64_t snapshot); - -private: - TableSchema schema_; - const leveldb::RawKeyOperator* raw_key_operator_; - leveldb::Comparator* cmp_; - uint64_t snapshot_; + public: + KvCompactStrategy(const TableSchema& schema); + virtual ~KvCompactStrategy(); + + virtual const leveldb::Comparator* RowKeyComparator(); + + virtual void ExtractRowKey(const leveldb::Slice& tera_key, std::string* row_key); + + virtual bool CheckTag(const leveldb::Slice& tera_key, bool* del_tag, int64_t* ttl_tag); + virtual bool Drop(const leveldb::Slice& k, uint64_t n, const std::string& lower_bound); + + // tera-specific, based on all-level iterators. + // used in LowLevelScan + virtual bool ScanDrop(const leveldb::Slice& k, uint64_t n); + + virtual bool ScanMergedValue(leveldb::Iterator* it, std::string* merged_value, + int64_t* merged_num); + + virtual bool MergeAtomicOPs(leveldb::Iterator* it, std::string* merged_value, + std::string* merged_key); + + virtual const char* Name() const; + + virtual void SetSnapshot(uint64_t snapshot); + + private: + TableSchema schema_; + const leveldb::RawKeyOperator* raw_key_operator_; + leveldb::Comparator* cmp_; + uint64_t snapshot_; }; class KvCompactStrategyFactory : public leveldb::CompactStrategyFactory { -public: - KvCompactStrategyFactory(const TableSchema& schema); - virtual KvCompactStrategy* NewInstance(); - virtual const char* Name() const { - return "tera.TTLKvCompactStrategyFactory"; - } - virtual void SetArg(const void* arg) { - MutexLock lock(&mutex_); - schema_.CopyFrom(*(TableSchema*)arg); - } - -private: - TableSchema schema_; - mutable Mutex mutex_; + public: + KvCompactStrategyFactory(const TableSchema& schema); + virtual KvCompactStrategy* NewInstance(); + virtual const char* Name() const { return "tera.TTLKvCompactStrategyFactory"; } + virtual void SetArg(const void* arg) { + MutexLock lock(&mutex_); + schema_.CopyFrom(*(TableSchema*)arg); + } + + private: + TableSchema schema_; + mutable Mutex mutex_; }; -} // namespace io -} // namespace tera +} // namespace io +} // namespace tera -#endif // TERA_IO_TTLKV_COMPACT_STRATEGY_H_ +#endif // TERA_IO_TTLKV_COMPACT_STRATEGY_H_ diff --git a/src/io/utils_leveldb.cc b/src/io/utils_leveldb.cc index 8f997d5ed..cee826718 100644 --- a/src/io/utils_leveldb.cc +++ b/src/io/utils_leveldb.cc @@ -6,9 +6,11 @@ #include #include +#include #include #include +#include #include #include @@ -21,7 +23,6 @@ #include "common/timer.h" #include "db/filename.h" #include "io/timekey_comparator.h" -#include "leveldb/env_flash_block_cache.h" #include "leveldb/comparator.h" #include "leveldb/env_dfs.h" #include "leveldb/env_flash.h" @@ -29,6 +30,9 @@ #include "leveldb/env_mock.h" #include "leveldb/table_utils.h" #include "common/timer.h" +#include "leveldb/persistent_cache.h" +#include "leveldb/env.h" +#include "utils/utils_cmd.h" DECLARE_string(tera_leveldb_env_type); DECLARE_string(tera_leveldb_env_dfs_type); @@ -40,445 +44,541 @@ DECLARE_string(tera_dfs_so_path); DECLARE_string(tera_dfs_conf); DECLARE_int64(tera_master_gc_trash_expire_time_s); DECLARE_int32(tera_leveldb_block_cache_env_thread_num); +DECLARE_bool(tera_leveldb_use_direct_io_read); +DECLARE_bool(tera_leveldb_use_direct_io_write); +DECLARE_uint64(tera_leveldb_posix_write_buffer_size); + +DECLARE_string(tera_tabletnode_cache_paths); +DECLARE_bool(tera_enable_persistent_cache); +DECLARE_bool(tera_enable_persistent_cache_transfer_flash_env_files); +DECLARE_uint64(persistent_cache_write_retry_times); +DECLARE_string(persistent_cache_sizes_in_MB); namespace tera { namespace io { void InitDfsEnv() { - if (FLAGS_tera_leveldb_env_dfs_type == "nfs") { - if (access(FLAGS_tera_leveldb_env_nfs_conf_path.c_str(), R_OK) == 0) { - LOG(INFO) << "init nfs system: use configure file " - << FLAGS_tera_leveldb_env_nfs_conf_path; - leveldb::InitNfsEnv(FLAGS_tera_leveldb_env_nfs_mountpoint, - FLAGS_tera_leveldb_env_nfs_conf_path); - } else { - LOG(FATAL) << "init nfs system: no configure file found"; - } - } else if (FLAGS_tera_leveldb_env_dfs_type == "hdfs2") { - LOG(INFO) << "init hdfs2 system"; - leveldb::InitHdfs2Env(FLAGS_tera_leveldb_env_hdfs2_nameservice_list); - } else if (FLAGS_tera_leveldb_env_dfs_type == "hdfs") { - LOG(INFO) << "init hdfs system"; - leveldb::InitHdfsEnv(); + if (FLAGS_tera_leveldb_env_dfs_type == "nfs") { + if (access(FLAGS_tera_leveldb_env_nfs_conf_path.c_str(), R_OK) == 0) { + LOG(INFO) << "init nfs system: use configure file " << FLAGS_tera_leveldb_env_nfs_conf_path; + leveldb::InitNfsEnv(FLAGS_tera_leveldb_env_nfs_mountpoint, + FLAGS_tera_leveldb_env_nfs_conf_path); } else { - LOG(INFO) << "Init dfs system: " << FLAGS_tera_dfs_so_path << "(" - << FLAGS_tera_dfs_conf << ")"; - leveldb::InitDfsEnv(FLAGS_tera_dfs_so_path, FLAGS_tera_dfs_conf); + LOG(FATAL) << "init nfs system: no configure file found"; } + } else if (FLAGS_tera_leveldb_env_dfs_type == "hdfs2") { + LOG(INFO) << "init hdfs2 system"; + leveldb::InitHdfs2Env(FLAGS_tera_leveldb_env_hdfs2_nameservice_list); + } else if (FLAGS_tera_leveldb_env_dfs_type == "hdfs") { + LOG(INFO) << "init hdfs system"; + leveldb::InitHdfsEnv(); + } else { + LOG(INFO) << "Init dfs system: " << FLAGS_tera_dfs_so_path << "(" << FLAGS_tera_dfs_conf << ")"; + leveldb::InitDfsEnv(FLAGS_tera_dfs_so_path, FLAGS_tera_dfs_conf); + } } leveldb::Env* LeveldbBaseEnv() { - if (FLAGS_tera_leveldb_env_type == "local") { - return leveldb::Env::Default(); - } else { - return leveldb::EnvDfs(); - } -} - -// Tcache: default env -static pthread_once_t flash_block_cache_once = PTHREAD_ONCE_INIT; -static leveldb::Env* default_flash_block_cache_env; -static void InitDefaultFlashBlockCacheEnv() { - default_flash_block_cache_env = new leveldb::FlashBlockCacheEnv(LeveldbBaseEnv()); - default_flash_block_cache_env->SetBackgroundThreads(FLAGS_tera_leveldb_block_cache_env_thread_num); - LOG(INFO) << "init block cache, thread num " << FLAGS_tera_leveldb_block_cache_env_thread_num; -} - -leveldb::Env* DefaultFlashBlockCacheEnv() { - pthread_once(&flash_block_cache_once, InitDefaultFlashBlockCacheEnv); - return default_flash_block_cache_env; + if (FLAGS_tera_leveldb_env_type == "local") { + return leveldb::Env::Default(); + } else { + return leveldb::EnvDfs(); + } } // mem env leveldb::Env* LeveldbMemEnv() { - static Mutex mutex; - static leveldb::Env* mem_env = NULL; - MutexLock locker(&mutex); - if (mem_env) { - return mem_env; - } - leveldb::Env* base_env = LeveldbBaseEnv(); - mem_env = leveldb::NewInMemoryEnv(base_env); + static Mutex mutex; + static leveldb::Env* mem_env = NULL; + MutexLock locker(&mutex); + if (mem_env) { return mem_env; + } + leveldb::Env* base_env = LeveldbBaseEnv(); + mem_env = leveldb::NewInMemoryEnv(base_env); + return mem_env; } // flash env leveldb::Env* LeveldbFlashEnv() { - static Mutex mutex; - static leveldb::Env* flash_env = NULL; - MutexLock locker(&mutex); - if (flash_env) { - return flash_env; - } - leveldb::Env* base_env = LeveldbBaseEnv(); - flash_env = leveldb::NewFlashEnv(base_env); + if (GetCachePaths().empty() || FLAGS_tera_enable_persistent_cache) { + return nullptr; + } + static Mutex mutex; + static leveldb::Env* flash_env = NULL; + MutexLock locker(&mutex); + if (flash_env) { return flash_env; + } + leveldb::Env* base_env = LeveldbBaseEnv(); + flash_env = leveldb::NewFlashEnv(base_env); + return flash_env; } -leveldb::Env* LeveldbMockEnv() { - return leveldb::NewMockEnv(); -} +leveldb::Env* LeveldbMockEnv() { return leveldb::NewMockEnv(); } std::string GetTrashDir() { - const std::string trash("#trash"); - return FLAGS_tera_tabletnode_path_prefix + "/" + trash; + const std::string trash("#trash"); + return FLAGS_tera_tabletnode_path_prefix + "/" + trash; } std::string GetTrackableGcTrashDir() { - const std::string trash("#trackable_gc_trash"); - return FLAGS_tera_tabletnode_path_prefix + "/" + trash; + const std::string trash("#trackable_gc_trash"); + return FLAGS_tera_tabletnode_path_prefix + "/" + trash; } bool MoveEnvDirToTrash(const std::string& tablename) { - leveldb::Env* env = LeveldbBaseEnv(); - std::string src_dir = FLAGS_tera_tabletnode_path_prefix + "/" + tablename; - leveldb::Status s = env->FileExists(src_dir); - if (s.ok()) { - // exists, do nothing in here - } else if(s.IsNotFound()) { - // not found, so no need to delete - return true; - } else { - // unknown status - return false; - } - - std::string trash_dir = GetTrashDir(); - s = env->FileExists(trash_dir); - if (s.IsNotFound()) { - if (!env->CreateDir(trash_dir).ok()) { - LOG(ERROR) << "fail to create trash dir: " << trash_dir; - return false; - } else { - LOG(INFO) << "succeed in creating trash dir: " << trash_dir; - } - } else if (s.ok()) { - // trash dir exists, do nothing in here + leveldb::Env* env = LeveldbBaseEnv(); + std::string src_dir = FLAGS_tera_tabletnode_path_prefix + "/" + tablename; + leveldb::Status s = env->FileExists(src_dir); + if (s.ok()) { + // exists, do nothing in here + } else if (s.IsNotFound()) { + // not found, so no need to delete + return true; + } else { + // unknown status + return false; + } + + std::string trash_dir = GetTrashDir(); + s = env->FileExists(trash_dir); + if (s.IsNotFound()) { + if (!env->CreateDir(trash_dir).ok()) { + LOG(ERROR) << "fail to create trash dir: " << trash_dir; + return false; } else { - // unknown status - return false; + LOG(INFO) << "succeed in creating trash dir: " << trash_dir; } - - std::string time = get_curtime_str(); - std::replace(time.begin(), time.end(), ':', '-'); - std::string dest_dir = trash_dir + "/" + tablename + "." + time; - if (!env->RenameFile(src_dir, dest_dir).ok()) { - LOG(ERROR) << "fail to move dir to trash, dir: " << src_dir; - return false; - } - LOG(INFO) << "Move dir to trash, dir: " << src_dir; - return true; + } else if (s.ok()) { + // trash dir exists, do nothing in here + } else { + // unknown status + return false; + } + + std::string time = get_curtime_str(); + std::replace(time.begin(), time.end(), ':', '-'); + std::string dest_dir = trash_dir + "/" + tablename + "." + time; + if (!env->RenameFile(src_dir, dest_dir).ok()) { + LOG(ERROR) << "fail to move dir to trash, dir: " << src_dir; + return false; + } + LOG(INFO) << "Move dir to trash, dir: " << src_dir; + return true; } -leveldb::Status MoveSstToTrackableGcTrash(const std::string& table_name, - uint64_t tablet_id, - uint32_t lg_id, - uint64_t file_id) { - leveldb::Status s; - leveldb::Env* env = LeveldbBaseEnv(); - std::string table_path = FLAGS_tera_tabletnode_path_prefix + "/" + table_name; - std::string src_path = leveldb::BuildTableFilePath(table_path, tablet_id, lg_id, file_id); - - s = env->FileExists(src_path); - if(s.IsNotFound()) { - // not found, so no need to move - return leveldb::Status::OK(); - } else if (!s.ok()) { - // unknown status - return s; - } - - std::string trash_dir = GetTrackableGcTrashDir(); - s = env->FileExists(trash_dir); - if (s.IsNotFound()) { - if (!env->CreateDir(trash_dir).ok()) { - LOG(ERROR) << "[gc] fail to create trackable gc trash dir: " << trash_dir; - return leveldb::Status::IOError("fail to create trackable gc trash dir"); - } else { - LOG(INFO) << "[gc] succeed in creating trackable gc trash dir: " << trash_dir; - } - } else if (!s.ok()) { - // unknown status - return s; - } +leveldb::Status MoveSstToTrackableGcTrash(const std::string& table_name, uint64_t tablet_id, + uint32_t lg_id, uint64_t file_id) { + leveldb::Status s; + leveldb::Env* env = LeveldbBaseEnv(); + std::string table_path = FLAGS_tera_tabletnode_path_prefix + "/" + table_name; + std::string src_path = leveldb::BuildTableFilePath(table_path, tablet_id, lg_id, file_id); - std::string time = get_curtime_str(); - std::replace(time.begin(), time.end(), ':', '-'); - std::string dest_path = leveldb::BuildTrashTableFilePath( - trash_dir + "/" + table_name, tablet_id, lg_id, file_id, time); - - size_t dir_pos = dest_path.rfind("/"); - if (dir_pos == std::string::npos) { - LOG(ERROR) << "[gc] invalid dest path: " << dest_path; - return leveldb::Status::IOError("invalid dest path"); - } - std::string lg_path = dest_path.substr(0, dir_pos); - s = env->FileExists(lg_path); - if(s.IsNotFound()) { - // not found, so no need to mkdir - s = env->CreateDir(lg_path); - if (!s.ok()) { - LOG(ERROR) << "[gc] create lg dir in trash: " << lg_path - << " failed: " << s.ToString(); - return s; - } - } else if (!s.ok()) { - // unknown status - return s; + s = env->FileExists(src_path); + if (s.IsNotFound()) { + // not found, so no need to move + return leveldb::Status::OK(); + } else if (!s.ok()) { + // unknown status + return s; + } + + std::string trash_dir = GetTrackableGcTrashDir(); + s = env->FileExists(trash_dir); + if (s.IsNotFound()) { + if (!env->CreateDir(trash_dir).ok()) { + LOG(ERROR) << "[gc] fail to create trackable gc trash dir: " << trash_dir; + return leveldb::Status::IOError("fail to create trackable gc trash dir"); + } else { + LOG(INFO) << "[gc] succeed in creating trackable gc trash dir: " << trash_dir; } - - s = env->RenameFile(src_path, dest_path); + } else if (!s.ok()) { + // unknown status + return s; + } + + std::string time = get_curtime_str(); + std::replace(time.begin(), time.end(), ':', '-'); + std::string dest_path = leveldb::BuildTrashTableFilePath(trash_dir + "/" + table_name, tablet_id, + lg_id, file_id, time); + + size_t dir_pos = dest_path.rfind("/"); + if (dir_pos == std::string::npos) { + LOG(ERROR) << "[gc] invalid dest path: " << dest_path; + return leveldb::Status::IOError("invalid dest path"); + } + std::string lg_path = dest_path.substr(0, dir_pos); + s = env->FileExists(lg_path); + if (s.IsNotFound()) { + // not found, so no need to mkdir + s = env->CreateDir(lg_path); if (!s.ok()) { - LOG(ERROR) << "[gc] fail to move file to trackable gc trash, src_path: " << src_path - << ", dest_path: " << dest_path << ", status: " << s.ToString(); - return s; + LOG(ERROR) << "[gc] create lg dir in trash: " << lg_path << " failed: " << s.ToString(); + return s; } - VLOG(29) << "[gc] move file to trackable gc trash, src_path: " << src_path - << ", dest_path: " << dest_path; + } else if (!s.ok()) { + // unknown status + return s; + } - return leveldb::Status::OK(); + s = env->RenameFile(src_path, dest_path); + if (!s.ok()) { + LOG(ERROR) << "[gc] fail to move file to trackable gc trash, src_path: " << src_path + << ", dest_path: " << dest_path << ", status: " << s.ToString(); + return s; + } + VLOG(29) << "[gc] move file to trackable gc trash, src_path: " << src_path + << ", dest_path: " << dest_path; + + return leveldb::Status::OK(); } void CleanTrashDir() { - leveldb::Env* env = LeveldbBaseEnv(); - std::string trash_dir = GetTrashDir(); - std::vector children; - leveldb::Status s; - s = env->GetChildren(trash_dir, &children); - if (!s.ok()) { - return; - } - for (size_t i = 0; i < children.size(); ++i) { - std::string c_dir = trash_dir + '/' + children[i]; - DeleteEnvDir(c_dir); - } + leveldb::Env* env = LeveldbBaseEnv(); + std::string trash_dir = GetTrashDir(); + std::vector children; + leveldb::Status s; + s = env->GetChildren(trash_dir, &children); + if (!s.ok()) { return; + } + for (size_t i = 0; i < children.size(); ++i) { + std::string c_dir = trash_dir + '/' + children[i]; + DeleteEnvDir(c_dir); + } + return; } -bool TryDeleteEmptyDir(const std::string& dir_path, - size_t total_children_size, +bool TryDeleteEmptyDir(const std::string& dir_path, size_t total_children_size, size_t deleted_children_size) { - bool deleted = false; - - if (deleted_children_size == total_children_size) { - leveldb::Status s; - leveldb::Env* env = LeveldbBaseEnv(); - s = env->DeleteDir(dir_path); - if (s.ok()) { - LOG(INFO) << "[gc] delete empty dir: " << dir_path; - deleted = true; - } else { - LOG(WARNING) << "[gc] fail to delete empty dir: " - << dir_path <<" status: " << s.ToString(); - deleted = false; - } - } - - return deleted; -} + bool deleted = false; -leveldb::Status DeleteTrashFileIfExpired(const std::string& file_path) { + if (deleted_children_size == total_children_size) { leveldb::Status s; leveldb::Env* env = LeveldbBaseEnv(); - - std::string file_time_str = leveldb::GetTimeStrFromTrashFile(file_path); - if (file_time_str.empty()) { - LOG(ERROR) << "[gc] skip invalid trash file path: " << file_path; - return leveldb::Status::Corruption("invalid trash file path"); + s = env->DeleteDir(dir_path); + if (s.ok()) { + LOG(INFO) << "[gc] delete empty dir: " << dir_path; + deleted = true; + } else { + LOG(WARNING) << "[gc] fail to delete empty dir: " << dir_path << " status: " << s.ToString(); + deleted = false; } + } + + return deleted; +} - // change time format - // eg.: change "20170801-15-54-23" to "20170801-15:54:23" - file_time_str = file_time_str.replace(file_time_str.rfind("-"), 1, ":"); - file_time_str = file_time_str.replace(file_time_str.rfind("-"), 1, ":"); - - int64_t file_time = get_timestamp_from_str(file_time_str); - int64_t current_time = time(nullptr); - if (current_time - file_time > FLAGS_tera_master_gc_trash_expire_time_s) { - s = env->DeleteFile(file_path); - if (s.ok()) { - LOG(INFO) << "[gc] delete expired trash file: " << file_path +leveldb::Status DeleteTrashFileIfExpired(const std::string& file_path) { + leveldb::Status s; + leveldb::Env* env = LeveldbBaseEnv(); + + std::string file_time_str = leveldb::GetTimeStrFromTrashFile(file_path); + if (file_time_str.empty()) { + LOG(ERROR) << "[gc] skip invalid trash file path: " << file_path; + return leveldb::Status::Corruption("invalid trash file path"); + } + + // change time format + // eg.: change "20170801-15-54-23" to "20170801-15:54:23" + file_time_str = file_time_str.replace(file_time_str.rfind("-"), 1, ":"); + file_time_str = file_time_str.replace(file_time_str.rfind("-"), 1, ":"); + + int64_t file_time = get_timestamp_from_str(file_time_str); + int64_t current_time = time(nullptr); + if (current_time - file_time > FLAGS_tera_master_gc_trash_expire_time_s) { + s = env->DeleteFile(file_path); + if (s.ok()) { + LOG(INFO) << "[gc] delete expired trash file: " << file_path << ", file added to trash time: " << get_time_str(file_time) << ", current time: " << get_time_str(current_time); - } else { - LOG(ERROR) << "[gc] fail to delete expired trash file: " << file_path - <<" status: " << s.ToString(); - return s; - } } else { - return leveldb::Status::Corruption("file not expired"); + LOG(ERROR) << "[gc] fail to delete expired trash file: " << file_path + << " status: " << s.ToString(); + return s; } + } else { + return leveldb::Status::Corruption("file not expired"); + } - return s; + return s; } void CleanTrackableGcTrash() { - leveldb::Status s; - leveldb::Env* env = LeveldbBaseEnv(); - std::string trash_dir = GetTrackableGcTrashDir(); + leveldb::Status s; + leveldb::Env* env = LeveldbBaseEnv(); + std::string trash_dir = GetTrackableGcTrashDir(); - s = env->FileExists(trash_dir); - if (s.IsNotFound()) { - LOG(INFO) << "[gc] skip empty trash dir: " << trash_dir - <<" status: " << s.ToString(); - return; - } + s = env->FileExists(trash_dir); + if (s.IsNotFound()) { + LOG(INFO) << "[gc] skip empty trash dir: " << trash_dir << " status: " << s.ToString(); + return; + } + + std::vector tables; + s = env->GetChildren(trash_dir, &tables); + if (!s.ok()) { + LOG(ERROR) << "[gc] fail to list trash dir: " << trash_dir << " status: " << s.ToString(); + return; + } - std::vector tables; - s = env->GetChildren(trash_dir, &tables); + for (const auto& table : tables) { + std::string table_path = trash_dir + "/" + table; + std::vector tablets; + s = env->GetChildren(table_path, &tablets); if (!s.ok()) { - LOG(ERROR) << "[gc] fail to list trash dir: " << trash_dir - <<" status: " << s.ToString(); - return; + LOG(ERROR) << "[gc] skip due to fail to list table dir: " << table_path + << " status: " << s.ToString(); + continue; } - for (const auto& table : tables) { - std::string table_path = trash_dir + "/" + table; - std::vector tablets; - s = env->GetChildren(table_path, &tablets); + size_t deleted_empty_tablet_num = 0; + for (const auto& tablet : tablets) { + std::string tablet_path = table_path + "/" + tablet; + std::vector lgs; + s = env->GetChildren(tablet_path, &lgs); + if (!s.ok()) { + LOG(ERROR) << "[gc] skip due to fail to list tablet dir: " << tablet_path + << " status: " << s.ToString(); + continue; + } + + size_t deleted_empty_lg_num = 0; + for (const auto& lg : lgs) { + std::string lg_path = tablet_path + "/" + lg; + std::vector files; + s = env->GetChildren(lg_path, &files); if (!s.ok()) { - LOG(ERROR) << "[gc] skip due to fail to list table dir: " << table_path - <<" status: " << s.ToString(); - continue; + LOG(ERROR) << "[gc] skip due to fail to list lg dir: " << lg_path + << " status: " << s.ToString(); + continue; } - size_t deleted_empty_tablet_num = 0; - for (const auto& tablet : tablets) { - std::string tablet_path = table_path + "/" + tablet; - std::vector lgs; - s = env->GetChildren(tablet_path, &lgs); - if (!s.ok()) { - LOG(ERROR) << "[gc] skip due to fail to list tablet dir: " << tablet_path - <<" status: " << s.ToString(); - continue; - } - - size_t deleted_empty_lg_num = 0; - for (const auto& lg : lgs) { - std::string lg_path = tablet_path + "/" + lg; - std::vector files; - s = env->GetChildren(lg_path, &files); - if (!s.ok()) { - LOG(ERROR) << "[gc] skip due to fail to list lg dir: " << lg_path - <<" status: " << s.ToString(); - continue; - } - - size_t deleted_file_num = 0; - for (const auto& file : files) { - std::string file_path = lg_path + "/" + file; - if (DeleteTrashFileIfExpired(file_path).ok()) { - ++deleted_file_num; - } - } - if (TryDeleteEmptyDir(lg_path, files.size(), deleted_file_num)) { - ++ deleted_empty_lg_num; - } - } - if (TryDeleteEmptyDir(tablet_path, lgs.size(), deleted_empty_lg_num)) { - ++ deleted_empty_tablet_num; - } + size_t deleted_file_num = 0; + for (const auto& file : files) { + std::string file_path = lg_path + "/" + file; + if (DeleteTrashFileIfExpired(file_path).ok()) { + ++deleted_file_num; + } + } + if (TryDeleteEmptyDir(lg_path, files.size(), deleted_file_num)) { + ++deleted_empty_lg_num; } - TryDeleteEmptyDir(table_path, tablets.size(), deleted_empty_tablet_num); + } + if (TryDeleteEmptyDir(tablet_path, lgs.size(), deleted_empty_lg_num)) { + ++deleted_empty_tablet_num; + } } + TryDeleteEmptyDir(table_path, tablets.size(), deleted_empty_tablet_num); + } - return; + return; } leveldb::Status DeleteEnvDir(const std::string& dir) { - leveldb::Status s; - static bool is_support_rmdir = true; + leveldb::Status s; + static bool is_support_rmdir = true; - leveldb::Env* env = LeveldbBaseEnv(); - s = env->DeleteFile(dir); + leveldb::Env* env = LeveldbBaseEnv(); + s = env->DeleteFile(dir); + if (s.ok()) { + LOG(INFO) << "[gc] delete: " << dir; + return s; + } + if (is_support_rmdir) { + s = env->DeleteDir(dir); if (s.ok()) { - LOG(INFO) << "[gc] delete: " << dir; - return s; - } - if (is_support_rmdir) { - s = env->DeleteDir(dir); - if (s.ok()) { - LOG(INFO) << "[gc] delete: " << dir; - return s; - } else { - is_support_rmdir = false; - LOG(INFO) << "[gc] file system not support rmdir" + LOG(INFO) << "[gc] delete: " << dir; + return s; + } else { + is_support_rmdir = false; + LOG(INFO) << "[gc] file system not support rmdir" << ", status: " << s.ToString(); - } } + } - // file system do not support delete dir, try delete recursively - std::vector children; - s = env->GetChildren(dir, &children); - if (!s.ok()) { - LOG(ERROR) << "[gc] fail to get children, dir: " << dir - << ", status: " << s.ToString(); - return s; + // file system do not support delete dir, try delete recursively + std::vector children; + s = env->GetChildren(dir, &children); + if (!s.ok()) { + LOG(ERROR) << "[gc] fail to get children, dir: " << dir << ", status: " << s.ToString(); + return s; + } + leveldb::FileLock* file_lock = nullptr; + env->LockFile(dir + "/", &file_lock); + delete file_lock; + for (size_t i = 0; i < children.size(); ++i) { + std::string c_dir = dir + '/' + children[i]; + DeleteEnvDir(c_dir); + } + + s = env->DeleteDir(dir); + if (s.ok()) { + LOG(INFO) << "[gc] delete: " << dir; + return s; + } + return s; +} + +static std::vector ParseCachePath(const std::string& path) { + std::vector paths; + + size_t beg = 0; + const char* str = path.c_str(); + for (size_t i = 0; i <= path.size(); ++i) { + if ((str[i] == '\0' || str[i] == ';') && i - beg > 0) { + paths.emplace_back(std::string(str + beg, i - beg)); + beg = i + 1; + if (!leveldb::Env::Default()->FileExists(paths.back()).ok() && + !leveldb::Env::Default()->CreateDir(paths.back()).ok()) { + LOG(ERROR) << "[env_flash] cannot access cache dir: \n" << paths.back(); + paths.pop_back(); + } } - leveldb::FileLock* file_lock = nullptr; - env->LockFile(dir + "/", &file_lock); - delete file_lock; - for (size_t i = 0; i < children.size(); ++i) { - std::string c_dir = dir + '/' + children[i]; - DeleteEnvDir(c_dir); + } + return paths; +}; + +static leveldb::Status GetPathDiskSize(const std::string& path, uint64_t* size) { + struct statfs disk_info; + if (statfs(path.c_str(), &disk_info) != 0) { + return leveldb::Status::IOError(("Get disk size failed for " + path), strerror(errno)); + } + uint64_t block_size = disk_info.f_bsize; + uint64_t total_size = block_size * disk_info.f_blocks; + *size = total_size; + return leveldb::Status::OK(); +} + +const std::vector& GetCachePaths() { + static std::vector cache_paths; + static std::once_flag once_flag; + std::call_once(once_flag, []() { + cache_paths = ParseCachePath(FLAGS_tera_tabletnode_cache_paths); + for (auto& path : cache_paths) { + if (path.back() != '/') { + path.append("/"); + } } - - s = env->DeleteDir(dir); - if (s.ok()) { - LOG(INFO) << "[gc] delete: " << dir; - return s; + }); + return cache_paths; +} + +static std::string FormatPathString(const std::string& path) { + if (path.empty()) { + return ""; + } + std::vector terms; + SplitString(path, "/", &terms); + std::string ret; + if (path[0] == '/') { + ret += "/"; + } + for (const auto& str : terms) { + if (!str.empty()) { + ret += (str + "/"); } - return s; + } + if (path.back() != '/' && ret.size() > 0 && ret.back() == '/') { + ret.pop_back(); + } + return ret; } -leveldb::Status DeleteOldFlashCache(const std::vector& path_list) { - LOG(INFO) << "delete old falsh cache begin"; - int64_t begin_ts_us = get_micros(); - leveldb::Env* posix_env = leveldb::Env::Default(); - leveldb::Status s; - for (uint32_t i = 0; i < path_list.size(); ++i) { - LOG(INFO) << "deal with: " << path_list[i]; - std::vector childs; - s = posix_env->GetChildren(path_list[i], &childs); - if (!s.ok()) { - LOG(WARNING) << "get children of dir fail when clean old flash cache, dir: " - << path_list[i] << ", status: " << s.ToString(); - continue; - } - for (size_t j = 0; j < childs.size(); ++j) { - if (childs[j] == "." || childs[j] == ".." || childs[j] == "flash_block_cache") { - LOG(INFO) << "skip child: " << childs[j]; - continue; - } - std::string child_path = path_list[i] + "/" + childs[j]; - LOG(INFO) << "deal with child path: " << child_path; - struct stat info; - if (0 != stat(child_path.c_str(), &info)) { - s = leveldb::Status::IOError(child_path, strerror(errno)); - LOG(WARNING) << "stat dir fail when clean old flash cache, dir: " - << child_path << ", status: " << s.ToString(); - continue; - } - if (S_ISDIR(info.st_mode)) { - s = posix_env->DeleteDirRecursive(child_path); - if (!s.ok()) { - LOG(WARNING) << "delete dir recursive fail when clean old flash cache, dir: " - << child_path << ", status: " << s.ToString(); - continue; - } - } else { - s = posix_env->DeleteFile(child_path); - if (!s.ok()) { - LOG(WARNING) << "delete file fail when clean old flash cache, file: " - << child_path << ", status: " << s.ToString(); - continue; - } - } - } +const std::vector& GetPersistentCachePaths() { + static std::vector persistent_cache_paths; + static std::once_flag once_flag; + std::call_once(once_flag, []() { + persistent_cache_paths = GetCachePaths(); + for (auto& path : persistent_cache_paths) { + path += FLAGS_tera_tabletnode_path_prefix; + if (path.back() != '/') { + path.append("/"); + } + path.assign(FormatPathString(path)); } - LOG(INFO) << "delete old flash cache end, time used(ms): " << (get_micros() - begin_ts_us) / 1000; - return s; + }); + return persistent_cache_paths; +} + +static std::vector ParseCacheSize(const std::string& size) { + if (size.empty()) { + return {}; + } + std::vector sizes_str; + SplitString(size, ";", &sizes_str); + std::vector sizes; + for (const auto& sz : sizes_str) { + sizes.emplace_back(std::stoll(sz)); + } + return sizes; +}; + +const std::vector& GetPersistentCacheSizes() { + static std::vector persistent_cache_sizes; + static std::once_flag once_flag; + std::call_once(once_flag, []() { + persistent_cache_sizes = ParseCacheSize(FLAGS_persistent_cache_sizes_in_MB); + }); + return persistent_cache_sizes; +} + +leveldb::Status GetPersistentCache(std::shared_ptr* cache) { + static std::shared_ptr persistent_cache = nullptr; + static std::once_flag once_flag; + static leveldb::Status status; + std::call_once(once_flag, []() { + if (!FLAGS_tera_enable_persistent_cache) { + return; + } + auto& cache_paths = GetPersistentCachePaths(); + auto& cache_sizes = GetPersistentCacheSizes(); + if (!cache_sizes.empty() && (cache_sizes.size() != cache_paths.size())) { + LOG(FATAL) << "Unmatch cache size and cache path, please check tera.flag. Cache path num: " + << cache_paths.size() << ". Cache size num: " << cache_sizes.size(); + } + if (cache_paths.empty()) { + status = leveldb::Status::InvalidArgument("Empty persistent cache path."); + return; + } + for (auto& cache_path : cache_paths) { + leveldb::Env::Default()->CreateDir(cache_path); + } + leveldb::EnvOptions opt; + opt.use_direct_io_read = FLAGS_tera_leveldb_use_direct_io_read; + opt.use_direct_io_write = FLAGS_tera_leveldb_use_direct_io_write; + opt.posix_write_buffer_size = FLAGS_tera_leveldb_posix_write_buffer_size; + std::vector configs; + for (size_t i = 0; i != cache_paths.size(); ++i) { + uint64_t size{0}; + status = GetPathDiskSize(cache_paths[i], &size); + if (!status.ok()) { + return; + } + assert(size > 2L << 30); + // Use 2G of cache size for persistent cache meta data and other overheads. + int64_t cache_size = cache_sizes.empty() ? size - (2L << 30) : cache_sizes[i] << 20; + configs.emplace_back(leveldb::Env::Default(), cache_paths[i], cache_size); + + LOG(INFO) << "Initing persistent pache, path: " << cache_paths[i] + << ", size: " << utils::ConvertByteToString(size) + << ", cache size: " << utils::ConvertByteToString(cache_size); + configs.back().SetEnvOptions(opt); + configs.back().write_retry_times = FLAGS_persistent_cache_write_retry_times; + if (FLAGS_tera_enable_persistent_cache_transfer_flash_env_files) { + configs.back().transfer_flash_env_files = true; + } + } + status = leveldb::NewShardedPersistentCache(configs, &persistent_cache); + }); + + if (status.ok()) { + *cache = persistent_cache; + } + + return status; } -} // namespace io -} // namespace leveldb +} // namespace io +} // namespace leveldb diff --git a/src/io/utils_leveldb.h b/src/io/utils_leveldb.h index be0b6950d..b1c646316 100644 --- a/src/io/utils_leveldb.h +++ b/src/io/utils_leveldb.h @@ -9,6 +9,7 @@ #include #include "leveldb/env.h" +#include "leveldb/persistent_cache.h" namespace tera { namespace io { @@ -37,15 +38,12 @@ std::string GetTrackableGcTrashDir(); bool MoveEnvDirToTrash(const std::string& subdir); -leveldb::Status MoveSstToTrackableGcTrash(const std::string& table_name, - uint64_t tablet_id, - uint32_t lg_id, - uint64_t file_id); +leveldb::Status MoveSstToTrackableGcTrash(const std::string& table_name, uint64_t tablet_id, + uint32_t lg_id, uint64_t file_id); void CleanTrashDir(); -bool TryDeleteEmptyDir(const std::string& dir_path, - size_t total_children_size, +bool TryDeleteEmptyDir(const std::string& dir_path, size_t total_children_size, size_t deleted_children_size); leveldb::Status DeleteTrashFileIfExpired(const std::string& file_path); @@ -54,9 +52,11 @@ void CleanTrackableGcTrash(); leveldb::Status DeleteEnvDir(const std::string& subdir); -leveldb::Status DeleteOldFlashCache(const std::vector& path_list); +const std::vector& GetCachePaths(); +const std::vector& GetPersistentCachePaths(); -} // namespace io -} // namespace tera +leveldb::Status GetPersistentCache(std::shared_ptr* cache); +} // namespace io +} // namespace tera -#endif // TERA_IO_UTILS_LEVELDB_H +#endif // TERA_IO_UTILS_LEVELDB_H diff --git a/src/lbcli_main.cc b/src/lbcli_main.cc index ad6c36f48..44cfa33d5 100644 --- a/src/lbcli_main.cc +++ b/src/lbcli_main.cc @@ -61,20 +61,19 @@ using namespace tera; typedef std::shared_ptr TablePtr; typedef std::shared_ptr TableImplPtr; -typedef std::map CommandTable; +typedef std::map CommandTable; static CommandTable& GetCommandTable() { - static CommandTable command_table; - return command_table; + static CommandTable command_table; + return command_table; } static std::string GetServerAddr() { - return FLAGS_tera_lb_server_addr + ":" + FLAGS_tera_lb_server_port; + return FLAGS_tera_lb_server_addr + ":" + FLAGS_tera_lb_server_port; } const char* builtin_cmd_list[] = { - "safemode", - "safemode [enter | leave | get]", + "safemode", "safemode [enter | leave | get]", "help", "help [cmd] \n\ @@ -86,229 +85,228 @@ const char* builtin_cmd_list[] = { }; static void PrintCmdHelpInfo(const char* msg) { - if (msg == NULL) { - return; - } - int count = sizeof(builtin_cmd_list)/sizeof(char*); - for (int i = 0; i < count; i+=2) { - if(strncmp(msg, builtin_cmd_list[i], 32) == 0) { - std::cout << builtin_cmd_list[i + 1] << std::endl; - return; - } + if (msg == NULL) { + return; + } + int count = sizeof(builtin_cmd_list) / sizeof(char*); + for (int i = 0; i < count; i += 2) { + if (strncmp(msg, builtin_cmd_list[i], 32) == 0) { + std::cout << builtin_cmd_list[i + 1] << std::endl; + return; } + } } -static void PrintCmdHelpInfo(const std::string& msg) { - PrintCmdHelpInfo(msg.c_str()); -} +static void PrintCmdHelpInfo(const std::string& msg) { PrintCmdHelpInfo(msg.c_str()); } static void PrintAllCmd() { - std::cout << "there is cmd list:" << std::endl; - int count = sizeof(builtin_cmd_list)/sizeof(char*); - bool newline = false; - for (int i = 0; i < count; i+=2) { - std::cout << std::setiosflags(std::ios::left) << std::setw(20) << builtin_cmd_list[i]; - if (newline) { - std::cout << std::endl; - newline = false; - } else { - newline = true; - } + std::cout << "there is cmd list:" << std::endl; + int count = sizeof(builtin_cmd_list) / sizeof(char*); + bool newline = false; + for (int i = 0; i < count; i += 2) { + std::cout << std::setiosflags(std::ios::left) << std::setw(20) << builtin_cmd_list[i]; + if (newline) { + std::cout << std::endl; + newline = false; + } else { + newline = true; } + } - std::cout << std::endl << "help [cmd] for details." << std::endl; + std::cout << std::endl + << "help [cmd] for details." << std::endl; } // return false if similar command(s) not found static bool PromptSimilarCmd(const char* msg) { - if (msg == NULL) { - return false; - } - bool found = false; - int64_t len = strlen(msg); - int64_t threshold = int64_t((len * 0.3 < 3) ? 3 : len * 0.3); - int count = sizeof(builtin_cmd_list)/sizeof(char*); - for (int i = 0; i < count; i+=2) { - if (EditDistance(msg, builtin_cmd_list[i]) <= threshold) { - if (!found) { - std::cout << "Did you mean:" << std::endl; - found = true; - } - std::cout << " " << builtin_cmd_list[i] << std::endl; - } + if (msg == NULL) { + return false; + } + bool found = false; + int64_t len = strlen(msg); + int64_t threshold = int64_t((len * 0.3 < 3) ? 3 : len * 0.3); + int count = sizeof(builtin_cmd_list) / sizeof(char*); + for (int i = 0; i < count; i += 2) { + if (EditDistance(msg, builtin_cmd_list[i]) <= threshold) { + if (!found) { + std::cout << "Did you mean:" << std::endl; + found = true; + } + std::cout << " " << builtin_cmd_list[i] << std::endl; } - return found; + } + return found; } static void PrintUnknownCmdHelpInfo(const char* msg) { - if (msg != NULL) { - std::cout << "'" << msg << "' is not a valid command." << std::endl << std::endl; - } - if ((msg != NULL) - && PromptSimilarCmd(msg)) { - return; - } - PrintAllCmd(); + if (msg != NULL) { + std::cout << "'" << msg << "' is not a valid command." << std::endl + << std::endl; + } + if ((msg != NULL) && PromptSimilarCmd(msg)) { + return; + } + PrintAllCmd(); } int32_t SafemodeOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc < 3) { - PrintCmdHelpInfo(argv[1]); - return -1; + if (argc < 3) { + PrintCmdHelpInfo(argv[1]); + return -1; + } + + std::string op = argv[2]; + if (op != "get" && op != "leave" && op != "enter") { + PrintCmdHelpInfo(argv[1]); + return -1; + } + + load_balancer::LBClient lb_client(GetServerAddr()); + CmdCtrlRequest request; + CmdCtrlResponse response; + + request.set_sequence_id(0); + request.set_command("safemode"); + request.add_arg_list(op); + + string reason; + if (lb_client.CmdCtrl(&request, &response)) { + if (response.status() != tera::kLoadBalancerOk) { + reason = StatusCodeToString(response.status()); + LOG(ERROR) << reason; + std::cout << reason << std::endl; + err->SetFailed(ErrorCode::kSystem, reason); + return -1; } - - std::string op = argv[2]; - if (op != "get" && op != "leave" && op != "enter") { - PrintCmdHelpInfo(argv[1]); - return -1; - } - - load_balancer::LBClient lb_client(GetServerAddr()); - CmdCtrlRequest request; - CmdCtrlResponse response; - - request.set_sequence_id(0); - request.set_command("safemode"); - request.add_arg_list(op); - - string reason; - if (lb_client.CmdCtrl(&request, &response)) { - if (response.status() != tera::kLoadBalancerOk) { - reason = StatusCodeToString(response.status()); - LOG(ERROR) << reason; - std::cout << reason << std::endl; - err->SetFailed(ErrorCode::kSystem, reason); - return -1; - } - if (op == "get") { - if (response.bool_result()) { - std::cout << "true" << std::endl; - } else { - std::cout << "false" << std::endl; - } - } - return 0; - } else { - reason = "fail to CmdCtrl"; - LOG(ERROR) << reason; - std::cout << reason << std::endl; - err->SetFailed(ErrorCode::kSystem, reason); - return -1; + if (op == "get") { + if (response.bool_result()) { + std::cout << "true" << std::endl; + } else { + std::cout << "false" << std::endl; + } } + return 0; + } else { + reason = "fail to CmdCtrl"; + LOG(ERROR) << reason; + std::cout << reason << std::endl; + err->SetFailed(ErrorCode::kSystem, reason); + return -1; + } } int32_t HelpOp(Client*, int32_t argc, std::string* argv, ErrorCode*) { - if (argc == 2) { - PrintAllCmd(); - } else if (argc == 3) { - PrintCmdHelpInfo(argv[2]); - } else { - PrintCmdHelpInfo("help"); - } - return 0; + if (argc == 2) { + PrintAllCmd(); + } else if (argc == 3) { + PrintCmdHelpInfo(argv[2]); + } else { + PrintCmdHelpInfo("help"); + } + return 0; } int32_t HelpOp(int32_t argc, char** argv) { - std::vector argv_svec(argv, argv + argc); - return HelpOp(NULL, argc, &argv_svec[0], NULL); + std::vector argv_svec(argv, argv + argc); + return HelpOp(NULL, argc, &argv_svec[0], NULL); } bool ParseCommand(int argc, char** arg_list, std::vector* parsed_arg_list) { - for (int i = 0; i < argc; i++) { - std::string parsed_arg = arg_list[i]; - if (FLAGS_readable && !ParseDebugString(arg_list[i], &parsed_arg)) { - std::cout << "invalid debug format of argument: " << arg_list[i] << std::endl; - return false; - } - parsed_arg_list->push_back(parsed_arg); + for (int i = 0; i < argc; i++) { + std::string parsed_arg = arg_list[i]; + if (FLAGS_readable && !ParseDebugString(arg_list[i], &parsed_arg)) { + std::cout << "invalid debug format of argument: " << arg_list[i] << std::endl; + return false; } - return true; + parsed_arg_list->push_back(parsed_arg); + } + return true; } -static void InitializeCommandTable(){ - CommandTable& command_table = GetCommandTable(); - command_table["safemode"] = SafemodeOp; - command_table["help"] = HelpOp; +static void InitializeCommandTable() { + CommandTable& command_table = GetCommandTable(); + command_table["safemode"] = SafemodeOp; + command_table["help"] = HelpOp; } int ExecuteCommand(Client* client, int argc, char** arg_list) { - int ret = 0; - ErrorCode error_code; - - std::vector parsed_arg_list; - if (!ParseCommand(argc, arg_list, &parsed_arg_list)) { - return 1; - } - std::string* argv = &parsed_arg_list[0]; - - CommandTable& command_table = GetCommandTable(); - std::string cmd = argv[1]; - if (cmd == "version") { - PrintSystemVersion(); - } else if (command_table.find(cmd) != command_table.end()) { - ret = command_table[cmd](client, argc, argv, &error_code); - } else { - PrintUnknownCmdHelpInfo(argv[1].c_str()); - ret = 1; - } - - if (error_code.GetType() != ErrorCode::kOK) { - LOG(ERROR) << "fail reason: " << error_code.ToString(); - } - return ret; + int ret = 0; + ErrorCode error_code; + + std::vector parsed_arg_list; + if (!ParseCommand(argc, arg_list, &parsed_arg_list)) { + return 1; + } + std::string* argv = &parsed_arg_list[0]; + + CommandTable& command_table = GetCommandTable(); + std::string cmd = argv[1]; + if (cmd == "version") { + PrintSystemVersion(); + } else if (command_table.find(cmd) != command_table.end()) { + ret = command_table[cmd](client, argc, argv, &error_code); + } else { + PrintUnknownCmdHelpInfo(argv[1].c_str()); + ret = 1; + } + + if (error_code.GetType() != ErrorCode::kOK) { + LOG(ERROR) << "fail reason: " << error_code.ToString(); + } + return ret; } int main(int argc, char* argv[]) { - FLAGS_minloglevel = 2; - ::google::ParseCommandLineFlags(&argc, &argv, true); - - if (argc > 1 && std::string(argv[1]) == "version") { - PrintSystemVersion(); - return 0; - } else if (argc > 1 && std::string(argv[1]) == "help") { - HelpOp(argc, argv); - return 0; - } + FLAGS_minloglevel = 2; + ::google::ParseCommandLineFlags(&argc, &argv, true); - Client* client = Client::NewClient(FLAGS_flagfile, NULL); - if (client == NULL) { - LOG(ERROR) << "client instance not exist"; - return -1; - } - g_printer_opt.print_head = FLAGS_stdout_is_tty; - - InitializeCommandTable(); - - int ret = 0; - if (argc == 1) { - char* line = NULL; - while ((line = readline("lb> ")) != NULL) { - char* line_copy = strdup(line); - std::vector arg_list; - arg_list.push_back(argv[0]); - char* tmp = NULL; - char* token = strtok_r(line, " \t", &tmp); - while (token != NULL) { - arg_list.push_back(token); - token = strtok_r(NULL, " \t", &tmp); - } - if (arg_list.size() == 2 && - (strcmp(arg_list[1], "quit") == 0 || strcmp(arg_list[1], "exit") == 0)) { - free(line_copy); - free(line); - break; - } - if (arg_list.size() > 1) { - add_history(line_copy); - ret = ExecuteCommand(client, arg_list.size(), &arg_list[0]); - } - free(line_copy); - free(line); - } - } else { - ret = ExecuteCommand(client, argc, argv); + if (argc > 1 && std::string(argv[1]) == "version") { + PrintSystemVersion(); + return 0; + } else if (argc > 1 && std::string(argv[1]) == "help") { + HelpOp(argc, argv); + return 0; + } + + Client* client = Client::NewClient(FLAGS_flagfile, NULL); + if (client == NULL) { + LOG(ERROR) << "client instance not exist"; + return -1; + } + g_printer_opt.print_head = FLAGS_stdout_is_tty; + + InitializeCommandTable(); + + int ret = 0; + if (argc == 1) { + char* line = NULL; + while ((line = readline("lb> ")) != NULL) { + char* line_copy = strdup(line); + std::vector arg_list; + arg_list.push_back(argv[0]); + char* tmp = NULL; + char* token = strtok_r(line, " \t", &tmp); + while (token != NULL) { + arg_list.push_back(token); + token = strtok_r(NULL, " \t", &tmp); + } + if (arg_list.size() == 2 && + (strcmp(arg_list[1], "quit") == 0 || strcmp(arg_list[1], "exit") == 0)) { + free(line_copy); + free(line); + break; + } + if (arg_list.size() > 1) { + add_history(line_copy); + ret = ExecuteCommand(client, arg_list.size(), &arg_list[0]); + } + free(line_copy); + free(line); } + } else { + ret = ExecuteCommand(client, argc, argv); + } - delete client; - return ret; + delete client; + return ret; } diff --git a/src/leveldb/Makefile b/src/leveldb/Makefile index 8471de28a..322c29f9d 100644 --- a/src/leveldb/Makefile +++ b/src/leveldb/Makefile @@ -41,6 +41,7 @@ TESTS = \ crc32c_test \ db_test \ dbformat_test \ + env_test \ filename_test \ filter_block_test \ issue178_test \ @@ -49,7 +50,6 @@ TESTS = \ skiplist_test \ table_test \ version_edit_test \ - version_set_test \ write_batch_test \ raw_key_operator_test \ tera_key_test @@ -152,8 +152,8 @@ db_test: db/db_test.o $(LIBOBJECTS) $(TESTHARNESS) dbformat_test: db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS) $(CXX) db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) $(LDFLAGS) -#env_test: util/env_test.o $(LIBOBJECTS) $(TESTHARNESS) - #$(CXX) util/env_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) $(LDFLAGS) +env_test: util/env_test.o $(LIBOBJECTS) $(TESTHARNESS) + $(CXX) util/env_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) $(LDFLAGS) filename_test: db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) $(CXX) db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) $(LDFLAGS) @@ -176,8 +176,8 @@ skiplist_test: db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS) version_edit_test: db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS) $(CXX) db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) $(LDFLAGS) -version_set_test: db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(CXX) db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) $(LDFLAGS) +#version_set_test: db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS) + #$(CXX) db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) $(LDFLAGS) write_batch_test: db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS) $(CXX) db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) $(LDFLAGS) diff --git a/src/leveldb/bench/db_bench_sqlite3.cc b/src/leveldb/bench/db_bench_sqlite3.cc index c40af8472..22248b096 100644 --- a/src/leveldb/bench/db_bench_sqlite3.cc +++ b/src/leveldb/bench/db_bench_sqlite3.cc @@ -42,8 +42,7 @@ static const char* FLAGS_benchmarks = "fillrand100K," "fillseq100K," "readseq," - "readrand100K," - ; + "readrand100K,"; // Number of key/values to place in database static int FLAGS_num = 1000000; @@ -82,8 +81,7 @@ static bool FLAGS_WAL_enabled = true; // Use the db with the following name. static const char* FLAGS_db = NULL; -inline -static void ExecErrorCheck(int status, char *err_msg) { +inline static void ExecErrorCheck(int status, char* err_msg) { if (status != SQLITE_OK) { fprintf(stderr, "SQL error: %s\n", err_msg); sqlite3_free(err_msg); @@ -91,24 +89,21 @@ static void ExecErrorCheck(int status, char *err_msg) { } } -inline -static void StepErrorCheck(int status) { +inline static void StepErrorCheck(int status) { if (status != SQLITE_DONE) { fprintf(stderr, "SQL step error: status = %d\n", status); exit(1); } } -inline -static void ErrorCheck(int status) { +inline static void ErrorCheck(int status) { if (status != SQLITE_OK) { fprintf(stderr, "sqlite3 error: status = %d\n", status); exit(1); } } -inline -static void WalCheckpoint(sqlite3* db_) { +inline static void WalCheckpoint(sqlite3* db_) { // Flush all writes to disk if (FLAGS_WAL_enabled) { sqlite3_wal_checkpoint_v2(db_, NULL, SQLITE_CHECKPOINT_FULL, NULL, NULL); @@ -156,7 +151,7 @@ static Slice TrimSpace(Slice s) { start++; } int limit = s.size(); - while (limit > start && isspace(s[limit-1])) { + while (limit > start && isspace(s[limit - 1])) { limit--; } return Slice(s.data() + start, limit - start); @@ -180,7 +175,7 @@ class Benchmark { // State kept for progress messages int done_; - int next_report_; // When to report next + int next_report_; // When to report next void PrintHeader() { const int kKeySize = 16; @@ -189,21 +184,17 @@ class Benchmark { fprintf(stdout, "Values: %d bytes each\n", FLAGS_value_size); fprintf(stdout, "Entries: %d\n", num_); fprintf(stdout, "RawSize: %.1f MB (estimated)\n", - ((static_cast(kKeySize + FLAGS_value_size) * num_) - / 1048576.0)); + ((static_cast(kKeySize + FLAGS_value_size) * num_) / 1048576.0)); PrintWarnings(); fprintf(stdout, "------------------------------------------------\n"); } void PrintWarnings() { #if defined(__GNUC__) && !defined(__OPTIMIZE__) - fprintf(stdout, - "WARNING: Optimization is disabled: benchmarks unnecessarily slow\n" - ); + fprintf(stdout, "WARNING: Optimization is disabled: benchmarks unnecessarily slow\n"); #endif #ifndef NDEBUG - fprintf(stdout, - "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n"); + fprintf(stdout, "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n"); #endif } @@ -265,13 +256,20 @@ class Benchmark { done_++; if (done_ >= next_report_) { - if (next_report_ < 1000) next_report_ += 100; - else if (next_report_ < 5000) next_report_ += 500; - else if (next_report_ < 10000) next_report_ += 1000; - else if (next_report_ < 50000) next_report_ += 5000; - else if (next_report_ < 100000) next_report_ += 10000; - else if (next_report_ < 500000) next_report_ += 50000; - else next_report_ += 100000; + if (next_report_ < 1000) + next_report_ += 100; + else if (next_report_ < 5000) + next_report_ += 500; + else if (next_report_ < 10000) + next_report_ += 1000; + else if (next_report_ < 50000) + next_report_ += 5000; + else if (next_report_ < 100000) + next_report_ += 10000; + else if (next_report_ < 500000) + next_report_ += 50000; + else + next_report_ += 100000; fprintf(stderr, "... finished %d ops%30s\r", done_, ""); fflush(stderr); } @@ -286,20 +284,16 @@ class Benchmark { if (bytes_ > 0) { char rate[100]; - snprintf(rate, sizeof(rate), "%6.1f MB/s", - (bytes_ / 1048576.0) / (finish - start_)); + snprintf(rate, sizeof(rate), "%6.1f MB/s", (bytes_ / 1048576.0) / (finish - start_)); if (!message_.empty()) { - message_ = std::string(rate) + " " + message_; + message_ = std::string(rate) + " " + message_; } else { message_ = rate; } } - fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n", - name.ToString().c_str(), - (finish - start_) * 1e6 / done_, - (message_.empty() ? "" : " "), - message_.c_str()); + fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n", name.ToString().c_str(), + (finish - start_) * 1e6 / done_, (message_.empty() ? "" : " "), message_.c_str()); if (FLAGS_histogram) { fprintf(stdout, "Microseconds per op:\n%s\n", hist_.ToString().c_str()); } @@ -307,22 +301,16 @@ class Benchmark { } public: - enum Order { - SEQUENTIAL, - RANDOM - }; - enum DBState { - FRESH, - EXISTING - }; + enum Order { SEQUENTIAL, RANDOM }; + enum DBState { FRESH, EXISTING }; Benchmark() - : db_(NULL), - db_num_(0), - num_(FLAGS_num), - reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads), - bytes_(0), - rand_(301) { + : db_(NULL), + db_num_(0), + num_(FLAGS_num), + reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads), + bytes_(0), + rand_(301) { std::vector files; std::string test_dir; Env::Default()->GetTestDirectory(&test_dir); @@ -429,10 +417,7 @@ class Benchmark { // Open database std::string tmp_dir; Env::Default()->GetTestDirectory(&tmp_dir); - snprintf(file_name, sizeof(file_name), - "%s/dbbench_sqlite3-%d.db", - tmp_dir.c_str(), - db_num_); + snprintf(file_name, sizeof(file_name), "%s/dbbench_sqlite3-%d.db", tmp_dir.c_str(), db_num_); status = sqlite3_open(file_name, &db_); if (status) { fprintf(stderr, "open error: %s\n", sqlite3_errmsg(db_)); @@ -441,16 +426,14 @@ class Benchmark { // Change SQLite cache size char cache_size[100]; - snprintf(cache_size, sizeof(cache_size), "PRAGMA cache_size = %d", - FLAGS_num_pages); + snprintf(cache_size, sizeof(cache_size), "PRAGMA cache_size = %d", FLAGS_num_pages); status = sqlite3_exec(db_, cache_size, NULL, NULL, &err_msg); ExecErrorCheck(status, err_msg); // FLAGS_page_size is defaulted to 1024 if (FLAGS_page_size != 1024) { char page_size[100]; - snprintf(page_size, sizeof(page_size), "PRAGMA page_size = %d", - FLAGS_page_size); + snprintf(page_size, sizeof(page_size), "PRAGMA page_size = %d", FLAGS_page_size); status = sqlite3_exec(db_, page_size, NULL, NULL, &err_msg); ExecErrorCheck(status, err_msg); } @@ -469,9 +452,8 @@ class Benchmark { // Change locking mode to exclusive and create tables/index for database std::string locking_stmt = "PRAGMA locking_mode = EXCLUSIVE"; - std::string create_stmt = - "CREATE TABLE test (key blob, value blob, PRIMARY KEY(key))"; - std::string stmt_array[] = { locking_stmt, create_stmt }; + std::string create_stmt = "CREATE TABLE test (key blob, value blob, PRIMARY KEY(key))"; + std::string stmt_array[] = {locking_stmt, create_stmt}; int stmt_array_length = sizeof(stmt_array) / sizeof(std::string); for (int i = 0; i < stmt_array_length; i++) { status = sqlite3_exec(db_, stmt_array[i].c_str(), NULL, NULL, &err_msg); @@ -479,8 +461,8 @@ class Benchmark { } } - void Write(bool write_sync, Order order, DBState state, - int num_entries, int value_size, int entries_per_batch) { + void Write(bool write_sync, Order order, DBState state, int num_entries, int value_size, + int entries_per_batch) { // Create new database if state == FRESH if (state == FRESH) { if (FLAGS_use_existing_db) { @@ -502,26 +484,22 @@ class Benchmark { char* err_msg = NULL; int status; - sqlite3_stmt *replace_stmt, *begin_trans_stmt, *end_trans_stmt; + sqlite3_stmt* replace_stmt, *begin_trans_stmt, *end_trans_stmt; std::string replace_str = "REPLACE INTO test (key, value) VALUES (?, ?)"; std::string begin_trans_str = "BEGIN TRANSACTION;"; std::string end_trans_str = "END TRANSACTION;"; // Check for synchronous flag in options - std::string sync_stmt = (write_sync) ? "PRAGMA synchronous = FULL" : - "PRAGMA synchronous = OFF"; + std::string sync_stmt = (write_sync) ? "PRAGMA synchronous = FULL" : "PRAGMA synchronous = OFF"; status = sqlite3_exec(db_, sync_stmt.c_str(), NULL, NULL, &err_msg); ExecErrorCheck(status, err_msg); // Preparing sqlite3 statements - status = sqlite3_prepare_v2(db_, replace_str.c_str(), -1, - &replace_stmt, NULL); + status = sqlite3_prepare_v2(db_, replace_str.c_str(), -1, &replace_stmt, NULL); ErrorCheck(status); - status = sqlite3_prepare_v2(db_, begin_trans_str.c_str(), -1, - &begin_trans_stmt, NULL); + status = sqlite3_prepare_v2(db_, begin_trans_str.c_str(), -1, &begin_trans_stmt, NULL); ErrorCheck(status); - status = sqlite3_prepare_v2(db_, end_trans_str.c_str(), -1, - &end_trans_stmt, NULL); + status = sqlite3_prepare_v2(db_, end_trans_str.c_str(), -1, &end_trans_stmt, NULL); ErrorCheck(status); bool transaction = (entries_per_batch > 1); @@ -539,16 +517,14 @@ class Benchmark { const char* value = gen_.Generate(value_size).data(); // Create values for key-value pair - const int k = (order == SEQUENTIAL) ? i + j : - (rand_.Next() % num_entries); + const int k = (order == SEQUENTIAL) ? i + j : (rand_.Next() % num_entries); char key[100]; snprintf(key, sizeof(key), "%016d", k); // Bind KV values into replace_stmt status = sqlite3_bind_blob(replace_stmt, 1, key, 16, SQLITE_STATIC); ErrorCheck(status); - status = sqlite3_bind_blob(replace_stmt, 2, value, - value_size, SQLITE_STATIC); + status = sqlite3_bind_blob(replace_stmt, 2, value, value_size, SQLITE_STATIC); ErrorCheck(status); // Execute replace_stmt @@ -584,18 +560,16 @@ class Benchmark { void Read(Order order, int entries_per_batch) { int status; - sqlite3_stmt *read_stmt, *begin_trans_stmt, *end_trans_stmt; + sqlite3_stmt* read_stmt, *begin_trans_stmt, *end_trans_stmt; std::string read_str = "SELECT * FROM test WHERE key = ?"; std::string begin_trans_str = "BEGIN TRANSACTION;"; std::string end_trans_str = "END TRANSACTION;"; // Preparing sqlite3 statements - status = sqlite3_prepare_v2(db_, begin_trans_str.c_str(), -1, - &begin_trans_stmt, NULL); + status = sqlite3_prepare_v2(db_, begin_trans_str.c_str(), -1, &begin_trans_stmt, NULL); ErrorCheck(status); - status = sqlite3_prepare_v2(db_, end_trans_str.c_str(), -1, - &end_trans_stmt, NULL); + status = sqlite3_prepare_v2(db_, end_trans_str.c_str(), -1, &end_trans_stmt, NULL); ErrorCheck(status); status = sqlite3_prepare_v2(db_, read_str.c_str(), -1, &read_stmt, NULL); ErrorCheck(status); @@ -622,7 +596,8 @@ class Benchmark { ErrorCheck(status); // Execute read statement - while ((status = sqlite3_step(read_stmt)) == SQLITE_ROW) {} + while ((status = sqlite3_step(read_stmt)) == SQLITE_ROW) { + } StepErrorCheck(status); // Reset SQLite statement for another use @@ -652,7 +627,7 @@ class Benchmark { void ReadSequential() { int status; - sqlite3_stmt *pStmt; + sqlite3_stmt* pStmt; std::string read_str = "SELECT * FROM test ORDER BY key"; status = sqlite3_prepare_v2(db_, read_str.c_str(), -1, &pStmt, NULL); @@ -665,7 +640,6 @@ class Benchmark { status = sqlite3_finalize(pStmt); ErrorCheck(status); } - }; } // namespace leveldb @@ -678,13 +652,11 @@ int main(int argc, char** argv) { char junk; if (leveldb::Slice(argv[i]).starts_with("--benchmarks=")) { FLAGS_benchmarks = argv[i] + strlen("--benchmarks="); - } else if (sscanf(argv[i], "--histogram=%d%c", &n, &junk) == 1 && - (n == 0 || n == 1)) { + } else if (sscanf(argv[i], "--histogram=%d%c", &n, &junk) == 1 && (n == 0 || n == 1)) { FLAGS_histogram = n; } else if (sscanf(argv[i], "--compression_ratio=%lf%c", &d, &junk) == 1) { FLAGS_compression_ratio = d; - } else if (sscanf(argv[i], "--use_existing_db=%d%c", &n, &junk) == 1 && - (n == 0 || n == 1)) { + } else if (sscanf(argv[i], "--use_existing_db=%d%c", &n, &junk) == 1 && (n == 0 || n == 1)) { FLAGS_use_existing_db = n; } else if (sscanf(argv[i], "--num=%d%c", &n, &junk) == 1) { FLAGS_num = n; @@ -698,8 +670,7 @@ int main(int argc, char** argv) { FLAGS_page_size = n; } else if (sscanf(argv[i], "--num_pages=%d%c", &n, &junk) == 1) { FLAGS_num_pages = n; - } else if (sscanf(argv[i], "--WAL_enabled=%d%c", &n, &junk) == 1 && - (n == 0 || n == 1)) { + } else if (sscanf(argv[i], "--WAL_enabled=%d%c", &n, &junk) == 1 && (n == 0 || n == 1)) { FLAGS_WAL_enabled = n; } else if (strncmp(argv[i], "--db=", 5) == 0) { FLAGS_db = argv[i] + 5; @@ -711,9 +682,9 @@ int main(int argc, char** argv) { // Choose a location for the test database if none given with --db= if (FLAGS_db == NULL) { - leveldb::Env::Default()->GetTestDirectory(&default_db_path); - default_db_path += "/dbbench"; - FLAGS_db = default_db_path.c_str(); + leveldb::Env::Default()->GetTestDirectory(&default_db_path); + default_db_path += "/dbbench"; + FLAGS_db = default_db_path.c_str(); } leveldb::Benchmark benchmark; diff --git a/src/leveldb/bench/db_bench_tree_db.cc b/src/leveldb/bench/db_bench_tree_db.cc index f07996730..2264a2146 100644 --- a/src/leveldb/bench/db_bench_tree_db.cc +++ b/src/leveldb/bench/db_bench_tree_db.cc @@ -38,8 +38,7 @@ static const char* FLAGS_benchmarks = "fillrand100K," "fillseq100K," "readseq100K," - "readrand100K," - ; + "readrand100K,"; // Number of key/values to place in database static int FLAGS_num = 1000000; @@ -75,9 +74,7 @@ static bool FLAGS_compression = true; // Use the db with the following name. static const char* FLAGS_db = NULL; -inline -static void DBSynchronize(kyotocabinet::TreeDB* db_) -{ +inline static void DBSynchronize(kyotocabinet::TreeDB* db_) { // Synchronize will flush writes to disk if (!db_->synchronize()) { fprintf(stderr, "synchronize error: %s\n", db_->error().name()); @@ -125,7 +122,7 @@ static Slice TrimSpace(Slice s) { start++; } int limit = s.size(); - while (limit > start && isspace(s[limit-1])) { + while (limit > start && isspace(s[limit - 1])) { limit--; } return Slice(s.data() + start, limit - start); @@ -136,7 +133,7 @@ static Slice TrimSpace(Slice s) { class Benchmark { private: kyotocabinet::TreeDB* db_; - int db_num_; + int db_num_ = 0; int num_; int reads_; double start_; @@ -150,41 +147,35 @@ class Benchmark { // State kept for progress messages int done_; - int next_report_; // When to report next + int next_report_; // When to report next void PrintHeader() { const int kKeySize = 16; PrintEnvironment(); fprintf(stdout, "Keys: %d bytes each\n", kKeySize); - fprintf(stdout, "Values: %d bytes each (%d bytes after compression)\n", - FLAGS_value_size, + fprintf(stdout, "Values: %d bytes each (%d bytes after compression)\n", FLAGS_value_size, static_cast(FLAGS_value_size * FLAGS_compression_ratio + 0.5)); fprintf(stdout, "Entries: %d\n", num_); fprintf(stdout, "RawSize: %.1f MB (estimated)\n", - ((static_cast(kKeySize + FLAGS_value_size) * num_) - / 1048576.0)); + ((static_cast(kKeySize + FLAGS_value_size) * num_) / 1048576.0)); fprintf(stdout, "FileSize: %.1f MB (estimated)\n", - (((kKeySize + FLAGS_value_size * FLAGS_compression_ratio) * num_) - / 1048576.0)); + (((kKeySize + FLAGS_value_size * FLAGS_compression_ratio) * num_) / 1048576.0)); PrintWarnings(); fprintf(stdout, "------------------------------------------------\n"); } void PrintWarnings() { #if defined(__GNUC__) && !defined(__OPTIMIZE__) - fprintf(stdout, - "WARNING: Optimization is disabled: benchmarks unnecessarily slow\n" - ); + fprintf(stdout, "WARNING: Optimization is disabled: benchmarks unnecessarily slow\n"); #endif #ifndef NDEBUG - fprintf(stdout, - "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n"); + fprintf(stdout, "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n"); #endif } void PrintEnvironment() { - fprintf(stderr, "Kyoto Cabinet: version %s, lib ver %d, lib rev %d\n", - kyotocabinet::VERSION, kyotocabinet::LIBVER, kyotocabinet::LIBREV); + fprintf(stderr, "Kyoto Cabinet: version %s, lib ver %d, lib rev %d\n", kyotocabinet::VERSION, + kyotocabinet::LIBVER, kyotocabinet::LIBREV); #if defined(__linux) time_t now = time(NULL); @@ -241,13 +232,20 @@ class Benchmark { done_++; if (done_ >= next_report_) { - if (next_report_ < 1000) next_report_ += 100; - else if (next_report_ < 5000) next_report_ += 500; - else if (next_report_ < 10000) next_report_ += 1000; - else if (next_report_ < 50000) next_report_ += 5000; - else if (next_report_ < 100000) next_report_ += 10000; - else if (next_report_ < 500000) next_report_ += 50000; - else next_report_ += 100000; + if (next_report_ < 1000) + next_report_ += 100; + else if (next_report_ < 5000) + next_report_ += 500; + else if (next_report_ < 10000) + next_report_ += 1000; + else if (next_report_ < 50000) + next_report_ += 5000; + else if (next_report_ < 100000) + next_report_ += 10000; + else if (next_report_ < 500000) + next_report_ += 50000; + else + next_report_ += 100000; fprintf(stderr, "... finished %d ops%30s\r", done_, ""); fflush(stderr); } @@ -262,20 +260,16 @@ class Benchmark { if (bytes_ > 0) { char rate[100]; - snprintf(rate, sizeof(rate), "%6.1f MB/s", - (bytes_ / 1048576.0) / (finish - start_)); + snprintf(rate, sizeof(rate), "%6.1f MB/s", (bytes_ / 1048576.0) / (finish - start_)); if (!message_.empty()) { - message_ = std::string(rate) + " " + message_; + message_ = std::string(rate) + " " + message_; } else { message_ = rate; } } - fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n", - name.ToString().c_str(), - (finish - start_) * 1e6 / done_, - (message_.empty() ? "" : " "), - message_.c_str()); + fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n", name.ToString().c_str(), + (finish - start_) * 1e6 / done_, (message_.empty() ? "" : " "), message_.c_str()); if (FLAGS_histogram) { fprintf(stdout, "Microseconds per op:\n%s\n", hist_.ToString().c_str()); } @@ -283,21 +277,15 @@ class Benchmark { } public: - enum Order { - SEQUENTIAL, - RANDOM - }; - enum DBState { - FRESH, - EXISTING - }; + enum Order { SEQUENTIAL, RANDOM }; + enum DBState { FRESH, EXISTING }; Benchmark() - : db_(NULL), - num_(FLAGS_num), - reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads), - bytes_(0), - rand_(301) { + : db_(NULL), + num_(FLAGS_num), + reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads), + bytes_(0), + rand_(301) { std::vector files; std::string test_dir; Env::Default()->GetTestDirectory(&test_dir); @@ -390,7 +378,7 @@ class Benchmark { } private: - void Open(bool sync) { + void Open(bool sync) { assert(db_ == NULL); // Initialize db_ @@ -399,16 +387,11 @@ class Benchmark { db_num_++; std::string test_dir; Env::Default()->GetTestDirectory(&test_dir); - snprintf(file_name, sizeof(file_name), - "%s/dbbench_polyDB-%d.kct", - test_dir.c_str(), - db_num_); + snprintf(file_name, sizeof(file_name), "%s/dbbench_polyDB-%d.kct", test_dir.c_str(), db_num_); // Create tuning options and open the database - int open_options = kyotocabinet::PolyDB::OWRITER | - kyotocabinet::PolyDB::OCREATE; - int tune_options = kyotocabinet::TreeDB::TSMALL | - kyotocabinet::TreeDB::TLINEAR; + int open_options = kyotocabinet::PolyDB::OWRITER | kyotocabinet::PolyDB::OCREATE; + int tune_options = kyotocabinet::TreeDB::TSMALL | kyotocabinet::TreeDB::TLINEAR; if (FLAGS_compression) { tune_options |= kyotocabinet::TreeDB::TCOMPRESS; db_->tune_compressor(&comp_); @@ -416,7 +399,7 @@ class Benchmark { db_->tune_options(tune_options); db_->tune_page_cache(FLAGS_cache_size); db_->tune_page(FLAGS_page_size); - db_->tune_map(256LL<<20); + db_->tune_map(256LL << 20); if (sync) { open_options |= kyotocabinet::PolyDB::OAUTOSYNC; } @@ -425,8 +408,8 @@ class Benchmark { } } - void Write(bool sync, Order order, DBState state, - int num_entries, int value_size, int entries_per_batch) { + void Write(bool sync, Order order, DBState state, int num_entries, int value_size, + int entries_per_batch) { // Create new database if state == FRESH if (state == FRESH) { if (FLAGS_use_existing_db) { @@ -446,8 +429,7 @@ class Benchmark { } // Write to database - for (int i = 0; i < num_entries; i++) - { + for (int i = 0; i < num_entries; i++) { const int k = (order == SEQUENTIAL) ? i : (rand_.Next() % num_entries); char key[100]; snprintf(key, sizeof(key), "%016d", k); @@ -495,8 +477,7 @@ int main(int argc, char** argv) { FLAGS_benchmarks = argv[i] + strlen("--benchmarks="); } else if (sscanf(argv[i], "--compression_ratio=%lf%c", &d, &junk) == 1) { FLAGS_compression_ratio = d; - } else if (sscanf(argv[i], "--histogram=%d%c", &n, &junk) == 1 && - (n == 0 || n == 1)) { + } else if (sscanf(argv[i], "--histogram=%d%c", &n, &junk) == 1 && (n == 0 || n == 1)) { FLAGS_histogram = n; } else if (sscanf(argv[i], "--num=%d%c", &n, &junk) == 1) { FLAGS_num = n; @@ -508,8 +489,7 @@ int main(int argc, char** argv) { FLAGS_cache_size = n; } else if (sscanf(argv[i], "--page_size=%d%c", &n, &junk) == 1) { FLAGS_page_size = n; - } else if (sscanf(argv[i], "--compression=%d%c", &n, &junk) == 1 && - (n == 0 || n == 1)) { + } else if (sscanf(argv[i], "--compression=%d%c", &n, &junk) == 1 && (n == 0 || n == 1)) { FLAGS_compression = (n == 1) ? true : false; } else if (strncmp(argv[i], "--db=", 5) == 0) { FLAGS_db = argv[i] + 5; @@ -521,9 +501,9 @@ int main(int argc, char** argv) { // Choose a location for the test database if none given with --db= if (FLAGS_db == NULL) { - leveldb::Env::Default()->GetTestDirectory(&default_db_path); - default_db_path += "/dbbench"; - FLAGS_db = default_db_path.c_str(); + leveldb::Env::Default()->GetTestDirectory(&default_db_path); + default_db_path += "/dbbench"; + FLAGS_db = default_db_path.c_str(); } leveldb::Benchmark benchmark; diff --git a/src/leveldb/bench/tera_bench.cc b/src/leveldb/bench/tera_bench.cc index a76aaaea8..ca3396f64 100644 --- a/src/leveldb/bench/tera_bench.cc +++ b/src/leveldb/bench/tera_bench.cc @@ -18,8 +18,7 @@ // random -- output N values in random key order static const char* FLAGS_benchmarks = "seq," - "random," - ; + "random,"; // Number of key/values to place in database static int FLAGS_num = 1000000; @@ -105,7 +104,7 @@ class Benchmark { // State kept for progress messages int done_; - int next_report_; // When to report next + int next_report_; // When to report next void Start() { start_ = Env::Default()->NowMicros() * 1e-6; @@ -124,38 +123,31 @@ class Benchmark { if (bytes_ > 0) { char rate[100]; - snprintf(rate, sizeof(rate), "%6.1f MB/s", - (bytes_ / 1048576.0) / (finish - start_)); + snprintf(rate, sizeof(rate), "%6.1f MB/s", (bytes_ / 1048576.0) / (finish - start_)); if (!message_.empty()) { - message_ = std::string(rate) + " " + message_; + message_ = std::string(rate) + " " + message_; } else { message_ = rate; } } - fprintf(stderr, "%-12s : %11.3f micros/op;%s%s\n", - name.ToString().c_str(), - (finish - start_) * 1e6 / done_, - (message_.empty() ? "" : " "), - message_.c_str()); + fprintf(stderr, "%-12s : %11.3f micros/op;%s%s\n", name.ToString().c_str(), + (finish - start_) * 1e6 / done_, (message_.empty() ? "" : " "), message_.c_str()); fflush(stderr); } public: - enum Order { - SEQUENTIAL, - RANDOM - }; + enum Order { SEQUENTIAL, RANDOM }; Benchmark() - : num_(FLAGS_num), - reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads), - start_(0.0), - bytes_(0), - rand_(FLAGS_key_seed), - done_(0), - next_report_(0) { - tablet_rand_vector_ = new Random*[FLAGS_tablet_num]; + : num_(FLAGS_num), + reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads), + start_(0.0), + bytes_(0), + rand_(FLAGS_key_seed), + done_(0), + next_report_(0) { + tablet_rand_vector_ = new Random* [FLAGS_tablet_num]; for (int i = 0; i < FLAGS_tablet_num; i++) { tablet_rand_vector_[i] = new Random(FLAGS_key_seed); } @@ -215,7 +207,6 @@ class Benchmark { } private: - void Output(Order order, int num_entries, int value_size, std::vector& cfs) { if (num_entries != num_) { char msg[100]; @@ -228,8 +219,7 @@ class Benchmark { // Write to database int i = FLAGS_start_key; int end_key = i + num_entries * FLAGS_key_step; - for (; i < end_key; i += FLAGS_key_step) - { + for (; i < end_key; i += FLAGS_key_step) { const int t = rand_.Next() % FLAGS_tablet_num; const int k = (order == SEQUENTIAL) ? i : (tablet_rand_vector_[t]->Next()); char key[10000]; @@ -239,7 +229,8 @@ class Benchmark { fprintf(stdout, "%s\t%s\n", key, gen_.Generate(value_size).ToString().c_str()); } else { for (size_t j = 0; j < cfs.size(); ++j) { - fprintf(stdout, "%s\t%s\t%s\t%s\n", key, gen_.Generate(value_size).ToString().c_str(), cfs[j].c_str(), ts); + fprintf(stdout, "%s\t%s\t%s\t%s\n", key, gen_.Generate(value_size).ToString().c_str(), + cfs[j].c_str(), ts); } } } diff --git a/src/leveldb/build_detect_platform b/src/leveldb/build_detect_platform index 325dfaf01..c9a6e6b90 100755 --- a/src/leveldb/build_detect_platform +++ b/src/leveldb/build_detect_platform @@ -59,16 +59,11 @@ PLATFORM_CXXFLAGS= PLATFORM_LDFLAGS= PLATFORM_LIBS= PLATFORM_SHARED_EXT="so" -PLATFORM_SHARED_LDFLAGS="-shared -Wl,-soname -Wl," +PLATFORM_SHARED_LDFLAGS="-shared -Wl,-soname -Wl,-fPIC" PLATFORM_SHARED_CFLAGS="-fPIC" PLATFORM_SHARED_VERSIONED=true -MEMCMP_FLAG= -if [ "$CXX" = "g++" ]; then - # Use libc's memcmp instead of GCC's memcmp. This results in ~40% - # performance improvement on readrandom under gcc 4.4.3 on Linux/x86. - MEMCMP_FLAG="-fno-builtin-memcmp" -fi +MEMCMP_FLAG="-fno-builtin-memcmp -fPIC" case "$TARGET_OS" in Darwin) @@ -142,7 +137,7 @@ COMMON_FLAGS="$COMMON_FLAGS" # except for the test and benchmark files. By default, find will output a list # of all files matching either rule, so we need to append -print to make the # prune take effect. -DIRS="$PREFIX/db $PREFIX/util $PREFIX/table $PREFIX/helpers/memenv" +DIRS="$PREFIX/db $PREFIX/util $PREFIX/table $PREFIX/persistent_cache $PREFIX/helpers/memenv $PREFIX/../common/metric $PREFIX/../common/base" set -f # temporarily disable globbing so that our patterns aren't expanded PRUNE_TEST="-name *test*.cc -prune" diff --git a/src/leveldb/db/builder.cc b/src/leveldb/db/builder.cc index b5d99e1d1..6228215d7 100644 --- a/src/leveldb/db/builder.cc +++ b/src/leveldb/db/builder.cc @@ -21,17 +21,12 @@ namespace leveldb { -Status BuildTable(const std::string& dbname, - Env* env, - const Options& options, - TableCache* table_cache, - Iterator* iter, - FileMetaData* meta, - uint64_t* saved_size, +Status BuildTable(const std::string& dbname, Env* env, const Options& options, + TableCache* table_cache, Iterator* iter, FileMetaData* meta, uint64_t* saved_size, uint64_t smallest_snapshot) { Status s; - int64_t del_num = 0; // statistic: delete tag's percentage in sst - std::vector ttls; // use for calculate timeout percentage + int64_t del_num = 0; // statistic: delete tag's percentage in sst + std::vector ttls; // use for calculate timeout percentage int64_t entries = 0; meta->file_size = 0; iter->SeekToFirst(); @@ -54,7 +49,7 @@ Status BuildTable(const std::string& dbname, ParsedInternalKey ikey; TableBuilder* builder = new TableBuilder(options, file); meta->smallest.DecodeFrom(iter->key()); - for (;iter->Valid();) { + for (; iter->Valid();) { Slice key = iter->key(); // no-length-prefix-key assert(ParseInternalKey(key, &ikey)); @@ -63,14 +58,11 @@ Status BuildTable(const std::string& dbname, bool drop = compact_strategy->Drop(ikey.user_key, ikey.sequence); if (drop) { iter->Next(); - //Log(options.info_log, "[%s] [Memtable Drop] sequence_id: %lu, seq: %lu, raw_key: %s", - // dbname.c_str(), ikey.sequence, snapshot, ikey.user_key.data()); - continue; // drop it before build + continue; // drop it before build } else { std::string merged_value; std::string merged_key; - has_atom_merged = compact_strategy->MergeAtomicOPs(iter, &merged_value, - &merged_key); + has_atom_merged = compact_strategy->MergeAtomicOPs(iter, &merged_value, &merged_key); if (has_atom_merged) { meta->largest.DecodeFrom(Slice(merged_key)); builder->Add(Slice(merged_key), Slice(merged_value)); @@ -83,12 +75,8 @@ Status BuildTable(const std::string& dbname, int64_t ttl = -1; compact_strategy && compact_strategy->CheckTag(ikey.user_key, &del_tag, &ttl); if (ikey.type == kTypeDeletion || del_tag) { - //Log(options_.info_log, "[%s] add del_tag %d, key_type %d\n", - // dbname_.c_str(), del_tag, ikey.type); del_num++; - } else if (ttl > 0) { // del tag has not ttl - //Log(options_.info_log, "[%s] add ttl_tag %ld\n", - // dbname_.c_str(), ttl); + } else if (ttl > 0) { // del tag has not ttl ttls.push_back(ttl); } @@ -96,8 +84,6 @@ Status BuildTable(const std::string& dbname, builder->Add(key, iter->value()); iter->Next(); } - //Log(options.info_log, "[%s] [Memtable Not Drop] sequence_id: %lu, seq: %lu, raw_key: %s", - // dbname.c_str(), ikey.sequence, snapshot, ikey.user_key.data()); } if (compact_strategy) { @@ -117,20 +103,18 @@ Status BuildTable(const std::string& dbname, // update ttl/del information entries = builder->NumEntries(); std::sort(ttls.begin(), ttls.end()); - uint32_t idx = ttls.size() * options.ttl_percentage / 100 ; + uint32_t idx = ttls.size() * options.ttl_percentage / 100; meta->del_percentage = del_num * 100 / entries; /* delete tag percentage */ - meta->check_ttl_ts = ((ttls.size() > 0) && (idx < ttls.size())) ? ttls[idx] : 0; /* sst's check ttl's time */ - meta->ttl_percentage = ((ttls.size() > 0) && (idx < ttls.size())) ? idx * 100 / ttls.size() : 0; /* ttl tag percentage */ - Log(options.info_log, "[%s] (mem dump) AddFile, number #%u, entries %ld, del_nr %lu" - ", ttl_nr %lu, del_p %lu, ttl_check_ts %lu, ttl_p %lu\n", - dbname.c_str(), - (unsigned int) meta->number, - entries, - del_num, - ttls.size(), - meta->del_percentage, - meta->check_ttl_ts, - meta->ttl_percentage); + meta->check_ttl_ts = + ((ttls.size() > 0) && (idx < ttls.size())) ? ttls[idx] : 0; /* sst's check ttl's time */ + meta->ttl_percentage = ((ttls.size() > 0) && (idx < ttls.size())) + ? idx * 100 / ttls.size() + : 0; /* ttl tag percentage */ + LEVELDB_LOG(options.info_log, + "[%s] (mem dump) AddFile, number #%u, entries %ld, del_nr %lu" + ", ttl_nr %lu, del_p %lu, ttl_check_ts %lu, ttl_p %lu\n", + dbname.c_str(), (unsigned int)meta->number, entries, del_num, ttls.size(), + meta->del_percentage, meta->check_ttl_ts, meta->ttl_percentage); } } else { builder->Abandon(); @@ -146,10 +130,8 @@ Status BuildTable(const std::string& dbname, if (s.ok() && meta->file_size) { // Verify that the table is usable - Iterator* it = table_cache->NewIterator(ReadOptions(&options), - dbname, - meta->number, - meta->file_size); + Iterator* it = + table_cache->NewIterator(ReadOptions(&options), dbname, meta->number, meta->file_size); s = it->status(); delete it; } diff --git a/src/leveldb/db/builder.h b/src/leveldb/db/builder.h index 9d845278e..62f386bff 100644 --- a/src/leveldb/db/builder.h +++ b/src/leveldb/db/builder.h @@ -28,14 +28,9 @@ class VersionEdit; // *meta will be filled with metadata about the generated table. // If no data is present in *iter, meta->file_size will be set to // zero, and no Table file will be produced. -extern Status BuildTable(const std::string& dbname, - Env* env, - const Options& options, - TableCache* table_cache, - Iterator* iter, - FileMetaData* meta, - uint64_t* saved_size, - uint64_t smallest_snapshot); +extern Status BuildTable(const std::string& dbname, Env* env, const Options& options, + TableCache* table_cache, Iterator* iter, FileMetaData* meta, + uint64_t* saved_size, uint64_t smallest_snapshot); } // namespace leveldb diff --git a/src/leveldb/db/c.cc b/src/leveldb/db/c.cc index 876170f61..0dba7c517 100644 --- a/src/leveldb/db/c.cc +++ b/src/leveldb/db/c.cc @@ -39,7 +39,7 @@ using leveldb::Range; using leveldb::ReadOptions; using leveldb::SequentialFile; using leveldb::Slice; -//using leveldb::Snapshot; +// using leveldb::Snapshot; using leveldb::Status; using leveldb::WritableFile; using leveldb::WriteBatch; @@ -47,67 +47,77 @@ using leveldb::WriteOptions; extern "C" { -struct leveldb_t { DB* rep; }; -struct leveldb_iterator_t { Iterator* rep; }; -struct leveldb_writebatch_t { WriteBatch rep; }; -struct leveldb_snapshot_t { uint64_t rep; }; -struct leveldb_readoptions_t { ReadOptions rep; }; -struct leveldb_writeoptions_t { WriteOptions rep; }; -struct leveldb_options_t { Options rep; }; -struct leveldb_cache_t { Cache* rep; }; -struct leveldb_seqfile_t { SequentialFile* rep; }; -struct leveldb_randomfile_t { RandomAccessFile* rep; }; -struct leveldb_writablefile_t { WritableFile* rep; }; -struct leveldb_logger_t { Logger* rep; }; -struct leveldb_filelock_t { FileLock* rep; }; +struct leveldb_t { + DB* rep; +}; +struct leveldb_iterator_t { + Iterator* rep; +}; +struct leveldb_writebatch_t { + WriteBatch rep; +}; +struct leveldb_snapshot_t { + uint64_t rep; +}; +struct leveldb_readoptions_t { + ReadOptions rep; +}; +struct leveldb_writeoptions_t { + WriteOptions rep; +}; +struct leveldb_options_t { + Options rep; +}; +struct leveldb_cache_t { + Cache* rep; +}; +struct leveldb_seqfile_t { + SequentialFile* rep; +}; +struct leveldb_randomfile_t { + RandomAccessFile* rep; +}; +struct leveldb_writablefile_t { + WritableFile* rep; +}; +struct leveldb_logger_t { + Logger* rep; +}; +struct leveldb_filelock_t { + FileLock* rep; +}; struct leveldb_comparator_t : public Comparator { void* state_; void (*destructor_)(void*); - int (*compare_)( - void*, - const char* a, size_t alen, - const char* b, size_t blen); + int (*compare_)(void*, const char* a, size_t alen, const char* b, size_t blen); const char* (*name_)(void*); - virtual ~leveldb_comparator_t() { - (*destructor_)(state_); - } + virtual ~leveldb_comparator_t() { (*destructor_)(state_); } virtual int Compare(const Slice& a, const Slice& b) const { return (*compare_)(state_, a.data(), a.size(), b.data(), b.size()); } - virtual const char* Name() const { - return (*name_)(state_); - } + virtual const char* Name() const { return (*name_)(state_); } // No-ops since the C binding does not support key shortening methods. - virtual void FindShortestSeparator(std::string*, const Slice&) const { } - virtual void FindShortSuccessor(std::string* key) const { } + virtual void FindShortestSeparator(std::string*, const Slice&) const {} + virtual void FindShortSuccessor(std::string* key) const {} }; struct leveldb_filterpolicy_t : public FilterPolicy { void* state_; void (*destructor_)(void*); const char* (*name_)(void*); - char* (*create_)( - void*, - const char* const* key_array, const size_t* key_length_array, - int num_keys, - size_t* filter_length); - unsigned char (*key_match_)( - void*, - const char* key, size_t length, - const char* filter, size_t filter_length); - - virtual ~leveldb_filterpolicy_t() { - (*destructor_)(state_); - } + char* (*create_)(void*, const char* const* key_array, const size_t* key_length_array, + int num_keys, size_t* filter_length); + unsigned char (*key_match_)(void*, const char* key, size_t length, const char* filter, + size_t filter_length); - virtual const char* Name() const { - return (*name_)(state_); - } + virtual ~leveldb_filterpolicy_t() { (*destructor_)(state_); } + + virtual const char* Name() const { return (*name_)(state_); } virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const { std::vector key_pointers(n); @@ -123,8 +133,7 @@ struct leveldb_filterpolicy_t : public FilterPolicy { } virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const { - return (*key_match_)(state_, key.data(), key.size(), - filter.data(), filter.size()); + return (*key_match_)(state_, key.data(), key.size(), filter.data(), filter.size()); } }; @@ -153,10 +162,7 @@ static char* CopyString(const std::string& str) { return result; } -leveldb_t* leveldb_open( - const leveldb_options_t* options, - const char* name, - char** errptr) { +leveldb_t* leveldb_open(const leveldb_options_t* options, const char* name, char** errptr) { DB* db; if (SaveError(errptr, DB::Open(options->rep, std::string(name), &db))) { return NULL; @@ -171,39 +177,23 @@ void leveldb_close(leveldb_t* db) { delete db; } -void leveldb_put( - leveldb_t* db, - const leveldb_writeoptions_t* options, - const char* key, size_t keylen, - const char* val, size_t vallen, - char** errptr) { - SaveError(errptr, - db->rep->Put(options->rep, Slice(key, keylen), Slice(val, vallen))); +void leveldb_put(leveldb_t* db, const leveldb_writeoptions_t* options, const char* key, + size_t keylen, const char* val, size_t vallen, char** errptr) { + SaveError(errptr, db->rep->Put(options->rep, Slice(key, keylen), Slice(val, vallen))); } -void leveldb_delete( - leveldb_t* db, - const leveldb_writeoptions_t* options, - const char* key, size_t keylen, - char** errptr) { +void leveldb_delete(leveldb_t* db, const leveldb_writeoptions_t* options, const char* key, + size_t keylen, char** errptr) { SaveError(errptr, db->rep->Delete(options->rep, Slice(key, keylen))); } - -void leveldb_write( - leveldb_t* db, - const leveldb_writeoptions_t* options, - leveldb_writebatch_t* batch, - char** errptr) { +void leveldb_write(leveldb_t* db, const leveldb_writeoptions_t* options, + leveldb_writebatch_t* batch, char** errptr) { SaveError(errptr, db->rep->Write(options->rep, &batch->rep)); } -char* leveldb_get( - leveldb_t* db, - const leveldb_readoptions_t* options, - const char* key, size_t keylen, - size_t* vallen, - char** errptr) { +char* leveldb_get(leveldb_t* db, const leveldb_readoptions_t* options, const char* key, + size_t keylen, size_t* vallen, char** errptr) { char* result = NULL; std::string tmp; Status s = db->rep->Get(options->rep, Slice(key, keylen), &tmp); @@ -219,31 +209,24 @@ char* leveldb_get( return result; } -leveldb_iterator_t* leveldb_create_iterator( - leveldb_t* db, - const leveldb_readoptions_t* options) { +leveldb_iterator_t* leveldb_create_iterator(leveldb_t* db, const leveldb_readoptions_t* options) { leveldb_iterator_t* result = new leveldb_iterator_t; result->rep = db->rep->NewIterator(options->rep); return result; } -const leveldb_snapshot_t* leveldb_create_snapshot( - leveldb_t* db) { +const leveldb_snapshot_t* leveldb_create_snapshot(leveldb_t* db) { leveldb_snapshot_t* result = new leveldb_snapshot_t; result->rep = db->rep->GetSnapshot(); return result; } -void leveldb_release_snapshot( - leveldb_t* db, - const leveldb_snapshot_t* snapshot) { +void leveldb_release_snapshot(leveldb_t* db, const leveldb_snapshot_t* snapshot) { db->rep->ReleaseSnapshot(snapshot->rep); delete snapshot; } -char* leveldb_property_value( - leveldb_t* db, - const char* propname) { +char* leveldb_property_value(leveldb_t* db, const char* propname) { std::string tmp; if (db->rep->GetProperty(Slice(propname), &tmp)) { // We use strdup() since we expect human readable output. @@ -253,12 +236,10 @@ char* leveldb_property_value( } } -void leveldb_approximate_sizes( - leveldb_t* db, - int num_ranges, - const char* const* range_start_key, const size_t* range_start_key_len, - const char* const* range_limit_key, const size_t* range_limit_key_len, - uint64_t* sizes) { +void leveldb_approximate_sizes(leveldb_t* db, int num_ranges, const char* const* range_start_key, + const size_t* range_start_key_len, + const char* const* range_limit_key, + const size_t* range_limit_key_len, uint64_t* sizes) { Range* ranges = new Range[num_ranges]; for (int i = 0; i < num_ranges; i++) { ranges[i].start = Slice(range_start_key[i], range_start_key_len[i]); @@ -268,10 +249,8 @@ void leveldb_approximate_sizes( delete[] ranges; } -void leveldb_compact_range( - leveldb_t* db, - const char* start_key, size_t start_key_len, - const char* limit_key, size_t limit_key_len) { +void leveldb_compact_range(leveldb_t* db, const char* start_key, size_t start_key_len, + const char* limit_key, size_t limit_key_len) { Slice a, b; db->rep->CompactRange( // Pass NULL Slice if corresponding "const char*" is NULL @@ -279,17 +258,11 @@ void leveldb_compact_range( (limit_key ? (b = Slice(limit_key, limit_key_len), &b) : NULL)); } -void leveldb_destroy_db( - const leveldb_options_t* options, - const char* name, - char** errptr) { +void leveldb_destroy_db(const leveldb_options_t* options, const char* name, char** errptr) { SaveError(errptr, DestroyDB(name, options->rep)); } -void leveldb_repair_db( - const leveldb_options_t* options, - const char* name, - char** errptr) { +void leveldb_repair_db(const leveldb_options_t* options, const char* name, char** errptr) { SaveError(errptr, RepairDB(name, options->rep)); } @@ -298,29 +271,19 @@ void leveldb_iter_destroy(leveldb_iterator_t* iter) { delete iter; } -unsigned char leveldb_iter_valid(const leveldb_iterator_t* iter) { - return iter->rep->Valid(); -} +unsigned char leveldb_iter_valid(const leveldb_iterator_t* iter) { return iter->rep->Valid(); } -void leveldb_iter_seek_to_first(leveldb_iterator_t* iter) { - iter->rep->SeekToFirst(); -} +void leveldb_iter_seek_to_first(leveldb_iterator_t* iter) { iter->rep->SeekToFirst(); } -void leveldb_iter_seek_to_last(leveldb_iterator_t* iter) { - iter->rep->SeekToLast(); -} +void leveldb_iter_seek_to_last(leveldb_iterator_t* iter) { iter->rep->SeekToLast(); } void leveldb_iter_seek(leveldb_iterator_t* iter, const char* k, size_t klen) { iter->rep->Seek(Slice(k, klen)); } -void leveldb_iter_next(leveldb_iterator_t* iter) { - iter->rep->Next(); -} +void leveldb_iter_next(leveldb_iterator_t* iter) { iter->rep->Next(); } -void leveldb_iter_prev(leveldb_iterator_t* iter) { - iter->rep->Prev(); -} +void leveldb_iter_prev(leveldb_iterator_t* iter) { iter->rep->Prev(); } const char* leveldb_iter_key(const leveldb_iterator_t* iter, size_t* klen) { Slice s = iter->rep->key(); @@ -338,36 +301,25 @@ void leveldb_iter_get_error(const leveldb_iterator_t* iter, char** errptr) { SaveError(errptr, iter->rep->status()); } -leveldb_writebatch_t* leveldb_writebatch_create() { - return new leveldb_writebatch_t; -} +leveldb_writebatch_t* leveldb_writebatch_create() { return new leveldb_writebatch_t; } -void leveldb_writebatch_destroy(leveldb_writebatch_t* b) { - delete b; -} +void leveldb_writebatch_destroy(leveldb_writebatch_t* b) { delete b; } -void leveldb_writebatch_clear(leveldb_writebatch_t* b) { - b->rep.Clear(); -} +void leveldb_writebatch_clear(leveldb_writebatch_t* b) { b->rep.Clear(); } -void leveldb_writebatch_put( - leveldb_writebatch_t* b, - const char* key, size_t klen, - const char* val, size_t vlen) { +void leveldb_writebatch_put(leveldb_writebatch_t* b, const char* key, size_t klen, const char* val, + size_t vlen) { b->rep.Put(Slice(key, klen), Slice(val, vlen)); } -void leveldb_writebatch_delete( - leveldb_writebatch_t* b, - const char* key, size_t klen) { +void leveldb_writebatch_delete(leveldb_writebatch_t* b, const char* key, size_t klen) { b->rep.Delete(Slice(key, klen)); } -void leveldb_writebatch_iterate( - leveldb_writebatch_t* b, - void* state, - void (*put)(void*, const char* k, size_t klen, const char* v, size_t vlen), - void (*deleted)(void*, const char* k, size_t klen)) { +void leveldb_writebatch_iterate(leveldb_writebatch_t* b, void* state, + void (*put)(void*, const char* k, size_t klen, const char* v, + size_t vlen), + void (*deleted)(void*, const char* k, size_t klen)) { class H : public WriteBatch::Handler { public: void* state_; @@ -376,9 +328,7 @@ void leveldb_writebatch_iterate( virtual void Put(const Slice& key, const Slice& value) { (*put_)(state_, key.data(), key.size(), value.data(), value.size()); } - virtual void Delete(const Slice& key) { - (*deleted_)(state_, key.data(), key.size()); - } + virtual void Delete(const Slice& key) { (*deleted_)(state_, key.data(), key.size()); } }; H handler; handler.state_ = state; @@ -387,33 +337,23 @@ void leveldb_writebatch_iterate( b->rep.Iterate(&handler); } -leveldb_options_t* leveldb_options_create() { - return new leveldb_options_t; -} +leveldb_options_t* leveldb_options_create() { return new leveldb_options_t; } -void leveldb_options_destroy(leveldb_options_t* options) { - delete options; -} +void leveldb_options_destroy(leveldb_options_t* options) { delete options; } -void leveldb_options_set_comparator( - leveldb_options_t* opt, - leveldb_comparator_t* cmp) { +void leveldb_options_set_comparator(leveldb_options_t* opt, leveldb_comparator_t* cmp) { opt->rep.comparator = cmp; } -void leveldb_options_set_filter_policy( - leveldb_options_t* opt, - leveldb_filterpolicy_t* policy) { +void leveldb_options_set_filter_policy(leveldb_options_t* opt, leveldb_filterpolicy_t* policy) { opt->rep.filter_policy = policy; } -void leveldb_options_set_error_if_exists( - leveldb_options_t* opt, unsigned char v) { +void leveldb_options_set_error_if_exists(leveldb_options_t* opt, unsigned char v) { opt->rep.error_if_exists = v; } -void leveldb_options_set_paranoid_checks( - leveldb_options_t* opt, unsigned char v) { +void leveldb_options_set_paranoid_checks(leveldb_options_t* opt, unsigned char v) { opt->rep.paranoid_checks = v; } @@ -437,9 +377,7 @@ void leveldb_options_set_cache(leveldb_options_t* opt, leveldb_cache_t* c) { opt->rep.block_cache = c->rep; } -void leveldb_options_set_block_size(leveldb_options_t* opt, size_t s) { - opt->rep.block_size = s; -} +void leveldb_options_set_block_size(leveldb_options_t* opt, size_t s) { opt->rep.block_size = s; } void leveldb_options_set_block_restart_interval(leveldb_options_t* opt, int n) { opt->rep.block_restart_interval = n; @@ -449,14 +387,10 @@ void leveldb_options_set_compression(leveldb_options_t* opt, int t) { opt->rep.compression = static_cast(t); } -leveldb_comparator_t* leveldb_comparator_create( - void* state, - void (*destructor)(void*), - int (*compare)( - void*, - const char* a, size_t alen, - const char* b, size_t blen), - const char* (*name)(void*)) { +leveldb_comparator_t* leveldb_comparator_create(void* state, void (*destructor)(void*), + int (*compare)(void*, const char* a, size_t alen, + const char* b, size_t blen), + const char* (*name)(void*)) { leveldb_comparator_t* result = new leveldb_comparator_t; result->state_ = state; result->destructor_ = destructor; @@ -465,22 +399,14 @@ leveldb_comparator_t* leveldb_comparator_create( return result; } -void leveldb_comparator_destroy(leveldb_comparator_t* cmp) { - delete cmp; -} +void leveldb_comparator_destroy(leveldb_comparator_t* cmp) { delete cmp; } leveldb_filterpolicy_t* leveldb_filterpolicy_create( - void* state, - void (*destructor)(void*), - char* (*create_filter)( - void*, - const char* const* key_array, const size_t* key_length_array, - int num_keys, - size_t* filter_length), - unsigned char (*key_may_match)( - void*, - const char* key, size_t length, - const char* filter, size_t filter_length), + void* state, void (*destructor)(void*), + char* (*create_filter)(void*, const char* const* key_array, const size_t* key_length_array, + int num_keys, size_t* filter_length), + unsigned char (*key_may_match)(void*, const char* key, size_t length, const char* filter, + size_t filter_length), const char* (*name)(void*)) { leveldb_filterpolicy_t* result = new leveldb_filterpolicy_t; result->state_ = state; @@ -491,9 +417,7 @@ leveldb_filterpolicy_t* leveldb_filterpolicy_create( return result; } -void leveldb_filterpolicy_destroy(leveldb_filterpolicy_t* filter) { - delete filter; -} +void leveldb_filterpolicy_destroy(leveldb_filterpolicy_t* filter) { delete filter; } leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(int bits_per_key) { // Make a leveldb_filterpolicy_t, but override all of its methods so @@ -509,7 +433,7 @@ leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(int bits_per_key) { bool KeyMayMatch(const Slice& key, const Slice& filter) const { return rep_->KeyMayMatch(key, filter); } - static void DoNothing(void*) { } + static void DoNothing(void*) {} }; Wrapper* wrapper = new Wrapper; wrapper->rep_ = NewBloomFilterPolicy(bits_per_key); @@ -518,41 +442,27 @@ leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(int bits_per_key) { return wrapper; } -leveldb_readoptions_t* leveldb_readoptions_create() { - return new leveldb_readoptions_t; -} +leveldb_readoptions_t* leveldb_readoptions_create() { return new leveldb_readoptions_t; } -void leveldb_readoptions_destroy(leveldb_readoptions_t* opt) { - delete opt; -} +void leveldb_readoptions_destroy(leveldb_readoptions_t* opt) { delete opt; } -void leveldb_readoptions_set_verify_checksums( - leveldb_readoptions_t* opt, - unsigned char v) { +void leveldb_readoptions_set_verify_checksums(leveldb_readoptions_t* opt, unsigned char v) { opt->rep.verify_checksums = v; } -void leveldb_readoptions_set_fill_cache( - leveldb_readoptions_t* opt, unsigned char v) { +void leveldb_readoptions_set_fill_cache(leveldb_readoptions_t* opt, unsigned char v) { opt->rep.fill_cache = v; } -void leveldb_readoptions_set_snapshot( - leveldb_readoptions_t* opt, - const leveldb_snapshot_t* snap) { +void leveldb_readoptions_set_snapshot(leveldb_readoptions_t* opt, const leveldb_snapshot_t* snap) { opt->rep.snapshot = (snap ? snap->rep : leveldb::kMaxSequenceNumber); } -leveldb_writeoptions_t* leveldb_writeoptions_create() { - return new leveldb_writeoptions_t; -} +leveldb_writeoptions_t* leveldb_writeoptions_create() { return new leveldb_writeoptions_t; } -void leveldb_writeoptions_destroy(leveldb_writeoptions_t* opt) { - delete opt; -} +void leveldb_writeoptions_destroy(leveldb_writeoptions_t* opt) { delete opt; } -void leveldb_writeoptions_set_sync( - leveldb_writeoptions_t* opt, unsigned char v) { +void leveldb_writeoptions_set_sync(leveldb_writeoptions_t* opt, unsigned char v) { opt->rep.sync = v; } @@ -579,16 +489,10 @@ void leveldb_env_destroy(leveldb_env_t* env) { delete env; } -void leveldb_free(void* ptr) { - free(ptr); -} +void leveldb_free(void* ptr) { free(ptr); } -int leveldb_major_version() { - return kMajorVersion; -} +int leveldb_major_version() { return kMajorVersion; } -int leveldb_minor_version() { - return kMinorVersion; -} +int leveldb_minor_version() { return kMinorVersion; } } // end extern "C" diff --git a/src/leveldb/db/c_test.c b/src/leveldb/db/c_test.c index 47907631d..2bc7aa769 100644 --- a/src/leveldb/db/c_test.c +++ b/src/leveldb/db/c_test.c @@ -12,9 +12,12 @@ #include #include #include +#include #include #include +#define gettid() syscall(__NR_gettid) + const char* phase = ""; static char dbname[200]; @@ -25,21 +28,20 @@ static void StartPhase(const char* name) { static const char* GetTempDir(void) { const char* ret = getenv("TEST_TMPDIR"); - if (ret == NULL || ret[0] == '\0') - ret = "/tmp"; + if (ret == NULL || ret[0] == '\0') ret = "/tmp"; return ret; } -#define CheckNoError(err) \ - if ((err) != NULL) { \ +#define CheckNoError(err) \ + if ((err) != NULL) { \ fprintf(stderr, "%s:%d: %s: %s\n", __FILE__, __LINE__, phase, (err)); \ - abort(); \ + abort(); \ } -#define CheckCondition(cond) \ - if (!(cond)) { \ +#define CheckCondition(cond) \ + if (!(cond)) { \ fprintf(stderr, "%s:%d: %s: %s\n", __FILE__, __LINE__, phase, #cond); \ - abort(); \ + abort(); \ } static void CheckEqual(const char* expected, const char* v, size_t n) { @@ -50,9 +52,7 @@ static void CheckEqual(const char* expected, const char* v, size_t n) { // ok return; } else { - fprintf(stderr, "%s: expected '%s', got '%s'\n", - phase, - (expected ? expected : "(null)"), + fprintf(stderr, "%s: expected '%s', got '%s'\n", phase, (expected ? expected : "(null)"), (v ? v : "(null")); abort(); } @@ -65,11 +65,8 @@ static void Free(char** ptr) { } } -static void CheckGet( - leveldb_t* db, - const leveldb_readoptions_t* options, - const char* key, - const char* expected) { +static void CheckGet(leveldb_t* db, const leveldb_readoptions_t* options, const char* key, + const char* expected) { char* err = NULL; size_t val_len; char* val; @@ -79,8 +76,7 @@ static void CheckGet( Free(&val); } -static void CheckIter(leveldb_iterator_t* iter, - const char* key, const char* val) { +static void CheckIter(leveldb_iterator_t* iter, const char* key, const char* val) { size_t len; const char* str; str = leveldb_iter_key(iter, &len); @@ -90,10 +86,8 @@ static void CheckIter(leveldb_iterator_t* iter, } // Callback from leveldb_writebatch_iterate() -static void CheckPut(void* ptr, - const char* k, size_t klen, - const char* v, size_t vlen) { - int* state = (int*) ptr; +static void CheckPut(void* ptr, const char* k, size_t klen, const char* v, size_t vlen) { + int* state = (int*)ptr; CheckCondition(*state < 2); switch (*state) { case 0: @@ -110,49 +104,41 @@ static void CheckPut(void* ptr, // Callback from leveldb_writebatch_iterate() static void CheckDel(void* ptr, const char* k, size_t klen) { - int* state = (int*) ptr; + int* state = (int*)ptr; CheckCondition(*state == 2); CheckEqual("bar", k, klen); (*state)++; } -static void CmpDestroy(void* arg) { } +static void CmpDestroy(void* arg) {} -static int CmpCompare(void* arg, const char* a, size_t alen, - const char* b, size_t blen) { +static int CmpCompare(void* arg, const char* a, size_t alen, const char* b, size_t blen) { int n = (alen < blen) ? alen : blen; int r = memcmp(a, b, n); if (r == 0) { - if (alen < blen) r = -1; - else if (alen > blen) r = +1; + if (alen < blen) + r = -1; + else if (alen > blen) + r = +1; } return r; } -static const char* CmpName(void* arg) { - return "foo"; -} +static const char* CmpName(void* arg) { return "foo"; } // Custom filter policy static unsigned char fake_filter_result = 1; -static void FilterDestroy(void* arg) { } -static const char* FilterName(void* arg) { - return "TestFilter"; -} -static char* FilterCreate( - void* arg, - const char* const* key_array, const size_t* key_length_array, - int num_keys, - size_t* filter_length) { +static void FilterDestroy(void* arg) {} +static const char* FilterName(void* arg) { return "TestFilter"; } +static char* FilterCreate(void* arg, const char* const* key_array, const size_t* key_length_array, + int num_keys, size_t* filter_length) { *filter_length = 4; char* result = malloc(4); memcpy(result, "fake", 4); return result; } -unsigned char FilterKeyMatch( - void* arg, - const char* key, size_t length, - const char* filter, size_t filter_length) { +unsigned char FilterKeyMatch(void* arg, const char* key, size_t length, const char* filter, + size_t filter_length) { CheckCondition(filter_length == 4); CheckCondition(memcmp(filter, "fake", 4) == 0); return fake_filter_result; @@ -172,10 +158,7 @@ int main(int argc, char** argv) { CheckCondition(leveldb_major_version() >= 1); CheckCondition(leveldb_minor_version() >= 1); - snprintf(dbname, sizeof(dbname), - "%s/leveldb_c_test-%di/meta/0", - GetTempDir(), - ((int) geteuid())); + snprintf(dbname, sizeof(dbname), "%s/leveldb_c_test-%di/meta/0", GetTempDir(), (int)gettid()); StartPhase("create_objects"); cmp = leveldb_comparator_create(NULL, CmpDestroy, CmpCompare, CmpName); @@ -272,16 +255,15 @@ int main(int argc, char** argv) { char keybuf[100]; char valbuf[100]; uint64_t sizes[2]; - const char* start[2] = { "a", "k00000000000000010000" }; - size_t start_len[2] = { 1, 21 }; - const char* limit[2] = { "k00000000000000010000", "z" }; - size_t limit_len[2] = { 21, 1 }; + const char* start[2] = {"a", "k00000000000000010000"}; + size_t start_len[2] = {1, 21}; + const char* limit[2] = {"k00000000000000010000", "z"}; + size_t limit_len[2] = {21, 1}; leveldb_writeoptions_set_sync(woptions, 0); for (i = 0; i < n; i++) { snprintf(keybuf, sizeof(keybuf), "k%020d", i); snprintf(valbuf, sizeof(valbuf), "v%020d", i); - leveldb_put(db, woptions, keybuf, strlen(keybuf), valbuf, strlen(valbuf), - &err); + leveldb_put(db, woptions, keybuf, strlen(keybuf), valbuf, strlen(valbuf), &err); CheckNoError(err); } leveldb_approximate_sizes(db, 2, start, start_len, limit, limit_len, sizes); @@ -292,7 +274,7 @@ int main(int argc, char** argv) { StartPhase("property"); { char* prop = leveldb_property_value(db, "nosuchprop"); -/* CheckCondition(prop == NULL);*/ + /* CheckCondition(prop == NULL);*/ prop = leveldb_property_value(db, "leveldb.stats"); CheckCondition(prop != NULL); Free(&prop); @@ -331,8 +313,8 @@ int main(int argc, char** argv) { CheckNoError(err); leveldb_filterpolicy_t* policy; if (run == 0) { - policy = leveldb_filterpolicy_create( - NULL, FilterDestroy, FilterCreate, FilterKeyMatch, FilterName); + policy = leveldb_filterpolicy_create(NULL, FilterDestroy, FilterCreate, FilterKeyMatch, + FilterName); } else { policy = leveldb_filterpolicy_create_bloom(10); } diff --git a/src/leveldb/db/corruption_test.cc b/src/leveldb/db/corruption_test.cc index e2be33d54..48aa2f809 100644 --- a/src/leveldb/db/corruption_test.cc +++ b/src/leveldb/db/corruption_test.cc @@ -48,9 +48,9 @@ class CorruptionTest { } ~CorruptionTest() { - delete db_; - DestroyDB(dbname_, Options()); - delete tiny_cache_; + delete db_; + DestroyDB(dbname_, Options()); + delete tiny_cache_; } Status TryReopen(Options* options = NULL) { @@ -62,9 +62,7 @@ class CorruptionTest { return DB::Open(opt, dbname_, &db_); } - void Reopen(Options* options = NULL) { - ASSERT_OK(TryReopen(options)); - } + void Reopen(Options* options = NULL) { ASSERT_OK(TryReopen(options)); } void RepairDB() { delete db_; @@ -76,7 +74,7 @@ class CorruptionTest { std::string key_space, value_space; WriteBatch batch; for (int i = 0; i < n; i++) { - //if ((i % 100) == 0) fprintf(stderr, "@ %d of %d\n", i, n); + // if ((i % 100) == 0) fprintf(stderr, "@ %d of %d\n", i, n); Slice key = Key(i, &key_space); batch.Clear(); batch.Put(key, Value(i, &value_space)); @@ -95,9 +93,7 @@ class CorruptionTest { for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { uint64_t key; Slice in(iter->key()); - if (!ConsumeDecimalNumber(&in, &key) || - !in.empty() || - key < next_expected) { + if (!ConsumeDecimalNumber(&in, &key) || !in.empty() || key < next_expected) { bad_keys++; continue; } @@ -111,8 +107,7 @@ class CorruptionTest { } delete iter; - fprintf(stderr, - "expected=%d..%d; got=%d; bad_keys=%d; bad_values=%d; missed=%d\n", + fprintf(stderr, "expected=%d..%d; got=%d; bad_keys=%d; bad_values=%d; missed=%d\n", min_expected, max_expected, correct, bad_keys, bad_values, missed); ASSERT_LE(min_expected, correct); ASSERT_GE(max_expected, correct); @@ -122,7 +117,7 @@ class CorruptionTest { // Pick file to corrupt std::string db_path = dbname_; if (lg_id >= 0) { - db_path = dbname_ + "/" + Uint64ToString(lg_id); + db_path = dbname_ + "/" + Uint64ToString(lg_id); } std::vector filenames; ASSERT_OK(env_.GetChildren(db_path, &filenames)); @@ -131,8 +126,7 @@ class CorruptionTest { std::string fname; int picked_number = -1; for (size_t i = 0; i < filenames.size(); i++) { - if (ParseFileName(filenames[i], &number, &type) && - type == filetype && + if (ParseFileName(filenames[i], &number, &type) && type == filetype && int(number) > picked_number) { // Pick latest file fname = db_path + "/" + filenames[i]; picked_number = number; @@ -175,8 +169,7 @@ class CorruptionTest { int Property(const std::string& name) { std::string property; int result; - if (db_->GetProperty(name, &property) && - sscanf(property.c_str(), "%d", &result) == 1) { + if (db_->GetProperty(name, &property) && sscanf(property.c_str(), "%d", &result) == 1) { return result; } else { return -1; @@ -201,7 +194,7 @@ class CorruptionTest { TEST(CorruptionTest, Recovery) { Build(100); Check(100, 100); - Corrupt(kLogFile, 19, 1); // WriteBatch tag for first record + Corrupt(kLogFile, 19, 1); // WriteBatch tag for first record Corrupt(kLogFile, log::kBlockSize + 1000, 1); // Somewhere in second block Reopen(); @@ -366,6 +359,4 @@ TEST(CorruptionTest, UnrelatedKeys) { } // namespace leveldb -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} +int main(int argc, char** argv) { return leveldb::test::RunAllTests(); } diff --git a/src/leveldb/db/db.cc b/src/leveldb/db/db.cc index 727ca506e..386fdcc93 100644 --- a/src/leveldb/db/db.cc +++ b/src/leveldb/db/db.cc @@ -27,7 +27,6 @@ namespace leveldb { - // Default implementations of convenience methods that subclasses of DB // can call if they wish Status DB::Put(const WriteOptions& opt, const Slice& key, const Slice& value) { @@ -42,134 +41,132 @@ Status DB::Delete(const WriteOptions& opt, const Slice& key) { return Write(opt, &batch); } -DB::~DB() { } +DB::~DB() {} -Status DB::Open(const Options& options, const std::string& dbname, - DB** dbptr) { - *dbptr = NULL; +Status DB::Open(const Options& options, const std::string& dbname, DB** dbptr) { + *dbptr = NULL; - DBTable* db_table = new DBTable(options, dbname); - Status s = db_table->Init(); - if (s.ok()) { - *dbptr = db_table; - } else { - delete db_table; - } - return s; + DBTable* db_table = new DBTable(options, dbname); + Status s = db_table->Init(); + if (s.ok()) { + *dbptr = db_table; + } else { + delete db_table; + } + return s; } Status DestroyLG(const std::string& lgname, const Options& options) { - Env* env = options.env; - std::vector filenames; - env->GetChildren(lgname, &filenames); - if (filenames.empty()) { - return Status::OK(); - } - - FileLock* lock; - const std::string lockname = LockFileName(lgname); - Status result = env->LockFile(lockname, &lock); - if (!result.ok()) { - return result; - } - - uint64_t number; - FileType type; - for (size_t i = 0; i < filenames.size(); i++) { - if (ParseFileName(filenames[i], &number, &type) && - type != kDBLockFile) { // Lock file will be deleted at end - Status del = env->DeleteFile(lgname + "/" + filenames[i]); - if (result.ok() && !del.ok()) { - result = del; - } - } - } - - filenames.clear(); - env->GetChildren(lgname + "/lost", &filenames); - for (size_t i = 0; i < filenames.size(); i++) { - Status del = env->DeleteFile(lgname + "/lost/" + filenames[i]); - } - env->DeleteDir(lgname + "/lost"); - env->UnlockFile(lock); // Ignore error since state is already gone - env->DeleteDir(lgname); // Ignore error in case dir contains other files - + Env* env = options.env; + std::vector filenames; + env->GetChildren(lgname, &filenames); + if (filenames.empty()) { + return Status::OK(); + } + + FileLock* lock; + const std::string lockname = LockFileName(lgname); + Status result = env->LockFile(lockname, &lock); + if (!result.ok()) { return result; + } + + uint64_t number; + FileType type; + for (size_t i = 0; i < filenames.size(); i++) { + if (ParseFileName(filenames[i], &number, &type) && + type != kDBLockFile) { // Lock file will be deleted at end + Status del = env->DeleteFile(lgname + "/" + filenames[i]); + if (result.ok() && !del.ok()) { + result = del; + } + } + } + + filenames.clear(); + env->GetChildren(lgname + "/lost", &filenames); + for (size_t i = 0; i < filenames.size(); i++) { + Status del = env->DeleteFile(lgname + "/lost/" + filenames[i]); + } + env->DeleteDir(lgname + "/lost"); + env->UnlockFile(lock); // Ignore error since state is already gone + env->DeleteDir(lgname); // Ignore error in case dir contains other files + + return result; } Status DestroyDB(const std::string& dbname, const Options& opt) { - Options options = opt; - Env* env = options.env; - - std::vector filenames; - env->GetChildren(dbname, &filenames); - if (filenames.empty()) { - return Status::OK(); - } - - FileLock* lock; - const std::string lockname = LockFileName(dbname); - Status result = env->LockFile(lockname, &lock); - if (!result.ok()) { - return result; - } - - // clean db/lg dir - if (options.exist_lg_list == NULL) { - options.exist_lg_list = new std::set; - options.exist_lg_list->insert(0); - } - std::set::iterator it = options.exist_lg_list->begin(); - for (; it != options.exist_lg_list->end(); ++it) { - std::string lgname = dbname + "/" + Uint64ToString(*it); - Options lg_opt = options; - if (options.lg_info_list && options.lg_info_list->size() > 0) { - std::map::iterator info_it = - options.lg_info_list->find(*it); - if (info_it != options.lg_info_list->end() && info_it->second != NULL) { - LG_info* lg_info = info_it->second; - if (lg_info->env && lg_info->env != lg_opt.env) { - lg_opt.env = lg_info->env; - } - lg_opt.compression = lg_info->compression; - delete lg_info; - info_it->second = NULL; - } - } else if (options.lg_info_list) { - delete options.lg_info_list; - options.lg_info_list = NULL; - } - Status lg_ret = DestroyLG(lgname, lg_opt); - if (!lg_ret.ok()) { - result = lg_ret; + Options options = opt; + Env* env = options.env; + + std::vector filenames; + env->GetChildren(dbname, &filenames); + if (filenames.empty()) { + return Status::OK(); + } + + FileLock* lock; + const std::string lockname = LockFileName(dbname); + Status result = env->LockFile(lockname, &lock); + if (!result.ok()) { + return result; + } + + // clean db/lg dir + if (options.exist_lg_list == NULL) { + options.exist_lg_list = new std::set; + options.exist_lg_list->insert(0); + } + std::set::iterator it = options.exist_lg_list->begin(); + for (; it != options.exist_lg_list->end(); ++it) { + std::string lgname = dbname + "/" + Uint64ToString(*it); + Options lg_opt = options; + if (options.lg_info_list && options.lg_info_list->size() > 0) { + std::map::iterator info_it = options.lg_info_list->find(*it); + if (info_it != options.lg_info_list->end() && info_it->second != NULL) { + LG_info* lg_info = info_it->second; + if (lg_info->env && lg_info->env != lg_opt.env) { + lg_opt.env = lg_info->env; } + lg_opt.compression = lg_info->compression; + delete lg_info; + info_it->second = NULL; + } + } else if (options.lg_info_list) { + delete options.lg_info_list; + options.lg_info_list = NULL; } - delete options.exist_lg_list; - - // clean db/ dir - uint64_t number; - FileType type; - for (size_t i = 0; i < filenames.size(); i++) { - if (ParseFileName(filenames[i], &number, &type) && - type != kDBLockFile) { // Lock file will be deleted at end - Status del = env->DeleteFile(dbname + "/" + filenames[i]); - if (result.ok() && !del.ok()) { - result = del; - } - } + Status lg_ret = DestroyLG(lgname, lg_opt); + if (!lg_ret.ok()) { + result = lg_ret; } - - // clean db/lost dir - filenames.clear(); - env->GetChildren(dbname + "/lost", &filenames); - for (size_t i = 0; i < filenames.size(); i++) { - Status del = env->DeleteFile(dbname + "/lost/" + filenames[i]); + } + delete options.exist_lg_list; + + // clean db/ dir + uint64_t number; + FileType type; + for (size_t i = 0; i < filenames.size(); i++) { + if (ParseFileName(filenames[i], &number, &type) && + type != kDBLockFile) { // Lock file will be deleted at end + Status del = env->DeleteFile(dbname + "/" + filenames[i]); + if (result.ok() && !del.ok()) { + result = del; + } } - env->DeleteDir(dbname + "/lost"); - env->UnlockFile(lock); // Ignore error since state is already gone - env->DeleteDir(dbname); // Ignore error in case dir contains other files - - return result; + } + + // clean db/lost dir + filenames.clear(); + env->GetChildren(dbname + "/lost", &filenames); + for (size_t i = 0; i < filenames.size(); i++) { + Status del = env->DeleteFile(dbname + "/lost/" + filenames[i]); + } + env->DeleteDir(dbname + "/lost"); + env->UnlockFile(lock); // Ignore error since state is already gone + env->DeleteDir(dbname); // Ignore error in case dir contains other files + + return result; } -} // namespace leveldb +} // namespace leveldb diff --git a/src/leveldb/db/db_bench.cc b/src/leveldb/db/db_bench.cc index 245b4c488..c82edaaf4 100644 --- a/src/leveldb/db/db_bench.cc +++ b/src/leveldb/db/db_bench.cc @@ -71,8 +71,7 @@ static const char* FLAGS_benchmarks = "bmzcomp," "bmzuncomp," #endif - "acquireload," - ; + "acquireload,"; // Number of key/values to place in database static int FLAGS_num = 1000000; @@ -95,7 +94,7 @@ static bool FLAGS_histogram = false; // Number of bytes to buffer in memtable before compacting // (initialized to default value by "main") -static int FLAGS_write_buffer_size = 32*1024*1024; +static int FLAGS_write_buffer_size = 32 * 1024 * 1024; // Number of bytes to use as a cache of uncompressed data. // Negative means use default settings. @@ -170,7 +169,7 @@ static Slice TrimSpace(Slice s) { start++; } uint32_t limit = s.size(); - while (limit > start && isspace(s[limit-1])) { + while (limit > start && isspace(s[limit - 1])) { limit--; } return Slice(s.data() + start, limit - start); @@ -228,9 +227,7 @@ class Stats { seconds_ = (finish_ - start_) * 1e-6; } - void AddMessage(Slice msg) { - AppendWithSpace(&message_, msg); - } + void AddMessage(Slice msg) { AppendWithSpace(&message_, msg); } void FinishedSingleOp() { if (FLAGS_histogram) { @@ -246,21 +243,26 @@ class Stats { done_++; if (done_ >= next_report_) { - if (next_report_ < 1000) next_report_ += 100; - else if (next_report_ < 5000) next_report_ += 500; - else if (next_report_ < 10000) next_report_ += 1000; - else if (next_report_ < 50000) next_report_ += 5000; - else if (next_report_ < 100000) next_report_ += 10000; - else if (next_report_ < 500000) next_report_ += 50000; - else next_report_ += 100000; + if (next_report_ < 1000) + next_report_ += 100; + else if (next_report_ < 5000) + next_report_ += 500; + else if (next_report_ < 10000) + next_report_ += 1000; + else if (next_report_ < 50000) + next_report_ += 5000; + else if (next_report_ < 100000) + next_report_ += 10000; + else if (next_report_ < 500000) + next_report_ += 50000; + else + next_report_ += 100000; fprintf(stderr, "... finished %d ops%30s\r", done_, ""); fflush(stderr); } } - void AddBytes(int64_t n) { - bytes_ += n; - } + void AddBytes(int64_t n) { bytes_ += n; } void Report(const Slice& name) { // Pretend at least one op was done in case we are running a benchmark @@ -273,17 +275,13 @@ class Stats { // elapsed times. double elapsed = (finish_ - start_) * 1e-6; char rate[100]; - snprintf(rate, sizeof(rate), "%6.1f MB/s", - (bytes_ / 1048576.0) / elapsed); + snprintf(rate, sizeof(rate), "%6.1f MB/s", (bytes_ / 1048576.0) / elapsed); extra = rate; } AppendWithSpace(&extra, message_); - fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n", - name.ToString().c_str(), - seconds_ * 1e6 / done_, - (extra.empty() ? "" : " "), - extra.c_str()); + fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n", name.ToString().c_str(), + seconds_ * 1e6 / done_, (extra.empty() ? "" : " "), extra.c_str()); if (FLAGS_histogram) { fprintf(stdout, "Microseconds per op:\n%s\n", hist_.ToString().c_str()); } @@ -307,21 +305,17 @@ struct SharedState { int num_done; bool start; - SharedState() : cv(&mu), total(0), num_initialized(0), num_done(0), start(false) { } + SharedState() : cv(&mu), total(0), num_initialized(0), num_done(0), start(false) {} }; // Per-thread state for concurrent executions of the same benchmark. struct ThreadState { - int tid; // 0..n-1 when running in n threads - Random rand; // Has different seeds for different threads + int tid; // 0..n-1 when running in n threads + Random rand; // Has different seeds for different threads Stats stats; SharedState* shared; - ThreadState(int index) - : tid(index), - rand(1000 + index), - shared(NULL) { - } + ThreadState(int index) : tid(index), rand(1000 + index), shared(NULL) {} }; } // namespace @@ -342,29 +336,23 @@ class Benchmark { const int kKeySize = 16; PrintEnvironment(); fprintf(stdout, "Keys: %d bytes each\n", kKeySize); - fprintf(stdout, "Values: %d bytes each (%d bytes after compression)\n", - FLAGS_value_size, + fprintf(stdout, "Values: %d bytes each (%d bytes after compression)\n", FLAGS_value_size, static_cast(FLAGS_value_size * FLAGS_compression_ratio + 0.5)); fprintf(stdout, "Entries: %d\n", num_); fprintf(stdout, "RawSize: %.1f MB (estimated)\n", - ((static_cast(kKeySize + FLAGS_value_size) * num_) - / 1048576.0)); + ((static_cast(kKeySize + FLAGS_value_size) * num_) / 1048576.0)); fprintf(stdout, "FileSize: %.1f MB (estimated)\n", - (((kKeySize + FLAGS_value_size * FLAGS_compression_ratio) * num_) - / 1048576.0)); + (((kKeySize + FLAGS_value_size * FLAGS_compression_ratio) * num_) / 1048576.0)); PrintWarnings(); fprintf(stdout, "------------------------------------------------\n"); } void PrintWarnings() { #if defined(__GNUC__) && !defined(__OPTIMIZE__) - fprintf(stdout, - "WARNING: Optimization is disabled: benchmarks unnecessarily slow\n" - ); + fprintf(stdout, "WARNING: Optimization is disabled: benchmarks unnecessarily slow\n"); #endif #ifndef NDEBUG - fprintf(stdout, - "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n"); + fprintf(stdout, "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n"); #endif // See if snappy is working by attempting to compress a compressible string @@ -378,8 +366,7 @@ class Benchmark { } void PrintEnvironment() { - fprintf(stderr, "LevelDB: version %d.%d\n", - kMajorVersion, kMinorVersion); + fprintf(stderr, "LevelDB: version %d.%d\n", kMajorVersion, kMinorVersion); #if defined(__linux) time_t now = time(NULL); @@ -414,16 +401,14 @@ class Benchmark { public: Benchmark() - : cache_(FLAGS_cache_size >= 0 ? NewLRUCache(FLAGS_cache_size) : NULL), - filter_policy_(FLAGS_bloom_bits >= 0 - ? NewBloomFilterPolicy(FLAGS_bloom_bits) - : NULL), - db_(NULL), - num_(FLAGS_num), - value_size_(FLAGS_value_size), - entries_per_batch_(1), - reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads), - heap_counter_(0) { + : cache_(FLAGS_cache_size >= 0 ? NewLRUCache(FLAGS_cache_size) : NULL), + filter_policy_(FLAGS_bloom_bits >= 0 ? NewBloomFilterPolicy(FLAGS_bloom_bits) : NULL), + db_(NULL), + num_(FLAGS_num), + value_size_(FLAGS_value_size), + entries_per_batch_(1), + reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads), + heap_counter_(0) { std::vector files; Env::Default()->GetChildren(FLAGS_db, &files); for (uint32_t i = 0; i < files.size(); i++) { @@ -467,29 +452,29 @@ class Benchmark { write_options_.disable_wal = FLAGS_disable_wal; void (Benchmark::*method)(ThreadState*) = NULL; - //bool fresh_db = false; + // bool fresh_db = false; int num_threads = FLAGS_threads; if (name == Slice("fillseq")) { - //fresh_db = true; + // fresh_db = true; method = &Benchmark::WriteSeq; } else if (name == Slice("fillbatch")) { - //fresh_db = true; + // fresh_db = true; entries_per_batch_ = 1000; method = &Benchmark::WriteSeq; } else if (name == Slice("fillrandom")) { - //fresh_db = true; + // fresh_db = true; method = &Benchmark::WriteRandom; } else if (name == Slice("overwrite")) { - //fresh_db = false; + // fresh_db = false; method = &Benchmark::WriteRandom; } else if (name == Slice("fillsync")) { - //fresh_db = true; + // fresh_db = true; num_ /= 1000; write_options_.sync = true; method = &Benchmark::WriteRandom; } else if (name == Slice("fill100K")) { - //fresh_db = true; + // fresh_db = true; num_ /= 1000; value_size_ = 100 * 1000; method = &Benchmark::WriteRandom; @@ -547,7 +532,7 @@ class Benchmark { } } - //if (fresh_db) { + // if (fresh_db) { // if (FLAGS_use_existing_db) { // fprintf(stdout, "%-12s : skipped (--use_existing_db is true)\n", // name.ToString().c_str()); @@ -602,8 +587,7 @@ class Benchmark { } } - void RunBenchmark(int n, Slice name, - void (Benchmark::*method)(ThreadState*)) { + void RunBenchmark(int n, Slice name, void (Benchmark::*method)(ThreadState*)) { SharedState shared; shared.total = n; shared.num_initialized = 0; @@ -666,7 +650,7 @@ class Benchmark { int dummy; port::AtomicPointer ap(&dummy); int count = 0; - void *ptr = NULL; + void* ptr = NULL; thread->stats.AddMessage("(each op is 1000 loads)"); while (count < 100000) { for (int i = 0; i < 1000; i++) { @@ -675,7 +659,7 @@ class Benchmark { count++; thread->stats.FinishedSingleOp(); } - if (ptr == NULL) exit(1); // Disable unused variable warning. + if (ptr == NULL) exit(1); // Disable unused variable warning. } void SnappyCompress(ThreadState* thread) { @@ -696,8 +680,7 @@ class Benchmark { thread->stats.AddMessage("(snappy failure)"); } else { char buf[100]; - snprintf(buf, sizeof(buf), "(output: %.1f%%)", - (produced * 100.0) / bytes); + snprintf(buf, sizeof(buf), "(output: %.1f%%)", (produced * 100.0) / bytes); thread->stats.AddMessage(buf); thread->stats.AddBytes(bytes); } @@ -711,8 +694,7 @@ class Benchmark { int64_t bytes = 0; char* uncompressed = new char[input.size()]; while (ok && bytes < 1024 * 1048576) { // Compress 1G - ok = port::Snappy_Uncompress(compressed.data(), compressed.size(), - uncompressed); + ok = port::Snappy_Uncompress(compressed.data(), compressed.size(), uncompressed); bytes += input.size(); thread->stats.FinishedSingleOp(); } @@ -744,8 +726,7 @@ class Benchmark { thread->stats.AddMessage("(LZ4 failure)"); } else { char buf[100]; - snprintf(buf, sizeof(buf), "(output: %.1f%%)", - (produced * 100.0) / bytes); + snprintf(buf, sizeof(buf), "(output: %.1f%%)", (produced * 100.0) / bytes); thread->stats.AddMessage(buf); thread->stats.AddBytes(bytes); } @@ -760,8 +741,8 @@ class Benchmark { char* uncompressed = new char[input.size()]; size_t max_buffer_size = input.size() * 3; while (ok && bytes < 1024 * 1048576) { // Compress 1G - ok = port::Lz4_Uncompress(compressed.data(), compressed.size(), - uncompressed, &max_buffer_size); + ok = port::Lz4_Uncompress(compressed.data(), compressed.size(), uncompressed, + &max_buffer_size); bytes += input.size(); thread->stats.FinishedSingleOp(); } @@ -792,8 +773,7 @@ class Benchmark { thread->stats.AddMessage("(BMZ failure)"); } else { char buf[100]; - snprintf(buf, sizeof(buf), "(output: %.1f%%)", - (produced * 100.0) / bytes); + snprintf(buf, sizeof(buf), "(output: %.1f%%)", (produced * 100.0) / bytes); thread->stats.AddMessage(buf); thread->stats.AddBytes(bytes); } @@ -808,8 +788,8 @@ class Benchmark { char* uncompressed = new char[input.size()]; size_t max_buffer_size = input.size() * 3; while (ok && bytes < 1024 * 1048576) { // Compress 1G - ok = port::Bmz_Uncompress(compressed.data(), compressed.size(), - uncompressed, &max_buffer_size); + ok = port::Bmz_Uncompress(compressed.data(), compressed.size(), uncompressed, + &max_buffer_size); bytes += input.size(); thread->stats.FinishedSingleOp(); } @@ -825,13 +805,13 @@ class Benchmark { CompressionType NumToCompressionType(int n) { if (n == 1) { - return kSnappyCompression; + return kSnappyCompression; } else if (n == 2) { - return kBmzCompression; + return kBmzCompression; } else if (n == 3) { - return kLZ4Compression; + return kLZ4Compression; } else { - return kNoCompression; + return kNoCompression; } } @@ -844,17 +824,18 @@ class Benchmark { options.filter_policy = filter_policy_; options.block_size = FLAGS_block_size; options.compression = NumToCompressionType(FLAGS_compress); - Status log_s = Env::Default()->NewLogger("./ldblog", &options.info_log); + Status log_s = Env::Default()->NewLogger("./ldblog", LogOption::LogOptionBuilder().Build(), + &options.info_log); if (FLAGS_env == NULL) { - // do nothing + // do nothing } else if (strncmp(FLAGS_env, "hdfs", 5) == 0) { - options.env = EnvDfs(); + options.env = EnvDfs(); } else if (strncmp(FLAGS_env, "flash", 4) == 0) { - options.env = NewFlashEnv(EnvDfs()); + options.env = NewFlashEnv(EnvDfs()); } else if (strncmp(FLAGS_env, "inmem", 4) == 0) { - options.env = NewInMemoryEnv(EnvDfs()); + options.env = NewInMemoryEnv(EnvDfs()); } else if (strncmp(FLAGS_env, "mem", 4) == 0) { - options.env = NewMemEnv(Env::Default()); + options.env = NewMemEnv(Env::Default()); } Status s = DB::Open(options, FLAGS_db, &db_); if (!s.ok()) { @@ -863,13 +844,9 @@ class Benchmark { } } - void WriteSeq(ThreadState* thread) { - DoWrite(thread, true); - } + void WriteSeq(ThreadState* thread) { DoWrite(thread, true); } - void WriteRandom(ThreadState* thread) { - DoWrite(thread, false); - } + void WriteRandom(ThreadState* thread) { DoWrite(thread, false); } void DoWrite(ThreadState* thread, bool seq) { if (num_ != FLAGS_num) { @@ -885,7 +862,7 @@ class Benchmark { for (int i = 0; i < num_; i += entries_per_batch_) { batch.Clear(); for (int j = 0; j < entries_per_batch_; j++) { - const int k = seq ? i+j : (thread->rand.Next() % FLAGS_num); + const int k = seq ? i + j : (thread->rand.Next() % FLAGS_num); char key[100]; snprintf(key, sizeof(key), "%016d", k); batch.Put(key, gen.Generate(value_size_)); @@ -996,7 +973,7 @@ class Benchmark { for (int i = 0; i < num_; i += entries_per_batch_) { batch.Clear(); for (int j = 0; j < entries_per_batch_; j++) { - const int k = seq ? i+j : (thread->rand.Next() % FLAGS_num); + const int k = seq ? i + j : (thread->rand.Next() % FLAGS_num); char key[100]; snprintf(key, sizeof(key), "%016d", k); batch.Delete(key); @@ -1010,13 +987,9 @@ class Benchmark { } } - void DeleteSeq(ThreadState* thread) { - DoDelete(thread, true); - } + void DeleteSeq(ThreadState* thread) { DoDelete(thread, true); } - void DeleteRandom(ThreadState* thread) { - DoDelete(thread, false); - } + void DeleteRandom(ThreadState* thread) { DoDelete(thread, false); } void ReadWhileWriting(ThreadState* thread) { if (thread->tid > 0) { @@ -1048,9 +1021,7 @@ class Benchmark { } } - void Compact(ThreadState* thread) { - db_->CompactRange(NULL, NULL); - } + void Compact(ThreadState* thread) { db_->CompactRange(NULL, NULL); } void PrintStats(const char* key) { std::string stats; @@ -1096,14 +1067,11 @@ int main(int argc, char** argv) { FLAGS_benchmarks = argv[i] + strlen("--benchmarks="); } else if (sscanf(argv[i], "--compression_ratio=%lf%c", &d, &junk) == 1) { FLAGS_compression_ratio = d; - } else if (sscanf(argv[i], "--histogram=%d%c", &n, &junk) == 1 && - (n == 0 || n == 1)) { + } else if (sscanf(argv[i], "--histogram=%d%c", &n, &junk) == 1 && (n == 0 || n == 1)) { FLAGS_histogram = n; - } else if (sscanf(argv[i], "--use_existing_db=%d%c", &n, &junk) == 1 && - (n == 0 || n == 1)) { + } else if (sscanf(argv[i], "--use_existing_db=%d%c", &n, &junk) == 1 && (n == 0 || n == 1)) { FLAGS_use_existing_db = n; - } else if (sscanf(argv[i], "--disable_wal=%d%c", &n, &junk) == 1 && - (n == 0 || n == 1)) { + } else if (sscanf(argv[i], "--disable_wal=%d%c", &n, &junk) == 1 && (n == 0 || n == 1)) { FLAGS_disable_wal = n; } else if (sscanf(argv[i], "--compress=%d%c", &n, &junk) == 1 && (n == 0 || n == 1 || n == 2 || n == 3)) { @@ -1138,9 +1106,9 @@ int main(int argc, char** argv) { // Choose a location for the test database if none given with --db= if (FLAGS_db == NULL) { - leveldb::Env::Default()->GetTestDirectory(&default_db_path); - default_db_path += "/dbbench"; - FLAGS_db = default_db_path.c_str(); + leveldb::Env::Default()->GetTestDirectory(&default_db_path); + default_db_path += "/dbbench"; + FLAGS_db = default_db_path.c_str(); } leveldb::Benchmark benchmark; diff --git a/src/leveldb/db/db_impl.cc b/src/leveldb/db/db_impl.cc index 00934f4d4..5154d906a 100644 --- a/src/leveldb/db/db_impl.cc +++ b/src/leveldb/db/db_impl.cc @@ -42,18 +42,20 @@ #include "util/coding.h" #include "util/logging.h" #include "util/mutexlock.h" +#include "memtable_on_leveldb.h" +#include "sharded_memtable.h" +#include "leveldb/persistent_cache.h" namespace leveldb { -extern Status WriteStringToFileSync(Env* env, const Slice& data, - const std::string& fname); +extern Status WriteStringToFileSync(Env* env, const Slice& data, const std::string& fname); const int kNumNonTableCacheFiles = 10; // if this file exists, ignore error in db-opening const static std::string mark_file_name = "/__oops"; -// if this file exists, +// if this file exists, const static std::string init_load_filelock = "/__init_load_filelock"; // Information kept for every waiting writer @@ -61,7 +63,7 @@ struct DBImpl::Writer { WriteBatch* batch; port::CondVar cv; - explicit Writer(port::Mutex* mu) : batch(NULL), cv(mu) { } + explicit Writer(port::Mutex* mu) : batch(NULL), cv(mu) {} }; struct DBImpl::CompactionState { @@ -77,15 +79,12 @@ struct DBImpl::CompactionState { struct Output { uint64_t number; uint64_t file_size; - int64_t del_num; // statistic: delete tag's percentage in sst - std::vector ttls; // use for calculate timeout percentage + int64_t del_num; // statistic: delete tag's percentage in sst + std::vector ttls; // use for calculate timeout percentage int64_t entries; InternalKey smallest, largest; - Output(): number(0), - file_size(0), - del_num(0), - entries(0) {} + Output() : number(0), file_size(0), del_num(0), entries(0) {} }; std::vector outputs; @@ -96,38 +95,35 @@ struct DBImpl::CompactionState { uint64_t total_bytes; Status status; - Output* current_output() { return &outputs[outputs.size()-1]; } + Output* current_output() { return &outputs[outputs.size() - 1]; } explicit CompactionState(Compaction* c) : compaction(c), smallest_snapshot(kMaxSequenceNumber), outfile(NULL), builder(NULL), - total_bytes(0) { - } + total_bytes(0) {} }; // Fix user-supplied options to be reasonable -template +template static void ClipToRange(T* ptr, V minvalue, V maxvalue) { if (static_cast(*ptr) > maxvalue) *ptr = maxvalue; if (static_cast(*ptr) < minvalue) *ptr = minvalue; } -Options SanitizeOptions(const std::string& dbname, - const InternalKeyComparator* icmp, - const InternalFilterPolicy* ipolicy, - const Options& src) { +Options SanitizeOptions(const std::string& dbname, const InternalKeyComparator* icmp, + const InternalFilterPolicy* ipolicy, const Options& src) { Options result = src; result.comparator = icmp; result.filter_policy = (src.filter_policy != NULL) ? ipolicy : NULL; - ClipToRange(&result.max_open_files, 64 + kNumNonTableCacheFiles, 50000); - ClipToRange(&result.write_buffer_size, 64<<10, 1<<30); - ClipToRange(&result.block_size, 1<<10, 4<<20); + ClipToRange(&result.max_open_files, 64 + kNumNonTableCacheFiles, 50000); + ClipToRange(&result.write_buffer_size, 64 << 10, 1 << 30); + ClipToRange(&result.block_size, 1 << 10, 4 << 20); if (result.info_log == NULL) { // Open a log file in the same directory as the db src.env->CreateDir(dbname); // In case it does not exist - src.env->RenameFile(InfoLogFileName(dbname), OldInfoLogFileName(dbname)); - Status s = src.env->NewLogger(InfoLogFileName(dbname), &result.info_log); + Status s = src.env->NewLogger(InfoLogFileName(dbname), LogOption::LogOptionBuilder().Build(), + &result.info_log); if (!s.ok()) { // No place suitable for logging result.info_log = NULL; @@ -138,13 +134,15 @@ Options SanitizeOptions(const std::string& dbname, } if (result.ignore_corruption_in_open) { - Log(result.info_log, "[%s] caution: open with ignore_corruption_in_open", dbname.c_str()); + LEVELDB_LOG(result.info_log, "[%s] caution: open with ignore_corruption_in_open", + dbname.c_str()); } { std::string oops = dbname + mark_file_name; Status s = src.env->FileExists(oops); if (s.ok()) { - Log(result.info_log, "[%s] caution: open with ignore_corruption_in_open", dbname.c_str()); + LEVELDB_LOG(result.info_log, "[%s] caution: open with ignore_corruption_in_open", + dbname.c_str()); result.ignore_corruption_in_open = true; } // Ignore error from FileExists since there is no harm @@ -153,12 +151,13 @@ Options SanitizeOptions(const std::string& dbname, } DBImpl::DBImpl(const Options& options, const std::string& dbname) - : state_(kNotOpen), key_start_(options.key_start), key_end_(options.key_end), + : state_(kNotOpen), + key_start_(options.key_start), + key_end_(options.key_end), env_(options.env), internal_comparator_(options.comparator), internal_filter_policy_(options.filter_policy), - options_(SanitizeOptions( - dbname, &internal_comparator_, &internal_filter_policy_, options)), + options_(SanitizeOptions(dbname, &internal_comparator_, &internal_filter_policy_, options)), owns_info_log_(options_.info_log != options.info_log), owns_block_cache_(options_.block_cache != options.block_cache), dbname_(dbname), @@ -170,7 +169,8 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname) writting_mem_cv_(&mutex_), is_writting_mem_(false), mem_(NewMemTable()), - imm_(NULL), recover_mem_(NULL), + imm_(NULL), + recover_mem_(NULL), logfile_(NULL), logfile_number_(0), log_(NULL), @@ -184,19 +184,18 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname) // Reserve ten files or so for other uses and give the rest to TableCache. if (owns_table_cache_) { - Log(options_.info_log, "[%s] create new table cache.", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] create new table cache.", dbname_.c_str()); // assume 2MB per file - const size_t table_cache_size = (options_.max_open_files - kNumNonTableCacheFiles) - * 2LL * 1024 * 1024; // 2MB + const size_t table_cache_size = + (options_.max_open_files - kNumNonTableCacheFiles) * 2LL * 1024 * 1024; // 2MB table_cache_ = new TableCache(table_cache_size); } - versions_ = new VersionSet(dbname_, &options_, table_cache_, - &internal_comparator_); + versions_ = new VersionSet(dbname_, &options_, table_cache_, &internal_comparator_); } bool DBImpl::ShouldForceUnloadOnError() { - MutexLock l(&mutex_); - return bg_error_.IsIOPermissionDenied(); + MutexLock l(&mutex_); + return bg_error_.IsIOPermissionDenied(); } Status DBImpl::Shutdown1() { @@ -206,7 +205,7 @@ Status DBImpl::Shutdown1() { MutexLock l(&mutex_); shutting_down_.Release_Store(this); // Any non-NULL value is ok - Log(options_.info_log, "[%s] wait bg compact finish", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] wait bg compact finish", dbname_.c_str()); std::vector::iterator it = bg_compaction_tasks_.begin(); for (; it != bg_compaction_tasks_.end(); ++it) { env_->ReSchedule((*it)->id, kDumpMemTableUrgentScore, 0); @@ -214,23 +213,24 @@ Status DBImpl::Shutdown1() { while (bg_compaction_tasks_.size() > 0) { bg_cv_.Wait(); } - // has enconutered IOPermission Denied error, return immediately and do not try to compact memory table aynmore + // has enconutered IOPermission Denied error, return immediately and do not + // try to compact memory table aynmore if (bg_error_.IsIOPermissionDenied()) { - return bg_error_; + return bg_error_; } Status s; if (!options_.dump_mem_on_shutdown) { return s; } - Log(options_.info_log, "[%s] fg compact mem table", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] fg compact mem table", dbname_.c_str()); if (imm_ != NULL) { s = CompactMemTable(); } if (s.ok()) { assert(imm_ == NULL); while (is_writting_mem_) { - writting_mem_cv_.Wait(); + writting_mem_cv_.Wait(); } imm_ = mem_; has_imm_.Release_Store(imm_); @@ -247,14 +247,14 @@ Status DBImpl::Shutdown2() { state_ = kShutdown2; MutexLock l(&mutex_); - if(bg_error_.IsIOPermissionDenied()) { - return bg_error_; + if (bg_error_.IsIOPermissionDenied()) { + return bg_error_; } Status s; if (!options_.dump_mem_on_shutdown) { return s; } - Log(options_.info_log, "[%s] fg compact mem table", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] fg compact mem table", dbname_.c_str()); assert(imm_ == NULL); imm_ = mem_; has_imm_.Release_Store(imm_); @@ -267,7 +267,7 @@ DBImpl::~DBImpl() { if (state_ == kOpened) { Status s = Shutdown1(); if (s.ok()) { - Shutdown2(); + Shutdown2(); } } @@ -327,8 +327,8 @@ void DBImpl::MaybeIgnoreError(Status* s) const { if (s->ok() || options_.paranoid_checks) { // No change needed } else { - Log(options_.info_log, "[%s] Ignoring error %s", - dbname_.c_str(), s->ToString().c_str()); + LEVELDB_LOG(options_.info_log, "[%s] Ignoring error %s", dbname_.c_str(), + s->ToString().c_str()); *s = Status::OK(); } } @@ -344,7 +344,7 @@ void DBImpl::DeleteObsoleteFiles() { // check filesystem, and then check pending_outputs_ std::vector filenames; mutex_.Unlock(); - env_->GetChildren(dbname_, &filenames); // Ignoring errors on purpose + env_->GetChildren(dbname_, &filenames); // Ignoring errors on purpose mutex_.Lock(); // Make a set of all of the live files @@ -352,12 +352,13 @@ void DBImpl::DeleteObsoleteFiles() { versions_->AddLiveFiles(&live); // manifest file set, keep latest 3 manifest files for backup - //std::set manifest_set; + // std::set manifest_set; - Log(options_.info_log, "[%s] try DeleteObsoleteFiles, total live file num: %llu," - " pending_outputs %lu, children_nr %lu\n", - dbname_.c_str(), static_cast(live.size()), - pending_outputs_.size(), filenames.size()); + LEVELDB_LOG(options_.info_log, + "[%s] try DeleteObsoleteFiles, total live file num: %llu," + " pending_outputs %lu, children_nr %lu\n", + dbname_.c_str(), static_cast(live.size()), + pending_outputs_.size(), filenames.size()); uint64_t number; FileType type; for (size_t i = 0; i < filenames.size(); i++) { @@ -365,10 +366,9 @@ void DBImpl::DeleteObsoleteFiles() { bool keep = true; switch (type) { case kLogFile: - keep = ((number >= versions_->LogNumber()) || - (number == versions_->PrevLogNumber())); + keep = ((number >= versions_->LogNumber()) || (number == versions_->PrevLogNumber())); break; - //case kDescriptorFile: + // case kDescriptorFile: // manifest_set.insert(filenames[i]); // if (manifest_set.size() > 3) { // std::set::iterator it = manifest_set.begin(); @@ -385,7 +385,7 @@ void DBImpl::DeleteObsoleteFiles() { case kTableFile: keep = (live.find(BuildFullFileNumber(dbname_, number)) != live.end()); break; - //case kTempFile: + // case kTempFile: // // Any temp files that are currently being written to must // // be recorded in pending_outputs_, which is inserted into "live" // keep = (live.find(number) != live.end()); @@ -403,10 +403,19 @@ void DBImpl::DeleteObsoleteFiles() { if (!keep) { if (type == kTableFile) { table_cache_->Evict(dbname_, BuildFullFileNumber(dbname_, number)); + if (options_.persistent_cache) { + auto filename = dbname_ + "/" + filenames[i]; + Slice key{filename}; + key.remove_specified_prefix(options_.dfs_storage_path_prefix); + options_.persistent_cache->ForceEvict(key); + LEVELDB_LOG(options_.info_log, + "[%s] Force evict obsolete file from persistent cache: %s\n", + dbname_.c_str(), filenames[i].c_str()); + } } - Log(options_.info_log, "[%s] Delete type=%s #%lld, fname %s\n", - dbname_.c_str(), FileTypeToString(type), - static_cast(number), filenames[i].c_str()); + LEVELDB_LOG(options_.info_log, "[%s] Delete type=%s #%lld, fname %s\n", dbname_.c_str(), + FileTypeToString(type), static_cast(number), + filenames[i].c_str()); mutex_.Unlock(); env_->DeleteFile(dbname_ + "/" + filenames[i]); mutex_.Lock(); @@ -422,37 +431,32 @@ void DBImpl::DeleteObsoleteFiles() { // 1). Status::Corruption -> CURRENT lost, // 2). Status::IOError -> Maybe request timeout, don't use *exists Status DBImpl::ParentCurrentStatus(uint64_t parent_no, bool* exists) { - assert(exists != NULL); - std::string current = - CurrentFileName(RealDbName(dbname_, parent_no)); - Status s = env_->FileExists(current); - if (s.ok()) { - *exists = true; - return s; - } else if (s.IsNotFound()) { - *exists = false; - if (options_.ignore_corruption_in_open) { - // Drop all data in parent tablet - Log(options_.info_log, "[%s] parent tablet(%ld) CURRENT error(drop all data): %s", - dbname_.c_str(), - static_cast(parent_no), - s.ToString().c_str()); - return Status::OK(); // Data lost, reopen it as a new db - } else { - Log(options_.info_log, "[%s] parent tablet(%ld) CURRENT error: %s", - dbname_.c_str(), - static_cast(parent_no), - s.ToString().c_str()); - return Status::Corruption("CURRENT parent current lost", " parent tablet:" - + std::to_string(static_cast(parent_no)) + ", " + s.ToString()); - } + assert(exists != NULL); + std::string current = CurrentFileName(RealDbName(dbname_, parent_no)); + Status s = env_->FileExists(current); + if (s.ok()) { + *exists = true; + return s; + } else if (s.IsNotFound()) { + *exists = false; + if (options_.ignore_corruption_in_open) { + // Drop all data in parent tablet + LEVELDB_LOG(options_.info_log, "[%s] parent tablet(%ld) CURRENT error(drop all data): %s", + dbname_.c_str(), static_cast(parent_no), s.ToString().c_str()); + return Status::OK(); // Data lost, reopen it as a new db } else { - // Maybe request timeout, should retry open - Log(options_.info_log, "[%s] parent tablet(%ld) CURRENT timeout", - dbname_.c_str(), - static_cast(parent_no)); - return Status::IOError("parent CURRENT timeout"); + LEVELDB_LOG(options_.info_log, "[%s] parent tablet(%ld) CURRENT error: %s", dbname_.c_str(), + static_cast(parent_no), s.ToString().c_str()); + return Status::Corruption( + "CURRENT parent current lost", + " parent tablet:" + std::to_string(static_cast(parent_no)) + ", " + s.ToString()); } + } else { + // Maybe request timeout, should retry open + LEVELDB_LOG(options_.info_log, "[%s] parent tablet(%ld) CURRENT timeout", dbname_.c_str(), + static_cast(parent_no)); + return Status::IOError("parent CURRENT timeout"); + } } // Returns: @@ -475,8 +479,7 @@ Status DBImpl::DbExists(bool* exists) { FileType type; bool valid = ParseFileName(files[i], &number, &type); if (!valid) { - Log(options_.info_log, "[%s] invalid filename %s", - dbname_.c_str(), files[i].c_str()); + LEVELDB_LOG(options_.info_log, "[%s] invalid filename %s", dbname_.c_str(), files[i].c_str()); continue; } if (type == kCurrentFile) { @@ -497,8 +500,8 @@ Status DBImpl::DbExists(bool* exists) { if (manifest_exists) { // CURRENT file lost, but MANIFEST exist, maybe still open it if (options_.ignore_corruption_in_open) { - Log(options_.info_log, "[%s] CURRENT file lost, but MANIFEST exists", - dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] CURRENT file lost, but MANIFEST exists", + dbname_.c_str()); options_.parent_tablets.resize(0); *exists = true; return Status::OK(); @@ -549,19 +552,17 @@ Status DBImpl::DbExists(bool* exists) { *exists = true; options_.parent_tablets.resize(0); options_.parent_tablets.push_back(parent0); - Log(options_.info_log, "[%s] ignore parent(%ld) lost", - dbname_.c_str(), parent1); + LEVELDB_LOG(options_.info_log, "[%s] ignore parent(%ld) lost", dbname_.c_str(), parent1); } else if (parent1_exists) { *exists = true; options_.parent_tablets.resize(0); options_.parent_tablets.push_back(parent1); - Log(options_.info_log, "[%s] ignore parent(%ld) lost", - dbname_.c_str(), parent0); + LEVELDB_LOG(options_.info_log, "[%s] ignore parent(%ld) lost", dbname_.c_str(), parent0); } else { // Parents data lost, open this db as an empty db *exists = false; - Log(options_.info_log, "[%s] ignore all parents(%ld, %ld) lost", - dbname_.c_str(), parent0, parent1); + LEVELDB_LOG(options_.info_log, "[%s] ignore all parents(%ld, %ld) lost", dbname_.c_str(), + parent0, parent1); } return s; } else { @@ -573,32 +574,32 @@ Status DBImpl::Recover(VersionEdit* edit) { mutex_.AssertHeld(); { - Status s = env_->FileExists(dbname_); - if (s.IsNotFound()) { - s = env_->CreateDir(dbname_); - if (!s.ok()) { - Log(options_.info_log, "[%s] fail to create db: %s", - dbname_.c_str(), s.ToString().c_str()); + Status s = env_->FileExists(dbname_); + if (s.IsNotFound()) { + s = env_->CreateDir(dbname_); + if (!s.ok()) { + LEVELDB_LOG(options_.info_log, "[%s] fail to create db: %s", dbname_.c_str(), + s.ToString().c_str()); + return s; + } + need_newdb_txn_ = true; + } else if (s.ok()) { + // lg directory exists and not ignore curruption in open + if (!options_.ignore_corruption_in_open) { + s = env_->FileExists(dbname_ + init_load_filelock); + if (s.ok()) { + need_newdb_txn_ = true; + } else if (!s.IsNotFound()) { + // Unknown status return s; } - need_newdb_txn_ = true; - } else if (s.ok()) { - // lg directory exists and not ignore curruption in open - if (!options_.ignore_corruption_in_open) { - s = env_->FileExists(dbname_ + init_load_filelock); - if (s.ok()) { - need_newdb_txn_ = true; - } else if (!s.IsNotFound()) { - // Unknown status - return s; - } - } - } else { - // Unknown status - return s; } + } else { + // Unknown status + return s; + } } - + if (options_.use_file_lock) { Status s = env_->LockFile(LockFileName(dbname_), &db_lock_); if (!s.ok()) { @@ -612,8 +613,8 @@ Status DBImpl::Recover(VersionEdit* edit) { s = env_->DeleteFile(dbname_ + init_load_filelock); if (!s.ok()) { // legacy initlock-file is dangerous - Log(options_.info_log, "[%s] delete initlock-file failed for %s", - dbname_.c_str(), s.ToString().c_str()); + LEVELDB_LOG(options_.info_log, "[%s] delete initlock-file failed for %s", dbname_.c_str(), + s.ToString().c_str()); return Status::IOError("delete initlock-file failed"); } } @@ -622,8 +623,8 @@ Status DBImpl::Recover(VersionEdit* edit) { s = env_->DeleteFile(dbname_ + mark_file_name); if (!s.ok()) { // legacy mark-file is dangerous - Log(options_.info_log, "[%s] delete mark-file failed for %s", - dbname_.c_str(), s.ToString().c_str()); + LEVELDB_LOG(options_.info_log, "[%s] delete mark-file failed for %s", dbname_.c_str(), + s.ToString().c_str()); return Status::IOError("delete mark-file failed"); } } @@ -648,11 +649,11 @@ Status DBImpl::Recover(VersionEdit* edit) { } } - Log(options_.info_log, "[%s] start VersionSet::Recover, last_seq= %llu", - dbname_.c_str(), static_cast(versions_->LastSequence())); + LEVELDB_LOG(options_.info_log, "[%s] start VersionSet::Recover, last_seq= %llu", dbname_.c_str(), + static_cast(versions_->LastSequence())); s = versions_->Recover(); - Log(options_.info_log, "[%s] end VersionSet::Recover last_seq= %llu", - dbname_.c_str(), static_cast(versions_->LastSequence())); + LEVELDB_LOG(options_.info_log, "[%s] end VersionSet::Recover last_seq= %llu", dbname_.c_str(), + static_cast(versions_->LastSequence())); // check loss of sst files (fs exception) if (s.ok()) { @@ -671,27 +672,27 @@ Status DBImpl::Recover(VersionEdit* edit) { std::set::iterator it_tablet = tablets.begin(); for (; it_tablet != tablets.end(); ++it_tablet) { std::string path = RealDbName(dbname_, *it_tablet); - Log(options_.info_log, "[%s] GetChildren(%s)", dbname_.c_str(), path.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] GetChildren(%s)", dbname_.c_str(), path.c_str()); std::vector filenames; s = env_->GetChildren(path, &filenames); if (s.ok()) { // Do nothing } else if (s.IsTimeOut()) { // Should retry open - Log(options_.info_log, "[%s] GetChildren(%s) timeout: %s", - dbname_.c_str(), path.c_str(), s.ToString().c_str()); + LEVELDB_LOG(options_.info_log, "[%s] GetChildren(%s) timeout: %s", dbname_.c_str(), + path.c_str(), s.ToString().c_str()); return Status::TimeOut("GetChildren timeout"); } else { // Cannot read the directory if (options_.ignore_corruption_in_open) { - Log(options_.info_log, "[%s] GetChildren(%s) fail: %s, still open!", - dbname_.c_str(), path.c_str(), s.ToString().c_str()); + LEVELDB_LOG(options_.info_log, "[%s] GetChildren(%s) fail: %s, still open!", + dbname_.c_str(), path.c_str(), s.ToString().c_str()); // Reset the status s = Status::OK(); continue; } else { - Log(options_.info_log, "[%s] GetChildren(%s) fail: %s", - dbname_.c_str(), path.c_str(), s.ToString().c_str()); + LEVELDB_LOG(options_.info_log, "[%s] GetChildren(%s) fail: %s", dbname_.c_str(), + path.c_str(), s.ToString().c_str()); return Status::IOError("GetChildren fail"); } } @@ -712,8 +713,8 @@ Status DBImpl::Recover(VersionEdit* edit) { edit->DeleteFile(it->second, it->first); } } - Log(options_.info_log, "[%s] file system lost files: %s", dbname_.c_str(), - lost_files_str.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] file system lost files: %s", dbname_.c_str(), + lost_files_str.c_str()); if (!options_.ignore_corruption_in_open) { return Status::Corruption("sst lost", lost_files_str); @@ -726,8 +727,7 @@ Status DBImpl::Recover(VersionEdit* edit) { return s; } -Status DBImpl::WriteLevel0Table(MemTable* mem, VersionEdit* edit, - Version* base, uint64_t* number) { +Status DBImpl::WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base, uint64_t* number) { mutex_.AssertHeld(); const uint64_t start_micros = env_->NowMicros(); FileMetaData meta; @@ -737,8 +737,8 @@ Status DBImpl::WriteLevel0Table(MemTable* mem, VersionEdit* edit, } pending_outputs_.insert(meta.number); Iterator* iter = mem->NewIterator(); - Log(options_.info_log, "[%s] Level-0 table #%u: started", - dbname_.c_str(), (unsigned int) meta.number); + LEVELDB_LOG(options_.info_log, "[%s] Level-0 table #%u: started", dbname_.c_str(), + (unsigned int)meta.number); uint64_t saved_size = 0; Status s; @@ -748,8 +748,8 @@ Status DBImpl::WriteLevel0Table(MemTable* mem, VersionEdit* edit, smallest_snapshot = *(snapshots_.begin()); } mutex_.Unlock(); - s = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta, - &saved_size, smallest_snapshot); + s = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta, &saved_size, + smallest_snapshot); mutex_.Lock(); } delete iter; @@ -767,13 +767,10 @@ Status DBImpl::WriteLevel0Table(MemTable* mem, VersionEdit* edit, edit->AddFile(level, meta); } VersionSet::LevelSummaryStorage tmp; - Log(options_.info_log, "[%s] Level-0 table #%u: dump-level %d, %lld (+ %lld ) bytes %s, %s", - dbname_.c_str(), (unsigned int) meta.number, - level, - (unsigned long long) meta.file_size, - (unsigned long long) saved_size, - s.ToString().c_str(), - versions_->LevelSummary(&tmp)); + LEVELDB_LOG(options_.info_log, + "[%s] Level-0 table #%u: dump-level %d, %lld (+ %lld ) bytes %s, %s", dbname_.c_str(), + (unsigned int)meta.number, level, (unsigned long long)meta.file_size, + (unsigned long long)saved_size, s.ToString().c_str(), versions_->LevelSummary(&tmp)); CompactionStats stats; stats.micros = env_->NowMicros() - start_micros; @@ -788,18 +785,16 @@ Status DBImpl::CompactMemTable(bool* sched_idle) { assert(imm_ != NULL); Status s; if (sched_idle) { - *sched_idle = true; + *sched_idle = true; } if (imm_->BeingFlushed()) { - //Log(options_.info_log, "[%s] CompactMemTable conflict, seq %lu", - // dbname_.c_str(), GetLastSequence(false)); return s; } imm_->SetBeingFlushed(true); - if (imm_->ApproximateMemoryUsage() <= 0) { // imm is empty, do nothing - Log(options_.info_log, "[%s] CompactMemTable empty memtable %lu", - dbname_.c_str(), GetLastSequence(false)); + if (imm_->ApproximateMemoryUsage() <= 0) { // imm is empty, do nothing + LEVELDB_LOG(options_.info_log, "[%s] CompactMemTable empty memtable %lu", dbname_.c_str(), + GetLastSequence(false)); imm_->Unref(); imm_ = NULL; has_imm_.Release_Store(NULL); @@ -823,14 +818,16 @@ Status DBImpl::CompactMemTable(bool* sched_idle) { // Replace immutable memtable with the generated Table if (s.ok()) { - pending_outputs_.insert(number); // LogAndApply donot holds lock, so use pending_outputs_ to make sure new file will not be deleted + pending_outputs_.insert(number); // LogAndApply donot holds lock, so use + // pending_outputs_ to make sure new file + // will not be deleted edit.SetPrevLogNumber(0); edit.SetLogNumber(logfile_number_); // Earlier logs no longer needed if (imm_->GetLastSequence()) { edit.SetLastSequence(imm_->GetLastSequence()); } - Log(options_.info_log, "[%s] CompactMemTable SetLastSequence %lu", - dbname_.c_str(), edit.GetLastSequence()); + LEVELDB_LOG(options_.info_log, "[%s] CompactMemTable SetLastSequence %lu", dbname_.c_str(), + edit.GetLastSequence()); s = versions_->LogAndApply(&edit, &mutex_); pending_outputs_.erase(number); } @@ -859,13 +856,13 @@ void DBImpl::CompactRange(const Slice* begin, const Slice* end, int lg_no) { } } } - TEST_CompactMemTable(); // TODO(sanjay): Skip if memtable does not overlap + TEST_CompactMemTable(); // TODO(sanjay): Skip if memtable does not overlap for (int level = 0; level < max_level_with_files; level++) { TEST_CompactRange(level, begin, end); } } -void DBImpl::TEST_CompactRange(int level, const Slice* begin,const Slice* end) { +void DBImpl::TEST_CompactRange(int level, const Slice* begin, const Slice* end) { assert(level >= 0); assert(level + 1 < config::kNumLevels); @@ -891,13 +888,13 @@ void DBImpl::TEST_CompactRange(int level, const Slice* begin,const Slice* end) { MutexLock l(&mutex_); while (!manual.done && !shutting_down_.Acquire_Load() && bg_error_.ok()) { - if (manual_compaction_ == NULL) { // Idle + if (manual_compaction_ == NULL) { // Idle manual_compaction_ = &manual; MaybeScheduleCompaction(); } else if (manual_compaction_->compaction_conflict == kManualCompactConflict) { manual_compaction_->compaction_conflict = kManualCompactIdle; MaybeScheduleCompaction(); - } else { // Running either my compaction or another compaction. + } else { // Running either my compaction or another compaction. bg_cv_.Wait(); } } @@ -909,16 +906,16 @@ void DBImpl::TEST_CompactRange(int level, const Slice* begin,const Slice* end) { Status DBImpl::TEST_CompactMemTable() { // NULL batch means just wait for earlier writes to be done - Log(options_.info_log, "[%s] CompactMemTable start", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] CompactMemTable start", dbname_.c_str()); Status s = Write(WriteOptions(), NULL); - Log(options_.info_log, "[%s] CompactMemTable Write done", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] CompactMemTable Write done", dbname_.c_str()); if (s.ok()) { // Wait until the compaction completes MutexLock l(&mutex_); while (imm_ != NULL && bg_error_.ok()) { bg_cv_.Wait(); } - Log(options_.info_log, "[%s] CompactMemTable done", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] CompactMemTable done", dbname_.c_str()); if (imm_ != NULL) { s = bg_error_; } @@ -929,19 +926,18 @@ Status DBImpl::TEST_CompactMemTable() { // tera-specific bool DBImpl::FindSplitKey(double ratio, std::string* split_key) { - MutexLock l(&mutex_); - return versions_->current()->FindSplitKey(ratio, split_key); + MutexLock l(&mutex_); + return versions_->current()->FindSplitKey(ratio, split_key); } -bool DBImpl::FindKeyRange(std::string* smallest_key, - std::string* largest_key) { - MutexLock l(&mutex_); - return versions_->current()->FindKeyRange(smallest_key, largest_key); +bool DBImpl::FindKeyRange(std::string* smallest_key, std::string* largest_key) { + MutexLock l(&mutex_); + return versions_->current()->FindKeyRange(smallest_key, largest_key); } bool DBImpl::MinorCompact() { - Status s = TEST_CompactMemTable(); - return s.ok(); + Status s = TEST_CompactMemTable(); + return s.ok(); } void DBImpl::AddInheritedLiveFiles(std::vector >* live) { @@ -964,8 +960,6 @@ void DBImpl::AddInheritedLiveFiles(std::vector >* live) { (*live)[lg].insert(*it); } } - //Log(options_.info_log, "[%s] finish collect inherited sst fils, %d totals", - // dbname_.c_str(), (*live)[lg].size()); } Status DBImpl::RecoverInsertMem(WriteBatch* batch, VersionEdit* edit) { @@ -980,9 +974,9 @@ Status DBImpl::RecoverInsertMem(WriteBatch* batch, VersionEdit* edit) { // if duplicate record, ignore if (log_sequence <= recover_mem_->GetLastSequence()) { - assert (last_sequence <= recover_mem_->GetLastSequence()); - Log(options_.info_log, "[%s] duplicate record, ignore %lu ~ %lu", - dbname_.c_str(), log_sequence, last_sequence); + assert(last_sequence <= recover_mem_->GetLastSequence()); + LEVELDB_LOG(options_.info_log, "[%s] duplicate record, ignore %lu ~ %lu", dbname_.c_str(), + log_sequence, last_sequence); return Status::OK(); } @@ -1025,11 +1019,10 @@ Status DBImpl::RecoverLastDumpToLevel0(VersionEdit* edit) { DeleteObsoleteFiles(); MaybeScheduleCompaction(); } else { - Log(options_.info_log, "[%s] Fail to modify manifest", - dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] Fail to modify manifest", dbname_.c_str()); } } else { - Log(options_.info_log, "[%s] Fail to dump log to level 0", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] Fail to dump log to level 0", dbname_.c_str()); } return s; } @@ -1047,47 +1040,57 @@ void DBImpl::MaybeScheduleCompaction() { if (shutting_down_.Acquire_Load()) { // DB is being deleted; no more background compactions } else if (bg_error_.IsIOPermissionDenied()) { - // We have met an PermissionDenied error, not try to do compaction anymore, the tablet will be unloaded soon + // We have met an PermissionDenied error, not try to do compaction anymore, + // the tablet will be unloaded soon } else { std::vector > scores; if (imm_ && !imm_->BeingFlushed()) { - scores.push_back(std::pair(kDumpMemTableScore, 0)); + scores.emplace_back(kDumpMemTableScore, 0); } if (manual_compaction_ && !manual_compaction_->being_sched && (manual_compaction_->compaction_conflict != kManualCompactConflict)) { - scores.push_back(std::pair(kManualCompactScore, 0)); + scores.emplace_back(kManualCompactScore, 0); } - versions_->CompactionScore(&scores); + versions_->GetCompactionScores(&scores); - size_t qlen = scores.size() > bg_compaction_tasks_.size() ? scores.size(): bg_compaction_tasks_.size(); + size_t qlen = std::max(scores.size(), bg_compaction_tasks_.size()); for (size_t i = 0; i < bg_compaction_tasks_.size(); i++) { CompactionTask* task = bg_compaction_tasks_[i]; - scores.push_back(std::pair(task->score, task->timeout)); + scores.emplace_back(task->score, task->timeout); } std::sort(scores.begin(), scores.end(), ScoreSortGreater); for (size_t i = 0; i < qlen; i++) { if (bg_compaction_tasks_.size() < options_.max_background_compactions) { - if (i < bg_compaction_tasks_.size()) { // try reschedule + if (i < bg_compaction_tasks_.size()) { // try reschedule CompactionTask* task = bg_compaction_tasks_[i]; - if (ScoreSortGreater(scores[i], std::pair(task->score, task->timeout))) { // resched + if (ScoreSortGreater( + scores[i], std::pair(task->score, task->timeout))) { // resched task->score = scores[i].first; task->timeout = scores[i].second; env_->ReSchedule(task->id, task->score, task->timeout); - Log(options_.info_log, "[%s] ReSchedule Compact[%ld] score= %.2f, timeout=%lu, currency %d", - dbname_.c_str(), task->id, task->score, task->timeout, (int)bg_compaction_tasks_.size()); - assert(scores[i].first <= 1 || scores[i].second == 0); // if score > 1, then timeout MUST be 0 + LEVELDB_LOG(options_.info_log, + "[%s] ReSchedule Compact[%ld] score= %.2f, " + "timeout=%lu, currency %d", + dbname_.c_str(), task->id, task->score, task->timeout, + (int)bg_compaction_tasks_.size()); + assert(scores[i].first <= 1 || + scores[i].second == 0); // if score > 1, then timeout MUST be 0 } - } else { // new compact task + } else { // new compact task CompactionTask* task = new CompactionTask; task->db = this; task->score = scores[i].first; task->timeout = scores[i].second; bg_compaction_tasks_.push_back(task); task->id = env_->Schedule(&DBImpl::BGWork, task, task->score, task->timeout); - Log(options_.info_log, "[%s] Schedule Compact[%ld] score= %.2f, timeout=%lu, currency %d", - dbname_.c_str(), task->id, task->score, task->timeout, (int)bg_compaction_tasks_.size()); - assert(scores[i].first <= 1 || scores[i].second == 0); // if score > 1, then timeout MUST be 0 + LEVELDB_LOG(options_.info_log, + "[%s] Schedule Compact[%ld] score= %.2f, timeout=%lu, " + "currency %d", + dbname_.c_str(), task->id, task->score, task->timeout, + (int)bg_compaction_tasks_.size()); + assert(scores[i].first <= 1 || + scores[i].second == 0); // if score > 1, then timeout MUST be 0 } } } @@ -1102,8 +1105,8 @@ void DBImpl::BGWork(void* task) { void DBImpl::BackgroundCall(CompactionTask* task) { MutexLock l(&mutex_); - Log(options_.info_log, "[%s] BackgroundCompact[%ld] score= %.2f currency %d", - dbname_.c_str(), task->id, task->score, (int)bg_compaction_tasks_.size()); + LEVELDB_LOG(options_.info_log, "[%s] BackgroundCompact[%ld] score= %.2f currency %d", + dbname_.c_str(), task->id, task->score, (int)bg_compaction_tasks_.size()); bool sched_idle = false; if (!shutting_down_.Acquire_Load()) { Status s = BackgroundCompaction(&sched_idle); @@ -1118,12 +1121,13 @@ void DBImpl::BackgroundCall(CompactionTask* task) { // chew up resources for failed compactions for the duration of // the problem. bg_cv_.SignalAll(); // In case a waiter can proceed despite the error - Log(options_.info_log, "[%s] Waiting after background compaction error: %s, retry: %d", - dbname_.c_str(), s.ToString().c_str(), consecutive_compaction_errors_); + LEVELDB_LOG(options_.info_log, + "[%s] Waiting after background compaction error: %s, retry: %d", dbname_.c_str(), + s.ToString().c_str(), consecutive_compaction_errors_); ++consecutive_compaction_errors_; if (s.IsIOPermissionDenied() || consecutive_compaction_errors_ > 100000) { - bg_error_ = s; - consecutive_compaction_errors_ = 0; + bg_error_ = s; + consecutive_compaction_errors_ = 0; } mutex_.Unlock(); int seconds_to_sleep = 1; @@ -1137,9 +1141,8 @@ void DBImpl::BackgroundCall(CompactionTask* task) { sched_idle = true; } - std::vector::iterator task_id = std::find(bg_compaction_tasks_.begin(), - bg_compaction_tasks_.end(), - task); + std::vector::iterator task_id = + std::find(bg_compaction_tasks_.begin(), bg_compaction_tasks_.end(), task); assert(task_id != bg_compaction_tasks_.end()); bg_compaction_tasks_.erase(task_id); delete task; @@ -1166,7 +1169,8 @@ Status DBImpl::BackgroundCompaction(bool* sched_idle) { InternalKey manual_end; if (is_manual) { ManualCompaction* m = manual_compaction_; - if (m->being_sched) { // other thread doing manual compaction or range being compacted + if (m->being_sched) { // other thread doing manual compaction or range + // being compacted return status; } m->being_sched = true; @@ -1177,12 +1181,13 @@ Status DBImpl::BackgroundCompaction(bool* sched_idle) { if (c != NULL) { manual_end = c->input(0, c->num_input_files(0) - 1)->largest; } - Log(options_.info_log, - "[%s] Manual compaction, conflit %u, at level-%d from %s .. %s; will stop at %s\n", - dbname_.c_str(), conflict, m->level, - (m->begin ? m->begin->DebugString().c_str() : "(begin)"), - (m->end ? m->end->DebugString().c_str() : "(end)"), - (m->done ? "(end)" : manual_end.DebugString().c_str())); + LEVELDB_LOG(options_.info_log, + "[%s] Manual compaction, conflit %u, at level-%d from %s .. " + "%s; will stop at %s\n", + dbname_.c_str(), conflict, m->level, + (m->begin ? m->begin->DebugString().c_str() : "(begin)"), + (m->end ? m->end->DebugString().c_str() : "(end)"), + (m->done ? "(end)" : manual_end.DebugString().c_str())); } else { c = versions_->PickCompaction(); } @@ -1198,14 +1203,12 @@ Status DBImpl::BackgroundCompaction(bool* sched_idle) { c->edit()->AddFile(c->output_level(), *f); status = versions_->LogAndApply(c->edit(), &mutex_); VersionSet::LevelSummaryStorage tmp; - Log(options_.info_log, "[%s] Moved #%08u, #%u to level-%d %lld bytes %s: %s\n", - dbname_.c_str(), - static_cast(f->number >> 32 & 0x7fffffff), //tablet number - static_cast(f->number & 0xffffffff), //sst number - c->output_level(), - static_cast(f->file_size), - status.ToString().c_str(), - versions_->LevelSummary(&tmp)); + LEVELDB_LOG(options_.info_log, "[%s] Moved #%08u, #%u to level-%d %lld bytes %s: %s\n", + dbname_.c_str(), + static_cast(f->number >> 32 & 0x7fffffff), // tablet number + static_cast(f->number & 0xffffffff), // sst number + c->output_level(), static_cast(f->file_size), + status.ToString().c_str(), versions_->LevelSummary(&tmp)); versions_->ReleaseCompaction(c, status); } else { status = ParallelCompaction(c); @@ -1217,9 +1220,8 @@ Status DBImpl::BackgroundCompaction(bool* sched_idle) { } else if (shutting_down_.Acquire_Load()) { // Ignore compaction errors found during shutting down } else { - Log(options_.info_log, - "[%s] Compaction error: %s", - dbname_.c_str(), status.ToString().c_str()); + LEVELDB_LOG(options_.info_log, "[%s] Compaction error: %s", dbname_.c_str(), + status.ToString().c_str()); if (bg_error_.ok()) { stink_bg_error_ = status; } @@ -1231,7 +1233,7 @@ Status DBImpl::BackgroundCompaction(bool* sched_idle) { if (is_manual && manual_compaction_ != NULL) { ManualCompaction* m = manual_compaction_; m->being_sched = false; - if (m->compaction_conflict != kManualCompactConflict) { // PickRange success + if (m->compaction_conflict != kManualCompactConflict) { // PickRange success if (!status.ok()) { m->done = true; } @@ -1243,7 +1245,7 @@ Status DBImpl::BackgroundCompaction(bool* sched_idle) { } manual_compaction_ = NULL; } - } + } return status; } @@ -1253,22 +1255,19 @@ Status DBImpl::ParallelCompaction(Compaction* c) { std::vector compaction_state_vec; std::vector compact_stragety_vec; assert(versions_->NumLevelFiles(c->level()) > 0); - SequenceNumber smallest_snapshot = snapshots_.empty() ? kMaxSequenceNumber : *(snapshots_.begin()); + SequenceNumber smallest_snapshot = + snapshots_.empty() ? kMaxSequenceNumber : *(snapshots_.begin()); versions_->GenerateSubCompaction(c, &compaction_vec, &mutex_); mutex_.Unlock(); // handle compaction without Lock std::vector thread_pool; thread_pool.reserve(compaction_vec.size() - 1); - Log(options_.info_log, "[%s] parallel compacting %d@%d + %d@%d files, " - "sub_compact %lu, snapshot %lu\n", - dbname_.c_str(), - c->num_input_files(0), - c->level(), - c->num_input_files(1), - c->output_level(), - compaction_vec.size(), - smallest_snapshot); + LEVELDB_LOG(options_.info_log, + "[%s] parallel compacting %d@%d + %d@%d files, " + "sub_compact %lu, snapshot %lu\n", + dbname_.c_str(), c->num_input_files(0), c->level(), c->num_input_files(1), + c->output_level(), compaction_vec.size(), smallest_snapshot); for (size_t i = 0; i < compaction_vec.size(); i++) { CompactionState* compaction = new CompactionState(compaction_vec[i]); assert(compaction->builder == NULL); @@ -1279,15 +1278,12 @@ Status DBImpl::ParallelCompaction(Compaction* c) { CompactStrategy* compact_strategy = NewCompactStrategy(compaction); compact_stragety_vec.push_back(compact_strategy); if (i == 0) { - Log(options_.info_log, "[%s] compact strategy: %s, snapshot %lu\n", - dbname_.c_str(), - compact_strategy->Name(), - compaction->smallest_snapshot); + LEVELDB_LOG(options_.info_log, "[%s] compact strategy: %s, snapshot %lu\n", dbname_.c_str(), + compact_strategy->Name(), compaction->smallest_snapshot); } if (i < compaction_vec.size() - 1) { - thread_pool.emplace_back(&DBImpl::HandleCompactionWork, this, - compaction, compact_strategy); + thread_pool.emplace_back(&DBImpl::HandleCompactionWork, this, compaction, compact_strategy); } else { HandleCompactionWork(compaction, compact_strategy); } @@ -1301,7 +1297,7 @@ Status DBImpl::ParallelCompaction(Compaction* c) { compact->smallest_snapshot = smallest_snapshot; for (size_t i = 0; i < compaction_vec.size(); i++) { CompactionState* compaction = compaction_state_vec[i]; - for (auto & out : compaction->outputs) { + for (auto& out : compaction->outputs) { compact->outputs.push_back(out); stats.bytes_written += out.file_size; } @@ -1325,21 +1321,22 @@ Status DBImpl::ParallelCompaction(Compaction* c) { status = InstallCompactionResults(compact); } VersionSet::LevelSummaryStorage tmp; - Log(options_.info_log, "[%s] compacted to: %s, compacte stat %s", - dbname_.c_str(), versions_->LevelSummary(&tmp), status.ToString().c_str()); + LEVELDB_LOG(options_.info_log, "[%s] compacted to: %s, compacte stat %s", dbname_.c_str(), + versions_->LevelSummary(&tmp), status.ToString().c_str()); stats.micros = env_->NowMicros() - start_micros; stats_[compact->compaction->output_level()].Add(stats); for (size_t i = 0; i < compaction_vec.size(); i++) { CompactionState* compaction = compaction_state_vec[i]; - CleanupCompaction(compaction); // pop pedning output, which can be deleted in DeleteObSoleteFiles() + CleanupCompaction(compaction); // pop pedning output, which can be deleted + // in DeleteObSoleteFiles() delete compaction_vec[i]; } assert(compact->builder == NULL); assert(compact->outfile == NULL); CleanupCompaction(compact); - versions_->ReleaseCompaction(c, status); // current_version has reference to c->inputs_[0,1] + versions_->ReleaseCompaction(c, status); // current_version has reference to c->inputs_[0,1] c->ReleaseInputs(); if (!status.IsIOPermissionDenied()) { DeleteObsoleteFiles(); @@ -1360,7 +1357,7 @@ void DBImpl::CleanupCompaction(CompactionState* compact) { for (size_t i = 0; i < compact->outputs.size(); i++) { const CompactionState::Output& out = compact->outputs[i]; if (pending_outputs_.erase(BuildFullFileNumber(dbname_, out.number)) > 0) { - Log(options_.info_log, "[%s] erase pending_output #%lu", dbname_.c_str(), out.number); + LEVELDB_LOG(options_.info_log, "[%s] erase pending_output #%lu", dbname_.c_str(), out.number); } } delete compact; @@ -1380,7 +1377,7 @@ Status DBImpl::OpenCompactionOutputFile(CompactionState* compact) { out.largest.Clear(); compact->outputs.push_back(out); - Log(options_.info_log, "[%s] insert pending_output #%lu", dbname_.c_str(), file_number); + LEVELDB_LOG(options_.info_log, "[%s] insert pending_output #%lu", dbname_.c_str(), file_number); mutex_.Unlock(); } @@ -1393,8 +1390,7 @@ Status DBImpl::OpenCompactionOutputFile(CompactionState* compact) { return s; } -Status DBImpl::FinishCompactionOutputFile(CompactionState* compact, - Iterator* input) { +Status DBImpl::FinishCompactionOutputFile(CompactionState* compact, Iterator* input) { assert(compact != NULL); assert(compact->outfile != NULL); assert(compact->builder != NULL); @@ -1405,7 +1401,7 @@ Status DBImpl::FinishCompactionOutputFile(CompactionState* compact, // Check for iterator errors Status s; if (!options_.ignore_corruption_in_compaction) { - s = input->status(); + s = input->status(); } const uint64_t current_entries = compact->builder->NumEntries(); compact->current_output()->entries = current_entries; @@ -1430,23 +1426,19 @@ Status DBImpl::FinishCompactionOutputFile(CompactionState* compact, if (s.ok() && current_entries > 0) { // Verify that the table is usable - Iterator* iter = table_cache_->NewIterator(ReadOptions(&options_), dbname_, - BuildFullFileNumber(dbname_, output_number), - current_bytes); + Iterator* iter = + table_cache_->NewIterator(ReadOptions(&options_), dbname_, + BuildFullFileNumber(dbname_, output_number), current_bytes); s = iter->status(); delete iter; if (s.ok()) { - Log(options_.info_log, - "[%s] Generated table #%llu: %lld keys, %lld (+ %lld ) bytes", - dbname_.c_str(), - (unsigned long long) output_number, - (unsigned long long) current_entries, - (unsigned long long) current_bytes, - (unsigned long long) saved_bytes); + LEVELDB_LOG(options_.info_log, "[%s] Generated table #%llu: %lld keys, %lld (+ %lld ) bytes", + dbname_.c_str(), (unsigned long long)output_number, + (unsigned long long)current_entries, (unsigned long long)current_bytes, + (unsigned long long)saved_bytes); } else { - Log(options_.info_log, - "[%s] Verify new sst file fail #%llu", - dbname_.c_str(), (unsigned long long) output_number); + LEVELDB_LOG(options_.info_log, "[%s] Verify new sst file fail #%llu", dbname_.c_str(), + (unsigned long long)output_number); } } return s; @@ -1454,41 +1446,37 @@ Status DBImpl::FinishCompactionOutputFile(CompactionState* compact, Status DBImpl::InstallCompactionResults(CompactionState* compact) { mutex_.AssertHeld(); - Log(options_.info_log, "[%s] Compacted %d@%d + %d@%d files => %lld bytes", - dbname_.c_str(), - compact->compaction->num_input_files(0), - compact->compaction->level(), - compact->compaction->num_input_files(1), - compact->compaction->output_level(), - static_cast(compact->total_bytes)); + LEVELDB_LOG(options_.info_log, "[%s] Compacted %d@%d + %d@%d files => %lld bytes", + dbname_.c_str(), compact->compaction->num_input_files(0), + compact->compaction->level(), compact->compaction->num_input_files(1), + compact->compaction->output_level(), static_cast(compact->total_bytes)); // Add compaction outputs, skip file without entries compact->compaction->AddInputDeletions(compact->compaction->edit()); for (size_t i = 0; i < compact->outputs.size(); i++) { CompactionState::Output& out = compact->outputs[i]; if (out.entries <= 0) { - continue; + continue; } std::sort(out.ttls.begin(), out.ttls.end()); - uint32_t idx = out.ttls.size() * options_.ttl_percentage / 100 ; + uint32_t idx = out.ttls.size() * options_.ttl_percentage / 100; compact->compaction->edit()->AddFile( - compact->compaction->output_level(), BuildFullFileNumber(dbname_, out.number), - out.file_size, out.smallest, out.largest, - out.del_num * 100 / out.entries /* delete tag percentage */, - ((out.ttls.size() > 0) && (idx < out.ttls.size())) ? out.ttls[idx] : 0 /* sst's check ttl's time */, - ((out.ttls.size() > 0) && (idx < out.ttls.size())) ? idx * 100 / out.ttls.size() : 0 /* delete tag percentage */); - Log(options_.info_log, "[%s] AddFile, level %d, number #%lu, entries %ld, del_nr %lu" - ", ttl_nr %lu, del_p %lu, ttl_check_ts %lu, ttl_p %lu\n", - dbname_.c_str(), - compact->compaction->output_level(), - out.number, - out.entries, - out.del_num, - out.ttls.size(), - out.del_num * 100 / out.entries, - ((out.ttls.size() > 0) && (idx < out.ttls.size())) ? out.ttls[idx] : 0, - ((out.ttls.size() > 0) && (idx < out.ttls.size())) ? idx * 100 / out.ttls.size() : 0); + compact->compaction->output_level(), BuildFullFileNumber(dbname_, out.number), + out.file_size, out.smallest, out.largest, + out.del_num * 100 / out.entries /* delete tag percentage */, + ((out.ttls.size() > 0) && (idx < out.ttls.size())) ? out.ttls[idx] + : 0 /* sst's check ttl's time */, + ((out.ttls.size() > 0) && (idx < out.ttls.size())) ? idx * 100 / out.ttls.size() + : 0 /* delete tag percentage */); + LEVELDB_LOG( + options_.info_log, + "[%s] AddFile, level %d, number #%lu, entries %ld, del_nr %lu" + ", ttl_nr %lu, del_p %lu, ttl_check_ts %lu, ttl_p %lu\n", + dbname_.c_str(), compact->compaction->output_level(), out.number, out.entries, out.del_num, + out.ttls.size(), out.del_num * 100 / out.entries, + ((out.ttls.size() > 0) && (idx < out.ttls.size())) ? out.ttls[idx] : 0, + ((out.ttls.size() > 0) && (idx < out.ttls.size())) ? idx * 100 / out.ttls.size() : 0); } return versions_->LogAndApply(compact->compaction->edit(), &mutex_); } @@ -1503,8 +1491,7 @@ CompactStrategy* DBImpl::NewCompactStrategy(CompactionState* compact) { } // ** Handle sub compaction without LOCK ** -void DBImpl::HandleCompactionWork(CompactionState* compact, - CompactStrategy* compact_strategy) { +void DBImpl::HandleCompactionWork(CompactionState* compact, CompactStrategy* compact_strategy) { Compaction* c = compact->compaction; Status& status = compact->status; Iterator* input = versions_->MakeInputIterator(c); @@ -1514,44 +1501,37 @@ void DBImpl::HandleCompactionWork(CompactionState* compact, input->Seek(c->sub_compact_start_); } Slice end_key(c->sub_compact_end_); - Log(options_.info_log, "[%s] handle %d@%d + %d@%d compact, range [%s, %s)\n", - dbname_.c_str(), - c->num_input_files(0), - c->level(), - c->num_input_files(1), - c->output_level(), - c->sub_compact_start_.c_str(), - c->sub_compact_end_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] handle %d@%d + %d@%d compact, range [%s, %s)\n", + dbname_.c_str(), c->num_input_files(0), c->level(), c->num_input_files(1), + c->output_level(), c->sub_compact_start_.c_str(), c->sub_compact_end_.c_str()); ParsedInternalKey ikey; std::string current_user_key; bool has_current_user_key = false; SequenceNumber last_sequence_for_key = kMaxSequenceNumber; - for (; input->Valid() && !shutting_down_.Acquire_Load(); ) { + for (; input->Valid() && !shutting_down_.Acquire_Load();) { // Prioritize immutable compaction work if (has_imm_.NoBarrier_Load() != NULL) { mutex_.Lock(); if (imm_ && !imm_->BeingFlushed()) { - CompactMemTable(); // no need check failure, because imm_ not null if dump fail. + CompactMemTable(); // no need check failure, because imm_ not null if + // dump fail. bg_cv_.SignalAll(); // Wakeup MakeRoomForWrite() if necessary } mutex_.Unlock(); } Slice key = input->key(); - if (end_key.size() > 0 && internal_comparator_.InternalKeyComparator::Compare(input->key(), end_key) >= 0) { - Log(options_.info_log, "[%s] handle %d@%d + %d@%d compact, stop at %s\n", - dbname_.c_str(), - c->num_input_files(0), - c->level(), - c->num_input_files(1), - c->output_level(), - end_key.data()); - break; // reach end_key, stop this sub compaction + if (end_key.size() > 0 && + internal_comparator_.InternalKeyComparator::Compare(input->key(), end_key) >= 0) { + LEVELDB_LOG(options_.info_log, "[%s] handle %d@%d + %d@%d compact, stop at %s\n", + dbname_.c_str(), c->num_input_files(0), c->level(), c->num_input_files(1), + c->output_level(), end_key.data()); + break; // reach end_key, stop this sub compaction } if (compact->compaction->ShouldStopBefore(key) && - compact->builder != NULL) { // should not overlap level() + 2 too much + compact->builder != NULL) { // should not overlap level() + 2 too much status = FinishCompactionOutputFile(compact, input); if (!status.ok()) { break; @@ -1567,8 +1547,7 @@ void DBImpl::HandleCompactionWork(CompactionState* compact, last_sequence_for_key = kMaxSequenceNumber; } else { if (!has_current_user_key || - user_comparator()->Compare(ikey.user_key, - Slice(current_user_key)) != 0) { + user_comparator()->Compare(ikey.user_key, Slice(current_user_key)) != 0) { // First occurrence of this user key current_user_key.assign(ikey.user_key.data(), ikey.user_key.size()); has_current_user_key = true; @@ -1580,9 +1559,8 @@ void DBImpl::HandleCompactionWork(CompactionState* compact, } else if (last_sequence_for_key <= compact->smallest_snapshot && last_sequence_for_key != kMaxSequenceNumber) { // Hidden by an newer entry for same user key - drop = true; // (A) - } else if (ikey.type == kTypeDeletion && - ikey.sequence <= compact->smallest_snapshot && + drop = true; // (A) + } else if (ikey.type == kTypeDeletion && ikey.sequence <= compact->smallest_snapshot && options_.drop_base_level_del_in_compaction && compact->compaction->IsBaseLevelForKey(ikey.user_key)) { // For this user key: @@ -1604,7 +1582,7 @@ void DBImpl::HandleCompactionWork(CompactionState* compact, last_sequence_for_key = ikey.sequence; } #if 0 - Log(options_.info_log, + LEVELDB_LOG(options_.info_log, " Compact: %s, seq %d, type: %d %d, drop: %d, is_base: %d, " "%d smallest_snapshot: %d", ikey.user_key.ToString().c_str(), @@ -1631,11 +1609,10 @@ void DBImpl::HandleCompactionWork(CompactionState* compact, if (compact_strategy && ikey.sequence <= compact->smallest_snapshot) { std::string merged_value; std::string merged_key; - has_atom_merged = compact_strategy->MergeAtomicOPs( - input, &merged_value, &merged_key); + has_atom_merged = compact_strategy->MergeAtomicOPs(input, &merged_value, &merged_key); if (has_atom_merged) { - Slice newValue(merged_value); - compact->builder->Add(Slice(merged_key), newValue); + Slice newValue(merged_value); + compact->builder->Add(Slice(merged_key), newValue); } } @@ -1645,19 +1622,14 @@ void DBImpl::HandleCompactionWork(CompactionState* compact, int64_t ttl = -1; compact_strategy && compact_strategy->CheckTag(ikey.user_key, &del_tag, &ttl); if (ikey.type == kTypeDeletion || del_tag) { - //Log(options_.info_log, "[%s] add del_tag %d, key_type %d\n", - // dbname_.c_str(), del_tag, ikey.type); compact->current_output()->del_num++; - } else if (ttl > 0) { // del tag has not ttl - //Log(options_.info_log, "[%s] add ttl_tag %ld\n", - // dbname_.c_str(), ttl); + } else if (ttl > 0) { // del tag has not ttl compact->current_output()->ttls.push_back(ttl); } compact->builder->Add(key, input->value()); } // Close output file if it is big enough - if (compact->builder->FileSize() >= - compact->compaction->MaxOutputFileSize()) { + if (compact->builder->FileSize() >= compact->compaction->MaxOutputFileSize()) { status = FinishCompactionOutputFile(compact, input); if (!status.ok()) { break; @@ -1678,8 +1650,8 @@ void DBImpl::HandleCompactionWork(CompactionState* compact, } if (status.ok() && !input->status().ok()) { if (options_.ignore_corruption_in_compaction) { - Log(options_.info_log, "[%s] ignore compaction error: %s", - dbname_.c_str(), input->status().ToString().c_str()); + LEVELDB_LOG(options_.info_log, "[%s] ignore compaction error: %s", dbname_.c_str(), + input->status().ToString().c_str()); } else { status = input->status(); } @@ -1705,8 +1677,7 @@ static void CleanupIteratorState(void* arg1, void* arg2) { delete state; } -Iterator* DBImpl::NewInternalIterator(const ReadOptions& options, - SequenceNumber* latest_snapshot) { +Iterator* DBImpl::NewInternalIterator(const ReadOptions& options, SequenceNumber* latest_snapshot) { IterState* cleanup = new IterState; mutex_.Lock(); *latest_snapshot = GetLastSequence(false); @@ -1728,9 +1699,7 @@ Iterator* DBImpl::NewInternalIterator(const ReadOptions& options, } current->AddIterators(options, &child_iterators); Iterator* internal_iter = - NewMergingIterator(&internal_comparator_, - &child_iterators[0], - child_iterators.size()); + NewMergingIterator(&internal_comparator_, &child_iterators[0], child_iterators.size()); cleanup->mu = &mutex_; cleanup->mem = mem; @@ -1751,9 +1720,7 @@ int64_t DBImpl::TEST_MaxNextLevelOverlappingBytes() { return versions_->MaxNextLevelOverlappingBytes(); } -Status DBImpl::Get(const ReadOptions& options, - const Slice& key, - std::string* value) { +Status DBImpl::Get(const ReadOptions& options, const Slice& key, std::string* value) { Status s; MutexLock l(&mutex_); SequenceNumber snapshot; @@ -1803,26 +1770,21 @@ Iterator* DBImpl::NewIterator(const ReadOptions& options) { Iterator* internal_iter = NewInternalIterator(options, &latest_snapshot); return NewDBIterator( &dbname_, env_, user_comparator(), internal_iter, - (options.snapshot != kMaxSequenceNumber - ? options.snapshot : latest_snapshot), - options.rollbacks); + (options.snapshot != kMaxSequenceNumber ? options.snapshot : latest_snapshot), + options.rollbacks); } const uint64_t DBImpl::GetSnapshot(uint64_t last_sequence) { MutexLock l(&mutex_); if (options_.use_memtable_on_leveldb) { if (mem_) { - ((MemTableOnLevelDB*)mem_)->GetSnapshot(last_sequence); + mem_->GetSnapshot(last_sequence); } if (imm_) { - ((MemTableOnLevelDB*)imm_)->GetSnapshot(last_sequence); + imm_->GetSnapshot(last_sequence); } } snapshots_.insert(last_sequence); - // Log(options_.info_log, - // "[%s] get snapshot: %llu, size %llu", dbname_.c_str(), - // (unsigned long long)last_sequence, - // (unsigned long long)snapshots_.size()); return last_sequence; } @@ -1830,19 +1792,16 @@ void DBImpl::ReleaseSnapshot(uint64_t sequence_number) { MutexLock l(&mutex_); if (options_.use_memtable_on_leveldb) { if (mem_) { - ((MemTableOnLevelDB*)mem_)->ReleaseSnapshot(sequence_number); + mem_->ReleaseSnapshot(sequence_number); } if (imm_) { - ((MemTableOnLevelDB*)imm_)->ReleaseSnapshot(sequence_number); + imm_->ReleaseSnapshot(sequence_number); } } + std::multiset::iterator it = snapshots_.find(sequence_number); assert(it != snapshots_.end()); snapshots_.erase(it); - //Log(options_.info_log, - // "[%s] release snapshot: %llu, size %llu", dbname_.c_str(), - // (unsigned long long)sequence_number, - // (unsigned long long)snapshots_.size()); } const uint64_t DBImpl::Rollback(uint64_t snapshot_seq, uint64_t rollback_point) { @@ -1869,8 +1828,8 @@ bool DBImpl::BusyWrite() { void DBImpl::Workload(double* write_workload) { MutexLock l(&mutex_); std::vector > scores; - versions_->CompactionScore(&scores); - double wwl = scores.size() > 0? scores[0].first: 0; + versions_->GetCompactionScores(&scores); + double wwl = scores.size() > 0 ? scores[0].first : 0; if (wwl >= 0) { *write_workload = wwl; } else { @@ -1938,9 +1897,7 @@ Status DBImpl::MakeRoomForWrite(bool force) { // Yield previous error s = bg_error_; break; - } else if ( - allow_delay && - versions_->NumLevelFiles(0) >= config::kL0_SlowdownWritesTrigger) { + } else if (allow_delay && versions_->NumLevelFiles(0) >= config::kL0_SlowdownWritesTrigger) { // We are getting close to hitting a hard limit on the number of // L0 files. Rather than delaying a single write by several // seconds when we hit the hard limit, start delaying each @@ -1953,20 +1910,17 @@ Status DBImpl::MakeRoomForWrite(bool force) { mutex_.Lock(); } else if (shutting_down_.Acquire_Load()) { break; - } else if (!force && - (mem_->ApproximateMemoryUsage() <= options_.write_buffer_size)) { + } else if (!force && (mem_->ApproximateMemoryUsage() <= options_.write_buffer_size)) { // There is room in current memtable break; } else if (imm_ != NULL) { // We have filled up the current memtable, but the previous // one is still being compacted, so we wait. - Log(options_.info_log, "[%s] Current memtable full; waiting...\n", - dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] Current memtable full; waiting...\n", dbname_.c_str()); bg_cv_.Wait(); } else if (versions_->NumLevelFiles(0) >= config::kL0_StopWritesTrigger) { // There are too many level-0 files. - Log(options_.info_log, "[%s] Too many L0 files; waiting...\n", - dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] Too many L0 files; waiting...\n", dbname_.c_str()); bg_cv_.Wait(); } else { imm_ = mem_; @@ -1974,7 +1928,7 @@ Status DBImpl::MakeRoomForWrite(bool force) { mem_ = NewMemTable(); mem_->Ref(); bound_log_size_ = 0; - force = false; // Do not force another compaction if have room + force = false; // Do not force another compaction if have room MaybeScheduleCompaction(); } } @@ -1992,16 +1946,15 @@ void DBImpl::AddBoundLogSize(uint64_t size) { return; } if (imm_ != NULL) { - Log(options_.info_log, "[%s] [TimeoutCompaction] imm_ != NULL", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] [TimeoutCompaction] imm_ != NULL", dbname_.c_str()); return; } } Status s = Write(WriteOptions(), NULL); if (s.ok()) { - Log(options_.info_log, "[%s] [TimeoutCompaction] done %lu", - dbname_.c_str(), size); + LEVELDB_LOG(options_.info_log, "[%s] [TimeoutCompaction] done %lu", dbname_.c_str(), size); } else { - Log(options_.info_log, "[%s] [TimeoutCompaction] fail", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] [TimeoutCompaction] fail", dbname_.c_str()); } } @@ -2022,8 +1975,7 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) { return false; } else { char buf[100]; - snprintf(buf, sizeof(buf), "%d", - versions_->NumLevelFiles(static_cast(level))); + snprintf(buf, sizeof(buf), "%d", versions_->NumLevelFiles(static_cast(level))); *value = buf; return true; } @@ -2032,21 +1984,14 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) { snprintf(buf, sizeof(buf), " Compactions\n" "Level Files Size(MB) Time(sec) Read(MB) Write(MB)\n" - "--------------------------------------------------\n" - ); + "--------------------------------------------------\n"); value->append(buf); for (int level = 0; level < config::kNumLevels; level++) { int files = versions_->NumLevelFiles(level); if (stats_[level].micros > 0 || files > 0) { - snprintf( - buf, sizeof(buf), - "%3d %8d %8.0f %9.0f %8.0f %9.0f\n", - level, - files, - versions_->NumLevelBytes(level) / 1048576.0, - stats_[level].micros / 1e6, - stats_[level].bytes_read / 1048576.0, - stats_[level].bytes_written / 1048576.0); + snprintf(buf, sizeof(buf), "%3d %8d %8.0f %9.0f %8.0f %9.0f\n", level, files, + versions_->NumLevelBytes(level) / 1048576.0, stats_[level].micros / 1e6, + stats_[level].bytes_read / 1048576.0, stats_[level].bytes_written / 1048576.0); value->append(buf); } } @@ -2056,8 +2001,9 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) { return true; } else if (in == "verify-db-integrity") { std::map check_file_list; + std::map manifest_error_list; versions_->AddLiveFilesWithSize(&check_file_list); - mutex_.Unlock(); + l.Unlock(); std::set tablet_num; std::map::iterator it = check_file_list.begin(); @@ -2077,8 +2023,6 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) { return false; } s = env_->GetChildren(tablet_path, &filenames); - //Log(options_.info_log, "[%s] verify db(slow), GetChildren %s, files_nr %lu, status %s", - // dbname_.c_str(), tablet_path.c_str(), filenames.size(), s.ToString().c_str()); uint64_t number; FileType type; @@ -2095,7 +2039,7 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) { uint64_t fsize = 0; Status s1 = env_->GetFileSize(tablet_path + "/" + filenames[i], &fsize); - // when some one timeout, maybe dfs master is busy now, + // when some one timeout, maybe dfs master is busy now, // return immediate, check after next round if (s1.IsTimeOut()) { return false; @@ -2103,15 +2047,33 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) { if (!s1.ok() || check_file_list[tablet_no] == fsize) { check_file_list.erase(tablet_no); } else { - Log(options_.info_log, "[%s] verify db, size mismatch, " - "path %s, tablet %s, size(in meta) %lu, size(in fs) %lu", - dbname_.c_str(), tablet_path.c_str(), filenames[i].c_str(), check_file_list[tablet_no], fsize); + LEVELDB_LOG(options_.info_log, + "[%s] verify db, size mismatch, " + "path %s, tablet %s, size(in meta) %lu, size(in fs) %lu", + dbname_.c_str(), tablet_path.c_str(), filenames[i].c_str(), + check_file_list[tablet_no], fsize); + } + } else if (ParseFileName(filenames[i], &number, &type) && (type == kCurrentFile)) { + std::string desc_name; + Status s2 = ReadFileToString(env_, tablet_path + "/" + filenames[i], &desc_name); + if (s2.ok()) { + if (!desc_name.empty() && desc_name[desc_name.size() - 1] == '\n') { + desc_name.resize(desc_name.size() - 1); + } + s2 = env_->FileExists(tablet_path + "/" + desc_name); + if (s2.IsNotFound() || desc_name.empty()) { + manifest_error_list[tablet_path] = desc_name; + LEVELDB_LOG(options_.info_log, + "[%s] verify db, cur mani mismatch, " + "tablet %s, manifest %s is miss", + dbname_.c_str(), tablet_path.c_str(), desc_name.c_str()); + } } } } } - mutex_.Lock(); + l.Lock(); std::map live; versions_->AddLiveFilesWithSize(&live); @@ -2124,22 +2086,25 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) { } } - if (s.ok() && check_file_list.empty()) { // verify success + if (s.ok() && check_file_list.empty() && manifest_error_list.empty()) { // verify success value->append("verify_success"); - } else if (s.ok()) { //sst file lost + } else if (s.ok() && !manifest_error_list.empty()) { // current manifest mismatch + value->append("manifest_error"); + LEVELDB_LOG(options_.info_log, "[%s] db_manifest_error", dbname_.c_str()); + } else if (s.ok()) { // sst file lost value->append("verify_fail"); - Log(options_.info_log, "[%s] db_corruption, lost %lu", - dbname_.c_str(), check_file_list.size()); + LEVELDB_LOG(options_.info_log, "[%s] db_corruption, lost %lu", dbname_.c_str(), + check_file_list.size()); } return s.ok(); } else if (in == "compaction_error") { if (!bg_error_.ok()) { - stink_bg_error_ = bg_error_; + stink_bg_error_ = bg_error_; } - + if (!stink_bg_error_.ok()) { - value->append("Corruption: "); - value->append(stink_bg_error_.ToString()); + value->append("Corruption: "); + value->append(stink_bg_error_.ToString()); } bool ret = !stink_bg_error_.ok(); // reset stink_bg_error_ to ok @@ -2174,17 +2139,29 @@ void DBImpl::GetApproximateSizes(const Range* range, int n, uint64_t* sizes) { } } -void DBImpl::GetApproximateSizes(uint64_t* size, std::vector* lgsize) { +void DBImpl::GetApproximateSizes(uint64_t* size, std::vector* lgsize, + uint64_t* mem_table_size) { MutexLock l(&mutex_); versions_->current()->GetApproximateSizes(size); + if (mem_table_size) { + *mem_table_size = 0; + } // add mem&imm size if (size) { if (mem_) { - *size += mem_->ApproximateMemoryUsage(); + auto tmp_mem_size = mem_->ApproximateMemoryUsage(); + *size += tmp_mem_size; + if (mem_table_size) { + *mem_table_size += tmp_mem_size; + } } if (imm_) { - *size += imm_->ApproximateMemoryUsage(); + auto tmp_imm_table_size = imm_->ApproximateMemoryUsage(); + *size += tmp_imm_table_size; + if (mem_table_size) { + *mem_table_size += tmp_imm_table_size; + } } } } @@ -2202,29 +2179,48 @@ uint64_t DBImpl::GetLastSequence(bool is_locked) { retval = versions_->LastSequence(); } if (is_locked) { - mutex_.Unlock(); + mutex_.Unlock(); } return retval; } MemTable* DBImpl::NewMemTable() const { - if (!options_.use_memtable_on_leveldb) { - return new MemTable(internal_comparator_, - options_.enable_strategy_when_get ? options_.compact_strategy_factory : NULL); + if (!options_.use_memtable_on_leveldb) { + if (options_.memtable_shard_num > 1) { + LEVELDB_LOG(options_.info_log, "[%s] New shard base memTable, shard num: %d", dbname_.c_str(), + options_.memtable_shard_num); + return new ShardedMemTable( + internal_comparator_, + options_.enable_strategy_when_get ? options_.compact_strategy_factory : NULL, + options_.memtable_shard_num); } else { - Logger* info_log = NULL; - //Logger* info_log = options_.info_log; - MemTableOnLevelDB* new_mem = new MemTableOnLevelDB(dbname_, internal_comparator_, - options_.compact_strategy_factory, - options_.memtable_ldb_write_buffer_size, - options_.memtable_ldb_block_size, - info_log); - std::multiset::iterator i = snapshots_.begin(); - for (; i != snapshots_.end(); ++i) { - new_mem->GetSnapshot(*i); - } - return new_mem; + return new BaseMemTable(internal_comparator_, options_.enable_strategy_when_get + ? options_.compact_strategy_factory + : NULL); + } + } else { + Logger* info_log = NULL; + MemTable* new_mem = nullptr; + if (options_.memtable_shard_num > 1) { + LEVELDB_LOG(options_.info_log, "[%s] New shard leveldb memTable, shard num: %d", + dbname_.c_str(), options_.memtable_shard_num); + new_mem = new ShardedMemTable( + dbname_, internal_comparator_, options_.compact_strategy_factory, + options_.memtable_ldb_write_buffer_size, options_.memtable_ldb_block_size, info_log, + options_.memtable_shard_num); + } else { + // Logger* info_log = options_.info_log; + new_mem = new MemTableOnLevelDB( + dbname_, internal_comparator_, options_.compact_strategy_factory, + options_.memtable_ldb_write_buffer_size, options_.memtable_ldb_block_size, info_log); + } + + for (auto snapshot : snapshots_) { + new_mem->GetSnapshot(snapshot); } + + return new_mem; + } } uint64_t DBImpl::GetLastVerSequence() { @@ -2238,29 +2234,29 @@ Iterator* DBImpl::NewInternalIterator() { } Status DBImpl::BeginNewDbTransaction() { - Log(options_.info_log, "[%s] Begin load txn",dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] Begin load txn", dbname_.c_str()); std::string lock_file_name = dbname_ + init_load_filelock; Status s = env_->FileExists(lock_file_name); if (s.IsNotFound()) { // first new by split or merge add __lock file for first create lg s = WriteStringToFileSync(env_, "\n", lock_file_name); if (!s.ok()) { - Log(options_.info_log, "[%s] fail to start new db transaction: %s", - dbname_.c_str(), s.ToString().c_str()); + LEVELDB_LOG(options_.info_log, "[%s] fail to start new db transaction: %s", dbname_.c_str(), + s.ToString().c_str()); return s; } } else if (s.ok()) { - // have failed before this time to open + // have failed before this time to open // && ignore corruption option not opened // && don't have sst files // need to delete all files in this db except __init_load_filelock file - Log(options_.info_log, "[%s] begin to re-new db: %s", - dbname_.c_str(), s.ToString().c_str()); + LEVELDB_LOG(options_.info_log, "[%s] begin to re-new db: %s", dbname_.c_str(), + s.ToString().c_str()); std::vector files; s = env_->GetChildren(dbname_, &files); if (!s.ok()) { - Log(options_.info_log, "[%s] fail to re-new db: %s", - dbname_.c_str(), s.ToString().c_str()); + LEVELDB_LOG(options_.info_log, "[%s] fail to re-new db: %s", dbname_.c_str(), + s.ToString().c_str()); return s; } uint64_t number; @@ -2275,14 +2271,14 @@ Status DBImpl::BeginNewDbTransaction() { if ("/" + files[f] != init_load_filelock) { s = env_->DeleteFile(old_file_name); if (!s.ok()) { - Log(options_.info_log, "[%s] fail to re-new db: %s", - dbname_.c_str(), s.ToString().c_str()); + LEVELDB_LOG(options_.info_log, "[%s] fail to re-new db: %s", dbname_.c_str(), + s.ToString().c_str()); return s; } } } } - return s; + return s; } Status DBImpl::CommitNewDbTransaction() { @@ -2291,19 +2287,19 @@ Status DBImpl::CommitNewDbTransaction() { return s; } - Log(options_.info_log, "[%s] Commit load txn", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] Commit load txn", dbname_.c_str()); std::string lock_file_name = dbname_ + init_load_filelock; s = env_->FileExists(lock_file_name); if (s.IsNotFound()) { // lost lock file during this new db - Log(options_.info_log, "[%s] find transaction lock file fail: %s", - dbname_.c_str(), s.ToString().c_str()); + LEVELDB_LOG(options_.info_log, "[%s] find transaction lock file fail: %s", dbname_.c_str(), + s.ToString().c_str()); return Status::Corruption("newdb transaction lock disappeared"); } else if (s.ok()) { s = env_->DeleteFile(lock_file_name); if (!s.ok()) { - Log(options_.info_log, "[%s] delete transaction lock file fail: %s", - dbname_.c_str(), s.ToString().c_str()); + LEVELDB_LOG(options_.info_log, "[%s] delete transaction lock file fail: %s", dbname_.c_str(), + s.ToString().c_str()); return Status::Corruption("newdb transaction clean lock faild"); } } diff --git a/src/leveldb/db/db_impl.h b/src/leveldb/db/db_impl.h index be2c4e0bd..36d549954 100644 --- a/src/leveldb/db/db_impl.h +++ b/src/leveldb/db/db_impl.h @@ -11,6 +11,7 @@ #include #include +#include #include "db/db_table.h" #include "db/dbformat.h" #include "db/log_writer.h" @@ -41,18 +42,18 @@ class DBImpl : public DB { virtual Status Put(const WriteOptions&, const Slice& key, const Slice& value); virtual Status Delete(const WriteOptions&, const Slice& key); virtual Status Write(const WriteOptions& options, WriteBatch* updates); - virtual Status Get(const ReadOptions& options, - const Slice& key, - std::string* value); + virtual Status Get(const ReadOptions& options, const Slice& key, std::string* value); virtual Iterator* NewIterator(const ReadOptions&); virtual const uint64_t GetSnapshot(uint64_t last_sequence = kMaxSequenceNumber); virtual void ReleaseSnapshot(uint64_t sequence_number); - virtual const uint64_t Rollback(uint64_t snapshot_seq, uint64_t rollback_point = kMaxSequenceNumber); + virtual const uint64_t Rollback(uint64_t snapshot_seq, + uint64_t rollback_point = kMaxSequenceNumber); virtual bool GetProperty(const Slice& property, std::string* value); virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes); - virtual void GetCurrentLevelSize(std::vector *); + virtual void GetCurrentLevelSize(std::vector*); // lgsize not used in db_impl, just for interface compatable - virtual void GetApproximateSizes(uint64_t* size, std::vector* lgsize = NULL); + virtual void GetApproximateSizes(uint64_t* size, std::vector* lgsize = NULL, + uint64_t* mem_table_size = NULL); virtual void CompactRange(const Slice* begin, const Slice* end, int lg_no = -1); virtual bool ShouldForceUnloadOnError(); @@ -92,8 +93,8 @@ class DBImpl : public DB { int64_t TEST_MaxNextLevelOverlappingBytes(); // Recover the descriptor from persistent storage. May do a significant - // amount of work to recover recently logged updates. Any changes to - // be made to the descriptor are added to *edit. + // amount of work to recover recently logged updates. Any changes to + // be made to the descriptor are added to *edit. Status Recover(VersionEdit* edit) EXCLUSIVE_LOCKS_REQUIRED(mutex_); private: @@ -102,14 +103,13 @@ class DBImpl : public DB { struct CompactionState; struct Writer; struct CompactionTask { - int64_t id; // compaction thread id - double score; // compaction score - uint64_t timeout; // compaction task delay time + int64_t id; // compaction thread id + double score; // compaction score + uint64_t timeout; // compaction task delay time DBImpl* db; }; - Iterator* NewInternalIterator(const ReadOptions&, - SequenceNumber* latest_snapshot); + Iterator* NewInternalIterator(const ReadOptions&, SequenceNumber* latest_snapshot); Status NewDB(); Status DbExists(bool* exists); @@ -121,16 +121,14 @@ class DBImpl : public DB { CompactStrategy* NewCompactStrategy(CompactionState* compact); - void HandleCompactionWork(CompactionState* compact, - CompactStrategy* compact_strategy); + void HandleCompactionWork(CompactionState* compact, CompactStrategy* compact_strategy); // Delete any unneeded files and stale in-memory entries. void DeleteObsoleteFiles(); // Compact the in-memory write buffer to disk. Switches to a new // log-file/memtable and writes a new descriptor iff successful. - Status CompactMemTable(bool* sched_idle = NULL) - EXCLUSIVE_LOCKS_REQUIRED(mutex_); + Status CompactMemTable(bool* sched_idle = NULL) EXCLUSIVE_LOCKS_REQUIRED(mutex_); Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base, uint64_t* number = NULL) EXCLUSIVE_LOCKS_REQUIRED(mutex_); @@ -142,13 +140,11 @@ class DBImpl : public DB { static void BGWork(void* db); void BackgroundCall(CompactionTask* task); Status BackgroundCompaction(bool* sched_idle) EXCLUSIVE_LOCKS_REQUIRED(mutex_); - void CleanupCompaction(CompactionState* compact) - EXCLUSIVE_LOCKS_REQUIRED(mutex_); + void CleanupCompaction(CompactionState* compact) EXCLUSIVE_LOCKS_REQUIRED(mutex_); Status OpenCompactionOutputFile(CompactionState* compact); Status FinishCompactionOutputFile(CompactionState* compact, Iterator* input); - Status InstallCompactionResults(CompactionState* compact) - EXCLUSIVE_LOCKS_REQUIRED(mutex_); + Status InstallCompactionResults(CompactionState* compact) EXCLUSIVE_LOCKS_REQUIRED(mutex_); // Returns: // Status OK: iff *exists == true -> exists @@ -193,13 +189,13 @@ class DBImpl : public DB { // State below is protected by mutex_ port::Mutex mutex_; port::AtomicPointer shutting_down_; - port::CondVar bg_cv_; // Signalled when background work finishes - port::CondVar writting_mem_cv_; // Writer is writting mem_ + port::CondVar bg_cv_; // Signalled when background work finishes + port::CondVar writting_mem_cv_; // Writer is writting mem_ bool is_writting_mem_; std::multiset snapshots_; std::map rollbacks_; MemTable* mem_; - MemTable* imm_; // Memtable being compacted + MemTable* imm_; // Memtable being compacted MemTable* recover_mem_; port::AtomicPointer has_imm_; // So bg thread can detect non-NULL imm_ WritableFile* logfile_; @@ -223,17 +219,17 @@ class DBImpl : public DB { // Information for a manual compaction enum ManualCompactState { - kManualCompactIdle, // manual compact inited - kManualCompactConflict, // manual compact run simultaneously + kManualCompactIdle, // manual compact inited + kManualCompactConflict, // manual compact run simultaneously }; struct ManualCompaction { int level; bool done; bool being_sched; - const InternalKey* begin; // NULL means beginning of key range - const InternalKey* end; // NULL means end of key range - InternalKey tmp_storage; // Used to keep track of compaction progress - ManualCompactState compaction_conflict; // 0 == idle, 1 == conflict, 2 == wake + const InternalKey* begin; // NULL means beginning of key range + const InternalKey* end; // NULL means end of key range + InternalKey tmp_storage; // Used to keep track of compaction progress + ManualCompactState compaction_conflict; // 0 == idle, 1 == conflict, 2 == wake }; ManualCompaction* manual_compaction_; @@ -248,7 +244,7 @@ class DBImpl : public DB { // true if disable WAL bool flush_on_destroy_; - + // true , if first create dbname DIR, or last time load DB failed with txn bool need_newdb_txn_; @@ -259,7 +255,7 @@ class DBImpl : public DB { int64_t bytes_read; int64_t bytes_written; - CompactionStats() : micros(0), bytes_read(0), bytes_written(0) { } + CompactionStats() : micros(0), bytes_read(0), bytes_written(0) {} void Add(const CompactionStats& c) { this->micros += c.micros; @@ -273,17 +269,13 @@ class DBImpl : public DB { DBImpl(const DBImpl&); void operator=(const DBImpl&); - const Comparator* user_comparator() const { - return internal_comparator_.user_comparator(); - } + const Comparator* user_comparator() const { return internal_comparator_.user_comparator(); } }; // Sanitize db options. The caller should delete result.info_log if // it is not equal to src.info_log. -extern Options SanitizeOptions(const std::string& db, - const InternalKeyComparator* icmp, - const InternalFilterPolicy* ipolicy, - const Options& src); +extern Options SanitizeOptions(const std::string& db, const InternalKeyComparator* icmp, + const InternalFilterPolicy* ipolicy, const Options& src); } // namespace leveldb diff --git a/src/leveldb/db/db_impl_test.cc b/src/leveldb/db/db_impl_test.cc new file mode 100644 index 000000000..fc9b07afd --- /dev/null +++ b/src/leveldb/db/db_impl_test.cc @@ -0,0 +1,131 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "util/testharness.h" +#include "db/db_impl.h" + +namespace leveldb { + +class EnvForVeriyDbInGetPropertyCase1 : public EnvWrapper { + public: + EnvForVeriyDbInGetPropertyCase1() : EnvWrapper(Env::Default()) {} + + virtual Status GetChildren(const std::string& dir, std::vector* r) { + return Status::OK(); + } + virtual Status GetFileSize(const std::string& f, uint64_t* s) { + *s = 789; + return Status::OK(); + } + void SetDBImpl(DBImpl* impl) { impl_ = impl; } + + public: + DBImpl* impl_; +}; + +class EnvForVeriyDbInGetPropertyCase2 : public EnvWrapper { + public: + EnvForVeriyDbInGetPropertyCase2() : EnvWrapper(Env::Default()) {} + + virtual Status GetChildren(const std::string& dir, std::vector* r) { + r->push_back("456.sst"); + impl_->shutting_down_.Release_Store(impl_); + return Status::OK(); + } + virtual Status GetFileSize(const std::string& f, uint64_t* s) { + *s = 789; + return Status::OK(); + } + void SetDBImpl(DBImpl* impl) { impl_ = impl; } + + public: + DBImpl* impl_; +}; + +class EnvForVeriyDbInGetPropertyCase3 : public EnvWrapper { + public: + EnvForVeriyDbInGetPropertyCase3() : EnvWrapper(Env::Default()) {} + + virtual Status GetChildren(const std::string& dir, std::vector* r) { + r->push_back("456.sst"); + return Status::OK(); + } + virtual Status GetFileSize(const std::string& f, uint64_t* s) { + *s = 789; + return Status::TimeOut("timeout"); + } + void SetDBImpl(DBImpl* impl) { impl_ = impl; } + + public: + DBImpl* impl_; +}; + +class DBImplTest { + public: + void init(DBImpl* impl) { + Version* v = new Version(impl->versions_); + FileMetaData* f = new FileMetaData; + f->refs++; + f->number = (1UL << 63 | 123UL << 32 | 456); + f->file_size = 789; + f->smallest = InternalKey("", 0, kTypeValue); + f->largest = InternalKey("", 0, kTypeValue); + (v->files_[0]).push_back(f); + impl->versions_->AppendVersion(v); + } + DBImplTest() {} + + ~DBImplTest() {} +}; + +TEST(DBImplTest, VeriyDbInGetPropertyWhenShuttingDownCase1) { + Options opt; + EnvForVeriyDbInGetPropertyCase1* env = new EnvForVeriyDbInGetPropertyCase1; + opt.env = env; + DBImpl* impl = new DBImpl(opt, "test_table/tablet000123/1"); + env->SetDBImpl(impl); + init(impl); + std::string db_property_key = "leveldb.verify-db-integrity"; + std::string db_property_val; + impl->shutting_down_.Release_Store(impl); + ASSERT_EQ(impl->GetProperty(db_property_key, &db_property_val), false); + delete opt.env; + delete impl; +} + +TEST(DBImplTest, VeriyDbInGetPropertyWhenShuttingDownCase2) { + Options opt; + EnvForVeriyDbInGetPropertyCase2* env = new EnvForVeriyDbInGetPropertyCase2; + opt.env = env; + DBImpl* impl = new DBImpl(opt, "test_table/tablet000123/1"); + env->SetDBImpl(impl); + init(impl); + std::string db_property_key = "leveldb.verify-db-integrity"; + std::string db_property_val; + ASSERT_EQ(impl->GetProperty(db_property_key, &db_property_val), false); + delete opt.env; + delete impl; +} + +TEST(DBImplTest, VeriyDbInGetPropertyWhenShuttingDownCase3) { + Options opt; + EnvForVeriyDbInGetPropertyCase3* env = new EnvForVeriyDbInGetPropertyCase3; + opt.env = env; + DBImpl* impl = new DBImpl(opt, "test_table/tablet000123/1"); + env->SetDBImpl(impl); + init(impl); + std::string db_property_key = "leveldb.verify-db-integrity"; + std::string db_property_val; + ASSERT_EQ(impl->GetProperty(db_property_key, &db_property_val), false); + delete opt.env; + delete impl; +} + +} // namespace leveldb + +int main(int argc, char** argv) { return leveldb::test::RunAllTests(); } diff --git a/src/leveldb/db/db_iter.cc b/src/leveldb/db/db_iter.cc index 1c6d6e1b9..35d241190 100644 --- a/src/leveldb/db/db_iter.cc +++ b/src/leveldb/db/db_iter.cc @@ -38,21 +38,17 @@ namespace { // combines multiple entries for the same userkey found in the DB // representation into a single entry while accounting for sequence // numbers, deletion markers, overwrites, etc. -class DBIter: public Iterator { +class DBIter : public Iterator { public: // Which direction is the iterator currently moving? // (1) When moving forward, the internal iterator is positioned at // the exact entry that yields this->key(), this->value() // (2) When moving backwards, the internal iterator is positioned // just before all entries whose user key == this->key(). - enum Direction { - kForward, - kReverse - }; + enum Direction { kForward, kReverse }; - DBIter(const std::string* dbname, Env* env, - const Comparator* cmp, Iterator* iter, SequenceNumber s, - const std::map& rollbacks) + DBIter(const std::string* dbname, Env* env, const Comparator* cmp, Iterator* iter, + SequenceNumber s, const std::map& rollbacks) : dbname_(dbname), env_(env), user_comparator_(cmp), @@ -60,11 +56,8 @@ class DBIter: public Iterator { sequence_(s), rollbacks_(rollbacks), direction_(kForward), - valid_(false) { - } - virtual ~DBIter() { - delete iter_; - } + valid_(false) {} + virtual ~DBIter() { delete iter_; } virtual bool Valid() const { return valid_; } virtual Slice key() const { assert(valid_); @@ -93,9 +86,7 @@ class DBIter: public Iterator { void FindPrevUserEntry(); bool ParseKey(ParsedInternalKey* key); - inline void SaveKey(const Slice& k, std::string* dst) { - dst->assign(k.data(), k.size()); - } + inline void SaveKey(const Slice& k, std::string* dst) { dst->assign(k.data(), k.size()); } inline void ClearSavedValue() { if (saved_value_.capacity() > 1048576) { @@ -114,8 +105,8 @@ class DBIter: public Iterator { const std::map rollbacks_; Status status_; - std::string saved_key_; // == current key when direction_==kReverse - std::string saved_value_; // == current raw value when direction_==kReverse + std::string saved_key_; // == current key when direction_==kReverse + std::string saved_value_; // == current raw value when direction_==kReverse Direction direction_; bool valid_; @@ -174,8 +165,7 @@ void DBIter::FindNextUserEntry(bool skipping, std::string* skip) { skipping = true; break; case kTypeValue: - if (skipping && - user_comparator_->Compare(ikey.user_key, *skip) <= 0) { + if (skipping && user_comparator_->Compare(ikey.user_key, *skip) <= 0) { // Entry hidden } else { valid_ = true; @@ -207,8 +197,7 @@ void DBIter::Prev() { ClearSavedValue(); return; } - if (user_comparator_->Compare(ExtractUserKey(iter_->key()), - saved_key_) < 0) { + if (user_comparator_->Compare(ExtractUserKey(iter_->key()), saved_key_) < 0) { break; } } @@ -264,8 +253,7 @@ void DBIter::Seek(const Slice& target) { direction_ = kForward; ClearSavedValue(); saved_key_.clear(); - AppendInternalKey( - &saved_key_, ParsedInternalKey(target, sequence_, kValueTypeForSeek)); + AppendInternalKey(&saved_key_, ParsedInternalKey(target, sequence_, kValueTypeForSeek)); iter_->Seek(saved_key_); if (iter_->Valid()) { FindNextUserEntry(false, &saved_key_ /* temporary storage */); @@ -294,13 +282,9 @@ void DBIter::SeekToLast() { } // anonymous namespace -Iterator* NewDBIterator( - const std::string* dbname, - Env* env, - const Comparator* user_key_comparator, - Iterator* internal_iter, - const SequenceNumber& sequence, - const std::map& rollbacks) { +Iterator* NewDBIterator(const std::string* dbname, Env* env, const Comparator* user_key_comparator, + Iterator* internal_iter, const SequenceNumber& sequence, + const std::map& rollbacks) { return new DBIter(dbname, env, user_key_comparator, internal_iter, sequence, rollbacks); } diff --git a/src/leveldb/db/db_iter.h b/src/leveldb/db/db_iter.h index 08e039756..28a507e3f 100644 --- a/src/leveldb/db/db_iter.h +++ b/src/leveldb/db/db_iter.h @@ -18,13 +18,10 @@ namespace leveldb { // Return a new iterator that converts internal keys (yielded by // "*internal_iter") that were live at the specified "sequence" number // into appropriate user keys. -extern Iterator* NewDBIterator( - const std::string* dbname, - Env* env, - const Comparator* user_key_comparator, - Iterator* internal_iter, - const SequenceNumber& sequence, - const std::map& rollbacks); +extern Iterator* NewDBIterator(const std::string* dbname, Env* env, + const Comparator* user_key_comparator, Iterator* internal_iter, + const SequenceNumber& sequence, + const std::map& rollbacks); } // namespace leveldb diff --git a/src/leveldb/db/db_table.cc b/src/leveldb/db/db_table.cc index d3d32a488..319946acb 100644 --- a/src/leveldb/db/db_table.cc +++ b/src/leveldb/db/db_table.cc @@ -38,27 +38,21 @@ struct DBTable::RecordWriter { bool done; port::CondVar cv; - explicit RecordWriter(port::Mutex* mu) - : batch(NULL), - sync(true), - done(false), - cv(mu) {} + explicit RecordWriter(port::Mutex* mu) : batch(NULL), sync(true), done(false), cv(mu) {} }; Options InitDefaultOptions(const Options& options, const std::string& dbname) { Options opt = options; Status s = opt.env->CreateDir(dbname); if (!s.ok()) { - std::cerr << "[" << dbname << "] fail to create dir: " - << s.ToString() << std::endl; + std::cerr << "[" << dbname << "] fail to create dir: " << s.ToString() << std::endl; } if (opt.info_log == NULL) { - opt.env->RenameFile(InfoLogFileName(dbname), OldInfoLogFileName(dbname)); - s = opt.env->NewLogger(InfoLogFileName(dbname), &opt.info_log); + s = opt.env->NewLogger(InfoLogFileName(dbname), LogOption::LogOptionBuilder().Build(), + &opt.info_log); if (!s.ok()) { // No place suitable for logging - std::cerr << "[" << dbname << "] fail to init info log: " - << s.ToString() << std::endl; + std::cerr << "[" << dbname << "] fail to init info log: " << s.ToString() << std::endl; opt.info_log = NULL; } } @@ -85,12 +79,16 @@ Options InitOptionsLG(const Options& options, uint32_t lg_id) { return opt; } LG_info* lg_info = it->second; + if (lg_info->env) { opt.env = lg_info->env; } + if (lg_info->block_cache) { opt.block_cache = lg_info->block_cache; } + + opt.persistent_cache = lg_info->persistent_cache; opt.compression = lg_info->compression; opt.block_size = lg_info->block_size; opt.use_memtable_on_leveldb = lg_info->use_memtable_on_leveldb; @@ -104,33 +102,46 @@ Options InitOptionsLG(const Options& options, uint32_t lg_id) { opt.posix_write_buffer_size = lg_info->posix_write_buffer_size; opt.table_builder_batch_write = lg_info->table_builder_batch_write; opt.table_builder_batch_size = lg_info->table_builder_batch_size; - if (options.ignore_corruption_in_open_lg_list.find(lg_id) - != options.ignore_corruption_in_open_lg_list.end()) { + opt.memtable_shard_num = lg_info->memtable_shard_num; + if (options.ignore_corruption_in_open_lg_list.find(lg_id) != + options.ignore_corruption_in_open_lg_list.end()) { opt.ignore_corruption_in_open = true; } return opt; } DBTable::DBTable(const Options& options, const std::string& dbname) - : state_(kNotOpen), shutting_down_(NULL), shutdown1_finished_(NULL), bg_cv_(&mutex_), - bg_cv_timer_(&mutex_), bg_cv_sleeper_(&mutex_), - options_(InitDefaultOptions(options, dbname)), - dbname_(dbname), env_(options.env), db_lock_(NULL), - created_own_lg_list_(options_.exist_lg_list != options.exist_lg_list), - created_own_info_log_(options_.info_log != options.info_log), - created_own_compact_strategy_(options_.compact_strategy_factory != options.compact_strategy_factory), - commit_snapshot_(kMaxSequenceNumber), logfile_(NULL), log_(NULL), force_switch_log_(false), - last_sequence_(0), current_log_size_(0), - tmp_batch_(new WriteBatch), - bg_schedule_gc_(false), bg_schedule_gc_id_(0), - bg_schedule_gc_score_(0), force_clean_log_seq_(0) { -} + : state_(kNotOpen), + shutting_down_(NULL), + shutdown1_finished_(NULL), + bg_cv_(&mutex_), + bg_cv_timer_(&mutex_), + bg_cv_sleeper_(&mutex_), + options_(InitDefaultOptions(options, dbname)), + dbname_(dbname), + env_(options.env), + db_lock_(NULL), + created_own_lg_list_(options_.exist_lg_list != options.exist_lg_list), + created_own_info_log_(options_.info_log != options.info_log), + created_own_compact_strategy_(options_.compact_strategy_factory != + options.compact_strategy_factory), + commit_snapshot_(kMaxSequenceNumber), + logfile_(NULL), + log_(NULL), + force_switch_log_(false), + last_sequence_(0), + current_log_size_(0), + tmp_batch_(new WriteBatch), + bg_schedule_gc_(false), + bg_schedule_gc_id_(0), + bg_schedule_gc_score_(0), + force_clean_log_seq_(0) {} Status DBTable::Shutdown1() { assert(state_ == kOpened); state_ = kShutdown1; - Log(options_.info_log, "[%s] shutdown1 start", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] shutdown1 start", dbname_.c_str()); shutting_down_.Release_Store(this); Status s; @@ -144,7 +155,7 @@ Status DBTable::Shutdown1() { } } - Log(options_.info_log, "[%s] wait bg garbage clean finish", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] wait bg garbage clean finish", dbname_.c_str()); mutex_.Lock(); if (bg_schedule_gc_) { env_->ReSchedule(bg_schedule_gc_id_, kDeleteLogUrgentScore); @@ -154,10 +165,10 @@ Status DBTable::Shutdown1() { } mutex_.Unlock(); - Log(options_.info_log, "[%s] fg garbage clean", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] fg garbage clean", dbname_.c_str()); GarbageClean(); - Log(options_.info_log, "[%s] shutdown1 done", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] shutdown1 done", dbname_.c_str()); shutdown1_finished_.Release_Store(this); return s; } @@ -166,7 +177,7 @@ Status DBTable::Shutdown2() { assert(state_ == kShutdown1); state_ = kShutdown2; - Log(options_.info_log, "[%s] shutdown2 start", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] shutdown2 start", dbname_.c_str()); Status s; for (uint32_t i = 0; i < lg_list_.size(); ++i) { @@ -179,22 +190,22 @@ Status DBTable::Shutdown2() { } } - Log(options_.info_log, "[%s] stop async log", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] stop async log", dbname_.c_str()); if (log_) { log_->Stop(false); } if (s.ok() && options_.dump_mem_on_shutdown) { - Log(options_.info_log, "[%s] gather all log file", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] gather all log file", dbname_.c_str()); std::vector logfiles; s = GatherLogFile(0, &logfiles); if (s.ok()) { - Log(options_.info_log, "[%s] delete all log file", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] delete all log file", dbname_.c_str()); s = DeleteLogFile(logfiles); } } - Log(options_.info_log, "[%s] shutdown2 done", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] shutdown2 done", dbname_.c_str()); return s; } @@ -234,14 +245,14 @@ DBTable::~DBTable() { Status DBTable::Init() { std::vector lg_edits; - Log(options_.info_log, "[%s] start Init()", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] start Init()", dbname_.c_str()); Status s; if (options_.use_file_lock) { - s = env_->LockFile(LockFileName(dbname_), &db_lock_); - if (!s.ok()) { - Log(options_.info_log, "[%s] Get db lock fail", dbname_.c_str()); - return s; - } + s = env_->LockFile(LockFileName(dbname_), &db_lock_); + if (!s.ok()) { + LEVELDB_LOG(options_.info_log, "[%s] Get db lock fail", dbname_.c_str()); + return s; + } } MutexLock lock(&mutex_); @@ -249,10 +260,9 @@ Status DBTable::Init() { std::vector snapshot_sequence = options_.snapshots_sequence; std::map rollbacks = options_.rollbacks; for (std::set::iterator it = options_.exist_lg_list->begin(); - it != options_.exist_lg_list->end() && s.ok(); ++it) { + it != options_.exist_lg_list->end() && s.ok(); ++it) { uint32_t i = *it; - DBImpl* impl = new DBImpl(InitOptionsLG(options_, i), - dbname_ + "/" + Uint64ToString(i)); + DBImpl* impl = new DBImpl(InitOptionsLG(options_, i), dbname_ + "/" + Uint64ToString(i)); lg_list_.push_back(impl); lg_edits.push_back(new VersionEdit); for (uint32_t i = 0; i < snapshot_sequence.size(); ++i) { @@ -264,16 +274,16 @@ Status DBTable::Init() { } // recover SST - Log(options_.info_log, "[%s] start Recover lg%d, last_seq= %lu", - dbname_.c_str(), i, impl->GetLastSequence()); + LEVELDB_LOG(options_.info_log, "[%s] start Recover lg%d, last_seq= %lu", dbname_.c_str(), i, + impl->GetLastSequence()); s = impl->Recover(lg_edits[i]); - Log(options_.info_log, "[%s] end Recover lg%d, last_seq= %lu", - dbname_.c_str(), i, impl->GetLastSequence()); + LEVELDB_LOG(options_.info_log, "[%s] end Recover lg%d, last_seq= %lu", dbname_.c_str(), i, + impl->GetLastSequence()); if (s.ok()) { uint64_t last_seq = impl->GetLastSequence(); - Log(options_.info_log, - "[%s] Recover lg %d last_log_seq= %lu", dbname_.c_str(), i, last_seq); + LEVELDB_LOG(options_.info_log, "[%s] Recover lg %d last_log_seq= %lu", dbname_.c_str(), i, + last_seq); if (min_log_sequence > last_seq) { min_log_sequence = last_seq; } @@ -281,12 +291,12 @@ Status DBTable::Init() { last_sequence_ = last_seq; } } else { - Log(options_.info_log, "[%s] fail to recover lg %d", dbname_.c_str(), i); + LEVELDB_LOG(options_.info_log, "[%s] fail to recover lg %d", dbname_.c_str(), i); break; } } if (!s.ok()) { - Log(options_.info_log, "[%s] fail to recover table.", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] fail to recover table.", dbname_.c_str()); for (uint32_t i = 0; i != lg_list_.size(); ++i) { delete lg_list_[i]; } @@ -294,7 +304,7 @@ Status DBTable::Init() { return s; } - Log(options_.info_log, "[%s] start GatherLogFile", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] start GatherLogFile", dbname_.c_str()); // recover log files std::vector logfiles; s = GatherLogFile(min_log_sequence + 1, &logfiles); @@ -308,15 +318,15 @@ Status DBTable::Init() { } s = RecoverLogFile(logfiles[i], recover_limit, &lg_edits); if (!s.ok()) { - Log(options_.info_log, "[%s] Fail to RecoverLogFile %ld", - dbname_.c_str(), logfiles[i]); + LEVELDB_LOG(options_.info_log, "[%s] Fail to RecoverLogFile %ld", dbname_.c_str(), + logfiles[i]); } } } else { - Log(options_.info_log, "[%s] Fail to GatherLogFile", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] Fail to GatherLogFile", dbname_.c_str()); } - Log(options_.info_log, "[%s] start RecoverLogToLevel0Table", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] start RecoverLogToLevel0Table", dbname_.c_str()); std::set::iterator it = options_.exist_lg_list->begin(); for (; it != options_.exist_lg_list->end(); ++it) { uint32_t i = *it; @@ -326,7 +336,7 @@ Status DBTable::Init() { } if (s.ok()) { - Log(options_.info_log, "[%s] start DeleteLogFile", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] start DeleteLogFile", dbname_.c_str()); s = DeleteLogFile(logfiles); } @@ -334,12 +344,12 @@ Status DBTable::Init() { std::string log_file_name = LogHexFileName(dbname_, last_sequence_ + 1); s = options_.env->NewWritableFile(log_file_name, &logfile_, EnvOptions(options_)); if (s.ok()) { - //Log(options_.info_log, "[%s] open logfile %s", + // LEVELDB_LOG(options_.info_log, "[%s] open logfile %s", // dbname_.c_str(), log_file_name.c_str()); log_ = new log::AsyncWriter(logfile_, options_.log_async_mode); } else { - Log(options_.info_log, "[%s] fail to open logfile %s", - dbname_.c_str(), log_file_name.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] fail to open logfile %s", dbname_.c_str(), + log_file_name.c_str()); } } @@ -355,13 +365,13 @@ Status DBTable::Init() { if (s.ok()) { state_ = kOpened; - Log(options_.info_log, "[%s] custom compact strategy: %s, flush trigger %lu", - dbname_.c_str(), options_.compact_strategy_factory->Name(), - options_.flush_triggered_log_num); + LEVELDB_LOG(options_.info_log, "[%s] custom compact strategy: %s, flush trigger %lu", + dbname_.c_str(), options_.compact_strategy_factory->Name(), + options_.flush_triggered_log_num); commit_snapshot_ = last_sequence_; - Log(options_.info_log, "[%s] Init() done, last_seq=%llu", dbname_.c_str(), - static_cast(last_sequence_)); + LEVELDB_LOG(options_.info_log, "[%s] Init() done, last_seq=%llu", dbname_.c_str(), + static_cast(last_sequence_)); } else { for (uint32_t i = 0; i != lg_list_.size(); ++i) { delete lg_list_[i]; @@ -371,8 +381,7 @@ Status DBTable::Init() { return s; } -Status DBTable::Put(const WriteOptions& options, - const Slice& key, const Slice& value) { +Status DBTable::Put(const WriteOptions& options, const Slice& key, const Slice& value) { return DB::Put(options, key, value); } @@ -383,7 +392,7 @@ Status DBTable::Delete(const WriteOptions& options, const Slice& key) { bool DBTable::BusyWrite() { MutexLock l(&mutex_); for (std::set::iterator it = options_.exist_lg_list->begin(); - it != options_.exist_lg_list->end(); ++it) { + it != options_.exist_lg_list->end(); ++it) { if (lg_list_[*it]->BusyWrite()) { return true; } @@ -399,7 +408,7 @@ void DBTable::Workload(double* write_workload) { double ww = -1; *write_workload = -1; for (std::set::iterator it = options_.exist_lg_list->begin(); - it != options_.exist_lg_list->end(); ++it) { + it != options_.exist_lg_list->end(); ++it) { lg_list_[*it]->Workload(&ww); if (ww > *write_workload) { *write_workload = ww; @@ -425,7 +434,7 @@ Status DBTable::Write(const WriteOptions& options, WriteBatch* my_batch) { // DB with fatal error is unwritable. Status s = fatal_error_; if (IsShutdown1Finished()) { - s = Status::ShutdownInProgress(dbname_ + ": fail to write on waiting shutdown2"); + s = Status::ShutdownInProgress(dbname_ + ": fail to write on waiting shutdown2"); } RecordWriter* last_writer = &w; @@ -453,14 +462,15 @@ Status DBTable::Write(const WriteOptions& options, WriteBatch* my_batch) { Slice slice = WriteBatchInternal::Contents(updates); uint32_t wait_sec = options_.write_log_time_out; - for (; ; wait_sec <<= 1) { + for (;; wait_sec <<= 1) { // write a record into log log_->AddRecord(slice); s = log_->WaitDone(wait_sec); if (s.IsTimeOut()) { - Log(options_.info_log, "[%s] AddRecord time out, current log size: %lu, " - "record size: %lu, wait_sec: %u", - dbname_.c_str(), current_log_size_, slice.size(), wait_sec); + LEVELDB_LOG(options_.info_log, + "[%s] AddRecord time out, current log size: %lu, " + "record size: %lu, wait_sec: %u", + dbname_.c_str(), current_log_size_, slice.size(), wait_sec); int ret = SwitchLog(true); if (ret == 0) { continue; @@ -482,8 +492,8 @@ Status DBTable::Write(const WriteOptions& options, WriteBatch* my_batch) { log_->Sync(options.sync); s = log_->WaitDone(wait_sec); if (s.IsTimeOut()) { - Log(options_.info_log, "[%s] Sync time out %lu", - dbname_.c_str(), current_log_size_); + LEVELDB_LOG(options_.info_log, "[%s] Sync time out %lu", dbname_.c_str(), + current_log_size_); int ret = SwitchLog(true); if (ret == 0) { continue; @@ -506,7 +516,7 @@ Status DBTable::Write(const WriteOptions& options, WriteBatch* my_batch) { } mutex_.Lock(); if (s.IsIOPermissionDenied()) { - fatal_error_ = s; + fatal_error_ = s; } } if (s.ok()) { @@ -527,14 +537,13 @@ Status DBTable::Write(const WriteOptions& options, WriteBatch* my_batch) { lg_updates[0] = updates; } mutex_.Unlock(); - //TODO: should be multi-thread distributed + // TODO: should be multi-thread distributed for (uint32_t i = 0; i < lg_updates.size(); ++i) { assert(lg_updates[i] != NULL); Status lg_s = lg_list_[i]->Write(WriteOptions(), lg_updates[i]); if (!lg_s.ok()) { // 这种情况下内存处于不一致状态 - Log(options_.info_log, "[%s] [Fatal] Write to lg%u fail", - dbname_.c_str(), i); + LEVELDB_LOG(options_.info_log, "[%s] [Fatal] Write to lg%u fail", dbname_.c_str(), i); s = lg_s; break; } @@ -545,7 +554,7 @@ Status DBTable::Write(const WriteOptions& options, WriteBatch* my_batch) { lg_list_[i]->AddBoundLogSize(updates->DataSize()); } } else { - fatal_error_ = s; + fatal_error_ = s; } // Commit updates @@ -605,8 +614,8 @@ WriteBatch* DBTable::GroupWriteBatch(RecordWriter** last_writer) { // original write is small, limit the growth so we do not slow // down the small write too much. size_t max_size = 1 << 20; - if (size <= (128<<10)) { - max_size = size + (128<<10); + if (size <= (128 << 10)) { + max_size = size + (128 << 10); } *last_writer = first; @@ -642,8 +651,7 @@ WriteBatch* DBTable::GroupWriteBatch(RecordWriter** last_writer) { return result; } -Status DBTable::Get(const ReadOptions& options, - const Slice& key, std::string* value) { +Status DBTable::Get(const ReadOptions& options, const Slice& key, std::string* value) { uint32_t lg_id = 0; Slice real_key = key; if (!GetFixed32LGId(&real_key, &lg_id)) { @@ -681,15 +689,14 @@ Iterator* DBTable::NewIterator(const ReadOptions& options) { it = options_.exist_lg_list->begin(); for (; it != options_.exist_lg_list->end(); ++it) { if (options.target_lgs) { - std::set::const_iterator found_it = - options.target_lgs->find(*it); + std::set::const_iterator found_it = options.target_lgs->find(*it); if (found_it == options.target_lgs->end()) { continue; } } // when shutdown1 finished waiting for shutdown2 hang will eary break if (IsShutdown1Finished()) { - break; + break; } list.push_back(lg_list_[*it]->NewIterator(new_options)); } @@ -723,21 +730,22 @@ void DBTable::ReleaseSnapshot(uint64_t sequence_number) { } bool DBTable::ShouldForceUnloadOnError() { - MutexLock l(&mutex_); - bool permission_error = fatal_error_.IsIOPermissionDenied(); - if (permission_error) { //return early - return permission_error; - } - std::set::iterator it = options_.exist_lg_list->begin(); - for (; it != options_.exist_lg_list->end(); ++it) { - permission_error |= lg_list_[*it]->ShouldForceUnloadOnError(); - } + MutexLock l(&mutex_); + bool permission_error = fatal_error_.IsIOPermissionDenied(); + if (permission_error) { // return early return permission_error; + } + std::set::iterator it = options_.exist_lg_list->begin(); + for (; it != options_.exist_lg_list->end(); ++it) { + permission_error |= lg_list_[*it]->ShouldForceUnloadOnError(); + } + return permission_error; } const uint64_t DBTable::Rollback(uint64_t snapshot_seq, uint64_t rollback_point) { std::set::iterator it = options_.exist_lg_list->begin(); - uint64_t rollback_seq = rollback_point == kMaxSequenceNumber ? last_sequence_ : rollback_point;; + uint64_t rollback_seq = rollback_point == kMaxSequenceNumber ? last_sequence_ : rollback_point; + ; for (; it != options_.exist_lg_list->end(); ++it) { lg_list_[*it]->Rollback(snapshot_seq, rollback_seq); } @@ -751,7 +759,7 @@ bool DBTable::GetProperty(const Slice& property, std::string* value) { std::set::iterator it = options_.exist_lg_list->begin(); for (; it != options_.exist_lg_list->end(); ++it) { if (shutting_down_.Acquire_Load()) { - return ret; + return ret; } std::string lg_value; bool lg_ret = lg_list_[*it]->GetProperty(property, &lg_value); @@ -775,8 +783,7 @@ bool DBTable::GetProperty(const Slice& property, std::string* value) { return ret; } -void DBTable::GetApproximateSizes(const Range* range, int n, - uint64_t* sizes) { +void DBTable::GetApproximateSizes(const Range* range, int n, uint64_t* sizes) { for (int j = 0; j < n; ++j) { sizes[j] = 0; } @@ -792,21 +799,33 @@ void DBTable::GetApproximateSizes(const Range* range, int n, } } -void DBTable::GetApproximateSizes(uint64_t* size, std::vector* lgsize) { +// tera-specific +// size: db size, include mem, imm, all sst files +// lgsize: each lg size, include all storage +// mem_table_size: memtable's size, for analyzing memory usage. +void DBTable::GetApproximateSizes(uint64_t* size, std::vector* lgsize, + uint64_t* mem_table_size) { if (size) { *size = 0; } if (lgsize) { lgsize->clear(); } + if (mem_table_size) { + *mem_table_size = 0; + } std::set::iterator it = options_.exist_lg_list->begin(); for (; it != options_.exist_lg_list->end(); ++it) { uint32_t i = *it; uint64_t size_tmp; - lg_list_[i]->GetApproximateSizes(&size_tmp); + uint64_t mem_table_size_tmp = 0; + lg_list_[i]->GetApproximateSizes(&size_tmp, nullptr, &mem_table_size_tmp); if (size) { *size += size_tmp; } + if (mem_table_size) { + *mem_table_size += mem_table_size_tmp; + } if (lgsize) { lgsize->push_back(size_tmp); } @@ -831,12 +850,11 @@ void DBTable::CompactRange(const Slice* begin, const Slice* end, int lg_no) { } // @begin_num: the 1st record(sequence number) should be recover -Status DBTable::GatherLogFile(uint64_t begin_num, - std::vector* logfiles) { +Status DBTable::GatherLogFile(uint64_t begin_num, std::vector* logfiles) { std::vector files; Status s = env_->GetChildren(dbname_, &files); if (!s.ok()) { - Log(options_.info_log, "[%s] GatherLogFile fail", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] GatherLogFile fail", dbname_.c_str()); return s; } uint64_t number = 0; @@ -845,9 +863,7 @@ Status DBTable::GatherLogFile(uint64_t begin_num, for (uint32_t i = 0; i < files.size(); ++i) { type = kUnknown; number = 0; - if (ParseFileName(files[i], &number, &type) - && type == kLogFile - && number >= begin_num) { + if (ParseFileName(files[i], &number, &type) && type == kLogFile && number >= begin_num) { logfiles->push_back(number); } else if (type == kLogFile && number > last_number) { last_number = number; @@ -865,13 +881,14 @@ Status DBTable::GatherLogFile(uint64_t begin_num, *case 2: ^ ^ * 001.log last_number.log */ - if ((last_number > 0 && first_log_num > begin_num) // case 1 - || (last_number > 0 && logfiles->size() == 0)) { // case 2 + if ((last_number > 0 && first_log_num > begin_num) // case 1 + || (last_number > 0 && logfiles->size() == 0)) { // case 2 logfiles->push_back(last_number); - Log(options_.info_log, "[%s] add log file #%lu", dbname_.c_str(), last_number); + LEVELDB_LOG(options_.info_log, "[%s] add log file #%lu", dbname_.c_str(), last_number); } - Log(options_.info_log, "[%s] begin_seq= %lu, first log num= %lu, last num=%lu, log count=%lu\n", - dbname_.c_str(), begin_num, first_log_num, last_number, logfiles->size()); + LEVELDB_LOG(options_.info_log, + "[%s] begin_seq= %lu, first log num= %lu, last num=%lu, log count=%lu\n", + dbname_.c_str(), begin_num, first_log_num, last_number, logfiles->size()); std::sort(logfiles->begin(), logfiles->end()); return s; } @@ -884,9 +901,9 @@ Status DBTable::RecoverLogFile(uint64_t log_number, uint64_t recover_limit, const char* fname; Status* status; // NULL if options_.paranoid_checks==false virtual void Corruption(size_t bytes, const Status& s) { - Log(info_log, "%s%s: dropping %d bytes; %s", - (this->status == NULL ? "(ignoring error) " : ""), - fname, static_cast(bytes), s.ToString().c_str()); + LEVELDB_LOG(info_log, "%s%s: dropping %d bytes; %s", + (this->status == NULL ? "(ignoring error) " : ""), fname, static_cast(bytes), + s.ToString().c_str()); if (this->status != NULL && this->status->ok()) *this->status = s; } }; @@ -908,10 +925,9 @@ Status DBTable::RecoverLogFile(uint64_t log_number, uint64_t recover_limit, reporter.info_log = options_.info_log; reporter.fname = fname.c_str(); reporter.status = (options_.paranoid_checks ? &status : NULL); - log::Reader reader(file, &reporter, true/*checksum*/, - 0/*initial_offset*/); - Log(options_.info_log, "[%s] Recovering log #%lx, sequence limit %lu", - dbname_.c_str(), log_number, recover_limit); + log::Reader reader(file, &reporter, true /*checksum*/, 0 /*initial_offset*/); + LEVELDB_LOG(options_.info_log, "[%s] Recovering log #%lx, sequence limit %lu", dbname_.c_str(), + log_number, recover_limit); // Read all the records and add to a memtable std::string scratch; @@ -919,18 +935,19 @@ Status DBTable::RecoverLogFile(uint64_t log_number, uint64_t recover_limit, WriteBatch batch; while (reader.ReadRecord(&record, &scratch) && status.ok()) { if (record.size() < 12) { - reporter.Corruption(record.size(), - Status::Corruption("log record too small")); + reporter.Corruption(record.size(), Status::Corruption("log record too small")); continue; } WriteBatchInternal::SetContents(&batch, record); uint64_t first_seq = WriteBatchInternal::Sequence(&batch); uint64_t last_seq = first_seq + WriteBatchInternal::Count(&batch) - 1; - //Log(options_.info_log, "[%s] batch_seq= %lu, last_seq= %lu, count=%d", - // dbname_.c_str(), batch_seq, last_sequence_, WriteBatchInternal::Count(&batch)); + // LEVELDB_LOG(options_.info_log, "[%s] batch_seq= %lu, last_seq= %lu, + // count=%d", + // dbname_.c_str(), batch_seq, last_sequence_, + // WriteBatchInternal::Count(&batch)); if (last_seq >= recover_limit) { - Log(options_.info_log, "[%s] exceed limit %lu, ignore %lu ~ %lu", - dbname_.c_str(), recover_limit, first_seq, last_seq); + LEVELDB_LOG(options_.info_log, "[%s] exceed limit %lu, ignore %lu ~ %lu", dbname_.c_str(), + recover_limit, first_seq, last_seq); continue; } @@ -953,7 +970,7 @@ Status DBTable::RecoverLogFile(uint64_t log_number, uint64_t recover_limit, } if (status.ok()) { - //TODO: should be multi-thread distributed + // TODO: should be multi-thread distributed for (uint32_t i = 0; i < lg_updates.size(); ++i) { if (lg_updates[i] == NULL) { continue; @@ -963,13 +980,14 @@ Status DBTable::RecoverLogFile(uint64_t log_number, uint64_t recover_limit, } uint64_t first = WriteBatchInternal::Sequence(lg_updates[i]); uint64_t last = first + WriteBatchInternal::Count(lg_updates[i]) - 1; - // Log(options_.info_log, "[%s] recover log batch first= %lu, last= %lu\n", + // LEVELDB_LOG(options_.info_log, "[%s] recover log batch first= %lu, + // last= %lu\n", // dbname_.c_str(), first, last); Status lg_s = lg_list_[i]->RecoverInsertMem(lg_updates[i], (*edit_list)[i]); if (!lg_s.ok()) { - Log(options_.info_log, "[%s] recover log fail batch first= %lu, last= %lu\n", - dbname_.c_str(), first, last); + LEVELDB_LOG(options_.info_log, "[%s] recover log fail batch first= %lu, last= %lu\n", + dbname_.c_str(), first, last); status = lg_s; } } @@ -992,8 +1010,8 @@ void DBTable::MaybeIgnoreError(Status* s) const { if (s->ok() || options_.paranoid_checks) { // No change needed } else { - Log(options_.info_log, "[%s] Ignoring error %s", - dbname_.c_str(), s->ToString().c_str()); + LEVELDB_LOG(options_.info_log, "[%s] Ignoring error %s", dbname_.c_str(), + s->ToString().c_str()); *s = Status::OK(); } } @@ -1002,9 +1020,8 @@ Status DBTable::DeleteLogFile(const std::vector& log_numbers) { Status s; for (uint32_t i = 0; i < log_numbers.size() && s.ok(); ++i) { uint64_t log_number = log_numbers[i]; - Log(options_.info_log, "[%s] Delete type=%s #%llu", - dbname_.c_str(), FileTypeToString(kLogFile), - static_cast(log_number)); + LEVELDB_LOG(options_.info_log, "[%s] Delete type=%s #%llu", dbname_.c_str(), + FileTypeToString(kLogFile), static_cast(log_number)); std::string fname = LogHexFileName(dbname_, log_number); s = env_->DeleteFile(fname); // The last log file must be deleted before write a new log @@ -1014,9 +1031,8 @@ Status DBTable::DeleteLogFile(const std::vector& log_numbers) { MaybeIgnoreError(&s); } if (!s.ok()) { - Log(options_.info_log, "[%s] fail to delete logfile %llu: %s", - dbname_.c_str(), static_cast(log_number), - s.ToString().data()); + LEVELDB_LOG(options_.info_log, "[%s] fail to delete logfile %llu: %s", dbname_.c_str(), + static_cast(log_number), s.ToString().data()); } } return s; @@ -1033,8 +1049,7 @@ void DBTable::DeleteObsoleteFiles(uint64_t seq_no) { uint64_t delete_log_num = 0; for (size_t i = 0; i < filenames.size(); ++i) { bool deleted = false; - if (ParseFileName(filenames[i], &number, &type) - && type == kLogFile) { + if (ParseFileName(filenames[i], &number, &type) && type == kLogFile) { if (number < seq_no) { deleted = true; delete_log_num++; @@ -1043,9 +1058,8 @@ void DBTable::DeleteObsoleteFiles(uint64_t seq_no) { } } if (deleted) { - Log(options_.info_log, "[%s] Delete type=%s #%llu", - dbname_.c_str(), FileTypeToString(type), - static_cast(number)); + LEVELDB_LOG(options_.info_log, "[%s] Delete type=%s #%llu", dbname_.c_str(), + FileTypeToString(type), static_cast(number)); if (!last_file.empty()) { // ArchiveFile(dbname_ + "/" + last_file); env_->DeleteFile(dbname_ + "/" + last_file); @@ -1053,11 +1067,9 @@ void DBTable::DeleteObsoleteFiles(uint64_t seq_no) { last_file = filenames[i]; } } - Log(options_.info_log, "[%s] delete obsolete log: %u, keep: %u, [seq < %llu]", - dbname_.c_str(), - static_cast(delete_log_num), - static_cast(keep_log_num), - static_cast(seq_no)); + LEVELDB_LOG(options_.info_log, "[%s] delete obsolete log: %u, keep: %u, [seq < %llu]", + dbname_.c_str(), static_cast(delete_log_num), + static_cast(keep_log_num), static_cast(seq_no)); } void DBTable::ArchiveFile(const std::string& fname) { @@ -1072,9 +1084,8 @@ void DBTable::ArchiveFile(const std::string& fname) { new_file.append("/"); new_file.append((slash == NULL) ? fname.c_str() : slash + 1); Status s = env_->RenameFile(fname, new_file); - Log(options_.info_log, "[%s] Archiving %s: %s\n", - dbname_.c_str(), - fname.c_str(), s.ToString().c_str()); + LEVELDB_LOG(options_.info_log, "[%s] Archiving %s: %s\n", dbname_.c_str(), fname.c_str(), + s.ToString().c_str()); } // tera-specific @@ -1088,8 +1099,7 @@ bool DBTable::FindSplitKey(double ratio, std::string* split_key) { lg_list_[*it]->GetApproximateSizes(&size); size_of_lg[size] = lg_list_[*it]; } - std::map::reverse_iterator biggest_it = - size_of_lg.rbegin(); + std::map::reverse_iterator biggest_it = size_of_lg.rbegin(); if (biggest_it == size_of_lg.rend()) { return false; } @@ -1139,13 +1149,11 @@ void DBTable::AddInheritedLiveFiles(std::vector >* live) { lg_list_[i]->AddInheritedLiveFiles(live); } } - //Log(options_.info_log, "[%s] finish collect inherited sst fils", + // LEVELDB_LOG(options_.info_log, "[%s] finish collect inherited sst fils", // dbname_.c_str()); } -bool DBTable::IsShutdown1Finished() const { - return shutdown1_finished_.Acquire_Load() != NULL; -} +bool DBTable::IsShutdown1Finished() const { return shutdown1_finished_.Acquire_Load() != NULL; } // end of tera-specific @@ -1178,25 +1186,22 @@ Iterator* DBTable::TEST_NewInternalIterator() { return NewMergingIterator(options_.comparator, &list[0], list.size()); } -int64_t DBTable::TEST_MaxNextLevelOverlappingBytes() { - return 0; -} +int64_t DBTable::TEST_MaxNextLevelOverlappingBytes() { return 0; } int DBTable::SwitchLog(bool blocked_switch) { { MutexLock l(&mutex_); if (fatal_error_.IsIOPermissionDenied() || IsShutdown1Finished()) { if (IsShutdown1Finished()) { - fatal_error_ = Status::ShutdownInProgress(dbname_ + - ": fail to switch log on waiting shutdown2"); + fatal_error_ = + Status::ShutdownInProgress(dbname_ + ": fail to switch log on waiting shutdown2"); } - Log(options_.info_log, "[%s] can not switch log becasue %s", - dbname_.c_str(), fatal_error_.ToString().c_str()); + LEVELDB_LOG(options_.info_log, "[%s] can not switch log becasue %s", dbname_.c_str(), + fatal_error_.ToString().c_str()); return 2; } } - if (!blocked_switch || - log::AsyncWriter::BlockLogNum() < options_.max_block_log_number) { + if (!blocked_switch || log::AsyncWriter::BlockLogNum() < options_.max_block_log_number) { if (current_log_size_ == 0) { last_sequence_++; } @@ -1217,22 +1222,22 @@ int DBTable::SwitchLog(bool blocked_switch) { if (blocked_switch) { // if we switched log because it was blocked log::AsyncWriter::BlockLogNumInc(); - Log(options_.info_log, "[%s] SwitchLog", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] SwitchLog", dbname_.c_str()); } - return 0; // success + return 0; // success } else if (s.IsIOPermissionDenied()) { - MutexLock l(&mutex_); - fatal_error_ = s; - return 2; // posix error EACCES = 13 + MutexLock l(&mutex_); + fatal_error_ = s; + return 2; // posix error EACCES = 13 } else { - Log(options_.info_log, "[%s] fail to open logfile %s. SwitchLog failed", - dbname_.c_str(), log_file_name.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] fail to open logfile %s. SwitchLog failed", + dbname_.c_str(), log_file_name.c_str()); if (!blocked_switch) { - return 2; // wanted to switch log but failed + return 2; // wanted to switch log but failed } } } - return 1; // cannot switch log right now + return 1; // cannot switch log right now } void DBTable::ScheduleGarbageClean(double score) { @@ -1244,16 +1249,16 @@ void DBTable::ScheduleGarbageClean(double score) { if (bg_schedule_gc_ && score <= bg_schedule_gc_score_) { return; } else if (bg_schedule_gc_) { - Log(options_.info_log, "[%s] ReSchedule Garbage clean[%ld] score= %.2f", - dbname_.c_str(), bg_schedule_gc_id_, score); + LEVELDB_LOG(options_.info_log, "[%s] ReSchedule Garbage clean[%ld] score= %.2f", + dbname_.c_str(), bg_schedule_gc_id_, score); env_->ReSchedule(bg_schedule_gc_id_, score); bg_schedule_gc_score_ = score; } else { bg_schedule_gc_id_ = env_->Schedule(&DBTable::GarbageCleanWrapper, this, score); bg_schedule_gc_score_ = score; bg_schedule_gc_ = true; - Log(options_.info_log, "[%s] Schedule Garbage clean[%ld] score= %.2f", - dbname_.c_str(), bg_schedule_gc_id_, score); + LEVELDB_LOG(options_.info_log, "[%s] Schedule Garbage clean[%ld] score= %.2f", dbname_.c_str(), + bg_schedule_gc_id_, score); } } @@ -1285,15 +1290,15 @@ void DBTable::GarbageClean() { } } if (force_clean_log_seq_ > min_last_seq) { - Log(options_.info_log, "[%s] force_clean_log_seq_= %lu, min_last_seq= %lu", - dbname_.c_str(), force_clean_log_seq_, min_last_seq); + LEVELDB_LOG(options_.info_log, "[%s] force_clean_log_seq_= %lu, min_last_seq= %lu", + dbname_.c_str(), force_clean_log_seq_, min_last_seq); min_last_seq = force_clean_log_seq_; found = true; } if (found && min_last_seq > 0) { - Log(options_.info_log, "[%s] delete obsolete file, seq_no below: %lu", - dbname_.c_str(), min_last_seq); + LEVELDB_LOG(options_.info_log, "[%s] delete obsolete file, seq_no below: %lu", dbname_.c_str(), + min_last_seq); DeleteObsoleteFiles(min_last_seq); } } @@ -1307,9 +1312,9 @@ void DBTable::GetCurrentLevelSize(std::vector* result) { uint32_t i = *it; lg_list_[i]->GetCurrentLevelSize(&lg_level_size); assert(result->size() == lg_level_size.size()); - for (size_t level = 0; level != lg_level_size.size(); ++ level) { + for (size_t level = 0; level != lg_level_size.size(); ++level) { (*result)[level] += lg_level_size[level]; } - } + } } } diff --git a/src/leveldb/db/db_table.h b/src/leveldb/db/db_table.h index c0530ebf6..4d2fc1dfa 100644 --- a/src/leveldb/db/db_table.h +++ b/src/leveldb/db/db_table.h @@ -27,193 +27,191 @@ class MemTable; class FileLock; class DBTable : public DB { -public: - DBTable(const Options& options, const std::string& dbname); - virtual ~DBTable(); - - Status Init(); - virtual Status Shutdown1(); - virtual Status Shutdown2(); - - // Set the database entry for "key" to "value". Returns OK on success, - // and a non-OK status on error. - // Note: consider setting options.sync = true. - virtual Status Put(const WriteOptions& options, - const Slice& key, - const Slice& value); - - // Remove the database entry (if any) for "key". Returns OK on - // success, and a non-OK status on error. It is not an error if "key" - // did not exist in the database. - // Note: consider setting options.sync = true. - virtual Status Delete(const WriteOptions& options, const Slice& key); - - // Is too busy to write. - virtual bool BusyWrite(); - - virtual void Workload(double* write_workload); - - // Apply the specified updates to the database. - // Returns OK on success, non-OK on failure. - // Note: consider setting options.sync = true. - virtual Status Write(const WriteOptions& options, WriteBatch* updates); - - // If the database contains an entry for "key" store the - // corresponding value in *value and return OK. - // - // If there is no entry for "key" leave *value unchanged and return - // a status for which Status::IsNotFound() returns true. - // - // May return some other Status on an error. - virtual Status Get(const ReadOptions& options, - const Slice& key, std::string* value); - - // Return a heap-allocated iterator over the contents of the database. - // The result of NewIterator() is initially invalid (caller must - // call one of the Seek methods on the iterator before using it). - // - // Caller should delete the iterator when it is no longer needed. - // The returned iterator should be deleted before this db is deleted. - virtual Iterator* NewIterator(const ReadOptions& options); - - // Return a handle to the current DB state. Iterators created with - // this handle will all observe a stable snapshot of the current DB - // state. The caller must call ReleaseSnapshot(result) when the - // snapshot is no longer needed. - virtual const uint64_t GetSnapshot(uint64_t last_sequence = kMaxSequenceNumber); - - // Release a previously acquired snapshot. The caller must not - // use "snapshot" after this call. - virtual void ReleaseSnapshot(uint64_t sequence_number); - - virtual const uint64_t Rollback(uint64_t snapshot_seq, uint64_t rollback_point = kMaxSequenceNumber); - - virtual bool ShouldForceUnloadOnError(); - - // DB implementations can export properties about their state - // via this method. If "property" is a valid property understood by this - // DB implementation, fills "*value" with its current value and returns - // true. Otherwise returns false. - // - // - // Valid property names include: - // - // "leveldb.num-files-at-level" - return the number of files at level , - // where is an ASCII representation of a level number (e.g. "0"). - // "leveldb.stats" - returns a multi-line string that describes statistics - // about the internal operation of the DB. - // "leveldb.sstables" - returns a multi-line string that describes all - // of the sstables that make up the db contents. - virtual bool GetProperty(const Slice& property, std::string* value); - - // For each i in [0,n-1], store in "sizes[i]", the approximate - // file system space used by keys in "[range[i].start .. range[i].limit)". - // - // Note that the returned sizes measure file system space usage, so - // if the user data compresses by a factor of ten, the returned - // sizes will be one-tenth the size of the corresponding user data size. - // - // The results may not include the sizes of recently written data. - virtual void GetApproximateSizes(const Range* range, int n, - uint64_t* sizes); - // tera-specific - // size: db size, include mem, imm, all sst files - // lgsize: each lg size, include all storage - virtual void GetApproximateSizes(uint64_t* size, std::vector* lgsize); - - // tera-specific - // result: each level's total file size - virtual void GetCurrentLevelSize(std::vector *result); - - // Compact the underlying storage for the key range [*begin,*end]. - // In particular, deleted and overwritten versions are discarded, - // and the data is rearranged to reduce the cost of operations - // needed to access the data. This operation should typically only - // be invoked by users who understand the underlying implementation. - // - // begin==NULL is treated as a key before all keys in the database. - // end==NULL is treated as a key after all keys in the database. - // Therefore the following call will compact the entire database: - // db->CompactRange(NULL, NULL); - virtual void CompactRange(const Slice* begin, const Slice* end, int lg_no); - - // tera-specific - virtual bool FindSplitKey(double ratio, std::string* split_key); - - virtual bool FindKeyRange(std::string* smallest_key, std::string* largest_key); - - virtual bool MinorCompact(); - - // Add all sst files inherited from other tablets - virtual void AddInheritedLiveFiles(std::vector >* live); - - // Strategy : Always return True begin shutdown1 finished. Else return False - virtual bool IsShutdown1Finished() const; - - // for unit test - Status TEST_CompactMemTable(); - void TEST_CompactRange(int level, const Slice* begin, const Slice* end); - Iterator* TEST_NewInternalIterator(); - int64_t TEST_MaxNextLevelOverlappingBytes(); - -private: - struct RecordWriter; - WriteBatch* GroupWriteBatch(RecordWriter** last_writer); - - Status RecoverLogFile(uint64_t log_number, uint64_t recover_limit, - std::vector* edit_list); - void MaybeIgnoreError(Status* s) const; - Status GatherLogFile(uint64_t begin_num, - std::vector* logfiles); - Status DeleteLogFile(const std::vector& log_numbers); - void DeleteObsoleteFiles(uint64_t seq_no = -1U); - void ArchiveFile(const std::string& filepath); - - // return 0: switch log successed - // return 1: cannot switch log right now - // return 2: can switch but failed - int SwitchLog(bool blocked_switch); - void ScheduleGarbageClean(double score); - static void GarbageCleanWrapper(void* db); - void BackgroundGarbageClean(); - void GarbageClean(); - -private: - State state_; - std::vector lg_list_; - port::Mutex mutex_; - // store not null at shutdown1 start - port::AtomicPointer shutting_down_; - // store not null at shutdown1 finished and waiting for shutdown2 - port::AtomicPointer shutdown1_finished_; - port::CondVar bg_cv_; - port::CondVar bg_cv_timer_; - port::CondVar bg_cv_sleeper_; - const Options options_; - const std::string dbname_; - Env* const env_; - // Lock over the persistent DB state. Non-NULL iff successfully acquired. - FileLock* db_lock_; - bool created_own_lg_list_; - bool created_own_info_log_; - bool created_own_compact_strategy_; - uint64_t commit_snapshot_; - Status fatal_error_; - - WritableFile* logfile_; - log::AsyncWriter* log_; - bool force_switch_log_; - uint64_t last_sequence_; - size_t current_log_size_; - - std::deque writers_; - WriteBatch* tmp_batch_; - - // for GC schedule - bool bg_schedule_gc_; - int64_t bg_schedule_gc_id_; - double bg_schedule_gc_score_; - uint64_t force_clean_log_seq_; + public: + DBTable(const Options& options, const std::string& dbname); + virtual ~DBTable(); + + Status Init(); + virtual Status Shutdown1(); + virtual Status Shutdown2(); + + // Set the database entry for "key" to "value". Returns OK on success, + // and a non-OK status on error. + // Note: consider setting options.sync = true. + virtual Status Put(const WriteOptions& options, const Slice& key, const Slice& value); + + // Remove the database entry (if any) for "key". Returns OK on + // success, and a non-OK status on error. It is not an error if "key" + // did not exist in the database. + // Note: consider setting options.sync = true. + virtual Status Delete(const WriteOptions& options, const Slice& key); + + // Is too busy to write. + virtual bool BusyWrite(); + + virtual void Workload(double* write_workload); + + // Apply the specified updates to the database. + // Returns OK on success, non-OK on failure. + // Note: consider setting options.sync = true. + virtual Status Write(const WriteOptions& options, WriteBatch* updates); + + // If the database contains an entry for "key" store the + // corresponding value in *value and return OK. + // + // If there is no entry for "key" leave *value unchanged and return + // a status for which Status::IsNotFound() returns true. + // + // May return some other Status on an error. + virtual Status Get(const ReadOptions& options, const Slice& key, std::string* value); + + // Return a heap-allocated iterator over the contents of the database. + // The result of NewIterator() is initially invalid (caller must + // call one of the Seek methods on the iterator before using it). + // + // Caller should delete the iterator when it is no longer needed. + // The returned iterator should be deleted before this db is deleted. + virtual Iterator* NewIterator(const ReadOptions& options); + + // Return a handle to the current DB state. Iterators created with + // this handle will all observe a stable snapshot of the current DB + // state. The caller must call ReleaseSnapshot(result) when the + // snapshot is no longer needed. + virtual const uint64_t GetSnapshot(uint64_t last_sequence = kMaxSequenceNumber); + + // Release a previously acquired snapshot. The caller must not + // use "snapshot" after this call. + virtual void ReleaseSnapshot(uint64_t sequence_number); + + virtual const uint64_t Rollback(uint64_t snapshot_seq, + uint64_t rollback_point = kMaxSequenceNumber); + + virtual bool ShouldForceUnloadOnError(); + + // DB implementations can export properties about their state + // via this method. If "property" is a valid property understood by this + // DB implementation, fills "*value" with its current value and returns + // true. Otherwise returns false. + // + // + // Valid property names include: + // + // "leveldb.num-files-at-level" - return the number of files at level , + // where is an ASCII representation of a level number (e.g. "0"). + // "leveldb.stats" - returns a multi-line string that describes statistics + // about the internal operation of the DB. + // "leveldb.sstables" - returns a multi-line string that describes all + // of the sstables that make up the db contents. + virtual bool GetProperty(const Slice& property, std::string* value); + + // For each i in [0,n-1], store in "sizes[i]", the approximate + // file system space used by keys in "[range[i].start .. range[i].limit)". + // + // Note that the returned sizes measure file system space usage, so + // if the user data compresses by a factor of ten, the returned + // sizes will be one-tenth the size of the corresponding user data size. + // + // The results may not include the sizes of recently written data. + virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes); + // tera-specific + // size: db size, include mem, imm, all sst files + // lgsize: each lg size, include all storage + // mem_table_size: memtable's size, for analyzing memory usage. + virtual void GetApproximateSizes(uint64_t* size, std::vector* lgsize = NULL, + uint64_t* mem_table_size = NULL); + + // tera-specific + // result: each level's total file size + virtual void GetCurrentLevelSize(std::vector* result); + + // Compact the underlying storage for the key range [*begin,*end]. + // In particular, deleted and overwritten versions are discarded, + // and the data is rearranged to reduce the cost of operations + // needed to access the data. This operation should typically only + // be invoked by users who understand the underlying implementation. + // + // begin==NULL is treated as a key before all keys in the database. + // end==NULL is treated as a key after all keys in the database. + // Therefore the following call will compact the entire database: + // db->CompactRange(NULL, NULL); + virtual void CompactRange(const Slice* begin, const Slice* end, int lg_no); + + // tera-specific + virtual bool FindSplitKey(double ratio, std::string* split_key); + + virtual bool FindKeyRange(std::string* smallest_key, std::string* largest_key); + + virtual bool MinorCompact(); + + // Add all sst files inherited from other tablets + virtual void AddInheritedLiveFiles(std::vector >* live); + + // Strategy : Always return True begin shutdown1 finished. Else return False + virtual bool IsShutdown1Finished() const; + + // for unit test + Status TEST_CompactMemTable(); + void TEST_CompactRange(int level, const Slice* begin, const Slice* end); + Iterator* TEST_NewInternalIterator(); + int64_t TEST_MaxNextLevelOverlappingBytes(); + + private: + struct RecordWriter; + WriteBatch* GroupWriteBatch(RecordWriter** last_writer); + + Status RecoverLogFile(uint64_t log_number, uint64_t recover_limit, + std::vector* edit_list); + void MaybeIgnoreError(Status* s) const; + Status GatherLogFile(uint64_t begin_num, std::vector* logfiles); + Status DeleteLogFile(const std::vector& log_numbers); + void DeleteObsoleteFiles(uint64_t seq_no = -1U); + void ArchiveFile(const std::string& filepath); + + // return 0: switch log successed + // return 1: cannot switch log right now + // return 2: can switch but failed + int SwitchLog(bool blocked_switch); + void ScheduleGarbageClean(double score); + static void GarbageCleanWrapper(void* db); + void BackgroundGarbageClean(); + void GarbageClean(); + + private: + State state_; + std::vector lg_list_; + port::Mutex mutex_; + // store not null at shutdown1 start + port::AtomicPointer shutting_down_; + // store not null at shutdown1 finished and waiting for shutdown2 + port::AtomicPointer shutdown1_finished_; + port::CondVar bg_cv_; + port::CondVar bg_cv_timer_; + port::CondVar bg_cv_sleeper_; + const Options options_; + const std::string dbname_; + Env* const env_; + // Lock over the persistent DB state. Non-NULL iff successfully acquired. + FileLock* db_lock_; + bool created_own_lg_list_; + bool created_own_info_log_; + bool created_own_compact_strategy_; + uint64_t commit_snapshot_; + Status fatal_error_; + + WritableFile* logfile_; + log::AsyncWriter* log_; + bool force_switch_log_; + uint64_t last_sequence_; + size_t current_log_size_; + + std::deque writers_; + WriteBatch* tmp_batch_; + + // for GC schedule + bool bg_schedule_gc_; + int64_t bg_schedule_gc_id_; + double bg_schedule_gc_score_; + uint64_t force_clean_log_seq_; }; } // namespace leveldb diff --git a/src/leveldb/db/db_test.cc b/src/leveldb/db/db_test.cc index 55a5fda8f..d8df40157 100644 --- a/src/leveldb/db/db_test.cc +++ b/src/leveldb/db/db_test.cc @@ -37,11 +37,10 @@ class AtomicCounter { private: port::Mutex mu_; int count_; + public: - AtomicCounter() : count_(0) { } - void Increment() { - IncrementBy(1); - } + AtomicCounter() : count_(0) {} + void Increment() { IncrementBy(1); } void IncrementBy(int count) { MutexLock l(&mu_); count_ += count; @@ -56,9 +55,7 @@ class AtomicCounter { } }; -void DelayMilliseconds(int millis) { - Env::Default()->SleepForMicroseconds(millis * 1000); -} +void DelayMilliseconds(int millis) { Env::Default()->SleepForMicroseconds(millis * 1000); } } // Special Env used to delay background operations @@ -78,7 +75,7 @@ class SpecialEnv : public EnvWrapper { // Force write to manifest files to fail while this pointer is non-NULL port::AtomicPointer manifest_write_error_; - + bool count_random_reads_; AtomicCounter random_read_counter_; @@ -104,10 +101,7 @@ class SpecialEnv : public EnvWrapper { WritableFile* base_; public: - InitLoadLockFile(SpecialEnv* env, WritableFile* base) - : env_(env), - base_(base) { - } + InitLoadLockFile(SpecialEnv* env, WritableFile* base) : env_(env), base_(base) {} ~InitLoadLockFile() { delete base_; } Status Append(const Slice& data) { return base_->Append(data); } Status Close() { return base_->Close(); } @@ -121,10 +115,7 @@ class SpecialEnv : public EnvWrapper { WritableFile* base_; public: - SSTableFile(SpecialEnv* env, WritableFile* base) - : env_(env), - base_(base) { - } + SSTableFile(SpecialEnv* env, WritableFile* base) : env_(env), base_(base) {} ~SSTableFile() { delete base_; } Status Append(const Slice& data) { if (env_->no_space_.Acquire_Load() != NULL) { @@ -147,9 +138,9 @@ class SpecialEnv : public EnvWrapper { private: SpecialEnv* env_; WritableFile* base_; + public: - ManifestFile(SpecialEnv* env, WritableFile* b) - : env_(env), base_(b) { } + ManifestFile(SpecialEnv* env, WritableFile* b) : env_(env), base_(b) {} ~ManifestFile() { delete base_; } Status Append(const Slice& data) { env_->write_retry_c_.Increment(); @@ -164,8 +155,7 @@ class SpecialEnv : public EnvWrapper { Status Flush() { return base_->Flush(); } Status Sync() { env_->sync_retry_c_.Increment(); - if (env_->manifest_sync_error_.Acquire_Load() != NULL && - env_->sync_retry_c_.Read() < 10) { + if (env_->manifest_sync_error_.Acquire_Load() != NULL && env_->sync_retry_c_.Read() < 10) { return Status::IOError("simulated sync error"); } else { return base_->Sync(); @@ -196,13 +186,12 @@ class SpecialEnv : public EnvWrapper { private: RandomAccessFile* target_; AtomicCounter* counter_; + public: CountingFile(RandomAccessFile* target, AtomicCounter* counter) - : target_(target), counter_(counter) { - } + : target_(target), counter_(counter) {} virtual ~CountingFile() { delete target_; } - virtual Status Read(uint64_t offset, size_t n, Slice* result, - char* scratch) const { + virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const { counter_->Increment(); return target_->Read(offset, n, result, scratch); } @@ -219,7 +208,6 @@ class SpecialEnv : public EnvWrapper { sleep_counter_.Increment(); sleep_time_counter_.IncrementBy(micros); } - }; class DBTest { @@ -227,12 +215,7 @@ class DBTest { const FilterPolicy* filter_policy_; // Sequence of option configurations to try - enum OptionConfig { - kDefault, - kFilter, - kUncompressed, - kEnd - }; + enum OptionConfig { kDefault, kFilter, kUncompressed, kEnd }; int option_config_; public: @@ -242,8 +225,7 @@ class DBTest { Options last_options_; - DBTest() : option_config_(kDefault), - env_(new SpecialEnv(Env::Default())) { + DBTest() : option_config_(kDefault), env_(new SpecialEnv(Env::Default())) { filter_policy_ = NewBloomFilterPolicy(10); dbname_ = test::TmpDir() + "/db_test/tablet00000012"; DestroyDB(dbname_, Options()); @@ -275,7 +257,7 @@ class DBTest { Options options; options.dump_mem_on_shutdown = false; Logger* logger; - Env::Default()->NewLogger("/tmp/db_test.log", &logger); + Env::Default()->NewLogger("/tmp/db_test.log", LogOption::LogOptionBuilder().Build(), &logger); Env::Default()->SetLogger(logger); options.info_log = logger; switch (option_config_) { @@ -295,7 +277,8 @@ class DBTest { Options options; options.dump_mem_on_shutdown = false; Logger* logger; - Env::Default()->NewLogger("/tmp/db_test_split.log", &logger); + Env::Default()->NewLogger("/tmp/db_test_split.log", LogOption::LogOptionBuilder().Build(), + &logger); Env::Default()->SetLogger(logger); options.info_log = logger; switch (option_config_) { @@ -317,13 +300,9 @@ class DBTest { return options; } - DBTable* dbfull() { - return reinterpret_cast(db_); - } + DBTable* dbfull() { return reinterpret_cast(db_); } - void Reopen(Options* options = NULL) { - ASSERT_OK(TryReopen(options)); - } + void Reopen(Options* options = NULL) { ASSERT_OK(TryReopen(options)); } void Close() { delete db_; @@ -351,13 +330,9 @@ class DBTest { return DB::Open(opts, dbname_, &db_); } - Status Put(const std::string& k, const std::string& v) { - return db_->Put(WriteOptions(), k, v); - } + Status Put(const std::string& k, const std::string& v) { return db_->Put(WriteOptions(), k, v); } - Status Delete(const std::string& k) { - return db_->Delete(WriteOptions(), k); - } + Status Delete(const std::string& k) { return db_->Delete(WriteOptions(), k); } std::string Get(const std::string& k, const uint64_t snapshot = leveldb::kMaxSequenceNumber) { ReadOptions options; @@ -443,9 +418,7 @@ class DBTest { int NumTableFilesAtLevel(int level) { std::string property; - ASSERT_TRUE( - db_->GetProperty("leveldb.num-files-at-level" + NumberToString(level), - &property)); + ASSERT_TRUE(db_->GetProperty("leveldb.num-files-at-level" + NumberToString(level), &property)); return atoi(property.c_str()); } @@ -487,9 +460,7 @@ class DBTest { return size; } - void Compact(const Slice& start, const Slice& limit) { - db_->CompactRange(&start, &limit); - } + void Compact(const Slice& start, const Slice& limit) { db_->CompactRange(&start, &limit); } // Do n memtable compactions, each of which produces an sstable // covering the range [small,large]. @@ -510,8 +481,7 @@ class DBTest { void DumpFileCounts(const char* label) { fprintf(stderr, "---\n%s:\n", label); fprintf(stderr, "maxoverlap: %lld\n", - static_cast( - dbfull()->TEST_MaxNextLevelOverlappingBytes())); + static_cast(dbfull()->TEST_MaxNextLevelOverlappingBytes())); for (int level = 0; level < config::kNumLevels; level++) { int num = NumTableFilesAtLevel(level); if (num > 0) { @@ -591,11 +561,11 @@ TEST(DBTest, GetFromImmutableLayer) { ASSERT_OK(Put("foo", "v1")); ASSERT_EQ("v1", Get("foo")); - env_->delay_sstable_sync_.Release_Store(env_); // Block sync calls - Put("k1", std::string(100000, 'x')); // Fill memtable - Put("k2", std::string(100000, 'y')); // Trigger compaction + env_->delay_sstable_sync_.Release_Store(env_); // Block sync calls + Put("k1", std::string(100000, 'x')); // Fill memtable + Put("k2", std::string(100000, 'y')); // Trigger compaction ASSERT_EQ("v1", Get("foo")); - env_->delay_sstable_sync_.Release_Store(NULL); // Release sync calls + env_->delay_sstable_sync_.Release_Store(NULL); // Release sync calls } while (ChangeOptions()); } @@ -679,8 +649,7 @@ TEST(DBTest, GetEncountersEmptyLevel) { // Step 1: First place sstables in levels 0 and 2 int compaction_count = 0; - while (NumTableFilesAtLevel(0) == 0 || - NumTableFilesAtLevel(2) == 0) { + while (NumTableFilesAtLevel(0) == 0 || NumTableFilesAtLevel(2) == 0) { ASSERT_LE(compaction_count, 100) << "could not fill levels 0 and 2"; compaction_count++; Put("a", "begin"); @@ -817,10 +786,10 @@ TEST(DBTest, IterMulti) { ASSERT_EQ(IterStatus(iter), "b->vb"); // Make sure iter stays at snapshot - ASSERT_OK(Put("a", "va2")); + ASSERT_OK(Put("a", "va2")); ASSERT_OK(Put("a2", "va3")); - ASSERT_OK(Put("b", "vb2")); - ASSERT_OK(Put("c", "vc2")); + ASSERT_OK(Put("b", "vb2")); + ASSERT_OK(Put("c", "vc2")); ASSERT_OK(Delete("b")); iter->SeekToFirst(); ASSERT_EQ(IterStatus(iter), "a->va"); @@ -929,7 +898,7 @@ TEST(DBTest, RecoverWithLostCurrent0) { Options current_opt = CurrentOptions(); current_opt.dump_mem_on_shutdown = true; Reopen(¤t_opt); - Compact("1","6"); + Compact("1", "6"); Close(); std::string old_dbname = dbname_; dbname_ = dbname_.replace(dbname_.length() - 2, 1, "3"); @@ -950,7 +919,7 @@ TEST(DBTest, RecoverWithLostCurrent0) { } TEST(DBTest, RecoverWithLostCurrent1) { - // before write anything delete current file + // before write anything delete current file ASSERT_OK(env_->DeleteFile(CurrentFileName(dbname_ + "/0"))); leveldb::WritableFile* lock_file; ASSERT_OK(env_->NewWritableFile(dbname_ + "/0/__init_load_filelock", &lock_file, EnvOptions())); @@ -967,7 +936,7 @@ TEST(DBTest, RecoverWithLostCurrent1) { } TEST(DBTest, RecoverWithLostManifest) { - // before write anything delete current file + // before write anything delete current file ASSERT_OK(env_->DeleteFile(DescriptorFileName(dbname_ + "/0", 1))); leveldb::WritableFile* lock_file; ASSERT_OK(env_->NewWritableFile(dbname_ + "/0/__init_load_filelock", &lock_file, EnvOptions())); @@ -1081,7 +1050,7 @@ TEST(DBTest, RecoverWithLargeLog) { TEST(DBTest, CompactionsGenerateMultipleFiles) { Options options = CurrentOptions(); - options.write_buffer_size = 100000000; // Large write buffer + options.write_buffer_size = 100000000; // Large write buffer Reopen(&options); Random rnd(301); @@ -1099,13 +1068,13 @@ TEST(DBTest, CompactionsGenerateMultipleFiles) { dbfull()->TEST_CompactRange(0, NULL, NULL); ASSERT_EQ(NumTableFilesAtLevel(0), 0); -// ASSERT_GT(NumTableFilesAtLevel(1), 1); + // ASSERT_GT(NumTableFilesAtLevel(1), 1); for (int i = 0; i < 80; i++) { ASSERT_EQ(Get(Key(i)), values[i]); } } -#if 0 // config::kL0_StopWritesTrigger is changed +#if 0 // config::kL0_StopWritesTrigger is changed TEST(DBTest, RepeatedWritesToSameKey) { Options options = CurrentOptions(); options.env = env_; @@ -1152,27 +1121,25 @@ TEST(DBTest, SparseMerge) { dbfull()->TEST_CompactRange(0, NULL, NULL); // Make sparse update - Put("A", "va2"); + Put("A", "va2"); Put("B100", "bvalue2"); - Put("C", "vc2"); + Put("C", "vc2"); dbfull()->TEST_CompactMemTable(); // Compactions should not cause us to create a situation where // a file overlaps too much data at the next level. - ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576); + ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20 * 1048576); dbfull()->TEST_CompactRange(0, NULL, NULL); - ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576); + ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20 * 1048576); dbfull()->TEST_CompactRange(1, NULL, NULL); - ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576); + ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20 * 1048576); } static bool Between(uint64_t val, uint64_t low, uint64_t high) { bool result = (val >= low) && (val <= high); if (!result) { - fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n", - (unsigned long long)(val), - (unsigned long long)(low), - (unsigned long long)(high)); + fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n", (unsigned long long)(val), + (unsigned long long)(low), (unsigned long long)(high)); } return result; } @@ -1180,7 +1147,7 @@ static bool Between(uint64_t val, uint64_t low, uint64_t high) { TEST(DBTest, ApproximateSizes) { do { Options options = CurrentOptions(); - options.write_buffer_size = 100000000; // Large write buffer + options.write_buffer_size = 100000000; // Large write buffer options.compression = kNoCompression; DestroyAndReopen(); @@ -1207,13 +1174,13 @@ TEST(DBTest, ApproximateSizes) { for (int compact_start = 0; compact_start < N; compact_start += 10) { for (int i = 0; i < N; i += 10) { - ASSERT_TRUE(Between(Size("", Key(i)), S1*i, S2*i)); - ASSERT_TRUE(Between(Size("", Key(i)+".suffix"), S1*(i+1), S2*(i+1))); - ASSERT_TRUE(Between(Size(Key(i), Key(i+10)), S1*10, S2*10)) + ASSERT_TRUE(Between(Size("", Key(i)), S1 * i, S2 * i)); + ASSERT_TRUE(Between(Size("", Key(i) + ".suffix"), S1 * (i + 1), S2 * (i + 1))); + ASSERT_TRUE(Between(Size(Key(i), Key(i + 10)), S1 * 10, S2 * 10)) << "[" << run << ", " << compact_start << ", " << i << "]"; } - ASSERT_TRUE(Between(Size("", Key(50)), S1*50, S2*50)); - ASSERT_TRUE(Between(Size("", Key(50)+".suffix"), S1*50, S2*50)); + ASSERT_TRUE(Between(Size("", Key(50)), S1 * 50, S2 * 50)); + ASSERT_TRUE(Between(Size("", Key(50) + ".suffix"), S1 * 50, S2 * 50)); std::string cstart_str = Key(compact_start); std::string cend_str = Key(compact_start + 9); @@ -1275,7 +1242,7 @@ TEST(DBTest, IteratorPinsRef) { // Write to force compactions Put("foo", "newvalue1"); for (int i = 0; i < 100; i++) { - ASSERT_OK(Put(Key(i), Key(i) + std::string(100000, 'v'))); // 100K values + ASSERT_OK(Put(Key(i), Key(i) + std::string(100000, 'v'))); // 100K values } Put("foo", "newvalue2"); @@ -1327,7 +1294,7 @@ TEST(DBTest, HiddenValuesAreRemoved) { Put("pastfoo", "v"); uint64_t snapshot = db_->GetSnapshot(); Put("foo", "tiny"); - Put("pastfoo2", "v2"); // Advance sequence number one more + Put("pastfoo2", "v2"); // Advance sequence number one more ASSERT_OK(dbfull()->TEST_CompactMemTable()); // tera-leveldb:kL0_CompactionTrigger == 2, compact will happen @@ -1355,14 +1322,14 @@ TEST(DBTest, DeletionMarkers1) { Put("foo", "v1"); ASSERT_OK(dbfull()->TEST_CompactMemTable()); const int last = config::kMaxMemCompactLevel; - ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level + ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level // Place a table at level last-1 to prevent merging with preceding mutation Put("a", "begin"); Put("z", "end"); dbfull()->TEST_CompactMemTable(); ASSERT_EQ(NumTableFilesAtLevel(last), 1); - ASSERT_EQ(NumTableFilesAtLevel(last-1), 1); + ASSERT_EQ(NumTableFilesAtLevel(last - 1), 1); Delete("foo"); Put("foo", "v2"); @@ -1370,11 +1337,11 @@ TEST(DBTest, DeletionMarkers1) { ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2 ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]"); Slice z("z"); - dbfull()->TEST_CompactRange(last-2, NULL, &z); + dbfull()->TEST_CompactRange(last - 2, NULL, &z); // DEL eliminated, but v1 remains because we aren't compacting that level // (DEL can be eliminated because v2 hides v1). ASSERT_EQ(AllEntriesFor("foo"), "[ v2, v1 ]"); - dbfull()->TEST_CompactRange(last-1, NULL, NULL); + dbfull()->TEST_CompactRange(last - 1, NULL, NULL); // Merging last-1 w/ last, so we are the base level for "foo", so // DEL is removed. (as is v1). ASSERT_EQ(AllEntriesFor("foo"), "[ v2 ]"); @@ -1384,26 +1351,26 @@ TEST(DBTest, DeletionMarkers2) { Put("foo", "v1"); ASSERT_OK(dbfull()->TEST_CompactMemTable()); const int last = config::kMaxMemCompactLevel; - ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level + ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level // Place a table at level last-1 to prevent merging with preceding mutation Put("a", "begin"); Put("z", "end"); dbfull()->TEST_CompactMemTable(); ASSERT_EQ(NumTableFilesAtLevel(last), 1); - ASSERT_EQ(NumTableFilesAtLevel(last-1), 1); + ASSERT_EQ(NumTableFilesAtLevel(last - 1), 1); Delete("foo"); ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]"); ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2 ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]"); - dbfull()->TEST_CompactRange(last-2, NULL, NULL); - sleep(3); // del compaction stragety will be auto trigger. + dbfull()->TEST_CompactRange(last - 2, NULL, NULL); + sleep(3); // del compaction stragety will be auto trigger. // DEL kept: "last" file overlaps - //ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]"); + // ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]"); ASSERT_EQ(AllEntriesFor("foo"), "[ ]"); - dbfull()->TEST_CompactRange(last-1, NULL, NULL); + dbfull()->TEST_CompactRange(last - 1, NULL, NULL); // Merging last-1 w/ last, so we are the base level for "foo", so // DEL is removed. (as is v1). ASSERT_EQ(AllEntriesFor("foo"), "[ ]"); @@ -1471,21 +1438,21 @@ TEST(DBTest, L0_CompactionBug_Issue44_a) { TEST(DBTest, L0_CompactionBug_Issue44_b) { Reopen(); - Put("",""); + Put("", ""); Reopen(); Delete("e"); - Put("",""); + Put("", ""); Reopen(); Put("c", "cv"); Reopen(); - Put("",""); + Put("", ""); Reopen(); - Put("",""); + Put("", ""); DelayMilliseconds(1000); // Wait for compaction to finish Reopen(); - Put("d","dv"); + Put("d", "dv"); Reopen(); - Put("",""); + Put("", ""); Reopen(); Delete("d"); Delete("b"); @@ -1514,40 +1481,36 @@ TEST(DBTest, ComparatorCheck) { new_options.comparator = &cmp; Status s = TryReopen(&new_options); ASSERT_TRUE(!s.ok()); - ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos) - << s.ToString(); + ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos) << s.ToString(); } TEST(DBTest, CustomComparator) { class NumberComparator : public Comparator { public: virtual const char* Name() const { return "test.NumberComparator"; } - virtual int Compare(const Slice& a, const Slice& b) const { - return ToNumber(a) - ToNumber(b); - } + virtual int Compare(const Slice& a, const Slice& b) const { return ToNumber(a) - ToNumber(b); } virtual void FindShortestSeparator(std::string* s, const Slice& l) const { - ToNumber(*s); // Check format - ToNumber(l); // Check format + ToNumber(*s); // Check format + ToNumber(l); // Check format } virtual void FindShortSuccessor(std::string* key) const { - ToNumber(*key); // Check format + ToNumber(*key); // Check format } + private: static int ToNumber(const Slice& x) { // Check that there are no extra characters. - ASSERT_TRUE(x.size() >= 2 && x[0] == '[' && x[x.size()-1] == ']') - << EscapeString(x); + ASSERT_TRUE(x.size() >= 2 && x[0] == '[' && x[x.size() - 1] == ']') << EscapeString(x); int val; char ignored; - ASSERT_TRUE(sscanf(x.ToString().c_str(), "[%i]%c", &val, &ignored) == 1) - << EscapeString(x); + ASSERT_TRUE(sscanf(x.ToString().c_str(), "[%i]%c", &val, &ignored) == 1) << EscapeString(x); return val; } }; NumberComparator cmp; Options new_options = CurrentOptions(); new_options.comparator = &cmp; - new_options.filter_policy = NULL; // Cannot use bloom filters + new_options.filter_policy = NULL; // Cannot use bloom filters new_options.write_buffer_size = 1000; // Compact more often DestroyAndReopen(&new_options); ASSERT_OK(Put("[10]", "ten")); @@ -1565,7 +1528,7 @@ TEST(DBTest, CustomComparator) { for (int run = 0; run < 2; run++) { for (int i = 0; i < 1000; i++) { char buf[100]; - snprintf(buf, sizeof(buf), "[%d]", i*10); + snprintf(buf, sizeof(buf), "[%d]", i * 10); ASSERT_OK(Put(buf, buf)); } Compact("[0]", "[1000000]"); @@ -1636,10 +1599,10 @@ TEST(DBTest, NoSpace) { ASSERT_EQ("v1", Get("foo")); Compact("a", "z"); const int num_files = CountFiles(); - env_->no_space_.Release_Store(env_); // Force out-of-space errors + env_->no_space_.Release_Store(env_); // Force out-of-space errors env_->sleep_counter_.Reset(); for (int i = 0; i < 5; i++) { - for (int level = 0; level < config::kNumLevels-1; level++) { + for (int level = 0; level < config::kNumLevels - 1; level++) { dbfull()->TEST_CompactRange(level, NULL, NULL); } } @@ -1704,9 +1667,8 @@ TEST(DBTest, ManifestWriteError) { // We iterate twice. In the second iteration, everything is the // same except the log record never makes it to the MANIFEST file. for (int iter = 0; iter < 2; iter++) { - port::AtomicPointer* error_type = (iter == 0) - ? &env_->manifest_sync_error_ - : &env_->manifest_write_error_; + port::AtomicPointer* error_type = + (iter == 0) ? &env_->manifest_sync_error_ : &env_->manifest_write_error_; // Insert foo=>bar mapping Options options = CurrentOptions(); @@ -1720,7 +1682,7 @@ TEST(DBTest, ManifestWriteError) { dbfull()->TEST_CompactMemTable(); ASSERT_EQ("bar", Get("foo")); const int last = config::kMaxMemCompactLevel; - ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo=>bar is now in last level + ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo=>bar is now in last level // Merging compaction (will fail) error_type->Release_Store(env_); @@ -1748,7 +1710,7 @@ TEST(DBTest, MissingSSTFile) { options.paranoid_checks = true; Status s = TryReopen(&options); ASSERT_TRUE(!s.ok()); - //ASSERT_TRUE(s.ToString().find("issing") != std::string::npos) + // ASSERT_TRUE(s.ToString().find("issing") != std::string::npos) // << s.ToString(); } @@ -1793,7 +1755,7 @@ TEST(DBTest, BloomFilter) { int reads = env_->random_read_counter_.Read(); fprintf(stderr, "%d present => %d reads\n", N, reads); ASSERT_GE(reads, N); - ASSERT_LE(reads, N + 2*N/100); + ASSERT_LE(reads, N + 2 * N / 100); // Lookup present keys. Should rarely read from either sstable. env_->random_read_counter_.Reset(); @@ -1802,7 +1764,7 @@ TEST(DBTest, BloomFilter) { } reads = env_->random_read_counter_.Read(); fprintf(stderr, "%d missing => %d reads\n", N, reads); - ASSERT_LE(reads, 3*N/100); + ASSERT_LE(reads, 3 * N / 100); env_->delay_sstable_sync_.Release_Store(NULL); Close(); @@ -1848,8 +1810,7 @@ static void MTThreadBody(void* arg) { if (rnd.OneIn(2)) { // Write values of the form . // We add some padding for force compactions. - snprintf(valbuf, sizeof(valbuf), "%d.%d.%-1000d", - key, id, static_cast(counter)); + snprintf(valbuf, sizeof(valbuf), "%d.%d.%-1000d", key, id, static_cast(counter)); ASSERT_OK(db->Put(WriteOptions(), Slice(keybuf), Slice(valbuf))); } else { // Read a value and verify that it matches the pattern written above. @@ -1865,8 +1826,7 @@ static void MTThreadBody(void* arg) { ASSERT_EQ(k, key); ASSERT_GE(w, 0); ASSERT_LT(w, kNumThreads); - ASSERT_LE(c, reinterpret_cast( - t->state->counter[w].Acquire_Load())); + ASSERT_LE(c, reinterpret_cast(t->state->counter[w].Acquire_Load())); } } counter++; @@ -1913,26 +1873,23 @@ namespace { typedef std::map KVMap; } -class ModelDB: public DB { +class ModelDB : public DB { public: class ModelSnapshot { public: KVMap map_; }; - explicit ModelDB(const Options& options): options_(options), snapshot_id_(0) { } - ~ModelDB() { } + explicit ModelDB(const Options& options) : options_(options), snapshot_id_(0) {} + ~ModelDB() {} virtual Status Shutdown1() { return Status(); } virtual Status Shutdown2() { return Status(); } virtual Status Put(const WriteOptions& o, const Slice& k, const Slice& v) { return DB::Put(o, k, v); } - virtual Status Delete(const WriteOptions& o, const Slice& key) { - return DB::Delete(o, key); - } - virtual Status Get(const ReadOptions& options, - const Slice& key, std::string* value) { - assert(false); // Not implemented + virtual Status Delete(const WriteOptions& o, const Slice& key) { return DB::Delete(o, key); } + virtual Status Get(const ReadOptions& options, const Slice& key, std::string* value) { + assert(false); // Not implemented return Status::NotFound(key); } virtual Iterator* NewIterator(const ReadOptions& options) { @@ -1942,7 +1899,7 @@ class ModelDB: public DB { return new ModelIter(saved, true); } else { std::multimap::iterator it = snapshots_.find(options.snapshot); - assert (it != snapshots_.end()); + assert(it != snapshots_.end()); const KVMap* snapshot_state = &(it->second->map_); return new ModelIter(snapshot_state, false); } @@ -1957,18 +1914,17 @@ class ModelDB: public DB { virtual void ReleaseSnapshot(uint64_t snapshot) { std::multimap::iterator it = snapshots_.find(snapshot); if (it != snapshots_.end()) { - delete it->second; - snapshots_.erase(it); + delete it->second; + snapshots_.erase(it); } } - virtual const uint64_t Rollback(uint64_t snapshot_seq, uint64_t rollback_point = kMaxSequenceNumber) { - // TODO - return 0; - } - virtual bool BusyWrite() { - return false; + virtual const uint64_t Rollback(uint64_t snapshot_seq, + uint64_t rollback_point = kMaxSequenceNumber) { + // TODO + return 0; } + virtual bool BusyWrite() { return false; } virtual void Workload(double* write_workload) {} virtual Status Write(const WriteOptions& options, WriteBatch* batch) { @@ -1978,60 +1934,44 @@ class ModelDB: public DB { virtual void Put(const Slice& key, const Slice& value) { (*map_)[key.ToString()] = value.ToString(); } - virtual void Delete(const Slice& key) { - map_->erase(key.ToString()); - } + virtual void Delete(const Slice& key) { map_->erase(key.ToString()); } }; Handler handler; handler.map_ = &map_; return batch->Iterate(&handler); } - virtual bool GetProperty(const Slice& property, std::string* value) { - return false; - } + virtual bool GetProperty(const Slice& property, std::string* value) { return false; } virtual void GetApproximateSizes(const Range* r, int n, uint64_t* sizes) { for (int i = 0; i < n; i++) { sizes[i] = 0; } } - virtual void GetApproximateSizes(uint64_t* size, - std::vector* lgsize = NULL) { - } + virtual void GetApproximateSizes(uint64_t* size, std::vector* lgsize = NULL, + uint64_t* mem_size = NULL) {} - virtual void CompactRange(const Slice* start, const Slice* end, int lg_no) { - } + virtual void CompactRange(const Slice* start, const Slice* end, int lg_no) {} - virtual bool FindSplitKey(double ratio, - std::string* split_key) { - return false; - } + virtual bool FindSplitKey(double ratio, std::string* split_key) { return false; } - virtual bool FindKeyRange(std::string* smallest_key, std::string* largest_key) { - return false; - } + virtual bool FindKeyRange(std::string* smallest_key, std::string* largest_key) { return false; } virtual void GetCurrentLevelSize(std::vector* result) {} - virtual uint64_t GetScopeSize(const std::string& start_key, - const std::string& end_key, + virtual uint64_t GetScopeSize(const std::string& start_key, const std::string& end_key, std::vector* lgsize = NULL) { - return 0; + return 0; } - virtual bool MinorCompact() { - return false; - } + virtual bool MinorCompact() { return false; } virtual void CompactMissFiles(const Slice* begin, const Slice* end) {} virtual void AddInheritedLiveFiles(std::vector >* live) {} private: - class ModelIter: public Iterator { + class ModelIter : public Iterator { public: - ModelIter(const KVMap* map, bool owned) - : map_(map), owned_(owned), iter_(map_->end()) { - } + ModelIter(const KVMap* map, bool owned) : map_(map), owned_(owned), iter_(map_->end()) {} ~ModelIter() { if (owned_) delete map_; } @@ -2044,14 +1984,13 @@ class ModelDB: public DB { iter_ = map_->find(map_->rbegin()->first); } } - virtual void Seek(const Slice& k) { - iter_ = map_->lower_bound(k.ToString()); - } + virtual void Seek(const Slice& k) { iter_ = map_->lower_bound(k.ToString()); } virtual void Next() { ++iter_; } virtual void Prev() { --iter_; } virtual Slice key() const { return iter_->first; } virtual Slice value() const { return iter_->second; } virtual Status status() const { return Status::OK(); } + private: const KVMap* const map_; const bool owned_; // Do we own map_ @@ -2064,17 +2003,12 @@ class ModelDB: public DB { }; static std::string RandomKey(Random* rnd) { - int len = (rnd->OneIn(3) - ? 1 // Short sometimes to encourage collisions - : (rnd->OneIn(100) ? rnd->Skewed(10) : rnd->Uniform(10))); + int len = (rnd->OneIn(3) ? 1 // Short sometimes to encourage collisions + : (rnd->OneIn(100) ? rnd->Skewed(10) : rnd->Uniform(10))); return test::RandomKey(rnd, len); } -static bool CompareIterators(int step, - DB* model, - DB* db, - uint64_t model_snap, - uint64_t db_snap) { +static bool CompareIterators(int step, DB* model, DB* db, uint64_t model_snap, uint64_t db_snap) { ReadOptions options; options.snapshot = model_snap; Iterator* miter = model->NewIterator(options); @@ -2082,25 +2016,19 @@ static bool CompareIterators(int step, Iterator* dbiter = db->NewIterator(options); bool ok = true; int count = 0; - for (miter->SeekToFirst(), dbiter->SeekToFirst(); - ok && miter->Valid() && dbiter->Valid(); + for (miter->SeekToFirst(), dbiter->SeekToFirst(); ok && miter->Valid() && dbiter->Valid(); miter->Next(), dbiter->Next()) { count++; if (miter->key().compare(dbiter->key()) != 0) { - fprintf(stderr, "step %d: Key mismatch: '%s' vs. '%s' [count: %d]\n", - step, - EscapeString(miter->key()).c_str(), - EscapeString(dbiter->key()).c_str(), - count); + fprintf(stderr, "step %d: Key mismatch: '%s' vs. '%s' [count: %d]\n", step, + EscapeString(miter->key()).c_str(), EscapeString(dbiter->key()).c_str(), count); ok = false; break; } if (miter->value().compare(dbiter->value()) != 0) { - fprintf(stderr, "step %d: Value mismatch for key '%s': '%s' vs. '%s'\n", - step, - EscapeString(miter->key()).c_str(), - EscapeString(miter->value()).c_str(), + fprintf(stderr, "step %d: Value mismatch for key '%s': '%s' vs. '%s'\n", step, + EscapeString(miter->key()).c_str(), EscapeString(miter->value()).c_str(), EscapeString(miter->value()).c_str()); ok = false; } @@ -2108,8 +2036,8 @@ static bool CompareIterators(int step, if (ok) { if (miter->Valid() != dbiter->Valid()) { - fprintf(stderr, "step %d: Mismatch at end of iterators: %d vs. %d\n", - step, miter->Valid(), dbiter->Valid()); + fprintf(stderr, "step %d: Mismatch at end of iterators: %d vs. %d\n", step, miter->Valid(), + dbiter->Valid()); ok = false; } } @@ -2133,21 +2061,18 @@ TEST(DBTest, Randomized) { } // TODO(sanjay): Test Get() works int p = rnd.Uniform(100); - if (p < 45) { // Put + if (p < 45) { // Put k = RandomKey(&rnd); - v = RandomString(&rnd, - rnd.OneIn(20) - ? 100 + rnd.Uniform(100) - : rnd.Uniform(8)); + v = RandomString(&rnd, rnd.OneIn(20) ? 100 + rnd.Uniform(100) : rnd.Uniform(8)); ASSERT_OK(model.Put(WriteOptions(), k, v)); ASSERT_OK(db_->Put(WriteOptions(), k, v)); - } else if (p < 90) { // Delete + } else if (p < 90) { // Delete k = RandomKey(&rnd); ASSERT_OK(model.Delete(WriteOptions(), k)); ASSERT_OK(db_->Delete(WriteOptions(), k)); - } else { // Multi-element batch + } else { // Multi-element batch WriteBatch b; const int num = rnd.Uniform(8); for (int i = 0; i < num; i++) { @@ -2169,8 +2094,8 @@ TEST(DBTest, Randomized) { } if ((step % 100) == 0) { - ASSERT_TRUE(CompareIterators(step, &model, db_, - leveldb::kMaxSequenceNumber, leveldb::kMaxSequenceNumber)); + ASSERT_TRUE(CompareIterators(step, &model, db_, leveldb::kMaxSequenceNumber, + leveldb::kMaxSequenceNumber)); ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap)); // Save a snapshot from each DB this time that we'll use next // time we compare things, to make sure the current state is @@ -2179,8 +2104,8 @@ TEST(DBTest, Randomized) { if (db_snap != leveldb::kMaxSequenceNumber) db_->ReleaseSnapshot(db_snap); Reopen(); - ASSERT_TRUE(CompareIterators(step, &model, db_, - leveldb::kMaxSequenceNumber, leveldb::kMaxSequenceNumber)); + ASSERT_TRUE(CompareIterators(step, &model, db_, leveldb::kMaxSequenceNumber, + leveldb::kMaxSequenceNumber)); model_snap = model.GetSnapshot(); db_snap = db_->GetSnapshot(); @@ -2279,8 +2204,8 @@ void BM_LogAndApply(int iters, int num_base_files) { VersionEdit vbase; uint64_t fnum = 1; for (int i = 0; i < num_base_files; i++) { - InternalKey start(MakeKey(2*fnum), 1, kTypeValue); - InternalKey limit(MakeKey(2*fnum+1), 1, kTypeDeletion); + InternalKey start(MakeKey(2 * fnum), 1, kTypeValue); + InternalKey limit(MakeKey(2 * fnum + 1), 1, kTypeDeletion); vbase.AddFile(2, fnum++, 1 /* file size */, start, limit); } ASSERT_OK(vset.LogAndApply(&vbase, &mu)); @@ -2290,8 +2215,8 @@ void BM_LogAndApply(int iters, int num_base_files) { for (int i = 0; i < iters; i++) { VersionEdit vedit; vedit.DeleteFile(2, fnum); - InternalKey start(MakeKey(2*fnum), 1, kTypeValue); - InternalKey limit(MakeKey(2*fnum+1), 1, kTypeDeletion); + InternalKey start(MakeKey(2 * fnum), 1, kTypeValue); + InternalKey limit(MakeKey(2 * fnum + 1), 1, kTypeDeletion); vedit.AddFile(2, fnum++, 1 /* file size */, start, limit); vset.LogAndApply(&vedit, &mu); } @@ -2299,9 +2224,8 @@ void BM_LogAndApply(int iters, int num_base_files) { unsigned int us = stop_micros - start_micros; char buf[16]; snprintf(buf, sizeof(buf), "%d", num_base_files); - fprintf(stderr, - "BM_LogAndApply/%-6s %8d iters : %9u us (%7.0f us / iter)\n", - buf, iters, us, ((float)us) / iters); + fprintf(stderr, "BM_LogAndApply/%-6s %8d iters : %9u us (%7.0f us / iter)\n", buf, iters, us, + ((float)us) / iters); } TEST(DBTest, FindKeyRange) { diff --git a/src/leveldb/db/dbformat.cc b/src/leveldb/db/dbformat.cc index 68731d667..9f024b29b 100644 --- a/src/leveldb/db/dbformat.cc +++ b/src/leveldb/db/dbformat.cc @@ -26,9 +26,7 @@ void AppendInternalKey(std::string* result, const ParsedInternalKey& key) { std::string ParsedInternalKey::DebugString() const { char buf[50]; - snprintf(buf, sizeof(buf), "' @ %llu : %d", - (unsigned long long) sequence, - int(type)); + snprintf(buf, sizeof(buf), "' @ %llu : %d", (unsigned long long)sequence, int(type)); std::string result = "'"; result += EscapeString(user_key.ToString()); result += buf; @@ -47,9 +45,7 @@ std::string InternalKey::DebugString() const { return result; } -const char* InternalKeyComparator::Name() const { - return "leveldb.InternalKeyComparator"; -} +const char* InternalKeyComparator::Name() const { return "leveldb.InternalKeyComparator"; } int InternalKeyComparator::Compare(const Slice& akey, const Slice& bkey) const { // Order by: @@ -69,19 +65,16 @@ int InternalKeyComparator::Compare(const Slice& akey, const Slice& bkey) const { return r; } -void InternalKeyComparator::FindShortestSeparator( - std::string* start, - const Slice& limit) const { +void InternalKeyComparator::FindShortestSeparator(std::string* start, const Slice& limit) const { // Attempt to shorten the user portion of the key Slice user_start = ExtractUserKey(*start); Slice user_limit = ExtractUserKey(limit); std::string tmp(user_start.data(), user_start.size()); user_comparator_->FindShortestSeparator(&tmp, user_limit); - if (tmp.size() < user_start.size() && - user_comparator_->Compare(user_start, tmp) < 0) { + if (tmp.size() < user_start.size() && user_comparator_->Compare(user_start, tmp) < 0) { // User key has become shorter physically, but larger logically. // Tack on the earliest possible number to the shortened user key. - PutFixed64(&tmp, PackSequenceAndType(kMaxSequenceNumber,kValueTypeForSeek)); + PutFixed64(&tmp, PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek)); assert(this->Compare(*start, tmp) < 0); assert(this->Compare(tmp, limit) < 0); start->swap(tmp); @@ -92,22 +85,18 @@ void InternalKeyComparator::FindShortSuccessor(std::string* key) const { Slice user_key = ExtractUserKey(*key); std::string tmp(user_key.data(), user_key.size()); user_comparator_->FindShortSuccessor(&tmp); - if (tmp.size() < user_key.size() && - user_comparator_->Compare(user_key, tmp) < 0) { + if (tmp.size() < user_key.size() && user_comparator_->Compare(user_key, tmp) < 0) { // User key has become shorter physically, but larger logically. // Tack on the earliest possible number to the shortened user key. - PutFixed64(&tmp, PackSequenceAndType(kMaxSequenceNumber,kValueTypeForSeek)); + PutFixed64(&tmp, PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek)); assert(this->Compare(*key, tmp) < 0); key->swap(tmp); } } -const char* InternalFilterPolicy::Name() const { - return user_policy_->Name(); -} +const char* InternalFilterPolicy::Name() const { return user_policy_->Name(); } -void InternalFilterPolicy::CreateFilter(const Slice* keys, int n, - std::string* dst) const { +void InternalFilterPolicy::CreateFilter(const Slice* keys, int n, std::string* dst) const { // We rely on the fact that the code in table.cc does not mind us // adjusting keys[]. Slice* mkey = const_cast(keys); diff --git a/src/leveldb/db/dbformat.h b/src/leveldb/db/dbformat.h index 8975f059d..ae84a3a56 100644 --- a/src/leveldb/db/dbformat.h +++ b/src/leveldb/db/dbformat.h @@ -25,10 +25,7 @@ class InternalKey; // Value types encoded as the last component of internal keys. // DO NOT CHANGE THESE ENUM VALUES: they are embedded in the on-disk // data structures. -enum ValueType { - kTypeDeletion = 0x0, - kTypeValue = 0x1 -}; +enum ValueType { kTypeDeletion = 0x0, kTypeValue = 0x1 }; // kValueTypeForSeek defines the ValueType that should be passed when // constructing a ParsedInternalKey object for seeking to a particular // sequence number (since we sort sequence numbers in decreasing order @@ -44,10 +41,9 @@ struct ParsedInternalKey { SequenceNumber sequence; ValueType type; - ParsedInternalKey() - : sequence(kMaxSequenceNumber), type(kValueTypeForSeek) { } + ParsedInternalKey() : sequence(kMaxSequenceNumber), type(kValueTypeForSeek) {} ParsedInternalKey(const Slice& u, const SequenceNumber& seq, ValueType t) - : user_key(u), sequence(seq), type(t) { } + : user_key(u), sequence(seq), type(t) {} std::string DebugString() const; }; @@ -57,15 +53,13 @@ inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) { } // Append the serialization of "key" to *result. -extern void AppendInternalKey(std::string* result, - const ParsedInternalKey& key); +extern void AppendInternalKey(std::string* result, const ParsedInternalKey& key); // Attempt to parse an internal key from "internal_key". On success, // stores the parsed data in "*result", and returns true. // // On error, returns false, leaves "*result" in an undefined state. -extern bool ParseInternalKey(const Slice& internal_key, - ParsedInternalKey* result); +extern bool ParseInternalKey(const Slice& internal_key, ParsedInternalKey* result); extern bool RollbackDrop(uint64_t seq, const std::map& rollbacks); @@ -88,13 +82,12 @@ inline ValueType ExtractValueType(const Slice& internal_key) { class InternalKeyComparator : public Comparator { private: const Comparator* user_comparator_; + public: - explicit InternalKeyComparator(const Comparator* c) : user_comparator_(c) { } + explicit InternalKeyComparator(const Comparator* c) : user_comparator_(c) {} virtual const char* Name() const; virtual int Compare(const Slice& a, const Slice& b) const; - virtual void FindShortestSeparator( - std::string* start, - const Slice& limit) const; + virtual void FindShortestSeparator(std::string* start, const Slice& limit) const; virtual void FindShortSuccessor(std::string* key) const; const Comparator* user_comparator() const { return user_comparator_; } @@ -106,8 +99,9 @@ class InternalKeyComparator : public Comparator { class InternalFilterPolicy : public FilterPolicy { private: const FilterPolicy* const user_policy_; + public: - explicit InternalFilterPolicy(const FilterPolicy* p) : user_policy_(p) { } + explicit InternalFilterPolicy(const FilterPolicy* p) : user_policy_(p) {} virtual const char* Name() const; virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const; virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const; @@ -119,8 +113,9 @@ class InternalFilterPolicy : public FilterPolicy { class InternalKey { private: std::string rep_; + public: - InternalKey() { } // Leave rep_ as empty to indicate it is invalid + InternalKey() {} // Leave rep_ as empty to indicate it is invalid InternalKey(const Slice& user_key, SequenceNumber s, ValueType t) { AppendInternalKey(&rep_, ParsedInternalKey(user_key, s, t)); } @@ -143,13 +138,11 @@ class InternalKey { std::string DebugString() const; }; -inline int InternalKeyComparator::Compare( - const InternalKey& a, const InternalKey& b) const { +inline int InternalKeyComparator::Compare(const InternalKey& a, const InternalKey& b) const { return Compare(a.Encode(), b.Encode()); } -inline bool ParseInternalKey(const Slice& internal_key, - ParsedInternalKey* result) { +inline bool ParseInternalKey(const Slice& internal_key, ParsedInternalKey* result) { const size_t n = internal_key.size(); if (n < 8) return false; uint64_t num = DecodeFixed64(internal_key.data() + n - 8); @@ -179,7 +172,7 @@ class LookupKey { ~LookupKey(); - // Return a key suitable for lookup in a MemTable. + // Return a key suitable for lookup in a BaseMemTable. Slice memtable_key() const { return Slice(start_, end_ - start_); } // Return an internal key (suitable for passing to an internal iterator) @@ -194,12 +187,12 @@ class LookupKey { // userkey char[klength] <-- kstart_ // tag uint64 // <-- end_ - // The array is a suitable MemTable key. + // The array is a suitable BaseMemTable key. // The suffix starting with "userkey" can be used as an InternalKey. const char* start_; const char* kstart_; const char* end_; - char space_[200]; // Avoid allocation for short keys + char space_[200]; // Avoid allocation for short keys // No copying allowed LookupKey(const LookupKey&); diff --git a/src/leveldb/db/dbformat_test.cc b/src/leveldb/db/dbformat_test.cc index 66888ad3a..40d9c0457 100644 --- a/src/leveldb/db/dbformat_test.cc +++ b/src/leveldb/db/dbformat_test.cc @@ -12,9 +12,7 @@ namespace leveldb { -static std::string IKey(const std::string& user_key, - uint64_t seq, - ValueType vt) { +static std::string IKey(const std::string& user_key, uint64_t seq, ValueType vt) { std::string encoded; AppendInternalKey(&encoded, ParsedInternalKey(user_key, seq, vt)); return encoded; @@ -32,9 +30,7 @@ static std::string ShortSuccessor(const std::string& s) { return result; } -static void TestKey(const std::string& key, - uint64_t seq, - ValueType vt) { +static void TestKey(const std::string& key, uint64_t seq, ValueType vt) { std::string encoded = IKey(key, seq, vt); Slice in(encoded); @@ -48,16 +44,13 @@ static void TestKey(const std::string& key, ASSERT_TRUE(!ParseInternalKey(Slice("bar"), &decoded)); } -class FormatTest { }; +class FormatTest {}; TEST(FormatTest, InternalKey_EncodeDecode) { - const char* keys[] = { "", "k", "hello", "longggggggggggggggggggggg" }; - const uint64_t seq[] = { - 1, 2, 3, - (1ull << 8) - 1, 1ull << 8, (1ull << 8) + 1, - (1ull << 16) - 1, 1ull << 16, (1ull << 16) + 1, - (1ull << 32) - 1, 1ull << 32, (1ull << 32) + 1 - }; + const char* keys[] = {"", "k", "hello", "longggggggggggggggggggggg"}; + const uint64_t seq[] = {1, 2, 3, (1ull << 8) - 1, 1ull << 8, (1ull << 8) + 1, (1ull << 16) - 1, + 1ull << 16, (1ull << 16) + 1, (1ull << 32) - 1, 1ull << 32, + (1ull << 32) + 1}; for (size_t k = 0; k < sizeof(keys) / sizeof(keys[0]); k++) { for (size_t s = 0; s < sizeof(seq) / sizeof(seq[0]); s++) { TestKey(keys[k], seq[s], kTypeValue); @@ -69,48 +62,37 @@ TEST(FormatTest, InternalKey_EncodeDecode) { TEST(FormatTest, InternalKeyShortSeparator) { // When user keys are same ASSERT_EQ(IKey("foo", 100, kTypeValue), - Shorten(IKey("foo", 100, kTypeValue), - IKey("foo", 99, kTypeValue))); + Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 99, kTypeValue))); ASSERT_EQ(IKey("foo", 100, kTypeValue), - Shorten(IKey("foo", 100, kTypeValue), - IKey("foo", 101, kTypeValue))); + Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 101, kTypeValue))); ASSERT_EQ(IKey("foo", 100, kTypeValue), - Shorten(IKey("foo", 100, kTypeValue), - IKey("foo", 100, kTypeValue))); + Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 100, kTypeValue))); ASSERT_EQ(IKey("foo", 100, kTypeValue), - Shorten(IKey("foo", 100, kTypeValue), - IKey("foo", 100, kTypeDeletion))); + Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 100, kTypeDeletion))); // When user keys are misordered ASSERT_EQ(IKey("foo", 100, kTypeValue), - Shorten(IKey("foo", 100, kTypeValue), - IKey("bar", 99, kTypeValue))); + Shorten(IKey("foo", 100, kTypeValue), IKey("bar", 99, kTypeValue))); // When user keys are different, but correctly ordered ASSERT_EQ(IKey("g", kMaxSequenceNumber, kValueTypeForSeek), - Shorten(IKey("foo", 100, kTypeValue), - IKey("hello", 200, kTypeValue))); + Shorten(IKey("foo", 100, kTypeValue), IKey("hello", 200, kTypeValue))); // When start user key is prefix of limit user key ASSERT_EQ(IKey("foo", 100, kTypeValue), - Shorten(IKey("foo", 100, kTypeValue), - IKey("foobar", 200, kTypeValue))); + Shorten(IKey("foo", 100, kTypeValue), IKey("foobar", 200, kTypeValue))); // When limit user key is prefix of start user key ASSERT_EQ(IKey("foobar", 100, kTypeValue), - Shorten(IKey("foobar", 100, kTypeValue), - IKey("foo", 200, kTypeValue))); + Shorten(IKey("foobar", 100, kTypeValue), IKey("foo", 200, kTypeValue))); } TEST(FormatTest, InternalKeyShortestSuccessor) { ASSERT_EQ(IKey("g", kMaxSequenceNumber, kValueTypeForSeek), ShortSuccessor(IKey("foo", 100, kTypeValue))); - ASSERT_EQ(IKey("\xff\xff", 100, kTypeValue), - ShortSuccessor(IKey("\xff\xff", 100, kTypeValue))); + ASSERT_EQ(IKey("\xff\xff", 100, kTypeValue), ShortSuccessor(IKey("\xff\xff", 100, kTypeValue))); } } // namespace leveldb -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} +int main(int argc, char** argv) { return leveldb::test::RunAllTests(); } diff --git a/src/leveldb/db/filename.cc b/src/leveldb/db/filename.cc index 4ac4a3864..32b80cbe2 100644 --- a/src/leveldb/db/filename.cc +++ b/src/leveldb/db/filename.cc @@ -18,20 +18,15 @@ namespace leveldb { // A utility routine: write "data" to the named file and Sync() it. -extern Status WriteStringToFileSync(Env* env, const Slice& data, - const std::string& fname); +extern Status WriteStringToFileSync(Env* env, const Slice& data, const std::string& fname); -static std::string MakeFileName(const std::string& name, uint64_t number, - const char* suffix) { +static std::string MakeFileName(const std::string& name, uint64_t number, const char* suffix) { char buf[100]; - snprintf(buf, sizeof(buf), "/%08llu.%s", - static_cast(number), - suffix); + snprintf(buf, sizeof(buf), "/%08llu.%s", static_cast(number), suffix); return name + buf; } -bool ParseDbName(const std::string& dbname, std::string* prefix, - uint64_t* tablet, uint64_t* lg) { +bool ParseDbName(const std::string& dbname, std::string* prefix, uint64_t* tablet, uint64_t* lg) { std::string::size_type pos1, pos2; assert(dbname[dbname.size() - 1] != '/'); @@ -87,33 +82,23 @@ std::string TableFileName(const std::string& name, uint64_t number) { std::string DescriptorFileName(const std::string& dbname, uint64_t number) { assert(number > 0); char buf[100]; - snprintf(buf, sizeof(buf), "/MANIFEST-%06llu", - static_cast(number)); + snprintf(buf, sizeof(buf), "/MANIFEST-%06llu", static_cast(number)); return dbname + buf; } -std::string CurrentFileName(const std::string& dbname) { - return dbname + "/CURRENT"; -} +std::string CurrentFileName(const std::string& dbname) { return dbname + "/CURRENT"; } -std::string LockFileName(const std::string& dbname) { - return dbname + "/LOCK"; -} +std::string LockFileName(const std::string& dbname) { return dbname + "/LOCK"; } std::string TempFileName(const std::string& dbname, uint64_t number) { assert(number > 0); return MakeFileName(dbname, number, "dbtmp"); } -std::string InfoLogFileName(const std::string& dbname) { - return dbname + "/LOG"; -} +std::string InfoLogFileName(const std::string& dbname) { return dbname + "/LOG"; } // Return the name of the old info log file for "dbname". -std::string OldInfoLogFileName(const std::string& dbname) { - return dbname + "/LOG.old"; -} - +std::string OldInfoLogFileName(const std::string& dbname) { return dbname + "/LOG.old"; } // Owned filenames have the form: // dbname/CURRENT @@ -122,9 +107,7 @@ std::string OldInfoLogFileName(const std::string& dbname) { // dbname/LOG.old // dbname/MANIFEST-[0-9]+ // dbname/[0-9]+.(log|sst) -bool ParseFileName(const std::string& fname, - uint64_t* number, - FileType* type) { +bool ParseFileName(const std::string& fname, uint64_t* number, FileType* type) { Slice rest(fname); if (rest == "CURRENT") { *number = 0; @@ -168,8 +151,7 @@ bool ParseFileName(const std::string& fname, return true; } -Status SetCurrentFile(Env* env, const std::string& dbname, - uint64_t descriptor_number) { +Status SetCurrentFile(Env* env, const std::string& dbname, uint64_t descriptor_number) { // Remove leading "dbname/" and add newline to manifest file name std::string manifest = DescriptorFileName(dbname, descriptor_number); Slice contents = manifest; @@ -180,12 +162,12 @@ Status SetCurrentFile(Env* env, const std::string& dbname, if (s.ok()) { s = env->RenameFile(tmp, CurrentFileName(dbname)); } else { - Log("[%s][dfs error] open dbtmp[%s] error, status[%s].\n", - dbname.c_str(), tmp.c_str(), s.ToString().c_str()); + LEVELDB_LOG("[%s][dfs error] open dbtmp[%s] error, status[%s].\n", dbname.c_str(), tmp.c_str(), + s.ToString().c_str()); } if (!s.ok()) { - Log("[%s][dfs error] rename CURRENT[%s] error, status[%s].\n", - dbname.c_str(), tmp.c_str(), s.ToString().c_str()); + LEVELDB_LOG("[%s][dfs error] rename CURRENT[%s] error, status[%s].\n", dbname.c_str(), + tmp.c_str(), s.ToString().c_str()); env->DeleteFile(tmp); } return s; @@ -193,21 +175,22 @@ Status SetCurrentFile(Env* env, const std::string& dbname, const char* FileTypeToString(FileType type) { switch (type) { - case kLogFile: - return "kLogFile"; - case kDBLockFile: - return "kDBLockFile"; - case kTableFile: - return "kTableFile"; - case kDescriptorFile: - return "kDescriptorFile"; - case kCurrentFile: - return "kCurrentFile"; - case kTempFile: - return "kTempFile"; - case kInfoLogFile: - return "kInfoLogFile"; - default:; + case kLogFile: + return "kLogFile"; + case kDBLockFile: + return "kDBLockFile"; + case kTableFile: + return "kTableFile"; + case kDescriptorFile: + return "kDescriptorFile"; + case kCurrentFile: + return "kCurrentFile"; + case kTempFile: + return "kTempFile"; + case kInfoLogFile: + return "kInfoLogFile"; + default: + ; } return "kUnknown"; } @@ -244,50 +227,45 @@ std::string BuildTabletPath(const std::string& prefix, uint64_t tablet) { std::string BuildTabletLgPath(const std::string& prefix, uint64_t tablet, uint64_t lg) { char buf[100]; - snprintf(buf, sizeof(buf), "/tablet%08llu/%llu", - static_cast(tablet), + snprintf(buf, sizeof(buf), "/tablet%08llu/%llu", static_cast(tablet), static_cast(lg)); std::string lg_path = prefix + buf; return lg_path; } -std::string BuildTableFilePath(const std::string& prefix, uint64_t tablet, - uint64_t lg, uint64_t number) { +std::string BuildTableFilePath(const std::string& prefix, uint64_t tablet, uint64_t lg, + uint64_t number) { char buf[100]; - snprintf(buf, sizeof(buf), "/tablet%08llu/%llu", - static_cast(tablet), + snprintf(buf, sizeof(buf), "/tablet%08llu/%llu", static_cast(tablet), static_cast(lg)); std::string dbname = prefix + buf; return MakeFileName(dbname, number & 0xffffffff, "sst"); } -std::string BuildTrashTableFilePath(const std::string& prefix, uint64_t tablet, - uint32_t lg_id, uint64_t number, - const std::string& time) { - char buf[100]; - snprintf(buf, sizeof(buf), "/tablet%08llu/%lu/%08llu.sst.%s", - static_cast(tablet), - static_cast(lg_id), - static_cast(number), - time.c_str()); - - return prefix + buf; +std::string BuildTrashTableFilePath(const std::string& prefix, uint64_t tablet, uint32_t lg_id, + uint64_t number, const std::string& time) { + char buf[100]; + snprintf(buf, sizeof(buf), "/tablet%08llu/%lu/%08llu.sst.%s", + static_cast(tablet), static_cast(lg_id), + static_cast(number), time.c_str()); + + return prefix + buf; } std::string GetTimeStrFromTrashFile(const std::string& path) { - size_t dir_pos = path.rfind("/"); - if (dir_pos == std::string::npos || dir_pos == path.length() - 1) { - return ""; - } - std::string file = path.substr(dir_pos + 1, path.length() - dir_pos - 1); + size_t dir_pos = path.rfind("/"); + if (dir_pos == std::string::npos || dir_pos == path.length() - 1) { + return ""; + } + std::string file = path.substr(dir_pos + 1, path.length() - dir_pos - 1); - size_t time_pos = file.rfind("."); - if (time_pos == std::string::npos) { - return ""; - } - std::string time_str = file.substr(time_pos + 1, file.length() - time_pos - 1); + size_t time_pos = file.rfind("."); + if (time_pos == std::string::npos) { + return ""; + } + std::string time_str = file.substr(time_pos + 1, file.length() - time_pos - 1); - return time_str; + return time_str; } std::string BuildTableFilePath(const std::string& prefix, uint64_t lg, uint64_t full_number) { @@ -304,8 +282,7 @@ std::string RealDbName(const std::string& dbname, uint64_t tablet) { return dbname; } char buf[100]; - snprintf(buf, sizeof(buf), "/tablet%08llu/%llu", - static_cast(tablet), + snprintf(buf, sizeof(buf), "/tablet%08llu/%llu", static_cast(tablet), static_cast(lg)); return prefix + buf; } @@ -313,16 +290,14 @@ std::string RealDbName(const std::string& dbname, uint64_t tablet) { std::string GetTabletPathFromNum(const std::string& tablename, uint64_t tablet) { assert(tablet > 0); char buf[32]; - snprintf(buf, sizeof(buf), "/tablet%08llu", - static_cast(tablet)); + snprintf(buf, sizeof(buf), "/tablet%08llu", static_cast(tablet)); return tablename + buf; } std::string GetChildTabletPath(const std::string& parent_path, uint64_t tablet) { assert(tablet > 0); char buf[32]; - snprintf(buf, sizeof(buf), "%08llu", - static_cast(tablet)); + snprintf(buf, sizeof(buf), "%08llu", static_cast(tablet)); return parent_path.substr(0, parent_path.size() - 8) + buf; } @@ -351,8 +326,7 @@ std::string FileNumberDebugString(uint64_t full_number) { uint64_t tablet = (full_number >> 32 & 0x7FFFFFFF); uint64_t file = full_number & 0xffffffff; char buf[32]; - snprintf(buf, sizeof(buf), "[%08llu %08llu.sst]", - static_cast(tablet), + snprintf(buf, sizeof(buf), "[%08llu %08llu.sst]", static_cast(tablet), static_cast(file)); return std::string(buf, 23); } diff --git a/src/leveldb/db/filename.h b/src/leveldb/db/filename.h index b151c165b..01570cd55 100644 --- a/src/leveldb/db/filename.h +++ b/src/leveldb/db/filename.h @@ -49,8 +49,7 @@ extern std::string TableFileName(const std::string& dbname, uint64_t number); // Return the name of the descriptor file for the db named by // "dbname" and the specified incarnation number. The result will be // prefixed with "dbname". -extern std::string DescriptorFileName(const std::string& dbname, - uint64_t number); +extern std::string DescriptorFileName(const std::string& dbname, uint64_t number); // Return the name of the current file. This file contains the name // of the current manifest file. The result will be prefixed with @@ -74,23 +73,18 @@ extern std::string OldInfoLogFileName(const std::string& dbname); // If filename is a leveldb file, store the type of the file in *type. // The number encoded in the filename is stored in *number. If the // filename was successfully parsed, returns true. Else return false. -extern bool ParseFileName(const std::string& filename, - uint64_t* number, - FileType* type); +extern bool ParseFileName(const std::string& filename, uint64_t* number, FileType* type); // Make the CURRENT file point to the descriptor file with the // specified number. -extern Status SetCurrentFile(Env* env, const std::string& dbname, - uint64_t descriptor_number); - +extern Status SetCurrentFile(Env* env, const std::string& dbname, uint64_t descriptor_number); const char* FileTypeToString(FileType type); // build a full path file number from dbname&filenumber, format: // |--tabletnum(4B)--|--filenum(4B)--| // tabletnum = 0x80000000|real_tablet_num -extern uint64_t BuildFullFileNumber(const std::string& dbname, - uint64_t number); +extern uint64_t BuildFullFileNumber(const std::string& dbname, uint64_t number); // Build tablet path from tablet_num // E.g. construct "/table1/tablet000003" @@ -102,15 +96,14 @@ std::string BuildTabletLgPath(const std::string& prfix, uint64_t tablet, uint64_ // Build file path from tablet_num & lg_num & file number // E.g. construct "/table1/tablet000003/0/00000001.sst" // from (/table1, 3, 0, 1) -std::string BuildTableFilePath(const std::string& prefix, uint64_t tablet, - uint64_t lg, uint64_t number); +std::string BuildTableFilePath(const std::string& prefix, uint64_t tablet, uint64_t lg, + uint64_t number); // Build trash file path from tablet_num & lg & file number & time // E.g. construct "/table1/tablet000003/0/00000001.sst.20170718-17-08-30" // from (/table1, 3, 0, 1, 20170718-17-08-30) -std::string BuildTrashTableFilePath(const std::string& prefix, uint64_t tablet, - uint32_t lg_id, uint64_t number, - const std::string& time); +std::string BuildTrashTableFilePath(const std::string& prefix, uint64_t tablet, uint32_t lg_id, + uint64_t number, const std::string& time); // get time string from trash file path // E.g. get "20170718-17-08-30" @@ -121,15 +114,13 @@ std::string GetTimeStrFromTrashFile(const std::string& path); // Build file path from lg_num & full file number // E.g. construct "/table1/tablet000003/0/00000001.sst" // from (/table1, 0, 0x8000000300000001) -std::string BuildTableFilePath(const std::string& prefix, - uint64_t lg, uint64_t full_number); +std::string BuildTableFilePath(const std::string& prefix, uint64_t lg, uint64_t full_number); // Parse a db_impl name to prefix, tablet number, lg number... // db_impl name format maybe: // /.../tablename/tablet000012/2 (have tablet name, allow split) // or /.../tablename/2 (have none tablet name, donot allow split) -bool ParseDbName(const std::string& dbname, std::string* prefix, - uint64_t* tablet, uint64_t* lg); +bool ParseDbName(const std::string& dbname, std::string* prefix, uint64_t* tablet, uint64_t* lg); // Parse a full file number to tablet number & file number bool ParseFullFileNumber(uint64_t full_number, uint64_t* tablet, uint64_t* file); diff --git a/src/leveldb/db/filename_test.cc b/src/leveldb/db/filename_test.cc index e00baefd3..59ef8f324 100644 --- a/src/leveldb/db/filename_test.cc +++ b/src/leveldb/db/filename_test.cc @@ -15,7 +15,7 @@ namespace leveldb { -class FileNameTest { }; +class FileNameTest {}; TEST(FileNameTest, Parse) { Slice db; @@ -28,16 +28,16 @@ TEST(FileNameTest, Parse) { uint64_t number; FileType type; } cases[] = { - { "100.log", 100, kLogFile }, - { "0.log", 0, kLogFile }, - { "0.sst", 0, kTableFile }, - { "CURRENT", 0, kCurrentFile }, - { "LOCK", 0, kDBLockFile }, - { "MANIFEST-2", 2, kDescriptorFile }, - { "MANIFEST-7", 7, kDescriptorFile }, - { "LOG", 0, kInfoLogFile }, - { "LOG.old", 0, kInfoLogFile }, - { "18446744073709551615.log", 18446744073709551615ull, kLogFile }, + {"100.log", 100, kLogFile}, + {"0.log", 0, kLogFile}, + {"0.sst", 0, kTableFile}, + {"CURRENT", 0, kCurrentFile}, + {"LOCK", 0, kDBLockFile}, + {"MANIFEST-2", 2, kDescriptorFile}, + {"MANIFEST-7", 7, kDescriptorFile}, + {"LOG", 0, kInfoLogFile}, + {"LOG.old", 0, kInfoLogFile}, + {"18446744073709551615.log", 18446744073709551615ull, kLogFile}, }; for (uint32_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { std::string f = cases[i].fname; @@ -48,29 +48,9 @@ TEST(FileNameTest, Parse) { // Errors static const char* errors[] = { - "", - "foo", - "foo-dx-100.log", - ".log", - "", - "manifest", - "CURREN", - "CURRENTX", - "MANIFES", - "MANIFEST", - "MANIFEST-", - "XMANIFEST-3", - "MANIFEST-3x", - "LOC", - "LOCKx", - "LO", - "LOGx", - "18446744073709551616.log", - "184467440737095516150.log", - "100", - "100.", - "100.lop" - }; + "", "foo", "foo-dx-100.log", ".log", "", "manifest", "CURREN", "CURRENTX", "MANIFES", + "MANIFEST", "MANIFEST-", "XMANIFEST-3", "MANIFEST-3x", "LOC", "LOCKx", "LO", "LOGx", + "18446744073709551616.log", "184467440737095516150.log", "100", "100.", "100.lop"}; for (uint32_t i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) { std::string f = errors[i]; ASSERT_TRUE(!ParseFileName(f, &number, &type)) << f; @@ -284,6 +264,4 @@ TEST(FileNameTest, FileNumberDebugString) { } } // namespace leveldb -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} +int main(int argc, char** argv) { return leveldb::test::RunAllTests(); } diff --git a/src/leveldb/db/import_main.cc b/src/leveldb/db/import_main.cc index 358ec2f3c..ebf88a489 100644 --- a/src/leveldb/db/import_main.cc +++ b/src/leveldb/db/import_main.cc @@ -17,112 +17,111 @@ namespace leveldb { bool SaveKVToTable(void* arg, const char* buffer, ssize_t size) { - TableBuilder* builder = reinterpret_cast(arg); - const char* delim = strchr(buffer, '\t'); - if (NULL == delim || buffer == delim || buffer + size - 1 == delim) { - return false; - } - Slice key(buffer, delim - buffer); - Slice value(delim + 1, buffer + size - delim - 1); - builder->Add(key, value); - return true; + TableBuilder* builder = reinterpret_cast(arg); + const char* delim = strchr(buffer, '\t'); + if (NULL == delim || buffer == delim || buffer + size - 1 == delim) { + return false; + } + Slice key(buffer, delim - buffer); + Slice value(delim + 1, buffer + size - delim - 1); + builder->Add(key, value); + return true; } bool SaveKVToDB(void* arg, const char* buffer, ssize_t size) { - DB* db = reinterpret_cast(arg); - const char* delim = strchr(buffer, '\t'); - if (NULL == delim || buffer == delim || buffer + size - 1 == delim) { - return false; - } - Slice key(buffer, delim - buffer); - Slice value(delim + 1, buffer + size - delim - 1); - db->Put(WriteOptions(), key, value); - return true; + DB* db = reinterpret_cast(arg); + const char* delim = strchr(buffer, '\t'); + if (NULL == delim || buffer == delim || buffer + size - 1 == delim) { + return false; + } + Slice key(buffer, delim - buffer); + Slice value(delim + 1, buffer + size - delim - 1); + db->Put(WriteOptions(), key, value); + return true; } // buffer should be formatted: // [key_size:uint32][key][value_size:uint32][value] // bool SaveSeqKVToDB(void* arg, const char* buffer, ssize_t size) { - DB* db = reinterpret_cast(arg); - if (size <= static_cast(sizeof(int64_t)) * 2) { - return false; - } - Slice input(buffer, size); - Slice key; - Slice value; - if (!GetLengthPrefixedSlice(&input, &key)) { - std::cerr << "fail to parse key" << std::endl; - return false; - } - if (!GetLengthPrefixedSlice(&input, &value)) { - std::cerr << "fail to parse value" << std::endl; - return false; - } - db->Put(WriteOptions(), key, value); - return true; + DB* db = reinterpret_cast(arg); + if (size <= static_cast(sizeof(int64_t)) * 2) { + return false; + } + Slice input(buffer, size); + Slice key; + Slice value; + if (!GetLengthPrefixedSlice(&input, &key)) { + std::cerr << "fail to parse key" << std::endl; + return false; + } + if (!GetLengthPrefixedSlice(&input, &value)) { + std::cerr << "fail to parse value" << std::endl; + return false; + } + db->Put(WriteOptions(), key, value); + return true; } bool GetKeyValueFromStdin(void* arg, bool (*saver)(void*, const char*, ssize_t)) { - static size_t n = 10240; - static char* buffer = new char[n]; - - ssize_t line_size = 0; - while ((line_size = getline(&buffer, &n, stdin)) != -1) { - if (line_size > 0 && buffer[line_size - 1] == '\n') { - line_size--; - } - if (line_size < 3) { - std::cerr << "ignore empty line" << std::endl; - continue; - } - if (!(*saver)(arg, buffer, line_size)) { - std::cerr << "ignore invalid line: " << buffer << std::endl; - continue; - } - return true; + static size_t n = 10240; + static char* buffer = new char[n]; + + ssize_t line_size = 0; + while ((line_size = getline(&buffer, &n, stdin)) != -1) { + if (line_size > 0 && buffer[line_size - 1] == '\n') { + line_size--; } - return false; + if (line_size < 3) { + std::cerr << "ignore empty line" << std::endl; + continue; + } + if (!(*saver)(arg, buffer, line_size)) { + std::cerr << "ignore invalid line: " << buffer << std::endl; + continue; + } + return true; + } + return false; } // directly build sst table file -Status DirectBuildTable(const std::string& dbname, - int32_t file_no, - Env* env, +Status DirectBuildTable(const std::string& dbname, int32_t file_no, Env* env, const Options& options) { Status s; std::string fname = TableFileName(dbname, file_no); uint64_t file_size = 0; - WritableFile* file; - s = env->NewWritableFile(fname, &file, EnvOptions(options)); - if (!s.ok()) { - return s; - } + WritableFile* file; + s = env->NewWritableFile(fname, &file, EnvOptions(options)); + if (!s.ok()) { + return s; + } - TableBuilder* builder = new TableBuilder(options, file); - while (GetKeyValueFromStdin(builder, SaveKVToTable)); + TableBuilder* builder = new TableBuilder(options, file); + while (GetKeyValueFromStdin(builder, SaveKVToTable)) + ; - // Finish and check for builder errors + // Finish and check for builder errors + if (s.ok()) { + s = builder->Finish(); if (s.ok()) { - s = builder->Finish(); - if (s.ok()) { - file_size = builder->FileSize(); - } - } else { - builder->Abandon(); + file_size = builder->FileSize(); } - delete builder; + } else { + builder->Abandon(); + } + delete builder; - // Finish and check for file errors - if (s.ok()) { - s = file->Sync(); - } - if (s.ok()) { - s = file->Close(); - } - delete file; - file = NULL; + // Finish and check for file errors + if (s.ok()) { + s = file->Sync(); + } + if (s.ok()) { + s = file->Close(); + } + delete file; + file = NULL; if (s.ok() && file_size > 0) { // Keep it @@ -133,40 +132,39 @@ Status DirectBuildTable(const std::string& dbname, } // directly build db -Status DirectBuildDB(const std::string& dbname, - const Options& options) { - DB* db = NULL; - Status s; - - s = DB::Open(options, dbname, &db); - if (!s.ok()) { - std::cerr << "fail to open db: " << dbname - << ", error: " << s.ToString() << std::endl; - return s; - } - while (GetKeyValueFromStdin(db, SaveSeqKVToDB)); - delete db; +Status DirectBuildDB(const std::string& dbname, const Options& options) { + DB* db = NULL; + Status s; + s = DB::Open(options, dbname, &db); + if (!s.ok()) { + std::cerr << "fail to open db: " << dbname << ", error: " << s.ToString() << std::endl; return s; + } + while (GetKeyValueFromStdin(db, SaveSeqKVToDB)) + ; + delete db; + + return s; } } // namespace leveldb void Usage() { - std::cout << "Usage:" << std::endl; - std::cout << " ./db_import < kv_file" << std::endl; - std::cout << std::endl << " format: " - << "[key_size:uint32][key][value_size:uint32][value]" << std::endl; + std::cout << "Usage:" << std::endl; + std::cout << " ./db_import < kv_file" << std::endl; + std::cout << std::endl + << " format: " + << "[key_size:uint32][key][value_size:uint32][value]" << std::endl; } int main(int argc, char* argv[]) { - if (argc < 2) { - Usage(); - return -1; - } - leveldb::Options opts; - std::string dbname = argv[1]; - leveldb::DirectBuildDB(dbname, opts); - return 0; + if (argc < 2) { + Usage(); + return -1; + } + leveldb::Options opts; + std::string dbname = argv[1]; + leveldb::DirectBuildDB(dbname, opts); + return 0; } - diff --git a/src/leveldb/db/leveldb_main.cc b/src/leveldb/db/leveldb_main.cc index 5921b6a34..5b53b5af3 100644 --- a/src/leveldb/db/leveldb_main.cc +++ b/src/leveldb/db/leveldb_main.cc @@ -11,11 +11,9 @@ #include "leveldb/table_utils.h" static void Usage() { - fprintf( - stderr, - "Usage: leveldbutil command...\n" - " dump files... -- dump contents of specified files\n" - ); + fprintf(stderr, + "Usage: leveldbutil command...\n" + " dump files... -- dump contents of specified files\n"); } int main(int argc, char** argv) { @@ -27,7 +25,7 @@ int main(int argc, char** argv) { } else { std::string command = argv[1]; if (command == "dump") { - ok = leveldb::HandleDumpCommand(env, argv+2, argc-2); + ok = leveldb::HandleDumpCommand(env, argv + 2, argc - 2); } else { Usage(); ok = false; diff --git a/src/leveldb/db/lg_compact_thread.h b/src/leveldb/db/lg_compact_thread.h index dc13bd184..052c5df0c 100644 --- a/src/leveldb/db/lg_compact_thread.h +++ b/src/leveldb/db/lg_compact_thread.h @@ -12,24 +12,21 @@ namespace leveldb { class LGCompactThread : public Thread { -public: - LGCompactThread(uint32_t lg_id, DBImpl* lg_impl, - const Slice* begin = NULL, const Slice* end = NULL) - : lg_id_(lg_id), lg_impl_(lg_impl), - begin_(begin), end_(end) {} - virtual ~LGCompactThread() {} - - virtual void Run(void* params) { - lg_impl_->CompactRange(begin_, end_); - } - -private: - uint32_t lg_id_; - DBImpl* lg_impl_; - const Slice* begin_; - const Slice* end_; + public: + LGCompactThread(uint32_t lg_id, DBImpl* lg_impl, const Slice* begin = NULL, + const Slice* end = NULL) + : lg_id_(lg_id), lg_impl_(lg_impl), begin_(begin), end_(end) {} + virtual ~LGCompactThread() {} + + virtual void Run(void* params) { lg_impl_->CompactRange(begin_, end_); } + + private: + uint32_t lg_id_; + DBImpl* lg_impl_; + const Slice* begin_; + const Slice* end_; }; -} // namespace leveldb +} // namespace leveldb -#endif // LEVELDB_DB_LG_COMPACT_THREAD_H_ +#endif // LEVELDB_DB_LG_COMPACT_THREAD_H_ diff --git a/src/leveldb/db/lg_write_thread.h b/src/leveldb/db/lg_write_thread.h index c384fe756..acda5082a 100644 --- a/src/leveldb/db/lg_write_thread.h +++ b/src/leveldb/db/lg_write_thread.h @@ -12,29 +12,26 @@ namespace leveldb { class LGWriteThread : public Thread { -public: - LGWriteThread(uint32_t lg_id, DBImpl* impl, - const WriteOptions& opts, WriteBatch* bench) - : wopts_(opts), wbench_(bench); - virtual ~LGWriteThread() {} - - virtual void Run(void* params) { - std::cout << "LG Thread #" << lg_id_ << ": Write()" << std::endl; - lg_impl_->Write(wopts_, wbatch_); - } - - Status GetResult() { - return ret_; - } - -private: - uint32_t lg_id_; - DBImpl* lg_impl_; - const WriteOptions& wopts_; - WriteBatch* wbatch_; - Status ret_; + public: + LGWriteThread(uint32_t lg_id, DBImpl* impl, const WriteOptions& opts, WriteBatch* bench) + : wopts_(opts), wbench_(bench); + virtual ~LGWriteThread() {} + + virtual void Run(void* params) { + std::cout << "LG Thread #" << lg_id_ << ": Write()" << std::endl; + lg_impl_->Write(wopts_, wbatch_); + } + + Status GetResult() { return ret_; } + + private: + uint32_t lg_id_; + DBImpl* lg_impl_; + const WriteOptions& wopts_; + WriteBatch* wbatch_; + Status ret_; }; -} // namespace leveldb +} // namespace leveldb -#endif // LEVELDB_DB_LG_WRITE_THREAD_H_ +#endif // LEVELDB_DB_LG_WRITE_THREAD_H_ diff --git a/src/leveldb/db/log_async_writer.cc b/src/leveldb/db/log_async_writer.cc index 459fff411..41dd32553 100644 --- a/src/leveldb/db/log_async_writer.cc +++ b/src/leveldb/db/log_async_writer.cc @@ -113,7 +113,6 @@ void AsyncWriter::BlockLogNumInc() { ++block_log_number; } - void* AsyncWriter::ThreadFunc(void* arg) { static_cast(arg)->ThreadFuncCall(); return NULL; diff --git a/src/leveldb/db/log_async_writer.h b/src/leveldb/db/log_async_writer.h index f7496755d..8af286cd7 100644 --- a/src/leveldb/db/log_async_writer.h +++ b/src/leveldb/db/log_async_writer.h @@ -26,12 +26,7 @@ namespace log { class AsyncWriter { public: - enum Mode { - kNoAction, - kAddRecord, - kSync, - kFlush - }; + enum Mode { kNoAction, kAddRecord, kSync, kFlush }; // Create a writer that will append data to "*dest". // "*dest" must be initially empty. // "*dest" must remain live while this Writer is in use. @@ -74,7 +69,7 @@ class AsyncWriter { bool finished_; std::string* slice_data_; bool stop_; - bool blocked_; // whether the current writter blocked + bool blocked_; // whether the current writter blocked Status s_; static int block_log_number; diff --git a/src/leveldb/db/log_reader.cc b/src/leveldb/db/log_reader.cc index c15eecd93..4d371dc55 100644 --- a/src/leveldb/db/log_reader.cc +++ b/src/leveldb/db/log_reader.cc @@ -16,11 +16,9 @@ namespace leveldb { namespace log { -Reader::Reporter::~Reporter() { -} +Reader::Reporter::~Reporter() {} -Reader::Reader(SequentialFile* file, Reporter* reporter, bool checksum, - uint64_t initial_offset) +Reader::Reader(SequentialFile* file, Reporter* reporter, bool checksum, uint64_t initial_offset) : file_(file), reporter_(reporter), checksum_(checksum), @@ -29,12 +27,9 @@ Reader::Reader(SequentialFile* file, Reporter* reporter, bool checksum, eof_(false), last_record_offset_(0), end_of_buffer_offset_(0), - initial_offset_(initial_offset) { -} + initial_offset_(initial_offset) {} -Reader::~Reader() { - delete[] backing_store_; -} +Reader::~Reader() { delete[] backing_store_; } bool Reader::SkipToInitialBlock() { size_t offset_in_block = initial_offset_ % kBlockSize; @@ -116,8 +111,7 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch) { case kMiddleType: if (!in_fragmented_record) { - ReportCorruption(fragment.size(), - "missing start of fragmented record(1)"); + ReportCorruption(fragment.size(), "missing start of fragmented record(1)"); } else { scratch->append(fragment.data(), fragment.size()); } @@ -125,8 +119,7 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch) { case kLastType: if (!in_fragmented_record) { - ReportCorruption(fragment.size(), - "missing start of fragmented record(2)"); + ReportCorruption(fragment.size(), "missing start of fragmented record(2)"); } else { scratch->append(fragment.data(), fragment.size()); *record = Slice(*scratch); @@ -153,9 +146,7 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch) { default: { char buf[40]; snprintf(buf, sizeof(buf), "unknown record type %u", record_type); - ReportCorruption( - (fragment.size() + (in_fragmented_record ? scratch->size() : 0)), - buf); + ReportCorruption((fragment.size() + (in_fragmented_record ? scratch->size() : 0)), buf); in_fragmented_record = false; scratch->clear(); break; @@ -165,17 +156,14 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch) { return false; } -uint64_t Reader::LastRecordOffset() { - return last_record_offset_; -} +uint64_t Reader::LastRecordOffset() { return last_record_offset_; } void Reader::ReportCorruption(size_t bytes, const char* reason) { ReportDrop(bytes, Status::Corruption(reason)); } void Reader::ReportDrop(size_t bytes, const Status& reason) { - if (reporter_ != NULL && - end_of_buffer_offset_ - buffer_.size() - bytes >= initial_offset_) { + if (reporter_ != NULL && end_of_buffer_offset_ - buffer_.size() - bytes >= initial_offset_) { reporter_->Corruption(bytes, reason); } } @@ -248,8 +236,7 @@ unsigned int Reader::ReadPhysicalRecord(Slice* result) { buffer_.remove_prefix(kHeaderSize + length); // Skip physical record that started before initial_offset_ - if (end_of_buffer_offset_ - buffer_.size() - kHeaderSize - length < - initial_offset_) { + if (end_of_buffer_offset_ - buffer_.size() - kHeaderSize - length < initial_offset_) { result->clear(); return kBadRecord; } diff --git a/src/leveldb/db/log_reader.h b/src/leveldb/db/log_reader.h index f4cea2805..264e45120 100644 --- a/src/leveldb/db/log_reader.h +++ b/src/leveldb/db/log_reader.h @@ -44,8 +44,7 @@ class Reader { // // The Reader will start reading at the first record located at physical // position >= initial_offset within the file. - Reader(SequentialFile* file, Reporter* reporter, bool checksum, - uint64_t initial_offset); + Reader(SequentialFile* file, Reporter* reporter, bool checksum, uint64_t initial_offset); ~Reader(); @@ -67,7 +66,7 @@ class Reader { bool const checksum_; char* const backing_store_; Slice buffer_; - bool eof_; // Last Read() indicated EOF by returning < kBlockSize + bool eof_; // Last Read() indicated EOF by returning < kBlockSize // Offset of the last record returned by ReadRecord. uint64_t last_record_offset_; diff --git a/src/leveldb/db/log_test.cc b/src/leveldb/db/log_test.cc index e81ea253e..0d7b5c37a 100644 --- a/src/leveldb/db/log_test.cc +++ b/src/leveldb/db/log_test.cc @@ -60,7 +60,7 @@ class LogTest { Slice contents_; bool force_error_; bool returned_partial_; - StringSource() : force_error_(false), returned_partial_(false) { } + StringSource() : force_error_(false), returned_partial_(false) {} virtual Status Read(size_t n, Slice* result, char* scratch) { ASSERT_TRUE(!returned_partial_) << "must not Read() after eof/error"; @@ -97,7 +97,7 @@ class LogTest { size_t dropped_bytes_; std::string message_; - ReportCollector() : dropped_bytes_(0) { } + ReportCollector() : dropped_bytes_(0) {} virtual void Corruption(size_t bytes, const Status& status) { dropped_bytes_ += bytes; message_.append(status.ToString()); @@ -116,20 +116,17 @@ class LogTest { static uint64_t initial_offset_last_record_offsets_[]; public: - LogTest() : reading_(false), - writer_(&dest_), - reader_(&source_, &report_, true/*checksum*/, - 0/*initial_offset*/) { - } + LogTest() + : reading_(false), + writer_(&dest_), + reader_(&source_, &report_, true /*checksum*/, 0 /*initial_offset*/) {} void Write(const std::string& msg) { ASSERT_TRUE(!reading_) << "Write() after starting to read"; writer_.AddRecord(Slice(msg)); } - size_t WrittenBytes() const { - return dest_.contents_.size(); - } + size_t WrittenBytes() const { return dest_.contents_.size(); } std::string Read() { if (!reading_) { @@ -145,36 +142,24 @@ class LogTest { } } - void IncrementByte(int offset, int delta) { - dest_.contents_[offset] += delta; - } + void IncrementByte(int offset, int delta) { dest_.contents_[offset] += delta; } - void SetByte(int offset, char new_byte) { - dest_.contents_[offset] = new_byte; - } + void SetByte(int offset, char new_byte) { dest_.contents_[offset] = new_byte; } - void ShrinkSize(int bytes) { - dest_.contents_.resize(dest_.contents_.size() - bytes); - } + void ShrinkSize(int bytes) { dest_.contents_.resize(dest_.contents_.size() - bytes); } void FixChecksum(int header_offset, int len) { // Compute crc of type/len/data - uint32_t crc = crc32c::Value(&dest_.contents_[header_offset+6], 1 + len); + uint32_t crc = crc32c::Value(&dest_.contents_[header_offset + 6], 1 + len); crc = crc32c::Mask(crc); EncodeFixed32(&dest_.contents_[header_offset], crc); } - void ForceError() { - source_.force_error_ = true; - } + void ForceError() { source_.force_error_ = true; } - size_t DroppedBytes() const { - return report_.dropped_bytes_; - } + size_t DroppedBytes() const { return report_.dropped_bytes_; } - std::string ReportMessage() const { - return report_.message_; - } + std::string ReportMessage() const { return report_.message_; } // Returns OK iff recorded error message contains "msg" std::string MatchError(const std::string& msg) const { @@ -187,8 +172,7 @@ class LogTest { void WriteInitialOffsetLog() { for (int i = 0; i < 4; i++) { - std::string record(initial_offset_record_sizes_[i], - static_cast('a' + i)); + std::string record(initial_offset_record_sizes_[i], static_cast('a' + i)); Write(record); } } @@ -197,51 +181,40 @@ class LogTest { WriteInitialOffsetLog(); reading_ = true; source_.contents_ = Slice(dest_.contents_); - Reader* offset_reader = new Reader(&source_, &report_, true/*checksum*/, - WrittenBytes() + offset_past_end); + Reader* offset_reader = + new Reader(&source_, &report_, true /*checksum*/, WrittenBytes() + offset_past_end); Slice record; std::string scratch; ASSERT_TRUE(!offset_reader->ReadRecord(&record, &scratch)); delete offset_reader; } - void CheckInitialOffsetRecord(uint64_t initial_offset, - int expected_record_offset) { + void CheckInitialOffsetRecord(uint64_t initial_offset, int expected_record_offset) { WriteInitialOffsetLog(); reading_ = true; source_.contents_ = Slice(dest_.contents_); - Reader* offset_reader = new Reader(&source_, &report_, true/*checksum*/, - initial_offset); + Reader* offset_reader = new Reader(&source_, &report_, true /*checksum*/, initial_offset); Slice record; std::string scratch; ASSERT_TRUE(offset_reader->ReadRecord(&record, &scratch)); - ASSERT_EQ(initial_offset_record_sizes_[expected_record_offset], - record.size()); + ASSERT_EQ(initial_offset_record_sizes_[expected_record_offset], record.size()); ASSERT_EQ(initial_offset_last_record_offsets_[expected_record_offset], offset_reader->LastRecordOffset()); ASSERT_EQ((char)('a' + expected_record_offset), record.data()[0]); delete offset_reader; } - }; -size_t LogTest::initial_offset_record_sizes_[] = - {10000, // Two sizable records in first block - 10000, - 2 * log::kBlockSize - 1000, // Span three blocks - 1}; - -uint64_t LogTest::initial_offset_last_record_offsets_[] = - {0, - kHeaderSize + 10000, - 2 * (kHeaderSize + 10000), - 2 * (kHeaderSize + 10000) + - (2 * log::kBlockSize - 1000) + 3 * kHeaderSize}; +size_t LogTest::initial_offset_record_sizes_[] = {10000, // Two sizable records in first block + 10000, + 2 * log::kBlockSize - 1000, // Span three blocks + 1}; +uint64_t LogTest::initial_offset_last_record_offsets_[] = { + 0, kHeaderSize + 10000, 2 * (kHeaderSize + 10000), + 2 * (kHeaderSize + 10000) + (2 * log::kBlockSize - 1000) + 3 * kHeaderSize}; -TEST(LogTest, Empty) { - ASSERT_EQ("EOF", Read()); -} +TEST(LogTest, Empty) { ASSERT_EQ("EOF", Read()); } TEST(LogTest, ReadWrite) { Write("foo"); @@ -278,7 +251,7 @@ TEST(LogTest, Fragmentation) { TEST(LogTest, MarginalTrailer) { // Make a trailer that is exactly the same length as an empty record. - const int n = kBlockSize - 2*kHeaderSize; + const int n = kBlockSize - 2 * kHeaderSize; Write(BigString("foo", n)); ASSERT_EQ(kBlockSize - kHeaderSize, WrittenBytes()); Write(""); @@ -291,7 +264,7 @@ TEST(LogTest, MarginalTrailer) { TEST(LogTest, MarginalTrailer2) { // Make a trailer that is exactly the same length as an empty record. - const int n = kBlockSize - 2*kHeaderSize; + const int n = kBlockSize - 2 * kHeaderSize; Write(BigString("foo", n)); ASSERT_EQ(kBlockSize - kHeaderSize, WrittenBytes()); Write("bar"); @@ -303,7 +276,7 @@ TEST(LogTest, MarginalTrailer2) { } TEST(LogTest, ShortTrailer) { - const int n = kBlockSize - 2*kHeaderSize + 4; + const int n = kBlockSize - 2 * kHeaderSize + 4; Write(BigString("foo", n)); ASSERT_EQ(kBlockSize - kHeaderSize + 4, WrittenBytes()); Write(""); @@ -315,7 +288,7 @@ TEST(LogTest, ShortTrailer) { } TEST(LogTest, AlignedEof) { - const int n = kBlockSize - 2*kHeaderSize + 4; + const int n = kBlockSize - 2 * kHeaderSize + 4; Write(BigString("foo", n)); ASSERT_EQ(kBlockSize - kHeaderSize + 4, WrittenBytes()); ASSERT_EQ(BigString("foo", n), Read()); @@ -357,7 +330,7 @@ TEST(LogTest, BadRecordType) { TEST(LogTest, TruncatedTrailingRecord) { Write("foo"); - ShrinkSize(4); // Drop all payload as well as a header byte + ShrinkSize(4); // Drop all payload as well as a header byte ASSERT_EQ("EOF", Read()); ASSERT_EQ(kHeaderSize - 1, DroppedBytes()); ASSERT_EQ("OK", MatchError("truncated record at end of file")); @@ -431,74 +404,47 @@ TEST(LogTest, ErrorJoinsRecords) { Write("correct"); // Wipe the middle block - for (uint32_t offset = kBlockSize; offset < 2*kBlockSize; offset++) { + for (uint32_t offset = kBlockSize; offset < 2 * kBlockSize; offset++) { SetByte(offset, 'x'); } ASSERT_EQ("correct", Read()); ASSERT_EQ("EOF", Read()); const uint32_t dropped = DroppedBytes(); - ASSERT_LE(dropped, 2*kBlockSize + 100); - ASSERT_GE(dropped, 2*kBlockSize); + ASSERT_LE(dropped, 2 * kBlockSize + 100); + ASSERT_GE(dropped, 2 * kBlockSize); } -TEST(LogTest, ReadStart) { - CheckInitialOffsetRecord(0, 0); -} +TEST(LogTest, ReadStart) { CheckInitialOffsetRecord(0, 0); } -TEST(LogTest, ReadSecondOneOff) { - CheckInitialOffsetRecord(1, 1); -} +TEST(LogTest, ReadSecondOneOff) { CheckInitialOffsetRecord(1, 1); } -TEST(LogTest, ReadSecondTenThousand) { - CheckInitialOffsetRecord(10000, 1); -} +TEST(LogTest, ReadSecondTenThousand) { CheckInitialOffsetRecord(10000, 1); } -TEST(LogTest, ReadSecondStart) { - CheckInitialOffsetRecord(10007, 1); -} +TEST(LogTest, ReadSecondStart) { CheckInitialOffsetRecord(10007, 1); } -TEST(LogTest, ReadThirdOneOff) { - CheckInitialOffsetRecord(10008, 2); -} +TEST(LogTest, ReadThirdOneOff) { CheckInitialOffsetRecord(10008, 2); } -TEST(LogTest, ReadThirdStart) { - CheckInitialOffsetRecord(20014, 2); -} +TEST(LogTest, ReadThirdStart) { CheckInitialOffsetRecord(20014, 2); } -TEST(LogTest, ReadFourthOneOff) { - CheckInitialOffsetRecord(20015, 3); -} +TEST(LogTest, ReadFourthOneOff) { CheckInitialOffsetRecord(20015, 3); } -TEST(LogTest, ReadFourthFirstBlockTrailer) { - CheckInitialOffsetRecord(log::kBlockSize - 4, 3); -} +TEST(LogTest, ReadFourthFirstBlockTrailer) { CheckInitialOffsetRecord(log::kBlockSize - 4, 3); } -TEST(LogTest, ReadFourthMiddleBlock) { - CheckInitialOffsetRecord(log::kBlockSize + 1, 3); -} +TEST(LogTest, ReadFourthMiddleBlock) { CheckInitialOffsetRecord(log::kBlockSize + 1, 3); } -TEST(LogTest, ReadFourthLastBlock) { - CheckInitialOffsetRecord(2 * log::kBlockSize + 1, 3); -} +TEST(LogTest, ReadFourthLastBlock) { CheckInitialOffsetRecord(2 * log::kBlockSize + 1, 3); } TEST(LogTest, ReadFourthStart) { CheckInitialOffsetRecord( - 2 * (kHeaderSize + 1000) + (2 * log::kBlockSize - 1000) + 3 * kHeaderSize, - 3); + 2 * (kHeaderSize + 1000) + (2 * log::kBlockSize - 1000) + 3 * kHeaderSize, 3); } -TEST(LogTest, ReadEnd) { - CheckOffsetPastEndReturnsNoRecords(0); -} +TEST(LogTest, ReadEnd) { CheckOffsetPastEndReturnsNoRecords(0); } -TEST(LogTest, ReadPastEnd) { - CheckOffsetPastEndReturnsNoRecords(5); -} +TEST(LogTest, ReadPastEnd) { CheckOffsetPastEndReturnsNoRecords(5); } } // namespace log } // namespace leveldb -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} +int main(int argc, char** argv) { return leveldb::test::RunAllTests(); } diff --git a/src/leveldb/db/log_writer.cc b/src/leveldb/db/log_writer.cc index bd80e9c82..9ac8737ef 100644 --- a/src/leveldb/db/log_writer.cc +++ b/src/leveldb/db/log_writer.cc @@ -16,17 +16,14 @@ namespace leveldb { namespace log { -Writer::Writer(WritableFile* dest) - : dest_(dest), - block_offset_(0) { +Writer::Writer(WritableFile* dest) : dest_(dest), block_offset_(0) { for (int i = 0; i <= kMaxRecordType; i++) { char t = static_cast(i); type_crc_[i] = crc32c::Value(&t, 1); } } -Writer::~Writer() { -} +Writer::~Writer() {} Status Writer::AddRecord(const Slice& slice) { const char* ptr = slice.data(); @@ -88,7 +85,7 @@ Status Writer::EmitPhysicalRecord(RecordType t, const char* ptr, size_t n) { // Compute the crc of the record type and the payload. uint32_t crc = crc32c::Extend(type_crc_[t], ptr, n); - crc = crc32c::Mask(crc); // Adjust for storage + crc = crc32c::Mask(crc); // Adjust for storage EncodeFixed32(buf, crc); // Write the header and the payload diff --git a/src/leveldb/db/log_writer.h b/src/leveldb/db/log_writer.h index 1baf1412d..2784192b3 100644 --- a/src/leveldb/db/log_writer.h +++ b/src/leveldb/db/log_writer.h @@ -32,7 +32,7 @@ class Writer { private: WritableFile* dest_; - int block_offset_; // Current offset in block + int block_offset_; // Current offset in block // crc32c values for all supported record types. These are // pre-computed to reduce the overhead of computing the crc of the diff --git a/src/leveldb/db/memtable.cc b/src/leveldb/db/memtable.cc index ddee41b1d..4f1b31f66 100644 --- a/src/leveldb/db/memtable.cc +++ b/src/leveldb/db/memtable.cc @@ -22,24 +22,21 @@ static Slice GetLengthPrefixedSlice(const char* data) { return Slice(p, len); } -MemTable::MemTable(const InternalKeyComparator& cmp, CompactStrategyFactory* compact_strategy_factory) +BaseMemTable::BaseMemTable(const InternalKeyComparator& cmp, + CompactStrategyFactory* compact_strategy_factory) : last_seq_(0), comparator_(cmp), refs_(0), being_flushed_(false), table_(comparator_, &arena_), empty_(true), - compact_strategy_factory_(compact_strategy_factory) { -} + compact_strategy_factory_(compact_strategy_factory) {} -MemTable::~MemTable() { - assert(refs_ == 0); -} +BaseMemTable::~BaseMemTable() { assert(refs_ == 0); } -size_t MemTable::ApproximateMemoryUsage() { return arena_.MemoryUsage(); } +size_t BaseMemTable::ApproximateMemoryUsage() { return arena_.MemoryUsage(); } -int MemTable::KeyComparator::operator()(const char* aptr, const char* bptr) - const { +int BaseMemTable::KeyComparator::operator()(const char* aptr, const char* bptr) const { // Internal keys are encoded as length-prefixed strings. Slice a = GetLengthPrefixedSlice(aptr); Slice b = GetLengthPrefixedSlice(bptr); @@ -56,9 +53,9 @@ static const char* EncodeKey(std::string* scratch, const Slice& target) { return scratch->data(); } -class MemTableIterator: public Iterator { +class BaseMemTableIterator : public Iterator { public: - explicit MemTableIterator(MemTable::Table* table) : iter_(table) { } + explicit BaseMemTableIterator(BaseMemTable::Table* table) : iter_(table) {} virtual bool Valid() const { return iter_.Valid(); } virtual void Seek(const Slice& k) { iter_.Seek(EncodeKey(&tmp_, k)); } @@ -75,21 +72,17 @@ class MemTableIterator: public Iterator { virtual Status status() const { return Status::OK(); } private: - MemTable::Table::Iterator iter_; - std::string tmp_; // For passing to EncodeKey + BaseMemTable::Table::Iterator iter_; + std::string tmp_; // For passing to EncodeKey // No copying allowed - MemTableIterator(const MemTableIterator&); - void operator=(const MemTableIterator&); + BaseMemTableIterator(const BaseMemTableIterator&); + void operator=(const BaseMemTableIterator&); }; -Iterator* MemTable::NewIterator() { - return new MemTableIterator(&table_); -} +Iterator* BaseMemTable::NewIterator() { return new BaseMemTableIterator(&table_); } -void MemTable::Add(SequenceNumber s, ValueType type, - const Slice& key, - const Slice& value) { +void BaseMemTable::Add(SequenceNumber s, ValueType type, const Slice& key, const Slice& value) { // Format of an entry is concatenation of: // key_size : varint32 of internal_key.size() // key bytes : char[internal_key.size()] @@ -99,8 +92,7 @@ void MemTable::Add(SequenceNumber s, ValueType type, size_t val_size = value.size(); size_t internal_key_size = key_size + 8; const size_t encoded_len = - VarintLength(internal_key_size) + internal_key_size + - VarintLength(val_size) + val_size; + VarintLength(internal_key_size) + internal_key_size + VarintLength(val_size) + val_size; char* buf = arena_.Allocate(encoded_len); char* p = EncodeVarint32(buf, internal_key_size); memcpy(p, key.data(), key_size); @@ -115,7 +107,8 @@ void MemTable::Add(SequenceNumber s, ValueType type, last_seq_ = s; } -bool MemTable::Get(const LookupKey& key, std::string* value, const std::map& rollbacks, Status* s) { +bool BaseMemTable::Get(const LookupKey& key, std::string* value, + const std::map& rollbacks, Status* s) { Slice memkey = key.memtable_key(); Table::Iterator iter(&table_); iter.Seek(memkey.data()); @@ -132,26 +125,25 @@ bool MemTable::Get(const LookupKey& key, std::string* value, const std::mapCompare( - Slice(key_ptr, key_length - 8), - key.user_key()) == 0) { + if (comparator_.comparator.user_comparator()->Compare(Slice(key_ptr, key_length - 8), + key.user_key()) == 0) { // Correct user key const uint64_t tag = DecodeFixed64(key_ptr + key_length - 8); switch (static_cast(tag & 0xff)) { case kTypeValue: { Slice v = GetLengthPrefixedSlice(key_ptr + key_length); - CompactStrategy* strategy = compact_strategy_factory_ ? - compact_strategy_factory_->NewInstance() : NULL; + CompactStrategy* strategy = + compact_strategy_factory_ ? compact_strategy_factory_->NewInstance() : NULL; if (!strategy || !strategy->Drop(Slice(key_ptr, key_length - 8), 0)) { - value->assign(v.data(), v.size()); + value->assign(v.data(), v.size()); } else { - *s = Status::NotFound(Slice()); + *s = Status::NotFound(Slice()); } delete strategy; return true; diff --git a/src/leveldb/db/memtable.h b/src/leveldb/db/memtable.h index a2a1a073a..a03fcceba 100644 --- a/src/leveldb/db/memtable.h +++ b/src/leveldb/db/memtable.h @@ -20,85 +20,112 @@ namespace leveldb { class InternalKeyComparator; class Mutex; -class MemTableIterator; +class BaseMemTableIterator; class MemTable { - public: // MemTables are reference counted. The initial reference count // is zero and the caller must call Ref() at least once. - explicit MemTable(const InternalKeyComparator& comparator, - CompactStrategyFactory* compact_strategy_factory = NULL); - + public: + MemTable() = default; + virtual ~MemTable() {} // Increase reference count. - void Ref() { ++refs_; } + virtual void Ref() = 0; // Drop reference count. Delete if no more references exist. - void Unref() { - --refs_; - assert(refs_ >= 0); - if (refs_ <= 0) { - delete this; - } - } + virtual void Unref() = 0; // Returns an estimate of the number of bytes of data in use by this // data structure. - // // REQUIRES: external synchronization to prevent simultaneous - // operations on the same MemTable. - virtual size_t ApproximateMemoryUsage(); + // operations on the same BaseMemTable. + virtual size_t ApproximateMemoryUsage() = 0; // Return an iterator that yields the contents of the memtable. // - // The caller must ensure that the underlying MemTable remains live + // The caller must ensure that the underlying BaseMemTable remains live // while the returned iterator is live. The keys returned by this // iterator are internal keys encoded by AppendInternalKey in the // db/format.{h,cc} module. - virtual Iterator* NewIterator(); + virtual Iterator* NewIterator() = 0; // Add an entry into memtable that maps key to value at the // specified sequence number and with the specified type. // Typically value will be empty if type==kTypeDeletion. - virtual void Add(SequenceNumber seq, ValueType type, - const Slice& key, - const Slice& value); + virtual void Add(SequenceNumber seq, ValueType type, const Slice& key, const Slice& value) = 0; // If memtable contains a value for key, store it in *value and return true. // If memtable contains a deletion for key, store a NotFound() error // in *status and return true. // Else, return false. - virtual bool Get(const LookupKey& key, std::string* value, const std::map& rollbacks, Status* s); + virtual bool Get(const LookupKey& key, std::string* value, + const std::map& rollbacks, Status* s) = 0; + + // These two methods are only used for memtable on leveldb + virtual uint64_t GetSnapshot(uint64_t) = 0; + virtual void ReleaseSnapshot(uint64_t) = 0; + + virtual SequenceNumber GetLastSequence() const = 0; + virtual bool Empty() = 0; + virtual void SetNonEmpty() = 0; + virtual bool BeingFlushed() = 0; + virtual void SetBeingFlushed(bool flag) = 0; + // No copying allowed + MemTable(const MemTable&) = delete; + void operator=(const MemTable&) = delete; +}; - SequenceNumber GetLastSequence() const { - return last_seq_; - } - bool Empty() { - return empty_; - } - void SetNonEmpty() { - empty_ = false; +class BaseMemTable : public MemTable { + public: + BaseMemTable(const InternalKeyComparator& comparator, + CompactStrategyFactory* compact_strategy_factory); + + void Ref() { ++refs_; } + + void Unref() { + --refs_; + assert(refs_ >= 0); + if (refs_ <= 0) { + delete this; + } } - bool BeingFlushed() { return being_flushed_;} + virtual size_t ApproximateMemoryUsage(); + virtual Iterator* NewIterator(); + + virtual void Add(SequenceNumber seq, ValueType type, const Slice& key, const Slice& value); + + virtual bool Get(const LookupKey& key, std::string* value, + const std::map& rollbacks, Status* s); + + SequenceNumber GetLastSequence() const { return last_seq_; } + + bool Empty() { return empty_; } + void SetNonEmpty() { empty_ = false; } + + bool BeingFlushed() { return being_flushed_; } void SetBeingFlushed(bool flag) { - assert(flag ? !being_flushed_ - : being_flushed_); + assert(flag != being_flushed_); being_flushed_ = flag; } - virtual ~MemTable(); + // GetSnapshot and ReleaseSnapshot are not used for base memtable; + virtual uint64_t GetSnapshot(uint64_t) { + abort(); + return -1; + } + virtual void ReleaseSnapshot(uint64_t) { abort(); } - protected: - SequenceNumber last_seq_; + virtual ~BaseMemTable(); private: + SequenceNumber last_seq_; struct KeyComparator { const InternalKeyComparator comparator; - explicit KeyComparator(const InternalKeyComparator& c) : comparator(c) { } + explicit KeyComparator(const InternalKeyComparator& c) : comparator(c) {} int operator()(const char* a, const char* b) const; }; - friend class MemTableIterator; - friend class MemTableBackwardIterator; + friend class BaseMemTableIterator; + friend class BaseMemTableBackwardIterator; typedef SkipList Table; @@ -110,10 +137,6 @@ class MemTable { Table table_; bool empty_; CompactStrategyFactory* compact_strategy_factory_; - - // No copying allowed - MemTable(const MemTable&); - void operator=(const MemTable&); }; } // namespace leveldb diff --git a/src/leveldb/db/memtable_on_leveldb.cc b/src/leveldb/db/memtable_on_leveldb.cc index 80bc9c144..4bd2b1295 100644 --- a/src/leveldb/db/memtable_on_leveldb.cc +++ b/src/leveldb/db/memtable_on_leveldb.cc @@ -9,93 +9,107 @@ #include #include +#include + #include "db/db_impl.h" #include "db/write_batch_internal.h" +#include "leveldb/cache.h" +#include "db/table_cache.h" #include "leveldb/write_batch.h" namespace leveldb { +// Use 100M block and table cache size for all memtable_on_leveldb. +// No need to set a flag for config. +constexpr size_t kSharedBlockCacheSize = 100UL << 20; +constexpr size_t kSharedTableCacheSize = 100UL << 20; +std::atomic MemTableOnLevelDB::unique_id_; + +static Cache* GetSharedBlockCache() { + static std::unique_ptr block_cache{NewLRUCache(kSharedBlockCacheSize)}; + return block_cache.get(); +} + +static TableCache* GetSharedTableCache() { + static std::unique_ptr table_cache{new TableCache(kSharedTableCacheSize)}; + return table_cache.get(); +} + MemTableOnLevelDB::MemTableOnLevelDB(const std::string& dbname, const InternalKeyComparator& comparator, CompactStrategyFactory* compact_strategy_factory, - size_t write_buffer_size, - size_t block_size, - Logger* info_log) - : MemTable(comparator, compact_strategy_factory) { - char memdb_name[1024] = { '\0' }; - snprintf(memdb_name, sizeof(memdb_name), "/%d/%s/%llu", getpid(), dbname.c_str(), - (unsigned long long)this); - leveldb::Options opts; - opts.env = memenv_ = leveldb::NewMemEnv(GetBaseEnv()); - opts.compression = leveldb::kSnappyCompression; - opts.write_buffer_size = write_buffer_size; - opts.block_size = block_size; - opts.compact_strategy_factory = compact_strategy_factory; - opts.comparator = comparator.user_comparator(); - opts.dump_mem_on_shutdown = false; - opts.drop_base_level_del_in_compaction = false; - opts.info_log = info_log; - - DBImpl* db_impl = new DBImpl(opts, memdb_name); - VersionEdit edit; - Status s = db_impl->Recover(&edit); - assert(s.ok()); - memdb_ = db_impl; + size_t write_buffer_size, size_t block_size, + Logger* info_log) { + char memdb_name[1024] = {'\0'}; + snprintf(memdb_name, sizeof(memdb_name), "/%d/%s/%llu/%lu", getpid(), dbname.c_str(), + (unsigned long long)this, unique_id_.fetch_add(1)); + leveldb::Options opts; + opts.env = memenv_ = leveldb::NewMemEnv(GetBaseEnv()); + opts.compression = leveldb::kSnappyCompression; + opts.write_buffer_size = write_buffer_size; + opts.block_size = block_size; + opts.compact_strategy_factory = compact_strategy_factory; + opts.comparator = comparator.user_comparator(); + opts.dump_mem_on_shutdown = false; + opts.drop_base_level_del_in_compaction = false; + opts.info_log = info_log; + opts.block_cache = GetSharedBlockCache(); + opts.table_cache = GetSharedTableCache(); + + DBImpl* db_impl = new DBImpl(opts, memdb_name); + VersionEdit edit; + Status s = db_impl->Recover(&edit); + assert(s.ok()); + memdb_ = db_impl; } MemTableOnLevelDB::~MemTableOnLevelDB() { - if (memdb_) { - memdb_->Shutdown1(); - memdb_->Shutdown2(); - delete memdb_; - } - delete memenv_; - + if (memdb_) { + memdb_->Shutdown1(); + memdb_->Shutdown2(); + delete memdb_; + } + delete memenv_; } size_t MemTableOnLevelDB::ApproximateMemoryUsage() { - uint64_t size; - memdb_->GetApproximateSizes(&size, NULL); - return size; + uint64_t size; + memdb_->GetApproximateSizes(&size, NULL); + return size; } -Iterator* MemTableOnLevelDB::NewIterator() { - return memdb_->NewInternalIterator(); -} +Iterator* MemTableOnLevelDB::NewIterator() { return memdb_->NewInternalIterator(); } -void MemTableOnLevelDB::Add(SequenceNumber seq, - ValueType type, - const Slice& key, +void MemTableOnLevelDB::Add(SequenceNumber seq, ValueType type, const Slice& key, const Slice& value) { - WriteBatch batch; - if (type == kTypeValue) { - batch.Put(key, value); - } else if (type == kTypeDeletion) { - batch.Delete(key); - } - WriteBatchInternal::SetSequence(&batch, seq); - memdb_->Write(WriteOptions(), &batch); - assert(last_seq_ < seq || seq == 0); - last_seq_ = seq; + WriteBatch batch; + if (type == kTypeValue) { + batch.Put(key, value); + } else if (type == kTypeDeletion) { + batch.Delete(key); + } + WriteBatchInternal::SetSequence(&batch, seq); + memdb_->Write(WriteOptions(), &batch); + assert(last_seq_ < seq || seq == 0); + last_seq_ = seq; } -bool MemTableOnLevelDB::Get(const LookupKey& key, - std::string* value, - Status* s) { - ReadOptions read_opt; - ParsedInternalKey internal_key_data; - ParseInternalKey(key.internal_key(), &internal_key_data); - read_opt.snapshot = internal_key_data.sequence; - *s = memdb_->Get(read_opt, key.user_key(), value); - return s->ok(); +bool MemTableOnLevelDB::Get(const LookupKey& key, std::string* value, + const std::map&, Status* s) { + ReadOptions read_opt; + ParsedInternalKey internal_key_data; + ParseInternalKey(key.internal_key(), &internal_key_data); + read_opt.snapshot = internal_key_data.sequence; + *s = memdb_->Get(read_opt, key.user_key(), value); + return s->ok(); } -const uint64_t MemTableOnLevelDB::GetSnapshot(uint64_t last_sequence) { - return memdb_->GetSnapshot(last_sequence); +uint64_t MemTableOnLevelDB::GetSnapshot(uint64_t last_sequence) { + return memdb_->GetSnapshot(last_sequence); } void MemTableOnLevelDB::ReleaseSnapshot(uint64_t sequence_number) { - return memdb_->ReleaseSnapshot(sequence_number); + return memdb_->ReleaseSnapshot(sequence_number); } static pthread_once_t mem_base_env_once = PTHREAD_ONCE_INIT; @@ -107,5 +121,4 @@ Env* MemTableOnLevelDB::GetBaseEnv() { return mem_base_env; } -} //end namespace leveldb - +} // end namespace leveldb diff --git a/src/leveldb/db/memtable_on_leveldb.h b/src/leveldb/db/memtable_on_leveldb.h index fa80ab6aa..56d072615 100644 --- a/src/leveldb/db/memtable_on_leveldb.h +++ b/src/leveldb/db/memtable_on_leveldb.h @@ -5,9 +5,10 @@ // Author: Junyi Sun (sunjunyi01@baidu.com) // Description: memtable built on leveldb -#ifndef STORAGE_LEVELDB_DB_MEMTABLE_ON_LEVELDB_H_ -#define STORAGE_LEVELDB_DB_MEMTABLE_ON_LEVELDB_H_ +#ifndef STORAGE_LEVELDB_DB_MEMTABLE_ON_LEVELDB_H_ +#define STORAGE_LEVELDB_DB_MEMTABLE_ON_LEVELDB_H_ +#include #include "db/memtable.h" #include "helpers/memenv/memenv.h" #include "db/db_impl.h" @@ -15,40 +16,65 @@ namespace leveldb { -class MemTableOnLevelDB : public MemTable{ +class MemTableOnLevelDB : public MemTable { + public: + MemTableOnLevelDB(const std::string& dbname, const InternalKeyComparator& comparator, + CompactStrategyFactory* compact_strategy_factory, size_t write_buffer_size, + size_t block_size, Logger* info_log); -public: + ~MemTableOnLevelDB(); - MemTableOnLevelDB (const std::string& dbname, - const InternalKeyComparator& comparator, - CompactStrategyFactory* compact_strategy_factory, - size_t write_buffer_size, - size_t block_size, - Logger* info_log); + size_t ApproximateMemoryUsage(); - ~MemTableOnLevelDB(); + Iterator* NewIterator(); - size_t ApproximateMemoryUsage(); + void Add(SequenceNumber seq, ValueType type, const Slice& key, const Slice& value); - Iterator* NewIterator(); + uint64_t GetSnapshot(uint64_t last_sequence); - void Add(SequenceNumber seq, ValueType type, - const Slice& key, - const Slice& value); + void ReleaseSnapshot(uint64_t sequence_number); - bool Get(const LookupKey& key, std::string* value, Status* s); + SequenceNumber GetLastSequence() const { return last_seq_; } - const uint64_t GetSnapshot(uint64_t last_sequence); + void Ref() { ++refs_; } - void ReleaseSnapshot(uint64_t sequence_number); + void Unref() { + --refs_; + assert(refs_ >= 0); + if (refs_ <= 0) { + delete this; + } + } -private: - Env* GetBaseEnv(); - leveldb::DBImpl* memdb_; - leveldb::Env* memenv_; -}; + bool Empty() { return empty_; } + + void SetNonEmpty() { empty_ = false; } + + bool BeingFlushed() { return being_flushed_; } -} //namespace leveldb + void SetBeingFlushed(bool flag) { + assert(flag != being_flushed_); + being_flushed_ = flag; + } + + // No body use this method for the followed reasons: + // 1. memtable_on_level_db is only used in lg's schema. + // 2. Get method in memtable/leveldb is only used in kv-table. + // 3. A table with lg schema is not a kv-table. + bool Get(const LookupKey& key, std::string* value, const std::map&, + Status* s); + + private: + SequenceNumber last_seq_{0}; + int refs_{0}; + bool being_flushed_{false}; + bool empty_{true}; + Env* GetBaseEnv(); + leveldb::DBImpl* memdb_; + leveldb::Env* memenv_; + static std::atomic unique_id_; +}; -#endif //STORAGE_LEVELDB_DB__MEMTABLE_ON_LEVELDB_H_ +} // namespace leveldb +#endif // STORAGE_LEVELDB_DB__MEMTABLE_ON_LEVELDB_H_ diff --git a/src/leveldb/db/repair.cc b/src/leveldb/db/repair.cc index cf15ed03b..4c429a864 100644 --- a/src/leveldb/db/repair.cc +++ b/src/leveldb/db/repair.cc @@ -63,10 +63,12 @@ class Repairer { owns_block_cache_(options_.block_cache != options.block_cache), owns_table_cache_(options_.table_cache == NULL), table_cache_(options_.table_cache), - next_file_number_(1), mem_(NULL), max_sequence_(0) { + next_file_number_(1), + mem_(NULL), + max_sequence_(0) { // TableCache can be small since we expect each table to be opened once. if (owns_table_cache_) { - Log(options_.info_log, "[%s] create new table cache in repairer.", dbname_.c_str()); + LEVELDB_LOG(options_.info_log, "[%s] create new table cache in repairer.", dbname_.c_str()); table_cache_ = new TableCache(100 * 2097152LL); } } @@ -95,14 +97,12 @@ class Repairer { for (size_t i = 0; i < tables_.size(); i++) { bytes += tables_[i].meta.file_size; } - Log(options_.info_log, - "**** Repaired leveldb %s; " - "recovered %d files; %llu bytes. " - "Some data may have been lost. " - "****", - dbname_.c_str(), - static_cast(tables_.size()), - bytes); + LEVELDB_LOG(options_.info_log, + "**** Repaired leveldb %s; " + "recovered %d files; %llu bytes. " + "Some data may have been lost. " + "****", + dbname_.c_str(), static_cast(tables_.size()), bytes); } return status; } @@ -171,10 +171,8 @@ class Repairer { std::string logname = LogFileName(dbname_, logs_[i]); Status status = ConvertLogToTable(logs_[i]); if (!status.ok()) { - Log(options_.info_log, "[%s] Log #%llu: ignoring conversion error: %s", - dbname_.c_str(), - (unsigned long long) logs_[i], - status.ToString().c_str()); + LEVELDB_LOG(options_.info_log, "[%s] Log #%llu: ignoring conversion error: %s", + dbname_.c_str(), (unsigned long long)logs_[i], status.ToString().c_str()); } ArchiveFile(logname); } @@ -187,10 +185,8 @@ class Repairer { uint64_t lognum; virtual void Corruption(size_t bytes, const Status& s) { // We print error messages for corruption, but continue repairing. - Log(info_log, "Log #%llu: dropping %d bytes; %s", - (unsigned long long) lognum, - static_cast(bytes), - s.ToString().c_str()); + LEVELDB_LOG(info_log, "Log #%llu: dropping %d bytes; %s", (unsigned long long)lognum, + static_cast(bytes), s.ToString().c_str()); } }; @@ -211,20 +207,18 @@ class Repairer { // corruptions cause entire commits to be skipped instead of // propagating bad information (like overly large sequence // numbers). - log::Reader reader(lfile, &reporter, false/*do not checksum*/, - 0/*initial_offset*/); + log::Reader reader(lfile, &reporter, false /*do not checksum*/, 0 /*initial_offset*/); // Read all the records and add to a memtable std::string scratch; Slice record; WriteBatch batch; - mem_ = new MemTable(icmp_); + mem_ = new BaseMemTable(icmp_, nullptr); mem_->Ref(); int counter = 0; while (reader.ReadRecord(&record, &scratch)) { if (record.size() < 12) { - reporter.Corruption( - record.size(), Status::Corruption("log record too small")); + reporter.Corruption(record.size(), Status::Corruption("log record too small")); continue; } WriteBatchInternal::SetContents(&batch, record); @@ -232,10 +226,8 @@ class Repairer { if (status.ok()) { counter += WriteBatchInternal::Count(&batch); } else { - Log(options_.info_log, "[%s] Log #%llu: ignoring %s", - dbname_.c_str(), - (unsigned long long) log, - status.ToString().c_str()); + LEVELDB_LOG(options_.info_log, "[%s] Log #%llu: ignoring %s", dbname_.c_str(), + (unsigned long long)log, status.ToString().c_str()); status = Status::OK(); // Keep going with rest of file } } @@ -247,8 +239,8 @@ class Repairer { meta.number = next_file_number_++; Iterator* iter = mem_->NewIterator(); uint64_t saved_bytes = 0; - status = BuildTable(dbname_, env_, options_, table_cache_, - iter, &meta, &saved_bytes, kMaxSequenceNumber); + status = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta, &saved_bytes, + kMaxSequenceNumber); delete iter; mem_->Unref(); mem_ = NULL; @@ -257,68 +249,59 @@ class Repairer { table_numbers_.push_back(meta.number); } } - Log(options_.info_log, "[%s] Log #%llu: %d ops saved to Table #%llu %s", - dbname_.c_str(), - (unsigned long long) log, - counter, - (unsigned long long) meta.number, - status.ToString().c_str()); + LEVELDB_LOG(options_.info_log, "[%s] Log #%llu: %d ops saved to Table #%llu %s", + dbname_.c_str(), (unsigned long long)log, counter, (unsigned long long)meta.number, + status.ToString().c_str()); return status; } - Status InsertMemTable(WriteBatch* batch, uint64_t batch_seq) { - if (mem_ == NULL) { - mem_ = new MemTable(icmp_); - mem_->Ref(); - } - assert(batch_seq > max_sequence_); - max_sequence_ = batch_seq + WriteBatchInternal::Count(batch) - 1; - return WriteBatchInternal::InsertInto(batch, mem_); - } - bool HasMemTable() const { - return mem_ != NULL; - } - Status BuildTableFile(uint64_t log, uint32_t lg_id, uint64_t* file_number) { - FileMetaData meta; - meta.number = next_file_number_++; - *file_number = meta.number; - Iterator* iter = mem_->NewIterator(); - uint64_t saved_bytes = 0; - Status status = BuildTable(dbname_, env_, options_, table_cache_, - iter, &meta, &saved_bytes, kMaxSequenceNumber); - delete iter; - mem_->Unref(); - mem_ = NULL; - if (status.ok()) { - if (meta.file_size > 0) { - table_numbers_.push_back(meta.number); - } - } - Log(options_.info_log, "[%s][lg:%d] Log #%llu: saved to Table #%llu %s", - dbname_.c_str(), lg_id, - (unsigned long long) log, - (unsigned long long) meta.number, - status.ToString().c_str()); - return status; - } - - Status AddTableMeta(uint64_t table_number) { - TableInfo t; - t.meta.number = table_number; - Status status = ScanTable(&t); - if (!status.ok()) { - std::string fname = TableFileName(dbname_, table_number); - Log(options_.info_log, "[%s] Table #%llu: ignoring %s", - dbname_.c_str(), - (unsigned long long) table_number, + Status InsertMemTable(WriteBatch* batch, uint64_t batch_seq) { + if (mem_ == NULL) { + mem_ = new BaseMemTable(icmp_, nullptr); + mem_->Ref(); + } + assert(batch_seq > max_sequence_); + max_sequence_ = batch_seq + WriteBatchInternal::Count(batch) - 1; + return WriteBatchInternal::InsertInto(batch, mem_); + } + bool HasMemTable() const { return mem_ != NULL; } + Status BuildTableFile(uint64_t log, uint32_t lg_id, uint64_t* file_number) { + FileMetaData meta; + meta.number = next_file_number_++; + *file_number = meta.number; + Iterator* iter = mem_->NewIterator(); + uint64_t saved_bytes = 0; + Status status = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta, &saved_bytes, + kMaxSequenceNumber); + delete iter; + mem_->Unref(); + mem_ = NULL; + if (status.ok()) { + if (meta.file_size > 0) { + table_numbers_.push_back(meta.number); + } + } + LEVELDB_LOG(options_.info_log, "[%s][lg:%d] Log #%llu: saved to Table #%llu %s", + dbname_.c_str(), lg_id, (unsigned long long)log, (unsigned long long)meta.number, status.ToString().c_str()); - ArchiveFile(fname); - } else { - tables_.push_back(t); - table_numbers_.push_back(table_number); - } - return status; + return status; + } + + Status AddTableMeta(uint64_t table_number) { + TableInfo t; + t.meta.number = table_number; + Status status = ScanTable(&t); + if (!status.ok()) { + std::string fname = TableFileName(dbname_, table_number); + LEVELDB_LOG(options_.info_log, "[%s] Table #%llu: ignoring %s", dbname_.c_str(), + (unsigned long long)table_number, status.ToString().c_str()); + ArchiveFile(fname); + } else { + tables_.push_back(t); + table_numbers_.push_back(table_number); } + return status; + } void ExtractMetaData() { std::vector kept; @@ -328,15 +311,13 @@ class Repairer { Status status = ScanTable(&t); if (!status.ok()) { std::string fname = TableFileName(dbname_, table_numbers_[i]); - Log(options_.info_log, "[%s] Table #%llu: ignoring %s", - dbname_.c_str(), - (unsigned long long) table_numbers_[i], - status.ToString().c_str()); + LEVELDB_LOG(options_.info_log, "[%s] Table #%llu: ignoring %s", dbname_.c_str(), + (unsigned long long)table_numbers_[i], status.ToString().c_str()); ArchiveFile(fname); } else { tables_.push_back(t); if (t.max_sequence > max_sequence_) { - max_sequence_ = t.max_sequence; + max_sequence_ = t.max_sequence; } } } @@ -347,18 +328,16 @@ class Repairer { int counter = 0; Status status = env_->GetFileSize(fname, &t->meta.file_size); if (status.ok()) { - Iterator* iter = table_cache_->NewIterator( - ReadOptions(&options_), dbname_, t->meta.number, t->meta.file_size); + Iterator* iter = table_cache_->NewIterator(ReadOptions(&options_), dbname_, t->meta.number, + t->meta.file_size); bool empty = true; ParsedInternalKey parsed; t->max_sequence = 0; for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { Slice key = iter->key(); if (!ParseInternalKey(key, &parsed)) { - Log(options_.info_log, "[%s] Table #%llu: unparsable key %s", - dbname_.c_str(), - (unsigned long long) t->meta.number, - EscapeString(key).c_str()); + LEVELDB_LOG(options_.info_log, "[%s] Table #%llu: unparsable key %s", dbname_.c_str(), + (unsigned long long)t->meta.number, EscapeString(key).c_str()); continue; } @@ -380,11 +359,8 @@ class Repairer { status = Status::Corruption("sst is empty"); } } - Log(options_.info_log, "[%s] Table #%llu: %d entries %s", - dbname_.c_str(), - (unsigned long long) t->meta.number, - counter, - status.ToString().c_str()); + LEVELDB_LOG(options_.info_log, "[%s] Table #%llu: %d entries %s", dbname_.c_str(), + (unsigned long long)t->meta.number, counter, status.ToString().c_str()); return status; } @@ -411,11 +387,10 @@ class Repairer { for (size_t i = 0; i < tables_.size(); i++) { // TODO(opt): separate out into multiple levels const TableInfo& t = tables_[i]; - edit_.AddFile(0, t.meta.number, t.meta.file_size, - t.meta.smallest, t.meta.largest); + edit_.AddFile(0, t.meta.number, t.meta.file_size, t.meta.smallest, t.meta.largest); } - //fprintf(stderr, "NewDescriptor:\n%s\n", edit_.DebugString().c_str()); + // fprintf(stderr, "NewDescriptor:\n%s\n", edit_.DebugString().c_str()); { log::Writer log(file); std::string record; @@ -463,277 +438,257 @@ class Repairer { new_file.append("/"); new_file.append((slash == NULL) ? fname.c_str() : slash + 1); Status s = env_->RenameFile(fname, new_file); - Log(options_.info_log, "[%s] Archiving %s: %s\n", - dbname_.c_str(), - fname.c_str(), s.ToString().c_str()); + LEVELDB_LOG(options_.info_log, "[%s] Archiving %s: %s\n", dbname_.c_str(), fname.c_str(), + s.ToString().c_str()); } }; Options InitDefaultOptions(const Options& options, const std::string& dbname) { - Options opt = options; + Options opt = options; - Status s = opt.env->CreateDir(dbname); - if (!s.ok()) { - std::cerr << "[" << dbname << "] fail to create dir: " - << s.ToString() << std::endl; - } - assert(s.ok()); + Status s = opt.env->CreateDir(dbname); + if (!s.ok()) { + std::cerr << "[" << dbname << "] fail to create dir: " << s.ToString() << std::endl; + } + assert(s.ok()); - if (opt.exist_lg_list == NULL) { - opt.exist_lg_list = new std::set; - opt.exist_lg_list->insert(0); - } - return opt; + if (opt.exist_lg_list == NULL) { + opt.exist_lg_list = new std::set; + opt.exist_lg_list->insert(0); + } + return opt; } class DBRepairer { -public: - DBRepairer(const std::string& dbname, const Options& options) - : dbname_(dbname), env_(options.env), - options_(InitDefaultOptions(options, dbname)), - created_own_lg_list_(options_.exist_lg_list != options.exist_lg_list), - log_number_(0), - last_sequence_(0) { - std::set::iterator it = options_.exist_lg_list->begin(); - for (; it != options_.exist_lg_list->end(); ++it) { - Repairer* repair = new Repairer(dbname_ + "/" + Uint64ToString(*it), - options_); - repairers.push_back(repair); - } + public: + DBRepairer(const std::string& dbname, const Options& options) + : dbname_(dbname), + env_(options.env), + options_(InitDefaultOptions(options, dbname)), + created_own_lg_list_(options_.exist_lg_list != options.exist_lg_list), + log_number_(0), + last_sequence_(0) { + std::set::iterator it = options_.exist_lg_list->begin(); + for (; it != options_.exist_lg_list->end(); ++it) { + Repairer* repair = new Repairer(dbname_ + "/" + Uint64ToString(*it), options_); + repairers.push_back(repair); } - ~DBRepairer() { - std::set::iterator it = options_.exist_lg_list->begin(); - for (; it != options_.exist_lg_list->end(); ++it) { - delete repairers[*it]; - } - if (created_own_lg_list_) { - delete options_.exist_lg_list; - } + } + ~DBRepairer() { + std::set::iterator it = options_.exist_lg_list->begin(); + for (; it != options_.exist_lg_list->end(); ++it) { + delete repairers[*it]; } + if (created_own_lg_list_) { + delete options_.exist_lg_list; + } + } - Status Run() { - Status status = FindFiles(); - if (status.ok()) { - ExtractMetaData(); - ConvertLogFilesToTables(); - status = WriteDescriptor(); - } - return status; + Status Run() { + Status status = FindFiles(); + if (status.ok()) { + ExtractMetaData(); + ConvertLogFilesToTables(); + status = WriteDescriptor(); } + return status; + } -private: - Status FindFiles() { - std::vector filenames; - Status status = env_->GetChildren(dbname_, &filenames); - if (!status.ok()) { - return status; - } - if (filenames.empty()) { - return Status::IOError(dbname_, "repair found no files"); - } + private: + Status FindFiles() { + std::vector filenames; + Status status = env_->GetChildren(dbname_, &filenames); + if (!status.ok()) { + return status; + } + if (filenames.empty()) { + return Status::IOError(dbname_, "repair found no files"); + } - uint64_t number; - FileType type; - for (size_t i = 0; i < filenames.size(); i++) { - if (ParseFileName(filenames[i], &number, &type)) { - if (type == kLogFile) { - logfiles_.push_back(number); - if (number + 1 > log_number_) { - log_number_ = number + 1; - } - } - } + uint64_t number; + FileType type; + for (size_t i = 0; i < filenames.size(); i++) { + if (ParseFileName(filenames[i], &number, &type)) { + if (type == kLogFile) { + logfiles_.push_back(number); + if (number + 1 > log_number_) { + log_number_ = number + 1; + } } + } + } - std::set::iterator it = options_.exist_lg_list->begin(); - for (; it != options_.exist_lg_list->end(); ++it) { - repairers[*it]->FindFiles(); - } - return status; - } - - void ConvertLogFilesToTables() { - for (size_t i = 0; i < logfiles_.size(); i++) { - std::string logname = LogHexFileName(dbname_, logfiles_[i]); - Status status = ConvertLogToTable(logfiles_[i]); - if (!status.ok()) { - Log(options_.info_log, "[%s] Log #%llu: ignoring conversion error: %s", - dbname_.c_str(), - (unsigned long long) logfiles_[i], - status.ToString().c_str()); - } - ArchiveFile(logname); - } + std::set::iterator it = options_.exist_lg_list->begin(); + for (; it != options_.exist_lg_list->end(); ++it) { + repairers[*it]->FindFiles(); } + return status; + } - Status ConvertLogToTable(uint64_t log) { - struct LogReporter : public log::Reader::Reporter { - Env* env; - Logger* info_log; - uint64_t lognum; - virtual void Corruption(size_t bytes, const Status& s) { - // We print error messages for corruption, but continue repairing. - Log(info_log, "Log #%llu: dropping %d bytes; %s", - (unsigned long long) lognum, - static_cast(bytes), - s.ToString().c_str()); - } - }; + void ConvertLogFilesToTables() { + for (size_t i = 0; i < logfiles_.size(); i++) { + std::string logname = LogHexFileName(dbname_, logfiles_[i]); + Status status = ConvertLogToTable(logfiles_[i]); + if (!status.ok()) { + LEVELDB_LOG(options_.info_log, "[%s] Log #%llu: ignoring conversion error: %s", + dbname_.c_str(), (unsigned long long)logfiles_[i], status.ToString().c_str()); + } + ArchiveFile(logname); + } + } - // Open the log file - std::string logname = LogHexFileName(dbname_, log); - SequentialFile* lfile; - Status status = env_->NewSequentialFile(logname, &lfile); - if (!status.ok()) { - return status; - } + Status ConvertLogToTable(uint64_t log) { + struct LogReporter : public log::Reader::Reporter { + Env* env; + Logger* info_log; + uint64_t lognum; + virtual void Corruption(size_t bytes, const Status& s) { + // We print error messages for corruption, but continue repairing. + LEVELDB_LOG(info_log, "Log #%llu: dropping %d bytes; %s", (unsigned long long)lognum, + static_cast(bytes), s.ToString().c_str()); + } + }; - // Create the log reader. - LogReporter reporter; - reporter.env = env_; - reporter.info_log = options_.info_log; - reporter.lognum = log; - - log::Reader reader(lfile, &reporter, false/*do not checksum*/, - 0/*initial_offset*/); - - // Read all the records and add to a memtable - std::string scratch; - Slice record; - WriteBatch batch; - int32_t counter = 0; - while (reader.ReadRecord(&record, &scratch)) { - if (record.size() < 12) { - reporter.Corruption( - record.size(), Status::Corruption("log record too small")); - continue; - } - WriteBatchInternal::SetContents(&batch, record); - uint64_t batch_seq = WriteBatchInternal::Sequence(&batch); - uint64_t batch_count = WriteBatchInternal::Count(&batch); - if (batch_seq <= last_sequence_) { - Log(options_.info_log, "[%s] duplicate record, ignore %llu ~ %llu", - dbname_.c_str(), static_cast(batch_seq), + // Open the log file + std::string logname = LogHexFileName(dbname_, log); + SequentialFile* lfile; + Status status = env_->NewSequentialFile(logname, &lfile); + if (!status.ok()) { + return status; + } + + // Create the log reader. + LogReporter reporter; + reporter.env = env_; + reporter.info_log = options_.info_log; + reporter.lognum = log; + + log::Reader reader(lfile, &reporter, false /*do not checksum*/, 0 /*initial_offset*/); + + // Read all the records and add to a memtable + std::string scratch; + Slice record; + WriteBatch batch; + int32_t counter = 0; + while (reader.ReadRecord(&record, &scratch)) { + if (record.size() < 12) { + reporter.Corruption(record.size(), Status::Corruption("log record too small")); + continue; + } + WriteBatchInternal::SetContents(&batch, record); + uint64_t batch_seq = WriteBatchInternal::Sequence(&batch); + uint64_t batch_count = WriteBatchInternal::Count(&batch); + if (batch_seq <= last_sequence_) { + LEVELDB_LOG(options_.info_log, "[%s] duplicate record, ignore %llu ~ %llu", dbname_.c_str(), + static_cast(batch_seq), static_cast(batch_seq + batch_count - 1)); - continue; - } - - std::vector lg_batchs; - lg_batchs.resize(options_.exist_lg_list->size()); - std::fill(lg_batchs.begin(), lg_batchs.end(), (WriteBatch*)0); - bool created_new_wb = false; - if (options_.exist_lg_list->size() > 1) { - status = batch.SeperateLocalityGroup(&lg_batchs); - created_new_wb = true; - if (!status.ok()) { - return status; - } - for (uint32_t i = 0; i < options_.exist_lg_list->size(); ++i) { - if (lg_batchs[i] != 0) { - WriteBatchInternal::SetSequence(lg_batchs[i], batch_seq); - } - } - } else { - lg_batchs[0] = (&batch); - } - for (uint32_t i = 0; i < lg_batchs.size(); ++i) { - if (lg_batchs[i] == NULL) { - continue; - } - status = repairers[i]->InsertMemTable(lg_batchs[i], batch_seq); - if (!status.ok()) { - Log(options_.info_log, "[%s][lg:%d] Insert log #%llu: ignoring %s", - dbname_.c_str(), i, - (unsigned long long) log, - status.ToString().c_str()); - status = Status::OK(); // Keep going with rest of file - } else { - counter += WriteBatchInternal::Count(lg_batchs[i]); - } - } - if (created_new_wb) { - for (uint32_t i = 0; i < lg_batchs.size(); ++i) { - if (lg_batchs[i] != NULL) { - delete lg_batchs[i]; - lg_batchs[i] = NULL; - } - } - } - last_sequence_ = batch_seq + batch_count - 1; + continue; + } + + std::vector lg_batchs; + lg_batchs.resize(options_.exist_lg_list->size()); + std::fill(lg_batchs.begin(), lg_batchs.end(), (WriteBatch*)0); + bool created_new_wb = false; + if (options_.exist_lg_list->size() > 1) { + status = batch.SeperateLocalityGroup(&lg_batchs); + created_new_wb = true; + if (!status.ok()) { + return status; + } + for (uint32_t i = 0; i < options_.exist_lg_list->size(); ++i) { + if (lg_batchs[i] != 0) { + WriteBatchInternal::SetSequence(lg_batchs[i], batch_seq); + } } - delete lfile; - - std::set::iterator it = options_.exist_lg_list->begin(); - for (; it != options_.exist_lg_list->end(); ++it) { - uint32_t i = *it; - uint64_t file_num = 0; - if (!repairers[i]->HasMemTable()) { - continue; - } - status = repairers[i]->BuildTableFile(log, i, &file_num); - if (!status.ok()) { - Log(options_.info_log, "[%s][lg:%d] BuildLogFile #%llu: ignoring %s", - dbname_.c_str(), i, - (unsigned long long) log, - status.ToString().c_str()); - status = Status::OK(); // Keep going with rest of file - } else { - status = repairers[i]->AddTableMeta(file_num); - if (!status.ok()) { - Log(options_.info_log, "[%s][lg:%d] AddTableMeta #%llu: ignoring %s", - dbname_.c_str(), i, - (unsigned long long) log, - status.ToString().c_str()); - status = Status::OK(); // Keep going with rest of file - } - } + } else { + lg_batchs[0] = (&batch); + } + for (uint32_t i = 0; i < lg_batchs.size(); ++i) { + if (lg_batchs[i] == NULL) { + continue; } - Log(options_.info_log, "[%s] Log #%llu to Table: %d entries %s", - dbname_.c_str(), - (unsigned long long) log, - counter, - status.ToString().c_str()); - return status; - } - - void ExtractMetaData() { - std::set::iterator it = options_.exist_lg_list->begin(); - for (; it != options_.exist_lg_list->end(); ++it) { - repairers[*it]->ExtractMetaData(); - if (last_sequence_ < repairers[*it]->max_sequence_) { - last_sequence_ = repairers[*it]->max_sequence_; - } + status = repairers[i]->InsertMemTable(lg_batchs[i], batch_seq); + if (!status.ok()) { + LEVELDB_LOG(options_.info_log, "[%s][lg:%d] Insert log #%llu: ignoring %s", + dbname_.c_str(), i, (unsigned long long)log, status.ToString().c_str()); + status = Status::OK(); // Keep going with rest of file + } else { + counter += WriteBatchInternal::Count(lg_batchs[i]); } + } + if (created_new_wb) { + for (uint32_t i = 0; i < lg_batchs.size(); ++i) { + if (lg_batchs[i] != NULL) { + delete lg_batchs[i]; + lg_batchs[i] = NULL; + } + } + } + last_sequence_ = batch_seq + batch_count - 1; } + delete lfile; - Status WriteDescriptor() { - Status status; - std::set::iterator it = options_.exist_lg_list->begin(); - for (; it != options_.exist_lg_list->end(); ++it) { - Status s = repairers[*it]->WriteDescriptor(); - if (!s.ok()) { - Log(options_.info_log, "[%s][lg:%d] WriteDescriptor error: %s", - dbname_.c_str(), *it, - s.ToString().c_str()); - status = s; - } + std::set::iterator it = options_.exist_lg_list->begin(); + for (; it != options_.exist_lg_list->end(); ++it) { + uint32_t i = *it; + uint64_t file_num = 0; + if (!repairers[i]->HasMemTable()) { + continue; + } + status = repairers[i]->BuildTableFile(log, i, &file_num); + if (!status.ok()) { + LEVELDB_LOG(options_.info_log, "[%s][lg:%d] BuildLogFile #%llu: ignoring %s", + dbname_.c_str(), i, (unsigned long long)log, status.ToString().c_str()); + status = Status::OK(); // Keep going with rest of file + } else { + status = repairers[i]->AddTableMeta(file_num); + if (!status.ok()) { + LEVELDB_LOG(options_.info_log, "[%s][lg:%d] AddTableMeta #%llu: ignoring %s", + dbname_.c_str(), i, (unsigned long long)log, status.ToString().c_str()); + status = Status::OK(); // Keep going with rest of file } - return status; + } } + LEVELDB_LOG(options_.info_log, "[%s] Log #%llu to Table: %d entries %s", dbname_.c_str(), + (unsigned long long)log, counter, status.ToString().c_str()); + return status; + } - void ArchiveFile(const std::string& fname) { - repairers[0]->ArchiveFile(fname); + void ExtractMetaData() { + std::set::iterator it = options_.exist_lg_list->begin(); + for (; it != options_.exist_lg_list->end(); ++it) { + repairers[*it]->ExtractMetaData(); + if (last_sequence_ < repairers[*it]->max_sequence_) { + last_sequence_ = repairers[*it]->max_sequence_; + } } + } -private: - std::vector repairers; - std::string const dbname_; - Env* const env_; - Options const options_; - bool created_own_lg_list_; - uint64_t log_number_; - std::vector logfiles_; - uint64_t last_sequence_; + Status WriteDescriptor() { + Status status; + std::set::iterator it = options_.exist_lg_list->begin(); + for (; it != options_.exist_lg_list->end(); ++it) { + Status s = repairers[*it]->WriteDescriptor(); + if (!s.ok()) { + LEVELDB_LOG(options_.info_log, "[%s][lg:%d] WriteDescriptor error: %s", dbname_.c_str(), + *it, s.ToString().c_str()); + status = s; + } + } + return status; + } + + void ArchiveFile(const std::string& fname) { repairers[0]->ArchiveFile(fname); } + + private: + std::vector repairers; + std::string const dbname_; + Env* const env_; + Options const options_; + bool created_own_lg_list_; + uint64_t log_number_; + std::vector logfiles_; + uint64_t last_sequence_; }; } // namespace diff --git a/src/leveldb/db/sharded_memtable.cc b/src/leveldb/db/sharded_memtable.cc new file mode 100644 index 000000000..48c343168 --- /dev/null +++ b/src/leveldb/db/sharded_memtable.cc @@ -0,0 +1,91 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include + +#include "sharded_memtable.h" +#include "leveldb/table/merger.h" + +namespace leveldb { + +// For Base Mem Table +ShardedMemTable::ShardedMemTable(const InternalKeyComparator& cmp, + CompactStrategyFactory* compact_strategy_factory, + int32_t shard_num) + : comparator_(cmp) { + sharded_memtable_.resize(shard_num, nullptr); + std::for_each(sharded_memtable_.begin(), sharded_memtable_.end(), [&](MemTable*& mem) { + mem = new BaseMemTable(cmp, compact_strategy_factory); + mem->Ref(); + }); + current_memtable_ = sharded_memtable_.begin(); +} + +// For MemTable on LevelDB +ShardedMemTable::ShardedMemTable(const std::string& dbname, const InternalKeyComparator& cmp, + CompactStrategyFactory* compact_strategy_factory, + size_t write_buffer_size, size_t block_size, Logger* info_log, + int32_t shard_num) + : comparator_(cmp) { + sharded_memtable_.resize(shard_num, nullptr); + std::for_each(sharded_memtable_.begin(), sharded_memtable_.end(), [&](MemTable*& mem) { + mem = new MemTableOnLevelDB(dbname, cmp, compact_strategy_factory, write_buffer_size, + block_size, info_log); + mem->Ref(); + }); + current_memtable_ = sharded_memtable_.begin(); +} + +ShardedMemTable::~ShardedMemTable() { + assert(refs_ == 0); + std::for_each(sharded_memtable_.begin(), sharded_memtable_.end(), + [](MemTable* mem) { mem->Unref(); }); +} + +size_t ShardedMemTable::ApproximateMemoryUsage() { + return std::accumulate( + sharded_memtable_.begin(), sharded_memtable_.end(), (size_t)0, + [](size_t sum, MemTable* mem) { return sum + mem->ApproximateMemoryUsage(); }); +} + +Iterator* ShardedMemTable::NewIterator() { + std::vector mem_iterators; + mem_iterators.reserve(sharded_memtable_.size()); + std::for_each(sharded_memtable_.begin(), sharded_memtable_.end(), + [&mem_iterators](MemTable* mem) { mem_iterators.push_back(mem->NewIterator()); }); + + return NewMergingIterator(&comparator_, &mem_iterators[0], mem_iterators.size()); +} + +void ShardedMemTable::Add(SequenceNumber seq, ValueType type, const Slice& key, + const Slice& value) { + if (current_memtable_ == sharded_memtable_.end()) { + current_memtable_ = sharded_memtable_.begin(); + } + (*current_memtable_)->Add(seq, type, key, value); + ++current_memtable_; + assert(last_seq_ < seq || seq == 0); + last_seq_ = seq; +} + +bool ShardedMemTable::Get(const LookupKey& key, std::string* value, + const std::map& rollbacks, Status* s) { + // This method is only used for kv-table, + // but ShardedMemTable is not used for kv-table + abort(); + return false; +} + +uint64_t ShardedMemTable::GetSnapshot(uint64_t last_sequence) { + std::for_each(sharded_memtable_.begin(), sharded_memtable_.end(), + [last_sequence](MemTable* mem) { mem->GetSnapshot(last_sequence); }); + return last_sequence; +} + +void ShardedMemTable::ReleaseSnapshot(uint64_t sequence_number) { + std::for_each(sharded_memtable_.begin(), sharded_memtable_.end(), + [sequence_number](MemTable* mem) { mem->ReleaseSnapshot(sequence_number); }); +} +} diff --git a/src/leveldb/db/sharded_memtable.h b/src/leveldb/db/sharded_memtable.h new file mode 100644 index 000000000..b99e8f334 --- /dev/null +++ b/src/leveldb/db/sharded_memtable.h @@ -0,0 +1,71 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once +#include "memtable.h" +#include "memtable_on_leveldb.h" + +namespace leveldb { + +class ShardedMemTable : public MemTable { + public: + // For Base MemTble + ShardedMemTable(const InternalKeyComparator &cmp, + CompactStrategyFactory *compact_strategy_factory, int32_t shard_num); + + // For MemTable on LevelDB + ShardedMemTable(const std::string &dbname, const InternalKeyComparator &cmp, + CompactStrategyFactory *compact_strategy_factory, size_t write_buffer_size, + size_t block_size, Logger *info_log, int32_t shard_num); + + virtual void Ref() override { ++refs_; } + + virtual void Unref() override { + --refs_; + assert(refs_ >= 0); + if (refs_ <= 0) { + delete this; + } + } + + virtual ~ShardedMemTable() override; + + virtual size_t ApproximateMemoryUsage() override; + + virtual Iterator *NewIterator() override; + + virtual void Add(SequenceNumber number, ValueType type, const Slice &slice, + const Slice &slice1) override; + + virtual bool Get(const LookupKey &key, std::string *value, + const std::map &rollbacks, Status *s) override; + + virtual SequenceNumber GetLastSequence() const override { return last_seq_; } + + virtual bool Empty() override { return empty_; } + + virtual void SetNonEmpty() override { empty_ = false; } + + virtual bool BeingFlushed() override { return being_flushed_; } + + virtual void SetBeingFlushed(bool flag) override { + assert(flag != being_flushed_); + being_flushed_ = flag; + } + + virtual uint64_t GetSnapshot(uint64_t last_sequence) override; + + virtual void ReleaseSnapshot(uint64_t sequence_number) override; + + private: + InternalKeyComparator comparator_; + std::vector::iterator current_memtable_; + std::vector sharded_memtable_; + + SequenceNumber last_seq_{0}; + int refs_{0}; + bool being_flushed_{false}; + bool empty_{true}; +}; +} diff --git a/src/leveldb/db/skiplist.h b/src/leveldb/db/skiplist.h index 1299ba18f..7131e0ec2 100644 --- a/src/leveldb/db/skiplist.h +++ b/src/leveldb/db/skiplist.h @@ -38,7 +38,7 @@ namespace leveldb { class Arena; -template +template class SkipList { private: struct Node; @@ -100,17 +100,16 @@ class SkipList { // Immutable after construction Comparator const compare_; - Arena* const arena_; // Arena used for allocations of nodes + Arena* const arena_; // Arena used for allocations of nodes Node* const head_; // Modified only by Insert(). Read racily by readers, but stale // values are ok. - port::AtomicPointer max_height_; // Height of the entire list + port::AtomicPointer max_height_; // Height of the entire list inline int GetMaxHeight() const { - return static_cast( - reinterpret_cast(max_height_.NoBarrier_Load())); + return static_cast(reinterpret_cast(max_height_.NoBarrier_Load())); } // Read/written only by Insert(). @@ -144,9 +143,9 @@ class SkipList { }; // Implementation details follow -template -struct SkipList::Node { - explicit Node(const Key& k) : key(k) { } +template +struct SkipList::Node { + explicit Node(const Key& k) : key(k) {} Key const key; @@ -180,39 +179,38 @@ struct SkipList::Node { port::AtomicPointer next_[1]; }; -template -typename SkipList::Node* -SkipList::NewNode(const Key& key, int height) { - char* mem = arena_->AllocateAligned( - sizeof(Node) + sizeof(port::AtomicPointer) * (height - 1)); +template +typename SkipList::Node* SkipList::NewNode(const Key& key, + int height) { + char* mem = arena_->AllocateAligned(sizeof(Node) + sizeof(port::AtomicPointer) * (height - 1)); return new (mem) Node(key); } -template -inline SkipList::Iterator::Iterator(const SkipList* list) { +template +inline SkipList::Iterator::Iterator(const SkipList* list) { list_ = list; node_ = NULL; } -template -inline bool SkipList::Iterator::Valid() const { +template +inline bool SkipList::Iterator::Valid() const { return node_ != NULL; } -template -inline const Key& SkipList::Iterator::key() const { +template +inline const Key& SkipList::Iterator::key() const { assert(Valid()); return node_->key; } -template -inline void SkipList::Iterator::Next() { +template +inline void SkipList::Iterator::Next() { assert(Valid()); node_ = node_->Next(0); } -template -inline void SkipList::Iterator::Prev() { +template +inline void SkipList::Iterator::Prev() { // Instead of using explicit "prev" links, we just search for the // last node that falls before key. assert(Valid()); @@ -222,26 +220,26 @@ inline void SkipList::Iterator::Prev() { } } -template -inline void SkipList::Iterator::Seek(const Key& target) { +template +inline void SkipList::Iterator::Seek(const Key& target) { node_ = list_->FindGreaterOrEqual(target, NULL); } -template -inline void SkipList::Iterator::SeekToFirst() { +template +inline void SkipList::Iterator::SeekToFirst() { node_ = list_->head_->Next(0); } -template -inline void SkipList::Iterator::SeekToLast() { +template +inline void SkipList::Iterator::SeekToLast() { node_ = list_->FindLast(); if (node_ == list_->head_) { node_ = NULL; } } -template -int SkipList::RandomHeight() { +template +int SkipList::RandomHeight() { // Increase height with probability 1 in kBranching static const unsigned int kBranching = 4; int height = 1; @@ -253,15 +251,15 @@ int SkipList::RandomHeight() { return height; } -template -bool SkipList::KeyIsAfterNode(const Key& key, Node* n) const { +template +bool SkipList::KeyIsAfterNode(const Key& key, Node* n) const { // NULL n is considered infinite return (n != NULL) && (compare_(n->key, key) < 0); } -template -typename SkipList::Node* SkipList::FindGreaterOrEqual(const Key& key, Node** prev) - const { +template +typename SkipList::Node* SkipList::FindGreaterOrEqual( + const Key& key, Node** prev) const { Node* x = head_; int level = GetMaxHeight() - 1; while (true) { @@ -281,9 +279,9 @@ typename SkipList::Node* SkipList::FindGreaterOr } } -template -typename SkipList::Node* -SkipList::FindLessThan(const Key& key) const { +template +typename SkipList::Node* SkipList::FindLessThan( + const Key& key) const { Node* x = head_; int level = GetMaxHeight() - 1; while (true) { @@ -302,9 +300,8 @@ SkipList::FindLessThan(const Key& key) const { } } -template -typename SkipList::Node* SkipList::FindLast() - const { +template +typename SkipList::Node* SkipList::FindLast() const { Node* x = head_; int level = GetMaxHeight() - 1; while (true) { @@ -322,8 +319,8 @@ typename SkipList::Node* SkipList::FindLast() } } -template -SkipList::SkipList(Comparator cmp, Arena* arena) +template +SkipList::SkipList(Comparator cmp, Arena* arena) : compare_(cmp), arena_(arena), head_(NewNode(0 /* any key will do */, kMaxHeight)), @@ -334,8 +331,8 @@ SkipList::SkipList(Comparator cmp, Arena* arena) } } -template -void SkipList::Insert(const Key& key) { +template +void SkipList::Insert(const Key& key) { // TODO(opt): We can use a barrier-free variant of FindGreaterOrEqual() // here since Insert() is externally synchronized. Node* prev[kMaxHeight]; @@ -349,7 +346,7 @@ void SkipList::Insert(const Key& key) { for (int i = GetMaxHeight(); i < height; i++) { prev[i] = head_; } - //fprintf(stderr, "Change height from %d to %d\n", max_height_, height); + // fprintf(stderr, "Change height from %d to %d\n", max_height_, height); // It is ok to mutate max_height_ without any synchronization // with concurrent readers. A concurrent reader that observes @@ -370,8 +367,8 @@ void SkipList::Insert(const Key& key) { } } -template -bool SkipList::Contains(const Key& key) const { +template +bool SkipList::Contains(const Key& key) const { Node* x = FindGreaterOrEqual(key, NULL); if (x != NULL && Equal(key, x->key)) { return true; diff --git a/src/leveldb/db/skiplist_test.cc b/src/leveldb/db/skiplist_test.cc index 29d74ad9a..4c24f3fec 100644 --- a/src/leveldb/db/skiplist_test.cc +++ b/src/leveldb/db/skiplist_test.cc @@ -30,7 +30,7 @@ struct Comparator { } }; -class SkipTest { }; +class SkipTest {}; TEST(SkipTest, Empty) { Arena arena; @@ -115,8 +115,7 @@ TEST(SkipTest, InsertAndLookup) { iter.SeekToLast(); // Compare against model iterator - for (std::set::reverse_iterator model_iter = keys.rbegin(); - model_iter != keys.rend(); + for (std::set::reverse_iterator model_iter = keys.rbegin(); model_iter != keys.rend(); ++model_iter) { ASSERT_TRUE(iter.Valid()); ASSERT_EQ(*model_iter, iter.key()); @@ -159,7 +158,7 @@ class ConcurrentTest { static uint64_t hash(Key key) { return key & 0xff; } static uint64_t HashNumbers(uint64_t k, uint64_t g) { - uint64_t data[2] = { k, g }; + uint64_t data[2] = {k, g}; return Hash(reinterpret_cast(data), sizeof(data), 0); } @@ -170,9 +169,7 @@ class ConcurrentTest { return ((k << 40) | (g << 8) | (HashNumbers(k, g) & 0xff)); } - static bool IsValidKey(Key k) { - return hash(k) == (HashNumbers(key(k), gen(k)) & 0xff); - } + static bool IsValidKey(Key k) { return hash(k) == (HashNumbers(key(k), gen(k)) & 0xff); } static Key RandomTarget(Random* rnd) { switch (rnd->Next() % 10) { @@ -191,12 +188,8 @@ class ConcurrentTest { // Per-key generation struct State { port::AtomicPointer generation[K]; - void Set(int k, intptr_t v) { - generation[k].Release_Store(reinterpret_cast(v)); - } - intptr_t Get(int k) { - return reinterpret_cast(generation[k].Acquire_Load()); - } + void Set(int k, intptr_t v) { generation[k].Release_Store(reinterpret_cast(v)); } + intptr_t Get(int k) { return reinterpret_cast(generation[k].Acquire_Load()); } State() { for (uint32_t k = 0; k < K; k++) { @@ -215,7 +208,7 @@ class ConcurrentTest { SkipList list_; public: - ConcurrentTest() : list_(Comparator(), &arena_) { } + ConcurrentTest() : list_(Comparator(), &arena_) {} // REQUIRES: External synchronization void WriteStep(Random* rnd) { @@ -253,12 +246,9 @@ class ConcurrentTest { // Note that generation 0 is never inserted, so it is ok if // <*,0,*> is missing. - ASSERT_TRUE((gen(pos) == 0u) || - (static_cast(gen(pos)) > initial_state.Get(key(pos))) - ) << "key: " << key(pos) - << "; gen: " << gen(pos) - << "; initgen: " - << initial_state.Get(key(pos)); + ASSERT_TRUE((gen(pos) == 0u) || (static_cast(gen(pos)) > initial_state.Get(key(pos)))) + << "key: " << key(pos) << "; gen: " << gen(pos) + << "; initgen: " << initial_state.Get(key(pos)); // Advance to next key in the valid key space if (key(pos) < key(current)) { @@ -304,17 +294,9 @@ class TestState { int seed_; port::AtomicPointer quit_flag_; - enum ReaderState { - STARTING, - RUNNING, - DONE - }; + enum ReaderState { STARTING, RUNNING, DONE }; - explicit TestState(int s) - : seed_(s), - quit_flag_(NULL), - state_(STARTING), - state_cv_(&mu_) {} + explicit TestState(int s) : seed_(s), quit_flag_(NULL), state_(STARTING), state_cv_(&mu_) {} void Wait(ReaderState s) { mu_.Lock(); @@ -377,6 +359,4 @@ TEST(SkipTest, Concurrent5) { RunConcurrent(5); } } // namespace leveldb -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} +int main(int argc, char** argv) { return leveldb::test::RunAllTests(); } diff --git a/src/leveldb/db/snapshot.h b/src/leveldb/db/snapshot.h index b9f63446b..2a850fc2e 100644 --- a/src/leveldb/db/snapshot.h +++ b/src/leveldb/db/snapshot.h @@ -28,7 +28,7 @@ class SnapshotImpl : public Snapshot { SnapshotImpl* prev_; SnapshotImpl* next_; - SnapshotList* list_; // just for sanity checks + SnapshotList* list_; // just for sanity checks }; class SnapshotList { @@ -39,8 +39,14 @@ class SnapshotList { } bool empty() const { return list_.next_ == &list_; } - SnapshotImpl* oldest() const { assert(!empty()); return list_.next_; } - SnapshotImpl* newest() const { assert(!empty()); return list_.prev_; } + SnapshotImpl* oldest() const { + assert(!empty()); + return list_.next_; + } + SnapshotImpl* newest() const { + assert(!empty()); + return list_.prev_; + } const SnapshotImpl* New(SequenceNumber seq) { SnapshotImpl* s = new SnapshotImpl; diff --git a/src/leveldb/db/table_cache.cc b/src/leveldb/db/table_cache.cc index 148c4a115..77dc71928 100644 --- a/src/leveldb/db/table_cache.cc +++ b/src/leveldb/db/table_cache.cc @@ -34,36 +34,31 @@ static void UnrefEntry(void* arg1, void* arg2) { cache->Release(h); } -static std::string GetTableFileSign(const std::string& dbname, - const uint64_t* file_number) { - return dbname + std::string(reinterpret_cast(file_number), - sizeof(*file_number)); +static std::string GetTableFileSign(const std::string& dbname, const uint64_t* file_number) { + return dbname + std::string(reinterpret_cast(file_number), sizeof(*file_number)); } -TableCache::TableCache(size_t byte_size) - : cache_(NewLRUCache(byte_size)) { -} +TableCache::TableCache(size_t byte_size) : cache_(NewLRUCache(byte_size)) {} -TableCache::~TableCache() { - delete cache_; -} +TableCache::~TableCache() { delete cache_; } Status TableCache::FindTable(const std::string& dbname, const Options* options, - uint64_t file_number, uint64_t file_size, - Cache::Handle** handle) { + uint64_t file_number, uint64_t file_size, Cache::Handle** handle) { Status s; std::string sign = GetTableFileSign(dbname, &file_number); Slice key(sign); - MutexLock lock(&mu_); + auto index = GetIndex(file_number); + auto& mu = this->mu_[index]; + MutexLock lock(&mu); *handle = cache_->Lookup(key); if (*handle == NULL) { - //printf("file not in cache %s, try open it\n", fname.c_str()); + // printf("file not in cache %s, try open it\n", fname.c_str()); Waiter* w = NULL; - WaitFileList::iterator it = wait_files_.find(sign); - if (it != wait_files_.end()){ - //printf("file in open_list %s, wait\n", fname.c_str()); + WaitFileList::iterator it = wait_files_[index].find(sign); + if (it != wait_files_[index].end()) { + // printf("file in open_list %s, wait\n", fname.c_str()); w = it->second; - w->wait_num ++; + w->wait_num++; while (!w->done) { w->cv.Wait(); } @@ -76,20 +71,20 @@ Status TableCache::FindTable(const std::string& dbname, const Options* options, if (--w->wait_num == 0) { // last thread wait for open - wait_files_.erase(sign); - //printf("wait done %s, delete cv\n", fname.c_str()); + wait_files_[index].erase(sign); + // printf("wait done %s, delete cv\n", fname.c_str()); delete w; } else { - //printf("wait done %s, not last\n", fname.c_str()); + // printf("wait done %s, not last\n", fname.c_str()); } } else { - //printf("file not in open_list %s, Do open\n", fname.c_str()); - w = new Waiter(&mu_); + // printf("file not in open_list %s, Do open\n", fname.c_str()); + w = new Waiter(&mu); w->wait_num = 1; - wait_files_[sign] = w; + wait_files_[index][sign] = w; // Unlock when open file - mu_.Unlock(); + mu.Unlock(); RandomAccessFile* file = NULL; Table* table = NULL; std::string fname = TableFileName(dbname, file_number); @@ -110,13 +105,13 @@ Status TableCache::FindTable(const std::string& dbname, const Options* options, tf->table = table; *handle = cache_->Insert(key, tf, table->IndexBlockSize(), &DeleteEntry); } - mu_.Lock(); + mu.Lock(); if (--w->wait_num == 0) { - wait_files_.erase(sign); - //printf("open done %s, no wait thread\n", fname.c_str()); + wait_files_[index].erase(sign); + // printf("open done %s, no wait thread\n", fname.c_str()); delete w; } else { - //printf("open done %s, signal all wait thread\n", fname.c_str()); + // printf("open done %s, signal all wait thread\n", fname.c_str()); w->status = s; w->done = true; w->cv.SignalAll(); @@ -126,21 +121,14 @@ Status TableCache::FindTable(const std::string& dbname, const Options* options, return s; } -Iterator* TableCache::NewIterator(const ReadOptions& options, - const std::string& dbname, - uint64_t file_number, - uint64_t file_size, - Table** tableptr) { +Iterator* TableCache::NewIterator(const ReadOptions& options, const std::string& dbname, + uint64_t file_number, uint64_t file_size, Table** tableptr) { return NewIterator(options, dbname, file_number, file_size, "", "", tableptr); } -Iterator* TableCache::NewIterator(const ReadOptions& options, - const std::string& dbname, - uint64_t file_number, - uint64_t file_size, - const Slice& smallest, - const Slice& largest, - Table** tableptr) { +Iterator* TableCache::NewIterator(const ReadOptions& options, const std::string& dbname, + uint64_t file_number, uint64_t file_size, const Slice& smallest, + const Slice& largest, Table** tableptr) { assert(options.db_opt); if (tableptr != NULL) { *tableptr = NULL; @@ -161,12 +149,8 @@ Iterator* TableCache::NewIterator(const ReadOptions& options, return result; } -Status TableCache::Get(const ReadOptions& options, - const std::string& dbname, - uint64_t file_number, - uint64_t file_size, - const Slice& k, - void* arg, +Status TableCache::Get(const ReadOptions& options, const std::string& dbname, uint64_t file_number, + uint64_t file_size, const Slice& k, void* arg, void (*saver)(void*, const Slice&, const Slice&)) { assert(options.db_opt); Cache::Handle* handle = NULL; diff --git a/src/leveldb/db/table_cache.h b/src/leveldb/db/table_cache.h index 061794d7a..5daa1db78 100644 --- a/src/leveldb/db/table_cache.h +++ b/src/leveldb/db/table_cache.h @@ -35,30 +35,19 @@ class TableCache { // the returned iterator. The returned "*tableptr" object is owned by // the cache and should not be deleted, and is valid for as long as the // returned iterator is live. - Iterator* NewIterator(const ReadOptions& options, - const std::string& dbname, - uint64_t file_number, - uint64_t file_size, - Table** tableptr = NULL); + Iterator* NewIterator(const ReadOptions& options, const std::string& dbname, uint64_t file_number, + uint64_t file_size, Table** tableptr = NULL); // Specify key range of iterator [smallest, largest]. There are some // out-of-range keys in table file after tablet merging and splitting. - Iterator* NewIterator(const ReadOptions& options, - const std::string& dbname, - uint64_t file_number, - uint64_t file_size, - const Slice& smallest, - const Slice& largest, + Iterator* NewIterator(const ReadOptions& options, const std::string& dbname, uint64_t file_number, + uint64_t file_size, const Slice& smallest, const Slice& largest, Table** tableptr = NULL); // If a seek to internal key "k" in specified file finds an entry, // call (*handle_result)(arg, found_key, found_value). - Status Get(const ReadOptions& options, - const std::string& dbname, - uint64_t file_number, - uint64_t file_size, - const Slice& k, - void* arg, + Status Get(const ReadOptions& options, const std::string& dbname, uint64_t file_number, + uint64_t file_size, const Slice& k, void* arg, void (*handle_result)(void*, const Slice&, const Slice&)); // Evict any entry for the specified file number @@ -74,20 +63,24 @@ class TableCache { size_t ByteSize() { return cache_->TotalCharge(); } private: + static constexpr int shard_lock_cnt_ = 512; + size_t GetIndex(uint64_t file_number) { + return std::hash()(file_number) % shard_lock_cnt_; + } Cache* cache_; - port::Mutex mu_; struct Waiter { port::CondVar cv; int wait_num; Status status; bool done; - Waiter(port::Mutex* mu):cv(mu), wait_num(0), done(false) {} + Waiter(port::Mutex* mu) : cv(mu), wait_num(0), done(false) {} }; typedef std::map WaitFileList; - WaitFileList wait_files_; - Status FindTable(const std::string& dbname, const Options* options, - uint64_t file_number, uint64_t file_size, Cache::Handle**); + port::Mutex mu_[shard_lock_cnt_]; + WaitFileList wait_files_[shard_lock_cnt_]; + Status FindTable(const std::string& dbname, const Options* options, uint64_t file_number, + uint64_t file_size, Cache::Handle**); }; } // namespace leveldb diff --git a/src/leveldb/db/table_utils.cc b/src/leveldb/db/table_utils.cc index 1b54a34cd..306d582c9 100644 --- a/src/leveldb/db/table_utils.cc +++ b/src/leveldb/db/table_utils.cc @@ -61,15 +61,12 @@ bool GuessType(const std::string& fname, FileType* type) { class CorruptionReporter : public log::Reader::Reporter { public: virtual void Corruption(size_t bytes, const Status& status) { - printf("corruption: %d bytes; %s\n", - static_cast(bytes), - status.ToString().c_str()); + printf("corruption: %d bytes; %s\n", static_cast(bytes), status.ToString().c_str()); } }; // Print contents of a log file. (*func)() is called on every record. -bool PrintLogContents(Env* env, const std::string& fname, - void (*func)(Slice)) { +bool PrintLogContents(Env* env, const std::string& fname, void (*func)(Slice)) { SequentialFile* file; Status s = env->NewSequentialFile(fname, &file); if (!s.ok()) { @@ -81,8 +78,7 @@ bool PrintLogContents(Env* env, const std::string& fname, Slice record; std::string scratch; while (reader.ReadRecord(&record, &scratch)) { - printf("--- offset %llu; ", - static_cast(reader.LastRecordOffset())); + printf("--- offset %llu; ", static_cast(reader.LastRecordOffset())); (*func)(record); } delete file; @@ -96,29 +92,21 @@ class WriteBatchItemPrinter : public WriteBatch::Handler { uint64_t sequence_; virtual void Put(const Slice& key, const Slice& value) { - printf(" put '%s' '%s'\n", - EscapeString(key).c_str(), - EscapeString(value).c_str()); - } - virtual void Delete(const Slice& key) { - printf(" del '%s'\n", - EscapeString(key).c_str()); + printf(" put '%s' '%s'\n", EscapeString(key).c_str(), EscapeString(value).c_str()); } + virtual void Delete(const Slice& key) { printf(" del '%s'\n", EscapeString(key).c_str()); } }; - // Called on every log record (each one of which is a WriteBatch) // found in a kLogFile. static void WriteBatchPrinter(Slice record) { if (record.size() < 12) { - printf("log record length %d is too small\n", - static_cast(record.size())); + printf("log record length %d is too small\n", static_cast(record.size())); return; } WriteBatch batch; WriteBatchInternal::SetContents(&batch, record); - printf("sequence %llu\n", - static_cast(WriteBatchInternal::Sequence(&batch))); + printf("sequence %llu\n", static_cast(WriteBatchInternal::Sequence(&batch))); WriteBatchItemPrinter batch_item_printer; Status s = batch.Iterate(&batch_item_printer); if (!s.ok()) { @@ -176,8 +164,7 @@ bool DumpTable(Env* env, const std::string& fname) { for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { ParsedInternalKey key; if (!ParseInternalKey(iter->key(), &key)) { - printf("badkey '%s' => '%s'\n", - EscapeString(iter->key()).c_str(), + printf("badkey '%s' => '%s'\n", EscapeString(iter->key()).c_str(), EscapeString(iter->value()).c_str()); } else { char kbuf[20]; @@ -190,10 +177,8 @@ bool DumpTable(Env* env, const std::string& fname) { snprintf(kbuf, sizeof(kbuf), "%d", static_cast(key.type)); type = kbuf; } - printf("'%s' @ %8llu : %s => '%s'\n", - EscapeString(key.user_key).c_str(), - static_cast(key.sequence), - type, + printf("'%s' @ %8llu : %s => '%s'\n", EscapeString(key.user_key).c_str(), + static_cast(key.sequence), type, EscapeString(iter->value()).c_str()); } } @@ -215,9 +200,12 @@ bool DumpFile(Env* env, const std::string& fname) { return false; } switch (ftype) { - case kLogFile: return DumpLog(env, fname); - case kDescriptorFile: return DumpDescriptor(env, fname); - case kTableFile: return DumpTable(env, fname); + case kLogFile: + return DumpLog(env, fname); + case kDescriptorFile: + return DumpDescriptor(env, fname); + case kTableFile: + return DumpTable(env, fname); default: { fprintf(stderr, "%s: not a dump-able file type\n", fname.c_str()); diff --git a/src/leveldb/db/table_utils_test.cc b/src/leveldb/db/table_utils_test.cc index b18e2e975..8a0e078eb 100644 --- a/src/leveldb/db/table_utils_test.cc +++ b/src/leveldb/db/table_utils_test.cc @@ -9,9 +9,29 @@ namespace leveldb { class TableUtilsTest {}; - // none for now -} -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); +TEST(TableUtilsTest, HeadAndDumpManifest) { + // the hex content of the manifest + uint8_t content[] = { + 0x6e, 0x11, 0x5f, 0x7f, 0x2d, 0x00, 0x01, 0x9a, 0x80, 0x40, 0x01, 0x19, 0x74, 0x65, + 0x72, 0x61, 0x2e, 0x54, 0x65, 0x72, 0x61, 0x42, 0x69, 0x6e, 0x61, 0x72, 0x79, 0x43, + 0x6f, 0x6d, 0x70, 0x61, 0x72, 0x61, 0x74, 0x6f, 0x72, 0x81, 0x80, 0x40, 0x02, 0x00, + 0x81, 0x80, 0x40, 0x03, 0x02, 0x81, 0x80, 0x40, 0x04, 0x00, 0x0a, + }; + + std::string manifest_file("./MANIFEST-000001"); + FILE* file = fopen(manifest_file.c_str(), "wb"); + size_t len = sizeof(content) / sizeof(uint8_t); + fwrite(content, 1, len, file); + fclose(file); + + leveldb::Env* env = leveldb::Env::Default(); + + bool ret = false; + ret = DumpFile(env, manifest_file); + ASSERT_TRUE(ret); + remove(manifest_file.c_str()); +} } + +int main(int argc, char** argv) { return leveldb::test::RunAllTests(); } diff --git a/src/leveldb/db/version_edit.cc b/src/leveldb/db/version_edit.cc index 244733915..8deabfa61 100644 --- a/src/leveldb/db/version_edit.cc +++ b/src/leveldb/db/version_edit.cc @@ -16,22 +16,24 @@ namespace leveldb { // Tag numbers for serialized VersionEdit. These numbers are written to // disk and should not be changed. max tag number = 1<<20, min tag number = 1 enum Tag { - kComparator = 1, - kLogNumber = 2, - kNextFileNumber = 3, - kLastSequence = 4, - kCompactPointer = 5, + kComparator = 1, + kLogNumber = 2, + kNextFileNumber = 3, + kLastSequence = 4, + kCompactPointer = 5, kDeletedFileForCompat = 6, - kNewFileForCompat = 7, + kNewFileForCompat = 7, // 8 was used for large value refs - kPrevLogNumber = 9, - kNewFile = 10, - kDeletedFile = 11, - kNewFileInfo = 12, - kSstFileDataSize = 13, + kPrevLogNumber = 9, + kNewFile = 10, + kDeletedFile = 11, + kNewFileInfo = 12, + kSstFileDataSize = 13, + kStartKey = 14, + kEndKey = 15, // no more than 1<<20 - kMaxTag = 1 << 20, + kMaxTag = 1 << 20, }; void VersionEdit::Clear() { @@ -45,6 +47,8 @@ void VersionEdit::Clear() { has_prev_log_number_ = false; has_next_file_number_ = false; has_last_sequence_ = false; + has_start_key_ = false; + has_end_key_ = false; deleted_files_.clear(); new_files_.clear(); } @@ -90,6 +94,22 @@ void VersionEdit::EncodeTo(std::string* dst) const { PutVarint32(dst, kLastSequence); dst->append(str.data(), str.size()); } + if (has_start_key_) { + std::string str; + PutLengthPrefixedSlice(&str, start_key_); + + PutVarint32(dst, str.size() + kMaxTag); + PutVarint32(dst, kStartKey); + dst->append(str.data(), str.size()); + } + if (has_end_key_) { + std::string str; + PutLengthPrefixedSlice(&str, end_key_); + + PutVarint32(dst, str.size() + kMaxTag); + PutVarint32(dst, kEndKey); + dst->append(str.data(), str.size()); + } for (size_t i = 0; i < compact_pointers_.size(); i++) { std::string str; @@ -169,8 +189,7 @@ static bool GetInternalKey(Slice* input, InternalKey* dst) { static bool GetLevel(Slice* input, int* level) { uint32_t v; - if (GetVarint32(input, &v) && - v < static_cast(config::kNumLevels)) { + if (GetVarint32(input, &v) && v < static_cast(config::kNumLevels)) { *level = v; return true; } else { @@ -193,8 +212,8 @@ Status VersionEdit::DecodeNewFileInfo(Slice* input, FileMetaData* f) { GetVarint32(&file_input, &tag); switch (tag) { case kNewFileInfo: - GetVarint32(input, &len);// ignore len - GetVarint32(input, &tag);// ignore tag + GetVarint32(input, &len); // ignore len + GetVarint32(input, &tag); // ignore tag GetVarint64(input, &f->del_percentage); GetVarint64(input, &f->ttl_percentage); GetVarint64(input, &f->check_ttl_ts); @@ -204,9 +223,11 @@ Status VersionEdit::DecodeNewFileInfo(Slice* input, FileMetaData* f) { GetVarint32(input, &tag); GetVarint64(input, &f->data_size); break; + case kNewFile: + decode_continue = false; + break; default: - fprintf(stderr, "NewFile %lu without info, skip tag %d, len %d\n", - f->number & 0xffffffff, + fprintf(stderr, "NewFile %lu without info, skip tag %u, len %u\n", f->number & 0xffffffff, tag, len); decode_continue = false; break; @@ -231,10 +252,10 @@ Status VersionEdit::DecodeFrom(const Slice& src) { while (msg == NULL && GetVarint32(&input, &tag)) { uint32_t len = 0; if (tag > kMaxTag) { - len = tag - kMaxTag; - if (!GetVarint32(&input, &tag)) { - break; - } + len = tag - kMaxTag; + if (!GetVarint32(&input, &tag)) { + break; + } } switch (tag) { case kComparator: @@ -279,8 +300,7 @@ Status VersionEdit::DecodeFrom(const Slice& src) { break; case kCompactPointer: - if (GetLevel(&input, &level) && - GetInternalKey(&input, &key)) { + if (GetLevel(&input, &level) && GetInternalKey(&input, &key)) { compact_pointers_.push_back(std::make_pair(level, key)); } else { msg = "compaction pointer"; @@ -288,8 +308,7 @@ Status VersionEdit::DecodeFrom(const Slice& src) { break; case kDeletedFileForCompat: - if (GetLevel(&input, &level) && - GetVarint64(&input, &number)) { + if (GetLevel(&input, &level) && GetVarint64(&input, &number)) { DeleteFile(level, number); } else { msg = "deleted file"; @@ -297,10 +316,8 @@ Status VersionEdit::DecodeFrom(const Slice& src) { break; case kNewFileForCompat: - if (GetLevel(&input, &level) && - GetVarint64(&input, &f.number) && - GetVarint64(&input, &f.file_size) && - GetInternalKey(&input, &f.smallest) && + if (GetLevel(&input, &level) && GetVarint64(&input, &f.number) && + GetVarint64(&input, &f.file_size) && GetInternalKey(&input, &f.smallest) && GetInternalKey(&input, &f.largest)) { new_files_.push_back(std::make_pair(level, f)); } else { @@ -309,15 +326,12 @@ Status VersionEdit::DecodeFrom(const Slice& src) { break; case kNewFile: - if (GetLevel(&input, &level) && - GetVarint64(&input, &f.number) && - GetVarint64(&input, &f.file_size) && - GetInternalKey(&input, &f.smallest) && + if (GetLevel(&input, &level) && GetVarint64(&input, &f.number) && + GetVarint64(&input, &f.file_size) && GetInternalKey(&input, &f.smallest) && GetInternalKey(&input, &f.largest)) { uint32_t smallest_fake = 0; uint32_t largest_fake = 0; - if (GetVarint32(&input, &smallest_fake) && - GetVarint32(&input, &largest_fake)) { + if (GetVarint32(&input, &smallest_fake) && GetVarint32(&input, &largest_fake)) { if (smallest_fake == 0) { f.smallest_fake = false; } else { @@ -340,10 +354,8 @@ Status VersionEdit::DecodeFrom(const Slice& src) { break; case kDeletedFile: - if (GetLevel(&input, &level) && - GetVarint64(&input, &f.number) && - GetVarint64(&input, &f.file_size) && - GetInternalKey(&input, &f.smallest) && + if (GetLevel(&input, &level) && GetVarint64(&input, &f.number) && + GetVarint64(&input, &f.file_size) && GetInternalKey(&input, &f.smallest) && GetInternalKey(&input, &f.largest)) { deleted_files_.push_back(std::make_pair(level, f)); } else { @@ -351,9 +363,27 @@ Status VersionEdit::DecodeFrom(const Slice& src) { } break; - default: // tag not know, skip it. + case kStartKey: + if (GetLengthPrefixedSlice(&input, &str)) { + start_key_ = str.ToString(); + has_start_key_ = true; + } else { + msg = "start key name"; + } + break; + + case kEndKey: + if (GetLengthPrefixedSlice(&input, &str)) { + end_key_ = str.ToString(); + has_end_key_ = true; + } else { + msg = "end key name"; + } + break; + + default: // tag not know, skip it. input.remove_prefix(len); - fprintf(stderr, "VersionEdit, skip unknow tag %d, len %d\n", tag, len); + fprintf(stderr, "VersionEdit, skip unknow tag %u, len %u\n", tag, len); break; } } @@ -392,6 +422,14 @@ std::string VersionEdit::DebugString() const { r.append("\n LastSeq: "); AppendNumberTo(&r, last_sequence_); } + if (has_start_key_) { + r.append("\n StartKey: "); + r.append(start_key_.c_str()); + } + if (has_end_key_) { + r.append("\n EndKey: "); + r.append(end_key_.c_str()); + } for (size_t i = 0; i < compact_pointers_.size(); i++) { r.append("\n CompactPointer: "); AppendNumberTo(&r, compact_pointers_[i].first); @@ -400,8 +438,7 @@ std::string VersionEdit::DebugString() const { } uint64_t tablet_number = 0; uint64_t file_number = 0; - for (FileMetaSet::const_iterator iter = deleted_files_.begin(); - iter != deleted_files_.end(); + for (FileMetaSet::const_iterator iter = deleted_files_.begin(); iter != deleted_files_.end(); ++iter) { ParseFullFileNumber(iter->second.number, &tablet_number, &file_number); r.append("\n DeleteFile: level "); diff --git a/src/leveldb/db/version_edit.h b/src/leveldb/db/version_edit.h index 17b9bfc36..e3de64cf3 100644 --- a/src/leveldb/db/version_edit.h +++ b/src/leveldb/db/version_edit.h @@ -22,37 +22,38 @@ class VersionSetBuilder; struct FileMetaData { int refs; - int allowed_seeks; // Seeks allowed until compaction - uint64_t check_ttl_ts; // statistic: Descripe this sst file when to timeout check - uint64_t ttl_percentage; // statistic: By default, if 50% entry timeout, will trigger compaction - uint64_t del_percentage; // statistic: delete tag's percentage in sst + int allowed_seeks; // Seeks allowed until compaction + uint64_t check_ttl_ts; // statistic: Descripe this sst file when to timeout check + uint64_t ttl_percentage; // statistic: By default, if 50% entry timeout, will + // trigger compaction + uint64_t del_percentage; // statistic: delete tag's percentage in sst uint64_t number; - uint64_t file_size; // File size in bytes - uint64_t data_size; // data_size <= file_size - InternalKey smallest; // Smallest internal key served by table - InternalKey largest; // Largest internal key served by table - bool smallest_fake; // smallest is not real, have out-of-range keys - bool largest_fake; // largest is not real, have out-of-range keys - bool being_compacted; // Is this file undergoing compaction? - - FileMetaData() : - refs(0), - allowed_seeks(1 << 30), - check_ttl_ts(0), - ttl_percentage(0), - del_percentage(0), - number(0), - file_size(0), - data_size(0), - smallest_fake(false), - largest_fake(false), - being_compacted(false) { } + uint64_t file_size; // File size in bytes + uint64_t data_size; // data_size <= file_size + InternalKey smallest; // Smallest internal key served by table + InternalKey largest; // Largest internal key served by table + bool smallest_fake; // smallest is not real, have out-of-range keys + bool largest_fake; // largest is not real, have out-of-range keys + bool being_compacted; // Is this file undergoing compaction? + + FileMetaData() + : refs(0), + allowed_seeks(1 << 30), + check_ttl_ts(0), + ttl_percentage(0), + del_percentage(0), + number(0), + file_size(0), + data_size(0), + smallest_fake(false), + largest_fake(false), + being_compacted(false) {} }; class VersionEdit { public: VersionEdit() { Clear(); } - ~VersionEdit() { } + ~VersionEdit() {} void Clear(); @@ -80,35 +81,32 @@ class VersionEdit { compact_pointers_.push_back(std::make_pair(level, key)); } - uint64_t GetLastSequence() const { - return last_sequence_; - } - uint64_t GetLogNumber() const { - return log_number_; - } - uint64_t GetNextFileNumber() const { - return next_file_number_; - } - std::string GetComparatorName() const { - return comparator_; + void SetStartKey(const std::string key) { + has_start_key_ = true; + start_key_ = key; } - bool HasNextFileNumber() const { - return has_next_file_number_; - } - bool HasLastSequence() const { - return has_last_sequence_; - } - bool HasLogNumber() const { - return has_log_number_; - } - bool HasComparator() const { - return has_comparator_; + void SetEndKey(const std::string key) { + has_end_key_ = true; + end_key_ = key; } - bool HasFiles(std::vector* deleted_files, - std::vector* added_files) { - bool has_files = deleted_files_.size() > 0 - || new_files_.size() > 0; + + uint64_t GetLastSequence() const { return last_sequence_; } + uint64_t GetLogNumber() const { return log_number_; } + uint64_t GetNextFileNumber() const { return next_file_number_; } + std::string GetComparatorName() const { return comparator_; } + std::string GetStartKey() const { return start_key_; } + std::string GetEndKey() const { return end_key_; } + + bool HasNextFileNumber() const { return has_next_file_number_; } + bool HasLastSequence() const { return has_last_sequence_; } + bool HasLogNumber() const { return has_log_number_; } + bool HasComparator() const { return has_comparator_; } + bool HasStartKey() const { return has_start_key_; } + bool HasEndKey() const { return has_end_key_; } + + bool HasFiles(std::vector* deleted_files, std::vector* added_files) { + bool has_files = deleted_files_.size() > 0 || new_files_.size() > 0; // if (deleted_files && deleted_files_.size() > 0) { // DeletedFileSet::iterator set_it = deleted_files_.begin(); // for (; set_it != deleted_files_.end(); ++set_it) { @@ -122,25 +120,25 @@ class VersionEdit { // added_files->push_back(file.number); // } // } - return has_files; + return has_files; } void ModifyForMerge(std::map num_map) { - //if (num_map.size() == 0) { + // if (num_map.size() == 0) { // return; //} //// deleted file - //DeletedFileSet deleted_files(deleted_files_); - //deleted_files_.clear(); - //DeletedFileSet::iterator set_it = deleted_files.begin(); - //for (; set_it != deleted_files.end(); ++set_it) { + // DeletedFileSet deleted_files(deleted_files_); + // deleted_files_.clear(); + // DeletedFileSet::iterator set_it = deleted_files.begin(); + // for (; set_it != deleted_files.end(); ++set_it) { // std::pair pair = *set_it; // pair.second = num_map[pair.second]; // deleted_files_.insert(pair); //} //// new files - //for (uint32_t i = 0; i < new_files_.size(); ++i) { + // for (uint32_t i = 0; i < new_files_.size(); ++i) { // FileMetaData& file = new_files_[i].second; // file.number = num_map[file.number]; //} @@ -149,12 +147,8 @@ class VersionEdit { // Add the specified file at the specified number. // REQUIRES: This version has not been saved (see VersionSet::SaveTo) // REQUIRES: "smallest" and "largest" are smallest and largest keys in file - void AddFile(int level, uint64_t file, - uint64_t file_size, - const InternalKey& smallest, - const InternalKey& largest, - uint64_t del_percentage = 0, - uint64_t check_ttl_ts = 0, + void AddFile(int level, uint64_t file, uint64_t file_size, const InternalKey& smallest, + const InternalKey& largest, uint64_t del_percentage = 0, uint64_t check_ttl_ts = 0, uint64_t ttl_percentage = 0) { FileMetaData f; f.number = file; @@ -168,9 +162,7 @@ class VersionEdit { new_files_.push_back(std::make_pair(level, f)); } - void AddFile(int level, const FileMetaData& f) { - new_files_.push_back(std::make_pair(level, f)); - } + void AddFile(int level, const FileMetaData& f) { new_files_.push_back(std::make_pair(level, f)); } // Delete the specified "file" from the specified "level". void DeleteFile(int level, int64_t number) { @@ -196,20 +188,24 @@ class VersionEdit { friend class VersionSet; friend class VersionSetBuilder; - typedef std::vector< std::pair > FileMetaSet; + typedef std::vector > FileMetaSet; std::string comparator_; uint64_t log_number_; uint64_t prev_log_number_; uint64_t next_file_number_; SequenceNumber last_sequence_; + std::string start_key_; + std::string end_key_; bool has_comparator_; bool has_log_number_; bool has_prev_log_number_; bool has_next_file_number_; bool has_last_sequence_; + bool has_start_key_; + bool has_end_key_; - std::vector< std::pair > compact_pointers_; + std::vector > compact_pointers_; // Files in Version could be deleted by file number or file meta. // If deleted by file number, any file meta on this file would be deleted. diff --git a/src/leveldb/db/version_edit_test.cc b/src/leveldb/db/version_edit_test.cc index 44a5d308f..6762b1605 100644 --- a/src/leveldb/db/version_edit_test.cc +++ b/src/leveldb/db/version_edit_test.cc @@ -14,128 +14,127 @@ namespace leveldb { // Tag numbers for serialized VersionEdit. These numbers are written to // disk and should not be changed. max tag number = 1<<20, min tag number = 1 enum Tag { - kComparator = 1, - kLogNumber = 2, - kNextFileNumber = 3, - kLastSequence = 4, - kCompactPointer = 5, + kComparator = 1, + kLogNumber = 2, + kNextFileNumber = 3, + kLastSequence = 4, + kCompactPointer = 5, kDeletedFileForCompat = 6, - kNewFileForCompat = 7, + kNewFileForCompat = 7, // 8 was used for large value refs - kPrevLogNumber = 9, - kNewFile = 10, - kDeletedFile = 11, - kNewFileInfo = 12, - kSstFileDataSize = 13, + kPrevLogNumber = 9, + kNewFile = 10, + kDeletedFile = 11, + kNewFileInfo = 12, + kSstFileDataSize = 13, // no more than 1<<20 - kMaxTag = 1 << 20, + kMaxTag = 1 << 20, }; enum EditTestTag { kErrorTag = 200, }; -class VersionEditTest: public VersionEdit { - public: - VersionEditTest() : has_error_tag_(false) { - Clear(); +class VersionEditTest : public VersionEdit { + public: + VersionEditTest() : has_error_tag_(false) { Clear(); } + void AddErrorTag(const std::string& str) { + has_error_tag_ = true; + error_code_ = str; + } + void EncodeToTestTag(std::string* dst) const { + if (has_error_tag_) { + std::string str; + PutLengthPrefixedSlice(&str, error_code_); + + PutVarint32(dst, str.size() + kMaxTag); + PutVarint32(dst, kErrorTag); + dst->append(str.data(), str.size()); } - void AddErrorTag(const std::string& str) { - has_error_tag_ = true; - error_code_ = str; + } + void EncodeToOld(std::string* dst, bool with_sst, bool with_data_size) { + DumpToOldFormat(); + if (has_comparator_) { + PutVarint32(dst, kComparator); + PutLengthPrefixedSlice(dst, comparator_); } - void EncodeToTestTag(std::string* dst) const { - if (has_error_tag_) { - std::string str; - PutLengthPrefixedSlice(&str, error_code_); - - PutVarint32(dst, str.size() + kMaxTag); - PutVarint32(dst, kErrorTag); - dst->append(str.data(), str.size()); - } + if (has_log_number_) { + PutVarint32(dst, kLogNumber); + PutVarint64(dst, log_number_); + } + if (has_next_file_number_) { + PutVarint32(dst, kNextFileNumber); + PutVarint64(dst, next_file_number_); + } + if (has_last_sequence_) { + PutVarint32(dst, kLastSequence); + PutVarint64(dst, last_sequence_); + } + if (!with_sst) { + return; } - void EncodeToOld(std::string* dst, bool with_sst, bool with_data_size) { - DumpToOldFormat(); - if (has_comparator_) { - PutVarint32(dst, kComparator); - PutLengthPrefixedSlice(dst, comparator_); - } - if (has_log_number_) { - PutVarint32(dst, kLogNumber); - PutVarint64(dst, log_number_); - } - if (has_next_file_number_) { - PutVarint32(dst, kNextFileNumber); - PutVarint64(dst, next_file_number_); - } - if (has_last_sequence_) { - PutVarint32(dst, kLastSequence); - PutVarint64(dst, last_sequence_); - } - if (!with_sst) { - return; - } - for (uint32_t i = 0; i < 5; i++) { - FileMetaData f; - f.number = 100 + i; - f.file_size = 200 + i; - f.data_size = f.file_size; - f.smallest = InternalKey("apple", 300 + i, kTypeValue); - f.largest = InternalKey("zookeeper", 400 + i, kTypeDeletion); - EncodeSstFile(i, f, dst, with_data_size); - } + for (uint32_t i = 0; i < 5; i++) { + FileMetaData f; + f.number = 100 + i; + f.file_size = 200 + i; + f.data_size = f.file_size; + f.smallest = InternalKey("apple", 300 + i, kTypeValue); + f.largest = InternalKey("zookeeper", 400 + i, kTypeDeletion); + EncodeSstFile(i, f, dst, with_data_size); } + } - void EncodeSstFile(uint32_t level, const FileMetaData& f, std::string* dst, bool with_data_size) { - std::string str; - PutVarint32(&str,level); // level - PutVarint64(&str, f.number); - PutVarint64(&str, f.file_size); - PutLengthPrefixedSlice(&str, f.smallest.Encode()); - PutLengthPrefixedSlice(&str, f.largest.Encode()); - PutVarint32(&str, 0); // put f.smallest_fake - PutVarint32(&str, 0); // put f.largest_fake + void EncodeSstFile(uint32_t level, const FileMetaData& f, std::string* dst, bool with_data_size) { + std::string str; + PutVarint32(&str, level); // level + PutVarint64(&str, f.number); + PutVarint64(&str, f.file_size); + PutLengthPrefixedSlice(&str, f.smallest.Encode()); + PutLengthPrefixedSlice(&str, f.largest.Encode()); + PutVarint32(&str, 0); // put f.smallest_fake + PutVarint32(&str, 0); // put f.largest_fake - PutVarint32(dst, str.size() + kMaxTag); - PutVarint32(dst, kNewFile); - dst->append(str.data(), str.size()); - // record sst FileData - if (with_data_size) { - str.clear(); - PutVarint64(&str, f.data_size); - PutVarint32(dst, str.size() + kMaxTag); - PutVarint32(dst, kSstFileDataSize); - dst->append(str.data(), str.size()); - } + PutVarint32(dst, str.size() + kMaxTag); + PutVarint32(dst, kNewFile); + dst->append(str.data(), str.size()); + // record sst FileData + if (with_data_size) { + str.clear(); + PutVarint64(&str, f.data_size); + PutVarint32(dst, str.size() + kMaxTag); + PutVarint32(dst, kSstFileDataSize); + dst->append(str.data(), str.size()); } + } - void DumpToOldFormat() { - has_comparator_ = HasComparator(); - comparator_ = GetComparatorName(); + void DumpToOldFormat() { + has_comparator_ = HasComparator(); + comparator_ = GetComparatorName(); - has_log_number_ = HasLogNumber(); - log_number_ = GetLogNumber(); + has_log_number_ = HasLogNumber(); + log_number_ = GetLogNumber(); - has_next_file_number_ = HasNextFileNumber(); - next_file_number_ = GetNextFileNumber(); + has_next_file_number_ = HasNextFileNumber(); + next_file_number_ = GetNextFileNumber(); - has_last_sequence_ = HasLastSequence(); - last_sequence_ = GetLastSequence(); - } - private: - bool has_error_tag_; - std::string error_code_; + has_last_sequence_ = HasLastSequence(); + last_sequence_ = GetLastSequence(); + } - std::string comparator_; - uint64_t log_number_; - uint64_t prev_log_number_; - uint64_t next_file_number_; - SequenceNumber last_sequence_; - bool has_comparator_; - bool has_log_number_; - bool has_prev_log_number_; - bool has_next_file_number_; - bool has_last_sequence_; + private: + bool has_error_tag_; + std::string error_code_; + + std::string comparator_; + uint64_t log_number_; + uint64_t prev_log_number_; + uint64_t next_file_number_; + SequenceNumber last_sequence_; + bool has_comparator_; + bool has_log_number_; + bool has_prev_log_number_; + bool has_next_file_number_; + bool has_last_sequence_; }; static void TestEncodeDecode(const VersionEditTest& edit) { @@ -150,8 +149,7 @@ static void TestEncodeDecode(const VersionEditTest& edit) { static void CreateEditContent(VersionEditTest* edit) { for (int i = 0; i < 5; i++) { TestEncodeDecode(*edit); - edit->AddFile(i, 100 + i, 200 + i, - InternalKey("aoo", 300 + i, kTypeValue), + edit->AddFile(i, 100 + i, 200 + i, InternalKey("aoo", 300 + i, kTypeValue), InternalKey("zoo", 400 + i, kTypeDeletion)); edit->DeleteFile(i, 500 + i); edit->SetCompactPointer(i, InternalKey("x00", 600 + i, kTypeValue)); @@ -163,8 +161,8 @@ static void CreateEditContent(VersionEditTest* edit) { edit->SetLastSequence(900); TestEncodeDecode(*edit); } -static void CreateOldEncodedContent(VersionEditTest* edit, std::string* dst, - bool with_sst, bool with_data_size) { +static void CreateOldEncodedContent(VersionEditTest* edit, std::string* dst, bool with_sst, + bool with_data_size) { edit->SetComparatorName("test_nil_cmp"); edit->SetLogNumber(700); edit->SetNextFile(800); @@ -204,10 +202,10 @@ TEST(VersionEditTest, OldFormatRead) { VersionEditTest edit; std::string c1, c3; CreateOldEncodedContent(&edit, &c1, false, false); - edit.EncodeTo(&c3); // dump into new format + edit.EncodeTo(&c3); // dump into new format VersionEditTest parsed; - Status s = parsed.DecodeFrom(c1); // use new Decode to parse old format + Status s = parsed.DecodeFrom(c1); // use new Decode to parse old format ASSERT_TRUE(s.ok()) << s.ToString(); std::string c2; parsed.EncodeTo(&c2); @@ -220,17 +218,16 @@ TEST(VersionEditTest, DecodeFormatWithoutSstFileDataSize) { VersionEditTest edit; std::string c1, c3; CreateOldEncodedContent(&edit, &c1, true, false); - edit.EncodeTo(&c3); // dump into new format + edit.EncodeTo(&c3); // dump into new format VersionEditTest parsed; - Status s = parsed.DecodeFrom(c1); // use new Decode to parse old format + Status s = parsed.DecodeFrom(c1); // use new Decode to parse old format ASSERT_TRUE(s.ok()) << s.ToString(); std::string c2; parsed.EncodeTo(&c2); ASSERT_NE(c2, c3); fprintf(stderr, "%s\n", parsed.DebugString().c_str()); - } TEST(VersionEditTest, EncodeUnknowTag) { @@ -266,8 +263,7 @@ TEST(VersionEditTest, EncodeDecode) { VersionEditTest edit; for (int i = 0; i < 4; i++) { TestEncodeDecode(edit); - edit.AddFile(3, kBig + 300 + i, kBig + 400 + i, - InternalKey("foo", kBig + 500 + i, kTypeValue), + edit.AddFile(3, kBig + 300 + i, kBig + 400 + i, InternalKey("foo", kBig + 500 + i, kTypeValue), InternalKey("zoo", kBig + 600 + i, kTypeDeletion)); edit.DeleteFile(4, kBig + 700 + i); edit.SetCompactPointer(i, InternalKey("x", kBig + 900 + i, kTypeValue)); @@ -282,6 +278,4 @@ TEST(VersionEditTest, EncodeDecode) { } // namespace leveldb -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} +int main(int argc, char** argv) { return leveldb::test::RunAllTests(); } diff --git a/src/leveldb/db/version_set.cc b/src/leveldb/db/version_set.cc index a988df7fb..ebb85a5cf 100644 --- a/src/leveldb/db/version_set.cc +++ b/src/leveldb/db/version_set.cc @@ -9,7 +9,6 @@ #include "db/version_set.h" #include - #include #include #include @@ -32,14 +31,14 @@ namespace leveldb { // Maximum bytes of overlaps in grandparent (i.e., level+2) before we // stop building a single file in a level->level+1 compaction. static int64_t MaxGrandParentOverlapBytes(int64_t target_file_size) { - return 10 * target_file_size; + return 10 * target_file_size; } // Maximum number of bytes in all compacted files. We avoid expanding // the lower level file set of a compaction if it would make the // total compaction cover more than this many bytes. static int64_t ExpandedCompactionByteSizeLimit(int64_t target_file_size) { - return 25 * target_file_size; + return 25 * target_file_size; } static double MaxBytesForLevel(int level, int sst_size) { @@ -57,10 +56,11 @@ static double MaxBytesForLevel(int level, int sst_size) { static uint64_t MaxFileSizeForLevel(int level, int64_t target_file_size) { if (level == 2) { return 2 * target_file_size; - } else if(level > 2) { + } else if (level > 2) { return 8 * target_file_size; } - return target_file_size; // We could vary per level to reduce number of files? + return target_file_size; // We could vary per level to reduce number of + // files? } static int64_t TotalFileSize(const std::vector& files) { @@ -101,8 +101,7 @@ Version::~Version() { } } -int FindFile(const InternalKeyComparator& icmp, - const std::vector& files, +int FindFile(const InternalKeyComparator& icmp, const std::vector& files, const Slice& key) { uint32_t left = 0; uint32_t right = files.size(); @@ -122,33 +121,26 @@ int FindFile(const InternalKeyComparator& icmp, return right; } -static bool AfterFile(const Comparator* ucmp, - const Slice* user_key, const FileMetaData* f) { +static bool AfterFile(const Comparator* ucmp, const Slice* user_key, const FileMetaData* f) { // NULL user_key occurs before all keys and is therefore never after *f - return (user_key != NULL && - ucmp->Compare(*user_key, f->largest.user_key()) > 0); + return (user_key != NULL && ucmp->Compare(*user_key, f->largest.user_key()) > 0); } -static bool BeforeFile(const Comparator* ucmp, - const Slice* user_key, const FileMetaData* f) { +// all user key before f +static bool BeforeFile(const Comparator* ucmp, const Slice* user_key, const FileMetaData* f) { // NULL user_key occurs after all keys and is therefore never before *f - return (user_key != NULL && - ucmp->Compare(*user_key, f->smallest.user_key()) < 0); + return (user_key != NULL && ucmp->Compare(*user_key, f->smallest.user_key()) < 0); } -bool SomeFileOverlapsRange( - const InternalKeyComparator& icmp, - const Comparator* ucmp, - bool disjoint_sorted_files, - const std::vector& files, - const Slice* smallest_user_key, - const Slice* largest_user_key) { +bool SomeFileOverlapsRange(const InternalKeyComparator& icmp, const Comparator* ucmp, + bool disjoint_sorted_files, // it means of level > 0 + const std::vector& files, const Slice* smallest_user_key, + const Slice* largest_user_key) { if (!disjoint_sorted_files) { // Need to check against all files for (size_t i = 0; i < files.size(); i++) { const FileMetaData* f = files[i]; - if (AfterFile(ucmp, smallest_user_key, f) || - BeforeFile(ucmp, largest_user_key, f)) { + if (AfterFile(ucmp, smallest_user_key, f) || BeforeFile(ucmp, largest_user_key, f)) { // No overlap } else { return true; // Overlap @@ -161,7 +153,7 @@ bool SomeFileOverlapsRange( uint32_t index = 0; if (smallest_user_key != NULL) { // Find the earliest possible internal key for smallest_user_key - InternalKey small(*smallest_user_key, kMaxSequenceNumber,kValueTypeForSeek); + InternalKey small(*smallest_user_key, kMaxSequenceNumber, kValueTypeForSeek); index = FindFile(icmp, files, small.Encode()); } @@ -180,18 +172,15 @@ bool SomeFileOverlapsRange( // encoded using EncodeFixed64. class Version::LevelFileNumIterator : public Iterator { public: - LevelFileNumIterator(const InternalKeyComparator& icmp, - const std::vector* flist, - const std::string& dbname, - const ReadOptions& opts) + LevelFileNumIterator(const InternalKeyComparator& icmp, const std::vector* flist, + const std::string& dbname, const ReadOptions& opts) : icmp_(icmp), flist_(flist), dbname_(dbname), - index_(flist->size()), // Marks as invalid + index_(flist->size()), // Marks as invalid read_single_row_(opts.read_single_row), row_start_key_(opts.row_start_key, kMaxSequenceNumber, kValueTypeForSeek), - row_end_key_(opts.row_end_key, kMaxSequenceNumber, kValueTypeForSeek) { - } + row_end_key_(opts.row_end_key, kMaxSequenceNumber, kValueTypeForSeek) {} virtual bool Valid() const { if (index_ >= flist_->size()) { return false; @@ -204,13 +193,9 @@ class Version::LevelFileNumIterator : public Iterator { } return true; } - virtual void Seek(const Slice& target) { - index_ = FindFile(icmp_, *flist_, target); - } + virtual void Seek(const Slice& target) { index_ = FindFile(icmp_, *flist_, target); } virtual void SeekToFirst() { index_ = 0; } - virtual void SeekToLast() { - index_ = flist_->empty() ? 0 : flist_->size() - 1; - } + virtual void SeekToLast() { index_ = flist_->empty() ? 0 : flist_->size() - 1; } virtual void Next() { assert(Valid()); index_++; @@ -232,18 +217,19 @@ class Version::LevelFileNumIterator : public Iterator { FileMetaData* f = (*flist_)[index_]; value_buf_.resize(28); EncodeFixed64((char*)value_buf_.data(), f->number); - EncodeFixed64((char*)value_buf_.data()+8, f->file_size); + EncodeFixed64((char*)value_buf_.data() + 8, f->file_size); Slice smallest = f->smallest_fake ? f->smallest.Encode() : ""; Slice largest = f->largest_fake ? f->largest.Encode() : ""; - EncodeFixed32((char*)value_buf_.data()+16, smallest.size()); - EncodeFixed32((char*)value_buf_.data()+20, largest.size()); - EncodeFixed32((char*)value_buf_.data()+24, dbname_.size()); + EncodeFixed32((char*)value_buf_.data() + 16, smallest.size()); + EncodeFixed32((char*)value_buf_.data() + 20, largest.size()); + EncodeFixed32((char*)value_buf_.data() + 24, dbname_.size()); value_buf_.append(smallest.ToString()); value_buf_.append(largest.ToString()); value_buf_.append(dbname_); return Slice(value_buf_); } virtual Status status() const { return Status::OK(); } + private: const InternalKeyComparator icmp_; const std::vector* const flist_; @@ -257,37 +243,29 @@ class Version::LevelFileNumIterator : public Iterator { mutable std::string value_buf_; }; -static Iterator* GetFileIterator(void* arg, - const ReadOptions& options, - const Slice& file_value) { +static Iterator* GetFileIterator(void* arg, const ReadOptions& options, const Slice& file_value) { assert(options.db_opt); TableCache* cache = reinterpret_cast(arg); int32_t ssize = DecodeFixed32(file_value.data() + 16); int32_t lsize = DecodeFixed32(file_value.data() + 20); int32_t dbname_size = DecodeFixed32(file_value.data() + 24); - assert(ssize >= 0 && ssize < 65536 && - lsize >= 0 && lsize < 65536 && - dbname_size > 0 && dbname_size < 1024); - return cache->NewIterator(options, - std::string(file_value.data() + 28 + ssize + lsize, - dbname_size), - DecodeFixed64(file_value.data()), - DecodeFixed64(file_value.data() + 8), - Slice(file_value.data() + 28, ssize), - Slice(file_value.data() + 28 + ssize, lsize)); -} - -Iterator* Version::NewConcatenatingIterator(const ReadOptions& options, - int level) const { + assert(ssize >= 0 && ssize < 65536 && lsize >= 0 && lsize < 65536 && dbname_size > 0 && + dbname_size < 1024); + return cache->NewIterator( + options, std::string(file_value.data() + 28 + ssize + lsize, dbname_size), + DecodeFixed64(file_value.data()), DecodeFixed64(file_value.data() + 8), + Slice(file_value.data() + 28, ssize), Slice(file_value.data() + 28 + ssize, lsize)); +} + +Iterator* Version::NewConcatenatingIterator(const ReadOptions& options, int level) const { ReadOptions opts = options; opts.db_opt = vset_->options_; return NewTwoLevelIterator( - new LevelFileNumIterator(vset_->icmp_, &files_[level], vset_->dbname_, opts), - &GetFileIterator, vset_->table_cache_, opts); + new LevelFileNumIterator(vset_->icmp_, &files_[level], vset_->dbname_, opts), GetFileIterator, + vset_->table_cache_, opts); } -void Version::AddIterators(const ReadOptions& options, - std::vector* iters) { +void Version::AddIterators(const ReadOptions& options, std::vector* iters) { ReadOptions opts = options; opts.db_opt = vset_->options_; // Merge all level zero files together since they may overlap @@ -295,9 +273,8 @@ void Version::AddIterators(const ReadOptions& options, FileMetaData* f = files_[0][i]; Slice smallest = f->smallest_fake ? f->smallest.Encode() : ""; Slice largest = f->largest_fake ? f->largest.Encode() : ""; - iters->emplace_back(vset_->table_cache_->NewIterator( - opts, vset_->dbname_ , f->number, - f->file_size, smallest, largest)); + iters->emplace_back(vset_->table_cache_->NewIterator(opts, vset_->dbname_, f->number, + f->file_size, smallest, largest)); } // For levels > 0, we can use a concatenating iterator that sequentially @@ -338,20 +315,16 @@ static void SaveValue(void* arg, const Slice& ikey, const Slice& v) { if (!s->compact_strategy || !s->compact_strategy->Drop(parsed_key.user_key, 0)) { s->value->assign(v.data(), v.size()); } else { - s->state = kDeleted; // stop searching in other files. + s->state = kDeleted; // stop searching in other files. } } } } } -static bool NewestFirst(FileMetaData* a, FileMetaData* b) { - return a->number > b->number; -} +static bool NewestFirst(FileMetaData* a, FileMetaData* b) { return a->number > b->number; } -Status Version::Get(const ReadOptions& options, - const LookupKey& k, - std::string* value, +Status Version::Get(const ReadOptions& options, const LookupKey& k, std::string* value, GetStats* stats) { ReadOptions opts = options; opts.db_opt = vset_->options_; @@ -427,17 +400,18 @@ Status Version::Get(const ReadOptions& options, saver.ucmp = ucmp; saver.user_key = user_key; saver.value = value; - saver.compact_strategy = vset_->options_->enable_strategy_when_get ? - vset_->options_->compact_strategy_factory->NewInstance() : NULL; - s = vset_->table_cache_->Get(opts, vset_->dbname_, f->number, - f->file_size, ikey, &saver, SaveValue); + saver.compact_strategy = vset_->options_->enable_strategy_when_get + ? vset_->options_->compact_strategy_factory->NewInstance() + : NULL; + s = vset_->table_cache_->Get(opts, vset_->dbname_, f->number, f->file_size, ikey, &saver, + SaveValue); delete saver.compact_strategy; if (!s.ok()) { return s; } switch (saver.state) { case kNotFound: - break; // Keep searching in other files + break; // Keep searching in other files case kFound: return s; case kDeleted: @@ -466,9 +440,7 @@ bool Version::UpdateStats(const GetStats& stats) { return false; } -void Version::Ref() { - ++refs_; -} +void Version::Ref() { ++refs_; } void Version::Unref() { assert(this != &vset_->dummy_versions_); @@ -479,10 +451,272 @@ void Version::Unref() { } } -bool Version::OverlapInLevel(int level, - const Slice* smallest_user_key, - const Slice* largest_user_key) { +bool VersionSet::ExpandInputsToCleanCut(int level, std::vector* inputs) { + // This isn't good compaction + assert(!inputs->empty()); + + if (level == 0) { + return true; + } + + InternalKey smallest, largest; + + // Keep expanding inputs until we are sure that there is a "clean cut" + // boundary between the files in input and the surrounding files. + // This will ensure that no parts of a key are lost during compaction. + size_t old_size; + do { + old_size = inputs->size(); + GetRange(*inputs, &smallest, &largest); + current_->GetOverlappingInputs(level, &smallest, &largest, inputs); + } while (inputs->size() > old_size); + + // If, after the expansion, there are files that are already under + // compaction, then we must drop/cancel this compaction. + if (AreFilesInCompaction(*inputs)) { + return false; + } + return true; +} + +void Version::DEBUG_pick(const std::string& msg, int level, const std::vector& files, + const InternalKey& smallest, const InternalKey& largest) const { + std::string r = ""; + r.append("\nDEBUG_PICK####### level:"); + AppendNumberTo(&r, level); + r.append(" range["); + r.append(smallest.DebugString()); + r.append(", "); + r.append(largest.DebugString()); + r.append("] \n"); + + for (size_t i = 0; i < files.size(); i++) { + r.append("NO."); + AppendNumberTo(&r, static_cast(files[i]->number & 0xffffffff)); + r.append(" f_size:"); + AppendNumberTo(&r, files[i]->file_size); + r.append(" d_size:"); + AppendNumberTo(&r, files[i]->data_size); + r.append(" in_comp:"); + r.append(files[i]->being_compacted ? "T" : "F"); + r.append(" ["); + r.append(files[i]->smallest.DebugString()); + r.append(" .. "); + r.append(files[i]->largest.DebugString()); + r.append("]\n"); + } + LEVELDB_LOG(vset_->options_->info_log, "[%s] %s %s", vset_->dbname_.c_str(), msg.c_str(), + r.c_str()); +} + +void Version::GetCleanCutInputsWithinInterval(int level, const InternalKey* begin, + const InternalKey* end, + std::vector* inputs) { + if (files_[level].size() == 0) { + // empty level, no inputs in it + return; + } + inputs->clear(); + Slice user_begin, user_end; + if (begin != nullptr) { + user_begin = begin->user_key(); + } + if (end != nullptr) { + user_end = end->user_key(); + } + if (begin != nullptr && end != nullptr && level > 0) { + // only when level > 0 files are sorted in same level + BinarySearchOverlappingInputs(level, user_begin, user_end, true /* within interval */, inputs); + } +} + +void Version::BinarySearchOverlappingInputs(int level, const Slice& user_begin, + const Slice& user_end, bool within_interval, + std::vector* inputs) { + assert(level > 0); + // use row key comparator + CompactStrategy* strategy = vset_->options_->compact_strategy_factory->NewInstance(); + const Comparator* ucmp = strategy->RowKeyComparator(); + if (ucmp == NULL) { + ucmp = vset_->icmp_.user_comparator(); + } + + // binary search overlap index and extend range + int start_index, end_index; + bool found = false; + int mid = 0, min = 0, max = static_cast(files_[level].size()) - 1; + while (min <= max) { + mid = (min + max) / 2; + FileMetaData* f = files_[level][mid]; + const Slice file_start = f->smallest.user_key(); + const Slice file_limit = f->largest.user_key(); + if (within_interval) { + if (ucmp->Compare(file_start, user_begin) < 0) { + min = mid + 1; + } else if (ucmp->Compare(file_limit, user_end) > 0) { + max = mid - 1; + } else { + // found middle index within interval with range + ExtendRangeWithinInterval(level, ucmp, user_begin, user_end, mid, &start_index, &end_index); + found = true; + break; + } + } else { + if (ucmp->Compare(file_limit, user_begin) < 0) { + min = mid + 1; + } else if (ucmp->Compare(file_start, user_end) > 0) { + max = mid - 1; + } else { + // found middle index overlap with range + ExtendRangeOverlappingInterval(level, ucmp, user_begin, user_end, mid, &start_index, + &end_index); + found = true; + break; + } + } + } + for (int i = start_index; found && i <= end_index; ++i) { + inputs->push_back(files_[level][i]); + } + delete strategy; +} + +void Version::ExtendRangeOverlappingInterval(int level, const Comparator* user_cmp, + const Slice& user_begin, const Slice& user_end, + unsigned int mid_index, int* start_index, + int* end_index) { + assert(mid_index < files_[level].size()); + *start_index = mid_index + 1; + *end_index = mid_index; + int file_count = 0; + + // Select start index, case patten: + // [ ] + // [f1 ] [f2] [f3] + // find from 'mid' to left, on this case f1 will be selected + for (int i = mid_index; i >= 0; i--) { + const FileMetaData* f = files_[level][i]; + const Slice file_limit = f->largest.user_key(); + // table use rowkey, kv/ttlkv use user_key + if (user_cmp->Compare(file_limit, user_begin) >= 0) { + *start_index = i; + ++file_count; + } else { + break; + } + } + // Select end index, case patten: + // [ ] + // [f3 ] [f4] [f5] + // find from 'mid+1' to right, on this case f5 will be selected + for (unsigned int i = mid_index + 1; i < files_[level].size(); i++) { + const FileMetaData* f = files_[level][i]; + const Slice file_start = f->smallest.user_key(); + // table use rowkey, kv/ttlkv use user_key + if (user_cmp->Compare(file_start, user_end) <= 0) { + ++file_count; + *end_index = i; + } else { + break; + } + } + assert(file_count == *end_index - *start_index + 1); +} + +void Version::ExtendRangeWithinInterval(int level, const Comparator* user_cmp, + const Slice& user_begin, const Slice& user_end, + unsigned int mid_index, int* start_index, int* end_index) { + assert(level > 0); + + ExtendRangeOverlappingInterval(level, user_cmp, user_begin, user_end, mid_index, start_index, + end_index); + int left = *start_index; + int right = *end_index; + // shrink from left to right + while (left <= right) { + const FileMetaData* f = files_[level][left]; + const Slice& first_key_in_range = f->smallest.user_key(); + if (user_cmp->Compare(first_key_in_range, user_begin) < 0) { + left++; + continue; + } + if (left > 0) { // If not first file + const FileMetaData* f_before = files_[level][left - 1]; + const Slice& last_key_before = f_before->largest.user_key(); + if (user_cmp->Compare(first_key_in_range, last_key_before) == 0) { + // The first user key(row key) in range overlaps + // with the previous file's last key + left++; + continue; + } + } + break; + } + // shrink from right to left + while (left <= right) { + const FileMetaData* f = files_[level][right]; + const Slice& last_key_in_range = f->largest.user_key(); + if (user_cmp->Compare(last_key_in_range, user_end) > 0) { + right--; + continue; + } + if (right < static_cast(files_[level].size()) - 1) { // If not the last file + const FileMetaData* f_next = files_[level][right + 1]; + const Slice& first_key_after = f_next->smallest.user_key(); + if (user_cmp->Compare(last_key_in_range, first_key_after) == 0) { + // The last user key(row key) in range overlaps + // with the next file's first key + right--; + continue; + } + } + break; + } + + *start_index = left; + *end_index = right; +} +void VersionSet::SetupGrandparents(int level, const std::vector& inputs, + const std::vector& output_inputs, + std::vector* grandparents) { + InternalKey start, end; + GetRange2(inputs, output_inputs, &start, &end); + current_->GetOverlappingInputs(level + 2, &start, &end, grandparents); +} + +void Version::GetCompactionScores(std::vector>* scores) const { + // add level scores + for (size_t i = 0; i < compaction_score_.size(); i++) { + if (compaction_score_[i] >= 1) { + scores->emplace_back(compaction_score_[i], 0); + } + } + + // add del score + if (del_trigger_compact_ != NULL && !del_trigger_compact_->being_compacted && + del_trigger_compact_->del_percentage > vset_->options_->del_percentage) { + scores->emplace_back((double)(del_trigger_compact_->del_percentage / 100.0), 0); + } + + // add seek score + if (file_to_compact_ != NULL && !file_to_compact_->being_compacted) { + scores->emplace_back(0.1, 0); + } + + // add ttl score + if (ttl_trigger_compact_ != NULL && !ttl_trigger_compact_->being_compacted) { + int64_t ttl = ttl_trigger_compact_->check_ttl_ts - vset_->env_->NowMicros(); + int64_t few_time_to_live = 900 * 1000000; // 9 minutes + if (ttl < few_time_to_live) { + uint64_t wait_time = (ttl <= 0) ? 0 : (ttl / 1000 * 1000); + scores->emplace_back((double)((ttl_trigger_compact_->ttl_percentage + 1) / 100.0), wait_time); + } + } +} + +bool Version::OverlapInLevel(int level, const Slice* smallest_user_key, + const Slice* largest_user_key) { // use row key comparator CompactStrategy* strategy = vset_->options_->compact_strategy_factory->NewInstance(); const Comparator* ucmp = strategy->RowKeyComparator(); @@ -495,9 +729,8 @@ bool Version::OverlapInLevel(int level, return overlap; } -int Version::PickLevelForMemTableOutput( - const Slice& smallest_user_key, - const Slice& largest_user_key) { +int Version::PickLevelForMemTableOutput(const Slice& smallest_user_key, + const Slice& largest_user_key) { int level = 0; if (!OverlapInLevel(0, &smallest_user_key, &largest_user_key)) { // Push to next level if there is no overlap in next level, @@ -521,11 +754,8 @@ int Version::PickLevelForMemTableOutput( } // Store in "*inputs" all files in "level" that overlap [begin,end] -void Version::GetOverlappingInputs( - int level, - const InternalKey* begin, - const InternalKey* end, - std::vector* inputs) { +void Version::GetOverlappingInputs(int level, const InternalKey* begin, const InternalKey* end, + std::vector* inputs) { inputs->clear(); Slice user_begin, user_end; if (begin != NULL) { @@ -541,7 +771,7 @@ void Version::GetOverlappingInputs( if (user_cmp == NULL) { user_cmp = vset_->icmp_.user_comparator(); } - for (size_t i = 0; i < files_[level].size(); ) { + for (size_t i = 0; i < files_[level].size();) { FileMetaData* f = files_[level][i++]; const Slice file_start = f->smallest.user_key(); const Slice file_limit = f->largest.user_key(); @@ -616,8 +846,7 @@ bool Version::FindSplitKey(double ratio, std::string* split_key) { } const FileMetaData* file = files[now_pos[level]]; if (largest_file == NULL || - user_cmp->Compare(largest_file->largest.user_key(), - file->largest.user_key()) > 0) { + user_cmp->Compare(largest_file->largest.user_key(), file->largest.user_key()) > 0) { largest_file = file; step_level = level; } @@ -627,7 +856,7 @@ bool Version::FindSplitKey(double ratio, std::string* split_key) { return false; } split_size += files_[step_level][now_pos[step_level]]->data_size; - now_pos[step_level] ++; + now_pos[step_level]++; } if (largest_file == NULL) { return false; @@ -643,18 +872,16 @@ bool Version::FindKeyRange(std::string* smallest_key, std::string* largest_key) const std::vector& files = files_[level]; for (size_t i = 0; i < files.size(); i++) { const FileMetaData* file = files[i]; - if (sk.empty() || - user_cmp->Compare(file->smallest.user_key(), sk) < 0) { + if (sk.empty() || user_cmp->Compare(file->smallest.user_key(), sk) < 0) { sk = file->smallest.user_key().ToString(); } - if (lk.empty() || - user_cmp->Compare(file->largest.user_key(), lk) > 0) { + if (lk.empty() || user_cmp->Compare(file->largest.user_key(), lk) > 0) { lk = file->largest.user_key().ToString(); } } } - Log(vset_->options_->info_log, "[%s] find key range: [%s,%s].\n", - vset_->dbname_.c_str(), sk.c_str(), lk.c_str()); + LEVELDB_LOG(vset_->options_->info_log, "[%s] find key range: [%s,%s].\n", vset_->dbname_.c_str(), + sk.c_str(), lk.c_str()); if (smallest_key) { *smallest_key = sk; } @@ -755,7 +982,7 @@ class VersionSetBuilder { continue; } } - fi++; + ++fi; } } }; @@ -766,9 +993,7 @@ class VersionSetBuilder { public: // Initialize a builder with the files from *base and other info from *vset - VersionSetBuilder(VersionSet* vset, Version* base) - : vset_(vset), - base_(base) { + VersionSetBuilder(VersionSet* vset, Version* base) : vset_(vset), base_(base) { base_->Ref(); BySmallestKey cmp; cmp.internal_comparator = &vset_->icmp_; @@ -783,8 +1008,7 @@ class VersionSetBuilder { const FileSet* added = levels_[level].added_files; std::vector to_unref; to_unref.reserve(added->size()); - for (FileSet::const_iterator it = added->begin(); - it != added->end(); ++it) { + for (FileSet::const_iterator it = added->begin(); it != added->end(); ++it) { to_unref.push_back(*it); } delete added; @@ -805,15 +1029,12 @@ class VersionSetBuilder { // Update compaction pointers for (size_t i = 0; i < edit->compact_pointers_.size(); i++) { const int level = edit->compact_pointers_[i].first; - vset_->compact_pointer_[level] = - edit->compact_pointers_[i].second.Encode().ToString(); + vset_->compact_pointer_[level] = edit->compact_pointers_[i].second.Encode().ToString(); } // Delete files const VersionEdit::FileMetaSet& del = edit->deleted_files_; - for (VersionEdit::FileMetaSet::const_iterator iter = del.begin(); - iter != del.end(); - ++iter) { + for (VersionEdit::FileMetaSet::const_iterator iter = del.begin(); iter != del.end(); ++iter) { const int level = iter->first; FileMetaData f = iter->second; ModifyFileMeta(&f); @@ -830,7 +1051,7 @@ class VersionSetBuilder { it = edit->new_files_.erase(it); continue; } - it++; + ++it; FileMetaData* f = new FileMetaData(f_new); f->refs = 1; @@ -849,7 +1070,7 @@ class VersionSetBuilder { // same as the compaction of 40KB of data. We are a little // conservative and allow approximately one seek for every 16KB // of data before triggering a compaction. - uint64_t size_for_one_seek = 16384ULL * vset_->options_->seek_latency / 10000000; + uint64_t size_for_one_seek = 16384ULL * vset_->options_->seek_latency / 10000000; if (size_for_one_seek <= 0) { size_for_one_seek = 1; } @@ -873,14 +1094,12 @@ class VersionSetBuilder { std::vector::const_iterator base_end = base_files.end(); const FileSet* added = levels_[level].added_files; v->files_[level].reserve(base_files.size() + added->size()); - for (FileSet::const_iterator added_iter = added->begin(); - added_iter != added->end(); + for (FileSet::const_iterator added_iter = added->begin(); added_iter != added->end(); ++added_iter) { // Add all smaller files listed in base_ - for (std::vector::const_iterator bpos - = std::upper_bound(base_iter, base_end, *added_iter, cmp); - base_iter != bpos; - ++base_iter) { + for (std::vector::const_iterator bpos = + std::upper_bound(base_iter, base_end, *added_iter, cmp); + base_iter != bpos; ++base_iter) { MaybeAddFile(v, level, *base_iter); } @@ -896,12 +1115,11 @@ class VersionSetBuilder { // Make sure there is no overlap in levels > 0 if (level > 0) { for (uint32_t i = 1; i < v->files_[level].size(); i++) { - const InternalKey& prev_end = v->files_[level][i-1]->largest; + const InternalKey& prev_end = v->files_[level][i - 1]->largest; const InternalKey& this_begin = v->files_[level][i]->smallest; if (vset_->icmp_.Compare(prev_end, this_begin) > 0) { fprintf(stderr, "overlapping ranges in same level %s vs. %s\n", - prev_end.DebugString().c_str(), - this_begin.DebugString().c_str()); + prev_end.DebugString().c_str(), this_begin.DebugString().c_str()); abort(); } } @@ -917,7 +1135,7 @@ class VersionSetBuilder { } std::vector* files = &v->files_[level]; if (level > 0 && !files->empty()) { - FileMetaData* f_old = (*files)[files->size()-1]; + FileMetaData* f_old = (*files)[files->size() - 1]; // Must not overlap assert(vset_->icmp_.Compare(f_old->largest, f->smallest) <= 0); } @@ -931,12 +1149,9 @@ class VersionSetBuilder { if (!vset_->db_key_start_.user_key().empty() && vset_->icmp_.Compare(f->smallest, vset_->db_key_start_) < 0) { if (vset_->icmp_.Compare(f->largest, vset_->db_key_start_) > 0) { - Log(vset_->options_->info_log, - "[%s] reset file smallest key: %s, from %s to %s\n", - vset_->dbname_.c_str(), - FileNumberDebugString(f->number).c_str(), - f->smallest.DebugString().c_str(), - vset_->db_key_start_.DebugString().c_str()); + LEVELDB_LOG(vset_->options_->info_log, "[%s] reset file smallest key: %s, from %s to %s\n", + vset_->dbname_.c_str(), FileNumberDebugString(f->number).c_str(), + f->smallest.DebugString().c_str(), vset_->db_key_start_.DebugString().c_str()); f->smallest = vset_->db_key_start_; f->smallest_fake = true; f->data_size = 0; @@ -948,12 +1163,9 @@ class VersionSetBuilder { if (!vset_->db_key_end_.user_key().empty() && vset_->icmp_.Compare(f->largest, vset_->db_key_end_) > 0) { if (vset_->icmp_.Compare(f->smallest, vset_->db_key_end_) < 0) { - Log(vset_->options_->info_log, - "[%s] reset file largest key: %s, from %s to %s\n", - vset_->dbname_.c_str(), - FileNumberDebugString(f->number).c_str(), - f->largest.DebugString().c_str(), - vset_->db_key_end_.DebugString().c_str()); + LEVELDB_LOG(vset_->options_->info_log, "[%s] reset file largest key: %s, from %s to %s\n", + vset_->dbname_.c_str(), FileNumberDebugString(f->number).c_str(), + f->largest.DebugString().c_str(), vset_->db_key_end_.DebugString().c_str()); f->largest = vset_->db_key_end_; f->largest_fake = true; f->data_size = 0; @@ -966,9 +1178,7 @@ class VersionSetBuilder { } }; -VersionSet::VersionSet(const std::string& dbname, - const Options* options, - TableCache* table_cache, +VersionSet::VersionSet(const std::string& dbname, const Options* options, TableCache* table_cache, const InternalKeyComparator* cmp) : env_(options->env), dbname_(dbname), @@ -985,6 +1195,7 @@ VersionSet::VersionSet(const std::string& dbname, last_sequence_(0), log_number_(0), prev_log_number_(0), + descriptor_size_(0), descriptor_file_(NULL), descriptor_log_(NULL), dummy_versions_(this), @@ -999,12 +1210,11 @@ VersionSet::~VersionSet() { for (int level = 0; level < config::kNumLevels; level++) { const std::vector& files = current_->files_[level]; for (size_t i = 0; i < files.size(); i++) { - Log(options_->info_log, "[%s] finish : %08u %08u, level: %d, s: %d %s, l: %d %s\n", - dbname_.c_str(), - static_cast(files[i]->number >> 32 & 0x7fffffff), - static_cast(files[i]->number & 0xffffffff), level, - files[i]->smallest_fake, files[i]->smallest.user_key().ToString().data(), - files[i]->largest_fake, files[i]->largest.user_key().ToString().data()); + LEVELDB_LOG(options_->info_log, "[%s] finish : %08u %08u, level: %d, s: %d %s, l: %d %s\n", + dbname_.c_str(), static_cast(files[i]->number >> 32 & 0x7fffffff), + static_cast(files[i]->number & 0xffffffff), level, + files[i]->smallest_fake, files[i]->smallest.user_key().ToString().data(), + files[i]->largest_fake, files[i]->largest.user_key().ToString().data()); } } current_->Unref(); @@ -1039,14 +1249,13 @@ void VersionSet::AppendVersion(Version* v) { // Information kept for every waiting manifest writer struct VersionSet::ManifestWriter { Status status; - VersionEdit* edit; + VersionEdit* edit = nullptr; bool done; port::CondVar cv; - explicit ManifestWriter(port::Mutex* mu) : done(false), cv(mu) { } + explicit ManifestWriter(port::Mutex* mu) : done(false), cv(mu) {} }; -void VersionSet::LogAndApplyHelper(VersionSetBuilder* builder, - VersionEdit* edit) { +void VersionSet::LogAndApplyHelper(VersionSetBuilder* builder, VersionEdit* edit) { if (edit->has_log_number_) { assert(edit->log_number_ >= log_number_); assert(edit->log_number_ < next_file_number_); @@ -1061,8 +1270,8 @@ void VersionSet::LogAndApplyHelper(VersionSetBuilder* builder, edit->SetNextFile(next_file_number_); if (edit->HasLastSequence()) { - Log(options_->info_log, "[%s] LogLastSequence %lu", - dbname_.c_str(), edit->GetLastSequence()); + LEVELDB_LOG(options_->info_log, "[%s] LogLastSequence %lu", dbname_.c_str(), + edit->GetLastSequence()); assert(edit->GetLastSequence() >= last_sequence_); } else { edit->SetLastSequence(last_sequence_); @@ -1075,6 +1284,16 @@ void VersionSet::GetCurrentLevelSize(std::vector* result) { *result = level_size_counter_; } +void VersionSet::MaybeSwitchManifest() { + if (descriptor_log_ != NULL) { + const uint64_t switch_interval = options_->manifest_switch_interval * 1000000UL; + if (last_switch_manifest_ + switch_interval < env_->NowMicros() && + descriptor_size_ >= options_->manifest_switch_size << 20) { + force_switch_manifest_ = true; + } + } +} + Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) { mu->AssertHeld(); // multi write control, do not batch edit write, but multi thread safety @@ -1093,17 +1312,15 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) { LogAndApplyHelper(&builder, w.edit); builder.SaveTo(v); } - Finalize(v); // recalculate new version score + Finalize(v); // recalculate new version score - const uint64_t switch_interval = options_->manifest_switch_interval * 1000000UL; - if (descriptor_log_ != NULL && - last_switch_manifest_ + switch_interval < env_->NowMicros()) { - force_switch_manifest_ = true; - } + MaybeSwitchManifest(); uint64_t manifest_file_num = manifest_file_number_; int retry_count = 0; Status s; + std::string record; + edit->EncodeTo(&record); // Unlock during expensive MANIFEST log write do { s = Status::OK(); @@ -1121,8 +1338,8 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) { delete descriptor_file_; descriptor_log_ = NULL; descriptor_file_ = NULL; - Log(options_->info_log, "[%s] force switch MANIFEST #%lu to #%lu", - dbname_.c_str(), manifest_file_num, manifest_file_number_); + LEVELDB_LOG(options_->info_log, "[%s] force switch MANIFEST #%lu to #%lu", dbname_.c_str(), + manifest_file_num, manifest_file_number_); force_switch_manifest_ = false; } @@ -1137,43 +1354,41 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) { descriptor_log_ = new log::Writer(descriptor_file_); s = WriteSnapshot(descriptor_log_); if (!s.ok()) { - Log(options_->info_log, "[%s][dfs error] writesnapshot MANIFEST[%s] error, status[%s].\n", - dbname_.c_str(), new_manifest_file.c_str(), s.ToString().c_str()); + LEVELDB_LOG(options_->info_log, + "[%s][dfs error] writesnapshot MANIFEST[%s] error, status[%s].\n", + dbname_.c_str(), new_manifest_file.c_str(), s.ToString().c_str()); } } else { - Log(options_->info_log, "[%s][dfs error] open MANIFEST[%s] error, status[%s].\n", - dbname_.c_str(), new_manifest_file.c_str(), s.ToString().c_str()); + LEVELDB_LOG(options_->info_log, "[%s][dfs error] open MANIFEST[%s] error, status[%s].\n", + dbname_.c_str(), new_manifest_file.c_str(), s.ToString().c_str()); } } // Write new record to MANIFEST log if (s.ok()) { - std::string record; - edit->EncodeTo(&record); s = descriptor_log_->AddRecord(record); if (s.ok()) { s = descriptor_file_->Sync(); if (!s.ok()) { - Log(options_->info_log, "[%s][dfs error] MANIFEST sync error: %s\n", - dbname_.c_str(), s.ToString().c_str()); + LEVELDB_LOG(options_->info_log, "[%s][dfs error] MANIFEST sync error: %s\n", + dbname_.c_str(), s.ToString().c_str()); } } else { - Log(options_->info_log, "[%s][dfs error] AddRecord MANIFEST error: %s\n", - dbname_.c_str(), s.ToString().c_str()); + LEVELDB_LOG(options_->info_log, "[%s][dfs error] AddRecord MANIFEST error: %s\n", + dbname_.c_str(), s.ToString().c_str()); } } if (s.ok() && !new_manifest_file.empty()) { s = SetCurrentFile(env_, dbname_, manifest_file_number_); if (s.ok()) { - Log(options_->info_log, "[%s] set CURRENT #%lu to #%llu success\n", - dbname_.c_str(),manifest_file_num, - static_cast(manifest_file_number_)); + LEVELDB_LOG(options_->info_log, "[%s] set CURRENT #%lu to #%llu success\n", dbname_.c_str(), + manifest_file_num, static_cast(manifest_file_number_)); manifest_file_num = manifest_file_number_; } else { - Log(options_->info_log, "[%s][dfs error] set CURRENT #%lu to #%lu error: %s\n", - dbname_.c_str(), manifest_file_num, manifest_file_number_, - s.ToString().c_str()); + LEVELDB_LOG(options_->info_log, "[%s][dfs error] set CURRENT #%lu to #%lu error: %s\n", + dbname_.c_str(), manifest_file_num, manifest_file_number_, + s.ToString().c_str()); } } @@ -1214,15 +1429,16 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) { } if (!keep) { - Log(options_->info_log, "[%s] version_set Delete type=%s #%lld, fname %s\n", - dbname_.c_str(), FileTypeToString(type), - static_cast(number), filenames[i].c_str()); + LEVELDB_LOG(options_->info_log, "[%s] version_set Delete type=%s #%lld, fname %s\n", + dbname_.c_str(), FileTypeToString(type), + static_cast(number), filenames[i].c_str()); env_->DeleteFile(dbname_ + "/" + filenames[i]); } } } } - // if MANIFEST or CURRENT file write error because of losting directory lock, + // if MANIFEST or CURRENT file write error because of losting directory + // lock, // do not try to switch manifest anymore if (!s.ok() && !s.IsIOPermissionDenied()) { force_switch_manifest_ = true; @@ -1238,13 +1454,15 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) { for (int i = 1; i < retry_count && i < 4; i++) { sec *= 2; } - Log(options_->info_log, "[%s] Waiting after %d, LogAndApply sync error: %s, retry: %d", - dbname_.c_str(), sec, s.ToString().c_str(), retry_count); + LEVELDB_LOG(options_->info_log, + "[%s] Waiting after %d, LogAndApply sync error: %s, retry: %d", dbname_.c_str(), + sec, s.ToString().c_str(), retry_count); env_->SleepForMicroseconds(sec * 1000000); } mu->Lock(); - } while (force_switch_manifest_); // bugfix issue=tera-10, dfs sync fail, but eventually success, cause reload fail + } while (force_switch_manifest_); // bugfix issue=tera-10, dfs sync fail, but + // eventually success, cause reload fail // Install the new version if (s.ok()) { @@ -1252,10 +1470,11 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) { log_number_ = edit->log_number_; prev_log_number_ = edit->prev_log_number_; last_sequence_ = edit->GetLastSequence(); + descriptor_size_ += record.size(); } else { delete v; force_switch_manifest_ = true; - Log(options_->info_log, "[%s][dfs error] set force_switch_manifest", dbname_.c_str()); + LEVELDB_LOG(options_->info_log, "[%s][dfs error] set force_switch_manifest", dbname_.c_str()); } manifest_writers_.pop_front(); @@ -1277,38 +1496,37 @@ Status VersionSet::ReadCurrentFile(uint64_t tablet, std::string* dscname) { std::string current; s = ReadFileToString(env_, CurrentFileName(pdbname), ¤t); if (!s.ok()) { - Log(options_->info_log, "[%s] read CURRENT failed: %s.", - dbname_.c_str(), CurrentFileName(pdbname).c_str()); + LEVELDB_LOG(options_->info_log, "[%s] read CURRENT failed: %s.", dbname_.c_str(), + CurrentFileName(pdbname).c_str()); s = env_->FileExists(CurrentFileName(pdbname)); if (s.IsNotFound()) { // lost CURRENT if (options_->ignore_corruption_in_open) { // try to failover: find and use the newest mainfest - Log(options_->info_log, "[%s] lost CURRENT but ignore_corruption_in_open: %s.", - dbname_.c_str(), CurrentFileName(pdbname).c_str()); + LEVELDB_LOG(options_->info_log, "[%s] lost CURRENT but ignore_corruption_in_open: %s.", + dbname_.c_str(), CurrentFileName(pdbname).c_str()); } else { - Log(options_->info_log, "[%s] lost CURRENT and NOT ignore_corruption_in_open: %s.", - dbname_.c_str(), CurrentFileName(pdbname).c_str()); + LEVELDB_LOG(options_->info_log, "[%s] lost CURRENT and NOT ignore_corruption_in_open: %s.", + dbname_.c_str(), CurrentFileName(pdbname).c_str()); return Status::Corruption("CURRENT lost", s.ToString()); } } else if (s.IsTimeOut()) { - Log(options_->info_log, "[%s]check exist CURRENT timeout: %s.", - dbname_.c_str(), CurrentFileName(pdbname).c_str()); + LEVELDB_LOG(options_->info_log, "[%s]check exist CURRENT timeout: %s.", dbname_.c_str(), + CurrentFileName(pdbname).c_str()); return Status::TimeOut("check exist CURRENT timeout"); } else { // status of current is unknown - Log(options_->info_log, "[%s]status of CURRENT is unknown: %s.", - dbname_.c_str(), CurrentFileName(pdbname).c_str()); + LEVELDB_LOG(options_->info_log, "[%s]status of CURRENT is unknown: %s.", dbname_.c_str(), + CurrentFileName(pdbname).c_str()); return Status::IOError("status of CURRENT is unknown"); } - } else if (current.empty() || current[current.size()-1] != '\n') { - Log(options_->info_log, "[%s] current file error: %s, content:\"%s\", size:%lu.", - dbname_.c_str(), CurrentFileName(pdbname).c_str(), - current.c_str(), current.size()); + } else if (current.empty() || current[current.size() - 1] != '\n') { + LEVELDB_LOG(options_->info_log, "[%s] current file error: %s, content:\"%s\", size:%lu.", + dbname_.c_str(), CurrentFileName(pdbname).c_str(), current.c_str(), current.size()); if (options_->ignore_corruption_in_open) { ArchiveFile(env_, CurrentFileName(pdbname)); } else if (current.size() == 0) { - return Status::Corruption("CURRENT size = 0"); + return Status::Corruption("CURRENT size = 0"); } else { return Status::Corruption("CURRENT incomplete content"); } @@ -1320,8 +1538,7 @@ Status VersionSet::ReadCurrentFile(uint64_t tablet, std::string* dscname) { // if program runs to here, there are only 2 possibilities // 1). read CURRENT success // 2). lost CURRENT and ignore_corruption_in_open - assert((s.ok()) - || (options_->ignore_corruption_in_open && s.IsNotFound())); + assert((s.ok()) || (options_->ignore_corruption_in_open && s.IsNotFound())); if (s.ok()) { // read CURRENT success @@ -1333,13 +1550,13 @@ Status VersionSet::ReadCurrentFile(uint64_t tablet, std::string* dscname) { return Status::OK(); } - if (manifest_status.IsTimeOut()) { + if (manifest_status.IsTimeOut()) { return Status::TimeOut("MANIFSET check file exists timeout:", *dscname); } if (!manifest_status.IsNotFound()) { return Status::IOError("MANIFSET target manifest status is unknown", - manifest_status.ToString()); + manifest_status.ToString()); } // lost manifest @@ -1374,16 +1591,15 @@ Status VersionSet::ReadCurrentFile(uint64_t tablet, std::string* dscname) { } } if (manifest_set.size() < 1) { - Log(options_->info_log, "[%s] none available manifest file", - dbname_.c_str()); + LEVELDB_LOG(options_->info_log, "[%s] none available manifest file", dbname_.c_str()); ArchiveFile(env_, CurrentFileName(pdbname)); return Status::Corruption("MANIFEST lost and haven't available manifest"); } // select the largest manifest number std::set::reverse_iterator it = manifest_set.rbegin(); *dscname = pdbname + "/" + *it; - Log(options_->info_log, "[%s] use backup manifest: %s", - dbname_.c_str(), dscname->c_str()); + LEVELDB_LOG(options_->info_log, "[%s] use backup manifest: %s", dbname_.c_str(), + dscname->c_str()); return Status::OK(); } @@ -1401,42 +1617,42 @@ Status VersionSet::Recover() { Status s; size_t parent_size = options_->parent_tablets.size(); std::string current; - if (parent_size == 0) { - Log(options_->info_log, "[%s] recover old or create new db.", dbname_.c_str()); + if (parent_size == 0) { + LEVELDB_LOG(options_->info_log, "[%s] recover old or create new db.", dbname_.c_str()); dscname.resize(1); s = ReadCurrentFile(0, &dscname[0]); if (!s.ok()) { - Log(options_->info_log, "[%s] fail to read current.", dbname_.c_str()); + LEVELDB_LOG(options_->info_log, "[%s] fail to read current.", dbname_.c_str()); return s; } } else if (parent_size == 1) { - Log(options_->info_log, "[%s] generated by splitting/merging, parent tablet: %llu", - dbname_.c_str(), static_cast(options_->parent_tablets[0])); + LEVELDB_LOG(options_->info_log, "[%s] generated by splitting/merging, parent tablet: %llu", + dbname_.c_str(), static_cast(options_->parent_tablets[0])); dscname.resize(1); s = ReadCurrentFile(options_->parent_tablets[0], &dscname[0]); if (!s.ok()) { - Log(options_->info_log, "[%s] fail to read current (split/merge): %ld.", - dbname_.c_str(), options_->parent_tablets[0]); + LEVELDB_LOG(options_->info_log, "[%s] fail to read current (split/merge): %ld.", + dbname_.c_str(), options_->parent_tablets[0]); return s; } } else if (parent_size == 2) { - Log(options_->info_log, "[%s] generated by merging, parent tablet: %llu, %llu", - dbname_.c_str(), static_cast(options_->parent_tablets[0]), - static_cast(options_->parent_tablets[1])); + LEVELDB_LOG(options_->info_log, "[%s] generated by merging, parent tablet: %llu, %llu", + dbname_.c_str(), static_cast(options_->parent_tablets[0]), + static_cast(options_->parent_tablets[1])); dscname.resize(2); // read first tablet CURRENT s = ReadCurrentFile(options_->parent_tablets[0], &dscname[0]); if (!s.ok()) { - Log(options_->info_log, "[%s] fail to read current (merge0): %ld.", - dbname_.c_str(), options_->parent_tablets[0]); + LEVELDB_LOG(options_->info_log, "[%s] fail to read current (merge0): %ld.", dbname_.c_str(), + options_->parent_tablets[0]); return s; } // read second tablet CURRENT s = ReadCurrentFile(options_->parent_tablets[1], &dscname[1]); if (!s.ok()) { - Log(options_->info_log, "[%s] fail to read current (merge1): %ld.", - dbname_.c_str(), options_->parent_tablets[1]); + LEVELDB_LOG(options_->info_log, "[%s] fail to read current (merge1): %ld.", dbname_.c_str(), + options_->parent_tablets[1]); return s; } } else { @@ -1450,7 +1666,7 @@ Status VersionSet::Recover() { s = env_->NewSequentialFile(dscname[i], &files[i]); if (!s.ok()) { for (size_t j = 0; j < i; ++j) { - delete files[j]; + delete files[j]; } return s; } @@ -1469,7 +1685,7 @@ Status VersionSet::Recover() { VersionSetBuilder builder(this, current_); LogReporter reporter; reporter.status = &s; - log::Reader reader(files[i], &reporter, true/*checksum*/, 0/*initial_offset*/); + log::Reader reader(files[i], &reporter, true /*checksum*/, 0 /*initial_offset*/); Slice record; std::string scratch; int64_t record_num = 0; @@ -1478,20 +1694,18 @@ Status VersionSet::Recover() { VersionEdit edit; s = edit.DecodeFrom(record); if (s.ok()) { - if (edit.has_comparator_ && - edit.comparator_ != icmp_.user_comparator()->Name()) { - s = Status::InvalidArgument( - edit.comparator_ + " does not match existing comparator ", - icmp_.user_comparator()->Name()); - Log(options_->info_log, "[%s] %s\n", dbname_.c_str(), s.ToString().c_str()); + if (edit.has_comparator_ && edit.comparator_ != icmp_.user_comparator()->Name()) { + s = Status::InvalidArgument(edit.comparator_ + " does not match existing comparator ", + icmp_.user_comparator()->Name()); + LEVELDB_LOG(options_->info_log, "[%s] %s\n", dbname_.c_str(), s.ToString().c_str()); } if (files.size() > 1) { // clear compact_pointers if tablet is generated by merging. edit.compact_pointers_.clear(); } } else { - Log(options_->info_log, "[%s] Decode from manifest %s fail\n", - dbname_.c_str(), dscname[i].c_str()); + LEVELDB_LOG(options_->info_log, "[%s] Decode from manifest %s fail\n", dbname_.c_str(), + dscname[i].c_str()); } if (s.ok()) { @@ -1537,11 +1751,11 @@ Status VersionSet::Recover() { builder.SaveTo(v); Finalize(v); AppendVersion(v); - Log(options_->info_log, "[%s] recover manifest finish: %s\n", - dbname_.c_str(), dscname[i].c_str()); + LEVELDB_LOG(options_->info_log, "[%s] recover manifest finish: %s\n", dbname_.c_str(), + dscname[i].c_str()); } else { - Log(options_->info_log, "[%s] recover manifest fail %s, %s\n", - dbname_.c_str(), dscname[i].c_str(), s.ToString().c_str()); + LEVELDB_LOG(options_->info_log, "[%s] recover manifest fail %s, %s\n", dbname_.c_str(), + dscname[i].c_str(), s.ToString().c_str()); // ArchiveFile(env_, dscname[i]); return Status::Corruption("MANIFEST recover fail: " + s.ToString()); } @@ -1580,21 +1794,19 @@ Status VersionSet::Recover() { FileMetaData* f = files[i]; ModifyFileSize(f); // Debug - Log(options_->info_log, "[%s] recover: %s, level: %d, file_size %lu, data_size %lu, " - "del_p: %lu, check_ttl_ts %lu, ttl_p %lu, s: %d %s, l: %d %s\n", - dbname_.c_str(), - FileNumberDebugString(f->number).c_str(), level, - f->file_size, f->data_size, - f->del_percentage, - f->check_ttl_ts, - f->ttl_percentage, - f->smallest_fake, f->smallest.user_key().ToString().data(), - f->largest_fake, f->largest.user_key().ToString().data()); + LEVELDB_LOG(options_->info_log, + "[%s] recover: %s, level: %d, file_size %lu, data_size %lu, " + "del_p: %lu, check_ttl_ts %lu, ttl_p %lu, s: %d %s, l: %d %s\n", + dbname_.c_str(), FileNumberDebugString(f->number).c_str(), level, f->file_size, + f->data_size, f->del_percentage, f->check_ttl_ts, f->ttl_percentage, + f->smallest_fake, f->smallest.user_key().ToString().data(), f->largest_fake, + f->largest.user_key().ToString().data()); } } - Log(options_->info_log, "[%s] recover finish, key_start: %s, key_end: %s\n", - dbname_.c_str(), db_key_start_.DebugString().data(), db_key_end_.DebugString().data()); + LEVELDB_LOG(options_->info_log, "[%s] recover finish, key_start: %s, key_end: %s\n", + dbname_.c_str(), db_key_start_.DebugString().data(), + db_key_end_.DebugString().data()); return s; } @@ -1609,9 +1821,8 @@ bool VersionSet::ModifyFileSize(FileMetaData* f) { uint64_t s_offset = 0; uint64_t l_offset = f->file_size; Table* tableptr = NULL; - Iterator* iter = - table_cache_->NewIterator(ReadOptions(options_), dbname_, - f->number, f->file_size, "", "", &tableptr); + Iterator* iter = table_cache_->NewIterator(ReadOptions(options_), dbname_, f->number, + f->file_size, "", "", &tableptr); if (tableptr != NULL) { if (f->smallest_fake) { s_offset = tableptr->ApproximateOffsetOf(f->smallest.Encode()); @@ -1620,18 +1831,16 @@ bool VersionSet::ModifyFileSize(FileMetaData* f) { l_offset = tableptr->ApproximateOffsetOf(f->largest.Encode()); } } else { - Log(options_->info_log, "[%s] fail to reset file data_size: %s.\n", - dbname_.c_str(), FileNumberDebugString(f->number).c_str()); + LEVELDB_LOG(options_->info_log, "[%s] fail to reset file data_size: %s.\n", dbname_.c_str(), + FileNumberDebugString(f->number).c_str()); } f->data_size = l_offset - s_offset; - Log(options_->info_log, - "[%s] reset file data_size: %s, from %llu to %llu\n, ", - dbname_.c_str(), - FileNumberDebugString(f->number).c_str(), - static_cast(f->file_size), - static_cast(f->data_size)); + LEVELDB_LOG(options_->info_log, "[%s] reset file data_size: %s, from %llu to %llu\n, ", + dbname_.c_str(), FileNumberDebugString(f->number).c_str(), + static_cast(f->file_size), + static_cast(f->data_size)); delete iter; - } else { // for compatibility, we have not decoded f->data_size from MANIFEST + } else { // for compatibility, we have not decoded f->data_size from MANIFEST f->data_size = f->file_size; } return true; @@ -1650,7 +1859,7 @@ void VersionSet::Finalize(Version* v) { int best_ttl_level = -1; int best_ttl_idx = -1; - int base_level = -1; + int base_level = -1; for (int level = config::kNumLevels - 1; level >= 0; level--) { double score = 0; if (level == 0 && level0_compactions_in_progress_.empty()) { @@ -1669,17 +1878,14 @@ void VersionSet::Finalize(Version* v) { // (3) More level0 files means write hotspot. // We give lower score to avoid too much level0 compaction. if (v->files_[level].size() <= (size_t)options_->slow_down_level0_score_limit) { - score = v->files_[level].size() / - static_cast(config::kL0_CompactionTrigger); + score = v->files_[level].size() / static_cast(config::kL0_CompactionTrigger); } else { - score = sqrt(v->files_[level].size() / - static_cast(config::kL0_CompactionTrigger)); + score = sqrt(v->files_[level].size() / static_cast(config::kL0_CompactionTrigger)); } } else if (level > 0) { // Compute the ratio of current size to size limit. const uint64_t level_bytes = TotalFileSizeNotBeingCompacted(v->files_[level]); - score = static_cast(level_bytes) - / MaxBytesForLevel(level, options_->sst_size); + score = static_cast(level_bytes) / MaxBytesForLevel(level, options_->sst_size); } // locate base level @@ -1689,7 +1895,7 @@ void VersionSet::Finalize(Version* v) { if (level < config::kNumLevels - 1) { v->compaction_level_[level] = level; - v->compaction_score_[level] = (score < 1.0) ? 0: score; + v->compaction_score_[level] = (score < 1.0) ? 0 : score; } for (size_t i = 0; i < v->files_[level].size(); i++) { @@ -1731,26 +1937,23 @@ void VersionSet::Finalize(Version* v) { if (best_del_level >= 0) { v->del_trigger_compact_ = v->files_[best_del_level][best_del_idx]; v->del_trigger_compact_level_ = best_del_level; - Log(options_->info_log, - "[%s] del_strategy(current), level %d, num #%lu, file_size %lu, del_p %lu\n", - dbname_.c_str(), - v->del_trigger_compact_level_, - (v->del_trigger_compact_->number) & 0xffffffff, - v->del_trigger_compact_->file_size, - v->del_trigger_compact_->del_percentage); + LEVELDB_LOG(options_->info_log, + "[%s] del_strategy(current), level %d, num #%lu, file_size " + "%lu, del_p %lu\n", + dbname_.c_str(), v->del_trigger_compact_level_, + (v->del_trigger_compact_->number) & 0xffffffff, v->del_trigger_compact_->file_size, + v->del_trigger_compact_->del_percentage); } if (best_ttl_level >= 0) { v->ttl_trigger_compact_ = v->files_[best_ttl_level][best_ttl_idx]; v->ttl_trigger_compact_level_ = best_ttl_level; - Log(options_->info_log, - "[%s] ttl_strategy(current), level %d, num #%lu, file_size %lu, ttl_p %lu, check_ts %lu\n", - dbname_.c_str(), - v->ttl_trigger_compact_level_, - (v->ttl_trigger_compact_->number) & 0xffffffff, - v->ttl_trigger_compact_->file_size, - v->ttl_trigger_compact_->ttl_percentage, - v->ttl_trigger_compact_->check_ttl_ts); + LEVELDB_LOG(options_->info_log, + "[%s] ttl_strategy(current), level %d, num #%lu, file_size " + "%lu, ttl_p %lu, check_ts %lu\n", + dbname_.c_str(), v->ttl_trigger_compact_level_, + (v->ttl_trigger_compact_->number) & 0xffffffff, v->ttl_trigger_compact_->file_size, + v->ttl_trigger_compact_->ttl_percentage, v->ttl_trigger_compact_->check_ttl_ts); } } @@ -1764,6 +1967,8 @@ Status VersionSet::WriteSnapshot(log::Writer* log) { edit.SetLastSequence(last_sequence_); edit.SetLogNumber(log_number_); edit.SetPrevLogNumber(prev_log_number_); + edit.SetStartKey(db_key_start_.user_key().ToString()); + edit.SetEndKey(db_key_end_.user_key().ToString()); // Save compaction pointers for (int level = 0; level < config::kNumLevels; level++) { @@ -1784,7 +1989,12 @@ Status VersionSet::WriteSnapshot(log::Writer* log) { std::string record; edit.EncodeTo(&record); - return log->AddRecord(record); + + Status s = log->AddRecord(record); + if (s.ok()) { + descriptor_size_ = record.size(); + } + return s; } int VersionSet::NumLevelFiles(int level) const { @@ -1796,14 +2006,10 @@ int VersionSet::NumLevelFiles(int level) const { const char* VersionSet::LevelSummary(LevelSummaryStorage* scratch) const { // Update code if kNumLevels changes assert(config::kNumLevels == 7); - snprintf(scratch->buffer, sizeof(scratch->buffer), - "files[ %d %d %d %d %d %d %d ]", - int(current_->files_[0].size()), - int(current_->files_[1].size()), - int(current_->files_[2].size()), - int(current_->files_[3].size()), - int(current_->files_[4].size()), - int(current_->files_[5].size()), + snprintf(scratch->buffer, sizeof(scratch->buffer), "files[ %d %d %d %d %d %d %d ]", + int(current_->files_[0].size()), int(current_->files_[1].size()), + int(current_->files_[2].size()), int(current_->files_[3].size()), + int(current_->files_[4].size()), int(current_->files_[5].size()), int(current_->files_[6].size())); return scratch->buffer; } @@ -1811,14 +2017,16 @@ const char* VersionSet::LevelSummary(LevelSummaryStorage* scratch) const { // Return true iff the manifest contains the specified record. bool VersionSet::ManifestContains(const std::string& record) const { std::string fname = DescriptorFileName(dbname_, manifest_file_number_); - Log(options_->info_log, "[%s] ManifestContains: checking %s\n", dbname_.c_str(), fname.c_str()); + LEVELDB_LOG(options_->info_log, "[%s] ManifestContains: checking %s\n", dbname_.c_str(), + fname.c_str()); SequentialFile* file = NULL; Status s = env_->NewSequentialFile(fname, &file); if (!s.ok()) { - Log(options_->info_log, "[%s] ManifestContains: %s\n", dbname_.c_str(), s.ToString().c_str()); + LEVELDB_LOG(options_->info_log, "[%s] ManifestContains: %s\n", dbname_.c_str(), + s.ToString().c_str()); return false; } - log::Reader reader(file, NULL, true/*checksum*/, 0); + log::Reader reader(file, NULL, true /*checksum*/, 0); Slice r; std::string scratch; bool result = false; @@ -1829,8 +2037,8 @@ bool VersionSet::ManifestContains(const std::string& record) const { } } delete file; - Log(options_->info_log, "[%s] ManifestContains: result = %d\n", - dbname_.c_str(), result ? 1 : 0); + LEVELDB_LOG(options_->info_log, "[%s] ManifestContains: result = %d\n", dbname_.c_str(), + result ? 1 : 0); return result; } @@ -1856,9 +2064,9 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) { Table* tableptr; Slice smallest = files[i]->smallest_fake ? files[i]->smallest.Encode() : ""; Slice largest = files[i]->largest_fake ? files[i]->largest.Encode() : ""; - Iterator* iter = table_cache_->NewIterator( - ReadOptions(options_), dbname_, files[i]->number, files[i]->file_size, - smallest, largest, &tableptr); + Iterator* iter = + table_cache_->NewIterator(ReadOptions(options_), dbname_, files[i]->number, + files[i]->file_size, smallest, largest, &tableptr); if (tableptr != NULL) { result += tableptr->ApproximateOffsetOf(ikey.Encode()); } @@ -1870,9 +2078,7 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) { } void VersionSet::AddLiveFiles(std::set* live) { - for (Version* v = dummy_versions_.next_; - v != &dummy_versions_; - v = v->next_) { + for (Version* v = dummy_versions_.next_; v != &dummy_versions_; v = v->next_) { for (int level = 0; level < config::kNumLevels; level++) { const std::vector& files = v->files_[level]; for (size_t i = 0; i < files.size(); i++) { @@ -1883,9 +2089,7 @@ void VersionSet::AddLiveFiles(std::set* live) { } void VersionSet::AddLiveFiles(std::map* live) { - for (Version* v = dummy_versions_.next_; - v != &dummy_versions_; - v = v->next_) { + for (Version* v = dummy_versions_.next_; v != &dummy_versions_; v = v->next_) { for (int level = 0; level < config::kNumLevels; level++) { const std::vector& files = v->files_[level]; for (size_t i = 0; i < files.size(); i++) { @@ -1896,9 +2100,7 @@ void VersionSet::AddLiveFiles(std::map* live) { } void VersionSet::AddLiveFilesWithSize(std::map* live) { - for (Version* v = dummy_versions_.next_; - v != &dummy_versions_; - v = v->next_) { + for (Version* v = dummy_versions_.next_; v != &dummy_versions_; v = v->next_) { for (int level = 0; level < config::kNumLevels; level++) { const std::vector& files = v->files_[level]; for (size_t i = 0; i < files.size(); i++) { @@ -1920,8 +2122,7 @@ int64_t VersionSet::MaxNextLevelOverlappingBytes() { for (int level = 1; level < config::kNumLevels - 1; level++) { for (size_t i = 0; i < current_->files_[level].size(); i++) { const FileMetaData* f = current_->files_[level][i]; - current_->GetOverlappingInputs(level+1, &f->smallest, &f->largest, - &overlaps); + current_->GetOverlappingInputs(level + 1, &f->smallest, &f->largest, &overlaps); const int64_t sum = TotalFileSize(overlaps); if (sum > result) { result = sum; @@ -1934,8 +2135,7 @@ int64_t VersionSet::MaxNextLevelOverlappingBytes() { // Stores the minimal range that covers all entries in inputs in // *smallest, *largest. // REQUIRES: inputs is not empty -void VersionSet::GetRange(const std::vector& inputs, - InternalKey* smallest, +void VersionSet::GetRange(const std::vector& inputs, InternalKey* smallest, InternalKey* largest) { assert(!inputs.empty()); smallest->Clear(); @@ -1960,8 +2160,7 @@ void VersionSet::GetRange(const std::vector& inputs, // in *smallest, *largest. // REQUIRES: inputs is not empty void VersionSet::GetRange2(const std::vector& inputs1, - const std::vector& inputs2, - InternalKey* smallest, + const std::vector& inputs2, InternalKey* smallest, InternalKey* largest) { std::vector all = inputs1; all.insert(all.end(), inputs2.begin(), inputs2.end()); @@ -1970,9 +2169,9 @@ void VersionSet::GetRange2(const std::vector& inputs1, Iterator* VersionSet::MakeInputIterator(Compaction* c) { ReadOptions options; - options.verify_checksums = - options_->paranoid_checks || options_->verify_checksums_in_compaction; + options.verify_checksums = options_->paranoid_checks || options_->verify_checksums_in_compaction; options.fill_cache = false; + options.fill_persistent_cache = false; options.prefetch_scan = true; options.db_opt = options_; @@ -1980,7 +2179,7 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) { // we will make a concatenating iterator per level. // TODO(opt): use concatenating iterator for level-0 if there is no overlap const int space = (c->level() == 0 ? c->inputs_[0].size() + 1 : 2); - Iterator** list = new Iterator*[space]; + Iterator** list = new Iterator* [space]; int num = 0; for (int which = 0; which < 2; which++) { if (!c->inputs_[which].empty()) { @@ -1989,14 +2188,14 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) { for (size_t i = 0; i < files.size(); i++) { Slice smallest = files[i]->smallest_fake ? files[i]->smallest.Encode() : ""; Slice largest = files[i]->largest_fake ? files[i]->largest.Encode() : ""; - list[num++] = table_cache_->NewIterator( - options, dbname_, files[i]->number, files[i]->file_size, smallest, largest); + list[num++] = table_cache_->NewIterator(options, dbname_, files[i]->number, + files[i]->file_size, smallest, largest); } } else { // Create concatenating iterator for the files from this level list[num++] = NewTwoLevelIterator( new Version::LevelFileNumIterator(icmp_, &c->inputs_[which], dbname_, options), - &GetFileIterator, table_cache_, options); + GetFileIterator, table_cache_, options); } } } @@ -2017,12 +2216,12 @@ void VersionSet::PrintFilesInCompaction(const std::vector& inputs break; } } - Log(options_->info_log, "[%s] test mark level [%s] bening compact.", dbname_.c_str(), - fstr.c_str()); + LEVELDB_LOG(options_->info_log, "[%s] test mark level [%s] bening compact.", dbname_.c_str(), + fstr.c_str()); return; } -bool VersionSet::FilesInCompaction(const std::vector& inputs) { +bool VersionSet::AreFilesInCompaction(const std::vector& inputs) { for (size_t i = 0; i < inputs.size(); i++) { FileMetaData* f = inputs[i]; if (f->being_compacted) { @@ -2032,7 +2231,8 @@ bool VersionSet::FilesInCompaction(const std::vector& inputs) { return false; } -void VersionSet::PrintRangeInCompaction(const InternalKey* smallest, const InternalKey* largest, int level) { +void VersionSet::PrintRangeInCompaction(const InternalKey* smallest, const InternalKey* largest, + int level) { std::vector inputs; assert(level < config::kNumLevels); current_->GetOverlappingInputs(level, smallest, largest, &inputs); @@ -2040,11 +2240,12 @@ void VersionSet::PrintRangeInCompaction(const InternalKey* smallest, const Inter return; } -bool VersionSet::RangeInCompaction(const InternalKey* smallest, const InternalKey* largest, int level) { +bool VersionSet::RangeInCompaction(const InternalKey* smallest, const InternalKey* largest, + int level) { std::vector inputs; assert(level < config::kNumLevels); current_->GetOverlappingInputs(level, smallest, largest, &inputs); - return FilesInCompaction(inputs); + return AreFilesInCompaction(inputs); } bool VersionSet::PickFutureCompaction(int level, std::vector* inputs) { @@ -2054,14 +2255,13 @@ bool VersionSet::PickFutureCompaction(int level, std::vector* inp double high_level_score = 0; for (size_t li = 0; li < current_->compaction_score_.size(); li++) { if (current_->compaction_level_[li] == level) { - low_level_score = current_->compaction_score_[li]; + low_level_score = current_->compaction_score_[li]; } else if (current_->compaction_level_[li] == level + 1) { - high_level_score = current_->compaction_score_[li]; + high_level_score = current_->compaction_score_[li]; } } - if (low_level_score < 1.0 || - low_level_score <= high_level_score) { - return false; + if (low_level_score < 1.0 || low_level_score <= high_level_score) { + return false; } // file in level need compaction, pick file in next compaction @@ -2101,7 +2301,8 @@ bool VersionSet::IsOverlapInFileRange(FileMetaData* lf, FileMetaData* f) { icmp_.Compare(f->largest.Encode(), lf->smallest.Encode()) < 0) { return false; } - //Log(options_->info_log, "[%s] file range overlap, lfile #%d, [%s, %s] being_compact %d, " + // LEVELDB_LOG(options_->info_log, "[%s] file range overlap, lfile #%d, [%s, + // %s] being_compact %d, " // "file #%d, [%s, %s] being_compact %d\n", // dbname_.c_str(), // static_cast(lf->number & 0xffffffff), @@ -2125,7 +2326,8 @@ bool VersionSet::PickCompactionBySize(int level, std::vector* inp FileMetaData* low_level_file = NULL; if (low_level_inputs.size() > 0) { low_level_file = low_level_inputs[0]; - //Log(options_->info_log, "[%s] PickCompactionBySize, low_level %d, f[%s, %s] being_compact %d\n", + // LEVELDB_LOG(options_->info_log, "[%s] PickCompactionBySize, low_level %d, + // f[%s, %s] being_compact %d\n", // dbname_.c_str(), level - 1, // low_level_file->smallest.Encode().ToString().c_str(), // low_level_file->largest.Encode().ToString().c_str(), @@ -2138,17 +2340,21 @@ bool VersionSet::PickCompactionBySize(int level, std::vector* inp for (size_t i = 0; i < current_->files_[level].size(); i++) { FileMetaData* f = current_->files_[level][i]; if (f->being_compacted) { - //Log(options_->info_log, "[%s] PickCompactionBySize, level %d, f[%s, %s] being_compact %d\n", + // LEVELDB_LOG(options_->info_log, "[%s] PickCompactionBySize, level %d, + // f[%s, %s] being_compact %d\n", // dbname_.c_str(), level, - // f->smallest.Encode().ToString().c_str(), f->largest.Encode().ToString().c_str(), + // f->smallest.Encode().ToString().c_str(), + // f->largest.Encode().ToString().c_str(), // f->being_compacted); continue; } if (!compact_pointer_[level].empty() && icmp_.Compare(f->largest.Encode(), compact_pointer_[level]) <= 0) { - //Log(options_->info_log, "[%s] PickCompactionBySize, skip by compact_pointer_[%d] %s, f[%s, %s] being_compacted %d\n", + // LEVELDB_LOG(options_->info_log, "[%s] PickCompactionBySize, skip by + // compact_pointer_[%d] %s, f[%s, %s] being_compacted %d\n", // dbname_.c_str(), level, compact_pointer_[level].c_str(), - // f->smallest.Encode().ToString().c_str(), f->largest.Encode().ToString().c_str(), + // f->smallest.Encode().ToString().c_str(), + // f->largest.Encode().ToString().c_str(), // f->being_compacted); if (!RangeInCompaction(&f->smallest, &f->largest, level + 1) && !IsOverlapInFileRange(low_level_file, f)) { @@ -2158,7 +2364,7 @@ bool VersionSet::PickCompactionBySize(int level, std::vector* inp } if (RangeInCompaction(&f->smallest, &f->largest, level + 1) || IsOverlapInFileRange(low_level_file, f)) { - //PrintRangeInCompaction(&f->smallest, &f->largest, level + 1); + // PrintRangeInCompaction(&f->smallest, &f->largest, level + 1); continue; } inputs->push_back(f); @@ -2171,11 +2377,13 @@ bool VersionSet::PickCompactionBySize(int level, std::vector* inp !IsOverlapInFileRange(low_level_file, f)) { inputs->push_back(f); } - //Log(options_->info_log, "[%s] PickCompactBySize, wrap-arroud level %d, f[%s, %s] being_compacted %d\n", + // LEVELDB_LOG(options_->info_log, "[%s] PickCompactBySize, wrap-arroud + // level %d, f[%s, %s] being_compacted %d\n", // dbname_.c_str(), level, - // f->smallest.Encode().ToString().c_str(), f->largest.Encode().ToString().c_str(), + // f->smallest.Encode().ToString().c_str(), + // f->largest.Encode().ToString().c_str(), // f->being_compacted); - //PrintRangeInCompaction(&f->smallest, &f->largest, level + 1); + // PrintRangeInCompaction(&f->smallest, &f->largest, level + 1); } if (inputs->empty() && candidate.size() > 0) { inputs->push_back(candidate[candidate.size() - 1]); @@ -2183,40 +2391,8 @@ bool VersionSet::PickCompactionBySize(int level, std::vector* inp return !inputs->empty(); } -// timeout for micro_second -void VersionSet::CompactionScore(std::vector >* scores) { - uint64_t ts = env_->NowMicros(); - Version* v = current_; - for (size_t i = 0; i < v->compaction_score_.size(); i++) { - if (v->compaction_score_[i] >= 1) { - scores->push_back(std::pair(v->compaction_score_[i], 0)); - } - } - if (v->del_trigger_compact_ != NULL && - !v->del_trigger_compact_->being_compacted && - v->del_trigger_compact_->del_percentage > options_->del_percentage) { - scores->push_back(std::pair( - (double)(v->del_trigger_compact_->del_percentage / 100.0), 0)); - } - if (v->ttl_trigger_compact_ != NULL && - !v->ttl_trigger_compact_->being_compacted && - ts >= v->ttl_trigger_compact_->check_ttl_ts) { - scores->push_back(std::pair( - (double)((v->ttl_trigger_compact_->ttl_percentage + 1) / 100.0), 0)); - } - if (v->file_to_compact_ != NULL && - !v->file_to_compact_->being_compacted) { - scores->push_back(std::pair(0.1, 0)); - } - - // delay task - if (v->ttl_trigger_compact_ != NULL && - !v->ttl_trigger_compact_->being_compacted && - ts < v->ttl_trigger_compact_->check_ttl_ts) { - scores->push_back(std::pair( - (double)((v->ttl_trigger_compact_->ttl_percentage + 1) / 100.0), - ((v->ttl_trigger_compact_->check_ttl_ts - ts + 1000000) / 1000))); - } +void VersionSet::GetCompactionScores(std::vector>* scores) { + current_->GetCompactionScores(scores); } Compaction* VersionSet::NewSubCompact(Compaction* compact) { @@ -2224,7 +2400,7 @@ Compaction* VersionSet::NewSubCompact(Compaction* compact) { c->output_level_ = compact->output_level_; c->max_output_file_size_ = compact->max_output_file_size_; c->input_version_ = compact->input_version_; - c->input_version_->Ref(); // make sure compacting version will not delete + c->input_version_->Ref(); // make sure compacting version will not delete for (size_t i = 0; i < 2; i++) { for (size_t j = 0; j < compact->inputs_[i].size(); j++) { @@ -2244,28 +2420,21 @@ Compaction* VersionSet::NewSubCompact(Compaction* compact) { return c; } -struct InternalKeyCompare { - InternalKeyCompare(const InternalKeyComparator* cmp) - : icmp(cmp) {} - - InternalKeyCompare(const InternalKeyCompare& key_cmp) - : icmp(key_cmp.icmp) {} - +struct BoundaryComparator { + BoundaryComparator(const Comparator* user_cmp) : ucmp(user_cmp) {} + BoundaryComparator(const BoundaryComparator& key_cmp) = default; + BoundaryComparator& operator=(const BoundaryComparator& key_cmp) = default; // retuen true if a < b - bool operator () (const std::string& ikey_a, const std::string& ikey_b) { - InternalKey ikey1, ikey2; - ikey1.DecodeFrom(ikey_a); - ikey2.DecodeFrom(ikey_b); - bool res = icmp->InternalKeyComparator::Compare(ikey1.Encode(), ikey2.Encode()) < 0; - return res; + bool operator()(const std::string& key_a, const std::string& key_b) { + return ucmp->Compare(Slice(key_a), Slice(key_b)) < 0; } - - const InternalKeyComparator* icmp; + const Comparator* ucmp; }; -uint64_t VersionSet::GetApproximateSizeByLevel(Version* v, int level, const InternalKey& ikey) { +uint64_t VersionSet::GetApproximateSizeForBound(Compaction* compact, int input_index, int level, + const InternalKey& ikey) { uint64_t result = 0; - const std::vector& files = v->files_[level]; + const std::vector& files = compact->inputs_[input_index]; for (size_t i = 0; i < files.size(); i++) { if (icmp_.Compare(files[i]->largest, ikey) <= 0) { // Entire file is before "ikey", so just add the file size @@ -2284,9 +2453,8 @@ uint64_t VersionSet::GetApproximateSizeByLevel(Version* v, int level, const Inte Table* tableptr; Slice smallest = files[i]->smallest_fake ? files[i]->smallest.Encode() : ""; Slice largest = files[i]->largest_fake ? files[i]->largest.Encode() : ""; - Iterator* iter = table_cache_->NewIterator( - ReadOptions(options_), dbname_, files[i]->number, files[i]->file_size, - smallest, largest, &tableptr); + Iterator* iter = table_cache_->NewIterator(ReadOptions(options_), dbname_, files[i]->number, + files[i]->file_size, smallest, largest, &tableptr); if (tableptr != NULL) { result += tableptr->ApproximateOffsetOf(ikey.Encode()); } @@ -2296,7 +2464,7 @@ uint64_t VersionSet::GetApproximateSizeByLevel(Version* v, int level, const Inte return result; } -void VersionSet::GenerateSubCompaction(Compaction* compact, std::vector * compact_vec, +void VersionSet::GenerateSubCompaction(Compaction* compact, std::vector* compact_vec, port::Mutex* mu) { mu->AssertHeld(); if (options_->max_sub_parallel_compaction <= 1) { @@ -2305,35 +2473,89 @@ void VersionSet::GenerateSubCompaction(Compaction* compact, std::vectorcompact_strategy_factory->NewInstance(); + const Comparator* ucmp = strategy->RowKeyComparator(); + if (ucmp == NULL) { + ucmp = icmp_.user_comparator(); + } // generate candidate sub compaction split key - InternalKeyCompare icmp(&icmp_); - std::set boundary(icmp); - for (int i = compact->level_; i < compact->output_level_; i++ ) { + std::set boundary(ucmp); + std::string smallest_bound; + std::string largest_bound; + strategy->ExtractRowKey(compact->smallest_internal_key_.user_key(), &smallest_bound); + strategy->ExtractRowKey(compact->largest_internal_key_.user_key(), &largest_bound); + + // boundary always seek keys in sst files, simply because + // we should find complete row for split sub compaction. + // + // Important Remind : uncomplete row maybe lead to 'Tombstones' lost + // at parallel sub compaction. + // Bad Case Example : + // Sub No.1 Range: [r1:cf1:qu1, ..., r1:cfx:qu1, ..., r2:Tombstones, ...] + // Sub No.2 Range: [r2:cf1:qu1, ... ... ] + // + // No.1 maybe del 'r2:Tombstones' and rubbish kv pairs after it, but at another + // thread No.2 can't discover these del opreators at No.1, and hold 'r2:cf1:qu1' + for (int i = compact->level_; i < compact->output_level_; i++) { for (size_t j = 0; j < compact->inputs_[i - compact->level_].size(); j++) { FileMetaData* f = compact->inputs_[i - compact->level_][j]; - boundary.insert(f->smallest.Encode().ToString()); - boundary.insert(f->largest.Encode().ToString()); + + std::string file_smallest_row_key; + std::string file_largest_row_key; + strategy->ExtractRowKey(f->smallest.user_key(), &file_smallest_row_key); + strategy->ExtractRowKey(f->largest.user_key(), &file_largest_row_key); + boundary.insert(file_smallest_row_key); + boundary.insert(file_largest_row_key); + + LEVELDB_LOG(options_->info_log, + "[%s] sub select : input level file , num #%lu, file_size %lu," + " smallest:[%s -> %s], largest:[%s -> %s]\n", + dbname_.c_str(), (f->number) & 0xffffffff, f->file_size, + f->smallest.DebugString().c_str(), file_smallest_row_key.c_str(), + f->largest.DebugString().c_str(), file_largest_row_key.c_str()); } } - for (size_t j = 1; j < compact->inputs_[compact->output_level_ - compact->level_].size(); j++) { + for (size_t j = 0; j < compact->inputs_[compact->output_level_ - compact->level_].size(); j++) { FileMetaData* f = compact->inputs_[compact->output_level_ - compact->level_][j]; - boundary.insert(f->smallest.Encode().ToString()); + std::string file_smallest_row_key; + std::string file_largest_row_key; + strategy->ExtractRowKey(f->smallest.user_key(), &file_smallest_row_key); + strategy->ExtractRowKey(f->largest.user_key(), &file_largest_row_key); + boundary.insert(file_smallest_row_key); + boundary.insert(file_largest_row_key); + + LEVELDB_LOG(options_->info_log, + "[%s] sub select : output level file , num #%lu, file_size %lu," + " smallest:[%s -> %s], largest:[%s -> %s]\n", + dbname_.c_str(), (f->number) & 0xffffffff, f->file_size, + f->smallest.DebugString().c_str(), file_smallest_row_key.c_str(), + f->largest.DebugString().c_str(), file_largest_row_key.c_str()); } - mu->Unlock(); // generate sub compaction range by output file size uint64_t sum = 0, prev_sum = 0; - std::set::iterator it = boundary.begin(); + std::set::iterator it = boundary.begin(); while (it != boundary.end()) { sum = 0; - InternalKey ikey; - ikey.DecodeFrom(*it); - for (int i = compact->level_; i <= compact->output_level_; i++ ) { - sum += GetApproximateSizeByLevel(compact->input_version_, i, ikey); + const std::string& row_key = *it; + // erase bound which smaller than smallest_bound or greater than largest_bound + if (ucmp->Compare(Slice(row_key), Slice(smallest_bound)) <= 0 || + ucmp->Compare(Slice(row_key), Slice(largest_bound)) >= 0) { + it = boundary.erase(it); + continue; } + InternalKey ikey(Slice(row_key), kMaxSequenceNumber, kValueTypeForSeek); + LEVELDB_LOG(options_->info_log, "[%s] internalkey = %s\n", dbname_.c_str(), + ikey.DebugString().c_str()); + for (int i = compact->level_; i <= compact->output_level_; i++) { + sum += GetApproximateSizeForBound(compact, i - compact->level_, i, ikey); + } + LEVELDB_LOG(options_->info_log, "[%s] sum = %lu, prev_sum = %lu\n", dbname_.c_str(), sum, + prev_sum); assert(sum >= prev_sum); - if (compact->max_output_file_size_ > sum - prev_sum) { + // 0.9 sub compact size almost max_output_file_size_ + if (compact->max_output_file_size_ * 0.9 > sum - prev_sum) { it = boundary.erase(it); } else { ++it; @@ -2361,135 +2583,157 @@ void VersionSet::GenerateSubCompaction(Compaction* compact, std::vectorpush_back(c); } else { - std::set::iterator it = boundary.begin(); + std::set::iterator it = boundary.begin(); std::string prev_key; while (true) { Compaction* c = NewSubCompact(compact); c->sub_compact_start_ = prev_key; - c->sub_compact_end_ = *it; + InternalKey end_ikey(Slice(*it), kMaxSequenceNumber, kValueTypeForSeek); + c->sub_compact_end_ = end_ikey.Encode().ToString(); compact_vec->push_back(c); + std::string start_ikey_str; + if (prev_key == "") { + start_ikey_str = ""; + } else { + InternalKey start_ikey; + start_ikey.DecodeFrom(Slice(c->sub_compact_start_)); + start_ikey_str = start_ikey.DebugString(); + } + LEVELDB_LOG(options_->info_log, "[%s] sub select : range: [%s, %s]\n", dbname_.c_str(), + start_ikey_str.c_str(), end_ikey.DebugString().c_str()); + ++it; prev_key = c->sub_compact_end_; if (it == boundary.end()) { Compaction* c1 = NewSubCompact(compact); c1->sub_compact_start_ = prev_key; compact_vec->push_back(c1); + + InternalKey last_sub_start_ikey; + last_sub_start_ikey.DecodeFrom(Slice(c1->sub_compact_start_)); + LEVELDB_LOG(options_->info_log, "[%s] sub select : range: [%s, ]\n", dbname_.c_str(), + last_sub_start_ikey.DebugString().c_str()); break; } } } + delete strategy; } -Compaction* VersionSet::PickCompaction() { - int level = -1; - std::vector inputs; - bool set_non_trivial = false; - - // We prefer compactions triggered by too much data in a level over - // the compactions triggered by seeks. - const bool size_compaction = (current_->compaction_score_[0] >= 1); - const bool seek_compaction = (current_->file_to_compact_ != NULL); - const bool del_compaction = (current_->del_trigger_compact_ != NULL); - const bool ttl_compaction = (current_->ttl_trigger_compact_ != NULL); - - // check size compaction +void VersionSet::SetupSizeInitialFiles(int* input_level, std::vector* inputs, + bool* non_trivial) { assert(level0_compactions_in_progress_.size() <= 1); bool skipped_l0 = false; - for (size_t li = 0; size_compaction && li < current_->compaction_score_.size(); li++) { + int level = -1; + for (size_t li = 0; li < current_->compaction_score_.size(); li++) { double score = current_->compaction_score_[li]; level = current_->compaction_level_[li]; assert(li == 0 || score <= current_->compaction_score_[li - 1]); if (score >= 1) { - assert(level >= 0); - assert(level+1 < config::kNumLevels); + assert(level >= 0 && level < config::kNumLevels - 1); if (skipped_l0 && level <= 1) { // level0 in progress and level 0 will not directly compact to level > 1 - //Log(options_->info_log, "[%s] lock level %d, conflict, score %.2f\n", - // dbname_.c_str(), level, score); continue; } if (level == 0 && !level0_compactions_in_progress_.empty()) { - skipped_l0 = true; - //Log(options_->info_log, "[%s] level %d in progress, conflict, score %.2f\n", - // dbname_.c_str(), level, score); + skipped_l0 = true; continue; } - if (PickCompactionBySize(level, &inputs)) { + if (PickCompactionBySize(level, inputs)) { break; } - //Log(options_->info_log, "[%s] pick level %d, conflict, score %.2f\n", - // dbname_.c_str(), level, score); - } - } - - // check seek compaction - if (inputs.empty() && seek_compaction) { - level = current_->file_to_compact_level_; - assert(level >= 0); - assert(level+1 < config::kNumLevels); - FileMetaData* f = current_->file_to_compact_; - if (!f->being_compacted && - (level > 0 || level0_compactions_in_progress_.empty()) && - !RangeInCompaction(&f->smallest, &f->largest, level + 1)) { - inputs.push_back(f); - } - } - - // check del compaction - if (inputs.empty() && del_compaction) { - // compaction trigger by delete tags percentage; - // TODO: multithread should lock it - level = current_->del_trigger_compact_level_; - assert(level >= 0); - assert(level+1 < config::kNumLevels); - FileMetaData* f = current_->del_trigger_compact_; - if (!f->being_compacted && - (level > 0 || level0_compactions_in_progress_.empty()) && - !RangeInCompaction(&f->smallest, &f->largest, level + 1)) { - inputs.push_back(f); - set_non_trivial = true; - Log(options_->info_log, - "[%s] compact trigger by del stragety, level %d, num #%lu, file_size %lu, del_p %lu\n", - dbname_.c_str(), - current_->del_trigger_compact_level_, - (current_->del_trigger_compact_->number) & 0xffffffff, - current_->del_trigger_compact_->file_size, - current_->del_trigger_compact_->del_percentage); - } - } - - // check ttl compaction - if (inputs.empty() && ttl_compaction) { - // compaction trigger by ttl tags percentage - // TODO: multithread should lock it - level = current_->ttl_trigger_compact_level_; - assert(level >= 0); - FileMetaData* f = current_->ttl_trigger_compact_; - if (!f->being_compacted && - (level > 0 || level0_compactions_in_progress_.empty()) && - (level+1 == config::kNumLevels || !RangeInCompaction(&f->smallest, &f->largest, level + 1))) { - inputs.push_back(f); - set_non_trivial = true; - Log(options_->info_log, - "[%s] compact trigger by ttl stragety, level %d, num #%lu, file_size %lu, ttl_p %lu, check_ts %lu\n", - dbname_.c_str(), - current_->ttl_trigger_compact_level_, - (current_->ttl_trigger_compact_->number) & 0xffffffff, - current_->ttl_trigger_compact_->file_size, - current_->ttl_trigger_compact_->ttl_percentage, - current_->ttl_trigger_compact_->check_ttl_ts); } } + *input_level = level; +} + +void VersionSet::SetupSeekInitialFiles(int* input_level, std::vector* inputs, + bool* non_trivial) { + int level = current_->file_to_compact_level_; + assert(level >= 0 && level < config::kNumLevels - 1); + FileMetaData* f = current_->file_to_compact_; + if (!f->being_compacted && (level > 0 || level0_compactions_in_progress_.empty()) && + !RangeInCompaction(&f->smallest, &f->largest, level + 1)) { + inputs->push_back(f); + *input_level = level; + } +} + +void VersionSet::SetupDelInitialFiles(int* input_level, std::vector* inputs, + bool* non_trivial) { + int level = current_->del_trigger_compact_level_; + assert(level >= 0 && level < config::kNumLevels - 1); + FileMetaData* f = current_->del_trigger_compact_; + if (!f->being_compacted && (level > 0 || level0_compactions_in_progress_.empty()) && + !RangeInCompaction(&f->smallest, &f->largest, level + 1)) { + inputs->push_back(f); + *non_trivial = true; + *input_level = level; + LEVELDB_LOG(options_->info_log, + "[%s] compact trigger by del stragety, level %d, num #%lu, " + "file_size %lu, del_p %lu\n", + dbname_.c_str(), current_->del_trigger_compact_level_, + (current_->del_trigger_compact_->number) & 0xffffffff, + current_->del_trigger_compact_->file_size, + current_->del_trigger_compact_->del_percentage); + } +} + +void VersionSet::SetupTTLInitialFiles(int* input_level, std::vector* inputs, + bool* non_trivial) { + int level = current_->ttl_trigger_compact_level_; + assert(level >= 0 && level < config::kNumLevels); + FileMetaData* f = current_->ttl_trigger_compact_; + if (!f->being_compacted && (level > 0 || level0_compactions_in_progress_.empty()) && + (level + 1 == config::kNumLevels || + !RangeInCompaction(&f->smallest, &f->largest, level + 1))) { + inputs->push_back(f); + *non_trivial = true; + *input_level = level; + LEVELDB_LOG(options_->info_log, + "[%s] compact trigger by ttl stragety, level %d, num #%lu, " + "file_size %lu, ttl_p %lu, check_ts %lu\n", + dbname_.c_str(), current_->ttl_trigger_compact_level_, + (current_->ttl_trigger_compact_->number) & 0xffffffff, + current_->ttl_trigger_compact_->file_size, + current_->ttl_trigger_compact_->ttl_percentage, + current_->ttl_trigger_compact_->check_ttl_ts); + } +} + +Compaction* VersionSet::PickCompaction() { + int level = -1; + std::vector inputs; + bool non_trivial = false; + + const bool size_compaction = (current_->compaction_score_[0] >= 1); + if (size_compaction && inputs.empty()) { + SetupSizeInitialFiles(&level, &inputs, &non_trivial); + } + const bool seek_compaction = (current_->file_to_compact_ != NULL); + if (seek_compaction && inputs.empty()) { + SetupSeekInitialFiles(&level, &inputs, &non_trivial); + } + + const bool del_compaction = (current_->del_trigger_compact_ != NULL); + if (del_compaction && inputs.empty()) { + SetupDelInitialFiles(&level, &inputs, &non_trivial); + } + + const bool ttl_compaction = (current_->ttl_trigger_compact_ != NULL); + if (ttl_compaction && inputs.empty()) { + SetupTTLInitialFiles(&level, &inputs, &non_trivial); + } + if (inputs.empty()) { return NULL; } - assert(inputs.size() == 1); - assert(level >= 0); + assert(level >= 0 && inputs.size() == 1); // Files in level 0 may overlap each other, so pick up all overlapping ones if (level == 0) { - assert(level0_compactions_in_progress_.size() == 0); + assert(level0_compactions_in_progress_.empty()); InternalKey smallest, largest; GetRange(inputs, &smallest, &largest); // Note that the next call will discard the file we placed in @@ -2497,114 +2741,156 @@ Compaction* VersionSet::PickCompaction() { // which will include the picked file. current_->GetOverlappingInputs(level, &smallest, &largest, &inputs); GetRange(inputs, &smallest, &largest); - if (RangeInCompaction(&smallest, &largest, level + 1)) { // make sure level1 not in compaction - Log(options_->info_log, "[%s] level1 in compacting, level0 conflict\n", - dbname_.c_str()); + if (RangeInCompaction(&smallest, &largest, level + 1)) { // make sure level1 not in compaction + LEVELDB_LOG(options_->info_log, "[%s] level1 in compacting, level0 conflict\n", + dbname_.c_str()); return NULL; } assert(!inputs.empty()); - assert(!FilesInCompaction(inputs)); + assert(!AreFilesInCompaction(inputs)); + } + if (ExpandInputsToCleanCut(level, &inputs) == false) { + return NULL; } + int output_level = level < config::kNumLevels - 1 ? level + 1 : level; + std::vector output_level_inputs; + if (!SetupOtherInputs(level, &inputs, output_level, &output_level_inputs)) { + return NULL; + } + if (AreFilesInCompaction(inputs) || AreFilesInCompaction(output_level_inputs)) { + return NULL; + } + // calc new range for final result + InternalKey all_smallest, all_largest; + GetRange2(inputs, output_level_inputs, &all_smallest, &all_largest); - // expand inputs Compaction* c = new Compaction(level); - c->SetNonTrivial(set_non_trivial); + c->SetNonTrivial(non_trivial); c->input_version_ = current_; - c->input_version_->Ref(); // make sure compacting version will not delete - if (level == config::kNumLevels - 1) {// level in last level - c->set_output_level(level); - } + c->input_version_->Ref(); // make sure compacting version will not delete + c->set_output_level(output_level); c->max_output_file_size_ = MaxFileSizeForLevel(c->output_level(), current_->vset_->options_->sst_size); c->inputs_[0] = inputs; - SetupOtherInputs(c); - // tera-specific: calculate the smallest rowkey which overlap with file not - // in this compaction. - SetupCompactionBoundary(c); + c->inputs_[1] = output_level_inputs; + compact_pointer_[level] = all_largest.Encode().ToString(); + c->edit_.SetCompactPointer(level, all_largest); + c->smallest_internal_key_ = all_smallest; + c->largest_internal_key_ = all_largest; + + if (level + 2 < config::kNumLevels) { + SetupGrandparents(level, inputs, output_level_inputs, &c->grandparents_); + } - // mark being compacted + SetupCompactionBoundary(c); c->MarkBeingCompacted(true); if (level == 0) { level0_compactions_in_progress_.push_back(c); } - Finalize(current_); // reculate level score + Finalize(current_); // reculate level score return c; } -void VersionSet::SetupOtherInputs(Compaction* c) { - if (c->level() == c->output_level()) { // self level compaction, should select next level - return; + +bool VersionSet::SetupOtherInputs(int level, std::vector* level_inputs, + int next_level, std::vector* next_level_inputs) { + assert(!level_inputs->empty()); + assert(next_level_inputs->empty()); + + if (next_level == level) { + return true; // same level need not to find other inputs } - const int level = c->level(); + InternalKey smallest, largest; - GetRange(c->inputs_[0], &smallest, &largest); - - current_->GetOverlappingInputs(c->output_level(), &smallest, &largest, &c->inputs_[1]); - - // Get entire range covered by compaction - InternalKey all_start, all_limit; - GetRange2(c->inputs_[0], c->inputs_[1], &all_start, &all_limit); - - // See if we can grow the number of inputs in "level" without - // changing the number of "level+1" files we pick up. - if (!c->inputs_[1].empty()) { - std::vector expanded0; - current_->GetOverlappingInputs(level, &all_start, &all_limit, &expanded0); - const int64_t inputs0_size = TotalFileSize(c->inputs_[0]); - const int64_t inputs1_size = TotalFileSize(c->inputs_[1]); - const int64_t expanded0_size = TotalFileSize(expanded0); - if (expanded0.size() > c->inputs_[0].size() && - inputs1_size + expanded0_size < - ExpandedCompactionByteSizeLimit(options_->sst_size)) { - InternalKey new_start, new_limit; - GetRange(expanded0, &new_start, &new_limit); - std::vector expanded1; - current_->GetOverlappingInputs(c->output_level(), &new_start, &new_limit, - &expanded1); - // check expanded file wether in compacting - if ((expanded1.size() == c->inputs_[1].size()) && - !RangeInCompaction(&new_start, &new_limit, level) && - !RangeInCompaction(&new_start, &new_limit, c->output_level())) { - Log(options_->info_log, - "[%s] Expanding@%d %d+%d (%ld+%ld bytes) to %d+%d (%ld+%ld bytes)\n", - dbname_.c_str(), - level, - int(c->inputs_[0].size()), - int(c->inputs_[1].size()), - long(inputs0_size), long(inputs1_size), - int(expanded0.size()), - int(expanded1.size()), - long(expanded0_size), long(inputs1_size)); - smallest = new_start; - largest = new_limit; - c->inputs_[0] = expanded0; - c->inputs_[1] = expanded1; - GetRange2(c->inputs_[0], c->inputs_[1], &all_start, &all_limit); - } - } - } - - // Compute the set of grandparent files that overlap this compaction - // (parent == level+1; grandparent == level+2) - if (c->output_level() + 1 < config::kNumLevels) { - current_->GetOverlappingInputs(c->output_level() + 1, &all_start, &all_limit, - &c->grandparents_); - } - - if (false) { - Log(options_->info_log, "[%s] Compacting %d '%s' .. '%s'", - dbname_.c_str(), - level, - smallest.DebugString().c_str(), - largest.DebugString().c_str()); + + // Step1: Expand next level + GetRange(*level_inputs, &smallest, &largest); + current_->GetOverlappingInputs(next_level, &smallest, &largest, next_level_inputs); + if (AreFilesInCompaction(*next_level_inputs)) { + return false; + } + if (!next_level_inputs->empty()) { + if (!ExpandInputsToCleanCut(next_level, next_level_inputs)) { + return false; + } } + // After expanded next level, we can get 4 cases, only case(3) or case(4) need + // to try expand level. detail case parttens: + // case(1.0) level [ ] + // next_level nothing founded + // + // case(1.1) level [ ] + // next_level [ ] found smaller range at next level + // + // case(1.2) level [ ] + // next_level [ ] found same range at next level + // + // case(1.3) level [ ] + // next_level [ ] found part overlap range at next level + // + // case(1.4) level [ ] + // next_level [ ] found bigger range at next level + + // Step2: Try to expand level again + if (!next_level_inputs->empty()) { + const uint64_t next_level_inputs_size = TotalFileSize(*next_level_inputs); + // case(2.1) level [ ] + // next_level [ ] + // all range [ ] found new start or limit + // + // + // case(2.2) level [ ] + // next_level [ ] + // all range [ ] nothing need to change + InternalKey all_start, all_limit; + GetRange2(*level_inputs, *next_level_inputs, &all_start, &all_limit); + + std::vector level_expanded_inputs; + + bool try_expand_level_inputs = true; + current_->GetOverlappingInputs(level, &all_start, &all_limit, &level_expanded_inputs); + uint64_t level_expanded_inputs_size = 0; + if (!ExpandInputsToCleanCut(level, &level_expanded_inputs)) { + try_expand_level_inputs = false; + } else { + level_expanded_inputs_size = TotalFileSize(level_expanded_inputs); + } - // Update the place where we will do the next compaction for this level. - // We update this immediately instead of waiting for the VersionEdit - // to be applied so that if the compaction fails, we will try a different - // key range next time. - compact_pointer_[level] = largest.Encode().ToString(); - c->edit_.SetCompactPointer(level, largest); - return; + bool expand_level = false; + const uint64_t limit = ExpandedCompactionByteSizeLimit(options_->sst_size); + if (try_expand_level_inputs // expended level inputs all not being + // compaction + && + level_expanded_inputs.size() > level_inputs->size() // expanded at level + && next_level_inputs_size + level_expanded_inputs_size < limit && + !AreFilesInCompaction(level_expanded_inputs)) { + InternalKey new_start, new_limit; + GetRange(level_expanded_inputs, &new_start, &new_limit); + std::vector next_level_expanded_inputs; + current_->GetOverlappingInputs(next_level, &new_start, &new_limit, + &next_level_expanded_inputs); + assert(!next_level_expanded_inputs.empty()); + if (!AreFilesInCompaction(next_level_expanded_inputs) && + ExpandInputsToCleanCut(next_level, &next_level_expanded_inputs) && + next_level_expanded_inputs.size() == next_level_inputs->size()) { + expand_level = true; + } + } + if (!expand_level) { + current_->GetCleanCutInputsWithinInterval(level, &all_start, &all_limit, + &level_expanded_inputs); + level_expanded_inputs_size = TotalFileSize(level_expanded_inputs); + if (level_expanded_inputs_size + next_level_inputs_size < limit && + level_expanded_inputs.size() > level_inputs->size() && + !AreFilesInCompaction(level_expanded_inputs)) { + expand_level = true; + } + } + if (expand_level) { + // print debug info + *level_inputs = level_expanded_inputs; + } + } + return true; } void VersionSet::SetupCompactionBoundary(Compaction* c) { @@ -2636,11 +2922,9 @@ void VersionSet::SetupCompactionBoundary(Compaction* c) { return; } -Compaction* VersionSet::CompactRange( - int level, - const InternalKey* begin, - const InternalKey* end, bool* being_compacted) { - *being_compacted = false; +Compaction* VersionSet::CompactRange(int level, const InternalKey* begin, const InternalKey* end, + bool* manual_conflict) { + *manual_conflict = false; std::vector inputs; current_->GetOverlappingInputs(level, begin, end, &inputs); if (inputs.empty()) { @@ -2649,7 +2933,7 @@ Compaction* VersionSet::CompactRange( // check level0 wether in compaction if (level == 0 && !level0_compactions_in_progress_.empty()) { - *being_compacted = true; + *manual_conflict = true; return NULL; } @@ -2658,10 +2942,9 @@ Compaction* VersionSet::CompactRange( // and we must not pick one file and drop another older file if the // two files overlap. if (level > 0) { - const uint64_t limit = - MaxFileSizeForLevel(level, current_->vset_->options_->sst_size); + const uint64_t limit = MaxFileSizeForLevel(level, current_->vset_->options_->sst_size); uint64_t total = 0; - for (size_t i = 0; i < inputs.size(); i++) { + for (size_t i = 0; i + 1 < inputs.size(); ++i) { uint64_t s = inputs[i]->file_size; total += s; if (total >= limit) { @@ -2671,35 +2954,52 @@ Compaction* VersionSet::CompactRange( } } - // check being compacting - InternalKey smallest, largest; - GetRange(inputs, &smallest, &largest); - if (FilesInCompaction(inputs) || RangeInCompaction(&smallest, &largest, level + 1)) { - PrintFilesInCompaction(inputs); - PrintRangeInCompaction(&smallest, &largest, level + 1); - Log(options_->info_log, "[%s] RangeCompaction : %s...%s, level: %d or %d, in compaction", - dbname_.c_str(), smallest.DebugString().c_str(), largest.DebugString().c_str(), level, level + 1); - *being_compacted = true; + if (!ExpandInputsToCleanCut(level, &inputs)) { + *manual_conflict = true; return NULL; } + int output_level = level < config::kNumLevels - 1 ? level + 1 : level; + std::vector output_level_inputs; + if (!SetupOtherInputs(level, &inputs, output_level, &output_level_inputs)) { + *manual_conflict = true; + return NULL; + } + if (AreFilesInCompaction(inputs) || AreFilesInCompaction(output_level_inputs)) { + *manual_conflict = true; + return NULL; + } + InternalKey all_smallest, all_largest; + GetRange2(inputs, output_level_inputs, &all_smallest, &all_largest); + + std::vector grandparents; + Compaction* c = new Compaction(level); c->input_version_ = current_; c->input_version_->Ref(); c->max_output_file_size_ = - MaxFileSizeForLevel(c->output_level(), current_->vset_->options_->sst_size); + MaxFileSizeForLevel(c->output_level(), current_->vset_->options_->sst_size); c->inputs_[0] = inputs; - SetupOtherInputs(c); - // tera-specific: calculate the smallest rowkey which overlap with file not - // in this compaction. - SetupCompactionBoundary(c); + c->inputs_[1] = output_level_inputs; + // Update the place where we will do the next compaction for this level. + // We update this immediately instead of waiting for the VersionEdit + // to be applied so that if the compaction fails, we will try a different + // key range next time. + compact_pointer_[level] = all_largest.Encode().ToString(); + c->edit_.SetCompactPointer(level, all_largest); + if (level + 2 < config::kNumLevels) { + SetupGrandparents(level, inputs, output_level_inputs, &c->grandparents_); + } - // mark being compacted + c->smallest_internal_key_ = all_smallest; + c->largest_internal_key_ = all_largest; + + SetupCompactionBoundary(c); c->MarkBeingCompacted(true); if (level == 0) { level0_compactions_in_progress_.push_back(c); } - Finalize(current_); // reculate level score + Finalize(current_); // reculate level score return c; } @@ -2735,9 +3035,7 @@ Compaction::~Compaction() { } } -void Compaction::SetNonTrivial(bool non_trivial) { - force_non_trivial_ = non_trivial; -} +void Compaction::SetNonTrivial(bool non_trivial) { force_non_trivial_ = non_trivial; } bool Compaction::IsTrivialMove() const { if (force_non_trivial_) { return false; @@ -2745,10 +3043,8 @@ bool Compaction::IsTrivialMove() const { // Avoid a move if there is lots of overlapping grandparent data. // Otherwise, the move could create a parent file that will require // a very expensive merge later on. - return (num_input_files(0) == 1 && - num_input_files(1) == 0 && - (TotalFileSize(grandparents_) <= - MaxGrandParentOverlapBytes(max_output_file_size_))); + return (num_input_files(0) == 1 && num_input_files(1) == 0 && + (TotalFileSize(grandparents_) <= MaxGrandParentOverlapBytes(max_output_file_size_))); } void Compaction::AddInputDeletions(VersionEdit* edit) { @@ -2764,7 +3060,7 @@ bool Compaction::IsBaseLevelForKey(const Slice& user_key) { const Comparator* user_cmp = input_version_->vset_->icmp_.user_comparator(); for (int lvl = output_level_ + 1; lvl < config::kNumLevels; lvl++) { const std::vector& files = input_version_->files_[lvl]; - for (; level_ptrs_[lvl] < files.size(); ) { + for (; level_ptrs_[lvl] < files.size();) { FileMetaData* f = files[level_ptrs_[lvl]]; if (user_cmp->Compare(user_key, f->largest.user_key()) <= 0) { // We've advanced far enough @@ -2784,8 +3080,7 @@ bool Compaction::ShouldStopBefore(const Slice& internal_key) { // Scan to find earliest grandparent file that contains key. const InternalKeyComparator* icmp = &input_version_->vset_->icmp_; while (grandparent_index_ < grandparents_.size() && - icmp->Compare(internal_key, - grandparents_[grandparent_index_]->largest.Encode()) > 0) { + icmp->Compare(internal_key, grandparents_[grandparent_index_]->largest.Encode()) > 0) { if (seen_key_) { overlapped_bytes_ += grandparents_[grandparent_index_]->file_size; } @@ -2805,8 +3100,7 @@ bool Compaction::ShouldStopBefore(const Slice& internal_key) { void Compaction::MarkBeingCompacted(bool flag) { for (size_t i = 0; i < 2; i++) { for (size_t j = 0; j < inputs_[i].size(); j++) { - assert(flag ? !inputs_[i][j]->being_compacted - : inputs_[i][j]->being_compacted); + assert(flag ? !inputs_[i][j]->being_compacted : inputs_[i][j]->being_compacted); inputs_[i][j]->being_compacted = flag; } } diff --git a/src/leveldb/db/version_set.h b/src/leveldb/db/version_set.h index e5d4aa6a1..80c21010e 100644 --- a/src/leveldb/db/version_set.h +++ b/src/leveldb/db/version_set.h @@ -31,7 +31,9 @@ namespace leveldb { -namespace log { class Writer; } +namespace log { +class Writer; +} class Compaction; class Iterator; @@ -46,8 +48,7 @@ class WritableFile; // Return the smallest index i such that files[i]->largest >= key. // Return files.size() if there is no such file. // REQUIRES: "files" contains a sorted list of non-overlapping files. -extern int FindFile(const InternalKeyComparator& icmp, - const std::vector& files, +extern int FindFile(const InternalKeyComparator& icmp, const std::vector& files, const Slice& key); // Returns true iff some file in "files" overlaps the user key range @@ -56,13 +57,10 @@ extern int FindFile(const InternalKeyComparator& icmp, // largest==NULL represents a key largest than all keys in the DB. // REQUIRES: If disjoint_sorted_files, files[] contains disjoint ranges // in sorted order. -extern bool SomeFileOverlapsRange( - const InternalKeyComparator& icmp, - const Comparator* ucmp, - bool disjoint_sorted_files, - const std::vector& files, - const Slice* smallest_user_key, - const Slice* largest_user_key); +extern bool SomeFileOverlapsRange(const InternalKeyComparator& icmp, const Comparator* ucmp, + bool disjoint_sorted_files, + const std::vector& files, + const Slice* smallest_user_key, const Slice* largest_user_key); class Version { public: @@ -78,8 +76,7 @@ class Version { FileMetaData* seek_file; int seek_file_level; }; - Status Get(const ReadOptions&, const LookupKey& key, std::string* val, - GetStats* stats); + Status Get(const ReadOptions&, const LookupKey& key, std::string* val, GetStats* stats); // Adds "stats" into the current state. Returns true if a new // compaction may need to be triggered, false otherwise. @@ -91,24 +88,39 @@ class Version { void Ref(); void Unref(); - void GetOverlappingInputs( - int level, - const InternalKey* begin, // NULL means before all keys - const InternalKey* end, // NULL means after all keys - std::vector* inputs); + // TODO by baorenyi add comment + void ExpendInputsToCleanCut(int level, std::vector* inputs); + + void GetCleanCutInputsWithinInterval(int level, const InternalKey* begin, const InternalKey* end, + std::vector* inputs); + + void BinarySearchOverlappingInputs(int level, const Slice& user_begin, const Slice& user_end, + bool within_interval, std::vector* inputs); + + void ExtendRangeOverlappingInterval(int level, const Comparator* ucmp, const Slice& user_begin, + const Slice& user_end, unsigned int mid_index, + int* start_index, int* end_index); + + void ExtendRangeWithinInterval(int level, const Comparator* ucmp, const Slice& user_begin, + const Slice& user_end, unsigned int mid_index, int* start_index, + int* end_index); + + void GetCompactionScores(std::vector>* scores) const; + + void GetOverlappingInputs(int level, + const InternalKey* begin, // NULL means before all keys + const InternalKey* end, // NULL means after all keys + std::vector* inputs); // Returns true iff some file in the specified level overlaps // some part of [*smallest_user_key,*largest_user_key]. // smallest_user_key==NULL represents a key smaller than all keys in the DB. // largest_user_key==NULL represents a key largest than all keys in the DB. - bool OverlapInLevel(int level, - const Slice* smallest_user_key, - const Slice* largest_user_key); + bool OverlapInLevel(int level, const Slice* smallest_user_key, const Slice* largest_user_key); // Return the level at which we should place a new memtable compaction // result that covers the range [smallest_user_key,largest_user_key]. - int PickLevelForMemTableOutput(const Slice& smallest_user_key, - const Slice& largest_user_key); + int PickLevelForMemTableOutput(const Slice& smallest_user_key, const Slice& largest_user_key); int NumFiles(int level) const { return files_[level].size(); } @@ -120,6 +132,9 @@ class Version { // Return a human readable string that describes this version's contents. std::string DebugString() const; + void DEBUG_pick(const std::string& msg, int level, const std::vector& inputs, + const InternalKey& smallest, const InternalKey& largest) const; + private: friend class Compaction; friend class VersionSet; @@ -128,10 +143,10 @@ class Version { class LevelFileNumIterator; Iterator* NewConcatenatingIterator(const ReadOptions&, int level) const; - VersionSet* vset_; // VersionSet to which this Version belongs - Version* next_; // Next version in linked list - Version* prev_; // Previous version in linked list - int refs_; // Number of live refs to this version + VersionSet* vset_; // VersionSet to which this Version belongs + Version* next_; // Next version in linked list + Version* prev_; // Previous version in linked list + int refs_; // Number of live refs to this version // List of files per level std::vector files_[config::kNumLevels]; @@ -154,7 +169,10 @@ class Version { std::vector compaction_level_; explicit Version(VersionSet* vset) - : vset_(vset), next_(this), prev_(this), refs_(0), + : vset_(vset), + next_(this), + prev_(this), + refs_(0), file_to_compact_(NULL), file_to_compact_level_(-1), ttl_trigger_compact_(NULL), @@ -178,9 +196,7 @@ class Version { class VersionSet { public: - VersionSet(const std::string& dbname, - const Options* options, - TableCache* table_cache, + VersionSet(const std::string& dbname, const Options* options, TableCache* table_cache, const InternalKeyComparator*); ~VersionSet(); @@ -189,15 +205,13 @@ class VersionSet { // current version. Will release *mu while actually writing to the file. // REQUIRES: *mu is held on entry. // REQUIRES: no other thread concurrently calls LogAndApply() - void LogAndApplyHelper(VersionSetBuilder* builder, - VersionEdit* edit); - Status LogAndApply(VersionEdit* edit, port::Mutex* mu) - EXCLUSIVE_LOCKS_REQUIRED(mu); + void LogAndApplyHelper(VersionSetBuilder* builder, VersionEdit* edit); + Status LogAndApply(VersionEdit* edit, port::Mutex* mu) EXCLUSIVE_LOCKS_REQUIRED(mu); // Recover the last saved descriptor from persistent storage. Status Recover(); - void GetCurrentLevelSize(std::vector *); + void GetCurrentLevelSize(std::vector*); // Return the current version. Version* current() const { return current_; } @@ -243,7 +257,7 @@ class VersionSet { uint64_t PrevLogNumber() const { return prev_log_number_; } // - void CompactionScore(std::vector >* scores); + void GetCompactionScores(std::vector>* scores); // Pick level and inputs for a new compaction. // Returns NULL if there is no compaction to be done. // Otherwise returns a pointer to a heap-allocated object that @@ -254,10 +268,8 @@ class VersionSet { // the specified level. Returns NULL if there is nothing in that // level that overlaps the specified range. Caller should delete // the result. - Compaction* CompactRange( - int level, - const InternalKey* begin, - const InternalKey* end, bool* being_compacted); + Compaction* CompactRange(int level, const InternalKey* begin, const InternalKey* end, + bool* manual_conflict); // release file's being_compacted flag, and release level0's lock void ReleaseCompaction(Compaction* c, Status& s); @@ -287,8 +299,9 @@ class VersionSet { }; const char* LevelSummary(LevelSummaryStorage* scratch) const; - void GenerateSubCompaction(Compaction* compact, std::vector * compact_vec, + void GenerateSubCompaction(Compaction* compact, std::vector* compact_vec, port::Mutex* mu); + void MaybeSwitchManifest(); private: friend class Compaction; @@ -297,20 +310,48 @@ class VersionSet { struct ManifestWriter; Compaction* NewSubCompact(Compaction* compact); - uint64_t GetApproximateSizeByLevel(Version* v, int level, const InternalKey& ikey); + uint64_t GetApproximateSizeForBound(Compaction* compact, int input_index, int level, + const InternalKey& ikey); void Finalize(Version* v); - void GetRange(const std::vector& inputs, - InternalKey* smallest, + void GetRange(const std::vector& inputs, InternalKey* smallest, InternalKey* largest); void GetRange2(const std::vector& inputs1, - const std::vector& inputs2, - InternalKey* smallest, + const std::vector& inputs2, InternalKey* smallest, InternalKey* largest); - void SetupOtherInputs(Compaction* c); + bool ExpandInputsToCleanCut(int level, std::vector* inputs); + + // Find files at 'next_level' that overlaping with the files at 'level', the + // 'next_level' must equals 'level' + 1. The overlap define by user_compartor + // (1) it is compare use user_key in Leveldb (Tera: kv/ttlkv) + // (2) it is compare use row_key in Tera: table + // All files in 'next_level_inputs' that we found must be clean cut by + // row_key. + // If one of the files that we found is being_compacted -- return false + // TODO by baorenyi this function only use two args struct + // CompactionLevelInputs + bool SetupOtherInputs(int level, std::vector* level_inputs, int next_level, + std::vector* next_level_inputs); + + void SetupSizeInitialFiles(int* input_level, std::vector* inputs, + bool* non_trivial); + + void SetupSeekInitialFiles(int* input_level, std::vector* inputs, + bool* non_trivial); + + void SetupDelInitialFiles(int* input_level, std::vector* inputs, + bool* non_trivial); + + void SetupTTLInitialFiles(int* input_level, std::vector* inputs, + bool* non_trivial); + + void SetupGrandparents(int level, const std::vector& inputs, + const std::vector& output_inputs, + std::vector* grandparents); + void SetupCompactionBoundary(Compaction* c); // Save current contents to *log @@ -326,7 +367,7 @@ class VersionSet { // milti thread compaction relatively void PrintFilesInCompaction(const std::vector& inputs); - bool FilesInCompaction(const std::vector& inputs); + bool AreFilesInCompaction(const std::vector& inputs); void PrintRangeInCompaction(const InternalKey* smallest, const InternalKey* largest, int level); bool RangeInCompaction(const InternalKey* smallest, const InternalKey* largest, int level); bool IsOverlapInFileRange(FileMetaData* lf, FileMetaData* f); @@ -348,6 +389,7 @@ class VersionSet { uint64_t last_sequence_; uint64_t log_number_; uint64_t prev_log_number_; // 0 or backing store for memtable being compacted + uint32_t descriptor_size_; std::deque manifest_writers_; @@ -366,7 +408,6 @@ class VersionSet { // No copying allowed VersionSet(const VersionSet&); void operator=(const VersionSet&); - }; // A Compaction encapsulates information about a compaction. @@ -377,7 +418,7 @@ class Compaction { // Return the level that is being compacted. Inputs from "level" // and "level+1" will be merged to produce a set of "level+1" files. int level() const { return level_; } - void set_output_level(int output_level) {output_level_ = output_level;} + void set_output_level(int output_level) { output_level_ = output_level; } int output_level() const { return output_level_; } // Return the object that holds the edits to the descriptor done @@ -417,9 +458,7 @@ class Compaction { void ReleaseInputs(); std::string drop_lower_bound() const { return drop_lower_bound_; } - void set_drop_lower_bound(const std::string& lower_bound) { - drop_lower_bound_ = lower_bound; - } + void set_drop_lower_bound(const std::string& lower_bound) { drop_lower_bound_ = lower_bound; } private: friend class Version; @@ -429,13 +468,14 @@ class Compaction { explicit Compaction(int level); int level_; - int output_level_; // compact ouputfile should step into output_level_, use for self level compaction + int output_level_; // compact ouputfile should step into output_level_, use + // for self level compaction uint64_t max_output_file_size_; Version* input_version_; VersionEdit edit_; // Each compaction reads inputs from "level_" and "level_+1" - std::vector inputs_[2]; // The two sets of inputs + std::vector inputs_[2]; // The two sets of inputs // State used to check for number of of overlapping grandparent files // (parent == output_level_ + 1, grandparent == output_level_ + 2) @@ -462,9 +502,12 @@ class Compaction { // support self compaction bool force_non_trivial_; + InternalKey smallest_internal_key_; + InternalKey largest_internal_key_; + // support parallel compaction - std::string sub_compact_start_; // own by child - std::string sub_compact_end_; // own by child + std::string sub_compact_start_; // own by child + std::string sub_compact_end_; // own by child }; } // namespace leveldb diff --git a/src/leveldb/db/version_set_test.cc b/src/leveldb/db/version_set_test.cc index 4292ab0e7..392b8dfb1 100644 --- a/src/leveldb/db/version_set_test.cc +++ b/src/leveldb/db/version_set_test.cc @@ -14,7 +14,13 @@ #include "util/logging.h" #include "util/testharness.h" #include "util/testutil.h" +#include "db/table_cache.h" +#include "leveldb/status.h" #include "leveldb/compact_strategy.h" +#include "io/default_compact_strategy.h" +#include "io/ttlkv_compact_strategy.h" +#include "proto/test_helper.h" +#include namespace leveldb { @@ -23,7 +29,7 @@ class FindFileTest { std::vector files_; bool disjoint_sorted_files_; - FindFileTest() : disjoint_sorted_files_(true) { } + FindFileTest() : disjoint_sorted_files_(true) {} ~FindFileTest() { for (size_t i = 0; i < files_.size(); i++) { @@ -31,8 +37,7 @@ class FindFileTest { } } - void Add(const char* smallest, const char* largest, - SequenceNumber smallest_seq = 100, + void Add(const char* smallest, const char* largest, SequenceNumber smallest_seq = 100, SequenceNumber largest_seq = 100) { FileMetaData* f = new FileMetaData; f->number = files_.size() + 1; @@ -52,17 +57,16 @@ class FindFileTest { Slice s(smallest != NULL ? smallest : ""); Slice l(largest != NULL ? largest : ""); return SomeFileOverlapsRange(cmp, cmp.user_comparator(), disjoint_sorted_files_, files_, - (smallest != NULL ? &s : NULL), - (largest != NULL ? &l : NULL)); + (smallest != NULL ? &s : NULL), (largest != NULL ? &l : NULL)); } }; TEST(FindFileTest, Empty) { ASSERT_EQ(0, Find("foo")); - ASSERT_TRUE(! Overlaps("a", "z")); - ASSERT_TRUE(! Overlaps(NULL, "z")); - ASSERT_TRUE(! Overlaps("a", NULL)); - ASSERT_TRUE(! Overlaps(NULL, NULL)); + ASSERT_TRUE(!Overlaps("a", "z")); + ASSERT_TRUE(!Overlaps(NULL, "z")); + ASSERT_TRUE(!Overlaps("a", NULL)); + ASSERT_TRUE(!Overlaps(NULL, NULL)); } TEST(FindFileTest, Single) { @@ -74,8 +78,8 @@ TEST(FindFileTest, Single) { ASSERT_EQ(1, Find("q1")); ASSERT_EQ(1, Find("z")); - ASSERT_TRUE(! Overlaps("a", "b")); - ASSERT_TRUE(! Overlaps("z1", "z2")); + ASSERT_TRUE(!Overlaps("a", "b")); + ASSERT_TRUE(!Overlaps("z1", "z2")); ASSERT_TRUE(Overlaps("a", "p")); ASSERT_TRUE(Overlaps("a", "q")); ASSERT_TRUE(Overlaps("a", "z")); @@ -87,8 +91,8 @@ TEST(FindFileTest, Single) { ASSERT_TRUE(Overlaps("q", "q")); ASSERT_TRUE(Overlaps("q", "q1")); - ASSERT_TRUE(! Overlaps(NULL, "j")); - ASSERT_TRUE(! Overlaps("r", NULL)); + ASSERT_TRUE(!Overlaps(NULL, "j")); + ASSERT_TRUE(!Overlaps("r", NULL)); ASSERT_TRUE(Overlaps(NULL, "p")); ASSERT_TRUE(Overlaps(NULL, "p1")); ASSERT_TRUE(Overlaps("q", NULL)); @@ -118,10 +122,10 @@ TEST(FindFileTest, Multiple) { ASSERT_EQ(3, Find("450")); ASSERT_EQ(4, Find("451")); - ASSERT_TRUE(! Overlaps("100", "149")); - ASSERT_TRUE(! Overlaps("251", "299")); - ASSERT_TRUE(! Overlaps("451", "500")); - ASSERT_TRUE(! Overlaps("351", "399")); + ASSERT_TRUE(!Overlaps("100", "149")); + ASSERT_TRUE(!Overlaps("251", "299")); + ASSERT_TRUE(!Overlaps("451", "500")); + ASSERT_TRUE(!Overlaps("351", "399")); ASSERT_TRUE(Overlaps("100", "150")); ASSERT_TRUE(Overlaps("100", "200")); @@ -138,8 +142,8 @@ TEST(FindFileTest, MultipleNullBoundaries) { Add("200", "250"); Add("300", "350"); Add("400", "450"); - ASSERT_TRUE(! Overlaps(NULL, "149")); - ASSERT_TRUE(! Overlaps("451", NULL)); + ASSERT_TRUE(!Overlaps(NULL, "149")); + ASSERT_TRUE(!Overlaps("451", NULL)); ASSERT_TRUE(Overlaps(NULL, NULL)); ASSERT_TRUE(Overlaps(NULL, "150")); ASSERT_TRUE(Overlaps(NULL, "199")); @@ -155,8 +159,8 @@ TEST(FindFileTest, MultipleNullBoundaries) { TEST(FindFileTest, OverlapSequenceChecks) { Add("200", "200", 5000, 3000); - ASSERT_TRUE(! Overlaps("199", "199")); - ASSERT_TRUE(! Overlaps("201", "300")); + ASSERT_TRUE(!Overlaps("199", "199")); + ASSERT_TRUE(!Overlaps("201", "300")); ASSERT_TRUE(Overlaps("200", "200")); ASSERT_TRUE(Overlaps("190", "200")); ASSERT_TRUE(Overlaps("200", "210")); @@ -166,8 +170,8 @@ TEST(FindFileTest, OverlappingFiles) { Add("150", "600"); Add("400", "500"); disjoint_sorted_files_ = false; - ASSERT_TRUE(! Overlaps("100", "149")); - ASSERT_TRUE(! Overlaps("601", "700")); + ASSERT_TRUE(!Overlaps("100", "149")); + ASSERT_TRUE(!Overlaps("601", "700")); ASSERT_TRUE(Overlaps("100", "150")); ASSERT_TRUE(Overlaps("100", "200")); ASSERT_TRUE(Overlaps("100", "300")); @@ -181,37 +185,103 @@ TEST(FindFileTest, OverlappingFiles) { } class VersionSetTest { -public: - VersionSetTest () - : icmp(opt.comparator), - t_log_number(10), - t_next_file(20), - t_last_seq(100) { - opt.compact_strategy_factory = new DummyCompactStrategyFactory(); - opt.env->DeleteDirRecursive("/tmp/db/test"); - opt.env->CreateDir("/tmp/db/test"); - t_vset = new VersionSet(std::string("/tmp/db/test"), &opt, NULL, &icmp); - t_vset->manifest_file_number_ = 100; + public: + VersionSetTest() : icmp(opt.comparator), t_log_number(10), t_next_file(20), t_last_seq(100) { + Logger* logger; + Env::Default()->NewLogger("/tmp/db_test.log", LogOption::LogOptionBuilder().Build(), &logger); + Env::Default()->SetLogger(logger); + opt.info_log = logger; + opt.compact_strategy_factory = new DummyCompactStrategyFactory(); + opt.env->DeleteDirRecursive("/tmp/db/test"); + opt.env->CreateDir("/tmp/db/test"); + t_vset = new VersionSet(std::string("/tmp/db/test"), &opt, new TableCache(10240), &icmp); + t_vset->manifest_file_number_ = 100; + } + + tera::TableSchema SetTableSchema() { + tera::TableSchema table_s = tera::DefaultTableSchema(); + table_s.set_raw_key(raw_type); + table_schema = table_s; + return table_schema; + } + + struct LightFileMeta { + int level; + InternalKey smallest; + InternalKey largest; + uint64_t file_size; + }; + + LightFileMeta BuildLightFileMeta(int level, uint64_t file_size, const std::string& row_key, + const std::string& cf, const std::string& qu, int64_t ts, + TeraKeyType type, const std::string& row_key1, + const std::string& cf1, const std::string& qu1, int64_t ts1, + TeraKeyType type1) { + const leveldb::RawKeyOperator* opt = tera::GetRawKeyOperatorFromSchema(table_schema); + std::string begin_key, end_key; + opt->EncodeTeraKey(row_key, cf, qu, ts, type, &begin_key); + opt->EncodeTeraKey(row_key1, cf1, qu1, ts1, type1, &end_key); + InternalKey smallest(begin_key, 1, kTypeValue); + InternalKey largest(end_key, 1, kTypeValue); + LightFileMeta l; + l.level = level; + l.file_size = file_size; + l.smallest = smallest; + l.largest = largest; + return l; + } + + InternalKey BuildInternalKey(const std::string& row_key, const std::string& cf, + const std::string& qu, int64_t ts, TeraKeyType type) { + const leveldb::RawKeyOperator* opt = tera::GetRawKeyOperatorFromSchema(table_schema); + std::string begin_key; + opt->EncodeTeraKey(row_key, cf, qu, ts, type, &begin_key); + InternalKey smallest(begin_key, 1, kTypeValue); + return smallest; + } + + std::string GetInternalKeyStr0(const Slice& user_key, SequenceNumber s, ValueType t) { + InternalKey ikey(user_key, s, t); + return ikey.Encode().ToString(); + } + + std::string GetInternalKeyStr(const Slice& user_key, SequenceNumber s, ValueType t) { + std::string key; + const leveldb::RawKeyOperator* opt = tera::GetRawKeyOperatorFromSchema(table_schema); + opt->EncodeTeraKey(user_key.ToString(), "", "", tera::kLatestTs, TKT_FORSEEK, &key); + InternalKey ikey(Slice(key), s, t); + return ikey.Encode().ToString(); + } + + void AddVersionToVersionSet(const std::vector& version_metas) { + VersionEdit edit; + for (const auto& v : version_metas) { + edit.AddFile(v.level, t_vset->NewFileNumber(), v.file_size, v.smallest, v.largest); } + edit.SetComparatorName(leveldb::BytewiseComparator()->Name()); + t_mu.Lock(); + t_vset->LogAndApply(&edit, &t_mu); + t_mu.Unlock(); + } -public: - Options opt; - const InternalKeyComparator icmp; - VersionSet* t_vset; - uint64_t t_log_number; - uint64_t t_next_file; - uint64_t t_last_seq; - port::Mutex t_mu; + public: + Options opt; + const InternalKeyComparator icmp; + VersionSet* t_vset; + uint64_t t_log_number; + uint64_t t_next_file; + uint64_t t_last_seq; + port::Mutex t_mu; + tera::RawKey raw_type; + tera::TableSchema table_schema; }; TEST(VersionSetTest, PickCompactionTest) { VersionEdit edit; - edit.AddFile(0, t_vset->NewFileNumber(), 200, - InternalKey("a0001", 1, kTypeValue), + edit.AddFile(0, t_vset->NewFileNumber(), 200, InternalKey("a0001", 1, kTypeValue), InternalKey("a0002", 1, kTypeDeletion)); - edit.AddFile(0, t_vset->NewFileNumber(), 200, - InternalKey("a0003", 1, kTypeValue), + edit.AddFile(0, t_vset->NewFileNumber(), 200, InternalKey("a0003", 1, kTypeValue), InternalKey("a0004", 1, kTypeValue)); edit.SetComparatorName(leveldb::BytewiseComparator()->Name()); t_mu.Lock(); @@ -221,9 +291,8 @@ TEST(VersionSetTest, PickCompactionTest) { ASSERT_TRUE((uint64_t)t_vset->level0_compactions_in_progress_[0] == (uint64_t)c); VersionEdit edit1; - edit1.AddFile(0, t_vset->NewFileNumber(), 200, - InternalKey("a0005", 1, kTypeValue), - InternalKey("a0006", 1, kTypeValue)); + edit1.AddFile(0, t_vset->NewFileNumber(), 200, InternalKey("a0005", 1, kTypeValue), + InternalKey("a0006", 1, kTypeValue)); edit1.SetComparatorName(leveldb::BytewiseComparator()->Name()); t_mu.Lock(); t_vset->LogAndApply(&edit1, &t_mu); @@ -231,8 +300,320 @@ TEST(VersionSetTest, PickCompactionTest) { ASSERT_TRUE(t_vset->PickCompaction() == NULL); } -} // namespace leveldb +/* pick compaction files process test */ + +TEST(VersionSetTest, ExtendRangeOverlappingIntervalTest0) { + std::vector version_metas = { + /* level, smallest, largest, file_size */ + {2, InternalKey("a", 1, kTypeValue), InternalKey("b", 2, kTypeValue), 100}, + }; + AddVersionToVersionSet(version_metas); + int start_index, end_index; + const Comparator* user_cmp = t_vset->icmp_.user_comparator(); + // level 2 only one file + t_vset->current_->ExtendRangeOverlappingInterval(2, user_cmp, "a", "b", 0, &start_index, + &end_index); + ASSERT_EQ(0, start_index); + ASSERT_EQ(0, end_index); +} + +TEST(VersionSetTest, ExtendRangeOverlappingIntervalTest1) { + std::vector version_metas = { + /* level, smallest, largest, file_size */ + {1, InternalKey("a", 1, kTypeValue), InternalKey("b", 2, kTypeValue), 100}, + {1, InternalKey("b", 1, kTypeValue), InternalKey("c", 2, kTypeValue), 100}, + {1, InternalKey("c", 1, kTypeValue), InternalKey("d", 2, kTypeValue), 100}, + {1, InternalKey("d", 1, kTypeValue), InternalKey("e", 1, kTypeValue), 100}, + }; + AddVersionToVersionSet(version_metas); + int start_index, end_index; + const Comparator* user_cmp = t_vset->icmp_.user_comparator(); + // select from mid_index = 1 + t_vset->current_->ExtendRangeOverlappingInterval(1, user_cmp, "b", "c", 1, &start_index, + &end_index); + ASSERT_EQ(0, start_index); + ASSERT_EQ(2, end_index); +} + +TEST(VersionSetTest, ExtendRangeOverlappingIntervalTest2) { + std::vector version_metas = { + /* level, smallest, largest, file_size */ + {1, InternalKey("a", 1, kTypeValue), InternalKey("aa", 2, kTypeValue), 100}, + {1, InternalKey("b", 1, kTypeValue), InternalKey("c", 2, kTypeValue), 100}, + {1, InternalKey("c1", 1, kTypeValue), InternalKey("d", 2, kTypeValue), 100}, + {1, InternalKey("d", 1, kTypeValue), InternalKey("e", 1, kTypeValue), 100}, + }; + AddVersionToVersionSet(version_metas); + int start_index, end_index; + const Comparator* user_cmp = t_vset->icmp_.user_comparator(); + // select from mid_index = 1 + t_vset->current_->ExtendRangeOverlappingInterval(1, user_cmp, "b", "c", 1, &start_index, + &end_index); + ASSERT_EQ(1, start_index); + ASSERT_EQ(1, end_index); +} + +TEST(VersionSetTest, ExtendRangeWithinIntervalTest0) { + std::vector version_metas = { + /* level, smallest, largest, file_size */ + {1, InternalKey("a", 1, kTypeValue), InternalKey("aa", 2, kTypeValue), 100}, + {1, InternalKey("b", 1, kTypeValue), InternalKey("c", 2, kTypeValue), 100}, + {1, InternalKey("c1", 1, kTypeValue), InternalKey("d", 2, kTypeValue), 100}, + {1, InternalKey("d", 1, kTypeValue), InternalKey("e", 1, kTypeValue), 100}, + }; + AddVersionToVersionSet(version_metas); + int start_index, end_index; + const Comparator* user_cmp = t_vset->icmp_.user_comparator(); + // select from mid_index = 1 + t_vset->current_->ExtendRangeWithinInterval(1, user_cmp, "b", "c", 1, &start_index, &end_index); + ASSERT_EQ(1, start_index); + ASSERT_EQ(1, end_index); +} + +TEST(VersionSetTest, ExtendRangeWithinIntervalTest1) { + std::vector version_metas = { + /* level, smallest, largest, file_size */ + {1, InternalKey("a", 1, kTypeValue), InternalKey("b", 2, kTypeValue), 100}, + {1, InternalKey("b", 1, kTypeValue), InternalKey("c", 2, kTypeValue), 100}, + {1, InternalKey("c", 1, kTypeValue), InternalKey("d", 2, kTypeValue), 100}, + {1, InternalKey("d", 1, kTypeValue), InternalKey("e", 1, kTypeValue), 100}, + }; + AddVersionToVersionSet(version_metas); + int start_index, end_index; + const Comparator* user_cmp = t_vset->icmp_.user_comparator(); + // select from mid_index = 1 + t_vset->current_->ExtendRangeWithinInterval(1, user_cmp, "b", "c", 1, &start_index, &end_index); + ASSERT_EQ(3, start_index); + ASSERT_EQ(2, end_index); +} + +TEST(VersionSetTest, GetCleanCutInputsWithinInterval) { + std::vector version_metas = { + /* level, smallest, largest, file_size */ + {1, InternalKey("a", 1, kTypeValue), InternalKey("b", 2, kTypeValue), 100}, + {1, InternalKey("b", 1, kTypeValue), InternalKey("c", 2, kTypeValue), 100}, + {1, InternalKey("c", 1, kTypeValue), InternalKey("d", 2, kTypeValue), 100}, + {1, InternalKey("d", 1, kTypeValue), InternalKey("e", 1, kTypeValue), 100}, + }; + AddVersionToVersionSet(version_metas); + // select from mid_index = 1 + InternalKey begin = InternalKey("b", 1, kTypeValue); + InternalKey end = InternalKey("c", 2, kTypeValue); + std::vector inputs; + // not found clean cut + t_vset->current_->GetCleanCutInputsWithinInterval(1, &begin, &end, &inputs); + ASSERT_EQ(0, inputs.size()); +} + +TEST(VersionSetTest, GetCleanCutInputsWithinInterval1) { + std::vector version_metas = { + /* level, smallest, largest, file_size */ + {1, InternalKey("a", 1, kTypeValue), InternalKey("aa", 2, kTypeValue), 100}, + {1, InternalKey("b", 1, kTypeValue), InternalKey("c", 2, kTypeValue), 100}, + {1, InternalKey("c1", 1, kTypeValue), InternalKey("d", 2, kTypeValue), 100}, + {1, InternalKey("d", 1, kTypeValue), InternalKey("e", 1, kTypeValue), 100}, + }; + AddVersionToVersionSet(version_metas); + // select from mid_index = 1 + InternalKey begin = InternalKey("b", 1, kTypeValue); + InternalKey end = InternalKey("c", 2, kTypeValue); + std::vector inputs; + // not found clean cut + t_vset->current_->GetCleanCutInputsWithinInterval(1, &begin, &end, &inputs); + ASSERT_EQ(1, inputs.size()); +} + +TEST(VersionSetTest, GenerateSubCompaction0) { // DummyCompactStrategyFactory + opt.compact_strategy_factory = new DummyCompactStrategyFactory(); + std::vector version_metas = { + {1, InternalKey("a", 1, kTypeValue), InternalKey("z", 1, kTypeValue), 100}, + {2, InternalKey("b", 1, kTypeValue), InternalKey("c", 1, kTypeValue), 100}, + {2, InternalKey("d", 1, kTypeValue), InternalKey("e", 1, kTypeValue), 100}, + {2, InternalKey("f", 1, kTypeValue), InternalKey("g", 1, kTypeValue), 100}, + {2, InternalKey("h", 1, kTypeValue), InternalKey("i", 1, kTypeValue), 100}, + }; + AddVersionToVersionSet(version_metas); + std::vector inputs = t_vset->current_->files_[1]; + std::vector output_inputs = t_vset->current_->files_[2]; + + Compaction* c = new Compaction(1); // level 1 + c->SetNonTrivial(true); + c->input_version_ = t_vset->current_; + c->input_version_->Ref(); + c->set_output_level(2); // output level 2 + c->max_output_file_size_ = 200; + c->inputs_[0] = inputs; + c->inputs_[1] = output_inputs; + c->smallest_internal_key_ = InternalKey("a", 1, kTypeValue); + c->largest_internal_key_ = InternalKey("z", 1, kTypeValue); + + t_mu.Lock(); + std::vector compaction_vec; + t_vset->GenerateSubCompaction(c, &compaction_vec, &t_mu); + t_mu.Unlock(); + // check + ASSERT_EQ(2, compaction_vec.size()); + ASSERT_EQ((compaction_vec[0])->sub_compact_end_, + GetInternalKeyStr0("f", kMaxSequenceNumber, kValueTypeForSeek)); + ASSERT_EQ((compaction_vec[1])->sub_compact_start_, + GetInternalKeyStr0("f", kMaxSequenceNumber, kValueTypeForSeek)); + + c->input_version_->Unref(); +} + +TEST(VersionSetTest, GenerateSubCompaction1) { // DefaultCompactStrategyFactory + opt.compact_strategy_factory = + new tera::io::DefaultCompactStrategyFactory(tera::DefaultTableSchema()); + std::vector version_metas = { + /* level, smallest, largest, file_size */ + BuildLightFileMeta(1, 100, "a", "cf", "qu", 123, TKT_VALUE, "z", "cf", "qu", 123, TKT_VALUE), + BuildLightFileMeta(2, 100, "b", "cf", "qu", 123, TKT_VALUE, "c", "cf", "qu", 123, TKT_VALUE), + BuildLightFileMeta(2, 100, "d", "cf", "qu", 123, TKT_VALUE, "e", "cf", "qu", 123, TKT_VALUE), + BuildLightFileMeta(2, 100, "f", "cf", "qu", 123, TKT_VALUE, "g", "cf", "qu", 123, TKT_VALUE), + BuildLightFileMeta(2, 100, "h", "cf", "qu", 123, TKT_VALUE, "i", "cf", "qu", 123, TKT_VALUE), + }; + AddVersionToVersionSet(version_metas); + std::vector inputs = t_vset->current_->files_[1]; + std::vector output_inputs = t_vset->current_->files_[2]; + + Compaction* c = new Compaction(1); // level 1 + c->SetNonTrivial(true); + c->input_version_ = t_vset->current_; + c->input_version_->Ref(); + c->set_output_level(2); // output level 2 + c->max_output_file_size_ = 200; + c->inputs_[0] = inputs; + c->inputs_[1] = output_inputs; + c->smallest_internal_key_ = BuildInternalKey("a", "cf", "qu", 123, TKT_VALUE); + c->largest_internal_key_ = BuildInternalKey("z", "cf", "qu", 123, TKT_VALUE); + + t_mu.Lock(); + std::vector compaction_vec; + t_vset->GenerateSubCompaction(c, &compaction_vec, &t_mu); + t_mu.Unlock(); + // check + ASSERT_EQ(2, compaction_vec.size()); + ASSERT_EQ((compaction_vec[0])->sub_compact_end_, + GetInternalKeyStr("f", kMaxSequenceNumber, kValueTypeForSeek)); + ASSERT_EQ((compaction_vec[1])->sub_compact_start_, + GetInternalKeyStr("f", kMaxSequenceNumber, kValueTypeForSeek)); -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); + c->input_version_->Unref(); } + +TEST(VersionSetTest, GenerateSubCompaction2) { // DefaultCompactStrategyFactory with del mark + raw_type = tera::RawKey::Binary; + opt.compact_strategy_factory = new tera::io::DefaultCompactStrategyFactory(SetTableSchema()); + std::vector version_metas = { + /* level, smallest, largest, file_size */ + BuildLightFileMeta(1, 100, "a", "cf", "qu", 123, TKT_VALUE, "p", "", "", 123, TKT_DEL), + BuildLightFileMeta(1, 100, "p", "cf", "qu2", 123, TKT_VALUE, "z", "cf", "qu", 123, TKT_VALUE), + BuildLightFileMeta(2, 100, "b", "cf", "qu", 123, TKT_VALUE, "c", "cf", "qu", 123, TKT_VALUE), + BuildLightFileMeta(2, 100, "d", "cf", "qu", 123, TKT_VALUE, "e", "cf", "qu", 123, TKT_VALUE), + BuildLightFileMeta(2, 100, "f", "cf", "qu", 123, TKT_VALUE, "g", "cf", "qu", 123, TKT_VALUE), + BuildLightFileMeta(2, 100, "h", "cf", "qu", 123, TKT_VALUE, "i", "cf", "qu", 123, TKT_VALUE), + }; + AddVersionToVersionSet(version_metas); + std::vector inputs = t_vset->current_->files_[1]; + std::vector output_inputs = t_vset->current_->files_[2]; + + Compaction* c = new Compaction(1); // level 1 + c->SetNonTrivial(true); + c->input_version_ = t_vset->current_; + c->input_version_->Ref(); + c->set_output_level(2); // output level 2 + c->max_output_file_size_ = 200; + c->inputs_[0] = inputs; + c->inputs_[1] = output_inputs; + c->smallest_internal_key_ = BuildInternalKey("a", "cf", "qu", 123, TKT_VALUE); + c->largest_internal_key_ = BuildInternalKey("z", "cf", "qu", 123, TKT_VALUE); + + t_mu.Lock(); + std::vector compaction_vec; + t_vset->GenerateSubCompaction(c, &compaction_vec, &t_mu); + t_mu.Unlock(); + // check + ASSERT_EQ(3, compaction_vec.size()); + ASSERT_EQ((compaction_vec[0])->sub_compact_end_, + GetInternalKeyStr("f", kMaxSequenceNumber, kValueTypeForSeek)); + ASSERT_EQ((compaction_vec[1])->sub_compact_start_, + GetInternalKeyStr("f", kMaxSequenceNumber, kValueTypeForSeek)); + ASSERT_EQ((compaction_vec[1])->sub_compact_end_, + GetInternalKeyStr("p", kMaxSequenceNumber, kValueTypeForSeek)); + ASSERT_EQ((compaction_vec[2])->sub_compact_start_, + GetInternalKeyStr("p", kMaxSequenceNumber, kValueTypeForSeek)); + + c->input_version_->Unref(); +} + +TEST(VersionSetTest, GenerateSubCompaction3) { // DefaultCompactStrategyFactory with del mark + raw_type = tera::RawKey::Readable; + opt.compact_strategy_factory = new tera::io::DefaultCompactStrategyFactory(SetTableSchema()); + std::vector version_metas = { + /* level, smallest, largest, file_size */ + BuildLightFileMeta(1, 100, "a", "cf", "qu", 123, TKT_VALUE, "p", "", "", 123, TKT_DEL), + BuildLightFileMeta(1, 100, "p", "cf", "qu2", 123, TKT_VALUE, "z", "cf", "qu", 123, TKT_VALUE), + BuildLightFileMeta(2, 100, "b", "cf", "qu", 123, TKT_VALUE, "c", "cf", "qu", 123, TKT_VALUE), + BuildLightFileMeta(2, 100, "d", "cf", "qu", 123, TKT_VALUE, "e", "cf", "qu", 123, TKT_VALUE), + BuildLightFileMeta(2, 100, "f", "cf", "qu", 123, TKT_VALUE, "g", "cf", "qu", 123, TKT_VALUE), + BuildLightFileMeta(2, 100, "h", "cf", "qu", 123, TKT_VALUE, "i", "cf", "qu", 123, TKT_VALUE), + }; + AddVersionToVersionSet(version_metas); + std::vector inputs = t_vset->current_->files_[1]; + std::vector output_inputs = t_vset->current_->files_[2]; + + Compaction* c = new Compaction(1); // level 1 + c->SetNonTrivial(true); + c->input_version_ = t_vset->current_; + c->input_version_->Ref(); + c->set_output_level(2); // output level 2 + c->max_output_file_size_ = 200; + c->inputs_[0] = inputs; + c->inputs_[1] = output_inputs; + c->smallest_internal_key_ = BuildInternalKey("a", "cf", "qu", 123, TKT_VALUE); + c->largest_internal_key_ = BuildInternalKey("z", "cf", "qu", 123, TKT_VALUE); + + t_mu.Lock(); + std::vector compaction_vec; + t_vset->GenerateSubCompaction(c, &compaction_vec, &t_mu); + t_mu.Unlock(); + // check + ASSERT_EQ(3, compaction_vec.size()); + ASSERT_EQ((compaction_vec[0])->sub_compact_end_, + GetInternalKeyStr("f", kMaxSequenceNumber, kValueTypeForSeek)); + ASSERT_EQ((compaction_vec[1])->sub_compact_start_, + GetInternalKeyStr("f", kMaxSequenceNumber, kValueTypeForSeek)); + ASSERT_EQ((compaction_vec[1])->sub_compact_end_, + GetInternalKeyStr("p", kMaxSequenceNumber, kValueTypeForSeek)); + ASSERT_EQ((compaction_vec[2])->sub_compact_start_, + GetInternalKeyStr("p", kMaxSequenceNumber, kValueTypeForSeek)); + + c->input_version_->Unref(); +} + +TEST(VersionSetTest, OldDescriptorAndNewDescriptorCompatible) { + VersionEdit edit; + + // old descriptor + edit.SetComparatorName(leveldb::BytewiseComparator()->Name()); + + t_mu.Lock(); + t_vset->LogAndApply(&edit, &t_mu); + t_mu.Unlock(); + Status s = t_vset->Recover(); + ASSERT_TRUE(s.ok()); + + // new descriptor + edit.SetStartKey("user1"); + edit.SetEndKey("user100"); + t_mu.Lock(); + t_vset->LogAndApply(&edit, &t_mu); + t_mu.Unlock(); + + s = t_vset->Recover(); + ASSERT_TRUE(s.ok()); +} + +} // namespace leveldb + +int main(int argc, char** argv) { return leveldb::test::RunAllTests(); } diff --git a/src/leveldb/db/write_batch.cc b/src/leveldb/db/write_batch.cc index 7abcf7262..c3f14c98c 100644 --- a/src/leveldb/db/write_batch.cc +++ b/src/leveldb/db/write_batch.cc @@ -33,22 +33,18 @@ namespace leveldb { // WriteBatch header has an 8-byte sequence number followed by a 4-byte count. static const size_t kHeader = 12; -WriteBatch::WriteBatch() { - Clear(); -} +WriteBatch::WriteBatch() { Clear(); } -WriteBatch::~WriteBatch() { } +WriteBatch::~WriteBatch() {} -WriteBatch::Handler::~Handler() { } +WriteBatch::Handler::~Handler() {} void WriteBatch::Clear() { rep_.clear(); rep_.resize(kHeader); } -size_t WriteBatch::DataSize() { - return rep_.size(); -} +size_t WriteBatch::DataSize() { return rep_.size(); } Status WriteBatch::Iterate(Handler* handler) const { Slice input(rep_); @@ -65,8 +61,7 @@ Status WriteBatch::Iterate(Handler* handler) const { input.remove_prefix(1); switch (tag) { case kTypeValue: - if (GetLengthPrefixedSlice(&input, &key) && - GetLengthPrefixedSlice(&input, &value)) { + if (GetLengthPrefixedSlice(&input, &key) && GetLengthPrefixedSlice(&input, &value)) { handler->Put(key, value); } else { return Status::Corruption("bad WriteBatch Put"); @@ -109,9 +104,9 @@ Status WriteBatch::SeperateLocalityGroup(std::vector* lg_bw) const } else { Slice tmp_key = key; if (!GetFixed32LGId(&tmp_key, &lg_id)) { - lg_id = 0; + lg_id = 0; } else { - key = tmp_key; + key = tmp_key; } assert(lg_id < lg_bw->size()); if ((*lg_bw)[lg_id] == NULL) { @@ -136,8 +131,7 @@ Status WriteBatch::SeperateLocalityGroup(std::vector* lg_bw) const } } - uint64_t last_sequence = WriteBatchInternal::Sequence(this) - + WriteBatchInternal::Count(this) - 1; + uint64_t last_sequence = WriteBatchInternal::Sequence(this) + WriteBatchInternal::Count(this) - 1; for (uint32_t i = 0; i < lg_bw->size(); ++i) { if ((*lg_bw)[i] == NULL) { (*lg_bw)[i] = new WriteBatch(); @@ -153,13 +147,9 @@ Status WriteBatch::SeperateLocalityGroup(std::vector* lg_bw) const } } -int WriteBatchInternal::Count(const WriteBatch* b) { - return DecodeFixed32(b->rep_.data() + 8); -} +int WriteBatchInternal::Count(const WriteBatch* b) { return DecodeFixed32(b->rep_.data() + 8); } -void WriteBatchInternal::SetCount(WriteBatch* b, int n) { - EncodeFixed32(&b->rep_[8], n); -} +void WriteBatchInternal::SetCount(WriteBatch* b, int n) { EncodeFixed32(&b->rep_[8], n); } SequenceNumber WriteBatchInternal::Sequence(const WriteBatch* b) { return SequenceNumber(DecodeFixed64(b->rep_.data())); @@ -199,8 +189,7 @@ class MemTableInserter : public WriteBatch::Handler { }; } // namespace -Status WriteBatchInternal::InsertInto(const WriteBatch* b, - MemTable* memtable) { +Status WriteBatchInternal::InsertInto(const WriteBatch* b, MemTable* memtable) { MemTableInserter inserter; inserter.sequence_ = WriteBatchInternal::Sequence(b); inserter.mem_ = memtable; diff --git a/src/leveldb/db/write_batch_internal.h b/src/leveldb/db/write_batch_internal.h index fa1d95777..d5785646b 100644 --- a/src/leveldb/db/write_batch_internal.h +++ b/src/leveldb/db/write_batch_internal.h @@ -32,13 +32,9 @@ class WriteBatchInternal { // this batch. static void SetSequence(WriteBatch* batch, SequenceNumber seq); - static Slice Contents(const WriteBatch* batch) { - return Slice(batch->rep_); - } + static Slice Contents(const WriteBatch* batch) { return Slice(batch->rep_); } - static size_t ByteSize(const WriteBatch* batch) { - return batch->rep_.size(); - } + static size_t ByteSize(const WriteBatch* batch) { return batch->rep_.size(); } static void SetContents(WriteBatch* batch, const Slice& contents); @@ -49,5 +45,4 @@ class WriteBatchInternal { } // namespace leveldb - #endif // STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_ diff --git a/src/leveldb/db/write_batch_test.cc b/src/leveldb/db/write_batch_test.cc index c30079d73..2cc72cc04 100644 --- a/src/leveldb/db/write_batch_test.cc +++ b/src/leveldb/db/write_batch_test.cc @@ -18,7 +18,7 @@ namespace leveldb { static std::string PrintContents(WriteBatch* b) { InternalKeyComparator cmp(BytewiseComparator()); - MemTable* mem = new MemTable(cmp); + MemTable* mem = new BaseMemTable(cmp, nullptr); mem->Ref(); std::string state; Status s = WriteBatchInternal::InsertInto(b, mem); @@ -56,7 +56,7 @@ static std::string PrintContents(WriteBatch* b) { return state; } -class WriteBatchTest { }; +class WriteBatchTest {}; TEST(WriteBatchTest, Empty) { WriteBatch batch; @@ -72,10 +72,11 @@ TEST(WriteBatchTest, Multiple) { WriteBatchInternal::SetSequence(&batch, 100); ASSERT_EQ(100u, WriteBatchInternal::Sequence(&batch)); ASSERT_EQ(3, WriteBatchInternal::Count(&batch)); - ASSERT_EQ("Put(baz, boo)@102" - "Delete(box)@101" - "Put(foo, bar)@100", - PrintContents(&batch)); + ASSERT_EQ( + "Put(baz, boo)@102" + "Delete(box)@101" + "Put(foo, bar)@100", + PrintContents(&batch)); } TEST(WriteBatchTest, Corruption) { @@ -84,11 +85,11 @@ TEST(WriteBatchTest, Corruption) { batch.Delete(Slice("box")); WriteBatchInternal::SetSequence(&batch, 200); Slice contents = WriteBatchInternal::Contents(&batch); - WriteBatchInternal::SetContents(&batch, - Slice(contents.data(),contents.size()-1)); - ASSERT_EQ("Put(foo, bar)@200" - "ParseError()", - PrintContents(&batch)); + WriteBatchInternal::SetContents(&batch, Slice(contents.data(), contents.size() - 1)); + ASSERT_EQ( + "Put(foo, bar)@200" + "ParseError()", + PrintContents(&batch)); } TEST(WriteBatchTest, Append) { @@ -96,29 +97,27 @@ TEST(WriteBatchTest, Append) { WriteBatchInternal::SetSequence(&b1, 200); WriteBatchInternal::SetSequence(&b2, 300); WriteBatchInternal::Append(&b1, &b2); - ASSERT_EQ("", - PrintContents(&b1)); + ASSERT_EQ("", PrintContents(&b1)); b2.Put("a", "va"); WriteBatchInternal::Append(&b1, &b2); - ASSERT_EQ("Put(a, va)@200", - PrintContents(&b1)); + ASSERT_EQ("Put(a, va)@200", PrintContents(&b1)); b2.Clear(); b2.Put("b", "vb"); WriteBatchInternal::Append(&b1, &b2); - ASSERT_EQ("Put(a, va)@200" - "Put(b, vb)@201", - PrintContents(&b1)); + ASSERT_EQ( + "Put(a, va)@200" + "Put(b, vb)@201", + PrintContents(&b1)); b2.Delete("foo"); WriteBatchInternal::Append(&b1, &b2); - ASSERT_EQ("Put(a, va)@200" - "Put(b, vb)@202" - "Put(b, vb)@201" - "Delete(foo)@203", - PrintContents(&b1)); + ASSERT_EQ( + "Put(a, va)@200" + "Put(b, vb)@202" + "Put(b, vb)@201" + "Delete(foo)@203", + PrintContents(&b1)); } } // namespace leveldb -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} +int main(int argc, char** argv) { return leveldb::test::RunAllTests(); } diff --git a/src/leveldb/helpers/memenv/memenv.cc b/src/leveldb/helpers/memenv/memenv.cc index 98669c37e..a234b70a3 100644 --- a/src/leveldb/helpers/memenv/memenv.cc +++ b/src/leveldb/helpers/memenv/memenv.cc @@ -127,9 +127,8 @@ class FileState { private: // Private since only Unref() should be used to delete it. ~FileState() { - for (std::vector::iterator i = blocks_.begin(); i != blocks_.end(); - ++i) { - delete [] *i; + for (std::vector::iterator i = blocks_.begin(); i != blocks_.end(); ++i) { + delete[] * i; } } @@ -151,13 +150,9 @@ class FileState { class SequentialFileImpl : public SequentialFile { public: - explicit SequentialFileImpl(FileState* file) : file_(file), pos_(0) { - file_->Ref(); - } + explicit SequentialFileImpl(FileState* file) : file_(file), pos_(0) { file_->Ref(); } - ~SequentialFileImpl() { - file_->Unref(); - } + ~SequentialFileImpl() { file_->Unref(); } virtual Status Read(size_t n, Slice* result, char* scratch) { Status s = file_->Read(pos_, n, result, scratch); @@ -186,16 +181,11 @@ class SequentialFileImpl : public SequentialFile { class RandomAccessFileImpl : public RandomAccessFile { public: - explicit RandomAccessFileImpl(FileState* file) : file_(file) { - file_->Ref(); - } + explicit RandomAccessFileImpl(FileState* file) : file_(file) { file_->Ref(); } - ~RandomAccessFileImpl() { - file_->Unref(); - } + ~RandomAccessFileImpl() { file_->Unref(); } - virtual Status Read(uint64_t offset, size_t n, Slice* result, - char* scratch) const { + virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const { return file_->Read(offset, n, result, scratch); } @@ -205,17 +195,11 @@ class RandomAccessFileImpl : public RandomAccessFile { class WritableFileImpl : public WritableFile { public: - WritableFileImpl(FileState* file) : file_(file) { - file_->Ref(); - } + WritableFileImpl(FileState* file) : file_(file) { file_->Ref(); } - ~WritableFileImpl() { - file_->Unref(); - } + ~WritableFileImpl() { file_->Unref(); } - virtual Status Append(const Slice& data) { - return file_->Append(data); - } + virtual Status Append(const Slice& data) { return file_->Append(data); } virtual Status Close() { return Status::OK(); } virtual Status Flush() { return Status::OK(); } @@ -227,22 +211,21 @@ class WritableFileImpl : public WritableFile { class NoOpLogger : public Logger { public: - virtual void Logv(const char* format, va_list ap) { } + virtual void Logv(const char* file, int64_t line, const char* format, va_list ap) {} }; class MemoryEnv : public EnvWrapper { public: - explicit MemoryEnv(Env* base_env) : EnvWrapper(base_env) { } + explicit MemoryEnv(Env* base_env) : EnvWrapper(base_env) {} virtual ~MemoryEnv() { - for (FileSystem::iterator i = file_map_.begin(); i != file_map_.end(); ++i){ + for (FileSystem::iterator i = file_map_.begin(); i != file_map_.end(); ++i) { i->second->Unref(); } } // Partial implementation of the Env interface. - virtual Status NewSequentialFile(const std::string& fname, - SequentialFile** result) { + virtual Status NewSequentialFile(const std::string& fname, SequentialFile** result) { MutexLock lock(&mutex_); if (file_map_.find(fname) == file_map_.end()) { *result = NULL; @@ -253,8 +236,7 @@ class MemoryEnv : public EnvWrapper { return Status::OK(); } - virtual Status NewRandomAccessFile(const std::string& fname, - RandomAccessFile** result, + virtual Status NewRandomAccessFile(const std::string& fname, RandomAccessFile** result, const EnvOptions& options) { MutexLock lock(&mutex_); if (file_map_.find(fname) == file_map_.end()) { @@ -266,8 +248,8 @@ class MemoryEnv : public EnvWrapper { return Status::OK(); } - virtual Status NewWritableFile(const std::string& fname, - WritableFile** result, const EnvOptions&) { + virtual Status NewWritableFile(const std::string& fname, WritableFile** result, + const EnvOptions&) { MutexLock lock(&mutex_); if (file_map_.find(fname) != file_map_.end()) { DeleteFileInternal(fname); @@ -294,7 +276,7 @@ class MemoryEnv : public EnvWrapper { MutexLock lock(&mutex_); result->clear(); - for (FileSystem::iterator i = file_map_.begin(); i != file_map_.end(); ++i){ + for (FileSystem::iterator i = file_map_.begin(); i != file_map_.end(); ++i) { const std::string& filename = i->first; if (filename.size() >= dir.size() + 1 && filename[dir.size()] == '/' && @@ -325,13 +307,9 @@ class MemoryEnv : public EnvWrapper { return Status::OK(); } - virtual Status CreateDir(const std::string& dirname) { - return Status::OK(); - } + virtual Status CreateDir(const std::string& dirname) { return Status::OK(); } - virtual Status DeleteDir(const std::string& dirname) { - return Status::OK(); - } + virtual Status DeleteDir(const std::string& dirname) { return Status::OK(); } virtual Status GetFileSize(const std::string& fname, uint64_t* file_size) { MutexLock lock(&mutex_); @@ -343,8 +321,7 @@ class MemoryEnv : public EnvWrapper { return Status::OK(); } - virtual Status RenameFile(const std::string& src, - const std::string& target) { + virtual Status RenameFile(const std::string& src, const std::string& target) { MutexLock lock(&mutex_); if (file_map_.find(src) == file_map_.end()) { return Status::IOError(src, "File not found"); @@ -373,7 +350,7 @@ class MemoryEnv : public EnvWrapper { return Status::OK(); } - virtual Status NewLogger(const std::string& fname, Logger** result) { + virtual Status NewLogger(const std::string& fname, const LogOption& opt, Logger** result) { *result = new NoOpLogger; return Status::OK(); } @@ -387,8 +364,6 @@ class MemoryEnv : public EnvWrapper { } // namespace -Env* NewMemEnv(Env* base_env) { - return new MemoryEnv(base_env); -} +Env* NewMemEnv(Env* base_env) { return new MemoryEnv(base_env); } } // namespace leveldb diff --git a/src/leveldb/helpers/memenv/memenv_test.cc b/src/leveldb/helpers/memenv/memenv_test.cc index 3ef5e51a6..9c1f3563f 100644 --- a/src/leveldb/helpers/memenv/memenv_test.cc +++ b/src/leveldb/helpers/memenv/memenv_test.cc @@ -21,12 +21,8 @@ class MemEnvTest { public: Env* env_; - MemEnvTest() - : env_(NewMemEnv(Env::Default())) { - } - ~MemEnvTest() { - delete env_; - } + MemEnvTest() : env_(NewMemEnv(Env::Default())) {} + ~MemEnvTest() { delete env_; } }; TEST(MemEnvTest, Basics) { @@ -104,25 +100,25 @@ TEST(MemEnvTest, ReadWrite) { // Read sequentially. ASSERT_OK(env_->NewSequentialFile("/dir/f", &seq_file)); - ASSERT_OK(seq_file->Read(5, &result, scratch)); // Read "hello". + ASSERT_OK(seq_file->Read(5, &result, scratch)); // Read "hello". ASSERT_EQ(0, result.compare("hello")); ASSERT_OK(seq_file->Skip(1)); - ASSERT_OK(seq_file->Read(1000, &result, scratch)); // Read "world". + ASSERT_OK(seq_file->Read(1000, &result, scratch)); // Read "world". ASSERT_EQ(0, result.compare("world")); - ASSERT_OK(seq_file->Read(1000, &result, scratch)); // Try reading past EOF. + ASSERT_OK(seq_file->Read(1000, &result, scratch)); // Try reading past EOF. ASSERT_EQ(0u, result.size()); - ASSERT_OK(seq_file->Skip(100)); // Try to skip past end of file. + ASSERT_OK(seq_file->Skip(100)); // Try to skip past end of file. ASSERT_OK(seq_file->Read(1000, &result, scratch)); ASSERT_EQ(0u, result.size()); delete seq_file; // Random reads. ASSERT_OK(env_->NewRandomAccessFile("/dir/f", &rand_file, EnvOptions())); - ASSERT_OK(rand_file->Read(6, 5, &result, scratch)); // Read "world". + ASSERT_OK(rand_file->Read(6, 5, &result, scratch)); // Read "world". ASSERT_EQ(0, result.compare("world")); - ASSERT_OK(rand_file->Read(0, 5, &result, scratch)); // Read "hello". + ASSERT_OK(rand_file->Read(0, 5, &result, scratch)); // Read "hello". ASSERT_EQ(0, result.compare("hello")); - ASSERT_OK(rand_file->Read(10, 100, &result, scratch)); // Read "d". + ASSERT_OK(rand_file->Read(10, 100, &result, scratch)); // Read "d". ASSERT_EQ(0, result.compare("d")); // Too high offset. @@ -171,7 +167,7 @@ TEST(MemEnvTest, LargeWrite) { SequentialFile* seq_file; Slice result; ASSERT_OK(env_->NewSequentialFile("/dir/f", &seq_file)); - ASSERT_OK(seq_file->Read(3, &result, scratch)); // Read "foo". + ASSERT_OK(seq_file->Read(3, &result, scratch)); // Read "foo". ASSERT_EQ(0, result.compare("foo")); size_t read = 0; @@ -183,7 +179,7 @@ TEST(MemEnvTest, LargeWrite) { } ASSERT_TRUE(write_data == read_data); delete seq_file; - delete [] scratch; + delete[] scratch; } TEST(MemEnvTest, DBTest) { @@ -230,6 +226,4 @@ TEST(MemEnvTest, DBTest) { } // namespace leveldb -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} +int main(int argc, char** argv) { return leveldb::test::RunAllTests(); } diff --git a/src/leveldb/include/hdfs.h b/src/leveldb/include/hdfs.h index ac0ca4f2b..703e74861 100644 --- a/src/leveldb/include/hdfs.h +++ b/src/leveldb/include/hdfs.h @@ -44,421 +44,387 @@ #define EINTERNAL 255 #endif - /** All APIs set errno to meaningful values */ #ifdef __cplusplus -extern "C" { +extern "C" { #endif - /** - * Some utility decls used in libhdfs. - */ - - typedef int32_t tSize; /// size of data for read/write io ops - typedef time_t tTime; /// time type in seconds - typedef int64_t tOffset;/// offset within the file - typedef uint16_t tPort; /// port - typedef enum tObjectKind { - kObjectKindFile = 'F', - kObjectKindDirectory = 'D', - } tObjectKind; - - - /** - * The C reflection of org.apache.org.hadoop.FileSystem . - */ - typedef void* hdfsFS; - - - /** - * The C equivalent of org.apache.org.hadoop.FSData(Input|Output)Stream . - */ - enum hdfsStreamType - { - UNINITIALIZED = 0, - INPUT = 1, - OUTPUT = 2, - }; - - - /** - * The 'file-handle' to a file in hdfs. - */ - struct hdfsFile_internal { - void* file; - enum hdfsStreamType type; - }; - typedef struct hdfsFile_internal* hdfsFile; - - - /** - * hdfsConnectAsUser - Connect to a hdfs file system as a specific user - * Connect to the hdfs. - * @param host A string containing either a host name, or an ip address - * of the namenode of a hdfs cluster. 'host' should be passed as NULL if - * you want to connect to local filesystem. 'host' should be passed as - * 'default' (and port as 0) to used the 'configured' filesystem - * (core-site/core-default.xml). - * @param port The port on which the server is listening. - * @param user the user name (this is hadoop domain user). Or NULL is equivelant to hhdfsConnect(host, port) - * @return Returns a handle to the filesystem or NULL on error. - */ - hdfsFS hdfsConnectAsUser(const char* host, tPort port, const char *user); - - - /** - * hdfsConnect - Connect to a hdfs file system. - * Connect to the hdfs. - * @param host A string containing either a host name, or an ip address - * of the namenode of a hdfs cluster. 'host' should be passed as NULL if - * you want to connect to local filesystem. 'host' should be passed as - * 'default' (and port as 0) to used the 'configured' filesystem - * (core-site/core-default.xml). - * @param port The port on which the server is listening. - * @return Returns a handle to the filesystem or NULL on error. - */ - hdfsFS hdfsConnect(const char* host, tPort port); - - - /** - * hdfsDisconnect - Disconnect from the hdfs file system. - * Disconnect from hdfs. - * @param fs The configured filesystem handle. - * @return Returns 0 on success, -1 on error. - */ - int hdfsDisconnect(hdfsFS fs); - - - /** - * hdfsOpenFile - Open a hdfs file in given mode. - * @param fs The configured filesystem handle. - * @param path The full path to the file. - * @param flags - an | of bits/fcntl.h file flags - supported flags are O_RDONLY, O_WRONLY (meaning create or overwrite i.e., implies O_TRUNCAT), - * O_WRONLY|O_APPEND. Other flags are generally ignored other than (O_RDWR || (O_EXCL & O_CREAT)) which return NULL and set errno equal ENOTSUP. - * @param bufferSize Size of buffer for read/write - pass 0 if you want - * to use the default configured values. - * @param replication Block replication - pass 0 if you want to use - * the default configured values. - * @param blocksize Size of block - pass 0 if you want to use the - * default configured values. - * @return Returns the handle to the open file or NULL on error. - */ - hdfsFile hdfsOpenFile(hdfsFS fs, const char* path, int flags, - int bufferSize, short replication, tSize blocksize); - - - /** - * hdfsCloseFile - Close an open file. - * @param fs The configured filesystem handle. - * @param file The file handle. - * @return Returns 0 on success, -1 on error. - */ - int hdfsCloseFile(hdfsFS fs, hdfsFile file); - - - /** - * hdfsExists - Checks if a given path exsits on the filesystem - * @param fs The configured filesystem handle. - * @param path The path to look for - * @return Returns 0 on success, -1 on error. - */ - int hdfsExists(hdfsFS fs, const char *path); - - - /** - * hdfsSeek - Seek to given offset in file. - * This works only for files opened in read-only mode. - * @param fs The configured filesystem handle. - * @param file The file handle. - * @param desiredPos Offset into the file to seek into. - * @return Returns 0 on success, -1 on error. - */ - int hdfsSeek(hdfsFS fs, hdfsFile file, tOffset desiredPos); - - - /** - * hdfsTell - Get the current offset in the file, in bytes. - * @param fs The configured filesystem handle. - * @param file The file handle. - * @return Current offset, -1 on error. - */ - tOffset hdfsTell(hdfsFS fs, hdfsFile file); - - - /** - * hdfsRead - Read data from an open file. - * @param fs The configured filesystem handle. - * @param file The file handle. - * @param buffer The buffer to copy read bytes into. - * @param length The length of the buffer. - * @return Returns the number of bytes actually read, possibly less - * than than length;-1 on error. - */ - tSize hdfsRead(hdfsFS fs, hdfsFile file, void* buffer, tSize length); - - - /** - * hdfsPread - Positional read of data from an open file. - * @param fs The configured filesystem handle. - * @param file The file handle. - * @param position Position from which to read - * @param buffer The buffer to copy read bytes into. - * @param length The length of the buffer. - * @return Returns the number of bytes actually read, possibly less than - * than length;-1 on error. - */ - tSize hdfsPread(hdfsFS fs, hdfsFile file, tOffset position, - void* buffer, tSize length); - - - /** - * hdfsWrite - Write data into an open file. - * @param fs The configured filesystem handle. - * @param file The file handle. - * @param buffer The data. - * @param length The no. of bytes to write. - * @return Returns the number of bytes written, -1 on error. - */ - tSize hdfsWrite(hdfsFS fs, hdfsFile file, const void* buffer, - tSize length); - - - /** - * hdfsWrite - Flush the data. - * @param fs The configured filesystem handle. - * @param file The file handle. - * @return Returns 0 on success, -1 on error. - */ - int hdfsFlush(hdfsFS fs, hdfsFile file); - - - /** - * hdfsSync - Sync the data to persistent store. - * @param fs The configured filesystem handle. - * @param file The file handle. - * @return Returns 0 on success, -1 on error. - */ - int hdfsSync(hdfsFS fs, hdfsFile file); - - /** - * hdfsAvailable - Number of bytes that can be read from this - * input stream without blocking. - * @param fs The configured filesystem handle. - * @param file The file handle. - * @return Returns available bytes; -1 on error. - */ - int hdfsAvailable(hdfsFS fs, hdfsFile file); - - - /** - * hdfsCopy - Copy file from one filesystem to another. - * @param srcFS The handle to source filesystem. - * @param src The path of source file. - * @param dstFS The handle to destination filesystem. - * @param dst The path of destination file. - * @return Returns 0 on success, -1 on error. - */ - int hdfsCopy(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst); - - - /** - * hdfsMove - Move file from one filesystem to another. - * @param srcFS The handle to source filesystem. - * @param src The path of source file. - * @param dstFS The handle to destination filesystem. - * @param dst The path of destination file. - * @return Returns 0 on success, -1 on error. - */ - int hdfsMove(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst); - - - /** - * hdfsDelete - Delete file. - * @param fs The configured filesystem handle. - * @param path The path of the file. - * @return Returns 0 on success, -1 on error. - */ - int hdfsDelete(hdfsFS fs, const char* path); - - - /** - * hdfsRename - Rename file. - * @param fs The configured filesystem handle. - * @param oldPath The path of the source file. - * @param newPath The path of the destination file. - * @return Returns 0 on success, -1 on error. - */ - int hdfsRename(hdfsFS fs, const char* oldPath, const char* newPath); - - - /** - * hdfsGetWorkingDirectory - Get the current working directory for - * the given filesystem. - * @param fs The configured filesystem handle. - * @param buffer The user-buffer to copy path of cwd into. - * @param bufferSize The length of user-buffer. - * @return Returns buffer, NULL on error. - */ - char* hdfsGetWorkingDirectory(hdfsFS fs, char *buffer, size_t bufferSize); - - - /** - * hdfsSetWorkingDirectory - Set the working directory. All relative - * paths will be resolved relative to it. - * @param fs The configured filesystem handle. - * @param path The path of the new 'cwd'. - * @return Returns 0 on success, -1 on error. - */ - int hdfsSetWorkingDirectory(hdfsFS fs, const char* path); - - - /** - * hdfsCreateDirectory - Make the given file and all non-existent - * parents into directories. - * @param fs The configured filesystem handle. - * @param path The path of the directory. - * @return Returns 0 on success, -1 on error. - */ - int hdfsCreateDirectory(hdfsFS fs, const char* path); - - - /** - * hdfsSetReplication - Set the replication of the specified - * file to the supplied value - * @param fs The configured filesystem handle. - * @param path The path of the file. - * @return Returns 0 on success, -1 on error. - */ - int hdfsSetReplication(hdfsFS fs, const char* path, int16_t replication); - - - /** - * hdfsFileInfo - Information about a file/directory. - */ - typedef struct { - tObjectKind mKind; /* file or directory */ - char *mName; /* the name of the file */ - tTime mLastMod; /* the last modification time for the file in seconds */ - tOffset mSize; /* the size of the file in bytes */ - short mReplication; /* the count of replicas */ - tOffset mBlockSize; /* the block size for the file */ - char *mOwner; /* the owner of the file */ - char *mGroup; /* the group associated with the file */ - short mPermissions; /* the permissions associated with the file */ - tTime mLastAccess; /* the last access time for the file in seconds */ - } hdfsFileInfo; - - - /** - * hdfsListDirectory - Get list of files/directories for a given - * directory-path. hdfsFreeFileInfo should be called to deallocate memory. - * @param fs The configured filesystem handle. - * @param path The path of the directory. - * @param numEntries Set to the number of files/directories in path. - * @return Returns a dynamically-allocated array of hdfsFileInfo - * objects; NULL on error. - */ - hdfsFileInfo *hdfsListDirectory(hdfsFS fs, const char* path, - int *numEntries); - - - /** - * hdfsGetPathInfo - Get information about a path as a (dynamically - * allocated) single hdfsFileInfo struct. hdfsFreeFileInfo should be - * called when the pointer is no longer needed. - * @param fs The configured filesystem handle. - * @param path The path of the file. - * @return Returns a dynamically-allocated hdfsFileInfo object; - * NULL on error. - */ - hdfsFileInfo *hdfsGetPathInfo(hdfsFS fs, const char* path); - - - /** - * hdfsFreeFileInfo - Free up the hdfsFileInfo array (including fields) - * @param hdfsFileInfo The array of dynamically-allocated hdfsFileInfo - * objects. - * @param numEntries The size of the array. - */ - void hdfsFreeFileInfo(hdfsFileInfo *hdfsFileInfo, int numEntries); - - - /** - * hdfsGetHosts - Get hostnames where a particular block (determined by - * pos & blocksize) of a file is stored. The last element in the array - * is NULL. Due to replication, a single block could be present on - * multiple hosts. - * @param fs The configured filesystem handle. - * @param path The path of the file. - * @param start The start of the block. - * @param length The length of the block. - * @return Returns a dynamically-allocated 2-d array of blocks-hosts; - * NULL on error. - */ - char*** hdfsGetHosts(hdfsFS fs, const char* path, - tOffset start, tOffset length); - - - /** - * hdfsFreeHosts - Free up the structure returned by hdfsGetHosts - * @param hdfsFileInfo The array of dynamically-allocated hdfsFileInfo - * objects. - * @param numEntries The size of the array. - */ - void hdfsFreeHosts(char ***blockHosts); - - - /** - * hdfsGetDefaultBlockSize - Get the optimum blocksize. - * @param fs The configured filesystem handle. - * @return Returns the blocksize; -1 on error. - */ - tOffset hdfsGetDefaultBlockSize(hdfsFS fs); - - - /** - * hdfsGetCapacity - Return the raw capacity of the filesystem. - * @param fs The configured filesystem handle. - * @return Returns the raw-capacity; -1 on error. - */ - tOffset hdfsGetCapacity(hdfsFS fs); - - - /** - * hdfsGetUsed - Return the total raw size of all files in the filesystem. - * @param fs The configured filesystem handle. - * @return Returns the total-size; -1 on error. - */ - tOffset hdfsGetUsed(hdfsFS fs); - - /** - * hdfsChown - * @param fs The configured filesystem handle. - * @param path the path to the file or directory - * @param owner this is a string in Hadoop land. Set to null or "" if only setting group - * @param group this is a string in Hadoop land. Set to null or "" if only setting user - * @return 0 on success else -1 - */ - int hdfsChown(hdfsFS fs, const char* path, const char *owner, const char *group); - - /** - * hdfsChmod - * @param fs The configured filesystem handle. - * @param path the path to the file or directory - * @param mode the bitmask to set it to - * @return 0 on success else -1 - */ - int hdfsChmod(hdfsFS fs, const char* path, short mode); - - /** - * hdfsUtime - * @param fs The configured filesystem handle. - * @param path the path to the file or directory - * @param mtime new modification time or 0 for only set access time in seconds - * @param atime new access time or 0 for only set modification time in seconds - * @return 0 on success else -1 - */ - int hdfsUtime(hdfsFS fs, const char* path, tTime mtime, tTime atime); +/** + * Some utility decls used in libhdfs. + */ + +typedef int32_t tSize; /// size of data for read/write io ops +typedef time_t tTime; /// time type in seconds +typedef int64_t tOffset; /// offset within the file +typedef uint16_t tPort; /// port +typedef enum tObjectKind { + kObjectKindFile = 'F', + kObjectKindDirectory = 'D', +} tObjectKind; + +/** + * The C reflection of org.apache.org.hadoop.FileSystem . + */ +typedef void* hdfsFS; + +/** + * The C equivalent of org.apache.org.hadoop.FSData(Input|Output)Stream . + */ +enum hdfsStreamType { + UNINITIALIZED = 0, + INPUT = 1, + OUTPUT = 2, +}; + +/** + * The 'file-handle' to a file in hdfs. + */ +struct hdfsFile_internal { + void* file; + enum hdfsStreamType type; +}; +typedef struct hdfsFile_internal* hdfsFile; + +/** + * hdfsConnectAsUser - Connect to a hdfs file system as a specific user + * Connect to the hdfs. + * @param host A string containing either a host name, or an ip address + * of the namenode of a hdfs cluster. 'host' should be passed as NULL if + * you want to connect to local filesystem. 'host' should be passed as + * 'default' (and port as 0) to used the 'configured' filesystem + * (core-site/core-default.xml). + * @param port The port on which the server is listening. + * @param user the user name (this is hadoop domain user). Or NULL is equivelant + * to hhdfsConnect(host, port) + * @return Returns a handle to the filesystem or NULL on error. + */ +hdfsFS hdfsConnectAsUser(const char* host, tPort port, const char* user); + +/** + * hdfsConnect - Connect to a hdfs file system. + * Connect to the hdfs. + * @param host A string containing either a host name, or an ip address + * of the namenode of a hdfs cluster. 'host' should be passed as NULL if + * you want to connect to local filesystem. 'host' should be passed as + * 'default' (and port as 0) to used the 'configured' filesystem + * (core-site/core-default.xml). + * @param port The port on which the server is listening. + * @return Returns a handle to the filesystem or NULL on error. + */ +hdfsFS hdfsConnect(const char* host, tPort port); + +/** + * hdfsDisconnect - Disconnect from the hdfs file system. + * Disconnect from hdfs. + * @param fs The configured filesystem handle. + * @return Returns 0 on success, -1 on error. + */ +int hdfsDisconnect(hdfsFS fs); + +/** + * hdfsOpenFile - Open a hdfs file in given mode. + * @param fs The configured filesystem handle. + * @param path The full path to the file. + * @param flags - an | of bits/fcntl.h file flags - supported flags are + * O_RDONLY, O_WRONLY (meaning create or overwrite i.e., implies O_TRUNCAT), + * O_WRONLY|O_APPEND. Other flags are generally ignored other than (O_RDWR || + * (O_EXCL & O_CREAT)) which return NULL and set errno equal ENOTSUP. + * @param bufferSize Size of buffer for read/write - pass 0 if you want + * to use the default configured values. + * @param replication Block replication - pass 0 if you want to use + * the default configured values. + * @param blocksize Size of block - pass 0 if you want to use the + * default configured values. + * @return Returns the handle to the open file or NULL on error. + */ +hdfsFile hdfsOpenFile(hdfsFS fs, const char* path, int flags, int bufferSize, short replication, + tSize blocksize); + +/** + * hdfsCloseFile - Close an open file. + * @param fs The configured filesystem handle. + * @param file The file handle. + * @return Returns 0 on success, -1 on error. + */ +int hdfsCloseFile(hdfsFS fs, hdfsFile file); + +/** + * hdfsExists - Checks if a given path exsits on the filesystem + * @param fs The configured filesystem handle. + * @param path The path to look for + * @return Returns 0 on success, -1 on error. + */ +int hdfsExists(hdfsFS fs, const char* path); + +/** + * hdfsSeek - Seek to given offset in file. + * This works only for files opened in read-only mode. + * @param fs The configured filesystem handle. + * @param file The file handle. + * @param desiredPos Offset into the file to seek into. + * @return Returns 0 on success, -1 on error. + */ +int hdfsSeek(hdfsFS fs, hdfsFile file, tOffset desiredPos); + +/** + * hdfsTell - Get the current offset in the file, in bytes. + * @param fs The configured filesystem handle. + * @param file The file handle. + * @return Current offset, -1 on error. + */ +tOffset hdfsTell(hdfsFS fs, hdfsFile file); + +/** + * hdfsRead - Read data from an open file. + * @param fs The configured filesystem handle. + * @param file The file handle. + * @param buffer The buffer to copy read bytes into. + * @param length The length of the buffer. + * @return Returns the number of bytes actually read, possibly less + * than than length;-1 on error. + */ +tSize hdfsRead(hdfsFS fs, hdfsFile file, void* buffer, tSize length); + +/** + * hdfsPread - Positional read of data from an open file. + * @param fs The configured filesystem handle. + * @param file The file handle. + * @param position Position from which to read + * @param buffer The buffer to copy read bytes into. + * @param length The length of the buffer. + * @return Returns the number of bytes actually read, possibly less than + * than length;-1 on error. + */ +tSize hdfsPread(hdfsFS fs, hdfsFile file, tOffset position, void* buffer, tSize length); + +/** + * hdfsWrite - Write data into an open file. + * @param fs The configured filesystem handle. + * @param file The file handle. + * @param buffer The data. + * @param length The no. of bytes to write. + * @return Returns the number of bytes written, -1 on error. + */ +tSize hdfsWrite(hdfsFS fs, hdfsFile file, const void* buffer, tSize length); + +/** + * hdfsWrite - Flush the data. + * @param fs The configured filesystem handle. + * @param file The file handle. + * @return Returns 0 on success, -1 on error. + */ +int hdfsFlush(hdfsFS fs, hdfsFile file); + +/** + * hdfsSync - Sync the data to persistent store. + * @param fs The configured filesystem handle. + * @param file The file handle. + * @return Returns 0 on success, -1 on error. + */ +int hdfsSync(hdfsFS fs, hdfsFile file); + +/** + * hdfsAvailable - Number of bytes that can be read from this + * input stream without blocking. + * @param fs The configured filesystem handle. + * @param file The file handle. + * @return Returns available bytes; -1 on error. + */ +int hdfsAvailable(hdfsFS fs, hdfsFile file); + +/** + * hdfsCopy - Copy file from one filesystem to another. + * @param srcFS The handle to source filesystem. + * @param src The path of source file. + * @param dstFS The handle to destination filesystem. + * @param dst The path of destination file. + * @return Returns 0 on success, -1 on error. + */ +int hdfsCopy(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst); + +/** + * hdfsMove - Move file from one filesystem to another. + * @param srcFS The handle to source filesystem. + * @param src The path of source file. + * @param dstFS The handle to destination filesystem. + * @param dst The path of destination file. + * @return Returns 0 on success, -1 on error. + */ +int hdfsMove(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst); + +/** + * hdfsDelete - Delete file. + * @param fs The configured filesystem handle. + * @param path The path of the file. + * @return Returns 0 on success, -1 on error. + */ +int hdfsDelete(hdfsFS fs, const char* path); + +/** + * hdfsRename - Rename file. + * @param fs The configured filesystem handle. + * @param oldPath The path of the source file. + * @param newPath The path of the destination file. + * @return Returns 0 on success, -1 on error. + */ +int hdfsRename(hdfsFS fs, const char* oldPath, const char* newPath); + +/** + * hdfsGetWorkingDirectory - Get the current working directory for + * the given filesystem. + * @param fs The configured filesystem handle. + * @param buffer The user-buffer to copy path of cwd into. + * @param bufferSize The length of user-buffer. + * @return Returns buffer, NULL on error. + */ +char* hdfsGetWorkingDirectory(hdfsFS fs, char* buffer, size_t bufferSize); + +/** + * hdfsSetWorkingDirectory - Set the working directory. All relative + * paths will be resolved relative to it. + * @param fs The configured filesystem handle. + * @param path The path of the new 'cwd'. + * @return Returns 0 on success, -1 on error. + */ +int hdfsSetWorkingDirectory(hdfsFS fs, const char* path); + +/** + * hdfsCreateDirectory - Make the given file and all non-existent + * parents into directories. + * @param fs The configured filesystem handle. + * @param path The path of the directory. + * @return Returns 0 on success, -1 on error. + */ +int hdfsCreateDirectory(hdfsFS fs, const char* path); + +/** + * hdfsSetReplication - Set the replication of the specified + * file to the supplied value + * @param fs The configured filesystem handle. + * @param path The path of the file. + * @return Returns 0 on success, -1 on error. + */ +int hdfsSetReplication(hdfsFS fs, const char* path, int16_t replication); + +/** + * hdfsFileInfo - Information about a file/directory. + */ +typedef struct { + tObjectKind mKind; /* file or directory */ + char* mName; /* the name of the file */ + tTime mLastMod; /* the last modification time for the file in seconds */ + tOffset mSize; /* the size of the file in bytes */ + short mReplication; /* the count of replicas */ + tOffset mBlockSize; /* the block size for the file */ + char* mOwner; /* the owner of the file */ + char* mGroup; /* the group associated with the file */ + short mPermissions; /* the permissions associated with the file */ + tTime mLastAccess; /* the last access time for the file in seconds */ +} hdfsFileInfo; + +/** + * hdfsListDirectory - Get list of files/directories for a given + * directory-path. hdfsFreeFileInfo should be called to deallocate memory. + * @param fs The configured filesystem handle. + * @param path The path of the directory. + * @param numEntries Set to the number of files/directories in path. + * @return Returns a dynamically-allocated array of hdfsFileInfo + * objects; NULL on error. + */ +hdfsFileInfo* hdfsListDirectory(hdfsFS fs, const char* path, int* numEntries); + +/** + * hdfsGetPathInfo - Get information about a path as a (dynamically + * allocated) single hdfsFileInfo struct. hdfsFreeFileInfo should be + * called when the pointer is no longer needed. + * @param fs The configured filesystem handle. + * @param path The path of the file. + * @return Returns a dynamically-allocated hdfsFileInfo object; + * NULL on error. + */ +hdfsFileInfo* hdfsGetPathInfo(hdfsFS fs, const char* path); + +/** + * hdfsFreeFileInfo - Free up the hdfsFileInfo array (including fields) + * @param hdfsFileInfo The array of dynamically-allocated hdfsFileInfo + * objects. + * @param numEntries The size of the array. + */ +void hdfsFreeFileInfo(hdfsFileInfo* hdfsFileInfo, int numEntries); + +/** + * hdfsGetHosts - Get hostnames where a particular block (determined by + * pos & blocksize) of a file is stored. The last element in the array + * is NULL. Due to replication, a single block could be present on + * multiple hosts. + * @param fs The configured filesystem handle. + * @param path The path of the file. + * @param start The start of the block. + * @param length The length of the block. + * @return Returns a dynamically-allocated 2-d array of blocks-hosts; + * NULL on error. + */ +char*** hdfsGetHosts(hdfsFS fs, const char* path, tOffset start, tOffset length); + +/** + * hdfsFreeHosts - Free up the structure returned by hdfsGetHosts + * @param hdfsFileInfo The array of dynamically-allocated hdfsFileInfo + * objects. + * @param numEntries The size of the array. + */ +void hdfsFreeHosts(char*** blockHosts); + +/** + * hdfsGetDefaultBlockSize - Get the optimum blocksize. + * @param fs The configured filesystem handle. + * @return Returns the blocksize; -1 on error. + */ +tOffset hdfsGetDefaultBlockSize(hdfsFS fs); + +/** + * hdfsGetCapacity - Return the raw capacity of the filesystem. + * @param fs The configured filesystem handle. + * @return Returns the raw-capacity; -1 on error. + */ +tOffset hdfsGetCapacity(hdfsFS fs); + +/** + * hdfsGetUsed - Return the total raw size of all files in the filesystem. + * @param fs The configured filesystem handle. + * @return Returns the total-size; -1 on error. + */ +tOffset hdfsGetUsed(hdfsFS fs); + +/** + * hdfsChown + * @param fs The configured filesystem handle. + * @param path the path to the file or directory + * @param owner this is a string in Hadoop land. Set to null or "" if only + * setting group + * @param group this is a string in Hadoop land. Set to null or "" if only + * setting user + * @return 0 on success else -1 + */ +int hdfsChown(hdfsFS fs, const char* path, const char* owner, const char* group); + +/** + * hdfsChmod + * @param fs The configured filesystem handle. + * @param path the path to the file or directory + * @param mode the bitmask to set it to + * @return 0 on success else -1 + */ +int hdfsChmod(hdfsFS fs, const char* path, short mode); + +/** + * hdfsUtime + * @param fs The configured filesystem handle. + * @param path the path to the file or directory + * @param mtime new modification time or 0 for only set access time in seconds + * @param atime new access time or 0 for only set modification time in seconds + * @return 0 on success else -1 + */ +int hdfsUtime(hdfsFS fs, const char* path, tTime mtime, tTime atime); #ifdef __cplusplus } diff --git a/src/leveldb/include/hdfs2.h b/src/leveldb/include/hdfs2.h index 781c2cc85..bca795acd 100644 --- a/src/leveldb/include/hdfs2.h +++ b/src/leveldb/include/hdfs2.h @@ -19,10 +19,10 @@ #ifndef LIBHDFS_HDFS2_H #define LIBHDFS_HDFS2_H -#include /* for EINTERNAL, etc. */ -#include /* for O_RDONLY, O_WRONLY */ +#include /* for EINTERNAL, etc. */ +#include /* for O_RDONLY, O_WRONLY */ #include /* for uint64_t, etc. */ -#include /* for time_t */ +#include /* for time_t */ #ifndef O_RDONLY #define O_RDONLY 1 @@ -36,761 +36,723 @@ #define EINTERNAL 255 #endif -#define ELASTIC_BYTE_BUFFER_POOL_CLASS \ - "org/apache/hadoop/io/ElasticByteBufferPool" +#define ELASTIC_BYTE_BUFFER_POOL_CLASS "org/apache/hadoop/io/ElasticByteBufferPool" /** All APIs set errno to meaningful values */ #ifdef __cplusplus -extern "C" { +extern "C" { #endif - /** - * Some utility decls used in libhdfs. - */ - struct hdfsBuilder; - typedef int32_t tSize; /// size of data for read/write io ops - typedef time_t tTime; /// time type in seconds - typedef int64_t tOffset;/// offset within the file - typedef uint16_t tPort; /// port - typedef enum tObjectKind { - kObjectKindFile = 'F', - kObjectKindDirectory = 'D', - } tObjectKind; - - - /** - * The C reflection of org.apache.org.hadoop.FileSystem . - */ - struct hdfs_internal; - typedef struct hdfs_internal* hdfsFS; - - struct hdfsFile_internal; - typedef struct hdfsFile_internal* hdfsFile; - - struct hadoopRzOptions; - - struct hadoopRzBuffer; - - /** - * Determine if a file is open for read. - * - * @param file The HDFS file - * @return 1 if the file is open for read; 0 otherwise - */ - int hdfsFileIsOpenForRead(hdfsFile file); - - /** - * Determine if a file is open for write. - * - * @param file The HDFS file - * @return 1 if the file is open for write; 0 otherwise - */ - int hdfsFileIsOpenForWrite(hdfsFile file); - - struct hdfsReadStatistics { - uint64_t totalBytesRead; - uint64_t totalLocalBytesRead; - uint64_t totalShortCircuitBytesRead; - uint64_t totalZeroCopyBytesRead; - }; - - /** - * Get read statistics about a file. This is only applicable to files - * opened for reading. - * - * @param file The HDFS file - * @param stats (out parameter) on a successful return, the read - * statistics. Unchanged otherwise. You must free the - * returned statistics with hdfsFileFreeReadStatistics. - * @return 0 if the statistics were successfully returned, - * -1 otherwise. On a failure, please check errno against - * ENOTSUP. webhdfs, LocalFilesystem, and so forth may - * not support read statistics. - */ - int hdfsFileGetReadStatistics(hdfsFile file, - struct hdfsReadStatistics **stats); - - /** - * @param stats HDFS read statistics for a file. - * - * @return the number of remote bytes read. - */ - int64_t hdfsReadStatisticsGetRemoteBytesRead( - const struct hdfsReadStatistics *stats); - - /** - * Free some HDFS read statistics. - * - * @param stats The HDFS read statistics to free. - */ - void hdfsFileFreeReadStatistics(struct hdfsReadStatistics *stats); - - /** - * hdfsConnectAsUser - Connect to a hdfs file system as a specific user - * Connect to the hdfs. - * @param nn The NameNode. See hdfsBuilderSetNameNode for details. - * @param port The port on which the server is listening. - * @param user the user name (this is hadoop domain user). Or NULL is equivelant to hhdfsConnect(host, port) - * @return Returns a handle to the filesystem or NULL on error. - * @deprecated Use hdfsBuilderConnect instead. - */ - hdfsFS hdfsConnectAsUser(const char* nn, tPort port, const char *user); - - /** - * hdfsConnect - Connect to a hdfs file system. - * Connect to the hdfs. - * @param nn The NameNode. See hdfsBuilderSetNameNode for details. - * @param port The port on which the server is listening. - * @return Returns a handle to the filesystem or NULL on error. - * @deprecated Use hdfsBuilderConnect instead. - */ - hdfsFS hdfsConnect(const char* nn, tPort port); - - /** - * hdfsConnect - Connect to an hdfs file system. - * - * Forces a new instance to be created - * - * @param nn The NameNode. See hdfsBuilderSetNameNode for details. - * @param port The port on which the server is listening. - * @param user The user name to use when connecting - * @return Returns a handle to the filesystem or NULL on error. - * @deprecated Use hdfsBuilderConnect instead. - */ - hdfsFS hdfsConnectAsUserNewInstance(const char* nn, tPort port, const char *user ); - - /** - * hdfsConnect - Connect to an hdfs file system. - * - * Forces a new instance to be created - * - * @param nn The NameNode. See hdfsBuilderSetNameNode for details. - * @param port The port on which the server is listening. - * @return Returns a handle to the filesystem or NULL on error. - * @deprecated Use hdfsBuilderConnect instead. - */ - hdfsFS hdfsConnectNewInstance(const char* nn, tPort port); - - /** - * Connect to HDFS using the parameters defined by the builder. - * - * The HDFS builder will be freed, whether or not the connection was - * successful. - * - * Every successful call to hdfsBuilderConnect should be matched with a call - * to hdfsDisconnect, when the hdfsFS is no longer needed. - * - * @param bld The HDFS builder - * @return Returns a handle to the filesystem, or NULL on error. - */ - hdfsFS hdfsBuilderConnect(struct hdfsBuilder *bld); - - /** - * Create an HDFS builder. - * - * @return The HDFS builder, or NULL on error. - */ - struct hdfsBuilder *hdfsNewBuilder(void); - - /** - * Force the builder to always create a new instance of the FileSystem, - * rather than possibly finding one in the cache. - * - * @param bld The HDFS builder - */ - void hdfsBuilderSetForceNewInstance(struct hdfsBuilder *bld); - - /** - * Set the HDFS NameNode to connect to. - * - * @param bld The HDFS builder - * @param nn The NameNode to use. - * - * If the string given is 'default', the default NameNode - * configuration will be used (from the XML configuration files) - * - * If NULL is given, a LocalFileSystem will be created. - * - * If the string starts with a protocol type such as file:// or - * hdfs://, this protocol type will be used. If not, the - * hdfs:// protocol type will be used. - * - * You may specify a NameNode port in the usual way by - * passing a string of the format hdfs://:. - * Alternately, you may set the port with - * hdfsBuilderSetNameNodePort. However, you must not pass the - * port in two different ways. - */ - void hdfsBuilderSetNameNode(struct hdfsBuilder *bld, const char *nn); - - /** - * Set the port of the HDFS NameNode to connect to. - * - * @param bld The HDFS builder - * @param port The port. - */ - void hdfsBuilderSetNameNodePort(struct hdfsBuilder *bld, tPort port); - - /** - * Set the username to use when connecting to the HDFS cluster. - * - * @param bld The HDFS builder - * @param userName The user name. The string will be shallow-copied. - */ - void hdfsBuilderSetUserName(struct hdfsBuilder *bld, const char *userName); - - /** - * Set the path to the Kerberos ticket cache to use when connecting to - * the HDFS cluster. - * - * @param bld The HDFS builder - * @param kerbTicketCachePath The Kerberos ticket cache path. The string - * will be shallow-copied. - */ - void hdfsBuilderSetKerbTicketCachePath(struct hdfsBuilder *bld, - const char *kerbTicketCachePath); - - /** - * Free an HDFS builder. - * - * It is normally not necessary to call this function since - * hdfsBuilderConnect frees the builder. - * - * @param bld The HDFS builder - */ - void hdfsFreeBuilder(struct hdfsBuilder *bld); - - /** - * Set a configuration string for an HdfsBuilder. - * - * @param key The key to set. - * @param val The value, or NULL to set no value. - * This will be shallow-copied. You are responsible for - * ensuring that it remains valid until the builder is - * freed. - * - * @return 0 on success; nonzero error code otherwise. - */ - int hdfsBuilderConfSetStr(struct hdfsBuilder *bld, const char *key, - const char *val); - - /** - * Get a configuration string. - * - * @param key The key to find - * @param val (out param) The value. This will be set to NULL if the - * key isn't found. You must free this string with - * hdfsConfStrFree. - * - * @return 0 on success; nonzero error code otherwise. - * Failure to find the key is not an error. - */ - int hdfsConfGetStr(const char *key, char **val); - - /** - * Get a configuration integer. - * - * @param key The key to find - * @param val (out param) The value. This will NOT be changed if the - * key isn't found. - * - * @return 0 on success; nonzero error code otherwise. - * Failure to find the key is not an error. - */ - int hdfsConfGetInt(const char *key, int32_t *val); - - /** - * Free a configuration string found with hdfsConfGetStr. - * - * @param val A configuration string obtained from hdfsConfGetStr - */ - void hdfsConfStrFree(char *val); - - /** - * hdfsDisconnect - Disconnect from the hdfs file system. - * Disconnect from hdfs. - * @param fs The configured filesystem handle. - * @return Returns 0 on success, -1 on error. - * Even if there is an error, the resources associated with the - * hdfsFS will be freed. - */ - int hdfsDisconnect(hdfsFS fs); - - - /** - * hdfsOpenFile - Open a hdfs file in given mode. - * @param fs The configured filesystem handle. - * @param path The full path to the file. - * @param flags - an | of bits/fcntl.h file flags - supported flags are O_RDONLY, O_WRONLY (meaning create or overwrite i.e., implies O_TRUNCAT), - * O_WRONLY|O_APPEND. Other flags are generally ignored other than (O_RDWR || (O_EXCL & O_CREAT)) which return NULL and set errno equal ENOTSUP. - * @param bufferSize Size of buffer for read/write - pass 0 if you want - * to use the default configured values. - * @param replication Block replication - pass 0 if you want to use - * the default configured values. - * @param blocksize Size of block - pass 0 if you want to use the - * default configured values. - * @return Returns the handle to the open file or NULL on error. - */ - hdfsFile hdfsOpenFile(hdfsFS fs, const char* path, int flags, - int bufferSize, short replication, tSize blocksize); - - - /** - * hdfsCloseFile - Close an open file. - * @param fs The configured filesystem handle. - * @param file The file handle. - * @return Returns 0 on success, -1 on error. - * On error, errno will be set appropriately. - * If the hdfs file was valid, the memory associated with it will - * be freed at the end of this call, even if there was an I/O - * error. - */ - int hdfsCloseFile(hdfsFS fs, hdfsFile file); - - - /** - * hdfsExists - Checks if a given path exsits on the filesystem - * @param fs The configured filesystem handle. - * @param path The path to look for - * @return Returns 0 on success, -1 on error. - */ - int hdfsExists(hdfsFS fs, const char *path); - - - /** - * hdfsSeek - Seek to given offset in file. - * This works only for files opened in read-only mode. - * @param fs The configured filesystem handle. - * @param file The file handle. - * @param desiredPos Offset into the file to seek into. - * @return Returns 0 on success, -1 on error. - */ - int hdfsSeek(hdfsFS fs, hdfsFile file, tOffset desiredPos); - - - /** - * hdfsTell - Get the current offset in the file, in bytes. - * @param fs The configured filesystem handle. - * @param file The file handle. - * @return Current offset, -1 on error. - */ - tOffset hdfsTell(hdfsFS fs, hdfsFile file); - - - /** - * hdfsRead - Read data from an open file. - * @param fs The configured filesystem handle. - * @param file The file handle. - * @param buffer The buffer to copy read bytes into. - * @param length The length of the buffer. - * @return On success, a positive number indicating how many bytes - * were read. - * On end-of-file, 0. - * On error, -1. Errno will be set to the error code. - * Just like the POSIX read function, hdfsRead will return -1 - * and set errno to EINTR if data is temporarily unavailable, - * but we are not yet at the end of the file. - */ - tSize hdfsRead(hdfsFS fs, hdfsFile file, void* buffer, tSize length); - - /** - * hdfsPread - Positional read of data from an open file. - * @param fs The configured filesystem handle. - * @param file The file handle. - * @param position Position from which to read - * @param buffer The buffer to copy read bytes into. - * @param length The length of the buffer. - * @return See hdfsRead - */ - tSize hdfsPread(hdfsFS fs, hdfsFile file, tOffset position, - void* buffer, tSize length); - - - /** - * hdfsWrite - Write data into an open file. - * @param fs The configured filesystem handle. - * @param file The file handle. - * @param buffer The data. - * @param length The no. of bytes to write. - * @return Returns the number of bytes written, -1 on error. - */ - tSize hdfsWrite(hdfsFS fs, hdfsFile file, const void* buffer, - tSize length); - - - /** - * hdfsWrite - Flush the data. - * @param fs The configured filesystem handle. - * @param file The file handle. - * @return Returns 0 on success, -1 on error. - */ - int hdfsFlush(hdfsFS fs, hdfsFile file); - - - /** - * hdfsHFlush - Flush out the data in client's user buffer. After the - * return of this call, new readers will see the data. - * @param fs configured filesystem handle - * @param file file handle - * @return 0 on success, -1 on error and sets errno - */ - int hdfsHFlush(hdfsFS fs, hdfsFile file); - - - /** - * hdfsHSync - Similar to posix fsync, Flush out the data in client's - * user buffer. all the way to the disk device (but the disk may have - * it in its cache). - * @param fs configured filesystem handle - * @param file file handle - * @return 0 on success, -1 on error and sets errno - */ - int hdfsHSync(hdfsFS fs, hdfsFile file); - - - /** - * hdfsAvailable - Number of bytes that can be read from this - * input stream without blocking. - * @param fs The configured filesystem handle. - * @param file The file handle. - * @return Returns available bytes; -1 on error. - */ - int hdfsAvailable(hdfsFS fs, hdfsFile file); - - - /** - * hdfsCopy - Copy file from one filesystem to another. - * @param srcFS The handle to source filesystem. - * @param src The path of source file. - * @param dstFS The handle to destination filesystem. - * @param dst The path of destination file. - * @return Returns 0 on success, -1 on error. - */ - int hdfsCopy(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst); - - - /** - * hdfsMove - Move file from one filesystem to another. - * @param srcFS The handle to source filesystem. - * @param src The path of source file. - * @param dstFS The handle to destination filesystem. - * @param dst The path of destination file. - * @return Returns 0 on success, -1 on error. - */ - int hdfsMove(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst); - - - /** - * hdfsDelete - Delete file. - * @param fs The configured filesystem handle. - * @param path The path of the file. - * @param recursive if path is a directory and set to - * non-zero, the directory is deleted else throws an exception. In - * case of a file the recursive argument is irrelevant. - * @return Returns 0 on success, -1 on error. - */ - int hdfsDelete(hdfsFS fs, const char* path, int recursive); - - /** - * hdfsRename - Rename file. - * @param fs The configured filesystem handle. - * @param oldPath The path of the source file. - * @param newPath The path of the destination file. - * @return Returns 0 on success, -1 on error. - */ - int hdfsRename(hdfsFS fs, const char* oldPath, const char* newPath); - - - /** - * hdfsGetWorkingDirectory - Get the current working directory for - * the given filesystem. - * @param fs The configured filesystem handle. - * @param buffer The user-buffer to copy path of cwd into. - * @param bufferSize The length of user-buffer. - * @return Returns buffer, NULL on error. - */ - char* hdfsGetWorkingDirectory(hdfsFS fs, char *buffer, size_t bufferSize); - - - /** - * hdfsSetWorkingDirectory - Set the working directory. All relative - * paths will be resolved relative to it. - * @param fs The configured filesystem handle. - * @param path The path of the new 'cwd'. - * @return Returns 0 on success, -1 on error. - */ - int hdfsSetWorkingDirectory(hdfsFS fs, const char* path); - - - /** - * hdfsCreateDirectory - Make the given file and all non-existent - * parents into directories. - * @param fs The configured filesystem handle. - * @param path The path of the directory. - * @return Returns 0 on success, -1 on error. - */ - int hdfsCreateDirectory(hdfsFS fs, const char* path); - - - /** - * hdfsSetReplication - Set the replication of the specified - * file to the supplied value - * @param fs The configured filesystem handle. - * @param path The path of the file. - * @return Returns 0 on success, -1 on error. - */ - int hdfsSetReplication(hdfsFS fs, const char* path, int16_t replication); - - - /** - * hdfsFileInfo - Information about a file/directory. - */ - typedef struct { - tObjectKind mKind; /* file or directory */ - char *mName; /* the name of the file */ - tTime mLastMod; /* the last modification time for the file in seconds */ - tOffset mSize; /* the size of the file in bytes */ - short mReplication; /* the count of replicas */ - tOffset mBlockSize; /* the block size for the file */ - char *mOwner; /* the owner of the file */ - char *mGroup; /* the group associated with the file */ - short mPermissions; /* the permissions associated with the file */ - tTime mLastAccess; /* the last access time for the file in seconds */ - } hdfsFileInfo; - - - /** - * hdfsListDirectory - Get list of files/directories for a given - * directory-path. hdfsFreeFileInfo should be called to deallocate memory. - * @param fs The configured filesystem handle. - * @param path The path of the directory. - * @param numEntries Set to the number of files/directories in path. - * @return Returns a dynamically-allocated array of hdfsFileInfo - * objects; NULL on error. - */ - hdfsFileInfo *hdfsListDirectory(hdfsFS fs, const char* path, - int *numEntries); - - - /** - * hdfsGetPathInfo - Get information about a path as a (dynamically - * allocated) single hdfsFileInfo struct. hdfsFreeFileInfo should be - * called when the pointer is no longer needed. - * @param fs The configured filesystem handle. - * @param path The path of the file. - * @return Returns a dynamically-allocated hdfsFileInfo object; - * NULL on error. - */ - hdfsFileInfo *hdfsGetPathInfo(hdfsFS fs, const char* path); - - - /** - * hdfsFreeFileInfo - Free up the hdfsFileInfo array (including fields) - * @param hdfsFileInfo The array of dynamically-allocated hdfsFileInfo - * objects. - * @param numEntries The size of the array. - */ - void hdfsFreeFileInfo(hdfsFileInfo *hdfsFileInfo, int numEntries); - - - /** - * hdfsGetHosts - Get hostnames where a particular block (determined by - * pos & blocksize) of a file is stored. The last element in the array - * is NULL. Due to replication, a single block could be present on - * multiple hosts. - * @param fs The configured filesystem handle. - * @param path The path of the file. - * @param start The start of the block. - * @param length The length of the block. - * @return Returns a dynamically-allocated 2-d array of blocks-hosts; - * NULL on error. - */ - char*** hdfsGetHosts(hdfsFS fs, const char* path, - tOffset start, tOffset length); - - - /** - * hdfsFreeHosts - Free up the structure returned by hdfsGetHosts - * @param hdfsFileInfo The array of dynamically-allocated hdfsFileInfo - * objects. - * @param numEntries The size of the array. - */ - void hdfsFreeHosts(char ***blockHosts); - - - /** - * hdfsGetDefaultBlockSize - Get the default blocksize. - * - * @param fs The configured filesystem handle. - * @deprecated Use hdfsGetDefaultBlockSizeAtPath instead. - * - * @return Returns the default blocksize, or -1 on error. - */ - tOffset hdfsGetDefaultBlockSize(hdfsFS fs); - - - /** - * hdfsGetDefaultBlockSizeAtPath - Get the default blocksize at the - * filesystem indicated by a given path. - * - * @param fs The configured filesystem handle. - * @param path The given path will be used to locate the actual - * filesystem. The full path does not have to exist. - * - * @return Returns the default blocksize, or -1 on error. - */ - tOffset hdfsGetDefaultBlockSizeAtPath(hdfsFS fs, const char *path); - - - /** - * hdfsGetCapacity - Return the raw capacity of the filesystem. - * @param fs The configured filesystem handle. - * @return Returns the raw-capacity; -1 on error. - */ - tOffset hdfsGetCapacity(hdfsFS fs); - - - /** - * hdfsGetUsed - Return the total raw size of all files in the filesystem. - * @param fs The configured filesystem handle. - * @return Returns the total-size; -1 on error. - */ - tOffset hdfsGetUsed(hdfsFS fs); - - /** - * Change the user and/or group of a file or directory. - * - * @param fs The configured filesystem handle. - * @param path the path to the file or directory - * @param owner User string. Set to NULL for 'no change' - * @param group Group string. Set to NULL for 'no change' - * @return 0 on success else -1 - */ - int hdfsChown(hdfsFS fs, const char* path, const char *owner, - const char *group); - - /** - * hdfsChmod - * @param fs The configured filesystem handle. - * @param path the path to the file or directory - * @param mode the bitmask to set it to - * @return 0 on success else -1 - */ - int hdfsChmod(hdfsFS fs, const char* path, short mode); - - /** - * hdfsUtime - * @param fs The configured filesystem handle. - * @param path the path to the file or directory - * @param mtime new modification time or -1 for no change - * @param atime new access time or -1 for no change - * @return 0 on success else -1 - */ - int hdfsUtime(hdfsFS fs, const char* path, tTime mtime, tTime atime); - - /** - * Allocate a zero-copy options structure. - * - * You must free all options structures allocated with this function using - * hadoopRzOptionsFree. - * - * @return A zero-copy options structure, or NULL if one could - * not be allocated. If NULL is returned, errno will - * contain the error number. - */ - struct hadoopRzOptions *hadoopRzOptionsAlloc(void); - - /** - * Determine whether we should skip checksums in read0. - * - * @param opts The options structure. - * @param skip Nonzero to skip checksums sometimes; zero to always - * check them. - * - * @return 0 on success; -1 plus errno on failure. - */ - int hadoopRzOptionsSetSkipChecksum( - struct hadoopRzOptions *opts, int skip); - - /** - * Set the ByteBufferPool to use with read0. - * - * @param opts The options structure. - * @param className If this is NULL, we will not use any - * ByteBufferPool. If this is non-NULL, it will be - * treated as the name of the pool class to use. - * For example, you can use - * ELASTIC_BYTE_BUFFER_POOL_CLASS. - * - * @return 0 if the ByteBufferPool class was found and - * instantiated; - * -1 plus errno otherwise. - */ - int hadoopRzOptionsSetByteBufferPool( - struct hadoopRzOptions *opts, const char *className); - - /** - * Free a hadoopRzOptionsFree structure. - * - * @param opts The options structure to free. - * Any associated ByteBufferPool will also be freed. - */ - void hadoopRzOptionsFree(struct hadoopRzOptions *opts); - - /** - * Perform a byte buffer read. - * If possible, this will be a zero-copy (mmap) read. - * - * @param file The file to read from. - * @param opts An options structure created by hadoopRzOptionsAlloc. - * @param maxLength The maximum length to read. We may read fewer bytes - * than this length. - * - * @return On success, we will return a new hadoopRzBuffer. - * This buffer will continue to be valid and readable - * until it is released by readZeroBufferFree. Failure to - * release a buffer will lead to a memory leak. - * You can access the data within the hadoopRzBuffer with - * hadoopRzBufferGet. If you have reached EOF, the data - * within the hadoopRzBuffer will be NULL. You must still - * free hadoopRzBuffer instances containing NULL. - * - * On failure, we will return NULL plus an errno code. - * errno = EOPNOTSUPP indicates that we could not do a - * zero-copy read, and there was no ByteBufferPool - * supplied. - */ - struct hadoopRzBuffer* hadoopReadZero(hdfsFile file, - struct hadoopRzOptions *opts, int32_t maxLength); - - /** - * Determine the length of the buffer returned from readZero. - * - * @param buffer a buffer returned from readZero. - * @return the length of the buffer. - */ - int32_t hadoopRzBufferLength(const struct hadoopRzBuffer *buffer); - - /** - * Get a pointer to the raw buffer returned from readZero. - * - * To find out how many bytes this buffer contains, call - * hadoopRzBufferLength. - * - * @param buffer a buffer returned from readZero. - * @return a pointer to the start of the buffer. This will be - * NULL when end-of-file has been reached. - */ - const void *hadoopRzBufferGet(const struct hadoopRzBuffer *buffer); - - /** - * Release a buffer obtained through readZero. - * - * @param file The hdfs stream that created this buffer. This must be - * the same stream you called hadoopReadZero on. - * @param buffer The buffer to release. - */ - void hadoopRzBufferFree(hdfsFile file, struct hadoopRzBuffer *buffer); +/** + * Some utility decls used in libhdfs. + */ +struct hdfsBuilder; +typedef int32_t tSize; /// size of data for read/write io ops +typedef time_t tTime; /// time type in seconds +typedef int64_t tOffset; /// offset within the file +typedef uint16_t tPort; /// port +typedef enum tObjectKind { + kObjectKindFile = 'F', + kObjectKindDirectory = 'D', +} tObjectKind; + +/** + * The C reflection of org.apache.org.hadoop.FileSystem . + */ +struct hdfs_internal; +typedef struct hdfs_internal *hdfsFS; + +struct hdfsFile_internal; +typedef struct hdfsFile_internal *hdfsFile; + +struct hadoopRzOptions; + +struct hadoopRzBuffer; + +/** + * Determine if a file is open for read. + * + * @param file The HDFS file + * @return 1 if the file is open for read; 0 otherwise + */ +int hdfsFileIsOpenForRead(hdfsFile file); + +/** + * Determine if a file is open for write. + * + * @param file The HDFS file + * @return 1 if the file is open for write; 0 otherwise + */ +int hdfsFileIsOpenForWrite(hdfsFile file); + +struct hdfsReadStatistics { + uint64_t totalBytesRead; + uint64_t totalLocalBytesRead; + uint64_t totalShortCircuitBytesRead; + uint64_t totalZeroCopyBytesRead; +}; + +/** + * Get read statistics about a file. This is only applicable to files + * opened for reading. + * + * @param file The HDFS file + * @param stats (out parameter) on a successful return, the read + * statistics. Unchanged otherwise. You must free the + * returned statistics with hdfsFileFreeReadStatistics. + * @return 0 if the statistics were successfully returned, + * -1 otherwise. On a failure, please check errno against + * ENOTSUP. webhdfs, LocalFilesystem, and so forth may + * not support read statistics. + */ +int hdfsFileGetReadStatistics(hdfsFile file, struct hdfsReadStatistics **stats); + +/** + * @param stats HDFS read statistics for a file. + * + * @return the number of remote bytes read. + */ +int64_t hdfsReadStatisticsGetRemoteBytesRead(const struct hdfsReadStatistics *stats); + +/** + * Free some HDFS read statistics. + * + * @param stats The HDFS read statistics to free. + */ +void hdfsFileFreeReadStatistics(struct hdfsReadStatistics *stats); + +/** + * hdfsConnectAsUser - Connect to a hdfs file system as a specific user + * Connect to the hdfs. + * @param nn The NameNode. See hdfsBuilderSetNameNode for details. + * @param port The port on which the server is listening. + * @param user the user name (this is hadoop domain user). Or NULL is equivelant + * to hhdfsConnect(host, port) + * @return Returns a handle to the filesystem or NULL on error. + * @deprecated Use hdfsBuilderConnect instead. + */ +hdfsFS hdfsConnectAsUser(const char *nn, tPort port, const char *user); + +/** + * hdfsConnect - Connect to a hdfs file system. + * Connect to the hdfs. + * @param nn The NameNode. See hdfsBuilderSetNameNode for details. + * @param port The port on which the server is listening. + * @return Returns a handle to the filesystem or NULL on error. + * @deprecated Use hdfsBuilderConnect instead. + */ +hdfsFS hdfsConnect(const char *nn, tPort port); + +/** + * hdfsConnect - Connect to an hdfs file system. + * + * Forces a new instance to be created + * + * @param nn The NameNode. See hdfsBuilderSetNameNode for details. + * @param port The port on which the server is listening. + * @param user The user name to use when connecting + * @return Returns a handle to the filesystem or NULL on error. + * @deprecated Use hdfsBuilderConnect instead. + */ +hdfsFS hdfsConnectAsUserNewInstance(const char *nn, tPort port, const char *user); + +/** + * hdfsConnect - Connect to an hdfs file system. + * + * Forces a new instance to be created + * + * @param nn The NameNode. See hdfsBuilderSetNameNode for details. + * @param port The port on which the server is listening. + * @return Returns a handle to the filesystem or NULL on error. + * @deprecated Use hdfsBuilderConnect instead. + */ +hdfsFS hdfsConnectNewInstance(const char *nn, tPort port); + +/** + * Connect to HDFS using the parameters defined by the builder. + * + * The HDFS builder will be freed, whether or not the connection was + * successful. + * + * Every successful call to hdfsBuilderConnect should be matched with a call + * to hdfsDisconnect, when the hdfsFS is no longer needed. + * + * @param bld The HDFS builder + * @return Returns a handle to the filesystem, or NULL on error. + */ +hdfsFS hdfsBuilderConnect(struct hdfsBuilder *bld); + +/** + * Create an HDFS builder. + * + * @return The HDFS builder, or NULL on error. + */ +struct hdfsBuilder *hdfsNewBuilder(void); + +/** + * Force the builder to always create a new instance of the FileSystem, + * rather than possibly finding one in the cache. + * + * @param bld The HDFS builder + */ +void hdfsBuilderSetForceNewInstance(struct hdfsBuilder *bld); + +/** + * Set the HDFS NameNode to connect to. + * + * @param bld The HDFS builder + * @param nn The NameNode to use. + * + * If the string given is 'default', the default NameNode + * configuration will be used (from the XML configuration files) + * + * If NULL is given, a LocalFileSystem will be created. + * + * If the string starts with a protocol type such as file:// or + * hdfs://, this protocol type will be used. If not, the + * hdfs:// protocol type will be used. + * + * You may specify a NameNode port in the usual way by + * passing a string of the format hdfs://:. + * Alternately, you may set the port with + * hdfsBuilderSetNameNodePort. However, you must not pass the + * port in two different ways. + */ +void hdfsBuilderSetNameNode(struct hdfsBuilder *bld, const char *nn); + +/** + * Set the port of the HDFS NameNode to connect to. + * + * @param bld The HDFS builder + * @param port The port. + */ +void hdfsBuilderSetNameNodePort(struct hdfsBuilder *bld, tPort port); + +/** + * Set the username to use when connecting to the HDFS cluster. + * + * @param bld The HDFS builder + * @param userName The user name. The string will be shallow-copied. + */ +void hdfsBuilderSetUserName(struct hdfsBuilder *bld, const char *userName); + +/** + * Set the path to the Kerberos ticket cache to use when connecting to + * the HDFS cluster. + * + * @param bld The HDFS builder + * @param kerbTicketCachePath The Kerberos ticket cache path. The string + * will be shallow-copied. + */ +void hdfsBuilderSetKerbTicketCachePath(struct hdfsBuilder *bld, const char *kerbTicketCachePath); + +/** + * Free an HDFS builder. + * + * It is normally not necessary to call this function since + * hdfsBuilderConnect frees the builder. + * + * @param bld The HDFS builder + */ +void hdfsFreeBuilder(struct hdfsBuilder *bld); + +/** + * Set a configuration string for an HdfsBuilder. + * + * @param key The key to set. + * @param val The value, or NULL to set no value. + * This will be shallow-copied. You are responsible for + * ensuring that it remains valid until the builder is + * freed. + * + * @return 0 on success; nonzero error code otherwise. + */ +int hdfsBuilderConfSetStr(struct hdfsBuilder *bld, const char *key, const char *val); + +/** + * Get a configuration string. + * + * @param key The key to find + * @param val (out param) The value. This will be set to NULL if the + * key isn't found. You must free this string with + * hdfsConfStrFree. + * + * @return 0 on success; nonzero error code otherwise. + * Failure to find the key is not an error. + */ +int hdfsConfGetStr(const char *key, char **val); + +/** + * Get a configuration integer. + * + * @param key The key to find + * @param val (out param) The value. This will NOT be changed if the + * key isn't found. + * + * @return 0 on success; nonzero error code otherwise. + * Failure to find the key is not an error. + */ +int hdfsConfGetInt(const char *key, int32_t *val); + +/** + * Free a configuration string found with hdfsConfGetStr. + * + * @param val A configuration string obtained from hdfsConfGetStr + */ +void hdfsConfStrFree(char *val); + +/** + * hdfsDisconnect - Disconnect from the hdfs file system. + * Disconnect from hdfs. + * @param fs The configured filesystem handle. + * @return Returns 0 on success, -1 on error. + * Even if there is an error, the resources associated with the + * hdfsFS will be freed. + */ +int hdfsDisconnect(hdfsFS fs); + +/** + * hdfsOpenFile - Open a hdfs file in given mode. + * @param fs The configured filesystem handle. + * @param path The full path to the file. + * @param flags - an | of bits/fcntl.h file flags - supported flags are + * O_RDONLY, O_WRONLY (meaning create or overwrite i.e., implies O_TRUNCAT), + * O_WRONLY|O_APPEND. Other flags are generally ignored other than (O_RDWR || + * (O_EXCL & O_CREAT)) which return NULL and set errno equal ENOTSUP. + * @param bufferSize Size of buffer for read/write - pass 0 if you want + * to use the default configured values. + * @param replication Block replication - pass 0 if you want to use + * the default configured values. + * @param blocksize Size of block - pass 0 if you want to use the + * default configured values. + * @return Returns the handle to the open file or NULL on error. + */ +hdfsFile hdfsOpenFile(hdfsFS fs, const char *path, int flags, int bufferSize, short replication, + tSize blocksize); + +/** + * hdfsCloseFile - Close an open file. + * @param fs The configured filesystem handle. + * @param file The file handle. + * @return Returns 0 on success, -1 on error. + * On error, errno will be set appropriately. + * If the hdfs file was valid, the memory associated with it will + * be freed at the end of this call, even if there was an I/O + * error. + */ +int hdfsCloseFile(hdfsFS fs, hdfsFile file); + +/** + * hdfsExists - Checks if a given path exsits on the filesystem + * @param fs The configured filesystem handle. + * @param path The path to look for + * @return Returns 0 on success, -1 on error. + */ +int hdfsExists(hdfsFS fs, const char *path); + +/** + * hdfsSeek - Seek to given offset in file. + * This works only for files opened in read-only mode. + * @param fs The configured filesystem handle. + * @param file The file handle. + * @param desiredPos Offset into the file to seek into. + * @return Returns 0 on success, -1 on error. + */ +int hdfsSeek(hdfsFS fs, hdfsFile file, tOffset desiredPos); + +/** + * hdfsTell - Get the current offset in the file, in bytes. + * @param fs The configured filesystem handle. + * @param file The file handle. + * @return Current offset, -1 on error. + */ +tOffset hdfsTell(hdfsFS fs, hdfsFile file); + +/** + * hdfsRead - Read data from an open file. + * @param fs The configured filesystem handle. + * @param file The file handle. + * @param buffer The buffer to copy read bytes into. + * @param length The length of the buffer. + * @return On success, a positive number indicating how many bytes + * were read. + * On end-of-file, 0. + * On error, -1. Errno will be set to the error code. + * Just like the POSIX read function, hdfsRead will return -1 + * and set errno to EINTR if data is temporarily unavailable, + * but we are not yet at the end of the file. + */ +tSize hdfsRead(hdfsFS fs, hdfsFile file, void *buffer, tSize length); + +/** + * hdfsPread - Positional read of data from an open file. + * @param fs The configured filesystem handle. + * @param file The file handle. + * @param position Position from which to read + * @param buffer The buffer to copy read bytes into. + * @param length The length of the buffer. + * @return See hdfsRead + */ +tSize hdfsPread(hdfsFS fs, hdfsFile file, tOffset position, void *buffer, tSize length); + +/** + * hdfsWrite - Write data into an open file. + * @param fs The configured filesystem handle. + * @param file The file handle. + * @param buffer The data. + * @param length The no. of bytes to write. + * @return Returns the number of bytes written, -1 on error. + */ +tSize hdfsWrite(hdfsFS fs, hdfsFile file, const void *buffer, tSize length); + +/** + * hdfsWrite - Flush the data. + * @param fs The configured filesystem handle. + * @param file The file handle. + * @return Returns 0 on success, -1 on error. + */ +int hdfsFlush(hdfsFS fs, hdfsFile file); + +/** + * hdfsHFlush - Flush out the data in client's user buffer. After the + * return of this call, new readers will see the data. + * @param fs configured filesystem handle + * @param file file handle + * @return 0 on success, -1 on error and sets errno + */ +int hdfsHFlush(hdfsFS fs, hdfsFile file); + +/** + * hdfsHSync - Similar to posix fsync, Flush out the data in client's + * user buffer. all the way to the disk device (but the disk may have + * it in its cache). + * @param fs configured filesystem handle + * @param file file handle + * @return 0 on success, -1 on error and sets errno + */ +int hdfsHSync(hdfsFS fs, hdfsFile file); + +/** + * hdfsAvailable - Number of bytes that can be read from this + * input stream without blocking. + * @param fs The configured filesystem handle. + * @param file The file handle. + * @return Returns available bytes; -1 on error. + */ +int hdfsAvailable(hdfsFS fs, hdfsFile file); + +/** + * hdfsCopy - Copy file from one filesystem to another. + * @param srcFS The handle to source filesystem. + * @param src The path of source file. + * @param dstFS The handle to destination filesystem. + * @param dst The path of destination file. + * @return Returns 0 on success, -1 on error. + */ +int hdfsCopy(hdfsFS srcFS, const char *src, hdfsFS dstFS, const char *dst); + +/** + * hdfsMove - Move file from one filesystem to another. + * @param srcFS The handle to source filesystem. + * @param src The path of source file. + * @param dstFS The handle to destination filesystem. + * @param dst The path of destination file. + * @return Returns 0 on success, -1 on error. + */ +int hdfsMove(hdfsFS srcFS, const char *src, hdfsFS dstFS, const char *dst); + +/** + * hdfsDelete - Delete file. + * @param fs The configured filesystem handle. + * @param path The path of the file. + * @param recursive if path is a directory and set to + * non-zero, the directory is deleted else throws an exception. In + * case of a file the recursive argument is irrelevant. + * @return Returns 0 on success, -1 on error. + */ +int hdfsDelete(hdfsFS fs, const char *path, int recursive); + +/** + * hdfsRename - Rename file. + * @param fs The configured filesystem handle. + * @param oldPath The path of the source file. + * @param newPath The path of the destination file. + * @return Returns 0 on success, -1 on error. + */ +int hdfsRename(hdfsFS fs, const char *oldPath, const char *newPath); + +/** + * hdfsGetWorkingDirectory - Get the current working directory for + * the given filesystem. + * @param fs The configured filesystem handle. + * @param buffer The user-buffer to copy path of cwd into. + * @param bufferSize The length of user-buffer. + * @return Returns buffer, NULL on error. + */ +char *hdfsGetWorkingDirectory(hdfsFS fs, char *buffer, size_t bufferSize); + +/** + * hdfsSetWorkingDirectory - Set the working directory. All relative + * paths will be resolved relative to it. + * @param fs The configured filesystem handle. + * @param path The path of the new 'cwd'. + * @return Returns 0 on success, -1 on error. + */ +int hdfsSetWorkingDirectory(hdfsFS fs, const char *path); + +/** + * hdfsCreateDirectory - Make the given file and all non-existent + * parents into directories. + * @param fs The configured filesystem handle. + * @param path The path of the directory. + * @return Returns 0 on success, -1 on error. + */ +int hdfsCreateDirectory(hdfsFS fs, const char *path); + +/** + * hdfsSetReplication - Set the replication of the specified + * file to the supplied value + * @param fs The configured filesystem handle. + * @param path The path of the file. + * @return Returns 0 on success, -1 on error. + */ +int hdfsSetReplication(hdfsFS fs, const char *path, int16_t replication); + +/** + * hdfsFileInfo - Information about a file/directory. + */ +typedef struct { + tObjectKind mKind; /* file or directory */ + char *mName; /* the name of the file */ + tTime mLastMod; /* the last modification time for the file in seconds */ + tOffset mSize; /* the size of the file in bytes */ + short mReplication; /* the count of replicas */ + tOffset mBlockSize; /* the block size for the file */ + char *mOwner; /* the owner of the file */ + char *mGroup; /* the group associated with the file */ + short mPermissions; /* the permissions associated with the file */ + tTime mLastAccess; /* the last access time for the file in seconds */ +} hdfsFileInfo; + +/** + * hdfsListDirectory - Get list of files/directories for a given + * directory-path. hdfsFreeFileInfo should be called to deallocate memory. + * @param fs The configured filesystem handle. + * @param path The path of the directory. + * @param numEntries Set to the number of files/directories in path. + * @return Returns a dynamically-allocated array of hdfsFileInfo + * objects; NULL on error. + */ +hdfsFileInfo *hdfsListDirectory(hdfsFS fs, const char *path, int *numEntries); + +/** + * hdfsGetPathInfo - Get information about a path as a (dynamically + * allocated) single hdfsFileInfo struct. hdfsFreeFileInfo should be + * called when the pointer is no longer needed. + * @param fs The configured filesystem handle. + * @param path The path of the file. + * @return Returns a dynamically-allocated hdfsFileInfo object; + * NULL on error. + */ +hdfsFileInfo *hdfsGetPathInfo(hdfsFS fs, const char *path); + +/** + * hdfsFreeFileInfo - Free up the hdfsFileInfo array (including fields) + * @param hdfsFileInfo The array of dynamically-allocated hdfsFileInfo + * objects. + * @param numEntries The size of the array. + */ +void hdfsFreeFileInfo(hdfsFileInfo *hdfsFileInfo, int numEntries); + +/** + * hdfsGetHosts - Get hostnames where a particular block (determined by + * pos & blocksize) of a file is stored. The last element in the array + * is NULL. Due to replication, a single block could be present on + * multiple hosts. + * @param fs The configured filesystem handle. + * @param path The path of the file. + * @param start The start of the block. + * @param length The length of the block. + * @return Returns a dynamically-allocated 2-d array of blocks-hosts; + * NULL on error. + */ +char ***hdfsGetHosts(hdfsFS fs, const char *path, tOffset start, tOffset length); + +/** + * hdfsFreeHosts - Free up the structure returned by hdfsGetHosts + * @param hdfsFileInfo The array of dynamically-allocated hdfsFileInfo + * objects. + * @param numEntries The size of the array. + */ +void hdfsFreeHosts(char ***blockHosts); + +/** + * hdfsGetDefaultBlockSize - Get the default blocksize. + * + * @param fs The configured filesystem handle. + * @deprecated Use hdfsGetDefaultBlockSizeAtPath instead. + * + * @return Returns the default blocksize, or -1 on error. + */ +tOffset hdfsGetDefaultBlockSize(hdfsFS fs); + +/** + * hdfsGetDefaultBlockSizeAtPath - Get the default blocksize at the + * filesystem indicated by a given path. + * + * @param fs The configured filesystem handle. + * @param path The given path will be used to locate the actual + * filesystem. The full path does not have to exist. + * + * @return Returns the default blocksize, or -1 on error. + */ +tOffset hdfsGetDefaultBlockSizeAtPath(hdfsFS fs, const char *path); + +/** + * hdfsGetCapacity - Return the raw capacity of the filesystem. + * @param fs The configured filesystem handle. + * @return Returns the raw-capacity; -1 on error. + */ +tOffset hdfsGetCapacity(hdfsFS fs); + +/** + * hdfsGetUsed - Return the total raw size of all files in the filesystem. + * @param fs The configured filesystem handle. + * @return Returns the total-size; -1 on error. + */ +tOffset hdfsGetUsed(hdfsFS fs); + +/** + * Change the user and/or group of a file or directory. + * + * @param fs The configured filesystem handle. + * @param path the path to the file or directory + * @param owner User string. Set to NULL for 'no change' + * @param group Group string. Set to NULL for 'no change' + * @return 0 on success else -1 + */ +int hdfsChown(hdfsFS fs, const char *path, const char *owner, const char *group); + +/** + * hdfsChmod + * @param fs The configured filesystem handle. + * @param path the path to the file or directory + * @param mode the bitmask to set it to + * @return 0 on success else -1 + */ +int hdfsChmod(hdfsFS fs, const char *path, short mode); + +/** + * hdfsUtime + * @param fs The configured filesystem handle. + * @param path the path to the file or directory + * @param mtime new modification time or -1 for no change + * @param atime new access time or -1 for no change + * @return 0 on success else -1 + */ +int hdfsUtime(hdfsFS fs, const char *path, tTime mtime, tTime atime); + +/** + * Allocate a zero-copy options structure. + * + * You must free all options structures allocated with this function using + * hadoopRzOptionsFree. + * + * @return A zero-copy options structure, or NULL if one could + * not be allocated. If NULL is returned, errno will + * contain the error number. + */ +struct hadoopRzOptions *hadoopRzOptionsAlloc(void); + +/** + * Determine whether we should skip checksums in read0. + * + * @param opts The options structure. + * @param skip Nonzero to skip checksums sometimes; zero to always + * check them. + * + * @return 0 on success; -1 plus errno on failure. + */ +int hadoopRzOptionsSetSkipChecksum(struct hadoopRzOptions *opts, int skip); + +/** + * Set the ByteBufferPool to use with read0. + * + * @param opts The options structure. + * @param className If this is NULL, we will not use any + * ByteBufferPool. If this is non-NULL, it will be + * treated as the name of the pool class to use. + * For example, you can use + * ELASTIC_BYTE_BUFFER_POOL_CLASS. + * + * @return 0 if the ByteBufferPool class was found and + * instantiated; + * -1 plus errno otherwise. + */ +int hadoopRzOptionsSetByteBufferPool(struct hadoopRzOptions *opts, const char *className); + +/** + * Free a hadoopRzOptionsFree structure. + * + * @param opts The options structure to free. + * Any associated ByteBufferPool will also be freed. + */ +void hadoopRzOptionsFree(struct hadoopRzOptions *opts); + +/** + * Perform a byte buffer read. + * If possible, this will be a zero-copy (mmap) read. + * + * @param file The file to read from. + * @param opts An options structure created by hadoopRzOptionsAlloc. + * @param maxLength The maximum length to read. We may read fewer bytes + * than this length. + * + * @return On success, we will return a new hadoopRzBuffer. + * This buffer will continue to be valid and readable + * until it is released by readZeroBufferFree. Failure to + * release a buffer will lead to a memory leak. + * You can access the data within the hadoopRzBuffer with + * hadoopRzBufferGet. If you have reached EOF, the data + * within the hadoopRzBuffer will be NULL. You must still + * free hadoopRzBuffer instances containing NULL. + * + * On failure, we will return NULL plus an errno code. + * errno = EOPNOTSUPP indicates that we could not do a + * zero-copy read, and there was no ByteBufferPool + * supplied. + */ +struct hadoopRzBuffer *hadoopReadZero(hdfsFile file, struct hadoopRzOptions *opts, + int32_t maxLength); + +/** + * Determine the length of the buffer returned from readZero. + * + * @param buffer a buffer returned from readZero. + * @return the length of the buffer. + */ +int32_t hadoopRzBufferLength(const struct hadoopRzBuffer *buffer); + +/** + * Get a pointer to the raw buffer returned from readZero. + * + * To find out how many bytes this buffer contains, call + * hadoopRzBufferLength. + * + * @param buffer a buffer returned from readZero. + * @return a pointer to the start of the buffer. This will be + * NULL when end-of-file has been reached. + */ +const void *hadoopRzBufferGet(const struct hadoopRzBuffer *buffer); + +/** + * Release a buffer obtained through readZero. + * + * @param file The hdfs stream that created this buffer. This must be + * the same stream you called hadoopReadZero on. + * @param buffer The buffer to release. + */ +void hadoopRzBufferFree(hdfsFile file, struct hadoopRzBuffer *buffer); #ifdef __cplusplus } diff --git a/src/leveldb/include/leveldb/c.h b/src/leveldb/include/leveldb/c.h index c5f552a37..ef16a6968 100644 --- a/src/leveldb/include/leveldb/c.h +++ b/src/leveldb/include/leveldb/c.h @@ -55,100 +55,68 @@ extern "C" { /* Exported types */ -typedef struct leveldb_t leveldb_t; -typedef struct leveldb_cache_t leveldb_cache_t; -typedef struct leveldb_comparator_t leveldb_comparator_t; -typedef struct leveldb_env_t leveldb_env_t; -typedef struct leveldb_filelock_t leveldb_filelock_t; -typedef struct leveldb_filterpolicy_t leveldb_filterpolicy_t; -typedef struct leveldb_iterator_t leveldb_iterator_t; -typedef struct leveldb_logger_t leveldb_logger_t; -typedef struct leveldb_options_t leveldb_options_t; -typedef struct leveldb_randomfile_t leveldb_randomfile_t; -typedef struct leveldb_readoptions_t leveldb_readoptions_t; -typedef struct leveldb_seqfile_t leveldb_seqfile_t; -typedef struct leveldb_snapshot_t leveldb_snapshot_t; -typedef struct leveldb_writablefile_t leveldb_writablefile_t; -typedef struct leveldb_writebatch_t leveldb_writebatch_t; -typedef struct leveldb_writeoptions_t leveldb_writeoptions_t; +typedef struct leveldb_t leveldb_t; +typedef struct leveldb_cache_t leveldb_cache_t; +typedef struct leveldb_comparator_t leveldb_comparator_t; +typedef struct leveldb_env_t leveldb_env_t; +typedef struct leveldb_filelock_t leveldb_filelock_t; +typedef struct leveldb_filterpolicy_t leveldb_filterpolicy_t; +typedef struct leveldb_iterator_t leveldb_iterator_t; +typedef struct leveldb_logger_t leveldb_logger_t; +typedef struct leveldb_options_t leveldb_options_t; +typedef struct leveldb_randomfile_t leveldb_randomfile_t; +typedef struct leveldb_readoptions_t leveldb_readoptions_t; +typedef struct leveldb_seqfile_t leveldb_seqfile_t; +typedef struct leveldb_snapshot_t leveldb_snapshot_t; +typedef struct leveldb_writablefile_t leveldb_writablefile_t; +typedef struct leveldb_writebatch_t leveldb_writebatch_t; +typedef struct leveldb_writeoptions_t leveldb_writeoptions_t; /* DB operations */ -extern leveldb_t* leveldb_open( - const leveldb_options_t* options, - const char* name, - char** errptr); +extern leveldb_t* leveldb_open(const leveldb_options_t* options, const char* name, char** errptr); extern void leveldb_close(leveldb_t* db); -extern void leveldb_put( - leveldb_t* db, - const leveldb_writeoptions_t* options, - const char* key, size_t keylen, - const char* val, size_t vallen, - char** errptr); - -extern void leveldb_delete( - leveldb_t* db, - const leveldb_writeoptions_t* options, - const char* key, size_t keylen, - char** errptr); - -extern void leveldb_write( - leveldb_t* db, - const leveldb_writeoptions_t* options, - leveldb_writebatch_t* batch, - char** errptr); +extern void leveldb_put(leveldb_t* db, const leveldb_writeoptions_t* options, const char* key, + size_t keylen, const char* val, size_t vallen, char** errptr); + +extern void leveldb_delete(leveldb_t* db, const leveldb_writeoptions_t* options, const char* key, + size_t keylen, char** errptr); + +extern void leveldb_write(leveldb_t* db, const leveldb_writeoptions_t* options, + leveldb_writebatch_t* batch, char** errptr); /* Returns NULL if not found. A malloc()ed array otherwise. Stores the length of the array in *vallen. */ -extern char* leveldb_get( - leveldb_t* db, - const leveldb_readoptions_t* options, - const char* key, size_t keylen, - size_t* vallen, - char** errptr); +extern char* leveldb_get(leveldb_t* db, const leveldb_readoptions_t* options, const char* key, + size_t keylen, size_t* vallen, char** errptr); -extern leveldb_iterator_t* leveldb_create_iterator( - leveldb_t* db, - const leveldb_readoptions_t* options); +extern leveldb_iterator_t* leveldb_create_iterator(leveldb_t* db, + const leveldb_readoptions_t* options); -extern const leveldb_snapshot_t* leveldb_create_snapshot( - leveldb_t* db); +extern const leveldb_snapshot_t* leveldb_create_snapshot(leveldb_t* db); -extern void leveldb_release_snapshot( - leveldb_t* db, - const leveldb_snapshot_t* snapshot); +extern void leveldb_release_snapshot(leveldb_t* db, const leveldb_snapshot_t* snapshot); /* Returns NULL if property name is unknown. Else returns a pointer to a malloc()-ed null-terminated value. */ -extern char* leveldb_property_value( - leveldb_t* db, - const char* propname); - -extern void leveldb_approximate_sizes( - leveldb_t* db, - int num_ranges, - const char* const* range_start_key, const size_t* range_start_key_len, - const char* const* range_limit_key, const size_t* range_limit_key_len, - uint64_t* sizes); - -extern void leveldb_compact_range( - leveldb_t* db, - const char* start_key, size_t start_key_len, - const char* limit_key, size_t limit_key_len); +extern char* leveldb_property_value(leveldb_t* db, const char* propname); + +extern void leveldb_approximate_sizes(leveldb_t* db, int num_ranges, + const char* const* range_start_key, + const size_t* range_start_key_len, + const char* const* range_limit_key, + const size_t* range_limit_key_len, uint64_t* sizes); + +extern void leveldb_compact_range(leveldb_t* db, const char* start_key, size_t start_key_len, + const char* limit_key, size_t limit_key_len); /* Management operations */ -extern void leveldb_destroy_db( - const leveldb_options_t* options, - const char* name, - char** errptr); +extern void leveldb_destroy_db(const leveldb_options_t* options, const char* name, char** errptr); -extern void leveldb_repair_db( - const leveldb_options_t* options, - const char* name, - char** errptr); +extern void leveldb_repair_db(const leveldb_options_t* options, const char* name, char** errptr); /* Iterator */ @@ -168,33 +136,22 @@ extern void leveldb_iter_get_error(const leveldb_iterator_t*, char** errptr); extern leveldb_writebatch_t* leveldb_writebatch_create(); extern void leveldb_writebatch_destroy(leveldb_writebatch_t*); extern void leveldb_writebatch_clear(leveldb_writebatch_t*); -extern void leveldb_writebatch_put( - leveldb_writebatch_t*, - const char* key, size_t klen, - const char* val, size_t vlen); -extern void leveldb_writebatch_delete( - leveldb_writebatch_t*, - const char* key, size_t klen); -extern void leveldb_writebatch_iterate( - leveldb_writebatch_t*, - void* state, - void (*put)(void*, const char* k, size_t klen, const char* v, size_t vlen), - void (*deleted)(void*, const char* k, size_t klen)); +extern void leveldb_writebatch_put(leveldb_writebatch_t*, const char* key, size_t klen, + const char* val, size_t vlen); +extern void leveldb_writebatch_delete(leveldb_writebatch_t*, const char* key, size_t klen); +extern void leveldb_writebatch_iterate(leveldb_writebatch_t*, void* state, + void (*put)(void*, const char* k, size_t klen, const char* v, + size_t vlen), + void (*deleted)(void*, const char* k, size_t klen)); /* Options */ extern leveldb_options_t* leveldb_options_create(); extern void leveldb_options_destroy(leveldb_options_t*); -extern void leveldb_options_set_comparator( - leveldb_options_t*, - leveldb_comparator_t*); -extern void leveldb_options_set_filter_policy( - leveldb_options_t*, - leveldb_filterpolicy_t*); -extern void leveldb_options_set_error_if_exists( - leveldb_options_t*, unsigned char); -extern void leveldb_options_set_paranoid_checks( - leveldb_options_t*, unsigned char); +extern void leveldb_options_set_comparator(leveldb_options_t*, leveldb_comparator_t*); +extern void leveldb_options_set_filter_policy(leveldb_options_t*, leveldb_filterpolicy_t*); +extern void leveldb_options_set_error_if_exists(leveldb_options_t*, unsigned char); +extern void leveldb_options_set_paranoid_checks(leveldb_options_t*, unsigned char); extern void leveldb_options_set_env(leveldb_options_t*, leveldb_env_t*); extern void leveldb_options_set_info_log(leveldb_options_t*, leveldb_logger_t*); extern void leveldb_options_set_write_buffer_size(leveldb_options_t*, size_t); @@ -203,63 +160,44 @@ extern void leveldb_options_set_cache(leveldb_options_t*, leveldb_cache_t*); extern void leveldb_options_set_block_size(leveldb_options_t*, size_t); extern void leveldb_options_set_block_restart_interval(leveldb_options_t*, int); -enum { - leveldb_no_compression = 0, - leveldb_snappy_compression = 1 -}; +enum { leveldb_no_compression = 0, leveldb_snappy_compression = 1 }; extern void leveldb_options_set_compression(leveldb_options_t*, int); /* Comparator */ -extern leveldb_comparator_t* leveldb_comparator_create( - void* state, - void (*destructor)(void*), - int (*compare)( - void*, - const char* a, size_t alen, - const char* b, size_t blen), - const char* (*name)(void*)); +extern leveldb_comparator_t* leveldb_comparator_create(void* state, void (*destructor)(void*), + int (*compare)(void*, const char* a, + size_t alen, const char* b, + size_t blen), + const char* (*name)(void*)); extern void leveldb_comparator_destroy(leveldb_comparator_t*); /* Filter policy */ extern leveldb_filterpolicy_t* leveldb_filterpolicy_create( - void* state, - void (*destructor)(void*), - char* (*create_filter)( - void*, - const char* const* key_array, const size_t* key_length_array, - int num_keys, - size_t* filter_length), - unsigned char (*key_may_match)( - void*, - const char* key, size_t length, - const char* filter, size_t filter_length), + void* state, void (*destructor)(void*), + char* (*create_filter)(void*, const char* const* key_array, const size_t* key_length_array, + int num_keys, size_t* filter_length), + unsigned char (*key_may_match)(void*, const char* key, size_t length, const char* filter, + size_t filter_length), const char* (*name)(void*)); extern void leveldb_filterpolicy_destroy(leveldb_filterpolicy_t*); -extern leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom( - int bits_per_key); +extern leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(int bits_per_key); /* Read options */ extern leveldb_readoptions_t* leveldb_readoptions_create(); extern void leveldb_readoptions_destroy(leveldb_readoptions_t*); -extern void leveldb_readoptions_set_verify_checksums( - leveldb_readoptions_t*, - unsigned char); -extern void leveldb_readoptions_set_fill_cache( - leveldb_readoptions_t*, unsigned char); -extern void leveldb_readoptions_set_snapshot( - leveldb_readoptions_t*, - const leveldb_snapshot_t*); +extern void leveldb_readoptions_set_verify_checksums(leveldb_readoptions_t*, unsigned char); +extern void leveldb_readoptions_set_fill_cache(leveldb_readoptions_t*, unsigned char); +extern void leveldb_readoptions_set_snapshot(leveldb_readoptions_t*, const leveldb_snapshot_t*); /* Write options */ extern leveldb_writeoptions_t* leveldb_writeoptions_create(); extern void leveldb_writeoptions_destroy(leveldb_writeoptions_t*); -extern void leveldb_writeoptions_set_sync( - leveldb_writeoptions_t*, unsigned char); +extern void leveldb_writeoptions_set_sync(leveldb_writeoptions_t*, unsigned char); /* Cache */ @@ -287,7 +225,7 @@ extern int leveldb_major_version(); extern int leveldb_minor_version(); #ifdef __cplusplus -} /* end extern "C" */ +} /* end extern "C" */ #endif -#endif /* STORAGE_LEVELDB_INCLUDE_C_H_ */ +#endif /* STORAGE_LEVELDB_INCLUDE_C_H_ */ diff --git a/src/leveldb/include/leveldb/cache.h b/src/leveldb/include/leveldb/cache.h index b34cea82d..caf6b4558 100644 --- a/src/leveldb/include/leveldb/cache.h +++ b/src/leveldb/include/leveldb/cache.h @@ -37,11 +37,11 @@ struct LRUHandle { LRUHandle* next_hash; LRUHandle* next; LRUHandle* prev; - size_t charge; // TODO(opt): Only allow uint32_t? + size_t charge; // TODO(opt): Only allow uint32_t? size_t key_length; uint32_t refs; uint32_t hash; // Hash of key(); used for fast sharding and comparisons - uint64_t cache_id; // cache id, user spec + uint64_t cache_id; // cache id, user spec char key_data[1]; // Beginning of key Slice key() const { @@ -62,14 +62,14 @@ extern Cache* NewBlockBasedCache(size_t capacity); class Cache { public: - Cache() : rep_(NULL) { } + Cache() : rep_(NULL) {} // Destroys all existing entries by calling the "deleter" // function that was passed to the constructor. virtual ~Cache(); // Opaque handle to an entry stored in the cache. - struct Handle { }; + struct Handle {}; // Insert a mapping from key->value into the cache and assign it // the specified charge against the total cache capacity. diff --git a/src/leveldb/include/leveldb/compact_strategy.h b/src/leveldb/include/leveldb/compact_strategy.h index b581c4a99..ed41ba9ad 100644 --- a/src/leveldb/include/leveldb/compact_strategy.h +++ b/src/leveldb/include/leveldb/compact_strategy.h @@ -20,97 +20,87 @@ class InternalKeyComparator; // determine whether user records are drop during // compaction. class CompactStrategy { -public: - virtual ~CompactStrategy() {} + public: + virtual ~CompactStrategy() {} - virtual const Comparator* RowKeyComparator() = 0; + virtual const Comparator* RowKeyComparator() = 0; - virtual bool Drop(const Slice& k, uint64_t n, - const std::string& lower_bound = "") = 0; + virtual void ExtractRowKey(const Slice& tera_key, std::string* row_key) = 0; - // tera-specific, based on all-level iterators. - // used in LowLevelScan - virtual bool ScanDrop(const Slice& k, uint64_t n) = 0; + virtual bool Drop(const Slice& k, uint64_t n, const std::string& lower_bound = "") = 0; - virtual bool ScanMergedValue(Iterator* it, std::string* merged_value, - int64_t* merged_num = NULL) = 0; + // tera-specific, based on all-level iterators. + // used in LowLevelScan + virtual bool ScanDrop(const Slice& k, uint64_t n) = 0; - virtual bool MergeAtomicOPs(Iterator* it, std::string* merged_value, - std::string* merged_key) = 0; + virtual bool ScanMergedValue(Iterator* it, std::string* merged_value, + int64_t* merged_num = NULL) = 0; - // Set snapshot for CompactStrategy so that tera will not drop data entries which - // are protected by snpashot - virtual void SetSnapshot(uint64_t snapshot) = 0; + virtual bool MergeAtomicOPs(Iterator* it, std::string* merged_value, std::string* merged_key) = 0; - virtual bool CheckTag(const Slice& tera_key, bool* del_tag, int64_t* ttl_tag) = 0; + // Set snapshot for CompactStrategy so that tera will not drop data entries + // which + // are protected by snpashot + virtual void SetSnapshot(uint64_t snapshot) = 0; - virtual const char* Name() const = 0; -}; + virtual bool CheckTag(const Slice& tera_key, bool* del_tag, int64_t* ttl_tag) = 0; + virtual const char* Name() const = 0; +}; class DummyCompactStrategy : public CompactStrategy { -public: - virtual ~DummyCompactStrategy() {} - - virtual const Comparator* RowKeyComparator() { return NULL;} - - virtual bool Drop(const Slice& k, uint64_t n, const std::string& lower_bound) { - return false; - } - - virtual bool ScanDrop(const Slice& k, uint64_t n) { - return false; - } - - virtual const char* Name() const { - return "leveldb.DummyCompactStrategy"; - } - - virtual void SetSnapshot(uint64_t snapshot) { - // snapshot is taken care of by leveldb - } - - virtual bool MergeAtomicOPs(Iterator* it, std::string* merged_value, - std::string* merged_key) { - return false; - } - - virtual bool ScanMergedValue(Iterator* it, std::string* merged_value, - int64_t* merged_num) { - return false; - } - - virtual bool CheckTag(const Slice& tera_key, bool* del_tag, int64_t* ttl_tag) { - *del_tag = false; - *ttl_tag = -1; - return true; - } + public: + virtual ~DummyCompactStrategy() {} + + virtual const Comparator* RowKeyComparator() { return NULL; } + + virtual void ExtractRowKey(const Slice& tera_key, std::string* row_key) { + *row_key = tera_key.ToString(); + } + + virtual bool Drop(const Slice& k, uint64_t n, const std::string& lower_bound) { return false; } + + virtual bool ScanDrop(const Slice& k, uint64_t n) { return false; } + + virtual const char* Name() const { return "leveldb.DummyCompactStrategy"; } + + virtual void SetSnapshot(uint64_t snapshot) { + // snapshot is taken care of by leveldb + } + + virtual bool MergeAtomicOPs(Iterator* it, std::string* merged_value, std::string* merged_key) { + return false; + } + + virtual bool ScanMergedValue(Iterator* it, std::string* merged_value, int64_t* merged_num) { + return false; + } + + virtual bool CheckTag(const Slice& tera_key, bool* del_tag, int64_t* ttl_tag) { + *del_tag = false; + *ttl_tag = -1; + return true; + } }; // each strategy object has its own inner status or context, // so create anew one when needed. class CompactStrategyFactory { -public: - virtual ~CompactStrategyFactory() {} - virtual CompactStrategy* NewInstance() = 0; - virtual const char* Name() const = 0; - virtual void SetArg(const void* arg) = 0; + public: + virtual ~CompactStrategyFactory() {} + virtual CompactStrategy* NewInstance() = 0; + virtual const char* Name() const = 0; + virtual void SetArg(const void* arg) = 0; }; class DummyCompactStrategyFactory : public CompactStrategyFactory { -public: - virtual CompactStrategy* NewInstance() { - return new DummyCompactStrategy(); - } - virtual const char* Name() const { - return "leveldb.DummyCompactStrategyFactory"; - } - virtual void SetArg(const void* arg) {} + public: + virtual CompactStrategy* NewInstance() { return new DummyCompactStrategy(); } + virtual const char* Name() const { return "leveldb.DummyCompactStrategyFactory"; } + virtual void SetArg(const void* arg) {} }; -} // namespace leveldb - +} // namespace leveldb #endif // STORAGE_LEVELDB_INCLUDE_COMPACT_STRATEGY_H_ - diff --git a/src/leveldb/include/leveldb/comparator.h b/src/leveldb/include/leveldb/comparator.h index a8986712e..b48f3271d 100644 --- a/src/leveldb/include/leveldb/comparator.h +++ b/src/leveldb/include/leveldb/comparator.h @@ -48,9 +48,7 @@ class Comparator { // If *start < limit, changes *start to a short string in [start,limit). // Simple comparator implementations may return with *start unchanged, // i.e., an implementation of this method that does nothing is correct. - virtual void FindShortestSeparator( - std::string* start, - const Slice& limit) const = 0; + virtual void FindShortestSeparator(std::string* start, const Slice& limit) const = 0; // Changes *key to a short string >= *key. // Simple comparator implementations may return with *key unchanged, diff --git a/src/leveldb/include/leveldb/config.h b/src/leveldb/include/leveldb/config.h index a58dca1ff..1b191be17 100644 --- a/src/leveldb/include/leveldb/config.h +++ b/src/leveldb/include/leveldb/config.h @@ -19,10 +19,10 @@ static const int kNumLevels = 7; static const int kL0_CompactionTrigger = 2; // Soft limit on number of level-0 files. We slow down writes at this point. -static const int kL0_SlowdownWritesTrigger = (1 << 30);//12; +static const int kL0_SlowdownWritesTrigger = (1 << 30); // 12; // Maximum number of level-0 files. We stop writes at this point. -static const int kL0_StopWritesTrigger = (1 << 30);//20; +static const int kL0_StopWritesTrigger = (1 << 30); // 20; // Maximum level to which a new compacted memtable is pushed if it // does not create overlap. We try to push to level 2 to avoid the @@ -33,6 +33,6 @@ static const int kL0_StopWritesTrigger = (1 << 30);//20; static const int kMaxMemCompactLevel = 2; } // namespace config -} // namespace leveldb +} // namespace leveldb #endif \ No newline at end of file diff --git a/src/leveldb/include/leveldb/db.h b/src/leveldb/include/leveldb/db.h index b7350064d..96ca01253 100644 --- a/src/leveldb/include/leveldb/db.h +++ b/src/leveldb/include/leveldb/db.h @@ -28,11 +28,11 @@ class WriteBatch; // A range of keys struct Range { - Slice start; // Included in the range - Slice limit; // Not included in the range + Slice start; // Included in the range + Slice limit; // Not included in the range - Range() { } - Range(const Slice& s, const Slice& l) : start(s), limit(l) { } + Range() {} + Range(const Slice& s, const Slice& l) : start(s), limit(l) {} }; // A DB is a persistent ordered map from keys to values. @@ -40,23 +40,16 @@ struct Range { // any external synchronization. class DB { public: - enum State { - kNotOpen = 0, - kOpened = 1, - kShutdown1 = 2, - kShutdown2 = 3 - }; + enum State { kNotOpen = 0, kOpened = 1, kShutdown1 = 2, kShutdown2 = 3 }; // Open the database with the specified "name". // Stores a pointer to a heap-allocated database in *dbptr and returns // OK on success. // Stores NULL in *dbptr and returns a non-OK status on error. // Caller should delete *dbptr when it is no longer needed. - static Status Open(const Options& options, - const std::string& name, - DB** dbptr); + static Status Open(const Options& options, const std::string& name, DB** dbptr); - DB() { } + DB() {} virtual ~DB(); // Abort all background work and do some clean work. DB is free to @@ -69,9 +62,7 @@ class DB { // Set the database entry for "key" to "value". Returns OK on success, // and a non-OK status on error. // Note: consider setting options.sync = true. - virtual Status Put(const WriteOptions& options, - const Slice& key, - const Slice& value) = 0; + virtual Status Put(const WriteOptions& options, const Slice& key, const Slice& value) = 0; // Remove the database entry (if any) for "key". Returns OK on // success, and a non-OK status on error. It is not an error if "key" @@ -91,8 +82,7 @@ class DB { // a status for which Status::IsNotFound() returns true. // // May return some other Status on an error. - virtual Status Get(const ReadOptions& options, - const Slice& key, std::string* value) = 0; + virtual Status Get(const ReadOptions& options, const Slice& key, std::string* value) = 0; // Return a heap-allocated iterator over the contents of the database. // The result of NewIterator() is initially invalid (caller must @@ -113,7 +103,8 @@ class DB { virtual void ReleaseSnapshot(uint64_t sequence_number) = 0; // Rollback to a spcific snapshot - virtual const uint64_t Rollback(uint64_t snapshot_seq, uint64_t rollback_point = kMaxSequenceNumber) = 0; + virtual const uint64_t Rollback(uint64_t snapshot_seq, + uint64_t rollback_point = kMaxSequenceNumber) = 0; // DB implementations can export properties about their state // via this method. If "property" is a valid property understood by this @@ -139,13 +130,13 @@ class DB { // sizes will be one-tenth the size of the corresponding user data size. // // The results may not include the sizes of recently written data. - virtual void GetApproximateSizes(const Range* range, int n, - uint64_t* sizes) = 0; + virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes) = 0; // tera-specific // size: db size, include mem, imm, all sst files // lgsize: each lg size, include all storage - virtual void GetApproximateSizes(uint64_t* size, - std::vector* lgsize = NULL) = 0; + // mem_table_size: memtable's size, for analyzing memory usage. + virtual void GetApproximateSizes(uint64_t* size, std::vector* lgsize = NULL, + uint64_t* mem_table_size = NULL) = 0; // tera-specific // result: each level's total file size @@ -170,8 +161,7 @@ class DB { virtual void Workload(double* write_workload) = 0; virtual bool FindSplitKey(double ratio, std::string* split_key) = 0; - virtual bool FindKeyRange(std::string* smallest_key = NULL, - std::string* largest_key = NULL) = 0; + virtual bool FindKeyRange(std::string* smallest_key = NULL, std::string* largest_key = NULL) = 0; virtual bool MinorCompact() = 0; @@ -180,7 +170,7 @@ class DB { virtual bool ShouldForceUnloadOnError() { return false; } - // Default : False, + // Default : False, // Only activate the strategy for speeding up the process of shutdown DB. // Strategy : Always return True begin shutdown1 finished. virtual bool IsShutdown1Finished() const { return false; } diff --git a/src/leveldb/include/leveldb/dfs.h b/src/leveldb/include/leveldb/dfs.h index b5df4b0b2..a1284bd25 100644 --- a/src/leveldb/include/leveldb/dfs.h +++ b/src/leveldb/include/leveldb/dfs.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_LEVELDB_DFS_H_ -#define TERA_LEVELDB_DFS_H_ +#ifndef TERA_LEVELDB_DFS_H_ +#define TERA_LEVELDB_DFS_H_ #include #include @@ -13,73 +13,71 @@ namespace leveldb { class DfsFile { -public: - DfsFile() {} - virtual ~DfsFile() {} - /// Returns the number of bytes written, -1 on error. - virtual int32_t Write(const char* buf, int32_t len) = 0; - /// Returns 0 on success. - virtual int32_t Flush() = 0; - /// Returns 0 on success. - virtual int32_t Sync() = 0; - /// Returns the number of bytes actually read, possibly less - /// than than length;-1 on error. - virtual int32_t Read(char* buf, int32_t len) = 0; - /// Returns the number of bytes actually read, possibly less than - /// than length;-1 on error. - virtual int32_t Pread(int64_t offset, char* buf, int32_t len) = 0; - /// Return Current offset. - virtual int64_t Tell() = 0; - /// Returns 0 on success. - virtual int32_t Seek(int64_t offset) = 0; - /// Returns 0 on success. - virtual int32_t CloseFile() = 0; -private: - DfsFile(const DfsFile&); - void operator=(const DfsFile&); -}; + public: + DfsFile() {} + virtual ~DfsFile() {} + /// Returns the number of bytes written, -1 on error. + virtual int32_t Write(const char* buf, int32_t len) = 0; + /// Returns 0 on success. + virtual int32_t Flush() = 0; + /// Returns 0 on success. + virtual int32_t Sync() = 0; + /// Returns the number of bytes actually read, possibly less + /// than than length;-1 on error. + virtual int32_t Read(char* buf, int32_t len) = 0; + /// Returns the number of bytes actually read, possibly less than + /// than length;-1 on error. + virtual int32_t Pread(int64_t offset, char* buf, int32_t len) = 0; + /// Return Current offset. + virtual int64_t Tell() = 0; + /// Returns 0 on success. + virtual int32_t Seek(int64_t offset) = 0; + /// Returns 0 on success. + virtual int32_t CloseFile() = 0; -enum open_type { - RDONLY = 1, - WRONLY = 2 + private: + DfsFile(const DfsFile&); + void operator=(const DfsFile&); }; +enum open_type { RDONLY = 1, WRONLY = 2 }; + class Dfs { -public: - Dfs() {} - virtual ~Dfs() {} - /// Returns 0 on success. - virtual int32_t CreateDirectory(const std::string& path) = 0; - /// Returns 0 on success. - virtual int32_t DeleteDirectory(const std::string& path) = 0; - /// Returns 0 on success. - virtual int32_t Exists(const std::string& filename) = 0; - /// Returns 0 on success. - virtual int32_t Delete(const std::string& filename) = 0; - /// Returns 0 on success. - virtual int32_t GetFileSize(const std::string& filename, uint64_t* size) = 0; - /// Returns 0 on success. - virtual int32_t Rename(const std::string& from, const std::string& to) = 0; - /// Returns 0 on success. - virtual int32_t Copy(const std::string& from, const std::string& to) = 0; - /// Returns 0 on success. - virtual int32_t ListDirectory(const std::string& path, - std::vector* result) = 0; - /// Returns 0 on success. - virtual int32_t LockDirectory(const std::string& path) = 0; - /// Returns Dfs handler on success, NULL on error. - static Dfs* NewDfs(const std::string& so_path, const std::string& conf); - /// Returns 0 on success. - virtual int32_t UnlockDirectory(const std::string& path) = 0; + public: + Dfs() {} + virtual ~Dfs() {} + /// Returns 0 on success. + virtual int32_t CreateDirectory(const std::string& path) = 0; + /// Returns 0 on success. + virtual int32_t DeleteDirectory(const std::string& path) = 0; + /// Returns 0 on success. + virtual int32_t Exists(const std::string& filename) = 0; + /// Returns 0 on success. + virtual int32_t Delete(const std::string& filename) = 0; + /// Returns 0 on success. + virtual int32_t GetFileSize(const std::string& filename, uint64_t* size) = 0; + /// Returns 0 on success. + virtual int32_t Rename(const std::string& from, const std::string& to) = 0; + /// Returns 0 on success. + virtual int32_t Copy(const std::string& from, const std::string& to) = 0; + /// Returns 0 on success. + virtual int32_t ListDirectory(const std::string& path, std::vector* result) = 0; + /// Returns 0 on success. + virtual int32_t LockDirectory(const std::string& path) = 0; + /// Returns Dfs handler on success, NULL on error. + static Dfs* NewDfs(const std::string& so_path, const std::string& conf); + /// Returns 0 on success. + virtual int32_t UnlockDirectory(const std::string& path) = 0; + + virtual int32_t ClearDirOwner(const std::string& path) = 0; + /// Returns DfsFile handler on success, NULL on error.WithTime + virtual DfsFile* OpenFile(const std::string& filename, int32_t flags) = 0; - virtual int32_t ClearDirOwner(const std::string& path) = 0; - /// Returns DfsFile handler on success, NULL on error.WithTime - virtual DfsFile* OpenFile(const std::string& filename, int32_t flags) = 0; + virtual int32_t Stat(const std::string& filename, struct stat* fstat) = 0; - virtual int32_t Stat(const std::string& filename, struct stat* fstat) = 0; -private: - Dfs(const Dfs&); - void operator=(const Dfs&); + private: + Dfs(const Dfs&); + void operator=(const Dfs&); }; /// Dfs creator type. @@ -101,7 +99,7 @@ typedef Dfs* (*DfsCreator)(const char*); /// } /// } -} // namespace leveldb -#endif //TERA_LEVELDB_DFS_H_ +} // namespace leveldb +#endif // TERA_LEVELDB_DFS_H_ /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/leveldb/include/leveldb/env.h b/src/leveldb/include/leveldb/env.h index 2e6bc8dfb..e9345d3bd 100644 --- a/src/leveldb/include/leveldb/env.h +++ b/src/leveldb/include/leveldb/env.h @@ -17,16 +17,18 @@ #ifndef STORAGE_LEVELDB_INCLUDE_ENV_H_ #define STORAGE_LEVELDB_INCLUDE_ENV_H_ +#include #include +#include #include #include -#include -#include "leveldb/status.h" #include "leveldb/options.h" +#include "leveldb/status.h" namespace leveldb { class FileLock; +class LogOption; class Logger; class RandomAccessFile; class SequentialFile; @@ -34,13 +36,15 @@ class Slice; class WritableFile; enum ThreadPoolScore { - kDeleteLogUrgentScore = 100, - kDumpMemTableUrgentScore = 90, - kDumpMemTableScore = 60, - kDeleteLogScore = 50, - kManualCompactScore = 10, + kDeleteLogUrgentScore = 100, + kDumpMemTableUrgentScore = 90, + kDumpMemTableScore = 60, + kDeleteLogScore = 50, + kManualCompactScore = 10, }; +enum class SystemFileType { kRegularFile, kDir, kOthers }; + const size_t kDefaultPageSize = 4 * 1024; struct EnvOptions { @@ -55,7 +59,7 @@ struct EnvOptions { class Env { public: - Env() { } + Env() {} virtual ~Env(); // Return a default environment suitable for the current operating @@ -71,8 +75,7 @@ class Env { // not exist, returns a non-OK status. // // The returned file will only be accessed by one thread at a time. - virtual Status NewSequentialFile(const std::string& fname, - SequentialFile** result) = 0; + virtual Status NewSequentialFile(const std::string& fname, SequentialFile** result) = 0; // Create a brand new random access read-only file with the // specified name. On success, stores a pointer to the new file in @@ -82,14 +85,11 @@ class Env { // // The returned file may be concurrently accessed by multiple threads. // Implement may check file size against the given fsize. - virtual Status NewRandomAccessFile(const std::string& fname, - RandomAccessFile** result, + virtual Status NewRandomAccessFile(const std::string& fname, RandomAccessFile** result, const EnvOptions& options) = 0; - virtual Status NewRandomAccessFile(const std::string& fname, - uint64_t fsize, - RandomAccessFile** result, - const EnvOptions& options) = 0; + virtual Status NewRandomAccessFile(const std::string& fname, uint64_t fsize, + RandomAccessFile** result, const EnvOptions& options) = 0; // Create an object that writes to a new file with the specified // name. Deletes any existing file with the same name and creates a @@ -98,8 +98,7 @@ class Env { // returns non-OK. // // The returned file will only be accessed by one thread at a time. - virtual Status NewWritableFile(const std::string& fname, - WritableFile** result, + virtual Status NewWritableFile(const std::string& fname, WritableFile** result, const EnvOptions& options) = 0; // Returns: @@ -112,8 +111,7 @@ class Env { // Store in *result the names of the children of the specified directory. // The names are relative to "dir". // Original contents of *results are dropped. - virtual Status GetChildren(const std::string& dir, - std::vector* result) = 0; + virtual Status GetChildren(const std::string& dir, std::vector* result) = 0; // Delete the named file. virtual Status DeleteFile(const std::string& fname) = 0; @@ -126,19 +124,21 @@ class Env { // Deprecated, use GetChildren - virtual Status CopyFile(const std::string& from, - const std::string& to) = 0; + virtual Status CopyFile(const std::string& from, const std::string& to) = 0; // Delete the specified directory recursive. virtual Status DeleteDirRecursive(const std::string& name) = 0; + + // Detect file type + virtual Status GetFileType(const std::string& path, SystemFileType* type) = 0; + virtual Status IsSamePath(const std::string& path1, const std::string& path2, bool* same) = 0; // end of tera_specific // Store the size of fname in *file_size. virtual Status GetFileSize(const std::string& fname, uint64_t* file_size) = 0; // Rename file src to target. - virtual Status RenameFile(const std::string& src, - const std::string& target) = 0; + virtual Status RenameFile(const std::string& src, const std::string& target) = 0; // Lock the specified file. Used to prevent concurrent access to // the same db by multiple processes. On failure, stores NULL in @@ -170,11 +170,8 @@ class Env { // added to the same Env may run concurrently in different threads. // I.e., the caller may not assume that background work items are // serialized. - virtual int64_t Schedule( - void (*function)(void* arg), - void* arg, - double prio = 0.0, - int64_t wait_time_millisec = 0) = 0; + virtual int64_t Schedule(void (*function)(void* arg), void* arg, double prio = 0.0, + int64_t wait_time_millisec = 0) = 0; // Update background task priority with the schedule id return by Schedule. // If wait_time_millisec < 0, the exec time will not be updated. @@ -191,7 +188,7 @@ class Env { virtual Status GetTestDirectory(std::string* path) = 0; // Create and return a log file for storing informational messages. - virtual Status NewLogger(const std::string& fname, Logger** result) = 0; + virtual Status NewLogger(const std::string& fname, const LogOption& opt, Logger** result) = 0; virtual void SetLogger(Logger* logger) = 0; // Returns the number of micro-seconds since some fixed point in time. Only @@ -213,7 +210,7 @@ class Env { // A file abstraction for reading sequentially through a file class SequentialFile { public: - SequentialFile() { } + SequentialFile() {} virtual ~SequentialFile(); // Read up to "n" bytes from the file. "scratch[0..n-1]" may be @@ -244,7 +241,7 @@ class SequentialFile { // A file abstraction for randomly reading the contents of a file. class RandomAccessFile { public: - RandomAccessFile() { } + RandomAccessFile() {} virtual ~RandomAccessFile(); // Read up to "n" bytes from the file starting at "offset". @@ -256,12 +253,15 @@ class RandomAccessFile { // status. // // Safe for concurrent use by multiple threads. - virtual Status Read(uint64_t offset, size_t n, Slice* result, - char* scratch) const = 0; - + virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const = 0; + // Use the returned alignment value to allocate // aligned buffer for Direct I/O virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize; } + virtual std::string GetFileName() const { + assert(!"Not Implement"); + return ""; + }; private: // No copying allowed @@ -274,13 +274,17 @@ class RandomAccessFile { // at a time to the file. class WritableFile { public: - WritableFile() { } + WritableFile() {} virtual ~WritableFile(); virtual Status Append(const Slice& data) = 0; virtual Status Close() = 0; virtual Status Flush() = 0; virtual Status Sync() = 0; + virtual std::string GetFileName() const { + assert(!"Not Implement"); + return ""; + }; private: // No copying allowed @@ -288,14 +292,58 @@ class WritableFile { void operator=(const WritableFile&); }; +struct LogOption { + public: + class LogOptionBuilder; + + std::string ToString() const { + std::ostringstream ss; + ss << "max_log_size:" << max_log_size << ", flush_trigger_size:" << flush_trigger_size + << ", flush_trigger_interval_ms:" << flush_trigger_interval_ms; + return ss.str(); + } + + uint64_t max_log_size; + uint32_t flush_trigger_size; + uint32_t flush_trigger_interval_ms; + + private: + LogOption() + : max_log_size(1 << 30), flush_trigger_size(1 << 20), flush_trigger_interval_ms(1000) {} +}; + +class LogOption::LogOptionBuilder { + public: + LogOptionBuilder() {} + LogOptionBuilder& SetMaxLogSize(uint64_t size) { + opt_.max_log_size = size; + return *this; + } + LogOptionBuilder& SetFlushTriggerSize(uint32_t size) { + opt_.flush_trigger_size = size; + return *this; + } + LogOptionBuilder& SetFlushTriggerIntervalMs(uint32_t interval) { + opt_.flush_trigger_interval_ms = interval; + return *this; + } + LogOption Build() { return opt_; } + + private: + LogOption opt_; +}; + // An interface for writing log messages. class Logger { public: - Logger() { } + Logger() {} virtual ~Logger(); // Write an entry to the log file with the specified format. - virtual void Logv(const char* format, va_list ap) = 0; + virtual void Logv(const char* file, int64_t line, const char* format, va_list ap) = 0; + + // Logger should ensure all log messages have been saved after Exit called. + virtual void Exit() {} // Default Logger can be used anywhere static void SetDefaultLogger(Logger* logger); @@ -311,33 +359,34 @@ class Logger { // Identifies a locked file. class FileLock { public: - FileLock() { } + FileLock() {} virtual ~FileLock(); + private: // No copying allowed FileLock(const FileLock&); void operator=(const FileLock&); }; +#define LEVELDB_LOG(args...) leveldb::LogImpl(__FILE__, __LINE__, args) + // Log the specified data to *info_log if info_log is non-NULL. -extern void Log(Logger* info_log, const char* format, ...) -# if defined(__GNUC__) || defined(__clang__) - __attribute__((__format__ (__printf__, 2, 3))) -# endif +extern void LogImpl(const char* file, int64_t line, Logger* info_log, const char* format, ...) +#if defined(__GNUC__) || defined(__clang__) + __attribute__((__format__(__printf__, 4, 5))) +#endif ; -extern void Log(const char* format, ...) -# if defined(__GNUC__) || defined(__clang__) - __attribute__((__format__ (__printf__, 1, 2))) -# endif +extern void LogImpl(const char* file, int64_t line, const char* format, ...) +#if defined(__GNUC__) || defined(__clang__) + __attribute__((__format__(__printf__, 3, 4))) +#endif ; // A utility routine: write "data" to the named file. -extern Status WriteStringToFile(Env* env, const Slice& data, - const std::string& fname); +extern Status WriteStringToFile(Env* env, const Slice& data, const std::string& fname); // A utility routine: read contents of named file into *data -extern Status ReadFileToString(Env* env, const std::string& fname, - std::string* data); +extern Status ReadFileToString(Env* env, const std::string& fname, std::string* data); // An implementation of Env that forwards all calls to another Env. // May be useful to clients who wish to override just part of the @@ -345,7 +394,7 @@ extern Status ReadFileToString(Env* env, const std::string& fname, class EnvWrapper : public Env { public: // Initialize an EnvWrapper that delegates all calls to *t - explicit EnvWrapper(Env* t) : target_(t) { } + explicit EnvWrapper(Env* t) : target_(t) {} virtual ~EnvWrapper(); // Return the target to which this Env forwards all calls @@ -373,21 +422,13 @@ class EnvWrapper : public Env { Status DeleteFile(const std::string& f) { return target_->DeleteFile(f); } Status CreateDir(const std::string& d) { return target_->CreateDir(d); } Status DeleteDir(const std::string& d) { return target_->DeleteDir(d); } - Status CopyFile(const std::string& f, const std::string& t) { - return target_->CopyFile(f, t); - } - Status DeleteDirRecursive(const std::string& name) { - return target_->DeleteDirRecursive(name); - } - Status GetFileSize(const std::string& f, uint64_t* s) { - return target_->GetFileSize(f, s); - } + Status CopyFile(const std::string& f, const std::string& t) { return target_->CopyFile(f, t); } + Status DeleteDirRecursive(const std::string& name) { return target_->DeleteDirRecursive(name); } + Status GetFileSize(const std::string& f, uint64_t* s) { return target_->GetFileSize(f, s); } Status RenameFile(const std::string& s, const std::string& t) { return target_->RenameFile(s, t); } - Status LockFile(const std::string& f, FileLock** l) { - return target_->LockFile(f, l); - } + Status LockFile(const std::string& f, FileLock** l) { return target_->LockFile(f, l); } Status UnlockFile(FileLock* l) { return target_->UnlockFile(l); } int64_t Schedule(void (*f)(void*), void* a, double prio, int64_t wait_time_millisec = 0) { return target_->Schedule(f, a, prio, wait_time_millisec); @@ -396,27 +437,22 @@ class EnvWrapper : public Env { void ReSchedule(int64_t id, double prio, int64_t millisec = -1) { return target_->ReSchedule(id, prio, millisec); } - void StartThread(void (*f)(void*), void* a) { - return target_->StartThread(f, a); - } - virtual Status GetTestDirectory(std::string* path) { - return target_->GetTestDirectory(path); - } - virtual Status NewLogger(const std::string& fname, Logger** result) { - return target_->NewLogger(fname, result); - } - virtual void SetLogger(Logger* logger) { - return target_->SetLogger(logger); - } - uint64_t NowMicros() { - return target_->NowMicros(); - } - void SleepForMicroseconds(int micros) { - target_->SleepForMicroseconds(micros); - } - int SetBackgroundThreads(int number) { - return target_->SetBackgroundThreads(number); + void StartThread(void (*f)(void*), void* a) { return target_->StartThread(f, a); } + virtual Status GetTestDirectory(std::string* path) { return target_->GetTestDirectory(path); } + virtual Status NewLogger(const std::string& fname, const LogOption& opt, Logger** result) { + return target_->NewLogger(fname, opt, result); } + virtual void SetLogger(Logger* logger) { return target_->SetLogger(logger); } + uint64_t NowMicros() { return target_->NowMicros(); } + void SleepForMicroseconds(int micros) { target_->SleepForMicroseconds(micros); } + int SetBackgroundThreads(int number) { return target_->SetBackgroundThreads(number); } + Status GetFileType(const std::string& path, SystemFileType* type) { + return target_->GetFileType(path, type); + }; + Status IsSamePath(const std::string& path1, const std::string& path2, bool* same) { + return target_->IsSamePath(path1, path2, same); + }; + private: Env* target_; }; diff --git a/src/leveldb/include/leveldb/env_dfs.h b/src/leveldb/include/leveldb/env_dfs.h index 3dd97b157..5b2de12c1 100644 --- a/src/leveldb/include/leveldb/env_dfs.h +++ b/src/leveldb/include/leveldb/env_dfs.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_LEVELDB_ENV_DFS_H_ -#define TERA_LEVELDB_ENV_DFS_H_ +#ifndef TERA_LEVELDB_ENV_DFS_H_ +#define TERA_LEVELDB_ENV_DFS_H_ #include #include @@ -17,68 +17,69 @@ #include "leveldb/dfs.h" #include "leveldb/env.h" #include "leveldb/status.h" -#include "../../../common/counter.h" +#include "common/counter.h" namespace leveldb { class DfsEnv : public EnvWrapper { -public: - DfsEnv(Dfs* dfs); + public: + DfsEnv(Dfs* dfs); - virtual ~DfsEnv(); + virtual ~DfsEnv(); - virtual Status NewSequentialFile(const std::string& fname, SequentialFile** result); + virtual Status NewSequentialFile(const std::string& fname, SequentialFile** result); - virtual Status NewRandomAccessFile(const std::string& fname, RandomAccessFile** result, - const EnvOptions& options); + virtual Status NewRandomAccessFile(const std::string& fname, RandomAccessFile** result, + const EnvOptions& options); - virtual Status NewWritableFile(const std::string& fname, WritableFile** result, const EnvOptions& options); + virtual Status NewWritableFile(const std::string& fname, WritableFile** result, + const EnvOptions& options); - // Returns: - // OK: exists - // NotFound: not found - // TimeOut: timeout - // IOError: other errors - virtual Status FileExists(const std::string& fname); + // Returns: + // OK: exists + // NotFound: not found + // TimeOut: timeout + // IOError: other errors + virtual Status FileExists(const std::string& fname); - bool CheckDelete(const std::string& fname, std::vector* flags); + bool CheckDelete(const std::string& fname, std::vector* flags); - virtual Status GetChildren(const std::string& path, std::vector* result); + virtual Status GetChildren(const std::string& path, std::vector* result); - virtual Status DeleteFile(const std::string& fname); + virtual Status DeleteFile(const std::string& fname); - virtual Status CreateDir(const std::string& name); + virtual Status CreateDir(const std::string& name); - virtual Status DeleteDir(const std::string& name); + virtual Status DeleteDir(const std::string& name); - virtual Status CopyFile(const std::string& from, const std::string& to); + virtual Status CopyFile(const std::string& from, const std::string& to); - virtual Status GetFileSize(const std::string& fname, uint64_t* size); + virtual Status GetFileSize(const std::string& fname, uint64_t* size); - virtual Status RenameFile(const std::string& src, const std::string& target); + virtual Status RenameFile(const std::string& src, const std::string& target); - virtual Status LockFile(const std::string& fname, FileLock** lock); + virtual Status LockFile(const std::string& fname, FileLock** lock); - virtual Status UnlockFile(FileLock* lock); + virtual Status UnlockFile(FileLock* lock); - int32_t ClearDirOwner(const std::string& dir) {return dfs_->ClearDirOwner(dir);} + int32_t ClearDirOwner(const std::string& dir) { return dfs_->ClearDirOwner(dir); } - virtual Env* CacheEnv() { return this; } + virtual Env* CacheEnv() { return this; } - static uint64_t gettid() { - pid_t tid = syscall(SYS_gettid); - return tid; - } -private: - Dfs* dfs_; + static uint64_t gettid() { + pid_t tid = syscall(SYS_gettid); + return tid; + } + + private: + Dfs* dfs_; }; /// Init dfs env void InitDfsEnv(const std::string& so_path, const std::string& conf); void InitHdfsEnv(); void InitHdfs2Env(const std::string& namenode_list); -void InitNfsEnv(const std::string& mountpoint, - const std::string& conf_path); +void InitNfsEnv(const std::string& mountpoint, const std::string& conf_path); /// default dfs env Env* EnvDfs(); /// new dfs env diff --git a/src/leveldb/include/leveldb/env_flash.h b/src/leveldb/include/leveldb/env_flash.h index 20e18f77b..62cd4c9ba 100644 --- a/src/leveldb/include/leveldb/env_flash.h +++ b/src/leveldb/include/leveldb/env_flash.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef __LEVELDB_ENV_FLASH_H_ -#define __LEVELDB_ENV_FLASH_H_ +#ifndef __LEVELDB_ENV_FLASH_H_ +#define __LEVELDB_ENV_FLASH_H_ #include #include @@ -14,93 +14,82 @@ #include "status.h" #include "util/thread_pool.h" - namespace leveldb { -class FlashEnv : public EnvWrapper{ -public: - FlashEnv(Env* base_env); +class FlashEnv : public EnvWrapper { + public: + FlashEnv(Env* base_env); - ~FlashEnv(); + ~FlashEnv(); - virtual Status NewSequentialFile(const std::string& fname, - SequentialFile** result); + virtual Status NewSequentialFile(const std::string& fname, SequentialFile** result); - virtual Status NewRandomAccessFile(const std::string& fname, - RandomAccessFile** result, - const EnvOptions& options); + virtual Status NewRandomAccessFile(const std::string& fname, RandomAccessFile** result, + const EnvOptions& options); - virtual Status NewRandomAccessFile(const std::string& fname, - uint64_t fsize, - RandomAccessFile** result, - const EnvOptions& options); + virtual Status NewRandomAccessFile(const std::string& fname, uint64_t fsize, + RandomAccessFile** result, const EnvOptions& options); - virtual Status NewWritableFile(const std::string& fname, - WritableFile** result, - const EnvOptions& options); + virtual Status NewWritableFile(const std::string& fname, WritableFile** result, + const EnvOptions& options); - virtual Status FileExists(const std::string& fname); + virtual Status FileExists(const std::string& fname); - virtual Status GetChildren(const std::string& path, std::vector* result); + virtual Status GetChildren(const std::string& path, std::vector* result); - virtual Status DeleteFile(const std::string& fname); + virtual Status DeleteFile(const std::string& fname); - virtual Status CreateDir(const std::string& name); + virtual Status CreateDir(const std::string& name); - virtual Status DeleteDir(const std::string& name); + virtual Status DeleteDir(const std::string& name); - virtual Status CopyFile(const std::string& from, - const std::string& to) { - return Status::OK(); - } + virtual Status CopyFile(const std::string& from, const std::string& to) { return Status::OK(); } - virtual Status GetFileSize(const std::string& fname, uint64_t* size); + virtual Status GetFileSize(const std::string& fname, uint64_t* size); - virtual Status RenameFile(const std::string& src, const std::string& target); + virtual Status RenameFile(const std::string& src, const std::string& target); - virtual Status LockFile(const std::string& fname, FileLock** lock); + virtual Status LockFile(const std::string& fname, FileLock** lock); - virtual Status UnlockFile(FileLock* lock); + virtual Status UnlockFile(FileLock* lock); - virtual Env* CacheEnv() { return posix_env_; } + virtual Env* CacheEnv() { return posix_env_; } - Env* BaseEnv() { return dfs_env_; } + Env* BaseEnv() { return dfs_env_; } - /// flash path for local flash cache - void SetFlashPath(const std::string& path, bool vanish_allowed); - const std::string& FlashPath(const std::string& fname); - const bool VanishAllowed() { - return vanish_allowed_; - } - const std::vector& GetFlashPaths() { - return flash_paths_; - } + /// flash path for local flash cache + void SetFlashPath(const std::string& path, bool vanish_allowed); + const std::string& FlashPath(const std::string& fname); + const bool VanishAllowed() { return vanish_allowed_; } + const std::vector& GetFlashPaths() { return flash_paths_; } - /// copy to local - void SetIfForceReadFromCache(bool force); - bool ForceReadFromCache(); - void SetUpdateFlashThreadNumber(int thread_num); + /// copy to local + void SetIfForceReadFromCache(bool force); + bool ForceReadFromCache(); + void SetUpdateFlashThreadNumber(int thread_num); + void TryRollbackPersistentCacheFiles(); + void DoRollbackPersistentCacheFiles(const std::string& path); - bool FlashFileIdentical(const std::string& fname, uint64_t fsize); - void ScheduleUpdateFlash(const std::string& fname, uint64_t fsize, int64_t priority); - void UpdateFlashFile(const std::string& fname, uint64_t fsize); + bool FlashFileIdentical(const std::string& fname, uint64_t fsize); + void ScheduleUpdateFlash(const std::string& fname, uint64_t fsize, int64_t priority); + void UpdateFlashFile(const std::string& fname, uint64_t fsize); -private: - Env* dfs_env_; - Env* posix_env_; + private: + Env* dfs_env_; + Env* posix_env_; - std::vector flash_paths_; - bool vanish_allowed_; + std::vector flash_paths_; + bool vanish_allowed_; - bool force_read_from_cache_; - ThreadPool update_flash_threads_; - port::Mutex update_flash_mutex_; - struct UpdateFlashTask { - int64_t id; - int64_t priority; - }; - std::map update_flash_waiting_files_; - int64_t update_flash_retry_interval_millis_; + bool force_read_from_cache_; + ThreadPool update_flash_threads_; + port::Mutex update_flash_mutex_; + struct UpdateFlashTask { + int64_t id; + int64_t priority; + }; + std::map update_flash_waiting_files_; + int64_t update_flash_retry_interval_millis_; }; /// new flash env diff --git a/src/leveldb/include/leveldb/env_flash_block_cache.h b/src/leveldb/include/leveldb/env_flash_block_cache.h deleted file mode 100644 index e216bd766..000000000 --- a/src/leveldb/include/leveldb/env_flash_block_cache.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#pragma once - -#include "leveldb/env.h" -#include "leveldb/options.h" -#include "leveldb/status.h" - -// block-based cache env -namespace leveldb { - -class FlashBlockCacheImpl; - -class FlashBlockCacheEnv : public EnvWrapper { -public: - explicit FlashBlockCacheEnv(Env* base); - - virtual ~FlashBlockCacheEnv(); - - virtual Status FileExists(const std::string& fname); - - virtual Status GetChildren(const std::string& path, - std::vector* result); - - virtual Status DeleteFile(const std::string& fname); - - virtual Status CreateDir(const std::string& name); - - virtual Status DeleteDir(const std::string& name); - - virtual Status CopyFile(const std::string& from, - const std::string& to); - - virtual Status GetFileSize(const std::string& fname, uint64_t* size); - - virtual Status RenameFile(const std::string& src, const std::string& target); - - virtual Status LockFile(const std::string& fname, FileLock** lock); - - virtual Status UnlockFile(FileLock* lock); - - virtual Status NewSequentialFile(const std::string& fname, - SequentialFile** result); // never cache log - - // cache relatively - virtual Status NewRandomAccessFile(const std::string& fname, - RandomAccessFile** result, - const EnvOptions& options); // cache Pread - virtual Status NewRandomAccessFile(const std::string& fname, - uint64_t fsize, - RandomAccessFile** result, - const EnvOptions& options); // cache Pread - - virtual Status NewWritableFile(const std::string& fname, - WritableFile** result, - const EnvOptions& options); // cache Append - virtual Status LoadCache(const FlashBlockCacheOptions& opts, const std::string& cache_dir); - -private: - std::vector caches_; - Env* dfs_env_; -}; - -Env* NewFlashBlockCacheEnv(Env* base); - -} // leveldb - diff --git a/src/leveldb/include/leveldb/env_inmem.h b/src/leveldb/include/leveldb/env_inmem.h index d5712ade3..273254f31 100644 --- a/src/leveldb/include/leveldb/env_inmem.h +++ b/src/leveldb/include/leveldb/env_inmem.h @@ -2,9 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef __LEVELDB_ENV_INMEM_H_ -#define __LEVELDB_ENV_INMEM_H_ - +#ifndef __LEVELDB_ENV_INMEM_H_ +#define __LEVELDB_ENV_INMEM_H_ #include #include @@ -14,56 +13,49 @@ #include "env.h" #include "status.h" - namespace leveldb { -class InMemoryEnv : public EnvWrapper{ -public: - InMemoryEnv(Env* base_env); +class InMemoryEnv : public EnvWrapper { + public: + InMemoryEnv(Env* base_env); - ~InMemoryEnv(); + ~InMemoryEnv(); - virtual Status NewSequentialFile(const std::string& fname, - SequentialFile** result); + virtual Status NewSequentialFile(const std::string& fname, SequentialFile** result); - virtual Status NewRandomAccessFile(const std::string& fname, - RandomAccessFile** result, - const EnvOptions& options); + virtual Status NewRandomAccessFile(const std::string& fname, RandomAccessFile** result, + const EnvOptions& options); - virtual Status NewWritableFile(const std::string& fname, - WritableFile** result, - const EnvOptions& options); + virtual Status NewWritableFile(const std::string& fname, WritableFile** result, + const EnvOptions& options); - virtual Status FileExists(const std::string& fname); + virtual Status FileExists(const std::string& fname); - bool CheckDelete(const std::string& fname, std::vector* flags); + bool CheckDelete(const std::string& fname, std::vector* flags); - virtual Status GetChildren(const std::string& path, std::vector* result); + virtual Status GetChildren(const std::string& path, std::vector* result); - virtual Status DeleteFile(const std::string& fname); + virtual Status DeleteFile(const std::string& fname); - virtual Status CreateDir(const std::string& name); + virtual Status CreateDir(const std::string& name); - virtual Status DeleteDir(const std::string& name); + virtual Status DeleteDir(const std::string& name); - virtual Status CopyFile(const std::string& from, - const std::string& to) { - return Status::OK(); - } + virtual Status CopyFile(const std::string& from, const std::string& to) { return Status::OK(); } - virtual Status GetFileSize(const std::string& fname, uint64_t* size); + virtual Status GetFileSize(const std::string& fname, uint64_t* size); - virtual Status RenameFile(const std::string& src, const std::string& target); + virtual Status RenameFile(const std::string& src, const std::string& target); - virtual Status LockFile(const std::string& fname, FileLock** lock); + virtual Status LockFile(const std::string& fname, FileLock** lock); - virtual Status UnlockFile(FileLock* lock); + virtual Status UnlockFile(FileLock* lock); - virtual Env* CacheEnv() { return mem_env_; } + virtual Env* CacheEnv() { return mem_env_; } -private: - Env* dfs_env_; - Env* mem_env_; + private: + Env* dfs_env_; + Env* mem_env_; }; /// new mem env diff --git a/src/leveldb/include/leveldb/env_mock.h b/src/leveldb/include/leveldb/env_mock.h index b2a1c414d..6915f1a1b 100644 --- a/src/leveldb/include/leveldb/env_mock.h +++ b/src/leveldb/include/leveldb/env_mock.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_LEVELDB_ENV_TEST_H_ -#define TERA_LEVELDB_ENV_TEST_H_ +#ifndef TERA_LEVELDB_ENV_TEST_H_ +#define TERA_LEVELDB_ENV_TEST_H_ #include #include @@ -21,57 +21,56 @@ namespace leveldb { class MockEnv : public EnvWrapper { -public: - MockEnv(); + public: + MockEnv(); - virtual ~MockEnv(); - void SetPrefix(const std::string& p); - void ResetMock(); + virtual ~MockEnv(); + void SetPrefix(const std::string& p); + void ResetMock(); - void SetNewSequentialFileFailedCallback(bool (*p)(int32_t i, const std::string& fname)); - void SetSequentialFileReadCallback(bool (*p)(int32_t i, char* scratch, size_t* mock_size)); - virtual Status NewSequentialFile(const std::string& fname, SequentialFile** result); + void SetNewSequentialFileFailedCallback(bool (*p)(int32_t i, const std::string& fname)); + void SetSequentialFileReadCallback(bool (*p)(int32_t i, char* scratch, size_t* mock_size)); + virtual Status NewSequentialFile(const std::string& fname, SequentialFile** result); - virtual Status NewRandomAccessFile(const std::string& fname, RandomAccessFile** result, - const EnvOptions& options); + virtual Status NewRandomAccessFile(const std::string& fname, RandomAccessFile** result, + const EnvOptions& options); - virtual Status NewWritableFile(const std::string& fname, - WritableFile** result, - const EnvOptions& options); + virtual Status NewWritableFile(const std::string& fname, WritableFile** result, + const EnvOptions& options); - virtual Status FileExists(const std::string& fname); + virtual Status FileExists(const std::string& fname); - bool CheckDelete(const std::string& fname, std::vector* flags); + bool CheckDelete(const std::string& fname, std::vector* flags); - void SetGetChildrenCallback(bool (*p)(int32_t i, const std::string& fname)); - virtual Status GetChildren(const std::string& path, std::vector* result); + void SetGetChildrenCallback(bool (*p)(int32_t i, const std::string& fname)); + virtual Status GetChildren(const std::string& path, std::vector* result); - virtual Status DeleteFile(const std::string& fname); + virtual Status DeleteFile(const std::string& fname); - virtual Status CreateDir(const std::string& name); + virtual Status CreateDir(const std::string& name); - virtual Status DeleteDir(const std::string& name); + virtual Status DeleteDir(const std::string& name); - virtual Status CopyFile(const std::string& from, const std::string& to); + virtual Status CopyFile(const std::string& from, const std::string& to); - virtual Status GetFileSize(const std::string& fname, uint64_t* size); + virtual Status GetFileSize(const std::string& fname, uint64_t* size); - virtual Status RenameFile(const std::string& src, const std::string& target); + virtual Status RenameFile(const std::string& src, const std::string& target); - virtual Status LockFile(const std::string& fname, FileLock** lock); + virtual Status LockFile(const std::string& fname, FileLock** lock); - virtual Status UnlockFile(FileLock* lock); + virtual Status UnlockFile(FileLock* lock); - virtual Env* CacheEnv() { return this; } + virtual Env* CacheEnv() { return this; } - static uint64_t gettid() { - pid_t tid = syscall(SYS_gettid); - return tid; - } + static uint64_t gettid() { + pid_t tid = syscall(SYS_gettid); + return tid; + } }; Env* NewMockEnv(); -} // namespace leveldb +} // namespace leveldb #endif // TERA_LEVELDB_ENV_DFS_H_ diff --git a/src/leveldb/include/leveldb/filter_policy.h b/src/leveldb/include/leveldb/filter_policy.h index 4ba4706f8..641eb409f 100644 --- a/src/leveldb/include/leveldb/filter_policy.h +++ b/src/leveldb/include/leveldb/filter_policy.h @@ -44,8 +44,7 @@ class FilterPolicy { // // Warning: do not change the initial contents of *dst. Instead, // append the newly constructed filter to *dst. - virtual void CreateFilter(const Slice* keys, int n, std::string* dst) - const = 0; + virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const = 0; // "filter" contains the data appended by a preceding call to // CreateFilter() on this class. This method must return true if @@ -75,7 +74,6 @@ extern const FilterPolicy* NewTTLKvBloomFilterPolicy(int bits_per_key); // for bigtable mode extern const FilterPolicy* NewRowKeyBloomFilterPolicy(int bits_per_key, const RawKeyOperator* raw_key_operator); - } #endif // STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_ diff --git a/src/leveldb/include/leveldb/lg_coding.h b/src/leveldb/include/leveldb/lg_coding.h index 461edc2a8..130d90f3f 100644 --- a/src/leveldb/include/leveldb/lg_coding.h +++ b/src/leveldb/include/leveldb/lg_coding.h @@ -11,10 +11,10 @@ namespace leveldb { -extern void PutFixed32LGId(std::string *dst, uint32_t lg_id); +extern void PutFixed32LGId(std::string* dst, uint32_t lg_id); extern bool GetFixed32LGId(Slice* input, uint32_t* lg_id); -} // namespace leveldb +} // namespace leveldb -#endif // STORAGE_LEVELDB_UTIL_LG_CODING_H_ +#endif // STORAGE_LEVELDB_UTIL_LG_CODING_H_ diff --git a/src/leveldb/include/leveldb/options.h b/src/leveldb/include/leveldb/options.h index 79c8f0135..aafbb012d 100644 --- a/src/leveldb/include/leveldb/options.h +++ b/src/leveldb/include/leveldb/options.h @@ -15,8 +15,11 @@ #include #include #include +#include #include +#include "common/semaphore.h" + namespace leveldb { // We leave eight bits empty at the bottom so a type and sequence# @@ -24,7 +27,7 @@ namespace leveldb { static const uint64_t kMaxSequenceNumber = ((0x1ull << 56) - 1); static const size_t kDefaultBlockSize = 4096; -static const size_t kDefaultSstSize = 8 * 1024 * 1024; // 8 MB +static const size_t kDefaultSstSize = 8 * 1024 * 1024; // 8 MB class Cache; class TableCache; class CompactStrategyFactory; @@ -32,6 +35,7 @@ class Comparator; class Env; class FilterPolicy; class Logger; +class PersistentCache; // DB contents are stored in a set of blocks, each of which holds a // sequence of key,value pairs. Each block may be compressed before @@ -40,10 +44,10 @@ class Logger; enum CompressionType { // NOTE: do not change the values of existing entries, as these are // part of the persistent format on disk. - kNoCompression = 0x0, + kNoCompression = 0x0, kSnappyCompression = 0x1, - kBmzCompression = 0x2, - kLZ4Compression = 0x3 + kBmzCompression = 0x2, + kLZ4Compression = 0x3 }; enum RawKeyFormat { @@ -85,6 +89,8 @@ struct LG_info { uint64_t posix_write_buffer_size; bool table_builder_batch_write; uint64_t table_builder_batch_size; + int32_t memtable_shard_num; + std::shared_ptr persistent_cache; // Other LG properties // ... @@ -102,9 +108,10 @@ struct LG_info { seek_latency(0), use_direct_io_read(false), use_direct_io_write(false), - posix_write_buffer_size(512<<10), + posix_write_buffer_size(512 << 10), table_builder_batch_write(false), - table_builder_batch_size(0) {} + table_builder_batch_size(0), + memtable_shard_num(0) {} }; // Options to control the behavior of a database (passed to DB::Open) @@ -269,11 +276,15 @@ struct Options { // default: 3600(seconds) uint64_t manifest_switch_interval; + // Create new manifest to avoid dfs failure + // default: 2(MB) + uint64_t manifest_switch_size; + // tera raw key encoding format RawKeyFormat raw_key_format; // seek latency(in ns) - int seek_latency; + int64_t seek_latency; // dump imm & mem table at db close, and unlink all log file bool dump_mem_on_shutdown; @@ -347,6 +358,10 @@ struct Options { bool table_builder_batch_write; uint64_t table_builder_batch_size; + int32_t memtable_shard_num; + std::shared_ptr persistent_cache; + // used for removing leveldb path's prefix when access persistent cache. + std::string dfs_storage_path_prefix; // Create an Options object with default values for all fields. Options(); @@ -364,6 +379,11 @@ struct ReadOptions { // Default: true bool fill_cache; + // Should the file this iteration read be cached in persistent cache? + // It's invalid when persistent cache is disabled. + // Default: true + bool fill_persistent_cache; + // If "snapshot" is non-NULL, read as of the supplied snapshot // (which must belong to the DB that is being read and which must // not have been released). If "snapshot" is NULL, use an impliicit @@ -392,19 +412,23 @@ struct ReadOptions { // size to prefetch, default:1MB uint64_t prefetch_scan_size; + // If limit the max thread for reading from dfs, which will reserve some empty thread for + // reading from ssd. + // Default: false + bool enable_dfs_read_thread_limiter; + ReadOptions(const Options* db_option) : verify_checksums(false), fill_cache(true), + fill_persistent_cache(true), snapshot(kMaxSequenceNumber), target_lgs(NULL), read_single_row(false), db_opt(db_option), prefetch_scan(false), - prefetch_scan_size(1 << 20) { - } - ReadOptions() { - *this = ReadOptions(NULL); - } + prefetch_scan_size(1 << 20), + enable_dfs_read_thread_limiter(false) {} + ReadOptions() { *this = ReadOptions(NULL); } }; // Options that control write operations @@ -429,67 +453,8 @@ struct WriteOptions { bool disable_wal; - WriteOptions() - : sync(false), - disable_wal(false) { - } + WriteOptions() : sync(false), disable_wal(false) {} }; - -// block based cache options -struct FlashBlockCacheOptions { - Options opts; - - // SSD's dir - std::string cache_dir; - - // ignore local conf, force update conf from FLAG file - bool force_update_conf_enabled; - - // Max available size for ssd cache - // Default: 350G - uint64_t cache_size; - - // Size of each block set - // Default: 1GB - uint64_t blockset_size; - - // Size of user data packed per block. - // Default: 8KB - uint64_t block_size; - - // Batch write size for fid alloctor - // Default: 100000 - uint64_t fid_batch_num; - - // block set number, which is equal to cache_size / blockset_size - uint64_t blockset_num; - - // number of blocks per block set - uint64_t blocks_per_set; - - // block cache's meta leveldb's block cache size - // Default: 2G - uint64_t meta_block_cache_size; - - // block cache's meta leveldb's table cache size - // Default: 512M - uint64_t meta_table_cache_size; - - // block cache's meta leveldb's write buffer size - // Default: 1M - uint64_t write_buffer_size; - - // Base env for block cache - // Default: dfs_env - Env* env; - - // Local env for block cache - // Default: posix_env - Env* cache_env; - - FlashBlockCacheOptions(); -}; - } // namespace leveldb #endif // STORAGE_LEVELDB_INCLUDE_OPTIONS_H_ diff --git a/src/leveldb/include/leveldb/persistent_cache.h b/src/leveldb/include/leveldb/persistent_cache.h new file mode 100644 index 000000000..5ef17f53c --- /dev/null +++ b/src/leveldb/include/leveldb/persistent_cache.h @@ -0,0 +1,151 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#pragma once +#include +#include +#include +#include +#include +#include + +#include "env.h" +#include "table/format.h" +#include "status.h" + +namespace leveldb { +class Env; +class WriteableCacheFile; + +// persistent cache's metric name for prometheus. +namespace PersistentCacheMetricNames { +const char* const kWriteThroughput = "persistent_cache_write_through_put"; +const char* const kWriteCount = "persistent_cache_write_cnt"; +const char* const kReadThroughput = "persistent_cache_read_through_put"; +const char* const kCacheHits = "persistent_cache_hits"; +const char* const kCacheMisses = "persistent_cache_misses"; +const char* const kCacheErrors = "persistent_cache_errors"; +const char* const kCacheCapacity = "persistent_cache_capacity"; +const char* const kFileEntries = "persistent_cache_file_entries"; +const char* const kCacheSize = "persistent_cache_size"; +const char* const kMetaDataSize = "persistent_cache_metadata_size"; +}; // PersistentCacheMetricNames + +// Persistent Cache Config +// +// This struct captures all the options that are used to configure persistent +// cache. Some of the terminologies used in naming the options are +// +// cache size : +// This is the logical maximum for the cache size +// +struct PersistentCacheConfig { + PersistentCacheConfig(leveldb::Env* _env, const std::string& _path, const uint64_t _cache_size) + : env(_env), path(_path), cache_size(_cache_size) { + if (path.back() != '/') { + path.append("/"); + } + } + + // + // Validate the settings. Our intentions are to catch erroneous settings ahead + // of time instead going violating invariants or causing dead locks. + // + leveldb::Status ValidateSettings() const { + if (!env || path.empty()) { + return leveldb::Status::InvalidArgument("empty or null args"); + } + + if (cache_size <= 0) { + return leveldb::Status::InvalidArgument("cache size <= 0"); + } + + return leveldb::Status::OK(); + } + + // + // Env abstraction to use for system level operations + // + leveldb::Env* env; + + // + // Path for the block cache where blocks are persisted + // + std::string path; + + // + // Logical cache size + // + int64_t cache_size = std::numeric_limits::max(); + + // + // Retry times when reserve space failed + // + uint64_t write_retry_times = 5; + + // + // Transfer existing flash_env files to persistent cache + // + bool transfer_flash_env_files = false; + + std::string ToString() const; + + void SetEnvOptions(const EnvOptions& opt) { env_opt = opt; } + + EnvOptions env_opt; +}; + +// PersistentCache +// +// Persistent cache interface for caching IO pages on a persistent medium. The +// cache interface is specifically designed for persistent read cache. +class PersistentCache { + public: + using StatsType = std::map; + + virtual ~PersistentCache() = default; + + // Read page cache by page identifier. + // + // key user cache key for target cache file. + // offset offset to read in the file. + // length length to read in the file. + // content user should use data in content. + // data buffer where the data should be copied. + virtual Status Read(const Slice& key, size_t offset, size_t length, Slice* content, + SstDataScratch* scratch) = 0; + + // Force evict a file from the cache. + virtual void ForceEvict(const Slice& key) = 0; + + // Generate a new cache file for write. + // After write done, user should call two finish functions: + // 1. file->Close(key): + // This function will insert the cache file to persistent + // cache system automatically. + // 2. file->Abandon(): + // This funtion will abandon this file, + // and this file will never be read by user. + virtual Status NewWriteableCacheFile(const std::string& path, WriteableCacheFile**) = 0; + + // Return total capacity of persistent cache in bytes, including used and unused space. + virtual size_t GetCapacity() const = 0; + + // Return used bytes of persistent cache. + virtual size_t GetUsage() const = 0; + + virtual Status Open() = 0; + + virtual std::vector GetAllKeys() = 0; + virtual void GarbageCollect() = 0; +}; + +Status NewPersistentCache(const PersistentCacheConfig&, std::shared_ptr*); + +Status NewShardedPersistentCache(const std::vector&, + std::shared_ptr*); +} // leveldb diff --git a/src/leveldb/include/leveldb/raw_key_operator.h b/src/leveldb/include/leveldb/raw_key_operator.h index c8868c237..621e8ccca 100644 --- a/src/leveldb/include/leveldb/raw_key_operator.h +++ b/src/leveldb/include/leveldb/raw_key_operator.h @@ -13,28 +13,20 @@ namespace leveldb { class RawKeyOperator { -public: - virtual void EncodeTeraKey(const std::string& row_key, - const std::string& family, - const std::string& qualifier, - int64_t timestamp, - TeraKeyType type, - std::string* tera_key) const = 0; - - virtual bool ExtractTeraKey(const Slice& tera_key, - Slice* row_key, - Slice* family, - Slice* qualifier, - int64_t* timestamp, - TeraKeyType* type) const = 0; - virtual int Compare(const Slice& key1, - const Slice& key2) const = 0; - virtual const char* Name() const = 0; + public: + virtual void EncodeTeraKey(const std::string& row_key, const std::string& family, + const std::string& qualifier, int64_t timestamp, TeraKeyType type, + std::string* tera_key) const = 0; + + virtual bool ExtractTeraKey(const Slice& tera_key, Slice* row_key, Slice* family, + Slice* qualifier, int64_t* timestamp, TeraKeyType* type) const = 0; + virtual int Compare(const Slice& key1, const Slice& key2) const = 0; + virtual const char* Name() const = 0; }; const RawKeyOperator* ReadableRawKeyOperator(); const RawKeyOperator* BinaryRawKeyOperator(); const RawKeyOperator* KvRawKeyOperator(); -} // namespace leveldb -#endif //TERA_LEVELDB_UTILS_RAW_KEY_OPERATOR_H +} // namespace leveldb +#endif // TERA_LEVELDB_UTILS_RAW_KEY_OPERATOR_H diff --git a/src/leveldb/include/leveldb/slice.h b/src/leveldb/include/leveldb/slice.h index 286f303f7..eb5dd426c 100644 --- a/src/leveldb/include/leveldb/slice.h +++ b/src/leveldb/include/leveldb/slice.h @@ -31,16 +31,16 @@ namespace leveldb { class Slice { public: // Create an empty slice. - Slice() : data_(""), size_(0) { } + Slice() : data_(""), size_(0) {} // Create a slice that refers to d[0,n-1]. - Slice(const char* d, size_t n) : data_(d), size_(n) { } + Slice(const char* d, size_t n) : data_(d), size_(n) {} // Create a slice that refers to the contents of "s" - Slice(const std::string& s) : data_(s.data()), size_(s.size()) { } + Slice(const std::string& s) : data_(s.data()), size_(s.size()) {} // Create a slice that refers to s[0,strlen(s)-1] - Slice(const char* s) : data_(s), size_(strlen(s)) { } + Slice(const char* s) : data_(s), size_(strlen(s)) {} // Return a pointer to the beginning of the referenced data const char* data() const { return data_; } @@ -59,7 +59,10 @@ class Slice { } // Change this slice to refer to an empty array - void clear() { data_ = ""; size_ = 0; } + void clear() { + data_ = ""; + size_ = 0; + } // Drop the first "n" bytes from this slice. void remove_prefix(size_t n) { @@ -85,8 +88,18 @@ class Slice { // Return true iff "x" is a prefix of "*this" bool starts_with(const Slice& x) const { - return ((size_ >= x.size_) && - (memcmp(data_, x.data_, x.size_) == 0)); + return ((size_ >= x.size_) && (memcmp(data_, x.data_, x.size_) == 0)); + } + + // Return true iff "x" is a suffix of "*this" + bool ends_with(const Slice& x) const { + return ((size_ >= x.size_) && (memcmp(data_ + (size_ - x.size_), x.data_, x.size_) == 0)); + } + + void remove_specified_prefix(const Slice& x) { + assert(size() >= x.size()); + assert(starts_with(x)); + remove_prefix(x.size()); } private: @@ -97,25 +110,23 @@ class Slice { }; inline bool operator==(const Slice& x, const Slice& y) { - return ((x.size() == y.size()) && - (memcmp(x.data(), y.data(), x.size()) == 0)); + return ((x.size() == y.size()) && (memcmp(x.data(), y.data(), x.size()) == 0)); } -inline bool operator!=(const Slice& x, const Slice& y) { - return !(x == y); -} +inline bool operator!=(const Slice& x, const Slice& y) { return !(x == y); } inline int Slice::compare(const Slice& b) const { const int min_len = (size_ < b.size_) ? size_ : b.size_; int r = memcmp(data_, b.data_, min_len); if (r == 0) { - if (size_ < b.size_) r = -1; - else if (size_ > b.size_) r = +1; + if (size_ < b.size_) + r = -1; + else if (size_ > b.size_) + r = +1; } return r; } } // namespace leveldb - #endif // STORAGE_LEVELDB_INCLUDE_SLICE_H_ diff --git a/src/leveldb/include/leveldb/slog.h b/src/leveldb/include/leveldb/slog.h index 9b6d3651c..3f1059fdc 100644 --- a/src/leveldb/include/leveldb/slog.h +++ b/src/leveldb/include/leveldb/slog.h @@ -10,44 +10,41 @@ namespace leveldb { enum LogLevel { - LOG_LEVEL_FATAL = 0, - LOG_LEVEL_ERROR = 1, - LOG_LEVEL_WARNING = 2, - LOG_LEVEL_NOTICE = 3, - LOG_LEVEL_INFO = 3, - LOG_LEVEL_TRACE = 4, - LOG_LEVEL_DEBUG = 5, + LOG_LEVEL_FATAL = 0, + LOG_LEVEL_ERROR = 1, + LOG_LEVEL_WARNING = 2, + LOG_LEVEL_NOTICE = 3, + LOG_LEVEL_INFO = 3, + LOG_LEVEL_TRACE = 4, + LOG_LEVEL_DEBUG = 5, }; LogLevel GetLogLevel(); void SetLogLevel(LogLevel level); -void LogHandler(LogLevel level, const char* filename, int line, - const char *fmt, ...); +void LogHandler(LogLevel level, const char* filename, int line, const char* fmt, ...); -} // namespace leveldb +} // namespace leveldb -#define LEVELDB_SET_LOG_LEVEL(level) \ - ::leveldb::SetLogLevel(::leveldb::LOG_LEVEL_##level) +#define LEVELDB_SET_LOG_LEVEL(level) ::leveldb::SetLogLevel(::leveldb::LOG_LEVEL_##level) -#define LDB_SLOG(level, fmt, arg...) \ - (::leveldb::LOG_LEVEL_##level > ::leveldb::GetLogLevel()) ? \ - (void)0 : ::leveldb::LogHandler( \ - ::leveldb::LOG_LEVEL_##level, __FILE__, __LINE__, fmt, ##arg) \ +#define LDB_SLOG(level, fmt, arg...) \ + (::leveldb::LOG_LEVEL_##level > ::leveldb::GetLogLevel()) \ + ? (void)0 \ + : ::leveldb::LogHandler(::leveldb::LOG_LEVEL_##level, __FILE__, __LINE__, fmt, ##arg) -#define LDB_SLOG_IF(condition, level, fmt, arg...) \ - !(condition) ? (void)0 : ::leveldb::log_handler( \ - ::leveldb::LOG_LEVEL_##level, __FILE__, __LINE__, fmt, ##arg) +#define LDB_SLOG_IF(condition, level, fmt, arg...) \ + !(condition) ? (void)0 : ::leveldb::log_handler(::leveldb::LOG_LEVEL_##level, __FILE__, \ + __LINE__, fmt, ##arg) -#define LDB_SCHECK(expression) \ - LDB_SLOG_IF(!(expression), FATAL, "CHECK failed: " #expression) +#define LDB_SCHECK(expression) LDB_SLOG_IF(!(expression), FATAL, "CHECK failed: " #expression) #define LDB_SCHECK_EQ(a, b) LDB_SCHECK((a) == (b)) #define LDB_SCHECK_NE(a, b) LDB_SCHECK((a) != (b)) -#define LDB_SCHECK_LT(a, b) LDB_SCHECK((a) < (b)) +#define LDB_SCHECK_LT(a, b) LDB_SCHECK((a) < (b)) #define LDB_SCHECK_LE(a, b) LDB_SCHECK((a) <= (b)) -#define LDB_SCHECK_GT(a, b) LDB_SCHECK((a) > (b)) +#define LDB_SCHECK_GT(a, b) LDB_SCHECK((a) > (b)) #define LDB_SCHECK_GE(a, b) LDB_SCHECK((a) >= (b)) -#endif // STORAGE_LEVELDB_UTIL_SLOG_H +#endif // STORAGE_LEVELDB_UTIL_SLOG_H diff --git a/src/leveldb/include/leveldb/statistics.h b/src/leveldb/include/leveldb/statistics.h deleted file mode 100644 index 31fc45444..000000000 --- a/src/leveldb/include/leveldb/statistics.h +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#pragma once - -#include -#include - -#include -#include - -namespace leveldb { - -/** - * Keep adding ticker's here. - * 1. Any ticker should be added before TICKER_ENUM_MAX. - * 2. Add a readable string in TickersNameMap below for the newly added ticker. - */ -enum Tickers : uint32_t { - TICKER_ENUM_MAX -}; - -// The order of items listed in Tickers should be the same as -// the order listed in TickersNameMap -const std::vector > TickersNameMap = { -}; - -/** - * Keep adding histogram's here. - * Any histogram whould have value less than HISTOGRAM_ENUM_MAX - * Add a new Histogram by assigning it the current value of HISTOGRAM_ENUM_MAX - * Add a string representation in HistogramsNameMap below - * And increment HISTOGRAM_ENUM_MAX - */ -enum Histograms : uint32_t { - // tera flash block cache spec - FLASH_BLOCK_CACHE_PREAD_QUEUE = 0, - FLASH_BLOCK_CACHE_PREAD_SSD_READ, - FLASH_BLOCK_CACHE_PREAD_FILL_USER_DATA, - FLASH_BLOCK_CACHE_PREAD_RELEASE_BLOCK, - FLASH_BLOCK_CACHE_LOCKMAP_BS_RELOAD_NR, - FLASH_BLOCK_CACHE_PREAD_GET_BLOCK, - FLASH_BLOCK_CACHE_PREAD_BLOCK_NR, - FLASH_BLOCK_CACHE_GET_BLOCK_SET, - FLASH_BLOCK_CACHE_BS_LRU_LOOKUP, - FLASH_BLOCK_CACHE_PREAD_WAIT_UNLOCK, - FLASH_BLOCK_CACHE_ALLOC_FID, - FLASH_BLOCK_CACHE_GET_FID, - FLASH_BLOCK_CACHE_EVICT_NR, - FLASH_BLOCK_CACHE_PREAD_DFS_READ, - FLASH_BLOCK_CACHE_PREAD_SSD_WRITE, - HISTOGRAM_ENUM_MAX, // TODO(ldemailly): enforce HistogramsNameMap match -}; - -const std::vector > HistogramsNameMap = { - {FLASH_BLOCK_CACHE_PREAD_QUEUE, "flash_block_cache.pread_queue"}, - {FLASH_BLOCK_CACHE_PREAD_SSD_READ, "flash_block_cache.pread_ssd_read"}, - {FLASH_BLOCK_CACHE_PREAD_FILL_USER_DATA, "flash_block_cache.pread_fill_user_data"}, - {FLASH_BLOCK_CACHE_PREAD_RELEASE_BLOCK, "flash_block_cache.pread_release_block"}, - {FLASH_BLOCK_CACHE_LOCKMAP_BS_RELOAD_NR, "flash_block_cache.lockmap_bs_reload_nr"}, - {FLASH_BLOCK_CACHE_PREAD_GET_BLOCK, "flash_block_cache.pread_get_block"}, - {FLASH_BLOCK_CACHE_PREAD_BLOCK_NR, "flash_block_cache.pread_block_nr"}, - {FLASH_BLOCK_CACHE_GET_BLOCK_SET, "flash_block_cache.get_block_set"}, - {FLASH_BLOCK_CACHE_BS_LRU_LOOKUP, "flash_block_cache.bs_lru_lookup"}, - {FLASH_BLOCK_CACHE_PREAD_WAIT_UNLOCK, "flash_block_cache.pread_wait_unlock"}, - {FLASH_BLOCK_CACHE_ALLOC_FID, "flash_block_cache.alloc_fid"}, - {FLASH_BLOCK_CACHE_GET_FID, "flash_block_cache.get_fid"}, - {FLASH_BLOCK_CACHE_EVICT_NR, "flash_block_cache.evict_nr"}, - {FLASH_BLOCK_CACHE_PREAD_DFS_READ, "flash_block_cache.pread_dfs_read"}, - {FLASH_BLOCK_CACHE_PREAD_SSD_WRITE, "flash_block_cache.pread_ssd_write"}, -}; - -struct HistogramData { - double median; // 中值 - double percentile95; - double percentile99; // 99分为点 - double average; - double standard_deviation; -}; - -// Analyze the performance of a db -class Statistics { - public: - virtual ~Statistics() {} - - virtual int64_t GetTickerCount(uint32_t ticker_type) = 0; - virtual void RecordTick(uint32_t ticker_type, uint64_t count = 0) = 0; - virtual void SetTickerCount(uint32_t ticker_type, uint64_t count) = 0; - - virtual void GetHistogramData(uint32_t type, - HistogramData* const data) = 0; - virtual std::string GetBriefHistogramString(uint32_t type) { return ""; } - virtual std::string GetHistogramString(uint32_t type) const { return ""; } - virtual void MeasureTime(uint32_t histogram_type, uint64_t time) = 0; - virtual void ClearHistogram(uint32_t type) = 0; - - // String representation of the statistic object. - virtual std::string ToString() { - // Do nothing by default - return std::string("ToString(): not implemented"); - } - virtual void ClearAll() = 0; -}; - -// Create a concrete DBStatistics object -Statistics* CreateDBStatistics(); - -} // namespace leveldb - diff --git a/src/leveldb/include/leveldb/status.h b/src/leveldb/include/leveldb/status.h index e7d40d8e1..75b8b7768 100644 --- a/src/leveldb/include/leveldb/status.h +++ b/src/leveldb/include/leveldb/status.h @@ -25,12 +25,12 @@ namespace leveldb { class Status { public: // Create a success status. - Status() : state_(NULL) { } + Status() : state_(NULL) {} ~Status() { delete[] state_; } // Copy the specified status. Status(const Status& s); - void operator=(const Status& s); + Status& operator=(const Status& s); // Return a success status. static Status OK() { return Status(); } @@ -63,6 +63,10 @@ class Status { return Status(kShutdownInProgress, msg, msg2); } + static Status Reject(const Slice& msg, const Slice& msg2 = Slice()) { + return Status(kReject, msg, msg2); + } + // Returns true iff the status indicates success. bool ok() const { return (state_ == NULL); } @@ -85,6 +89,9 @@ class Status { // Returns the string "OK" for success. std::string ToString() const; + // Returns true iff the status indicates an Reject Error. + bool IsReject() const { return code() == kReject; } + private: // OK status has a NULL state_. Otherwise, state_ is a new[] array // of the following form: @@ -102,27 +109,25 @@ class Status { kIOError = 5, kTimeOut = 6, kIOPermissionDenied = 13, - kShutdownInProgress = 14 + kShutdownInProgress = 14, + kReject = 15 }; - Code code() const { - return (state_ == NULL) ? kOk : static_cast(state_[4]); - } + Code code() const { return (state_ == NULL) ? kOk : static_cast(state_[4]); } Status(Code code, const Slice& msg, const Slice& msg2); static const char* CopyState(const char* s); }; -inline Status::Status(const Status& s) { - state_ = (s.state_ == NULL) ? NULL : CopyState(s.state_); -} -inline void Status::operator=(const Status& s) { +inline Status::Status(const Status& s) { state_ = (s.state_ == NULL) ? NULL : CopyState(s.state_); } +inline Status& Status::operator=(const Status& s) { // The following condition catches both aliasing (when this == &s), // and the common case where both s and *this are ok. if (state_ != s.state_) { delete[] state_; state_ = (s.state_ == NULL) ? NULL : CopyState(s.state_); } + return *this; } } // namespace leveldb diff --git a/src/leveldb/include/leveldb/table.h b/src/leveldb/include/leveldb/table.h index 4c101553f..183467051 100644 --- a/src/leveldb/include/leveldb/table.h +++ b/src/leveldb/include/leveldb/table.h @@ -39,9 +39,7 @@ class Table { // for the duration of the returned table's lifetime. // // *file must remain live while this Table is in use. - static Status Open(const Options& options, - RandomAccessFile* file, - uint64_t file_size, + static Status Open(const Options& options, RandomAccessFile* file, uint64_t file_size, Table** table); ~Table(); @@ -52,8 +50,7 @@ class Table { Iterator* NewIterator(const ReadOptions&) const; // Specify the range of iterator - Iterator* NewIterator(const ReadOptions&, const Slice& smallest, - const Slice& largest) const; + Iterator* NewIterator(const ReadOptions&, const Slice& smallest, const Slice& largest) const; // Given a key, return an approximate byte offset in the file where // the data for that key begins (or would begin if the key were @@ -77,11 +74,8 @@ class Table { // to Seek(key). May not make such a call if filter policy says // that key is not present. friend class TableCache; - Status InternalGet( - const ReadOptions&, const Slice& key, - void* arg, - void (*handle_result)(void* arg, const Slice& k, const Slice& v)); - + Status InternalGet(const ReadOptions&, const Slice& key, void* arg, + void (*handle_result)(void* arg, const Slice& k, const Slice& v)); void ReadMeta(const Footer& footer); void ReadFilter(const Slice& filter_handle_value); diff --git a/src/leveldb/include/leveldb/table_builder.h b/src/leveldb/include/leveldb/table_builder.h index 46fc35bc7..445b30218 100644 --- a/src/leveldb/include/leveldb/table_builder.h +++ b/src/leveldb/include/leveldb/table_builder.h @@ -86,6 +86,7 @@ class TableBuilder { void WriteBlock(BlockBuilder* block, BlockHandle* handle); void WriteRawBlock(const Slice& data, CompressionType, BlockHandle* handle); void AppendToFile(const Slice& slice); + void AppendToCacheFile(const Slice& slice); void FlushBatchBuffer(); struct Rep; diff --git a/src/leveldb/include/leveldb/table_utils.h b/src/leveldb/include/leveldb/table_utils.h index d8e2a6a74..af00adfbe 100644 --- a/src/leveldb/include/leveldb/table_utils.h +++ b/src/leveldb/include/leveldb/table_utils.h @@ -18,6 +18,6 @@ void ArchiveFile(Env* env, const std::string& fname); bool HandleDumpCommand(Env* env, char** files, int num); bool DumpFile(Env* env, const std::string& fname); -} // namespace leveldb +} // namespace leveldb -#endif // STORAGE_LEVELDB_DB_TABLE_UTILS_H +#endif // STORAGE_LEVELDB_DB_TABLE_UTILS_H diff --git a/src/leveldb/include/leveldb/tera_key.h b/src/leveldb/include/leveldb/tera_key.h index 35abae553..1de880223 100644 --- a/src/leveldb/include/leveldb/tera_key.h +++ b/src/leveldb/include/leveldb/tera_key.h @@ -12,62 +12,61 @@ namespace leveldb { enum TeraKeyType { - TKT_FORSEEK = 0, - TKT_DEL = 1, - TKT_DEL_COLUMN = 2, - TKT_DEL_QUALIFIERS = 3, - TKT_DEL_QUALIFIER = 4, - TKT_VALUE = 5, - // 6 is reserved, do not use - TKT_ADD = 7, - TKT_PUT_IFABSENT = 8, - TKT_APPEND = 9, - TKT_ADDINT64 = 10, - TKT_TYPE_NUM = 11 + TKT_FORSEEK = 0, + TKT_DEL = 1, + TKT_DEL_COLUMN = 2, + TKT_DEL_QUALIFIERS = 3, + TKT_DEL_QUALIFIER = 4, + TKT_VALUE = 5, + // 6 is reserved, do not use + TKT_ADD = 7, + TKT_PUT_IFABSENT = 8, + TKT_APPEND = 9, + TKT_ADDINT64 = 10, + TKT_TYPE_NUM = 11 }; class RawKeyOperator; class TeraKey { -public: - static bool IsTypeAllowUserSetTimestamp(TeraKeyType type); + public: + static bool IsTypeAllowUserSetTimestamp(TeraKeyType type); - explicit TeraKey(const RawKeyOperator* op); - explicit TeraKey(const TeraKey& tk); - ~TeraKey(); + explicit TeraKey(const RawKeyOperator* op); + explicit TeraKey(const TeraKey& tk); + ~TeraKey(); - bool Encode(const std::string& key, const std::string& column, - const std::string& qualifier, int64_t timestamp, - TeraKeyType type); - bool Decode(const Slice& raw_key); + bool Encode(const std::string& key, const std::string& column, const std::string& qualifier, + int64_t timestamp, TeraKeyType type); + bool Decode(const Slice& raw_key); - bool SameRow(const TeraKey& tk); - bool SameColumn(const TeraKey& tk); - bool SameQualifier(const TeraKey& tk); + bool SameRow(const TeraKey& tk); + bool SameColumn(const TeraKey& tk); + bool SameQualifier(const TeraKey& tk); - bool IsDel(); - int Compare(const TeraKey& tk); - std::string DebugString(); + bool IsDel(); + int Compare(const TeraKey& tk); + std::string DebugString(); - bool empty() const { return is_empty_; } - Slice raw_key() const { return raw_key_; } - Slice key() const { return key_; } - Slice column() const { return column_; } - Slice qualifier() const { return qualifier_; } - int64_t timestamp() const { return timestamp_; } - TeraKeyType type() const { return type_; } + bool empty() const { return is_empty_; } + Slice raw_key() const { return raw_key_; } + Slice key() const { return key_; } + Slice column() const { return column_; } + Slice qualifier() const { return qualifier_; } + int64_t timestamp() const { return timestamp_; } + TeraKeyType type() const { return type_; } -private: - TeraKey(); - const RawKeyOperator* operator_; - std::string raw_key_; - Slice key_; - Slice column_; - Slice qualifier_; - int64_t timestamp_; - TeraKeyType type_; - bool is_empty_; + private: + TeraKey(); + const RawKeyOperator* operator_; + std::string raw_key_; + Slice key_; + Slice column_; + Slice qualifier_; + int64_t timestamp_; + TeraKeyType type_; + bool is_empty_; }; -} // namespace leveldb -#endif //TERA_LEVELDB_UTILS_TERA_KEY_H +} // namespace leveldb +#endif // TERA_LEVELDB_UTILS_TERA_KEY_H diff --git a/src/leveldb/include/nfs.h b/src/leveldb/include/nfs.h index 6c90e3f1e..48de959a1 100644 --- a/src/leveldb/include/nfs.h +++ b/src/leveldb/include/nfs.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef __NFS_API_NFS_H_ -#define __NFS_API_NFS_H_ +#ifndef __NFS_API_NFS_H_ +#define __NFS_API_NFS_H_ #include #include @@ -25,36 +25,46 @@ struct NFSDIR; struct ClientConfig; struct NfsOptions { - // required - const char *master_ip; - uint16_t master_port; - const char *username; - const char *password; - // optional - const char *interface; // network adaptor - uint32_t timeout; // ms - uint32_t ebusy_timeout; // ms - uint32_t tranbuf_size; // bytes - uint32_t tranbuf_num; // num of above - uint16_t write_cache; - uint16_t read_cache; - uint16_t is_read_primary_only; - - NfsOptions() : master_ip(NULL), master_port(0), username(NULL), password(NULL), - interface(NULL), timeout(180000), ebusy_timeout(180000), - tranbuf_size(4096), tranbuf_num(65536), - write_cache(1), read_cache(1), is_read_primary_only(0) { - } + // required + const char* master_ip; + uint16_t master_port; + const char* username; + const char* password; + // optional + const char* interface; // network adaptor + uint32_t timeout; // ms + uint32_t ebusy_timeout; // ms + uint32_t tranbuf_size; // bytes + uint32_t tranbuf_num; // num of above + uint16_t write_cache; + uint16_t read_cache; + uint16_t is_read_primary_only; + + NfsOptions() + : master_ip(NULL), + master_port(0), + username(NULL), + password(NULL), + interface(NULL), + timeout(180000), + ebusy_timeout(180000), + tranbuf_size(4096), + tranbuf_num(65536), + write_cache(1), + read_cache(1), + is_read_primary_only(0) {} }; /** * Length of any path should be less than or equals to NFS_MAX_FILEPATH_LEN, - * or else, api will return error, and the nfs errno will be set as ENAMETOOLONG. + * or else, api will return error, and the nfs errno will be set as + *ENAMETOOLONG. * NFS_MAX_FILEPATH_LEN is defined as 4095 defaultly. * That means, the max length of path is 4095. * * Length of any name should be less than or equals to NFS_MAX_FILENAME_LEN, - * or else, api will return error, and the nfs errno will be set as ENAMETOOLONG. + * or else, api will return error, and the nfs errno will be set as + *ENAMETOOLONG. * NFS_MAX_FILENAME_LEN is defined as 255 defaultly. * That means, the max length of name is 255. */ @@ -100,9 +110,8 @@ void Perror(const char* s = NULL); * EACCES - user has no permission on mountpoint * EIO - other error */ -int Init(const char* mountpoint, const char* master_ip, uint16_t master_port, - const char* username, const char* password, int cache = 1, - const char* interface = NULL); +int Init(const char* mountpoint, const char* master_ip, uint16_t master_port, const char* username, + const char* password, int cache = 1, const char* interface = NULL); /** * @brief Init nfs client system. This interface is process-level. @@ -127,9 +136,9 @@ int Init(const char* mountpoint, const char* master_ip, uint16_t master_port, * EACCES - user has no permission on mountpoint * EIO - other error */ -int Init(const char* mountpoint, const char* master_ip, uint16_t master_port, - const char* username, const char* password, uint64_t espaddr, - int host_id_base, int host_id_num, int cache = 1, const char* interface = NULL); +int Init(const char* mountpoint, const char* master_ip, uint16_t master_port, const char* username, + const char* password, uint64_t espaddr, int host_id_base, int host_id_num, int cache = 1, + const char* interface = NULL); /** * @brief Init nfs client system. This interface is process-level. @@ -222,10 +231,11 @@ int Destroy(); * ENOENT - path not exist * EIO - other error */ -//int Chdir(const char* path); +// int Chdir(const char* path); /** - * @brief Get current working directory. The returned cwd string will be normalized. + * @brief Get current working directory. The returned cwd string will be + * normalized. * @param buf * @param size * @return @@ -235,7 +245,7 @@ int Destroy(); * EINVAL - buf is invalid, or NFS not inited * ERANGE - buf is not enough to store path */ -//ssize_t Getcwd(char* buf, size_t size); +// ssize_t Getcwd(char* buf, size_t size); /** * @brief Check permission or existence of path. @@ -276,7 +286,8 @@ int Mkdir(const char* path); * @brief Make a directory. * @param path * @param is_spread_all_namespaces: whether do mkdir for all federations - * @param is_pass_when_already_exist: whether pass and continue when some one federation ack EEXIST + * @param is_pass_when_already_exist: whether pass and continue when some one + * federation ack EEXIST * @return * 0 - on success * -1 - on error @@ -312,7 +323,8 @@ int Rmdir(const char* path); * @brief Remove a directory. * @param path * @param is_spread_all_namespaces: whether do rmdir for all federations - * @param is_pass_when_already_exist: whether pass and continue when some one federation ack ENOENT + * @param is_pass_when_already_exist: whether pass and continue when some one + * federation ack ENOENT * @return * 0 - on success * -1 - on error @@ -331,7 +343,8 @@ int Rmdir(const char* path, bool is_spread_all_namespaces, bool is_pass_when_not * @brief Open a directory. * @param path * @return A ptr of struct NFSDIR. - * Do NOT use 'delete' or 'free()' to free ptr. Just only and MUST use 'Closedir()'. + * Do NOT use 'delete' or 'free()' to free ptr. Just only and MUST use + * 'Closedir()'. * @errno When error, the nfs errno will be set appropriately: * EINVAL - path is invalid * EACCES - user has no permission on the path @@ -345,10 +358,13 @@ NFSDIR* Opendir(const char* path); /** * @brief Open a directory. * @param path - * @param is_spread_all_namespaces: whether do opendir/readdir for all federations - * @param is_pass_when_already_exist: whether pass and continue when some one federation ack ENOENT + * @param is_spread_all_namespaces: whether do opendir/readdir for all + * federations + * @param is_pass_when_already_exist: whether pass and continue when some one + * federation ack ENOENT * @return A ptr of struct NFSDIR. - * Do NOT use 'delete' or 'free()' to free ptr. Just only and MUST use 'Closedir()'. + * Do NOT use 'delete' or 'free()' to free ptr. Just only and MUST use + * 'Closedir()'. * @errno When error, the nfs errno will be set appropriately: * EINVAL - path is invalid * EACCES - user has no permission on the path @@ -367,7 +383,8 @@ NFSDIR* Opendir(const char* path, bool is_spread_all_namespaces, bool is_pass_wh * @return A ptr of struct dirent. * NOT-NULL - the ptr to next entry * NULL - read finished or occurs error - * Do NOT use 'delete' or 'free()' or any other method to free dirent pointer. + * Do NOT use 'delete' or 'free()' or any other method to free dirent + * pointer. * @errno When error, the nfs errno will be set appropriately: * EBADF - dir is invalid * EIO - other error @@ -458,15 +475,19 @@ uint64_t GetInode(const NFSFILE* file); /** * @brief Open a file stream. - * If mode is "w"/"a" and file not exist, file will be created automatically with 0666. - * If mode is "w" and file already exist, file will be truncated to 0 automatically. - * If mode is "a" and file already exist, file will not be truncated, and write stream ptr will be moved to the end of stream automatically. + * If mode is "w"/"a" and file not exist, file will be created + * automatically with 0666. + * If mode is "w" and file already exist, file will be truncated to 0 + * automatically. + * If mode is "a" and file already exist, file will not be truncated, + * and write stream ptr will be moved to the end of stream automatically. * @param path * @param mode - only "r" or "w" or "a" * @return A stream ptr of struct NFSFILE * NOT-NULL - on success * NULL - on error - * Do NOT use 'delete' or 'free()' to free ptr. Just only and MUST use 'Close()' + * Do NOT use 'delete' or 'free()' to free ptr. Just only and MUST use + * 'Close()' * @errno When error, the nfs errno will be set appropriately: * EINVAL - path is invalid, or mode is invalid * EACCES - user has no permission on the path @@ -474,8 +495,10 @@ uint64_t GetInode(const NFSFILE* file); * ENOENT - path not exist when open with "r" mode * EISDIR - path is not a file * ENOTDIR - path's parents is not directory - * EBUSY - file has been opened with "w" or "a" by someone other (and I also want to open with "w" or "a") - * ENOSPC - no space to create(if needed) (in NFS, it means that exceeds quota) + * EBUSY - file has been opened with "w" or "a" by someone other (and I also + * want to open with "w" or "a") + * ENOSPC - no space to create(if needed) (in NFS, it means that exceeds + * quota) * EIO - mode is "w" and file has been opened in "w" mode; or other error */ NFSFILE* Open(const char* path, const char* mode); @@ -489,9 +512,12 @@ NFSFILE* Open(const char* path, const char* mode); * If file is opened with "r" mode, close will always return success. * If file is opened with "w" mode, it perhaps will be complex: * a) Use NFS system only, then close will always return success; - * b) Use NFS system along with RBS system, then close may return error. - * And libnfs will wait until all the blocks of the file have been committed or timeout. - * If some blocks commit failed or timeout, libnfs will return error, that means close failed. + * b) Use NFS system along with RBS system, then close may return + * error. + * And libnfs will wait until all the blocks of the file have + * been committed or timeout. + * If some blocks commit failed or timeout, libnfs will return + * error, that means close failed. * @errno When error, the nfs errno will be set appropriately: * EBADF - stream is invalid * EIO - other error (only in RBS system) @@ -505,8 +531,10 @@ int Close(NFSFILE* stream); * 0 - on success * -1 - on error * Usually used to force close the file "write opened" by other client - * If file is opened by self, will close and clear open info from NFSCLient, but not NFSFILE, will memory leak - * If file is opened by other client, force release will close it, if other is writing, will reopen and .. + * If file is opened by self, will close and clear open info from + * NFSCLient, but not NFSFILE, will memory leak + * If file is opened by other client, force release will close it, if + * other is writing, will reopen and .. * Must be used very caseful * @errno the same to Close */ @@ -514,13 +542,15 @@ int ForceRelease(const char* path); /** * @brief Read size bytes to the buf pointed by ptr from the file stream. - * Libnfs will assume that the offset is the finished offset you read last time. + * Libnfs will assume that the offset is the finished offset you read + * last time. * Actually, it is atomically, will not cause EAGAIN or EWOULDBLOCK. * @param stream * @param ptr * @param size * @return - * >=0 - on success, return the actually read-size, if it is less than size, stream should be eof. + * >=0 - on success, return the actually read-size, if it is less than + * size, stream should be eof. * -1 - on error * @errno When error, the nfs errno will be set appropriately: * EBADF - stream is invalid @@ -530,7 +560,8 @@ int ForceRelease(const char* path); ssize_t Read(NFSFILE* stream, void* ptr, size_t size); /** - * @brief Read size bytes to the buf pointed by ptr from the file stream, started with the offset. + * @brief Read size bytes to the buf pointed by ptr from the file stream, + * started with the offset. * PRead will not change any state of stream, so, PRead is thread-safe. * @param stream * @param ptr @@ -548,13 +579,15 @@ ssize_t PRead(NFSFILE* stream, void* ptr, size_t size, uint64_t offset); /** * @brief Write size bytes to the buf pointed by ptr from the file stream. - * Libnfs will assume that the offset is the finished offset you written last time. + * Libnfs will assume that the offset is the finished offset you + * written last time. * Actually, it is atomically, will not cause EAGAIN or EWOULDBLOCK. * @param stream * @param ptr * @param size * @return - * >=0 - on success, return the actually written-size, it should be equals to size. + * >=0 - on success, return the actually written-size, it should be equals + * to size. * -1 - on error * @errno When error, the nfs errno will be set appropriately: * EBADF - stream is invalid @@ -564,13 +597,15 @@ ssize_t PRead(NFSFILE* stream, void* ptr, size_t size, uint64_t offset); ssize_t Write(NFSFILE* stream, const void* ptr, size_t size); /** - * @brief Write size bytes to the buf pointed by ptr from the file stream, started with the offset. + * @brief Write size bytes to the buf pointed by ptr from the file stream, + * started with the offset. * PWrite will not change any state of stream. * @param stream * @param ptr * @param size * @return - * >=0 - on success, return the actually written-size, it should be equals to size. + * >=0 - on success, return the actually written-size, it should be equals + * to size. * -1 - on error * @errno When error, the nfs errno will be set appropriately: * EBADF - stream is invalid @@ -581,7 +616,8 @@ ssize_t PWrite(NFSFILE* stream, const void* ptr, size_t size, uint64_t offset); /** * @brief Sync the file stream. - * Currently, we do not support sync rbs write stream, but this api will return succ to avoid make caller troubled. + * Currently, we do not support sync rbs write stream, but this api + * will return succ to avoid make caller troubled. * Though, sync read stream (whether in nfs or rbs) will cause fail. * @param stream * @return @@ -612,8 +648,10 @@ int64_t Tell(NFSFILE* stream); * @return * 0 - on success * -1 - on error - * If use NFS system only, seek will always return success (except the invalid arguments). - * If use NFS system along with RBS system, you can not seek the write stream backward, or else RBS may return other error. + * If use NFS system only, seek will always return success (except the + * invalid arguments). + * If use NFS system along with RBS system, you can not seek the write + * stream backward, or else RBS may return other error. * @errno When error, the nfs errno will be set appropriately: * EBADF - stream is invalid * EINVAL - seek the write stream backward in RBS system @@ -646,7 +684,8 @@ int Eof(NFSFILE* stream); * ENAMETOOLONG - path too long * ENOENT - oldpath not exist * EISDIR - oldpath is not a directory while newpath is a existing directory - * ENOTEMPTY - oldpath is directory and newpath is directory and newpath contains entries other than . and .. + * ENOTEMPTY - oldpath is directory and newpath is directory and newpath + * contains entries other than . and .. * EIO - other error */ int Rename(const char* oldpath, const char* newpath); @@ -700,8 +739,6 @@ int Truncate(const char* path, uint64_t size); * EIO - other error */ int SetModifyTime(const char* path, time_t mtime); - } #endif //__NFS_API_NFS_H_ - diff --git a/src/leveldb/include/snappy-stubs-public.h b/src/leveldb/include/snappy-stubs-public.h index 96d886508..a8a72301f 100644 --- a/src/leveldb/include/snappy-stubs-public.h +++ b/src/leveldb/include/snappy-stubs-public.h @@ -47,8 +47,7 @@ #define SNAPPY_MAJOR 1 #define SNAPPY_MINOR 1 #define SNAPPY_PATCHLEVEL 0 -#define SNAPPY_VERSION \ - ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL) +#define SNAPPY_VERSION ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL) #include diff --git a/src/leveldb/include/snappy.h b/src/leveldb/include/snappy.h index 03ef6ce5b..4ae068eed 100644 --- a/src/leveldb/include/snappy.h +++ b/src/leveldb/include/snappy.h @@ -45,119 +45,111 @@ #include "snappy-stubs-public.h" namespace snappy { - class Source; - class Sink; - - // ------------------------------------------------------------------------ - // Generic compression/decompression routines. - // ------------------------------------------------------------------------ - - // Compress the bytes read from "*source" and append to "*sink". Return the - // number of bytes written. - size_t Compress(Source* source, Sink* sink); - - // Find the uncompressed length of the given stream, as given by the header. - // Note that the true length could deviate from this; the stream could e.g. - // be truncated. - // - // Also note that this leaves "*source" in a state that is unsuitable for - // further operations, such as RawUncompress(). You will need to rewind - // or recreate the source yourself before attempting any further calls. - bool GetUncompressedLength(Source* source, uint32* result); - - // ------------------------------------------------------------------------ - // Higher-level string based routines (should be sufficient for most users) - // ------------------------------------------------------------------------ - - // Sets "*output" to the compressed version of "input[0,input_length-1]". - // Original contents of *output are lost. - // - // REQUIRES: "input[]" is not an alias of "*output". - size_t Compress(const char* input, size_t input_length, string* output); - - // Decompresses "compressed[0,compressed_length-1]" to "*uncompressed". - // Original contents of "*uncompressed" are lost. - // - // REQUIRES: "compressed[]" is not an alias of "*uncompressed". - // - // returns false if the message is corrupted and could not be decompressed - bool Uncompress(const char* compressed, size_t compressed_length, - string* uncompressed); - - - // ------------------------------------------------------------------------ - // Lower-level character array based routines. May be useful for - // efficiency reasons in certain circumstances. - // ------------------------------------------------------------------------ - - // REQUIRES: "compressed" must point to an area of memory that is at - // least "MaxCompressedLength(input_length)" bytes in length. - // - // Takes the data stored in "input[0..input_length]" and stores - // it in the array pointed to by "compressed". - // - // "*compressed_length" is set to the length of the compressed output. - // - // Example: - // char* output = new char[snappy::MaxCompressedLength(input_length)]; - // size_t output_length; - // RawCompress(input, input_length, output, &output_length); - // ... Process(output, output_length) ... - // delete [] output; - void RawCompress(const char* input, - size_t input_length, - char* compressed, - size_t* compressed_length); - - // Given data in "compressed[0..compressed_length-1]" generated by - // calling the Snappy::Compress routine, this routine - // stores the uncompressed data to - // uncompressed[0..GetUncompressedLength(compressed)-1] - // returns false if the message is corrupted and could not be decrypted - bool RawUncompress(const char* compressed, size_t compressed_length, - char* uncompressed); - - // Given data from the byte source 'compressed' generated by calling - // the Snappy::Compress routine, this routine stores the uncompressed - // data to - // uncompressed[0..GetUncompressedLength(compressed,compressed_length)-1] - // returns false if the message is corrupted and could not be decrypted - bool RawUncompress(Source* compressed, char* uncompressed); - - // Returns the maximal size of the compressed representation of - // input data that is "source_bytes" bytes in length; - size_t MaxCompressedLength(size_t source_bytes); - - // REQUIRES: "compressed[]" was produced by RawCompress() or Compress() - // Returns true and stores the length of the uncompressed data in - // *result normally. Returns false on parsing error. - // This operation takes O(1) time. - bool GetUncompressedLength(const char* compressed, size_t compressed_length, - size_t* result); - - // Returns true iff the contents of "compressed[]" can be uncompressed - // successfully. Does not return the uncompressed data. Takes - // time proportional to compressed_length, but is usually at least - // a factor of four faster than actual decompression. - bool IsValidCompressedBuffer(const char* compressed, - size_t compressed_length); - - // The size of a compression block. Note that many parts of the compression - // code assumes that kBlockSize <= 65536; in particular, the hash table - // can only store 16-bit offsets, and EmitCopy() also assumes the offset - // is 65535 bytes or less. Note also that if you change this, it will - // affect the framing format (see framing_format.txt). - // - // Note that there might be older data around that is compressed with larger - // block sizes, so the decompression code should not rely on the - // non-existence of long backreferences. - static const int kBlockLog = 16; - static const size_t kBlockSize = 1 << kBlockLog; - - static const int kMaxHashTableBits = 14; - static const size_t kMaxHashTableSize = 1 << kMaxHashTableBits; +class Source; +class Sink; -} // end namespace snappy +// ------------------------------------------------------------------------ +// Generic compression/decompression routines. +// ------------------------------------------------------------------------ + +// Compress the bytes read from "*source" and append to "*sink". Return the +// number of bytes written. +size_t Compress(Source* source, Sink* sink); + +// Find the uncompressed length of the given stream, as given by the header. +// Note that the true length could deviate from this; the stream could e.g. +// be truncated. +// +// Also note that this leaves "*source" in a state that is unsuitable for +// further operations, such as RawUncompress(). You will need to rewind +// or recreate the source yourself before attempting any further calls. +bool GetUncompressedLength(Source* source, uint32* result); + +// ------------------------------------------------------------------------ +// Higher-level string based routines (should be sufficient for most users) +// ------------------------------------------------------------------------ + +// Sets "*output" to the compressed version of "input[0,input_length-1]". +// Original contents of *output are lost. +// +// REQUIRES: "input[]" is not an alias of "*output". +size_t Compress(const char* input, size_t input_length, string* output); + +// Decompresses "compressed[0,compressed_length-1]" to "*uncompressed". +// Original contents of "*uncompressed" are lost. +// +// REQUIRES: "compressed[]" is not an alias of "*uncompressed". +// +// returns false if the message is corrupted and could not be decompressed +bool Uncompress(const char* compressed, size_t compressed_length, string* uncompressed); + +// ------------------------------------------------------------------------ +// Lower-level character array based routines. May be useful for +// efficiency reasons in certain circumstances. +// ------------------------------------------------------------------------ +// REQUIRES: "compressed" must point to an area of memory that is at +// least "MaxCompressedLength(input_length)" bytes in length. +// +// Takes the data stored in "input[0..input_length]" and stores +// it in the array pointed to by "compressed". +// +// "*compressed_length" is set to the length of the compressed output. +// +// Example: +// char* output = new char[snappy::MaxCompressedLength(input_length)]; +// size_t output_length; +// RawCompress(input, input_length, output, &output_length); +// ... Process(output, output_length) ... +// delete [] output; +void RawCompress(const char* input, size_t input_length, char* compressed, + size_t* compressed_length); + +// Given data in "compressed[0..compressed_length-1]" generated by +// calling the Snappy::Compress routine, this routine +// stores the uncompressed data to +// uncompressed[0..GetUncompressedLength(compressed)-1] +// returns false if the message is corrupted and could not be decrypted +bool RawUncompress(const char* compressed, size_t compressed_length, char* uncompressed); + +// Given data from the byte source 'compressed' generated by calling +// the Snappy::Compress routine, this routine stores the uncompressed +// data to +// uncompressed[0..GetUncompressedLength(compressed,compressed_length)-1] +// returns false if the message is corrupted and could not be decrypted +bool RawUncompress(Source* compressed, char* uncompressed); + +// Returns the maximal size of the compressed representation of +// input data that is "source_bytes" bytes in length; +size_t MaxCompressedLength(size_t source_bytes); + +// REQUIRES: "compressed[]" was produced by RawCompress() or Compress() +// Returns true and stores the length of the uncompressed data in +// *result normally. Returns false on parsing error. +// This operation takes O(1) time. +bool GetUncompressedLength(const char* compressed, size_t compressed_length, size_t* result); + +// Returns true iff the contents of "compressed[]" can be uncompressed +// successfully. Does not return the uncompressed data. Takes +// time proportional to compressed_length, but is usually at least +// a factor of four faster than actual decompression. +bool IsValidCompressedBuffer(const char* compressed, size_t compressed_length); + +// The size of a compression block. Note that many parts of the compression +// code assumes that kBlockSize <= 65536; in particular, the hash table +// can only store 16-bit offsets, and EmitCopy() also assumes the offset +// is 65535 bytes or less. Note also that if you change this, it will +// affect the framing format (see framing_format.txt). +// +// Note that there might be older data around that is compressed with larger +// block sizes, so the decompression code should not rely on the +// non-existence of long backreferences. +static const int kBlockLog = 16; +static const size_t kBlockSize = 1 << kBlockLog; + +static const int kMaxHashTableBits = 14; +static const size_t kMaxHashTableSize = 1 << kMaxHashTableBits; + +} // end namespace snappy #endif // UTIL_SNAPPY_SNAPPY_H__ diff --git a/src/leveldb/issues/issue178_test.cc b/src/leveldb/issues/issue178_test.cc index 4d896bffd..073b5d87b 100644 --- a/src/leveldb/issues/issue178_test.cc +++ b/src/leveldb/issues/issue178_test.cc @@ -25,11 +25,9 @@ std::string Key1(int i) { return buf; } -std::string Key2(int i) { - return Key1(i) + "_xxx"; -} +std::string Key2(int i) { return Key1(i) + "_xxx"; } -class Issue178 { }; +class Issue178 {}; TEST(Issue178, Test) { // Get rid of any state from an old run. @@ -90,6 +88,4 @@ TEST(Issue178, Test) { } // anonymous namespace -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} +int main(int argc, char** argv) { return leveldb::test::RunAllTests(); } diff --git a/src/leveldb/persistent_cache/env_flash_compatibility.cc b/src/leveldb/persistent_cache/env_flash_compatibility.cc new file mode 100644 index 000000000..b21a7b84e --- /dev/null +++ b/src/leveldb/persistent_cache/env_flash_compatibility.cc @@ -0,0 +1,166 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// This file is methods used for transfering from env_flash to persistent cache. +// Maybe deprecated in future version. + +#include +#include "persistent_cache_impl.h" +#include "util/stop_watch.h" + +namespace leveldb { + +static void CollectLgFiles(const std::string &lg_path, Env *env, std::vector *result) { + assert(result); + std::vector sst_files; + auto status = env->GetChildren(lg_path, &sst_files); + if (!status.ok()) { + LEVELDB_LOG( + "Get children failed when move env_flash file to persistent cache, path: %s, " + "reason: %s\n", + lg_path.c_str(), status.ToString().c_str()); + return; + } + std::for_each(sst_files.begin(), sst_files.end(), + [result, &lg_path, env](const std::string &sst_file_name) { + auto file_path = lg_path + "/" + sst_file_name; + Slice file_path_slice{file_path}; + if (!file_path_slice.ends_with(".sst")) { + return; + } + + SystemFileType type; + auto s = env->GetFileType(file_path, &type); + if (!s.ok()) { + LEVELDB_LOG("Get file type failed, path %s, reason %s.", file_path.c_str(), + s.ToString().c_str()); + return; + } + + if (type == SystemFileType::kRegularFile) { + result->emplace_back(std::move(file_path)); + } + }); +} + +static void CollectTabletFiles(const std::string &tablet_path, Env *env, + std::vector *result) { + std::vector lg_paths; + auto status = env->GetChildren(tablet_path, &lg_paths); + if (!status.ok()) { + LEVELDB_LOG( + "Get children failed when move env_flash file to persistent cache, path: %s, " + "reason: %s\n", + tablet_path.c_str(), status.ToString().c_str()); + return; + } + for (const auto &lg_path : lg_paths) { + CollectLgFiles(tablet_path + "/" + lg_path, env, result); + } +} + +static void CollectTableFiles(const std::string &table_path, Env *env, + std::vector *result) { + std::vector tablet_paths; + auto status = env->GetChildren(table_path, &tablet_paths); + if (!status.ok()) { + LEVELDB_LOG( + "Get children failed when move env_flash file to persistent cache, path: %s, " + "reason: %s\n", + table_path.c_str(), status.ToString().c_str()); + } + for (const auto &tablet_path : tablet_paths) { + CollectTabletFiles(table_path + "/" + tablet_path, env, result); + } +} + +void PersistentCacheImpl::AddExistingFile(const Slice &key, const std::string &file_name) { + auto env = opt_.env; + Status s; + + auto cache_file_name = file_name + "." + std::to_string(writer_cache_id_) + ".rc"; + + if (!(s = env->RenameFile(file_name, cache_file_name)).ok()) { + LEVELDB_LOG("Rename cache file from %s to %s failed, reason: %s.\n", file_name.c_str(), + cache_file_name.c_str(), s.ToString().c_str()); + return; + } + + std::unique_ptr file{ + new RandomAccessCacheFile{writer_cache_id_, opt_.env, opt_.env_opt, cache_file_name}}; + + uint64_t file_size; + if (!(s = env->GetFileSize(file->Path(), &file_size)).ok()) { + LEVELDB_LOG("Get file size failed, file: %s, reason: %s.\n", file->Path().c_str(), + s.ToString().c_str()); + return; + } + + if (!(s = file->Open()).ok()) { + LEVELDB_LOG("Open cache file failed, reason: %s.\n", s.ToString().c_str()); + return; + } + + if (!(s = MakeRoomForWrite(file_size)).ok()) { + LEVELDB_LOG("Make room for cache file failed, reason: %s.\n", s.ToString().c_str()); + return; + } + + ++writer_cache_id_; + auto raw_file_ptr = file.release(); + auto success = metadata_.AddCacheFile(raw_file_ptr); + assert(success); + + if (!Insert(key, raw_file_ptr)) { + ForceEvict(raw_file_ptr); + return; + } + + LEVELDB_LOG("Add existing cache file success, key:%s, file:%s.\n", key.ToString().c_str(), + file_name.c_str()); +} + +void PersistentCacheImpl::PullEnvFlashFiles() { + StopWatchMicro timer(opt_.env, true); + LEVELDB_LOG("Start pulling env flash files to persistent cache in path %s.\n", + GetCachePath().c_str()); + std::vector sst_files; + std::vector table_paths; + + auto status = opt_.env->GetChildren(GetCachePath(), &table_paths); + if (!status.ok()) { + LEVELDB_LOG( + "Get children failed when move env_flash file to persistent cache, path: %s, " + "reason: %s\n", + GetCachePath().c_str(), status.ToString().c_str()); + } else { + for (const auto &table_path : table_paths) { + auto full_table_path = GetCachePath() + "/" + table_path; + bool is_same_path = false; + status = opt_.env->IsSamePath(full_table_path, GetMetaPath(), &is_same_path); + if (!status.ok()) { + LEVELDB_LOG("Error checking same path, path1 %s, path2 %s, reason %s, skip it.\n", + full_table_path.c_str(), GetMetaPath().c_str(), status.ToString().c_str()); + continue; + } + if (!is_same_path) { + CollectTableFiles(full_table_path, opt_.env, &sst_files); + } + } + + for (auto &file_name : sst_files) { + Slice key{file_name}; + key.remove_specified_prefix(GetCachePath()); + while (key.starts_with("/")) { + key.remove_prefix(1); + } + assert(key.size()); + AddExistingFile(key, file_name); + } + } + LEVELDB_LOG("Pull env flash files in path %s done, cost: %lu ms.", GetCachePath().c_str(), + timer.ElapsedMicros() / 1000); +} + +} // namespace leveldb diff --git a/src/leveldb/persistent_cache/hash_table.h b/src/leveldb/persistent_cache/hash_table.h new file mode 100644 index 000000000..86fd91212 --- /dev/null +++ b/src/leveldb/persistent_cache/hash_table.h @@ -0,0 +1,227 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#pragma once + +#include +#include +#include +#include + +#include "include/leveldb/env.h" +#include "common/rwmutex.h" + +namespace leveldb { + +// HashTable +// +// Traditional implementation of hash table with synchronization built on top +// don't perform very well in multi-core scenarios. This is an implementation +// designed for multi-core scenarios with high lock contention. +// +// |<-------- alpha ------------->| +// Buckets Collision list +// ---- +----+ +---+---+--- ...... ---+---+---+ +// / | |--->| | | | | | +// / +----+ +---+---+--- ...... ---+---+---+ +// / | | +// Locks/ +----+ +// +--+/ . . +// | | . . +// +--+ . . +// | | . . +// +--+ . . +// | | . . +// +--+ . . +// \ +----+ +// \ | | +// \ +----+ +// \ | | +// \---- +----+ +// +// The lock contention is spread over an array of locks. This helps improve +// concurrent access. The spine is designed for a certain capacity and load +// factor. When the capacity planning is done correctly we can expect +// O(load_factor = 1) insert, access and remove time. +// +// Micro benchmark on debug build gives about .5 Million/sec rate of insert, +// erase and lookup in parallel (total of about 1.5 Million ops/sec). If the +// blocks were of 4K, the hash table can support a virtual throughput of +// 6 GB/s. +// +// T Object type (contains both key and value) +// Hash Function that returns an hash from type T +// Equal Returns if two objects are equal +// (We need explicit equal for pointer type) +// +template +class HashTable { + public: + explicit HashTable(const size_t capacity = 1024 * 1024, const float load_factor = 2.0, + const uint32_t nlocks = 256) + : nbuckets_(static_cast(load_factor ? capacity / load_factor : 0)), + nlocks_(nlocks) { + // pre-conditions + assert(capacity); + assert(load_factor); + assert(nbuckets_); + assert(nlocks_); + + buckets_.reset(new Bucket[nbuckets_]); + mlock(buckets_.get(), nbuckets_ * sizeof(Bucket)); + + // initialize locks + locks_.reset(new RWMutex[nlocks_]); + mlock(locks_.get(), nlocks_ * sizeof(RWMutex)); + + // post-conditions + assert(buckets_); + assert(locks_); + } + + virtual ~HashTable() { AssertEmptyBuckets(); } + + // + // Insert given record to hash table + // + bool Insert(const T& t) { + const uint64_t h = Hash()(t); + const uint32_t bucket_idx = h % nbuckets_; + const uint32_t lock_idx = bucket_idx % nlocks_; + + WriteLock _(&locks_[lock_idx]); + auto& bucket = buckets_[bucket_idx]; + return Insert(&bucket, t); + } + + // Read hash table + // + // Please note that read lock should be held by the caller. This is because + // the caller owns the data, and should hold the read lock as long as he + // operates on the data. + bool Find(const T& t, T* ret, RWMutex** ret_lock) { + const uint64_t h = Hash()(t); + const uint32_t bucket_idx = h % nbuckets_; + const uint32_t lock_idx = bucket_idx % nlocks_; + + RWMutex& lock = locks_[lock_idx]; + lock.ReadLock(); + + auto& bucket = buckets_[bucket_idx]; + if (Find(&bucket, t, ret)) { + *ret_lock = &lock; + return true; + } + + lock.ReadUnlock(); + return false; + } + + // + // Erase a given key from the hash table + // + bool Erase(const T& t, T* ret) { + const uint64_t h = Hash()(t); + const uint32_t bucket_idx = h % nbuckets_; + const uint32_t lock_idx = bucket_idx % nlocks_; + + WriteLock _(&locks_[lock_idx]); + + auto& bucket = buckets_[bucket_idx]; + return Erase(&bucket, t, ret); + } + + // Fetch the mutex associated with a key + // This call is used to hold the lock for a given data for extended period of + // time. + RWMutex* GetMutex(const T& t) { + const uint64_t h = Hash()(t); + const uint32_t bucket_idx = h % nbuckets_; + const uint32_t lock_idx = bucket_idx % nlocks_; + + return &locks_[lock_idx]; + } + + void Clear(void (*fn)(T)) { + for (uint32_t i = 0; i < nbuckets_; ++i) { + const uint32_t lock_idx = i % nlocks_; + WriteLock _(&locks_[lock_idx]); + for (auto& t : buckets_[i].list_) { + (*fn)(t); + } + buckets_[i].list_.clear(); + } + } + + protected: + // Models bucket of keys that hash to the same bucket number + struct Bucket { + std::list list_; + }; + + // Substitute for std::find with custom comparator operator + typename std::list::iterator Find(std::list* list, const T& t) { + for (auto it = list->begin(); it != list->end(); ++it) { + if (Equal()(*it, t)) { + return it; + } + } + return list->end(); + } + + bool Insert(Bucket* bucket, const T& t) { + // Check if the key already exists + auto it = Find(&bucket->list_, t); + if (it != bucket->list_.end()) { + return false; + } + + // insert to bucket + bucket->list_.push_back(t); + return true; + } + + bool Find(Bucket* bucket, const T& t, T* ret) { + auto it = Find(&bucket->list_, t); + if (it != bucket->list_.end()) { + if (ret) { + *ret = *it; + } + return true; + } + return false; + } + + bool Erase(Bucket* bucket, const T& t, T* ret) { + auto it = Find(&bucket->list_, t); + if (it != bucket->list_.end()) { + if (ret) { + *ret = *it; + } + + bucket->list_.erase(it); + return true; + } + return false; + } + + // assert that all buckets are empty + void AssertEmptyBuckets() { +#ifndef NDEBUG + for (size_t i = 0; i < nbuckets_; ++i) { + WriteLock _(&locks_[i % nlocks_]); + assert(buckets_[i].list_.empty()); + } +#endif + } + + const uint32_t nbuckets_; // No. of buckets in the spine + std::unique_ptr buckets_; // Spine of the hash buckets + const uint32_t nlocks_; // No. of locks + std::unique_ptr locks_; // Granular locks +}; +} // namespace leveldb \ No newline at end of file diff --git a/src/leveldb/persistent_cache/hash_table_evictable.h b/src/leveldb/persistent_cache/hash_table_evictable.h new file mode 100644 index 000000000..00e42252c --- /dev/null +++ b/src/leveldb/persistent_cache/hash_table_evictable.h @@ -0,0 +1,178 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#pragma once + +#include +#include + +#include "common/rwmutex.h" +#include "lrulist.h" +#include "persistent_cache/hash_table.h" +#include "util/random.h" + +namespace leveldb { + +// Evictable Hash Table +// +// Hash table index where least accessed (or one of the least accessed) elements +// can be evicted. +// +// Please note EvictableHashTable can only be created for pointer type objects +template +class EvictableHashTable : private HashTable { + public: + typedef HashTable hash_table; + + explicit EvictableHashTable(const size_t capacity = 1024 * 1024, const float load_factor = 2.0, + const uint32_t nlocks = 16) + : HashTable(capacity, load_factor, nlocks), + lru_lists_(new LRUList[hash_table::nlocks_]) { + assert(lru_lists_); + } + + virtual ~EvictableHashTable() { AssertEmptyLRU(); } + + // + // Insert given record to hash table (and LRU list) + // + bool Insert(T* t) { + const uint64_t h = Hash()(t); + typename hash_table::Bucket& bucket = GetBucket(h); + LRUListType& lru = GetLRUList(h); + RWMutex& lock = GetMutex(h); + + WriteLock _(&lock); + if (hash_table::Insert(&bucket, t)) { + lru.Push(t); + lru.Touch(t); + return true; + } + return false; + } + + bool Find(T* t, T** ret) { + const uint64_t h = Hash()(t); + typename hash_table::Bucket& bucket = GetBucket(h); + LRUListType& lru = GetLRUList(h); + RWMutex& lock = GetMutex(h); + + ReadLock _(&lock); + if (hash_table::Find(&bucket, t, ret)) { + ++(*ret)->refs_; + lru.Touch(*ret); + return true; + } + return false; + } + + T* Remove(T* t) { + T* target = nullptr; + const uint64_t h = Hash()(t); + typename hash_table::Bucket& bucket = GetBucket(h); + LRUListType& lru = GetLRUList(h); + RWMutex& lock = GetMutex(h); + + WriteLock _(&lock); + if (hash_table::Find(&bucket, t, &target)) { + assert(t == target); + lru.Unlink(target); + T* tmp = nullptr; + bool status = hash_table::Erase(&bucket, target, &tmp); + assert(target == tmp); + assert(status); + } + return target; + } + + // + // Evict one of the least recently used object + // + T* Evict(const std::function& fn = nullptr) { + uint32_t random = Random::GetTLSInstance()->Next(); + const size_t start_idx = random % hash_table::nlocks_; + T* t = nullptr; + + for (size_t i = 0; !t && i < hash_table::nlocks_; ++i) { + const size_t idx = (start_idx + i) % hash_table::nlocks_; + + WriteLock _(&hash_table::locks_[idx]); + LRUListType& lru = lru_lists_[idx]; + if (!lru.IsEmpty() && (t = lru.Pop()) != nullptr) { + assert(!t->refs_); + // We got an item to evict, erase from the bucket + const uint64_t h = Hash()(t); + typename hash_table::Bucket& bucket = GetBucket(h); + T* tmp = nullptr; + bool status = hash_table::Erase(&bucket, t, &tmp); + assert(t == tmp); + assert(status); + break; + } + assert(!t); + } + + if (t && fn) { + fn(t); + } + + return t; + } + + void Clear(void (*fn)(T*)) { + for (uint32_t i = 0; i < hash_table::nbuckets_; ++i) { + const uint32_t lock_idx = i % hash_table::nlocks_; + WriteLock _(&hash_table::locks_[lock_idx]); + auto& lru_list = lru_lists_[lock_idx]; + auto& bucket = hash_table::buckets_[i]; + for (auto* t : bucket.list_) { + lru_list.Unlink(t); + (*fn)(t); + } + bucket.list_.clear(); + } + // make sure that all LRU lists are emptied + AssertEmptyLRU(); + } + + void AssertEmptyLRU() { + for (uint32_t i = 0; i < hash_table::nlocks_; ++i) { + WriteLock _(&hash_table::locks_[i]); + auto& lru_list = lru_lists_[i]; + assert(lru_list.IsEmpty()); + } + } + + // + // Fetch the mutex associated with a key + // This call is used to hold the lock for a given data for extended period of + // time. + RWMutex* GetMutex(T* t) { return hash_table::GetMutex(t); } + + private: + typedef LRUList LRUListType; + + typename hash_table::Bucket& GetBucket(const uint64_t h) { + const uint32_t bucket_idx = h % hash_table::nbuckets_; + return hash_table::buckets_[bucket_idx]; + } + + LRUListType& GetLRUList(const uint64_t h) { + const uint32_t bucket_idx = h % hash_table::nbuckets_; + const uint32_t lock_idx = bucket_idx % hash_table::nlocks_; + return lru_lists_[lock_idx]; + } + + RWMutex& GetMutex(const uint64_t h) { + const uint32_t bucket_idx = h % hash_table::nbuckets_; + const uint32_t lock_idx = bucket_idx % hash_table::nlocks_; + return hash_table::locks_[lock_idx]; + } + + std::unique_ptr lru_lists_; +}; +} // namespace leveldb diff --git a/src/leveldb/persistent_cache/lrulist.h b/src/leveldb/persistent_cache/lrulist.h new file mode 100644 index 000000000..c9595692d --- /dev/null +++ b/src/leveldb/persistent_cache/lrulist.h @@ -0,0 +1,175 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include +#include "common/mutex.h" + +namespace leveldb { +// LRU element definition +// +// Any object that needs to be part of the LRU algorithm should extend this +// class +template +struct LRUElement { + explicit LRUElement() : next_(nullptr), prev_(nullptr), refs_(0) {} + + virtual ~LRUElement() { assert(!refs_); } + + T* next_; + T* prev_; + std::atomic refs_; +}; + +// LRU implementation +// +// In place LRU implementation. There is no copy or allocation involved when +// inserting or removing an element. This makes the data structure slim +template +class LRUList { + public: + virtual ~LRUList() { + common::MutexLock _(&lock_); + assert(!head_); + assert(!tail_); + } + + // Push element into the LRU at the cold end + inline void Push(T* const t) { + assert(t); + assert(!t->next_); + assert(!t->prev_); + + common::MutexLock _(&lock_); + + assert((!head_ && !tail_) || (head_ && tail_)); + assert(!head_ || !head_->prev_); + assert(!tail_ || !tail_->next_); + + t->next_ = head_; + if (head_) { + head_->prev_ = t; + } + + head_ = t; + if (!tail_) { + tail_ = t; + } + } + + // Unlink the element from the LRU + inline void Unlink(T* const t) { + common::MutexLock _(&lock_); + UnlinkImpl(t); + } + + // Evict an element from the LRU + inline T* Pop() { + common::MutexLock _(&lock_); + if (!head_ && !tail_) { + LEVELDB_LOG("Empty lru list, nothing to pop.\n"); + return nullptr; + } + + assert(tail_ && head_); + assert(!tail_->next_); + assert(!head_->prev_); + + T* t = head_; + while (t && t->refs_) { + t = t->next_; + } + + if (!t) { + LEVELDB_LOG("Nothing to pop.\n"); + // nothing can be evicted + return nullptr; + } + + assert(!t->refs_); + + // unlike the element + UnlinkImpl(t); + return t; + } + + // Move the element from the front of the list to the back of the list + inline void Touch(T* const t) { + common::MutexLock _(&lock_); + UnlinkImpl(t); + PushBackImpl(t); + } + + // Check if the LRU is empty + inline bool IsEmpty() const { + common::MutexLock _(&lock_); + return !head_ && !tail_; + } + + private: + // Unlink an element from the LRU + void UnlinkImpl(T* const t) { + assert(t); + + lock_.AssertHeld(); + + assert(head_ && tail_); + assert(t->prev_ || head_ == t); + assert(t->next_ || tail_ == t); + + if (t->prev_) { + t->prev_->next_ = t->next_; + } + if (t->next_) { + t->next_->prev_ = t->prev_; + } + + if (tail_ == t) { + tail_ = tail_->prev_; + } + if (head_ == t) { + head_ = head_->next_; + } + + t->next_ = t->prev_ = nullptr; + } + + // Insert an element at the hot end + inline void PushBack(T* const t) { + common::MutexLock _(&lock_); + PushBackImpl(t); + } + + inline void PushBackImpl(T* const t) { + assert(t); + assert(!t->next_); + assert(!t->prev_); + + lock_.AssertHeld(); + + assert((!head_ && !tail_) || (head_ && tail_)); + assert(!head_ || !head_->prev_); + assert(!tail_ || !tail_->next_); + + t->prev_ = tail_; + if (tail_) { + tail_->next_ = t; + } + + tail_ = t; + if (!head_) { + head_ = tail_; + } + } + + mutable Mutex lock_; // synchronization primitive + T* head_ = nullptr; // front (cold) + T* tail_ = nullptr; // back (hot) +}; +} // namespace leveldb diff --git a/src/leveldb/persistent_cache/persistent_cache_file.cc b/src/leveldb/persistent_cache/persistent_cache_file.cc new file mode 100644 index 000000000..115f1cdcc --- /dev/null +++ b/src/leveldb/persistent_cache/persistent_cache_file.cc @@ -0,0 +1,137 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#include + +#include "persistent_cache/persistent_cache_file.h" +#include "persistent_cache/persistent_cache_impl.h" +#include "table/format.h" + +namespace leveldb { + +// +// CacheFile +// +Status CacheFile::Delete(uint64_t* size) { + Status status = env_->GetFileSize(Path(), size); + if (!status.ok()) { + return status; + } + return env_->DeleteFile(Path()); +} + +// +// RandomAccessFile +// +Status RandomAccessCacheFile::Open() { return OpenImpl(); } + +Status RandomAccessCacheFile::OpenImpl() { + RandomAccessFile* file; + auto status = env_->NewRandomAccessFile(Path(), &file, env_opt_); + if (!status.ok()) { + LEVELDB_LOG("Error opening random access file %s. %s\n", Path().c_str(), + status.ToString().c_str()); + } else { + file_.reset(file); + } + + return status; +} + +Status RandomAccessCacheFile::Read(size_t offset, size_t length, Slice* val, + SstDataScratch* scratch) { + if (!file_) { + LEVELDB_LOG("Not Open\n"); + return Status::IOError("File Not Open"); + } + Status s = ReadSstFile(file_.get(), env_opt_.use_direct_io_read, offset, length, val, scratch); + if (!s.ok()) { + LEVELDB_LOG("Error reading from file %s. %s\n", Path().c_str(), s.ToString().c_str()); + return s; + } + return Status::OK(); +} + +// +// WriteableCacheFile +// +Status WriteableCacheFile::Create() { + auto path = Path(); + auto dir_pos = path.find_last_of('/'); + Status s; + if (dir_pos != std::string::npos) { + s = env_->CreateDir(path.substr(0, dir_pos)); + if (!s.ok()) { + LEVELDB_LOG("Create dir failed %s, reason %s.\n", path.substr(0, dir_pos).c_str(), + s.ToString().c_str()); + return s; + } + } + WritableFile* file; + s = env_->NewWritableFile(path, &file, env_opt_); + file_.reset(file); + if (!s.ok()) { + LEVELDB_LOG("Unable to create file %s. %s\n", Path().c_str(), s.ToString().c_str()); + return s; + } + + assert(!refs_); + ++refs_; + + return s; +} + +Status WriteableCacheFile::Append(const Slice& val) { + uint64_t i = 0; + + Status s; + while (!(s = cache_->MakeRoomForWrite(val.size())).ok()) { + Env::Default()->SleepForMicroseconds(kRetryIntervalUs); + if (i++ >= write_retry_times_) { + cache_->GetStats()->cache_errors.Inc(); + return s; + } + } + + s = file_->Append(val); + cache_->GetStats()->write_count.Inc(); + + if (s.ok()) { + cache_->GetStats()->write_throughput.Add(val.size()); + } else { + cache_->GetStats()->cache_errors.Inc(); + } + + return s; +} + +void WriteableCacheFile::Close(const Slice& key) { + file_.reset(); + assert(refs_); + // Our env abstraction do not allow reading from a file opened for appending + // We need close the file and re-open it for reading + if (!RandomAccessCacheFile::OpenImpl().ok() || !cache_->Insert(key, this)) { + LEVELDB_LOG("Close cache file: %s -> %s failed.\n", Path().c_str(), key.ToString().c_str()); + cache_->ForceEvict(this); + } + LEVELDB_LOG("Close cache file: %s -> %s succeed.\n", Path().c_str(), key.ToString().c_str()); + + if (InsertCallback) { + InsertCallback(key); + } + + --refs_; +} + +void WriteableCacheFile::Abandon() { + file_.reset(); + assert(refs_); + LEVELDB_LOG("Abandon cache file: %s\n", Path().c_str()); + cache_->ForceEvict(this); + --refs_; +} +} // namespace leveldb diff --git a/src/leveldb/persistent_cache/persistent_cache_file.h b/src/leveldb/persistent_cache/persistent_cache_file.h new file mode 100644 index 000000000..e0ae53f7e --- /dev/null +++ b/src/leveldb/persistent_cache/persistent_cache_file.h @@ -0,0 +1,161 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#pragma once + +#include +#include +#include +#include + +#include "common/mutex.h" +#include "leveldb/db.h" +#include "leveldb/env.h" +#include "lrulist.h" +#include "table/format.h" + +// The file level operations are encapsulated in the following abstractions +// +// CacheFile +// ^ +// | +// | +// RandomAccessCacheFile (For reading) +// ^ +// | +// | +// WriteableCacheFile (For writing) +// +// Write IO code path : +// + +namespace leveldb { + +class PersistentCacheImpl; +class FileInfo; + +// class CacheFile +// +// Generic interface to support building file specialized for read/writing +class CacheFile : public LRUElement { + public: + explicit CacheFile(const uint64_t cache_id) : cache_id_(cache_id) {} + + CacheFile(uint64_t cache_id, Env* env, const EnvOptions& opt, const std::string& path, + PersistentCacheImpl* cache) + : LRUElement(), + cache_id_(cache_id), + env_(env), + env_opt_(opt), + path_(path), + cache_(cache) {} + + virtual ~CacheFile() = default; + + // append key/value to file and return LBA locator to user + virtual Status Append(const Slice& /*val*/) { + assert(!"not implemented"); + return Status::InvalidArgument("Not Implemented"); + } + + // read from the record locator (LBA) and return key, value and status + virtual Status Read(size_t offset, size_t length, Slice* /*block*/, + std::unique_ptr>* /*scratch*/) { + assert(!"not implemented"); + return Status::InvalidArgument("Not Implemented"); + } + + // get file path + std::string Path() const { return path_; } + // get cache ID + uint64_t cacheid() const { return cache_id_; } + + Status Delete(uint64_t* size); + + void SetInfo(FileInfo* info) { info_ = info; } + + FileInfo* Info() { return info_; } + + protected: + const uint64_t cache_id_; // Cache id for the file + Env* const env_ = nullptr; // Env for IO + EnvOptions env_opt_; // Env options for env (dio, write buffer...) + std::string path_; // Directory name + PersistentCacheImpl* cache_ = nullptr; + FileInfo* info_ = nullptr; // Related file info +}; + +// class RandomAccessFile +// +// Thread safe implementation for reading random data from file +class RandomAccessCacheFile : public CacheFile { + public: + RandomAccessCacheFile(uint64_t cache_id, Env* env, const EnvOptions& opt, const std::string& path, + PersistentCacheImpl* cache = nullptr) + : CacheFile(cache_id, env, opt, path, cache) {} + + ~RandomAccessCacheFile() override = default; + + // open file for reading + Status Open(); + + // read data from the disk + Status Read(size_t offset, size_t length, Slice* block, SstDataScratch* scratch) override; + + private: + std::unique_ptr file_ = nullptr; + + protected: + Status OpenImpl(); +}; + +// class WriteableCacheFile +// +// All writes to the files are cached in buffers. The buffers are flushed to +// disk as they get filled up. When file size reaches a certain size, a new file +// will be created provided there is free space +class WriteableCacheFile : public RandomAccessCacheFile { + static constexpr uint32_t kRetryIntervalUs = 1000000; // sleep 1s when reserve failed. + public: + explicit WriteableCacheFile(uint64_t cache_id, Env* env, const EnvOptions& opt, + const std::string& path, uint64_t write_retry_times, + PersistentCacheImpl* cache) + : RandomAccessCacheFile(cache_id, env, opt, path, cache), + write_retry_times_(write_retry_times) {} + + ~WriteableCacheFile() override = default; + + // create file on disk + Status Create(); + + // append data to end of file + Status Append(const Slice&) override; + + // Called when user successfully finish cache file's writing, and want to insert this file to + // persistent cache with specified cache key. + // This method will close file and open it for reading, and insert it to persistent cache's + // metadata. + // After called this method, user can read this cache file with the specified cache key. + // (If no error occured when insert to metadata) + void Close(const Slice& key); + + // Called when user write cache file failed, and want to directly remove it. + // This method will close file without inserting to persistent cache's metadata. + // And it will be removed in next GC procedure. + void Abandon(); + + void SetInsertCallback(std::function cb) { InsertCallback = cb; } + + private: + // Leveldb Env file abstraction + std::unique_ptr file_ = nullptr; + // This call back will be set by ShardedPersistentCache for updating its cache_index_. + // And it will be called after successfully close and insert. + std::function InsertCallback; + const uint64_t write_retry_times_ = 5; +}; +} // namespace leveldb diff --git a/src/leveldb/persistent_cache/persistent_cache_impl.cc b/src/leveldb/persistent_cache/persistent_cache_impl.cc new file mode 100644 index 000000000..5a9831c49 --- /dev/null +++ b/src/leveldb/persistent_cache/persistent_cache_impl.cc @@ -0,0 +1,375 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "persistent_cache_impl.h" +#include "persistent_cache_file.h" +#include "persistent_cache_metadata.h" + +#include "util/logging.h" + +namespace leveldb { +using SubscriberType = tera::Subscriber::SubscriberType; +using PersistentCacheMetricNames::kWriteThroughput; +using PersistentCacheMetricNames::kWriteCount; +using PersistentCacheMetricNames::kReadThroughput; +using PersistentCacheMetricNames::kCacheHits; +using PersistentCacheMetricNames::kCacheMisses; +using PersistentCacheMetricNames::kCacheErrors; +using PersistentCacheMetricNames::kCacheCapacity; +using PersistentCacheMetricNames::kFileEntries; +using PersistentCacheMetricNames::kCacheSize; +using PersistentCacheMetricNames::kMetaDataSize; + +std::string PersistentCacheConfig::ToString() const { + std::string ret; + ret.reserve(20000); + const int kBufferSize = 200; + char buffer[kBufferSize]; + + snprintf(buffer, kBufferSize, " path: %s\n", path.c_str()); + ret.append(buffer); + snprintf(buffer, kBufferSize, " use_direct_reads: %d\n", env_opt.use_direct_io_read); + ret.append(buffer); + snprintf(buffer, kBufferSize, " use_direct_writes: %d\n", env_opt.use_direct_io_write); + ret.append(buffer); + snprintf(buffer, kBufferSize, " cache_size: %lu\n", cache_size); + ret.append(buffer); + + return ret; +} + +bool PersistentCacheImpl::IsCacheFile(const std::string& file) { + // check if the file has .rc suffix + if (file.size() <= 3 || file.substr(file.size() - 3) != ".rc") { + return false; + } + + std::string prefix = file.substr(0, file.size() - 3); + auto last_point_pos = prefix.find_last_of('.'); + if (last_point_pos == std::string::npos) { + return false; + } + + auto cache_id_str = prefix.substr(last_point_pos + 1); + uint64_t cache_id{0}; + try { + cache_id = std::stoul(cache_id_str); + } catch (...) { + return false; + }; + + auto cache_file = metadata_.Lookup(cache_id); + if (!cache_file) { + return false; + } + + bool is_same_path = false; + + auto status = opt_.env->IsSamePath(file, cache_file->Path(), &is_same_path); + if (!status.ok()) { + LEVELDB_LOG("Error checking same path, path1 %s, path2 %s, reason %s, remove it.\n", + file.c_str(), cache_file->Path().c_str(), status.ToString().c_str()); + return false; + } + + return true; +} + +PersistentCacheImpl::Statistics::Statistics() + : write_throughput(kWriteThroughput, {SubscriberType::THROUGHPUT}), + write_count(kWriteCount, {SubscriberType::QPS}), + read_throughput(kReadThroughput, {SubscriberType::THROUGHPUT}), + cache_hits(kCacheHits, {SubscriberType::QPS}), + cache_misses(kCacheMisses, {SubscriberType::QPS}), + cache_errors(kCacheErrors, {SubscriberType::QPS}), + file_entries(kFileEntries, {SubscriberType::LATEST}, false) {} + +PersistentCacheImpl::PersistentCacheImpl(const PersistentCacheConfig& opt, + const std::shared_ptr& stats) + : opt_(opt), + metadata_(opt), + size_(kCacheSize, "path:" + opt.path, {SubscriberType::LATEST}, false), + capacity_(kCacheCapacity, "path:" + opt.path, {SubscriberType::LATEST}, false), + metadata_size_(kMetaDataSize, "path:" + opt.path, {SubscriberType::LATEST}, false), + stats_(stats) { + capacity_.Set(opt.cache_size); +} + +void PersistentCacheImpl::CleanupCacheFolder(const std::string& folder) { + LEVELDB_LOG("Begin Cleanup Cache Folder %s.\n", folder.c_str()); + DoCleanupCacheFolder(folder); +} + +void PersistentCacheImpl::DoCleanupCacheFolder(const std::string& folder) { + std::vector children; + Status status = opt_.env->GetChildren(folder, &children); + if (!status.ok()) { + LEVELDB_LOG("Error getting files for %s. %s\n", folder.c_str(), status.ToString().c_str()); + return; + } + for (const auto& child : children) { + auto path = folder + "/" + child; + SystemFileType type; + status = opt_.env->GetFileType(path, &type); + if (!status.ok()) { + LEVELDB_LOG("Get file type failed for %s, reason %s, remove it.\n", path.c_str(), + status.ToString().c_str()); + status = opt_.env->DeleteFile(path); + if (status.ok()) { + continue; + } + LEVELDB_LOG("Error deleting file %s. %s, try delete as dir\n", path.c_str(), + status.ToString().c_str()); + + status = opt_.env->DeleteDirRecursive(path); + if (status.ok()) { + continue; + } + LEVELDB_LOG("Error deleting dir %s. %s\n", path.c_str(), status.ToString().c_str()); + } + + switch (type) { + case SystemFileType::kRegularFile: { + if (!IsCacheFile(path)) { + // non cache file + LEVELDB_LOG("Removing non-cache file %s.\n", path.c_str()); + status = opt_.env->DeleteFile(path); + if (!status.ok()) { + LEVELDB_LOG("Error deleting file %s. %s, try delete as dir\n", path.c_str(), + status.ToString().c_str()); + } + } + break; + } + case SystemFileType::kDir: { + bool is_same_path = false; + status = opt_.env->IsSamePath(path, GetMetaPath(), &is_same_path); + if (!status.ok()) { + LEVELDB_LOG("Error checking same path, path1 %s, path2 %s, reason %s, skip it.\n", + path.c_str(), GetMetaPath().c_str(), status.ToString().c_str()); + continue; + } + if (!is_same_path) { + DoCleanupCacheFolder(path); + } + break; + } + default: { LEVELDB_LOG("Unknown file type for path: %s.\n", path.c_str()); } + } + } +} + +Status PersistentCacheImpl::Open() { + Status status; + + assert(!size_.Get()); + + // Check the validity of the options + status = opt_.ValidateSettings(); + assert(status.ok()); + if (!status.ok()) { + LEVELDB_LOG("Invalid persistent cache options.\n"); + return status; + } + + // Create base directory + status = opt_.env->CreateDir(opt_.path); + if (!status.ok()) { + LEVELDB_LOG("Error creating directory %s. %s.\n", opt_.path.c_str(), status.ToString().c_str()); + return status; + } + + // Create meta directory + status = opt_.env->CreateDir(GetMetaPath()); + assert(status.ok()); + if (!status.ok()) { + LEVELDB_LOG("Error creating directory %s. %s.\n", GetMetaPath().c_str(), + status.ToString().c_str()); + return status; + } + + status = metadata_.Init(&writer_cache_id_, this); + if (!status.ok()) { + LEVELDB_LOG("Init metadata failed, reason: %s.\n", status.ToString().c_str()); + return status; + } + metadata_size_.Set(metadata_.GetDBSize()); + if (opt_.transfer_flash_env_files) { + PullEnvFlashFiles(); + } + // Clean up non-cache file + CleanupCacheFolder(GetCachePath()); + + LEVELDB_LOG("Persistent Cache Init Done, writer_cache_id: %lu\n", writer_cache_id_); + + return Status::OK(); +} + +Status PersistentCacheImpl::Read(const Slice& key, size_t offset, size_t length, Slice* content, + SstDataScratch* scratch) { + uint64_t cache_id; + bool ok = metadata_.Lookup(key, &cache_id); + if (!ok) { + stats_->cache_misses.Inc(); + return Status::NotFound("persistent cache: cache id not found"); + } + + auto file = metadata_.Lookup(cache_id); + if (!file) { + // this can happen because the file index and cache file index are + // different, and the cache file might be removed between the two lookups + stats_->cache_misses.Inc(); + return Status::NotFound("persistent cache: cache file not found"); + } + + assert(file->refs_); + auto s = file->Read(offset, length, content, scratch); + file.reset(); + + if (!s.ok()) { + stats_->cache_misses.Inc(); + stats_->cache_errors.Inc(); + return s; + } + + stats_->read_throughput.Add(content->size()); + stats_->cache_hits.Inc(); + return Status::OK(); +} + +void PersistentCacheImpl::ForceEvict(const Slice& key) { + std::unique_ptr file{metadata_.ForceEvict(key)}; + if (file) { + assert(!file->refs_); + if (DeleteFileAndReleaseCache(file.get()).ok()) { + LEVELDB_LOG("Remove force evicted cache file: %s.", file->Path().c_str()); + } + } +} + +void PersistentCacheImpl::ForceEvict(CacheFile* file) { metadata_.ForceEvict(file); } + +Status PersistentCacheImpl::NewWriteableCacheFile(const std::string& path, + WriteableCacheFile** file) { + std::lock_guard _(lock_); + auto real_path = GetCachePath() + path + "." + std::to_string(writer_cache_id_) + ".rc"; + std::unique_ptr f(new WriteableCacheFile{ + writer_cache_id_, opt_.env, opt_.env_opt, real_path, opt_.write_retry_times, this}); + + auto status = f->Create(); + + if (!status.ok()) { + return status; + } + + LEVELDB_LOG("Created cache file %s\n", f->Path().c_str()); + + ++writer_cache_id_; + + auto success = metadata_.AddCacheFile(f.get()); + assert(success); + *file = f.release(); + return Status::OK(); +} + +Status PersistentCacheImpl::MakeRoomForWrite(int64_t size) { + std::lock_guard _(lock_); + assert(size_.Get() <= opt_.cache_size); + + if (size + size_.Get() <= opt_.cache_size) { + // there is enough space to write + size_.Add(size); + return Status::OK(); + } + + // there is not enough space to fit the requested data + // we can clear some space by evicting cold data + while (size + size_.Get() > opt_.cache_size) { + std::unique_ptr f(metadata_.Evict()); + if (!f) { + // nothing is evictable + return Status::IOError("No space for writing persistent cache."); + } + assert(!f->refs_); + if (DeleteFileAndReleaseCache(f.get()).ok()) { + LEVELDB_LOG("Remove evicted cache file: %lu.rc.", f->cacheid()); + } + } + + size_.Add(size); + assert(size_.Get() <= opt_.cache_size); + return Status::OK(); +} + +bool PersistentCacheImpl::Insert(const Slice& key, CacheFile* file) { + std::lock_guard _(lock_); + uint64_t cache_id; + if (metadata_.Lookup(key, &cache_id)) { + LEVELDB_LOG("File already exists, force evict it: %s\n", key.ToString().c_str()); + ForceEvict(key); + } + + // Insert file to meta data; + auto file_in_meta = metadata_.Lookup(file->cacheid()); + assert(file_in_meta.get() == file); + + auto f_info = metadata_.Insert(key, file); + metadata_size_.Set(metadata_.GetDBSize()); + + if (!f_info) { + return false; + } + + file->SetInfo(f_info); + return true; +} + +std::vector PersistentCacheImpl::GetAllKeys() { return metadata_.GetAllKeys(); } + +Status PersistentCacheImpl::DeleteFileAndReleaseCache(CacheFile* file) { + uint64_t file_size; + auto status = file->Delete(&file_size); + if (!status.ok()) { + // unable to delete file + LEVELDB_LOG("Remove evicted cache file %lu.rc failed, reason: %s.", file->cacheid(), + status.ToString().c_str()); + return status; + } + + assert(file_size <= (uint64_t)size_.Get()); + size_.Sub(file_size); + assert(size_.Get() >= 0); + return status; +} + +void PersistentCacheImpl::GarbageCollect() { + std::vector> evictable_files = metadata_.CollectEvictableFiles(); + for (auto& file : evictable_files) { + assert(!file->refs_); + DeleteFileAndReleaseCache(file.get()); + LEVELDB_LOG("Remove evictable file in GC: %lu.rc.", file->cacheid()); + } +} + +Status NewPersistentCache(const PersistentCacheConfig& config, + std::shared_ptr* cache) { + if (!cache) { + return Status::IOError("invalid argument cache"); + } + std::shared_ptr stats{new PersistentCacheImpl::Statistics}; + auto pcache = std::make_shared(config, stats); + Status s = pcache->Open(); + + if (!s.ok()) { + return s; + } + + *cache = pcache; + return s; +} +} // namespace leveldb diff --git a/src/leveldb/persistent_cache/persistent_cache_impl.h b/src/leveldb/persistent_cache/persistent_cache_impl.h new file mode 100644 index 000000000..24a002b9c --- /dev/null +++ b/src/leveldb/persistent_cache/persistent_cache_impl.h @@ -0,0 +1,89 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#pragma once + +#include +#include +#include +#include + +#include "common/metric/metric_counter.h" +#include "leveldb/persistent_cache.h" +#include "persistent_cache_metadata.h" + +namespace leveldb { + +class PersistentCacheImpl : public PersistentCache { + friend class WriteableCacheFile; + friend class PersistentCacheMetaData; + + public: + struct Statistics { + Statistics(); + tera::MetricCounter write_throughput; + tera::MetricCounter write_count; + tera::MetricCounter read_throughput; + tera::MetricCounter cache_hits; + tera::MetricCounter cache_misses; + tera::MetricCounter cache_errors; + tera::MetricCounter file_entries; + }; + + PersistentCacheImpl(const PersistentCacheConfig &opt, const std::shared_ptr &stats); + + // Interface Impl + ~PersistentCacheImpl() override { metadata_.Clear(); } + Status Open() override; + Status Read(const Slice &key, size_t offset, size_t length, Slice *content, + SstDataScratch *scratch) override; + void ForceEvict(const Slice &key) override; + Status NewWriteableCacheFile(const std::string &path, WriteableCacheFile **file) override; + + size_t GetCapacity() const override { return static_cast(opt_.cache_size); } + size_t GetUsage() const override { return static_cast(size_.Get()); } + std::vector GetAllKeys() override; + void GarbageCollect() override; + + private: + Status MakeRoomForWrite(int64_t size); + void ForceEvict(CacheFile *file); + bool Insert(const Slice &key, CacheFile *file); + + // These two methods is used for moving env_flash's file to Persistent Cache with out refill + // cache. + // Maybe deprecated in future version. + void AddExistingFile(const Slice &key, const std::string &file_name); + void PullEnvFlashFiles(); + + // Get cache directory path + std::string GetCachePath() const { return opt_.path; } + + // Get cache metadata directory path + std::string GetMetaPath() const { return opt_.path + "/persistent_cache_meta"; } + + std::shared_ptr GetStats() { return stats_; } + + bool IsCacheFile(const std::string &file); + + Status DeleteFileAndReleaseCache(CacheFile *file); + + void CleanupCacheFolder(const std::string &folder); + void DoCleanupCacheFolder(const std::string &folder); + + private: + std::mutex lock_; // Synchronization + const PersistentCacheConfig opt_; // BlockCache options + PersistentCacheMetaData metadata_; + uint64_t writer_cache_id_ = 0; // Current cache file identifier + tera::MetricCounter size_; // Size of the cache + tera::MetricCounter capacity_; // Capacity of the cache + tera::MetricCounter metadata_size_; + std::shared_ptr stats_; // Statistics +}; + +} // namespace leveldb diff --git a/src/leveldb/persistent_cache/persistent_cache_metadata.cc b/src/leveldb/persistent_cache/persistent_cache_metadata.cc new file mode 100644 index 000000000..071d58723 --- /dev/null +++ b/src/leveldb/persistent_cache/persistent_cache_metadata.cc @@ -0,0 +1,305 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#include "persistent_cache_metadata.h" +#include "persistent_cache_impl.h" +#include "leveldb/db.h" +#include "db/table_cache.h" +#include "leveldb/persistent_cache.h" + +namespace leveldb { + +bool PersistentCacheMetaData::AddCacheFile(CacheFile* file) { + return cache_file_index_.Insert(file); +} + +PersistentCacheMetaData::CacheFileWrapper PersistentCacheMetaData::Lookup(uint64_t cache_id) { + CacheFile* ret = nullptr; + CacheFile lookup_key(cache_id); + bool ok = cache_file_index_.Find(&lookup_key, &ret); + if (ok) { + assert(ret->refs_); + return CacheFileWrapper{ret, [](CacheFile* f) { --f->refs_; }}; + } + return nullptr; +} + +CacheFile* PersistentCacheMetaData::Evict() { + CacheFile* evicted_file = nullptr; + + { + std::lock_guard _(evictable_files_lock_); + if (!evictable_files_.empty()) { + auto iter = evictable_files_.begin(); + while (iter != evictable_files_.end() && (*iter)->refs_) { + ++iter; + } + if (iter != evictable_files_.end()) { + evicted_file = *iter; + evictable_files_.erase(iter); + LEVELDB_LOG("Find evictable file for evict, current evictable files size: %lu\n", + evictable_files_.size()); + } + } + } + + if (!evicted_file) { + using std::placeholders::_1; + auto fn = std::bind(&PersistentCacheMetaData::RemoveFileInfo, this, _1); + evicted_file = cache_file_index_.Evict(fn); + } + + return evicted_file; +} + +void PersistentCacheMetaData::Clear() { + cache_file_index_.Clear([](CacheFile* arg) { delete arg; }); + cache_id_index_.Clear([](FileInfo* arg) { delete arg; }); +} + +FileInfo* PersistentCacheMetaData::InsertWithoutPutDb(const Slice& key, uint64_t cache_id) { + std::unique_ptr f_info(new FileInfo(key, cache_id)); + if (!cache_id_index_.Insert(f_info.get())) { + LEVELDB_LOG("Insert key failed %s , cache_id %lu\n", key.ToString().c_str(), cache_id); + return nullptr; + } else { + std::lock_guard _(keys_lock_); + keys_.emplace(key.ToString()); + } + return f_info.release(); +} + +FileInfo* PersistentCacheMetaData::Insert(const Slice& key, CacheFile* cache_file) { + auto finfo = InsertWithoutPutDb(key, cache_file->cacheid()); + if (finfo) { + db_->Put({}, key, cache_file->Path()); + } + return finfo; +} + +bool PersistentCacheMetaData::Lookup(const Slice& key, uint64_t* cache_id) { + FileInfo lookup_key(key, 0); + FileInfo* info; + RWMutex* rlock = nullptr; + if (!cache_id_index_.Find(&lookup_key, &info, &rlock)) { + return false; + } + + ReadUnlock _(rlock); + assert(info->key_ == key.ToString()); + if (cache_id) { + *cache_id = info->cache_id_; + } + return true; +} + +FileInfo* PersistentCacheMetaData::Remove(const Slice& key) { + FileInfo lookup_key(key, 0); + FileInfo* finfo = nullptr; + cache_id_index_.Erase(&lookup_key, &finfo); + { + std::lock_guard _(keys_lock_); + keys_.erase(key.ToString()); + } + db_->Delete({}, key); + return finfo; +} + +void PersistentCacheMetaData::RemoveFileInfo(CacheFile* f) { + FileInfo* tmp = nullptr; + auto f_info = f->Info(); + if (f_info) { + auto status = cache_id_index_.Erase(f_info, &tmp); + { + std::lock_guard _(keys_lock_); + keys_.erase(f_info->key_); + } + assert(status); + assert(tmp == f_info); + db_->Delete({}, f_info->key_); + LEVELDB_LOG("Remove file from persistent cache: %s\n", f_info->key_.c_str()); + delete f_info; + } + // assert(f_info); +} + +Status PersistentCacheMetaData::Init(uint64_t* recovered_cache_id, PersistentCacheImpl* cache) { + Options opt; + opt.filter_policy = NewBloomFilterPolicy(10); + opt.block_cache = leveldb::NewLRUCache(8UL * 1024 * 1024); + opt.table_cache = new leveldb::TableCache(8UL * 1024 * 1024); + opt.info_log = Logger::DefaultLogger(); + + auto lg_info = new leveldb::LG_info(0); + lg_info->env = NewPosixEnv(); + lg_info->env->SetBackgroundThreads(5); + + opt.lg_info_list = new std::map; + opt.lg_info_list->insert(std::make_pair(0, lg_info)); + opt.use_file_lock = false; // Single process access is guaranteed by ts port. + + DB* db; + auto status = DB::Open(opt, cache->GetMetaPath(), &db); + + if (!status.ok()) { + LEVELDB_LOG("Open persistent cache metadata failed, reason: %s\n", status.ToString().c_str()); + return status; + } + + db_.reset(db); + + ReadOptions read_options(&opt); + read_options.fill_cache = false; + read_options.verify_checksums = true; + std::unique_ptr iterator{db_->NewIterator(read_options)}; + iterator->SeekToFirst(); + uint64_t max_cache_id{0}; + + // db format(kv): + // key -> file_path + // when restore meta data from disk, we get k-v pair from leveldb + // where key is the user cache key, and value is file_path in system + while (iterator->Valid()) { + const Slice key = iterator->key(); + Slice val = iterator->value(); + assert(val.ends_with(".rc")); + + // path format : /xxx/yyy/zzz/k.cache_id.rc + // try extract cache_id from path. + std::string path = val.ToString(); + std::string cache_id_str = path.substr(0, path.size() - 3); + auto last_point_pos = cache_id_str.find_last_of('.'); + assert(last_point_pos != std::string::npos); + cache_id_str = cache_id_str.substr(last_point_pos + 1); + uint64_t cache_id{0}; + + try { + cache_id = std::stoul(cache_id_str); + } catch (...) { + LEVELDB_LOG("[%s] Fail to recover cache_id key: %s, value: %s\n", + cache->GetCachePath().c_str(), key.ToString().c_str(), val.ToString().c_str()); + db->Delete({}, key); + continue; + }; + + LEVELDB_LOG("[%s] Recover persistent key %s, value %s , cache id %lu\n", + cache->GetCachePath().c_str(), key.ToString().c_str(), val.ToString().c_str(), + cache_id); + + auto cache_file = Lookup(cache_id); + assert(!cache_file); + + max_cache_id = std::max(cache_id, max_cache_id); + + std::unique_ptr file{ + new RandomAccessCacheFile{cache_id, config_.env, config_.env_opt, path}}; + + uint64_t file_size; + Status s; + + // Get Cache File Size + if (!(s = config_.env->GetFileSize(file->Path(), &file_size)).ok()) { + LEVELDB_LOG("[%s] Get size for file %s failed, reason: %s.\n", cache->GetCachePath().c_str(), + file->Path().c_str(), s.ToString().c_str()); + } + + // Try open file for read + if (s.ok() && !(s = file->Open()).ok()) { + LEVELDB_LOG("[%s] Get cache file %s for read failed, reason: %s.\n", + cache->GetCachePath().c_str(), file->Path().c_str(), s.ToString().c_str()); + } + + // Try make space for insert cache file + if (s.ok() && !(s = cache->MakeRoomForWrite(file_size)).ok()) { + LEVELDB_LOG("[%s] Make room for cache file %s failed, reason: %s.\n", + cache->GetCachePath().c_str(), file->Path().c_str(), s.ToString().c_str()); + } + + // Insert file to meta data + if (s.ok()) { + auto success = AddCacheFile(file.get()); + assert(success); + FileInfo* info = InsertWithoutPutDb(key, file->cacheid()); + assert(info); + file->SetInfo(info); + file.release(); + } + + if (!s.ok()) { + db_->Delete({}, iterator->key()); + } + iterator->Next(); + } + *recovered_cache_id = (uint64_t)max_cache_id + 1; + return Status::OK(); +} + +CacheFile* PersistentCacheMetaData::ForceEvict(const Slice& key) { + auto f_info = Remove(key); + if (!f_info) { + return nullptr; + } + + auto file = Lookup(f_info->cache_id_); + delete f_info; + + if (!file) { + return nullptr; + } else { + file->SetInfo(nullptr); + } + + // All cache files should be found in cache_file_index. + auto removed_file = cache_file_index_.Remove(file.get()); + assert(removed_file == file.get()); + file.reset(); + + if (removed_file->refs_ == 0) { + // No body use this file, return to persistent cache for delete. + return removed_file; + } else { + { + // Someone is using this file, so put it to evictable_files_, waiting for gc or evict. + std::lock_guard _(evictable_files_lock_); + evictable_files_.emplace(removed_file); + } + return nullptr; + } +} + +void PersistentCacheMetaData::ForceEvict(CacheFile* file) { + assert(file && file->refs_ == 1); + file->SetInfo(nullptr); + auto removed_file = cache_file_index_.Remove(file); + assert(removed_file == file); + { + // This method is only called when write cache file failed. + // So there must be one refs_ handle in CacheFile. + // We temporarily add it to evictable_files_, and it will be removed in next gc. + std::lock_guard _(evictable_files_lock_); + evictable_files_.emplace(removed_file); + } +} + +std::vector> PersistentCacheMetaData::CollectEvictableFiles() { + std::vector> files; + { + std::lock_guard _{evictable_files_lock_}; + auto iter = evictable_files_.begin(); + while (iter != evictable_files_.end()) { + // Find all evictable files whose refs_ is zero. + if ((*iter)->refs_ == 0) { + files.emplace_back(*iter); + evictable_files_.erase(iter++); + } else { + ++iter; + } + } + } + return std::move(files); +} +} // namespace leveldb diff --git a/src/leveldb/persistent_cache/persistent_cache_metadata.h b/src/leveldb/persistent_cache/persistent_cache_metadata.h new file mode 100644 index 000000000..b93e8fe89 --- /dev/null +++ b/src/leveldb/persistent_cache/persistent_cache_metadata.h @@ -0,0 +1,146 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#pragma once + +#include +#include +#include +#include +#include + +#include "hash_table.h" +#include "hash_table_evictable.h" +#include "leveldb/persistent_cache.h" +#include "leveldb/slice.h" +#include "lrulist.h" +#include "persistent_cache_file.h" + +namespace leveldb { + +struct FileInfo { + explicit FileInfo(const Slice& key, uint64_t cache_id) + : key_(key.ToString()), cache_id_(cache_id) {} + + std::string key_; + uint64_t cache_id_; +}; + +// +// Persistent Cache Metadata +// +// The Persistent Cache Metadata holds all the metadata +// associated with persistent cache. +// It fundamentally contains 2 indexes and an LRU. +// +// Cache Id Index +// +// This is a forward index that maps a given fname to a cache id +// +// Cache File Index +// +// This is a forward index that maps a given cache-id to a cache file object. +class PersistentCacheImpl; + +// Thread safe. But Init() method can only be called once. +class PersistentCacheMetaData { + using CacheFileWrapper = std::unique_ptr>; + + public: + explicit PersistentCacheMetaData(const PersistentCacheConfig& config, + const uint32_t cache_file_capacity = 128 * 1024) + : config_(config), + cache_file_index_(cache_file_capacity), + cache_id_index_(cache_file_capacity), + db_(nullptr) {} + + virtual ~PersistentCacheMetaData() = default; + + // Restore Persistent cache meta data + Status Init(uint64_t* recovered_cache_id, PersistentCacheImpl* cache); + + bool AddCacheFile(CacheFile* file); + + // Read cache file based on cache_id + CacheFileWrapper Lookup(uint64_t cache_id); + + FileInfo* Insert(const Slice& key, CacheFile* cache_file); + + bool Lookup(const Slice& key, uint64_t* cache_id); + + FileInfo* Remove(const Slice& key); + + // Find and evict a cache file using LRU policy + CacheFile* Evict(); + + virtual void Clear(); + + CacheFile* ForceEvict(const Slice& key); + void ForceEvict(CacheFile* file); + + uint64_t GetDBSize() { + uint64_t db_size(0); + db_->GetApproximateSizes(&db_size); + return db_size; + } + + std::vector GetAllKeys() { + std::vector res; + { + std::lock_guard _{keys_lock_}; + res.reserve(keys_.size()); + res.insert(res.end(), keys_.begin(), keys_.end()); + } + return std::move(res); + } + + std::vector> CollectEvictableFiles(); + + protected: + virtual void RemoveFileInfo(CacheFile* file); + + private: + FileInfo* InsertWithoutPutDb(const Slice& key, uint64_t cache_id); + // Cache file index definition + // + // cache-id => CacheFile + struct CacheFileHash { + uint64_t operator()(const CacheFile* rec) { return std::hash()(rec->cacheid()); } + }; + + struct CacheFileEqual { + bool operator()(const CacheFile* lhs, const CacheFile* rhs) { + return lhs->cacheid() == rhs->cacheid(); + } + }; + + typedef EvictableHashTable CacheFileIndexType; + + // cache_id Index + // + // key => cache_id + struct Hash { + size_t operator()(FileInfo* node) const { return std::hash()(node->key_); } + }; + + struct Equal { + bool operator()(FileInfo* lhs, FileInfo* rhs) const { return lhs->key_ == rhs->key_; } + }; + + typedef HashTable CacheIdIndexType; + + const PersistentCacheConfig config_; + CacheFileIndexType cache_file_index_; + CacheIdIndexType cache_id_index_; + std::unique_ptr db_; + std::unordered_set evictable_files_; + std::mutex evictable_files_lock_; + // all keys in meta data + std::unordered_set keys_; + std::mutex keys_lock_; +}; +} // namespace leveldb diff --git a/src/leveldb/persistent_cache/sharded_persistent_cache_impl.cc b/src/leveldb/persistent_cache/sharded_persistent_cache_impl.cc new file mode 100644 index 000000000..0a199232a --- /dev/null +++ b/src/leveldb/persistent_cache/sharded_persistent_cache_impl.cc @@ -0,0 +1,24 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#include "sharded_persistent_cache_impl.h" +#include "leveldb/persistent_cache.h" + +namespace leveldb { +Status NewShardedPersistentCache(const std::vector &configs, + std::shared_ptr *cache) { + if (!cache) { + return Status::IOError("invalid argument cache"); + } + + auto pcache = std::make_shared(configs); + Status s = pcache->Open(); + + if (!s.ok()) { + return s; + } + + *cache = pcache; + return s; +} +} // namespace leveldb diff --git a/src/leveldb/persistent_cache/sharded_persistent_cache_impl.h b/src/leveldb/persistent_cache/sharded_persistent_cache_impl.h new file mode 100644 index 000000000..c0879c6ba --- /dev/null +++ b/src/leveldb/persistent_cache/sharded_persistent_cache_impl.h @@ -0,0 +1,192 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#pragma once + +#include +#include +#include +#include + +#include "common/rwmutex.h" +#include "common/this_thread.h" +#include "persistent_cache_impl.h" +#include "util/hash.h" +#include "util/random.h" + +namespace leveldb { +class CacheFile; + +class ShardedPersistentCacheImpl : public PersistentCache { + // This private label is used for unit test flag -Dprivate=public; + private: + using PersistentCachePtr = std::unique_ptr; + + public: + Status NewWriteableCacheFile(const std::string& path, WriteableCacheFile** file) override { + uint64_t index{PickPersistentCacheIndex()}; + auto status = persistent_caches_[index]->NewWriteableCacheFile(path, file); + + if (status.ok()) { + assert(*file); + assert(index < persistent_caches_.size()); + (*file)->SetInsertCallback(std::bind(&ShardedPersistentCacheImpl::InsertCallback, this, + std::placeholders::_1, index)); + } + return status; + } + + explicit ShardedPersistentCacheImpl(const std::vector& opts) + : stats_{new PersistentCacheImpl::Statistics} { + for (const auto& opt : opts) { + persistent_caches_.emplace_back(PersistentCachePtr{new PersistentCacheImpl{opt, stats_}}); + } + } + + ~ShardedPersistentCacheImpl() override = default; + + Status Read(const Slice& key, size_t offset, size_t length, Slice* content, + SstDataScratch* scratch) override { + uint64_t index; + auto key_str = key.ToString(); + { + ReadLock _(&index_rw_lock_); + if (cache_index_.find(key_str) == cache_index_.end()) { + stats_->cache_misses.Inc(); + return Status::NotFound("persistent cache: index not found."); + } + index = cache_index_[key_str]; + } + return persistent_caches_[index]->Read(key, offset, length, content, scratch); + } + + // Only can be called in single thread once before use persistent cache, + // so it doesn't need any lock mechanism. + Status Open() override { + for (auto& persistent_cache : persistent_caches_) { + auto s = persistent_cache->Open(); + if (!s.ok()) { + return s; + } + } + + for (size_t i = 0; i != persistent_caches_.size(); ++i) { + auto& cache = persistent_caches_[i]; + auto keys = cache->GetAllKeys(); + for (auto& key : keys) { + // Same key in more than one persistent cache impl, remove the latter one. + if (cache_index_.find(key) != cache_index_.end()) { + cache->ForceEvict(key); + } else { + cache_index_.emplace(std::move(key), i); + } + } + } + + return Status::OK(); + } + + void ForceEvict(const Slice& key) override { + uint64_t index; + auto key_str = key.ToString(); + { + WriteLock _{&index_rw_lock_}; + if (cache_index_.find(key_str) == cache_index_.end()) { + return; + } + index = cache_index_[key_str]; + cache_index_.erase(key_str); + } + return persistent_caches_[index]->ForceEvict(key); + } + + size_t GetCapacity() const override { + size_t size = 0; + for (auto& persistent_cache : persistent_caches_) { + size += persistent_cache->GetCapacity(); + } + return size; + } + + size_t GetUsage() const override { + size_t size = 0; + for (auto& persistent_cache : persistent_caches_) { + size += persistent_cache->GetUsage(); + } + return size; + } + + std::vector GetAllKeys() override { + std::vector result; + std::vector> sub_results; + size_t total_size{0}; + for (auto& persistent_cache : persistent_caches_) { + sub_results.emplace_back(persistent_cache->GetAllKeys()); + total_size += sub_results.back().size(); + } + result.reserve(total_size); + + for (auto& sub_result : sub_results) { + result.insert(result.end(), std::make_move_iterator(sub_result.begin()), + std::make_move_iterator(sub_result.end())); + } + + return std::move(result); + } + + void GarbageCollect() override { + for (auto& persistent_cache : persistent_caches_) { + persistent_cache->GarbageCollect(); + } + } + + private: + void InsertCallback(const Slice& key, uint64_t index) { + auto key_str = key.ToString(); + WriteLock _{&index_rw_lock_}; + LEVELDB_LOG("Insert to sharded persistent cache, key %s, index %lu.\n", key_str.c_str(), index); + cache_index_[key_str] = index; + } + + size_t FreeSpace(const PersistentCachePtr& p) { return p->GetCapacity() - p->GetUsage(); } + + uint64_t PickPersistentCacheIndex() { + // Read doc/persistent_cache.md for Pick strategy's detail. + std::vector indexes_for_random_pick; + for (size_t i = 0; i != persistent_caches_.size(); ++i) { + const auto& p_cache = persistent_caches_[i]; + if ((double)p_cache->GetUsage() / p_cache->GetCapacity() < 0.9) { + indexes_for_random_pick.push_back(i); + } + } + + if (indexes_for_random_pick.empty()) { + // Pick persistent cache index by free space size for using every disk's space as much as + // possible. This strategy will be enabled when all persistent_cache_impl_'s usage percent + // is larger than 90%. + auto iter = + std::max_element(persistent_caches_.begin(), persistent_caches_.end(), + [this](const PersistentCachePtr& x, const PersistentCachePtr& y) { + return FreeSpace(x) < FreeSpace(y); + }); + + return iter - persistent_caches_.begin(); + } else { + // Pick persistent cache index by random for best performance. + auto idx = ThisThread::GetRandomValue(0, indexes_for_random_pick.size() - 1); + return indexes_for_random_pick[idx]; + } + } + + private: + // After Open() method called, this vector is only used for read, so it's lock-free. + std::vector persistent_caches_; + + // This class handles serval persistent-caches, and cache_index_ managers each key's index as + // follow: + // user key=>index of persistent_caches_; + std::unordered_map cache_index_; + std::shared_ptr stats_; + RWMutex index_rw_lock_; +}; +} // namespace leveldb diff --git a/src/leveldb/port/atomic_pointer.h b/src/leveldb/port/atomic_pointer.h index 29026aa94..f28919183 100644 --- a/src/leveldb/port/atomic_pointer.h +++ b/src/leveldb/port/atomic_pointer.h @@ -74,9 +74,7 @@ inline void MemoryBarrier() { // Mac OS #elif defined(OS_MACOSX) -inline void MemoryBarrier() { - OSMemoryBarrier(); -} +inline void MemoryBarrier() { OSMemoryBarrier(); } #define LEVELDB_HAVE_MEMORY_BARRIER // ARM Linux @@ -92,9 +90,7 @@ typedef void (*LinuxKernelMemoryBarrierFunc)(void); // shows that the extra function call cost is completely negligible on // multi-core devices. // -inline void MemoryBarrier() { - (*(LinuxKernelMemoryBarrierFunc)0xffff0fa0)(); -} +inline void MemoryBarrier() { (*(LinuxKernelMemoryBarrierFunc)0xffff0fa0)(); } #define LEVELDB_HAVE_MEMORY_BARRIER // PPC @@ -113,8 +109,9 @@ inline void MemoryBarrier() { class AtomicPointer { private: void* rep_; + public: - AtomicPointer() { } + AtomicPointer() {} explicit AtomicPointer(void* p) : rep_(p) {} inline void* NoBarrier_Load() const { return rep_; } inline void NoBarrier_Store(void* v) { rep_ = v; } @@ -134,21 +131,14 @@ class AtomicPointer { class AtomicPointer { private: std::atomic rep_; + public: - AtomicPointer() { } - explicit AtomicPointer(void* v) : rep_(v) { } - inline void* Acquire_Load() const { - return rep_.load(std::memory_order_acquire); - } - inline void Release_Store(void* v) { - rep_.store(v, std::memory_order_release); - } - inline void* NoBarrier_Load() const { - return rep_.load(std::memory_order_relaxed); - } - inline void NoBarrier_Store(void* v) { - rep_.store(v, std::memory_order_relaxed); - } + AtomicPointer() {} + explicit AtomicPointer(void* v) : rep_(v) {} + inline void* Acquire_Load() const { return rep_.load(std::memory_order_acquire); } + inline void Release_Store(void* v) { rep_.store(v, std::memory_order_release); } + inline void* NoBarrier_Load() const { return rep_.load(std::memory_order_relaxed); } + inline void NoBarrier_Store(void* v) { rep_.store(v, std::memory_order_relaxed); } }; // Atomic pointer based on sparc memory barriers @@ -156,25 +146,26 @@ class AtomicPointer { class AtomicPointer { private: void* rep_; + public: - AtomicPointer() { } - explicit AtomicPointer(void* v) : rep_(v) { } + AtomicPointer() {} + explicit AtomicPointer(void* v) : rep_(v) {} inline void* Acquire_Load() const { void* val; - __asm__ __volatile__ ( + __asm__ __volatile__( "ldx [%[rep_]], %[val] \n\t" - "membar #LoadLoad|#LoadStore \n\t" - : [val] "=r" (val) - : [rep_] "r" (&rep_) + "membar #LoadLoad|#LoadStore \n\t" + : [val] "=r"(val) + : [rep_] "r"(&rep_) : "memory"); return val; } inline void Release_Store(void* v) { - __asm__ __volatile__ ( + __asm__ __volatile__( "membar #LoadStore|#StoreStore \n\t" "stx %[v], [%[rep_]] \n\t" : - : [rep_] "r" (&rep_), [v] "r" (v) + : [rep_] "r"(&rep_), [v] "r"(v) : "memory"); } inline void* NoBarrier_Load() const { return rep_; } @@ -186,26 +177,23 @@ class AtomicPointer { class AtomicPointer { private: void* rep_; + public: - AtomicPointer() { } - explicit AtomicPointer(void* v) : rep_(v) { } + AtomicPointer() {} + explicit AtomicPointer(void* v) : rep_(v) {} inline void* Acquire_Load() const { - void* val ; - __asm__ __volatile__ ( - "ld8.acq %[val] = [%[rep_]] \n\t" - : [val] "=r" (val) - : [rep_] "r" (&rep_) - : "memory" - ); + void* val; + __asm__ __volatile__("ld8.acq %[val] = [%[rep_]] \n\t" + : [val] "=r"(val) + : [rep_] "r"(&rep_) + : "memory"); return val; } inline void Release_Store(void* v) { - __asm__ __volatile__ ( - "st8.rel [%[rep_]] = %[v] \n\t" - : - : [rep_] "r" (&rep_), [v] "r" (v) - : "memory" - ); + __asm__ __volatile__("st8.rel [%[rep_]] = %[v] \n\t" + : + : [rep_] "r"(&rep_), [v] "r"(v) + : "memory"); } inline void* NoBarrier_Load() const { return rep_; } inline void NoBarrier_Store(void* v) { rep_ = v; } diff --git a/src/leveldb/port/port_example.h b/src/leveldb/port/port_example.h index 93d5e1742..fb70236f0 100644 --- a/src/leveldb/port/port_example.h +++ b/src/leveldb/port/port_example.h @@ -79,12 +79,13 @@ extern void InitOnce(port::OnceType*, void (*initializer)()); class AtomicPointer { private: intptr_t rep_; + public: // Initialize to arbitrary value AtomicPointer(); // Initialize to hold v - explicit AtomicPointer(void* v) : rep_(v) { } + explicit AtomicPointer(void* v) : rep_(v) {} // Read and return the stored pointer with the guarantee that no // later memory access (read or write) by this thread can be @@ -107,14 +108,12 @@ class AtomicPointer { // Store the snappy compression of "input[0,input_length-1]" in *output. // Returns false if snappy is not supported by this port. -extern bool Snappy_Compress(const char* input, size_t input_length, - std::string* output); +extern bool Snappy_Compress(const char* input, size_t input_length, std::string* output); // If input[0,input_length-1] looks like a valid snappy compressed // buffer, store the size of the uncompressed data in *result and // return true. Else return false. -extern bool Snappy_GetUncompressedLength(const char* input, size_t length, - size_t* result); +extern bool Snappy_GetUncompressedLength(const char* input, size_t length, size_t* result); // Attempt to snappy uncompress input[0,input_length-1] into *output. // Returns true if successful, false if the input is invalid lightweight @@ -123,8 +122,7 @@ extern bool Snappy_GetUncompressedLength(const char* input, size_t length, // REQUIRES: at least the first "n" bytes of output[] must be writable // where "n" is the result of a successful call to // Snappy_GetUncompressedLength. -extern bool Snappy_Uncompress(const char* input_data, size_t input_length, - char* output); +extern bool Snappy_Uncompress(const char* input_data, size_t input_length, char* output); // ------------------ Miscellaneous ------------------- diff --git a/src/leveldb/port/port_posix.cc b/src/leveldb/port/port_posix.cc index 0ad21d620..6a7732e41 100644 --- a/src/leveldb/port/port_posix.cc +++ b/src/leveldb/port/port_posix.cc @@ -35,11 +35,11 @@ static void PthreadCall(const char* label, int result) { } Mutex::Mutex() { - pthread_mutexattr_t attr; - PthreadCall("init mutexattr", pthread_mutexattr_init(&attr)); - PthreadCall("set mutexattr", pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK)); - PthreadCall("init mutex", pthread_mutex_init(&mu_, &attr)); - PthreadCall("destroy mutexattr", pthread_mutexattr_destroy(&attr)); + pthread_mutexattr_t attr; + PthreadCall("init mutexattr", pthread_mutexattr_init(&attr)); + PthreadCall("set mutexattr", pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK)); + PthreadCall("init mutex", pthread_mutex_init(&mu_, &attr)); + PthreadCall("destroy mutexattr", pthread_mutexattr_destroy(&attr)); } Mutex::~Mutex() { PthreadCall("destroy mutex", pthread_mutex_destroy(&mu_)); } @@ -48,42 +48,45 @@ void Mutex::Lock() { PthreadCall("lock", pthread_mutex_lock(&mu_)); } void Mutex::Unlock() { PthreadCall("unlock", pthread_mutex_unlock(&mu_)); } -CondVar::CondVar(Mutex* mu) - : mu_(mu) { - // use monotonic clock - PthreadCall("condattr init ", pthread_condattr_init(&attr_)); - PthreadCall("condattr setclock ", pthread_condattr_setclock(&attr_, CLOCK_MONOTONIC)); - PthreadCall("condvar init with attr", pthread_cond_init(&cond_, &attr_)); +CondVar::CondVar(Mutex* mu) : mu_(mu) { + // use monotonic clock + PthreadCall("condattr init ", pthread_condattr_init(&attr_)); + PthreadCall("condattr setclock ", pthread_condattr_setclock(&attr_, CLOCK_MONOTONIC)); + PthreadCall("condvar init with attr", pthread_cond_init(&cond_, &attr_)); } CondVar::~CondVar() { - PthreadCall("condvar destroy", pthread_cond_destroy(&cond_)); - PthreadCall("condattr destroy", pthread_condattr_destroy(&attr_)); + PthreadCall("condvar destroy", pthread_cond_destroy(&cond_)); + PthreadCall("condattr destroy", pthread_condattr_destroy(&attr_)); } -void CondVar::Wait() { - PthreadCall("condvar wait", pthread_cond_wait(&cond_, &mu_->mu_)); -} +void CondVar::Wait() { PthreadCall("condvar wait", pthread_cond_wait(&cond_, &mu_->mu_)); } // wait in ms bool CondVar::Wait(int64_t wait_millisec) { - assert(wait_millisec >= 0); - // ref: http://www.qnx.com/developers/docs/6.5.0SP1.update/com.qnx.doc.neutrino_lib_ref/p/pthread_cond_timedwait.html + // ref: + // http://www.qnx.com/developers/docs/6.5.0SP1.update/com.qnx.doc.neutrino_lib_ref/p/pthread_cond_timedwait.html struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); int64_t nsec = ((int64_t)wait_millisec) * 1000000 + ts.tv_nsec; + assert(nsec > 0 && wait_millisec >= 0); ts.tv_sec += nsec / 1000000000; ts.tv_nsec = nsec % 1000000000; - return (0 == pthread_cond_timedwait(&cond_, &mu_->mu_, &ts)); + int err = pthread_cond_timedwait(&cond_, &mu_->mu_, &ts); + if (err == 0) { + return true; + } else if (err == ETIMEDOUT) { + // The time specified by 'ts' to pthread_cond_timedwait() has passed. + return false; + } else { + PthreadCall("condvar timedwait", err); + return false; + } } -void CondVar::Signal() { - PthreadCall("signal", pthread_cond_signal(&cond_)); -} +void CondVar::Signal() { PthreadCall("signal", pthread_cond_signal(&cond_)); } -void CondVar::SignalAll() { - PthreadCall("broadcast", pthread_cond_broadcast(&cond_)); -} +void CondVar::SignalAll() { PthreadCall("broadcast", pthread_cond_broadcast(&cond_)); } void InitOnce(OnceType* once, void (*initializer)()) { PthreadCall("once", pthread_once(once, initializer)); @@ -91,55 +94,50 @@ void InitOnce(OnceType* once, void (*initializer)()) { /////////// Compression Ext /////////// -bool Bmz_Compress(const char* input, size_t input_size, - std::string* output) { +bool Bmz_Compress(const char* input, size_t input_size, std::string* output) { #ifdef USE_COMPRESS_EXT - size_t output_size = input_size * 2; - output->resize(output_size); - if (!bmc.Compress(input, input_size, &(*output)[0], &output_size) - || output_size == 0 || output_size > input_size) { - return false; - } - output->resize(output_size); - return true; -#else + size_t output_size = input_size * 2; + output->resize(output_size); + if (!bmc.Compress(input, input_size, &(*output)[0], &output_size) || output_size == 0 || + output_size > input_size) { return false; + } + output->resize(output_size); + return true; +#else + return false; #endif } -bool Bmz_Uncompress(const char* input, size_t input_size, - char* output, size_t* output_size) { +bool Bmz_Uncompress(const char* input, size_t input_size, char* output, size_t* output_size) { #ifdef USE_COMPRESS_EXT - return bmc.Uncompress(input, input_size, output, output_size); + return bmc.Uncompress(input, input_size, output, output_size); #else - return false; + return false; #endif } -bool Lz4_Compress(const char* input, size_t input_size, - std::string* output) { +bool Lz4_Compress(const char* input, size_t input_size, std::string* output) { #ifdef USE_COMPRESS_EXT - output->resize(input_size * 2); - size_t output_size = LZ4LevelDB_compress(input, &(*output)[0], input_size); - if (output_size == 0 || output_size > input_size) { - return false; - } - output->resize(output_size); - return true; -#else + output->resize(input_size * 2); + size_t output_size = LZ4LevelDB_compress(input, &(*output)[0], input_size); + if (output_size == 0 || output_size > input_size) { return false; + } + output->resize(output_size); + return true; +#else + return false; #endif } -bool Lz4_Uncompress(const char* input, size_t input_size, - char* output, size_t* output_size) { +bool Lz4_Uncompress(const char* input, size_t input_size, char* output, size_t* output_size) { #ifdef USE_COMPRESS_EXT - size_t max_output_size = *output_size; - *output_size = LZ4LevelDB_decompress_fast( - input, output, input_size); - return true; + size_t max_output_size = *output_size; + *output_size = LZ4LevelDB_decompress_fast(input, output, input_size); + return true; #else - return false; + return false; #endif } diff --git a/src/leveldb/port/port_posix.h b/src/leveldb/port/port_posix.h index 65f4274a1..f4b98865b 100644 --- a/src/leveldb/port/port_posix.h +++ b/src/leveldb/port/port_posix.h @@ -13,36 +13,34 @@ #undef PLATFORM_IS_LITTLE_ENDIAN #if defined(OS_MACOSX) - #include - #if defined(__DARWIN_LITTLE_ENDIAN) && defined(__DARWIN_BYTE_ORDER) - #define PLATFORM_IS_LITTLE_ENDIAN \ - (__DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN) - #endif +#include +#if defined(__DARWIN_LITTLE_ENDIAN) && defined(__DARWIN_BYTE_ORDER) +#define PLATFORM_IS_LITTLE_ENDIAN (__DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN) +#endif #elif defined(OS_SOLARIS) - #include - #ifdef _LITTLE_ENDIAN - #define PLATFORM_IS_LITTLE_ENDIAN true - #else - #define PLATFORM_IS_LITTLE_ENDIAN false - #endif +#include +#ifdef _LITTLE_ENDIAN +#define PLATFORM_IS_LITTLE_ENDIAN true +#else +#define PLATFORM_IS_LITTLE_ENDIAN false +#endif #elif defined(OS_FREEBSD) - #include - #include - #define PLATFORM_IS_LITTLE_ENDIAN (_BYTE_ORDER == _LITTLE_ENDIAN) -#elif defined(OS_OPENBSD) || defined(OS_NETBSD) ||\ - defined(OS_DRAGONFLYBSD) - #include - #include +#include +#include +#define PLATFORM_IS_LITTLE_ENDIAN (_BYTE_ORDER == _LITTLE_ENDIAN) +#elif defined(OS_OPENBSD) || defined(OS_NETBSD) || defined(OS_DRAGONFLYBSD) +#include +#include #elif defined(OS_HPUX) - #define PLATFORM_IS_LITTLE_ENDIAN false +#define PLATFORM_IS_LITTLE_ENDIAN false #elif defined(OS_ANDROID) - // Due to a bug in the NDK x86 definition, - // _BYTE_ORDER must be used instead of __BYTE_ORDER on Android. - // See http://code.google.com/p/android/issues/detail?id=39824 - #include - #define PLATFORM_IS_LITTLE_ENDIAN (_BYTE_ORDER == _LITTLE_ENDIAN) +// Due to a bug in the NDK x86 definition, +// _BYTE_ORDER must be used instead of __BYTE_ORDER on Android. +// See http://code.google.com/p/android/issues/detail?id=39824 +#include +#define PLATFORM_IS_LITTLE_ENDIAN (_BYTE_ORDER == _LITTLE_ENDIAN) #else - #include +#include #endif #include @@ -55,17 +53,15 @@ #define PLATFORM_IS_LITTLE_ENDIAN (__BYTE_ORDER == __LITTLE_ENDIAN) #endif -#if defined(OS_MACOSX) || defined(OS_SOLARIS) || defined(OS_FREEBSD) ||\ - defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLYBSD) ||\ - defined(OS_ANDROID) || defined(OS_HPUX) +#if defined(OS_MACOSX) || defined(OS_SOLARIS) || defined(OS_FREEBSD) || defined(OS_NETBSD) || \ + defined(OS_OPENBSD) || defined(OS_DRAGONFLYBSD) || defined(OS_ANDROID) || defined(OS_HPUX) // Use fread/fwrite/fflush on platforms without _unlocked variants #define fread_unlocked fread #define fwrite_unlocked fwrite #define fflush_unlocked fflush #endif -#if defined(OS_MACOSX) || defined(OS_FREEBSD) ||\ - defined(OS_OPENBSD) || defined(OS_DRAGONFLYBSD) +#if defined(OS_MACOSX) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLYBSD) // Use fsync() on platforms without fdatasync() #define fdatasync fsync #endif @@ -91,7 +87,7 @@ class Mutex { void Lock(); void Unlock(); - void AssertHeld() { } + void AssertHeld() {} private: friend class CondVar; @@ -110,6 +106,7 @@ class CondVar { bool Wait(int64_t wait_millisec); void Signal(); void SignalAll(); + private: pthread_cond_t cond_; pthread_condattr_t attr_; @@ -120,8 +117,7 @@ typedef pthread_once_t OnceType; #define LEVELDB_ONCE_INIT PTHREAD_ONCE_INIT extern void InitOnce(OnceType* once, void (*initializer)()); -inline bool Snappy_Compress(const char* input, size_t length, - ::std::string* output) { +inline bool Snappy_Compress(const char* input, size_t length, ::std::string* output) { output->resize(snappy::MaxCompressedLength(length)); size_t outlen; snappy::RawCompress(input, length, &(*output)[0], &outlen); @@ -129,37 +125,29 @@ inline bool Snappy_Compress(const char* input, size_t length, return true; } -inline bool Snappy_GetUncompressedLength(const char* input, size_t length, - size_t* result) { +inline bool Snappy_GetUncompressedLength(const char* input, size_t length, size_t* result) { return snappy::GetUncompressedLength(input, length, result); } -inline bool Snappy_Uncompress(const char* input, size_t length, - char* output) { +inline bool Snappy_Uncompress(const char* input, size_t length, char* output) { return snappy::RawUncompress(input, length, output); } /////////// Compression Ext /////////// -bool Bmz_Compress(const char* input, size_t input_size, - std::string* output); +bool Bmz_Compress(const char* input, size_t input_size, std::string* output); -bool Bmz_Uncompress(const char* input, size_t input_size, - char* output, size_t* output_size); +bool Bmz_Uncompress(const char* input, size_t input_size, char* output, size_t* output_size); -bool Lz4_Compress(const char* input, size_t input_size, - std::string* output); +bool Lz4_Compress(const char* input, size_t input_size, std::string* output); -bool Lz4_Uncompress(const char* input, size_t input_size, - char* output, size_t* output_size); +bool Lz4_Uncompress(const char* input, size_t input_size, char* output, size_t* output_size); ////////////////////////////// -inline bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg) { - return false; -} +inline bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg) { return false; } -} // namespace port -} // namespace leveldb +} // namespace port +} // namespace leveldb #endif // STORAGE_LEVELDB_PORT_PORT_POSIX_H_ diff --git a/src/leveldb/port/win/stdint.h b/src/leveldb/port/win/stdint.h index afd0be8a2..42f2667cd 100644 --- a/src/leveldb/port/win/stdint.h +++ b/src/leveldb/port/win/stdint.h @@ -16,13 +16,13 @@ #endif // Define C99 equivalent types. -typedef signed char int8_t; -typedef signed short int16_t; -typedef signed int int32_t; -typedef signed long long int64_t; -typedef unsigned char uint8_t; -typedef unsigned short uint16_t; -typedef unsigned int uint32_t; -typedef unsigned long long uint64_t; +typedef signed char int8_t; +typedef signed short int16_t; +typedef signed int int32_t; +typedef signed long long int64_t; +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +typedef unsigned long long uint64_t; #endif // STORAGE_LEVELDB_PORT_WIN_STDINT_H_ diff --git a/src/leveldb/table/block.cc b/src/leveldb/table/block.cc index 9bc2f640e..f54db3e0b 100644 --- a/src/leveldb/table/block.cc +++ b/src/leveldb/table/block.cc @@ -11,9 +11,9 @@ #include "table/block.h" #include -#include + +#include "format.h" #include "leveldb/comparator.h" -#include "table/format.h" #include "util/coding.h" #include "util/logging.h" @@ -32,7 +32,7 @@ Block::Block(const BlockContents& contents) if (size_ < sizeof(uint32_t)) { size_ = 0; // Error marker } else { - size_t max_restarts_allowed = (size_-sizeof(uint32_t)) / sizeof(uint32_t); + size_t max_restarts_allowed = (size_ - sizeof(uint32_t)) / sizeof(uint32_t); if (NumRestarts() > max_restarts_allowed) { // The size is too small for NumRestarts() size_ = 0; @@ -55,10 +55,8 @@ Block::~Block() { // // If any errors are detected, returns NULL. Otherwise, returns a // pointer to the key delta (just past the three decoded values). -static inline const char* DecodeEntry(const char* p, const char* limit, - uint32_t* shared, - uint32_t* non_shared, - uint32_t* value_length) { +static inline const char* DecodeEntry(const char* p, const char* limit, uint32_t* shared, + uint32_t* non_shared, uint32_t* value_length) { if (limit - p < 3) return NULL; *shared = reinterpret_cast(p)[0]; *non_shared = reinterpret_cast(p)[1]; @@ -81,9 +79,9 @@ static inline const char* DecodeEntry(const char* p, const char* limit, class Block::Iter : public Iterator { private: const Comparator* const comparator_; - const char* const data_; // underlying block contents - uint32_t const restarts_; // Offset of restart array (list of fixed32) - uint32_t const num_restarts_; // Number of uint32_t entries in restart array + const char* const data_; // underlying block contents + uint32_t const restarts_; // Offset of restart array (list of fixed32) + uint32_t const num_restarts_; // Number of uint32_t entries in restart array // current_ is offset in data_ of current entry. >= restarts_ if !Valid uint32_t current_; @@ -92,14 +90,10 @@ class Block::Iter : public Iterator { Slice value_; Status status_; - inline int Compare(const Slice& a, const Slice& b) const { - return comparator_->Compare(a, b); - } + inline int Compare(const Slice& a, const Slice& b) const { return comparator_->Compare(a, b); } // Return the offset in data_ just past the end of the current entry. - inline uint32_t NextEntryOffset() const { - return (value_.data() + value_.size()) - data_; - } + inline uint32_t NextEntryOffset() const { return (value_.data() + value_.size()) - data_; } uint32_t GetRestartPoint(uint32_t index) { assert(index < num_restarts_); @@ -117,10 +111,7 @@ class Block::Iter : public Iterator { } public: - Iter(const Comparator* comparator, - const char* data, - uint32_t restarts, - uint32_t num_restarts) + Iter(const Comparator* comparator, const char* data, uint32_t restarts, uint32_t num_restarts) : comparator_(comparator), data_(data), restarts_(restarts), @@ -176,9 +167,8 @@ class Block::Iter : public Iterator { uint32_t mid = (left + right + 1) / 2; uint32_t region_offset = GetRestartPoint(mid); uint32_t shared, non_shared, value_length; - const char* key_ptr = DecodeEntry(data_ + region_offset, - data_ + restarts_, - &shared, &non_shared, &value_length); + const char* key_ptr = DecodeEntry(data_ + region_offset, data_ + restarts_, &shared, + &non_shared, &value_length); if (key_ptr == NULL || (shared != 0)) { CorruptionError(); return; @@ -249,8 +239,7 @@ class Block::Iter : public Iterator { key_.resize(shared); key_.append(p, non_shared); value_ = Slice(p + non_shared, value_length); - while (restart_index_ + 1 < num_restarts_ && - GetRestartPoint(restart_index_ + 1) < current_) { + while (restart_index_ + 1 < num_restarts_ && GetRestartPoint(restart_index_ + 1) < current_) { ++restart_index_; } return true; diff --git a/src/leveldb/table/block.h b/src/leveldb/table/block.h index 161de53c0..16054336a 100644 --- a/src/leveldb/table/block.h +++ b/src/leveldb/table/block.h @@ -33,8 +33,8 @@ class Block { const char* data_; size_t size_; - uint32_t restart_offset_; // Offset in data_ of restart array - bool owned_; // Block owns data_[] + uint32_t restart_offset_; // Offset in data_ of restart array + bool owned_; // Block owns data_[] // No copying allowed Block(const Block&); diff --git a/src/leveldb/table/block_builder.cc b/src/leveldb/table/block_builder.cc index ade272eaa..ee2d0e407 100644 --- a/src/leveldb/table/block_builder.cc +++ b/src/leveldb/table/block_builder.cc @@ -41,27 +41,24 @@ namespace leveldb { BlockBuilder::BlockBuilder(const Options* options) - : options_(options), - restarts_(), - counter_(0), - finished_(false) { + : options_(options), restarts_(), counter_(0), finished_(false) { assert(options->block_restart_interval >= 1); - restarts_.push_back(0); // First restart point is at offset 0 + restarts_.push_back(0); // First restart point is at offset 0 } void BlockBuilder::Reset() { buffer_.clear(); restarts_.clear(); - restarts_.push_back(0); // First restart point is at offset 0 + restarts_.push_back(0); // First restart point is at offset 0 counter_ = 0; finished_ = false; last_key_.clear(); } size_t BlockBuilder::CurrentSizeEstimate() const { - return (buffer_.size() + // Raw data buffer - restarts_.size() * sizeof(uint32_t) + // Restart array - sizeof(uint32_t)); // Restart array length + return (buffer_.size() + // Raw data buffer + restarts_.size() * sizeof(uint32_t) + // Restart array + sizeof(uint32_t)); // Restart array length } Slice BlockBuilder::Finish() { @@ -78,7 +75,7 @@ void BlockBuilder::Add(const Slice& key, const Slice& value) { Slice last_key_piece(last_key_); assert(!finished_); assert(counter_ <= options_->block_restart_interval); - assert(buffer_.empty() // No values yet? + assert(buffer_.empty() // No values yet? || options_->comparator->Compare(key, last_key_piece) > 0); size_t shared = 0; if (counter_ < options_->block_restart_interval) { diff --git a/src/leveldb/table/block_builder.h b/src/leveldb/table/block_builder.h index e3aed8329..19f763e09 100644 --- a/src/leveldb/table/block_builder.h +++ b/src/leveldb/table/block_builder.h @@ -39,17 +39,15 @@ class BlockBuilder { size_t CurrentSizeEstimate() const; // Return true iff no entries have been added since the last Reset() - bool empty() const { - return buffer_.empty(); - } + bool empty() const { return buffer_.empty(); } private: - const Options* options_; - std::string buffer_; // Destination buffer - std::vector restarts_; // Restart points - int counter_; // Number of entries emitted since restart - bool finished_; // Has Finish() been called? - std::string last_key_; + const Options* options_; + std::string buffer_; // Destination buffer + std::vector restarts_; // Restart points + int counter_; // Number of entries emitted since restart + bool finished_; // Has Finish() been called? + std::string last_key_; // No copying allowed BlockBuilder(const BlockBuilder&); diff --git a/src/leveldb/table/filter_block.cc b/src/leveldb/table/filter_block.cc index cb1f83a96..ebebeb0c1 100644 --- a/src/leveldb/table/filter_block.cc +++ b/src/leveldb/table/filter_block.cc @@ -19,9 +19,7 @@ namespace leveldb { static const size_t kFilterBaseLg = 11; static const size_t kFilterBase = 1 << kFilterBaseLg; -FilterBlockBuilder::FilterBlockBuilder(const FilterPolicy* policy) - : policy_(policy) { -} +FilterBlockBuilder::FilterBlockBuilder(const FilterPolicy* policy) : policy_(policy) {} void FilterBlockBuilder::StartBlock(uint64_t block_offset) { uint64_t filter_index = (block_offset / kFilterBase); @@ -66,7 +64,7 @@ void FilterBlockBuilder::GenerateFilter() { tmp_keys_.resize(num_keys); for (size_t i = 0; i < num_keys; i++) { const char* base = keys_.data() + start_[i]; - size_t length = start_[i+1] - start_[i]; + size_t length = start_[i + 1] - start_[i]; tmp_keys_[i] = Slice(base, length); } @@ -79,16 +77,11 @@ void FilterBlockBuilder::GenerateFilter() { start_.clear(); } -FilterBlockReader::FilterBlockReader(const FilterPolicy* policy, - const Slice& contents) - : policy_(policy), - data_(NULL), - offset_(NULL), - num_(0), - base_lg_(0) { +FilterBlockReader::FilterBlockReader(const FilterPolicy* policy, const Slice& contents) + : policy_(policy), data_(NULL), offset_(NULL), num_(0), base_lg_(0) { size_t n = contents.size(); if (n < 5) return; // 1 byte for base_lg_ and 4 for start of offset array - base_lg_ = contents[n-1]; + base_lg_ = contents[n - 1]; uint32_t last_word = DecodeFixed32(contents.data() + n - 5); if (last_word > n - 5) return; data_ = contents.data(); @@ -99,17 +92,29 @@ FilterBlockReader::FilterBlockReader(const FilterPolicy* policy, bool FilterBlockReader::KeyMayMatch(uint64_t block_offset, const Slice& key) { uint64_t index = block_offset >> base_lg_; if (index < num_) { - uint32_t start = DecodeFixed32(offset_ + index*4); - uint32_t limit = DecodeFixed32(offset_ + index*4 + 4); + uint32_t start = DecodeFixed32(offset_ + index * 4); + uint32_t limit = DecodeFixed32(offset_ + index * 4 + 4); if (start <= limit && limit <= (offset_ - data_)) { Slice filter = Slice(data_ + start, limit - start); - return policy_->KeyMayMatch(key, filter); + auto ret = policy_->KeyMayMatch(key, filter); + if (ret) { + filter_match_.Inc(); + } else { + filter_unmatch_.Inc(); + } + return ret; } else if (start == limit) { + filter_unmatch_.Inc(); // Empty filters do not match any keys return false; } } + filter_match_.Inc(); return true; // Errors are treated as potential matches } +tera::MetricCounter FilterBlockReader::filter_match_{"tera_filter_match_cnt", + {tera::Subscriber::SubscriberType::SUM}}; +tera::MetricCounter FilterBlockReader::filter_unmatch_{"tera_filter_unmatch_cnt", + {tera::Subscriber::SubscriberType::SUM}}; } diff --git a/src/leveldb/table/filter_block.h b/src/leveldb/table/filter_block.h index bec5dc10c..32f21d5dc 100644 --- a/src/leveldb/table/filter_block.h +++ b/src/leveldb/table/filter_block.h @@ -17,6 +17,8 @@ #include #include #include + +#include "common/metric/metric_counter.h" #include "leveldb/slice.h" #include "util/hash.h" @@ -42,10 +44,10 @@ class FilterBlockBuilder { void GenerateFilter(); const FilterPolicy* policy_; - std::string keys_; // Flattened key contents - std::vector start_; // Starting index in keys_ of each key - std::string result_; // Filter data computed so far - std::vector tmp_keys_; // policy_->CreateFilter() argument + std::string keys_; // Flattened key contents + std::vector start_; // Starting index in keys_ of each key + std::string result_; // Filter data computed so far + std::vector tmp_keys_; // policy_->CreateFilter() argument std::vector filter_offsets_; // No copying allowed @@ -55,18 +57,20 @@ class FilterBlockBuilder { class FilterBlockReader { public: - // REQUIRES: "contents" and *policy must stay live while *this is live. + // REQUIRES: "contents" and *policy must stay live while *this is live. FilterBlockReader(const FilterPolicy* policy, const Slice& contents); bool KeyMayMatch(uint64_t block_offset, const Slice& key); private: const FilterPolicy* policy_; - const char* data_; // Pointer to filter data (at block-start) - const char* offset_; // Pointer to beginning of offset array (at block-end) - size_t num_; // Number of entries in offset array - size_t base_lg_; // Encoding parameter (see kFilterBaseLg in .cc file) + const char* data_; // Pointer to filter data (at block-start) + const char* offset_; // Pointer to beginning of offset array (at block-end) + size_t num_; // Number of entries in offset array + size_t base_lg_; // Encoding parameter (see kFilterBaseLg in .cc file) + static tera::MetricCounter filter_match_; // Filter match means a read request is saved by this + // filter from read filesystem, + static tera::MetricCounter filter_unmatch_; // vice versa }; - } #endif // STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_ diff --git a/src/leveldb/table/filter_block_test.cc b/src/leveldb/table/filter_block_test.cc index f5cb3c070..2a2b41368 100644 --- a/src/leveldb/table/filter_block_test.cc +++ b/src/leveldb/table/filter_block_test.cc @@ -20,9 +20,7 @@ namespace leveldb { // For testing: emit an array with one hash value per key class TestHashFilter : public FilterPolicy { public: - virtual const char* Name() const { - return "TestHashFilter"; - } + virtual const char* Name() const { return "TestHashFilter"; } virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const { for (int i = 0; i < n; i++) { @@ -73,8 +71,8 @@ TEST(FilterBlockTest, SingleChunk) { ASSERT_TRUE(reader.KeyMayMatch(100, "box")); ASSERT_TRUE(reader.KeyMayMatch(100, "hello")); ASSERT_TRUE(reader.KeyMayMatch(100, "foo")); - ASSERT_TRUE(! reader.KeyMayMatch(100, "missing")); - ASSERT_TRUE(! reader.KeyMayMatch(100, "other")); + ASSERT_TRUE(!reader.KeyMayMatch(100, "missing")); + ASSERT_TRUE(!reader.KeyMayMatch(100, "other")); } TEST(FilterBlockTest, MultiChunk) { @@ -103,30 +101,28 @@ TEST(FilterBlockTest, MultiChunk) { // Check first filter ASSERT_TRUE(reader.KeyMayMatch(0, "foo")); ASSERT_TRUE(reader.KeyMayMatch(2000, "bar")); - ASSERT_TRUE(! reader.KeyMayMatch(0, "box")); - ASSERT_TRUE(! reader.KeyMayMatch(0, "hello")); + ASSERT_TRUE(!reader.KeyMayMatch(0, "box")); + ASSERT_TRUE(!reader.KeyMayMatch(0, "hello")); // Check second filter ASSERT_TRUE(reader.KeyMayMatch(3100, "box")); - ASSERT_TRUE(! reader.KeyMayMatch(3100, "foo")); - ASSERT_TRUE(! reader.KeyMayMatch(3100, "bar")); - ASSERT_TRUE(! reader.KeyMayMatch(3100, "hello")); + ASSERT_TRUE(!reader.KeyMayMatch(3100, "foo")); + ASSERT_TRUE(!reader.KeyMayMatch(3100, "bar")); + ASSERT_TRUE(!reader.KeyMayMatch(3100, "hello")); // Check third filter (empty) - ASSERT_TRUE(! reader.KeyMayMatch(4100, "foo")); - ASSERT_TRUE(! reader.KeyMayMatch(4100, "bar")); - ASSERT_TRUE(! reader.KeyMayMatch(4100, "box")); - ASSERT_TRUE(! reader.KeyMayMatch(4100, "hello")); + ASSERT_TRUE(!reader.KeyMayMatch(4100, "foo")); + ASSERT_TRUE(!reader.KeyMayMatch(4100, "bar")); + ASSERT_TRUE(!reader.KeyMayMatch(4100, "box")); + ASSERT_TRUE(!reader.KeyMayMatch(4100, "hello")); // Check last filter ASSERT_TRUE(reader.KeyMayMatch(9000, "box")); ASSERT_TRUE(reader.KeyMayMatch(9000, "hello")); - ASSERT_TRUE(! reader.KeyMayMatch(9000, "foo")); - ASSERT_TRUE(! reader.KeyMayMatch(9000, "bar")); + ASSERT_TRUE(!reader.KeyMayMatch(9000, "foo")); + ASSERT_TRUE(!reader.KeyMayMatch(9000, "bar")); } } // namespace leveldb -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} +int main(int argc, char** argv) { return leveldb::test::RunAllTests(); } diff --git a/src/leveldb/table/format.cc b/src/leveldb/table/format.cc index 043a587e9..40017cca0 100644 --- a/src/leveldb/table/format.cc +++ b/src/leveldb/table/format.cc @@ -6,15 +6,22 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. +#include #include -#include +#include #include -#include "table/format.h" + +#include "common/base/string_format.h" +#include "format.h" #include "leveldb/env.h" +#include "leveldb/persistent_cache.h" +#include "persistent_cache_helper.h" #include "port/port.h" #include "table/block.h" #include "util/coding.h" #include "util/crc32c.h" +#include "util/dfs_read_thread_limiter.h" +#include "util/stop_watch.h" namespace leveldb { @@ -27,8 +34,7 @@ void BlockHandle::EncodeTo(std::string* dst) const { } Status BlockHandle::DecodeFrom(Slice* input) { - if (GetVarint64(input, &offset_) && - GetVarint64(input, &size_)) { + if (GetVarint64(input, &offset_) && GetVarint64(input, &size_)) { return Status::OK(); } else { return Status::Corruption("bad block handle"); @@ -51,8 +57,8 @@ Status Footer::DecodeFrom(Slice* input) { const char* magic_ptr = input->data() + kEncodedLength - 8; const uint32_t magic_lo = DecodeFixed32(magic_ptr); const uint32_t magic_hi = DecodeFixed32(magic_ptr + 4); - const uint64_t magic = ((static_cast(magic_hi) << 32) | - (static_cast(magic_lo))); + const uint64_t magic = + ((static_cast(magic_hi) << 32) | (static_cast(magic_lo))); if (magic != kTableMagicNumber) { return Status::InvalidArgument("not an sstable (bad magic number)"); } @@ -73,27 +79,29 @@ char* DirectIOAlign(RandomAccessFile* file, uint64_t offset, size_t len, DirectIOArgs* direct_io_args) { assert(direct_io_args); assert(offset >= 0 && len >= 0); - /* use this formula you will find the number Y, - * which Y is the first bigger number than X, at the same time Y is the power of 2. + /* use this formula you will find the number Y, + * which Y is the first bigger number than X, at the same time Y is the power + *of 2. * Y = (X + (2^n - 1)) & (~(2^n - 1)) - * - * this function , accept len [is X] to find aligned_len [is Y + alignment], + * + * this function , accept len [is X] to find aligned_len [is Y + alignment], * accept offset [is X] to find aligned_offset [is Y - alignment] - * + * * example: offset = 123 len = 610 - 123 * 123 610 * [................] need buffer * * aligned_offset = 0, aligned_len = 1024 - * [.................|.................] alloc buffer x % alignment == 0 - * x x+512 x+1024 + * [.................|.................] alloc buffer x % alignment == + *0 + * x x+512 x+1024 */ size_t alignment = file->GetRequiredBufferAlignment(); - direct_io_args->aligned_len = alignment + (len % alignment > 0 ? - (len + alignment - 1) & (~(alignment - 1)) : len); + direct_io_args->aligned_len = + alignment + (len % alignment > 0 ? (len + alignment - 1) & (~(alignment - 1)) : len); - direct_io_args->aligned_offset = offset > 0 ? - ((offset + alignment - 1) & (~(alignment -1))) - alignment : 0; + direct_io_args->aligned_offset = + offset > 0 ? ((offset + alignment - 1) & (~(alignment - 1))) - alignment : 0; return (char*)memalign(alignment, direct_io_args->aligned_len); } @@ -104,28 +112,24 @@ void FreeBuf(char* buf, bool use_direct_io_read) { } else { delete[] buf; } - buf = NULL; } } -Status ReadSstFile(RandomAccessFile* file, - bool use_direct_io_read, - uint64_t offset, - size_t len, - Slice* contents, - char** buf) { +Status ReadSstFile(RandomAccessFile* file, bool use_direct_io_read, uint64_t offset, size_t len, + Slice* contents, SstDataScratch* scratch) { Status s; + char* buf = nullptr; if (use_direct_io_read) { // calc and malloc align memory for direct io DirectIOArgs read_args; - *buf = DirectIOAlign(file, offset, len, &read_args); - if (*buf == NULL) { - return Status::Corruption("direct io allgn failed"); + buf = DirectIOAlign(file, offset, len, &read_args); + if (buf == NULL) { + return Status::Corruption("direct io align failed"); } - //read to align buf - s = file->Read(read_args.aligned_offset, read_args.aligned_len, contents, *buf); + // read to align buf + s = file->Read(read_args.aligned_offset, read_args.aligned_len, contents, buf); if (!s.ok()) { - FreeBuf(*buf, use_direct_io_read); + FreeBuf(buf, use_direct_io_read); return s; } // reset 'contents' to actual block contents @@ -133,28 +137,33 @@ Status ReadSstFile(RandomAccessFile* file, if (contents->size() >= align_offset + len) { contents->remove_prefix(align_offset); contents->remove_suffix(contents->size() - len); + *scratch = SstDataScratch{buf, std::bind(FreeBuf, std::placeholders::_1, true)}; } else { - FreeBuf(*buf, use_direct_io_read); - return Status::Corruption("direct io read contents size invalid"); + FreeBuf(buf, use_direct_io_read); + return Status::Corruption(StringFormat( + "direct io read contents size invalid, " + "aligned_offset: %lu, aligned_len: %lu, contents_len :%lu", + read_args.aligned_offset, read_args.aligned_len, contents->size())); } } else { - *buf = new char[len]; - s = file->Read(offset, len, contents, *buf); + buf = new char[len]; + s = file->Read(offset, len, contents, buf); if (!s.ok()) { - FreeBuf(*buf, use_direct_io_read); + FreeBuf(buf, use_direct_io_read); return s; } + *scratch = SstDataScratch{buf, std::bind(FreeBuf, std::placeholders::_1, false)}; } return s; } -Status ReadBlock(RandomAccessFile* file, - const ReadOptions& options, - const BlockHandle& handle, +Status ReadBlock(RandomAccessFile* file, const ReadOptions& options, const BlockHandle& handle, BlockContents* result) { result->data = Slice(); result->cachable = false; result->heap_allocated = false; + result->read_from_persistent_cache = false; + auto persistent_cache = options.db_opt->persistent_cache; // Read the block contents as well as the type/crc footer. // See table_builder.cc for the code that built this structure. @@ -162,41 +171,72 @@ Status ReadBlock(RandomAccessFile* file, size_t len = n + kBlockTrailerSize; uint64_t offset = handle.offset(); Slice contents; - char* buf = NULL; - const Options& db_opt = *(options.db_opt); - Status s = ReadSstFile(file, db_opt.use_direct_io_read, offset, len, &contents, &buf); + SstDataScratch scratch; + Status s; + + if (persistent_cache) { + std::string fname = file->GetFileName(); + Slice key{fname}; + key.remove_specified_prefix(options.db_opt->dfs_storage_path_prefix); + if (PersistentCacheHelper::TryReadFromPersistentCache(persistent_cache, key, offset, len, + &contents, &scratch).ok()) { + s = ParseBlock(n, offset, options, contents, result); + if (s.ok()) { + result->read_from_persistent_cache = true; + return s; + } else { + LEVELDB_LOG( + "Error parsing block content read from persistent_cache, fname: " + "%s. " + "Evict it and try read from dfs.\n", + file->GetFileName().c_str()); + persistent_cache->ForceEvict(key); + } + } + if (options.enable_dfs_read_thread_limiter) { + auto token = DfsReadThreadLimiter::Instance().GetToken(); + // If enabled dfs thread limiter, first acquire the semaphore, then read. + if (token) { + s = ReadSstFile(file, options.db_opt->use_direct_io_read, offset, len, &contents, &scratch); + } else { + // Acquire failed, reject this request. + s = Status::Reject("Too many dfs read requests."); + } + } else { + // Else, limiter is not enabled, just read from dfs file system. + s = ReadSstFile(file, options.db_opt->use_direct_io_read, offset, len, &contents, &scratch); + } + } else { + s = ReadSstFile(file, options.db_opt->use_direct_io_read, offset, len, &contents, &scratch); + } + if (!s.ok()) { return s; } + s = ParseBlock(n, offset, options, contents, result); - FreeBuf(buf, db_opt.use_direct_io_read); return s; } -Status ParseBlock(size_t n, - size_t offset, - const ReadOptions& options, - Slice contents, +Status ParseBlock(size_t n, size_t offset, const ReadOptions& options, Slice contents, BlockContents* result) { - if (contents.size() != n + kBlockTrailerSize) { return Status::Corruption("truncated block read"); } // Check the crc of the type and the block contents - const char* data = contents.data(); // Pointer to where Read put the data + const char* data = contents.data(); // Pointer to where Read put the data if (options.verify_checksums) { const uint32_t crc = crc32c::Unmask(DecodeFixed32(data + n + 1)); const uint32_t actual = crc32c::Value(data, n + 1); if (actual != crc) { char err[128] = {'\0'}; - sprintf(err, "block checksum mismatch: crc %u, actual %u, offset %lu, size %lu", - crc, actual, offset, n + kBlockTrailerSize); + sprintf(err, "block checksum mismatch: crc %u, actual %u, offset %lu, size %lu", crc, actual, + offset, n + kBlockTrailerSize); return Status::Corruption(Slice(err, strlen(err))); } } - switch (data[n]) { case kNoCompression: { char* buf = new char[n]; @@ -222,30 +262,28 @@ Status ParseBlock(size_t n, break; } case kBmzCompression: { - size_t uncompressed_size = 4 * 1024 * 2; // should be doubled block size, say > 4K * 2 - std::vector uncompressed_buffer; - uncompressed_buffer.resize(uncompressed_size); - if (!port::Bmz_Uncompress(data, n, &uncompressed_buffer[0], - &uncompressed_size)) { - return Status::Corruption("Bmz: corrupted compressed block contents"); - } - result->data = Slice(&uncompressed_buffer[0], uncompressed_size); - result->heap_allocated = true; - result->cachable = true; - break; + size_t uncompressed_size = 4 * 1024 * 2; // should be doubled block size, say > 4K * 2 + std::vector uncompressed_buffer; + uncompressed_buffer.resize(uncompressed_size); + if (!port::Bmz_Uncompress(data, n, &uncompressed_buffer[0], &uncompressed_size)) { + return Status::Corruption("Bmz: corrupted compressed block contents"); + } + result->data = Slice(&uncompressed_buffer[0], uncompressed_size); + result->heap_allocated = true; + result->cachable = true; + break; } case kLZ4Compression: { - size_t uncompressed_size = 4 * 1024 * 2; // should be doubled block size, say > 4K * 2 - std::vector uncompressed_buffer; - uncompressed_buffer.resize(uncompressed_size); - if (!port::Lz4_Uncompress(data, n, &uncompressed_buffer[0], - &uncompressed_size)) { - return Status::Corruption("LZ4: corrupted compressed block contents"); - } - result->data = Slice(&uncompressed_buffer[0], uncompressed_size); - result->heap_allocated = true; - result->cachable = true; - break; + size_t uncompressed_size = 4 * 1024 * 2; // should be doubled block size, say > 4K * 2 + std::vector uncompressed_buffer; + uncompressed_buffer.resize(uncompressed_size); + if (!port::Lz4_Uncompress(data, n, &uncompressed_buffer[0], &uncompressed_size)) { + return Status::Corruption("LZ4: corrupted compressed block contents"); + } + result->data = Slice(&uncompressed_buffer[0], uncompressed_size); + result->heap_allocated = true; + result->cachable = true; + break; } default: return Status::Corruption("bad block type"); @@ -253,5 +291,4 @@ Status ParseBlock(size_t n, return Status::OK(); } - } // namespace leveldb diff --git a/src/leveldb/table/format.h b/src/leveldb/table/format.h index 77444aece..ab03e9e52 100644 --- a/src/leveldb/table/format.h +++ b/src/leveldb/table/format.h @@ -9,8 +9,11 @@ #ifndef STORAGE_LEVELDB_TABLE_FORMAT_H_ #define STORAGE_LEVELDB_TABLE_FORMAT_H_ -#include +#include +#include #include +#include + #include "leveldb/slice.h" #include "leveldb/status.h" #include "leveldb/table_builder.h" @@ -50,19 +53,15 @@ class BlockHandle { // end of every table file. class Footer { public: - Footer() { } + Footer() {} // The block handle for the metaindex block of the table const BlockHandle& metaindex_handle() const { return metaindex_handle_; } void set_metaindex_handle(const BlockHandle& h) { metaindex_handle_ = h; } // The block handle for the index block of the table - const BlockHandle& index_handle() const { - return index_handle_; - } - void set_index_handle(const BlockHandle& h) { - index_handle_ = h; - } + const BlockHandle& index_handle() const { return index_handle_; } + void set_index_handle(const BlockHandle& h) { index_handle_ = h; } void EncodeTo(std::string* dst) const; Status DecodeFrom(Slice* input); @@ -70,9 +69,7 @@ class Footer { // Encoded length of a Footer. Note that the serialization of a // Footer will always occupy exactly this many bytes. It consists // of two block handles and a magic number. - enum { - kEncodedLength = 2*BlockHandle::kMaxEncodedLength + 8 - }; + enum { kEncodedLength = 2 * BlockHandle::kMaxEncodedLength + 8 }; private: BlockHandle metaindex_handle_; @@ -91,49 +88,38 @@ struct BlockContents { Slice data; // Actual contents of data bool cachable; // True iff data can be cached bool heap_allocated; // True iff caller should delete[] data.data() + bool read_from_persistent_cache = false; }; struct DirectIOArgs { - uint64_t aligned_offset; // aligned offset - size_t aligned_len; // aligned len + uint64_t aligned_offset; // aligned offset + size_t aligned_len; // aligned len }; // calc new aliged offset and len for Direct I/O -extern char* DirectIOAlign(RandomAccessFile* file, - uint64_t offset, - size_t len, +extern char* DirectIOAlign(RandomAccessFile* file, uint64_t offset, size_t len, DirectIOArgs* direct_io_args); // If use_direct_io_read call free() to free memalign() -// Else call delete[] to free new[] +// Else call delete[] to free new[] extern void FreeBuf(char* buf, bool use_direct_io_read); -extern Status ReadSstFile(RandomAccessFile* file, - bool use_direct_io_read, - uint64_t offset, - size_t len, - Slice* contents, - char** buf); - // Read the block identified by "handle" from "file". On failure // return non-OK. On success fill *result and return OK. -extern Status ReadBlock(RandomAccessFile* file, - const ReadOptions& options, - const BlockHandle& handle, - BlockContents* result); - -Status ParseBlock(size_t n, - size_t offset, - const ReadOptions& options, - Slice contents, +extern Status ReadBlock(RandomAccessFile* file, const ReadOptions& options, + const BlockHandle& handle, BlockContents* result); + +Status ParseBlock(size_t n, size_t offset, const ReadOptions& options, Slice contents, BlockContents* result); // Implementation details follow. Clients should ignore, inline BlockHandle::BlockHandle() - : offset_(~static_cast(0)), - size_(~static_cast(0)) { -} + : offset_(~static_cast(0)), size_(~static_cast(0)) {} + +using SstDataScratch = std::unique_ptr>; +extern Status ReadSstFile(RandomAccessFile* file, bool use_direct_io_read, uint64_t offset, + size_t len, Slice* contents, SstDataScratch* scratch); } // namespace leveldb #endif // STORAGE_LEVELDB_TABLE_FORMAT_H_ diff --git a/src/leveldb/table/iterator.cc b/src/leveldb/table/iterator.cc index a8377a16d..0e354eec4 100644 --- a/src/leveldb/table/iterator.cc +++ b/src/leveldb/table/iterator.cc @@ -20,7 +20,7 @@ Iterator::Iterator() { Iterator::~Iterator() { if (cleanup_.function != NULL) { (*cleanup_.function)(cleanup_.arg1, cleanup_.arg2); - for (Cleanup* c = cleanup_.next; c != NULL; ) { + for (Cleanup* c = cleanup_.next; c != NULL;) { (*c->function)(c->arg1, c->arg2); Cleanup* next = c->next; delete c; @@ -47,27 +47,30 @@ void Iterator::RegisterCleanup(CleanupFunction func, void* arg1, void* arg2) { namespace { class EmptyIterator : public Iterator { public: - EmptyIterator(const Status& s) : status_(s) { } + EmptyIterator(const Status& s) : status_(s) {} virtual bool Valid() const { return false; } - virtual void Seek(const Slice& target) { } - virtual void SeekToFirst() { } - virtual void SeekToLast() { } + virtual void Seek(const Slice& target) {} + virtual void SeekToFirst() {} + virtual void SeekToLast() {} virtual void Next() { assert(false); } virtual void Prev() { assert(false); } - Slice key() const { assert(false); return Slice(); } - Slice value() const { assert(false); return Slice(); } + Slice key() const { + assert(false); + return Slice(); + } + Slice value() const { + assert(false); + return Slice(); + } virtual Status status() const { return status_; } + private: Status status_; }; } // namespace -Iterator* NewEmptyIterator() { - return new EmptyIterator(Status::OK()); -} +Iterator* NewEmptyIterator() { return new EmptyIterator(Status::OK()); } -Iterator* NewErrorIterator(const Status& status) { - return new EmptyIterator(status); -} +Iterator* NewErrorIterator(const Status& status) { return new EmptyIterator(status); } } // namespace leveldb diff --git a/src/leveldb/table/iterator_wrapper.h b/src/leveldb/table/iterator_wrapper.h index 2cc4b04f2..423b1c799 100644 --- a/src/leveldb/table/iterator_wrapper.h +++ b/src/leveldb/table/iterator_wrapper.h @@ -17,10 +17,8 @@ namespace leveldb { // cache locality. class IteratorWrapper { public: - IteratorWrapper(): iter_(NULL), valid_(false) { } - explicit IteratorWrapper(Iterator* iter): iter_(NULL) { - Set(iter); - } + IteratorWrapper() : iter_(NULL), valid_(false) {} + explicit IteratorWrapper(Iterator* iter) : iter_(NULL) { Set(iter); } ~IteratorWrapper() { delete iter_; } Iterator* iter() const { return iter_; } @@ -36,18 +34,46 @@ class IteratorWrapper { } } - // Iterator interface methods - bool Valid() const { return valid_; } - Slice key() const { assert(Valid()); return key_; } - Slice value() const { assert(Valid()); return iter_->value(); } + bool Valid() const { return valid_; } + Slice key() const { + assert(Valid()); + return key_; + } + Slice value() const { + assert(Valid()); + return iter_->value(); + } // Methods below require iter() != NULL - Status status() const { assert(iter_); return iter_->status(); } - void Next() { assert(iter_); iter_->Next(); Update(); } - void Prev() { assert(iter_); iter_->Prev(); Update(); } - void Seek(const Slice& k) { assert(iter_); iter_->Seek(k); Update(); } - void SeekToFirst() { assert(iter_); iter_->SeekToFirst(); Update(); } - void SeekToLast() { assert(iter_); iter_->SeekToLast(); Update(); } + Status status() const { + assert(iter_); + return iter_->status(); + } + void Next() { + assert(iter_); + iter_->Next(); + Update(); + } + void Prev() { + assert(iter_); + iter_->Prev(); + Update(); + } + void Seek(const Slice& k) { + assert(iter_); + iter_->Seek(k); + Update(); + } + void SeekToFirst() { + assert(iter_); + iter_->SeekToFirst(); + Update(); + } + void SeekToLast() { + assert(iter_); + iter_->SeekToLast(); + Update(); + } private: void Update() { diff --git a/src/leveldb/table/merger.cc b/src/leveldb/table/merger.cc index 7329f5caa..574927527 100644 --- a/src/leveldb/table/merger.cc +++ b/src/leveldb/table/merger.cc @@ -21,10 +21,8 @@ namespace { class IterWrapper { public: - IterWrapper(): iter_(NULL), valid_(false) { } - explicit IterWrapper(Iterator* iter): iter_(NULL) { - Set(iter); - } + IterWrapper() : iter_(NULL), valid_(false) {} + explicit IterWrapper(Iterator* iter) : iter_(NULL) { Set(iter); } ~IterWrapper() {} Iterator* iter() const { return iter_; } @@ -39,16 +37,45 @@ class IterWrapper { } // Iterator interface methods - bool Valid() const { return valid_; } - Slice key() const { assert(Valid()); return key_; } - Slice value() const { assert(Valid()); return iter_->value(); } + bool Valid() const { return valid_; } + Slice key() const { + assert(Valid()); + return key_; + } + Slice value() const { + assert(Valid()); + return iter_->value(); + } // Methods below require iter() != NULL - Status status() const { assert(iter_); return iter_->status(); } - void Next() { assert(iter_); iter_->Next(); Update(); } - void Prev() { assert(iter_); iter_->Prev(); Update(); } - void Seek(const Slice& k) { assert(iter_); iter_->Seek(k); Update(); } - void SeekToFirst() { assert(iter_); iter_->SeekToFirst(); Update(); } - void SeekToLast() { assert(iter_); iter_->SeekToLast(); Update(); } + Status status() const { + assert(iter_); + return iter_->status(); + } + void Next() { + assert(iter_); + iter_->Next(); + Update(); + } + void Prev() { + assert(iter_); + iter_->Prev(); + Update(); + } + void Seek(const Slice& k) { + assert(iter_); + iter_->Seek(k); + Update(); + } + void SeekToFirst() { + assert(iter_); + iter_->SeekToFirst(); + Update(); + } + void SeekToLast() { + assert(iter_); + iter_->SeekToLast(); + Update(); + } private: void Update() { @@ -63,7 +90,7 @@ class IterWrapper { Slice key_; }; struct Greater { - bool operator() (IterWrapper& it1, IterWrapper& it2) { + bool operator()(IterWrapper& it1, IterWrapper& it2) { if (!it1.Valid()) { // iterator 1 is not valid, regard it as the bigger one return true; @@ -75,14 +102,13 @@ struct Greater { return (comp->Compare(it1.key(), it2.key()) > 0); } - Greater(const Comparator* comparator) - : comp(comparator) {} + Greater(const Comparator* comparator) : comp(comparator) {} const Comparator* comp; }; struct Lesser { - bool operator() (IterWrapper& it1, IterWrapper& it2) { + bool operator()(IterWrapper& it1, IterWrapper& it2) { if (!it1.Valid()) { // always regard it as the lesser one return true; @@ -94,8 +120,7 @@ struct Lesser { return (comp->Compare(it1.key(), it2.key()) < 0); } - Lesser (const Comparator* comparator) - : comp(comparator) {} + Lesser(const Comparator* comparator) : comp(comparator) {} const Comparator* comp; }; @@ -120,13 +145,15 @@ class MergingIterator : public Iterator { } } - virtual bool Valid() const { - return (current_ != NULL); - } + virtual bool Valid() const { return (current_ != NULL); } virtual void SeekToFirst() { for (size_t i = 0; i < children_.size(); i++) { children_[i].SeekToFirst(); + if (!CheckIterStatus(children_[i])) { + current_ = NULL; + return; + } } // make children as a min-heap make_heap(children_.begin(), children_.end(), greater_); @@ -137,6 +164,10 @@ class MergingIterator : public Iterator { virtual void SeekToLast() { for (size_t i = 0; i < children_.size(); i++) { children_[i].SeekToLast(); + if (!CheckIterStatus(children_[i])) { + current_ = NULL; + return; + } } // make children as a max-heap make_heap(children_.begin(), children_.end(), lesser_); @@ -147,6 +178,10 @@ class MergingIterator : public Iterator { virtual void Seek(const Slice& target) { for (size_t i = 0; i < children_.size(); i++) { children_[i].Seek(target); + if (!CheckIterStatus(children_[i])) { + current_ = NULL; + return; + } } // make children as a min-heap make_heap(children_.begin(), children_.end(), greater_); @@ -167,10 +202,13 @@ class MergingIterator : public Iterator { IterWrapper* child = &children_[i]; if (child != current_) { child->Seek(key()); - if (child->Valid() && - comparator_->Compare(key(), child->key()) == 0) { + if (child->Valid() && comparator_->Compare(key(), child->key()) == 0) { child->Next(); } + if (!CheckIterStatus(*child)) { + current_ = NULL; + return; + } } } @@ -182,6 +220,10 @@ class MergingIterator : public Iterator { } current_->Next(); + if (!CheckIterStatus(*current_)) { + current_ = NULL; + return; + } std::push_heap(children_.begin(), children_.end(), greater_); FindSmallest(); } @@ -206,6 +248,10 @@ class MergingIterator : public Iterator { // Child has no entries >= key(). Position at last entry. child->SeekToLast(); } + if (!CheckIterStatus(*child)) { + current_ = NULL; + return; + } } } @@ -218,6 +264,10 @@ class MergingIterator : public Iterator { current_->Prev(); std::push_heap(children_.begin(), children_.end(), lesser_); + if (!CheckIterStatus(*current_)) { + current_ = NULL; + return; + } FindLargest(); } @@ -246,6 +296,8 @@ class MergingIterator : public Iterator { void FindSmallest(); void FindLargest(); + bool CheckIterStatus(const IterWrapper& iter) { return iter.Valid() || iter.status().ok(); } + // We might want to use a heap in case there are lots of children. // For now we use a simple array since we expect a very small number // of children in leveldb. @@ -256,10 +308,7 @@ class MergingIterator : public Iterator { const Lesser lesser_; // Which direction is the iterator moving? - enum Direction { - kForward, - kReverse - }; + enum Direction { kForward, kReverse }; Direction direction_; }; diff --git a/src/leveldb/table/merger.h b/src/leveldb/table/merger.h index ec52ad252..bbd1d6319 100644 --- a/src/leveldb/table/merger.h +++ b/src/leveldb/table/merger.h @@ -22,8 +22,7 @@ class Iterator; // key is present in K child iterators, it will be yielded K times. // // REQUIRES: n >= 0 -extern Iterator* NewMergingIterator( - const Comparator* comparator, Iterator** children, int n); +extern Iterator* NewMergingIterator(const Comparator* comparator, Iterator** children, int n); } // namespace leveldb diff --git a/src/leveldb/table/persistent_cache_helper.cc b/src/leveldb/table/persistent_cache_helper.cc new file mode 100644 index 000000000..c7bf831ea --- /dev/null +++ b/src/leveldb/table/persistent_cache_helper.cc @@ -0,0 +1,106 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "leveldb/persistent_cache.h" +#include "leveldb/persistent_cache/persistent_cache_file.h" +#include "persistent_cache_helper.h" + +namespace leveldb { + +std::mutex PersistentCacheHelper::updating_files_mutex_; +common::RWMutex PersistentCacheHelper::closing_file_rw_mutex_; +common::ThreadPool PersistentCacheHelper::copy_to_local_thread_pool_{kThreadNum}; + +std::unordered_set PersistentCacheHelper::updating_files_; +std::atomic PersistentCacheHelper::pending_num_{0}; +const std::uint64_t PersistentCacheHelper::max_pending_num_{kMaxPendingNum}; + +void PersistentCacheHelper::ScheduleCopyToLocal(Env *env, const std::string &fname, uint64_t fsize, + const std::string &key, + const std::shared_ptr &p_cache) { + if (pending_num_.load() >= max_pending_num_ || !p_cache) { + return; + } + + { + std::lock_guard lock(updating_files_mutex_); + if (updating_files_.find(key) != updating_files_.end()) { + return; + } + updating_files_.emplace(key); + } + ++pending_num_; + + copy_to_local_thread_pool_.AddTask([=](int64_t) { + DoCopyToLocal(env, fname, fsize, key, p_cache); + --pending_num_; + std::lock_guard lock(updating_files_mutex_); + assert(updating_files_.find(key) != updating_files_.end()); + updating_files_.erase(key); + }); +} + +void PersistentCacheHelper::DoCopyToLocal(Env *env, const std::string &fname, uint64_t fsize, + const std::string &key, + std::shared_ptr p_cache) { + LEVELDB_LOG("Schedule Copy To Local: %s, Pending Num: %u\n", fname.c_str(), pending_num_.load()); + uint64_t time_s = env->NowMicros(); + + std::unique_ptr dfs_file; + SequentialFile *tmp_file; + auto s = env->NewSequentialFile(fname, &tmp_file); + + if (!s.ok()) { + LEVELDB_LOG("Copy To Local Failed %s : %s\n", fname.c_str(), s.ToString().c_str()); + return; + } + dfs_file.reset(tmp_file); + + WriteableCacheFile *cache_file; + s = p_cache->NewWriteableCacheFile(key, &cache_file); + if (!s.ok()) { + LEVELDB_LOG("Copy To Local Failed %s : %s\n", fname.c_str(), s.ToString().c_str()); + return; + } + assert(cache_file->refs_); + + std::unique_ptr buf(new char[1048576]); // Read 1M data each time + Slice result; + size_t local_size = 0; + + while (dfs_file->Read(1048576, &result, buf.get()).ok() && result.size() > 0 && + cache_file->Append(result).ok()) { + local_size += result.size(); + } + + if (local_size == fsize) { + { + WriteLock _(&closing_file_rw_mutex_); + cache_file->Close(key); + } + uint64_t time_used = env->NowMicros() - time_s; + LEVELDB_LOG("copy %s to local success in %llu ms.\n", fname.c_str(), + static_cast(time_used) / 1000); + } else { + cache_file->Abandon(); + uint64_t dfs_file_size = 0; + s = env->GetFileSize(fname, &dfs_file_size); + if (!s.ok()) { + LEVELDB_LOG("dfs GetFileSize fail %s : %s\n", fname.c_str(), s.ToString().c_str()); + } else { + LEVELDB_LOG( + "copy %s to local fail, size %lu, dfs size %lu, local " + "size %lu\n", + fname.c_str(), fsize, dfs_file_size, local_size); + } + } +} + +Status PersistentCacheHelper::TryReadFromPersistentCache( + const std::shared_ptr &p_cache, const Slice &key, uint64_t offset, + uint64_t length, Slice *contents, SstDataScratch *val) { + ReadLock _(&closing_file_rw_mutex_); + return p_cache->Read(key, offset, length, contents, val); +} +} // leveldb diff --git a/src/leveldb/table/persistent_cache_helper.h b/src/leveldb/table/persistent_cache_helper.h new file mode 100644 index 000000000..57f8b4e02 --- /dev/null +++ b/src/leveldb/table/persistent_cache_helper.h @@ -0,0 +1,44 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#include +#include +#include +#include +#include + +#include "common/thread_pool.h" +#include "common/rwmutex.h" +#include "format.h" +#include "leveldb/env.h" +#include "leveldb/status.h" + +#pragma once +namespace leveldb { +class PersistentCache; + +class PersistentCacheHelper { + static constexpr uint32_t kThreadNum = 10; + static constexpr uint32_t kMaxPendingNum = 10; + + public: + static void ScheduleCopyToLocal(Env *env, const std::string &fname, uint64_t fsize, + const std::string &key, + const std::shared_ptr &p_cache); + + static void DoCopyToLocal(Env *env, const std::string &fname, uint64_t fsize, + const std::string &key, std::shared_ptr p_cache); + + static Status TryReadFromPersistentCache(const std::shared_ptr &p_cache, + const Slice &key, uint64_t offset, uint64_t length, + Slice *contents, SstDataScratch *val); + + private: + static std::mutex updating_files_mutex_; + static common::RWMutex closing_file_rw_mutex_; + static std::unordered_set updating_files_; + static common::ThreadPool copy_to_local_thread_pool_; + static std::atomic pending_num_; + static const uint64_t max_pending_num_; +}; +} // leveldb diff --git a/src/leveldb/table/table.cc b/src/leveldb/table/table.cc index 5b043c158..40d0a124e 100644 --- a/src/leveldb/table/table.cc +++ b/src/leveldb/table/table.cc @@ -6,55 +6,64 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. +#include #include -#include "leveldb/table.h" +#include "db/dbformat.h" +#include "format.h" #include "leveldb/cache.h" #include "leveldb/comparator.h" #include "leveldb/env.h" #include "leveldb/filter_policy.h" #include "leveldb/options.h" -#include "db/dbformat.h" +#include "leveldb/persistent_cache.h" +#include "leveldb/table.h" +#include "persistent_cache/persistent_cache_file.h" +#include "persistent_cache_helper.h" #include "table/block.h" #include "table/filter_block.h" -#include "table/format.h" #include "table/two_level_iterator.h" #include "util/coding.h" +#include "common/metric/metric_counter.h" namespace leveldb { struct Table::Rep { ~Rep() { delete filter; - delete [] filter_data; + delete[] filter_data; delete index_block; + filter_block_size_total.Sub(filter_data_size); } + Rep() : filter_data_size(0) {} + Options options; Status status; RandomAccessFile* file; + size_t fsize; uint64_t cache_id; FilterBlockReader* filter; const char* filter_data; + uint64_t filter_data_size; BlockHandle metaindex_handle; // Handle to metaindex_block: saved from footer Block* index_block; + static tera::MetricCounter filter_block_size_total; }; +tera::MetricCounter Table::Rep::filter_block_size_total{ + "tera_filter_block_size", {tera::Subscriber::SubscriberType::LATEST}, false}; class TableIter : public Iterator { public: - TableIter(Iterator* iter, - const Comparator* comparator, - const Slice& smallest, + TableIter(Iterator* iter, const Comparator* comparator, const Slice& smallest, const Slice& largest) : iter_(iter), comparator_(comparator), smallest_(smallest.ToString()), - largest_(largest.ToString()) { } + largest_(largest.ToString()) {} - virtual ~TableIter() { - delete iter_; - } + virtual ~TableIter() { delete iter_; } virtual void Seek(const Slice& target) { if (smallest_.empty() && largest_.empty()) { @@ -101,12 +110,8 @@ class TableIter : public Iterator { iter_->Seek(largest_); } } - virtual void Next() { - iter_->Next(); - } - virtual void Prev() { - iter_->Prev(); - } + virtual void Next() { iter_->Next(); } + virtual void Prev() { iter_->Prev(); } virtual bool Valid() const { if (!iter_->Valid()) { return false; @@ -127,9 +132,7 @@ class TableIter : public Iterator { assert(Valid()); return iter_->value(); } - virtual Status status() const { - return iter_->status(); - } + virtual Status status() const { return iter_->status(); } private: Iterator* iter_; @@ -140,20 +143,15 @@ class TableIter : public Iterator { class IndexBlockIter : public Iterator { public: - IndexBlockIter(const ReadOptions& opts, - Block* index_block, - FilterBlockReader* filter) + IndexBlockIter(const ReadOptions& opts, Block* index_block, FilterBlockReader* filter) : valid_(false), iter_(index_block->NewIterator(opts.db_opt->comparator)), comparator_(opts.db_opt->comparator), filter_(filter), read_single_row_(opts.read_single_row), row_start_key_(opts.row_start_key, kMaxSequenceNumber, kValueTypeForSeek), - row_end_key_(opts.row_end_key, kMaxSequenceNumber, kValueTypeForSeek) { - } - virtual ~IndexBlockIter() { - delete iter_; - } + row_end_key_(opts.row_end_key, kMaxSequenceNumber, kValueTypeForSeek) {} + virtual ~IndexBlockIter() { delete iter_; } virtual void Seek(const Slice& target) { iter_->Seek(target); SkipUnmatchedBlocksForward(); @@ -174,9 +172,7 @@ class IndexBlockIter : public Iterator { iter_->Prev(); SkipUnmatchedBlocksBackward(); } - virtual bool Valid() const { - return valid_; - } + virtual bool Valid() const { return valid_; } virtual Slice key() const { assert(Valid()); return iter_->key(); @@ -185,9 +181,7 @@ class IndexBlockIter : public Iterator { assert(Valid()); return iter_->value(); } - virtual Status status() const { - return iter_->status(); - } + virtual Status status() const { return iter_->status(); } private: void SkipUnmatchedBlocksForward() { @@ -198,7 +192,7 @@ class IndexBlockIter : public Iterator { break; } if (!valid_index_key_.empty() && comparator_->Compare(iter_->key(), valid_index_key_) > 0) { - //Log("bloomfilter: skip block by range"); + // LEVELDB_LOG("bloomfilter: skip block by range"); break; } if (comparator_->Compare(iter_->key(), row_end_key_.Encode()) >= 0 && @@ -207,10 +201,10 @@ class IndexBlockIter : public Iterator { } if (CheckFilter()) { valid_ = true; - //Log("bloomfilter: valid block"); + // LEVELDB_LOG("bloomfilter: valid block"); break; } - //Log("bloomfilter: skip block by bloom"); + // LEVELDB_LOG("bloomfilter: skip block by bloom"); iter_->Next(); } } @@ -222,7 +216,7 @@ class IndexBlockIter : public Iterator { break; } if (comparator_->Compare(iter_->key(), row_start_key_.Encode()) < 0) { - //Log("bloomfilter: skip block by range"); + // LEVELDB_LOG("bloomfilter: skip block by range"); break; } if (comparator_->Compare(iter_->key(), row_end_key_.Encode()) >= 0 && @@ -231,10 +225,10 @@ class IndexBlockIter : public Iterator { } if (CheckFilter()) { valid_ = true; - //Log("bloomfilter: valid block"); + // LEVELDB_LOG("bloomfilter: valid block"); break; } - //Log("bloomfilter: skip block by bloom"); + // LEVELDB_LOG("bloomfilter: skip block by bloom"); iter_->Prev(); } } @@ -242,9 +236,7 @@ class IndexBlockIter : public Iterator { assert(iter_->Valid()); Slice handle_value = iter_->value(); BlockHandle handle; - if (!read_single_row_ || - filter_ == NULL || - !handle.DecodeFrom(&handle_value).ok() || + if (!read_single_row_ || filter_ == NULL || !handle.DecodeFrom(&handle_value).ok() || filter_->KeyMayMatch(handle.offset(), row_start_key_.Encode())) { return true; } @@ -264,223 +256,211 @@ class IndexBlockIter : public Iterator { std::string valid_index_key_; }; -static void DeleteBlock(void* arg, void* ignored) { - delete reinterpret_cast(arg); -} - +static void DeleteBlock(void* arg, void* ignored) { delete reinterpret_cast(arg); } -// This iterator is just used in long-scan cases, like compact and batch scan. -// It'll prefetch some continuous data blocks from SSD or dfs in a single read-operation -// for reducing iops and maximizing throughput. -class PrefetchScanIterator : public Iterator { +class PrefetchBlockReader { public: - PrefetchScanIterator(RandomAccessFile* file, - const ReadOptions& opt, - Iterator* index_block_iterator) - : file_(file), - index_block_iterator_(index_block_iterator), - block_iterator_(NULL), - handles_iterator_(handles_.end()), - option_(opt) { - } - virtual ~PrefetchScanIterator() { - delete index_block_iterator_; - delete block_iterator_; - } - virtual void Seek(const Slice& target) { - index_block_iterator_->Seek(target); - if (index_block_iterator_->Valid()) { - PrefetchBlockContent(); - } - if (block_iterator_) { - block_iterator_->Seek(target); - } - } - virtual void SeekToFirst() { - index_block_iterator_->SeekToFirst(); - if (index_block_iterator_->Valid()) { - PrefetchBlockContent(); - } - } - virtual void SeekToLast() { - index_block_iterator_->SeekToLast(); - if (index_block_iterator_->Valid()) { - PrefetchBlockContent(); + PrefetchBlockReader(RandomAccessFile* file, size_t fsize) : file_(file), fsize_(fsize) {} + + Iterator* operator()(void* arg, const ReadOptions& options, const Slice& index_value) { + assert(!arg); + Block* block = NULL; + BlockHandle handle; + Slice input = index_value; + Status s = handle.DecodeFrom(&input); + + if (s.ok()) { + s = ReadBlock(handle, options, &block); } - if (block_iterator_) { - block_iterator_->SeekToLast(); + + Iterator* iter; + if (block != nullptr) { + iter = block->NewIterator(options.db_opt->comparator); + iter->RegisterCleanup(&DeleteBlock, block, nullptr); + } else { + iter = NewErrorIterator(s); } + return iter; } - virtual void Next() { - assert(Valid()); - /* - * block_content_ is the file data we prefetched. - * handles_ is a vector of block handles that record [offset, size] of each block in block_content_. - * handles_iterator_ points to one block handle in handels_. - * block_iterator_ is the iterator of the block currently in use. - * - * So when Next() is called, we first tried to call block_iterator_'s Next(). - * If it is valid, we don't need to do other things. - * Otherwise, it means that we reach the end of current block. - * Then, we try to load next block by moving handles_iterator_ to the next block handle, - * and load this block from prefetched data (block_content_). - * If handles_iterator_ points to the last block handle in handles_, - * it means that we reach the end of prefetched blocks, so PrefetchBlockContent() is called to read new - * blocks from file. - */ - block_iterator_->Next(); - if (!block_iterator_->Valid()) { - if (++handles_iterator_ != handles_.end()) { - LoadBlockIterator(); - } else { - PrefetchBlockContent(); + + private: + Status ReadBlock(const BlockHandle& handle, const ReadOptions& options, Block** block) { + assert(!*block); + Slice block_slice; + Status s; + // Read file content, if missed, it will prefetch data from cache/dfs. + if ((s = ReadFileContent(handle, options, &block_slice)).ok()) { + BlockContents contents; + s = ParseBlock(handle.size(), handle.offset(), options, block_slice, &contents); + if (s.ok()) { + *block = new Block(contents); + } else if (prefetched_from_persistent_cache_) { + // Parse Block failed and it's read from persistent cache. + // May be an invalid cache file, try read directly from dfs. + assert(options.db_opt->persistent_cache); + SstDataScratch scratch; + auto dfs_status = ReadDfsContent(handle, options, &scratch, &block_slice); + if (dfs_status.ok()) { + s = ParseBlock(handle.size(), handle.offset(), options, block_slice, &contents); + if (s.ok()) { + // Parse from dfs content successfully, so force evict the cache file. + auto fname = file_->GetFileName(); + LEVELDB_LOG("Invalid cache data for %s, force evict it.", fname.c_str()); + Slice key{fname}; + key.remove_specified_prefix(options.db_opt->dfs_storage_path_prefix); + options.db_opt->persistent_cache->ForceEvict(key); + + *block = new Block(contents); + } + } else { + // Read from dfs failed, keep the last error. + s = dfs_status; + } } } + return s; } - virtual void Prev() { - //This iterator should just be used in scan, - //so prev() should never be called. - assert(Valid()); - abort(); - } - virtual bool Valid() const { - return block_iterator_ && block_iterator_->Valid(); - } - virtual Slice key() const { - assert(Valid()); - return block_iterator_->key(); + Status ReadDfsContent(const BlockHandle& handle, const ReadOptions& options, + SstDataScratch* scratch, Slice* slice) { + assert(slice); + auto size = handle.size() + kBlockTrailerSize; + auto offset = handle.offset(); + Status s; + return ReadSstFile(file_, false, offset, size, slice, scratch); } - virtual Slice value() const { - assert(Valid()); - return block_iterator_->value(); - } + Status ReadFileContent(const BlockHandle& handle, const ReadOptions& options, Slice* data) { + assert(data); + Status s; + if (handle.offset() < prefetched_offset_) { + s = PrefetchFileContents(handle, options); + } + if (!s.ok()) { + return s; + } - virtual Status status() const { - if (!index_block_iterator_->status().ok()) { - return index_block_iterator_->status(); - } else if (block_iterator_ && !block_iterator_->status().ok()) { - return block_iterator_->status(); - } else { - return status_; + auto relative_offset = handle.offset() - prefetched_offset_; + auto size = handle.size() + kBlockTrailerSize; + if (relative_offset + size > prefetched_data_.size()) { + s = PrefetchFileContents(handle, options); + if (s.ok()) { + assert(handle.offset() == prefetched_offset_); + relative_offset = 0; + } else { + return s; + } } - } - private: - // Load the iterator of the block that handles_iterator_ currently points to. - void LoadBlockIterator() { - assert(handles_iterator_ != handles_.end()); - - delete block_iterator_; - block_iterator_ = NULL; - //Caculate current block's offset in block_content_. - uint64_t offset = handles_iterator_->offset() - handles_[0].offset(); - uint64_t size = handles_iterator_->size() + kBlockTrailerSize; - - assert(offset + size <= block_content_.size()); - Slice block_with_trailer(&block_content_[0] + offset, size); + // No more check needed, because we successfully prefetch file contents. + *data = Slice{prefetched_data_}; + data->remove_prefix(relative_offset); + data->remove_suffix(prefetched_data_.size() - (relative_offset + size)); + assert(data->size() == handle.size() + kBlockTrailerSize); + return Status::OK(); + } - BlockContents contents; - Block* block = NULL; - //Parse block from block_with_trailer slice. - Status s = ParseBlock(handles_iterator_->size(), - handles_iterator_->offset(), - option_, - block_with_trailer, - &contents); - if (s.ok()) { - block = new Block(contents); - } + Status PrefetchFileContents(const BlockHandle& handle, const ReadOptions& options) { + prefetched_offset_ = 0; + prefetched_data_.clear(); + prefetched_from_persistent_cache_ = false; - if (block != NULL) { - //Get current block's iterator. - block_iterator_ = block->NewIterator(option_.db_opt->comparator); - block_iterator_->RegisterCleanup(&DeleteBlock, block, NULL); - block_iterator_->SeekToFirst(); - } else { - block_iterator_ = NewErrorIterator(s); + auto block_offset = handle.offset(); + if (fsize_ < block_offset) { + prefetched_data_.clear(); + return Status::Corruption("truncated block read"); } - } - // Prefetch some continuous data blocks for reducing iops. - // Their total size is less than tera_tabletnode_prefetch_scan_size. - void PrefetchBlockContent() { - handles_.clear(); - uint64_t current_size(0); - while(index_block_iterator_->Valid() && - current_size < option_.prefetch_scan_size) { - BlockHandle handle; - Slice input = index_block_iterator_->value(); - status_ = handle.DecodeFrom(&input); - if (!status_.ok()) { - break; + Slice contents; + SstDataScratch val; + auto block_size = handle.size() + kBlockTrailerSize; + auto prefetch_size = std::max(block_size, options.prefetch_scan_size); + prefetch_size = std::min(prefetch_size, fsize_ - block_offset); + + auto& p_cache = options.db_opt->persistent_cache; + + if (p_cache) { + auto fname = file_->GetFileName(); + Slice key{fname}; + key.remove_specified_prefix(options.db_opt->dfs_storage_path_prefix); + if (PersistentCacheHelper::TryReadFromPersistentCache(p_cache, key, block_offset, + prefetch_size, &contents, &val).ok()) { + prefetched_data_.assign(contents.data(), contents.size()); + prefetched_from_persistent_cache_ = true; + } else if (options.fill_persistent_cache) { + PersistentCacheHelper::ScheduleCopyToLocal(options.db_opt->env, file_->GetFileName(), + fsize_, key.ToString(), p_cache); } - current_size += handle.size() + kBlockTrailerSize; - handles_.push_back(handle); - index_block_iterator_->Next(); } - handles_iterator_ = handles_.end(); - - if (status_.ok() && !handles_.empty()) { - Options db_opt = *(option_.db_opt); - uint64_t offset = handles_[0].offset(); - uint64_t blocks_size = handles_.back().offset() - offset; - blocks_size += handles_.back().size() + kBlockTrailerSize; - Slice contents; - char* buf = NULL; - status_ = ReadSstFile(file_, db_opt.use_direct_io_read, offset, blocks_size, &contents, &buf); - if (!status_.ok()) { - return; - } - block_content_.assign(contents.data(), contents.size()); - FreeBuf(buf, db_opt.use_direct_io_read); - if (block_content_.size() != blocks_size) { - status_ = Status::Corruption("truncated block read"); - return; + if (!prefetched_from_persistent_cache_) { + auto s = ReadSstFile(file_, options.db_opt->use_direct_io_read, block_offset, prefetch_size, + &contents, &val); + if (!s.ok()) { + return s; } - handles_iterator_ = handles_.begin(); - LoadBlockIterator(); + prefetched_data_.assign(contents.data(), contents.size()); } - } -private: + if (prefetched_data_.size() < block_size) { + prefetched_data_.clear(); + return Status::Corruption("truncated block read"); + } - Status status_; - std::string block_content_; - std::vector handles_; + prefetched_offset_ = block_offset; + return Status::OK(); + } + private: RandomAccessFile* file_; - Iterator* index_block_iterator_; - Iterator* block_iterator_; - std::vector::iterator handles_iterator_; - ReadOptions option_; + size_t fsize_; + std::string prefetched_data_; + uint64_t prefetched_offset_ = 0; + bool prefetched_from_persistent_cache_ = false; }; - -Status Table::Open(const Options& options, - RandomAccessFile* file, - uint64_t size, - Table** table) { +Status Table::Open(const Options& options, RandomAccessFile* file, uint64_t size, Table** table) { *table = NULL; if (size < Footer::kEncodedLength) { return Status::InvalidArgument("file is too short to be an sstable"); } - size_t len = Footer::kEncodedLength;; + size_t len = Footer::kEncodedLength; uint64_t offset = size - Footer::kEncodedLength; Slice footer_input; - char* buf = NULL; - Status s = ReadSstFile(file, options.use_direct_io_read, offset, len, &footer_input, &buf); - if (!s.ok()) { - return s; - } + SstDataScratch scratch; + Status s; Footer footer; - s = footer.DecodeFrom(&footer_input); - FreeBuf(buf, options.use_direct_io_read); - if (!s.ok()) { + auto& p_cache = options.persistent_cache; + + if (p_cache) { + Slice key{file->GetFileName()}; + key.remove_specified_prefix(options.dfs_storage_path_prefix); + // Try Read From Persistent Cache + s = PersistentCacheHelper::TryReadFromPersistentCache(p_cache, key, offset, len, &footer_input, + &scratch); + if (s.ok()) { + // Read Success + s = footer.DecodeFrom(&footer_input); + if (!s.ok()) { + // Parse footer failed means this sst file is invalid, remove it. + options.persistent_cache->ForceEvict(key); + } + } + } + + if (!p_cache || !s.ok()) { + // Disable Persistent Cache or + // parse footer failed or + // read file failed, just read from dfs. + s = ReadSstFile(file, options.use_direct_io_read, offset, len, &footer_input, &scratch); + if (!s.ok()) { + return s; + } + s = footer.DecodeFrom(&footer_input); + if (!s.ok()) { return s; + } } // Read the index block @@ -499,6 +479,7 @@ Status Table::Open(const Options& options, // We've successfully read the footer and the index block: we're // ready to serve requests. Rep* rep = new Table::Rep; + rep->fsize = size; rep->options = options; rep->file = file; rep->metaindex_handle = footer.metaindex_handle(); @@ -558,14 +539,14 @@ void Table::ReadFilter(const Slice& filter_handle_value) { return; } if (block.heap_allocated) { - rep_->filter_data = block.data.data(); // Will need to delete later + rep_->filter_data = block.data.data(); // Will need to delete later + rep_->filter_data_size = block.data.size(); + rep_->filter_block_size_total.Add(rep_->filter_data_size); } rep_->filter = new FilterBlockReader(rep_->options.filter_policy, block.data); } -Table::~Table() { - delete rep_; -} +Table::~Table() { delete rep_; } static void DeleteCachedBlock(const Slice& key, void* value) { Block* block = reinterpret_cast(value); @@ -580,9 +561,7 @@ static void ReleaseBlock(void* arg, void* h) { // Convert an index iterator value (i.e., an encoded BlockHandle) // into an iterator over the contents of the corresponding block. -Iterator* Table::BlockReader(void* arg, - const ReadOptions& options, - const Slice& index_value) { +Iterator* Table::BlockReader(void* arg, const ReadOptions& options, const Slice& index_value) { Table* table = reinterpret_cast(arg); Cache* block_cache = table->rep_->options.block_cache; Block* block = NULL; @@ -599,7 +578,7 @@ Iterator* Table::BlockReader(void* arg, if (block_cache != NULL) { char cache_key_buffer[16]; EncodeFixed64(cache_key_buffer, table->rep_->cache_id); - EncodeFixed64(cache_key_buffer+8, handle.offset()); + EncodeFixed64(cache_key_buffer + 8, handle.offset()); Slice key(cache_key_buffer, sizeof(cache_key_buffer)); cache_handle = block_cache->Lookup(key); if (cache_handle != NULL) { @@ -609,8 +588,17 @@ Iterator* Table::BlockReader(void* arg, if (s.ok()) { block = new Block(contents); if (contents.cachable && options.fill_cache) { - cache_handle = block_cache->Insert( - key, block, block->size(), &DeleteCachedBlock); + cache_handle = block_cache->Insert(key, block, block->size(), &DeleteCachedBlock); + } + + if (table->rep_->options.persistent_cache && options.fill_persistent_cache && + !contents.read_from_persistent_cache) { + std::string fname = table->rep_->file->GetFileName(); + Slice persistent_cache_key{fname}; + persistent_cache_key.remove_specified_prefix(options.db_opt->dfs_storage_path_prefix); + PersistentCacheHelper::ScheduleCopyToLocal( + table->rep_->options.env, table->rep_->file->GetFileName(), table->rep_->fsize, + persistent_cache_key.ToString(), table->rep_->options.persistent_cache); } } } @@ -640,24 +628,29 @@ Iterator* Table::NewIterator(const ReadOptions& options) const { return NewIterator(options, Slice(), Slice()); } -Iterator* Table::NewIterator(const ReadOptions& options, - const Slice& smallest, +void DeletePrefetchBlockReader(void* arg1, void*) { + delete reinterpret_cast(arg1); +} + +Iterator* Table::NewIterator(const ReadOptions& options, const Slice& smallest, const Slice& largest) const { if (options.prefetch_scan) { - return new TableIter( - new PrefetchScanIterator(rep_->file, options, rep_->index_block->NewIterator(options.db_opt->comparator)), - options.db_opt->comparator, smallest, largest); + auto prefetch_block_reader = new PrefetchBlockReader(rep_->file, rep_->fsize); + auto iter = new TableIter( + NewTwoLevelIterator(new IndexBlockIter(options, rep_->index_block, rep_->filter), + *prefetch_block_reader, nullptr, options), + options.db_opt->comparator, smallest, largest); + iter->RegisterCleanup(&DeletePrefetchBlockReader, prefetch_block_reader, nullptr); + return iter; } else { return new TableIter( - NewTwoLevelIterator( - new IndexBlockIter(options, rep_->index_block, rep_->filter), - &Table::BlockReader, const_cast(this), options), - options.db_opt->comparator, smallest, largest); + NewTwoLevelIterator(new IndexBlockIter(options, rep_->index_block, rep_->filter), + Table::BlockReader, const_cast(this), options), + options.db_opt->comparator, smallest, largest); } } -Status Table::InternalGet(const ReadOptions& options, const Slice& k, - void* arg, +Status Table::InternalGet(const ReadOptions& options, const Slice& k, void* arg, void (*saver)(void*, const Slice&, const Slice&)) { Status s; Iterator* iiter = rep_->index_block->NewIterator(options.db_opt->comparator); @@ -666,8 +659,7 @@ Status Table::InternalGet(const ReadOptions& options, const Slice& k, Slice handle_value = iiter->value(); FilterBlockReader* filter = rep_->filter; BlockHandle handle; - if (filter != NULL && - handle.DecodeFrom(&handle_value).ok() && + if (filter != NULL && handle.DecodeFrom(&handle_value).ok() && !filter->KeyMayMatch(handle.offset(), k)) { // Not found } else { @@ -691,10 +683,8 @@ Status Table::InternalGet(const ReadOptions& options, const Slice& k, return s; } - uint64_t Table::ApproximateOffsetOf(const Slice& key) const { - Iterator* index_iter = - rep_->index_block->NewIterator(rep_->options.comparator); + Iterator* index_iter = rep_->index_block->NewIterator(rep_->options.comparator); index_iter->Seek(key); uint64_t result; if (index_iter->Valid()) { @@ -719,8 +709,6 @@ uint64_t Table::ApproximateOffsetOf(const Slice& key) const { return result; } -uint64_t Table::IndexBlockSize() const { - return rep_->index_block->size(); -} +uint64_t Table::IndexBlockSize() const { return rep_->index_block->size(); } } // namespace leveldb diff --git a/src/leveldb/table/table_builder.cc b/src/leveldb/table/table_builder.cc index d602f8a37..383be2eae 100644 --- a/src/leveldb/table/table_builder.cc +++ b/src/leveldb/table/table_builder.cc @@ -6,19 +6,21 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. -#include "leveldb/table_builder.h" - #include + +#include "common/counter.h" +#include "format.h" +#include "leveldb/table_builder.h" #include "leveldb/comparator.h" #include "leveldb/env.h" #include "leveldb/filter_policy.h" #include "leveldb/options.h" +#include "leveldb/persistent_cache.h" +#include "persistent_cache/persistent_cache_file.h" #include "table/block_builder.h" #include "table/filter_block.h" -#include "table/format.h" #include "util/coding.h" #include "util/crc32c.h" -#include "../common/counter.h" namespace leveldb { @@ -36,7 +38,7 @@ struct TableBuilder::Rep { std::string last_key; int64_t num_entries; uint64_t saved_size; - bool closed; // Either Finish() or Abandon() has been called. + bool closed; // Either Finish() or Abandon() has been called. FilterBlockBuilder* filter_block; // We do not emit the index entry for a block until we have seen the @@ -52,6 +54,7 @@ struct TableBuilder::Rep { BlockHandle pending_handle; // Handle to add to index block std::string compressed_output; + WriteableCacheFile* cache_file; Rep(const Options& opt, WritableFile* f) : options(opt), @@ -63,14 +66,33 @@ struct TableBuilder::Rep { num_entries(0), saved_size(0), closed(false), - filter_block(opt.filter_policy == NULL ? NULL - : new FilterBlockBuilder(opt.filter_policy)), - pending_index_entry(false) { + filter_block(opt.filter_policy == NULL ? NULL : new FilterBlockBuilder(opt.filter_policy)), + pending_index_entry(false), + cache_file(nullptr) { index_block_options.block_restart_interval = 1; + if (options.persistent_cache) { + std::string file_name = file->GetFileName(); + Slice sub_filename{file_name}; + sub_filename.remove_specified_prefix(options.dfs_storage_path_prefix); + auto s = + options.persistent_cache->NewWriteableCacheFile(sub_filename.ToString(), &cache_file); + if (!s.ok()) { + LEVELDB_LOG("Create cache file failed: %s : %s\n", file->GetFileName().c_str(), + s.ToString().c_str()); + delete cache_file; + cache_file = nullptr; + } + } } ~Rep() { delete filter_block; + // cache_file should be close and set to nullptr in TableBuilder::Finish(). + // If not, it means we build table failed, so just abandon this cache_file. + if (cache_file) { + cache_file->Abandon(); + cache_file = nullptr; + } } }; @@ -80,7 +102,7 @@ TableBuilder::TableBuilder(const Options& options, WritableFile* file) rep_->filter_block->StartBlock(0); } if (rep_->options.table_builder_batch_write) { - assert(rep_->options.table_builder_batch_size > 0); + assert(rep_->options.table_builder_batch_size > 0); } } @@ -212,8 +234,7 @@ void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) { r->saved_size += raw.size() - block_contents.size(); } -void TableBuilder::WriteRawBlock(const Slice& block_contents, - CompressionType type, +void TableBuilder::WriteRawBlock(const Slice& block_contents, CompressionType type, BlockHandle* handle) { Rep* r = rep_; handle->set_offset(r->offset); @@ -224,7 +245,7 @@ void TableBuilder::WriteRawBlock(const Slice& block_contents, trailer[0] = type; uint32_t crc = crc32c::Value(block_contents.data(), block_contents.size()); crc = crc32c::Extend(crc, trailer, 1); // Extend crc to cover block type - EncodeFixed32(trailer+1, crc32c::Mask(crc)); + EncodeFixed32(trailer + 1, crc32c::Mask(crc)); AppendToFile(Slice(trailer, kBlockTrailerSize)); if (r->status.ok()) { r->offset += block_contents.size() + kBlockTrailerSize; @@ -232,9 +253,7 @@ void TableBuilder::WriteRawBlock(const Slice& block_contents, } } -Status TableBuilder::status() const { - return rep_->status; -} +Status TableBuilder::status() const { return rep_->status; } Status TableBuilder::Finish() { Rep* r = rep_; @@ -246,8 +265,7 @@ Status TableBuilder::Finish() { // Write filter block if (ok() && r->filter_block != NULL) { - WriteRawBlock(r->filter_block->Finish(), kNoCompression, - &filter_block_handle); + WriteRawBlock(r->filter_block->Finish(), kNoCompression, &filter_block_handle); } // Write metaindex block @@ -292,6 +310,19 @@ Status TableBuilder::Finish() { } FlushBatchBuffer(); r->status = r->file->Flush(); + + if (rep_->cache_file) { + if (ok()) { + std::string file_name = rep_->file->GetFileName(); + Slice key{file_name}; + key.remove_specified_prefix(rep_->options.dfs_storage_path_prefix); + rep_->cache_file->Close(key); + } else { + rep_->cache_file->Abandon(); + } + rep_->cache_file = nullptr; + } + return r->status; } @@ -301,17 +332,11 @@ void TableBuilder::Abandon() { r->closed = true; } -uint64_t TableBuilder::NumEntries() const { - return rep_->num_entries; -} +uint64_t TableBuilder::NumEntries() const { return rep_->num_entries; } -uint64_t TableBuilder::FileSize() const { - return rep_->offset; -} +uint64_t TableBuilder::FileSize() const { return rep_->offset; } -uint64_t TableBuilder::SavedSize() const { - return rep_->saved_size; -} +uint64_t TableBuilder::SavedSize() const { return rep_->saved_size; } void TableBuilder::FlushBatchBuffer() { if (batch_write_buffer_.empty()) { @@ -319,6 +344,7 @@ void TableBuilder::FlushBatchBuffer() { } Rep* r = rep_; r->status = r->file->Append(Slice(batch_write_buffer_)); + AppendToCacheFile(Slice(batch_write_buffer_)); batch_write_buffer_.clear(); } @@ -331,6 +357,20 @@ void TableBuilder::AppendToFile(const Slice& slice) { } } else { r->status = r->file->Append(slice); + AppendToCacheFile(slice); + } +} + +void TableBuilder::AppendToCacheFile(const Slice& slice) { + if (!rep_->cache_file) { + return; + } + auto s = rep_->cache_file->Append(slice); + if (!s.ok()) { + LEVELDB_LOG("Append to cache file failed: %s : %s\n", rep_->cache_file->Path().c_str(), + s.ToString().c_str()); + rep_->cache_file->Abandon(); + rep_->cache_file = nullptr; } } -} // namespace leveldb +} // leveldb diff --git a/src/leveldb/table/table_test.cc b/src/leveldb/table/table_test.cc index 8752ddd69..1b945c20a 100644 --- a/src/leveldb/table/table_test.cc +++ b/src/leveldb/table/table_test.cc @@ -10,8 +10,17 @@ #include #include +#include +#include +#include +#include +#include +#include + #include "db/dbformat.h" #include "db/memtable.h" +#include "db/memtable_on_leveldb.h" +#include "db/sharded_memtable.h" #include "db/write_batch_internal.h" #include "leveldb/db.h" #include "leveldb/env.h" @@ -19,10 +28,13 @@ #include "leveldb/table_builder.h" #include "table/block.h" #include "table/block_builder.h" -#include "table/format.h" +#include "format.h" #include "util/random.h" #include "util/testharness.h" #include "util/testutil.h" +#include "leveldb/persistent_cache.h" +#include "common/event.h" +#include "util/dfs_read_thread_limiter.h" namespace leveldb { @@ -31,8 +43,7 @@ namespace leveldb { static std::string Reverse(const Slice& key) { std::string str(key.ToString()); std::string rev(""); - for (std::string::reverse_iterator rit = str.rbegin(); - rit != str.rend(); ++rit) { + for (std::string::reverse_iterator rit = str.rbegin(); rit != str.rend(); ++rit) { rev.push_back(*rit); } return rev; @@ -41,17 +52,13 @@ static std::string Reverse(const Slice& key) { namespace { class ReverseKeyComparator : public Comparator { public: - virtual const char* Name() const { - return "leveldb.ReverseBytewiseComparator"; - } + virtual const char* Name() const { return "leveldb.ReverseBytewiseComparator"; } virtual int Compare(const Slice& a, const Slice& b) const { return BytewiseComparator()->Compare(Reverse(a), Reverse(b)); } - virtual void FindShortestSeparator( - std::string* start, - const Slice& limit) const { + virtual void FindShortestSeparator(std::string* start, const Slice& limit) const { std::string s = Reverse(*start); std::string l = Reverse(limit); BytewiseComparator()->FindShortestSeparator(&s, l); @@ -83,17 +90,17 @@ namespace { struct STLLessThan { const Comparator* cmp; - STLLessThan() : cmp(BytewiseComparator()) { } - STLLessThan(const Comparator* c) : cmp(c) { } + STLLessThan() : cmp(BytewiseComparator()) {} + STLLessThan(const Comparator* c) : cmp(c) {} bool operator()(const std::string& a, const std::string& b) const { return cmp->Compare(Slice(a), Slice(b)) < 0; } }; } // namespace -class StringSink: public WritableFile { +class StringSink : public WritableFile { public: - ~StringSink() { } + ~StringSink() {} const std::string& contents() const { return contents_; } @@ -110,19 +117,15 @@ class StringSink: public WritableFile { std::string contents_; }; - -class StringSource: public RandomAccessFile { +class StringSource : public RandomAccessFile { public: - StringSource(const Slice& contents) - : contents_(contents.data(), contents.size()) { - } + StringSource(const Slice& contents) : contents_(contents.data(), contents.size()) {} - virtual ~StringSource() { } + virtual ~StringSource() {} uint64_t Size() const { return contents_.size(); } - virtual Status Read(uint64_t offset, size_t n, Slice* result, - char* scratch) const { + virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const { if (offset > contents_.size()) { return Status::InvalidArgument("invalid Read offset"); } @@ -144,24 +147,18 @@ typedef std::map KVMap; // BlockBuilder/TableBuilder and Block/Table. class Constructor { public: - explicit Constructor(const Comparator* cmp) : data_(STLLessThan(cmp)) { } - virtual ~Constructor() { } + explicit Constructor(const Comparator* cmp) : data_(STLLessThan(cmp)) {} + virtual ~Constructor() {} - void Add(const std::string& key, const Slice& value) { - data_[key] = value.ToString(); - } + void Add(const std::string& key, const Slice& value) { data_[key] = value.ToString(); } // Finish constructing the data structure with all the keys that have // been added so far. Returns the keys in sorted order in "*keys" // and stores the key/value pairs in "*kvmap" - void Finish(const Options& options, - std::vector* keys, - KVMap* kvmap) { + void Finish(const Options& options, std::vector* keys, KVMap* kvmap) { *kvmap = data_; keys->clear(); - for (KVMap::const_iterator it = data_.begin(); - it != data_.end(); - ++it) { + for (KVMap::const_iterator it = data_.begin(); it != data_.end(); ++it) { keys->push_back(it->first); } data_.clear(); @@ -182,23 +179,17 @@ class Constructor { KVMap data_; }; -class BlockConstructor: public Constructor { +class BlockConstructor : public Constructor { public: explicit BlockConstructor(const Comparator* cmp) - : Constructor(cmp), - comparator_(cmp), - block_(NULL) { } - ~BlockConstructor() { - delete block_; - } + : Constructor(cmp), comparator_(cmp), block_(NULL) {} + ~BlockConstructor() { delete block_; } virtual Status FinishImpl(const Options& options, const KVMap& data) { delete block_; block_ = NULL; BlockBuilder builder(&options); - for (KVMap::const_iterator it = data.begin(); - it != data.end(); - ++it) { + for (KVMap::const_iterator it = data.begin(); it != data.end(); ++it) { builder.Add(it->first, it->second); } // Open the block @@ -210,9 +201,7 @@ class BlockConstructor: public Constructor { block_ = new Block(contents); return Status::OK(); } - virtual Iterator* NewIterator() const { - return block_->NewIterator(comparator_); - } + virtual Iterator* NewIterator() const { return block_->NewIterator(comparator_); } private: const Comparator* comparator_; @@ -222,27 +211,25 @@ class BlockConstructor: public Constructor { BlockConstructor(); }; -class TableConstructor: public Constructor { +template +class TableConstructor : public Constructor { public: TableConstructor(const Comparator* cmp) - : Constructor(cmp), - db_opt_(new Options()), - source_(NULL), table_(NULL) { - db_opt_->comparator = cmp; + : Constructor(cmp), db_opt_(new Options()), source_(NULL), table_(NULL) { + db_opt_->comparator = cmp; } ~TableConstructor() { Reset(); - delete db_opt_; // cannot delete db_opt_ in `Reset()', - db_opt_ = NULL; // not only ~Tableconstructor() but also FinishImpl() will call Reset() + delete db_opt_; // cannot delete db_opt_ in `Reset()', + db_opt_ = NULL; // not only ~Tableconstructor() but also FinishImpl() will + // call Reset() } virtual Status FinishImpl(const Options& options, const KVMap& data) { Reset(); StringSink sink; TableBuilder builder(options, &sink); - for (KVMap::const_iterator it = data.begin(); - it != data.end(); - ++it) { + for (KVMap::const_iterator it = data.begin(); it != data.end(); ++it) { builder.Add(it->first, it->second); ASSERT_TRUE(builder.status().ok()); } @@ -259,14 +246,14 @@ class TableConstructor: public Constructor { } virtual Iterator* NewIterator() const { - return table_->NewIterator(ReadOptions(db_opt_)); + ReadOptions opt(db_opt_); + opt.prefetch_scan = enable_prefetch_scan; + return table_->NewIterator(opt); } - uint64_t ApproximateOffsetOf(const Slice& key) const { - return table_->ApproximateOffsetOf(key); - } + uint64_t ApproximateOffsetOf(const Slice& key) const { return table_->ApproximateOffsetOf(key); } - private: + protected: void Reset() { delete table_; delete source_; @@ -282,9 +269,9 @@ class TableConstructor: public Constructor { }; // A helper class that converts internal format keys into user keys -class KeyConvertingIterator: public Iterator { +class KeyConvertingIterator : public Iterator { public: - explicit KeyConvertingIterator(Iterator* iter) : iter_(iter) { } + explicit KeyConvertingIterator(Iterator* iter) : iter_(iter) {} virtual ~KeyConvertingIterator() { delete iter_; } virtual bool Valid() const { return iter_->Valid(); } virtual void Seek(const Slice& target) { @@ -309,9 +296,7 @@ class KeyConvertingIterator: public Iterator { } virtual Slice value() const { return iter_->value(); } - virtual Status status() const { - return status_.ok() ? iter_->status() : status_; - } + virtual Status status() const { return status_.ok() ? iter_->status() : status_; } private: mutable Status status_; @@ -322,25 +307,26 @@ class KeyConvertingIterator: public Iterator { void operator=(const KeyConvertingIterator&); }; -class MemTableConstructor: public Constructor { +template +static MemTable* NewMemTable(InitArgs... args) { + return new MemTableType(std::forward(args)...); +}; + +class MemTableConstructor : public Constructor { public: - explicit MemTableConstructor(const Comparator* cmp) - : Constructor(cmp), - internal_comparator_(cmp) { - memtable_ = new MemTable(internal_comparator_); + explicit MemTableConstructor(const Comparator* cmp, + std::function new_mem) + : Constructor(cmp), internal_comparator_(cmp), new_mem_(new_mem) { + memtable_ = new_mem_(internal_comparator_); memtable_->Ref(); } - ~MemTableConstructor() { - memtable_->Unref(); - } + ~MemTableConstructor() { memtable_->Unref(); } virtual Status FinishImpl(const Options& options, const KVMap& data) { memtable_->Unref(); - memtable_ = new MemTable(internal_comparator_); + memtable_ = new_mem_(internal_comparator_); memtable_->Ref(); int seq = 1; - for (KVMap::const_iterator it = data.begin(); - it != data.end(); - ++it) { + for (KVMap::const_iterator it = data.begin(); it != data.end(); ++it) { memtable_->Add(seq, kTypeValue, it->first, it->second); seq++; } @@ -353,35 +339,28 @@ class MemTableConstructor: public Constructor { private: InternalKeyComparator internal_comparator_; MemTable* memtable_; + std::function new_mem_; }; -class DBConstructor: public Constructor { +class DBConstructor : public Constructor { public: - explicit DBConstructor(const Comparator* cmp) - : Constructor(cmp), - comparator_(cmp) { + explicit DBConstructor(const Comparator* cmp) : Constructor(cmp), comparator_(cmp) { db_ = NULL; NewDB(); } - ~DBConstructor() { - delete db_; - } + ~DBConstructor() { delete db_; } virtual Status FinishImpl(const Options& options, const KVMap& data) { delete db_; db_ = NULL; NewDB(); - for (KVMap::const_iterator it = data.begin(); - it != data.end(); - ++it) { + for (KVMap::const_iterator it = data.begin(); it != data.end(); ++it) { WriteBatch batch; batch.Put(it->first, it->second); ASSERT_TRUE(db_->Write(WriteOptions(), &batch).ok()); } return Status::OK(); } - virtual Iterator* NewIterator() const { - return db_->NewIterator(ReadOptions()); - } + virtual Iterator* NewIterator() const { return db_->NewIterator(ReadOptions()); } virtual DB* db() const { return db_; } @@ -408,7 +387,11 @@ enum TestType { TABLE_TEST, BLOCK_TEST, MEMTABLE_TEST, - DB_TEST + DB_TEST, + MEMTABLE_ON_LEVELDB_TEST, + SHARD_MEMTABLE_TEST, + SHARD_MEMTABLE_ON_LEVELDB_TEST, + PREFETCHED_TABLE_TEST, }; struct TestArgs { @@ -418,33 +401,45 @@ struct TestArgs { }; static const TestArgs kTestArgList[] = { - { TABLE_TEST, false, 16 }, - { TABLE_TEST, false, 1 }, - { TABLE_TEST, false, 1024 }, - { TABLE_TEST, true, 16 }, - { TABLE_TEST, true, 1 }, - { TABLE_TEST, true, 1024 }, - - { BLOCK_TEST, false, 16 }, - { BLOCK_TEST, false, 1 }, - { BLOCK_TEST, false, 1024 }, - { BLOCK_TEST, true, 16 }, - { BLOCK_TEST, true, 1 }, - { BLOCK_TEST, true, 1024 }, - - // Restart interval does not matter for memtables - { MEMTABLE_TEST, false, 16 }, - { MEMTABLE_TEST, true, 16 }, - - // Do not bother with restart interval variations for DB - { DB_TEST, false, 16 }, - { DB_TEST, true, 16 }, + {TABLE_TEST, false, 16}, + {TABLE_TEST, false, 1}, + {TABLE_TEST, false, 1024}, + {TABLE_TEST, true, 16}, + {TABLE_TEST, true, 1}, + {TABLE_TEST, true, 1024}, + {PREFETCHED_TABLE_TEST, false, 16}, + {PREFETCHED_TABLE_TEST, false, 1}, + {PREFETCHED_TABLE_TEST, false, 1024}, + {PREFETCHED_TABLE_TEST, true, 16}, + {PREFETCHED_TABLE_TEST, true, 1}, + {PREFETCHED_TABLE_TEST, true, 1024}, + + {BLOCK_TEST, false, 16}, + {BLOCK_TEST, false, 1}, + {BLOCK_TEST, false, 1024}, + {BLOCK_TEST, true, 16}, + {BLOCK_TEST, true, 1}, + {BLOCK_TEST, true, 1024}, + + // Restart interval does not matter for memtables + {MEMTABLE_TEST, false, 16}, + {MEMTABLE_TEST, true, 16}, + {MEMTABLE_ON_LEVELDB_TEST, false, 16}, + {MEMTABLE_ON_LEVELDB_TEST, true, 16}, + {SHARD_MEMTABLE_TEST, false, 16}, + {SHARD_MEMTABLE_TEST, true, 16}, + {SHARD_MEMTABLE_ON_LEVELDB_TEST, false, 16}, + {SHARD_MEMTABLE_ON_LEVELDB_TEST, true, 16}, + + // Do not bother with restart interval variations for DB + {DB_TEST, false, 16}, + {DB_TEST, true, 16}, }; static const int kNumTestArgs = sizeof(kTestArgList) / sizeof(kTestArgList[0]); class Harness { public: - Harness() : constructor_(NULL) { } + Harness() : constructor_(NULL) {} void Init(const TestArgs& args) { delete constructor_; @@ -460,27 +455,51 @@ class Harness { } switch (args.type) { case TABLE_TEST: - constructor_ = new TableConstructor(options_.comparator); + constructor_ = new TableConstructor(options_.comparator); break; case BLOCK_TEST: constructor_ = new BlockConstructor(options_.comparator); break; case MEMTABLE_TEST: - constructor_ = new MemTableConstructor(options_.comparator); + constructor_ = new MemTableConstructor( + options_.comparator, + std::bind( + NewMemTable, + std::placeholders::_1, nullptr)); break; case DB_TEST: constructor_ = new DBConstructor(options_.comparator); break; + case MEMTABLE_ON_LEVELDB_TEST: + constructor_ = new MemTableConstructor( + options_.comparator, + std::bind(NewMemTable, + "MemTableTest", std::placeholders::_1, nullptr, 1024, 1024, nullptr)); + break; + case SHARD_MEMTABLE_TEST: + constructor_ = new MemTableConstructor( + options_.comparator, + std::bind(NewMemTable, + std::placeholders::_1, nullptr, 16)); + break; + case SHARD_MEMTABLE_ON_LEVELDB_TEST: + constructor_ = new MemTableConstructor( + options_.comparator, + std::bind(NewMemTable, + "MemTableTest", std::placeholders::_1, nullptr, 1024, 1024, nullptr, 16)); + break; + case PREFETCHED_TABLE_TEST: + constructor_ = new TableConstructor(options_.comparator); + break; } } - ~Harness() { - delete constructor_; - } + ~Harness() { delete constructor_; } - void Add(const std::string& key, const std::string& value) { - constructor_->Add(key, value); - } + void Add(const std::string& key, const std::string& value) { constructor_->Add(key, value); } void Test(Random* rnd) { std::vector keys; @@ -492,28 +511,24 @@ class Harness { TestRandomAccess(rnd, keys, data); } - void TestForwardScan(const std::vector& keys, - const KVMap& data) { + void TestForwardScan(const std::vector& keys, const KVMap& data) { Iterator* iter = constructor_->NewIterator(); ASSERT_TRUE(!iter->Valid()); iter->SeekToFirst(); - for (KVMap::const_iterator model_iter = data.begin(); - model_iter != data.end(); - ++model_iter) { - ASSERT_EQ(ToString(data, model_iter), ToString(iter)); + for (KVMap::const_iterator model_iter = data.begin(); model_iter != data.end(); ++model_iter) { + ASSERT_EQ(ToString(data, model_iter), ToString(iter)) << ToString(iter) + << ToString(data, model_iter); iter->Next(); } ASSERT_TRUE(!iter->Valid()); delete iter; } - void TestBackwardScan(const std::vector& keys, - const KVMap& data) { + void TestBackwardScan(const std::vector& keys, const KVMap& data) { Iterator* iter = constructor_->NewIterator(); ASSERT_TRUE(!iter->Valid()); iter->SeekToLast(); - for (KVMap::const_reverse_iterator model_iter = data.rbegin(); - model_iter != data.rend(); + for (KVMap::const_reverse_iterator model_iter = data.rbegin(); model_iter != data.rend(); ++model_iter) { ASSERT_EQ(ToString(data, model_iter), ToString(iter)); iter->Prev(); @@ -522,9 +537,7 @@ class Harness { delete iter; } - void TestRandomAccess(Random* rnd, - const std::vector& keys, - const KVMap& data) { + void TestRandomAccess(Random* rnd, const std::vector& keys, const KVMap& data) { static const bool kVerbose = false; Iterator* iter = constructor_->NewIterator(); ASSERT_TRUE(!iter->Valid()); @@ -554,8 +567,7 @@ class Harness { case 2: { std::string key = PickRandomKey(rnd, keys); model_iter = data.lower_bound(key); - if (kVerbose) fprintf(stderr, "Seek '%s'\n", - EscapeString(key).c_str()); + if (kVerbose) fprintf(stderr, "Seek '%s'\n", EscapeString(key).c_str()); iter->Seek(Slice(key)); ASSERT_EQ(ToString(data, model_iter), ToString(iter)); break; @@ -566,7 +578,7 @@ class Harness { if (kVerbose) fprintf(stderr, "Prev\n"); iter->Prev(); if (model_iter == data.begin()) { - model_iter = data.end(); // Wrap around to invalid value + model_iter = data.end(); // Wrap around to invalid value } else { --model_iter; } @@ -600,8 +612,7 @@ class Harness { } } - std::string ToString(const KVMap& data, - const KVMap::const_reverse_iterator& it) { + std::string ToString(const KVMap& data, const KVMap::const_reverse_iterator& it) { if (it == data.rend()) { return "END"; } else { @@ -629,8 +640,8 @@ class Harness { break; case 1: { // Attempt to return something smaller than an existing key - if (result.size() > 0 && result[result.size()-1] > '\0') { - result[result.size()-1]--; + if (result.size() > 0 && result[result.size() - 1] > '\0') { + result[result.size() - 1]--; } break; } @@ -725,11 +736,10 @@ TEST(Harness, Randomized) { for (int i = 0; i < kNumTestArgs; i++) { Init(kTestArgList[i]); Random rnd(test::RandomSeed() + 5); - for (int num_entries = 0; num_entries < 2000; - num_entries += (num_entries < 50 ? 1 : 200)) { + for (int num_entries = 0; num_entries < 2000; num_entries += (num_entries < 50 ? 1 : 200)) { if ((num_entries % 10) == 0) { - fprintf(stderr, "case %d of %d: num_entries = %d\n", - (i + 1), int(kNumTestArgs), num_entries); + fprintf(stderr, "case %d of %d: num_entries = %d\n", (i + 1), int(kNumTestArgs), + num_entries); } for (int e = 0; e < num_entries; e++) { std::string v; @@ -743,7 +753,7 @@ TEST(Harness, Randomized) { TEST(Harness, RandomizedLongDB) { Random rnd(test::RandomSeed()); - TestArgs args = { DB_TEST, false, 16 }; + TestArgs args = {DB_TEST, false, 16}; Init(args); int num_entries = 100000; for (int e = 0; e < num_entries; e++) { @@ -765,11 +775,11 @@ TEST(Harness, RandomizedLongDB) { ASSERT_GT(files, 0); } -class MemTableTest { }; +class MemTableTest {}; TEST(MemTableTest, Simple) { InternalKeyComparator cmp(BytewiseComparator()); - MemTable* memtable = new MemTable(cmp); + MemTable* memtable = new BaseMemTable(cmp, nullptr); memtable->Ref(); WriteBatch batch; WriteBatchInternal::SetSequence(&batch, 100); @@ -782,8 +792,7 @@ TEST(MemTableTest, Simple) { Iterator* iter = memtable->NewIterator(); iter->SeekToFirst(); while (iter->Valid()) { - fprintf(stderr, "key: '%s' -> '%s'\n", - iter->key().ToString().c_str(), + fprintf(stderr, "key: '%s' -> '%s'\n", iter->key().ToString().c_str(), iter->value().ToString().c_str()); iter->Next(); } @@ -792,21 +801,46 @@ TEST(MemTableTest, Simple) { memtable->Unref(); } +TEST(MemTableTest, ShardedMemTableTest) { + InternalKeyComparator cmp(BytewiseComparator()); + MemTable* memtable = new ShardedMemTable(cmp, nullptr, 16); + memtable->Ref(); + WriteBatch batch; + WriteBatchInternal::SetSequence(&batch, 100); + for (int i = 0; i != 1000; ++i) { + std::stringstream ss; + ss << std::setw(4) << std::setfill('0') << i; + batch.Put(ss.str(), ss.str()); + } + ASSERT_TRUE(WriteBatchInternal::InsertInto(&batch, memtable).ok()); + + Iterator* iter = memtable->NewIterator(); + iter->SeekToFirst(); + for (int i = 0; i != 1000; ++i) { + ASSERT_TRUE(iter->Valid()); + std::stringstream ss; + ss << std::setw(4) << std::setfill('0') << i; + ASSERT_EQ(ss.str(), iter->value().ToString()); + iter->Next(); + } + ASSERT_TRUE(!iter->Valid()); + memtable->Unref(); +} + static bool Between(uint64_t val, uint64_t low, uint64_t high) { bool result = (val >= low) && (val <= high); if (!result) { - fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n", - (unsigned long long)(val), - (unsigned long long)(low), - (unsigned long long)(high)); + fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n", (unsigned long long)(val), + (unsigned long long)(low), (unsigned long long)(high)); } return result; } -class TableTest { }; +class TableTest {}; -TEST(TableTest, ApproximateOffsetOfPlain) { - TableConstructor c(BytewiseComparator()); +template +void ApproximateOffsetOfPlainImpl() { + TableConstructor c(BytewiseComparator()); c.Add("k01", "hello"); c.Add("k02", "hello2"); c.Add("k03", std::string(10000, 'x')); @@ -821,18 +855,22 @@ TEST(TableTest, ApproximateOffsetOfPlain) { options.compression = kNoCompression; c.Finish(options, &keys, &kvmap); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01a"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 10000, 11000)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01a"), 0, 0)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 0, 0)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 10000, 11000)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04a"), 210000, 211000)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k05"), 210000, 211000)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"), 510000, 511000)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"), 510000, 511000)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 612000)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k05"), 210000, 211000)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"), 510000, 511000)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"), 510000, 511000)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 612000)); +} +TEST(TableTest, ApproximateOffsetOfPlain) { + ApproximateOffsetOfPlainImpl(); + ApproximateOffsetOfPlainImpl(); } static bool SnappyCompressionSupported() { @@ -841,14 +879,15 @@ static bool SnappyCompressionSupported() { return port::Snappy_Compress(in.data(), in.size(), &out); } -TEST(TableTest, ApproximateOffsetOfCompressed) { +template +void ApproximateOffsetOfCompressed() { if (!SnappyCompressionSupported()) { fprintf(stderr, "skipping compression tests\n"); return; } Random rnd(301); - TableConstructor c(BytewiseComparator()); + TableConstructor c(BytewiseComparator()); std::string tmp; c.Add("k01", "hello"); c.Add("k02", test::CompressibleString(&rnd, 0.25, 10000, &tmp)); @@ -861,69 +900,202 @@ TEST(TableTest, ApproximateOffsetOfCompressed) { options.compression = kSnappyCompression; c.Finish(options, &keys, &kvmap); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 2000, 3000)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 2000, 3000)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 4000, 6000)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 2000, 3000)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 2000, 3000)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 4000, 6000)); +} + +TEST(TableTest, ApproximateOffsetOfCompressed) { + ApproximateOffsetOfCompressed(); + ApproximateOffsetOfCompressed(); } class FormatTest {}; static void CheckAlign(RandomAccessFile* file, size_t alignment, uint64_t offset, size_t len) { - DirectIOArgs args; - char* buf = DirectIOAlign(file, offset, len, &args); - if (buf != NULL) { - free(buf); - } - ASSERT_TRUE(args.aligned_offset % alignment == 0); - ASSERT_TRUE(args.aligned_len % alignment == 0); - ASSERT_TRUE(args.aligned_offset >= 0 && args.aligned_offset <= offset); - ASSERT_TRUE(args.aligned_len >= 0 && args.aligned_len >= len); - ASSERT_TRUE(args.aligned_offset + args.aligned_len >= offset + len); + DirectIOArgs args; + char* buf = DirectIOAlign(file, offset, len, &args); + if (buf != NULL) { + free(buf); + } + ASSERT_TRUE(args.aligned_offset % alignment == 0); + ASSERT_TRUE(args.aligned_len % alignment == 0); + ASSERT_TRUE(args.aligned_offset >= 0 && args.aligned_offset <= offset); + ASSERT_TRUE(args.aligned_len >= 0 && args.aligned_len >= len); + ASSERT_TRUE(args.aligned_offset + args.aligned_len >= offset + len); } TEST(FormatTest, DirectIOAlign) { - WritableFile* write_file; - RandomAccessFile* file; - std::string filename = "/tmp/direct_io_align"; - ASSERT_OK(Env::Default()->NewWritableFile(filename, &write_file, EnvOptions())); - ASSERT_OK(write_file->Append("test")); - ASSERT_OK(write_file->Close()); - ASSERT_OK(Env::Default()->NewRandomAccessFile(filename, &file, EnvOptions())); - size_t alignment = file->GetRequiredBufferAlignment(); - - uint64_t offset = 0; - size_t len = 0; - uint64_t offset_before = 1; - size_t len_before = 1; - for (int i = 0; i < 10; ++i) { - for (int j = 0; j < 10; ++j) { - offset = alignment * j; - len = alignment * i; - // same to align - CheckAlign(file, alignment, offset, len); - // offset = align * j + 1 - // len = align * i + 1 - CheckAlign(file, alignment, offset + offset_before, len + len_before); - // offset = align * j - 1 && offset >= 0 - // len = align * i - 1 && len >= 0 - uint64_t tmp_offset = offset == 0 ? 0 : offset - offset_before; - size_t tmp_len = len == 0 ? 0 : len - len_before; - CheckAlign(file, alignment, tmp_offset, tmp_len); - // offset = align * j + 1 - // len = align * i - 1 && len >= 0 - CheckAlign(file, alignment, offset + offset_before, tmp_len); - // offset = align * j - 1 && offset >= 0 - // len = align * i + 1 - CheckAlign(file, alignment, tmp_offset, len + len_before); - } + WritableFile* write_file; + RandomAccessFile* file; + std::string filename = "/tmp/direct_io_align"; + ASSERT_OK(Env::Default()->NewWritableFile(filename, &write_file, EnvOptions())); + ASSERT_OK(write_file->Append("test")); + ASSERT_OK(write_file->Close()); + ASSERT_OK(Env::Default()->NewRandomAccessFile(filename, &file, EnvOptions())); + size_t alignment = file->GetRequiredBufferAlignment(); + + uint64_t offset = 0; + size_t len = 0; + uint64_t offset_before = 1; + size_t len_before = 1; + for (int i = 0; i < 10; ++i) { + for (int j = 0; j < 10; ++j) { + offset = alignment * j; + len = alignment * i; + // same to align + CheckAlign(file, alignment, offset, len); + // offset = align * j + 1 + // len = align * i + 1 + CheckAlign(file, alignment, offset + offset_before, len + len_before); + // offset = align * j - 1 && offset >= 0 + // len = align * i - 1 && len >= 0 + uint64_t tmp_offset = offset == 0 ? 0 : offset - offset_before; + size_t tmp_len = len == 0 ? 0 : len - len_before; + CheckAlign(file, alignment, tmp_offset, tmp_len); + // offset = align * j + 1 + // len = align * i - 1 && len >= 0 + CheckAlign(file, alignment, offset + offset_before, tmp_len); + // offset = align * j - 1 && offset >= 0 + // len = align * i + 1 + CheckAlign(file, alignment, tmp_offset, len + len_before); } + } } -} // namespace leveldb +class MockRandomAccessFile : public RandomAccessFile { + public: + MockRandomAccessFile(std::atomic* r) : reader{r} {}; + + ~MockRandomAccessFile() override = default; + + Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const override { + ++*reader; + event_.Wait(); + return Status::NotFound("Mock random access file"); + } + + std::string GetFileName() const override { return "abc"; } + + void Set() { event_.Set(); } + + private: + mutable common::AutoResetEvent event_; + std::atomic* reader; +}; + +class MockPersistentCache : public PersistentCache { + public: + ~MockPersistentCache() override = default; + + Status Read(const Slice& key, size_t offset, size_t length, Slice* content, + SstDataScratch* scratch) override { + return Status::NotFound("not-found"); + } + + void ForceEvict(const Slice& key) override { return; } -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); + Status NewWriteableCacheFile(const std::string& path, WriteableCacheFile** pFile) override { + return Status::InvalidArgument("Mock Persistent Cache"); + } + + size_t GetCapacity() const override { return 0; } + + size_t GetUsage() const override { return 0; } + + Status Open() override { return Status::InvalidArgument("Mock Persistent Cache"); } + + std::vector GetAllKeys() override { return {}; } + + void GarbageCollect() override { return; } +}; + +class DfsLimiterTest {}; +static void ReadFileWithCheck(RandomAccessFile* file, ReadOptions* opt, + std::function checker) { + // handle is not important + BlockHandle handle; + handle.set_offset(1000); + handle.set_size(100); + BlockContents bc; + auto s = ReadBlock(file, *opt, handle, &bc); + fprintf(stderr, "status %s\n", s.ToString().c_str()); + ASSERT_TRUE(checker(&s)); } +TEST(DfsLimiterTest, RejectTest) { + for (auto limit_val : {0, 10, 50}) { + leveldb::DfsReadThreadLimiter::Instance().SetLimit(limit_val); + Options opt; + opt.persistent_cache.reset(new MockPersistentCache); + ReadOptions read_opt; + read_opt.db_opt = &opt; + read_opt.enable_dfs_read_thread_limiter = true; + std::vector> files; + std::atomic reader{0}; + for (auto i = 0; i != limit_val + 20; ++i) { + files.emplace_back(new MockRandomAccessFile{&reader}); + } + std::vector threads; + std::atomic reject_count{0}; + for (auto i = 0; i != limit_val; ++i) { + threads.emplace_back(ReadFileWithCheck, (RandomAccessFile*)files[i].get(), &read_opt, + &Status::IsNotFound); + } + while (reader.load() != limit_val) { + fprintf(stderr, "Waiting for reader == %d, current %d.\n", limit_val, reader.load()); + std::this_thread::sleep_for(std::chrono::seconds(1)); + } + for (auto i = 0; i != 20; ++i) { + threads.emplace_back(ReadFileWithCheck, (RandomAccessFile*)files[i].get(), &read_opt, + [&reject_count](Status* s) { + if (s->IsReject()) { + ++reject_count; + return true; + } else { + return false; + } + }); + } + while (reject_count.load() != 20) { + fprintf(stderr, "Waiting for reject_count == %d, current %d.\n", 20, reject_count.load()); + std::this_thread::sleep_for(std::chrono::seconds(1)); + } + for (auto& file : files) { + file->Set(); + } + for (auto& thread : threads) { + thread.join(); + } + } +} + +TEST(TableTest, SeekToKeyGapTest) { + StringSink sink; + Options options; + TableBuilder builder(options, &sink); + builder.Add("ab", "abb"); + builder.Flush(); + builder.Add("ad", "add"); + Status s = builder.Finish(); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(sink.contents().size(), builder.FileSize()); + // Open the table + auto source = new StringSource(sink.contents()); + Table* table; + s = Table::Open(options, source, sink.contents().size(), &table); + ASSERT_TRUE(s.ok()) << s.ToString(); + ReadOptions r_options(&options); + r_options.prefetch_scan = true; + auto iter = table->NewIterator(r_options); + iter->Seek("abb"); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key().ToString(), "ad"); + ASSERT_EQ(iter->value().ToString(), "add"); + delete iter; +} +} // namespace leveldb + +int main(int argc, char** argv) { return leveldb::test::RunAllTests(); } diff --git a/src/leveldb/table/two_level_iterator.cc b/src/leveldb/table/two_level_iterator.cc index bb8e6249b..da7225e6b 100644 --- a/src/leveldb/table/two_level_iterator.cc +++ b/src/leveldb/table/two_level_iterator.cc @@ -6,26 +6,23 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. -#include "table/two_level_iterator.h" - +#include +#include "format.h" #include "leveldb/table.h" #include "table/block.h" -#include "table/format.h" #include "table/iterator_wrapper.h" +#include "table/two_level_iterator.h" namespace leveldb { namespace { -typedef Iterator* (*BlockFunction)(void*, const ReadOptions&, const Slice&); +using BlockFunction = std::function; -class TwoLevelIterator: public Iterator { +class TwoLevelIterator : public Iterator { public: - TwoLevelIterator( - Iterator* index_iter, - BlockFunction block_function, - void* arg, - const ReadOptions& options); + TwoLevelIterator(Iterator* index_iter, const BlockFunction& block_function, void* arg, + const ReadOptions& options); virtual ~TwoLevelIterator(); @@ -35,9 +32,7 @@ class TwoLevelIterator: public Iterator { virtual void Next(); virtual void Prev(); - virtual bool Valid() const { - return data_iter_.Valid(); - } + virtual bool Valid() const { return data_iter_.Valid(); } virtual Slice key() const { assert(Valid()); return data_iter_.key(); @@ -71,26 +66,21 @@ class TwoLevelIterator: public Iterator { const ReadOptions options_; Status status_; IteratorWrapper index_iter_; - IteratorWrapper data_iter_; // May be NULL + IteratorWrapper data_iter_; // May be NULL // If data_iter_ is non-NULL, then "data_block_handle_" holds the // "index_value" passed to block_function_ to create the data_iter_. std::string data_block_handle_; }; -TwoLevelIterator::TwoLevelIterator( - Iterator* index_iter, - BlockFunction block_function, - void* arg, - const ReadOptions& options) +TwoLevelIterator::TwoLevelIterator(Iterator* index_iter, const BlockFunction& block_function, + void* arg, const ReadOptions& options) : block_function_(block_function), arg_(arg), options_(options), index_iter_(index_iter), - data_iter_(NULL) { -} + data_iter_(NULL) {} -TwoLevelIterator::~TwoLevelIterator() { -} +TwoLevelIterator::~TwoLevelIterator() {} void TwoLevelIterator::Seek(const Slice& target) { index_iter_.Seek(target); @@ -125,9 +115,8 @@ void TwoLevelIterator::Prev() { SkipEmptyDataBlocksBackward(); } - void TwoLevelIterator::SkipEmptyDataBlocksForward() { - while (data_iter_.iter() == NULL || !data_iter_.Valid()) { + while (data_iter_.iter() == NULL || (!data_iter_.Valid() && data_iter_.status().ok())) { // Move to next block if (!index_iter_.Valid()) { SetDataIterator(NULL); @@ -140,7 +129,7 @@ void TwoLevelIterator::SkipEmptyDataBlocksForward() { } void TwoLevelIterator::SkipEmptyDataBlocksBackward() { - while (data_iter_.iter() == NULL || !data_iter_.Valid()) { + while (data_iter_.iter() == NULL || (!data_iter_.Valid() && data_iter_.status().ok())) { // Move to next block if (!index_iter_.Valid()) { SetDataIterator(NULL); @@ -166,20 +155,20 @@ void TwoLevelIterator::InitDataBlock() { // data_iter_ is already constructed with this iterator, so // no need to change anything } else { - Iterator* iter = (*block_function_)(arg_, options_, handle); + Iterator* iter = block_function_(arg_, options_, handle); data_block_handle_.assign(handle.data(), handle.size()); SetDataIterator(iter); + if (Valid()) { + assert(status().ok()); + } } } } } // namespace -Iterator* NewTwoLevelIterator( - Iterator* index_iter, - BlockFunction block_function, - void* arg, - const ReadOptions& options) { +Iterator* NewTwoLevelIterator(Iterator* index_iter, const BlockFunction& block_function, void* arg, + const ReadOptions& options) { return new TwoLevelIterator(index_iter, block_function, arg, options); } diff --git a/src/leveldb/table/two_level_iterator.h b/src/leveldb/table/two_level_iterator.h index 6e396cae2..afe637b01 100644 --- a/src/leveldb/table/two_level_iterator.h +++ b/src/leveldb/table/two_level_iterator.h @@ -9,6 +9,7 @@ #ifndef STORAGE_LEVELDB_TABLE_TWO_LEVEL_ITERATOR_H_ #define STORAGE_LEVELDB_TABLE_TWO_LEVEL_ITERATOR_H_ +#include #include "leveldb/iterator.h" namespace leveldb { @@ -22,16 +23,17 @@ struct ReadOptions; // in the sequence of blocks. Takes ownership of "index_iter" and // will delete it when no longer needed. // -// Uses a supplied function to convert an index_iter value into +// block_function is a supplied function to convert an index_iter value into // an iterator over the contents of the corresponding block. +// Arguments of block_function: +// void* : user specified argument. +// const ReadOptions& : read options for read operates. +// const Slice&: index value passed to block_function. Usually, it's a block handle data of an sst +// file. extern Iterator* NewTwoLevelIterator( Iterator* index_iter, - Iterator* (*block_function)( - void* arg, - const ReadOptions& options, - const Slice& index_value), - void* arg, - const ReadOptions& options); + const std::function& block_function, + void* arg, const ReadOptions& options); } // namespace leveldb diff --git a/src/leveldb/util/arena.cc b/src/leveldb/util/arena.cc index 73ae99e0f..b667b5ee0 100644 --- a/src/leveldb/util/arena.cc +++ b/src/leveldb/util/arena.cc @@ -44,9 +44,9 @@ char* Arena::AllocateFallback(size_t bytes) { } char* Arena::AllocateAligned(size_t bytes) { - const int align = sizeof(void*); // We'll align to pointer size - assert((align & (align-1)) == 0); // Pointer size should be a power of 2 - size_t current_mod = reinterpret_cast(alloc_ptr_) & (align-1); + const int align = sizeof(void*); // We'll align to pointer size + assert((align & (align - 1)) == 0); // Pointer size should be a power of 2 + size_t current_mod = reinterpret_cast(alloc_ptr_) & (align - 1); size_t slop = (current_mod == 0 ? 0 : align - current_mod); size_t needed = bytes + slop; char* result; @@ -58,7 +58,7 @@ char* Arena::AllocateAligned(size_t bytes) { // AllocateFallback always returned aligned memory result = AllocateFallback(bytes); } - assert((reinterpret_cast(result) & (align-1)) == 0); + assert((reinterpret_cast(result) & (align - 1)) == 0); return result; } diff --git a/src/leveldb/util/arena.h b/src/leveldb/util/arena.h index 6f067d13e..edae7fec9 100644 --- a/src/leveldb/util/arena.h +++ b/src/leveldb/util/arena.h @@ -30,9 +30,7 @@ class Arena { // Returns an estimate of the total memory usage of data allocated // by the arena (including space allocated but not yet used for user // allocations). - size_t MemoryUsage() const { - return blocks_memory_ + blocks_.capacity() * sizeof(char*); - } + size_t MemoryUsage() const { return blocks_memory_ + blocks_.capacity() * sizeof(char*); } private: char* AllocateFallback(size_t bytes); diff --git a/src/leveldb/util/arena_test.cc b/src/leveldb/util/arena_test.cc index 583409fc8..7a5bac091 100644 --- a/src/leveldb/util/arena_test.cc +++ b/src/leveldb/util/arena_test.cc @@ -13,11 +13,9 @@ namespace leveldb { -class ArenaTest { }; +class ArenaTest {}; -TEST(ArenaTest, Empty) { - Arena arena; -} +TEST(ArenaTest, Empty) { Arena arena; } TEST(ArenaTest, Simple) { std::vector > allocated; @@ -30,8 +28,8 @@ TEST(ArenaTest, Simple) { if (i % (N / 10) == 0) { s = i; } else { - s = rnd.OneIn(4000) ? rnd.Uniform(6000) : - (rnd.OneIn(10) ? rnd.Uniform(100) : rnd.Uniform(20)); + s = rnd.OneIn(4000) ? rnd.Uniform(6000) + : (rnd.OneIn(10) ? rnd.Uniform(100) : rnd.Uniform(20)); } if (s == 0) { // Our arena disallows size 0 allocations. @@ -51,7 +49,7 @@ TEST(ArenaTest, Simple) { bytes += s; allocated.push_back(std::make_pair(s, r)); ASSERT_GE(arena.MemoryUsage(), bytes); - if (i > N/10) { + if (i > N / 10) { ASSERT_LE(arena.MemoryUsage(), bytes * 1.10); } } @@ -67,6 +65,4 @@ TEST(ArenaTest, Simple) { } // namespace leveldb -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} +int main(int argc, char** argv) { return leveldb::test::RunAllTests(); } diff --git a/src/leveldb/util/bloom.cc b/src/leveldb/util/bloom.cc index 46bc022ee..92807fa76 100644 --- a/src/leveldb/util/bloom.cc +++ b/src/leveldb/util/bloom.cc @@ -34,17 +34,14 @@ class BloomFilterPolicy : public FilterPolicy { public: explicit BloomFilterPolicy(int bits_per_key, BloomHashMethod hash_method) - : bits_per_key_(bits_per_key), - hash_method_(hash_method) { + : bits_per_key_(bits_per_key), hash_method_(hash_method) { // We intentionally round down to reduce probing cost a little bit k_ = static_cast(bits_per_key * 0.69); // 0.69 =~ ln(2) if (k_ < 1) k_ = 1; if (k_ > 30) k_ = 30; } - virtual const char* Name() const { - return "leveldb.BuiltinBloomFilter"; - } + virtual const char* Name() const { return "leveldb.BuiltinBloomFilter"; } virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const { // Compute bloom filter size (in both bits and bytes) @@ -68,7 +65,7 @@ class BloomFilterPolicy : public FilterPolicy { const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits for (size_t j = 0; j < k_; j++) { const uint32_t bitpos = h % bits; - array[bitpos/8] |= (1 << (bitpos % 8)); + array[bitpos / 8] |= (1 << (bitpos % 8)); h += delta; } } @@ -83,7 +80,7 @@ class BloomFilterPolicy : public FilterPolicy { // Use the encoded k so that we can read filters generated by // bloom filters created using different parameters. - const size_t k = array[len-1]; + const size_t k = array[len - 1]; if (k > 30) { // Reserved for potentially new encodings for short bloom filters. // Consider it a match. @@ -94,7 +91,7 @@ class BloomFilterPolicy : public FilterPolicy { const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits for (size_t j = 0; j < k; j++) { const uint32_t bitpos = h % bits; - if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false; + if ((array[bitpos / 8] & (1 << (bitpos % 8))) == 0) return false; h += delta; } return true; @@ -108,13 +105,9 @@ class RowKeyBloomFilterPolicy : public BloomFilterPolicy { public: explicit RowKeyBloomFilterPolicy(int bits_per_key, BloomHashMethod hash_method, const RawKeyOperator* raw_key_operator) - : BloomFilterPolicy(bits_per_key, hash_method), - raw_key_operator_(raw_key_operator) { - } + : BloomFilterPolicy(bits_per_key, hash_method), raw_key_operator_(raw_key_operator) {} - virtual const char* Name() const { - return "tera.RowKeyBloomFilter"; - } + virtual const char* Name() const { return "tera.RowKeyBloomFilter"; } virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const { Slice* row_keys = new Slice[n]; @@ -140,7 +133,6 @@ class RowKeyBloomFilterPolicy : public BloomFilterPolicy { } } }; - } const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) { @@ -151,7 +143,8 @@ const FilterPolicy* NewTTLKvBloomFilterPolicy(int bits_per_key) { return new BloomFilterPolicy(bits_per_key, TTLKvBloomHash); } -const FilterPolicy* NewRowKeyBloomFilterPolicy(int bits_per_key, const RawKeyOperator* raw_key_operator) { +const FilterPolicy* NewRowKeyBloomFilterPolicy(int bits_per_key, + const RawKeyOperator* raw_key_operator) { return new RowKeyBloomFilterPolicy(bits_per_key, BuiltInBloomHash, raw_key_operator); } diff --git a/src/leveldb/util/bloom_test.cc b/src/leveldb/util/bloom_test.cc index 4bbc7e1d1..c09587b25 100644 --- a/src/leveldb/util/bloom_test.cc +++ b/src/leveldb/util/bloom_test.cc @@ -29,20 +29,16 @@ class BloomTest { std::vector keys_; public: - BloomTest() : policy_(NewBloomFilterPolicy(10)) { } + BloomTest() : policy_(NewBloomFilterPolicy(10)) {} - ~BloomTest() { - delete policy_; - } + ~BloomTest() { delete policy_; } void Reset() { keys_.clear(); filter_.clear(); } - void Add(const Slice& s) { - keys_.push_back(s.ToString()); - } + void Add(const Slice& s) { keys_.push_back(s.ToString()); } void Build() { std::vector key_slices; @@ -55,16 +51,14 @@ class BloomTest { if (kVerbose >= 2) DumpFilter(); } - size_t FilterSize() const { - return filter_.size(); - } + size_t FilterSize() const { return filter_.size(); } void DumpFilter() { fprintf(stderr, "F("); - for (size_t i = 0; i+1 < filter_.size(); i++) { + for (size_t i = 0; i + 1 < filter_.size(); i++) { const unsigned int c = static_cast(filter_[i]); for (int j = 0; j < 8; j++) { - fprintf(stderr, "%c", (c & (1 <= 1) { - fprintf(stderr, "False positives: %5.2f%% @ length = %6zd ; bytes = %6zd\n", - rate*100.0, length, FilterSize()); + fprintf(stderr, "False positives: %5.2f%% @ length = %6zd ; bytes = %6zd\n", rate * 100.0, + length, FilterSize()); } - ASSERT_LE(rate, 0.02); // Must not be over 2% - if (rate > 0.0125) mediocre_filters++; // Allowed, but not too often - else good_filters++; + ASSERT_LE(rate, 0.02); // Must not be over 2% + if (rate > 0.0125) + mediocre_filters++; // Allowed, but not too often + else + good_filters++; } if (kVerbose >= 1) { - fprintf(stderr, "Filters: %d good, %d mediocre\n", - good_filters, mediocre_filters); + fprintf(stderr, "Filters: %d good, %d mediocre\n", good_filters, mediocre_filters); } - ASSERT_LE(mediocre_filters, good_filters/5); + ASSERT_LE(mediocre_filters, good_filters / 5); } // Different bits-per-byte } // namespace leveldb -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} +int main(int argc, char** argv) { return leveldb::test::RunAllTests(); } diff --git a/src/leveldb/util/cache.cc b/src/leveldb/util/cache.cc index c5209b580..eaeb717f3 100644 --- a/src/leveldb/util/cache.cc +++ b/src/leveldb/util/cache.cc @@ -18,8 +18,7 @@ namespace leveldb { -Cache::~Cache() { -} +Cache::~Cache() {} namespace { @@ -35,9 +34,7 @@ class HandleTable { HandleTable() : length_(0), elems_(0), list_(NULL) { Resize(); } ~HandleTable() { delete[] list_; } - LRUHandle* Lookup(const Slice& key, uint32_t hash) { - return *FindPointer(key, hash); - } + LRUHandle* Lookup(const Slice& key, uint32_t hash) { return *FindPointer(key, hash); } LRUHandle* Insert(LRUHandle* h) { LRUHandle** ptr = FindPointer(h->key(), h->hash); @@ -77,8 +74,7 @@ class HandleTable { // pointer to the trailing slot in the corresponding linked list. LRUHandle** FindPointer(const Slice& key, uint32_t hash) { LRUHandle** ptr = &list_[hash & (length_ - 1)]; - while (*ptr != NULL && - ((*ptr)->hash != hash || key != (*ptr)->key())) { + while (*ptr != NULL && ((*ptr)->hash != hash || key != (*ptr)->key())) { ptr = &(*ptr)->next_hash; } return ptr; @@ -89,7 +85,7 @@ class HandleTable { while (new_length < elems_) { new_length *= 2; } - LRUHandle** new_list = new LRUHandle*[new_length]; + LRUHandle** new_list = new LRUHandle* [new_length]; memset(new_list, 0, sizeof(new_list[0]) * new_length); uint32_t count = 0; for (uint32_t i = 0; i < length_; i++) { @@ -121,8 +117,7 @@ class LRUCache { void SetCapacity(size_t capacity) { capacity_ = capacity; } // Like Cache methods, but with an extra "hash" parameter. - Cache::Handle* Insert(const Slice& key, uint32_t hash, - void* value, size_t charge, + Cache::Handle* Insert(const Slice& key, uint32_t hash, void* value, size_t charge, void (*deleter)(const Slice& key, void* value)); Cache::Handle* Lookup(const Slice& key, uint32_t hash); void Release(Cache::Handle* handle); @@ -150,17 +145,14 @@ class LRUCache { HandleTable table_; }; -LRUCache::LRUCache() - : capacity_(0), - usage_(0), - entries_(0) { +LRUCache::LRUCache() : capacity_(0), usage_(0), entries_(0) { // Make empty circular linked list lru_.next = &lru_; lru_.prev = &lru_; } LRUCache::~LRUCache() { - for (LRUHandle* e = lru_.next; e != &lru_; ) { + for (LRUHandle* e = lru_.next; e != &lru_;) { LRUHandle* next = e->next; assert(e->refs == 1); // Error if caller has an unreleased handle Unref(e); @@ -208,13 +200,11 @@ void LRUCache::Release(Cache::Handle* handle) { Unref(reinterpret_cast(handle)); } -Cache::Handle* LRUCache::Insert( - const Slice& key, uint32_t hash, void* value, size_t charge, - void (*deleter)(const Slice& key, void* value)) { +Cache::Handle* LRUCache::Insert(const Slice& key, uint32_t hash, void* value, size_t charge, + void (*deleter)(const Slice& key, void* value)) { MutexLock l(&mutex_); - LRUHandle* e = reinterpret_cast( - malloc(sizeof(LRUHandle)-1 + key.size())); + LRUHandle* e = reinterpret_cast(malloc(sizeof(LRUHandle) - 1 + key.size())); e->value = value; e->deleter = deleter; e->charge = charge; @@ -262,13 +252,10 @@ size_t LRUCache::TotalCharge() { } // data_set's lru cache is used for ssd's block cache, which limits cache size. -class LRUBlockBasedCache: public Cache { +class LRUBlockBasedCache : public Cache { public: - explicit LRUBlockBasedCache(size_t capacity) - : capacity_(capacity), - usage_(0), - max_cache_id_(0) { - // Make empty circular linked list + explicit LRUBlockBasedCache(size_t capacity) : capacity_(capacity), usage_(0), max_cache_id_(0) { + // Make empty circular linked list lru_.next = &lru_; lru_.prev = &lru_; } @@ -282,22 +269,21 @@ class LRUBlockBasedCache: public Cache { const uint32_t hash = HashSlice(key); MutexLock l(&mutex_); LRUHandle* e = NULL; - //e = (LRUHandle*)DoLookup(key, hash); - //if (e != NULL) { + // e = (LRUHandle*)DoLookup(key, hash); + // if (e != NULL) { // assert(0); // return reinterpret_cast(e); //} - if (usage_ < capacity_) { // cache not full - e = reinterpret_cast( - malloc(sizeof(LRUHandle)-1 + key.size())); + if (usage_ < capacity_) { // cache not full + e = reinterpret_cast(malloc(sizeof(LRUHandle) - 1 + key.size())); e->value = value; e->deleter = deleter; e->charge = 1; e->key_length = key.size(); e->hash = hash; e->refs = 2; // One from LRUCache, one for the returned handle - e->cache_id = cache_id == 0xffffffffffffffff ? usage_: cache_id; + e->cache_id = cache_id == 0xffffffffffffffff ? usage_ : cache_id; memcpy(e->key_data, key.data(), key.size()); max_cache_id_ = max_cache_id_ < e->cache_id ? e->cache_id : max_cache_id_; @@ -308,7 +294,8 @@ class LRUBlockBasedCache: public Cache { } assert(max_cache_id_ + 1 == usage_); assert(usage_ == capacity_); - //fprintf(stderr, "%lu, usage %lu, capacity %lu\n", (uint64_t)this, usage_, capacity_); + // fprintf(stderr, "%lu, usage %lu, capacity %lu\n", (uint64_t)this, usage_, + // capacity_); // cache full, reuse item LRUHandle* old = lru_.next; @@ -317,8 +304,7 @@ class LRUBlockBasedCache: public Cache { old = old->next; continue; } - e = reinterpret_cast( - malloc(sizeof(LRUHandle)-1 + key.size())); + e = reinterpret_cast(malloc(sizeof(LRUHandle) - 1 + key.size())); e->value = value; e->deleter = deleter; e->charge = 1; @@ -361,17 +347,11 @@ class LRUBlockBasedCache: public Cache { Unref(reinterpret_cast(handle)); } - void* Value(Cache::Handle* handle) { - return reinterpret_cast(handle)->value; - } + void* Value(Cache::Handle* handle) { return reinterpret_cast(handle)->value; } - uint64_t NewId() { - return 0; - } + uint64_t NewId() { return 0; } - double HitRate(bool force_clear = false) { - return 99.9999; - } + double HitRate(bool force_clear = false) { return 99.9999; } size_t Entries() { MutexLock l(&mutex_); @@ -387,9 +367,9 @@ class LRUBlockBasedCache: public Cache { Cache::Handle* DoLookup(const Slice& key, uint32_t hash) { LRUHandle* e = table_.Lookup(key, hash); if (e != NULL) { - e->refs++; - LRU_Remove(e); - LRU_Append(e); + e->refs++; + LRU_Remove(e); + LRU_Append(e); } return reinterpret_cast(e); } @@ -417,9 +397,7 @@ class LRUBlockBasedCache: public Cache { } } - inline uint32_t HashSlice(const Slice& s) { - return Hash(s.data(), s.size(), 0); - } + inline uint32_t HashSlice(const Slice& s) { return Hash(s.data(), s.size(), 0); } // Initialized before use. size_t capacity_; @@ -431,8 +409,8 @@ class LRUBlockBasedCache: public Cache { // Dummy head of LRU list. // lru.prev is newest entry, lru.next is oldest entry. - //LRUHandle hot_lru_; - //LRUHandle cold_lru_; + // LRUHandle hot_lru_; + // LRUHandle cold_lru_; LRUHandle lru_; HandleTable table_; @@ -449,25 +427,18 @@ class ShardedLRUCache : public Cache { uint64_t hits_; uint64_t lookups_; - static inline uint32_t HashSlice(const Slice& s) { - return Hash(s.data(), s.size(), 0); - } + static inline uint32_t HashSlice(const Slice& s) { return Hash(s.data(), s.size(), 0); } - static uint32_t Shard(uint32_t hash) { - return hash >> (32 - kNumShardBits); - } + static uint32_t Shard(uint32_t hash) { return hash >> (32 - kNumShardBits); } public: - explicit ShardedLRUCache(size_t capacity) - : last_id_(0), - hits_(0), - lookups_(0) { + explicit ShardedLRUCache(size_t capacity) : last_id_(0), hits_(0), lookups_(0) { const size_t per_shard = (capacity + (kNumShards - 1)) / kNumShards; for (int s = 0; s < kNumShards; s++) { shard_[s].SetCapacity(per_shard); } } - virtual ~ShardedLRUCache() { } + virtual ~ShardedLRUCache() {} virtual Handle* Insert(const Slice& key, void* value, size_t charge, void (*deleter)(const Slice& key, void* value)) { const uint32_t hash = HashSlice(key); @@ -491,9 +462,7 @@ class ShardedLRUCache : public Cache { const uint32_t hash = HashSlice(key); shard_[Shard(hash)].Erase(key, hash); } - virtual void* Value(Handle* handle) { - return reinterpret_cast(handle)->value; - } + virtual void* Value(Handle* handle) { return reinterpret_cast(handle)->value; } virtual uint64_t NewId() { MutexLock l(&id_mutex_); return ++(last_id_); @@ -530,12 +499,8 @@ class ShardedLRUCache : public Cache { } // end anonymous namespace -Cache* NewLRUCache(size_t capacity) { - return new ShardedLRUCache(capacity); -} +Cache* NewLRUCache(size_t capacity) { return new ShardedLRUCache(capacity); } -Cache* NewBlockBasedCache(size_t capacity) { - return new LRUBlockBasedCache(capacity); -} +Cache* NewBlockBasedCache(size_t capacity) { return new LRUBlockBasedCache(capacity); } } // namespace leveldb diff --git a/src/leveldb/util/cache_test.cc b/src/leveldb/util/cache_test.cc index 0c9c8ad65..e902c98c1 100644 --- a/src/leveldb/util/cache_test.cc +++ b/src/leveldb/util/cache_test.cc @@ -41,13 +41,9 @@ class CacheTest { std::vector deleted_values_; Cache* cache_; - CacheTest() : cache_(NewLRUCache(kCacheSize)) { - current_ = this; - } + CacheTest() : cache_(NewLRUCache(kCacheSize)) { current_ = this; } - ~CacheTest() { - delete cache_; - } + ~CacheTest() { delete cache_; } int Lookup(int key) { Cache::Handle* handle = cache_->Lookup(EncodeKey(key)); @@ -59,13 +55,11 @@ class CacheTest { } void Insert(int key, int value, int charge = 1) { - cache_->Release(cache_->Insert(EncodeKey(key), EncodeValue(value), charge, - &CacheTest::Deleter)); + cache_->Release( + cache_->Insert(EncodeKey(key), EncodeValue(value), charge, &CacheTest::Deleter)); } - void Erase(int key) { - cache_->Erase(EncodeKey(key)); - } + void Erase(int key) { cache_->Erase(EncodeKey(key)); } }; CacheTest* CacheTest::current_; @@ -74,18 +68,18 @@ TEST(CacheTest, HitAndMiss) { Insert(100, 101); ASSERT_EQ(101, Lookup(100)); - ASSERT_EQ(-1, Lookup(200)); - ASSERT_EQ(-1, Lookup(300)); + ASSERT_EQ(-1, Lookup(200)); + ASSERT_EQ(-1, Lookup(300)); Insert(200, 201); ASSERT_EQ(101, Lookup(100)); ASSERT_EQ(201, Lookup(200)); - ASSERT_EQ(-1, Lookup(300)); + ASSERT_EQ(-1, Lookup(300)); Insert(100, 102); ASSERT_EQ(102, Lookup(100)); ASSERT_EQ(201, Lookup(200)); - ASSERT_EQ(-1, Lookup(300)); + ASSERT_EQ(-1, Lookup(300)); ASSERT_EQ(1u, deleted_keys_.size()); ASSERT_EQ(100, deleted_keys_[0]); @@ -99,14 +93,14 @@ TEST(CacheTest, Erase) { Insert(100, 101); Insert(200, 201); Erase(100); - ASSERT_EQ(-1, Lookup(100)); + ASSERT_EQ(-1, Lookup(100)); ASSERT_EQ(201, Lookup(200)); ASSERT_EQ(1u, deleted_keys_.size()); ASSERT_EQ(100, deleted_keys_[0]); ASSERT_EQ(101, deleted_values_[0]); Erase(100); - ASSERT_EQ(-1, Lookup(100)); + ASSERT_EQ(-1, Lookup(100)); ASSERT_EQ(201, Lookup(200)); ASSERT_EQ(1u, deleted_keys_.size()); } @@ -142,8 +136,8 @@ TEST(CacheTest, EvictionPolicy) { // Frequently used entry must be kept around for (int i = 0; i < kCacheSize + 100; i++) { - Insert(1000+i, 2000+i); - ASSERT_EQ(2000+i, Lookup(1000+i)); + Insert(1000 + i, 2000 + i); + ASSERT_EQ(2000 + i, Lookup(1000 + i)); ASSERT_EQ(101, Lookup(100)); } ASSERT_EQ(101, Lookup(100)); @@ -158,9 +152,9 @@ TEST(CacheTest, HeavyEntries) { const int kHeavy = 10; int added = 0; int index = 0; - while (added < 2*kCacheSize) { + while (added < 2 * kCacheSize) { const int weight = (index & 1) ? kLight : kHeavy; - Insert(index, 1000+index, weight); + Insert(index, 1000 + index, weight); added += weight; index++; } @@ -171,10 +165,10 @@ TEST(CacheTest, HeavyEntries) { int r = Lookup(i); if (r >= 0) { cached_weight += weight; - ASSERT_EQ(1000+i, r); + ASSERT_EQ(1000 + i, r); } } - ASSERT_LE(cached_weight, kCacheSize + kCacheSize/10); + ASSERT_LE(cached_weight, kCacheSize + kCacheSize / 10); } TEST(CacheTest, NewId) { @@ -197,13 +191,9 @@ class BlockBasedCacheTest { std::vector deleted_values_; Cache* cache_; - BlockBasedCacheTest() : cache_(NewBlockBasedCache(kCacheSize)) { - current_ = this; - } + BlockBasedCacheTest() : cache_(NewBlockBasedCache(kCacheSize)) { current_ = this; } - ~BlockBasedCacheTest() { - delete cache_; - } + ~BlockBasedCacheTest() { delete cache_; } int Lookup(int key) { Cache::Handle* handle = cache_->Lookup(EncodeKey(key)); @@ -218,13 +208,11 @@ class BlockBasedCacheTest { Cache::Handle* handle = cache_->Insert(EncodeKey(key), EncodeValue(value), 0xffffffffffffffff, &BlockBasedCacheTest::Deleter); if (force_release) { - cache_->Release(handle); + cache_->Release(handle); } } - void Erase(int key) { - cache_->Erase(EncodeKey(key)); - } + void Erase(int key) { cache_->Erase(EncodeKey(key)); } }; BlockBasedCacheTest* BlockBasedCacheTest::current_; @@ -234,8 +222,8 @@ TEST(BlockBasedCacheTest, CommonEvictionPolicy) { // Frequently used entry must be kept around for (int i = 0; i < kCacheSize + 100; i++) { - Insert(1000+i, 2000+i); - ASSERT_EQ(2000+i, Lookup(1000+i)); + Insert(1000 + i, 2000 + i); + ASSERT_EQ(2000 + i, Lookup(1000 + i)); ASSERT_EQ(101, Lookup(100)); } ASSERT_EQ(101, Lookup(100)); @@ -262,6 +250,4 @@ TEST(BlockBasedCacheTest, SpecialEvictionPolicy) { } // namespace leveldb -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} +int main(int argc, char** argv) { return leveldb::test::RunAllTests(); } diff --git a/src/leveldb/util/coding.cc b/src/leveldb/util/coding.cc index 353309538..8eeb164bf 100644 --- a/src/leveldb/util/coding.cc +++ b/src/leveldb/util/coding.cc @@ -52,26 +52,26 @@ char* EncodeVarint32(char* dst, uint32_t v) { // Operate on characters as unsigneds unsigned char* ptr = reinterpret_cast(dst); static const int B = 128; - if (v < (1<<7)) { + if (v < (1 << 7)) { *(ptr++) = v; - } else if (v < (1<<14)) { + } else if (v < (1 << 14)) { *(ptr++) = v | B; - *(ptr++) = v>>7; - } else if (v < (1<<21)) { + *(ptr++) = v >> 7; + } else if (v < (1 << 21)) { *(ptr++) = v | B; - *(ptr++) = (v>>7) | B; - *(ptr++) = v>>14; - } else if (v < (1<<28)) { + *(ptr++) = (v >> 7) | B; + *(ptr++) = v >> 14; + } else if (v < (1 << 28)) { *(ptr++) = v | B; - *(ptr++) = (v>>7) | B; - *(ptr++) = (v>>14) | B; - *(ptr++) = v>>21; + *(ptr++) = (v >> 7) | B; + *(ptr++) = (v >> 14) | B; + *(ptr++) = v >> 21; } else { *(ptr++) = v | B; - *(ptr++) = (v>>7) | B; - *(ptr++) = (v>>14) | B; - *(ptr++) = (v>>21) | B; - *(ptr++) = v>>28; + *(ptr++) = (v >> 7) | B; + *(ptr++) = (v >> 14) | B; + *(ptr++) = (v >> 21) | B; + *(ptr++) = v >> 28; } return reinterpret_cast(ptr); } @@ -86,7 +86,7 @@ char* EncodeVarint64(char* dst, uint64_t v) { static const uint32_t B = 128; unsigned char* ptr = reinterpret_cast(dst); while (v >= B) { - *(ptr++) = (v & (B-1)) | B; + *(ptr++) = (v & (B - 1)) | B; v >>= 7; } *(ptr++) = static_cast(v); @@ -113,9 +113,7 @@ int VarintLength(uint64_t v) { return len; } -const char* GetVarint32PtrFallback(const char* p, - const char* limit, - uint32_t* value) { +const char* GetVarint32PtrFallback(const char* p, const char* limit, uint32_t* value) { uint32_t result = 0; for (uint32_t shift = 0; shift <= 28 && p < limit; shift += 7) { uint32_t byte = *(reinterpret_cast(p)); @@ -173,8 +171,7 @@ bool GetVarint64(Slice* input, uint64_t* value) { } } -const char* GetLengthPrefixedSlice(const char* p, const char* limit, - Slice* result) { +const char* GetLengthPrefixedSlice(const char* p, const char* limit, Slice* result) { uint32_t len; p = GetVarint32Ptr(p, limit, &len); if (p == NULL) return NULL; @@ -185,8 +182,7 @@ const char* GetLengthPrefixedSlice(const char* p, const char* limit, bool GetLengthPrefixedSlice(Slice* input, Slice* result) { uint32_t len; - if (GetVarint32(input, &len) && - input->size() >= len) { + if (GetVarint32(input, &len) && input->size() >= len) { *result = Slice(input->data(), len); input->remove_prefix(len); return true; diff --git a/src/leveldb/util/coding.h b/src/leveldb/util/coding.h index 9420f9f21..c49622d0e 100644 --- a/src/leveldb/util/coding.h +++ b/src/leveldb/util/coding.h @@ -39,8 +39,8 @@ extern bool GetLengthPrefixedSlice(Slice* input, Slice* result); // in *v and return a pointer just past the parsed value, or return // NULL on error. These routines only look at bytes in the range // [p..limit-1] -extern const char* GetVarint32Ptr(const char* p,const char* limit, uint32_t* v); -extern const char* GetVarint64Ptr(const char* p,const char* limit, uint64_t* v); +extern const char* GetVarint32Ptr(const char* p, const char* limit, uint32_t* v); +extern const char* GetVarint64Ptr(const char* p, const char* limit, uint64_t* v); // Returns the length of the varint32 or varint64 encoding of "v" extern int VarintLength(uint64_t v); @@ -66,10 +66,10 @@ inline uint32_t DecodeFixed32(const char* ptr) { memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load return result; } else { - return ((static_cast(static_cast(ptr[0]))) - | (static_cast(static_cast(ptr[1])) << 8) - | (static_cast(static_cast(ptr[2])) << 16) - | (static_cast(static_cast(ptr[3])) << 24)); + return ((static_cast(static_cast(ptr[0]))) | + (static_cast(static_cast(ptr[1])) << 8) | + (static_cast(static_cast(ptr[2])) << 16) | + (static_cast(static_cast(ptr[3])) << 24)); } } @@ -87,12 +87,8 @@ inline uint64_t DecodeFixed64(const char* ptr) { } // Internal routine for use by fallback path of GetVarint32Ptr -extern const char* GetVarint32PtrFallback(const char* p, - const char* limit, - uint32_t* value); -inline const char* GetVarint32Ptr(const char* p, - const char* limit, - uint32_t* value) { +extern const char* GetVarint32PtrFallback(const char* p, const char* limit, uint32_t* value); +inline const char* GetVarint32Ptr(const char* p, const char* limit, uint32_t* value) { if (p < limit) { uint32_t result = *(reinterpret_cast(p)); if ((result & 128) == 0) { @@ -104,34 +100,34 @@ inline const char* GetVarint32Ptr(const char* p, } inline void EncodeBigEndian32(char* buf, uint32_t value) { - buf[0] = (value >> 24) & 0xff; - buf[1] = (value >> 16) & 0xff; - buf[2] = (value >> 8) & 0xff; - buf[3] = value & 0xff; + buf[0] = (value >> 24) & 0xff; + buf[1] = (value >> 16) & 0xff; + buf[2] = (value >> 8) & 0xff; + buf[3] = value & 0xff; } inline uint32_t DecodeBigEndain32(const char* ptr) { - return ((static_cast(static_cast(ptr[3]))) - | (static_cast(static_cast(ptr[2])) << 8) - | (static_cast(static_cast(ptr[1])) << 16) - | (static_cast(static_cast(ptr[0])) << 24)); + return ((static_cast(static_cast(ptr[3]))) | + (static_cast(static_cast(ptr[2])) << 8) | + (static_cast(static_cast(ptr[1])) << 16) | + (static_cast(static_cast(ptr[0])) << 24)); } inline void EncodeBigEndian(char* buf, uint64_t value) { - buf[0] = (value >> 56) & 0xff; - buf[1] = (value >> 48) & 0xff; - buf[2] = (value >> 40) & 0xff; - buf[3] = (value >> 32) & 0xff; - buf[4] = (value >> 24) & 0xff; - buf[5] = (value >> 16) & 0xff; - buf[6] = (value >> 8) & 0xff; - buf[7] = value & 0xff; + buf[0] = (value >> 56) & 0xff; + buf[1] = (value >> 48) & 0xff; + buf[2] = (value >> 40) & 0xff; + buf[3] = (value >> 32) & 0xff; + buf[4] = (value >> 24) & 0xff; + buf[5] = (value >> 16) & 0xff; + buf[6] = (value >> 8) & 0xff; + buf[7] = value & 0xff; } inline uint64_t DecodeBigEndain(const char* ptr) { - uint64_t lo = DecodeBigEndain32(ptr + 4); - uint64_t hi = DecodeBigEndain32(ptr); - return (hi << 32) | lo; + uint64_t lo = DecodeBigEndain32(ptr + 4); + uint64_t hi = DecodeBigEndain32(ptr); + return (hi << 32) | lo; } } // namespace leveldb diff --git a/src/leveldb/util/coding_test.cc b/src/leveldb/util/coding_test.cc index 17848377b..5358c6f74 100644 --- a/src/leveldb/util/coding_test.cc +++ b/src/leveldb/util/coding_test.cc @@ -13,7 +13,7 @@ namespace leveldb { -class Coding { }; +class Coding {}; TEST(Coding, Fixed32) { std::string s; @@ -43,15 +43,15 @@ TEST(Coding, Fixed64) { uint64_t v = static_cast(1) << power; uint64_t actual; actual = DecodeFixed64(p); - ASSERT_EQ(v-1, actual); + ASSERT_EQ(v - 1, actual); p += sizeof(uint64_t); actual = DecodeFixed64(p); - ASSERT_EQ(v+0, actual); + ASSERT_EQ(v + 0, actual); p += sizeof(uint64_t); actual = DecodeFixed64(p); - ASSERT_EQ(v+1, actual); + ASSERT_EQ(v + 1, actual); p += sizeof(uint64_t); } } @@ -112,8 +112,8 @@ TEST(Coding, Varint64) { // Test values near powers of two const uint64_t power = 1ull << k; values.push_back(power); - values.push_back(power-1); - values.push_back(power+1); + values.push_back(power - 1); + values.push_back(power + 1); } std::string s; @@ -133,14 +133,12 @@ TEST(Coding, Varint64) { ASSERT_EQ(VarintLength(actual), p - start); } ASSERT_EQ(p, limit); - } TEST(Coding, Varint32Overflow) { uint32_t result; std::string input("\x81\x82\x83\x84\x85\x11"); - ASSERT_TRUE(GetVarint32Ptr(input.data(), input.data() + input.size(), &result) - == NULL); + ASSERT_TRUE(GetVarint32Ptr(input.data(), input.data() + input.size(), &result) == NULL); } TEST(Coding, Varint32Truncation) { @@ -158,8 +156,7 @@ TEST(Coding, Varint32Truncation) { TEST(Coding, Varint64Overflow) { uint64_t result; std::string input("\x81\x82\x83\x84\x85\x81\x82\x83\x84\x85\x11"); - ASSERT_TRUE(GetVarint64Ptr(input.data(), input.data() + input.size(), &result) - == NULL); + ASSERT_TRUE(GetVarint64Ptr(input.data(), input.data() + input.size(), &result) == NULL); } TEST(Coding, Varint64Truncation) { @@ -195,43 +192,41 @@ TEST(Coding, Strings) { } TEST(Coding, PutLG) { - std::string a_input("dfsfsfsfsfs"); - Slice b_slice = a_input; - uint32_t a_id = 3; - PutFixed32LGId(&a_input, a_id); + std::string a_input("dfsfsfsfsfs"); + Slice b_slice = a_input; + uint32_t a_id = 3; + PutFixed32LGId(&a_input, a_id); - uint32_t b_id = 9999999; - Slice a_slice = a_input; - ASSERT_TRUE(GetFixed32LGId(&a_slice, &b_id)); + uint32_t b_id = 9999999; + Slice a_slice = a_input; + ASSERT_TRUE(GetFixed32LGId(&a_slice, &b_id)); - ASSERT_EQ(a_id, b_id); - ASSERT_EQ(a_slice.ToString(), b_slice.ToString()); + ASSERT_EQ(a_id, b_id); + ASSERT_EQ(a_slice.ToString(), b_slice.ToString()); } TEST(Coding, PutLG_ugly) { - Slice a_slice("dfsfsfsfsfs"); - Slice b_slice = a_slice; - uint32_t a_id = 3; - uint32_t b_id = 3; - ASSERT_TRUE(false == GetFixed32LGId(&a_slice, &b_id)); - - ASSERT_EQ(a_id, b_id); - ASSERT_EQ(a_slice.ToString(), b_slice.ToString()); + Slice a_slice("dfsfsfsfsfs"); + Slice b_slice = a_slice; + uint32_t a_id = 3; + uint32_t b_id = 3; + ASSERT_TRUE(false == GetFixed32LGId(&a_slice, &b_id)); + + ASSERT_EQ(a_id, b_id); + ASSERT_EQ(a_slice.ToString(), b_slice.ToString()); } TEST(Coding, PutFixed64Cmp) { - std::string sa, sb; - PutFixed64(&sa, 100); - PutFixed64(&sb, 50); - ASSERT_TRUE(sa > sb); - uint64_t a = DecodeFixed64(sa.c_str()); - uint64_t b = DecodeFixed64(sb.c_str()); - ASSERT_TRUE(a == 100); - ASSERT_TRUE(b == 50); + std::string sa, sb; + PutFixed64(&sa, 100); + PutFixed64(&sb, 50); + ASSERT_TRUE(sa > sb); + uint64_t a = DecodeFixed64(sa.c_str()); + uint64_t b = DecodeFixed64(sb.c_str()); + ASSERT_TRUE(a == 100); + ASSERT_TRUE(b == 50); } } // namespace leveldb -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} +int main(int argc, char** argv) { return leveldb::test::RunAllTests(); } diff --git a/src/leveldb/util/comparator.cc b/src/leveldb/util/comparator.cc index ab2aa1db6..480c8f2e3 100644 --- a/src/leveldb/util/comparator.cc +++ b/src/leveldb/util/comparator.cc @@ -16,17 +16,14 @@ namespace leveldb { -Comparator::~Comparator() { } +Comparator::~Comparator() {} namespace { class RowKeyComparator : public Comparator { public: - RowKeyComparator(const RawKeyOperator* key_operator) - : key_operator_(key_operator) {} + RowKeyComparator(const RawKeyOperator* key_operator) : key_operator_(key_operator) {} - virtual const char* Name() const { - return "leveldb.RowKeyComparator"; - } + virtual const char* Name() const { return "leveldb.RowKeyComparator"; } virtual int Compare(const Slice& a, const Slice& b) const { Slice a_key, a_col, a_qual; @@ -37,23 +34,17 @@ class RowKeyComparator : public Comparator { leveldb::TeraKeyType b_type; if (!key_operator_->ExtractTeraKey(a, &a_key, &a_col, &a_qual, &a_ts, &a_type)) { - return key_operator_->Compare(a, b); + return key_operator_->Compare(a, b); } if (!key_operator_->ExtractTeraKey(b, &b_key, &b_col, &b_qual, &b_ts, &b_type)) { - return key_operator_->Compare(a, b); + return key_operator_->Compare(a, b); } return a_key.compare(b_key); } - virtual void FindShortestSeparator( - std::string* start, - const Slice& limit) const { - return; - } + virtual void FindShortestSeparator(std::string* start, const Slice& limit) const { return; } - virtual void FindShortSuccessor(std::string* key) const { - return; - } + virtual void FindShortSuccessor(std::string* key) const { return; } private: const RawKeyOperator* key_operator_; @@ -61,24 +52,17 @@ class RowKeyComparator : public Comparator { class BytewiseComparatorImpl : public Comparator { public: - BytewiseComparatorImpl() { } + BytewiseComparatorImpl() {} - virtual const char* Name() const { - return "leveldb.BytewiseComparator"; - } + virtual const char* Name() const { return "leveldb.BytewiseComparator"; } - virtual int Compare(const Slice& a, const Slice& b) const { - return a.compare(b); - } + virtual int Compare(const Slice& a, const Slice& b) const { return a.compare(b); } - virtual void FindShortestSeparator( - std::string* start, - const Slice& limit) const { + virtual void FindShortestSeparator(std::string* start, const Slice& limit) const { // Find length of common prefix size_t min_length = std::min(start->size(), limit.size()); size_t diff_index = 0; - while ((diff_index < min_length) && - ((*start)[diff_index] == limit[diff_index])) { + while ((diff_index < min_length) && ((*start)[diff_index] == limit[diff_index])) { diff_index++; } @@ -102,7 +86,7 @@ class BytewiseComparatorImpl : public Comparator { const uint8_t byte = (*key)[i]; if (byte != static_cast(0xff)) { (*key)[i] = byte + 1; - key->resize(i+1); + key->resize(i + 1); return; } } @@ -112,24 +96,18 @@ class BytewiseComparatorImpl : public Comparator { class TeraBinaryComparatorImpl : public Comparator { public: - TeraBinaryComparatorImpl() : key_operator_(BinaryRawKeyOperator()){ } + TeraBinaryComparatorImpl() : key_operator_(BinaryRawKeyOperator()) {} - virtual const char* Name() const { - return "tera.TeraBinaryComparator"; - } + virtual const char* Name() const { return "tera.TeraBinaryComparator"; } - virtual int Compare(const Slice& a, const Slice& b) const { - return key_operator_->Compare(a, b); - } + virtual int Compare(const Slice& a, const Slice& b) const { return key_operator_->Compare(a, b); } - virtual void FindShortestSeparator( - std::string* start, - const Slice& limit) const { - // TODO: this may waste storage space + virtual void FindShortestSeparator(std::string* start, const Slice& limit) const { + // TODO: this may waste storage space } virtual void FindShortSuccessor(std::string* key) const { - // TODO: this may waste storage space + // TODO: this may waste storage space } private: @@ -137,36 +115,29 @@ class TeraBinaryComparatorImpl : public Comparator { }; class TeraTTLKvComparatorImpl : public Comparator { -public: - TeraTTLKvComparatorImpl() : - key_operator_(KvRawKeyOperator()) { - } + public: + TeraTTLKvComparatorImpl() : key_operator_(KvRawKeyOperator()) {} - virtual const char* Name() const { - return "tera.TeraTTLKvComparator"; - } + virtual const char* Name() const { return "tera.TeraTTLKvComparator"; } - virtual int Compare(const Slice& a, const Slice& b) const { - Slice row_key_a, row_key_b; - int64_t timestamp_a, timestamp_b; - key_operator_->ExtractTeraKey(a, &row_key_a, NULL, NULL, ×tamp_a, - NULL); - key_operator_->ExtractTeraKey(b, &row_key_b, NULL, NULL, ×tamp_b, - NULL); - return row_key_a.compare(row_key_b); - } + virtual int Compare(const Slice& a, const Slice& b) const { + Slice row_key_a, row_key_b; + int64_t timestamp_a, timestamp_b; + key_operator_->ExtractTeraKey(a, &row_key_a, NULL, NULL, ×tamp_a, NULL); + key_operator_->ExtractTeraKey(b, &row_key_b, NULL, NULL, ×tamp_b, NULL); + return row_key_a.compare(row_key_b); + } - virtual void FindShortestSeparator(std::string* start, - const Slice& limit) const { - // TODO: this may waste storage space - } + virtual void FindShortestSeparator(std::string* start, const Slice& limit) const { + // TODO: this may waste storage space + } - virtual void FindShortSuccessor(std::string* key) const { - // TODO: this may waste storage space - } + virtual void FindShortSuccessor(std::string* key) const { + // TODO: this may waste storage space + } -private: - const RawKeyOperator* key_operator_; + private: + const RawKeyOperator* key_operator_; }; } // namespace @@ -176,28 +147,28 @@ static const Comparator* terabinary; static const Comparator* terakv; static void InitModule() { - bytewise = new BytewiseComparatorImpl; - terabinary = new TeraBinaryComparatorImpl; - terakv = new TeraTTLKvComparatorImpl; + bytewise = new BytewiseComparatorImpl; + terabinary = new TeraBinaryComparatorImpl; + terakv = new TeraTTLKvComparatorImpl; } const Comparator* BytewiseComparator() { - port::InitOnce(&once, InitModule); - return bytewise; + port::InitOnce(&once, InitModule); + return bytewise; } const Comparator* TeraBinaryComparator() { - port::InitOnce(&once, InitModule); - return terabinary; + port::InitOnce(&once, InitModule); + return terabinary; } const Comparator* TeraTTLKvComparator() { - port::InitOnce(&once, InitModule); - return terakv; + port::InitOnce(&once, InitModule); + return terakv; } Comparator* NewRowKeyComparator(const RawKeyOperator* key_operator) { - Comparator* cmp = new RowKeyComparator(key_operator); - return cmp; + Comparator* cmp = new RowKeyComparator(key_operator); + return cmp; } } // namespace leveldb diff --git a/src/leveldb/util/crc32c.cc b/src/leveldb/util/crc32c.cc index fed64351b..ddfb9fe02 100644 --- a/src/leveldb/util/crc32c.cc +++ b/src/leveldb/util/crc32c.cc @@ -18,297 +18,165 @@ namespace leveldb { namespace crc32c { static const uint32_t table0_[256] = { - 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, - 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, - 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, - 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, - 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, - 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, - 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, - 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b, - 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, - 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, - 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, - 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, - 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, - 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a, - 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, - 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, - 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, - 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, - 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, - 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198, - 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, - 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, - 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, - 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, - 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, - 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, - 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, - 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, - 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, - 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, - 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, - 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829, - 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, - 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, - 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, - 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, - 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, - 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc, - 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, - 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, - 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, - 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, - 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, - 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982, - 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, - 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, - 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, - 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, - 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, - 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, - 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, - 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, - 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, - 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, - 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, - 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f, - 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, - 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, - 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, - 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, - 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, - 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e, - 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, - 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351 -}; + 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, + 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, + 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, + 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b, + 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, + 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, + 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a, + 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, + 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, + 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198, + 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, + 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, + 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, + 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, + 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, + 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829, + 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, + 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, + 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc, + 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, + 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, + 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982, + 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, + 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, + 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, + 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, + 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, + 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f, + 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, + 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, + 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e, + 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351}; static const uint32_t table1_[256] = { - 0x00000000, 0x13a29877, 0x274530ee, 0x34e7a899, - 0x4e8a61dc, 0x5d28f9ab, 0x69cf5132, 0x7a6dc945, - 0x9d14c3b8, 0x8eb65bcf, 0xba51f356, 0xa9f36b21, - 0xd39ea264, 0xc03c3a13, 0xf4db928a, 0xe7790afd, - 0x3fc5f181, 0x2c6769f6, 0x1880c16f, 0x0b225918, - 0x714f905d, 0x62ed082a, 0x560aa0b3, 0x45a838c4, - 0xa2d13239, 0xb173aa4e, 0x859402d7, 0x96369aa0, - 0xec5b53e5, 0xfff9cb92, 0xcb1e630b, 0xd8bcfb7c, - 0x7f8be302, 0x6c297b75, 0x58ced3ec, 0x4b6c4b9b, - 0x310182de, 0x22a31aa9, 0x1644b230, 0x05e62a47, - 0xe29f20ba, 0xf13db8cd, 0xc5da1054, 0xd6788823, - 0xac154166, 0xbfb7d911, 0x8b507188, 0x98f2e9ff, - 0x404e1283, 0x53ec8af4, 0x670b226d, 0x74a9ba1a, - 0x0ec4735f, 0x1d66eb28, 0x298143b1, 0x3a23dbc6, - 0xdd5ad13b, 0xcef8494c, 0xfa1fe1d5, 0xe9bd79a2, - 0x93d0b0e7, 0x80722890, 0xb4958009, 0xa737187e, - 0xff17c604, 0xecb55e73, 0xd852f6ea, 0xcbf06e9d, - 0xb19da7d8, 0xa23f3faf, 0x96d89736, 0x857a0f41, - 0x620305bc, 0x71a19dcb, 0x45463552, 0x56e4ad25, - 0x2c896460, 0x3f2bfc17, 0x0bcc548e, 0x186eccf9, - 0xc0d23785, 0xd370aff2, 0xe797076b, 0xf4359f1c, - 0x8e585659, 0x9dface2e, 0xa91d66b7, 0xbabffec0, - 0x5dc6f43d, 0x4e646c4a, 0x7a83c4d3, 0x69215ca4, - 0x134c95e1, 0x00ee0d96, 0x3409a50f, 0x27ab3d78, - 0x809c2506, 0x933ebd71, 0xa7d915e8, 0xb47b8d9f, - 0xce1644da, 0xddb4dcad, 0xe9537434, 0xfaf1ec43, - 0x1d88e6be, 0x0e2a7ec9, 0x3acdd650, 0x296f4e27, - 0x53028762, 0x40a01f15, 0x7447b78c, 0x67e52ffb, - 0xbf59d487, 0xacfb4cf0, 0x981ce469, 0x8bbe7c1e, - 0xf1d3b55b, 0xe2712d2c, 0xd69685b5, 0xc5341dc2, - 0x224d173f, 0x31ef8f48, 0x050827d1, 0x16aabfa6, - 0x6cc776e3, 0x7f65ee94, 0x4b82460d, 0x5820de7a, - 0xfbc3faf9, 0xe861628e, 0xdc86ca17, 0xcf245260, - 0xb5499b25, 0xa6eb0352, 0x920cabcb, 0x81ae33bc, - 0x66d73941, 0x7575a136, 0x419209af, 0x523091d8, - 0x285d589d, 0x3bffc0ea, 0x0f186873, 0x1cbaf004, - 0xc4060b78, 0xd7a4930f, 0xe3433b96, 0xf0e1a3e1, - 0x8a8c6aa4, 0x992ef2d3, 0xadc95a4a, 0xbe6bc23d, - 0x5912c8c0, 0x4ab050b7, 0x7e57f82e, 0x6df56059, - 0x1798a91c, 0x043a316b, 0x30dd99f2, 0x237f0185, - 0x844819fb, 0x97ea818c, 0xa30d2915, 0xb0afb162, - 0xcac27827, 0xd960e050, 0xed8748c9, 0xfe25d0be, - 0x195cda43, 0x0afe4234, 0x3e19eaad, 0x2dbb72da, - 0x57d6bb9f, 0x447423e8, 0x70938b71, 0x63311306, - 0xbb8de87a, 0xa82f700d, 0x9cc8d894, 0x8f6a40e3, - 0xf50789a6, 0xe6a511d1, 0xd242b948, 0xc1e0213f, - 0x26992bc2, 0x353bb3b5, 0x01dc1b2c, 0x127e835b, - 0x68134a1e, 0x7bb1d269, 0x4f567af0, 0x5cf4e287, - 0x04d43cfd, 0x1776a48a, 0x23910c13, 0x30339464, - 0x4a5e5d21, 0x59fcc556, 0x6d1b6dcf, 0x7eb9f5b8, - 0x99c0ff45, 0x8a626732, 0xbe85cfab, 0xad2757dc, - 0xd74a9e99, 0xc4e806ee, 0xf00fae77, 0xe3ad3600, - 0x3b11cd7c, 0x28b3550b, 0x1c54fd92, 0x0ff665e5, - 0x759baca0, 0x663934d7, 0x52de9c4e, 0x417c0439, - 0xa6050ec4, 0xb5a796b3, 0x81403e2a, 0x92e2a65d, - 0xe88f6f18, 0xfb2df76f, 0xcfca5ff6, 0xdc68c781, - 0x7b5fdfff, 0x68fd4788, 0x5c1aef11, 0x4fb87766, - 0x35d5be23, 0x26772654, 0x12908ecd, 0x013216ba, - 0xe64b1c47, 0xf5e98430, 0xc10e2ca9, 0xd2acb4de, - 0xa8c17d9b, 0xbb63e5ec, 0x8f844d75, 0x9c26d502, - 0x449a2e7e, 0x5738b609, 0x63df1e90, 0x707d86e7, - 0x0a104fa2, 0x19b2d7d5, 0x2d557f4c, 0x3ef7e73b, - 0xd98eedc6, 0xca2c75b1, 0xfecbdd28, 0xed69455f, - 0x97048c1a, 0x84a6146d, 0xb041bcf4, 0xa3e32483 -}; + 0x00000000, 0x13a29877, 0x274530ee, 0x34e7a899, 0x4e8a61dc, 0x5d28f9ab, 0x69cf5132, 0x7a6dc945, + 0x9d14c3b8, 0x8eb65bcf, 0xba51f356, 0xa9f36b21, 0xd39ea264, 0xc03c3a13, 0xf4db928a, 0xe7790afd, + 0x3fc5f181, 0x2c6769f6, 0x1880c16f, 0x0b225918, 0x714f905d, 0x62ed082a, 0x560aa0b3, 0x45a838c4, + 0xa2d13239, 0xb173aa4e, 0x859402d7, 0x96369aa0, 0xec5b53e5, 0xfff9cb92, 0xcb1e630b, 0xd8bcfb7c, + 0x7f8be302, 0x6c297b75, 0x58ced3ec, 0x4b6c4b9b, 0x310182de, 0x22a31aa9, 0x1644b230, 0x05e62a47, + 0xe29f20ba, 0xf13db8cd, 0xc5da1054, 0xd6788823, 0xac154166, 0xbfb7d911, 0x8b507188, 0x98f2e9ff, + 0x404e1283, 0x53ec8af4, 0x670b226d, 0x74a9ba1a, 0x0ec4735f, 0x1d66eb28, 0x298143b1, 0x3a23dbc6, + 0xdd5ad13b, 0xcef8494c, 0xfa1fe1d5, 0xe9bd79a2, 0x93d0b0e7, 0x80722890, 0xb4958009, 0xa737187e, + 0xff17c604, 0xecb55e73, 0xd852f6ea, 0xcbf06e9d, 0xb19da7d8, 0xa23f3faf, 0x96d89736, 0x857a0f41, + 0x620305bc, 0x71a19dcb, 0x45463552, 0x56e4ad25, 0x2c896460, 0x3f2bfc17, 0x0bcc548e, 0x186eccf9, + 0xc0d23785, 0xd370aff2, 0xe797076b, 0xf4359f1c, 0x8e585659, 0x9dface2e, 0xa91d66b7, 0xbabffec0, + 0x5dc6f43d, 0x4e646c4a, 0x7a83c4d3, 0x69215ca4, 0x134c95e1, 0x00ee0d96, 0x3409a50f, 0x27ab3d78, + 0x809c2506, 0x933ebd71, 0xa7d915e8, 0xb47b8d9f, 0xce1644da, 0xddb4dcad, 0xe9537434, 0xfaf1ec43, + 0x1d88e6be, 0x0e2a7ec9, 0x3acdd650, 0x296f4e27, 0x53028762, 0x40a01f15, 0x7447b78c, 0x67e52ffb, + 0xbf59d487, 0xacfb4cf0, 0x981ce469, 0x8bbe7c1e, 0xf1d3b55b, 0xe2712d2c, 0xd69685b5, 0xc5341dc2, + 0x224d173f, 0x31ef8f48, 0x050827d1, 0x16aabfa6, 0x6cc776e3, 0x7f65ee94, 0x4b82460d, 0x5820de7a, + 0xfbc3faf9, 0xe861628e, 0xdc86ca17, 0xcf245260, 0xb5499b25, 0xa6eb0352, 0x920cabcb, 0x81ae33bc, + 0x66d73941, 0x7575a136, 0x419209af, 0x523091d8, 0x285d589d, 0x3bffc0ea, 0x0f186873, 0x1cbaf004, + 0xc4060b78, 0xd7a4930f, 0xe3433b96, 0xf0e1a3e1, 0x8a8c6aa4, 0x992ef2d3, 0xadc95a4a, 0xbe6bc23d, + 0x5912c8c0, 0x4ab050b7, 0x7e57f82e, 0x6df56059, 0x1798a91c, 0x043a316b, 0x30dd99f2, 0x237f0185, + 0x844819fb, 0x97ea818c, 0xa30d2915, 0xb0afb162, 0xcac27827, 0xd960e050, 0xed8748c9, 0xfe25d0be, + 0x195cda43, 0x0afe4234, 0x3e19eaad, 0x2dbb72da, 0x57d6bb9f, 0x447423e8, 0x70938b71, 0x63311306, + 0xbb8de87a, 0xa82f700d, 0x9cc8d894, 0x8f6a40e3, 0xf50789a6, 0xe6a511d1, 0xd242b948, 0xc1e0213f, + 0x26992bc2, 0x353bb3b5, 0x01dc1b2c, 0x127e835b, 0x68134a1e, 0x7bb1d269, 0x4f567af0, 0x5cf4e287, + 0x04d43cfd, 0x1776a48a, 0x23910c13, 0x30339464, 0x4a5e5d21, 0x59fcc556, 0x6d1b6dcf, 0x7eb9f5b8, + 0x99c0ff45, 0x8a626732, 0xbe85cfab, 0xad2757dc, 0xd74a9e99, 0xc4e806ee, 0xf00fae77, 0xe3ad3600, + 0x3b11cd7c, 0x28b3550b, 0x1c54fd92, 0x0ff665e5, 0x759baca0, 0x663934d7, 0x52de9c4e, 0x417c0439, + 0xa6050ec4, 0xb5a796b3, 0x81403e2a, 0x92e2a65d, 0xe88f6f18, 0xfb2df76f, 0xcfca5ff6, 0xdc68c781, + 0x7b5fdfff, 0x68fd4788, 0x5c1aef11, 0x4fb87766, 0x35d5be23, 0x26772654, 0x12908ecd, 0x013216ba, + 0xe64b1c47, 0xf5e98430, 0xc10e2ca9, 0xd2acb4de, 0xa8c17d9b, 0xbb63e5ec, 0x8f844d75, 0x9c26d502, + 0x449a2e7e, 0x5738b609, 0x63df1e90, 0x707d86e7, 0x0a104fa2, 0x19b2d7d5, 0x2d557f4c, 0x3ef7e73b, + 0xd98eedc6, 0xca2c75b1, 0xfecbdd28, 0xed69455f, 0x97048c1a, 0x84a6146d, 0xb041bcf4, 0xa3e32483}; static const uint32_t table2_[256] = { - 0x00000000, 0xa541927e, 0x4f6f520d, 0xea2ec073, - 0x9edea41a, 0x3b9f3664, 0xd1b1f617, 0x74f06469, - 0x38513ec5, 0x9d10acbb, 0x773e6cc8, 0xd27ffeb6, - 0xa68f9adf, 0x03ce08a1, 0xe9e0c8d2, 0x4ca15aac, - 0x70a27d8a, 0xd5e3eff4, 0x3fcd2f87, 0x9a8cbdf9, - 0xee7cd990, 0x4b3d4bee, 0xa1138b9d, 0x045219e3, - 0x48f3434f, 0xedb2d131, 0x079c1142, 0xa2dd833c, - 0xd62de755, 0x736c752b, 0x9942b558, 0x3c032726, - 0xe144fb14, 0x4405696a, 0xae2ba919, 0x0b6a3b67, - 0x7f9a5f0e, 0xdadbcd70, 0x30f50d03, 0x95b49f7d, - 0xd915c5d1, 0x7c5457af, 0x967a97dc, 0x333b05a2, - 0x47cb61cb, 0xe28af3b5, 0x08a433c6, 0xade5a1b8, - 0x91e6869e, 0x34a714e0, 0xde89d493, 0x7bc846ed, - 0x0f382284, 0xaa79b0fa, 0x40577089, 0xe516e2f7, - 0xa9b7b85b, 0x0cf62a25, 0xe6d8ea56, 0x43997828, - 0x37691c41, 0x92288e3f, 0x78064e4c, 0xdd47dc32, - 0xc76580d9, 0x622412a7, 0x880ad2d4, 0x2d4b40aa, - 0x59bb24c3, 0xfcfab6bd, 0x16d476ce, 0xb395e4b0, - 0xff34be1c, 0x5a752c62, 0xb05bec11, 0x151a7e6f, - 0x61ea1a06, 0xc4ab8878, 0x2e85480b, 0x8bc4da75, - 0xb7c7fd53, 0x12866f2d, 0xf8a8af5e, 0x5de93d20, - 0x29195949, 0x8c58cb37, 0x66760b44, 0xc337993a, - 0x8f96c396, 0x2ad751e8, 0xc0f9919b, 0x65b803e5, - 0x1148678c, 0xb409f5f2, 0x5e273581, 0xfb66a7ff, - 0x26217bcd, 0x8360e9b3, 0x694e29c0, 0xcc0fbbbe, - 0xb8ffdfd7, 0x1dbe4da9, 0xf7908dda, 0x52d11fa4, - 0x1e704508, 0xbb31d776, 0x511f1705, 0xf45e857b, - 0x80aee112, 0x25ef736c, 0xcfc1b31f, 0x6a802161, - 0x56830647, 0xf3c29439, 0x19ec544a, 0xbcadc634, - 0xc85da25d, 0x6d1c3023, 0x8732f050, 0x2273622e, - 0x6ed23882, 0xcb93aafc, 0x21bd6a8f, 0x84fcf8f1, - 0xf00c9c98, 0x554d0ee6, 0xbf63ce95, 0x1a225ceb, - 0x8b277743, 0x2e66e53d, 0xc448254e, 0x6109b730, - 0x15f9d359, 0xb0b84127, 0x5a968154, 0xffd7132a, - 0xb3764986, 0x1637dbf8, 0xfc191b8b, 0x595889f5, - 0x2da8ed9c, 0x88e97fe2, 0x62c7bf91, 0xc7862def, - 0xfb850ac9, 0x5ec498b7, 0xb4ea58c4, 0x11abcaba, - 0x655baed3, 0xc01a3cad, 0x2a34fcde, 0x8f756ea0, - 0xc3d4340c, 0x6695a672, 0x8cbb6601, 0x29faf47f, - 0x5d0a9016, 0xf84b0268, 0x1265c21b, 0xb7245065, - 0x6a638c57, 0xcf221e29, 0x250cde5a, 0x804d4c24, - 0xf4bd284d, 0x51fcba33, 0xbbd27a40, 0x1e93e83e, - 0x5232b292, 0xf77320ec, 0x1d5de09f, 0xb81c72e1, - 0xccec1688, 0x69ad84f6, 0x83834485, 0x26c2d6fb, - 0x1ac1f1dd, 0xbf8063a3, 0x55aea3d0, 0xf0ef31ae, - 0x841f55c7, 0x215ec7b9, 0xcb7007ca, 0x6e3195b4, - 0x2290cf18, 0x87d15d66, 0x6dff9d15, 0xc8be0f6b, - 0xbc4e6b02, 0x190ff97c, 0xf321390f, 0x5660ab71, - 0x4c42f79a, 0xe90365e4, 0x032da597, 0xa66c37e9, - 0xd29c5380, 0x77ddc1fe, 0x9df3018d, 0x38b293f3, - 0x7413c95f, 0xd1525b21, 0x3b7c9b52, 0x9e3d092c, - 0xeacd6d45, 0x4f8cff3b, 0xa5a23f48, 0x00e3ad36, - 0x3ce08a10, 0x99a1186e, 0x738fd81d, 0xd6ce4a63, - 0xa23e2e0a, 0x077fbc74, 0xed517c07, 0x4810ee79, - 0x04b1b4d5, 0xa1f026ab, 0x4bdee6d8, 0xee9f74a6, - 0x9a6f10cf, 0x3f2e82b1, 0xd50042c2, 0x7041d0bc, - 0xad060c8e, 0x08479ef0, 0xe2695e83, 0x4728ccfd, - 0x33d8a894, 0x96993aea, 0x7cb7fa99, 0xd9f668e7, - 0x9557324b, 0x3016a035, 0xda386046, 0x7f79f238, - 0x0b899651, 0xaec8042f, 0x44e6c45c, 0xe1a75622, - 0xdda47104, 0x78e5e37a, 0x92cb2309, 0x378ab177, - 0x437ad51e, 0xe63b4760, 0x0c158713, 0xa954156d, - 0xe5f54fc1, 0x40b4ddbf, 0xaa9a1dcc, 0x0fdb8fb2, - 0x7b2bebdb, 0xde6a79a5, 0x3444b9d6, 0x91052ba8 -}; + 0x00000000, 0xa541927e, 0x4f6f520d, 0xea2ec073, 0x9edea41a, 0x3b9f3664, 0xd1b1f617, 0x74f06469, + 0x38513ec5, 0x9d10acbb, 0x773e6cc8, 0xd27ffeb6, 0xa68f9adf, 0x03ce08a1, 0xe9e0c8d2, 0x4ca15aac, + 0x70a27d8a, 0xd5e3eff4, 0x3fcd2f87, 0x9a8cbdf9, 0xee7cd990, 0x4b3d4bee, 0xa1138b9d, 0x045219e3, + 0x48f3434f, 0xedb2d131, 0x079c1142, 0xa2dd833c, 0xd62de755, 0x736c752b, 0x9942b558, 0x3c032726, + 0xe144fb14, 0x4405696a, 0xae2ba919, 0x0b6a3b67, 0x7f9a5f0e, 0xdadbcd70, 0x30f50d03, 0x95b49f7d, + 0xd915c5d1, 0x7c5457af, 0x967a97dc, 0x333b05a2, 0x47cb61cb, 0xe28af3b5, 0x08a433c6, 0xade5a1b8, + 0x91e6869e, 0x34a714e0, 0xde89d493, 0x7bc846ed, 0x0f382284, 0xaa79b0fa, 0x40577089, 0xe516e2f7, + 0xa9b7b85b, 0x0cf62a25, 0xe6d8ea56, 0x43997828, 0x37691c41, 0x92288e3f, 0x78064e4c, 0xdd47dc32, + 0xc76580d9, 0x622412a7, 0x880ad2d4, 0x2d4b40aa, 0x59bb24c3, 0xfcfab6bd, 0x16d476ce, 0xb395e4b0, + 0xff34be1c, 0x5a752c62, 0xb05bec11, 0x151a7e6f, 0x61ea1a06, 0xc4ab8878, 0x2e85480b, 0x8bc4da75, + 0xb7c7fd53, 0x12866f2d, 0xf8a8af5e, 0x5de93d20, 0x29195949, 0x8c58cb37, 0x66760b44, 0xc337993a, + 0x8f96c396, 0x2ad751e8, 0xc0f9919b, 0x65b803e5, 0x1148678c, 0xb409f5f2, 0x5e273581, 0xfb66a7ff, + 0x26217bcd, 0x8360e9b3, 0x694e29c0, 0xcc0fbbbe, 0xb8ffdfd7, 0x1dbe4da9, 0xf7908dda, 0x52d11fa4, + 0x1e704508, 0xbb31d776, 0x511f1705, 0xf45e857b, 0x80aee112, 0x25ef736c, 0xcfc1b31f, 0x6a802161, + 0x56830647, 0xf3c29439, 0x19ec544a, 0xbcadc634, 0xc85da25d, 0x6d1c3023, 0x8732f050, 0x2273622e, + 0x6ed23882, 0xcb93aafc, 0x21bd6a8f, 0x84fcf8f1, 0xf00c9c98, 0x554d0ee6, 0xbf63ce95, 0x1a225ceb, + 0x8b277743, 0x2e66e53d, 0xc448254e, 0x6109b730, 0x15f9d359, 0xb0b84127, 0x5a968154, 0xffd7132a, + 0xb3764986, 0x1637dbf8, 0xfc191b8b, 0x595889f5, 0x2da8ed9c, 0x88e97fe2, 0x62c7bf91, 0xc7862def, + 0xfb850ac9, 0x5ec498b7, 0xb4ea58c4, 0x11abcaba, 0x655baed3, 0xc01a3cad, 0x2a34fcde, 0x8f756ea0, + 0xc3d4340c, 0x6695a672, 0x8cbb6601, 0x29faf47f, 0x5d0a9016, 0xf84b0268, 0x1265c21b, 0xb7245065, + 0x6a638c57, 0xcf221e29, 0x250cde5a, 0x804d4c24, 0xf4bd284d, 0x51fcba33, 0xbbd27a40, 0x1e93e83e, + 0x5232b292, 0xf77320ec, 0x1d5de09f, 0xb81c72e1, 0xccec1688, 0x69ad84f6, 0x83834485, 0x26c2d6fb, + 0x1ac1f1dd, 0xbf8063a3, 0x55aea3d0, 0xf0ef31ae, 0x841f55c7, 0x215ec7b9, 0xcb7007ca, 0x6e3195b4, + 0x2290cf18, 0x87d15d66, 0x6dff9d15, 0xc8be0f6b, 0xbc4e6b02, 0x190ff97c, 0xf321390f, 0x5660ab71, + 0x4c42f79a, 0xe90365e4, 0x032da597, 0xa66c37e9, 0xd29c5380, 0x77ddc1fe, 0x9df3018d, 0x38b293f3, + 0x7413c95f, 0xd1525b21, 0x3b7c9b52, 0x9e3d092c, 0xeacd6d45, 0x4f8cff3b, 0xa5a23f48, 0x00e3ad36, + 0x3ce08a10, 0x99a1186e, 0x738fd81d, 0xd6ce4a63, 0xa23e2e0a, 0x077fbc74, 0xed517c07, 0x4810ee79, + 0x04b1b4d5, 0xa1f026ab, 0x4bdee6d8, 0xee9f74a6, 0x9a6f10cf, 0x3f2e82b1, 0xd50042c2, 0x7041d0bc, + 0xad060c8e, 0x08479ef0, 0xe2695e83, 0x4728ccfd, 0x33d8a894, 0x96993aea, 0x7cb7fa99, 0xd9f668e7, + 0x9557324b, 0x3016a035, 0xda386046, 0x7f79f238, 0x0b899651, 0xaec8042f, 0x44e6c45c, 0xe1a75622, + 0xdda47104, 0x78e5e37a, 0x92cb2309, 0x378ab177, 0x437ad51e, 0xe63b4760, 0x0c158713, 0xa954156d, + 0xe5f54fc1, 0x40b4ddbf, 0xaa9a1dcc, 0x0fdb8fb2, 0x7b2bebdb, 0xde6a79a5, 0x3444b9d6, 0x91052ba8}; static const uint32_t table3_[256] = { - 0x00000000, 0xdd45aab8, 0xbf672381, 0x62228939, - 0x7b2231f3, 0xa6679b4b, 0xc4451272, 0x1900b8ca, - 0xf64463e6, 0x2b01c95e, 0x49234067, 0x9466eadf, - 0x8d665215, 0x5023f8ad, 0x32017194, 0xef44db2c, - 0xe964b13d, 0x34211b85, 0x560392bc, 0x8b463804, - 0x924680ce, 0x4f032a76, 0x2d21a34f, 0xf06409f7, - 0x1f20d2db, 0xc2657863, 0xa047f15a, 0x7d025be2, - 0x6402e328, 0xb9474990, 0xdb65c0a9, 0x06206a11, - 0xd725148b, 0x0a60be33, 0x6842370a, 0xb5079db2, - 0xac072578, 0x71428fc0, 0x136006f9, 0xce25ac41, - 0x2161776d, 0xfc24ddd5, 0x9e0654ec, 0x4343fe54, - 0x5a43469e, 0x8706ec26, 0xe524651f, 0x3861cfa7, - 0x3e41a5b6, 0xe3040f0e, 0x81268637, 0x5c632c8f, - 0x45639445, 0x98263efd, 0xfa04b7c4, 0x27411d7c, - 0xc805c650, 0x15406ce8, 0x7762e5d1, 0xaa274f69, - 0xb327f7a3, 0x6e625d1b, 0x0c40d422, 0xd1057e9a, - 0xaba65fe7, 0x76e3f55f, 0x14c17c66, 0xc984d6de, - 0xd0846e14, 0x0dc1c4ac, 0x6fe34d95, 0xb2a6e72d, - 0x5de23c01, 0x80a796b9, 0xe2851f80, 0x3fc0b538, - 0x26c00df2, 0xfb85a74a, 0x99a72e73, 0x44e284cb, - 0x42c2eeda, 0x9f874462, 0xfda5cd5b, 0x20e067e3, - 0x39e0df29, 0xe4a57591, 0x8687fca8, 0x5bc25610, - 0xb4868d3c, 0x69c32784, 0x0be1aebd, 0xd6a40405, - 0xcfa4bccf, 0x12e11677, 0x70c39f4e, 0xad8635f6, - 0x7c834b6c, 0xa1c6e1d4, 0xc3e468ed, 0x1ea1c255, - 0x07a17a9f, 0xdae4d027, 0xb8c6591e, 0x6583f3a6, - 0x8ac7288a, 0x57828232, 0x35a00b0b, 0xe8e5a1b3, - 0xf1e51979, 0x2ca0b3c1, 0x4e823af8, 0x93c79040, - 0x95e7fa51, 0x48a250e9, 0x2a80d9d0, 0xf7c57368, - 0xeec5cba2, 0x3380611a, 0x51a2e823, 0x8ce7429b, - 0x63a399b7, 0xbee6330f, 0xdcc4ba36, 0x0181108e, - 0x1881a844, 0xc5c402fc, 0xa7e68bc5, 0x7aa3217d, - 0x52a0c93f, 0x8fe56387, 0xedc7eabe, 0x30824006, - 0x2982f8cc, 0xf4c75274, 0x96e5db4d, 0x4ba071f5, - 0xa4e4aad9, 0x79a10061, 0x1b838958, 0xc6c623e0, - 0xdfc69b2a, 0x02833192, 0x60a1b8ab, 0xbde41213, - 0xbbc47802, 0x6681d2ba, 0x04a35b83, 0xd9e6f13b, - 0xc0e649f1, 0x1da3e349, 0x7f816a70, 0xa2c4c0c8, - 0x4d801be4, 0x90c5b15c, 0xf2e73865, 0x2fa292dd, - 0x36a22a17, 0xebe780af, 0x89c50996, 0x5480a32e, - 0x8585ddb4, 0x58c0770c, 0x3ae2fe35, 0xe7a7548d, - 0xfea7ec47, 0x23e246ff, 0x41c0cfc6, 0x9c85657e, - 0x73c1be52, 0xae8414ea, 0xcca69dd3, 0x11e3376b, - 0x08e38fa1, 0xd5a62519, 0xb784ac20, 0x6ac10698, - 0x6ce16c89, 0xb1a4c631, 0xd3864f08, 0x0ec3e5b0, - 0x17c35d7a, 0xca86f7c2, 0xa8a47efb, 0x75e1d443, - 0x9aa50f6f, 0x47e0a5d7, 0x25c22cee, 0xf8878656, - 0xe1873e9c, 0x3cc29424, 0x5ee01d1d, 0x83a5b7a5, - 0xf90696d8, 0x24433c60, 0x4661b559, 0x9b241fe1, - 0x8224a72b, 0x5f610d93, 0x3d4384aa, 0xe0062e12, - 0x0f42f53e, 0xd2075f86, 0xb025d6bf, 0x6d607c07, - 0x7460c4cd, 0xa9256e75, 0xcb07e74c, 0x16424df4, - 0x106227e5, 0xcd278d5d, 0xaf050464, 0x7240aedc, - 0x6b401616, 0xb605bcae, 0xd4273597, 0x09629f2f, - 0xe6264403, 0x3b63eebb, 0x59416782, 0x8404cd3a, - 0x9d0475f0, 0x4041df48, 0x22635671, 0xff26fcc9, - 0x2e238253, 0xf36628eb, 0x9144a1d2, 0x4c010b6a, - 0x5501b3a0, 0x88441918, 0xea669021, 0x37233a99, - 0xd867e1b5, 0x05224b0d, 0x6700c234, 0xba45688c, - 0xa345d046, 0x7e007afe, 0x1c22f3c7, 0xc167597f, - 0xc747336e, 0x1a0299d6, 0x782010ef, 0xa565ba57, - 0xbc65029d, 0x6120a825, 0x0302211c, 0xde478ba4, - 0x31035088, 0xec46fa30, 0x8e647309, 0x5321d9b1, - 0x4a21617b, 0x9764cbc3, 0xf54642fa, 0x2803e842 -}; + 0x00000000, 0xdd45aab8, 0xbf672381, 0x62228939, 0x7b2231f3, 0xa6679b4b, 0xc4451272, 0x1900b8ca, + 0xf64463e6, 0x2b01c95e, 0x49234067, 0x9466eadf, 0x8d665215, 0x5023f8ad, 0x32017194, 0xef44db2c, + 0xe964b13d, 0x34211b85, 0x560392bc, 0x8b463804, 0x924680ce, 0x4f032a76, 0x2d21a34f, 0xf06409f7, + 0x1f20d2db, 0xc2657863, 0xa047f15a, 0x7d025be2, 0x6402e328, 0xb9474990, 0xdb65c0a9, 0x06206a11, + 0xd725148b, 0x0a60be33, 0x6842370a, 0xb5079db2, 0xac072578, 0x71428fc0, 0x136006f9, 0xce25ac41, + 0x2161776d, 0xfc24ddd5, 0x9e0654ec, 0x4343fe54, 0x5a43469e, 0x8706ec26, 0xe524651f, 0x3861cfa7, + 0x3e41a5b6, 0xe3040f0e, 0x81268637, 0x5c632c8f, 0x45639445, 0x98263efd, 0xfa04b7c4, 0x27411d7c, + 0xc805c650, 0x15406ce8, 0x7762e5d1, 0xaa274f69, 0xb327f7a3, 0x6e625d1b, 0x0c40d422, 0xd1057e9a, + 0xaba65fe7, 0x76e3f55f, 0x14c17c66, 0xc984d6de, 0xd0846e14, 0x0dc1c4ac, 0x6fe34d95, 0xb2a6e72d, + 0x5de23c01, 0x80a796b9, 0xe2851f80, 0x3fc0b538, 0x26c00df2, 0xfb85a74a, 0x99a72e73, 0x44e284cb, + 0x42c2eeda, 0x9f874462, 0xfda5cd5b, 0x20e067e3, 0x39e0df29, 0xe4a57591, 0x8687fca8, 0x5bc25610, + 0xb4868d3c, 0x69c32784, 0x0be1aebd, 0xd6a40405, 0xcfa4bccf, 0x12e11677, 0x70c39f4e, 0xad8635f6, + 0x7c834b6c, 0xa1c6e1d4, 0xc3e468ed, 0x1ea1c255, 0x07a17a9f, 0xdae4d027, 0xb8c6591e, 0x6583f3a6, + 0x8ac7288a, 0x57828232, 0x35a00b0b, 0xe8e5a1b3, 0xf1e51979, 0x2ca0b3c1, 0x4e823af8, 0x93c79040, + 0x95e7fa51, 0x48a250e9, 0x2a80d9d0, 0xf7c57368, 0xeec5cba2, 0x3380611a, 0x51a2e823, 0x8ce7429b, + 0x63a399b7, 0xbee6330f, 0xdcc4ba36, 0x0181108e, 0x1881a844, 0xc5c402fc, 0xa7e68bc5, 0x7aa3217d, + 0x52a0c93f, 0x8fe56387, 0xedc7eabe, 0x30824006, 0x2982f8cc, 0xf4c75274, 0x96e5db4d, 0x4ba071f5, + 0xa4e4aad9, 0x79a10061, 0x1b838958, 0xc6c623e0, 0xdfc69b2a, 0x02833192, 0x60a1b8ab, 0xbde41213, + 0xbbc47802, 0x6681d2ba, 0x04a35b83, 0xd9e6f13b, 0xc0e649f1, 0x1da3e349, 0x7f816a70, 0xa2c4c0c8, + 0x4d801be4, 0x90c5b15c, 0xf2e73865, 0x2fa292dd, 0x36a22a17, 0xebe780af, 0x89c50996, 0x5480a32e, + 0x8585ddb4, 0x58c0770c, 0x3ae2fe35, 0xe7a7548d, 0xfea7ec47, 0x23e246ff, 0x41c0cfc6, 0x9c85657e, + 0x73c1be52, 0xae8414ea, 0xcca69dd3, 0x11e3376b, 0x08e38fa1, 0xd5a62519, 0xb784ac20, 0x6ac10698, + 0x6ce16c89, 0xb1a4c631, 0xd3864f08, 0x0ec3e5b0, 0x17c35d7a, 0xca86f7c2, 0xa8a47efb, 0x75e1d443, + 0x9aa50f6f, 0x47e0a5d7, 0x25c22cee, 0xf8878656, 0xe1873e9c, 0x3cc29424, 0x5ee01d1d, 0x83a5b7a5, + 0xf90696d8, 0x24433c60, 0x4661b559, 0x9b241fe1, 0x8224a72b, 0x5f610d93, 0x3d4384aa, 0xe0062e12, + 0x0f42f53e, 0xd2075f86, 0xb025d6bf, 0x6d607c07, 0x7460c4cd, 0xa9256e75, 0xcb07e74c, 0x16424df4, + 0x106227e5, 0xcd278d5d, 0xaf050464, 0x7240aedc, 0x6b401616, 0xb605bcae, 0xd4273597, 0x09629f2f, + 0xe6264403, 0x3b63eebb, 0x59416782, 0x8404cd3a, 0x9d0475f0, 0x4041df48, 0x22635671, 0xff26fcc9, + 0x2e238253, 0xf36628eb, 0x9144a1d2, 0x4c010b6a, 0x5501b3a0, 0x88441918, 0xea669021, 0x37233a99, + 0xd867e1b5, 0x05224b0d, 0x6700c234, 0xba45688c, 0xa345d046, 0x7e007afe, 0x1c22f3c7, 0xc167597f, + 0xc747336e, 0x1a0299d6, 0x782010ef, 0xa565ba57, 0xbc65029d, 0x6120a825, 0x0302211c, 0xde478ba4, + 0x31035088, 0xec46fa30, 0x8e647309, 0x5321d9b1, 0x4a21617b, 0x9764cbc3, 0xf54642fa, 0x2803e842}; // Used to fetch a naturally-aligned 32-bit word in little endian byte-order static inline uint32_t LE_LOAD32(const uint8_t *p) { - return DecodeFixed32(reinterpret_cast(p)); + return DecodeFixed32(reinterpret_cast(p)); } -uint32_t Extend(uint32_t crc, const char* buf, size_t size) { +uint32_t Extend(uint32_t crc, const char *buf, size_t size) { const uint8_t *p = reinterpret_cast(buf); const uint8_t *e = p + size; uint32_t l = crc ^ 0xffffffffu; -#define STEP1 do { \ - int c = (l & 0xff) ^ *p++; \ - l = table0_[c] ^ (l >> 8); \ -} while (0) -#define STEP4 do { \ - uint32_t c = l ^ LE_LOAD32(p); \ - p += 4; \ - l = table3_[c & 0xff] ^ \ - table2_[(c >> 8) & 0xff] ^ \ - table1_[(c >> 16) & 0xff] ^ \ - table0_[c >> 24]; \ -} while (0) +#define STEP1 \ + do { \ + int c = (l & 0xff) ^ *p++; \ + l = table0_[c] ^ (l >> 8); \ + } while (0) +#define STEP4 \ + do { \ + uint32_t c = l ^ LE_LOAD32(p); \ + p += 4; \ + l = table3_[c & 0xff] ^ table2_[(c >> 8) & 0xff] ^ table1_[(c >> 16) & 0xff] ^ \ + table0_[c >> 24]; \ + } while (0) // Point x at first 4-byte aligned byte in string. This might be // just past the end of the string. const uintptr_t pval = reinterpret_cast(p); - const uint8_t* x = reinterpret_cast(((pval + 3) >> 2) << 2); + const uint8_t *x = reinterpret_cast(((pval + 3) >> 2) << 2); if (x <= e) { // Process bytes until finished or p is 4-byte aligned while (p != x) { @@ -316,11 +184,14 @@ uint32_t Extend(uint32_t crc, const char* buf, size_t size) { } } // Process bytes 16 at a time - while ((e-p) >= 16) { - STEP4; STEP4; STEP4; STEP4; + while ((e - p) >= 16) { + STEP4; + STEP4; + STEP4; + STEP4; } // Process bytes 4 at a time - while ((e-p) >= 4) { + while ((e - p) >= 4) { STEP4; } // Process the last few bytes diff --git a/src/leveldb/util/crc32c.h b/src/leveldb/util/crc32c.h index 180310477..7ffc0d4dc 100644 --- a/src/leveldb/util/crc32c.h +++ b/src/leveldb/util/crc32c.h @@ -21,9 +21,7 @@ namespace crc32c { extern uint32_t Extend(uint32_t init_crc, const char* data, size_t n); // Return the crc32c of data[0,n-1] -inline uint32_t Value(const char* data, size_t n) { - return Extend(0, data, n); -} +inline uint32_t Value(const char* data, size_t n) { return Extend(0, data, n); } static const uint32_t kMaskDelta = 0xa282ead8ul; diff --git a/src/leveldb/util/crc32c_test.cc b/src/leveldb/util/crc32c_test.cc index 473e59dc6..ae745223f 100644 --- a/src/leveldb/util/crc32c_test.cc +++ b/src/leveldb/util/crc32c_test.cc @@ -12,7 +12,7 @@ namespace leveldb { namespace crc32c { -class CRC { }; +class CRC {}; TEST(CRC, StandardResults) { // From rfc3720 section B.4. @@ -35,30 +35,17 @@ TEST(CRC, StandardResults) { ASSERT_EQ(0x113fdb5cu, Value(buf, sizeof(buf))); unsigned char data[48] = { - 0x01, 0xc0, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x04, 0x00, - 0x00, 0x00, 0x00, 0x14, - 0x00, 0x00, 0x00, 0x18, - 0x28, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, + 0x01, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, + 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x18, 0x28, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }; ASSERT_EQ(0xd9963a56u, Value(reinterpret_cast(data), sizeof(data))); } -TEST(CRC, Values) { - ASSERT_NE(Value("a", 1), Value("foo", 3)); -} +TEST(CRC, Values) { ASSERT_NE(Value("a", 1), Value("foo", 3)); } -TEST(CRC, Extend) { - ASSERT_EQ(Value("hello world", 11), - Extend(Value("hello ", 6), "world", 5)); -} +TEST(CRC, Extend) { ASSERT_EQ(Value("hello world", 11), Extend(Value("hello ", 6), "world", 5)); } TEST(CRC, Mask) { uint32_t crc = Value("foo", 3); @@ -71,6 +58,4 @@ TEST(CRC, Mask) { } // namespace crc32c } // namespace leveldb -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} +int main(int argc, char** argv) { return leveldb::test::RunAllTests(); } diff --git a/src/leveldb/util/dfs.cc b/src/leveldb/util/dfs.cc index 281e20574..9c2c76ec0 100644 --- a/src/leveldb/util/dfs.cc +++ b/src/leveldb/util/dfs.cc @@ -15,25 +15,23 @@ namespace leveldb { static void* handle = NULL; Dfs* Dfs::NewDfs(const std::string& so_path, const std::string& conf) { - dlerror(); - fprintf(stderr, "Open %s\n", so_path.c_str()); - handle = dlopen(so_path.c_str(), RTLD_LAZY | RTLD_LOCAL | RTLD_DEEPBIND); - const char* err = dlerror(); - if (handle == NULL) { - fprintf(stderr, "Open %s fail: %s\n", so_path.c_str(), err); - return NULL; - } - - - DfsCreator creator = (DfsCreator)dlsym(handle, "NewDfs"); - err = dlerror(); - if (err != NULL) { - fprintf(stderr, "Load NewDfs from %s fail: %s\n", so_path.c_str(), err); - return NULL; - } - return (*creator)(conf.c_str()); + dlerror(); + fprintf(stderr, "Open %s\n", so_path.c_str()); + handle = dlopen(so_path.c_str(), RTLD_LAZY | RTLD_LOCAL | RTLD_DEEPBIND); + const char* err = dlerror(); + if (handle == NULL) { + fprintf(stderr, "Open %s fail: %s\n", so_path.c_str(), err); + return NULL; + } + + DfsCreator creator = (DfsCreator)dlsym(handle, "NewDfs"); + err = dlerror(); + if (err != NULL) { + fprintf(stderr, "Load NewDfs from %s fail: %s\n", so_path.c_str(), err); + return NULL; + } + return (*creator)(conf.c_str()); } - } /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/leveldb/util/dfs_read_thread_limiter.h b/src/leveldb/util/dfs_read_thread_limiter.h new file mode 100644 index 000000000..be6817ba8 --- /dev/null +++ b/src/leveldb/util/dfs_read_thread_limiter.h @@ -0,0 +1,59 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Author: tianye15@baidu.com + +#pragma once +#include "common/semaphore.h" +#include "common/rwmutex.h" + +namespace leveldb { + +class DfsReadThreadLimiter { + public: + class Token { + public: + explicit Token(const std::shared_ptr &limiter) : limiter_(limiter) {} + Token(const Token &) = delete; + Token(const Token &&) = delete; + void operator=(const Token &) = delete; + + ~Token() { limiter_->Release(); } + + private: + std::shared_ptr limiter_; + }; + + DfsReadThreadLimiter(const DfsReadThreadLimiter &) = delete; + DfsReadThreadLimiter(const DfsReadThreadLimiter &&) = delete; + void operator=(const DfsReadThreadLimiter &) = delete; + + void SetLimit(int64_t val) { + decltype(limiter_) new_limiter{new common::Semaphore{val}}; + common::WriteLock _(&lock_); + swap(limiter_, new_limiter); + } + + std::unique_ptr GetToken() { + common::ReadLock _(&lock_); + if (limiter_->TryAcquire()) { + return std::unique_ptr{new Token{limiter_}}; + } + return nullptr; + } + + inline static DfsReadThreadLimiter &Instance(); + + private: + DfsReadThreadLimiter() = default; + + std::shared_ptr limiter_; + common::RWMutex lock_; +}; + +DfsReadThreadLimiter &DfsReadThreadLimiter::Instance() { + static DfsReadThreadLimiter instance; + return instance; +} +} diff --git a/src/leveldb/util/env.cc b/src/leveldb/util/env.cc index 49ff4e761..f018e731c 100644 --- a/src/leveldb/util/env.cc +++ b/src/leveldb/util/env.cc @@ -11,60 +11,49 @@ namespace leveldb { EnvOptions::EnvOptions(const Options& options) { - use_direct_io_read = options.use_direct_io_read; - use_direct_io_write = options.use_direct_io_write; - posix_write_buffer_size = options.posix_write_buffer_size; + use_direct_io_read = options.use_direct_io_read; + use_direct_io_write = options.use_direct_io_write; + posix_write_buffer_size = options.posix_write_buffer_size; } -Env::~Env() { -} +Env::~Env() {} -SequentialFile::~SequentialFile() { -} +SequentialFile::~SequentialFile() {} -RandomAccessFile::~RandomAccessFile() { -} +RandomAccessFile::~RandomAccessFile() {} -WritableFile::~WritableFile() { -} +WritableFile::~WritableFile() {} -Logger::~Logger() { -} +Logger::~Logger() {} Logger* Logger::default_logger_ = NULL; -void Logger::SetDefaultLogger(Logger* logger) { - default_logger_ = logger; -} +void Logger::SetDefaultLogger(Logger* logger) { default_logger_ = logger; } -Logger* Logger::DefaultLogger() { - return default_logger_; -} +Logger* Logger::DefaultLogger() { return default_logger_; } -FileLock::~FileLock() { -} +FileLock::~FileLock() {} -void Log(Logger* info_log, const char* format, ...) { +void LogImpl(const char* file, int64_t line, Logger* info_log, const char* format, ...) { if (info_log != NULL) { va_list ap; va_start(ap, format); - info_log->Logv(format, ap); + info_log->Logv(file, line, format, ap); va_end(ap); } } -void Log(const char* format, ...) { +void LogImpl(const char* file, int64_t line, const char* format, ...) { Logger* l = Logger::DefaultLogger(); if (l != NULL) { va_list ap; va_start(ap, format); - l->Logv(format, ap); + l->Logv(file, line, format, ap); va_end(ap); } } -static Status DoWriteStringToFile(Env* env, const Slice& data, - const std::string& fname, +static Status DoWriteStringToFile(Env* env, const Slice& data, const std::string& fname, bool should_sync) { WritableFile* file; Status s = env->NewWritableFile(fname, &file, EnvOptions()); @@ -85,13 +74,11 @@ static Status DoWriteStringToFile(Env* env, const Slice& data, return s; } -Status WriteStringToFile(Env* env, const Slice& data, - const std::string& fname) { +Status WriteStringToFile(Env* env, const Slice& data, const std::string& fname) { return DoWriteStringToFile(env, data, fname, false); } -Status WriteStringToFileSync(Env* env, const Slice& data, - const std::string& fname) { +Status WriteStringToFileSync(Env* env, const Slice& data, const std::string& fname) { return DoWriteStringToFile(env, data, fname, true); } @@ -120,7 +107,6 @@ Status ReadFileToString(Env* env, const std::string& fname, std::string* data) { return s; } -EnvWrapper::~EnvWrapper() { -} +EnvWrapper::~EnvWrapper() {} } // namespace leveldb diff --git a/src/leveldb/util/env_dfs.cc b/src/leveldb/util/env_dfs.cc index aa6415a1d..ef16b8173 100644 --- a/src/leveldb/util/env_dfs.cc +++ b/src/leveldb/util/env_dfs.cc @@ -15,6 +15,7 @@ #include #include +#include "glog/logging.h" #include "hdfs.h" #include "leveldb/env.h" #include "leveldb/status.h" @@ -22,7 +23,8 @@ #include "leveldb/table_utils.h" #include "nfs.h" #include "util/mutexlock.h" -#include "../common/counter.h" +#include "common/counter.h" +#include "quota/flow_controller.h" namespace leveldb { @@ -75,342 +77,321 @@ tera::Counter dfs_info_hang_counter; tera::Counter dfs_opened_read_files_counter; tera::Counter dfs_opened_write_files_counter; -bool split_filename(const std::string filename, - std::string* path, std::string* file) -{ - size_t pos = filename.rfind('/'); - if (pos == std::string::npos) { - return false; - } - *path = filename.substr(0, pos); - *file = filename.substr(pos + 1); - return true; +bool split_filename(const std::string& filename, std::string* path, std::string* file) { + size_t pos = filename.rfind('/'); + if (pos == std::string::npos) { + return false; + } + *path = filename.substr(0, pos); + *file = filename.substr(pos + 1); + return true; } -char* get_time_str(char* p, size_t len) -{ - const uint64_t thread_id = DfsEnv::gettid(); - struct timeval now_tv; - gettimeofday(&now_tv, NULL); - const time_t seconds = now_tv.tv_sec; - struct tm t; - localtime_r(&seconds, &t); - p += snprintf(p, len, - "%04d/%02d/%02d-%02d:%02d:%02d.%06d %llu", - t.tm_year + 1900, - t.tm_mon + 1, - t.tm_mday, - t.tm_hour, - t.tm_min, - t.tm_sec, - static_cast(now_tv.tv_usec), - static_cast(thread_id)); - return p; +char* get_time_str(char* p, size_t len) { + const uint64_t thread_id = DfsEnv::gettid(); + struct timeval now_tv; + gettimeofday(&now_tv, NULL); + const time_t seconds = now_tv.tv_sec; + struct tm t; + localtime_r(&seconds, &t); + p += snprintf(p, len, "%04d/%02d/%02d-%02d:%02d:%02d.%06d %llu", t.tm_year + 1900, t.tm_mon + 1, + t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec, static_cast(now_tv.tv_usec), + static_cast(thread_id)); + return p; } // Log error message -static Status IOError(const std::string& context, int err_number) -{ - if (err_number == EACCES) { - return Status::IOPermissionDenied(context, strerror(err_number)); - } - return Status::IOError(context, strerror(err_number)); +static Status IOError(const std::string& context, int err_number) { + if (err_number == EACCES) { + return Status::IOPermissionDenied(context, strerror(err_number)); + } + return Status::IOError(context, strerror(err_number)); } - -class DfsReadableFile: virtual public SequentialFile, virtual public RandomAccessFile { -private: - Dfs* fs_; - std::string filename_; - DfsFile* file_; - mutable ssize_t now_pos; - //mutable port::Mutex mu_; -public: - DfsReadableFile(Dfs* fs, const std::string& fname) - : fs_(fs), filename_(fname), file_(NULL), - now_pos(-1) { - tera::AutoCounter ac(&dfs_open_hang_counter, "OpenFile", filename_.c_str()); - file_ = fs->OpenFile(filename_, RDONLY); - dfs_open_counter.Inc(); - // assert(hfile_ != NULL); - if (file_ == NULL) { - dfs_open_error_counter.Inc(); - Log("[env_dfs]: open file for read fail: %s\n", filename_.c_str()); - } else { - dfs_opened_read_files_counter.Inc(); - } - now_pos = 0; - } - - virtual ~DfsReadableFile() { - if (file_) { - tera::AutoCounter ac(&dfs_close_hang_counter, "CloseFile", filename_.c_str()); - if (file_->CloseFile()) { - Log("[env_dfs]: close dfs file fail: %s\n", IOError(filename_, errno).ToString().c_str()); - dfs_close_error_counter.Inc(); - } - dfs_close_counter.Inc(); - dfs_opened_read_files_counter.Dec(); - } - delete file_; - file_ = NULL; - } - - bool isValid() { - return (file_ != NULL); +class DfsReadableFile : virtual public SequentialFile, virtual public RandomAccessFile { + private: + Dfs* fs_; + std::string filename_; + DfsFile* file_; + mutable ssize_t now_pos; + // mutable port::Mutex mu_; + public: + DfsReadableFile(Dfs* fs, const std::string& fname) + : fs_(fs), filename_(fname), file_(NULL), now_pos(-1) { + tera::AutoCounter ac(&dfs_open_hang_counter, "OpenFile", filename_.c_str()); + file_ = fs->OpenFile(filename_, RDONLY); + dfs_open_counter.Inc(); + // assert(hfile_ != NULL); + if (file_ == NULL) { + dfs_open_error_counter.Inc(); + LOG(ERROR) << "[env_dfs]: open file for read fail: " << filename_.c_str(); + } else { + dfs_opened_read_files_counter.Inc(); } + now_pos = 0; + } - virtual Status Read(size_t n, Slice* result, char* scratch) { - now_pos = -1; - Status s; - int64_t t = tera::get_micros(); - tera::AutoCounter ac(&dfs_read_hang_counter, "Read", filename_.c_str()); - int32_t bytes_read = file_->Read(scratch, (int32_t)n); - dfs_read_delay_counter.Add(tera::get_micros() - t); - dfs_read_counter.Inc(); - *result = Slice(scratch, (bytes_read < 0) ? 0 : bytes_read); - if (bytes_read < static_cast(n)) { - if (feof()) { - // end of the file - } else { - dfs_read_error_counter.Inc(); - s = IOError(filename_, errno); - } - } - if (bytes_read > 0) { - dfs_read_size_counter.Add(bytes_read); - } - return s; + virtual ~DfsReadableFile() { + if (file_) { + tera::AutoCounter ac(&dfs_close_hang_counter, "CloseFile", filename_.c_str()); + if (file_->CloseFile()) { + LOG(ERROR) << "[env_dfs]: close dfs file fail: " + << IOError(filename_, errno).ToString().c_str(); + dfs_close_error_counter.Inc(); + } + dfs_close_counter.Inc(); + dfs_opened_read_files_counter.Dec(); } + delete file_; + file_ = NULL; + } - virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const { - Status s; - int64_t t = tera::get_micros(); - tera::AutoCounter ac(&dfs_read_hang_counter, "Read", filename_.c_str()); - int32_t bytes_read = file_->Pread(offset, scratch, n); - dfs_read_delay_counter.Add(tera::get_micros() - t); - dfs_read_counter.Inc(); - *result = Slice(scratch, (bytes_read < 0) ? 0 : bytes_read); - if (bytes_read < 0) { - dfs_read_error_counter.Inc(); - s = IOError(filename_, errno); - } - if (bytes_read > 0) { - dfs_read_size_counter.Add(bytes_read); - } - return s; - } + bool isValid() { return (file_ != NULL); } - virtual Status Skip(uint64_t n) { - int64_t current = 0; - { - tera::AutoCounter ac(&dfs_tell_hang_counter, "Skip", filename_.c_str()); - current = file_->Tell(); - dfs_tell_counter.Inc(); - } - if (current < 0) { - dfs_tell_error_counter.Inc(); - return IOError(filename_, errno); - } - // seek to new offset - int64_t newoffset = current + n; - - tera::AutoCounter ac(&dfs_other_hang_counter, "Seek", filename_.c_str()); - int val = file_->Seek(newoffset); - dfs_other_counter.Inc(); - if (val < 0) { - dfs_other_error_counter.Inc(); - return IOError(filename_, errno); - } - return Status::OK(); - } + virtual std::string GetFileName() const override { return filename_; } -private: - // at the end of file ? - bool feof() { - tera::AutoCounter ac(&dfs_tell_hang_counter, "feof", filename_.c_str()); - dfs_tell_counter.Inc(); - if (file_ && file_->Tell() >= fileSize()) { - return true; - } - return false; + virtual Status Read(size_t n, Slice* result, char* scratch) { + tera::DfsReadThroughputHardLimiter().BlockingConsume(n); + now_pos = -1; + Status s; + int64_t t = tera::get_micros(); + tera::AutoCounter ac(&dfs_read_hang_counter, "Read", filename_.c_str()); + int32_t bytes_read = file_->Read(scratch, (int32_t)n); + dfs_read_delay_counter.Add(tera::get_micros() - t); + dfs_read_counter.Inc(); + *result = Slice(scratch, (bytes_read < 0) ? 0 : bytes_read); + if (bytes_read < static_cast(n)) { + if (feof()) { + // end of the file + } else { + dfs_read_error_counter.Inc(); + s = IOError(filename_, errno); + } + } + if (bytes_read > 0) { + dfs_read_size_counter.Add(bytes_read); + } + return s; + } + + virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const { + tera::DfsReadThroughputHardLimiter().BlockingConsume(n); + Status s; + int64_t t = tera::get_micros(); + tera::AutoCounter ac(&dfs_read_hang_counter, "Read", filename_.c_str()); + int32_t bytes_read = file_->Pread(offset, scratch, n); + dfs_read_delay_counter.Add(tera::get_micros() - t); + dfs_read_counter.Inc(); + *result = Slice(scratch, (bytes_read < 0) ? 0 : bytes_read); + if (bytes_read < 0) { + dfs_read_error_counter.Inc(); + s = IOError(filename_, errno); + } + if (bytes_read > 0) { + dfs_read_size_counter.Add(bytes_read); + } + return s; + } + + virtual Status Skip(uint64_t n) { + int64_t current = 0; + { + tera::AutoCounter ac(&dfs_tell_hang_counter, "Skip", filename_.c_str()); + current = file_->Tell(); + dfs_tell_counter.Inc(); + } + if (current < 0) { + dfs_tell_error_counter.Inc(); + return IOError(filename_, errno); + } + // seek to new offset + int64_t newoffset = current + n; + + tera::AutoCounter ac(&dfs_other_hang_counter, "Seek", filename_.c_str()); + int val = file_->Seek(newoffset); + dfs_other_counter.Inc(); + if (val < 0) { + dfs_other_error_counter.Inc(); + return IOError(filename_, errno); } - // file size - int64_t fileSize() { - tera::AutoCounter ac(&dfs_info_hang_counter, "GetFileSize", filename_.c_str()); - dfs_info_counter.Inc(); - uint64_t size = 0; - if (fs_->GetFileSize(filename_, &size) != 0) { - dfs_info_error_counter.Inc(); - return -1; - } - return size; + return Status::OK(); + } + + private: + // at the end of file ? + bool feof() { + tera::AutoCounter ac(&dfs_tell_hang_counter, "feof", filename_.c_str()); + dfs_tell_counter.Inc(); + if (file_ && file_->Tell() >= fileSize()) { + return true; + } + return false; + } + // file size + int64_t fileSize() { + tera::AutoCounter ac(&dfs_info_hang_counter, "GetFileSize", filename_.c_str()); + dfs_info_counter.Inc(); + uint64_t size = 0; + if (fs_->GetFileSize(filename_, &size) != 0) { + dfs_info_error_counter.Inc(); + return -1; } + return size; + } }; // WritableFile -class DfsWritableFile: public WritableFile { -private: - Dfs* fs_; - std::string filename_; - DfsFile* file_; -public: - DfsWritableFile(Dfs* fs, const std::string& fname) - : fs_(fs), filename_(fname) , file_(NULL) { - tera::AutoCounter ac(&dfs_open_hang_counter, "OpenFile", filename_.c_str()); - file_ = fs_->OpenFile(filename_, WRONLY); - dfs_open_counter.Inc(); - if (file_ == NULL) { - dfs_open_error_counter.Inc(); - Log("[env_dfs]: open file for write fail: %s\n", fname.c_str()); - } else { - dfs_opened_write_files_counter.Inc(); - } - } - virtual ~DfsWritableFile() { - if (file_ != NULL) { - tera::AutoCounter ac(&dfs_close_hang_counter, "CloseFile", filename_.c_str()); - if (file_->CloseFile()) { - Log("[env_dfs]: close dfs file fail: %s\n", IOError(filename_, errno).ToString().c_str()); - dfs_close_error_counter.Inc(); - } - dfs_close_counter.Inc(); - dfs_opened_write_files_counter.Dec(); - } - delete file_; - } - - bool isValid() { - return file_ != NULL; - } - - const std::string& getName() { - return filename_; - } - - virtual Status Append(const Slice& data) { - const char* src = data.data(); - size_t left = data.size(); - - int64_t s = tera::get_micros(); - tera::AutoCounter ac(&dfs_write_hang_counter, "Write", filename_.c_str()); - int32_t ret = file_->Write(src, left); - dfs_write_delay_counter.Add(tera::get_micros() - s); - dfs_write_counter.Inc(); - - if (ret != static_cast(left)) { - dfs_write_error_counter.Inc(); - return IOError(filename_, errno); - } - dfs_write_size_counter.Add(ret); - return Status::OK(); - } - - virtual Status Flush() { - //tera::AutoCounter ac(&dfs_flush_hang_counter, "Flush", filename_.c_str()); - //dfs_flush_counter.Inc(); - // dfs flush efficiency is too low, close it - //if (file_->Flush() != 0) { - // return IOError(filename_, errno); - //} - return Status::OK(); - } - - virtual Status Sync() { - tera::AutoCounter ac(&dfs_sync_hang_counter, "Sync", filename_.c_str()); - Status s; - uint64_t t = EnvDfs()->NowMicros(); - - int32_t ret = file_->Sync(); - dfs_sync_counter.Inc(); - if (ret != 0) { - dfs_sync_error_counter.Inc(); - Log("[env_dfs] dfs sync fail: %s\n", filename_.c_str()); - s = IOError(filename_, errno); - } - - uint64_t diff = EnvDfs()->NowMicros() - t; - dfs_sync_delay_counter.Add(diff); - if (diff > 2000000) { - Log("[env_dfs] dfs sync for %s use %.2fms\n", - filename_.c_str(), diff / 1000.0); - } - return s; - } +class DfsWritableFile : public WritableFile { + private: + Dfs* fs_; + std::string filename_; + DfsFile* file_; + + public: + DfsWritableFile(Dfs* fs, const std::string& fname) : fs_(fs), filename_(fname), file_(NULL) { + tera::AutoCounter ac(&dfs_open_hang_counter, "OpenFile", filename_.c_str()); + file_ = fs_->OpenFile(filename_, WRONLY); + dfs_open_counter.Inc(); + if (file_ == NULL) { + dfs_open_error_counter.Inc(); + LOG(ERROR) << "[env_dfs]: open file for write fail: " << fname.c_str(); + } else { + dfs_opened_write_files_counter.Inc(); + } + } + virtual ~DfsWritableFile() { + if (file_ != NULL) { + tera::AutoCounter ac(&dfs_close_hang_counter, "CloseFile", filename_.c_str()); + if (file_->CloseFile()) { + LOG(ERROR) << "[env_dfs]: close dfs file fail: " + << IOError(filename_, errno).ToString().c_str(); + dfs_close_error_counter.Inc(); + } + dfs_close_counter.Inc(); + dfs_opened_write_files_counter.Dec(); + } + delete file_; + } + + bool isValid() { return file_ != NULL; } + + std::string GetFileName() const override { return filename_; } + + virtual Status Append(const Slice& data) { + tera::DfsWriteThroughputHardLimiter().BlockingConsume(data.size()); + const char* src = data.data(); + size_t left = data.size(); + + int64_t s = tera::get_micros(); + tera::AutoCounter ac(&dfs_write_hang_counter, "Write", filename_.c_str()); + int32_t ret = file_->Write(src, left); + dfs_write_delay_counter.Add(tera::get_micros() - s); + dfs_write_counter.Inc(); + + if (ret != static_cast(left)) { + dfs_write_error_counter.Inc(); + return IOError(filename_, errno); + } + dfs_write_size_counter.Add(ret); + return Status::OK(); + } + + virtual Status Flush() { + // tera::AutoCounter ac(&dfs_flush_hang_counter, "Flush", + // filename_.c_str()); + // dfs_flush_counter.Inc(); + // dfs flush efficiency is too low, close it + // if (file_->Flush() != 0) { + // return IOError(filename_, errno); + //} + return Status::OK(); + } - virtual Status Close() { - Status result; - if (file_ != NULL) { - tera::AutoCounter ac(&dfs_close_hang_counter, "CloseFile", filename_.c_str()); - if (file_->CloseFile() != 0) { - result = IOError(filename_, errno); - dfs_close_error_counter.Inc(); - } - dfs_close_counter.Inc(); - dfs_opened_write_files_counter.Dec(); - } - delete file_; - file_ = NULL; - return result; - } + virtual Status Sync() { + tera::AutoCounter ac(&dfs_sync_hang_counter, "Sync", filename_.c_str()); + Status s; + uint64_t t = EnvDfs()->NowMicros(); + + int32_t ret = file_->Sync(); + dfs_sync_counter.Inc(); + if (ret != 0) { + dfs_sync_error_counter.Inc(); + LOG(ERROR) << "[env_dfs] dfs sync fail: " << filename_.c_str(); + s = IOError(filename_, errno); + } + + uint64_t diff = EnvDfs()->NowMicros() - t; + dfs_sync_delay_counter.Add(diff); + if (diff > 2000000) { + LOG(WARNING) << "[env_dfs] dfs sync for " << filename_.c_str() << " use " << diff / 1000.0 + << "ms"; + } + return s; + } + + virtual Status Close() { + Status result; + if (file_ != NULL) { + tera::AutoCounter ac(&dfs_close_hang_counter, "CloseFile", filename_.c_str()); + if (file_->CloseFile() != 0) { + result = IOError(filename_, errno); + dfs_close_error_counter.Inc(); + } + dfs_close_counter.Inc(); + dfs_opened_write_files_counter.Dec(); + } + delete file_; + file_ = NULL; + return result; + } }; class DfsFileLock : public FileLock { -public: - DfsFileLock(const std::string& path) : dir_path_(path) {} - std::string dir_path_; + public: + DfsFileLock(const std::string& path) : dir_path_(path) {} + std::string dir_path_; }; -DfsEnv::DfsEnv(Dfs* dfs) - : EnvWrapper(Env::Default()), dfs_(dfs) { -} +DfsEnv::DfsEnv(Dfs* dfs) : EnvWrapper(Env::Default()), dfs_(dfs) {} -DfsEnv::~DfsEnv() -{ -} +DfsEnv::~DfsEnv() {} // SequentialFile -Status DfsEnv::NewSequentialFile(const std::string& fname, SequentialFile** result) -{ - DfsReadableFile* f = new DfsReadableFile(dfs_, fname); - if (f == NULL || !f->isValid()) { - delete f; - *result = NULL; - return IOError(fname, errno); - } - *result = dynamic_cast(f); - return Status::OK(); +Status DfsEnv::NewSequentialFile(const std::string& fname, SequentialFile** result) { + DfsReadableFile* f = new DfsReadableFile(dfs_, fname); + if (f == NULL || !f->isValid()) { + delete f; + *result = NULL; + return IOError(fname, errno); + } + *result = dynamic_cast(f); + return Status::OK(); } // random read file -Status DfsEnv::NewRandomAccessFile(const std::string& fname, - RandomAccessFile** result, - const EnvOptions&) -{ - DfsReadableFile* f = new DfsReadableFile(dfs_, fname); - if (f == NULL || !f->isValid()) { - delete f; - *result = NULL; - return IOError(fname, errno); - } - *result = dynamic_cast(f); - return Status::OK(); +Status DfsEnv::NewRandomAccessFile(const std::string& fname, RandomAccessFile** result, + const EnvOptions&) { + DfsReadableFile* f = new DfsReadableFile(dfs_, fname); + if (f == NULL || !f->isValid()) { + delete f; + *result = NULL; + return IOError(fname, errno); + } + *result = dynamic_cast(f); + return Status::OK(); } // writable -Status DfsEnv::NewWritableFile(const std::string& fname, - WritableFile** result, - const EnvOptions&) -{ - Status s; - DfsWritableFile* f = new DfsWritableFile(dfs_, fname); - if (f == NULL || !f->isValid()) { - delete f; - *result = NULL; - return IOError(fname, errno); - } - *result = dynamic_cast(f); - return Status::OK(); +Status DfsEnv::NewWritableFile(const std::string& fname, WritableFile** result, const EnvOptions&) { + Status s; + DfsWritableFile* f = new DfsWritableFile(dfs_, fname); + if (f == NULL || !f->isValid()) { + delete f; + *result = NULL; + return IOError(fname, errno); + } + *result = dynamic_cast(f); + return Status::OK(); } // returns: @@ -418,173 +399,162 @@ Status DfsEnv::NewWritableFile(const std::string& fname, // nofound: not found // timeout: timeout, unknown, should retry // ioerror: io error -Status DfsEnv::FileExists(const std::string& fname) -{ - tera::AutoCounter ac(&dfs_exists_hang_counter, "Exists", fname.c_str()); - int32_t retval = dfs_->Exists(fname); - dfs_exists_counter.Inc(); - if (retval == 0) { - return Status::OK(); - } else if (errno == ENOENT) { - return Status::NotFound("filestatus", fname); - } else if (errno == ETIMEDOUT) { - Log("[env_dfs] exists timeout: %s\n", fname.c_str()); - dfs_exists_error_counter.Inc(); - return Status::TimeOut("filestatus", fname); - } else { - dfs_exists_error_counter.Inc(); - return IOError(fname, errno); - } +Status DfsEnv::FileExists(const std::string& fname) { + tera::AutoCounter ac(&dfs_exists_hang_counter, "Exists", fname.c_str()); + int32_t retval = dfs_->Exists(fname); + dfs_exists_counter.Inc(); + if (retval == 0) { + return Status::OK(); + } else if (errno == ENOENT) { + return Status::NotFound("filestatus", fname); + } else if (errno == ETIMEDOUT) { + LOG(ERROR) << "[env_dfs] exists timeout: " << fname.c_str(); + dfs_exists_error_counter.Inc(); + return Status::TimeOut("filestatus", fname); + } else { + dfs_exists_error_counter.Inc(); + return IOError(fname, errno); + } } Status DfsEnv::CopyFile(const std::string& from, const std::string& to) { - tera::AutoCounter ac(&dfs_other_hang_counter, "Copy", from.c_str()); - dfs_other_counter.Inc(); - std::cerr << "DfsEnv: " << from << " --> " << to << std::endl; - if (from != to && dfs_->Copy(from, to) != 0) { - dfs_other_error_counter.Inc(); - return Status::IOError("DFS Copy", from); - } - return Status::OK(); + tera::AutoCounter ac(&dfs_other_hang_counter, "Copy", from.c_str()); + dfs_other_counter.Inc(); + std::cerr << "DfsEnv: " << from << " --> " << to << std::endl; + if (from != to && dfs_->Copy(from, to) != 0) { + dfs_other_error_counter.Inc(); + return Status::IOError("DFS Copy", from); + } + return Status::OK(); } - -Status DfsEnv::GetChildren(const std::string& path, std::vector* result) -{ - tera::AutoCounter ac(&dfs_list_hang_counter, "ListDirectory", path.c_str()); - dfs_list_counter.Inc(); - if (0 == dfs_->ListDirectory(path, result)) { - return Status::OK(); - } - if (errno == ETIMEDOUT) { - Log("[env_dfs] GetChildren timeout: %s\n", path.c_str()); - dfs_list_error_counter.Inc(); - return Status::TimeOut("ListDirectory", path); - } else { - Log("[env_dfs] GetChildren call with path not exists: %s\n", path.data()); - dfs_list_error_counter.Inc(); - return IOError("Path not exist " + path, errno); - } +Status DfsEnv::GetChildren(const std::string& path, std::vector* result) { + tera::AutoCounter ac(&dfs_list_hang_counter, "ListDirectory", path.c_str()); + dfs_list_counter.Inc(); + if (0 == dfs_->ListDirectory(path, result)) { + return Status::OK(); + } + if (errno == ETIMEDOUT) { + LOG(ERROR) << "[env_dfs] GetChildren timeout: " << path.c_str(); + dfs_list_error_counter.Inc(); + return Status::TimeOut("ListDirectory", path); + } else { + LOG(ERROR) << "[env_dfs] GetChildren call with path not exists: " << path.data(); + dfs_list_error_counter.Inc(); + return IOError("Path not exist " + path, errno); + } } -bool DfsEnv::CheckDelete(const std::string& fname, std::vector* flags) -{ - std::string path, file; - bool r = split_filename(fname, &path, &file); - assert(r); - std::string prefix = file + "_del_"; - std::vector files; - dfs_->ListDirectory(path, &files); - size_t max_len = 0; - size_t value = 0; - for (size_t i = 0; i < files.size(); i++) { - if (files[i].compare(0, prefix.size(), prefix) != 0) { - continue; - } - flags->push_back(path + "/" + files[i]); - std::string id_str = files[i].substr(prefix.size()); - if (id_str.size() > 64) { - return false; - } - if (max_len < id_str.size()) { - value <<= (id_str.size() - max_len); - value ++; - max_len = id_str.size(); - } else { - value += (1ULL << (max_len - id_str.size())); - } +bool DfsEnv::CheckDelete(const std::string& fname, std::vector* flags) { + std::string path, file; + bool r = split_filename(fname, &path, &file); + assert(r); + std::string prefix = file + "_del_"; + std::vector files; + dfs_->ListDirectory(path, &files); + size_t max_len = 0; + size_t value = 0; + for (size_t i = 0; i < files.size(); i++) { + if (files[i].compare(0, prefix.size(), prefix) != 0) { + continue; + } + flags->push_back(path + "/" + files[i]); + std::string id_str = files[i].substr(prefix.size()); + if (id_str.size() > 64) { + return false; + } + if (max_len < id_str.size()) { + value <<= (id_str.size() - max_len); + value++; + max_len = id_str.size(); + } else { + value += (1ULL << (max_len - id_str.size())); } - return (value == (1ULL << max_len)); + } + return (value == (1ULL << max_len)); } -Status DfsEnv::DeleteFile(const std::string& fname) -{ - tera::AutoCounter ac(&dfs_delete_hang_counter, "DeleteFile", fname.c_str()); - dfs_delete_counter.Inc(); - if (dfs_->Delete(fname) == 0) { - Log("[env_dfs] nobody like this file: %s", fname.c_str()); - return Status::OK(); - } - dfs_delete_error_counter.Inc(); - return IOError(fname, errno); +Status DfsEnv::DeleteFile(const std::string& fname) { + tera::AutoCounter ac(&dfs_delete_hang_counter, "DeleteFile", fname.c_str()); + dfs_delete_counter.Inc(); + if (dfs_->Delete(fname) == 0) { + LOG(INFO) << "[env_dfs] nobody like this file: " << fname.c_str(); + return Status::OK(); + } + dfs_delete_error_counter.Inc(); + return IOError(fname, errno); }; -Status DfsEnv::CreateDir(const std::string& name) -{ - tera::AutoCounter ac(&dfs_other_hang_counter, "CreateDirectory", name.c_str()); - dfs_other_counter.Inc(); - if (dfs_->CreateDirectory(name) == 0) { - return Status::OK(); - } - dfs_other_error_counter.Inc(); - return IOError(name, errno); +Status DfsEnv::CreateDir(const std::string& name) { + tera::AutoCounter ac(&dfs_other_hang_counter, "CreateDirectory", name.c_str()); + dfs_other_counter.Inc(); + if (dfs_->CreateDirectory(name) == 0) { + return Status::OK(); + } + dfs_other_error_counter.Inc(); + return IOError(name, errno); }; -Status DfsEnv::DeleteDir(const std::string& name) -{ - tera::AutoCounter ac(&dfs_delete_hang_counter, "DeleteDirectory", name.c_str()); - dfs_delete_counter.Inc(); - if (dfs_->DeleteDirectory(name) == 0) { - Log("[env_dfs] nobody like this dir: %s", name.c_str()); - return Status::OK(); - } - dfs_delete_error_counter.Inc(); - return IOError(name, errno); +Status DfsEnv::DeleteDir(const std::string& name) { + tera::AutoCounter ac(&dfs_delete_hang_counter, "DeleteDirectory", name.c_str()); + dfs_delete_counter.Inc(); + if (dfs_->DeleteDirectory(name) == 0) { + LOG(INFO) << "[env_dfs] nobody like this dir: " << name.c_str(); + return Status::OK(); + } + dfs_delete_error_counter.Inc(); + return IOError(name, errno); }; -Status DfsEnv::GetFileSize(const std::string& fname, uint64_t* size) -{ - tera::AutoCounter ac(&dfs_info_hang_counter, "GetFileSize", fname.c_str()); - dfs_info_counter.Inc(); - *size = 0L; - if (0 != dfs_->GetFileSize(fname, size)) { - dfs_info_error_counter.Inc(); - return IOError(fname, errno); - } else { - return Status::OK(); - } +Status DfsEnv::GetFileSize(const std::string& fname, uint64_t* size) { + tera::AutoCounter ac(&dfs_info_hang_counter, "GetFileSize", fname.c_str()); + dfs_info_counter.Inc(); + *size = 0L; + if (0 != dfs_->GetFileSize(fname, size)) { + dfs_info_error_counter.Inc(); + return IOError(fname, errno); + } else { + return Status::OK(); + } } /// -Status DfsEnv::RenameFile(const std::string& src, const std::string& target) -{ - tera::AutoCounter ac(&dfs_other_hang_counter, "RenameFile", src.c_str()); - dfs_other_counter.Inc(); - int res = dfs_->Rename(src, target); - if (res == 0) { - return Status::OK(); - } else { - dfs_other_error_counter.Inc(); - Log("[env_dfs] rename: %s -> %s failed", src.c_str(), target.c_str()); - return IOError("rename (" + src + ") -> (" + target + ") failed", errno); - } +Status DfsEnv::RenameFile(const std::string& src, const std::string& target) { + tera::AutoCounter ac(&dfs_other_hang_counter, "RenameFile", src.c_str()); + dfs_other_counter.Inc(); + int res = dfs_->Rename(src, target); + if (res == 0) { + return Status::OK(); + } else { + dfs_other_error_counter.Inc(); + LOG(ERROR) << "[env_dfs] rename: " << src.c_str() << " -> " << target.c_str() << " failed"; + return IOError("rename (" + src + ") -> (" + target + ") failed", errno); + } } -Status DfsEnv::LockFile(const std::string& fname, FileLock** lock) -{ - std::size_t found = fname.find_last_of("/"); - if (found == std::string::npos) { - return IOError("lock path error: " + fname, EINVAL); - } - std::string dir_path(fname.c_str(), found); - if (dfs_->LockDirectory(dir_path) != 0) { - return IOError("lock dir failed: " + dir_path, errno); - } - *lock = new DfsFileLock(dir_path); - return Status::OK(); +Status DfsEnv::LockFile(const std::string& fname, FileLock** lock) { + std::size_t found = fname.find_last_of("/"); + if (found == std::string::npos) { + return IOError("lock path error: " + fname, EINVAL); + } + std::string dir_path(fname.c_str(), found); + if (dfs_->LockDirectory(dir_path) != 0) { + return IOError("lock dir failed: " + dir_path, errno); + } + *lock = new DfsFileLock(dir_path); + return Status::OK(); } -Status DfsEnv::UnlockFile(FileLock* lock) -{ - if (DfsFileLock* dfs_lock = dynamic_cast(lock)) { - const std::string& dir_path = dfs_lock->dir_path_; - dfs_->UnlockDirectory(dir_path.c_str()); - delete lock; - return Status::OK(); - } else { - Log("[env_dfs]: wrong file lock at %p\n", lock); - abort(); - } +Status DfsEnv::UnlockFile(FileLock* lock) { + if (DfsFileLock* dfs_lock = dynamic_cast(lock)) { + const std::string& dir_path = dfs_lock->dir_path_; + dfs_->UnlockDirectory(dir_path.c_str()); + delete lock; + return Status::OK(); + } else { + LOG(ERROR) << "[env_dfs]: wrong file lock at " << lock; + abort(); + } } static bool inited = false; @@ -592,68 +562,60 @@ static port::Mutex mutex; static Env* dfs_env; void InitDfsEnv(const std::string& so_path, const std::string& conf) { - MutexLock l(&mutex); - if (inited) { - return; - } - Dfs* dfs = Dfs::NewDfs(so_path, conf); - if (dfs == NULL) { - abort(); - } - dfs_env = new DfsEnv(dfs); - inited = true; + MutexLock l(&mutex); + if (inited) { + return; + } + Dfs* dfs = Dfs::NewDfs(so_path, conf); + if (dfs == NULL) { + abort(); + } + dfs_env = new DfsEnv(dfs); + inited = true; } -void InitHdfsEnv() -{ - MutexLock l(&mutex); - if (inited) { - return; - } - Dfs* dfs = new Hdfs(); - dfs_env = new DfsEnv(dfs); - inited = true; +void InitHdfsEnv() { + MutexLock l(&mutex); + if (inited) { + return; + } + Dfs* dfs = new Hdfs(); + dfs_env = new DfsEnv(dfs); + inited = true; } -void InitHdfs2Env(const std::string& namenode_list) -{ - MutexLock l(&mutex); - if (inited) { - return; - } - Dfs* dfs = new Hdfs2(namenode_list); - dfs_env = new DfsEnv(dfs); - inited = true; +void InitHdfs2Env(const std::string& namenode_list) { + MutexLock l(&mutex); + if (inited) { + return; + } + Dfs* dfs = new Hdfs2(namenode_list); + dfs_env = new DfsEnv(dfs); + inited = true; } -void InitNfsEnv(const std::string& mountpoint, - const std::string& conf_path) -{ - MutexLock l(&mutex); - if (inited) { - return; - } - Nfs::Init(mountpoint, conf_path); - Dfs* dfs = Nfs::GetInstance(); - dfs_env = new DfsEnv(dfs); - inited = true; +void InitNfsEnv(const std::string& mountpoint, const std::string& conf_path) { + MutexLock l(&mutex); + if (inited) { + return; + } + Nfs::Init(mountpoint, conf_path); + Dfs* dfs = Nfs::GetInstance(); + dfs_env = new DfsEnv(dfs); + inited = true; } -Env* NewDfsEnv(Dfs* dfs) -{ - return new DfsEnv(dfs); -} +Env* NewDfsEnv(Dfs* dfs) { return new DfsEnv(dfs); } -Env* EnvDfs() -{ - MutexLock l(&mutex); - if (inited) { - return dfs_env; - } - Dfs* dfs = new Hdfs(); - dfs_env = new DfsEnv(dfs); - inited = true; +Env* EnvDfs() { + MutexLock l(&mutex); + if (inited) { return dfs_env; + } + Dfs* dfs = new Hdfs(); + dfs_env = new DfsEnv(dfs); + inited = true; + return dfs_env; } } // namespace leveldb diff --git a/src/leveldb/util/env_flash.cc b/src/leveldb/util/env_flash.cc index dd4a13224..173db750e 100644 --- a/src/leveldb/util/env_flash.cc +++ b/src/leveldb/util/env_flash.cc @@ -14,6 +14,7 @@ #include #include #include +#include "glog/logging.h" #include "leveldb/env.h" #include "leveldb/status.h" #include "leveldb/env_dfs.h" @@ -21,11 +22,10 @@ #include "util/hash.h" #include "util/mutexlock.h" #include "helpers/memenv/memenv.h" -#include "../common/counter.h" +#include "common/counter.h" #include "leveldb/env_flash.h" - namespace leveldb { tera::Counter ssd_read_counter; @@ -38,608 +38,625 @@ const int64_t kUpdateFlashRetryIntervalMillis = 60 * 1000; // Log error message static Status IOError(const std::string& context, int err_number) { - if (err_number == EACCES) { - return Status::IOPermissionDenied(context, strerror(err_number)); - } - return Status::IOError(context, strerror(err_number)); + if (err_number == EACCES) { + return Status::IOPermissionDenied(context, strerror(err_number)); + } + return Status::IOError(context, strerror(err_number)); } /// copy file from env to local -Status CopyToLocal(const std::string& local_fname, Env* env, - const std::string& fname, uint64_t fsize, bool vanish_allowed) { - uint64_t time_s = env->NowMicros(); - - uint64_t local_size = 0; - Status s = Env::Default()->GetFileSize(local_fname, &local_size); - if (s.ok() && fsize == local_size) { - return s; - } - Log("[env_flash] local file mismatch, expect %lu, actual %lu, delete %s\n", - fsize, local_size, local_fname.c_str()); - Env::Default()->DeleteFile(local_fname); - -// Log("[env_flash] open dfs_file %s\n", fname.c_str()); - SequentialFile* dfs_file = NULL; - s = env->NewSequentialFile(fname, &dfs_file); - if (!s.ok()) { - return s; - } +Status CopyToLocal(const std::string& local_fname, Env* env, const std::string& fname, + uint64_t fsize, bool vanish_allowed) { + uint64_t time_s = env->NowMicros(); - size_t dir_pos = local_fname.rfind("/"); - if (dir_pos != std::string::npos) { - s = Env::Default()->CreateDir(local_fname.substr(0, dir_pos)); - if (!s.ok()) { - Log("[env_flash] create dir: %s failed: %s, exit", - local_fname.substr(0, dir_pos).c_str(), s.ToString().c_str()); - _exit(-1); - } - } + uint64_t local_size = 0; + Status s = Env::Default()->GetFileSize(local_fname, &local_size); + if (s.ok() && fsize == local_size) { + return s; + } + LOG(INFO) << "[env_flash] local file mismatch, expect " << fsize << ", actual " << local_size + << ", delete " << local_fname.c_str(); + Env::Default()->DeleteFile(local_fname); + + // LOG(INFO) << "[env_flash] open dfs_file " << fname.c_str(); + SequentialFile* dfs_file = NULL; + s = env->NewSequentialFile(fname, &dfs_file); + if (!s.ok()) { + return s; + } -// Log("[env_flash] open local %s\n", local_fname.c_str()); - WritableFile* local_file = NULL; - EnvOptions env_opt; - env_opt.use_direct_io_write = true; - s = Env::Default()->NewWritableFile(local_fname, &local_file, env_opt); + size_t dir_pos = local_fname.rfind("/"); + if (dir_pos != std::string::npos) { + s = Env::Default()->CreateDir(local_fname.substr(0, dir_pos)); if (!s.ok()) { - if (!vanish_allowed) { - Log("[env_flash] create file: %s failed: %s, exit", - local_fname.c_str(), s.ToString().c_str()); - _exit(-1); - } - delete dfs_file; - return s; + LOG(ERROR) << "[env_flash] create dir: " << local_fname.substr(0, dir_pos).c_str() + << " failed: " << s.ToString().c_str() << ", exit"; + _exit(-1); + } + } + + // LOG(INFO) << "[env_flash] open local " << local_fname.c_str(); + WritableFile* local_file = NULL; + EnvOptions env_opt; + env_opt.use_direct_io_write = true; + s = Env::Default()->NewWritableFile(local_fname, &local_file, env_opt); + if (!s.ok()) { + if (!vanish_allowed) { + LOG(ERROR) << "[env_flash] create file: " << local_fname.c_str() + << " failed: " << s.ToString().c_str() << ", exit"; + _exit(-1); } - - char* buf = new char[1048576]; - Slice result; - local_size = 0; - while (dfs_file->Read(1048576, &result, buf).ok() && result.size() > 0 - && local_file->Append(result).ok()) { - ssd_write_counter.Inc(); - ssd_write_size_counter.Add(result.size()); - local_size += result.size(); - } - delete [] buf; delete dfs_file; - delete local_file; - - if (local_size == fsize) { - uint64_t time_used = env->NowMicros() - time_s; - //if (time_used > 200000) { - if (true) { - Log("[env_flash] copy %s to local used %llu ms\n", - fname.c_str(), static_cast(time_used) / 1000); - } - return s; - } - - uint64_t file_size = 0; - s = env->GetFileSize(fname, &file_size); - if (!s.ok()) { - return Status::IOError("dfs GetFileSize fail", s.ToString()); - } - - Log("[env_flash] copy %s to local fail, size %ld, dfs size %ld, local size %ld\n", - fname.c_str(), fsize, file_size, local_size); - if (fsize == file_size) { - // dfs fsize match but local doesn't match - s = IOError("local fsize mismatch", file_size); - } else { - s = IOError("dfs fsize mismatch", file_size); + return s; + } + + char* buf = new char[1048576]; + Slice result; + local_size = 0; + while (dfs_file->Read(1048576, &result, buf).ok() && result.size() > 0 && + local_file->Append(result).ok()) { + ssd_write_counter.Inc(); + ssd_write_size_counter.Add(result.size()); + local_size += result.size(); + } + delete[] buf; + delete dfs_file; + delete local_file; + + if (local_size == fsize) { + uint64_t time_used = env->NowMicros() - time_s; + // if (time_used > 200000) { + if (true) { + LOG(INFO) << "[env_flash] copy " << fname.c_str() << " to local used " + << static_cast(time_used) / 1000 << " ms"; } - Env::Default()->DeleteFile(local_fname); return s; + } + + uint64_t file_size = 0; + s = env->GetFileSize(fname, &file_size); + if (!s.ok()) { + return Status::IOError("dfs GetFileSize fail", s.ToString()); + } + + LOG(WARNING) << "[env_flash] copy " << fname.c_str() << " to local fail, size " << fsize + << ", dfs size " << file_size << ", local size " << local_size; + if (fsize == file_size) { + // dfs fsize match but local doesn't match + s = IOError("local fsize mismatch", file_size); + } else { + s = IOError("dfs fsize mismatch", file_size); + } + Env::Default()->DeleteFile(local_fname); + return s; } -class FlashSequentialFile: public SequentialFile { -private: - SequentialFile* dfs_file_; - SequentialFile* flash_file_; +class FlashSequentialFile : public SequentialFile { + private: + SequentialFile* dfs_file_; + SequentialFile* flash_file_; -public: - FlashSequentialFile(FlashEnv* flash_env, const std::string& fname) - :dfs_file_(NULL), flash_file_(NULL) { - flash_env->BaseEnv()->NewSequentialFile(fname, &dfs_file_); - } + public: + FlashSequentialFile(FlashEnv* flash_env, const std::string& fname) + : dfs_file_(NULL), flash_file_(NULL) { + flash_env->BaseEnv()->NewSequentialFile(fname, &dfs_file_); + } - virtual ~FlashSequentialFile() { - delete dfs_file_; - delete flash_file_; - } + virtual ~FlashSequentialFile() { + delete dfs_file_; + delete flash_file_; + } - virtual Status Read(size_t n, Slice* result, char* scratch) { - if (flash_file_) { - return flash_file_->Read(n, result, scratch); - } - return dfs_file_->Read(n, result, scratch); + virtual Status Read(size_t n, Slice* result, char* scratch) { + if (flash_file_) { + return flash_file_->Read(n, result, scratch); } + return dfs_file_->Read(n, result, scratch); + } - virtual Status Skip(uint64_t n) { - if (flash_file_) { - return flash_file_->Skip(n); - } - return dfs_file_->Skip(n); - } - - bool isValid() { - return (dfs_file_ || flash_file_); + virtual Status Skip(uint64_t n) { + if (flash_file_) { + return flash_file_->Skip(n); } + return dfs_file_->Skip(n); + } + bool isValid() { return (dfs_file_ || flash_file_); } }; // A file abstraction for randomly reading the contents of a file. -class FlashRandomAccessFile :public RandomAccessFile{ -private: - FlashEnv* flash_env_; - mutable RandomAccessFile* dfs_file_; - mutable RandomAccessFile* flash_file_; - std::string fname_; - std::string local_fname_; - uint64_t fsize_; - - mutable port::Mutex mutex_; - mutable bool flash_file_is_checking_; - mutable uint64_t flash_file_last_check_micros_; - mutable uint64_t flash_file_check_interval_micros_; - mutable uint64_t read_dfs_count_; - EnvOptions env_opt_; - size_t logical_sector_size_; -public: - FlashRandomAccessFile(FlashEnv* flash_env, - const std::string& fname, - uint64_t fsize, - const EnvOptions& options) - : flash_env_(flash_env), dfs_file_(NULL), flash_file_(NULL), fname_(fname), - local_fname_(flash_env->FlashPath(fname) + fname), fsize_(fsize), - flash_file_is_checking_(false), flash_file_last_check_micros_(0), - flash_file_check_interval_micros_(kFlashFileCheckIntervalMicros), - read_dfs_count_(0), - env_opt_(options), - logical_sector_size_(kDefaultPageSize) { - - // copy file to cache if force read from cache - if (flash_env_->ForceReadFromCache()) { - Status copy_status = CopyToLocal(local_fname_, flash_env_->BaseEnv(), fname, fsize, - flash_env_->VanishAllowed()); - if (!copy_status.ok()) { - Log("[env_flash] copy to local fail [%s]: %s\n", - copy_status.ToString().c_str(), local_fname_.c_str()); - } +class FlashRandomAccessFile : public RandomAccessFile { + private: + FlashEnv* flash_env_; + mutable RandomAccessFile* dfs_file_; + mutable RandomAccessFile* flash_file_; + std::string fname_; + std::string local_fname_; + uint64_t fsize_; + + mutable port::Mutex mutex_; + mutable bool flash_file_is_checking_; + mutable uint64_t flash_file_last_check_micros_; + mutable uint64_t flash_file_check_interval_micros_; + mutable uint64_t read_dfs_count_; + EnvOptions env_opt_; + size_t logical_sector_size_; + + public: + FlashRandomAccessFile(FlashEnv* flash_env, const std::string& fname, uint64_t fsize, + const EnvOptions& options) + : flash_env_(flash_env), + dfs_file_(NULL), + flash_file_(NULL), + fname_(fname), + local_fname_(flash_env->FlashPath(fname) + fname), + fsize_(fsize), + flash_file_is_checking_(false), + flash_file_last_check_micros_(0), + flash_file_check_interval_micros_(kFlashFileCheckIntervalMicros), + read_dfs_count_(0), + env_opt_(options), + logical_sector_size_(kDefaultPageSize) { + // copy file to cache if force read from cache + if (flash_env_->ForceReadFromCache()) { + Status copy_status = CopyToLocal(local_fname_, flash_env_->BaseEnv(), fname, fsize, + flash_env_->VanishAllowed()); + if (!copy_status.ok()) { + LOG(WARNING) << "[env_flash] copy to local fail [" << copy_status.ToString().c_str() + << "]: " << local_fname_.c_str(); + } + } + + // if cache file is identical with dfs file, use cache file + if (flash_env_->FlashFileIdentical(fname, fsize)) { + Status s = flash_env_->CacheEnv()->NewRandomAccessFile(local_fname_, &flash_file_, env_opt_); + if (s.ok()) { + logical_sector_size_ = flash_file_->GetRequiredBufferAlignment(); + return; + } + LOG(WARNING) << "[env_flash] local file check pass, but open for " + "RandomAccess fail [" << s.ToString().c_str() + << "]: " << local_fname_.c_str(); + } else { + LOG(WARNING) << "[env_flash] local file check fail: " << local_fname_.c_str(); + } + + // else, use dfs file + flash_env_->ScheduleUpdateFlash(fname, fsize, 1); + flash_env_->BaseEnv()->NewRandomAccessFile(fname, &dfs_file_, env_opt_); + flash_file_last_check_micros_ = Env::Default()->NowMicros(); + } + ~FlashRandomAccessFile() { + delete dfs_file_; + delete flash_file_; + } + Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const { + bool use_flash = false; + { + MutexLock l(&mutex_); + // evenry 30 seconds, check if flash file is identical to dfs file. + // if so, try open flash file; + // else, reschedule update it with a higher priority + if (flash_file_ == NULL && !flash_file_is_checking_ && + flash_file_last_check_micros_ + flash_file_check_interval_micros_ <= + Env::Default()->NowMicros()) { + flash_file_is_checking_ = true; + mutex_.Unlock(); + RandomAccessFile* tmp_file = NULL; + if (flash_env_->FlashFileIdentical(fname_, fsize_)) { + flash_env_->CacheEnv()->NewRandomAccessFile(local_fname_, &tmp_file, env_opt_); } - - // if cache file is identical with dfs file, use cache file - if (flash_env_->FlashFileIdentical(fname, fsize)) { - - Status s = flash_env_->CacheEnv()->NewRandomAccessFile(local_fname_, &flash_file_, env_opt_); - if (s.ok()) { - logical_sector_size_ = flash_file_->GetRequiredBufferAlignment(); - return; - } - Log("[env_flash] local file check pass, but open for RandomAccess fail [%s]: %s\n", - s.ToString().c_str(), local_fname_.c_str()); + mutex_.Lock(); + if (tmp_file != NULL) { + flash_file_ = tmp_file; + LOG(INFO) << "[env_flash] switch to local file: " << local_fname_.c_str(); } else { - Log("[env_flash] local file check fail: %s\n", local_fname_.c_str()); + flash_env_->ScheduleUpdateFlash(fname_, fsize_, read_dfs_count_); + read_dfs_count_ = 0; } - - // else, use dfs file - flash_env_->ScheduleUpdateFlash(fname, fsize, 1); - flash_env_->BaseEnv()->NewRandomAccessFile(fname, &dfs_file_, env_opt_); + flash_file_is_checking_ = false; flash_file_last_check_micros_ = Env::Default()->NowMicros(); + } + if (flash_file_ != NULL) { + use_flash = true; + } else { + ++read_dfs_count_; + } } - ~FlashRandomAccessFile() { - delete dfs_file_; - delete flash_file_; - } - Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const { - bool use_flash = false; - { - MutexLock l(&mutex_); - // evenry 30 seconds, check if flash file is identical to dfs file. - // if so, try open flash file; - // else, reschedule update it with a higher priority - if (flash_file_ == NULL && - !flash_file_is_checking_ && - flash_file_last_check_micros_ + flash_file_check_interval_micros_ - <= Env::Default()->NowMicros()) { - flash_file_is_checking_ = true; - mutex_.Unlock(); - RandomAccessFile* tmp_file = NULL; - if (flash_env_->FlashFileIdentical(fname_, fsize_)) { - flash_env_->CacheEnv()->NewRandomAccessFile(local_fname_, &tmp_file, env_opt_); - } - mutex_.Lock(); - if (tmp_file != NULL) { - flash_file_ = tmp_file; - Log("[env_flash] switch to local file: %s\n", local_fname_.c_str()); - } else { - flash_env_->ScheduleUpdateFlash(fname_, fsize_, read_dfs_count_); - read_dfs_count_ = 0; - } - flash_file_is_checking_ = false; - flash_file_last_check_micros_ = Env::Default()->NowMicros(); - } - if (flash_file_ != NULL) { - use_flash = true; - } else { - ++read_dfs_count_; - } - } - if (use_flash) { - Status read_status = flash_file_->Read(offset, n, result, scratch); - if (read_status.ok()) { - ssd_read_counter.Inc(); - ssd_read_size_counter.Add(result->size()); - } - return read_status; - } - return dfs_file_->Read(offset, n, result, scratch); + if (use_flash) { + Status read_status = flash_file_->Read(offset, n, result, scratch); + if (read_status.ok()) { + ssd_read_counter.Inc(); + ssd_read_size_counter.Add(result->size()); + } + return read_status; } + return dfs_file_->Read(offset, n, result, scratch); + } + size_t GetRequiredBufferAlignment() const { return logical_sector_size_; } - size_t GetRequiredBufferAlignment() const { - return logical_sector_size_; - } - - bool isValid() { - return (dfs_file_ || flash_file_); - } + bool isValid() { return (dfs_file_ || flash_file_); } + + std::string GetFileName() const override { return local_fname_; } }; // WritableFile -class FlashWritableFile: public WritableFile { -private: - WritableFile* dfs_file_; - WritableFile* flash_file_; - std::string local_fname_; - EnvOptions env_opt_; -public: - FlashWritableFile(FlashEnv* flash_env, const std::string& fname, const EnvOptions& options) - :dfs_file_(NULL), flash_file_(NULL), env_opt_(options) { - Status s = flash_env->BaseEnv()->NewWritableFile(fname, &dfs_file_, env_opt_); - if (!s.ok()) { - return; - } - if (fname.rfind(".sst") != fname.size()-4) { - // Log(logger, "[env_flash] Don't cache %s\n", fname.c_str()); - return; - } - local_fname_ = flash_env->FlashPath(fname) + fname; - for(size_t i = 1; i < local_fname_.size(); i++) { - if (local_fname_.at(i) == '/') { - flash_env->CacheEnv()->CreateDir(local_fname_.substr(0,i)); - } - } - s = flash_env->CacheEnv()->NewWritableFile(local_fname_, &flash_file_, env_opt_); - if (!s.ok()) { - Log("[env_flash] Open local flash file for write fail: %s\n", - local_fname_.c_str()); - } +class FlashWritableFile : public WritableFile { + private: + WritableFile* dfs_file_; + WritableFile* flash_file_; + std::string local_fname_; + EnvOptions env_opt_; + + public: + FlashWritableFile(FlashEnv* flash_env, const std::string& fname, const EnvOptions& options) + : dfs_file_(NULL), flash_file_(NULL), env_opt_(options) { + Status s = flash_env->BaseEnv()->NewWritableFile(fname, &dfs_file_, env_opt_); + if (!s.ok()) { + return; } - virtual ~FlashWritableFile() { - delete dfs_file_; - delete flash_file_; + if (fname.rfind(".sst") != fname.size() - 4) { + // LOG(INFO) << "[env_flash] Don't cache " << fname.c_str(); + return; } - void DeleteLocal() { - delete flash_file_; - flash_file_ = NULL; - Env::Default()->DeleteFile(local_fname_); + local_fname_ = flash_env->FlashPath(fname) + fname; + for (size_t i = 1; i < local_fname_.size(); i++) { + if (local_fname_.at(i) == '/') { + flash_env->CacheEnv()->CreateDir(local_fname_.substr(0, i)); + } } - virtual Status Append(const Slice& data) { - Status s = dfs_file_->Append(data); - if (!s.ok()) { - return s; - } - if (flash_file_) { - Status local_s = flash_file_->Append(data); - if (!local_s.ok()) { - DeleteLocal(); - }else{ - ssd_write_counter.Inc(); - ssd_write_size_counter.Add(data.size()); - } - } - return s; + s = flash_env->CacheEnv()->NewWritableFile(local_fname_, &flash_file_, env_opt_); + if (!s.ok()) { + LOG(ERROR) << "[env_flash] Open local flash file for write fail: " << local_fname_.c_str(); + } + } + virtual ~FlashWritableFile() { + delete dfs_file_; + delete flash_file_; + } + void DeleteLocal() { + delete flash_file_; + flash_file_ = NULL; + Env::Default()->DeleteFile(local_fname_); + } + virtual Status Append(const Slice& data) { + Status s = dfs_file_->Append(data); + if (!s.ok()) { + return s; } - - bool isValid() { - return (dfs_file_ || flash_file_); + if (flash_file_) { + Status local_s = flash_file_->Append(data); + if (!local_s.ok()) { + DeleteLocal(); + } else { + ssd_write_counter.Inc(); + ssd_write_size_counter.Add(data.size()); + } } + return s; + } - virtual Status Flush() { - Status s = dfs_file_->Flush(); - if (!s.ok()) { - return s; - } - // Don't flush cache file - /* - if (flash_file_) { - Status local_s = flash_file_->Flush(); - if (!local_s.ok()) { - DeleteLocal(); - } - }*/ - return s; - } + bool isValid() { return (dfs_file_ || flash_file_); } - virtual Status Sync() { - Status s = dfs_file_->Sync(); - if (!s.ok()) { - return s; + virtual Status Flush() { + Status s = dfs_file_->Flush(); + if (!s.ok()) { + return s; + } + // Don't flush cache file + /* + if (flash_file_) { + Status local_s = flash_file_->Flush(); + if (!local_s.ok()) { + DeleteLocal(); } - /* Don't sync cache file - if (flash_file_) { - Status local_s = flash_file_->Sync(); - if (!local_s.ok()) { - DeleteLocal(); - } - }*/ - return s; - } + }*/ + return s; + } - virtual Status Close() { - if (flash_file_) { - Status local_s = flash_file_->Close(); - if (!local_s.ok()) { - DeleteLocal(); - } + virtual Status Sync() { + Status s = dfs_file_->Sync(); + if (!s.ok()) { + return s; + } + /* Don't sync cache file + if (flash_file_) { + Status local_s = flash_file_->Sync(); + if (!local_s.ok()) { + DeleteLocal(); } - return dfs_file_->Close(); + }*/ + return s; + } + + virtual Status Close() { + if (flash_file_) { + Status local_s = flash_file_->Close(); + if (!local_s.ok()) { + DeleteLocal(); + } } -}; + return dfs_file_->Close(); + } + std::string GetFileName() const override { return local_fname_; } +}; FlashEnv::FlashEnv(Env* base_env) - : EnvWrapper(Env::Default()), dfs_env_(base_env), posix_env_(Env::Default()), - flash_paths_(1, "./flash"), vanish_allowed_(false), force_read_from_cache_(true), - update_flash_retry_interval_millis_(kUpdateFlashRetryIntervalMillis) -{ -} + : EnvWrapper(Env::Default()), + dfs_env_(base_env), + posix_env_(Env::Default()), + flash_paths_(1, "./flash"), + vanish_allowed_(false), + force_read_from_cache_(true), + update_flash_retry_interval_millis_(kUpdateFlashRetryIntervalMillis) {} -FlashEnv::~FlashEnv() -{ -} +FlashEnv::~FlashEnv() {} // SequentialFile -Status FlashEnv::NewSequentialFile(const std::string& fname, SequentialFile** result) -{ - FlashSequentialFile* f = new FlashSequentialFile(this, fname); - if (!f->isValid()) { - delete f; - *result = NULL; - return IOError(fname, errno); - } - *result = f; - return Status::OK(); +Status FlashEnv::NewSequentialFile(const std::string& fname, SequentialFile** result) { + FlashSequentialFile* f = new FlashSequentialFile(this, fname); + if (!f->isValid()) { + delete f; + *result = NULL; + return IOError(fname, errno); + } + *result = f; + return Status::OK(); } // random read file -Status FlashEnv::NewRandomAccessFile(const std::string& fname, - uint64_t fsize, - RandomAccessFile** result, - const EnvOptions& options) -{ - FlashRandomAccessFile* f = - new FlashRandomAccessFile(this, fname, fsize, options); - if (f == NULL || !f->isValid()) { - *result = NULL; - delete f; - return IOError(fname, errno); - } - *result = f; - return Status::OK(); +Status FlashEnv::NewRandomAccessFile(const std::string& fname, uint64_t fsize, + RandomAccessFile** result, const EnvOptions& options) { + FlashRandomAccessFile* f = new FlashRandomAccessFile(this, fname, fsize, options); + if (f == NULL || !f->isValid()) { + *result = NULL; + delete f; + return IOError(fname, errno); + } + *result = f; + return Status::OK(); } -Status FlashEnv::NewRandomAccessFile(const std::string& fname, - RandomAccessFile** result, +Status FlashEnv::NewRandomAccessFile(const std::string& fname, RandomAccessFile** result, const EnvOptions& options) { - // not implement - abort(); + // not implement + abort(); } // writable -Status FlashEnv::NewWritableFile(const std::string& fname, - WritableFile** result, - const EnvOptions& options) -{ - Status s; - FlashWritableFile* f = new FlashWritableFile(this, fname, options); - if (f == NULL || !f->isValid()) { - *result = NULL; - delete f; - return IOError(fname, errno); - } - *result = f; - return Status::OK(); +Status FlashEnv::NewWritableFile(const std::string& fname, WritableFile** result, + const EnvOptions& options) { + Status s; + FlashWritableFile* f = new FlashWritableFile(this, fname, options); + if (f == NULL || !f->isValid()) { + *result = NULL; + delete f; + return IOError(fname, errno); + } + *result = f; + return Status::OK(); } // FileExists -Status FlashEnv::FileExists(const std::string& fname) -{ - return dfs_env_->FileExists(fname); -} +Status FlashEnv::FileExists(const std::string& fname) { return dfs_env_->FileExists(fname); } // -Status FlashEnv::GetChildren(const std::string& path, - std::vector* result) -{ - return dfs_env_->GetChildren(path, result); +Status FlashEnv::GetChildren(const std::string& path, std::vector* result) { + return dfs_env_->GetChildren(path, result); } -Status FlashEnv::DeleteFile(const std::string& fname) -{ - posix_env_->DeleteFile(FlashEnv::FlashPath(fname) + fname); - return dfs_env_->DeleteFile(fname); +Status FlashEnv::DeleteFile(const std::string& fname) { + posix_env_->DeleteFile(FlashEnv::FlashPath(fname) + fname); + return dfs_env_->DeleteFile(fname); } -Status FlashEnv::CreateDir(const std::string& name) -{ - std::string local_name = FlashEnv::FlashPath(name) + name; - for(size_t i=1 ;iCreateDir(local_name.substr(0,i)); - } +Status FlashEnv::CreateDir(const std::string& name) { + std::string local_name = FlashEnv::FlashPath(name) + name; + for (size_t i = 1; i < local_name.size(); i++) { + if (local_name.at(i) == '/') { + posix_env_->CreateDir(local_name.substr(0, i)); } - posix_env_->CreateDir(local_name); - return dfs_env_->CreateDir(name); + } + posix_env_->CreateDir(local_name); + return dfs_env_->CreateDir(name); }; -Status FlashEnv::DeleteDir(const std::string& name) -{ - posix_env_->DeleteDir(FlashEnv::FlashPath(name) + name); - return dfs_env_->DeleteDir(name); +Status FlashEnv::DeleteDir(const std::string& name) { + posix_env_->DeleteDir(FlashEnv::FlashPath(name) + name); + return dfs_env_->DeleteDir(name); }; -Status FlashEnv::GetFileSize(const std::string& fname, uint64_t* size) -{ - return dfs_env_->GetFileSize(fname, size); +Status FlashEnv::GetFileSize(const std::string& fname, uint64_t* size) { + return dfs_env_->GetFileSize(fname, size); } /// -Status FlashEnv::RenameFile(const std::string& src, const std::string& target) -{ - posix_env_->RenameFile(FlashEnv::FlashPath(src) + src, FlashEnv::FlashPath(target) + target); - return dfs_env_->RenameFile(src, target); +Status FlashEnv::RenameFile(const std::string& src, const std::string& target) { + posix_env_->RenameFile(FlashEnv::FlashPath(src) + src, FlashEnv::FlashPath(target) + target); + return dfs_env_->RenameFile(src, target); } -Status FlashEnv::LockFile(const std::string& fname, FileLock** lock) -{ - return dfs_env_->LockFile(fname, lock); +Status FlashEnv::LockFile(const std::string& fname, FileLock** lock) { + return dfs_env_->LockFile(fname, lock); } -Status FlashEnv::UnlockFile(FileLock* lock) -{ - return dfs_env_->UnlockFile(lock); -} +Status FlashEnv::UnlockFile(FileLock* lock) { return dfs_env_->UnlockFile(lock); } void FlashEnv::SetFlashPath(const std::string& path, bool vanish_allowed) { - std::vector backup; + std::vector backup; + flash_paths_.swap(backup); + vanish_allowed_ = vanish_allowed; + + size_t beg = 0; + const char* str = path.c_str(); + for (size_t i = 0; i <= path.size(); ++i) { + if ((str[i] == '\0' || str[i] == ';') && i - beg > 0) { + flash_paths_.push_back(std::string(str + beg, i - beg)); + beg = i + 1; + if (!vanish_allowed && !Env::Default()->FileExists(flash_paths_.back()).ok() && + !Env::Default()->CreateDir(flash_paths_.back()).ok()) { + LOG(ERROR) << "[env_flash] cannot access cache dir: " << flash_paths_.back().c_str(); + _exit(-1); + } + } + } + if (!flash_paths_.size()) { flash_paths_.swap(backup); - vanish_allowed_ = vanish_allowed; - - size_t beg = 0; - const char *str = path.c_str(); - for (size_t i = 0; i <= path.size(); ++i) { - if ((str[i] == '\0' || str[i] == ';') && i - beg > 0) { - flash_paths_.push_back(std::string(str + beg, i - beg)); - beg = i +1; - if (!vanish_allowed - && !Env::Default()->FileExists(flash_paths_.back()).ok() - && !Env::Default()->CreateDir(flash_paths_.back()).ok()) { - Log("[env_flash] cannot access cache dir: %s\n", - flash_paths_.back().c_str()); - _exit(-1); - } - } - } - if (!flash_paths_.size()) { - flash_paths_.swap(backup); - } + } } const std::string& FlashEnv::FlashPath(const std::string& fname) { - if (flash_paths_.size() == 1) { - return flash_paths_[0]; - } - uint32_t hash = Hash(fname.c_str(), fname.size(), 13); - return flash_paths_[hash % flash_paths_.size()]; + if (flash_paths_.size() == 1) { + return flash_paths_[0]; + } + uint32_t hash = Hash(fname.c_str(), fname.size(), 13); + return flash_paths_[hash % flash_paths_.size()]; } -void FlashEnv::SetIfForceReadFromCache(bool force) { - force_read_from_cache_ = force; -} +void FlashEnv::SetIfForceReadFromCache(bool force) { force_read_from_cache_ = force; } -bool FlashEnv::ForceReadFromCache() { - return force_read_from_cache_; -} +bool FlashEnv::ForceReadFromCache() { return force_read_from_cache_; } void FlashEnv::SetUpdateFlashThreadNumber(int thread_num) { - update_flash_threads_.SetBackgroundThreads(thread_num); + update_flash_threads_.SetBackgroundThreads(thread_num); } bool FlashEnv::FlashFileIdentical(const std::string& fname, uint64_t fsize) { - uint64_t local_size = 0; - std::string local_fname = FlashEnv::FlashPath(fname) + fname; - Status s = Env::Default()->GetFileSize(local_fname, &local_size); - if (s.ok() && fsize == local_size) { - return true; - } - return false; + uint64_t local_size = 0; + std::string local_fname = FlashEnv::FlashPath(fname) + fname; + Status s = Env::Default()->GetFileSize(local_fname, &local_size); + if (s.ok() && fsize == local_size) { + return true; + } + return false; } struct UpdateFlashFileParam { - FlashEnv* flash_env; - std::string fname; - uint64_t fsize; + FlashEnv* flash_env; + std::string fname; + uint64_t fsize; }; void UpdateFlashFileFunc(void* arg) { - UpdateFlashFileParam* update_arg = (UpdateFlashFileParam*)arg; - update_arg->flash_env->UpdateFlashFile(update_arg->fname, update_arg->fsize); - delete update_arg; + UpdateFlashFileParam* update_arg = (UpdateFlashFileParam*)arg; + update_arg->flash_env->UpdateFlashFile(update_arg->fname, update_arg->fsize); + delete update_arg; } void FlashEnv::ScheduleUpdateFlash(const std::string& fname, uint64_t fsize, int64_t priority) { - MutexLock l(&update_flash_mutex_); - if (update_flash_waiting_files_.find(fname) == update_flash_waiting_files_.end()) { - UpdateFlashFileParam* param = new UpdateFlashFileParam; - param->flash_env = this; - param->fname = fname; - param->fsize = fsize; - - UpdateFlashTask& task = update_flash_waiting_files_[fname]; - task.priority = priority; - task.id = update_flash_threads_.Schedule(UpdateFlashFileFunc, param, (double)task.priority); - Log("[env_flash] schedule copy to local, id: %ld, prio: %ld, file: %s, pend: %ld\n", - task.id, task.priority, fname.c_str(), update_flash_threads_.GetPendingTaskNum()); - } else { - UpdateFlashTask& task = update_flash_waiting_files_[fname]; - task.priority += priority; - update_flash_threads_.ReSchedule(task.id, (double)task.priority); - Log("[env_flash] reschedule copy to local, id: %ld, prio: %ld, file: %s, pend: %ld\n", - task.id, task.priority, fname.c_str(), update_flash_threads_.GetPendingTaskNum()); - } + MutexLock l(&update_flash_mutex_); + if (update_flash_waiting_files_.find(fname) == update_flash_waiting_files_.end()) { + UpdateFlashFileParam* param = new UpdateFlashFileParam; + param->flash_env = this; + param->fname = fname; + param->fsize = fsize; + + UpdateFlashTask& task = update_flash_waiting_files_[fname]; + task.priority = priority; + task.id = update_flash_threads_.Schedule(UpdateFlashFileFunc, param, (double)task.priority); + LOG(INFO) << "[env_flash] schedule copy to local, id: " << task.id + << ", prio: " << task.priority << ", file: " << fname.c_str() + << ", pend: " << update_flash_threads_.GetPendingTaskNum(); + } else { + UpdateFlashTask& task = update_flash_waiting_files_[fname]; + task.priority += priority; + update_flash_threads_.ReSchedule(task.id, (double)task.priority); + LOG(INFO) << "[env_flash] reschedule copy to local, id: " << task.id + << ", prio: " << task.priority << ", file: " << fname.c_str() + << ", pend: " << update_flash_threads_.GetPendingTaskNum(); + } } void FlashEnv::UpdateFlashFile(const std::string& fname, uint64_t fsize) { - std::string local_fname = FlashEnv::FlashPath(fname) + fname; - Status copy_status = CopyToLocal(local_fname, dfs_env_, fname, fsize, vanish_allowed_); - - MutexLock l(&update_flash_mutex_); - if (copy_status.ok()) { - UpdateFlashTask& task = update_flash_waiting_files_[fname]; - Log("[env_flash] copy to local success, id: %ld, prio: %ld, file: %s, pend: %ld\n", - task.id, task.priority, local_fname.c_str(), update_flash_threads_.GetPendingTaskNum()); - update_flash_waiting_files_.erase(fname); + std::string local_fname = FlashEnv::FlashPath(fname) + fname; + Status copy_status = CopyToLocal(local_fname, dfs_env_, fname, fsize, vanish_allowed_); + + MutexLock l(&update_flash_mutex_); + if (copy_status.ok()) { + UpdateFlashTask& task = update_flash_waiting_files_[fname]; + LOG(INFO) << "[env_flash] copy to local success, id: " << task.id << ", prio: " << task.priority + << ", file: " << local_fname.c_str() + << ", pend: " << update_flash_threads_.GetPendingTaskNum(); + update_flash_waiting_files_.erase(fname); + } else { + UpdateFlashTask& task = update_flash_waiting_files_[fname]; + LOG(WARNING) << "[env_flash] copy to local fail [" << copy_status.ToString().c_str() + << "], id: " << task.id << ", prio: " << task.priority + << ", file: " << local_fname.c_str() + << ", pend: " << update_flash_threads_.GetPendingTaskNum(); + + task.priority >>= 1; // cut down priority to half + if (task.priority > 0) { + UpdateFlashFileParam* param = new UpdateFlashFileParam; + param->flash_env = this; + param->fname = fname; + param->fsize = fsize; + + task.id = update_flash_threads_.Schedule(UpdateFlashFileFunc, param, (double)task.priority, + update_flash_retry_interval_millis_); + LOG(INFO) << "[env_flash] schedule copy to local after " + << update_flash_retry_interval_millis_ << " ms, id: " << task.id + << ", prio: " << task.priority << ", file: " << local_fname.c_str(); } else { - UpdateFlashTask& task = update_flash_waiting_files_[fname]; - Log("[env_flash] copy to local fail [%s], id: %ld, prio: %ld, file: %s, pend: %ld\n", - copy_status.ToString().c_str(), task.id, task.priority, - local_fname.c_str(), update_flash_threads_.GetPendingTaskNum()); - - task.priority >>= 1; // cut down priority to half - if (task.priority > 0) { - UpdateFlashFileParam* param = new UpdateFlashFileParam; - param->flash_env = this; - param->fname = fname; - param->fsize = fsize; - - task.id = update_flash_threads_.Schedule(UpdateFlashFileFunc, param, (double)task.priority, - update_flash_retry_interval_millis_); - Log("[env_flash] schedule copy to local after %ld ms, id: %ld, prio: %ld, file: %s\n", - update_flash_retry_interval_millis_, task.id, task.priority, local_fname.c_str()); - } else { - Log("[env_flash] abort schedule copy to local, file: %s\n", local_fname.c_str()); - update_flash_waiting_files_.erase(fname); - } + LOG(INFO) << "[env_flash] abort schedule copy to local, file: " << local_fname.c_str(); + update_flash_waiting_files_.erase(fname); } + } } -Env* NewFlashEnv(Env* base_env) -{ - return new FlashEnv(base_env); +void FlashEnv::TryRollbackPersistentCacheFiles() { + for (auto& flash_path : flash_paths_) { + DoRollbackPersistentCacheFiles(flash_path); + } } -} // namespace leveldb +void FlashEnv::DoRollbackPersistentCacheFiles(const std::string& path) { + SystemFileType type; + auto status = CacheEnv()->GetFileType(path, &type); + if (!status.ok()) { + LOG(ERROR) << "[env_flash] get file type failed for " << path + << " reason: " << status.ToString(); + return; + } + + switch (type) { + case SystemFileType::kRegularFile: { + auto pos = path.find(".sst"); + if (pos == std::string::npos || Slice{path}.ends_with(".sst")) { + return; + } + + auto new_path = path.substr(0, pos + 4); + status = CacheEnv()->RenameFile(path, new_path); + + if (!status.ok()) { + LOG(ERROR) << "[env_flash] rename file failed from " << path << " to " << new_path + << ", reason: " << status.ToString(); + } + break; + } + case SystemFileType::kDir: { + std::vector children; + status = CacheEnv()->GetChildren(path, &children); + if (!status.ok()) { + LOG(ERROR) << "[env_flash] get children failed for " << path + << " reason: " << status.ToString(); + return; + } + for (auto child : children) { + auto next_path = path + "/" + child; + DoRollbackPersistentCacheFiles(next_path); + } + break; + } + default: + LOG(ERROR) << "[env_flash] unkonwn file type for" << path; + } +} +Env* NewFlashEnv(Env* base_env) { return new FlashEnv(base_env); } +} // namespace leveldb diff --git a/src/leveldb/util/env_flash_block_cache.cc b/src/leveldb/util/env_flash_block_cache.cc deleted file mode 100644 index 0d429125c..000000000 --- a/src/leveldb/util/env_flash_block_cache.cc +++ /dev/null @@ -1,661 +0,0 @@ -// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "leveldb/env_flash_block_cache.h" - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "common/counter.h" -#include "db/table_cache.h" -#include "leveldb/db.h" -#include "leveldb/cache.h" -#include "leveldb/env.h" -#include "leveldb/iterator.h" -#include "leveldb/options.h" -#include "leveldb/statistics.h" -#include "leveldb/status.h" -#include "leveldb/table_utils.h" -#include "leveldb/write_batch.h" -#include "port/port.h" -#include "util/flash_block_cache_impl.h" -#include "util/flash_block_cache_write_buffer.h" -#include "util/coding.h" -#include "util/hash.h" -#include "util/mutexlock.h" -#include "util/string_ext.h" -#include "util/thread_pool.h" - -namespace leveldb { - -class FlashBlockCacheWritableFile : public WritableFile { -public: - FlashBlockCacheWritableFile(FlashBlockCacheImpl* c, const std::string& fname, - const EnvOptions& options, Status* s) - : cache_(c), - bg_cv_(&mu_), - bg_block_flush_(0), - pending_block_num_(0), - write_buffer_(cache_->WorkPath(), fname, cache_->options_.block_size), - fname_(fname), - env_opt_(options) { // file open - *s = cache_->dfs_env_->NewWritableFile(fname_, &dfs_file_, env_opt_); - if (!s->ok()) { - Log("[%s] dfs open: %s, block_size: %lu, status: %s\n", - cache_->WorkPath().c_str(), - fname.c_str(), - cache_->options_.block_size, - s->ToString().c_str()); - } - bg_status_ = *s; - fid_ = cache_->FileId(fname_); - } - - virtual ~FlashBlockCacheWritableFile() { Close(); } - - Status Append(const Slice& data) { - Status s = dfs_file_->Append(data); - if (!s.ok()) { - Log("[%s] dfs append fail: %s, status: %s\n", - cache_->WorkPath().c_str(), - fname_.c_str(), - s.ToString().c_str()); - return s; - } - write_buffer_.Append(data); - - MutexLock lockgard(&mu_); - MaybeScheduleBGFlush(); - return s; - } - - Status Close() { - Status s, s1; - if (dfs_file_ != nullptr) { - s = dfs_file_->Close(); - delete dfs_file_; - dfs_file_ = nullptr; - } - - uint64_t block_idx; - std::string* block_data = write_buffer_.PopBackBlock(&block_idx); - if (block_data != nullptr) { - s1 = FillCache(block_data, block_idx); - } - - MutexLock lockgard(&mu_); - while (bg_block_flush_ > 0) { - bg_cv_.Wait(); - } - if (bg_status_.ok()) { - bg_status_ = s.ok() ? s1: s; - } - //Log("[%s] end close %s, status %s\n", cache_->WorkPath().c_str(), fname_.c_str(), - //s.ToString().c_str()); - return bg_status_; - } - - Status Flush() { - //Log("[%s] dfs flush: %s\n", cache_->WorkPath().c_str(), fname_.c_str()); - return dfs_file_->Flush(); - } - - Status Sync() { - //Log("[%s] dfs sync: %s\n", cache_->WorkPath().c_str(), fname_.c_str()); - return dfs_file_->Sync(); - } - -private: - void MaybeScheduleBGFlush() { - mu_.AssertHeld(); - //Log("[%s] Maybe schedule BGFlush: %s, bg_block_flush: %u, block_nr: %u\n", - //cache_->WorkPath().c_str(), - //fname_.c_str(), - //bg_block_flush_, - //write_buffer_.NumFullBlock()); - while (bg_block_flush_ < (write_buffer_.NumFullBlock() + pending_block_num_)) { - bg_block_flush_++; - cache_->bg_flush_.Schedule(&FlashBlockCacheWritableFile::BGFlushFunc, this, 10); - } - } - - static void BGFlushFunc(void* arg) { - reinterpret_cast(arg)->BGFlush(); - } - void BGFlush() { - //Log("[%s] Begin BGFlush: %s\n", cache_->WorkPath().c_str(), fname_.c_str()); - Status s; - MutexLock lockgard(&mu_); - uint64_t block_idx; - std::string* block_data = write_buffer_.PopFrontBlock(&block_idx); - if (block_data != nullptr) { - pending_block_num_++; - mu_.Unlock(); - - s = FillCache(block_data, block_idx); - mu_.Lock(); - pending_block_num_--; - } - - bg_status_ = bg_status_.ok() ? s: bg_status_; - bg_block_flush_--; - MaybeScheduleBGFlush(); - bg_cv_.Signal(); - } - - Status FillCache(std::string* block_data, uint64_t block_idx) { - Status s; - uint64_t fid = fid_; - CacheBlock* block = nullptr; - while ((block = cache_->GetAndAllocBlock(fid, block_idx)) == nullptr) { - Log("[%s] fill cache for write %s, fid %lu, block_idx %lu, wait 10ms after retry\n", - cache_->WorkPath().c_str(), fname_.c_str(), - fid, block_idx); - cache_->options_.cache_env->SleepForMicroseconds(10000); - } - - block->mu.Lock(); - block->state = 0; - block->GetDataBlock(cache_->options_.block_size, Slice(*block_data)); - block->mu.Unlock(); - - // Do io without lock - block->s = cache_->LogRecord(block); - if (block->s.ok()) { - block->s = cache_->FillCache(block); - if (block->s.ok()) { - MutexLock l(&block->mu); - block->state = kCacheBlockValid; - } - } - s = cache_->ReleaseBlock(block, true); - write_buffer_.ReleaseBlock(block_data); - return s; - } - -private: - FlashBlockCacheImpl* cache_; - //port::AtomicPointer shutting_down_; - port::Mutex mu_; - port::CondVar bg_cv_; // Signalled when background work finishes - Status bg_status_; - WritableFile* dfs_file_; - // protected by cache_.mu_ - uint32_t bg_block_flush_; - uint32_t pending_block_num_; - FlashBlockCacheWriteBuffer write_buffer_; - std::string fname_; - uint64_t fid_; - EnvOptions env_opt_; -}; - -class FlashBlockCacheRandomAccessFile : public RandomAccessFile { -public: - FlashBlockCacheRandomAccessFile(FlashBlockCacheImpl* c, const std::string& fname, - uint64_t fsize, const EnvOptions& options, Status* s) - : cache_(c), - fname_(fname), - fsize_(fsize), - env_opt_(options) { - *s = cache_->dfs_env_->NewRandomAccessFile(fname_, &dfs_file_, env_opt_); - //Log("[%s] dfs open for read: %s, block_size: %lu, status: %s\n", - //cache_->WorkPath().c_str(), - //fname.c_str(), - //cache_->options_.block_size, - //s->ToString().c_str()); - - fid_ = cache_->FileId(fname_); - } - - virtual ~FlashBlockCacheRandomAccessFile() { - delete dfs_file_; - } - - Status Read(uint64_t offset, size_t n, Slice* result, - char* scratch) const { - Status s; - uint64_t begin = offset / cache_->options_.block_size; - uint64_t end = (offset + n) / cache_->options_.block_size; - assert(begin <= end); - uint64_t fid = fid_; - std::vector c_miss; - std::vector c_locked; - std::vector c_valid; - std::vector block_queue; - - //Log("[%s] Begin Pread %s, size %lu, offset %lu, fid %lu, start_block %lu, end_block %lu" - //", block_size %lu\n", - //cache_->WorkPath().c_str(), fname_.c_str(), n, offset, fid, - //begin, end, cache_->options_.block_size); - - uint64_t start_ts = cache_->options_.cache_env->NowMicros(); - for (uint64_t block_idx = begin; block_idx <= end; ++block_idx) { - uint64_t get_block_ts = cache_->options_.cache_env->NowMicros(); - CacheBlock* block = nullptr; - while ((block = cache_->GetAndAllocBlock(fid, block_idx)) == nullptr) { - Log("[%s] fill cache for read %s, fid %lu, block_idx %lu, wait 10ms after retry\n", - cache_->WorkPath().c_str(), fname_.c_str(), - fid, block_idx); - cache_->options_.cache_env->SleepForMicroseconds(10000); - } - - block->mu.Lock(); - assert(block->fid == fid && block->block_idx == block_idx); - block->GetDataBlock(cache_->options_.block_size, Slice()); - block_queue.push_back(block); // sort by block_idx - if (!block->Test(kCacheBlockLocked) && - block->Test(kCacheBlockValid)) { - block->Set(kCacheBlockLocked | kCacheBlockCacheRead); - c_valid.push_back(block); - } else if (!block->Test(kCacheBlockLocked)) { - block->Set(kCacheBlockLocked | kCacheBlockDfsRead); - c_miss.push_back(block); - } else { - c_locked.push_back(block); - } - block->mu.Unlock(); - - //Log("[%s] Queue block: %s, refs %u, data_block_refs %lu, alloc %u\n", - //cache_->WorkPath().c_str(), block->ToString().c_str(), - //block->handle->refs, block->data_block_refs, - //block->data_block_alloc); - cache_->stat_->MeasureTime(FLASH_BLOCK_CACHE_PREAD_GET_BLOCK, - cache_->options_.cache_env->NowMicros() - get_block_ts); - } - uint64_t queue_ts = cache_->options_.cache_env->NowMicros(); - cache_->stat_->MeasureTime(FLASH_BLOCK_CACHE_PREAD_QUEUE, queue_ts - start_ts); - cache_->stat_->MeasureTime(FLASH_BLOCK_CACHE_PREAD_BLOCK_NR, end - begin + 1); - - // async read miss data - for (uint32_t i = 0; i < c_miss.size(); ++i) { - CacheBlock* block = c_miss[i]; - AsyncDfsReader* reader = new AsyncDfsReader; - reader->file = const_cast(this); - reader->block = block; - //Log("[%s] pread in miss list, %s\n", - //cache_->WorkPath().c_str(), - //block->ToString().c_str()); - cache_->bg_dfs_read_.Schedule(&FlashBlockCacheRandomAccessFile::AsyncDfsRead, reader, 10); - } - //uint64_t miss_read_sched_ts = cache_->options_.cache_env->NowMicros(); - - // async read valid data - for (uint32_t i = 0; i < c_valid.size(); ++i) { - CacheBlock* block = c_valid[i]; - AsyncCacheReader* reader = new AsyncCacheReader; - reader->file = const_cast(this); - reader->block = block; - //Log("[%s] pread in valid list, %s\n", - //cache_->WorkPath().c_str(), - //block->ToString().c_str()); - cache_->bg_read_.Schedule(&FlashBlockCacheRandomAccessFile::AsyncCacheRead, reader, 10); - } - //uint64_t ssd_read_sched_ts = cache_->options_.cache_env->NowMicros(); - - // wait async cache read done - for (uint32_t i = 0; i < c_valid.size(); ++i) { - CacheBlock* block = c_valid[i]; - block->mu.Lock(); - block->WaitOnClear(kCacheBlockCacheRead); - assert(block->Test(kCacheBlockValid)); - if (!block->s.ok() && s.ok()) { - s = block->s; // degrade read - } - block->Clear(kCacheBlockLocked); - block->cv.SignalAll(); - block->mu.Unlock(); - //Log("[%s] cache read done, %s\n", - //cache_->WorkPath().c_str(), - //block->ToString().c_str()); - } - uint64_t ssd_read_ts = cache_->options_.cache_env->NowMicros(); - cache_->stat_->MeasureTime(FLASH_BLOCK_CACHE_PREAD_SSD_READ, ssd_read_ts - queue_ts); - - // wait dfs read done and async cache file - for (uint32_t i = 0; i < c_miss.size(); ++i) { - CacheBlock* block = c_miss[i]; - block->mu.Lock(); - block->WaitOnClear(kCacheBlockDfsRead); - block->Set(kCacheBlockCacheFill); - if (!block->s.ok() && s.ok()) { - s = block->s; // degrade read - } - block->mu.Unlock(); - //Log("[%s] dfs read done, %s\n", - //cache_->WorkPath().c_str(), - //block->ToString().c_str()); - } - uint64_t dfs_read_ts = cache_->options_.cache_env->NowMicros(); - cache_->stat_->MeasureTime(FLASH_BLOCK_CACHE_PREAD_DFS_READ, dfs_read_ts - ssd_read_ts); - - for (uint32_t i = 0; i < c_miss.size(); ++i) { - CacheBlock* block = c_miss[i]; - AsyncCacheWriter* writer = new AsyncCacheWriter; - writer->file = const_cast(this); - writer->block = block; - //Log("[%s] pread in miss list(fill cache), %s\n", - //cache_->WorkPath().c_str(), - //block->ToString().c_str()); - cache_->bg_fill_.Schedule(&FlashBlockCacheRandomAccessFile::AsyncCacheWrite, writer, 10); - } - uint64_t ssd_write_sched_ts = cache_->options_.cache_env->NowMicros(); - //cache_->stat_->MeasureTime(FLASH_BLOCK_CACHE_PREAD_SSD_WRITE_SCHED, ssd_write_sched_ts - dfs_read_ts); - - for (uint32_t i = 0; i < c_miss.size(); ++i) { // wait cache fill finish - CacheBlock* block = c_miss[i]; - block->mu.Lock(); - block->WaitOnClear(kCacheBlockCacheFill); - if (block->s.ok()) { - block->Set(kCacheBlockValid); - } else if (s.ok()) { - s = block->s; // degrade read - } - block->Clear(kCacheBlockLocked); - block->cv.SignalAll(); - block->mu.Unlock(); - //Log("[%s] cache fill done, %s\n", - //cache_->WorkPath().c_str(), - //block->ToString().c_str()); - } - uint64_t ssd_write_ts = cache_->options_.cache_env->NowMicros(); - cache_->stat_->MeasureTime(FLASH_BLOCK_CACHE_PREAD_SSD_WRITE, ssd_write_ts - ssd_write_sched_ts); - - // wait other async read finish - for (uint32_t i = 0; i < c_locked.size(); ++i) { - CacheBlock* block = c_locked[i]; - block->mu.Lock(); - block->WaitOnClear(kCacheBlockLocked); - block->mu.Unlock(); - //Log("[%s] wait locked done, %s\n", - //cache_->WorkPath().c_str(), - //block->ToString().c_str()); - } - uint64_t wait_unlock_ts = cache_->options_.cache_env->NowMicros(); - cache_->stat_->MeasureTime(FLASH_BLOCK_CACHE_PREAD_WAIT_UNLOCK, wait_unlock_ts - ssd_write_ts); - - // fill user mem - size_t msize = 0; - for (uint64_t block_idx = begin; block_idx <= end; ++block_idx) { - CacheBlock* block = block_queue[block_idx - begin]; - Slice data_block = block->data_block; - if (block_idx == begin) { - data_block.remove_prefix(offset % cache_->options_.block_size); - } - if (block_idx == end) { - data_block.remove_suffix(cache_->options_.block_size - (n + offset) % cache_->options_.block_size); - } - memcpy(scratch + msize, data_block.data(), data_block.size()); - msize += data_block.size(); - //Log("[%s] Fill user data, %s, fill_offset %lu, fill_size %lu, prefix %lu, suffix %lu, msize %lu, offset %lu\n", - //cache_->WorkPath().c_str(), fname_.c_str(), - //block_idx * cache_->options_.block_size + (block_idx == begin ? offset % cache_->options_.block_size: 0), - //data_block.size(), - //block_idx == begin ? offset % cache_->options_.block_size: 0, - //block_idx == end ? cache_->options_.block_size - (n + offset) % cache_->options_.block_size - //: cache_->options_.block_size, - //msize, offset); - } - assert(msize == n); - *result = Slice(scratch, n); - uint64_t fill_user_data_ts = cache_->options_.cache_env->NowMicros(); - cache_->stat_->MeasureTime(FLASH_BLOCK_CACHE_PREAD_FILL_USER_DATA, fill_user_data_ts - wait_unlock_ts); - - for (uint32_t i = 0; i < c_miss.size(); ++i) { - CacheBlock* block = c_miss[i]; - //Log("[%s] wakeup for miss, %s\n", cache_->WorkPath().c_str(), block->ToString().c_str()); - cache_->ReleaseBlock(block, true); - } - for (uint32_t i = 0; i < c_valid.size(); ++i) { - CacheBlock* block = c_valid[i]; - //Log("[%s] wakeup for valid, %s\n", cache_->WorkPath().c_str(), block->ToString().c_str()); - cache_->ReleaseBlock(block, false); - } - for (uint32_t i = 0; i < c_locked.size(); ++i) { - CacheBlock* block = c_locked[i]; - //Log("[%s] wakeup for lock, %s\n", cache_->WorkPath().c_str(), block->ToString().c_str()); - cache_->ReleaseBlock(block, false); - } - uint64_t release_cache_block_ts = cache_->options_.cache_env->NowMicros(); - cache_->stat_->MeasureTime(FLASH_BLOCK_CACHE_PREAD_RELEASE_BLOCK, release_cache_block_ts - fill_user_data_ts); - - if (!s.ok()) { - s = dfs_file_->Read(offset, n, result, scratch); - Log("[%s] Pread degrade %s, offset %lu, size %lu, status %s\n", - cache_->WorkPath().c_str(), fname_.c_str(), - offset, n, s.ToString().c_str()); - } - //Log("[%s] Done Pread %s, size %lu, offset %lu, fid %lu, res %lu, status %s, start_block %lu, end_block %lu" - //", block_size %lu\n", - //cache_->WorkPath().c_str(), fname_.c_str(), n, offset, fid, - //result->size(), s.ToString().c_str(), - //begin, end, cache_->options_.block_size); - return s; - } - -private: - struct AsyncDfsReader { - FlashBlockCacheRandomAccessFile* file; - CacheBlock* block; - }; - static void AsyncDfsRead(void* arg) { - AsyncDfsReader* reader = (AsyncDfsReader*)arg; - reader->file->HandleDfsRead(reader); - delete reader; - } - void HandleDfsRead(AsyncDfsReader* reader) { - Status s; - CacheBlock* block = reader->block; - char* scratch = (char*)(block->data_block.data()); - Slice result; - uint64_t offset = block->block_idx * cache_->options_.block_size; - size_t n = cache_->options_.block_size; - block->s = dfs_file_->Read(offset, n, &result, scratch); - if (!block->s.ok()) { - Log("[%s] dfs read, %s" - ", offset %lu, size %lu, status %s, res %lu\n", - cache_->WorkPath().c_str(), block->ToString().c_str(), - offset, n, - block->s.ToString().c_str(), result.size()); - } - - block->mu.Lock(); - block->Clear(kCacheBlockDfsRead); - block->cv.SignalAll(); - block->mu.Unlock(); - } - - struct AsyncCacheReader { - FlashBlockCacheRandomAccessFile* file; - CacheBlock* block; - }; - // use use thread module to enhance sync io - static void AsyncCacheRead(void* arg) { - AsyncCacheReader* reader = (AsyncCacheReader*)arg; - reader->file->HandleCacheRead(reader); - delete reader; - } - void HandleCacheRead(AsyncCacheReader* reader) { - CacheBlock* block = reader->block; - block->s = cache_->ReadCache(block); - - block->mu.Lock(); - block->Clear(kCacheBlockCacheRead); - block->cv.SignalAll(); - block->mu.Unlock(); - //Log("[%s] async.cacheread signal, %s\n", cache_->WorkPath().c_str(), - //block->ToString().c_str()); - } - - struct AsyncCacheWriter { - FlashBlockCacheRandomAccessFile* file; - CacheBlock* block; - }; - static void AsyncCacheWrite(void* arg) { - AsyncCacheWriter* writer = (AsyncCacheWriter*)arg; - writer->file->HandleCacheWrite(writer); - delete writer; - } - void HandleCacheWrite(AsyncCacheWriter* writer) { - CacheBlock* block = writer->block; - //Log("[%s] cache fill, %s\n", - //cache_->WorkPath().c_str(), - //block->ToString().c_str()); - block->s = cache_->LogRecord(block); - if (block->s.ok()) { - block->s = cache_->FillCache(block); - } - - block->mu.Lock(); - block->Clear(kCacheBlockCacheFill); - block->cv.SignalAll(); - block->mu.Unlock(); - } - -private: - FlashBlockCacheImpl* cache_; - RandomAccessFile* dfs_file_; - std::string fname_; - uint64_t fid_; - uint64_t fsize_; - EnvOptions env_opt_; -}; - -// Must insure not init more than twice -Env* NewFlashBlockCacheEnv(Env* base) { - return new FlashBlockCacheEnv(base); -} - -FlashBlockCacheEnv::FlashBlockCacheEnv(Env* base) - : EnvWrapper(NewPosixEnv()), dfs_env_(base) { - //target()->SetBackgroundThreads(30); -} - -FlashBlockCacheEnv::~FlashBlockCacheEnv() {} - -Status FlashBlockCacheEnv::FileExists(const std::string& fname) { - return dfs_env_->FileExists(fname); -} - -Status FlashBlockCacheEnv::GetChildren(const std::string& path, - std::vector* result) { - return dfs_env_->GetChildren(path, result); -} - -Status FlashBlockCacheEnv::DeleteFile(const std::string& fname) { - if (fname.rfind(".sst") == fname.size() - 4) { - uint32_t hash = (Hash(fname.c_str(), fname.size(), 13)) % caches_.size(); - FlashBlockCacheImpl* cache = caches_[hash]; - cache->DeleteFile(fname); - } - return dfs_env_->DeleteFile(fname); -} - -Status FlashBlockCacheEnv::CreateDir(const std::string& name) { - return dfs_env_->CreateDir(name); -} - -Status FlashBlockCacheEnv::DeleteDir(const std::string& name) { - return dfs_env_->DeleteDir(name); -} - -Status FlashBlockCacheEnv::CopyFile(const std::string& from, - const std::string& to) { - return dfs_env_->CopyFile(from, to); -} - -Status FlashBlockCacheEnv::GetFileSize(const std::string& fname, uint64_t* size) { - return dfs_env_->GetFileSize(fname, size); -} - -Status FlashBlockCacheEnv::RenameFile(const std::string& src, const std::string& target) { - return dfs_env_->RenameFile(src, target); -} - -Status FlashBlockCacheEnv::LockFile(const std::string& fname, FileLock** lock) { - return dfs_env_->LockFile(fname, lock); -} - -Status FlashBlockCacheEnv::UnlockFile(FileLock* lock) { - return dfs_env_->UnlockFile(lock); -} - -Status FlashBlockCacheEnv::LoadCache(const FlashBlockCacheOptions& opts, const std::string& cache_dir) { - FlashBlockCacheOptions options = opts; - options.cache_dir = cache_dir; - options.env = dfs_env_; - options.cache_env = this->target(); - FlashBlockCacheImpl* cache = new FlashBlockCacheImpl(options); - Status s = cache->LoadCache(); - caches_.push_back(cache); // no need lock - return s; -} - -Status FlashBlockCacheEnv::NewSequentialFile(const std::string& fname, - SequentialFile** result) { - return dfs_env_->NewSequentialFile(fname, result); -} - -Status FlashBlockCacheEnv::NewWritableFile(const std::string& fname, - WritableFile** result, - const EnvOptions& options) { - if (fname.rfind(".sst") != fname.size() - 4) { - return dfs_env_->NewWritableFile(fname, result, options); - } - - // cache sst file - *result = nullptr; - Status s; - uint32_t hash = (Hash(fname.c_str(), fname.size(), 13)) % caches_.size(); - FlashBlockCacheImpl* cache = caches_[hash]; - FlashBlockCacheWritableFile* file = new FlashBlockCacheWritableFile(cache, fname, options, &s); - if (s.ok()) { - *result = (WritableFile*)file; - } else { - delete file; - file = nullptr; - *result = nullptr; - Log("[flash_block_cache %s] open file write fail: %s, hash: %u, status: %s\n", - cache->WorkPath().c_str(), fname.c_str(), hash, s.ToString().c_str()); - } - return s; -} - -Status FlashBlockCacheEnv::NewRandomAccessFile(const std::string& fname, - RandomAccessFile** result, - const EnvOptions&) { - // never use it - abort(); - return Status::OK(); -} - -Status FlashBlockCacheEnv::NewRandomAccessFile(const std::string& fname, - uint64_t fsize, - RandomAccessFile** result, - const EnvOptions& options) { - *result = nullptr; - Status s; - uint32_t hash = (Hash(fname.c_str(), fname.size(), 13)) % caches_.size(); - FlashBlockCacheImpl* cache = caches_[hash]; - FlashBlockCacheRandomAccessFile* file = new FlashBlockCacheRandomAccessFile(cache, fname, fsize, options, &s); - if (s.ok()) { - *result = (RandomAccessFile*)file; - } else { - delete file; - file = nullptr; - *result = nullptr; - Log("[flash_block_cache %s] open file read fail: %s, hash: %u, status: %s, fsize %lu\n", - cache->WorkPath().c_str(), fname.c_str(), hash, s.ToString().c_str(), fsize); - } - return s; -} - -} // namespace leveldb - diff --git a/src/leveldb/util/env_inmem.cc b/src/leveldb/util/env_inmem.cc index 5e63b6a4c..eb2a1e1af 100644 --- a/src/leveldb/util/env_inmem.cc +++ b/src/leveldb/util/env_inmem.cc @@ -20,271 +20,241 @@ #include "leveldb/table_utils.h" #include "util/mutexlock.h" #include "helpers/memenv/memenv.h" -#include "../common/counter.h" +#include "common/counter.h" #include "leveldb/env_inmem.h" - namespace leveldb { // Log error message -static Status IOError(const std::string& context, int err_number) -{ - return Status::IOError(context, strerror(err_number)); +static Status IOError(const std::string& context, int err_number) { + return Status::IOError(context, strerror(err_number)); } -class InMemorySequentialFile: public SequentialFile { -private: - SequentialFile* dfs_file_; - SequentialFile* mem_file_; -public: - InMemorySequentialFile(Env* mem_env, Env* dfs_env, const std::string& fname) - :dfs_file_(NULL), mem_file_(NULL) { - dfs_env->NewSequentialFile(fname, &dfs_file_); - } - - virtual ~InMemorySequentialFile() { - delete dfs_file_; - delete mem_file_; - } - - virtual Status Read(size_t n, Slice* result, char* scratch) { - if (mem_file_) { - return mem_file_->Read(n, result, scratch); - } - return dfs_file_->Read(n, result, scratch); +class InMemorySequentialFile : public SequentialFile { + private: + SequentialFile* dfs_file_; + SequentialFile* mem_file_; + + public: + InMemorySequentialFile(Env* mem_env, Env* dfs_env, const std::string& fname) + : dfs_file_(NULL), mem_file_(NULL) { + dfs_env->NewSequentialFile(fname, &dfs_file_); + } + + virtual ~InMemorySequentialFile() { + delete dfs_file_; + delete mem_file_; + } + + virtual Status Read(size_t n, Slice* result, char* scratch) { + if (mem_file_) { + return mem_file_->Read(n, result, scratch); } + return dfs_file_->Read(n, result, scratch); + } - virtual Status Skip(uint64_t n) { - if (mem_file_) { - return mem_file_->Skip(n); - } - return dfs_file_->Skip(n); - } - - bool isValid() { - return (dfs_file_ || mem_file_); + virtual Status Skip(uint64_t n) { + if (mem_file_) { + return mem_file_->Skip(n); } + return dfs_file_->Skip(n); + } + bool isValid() { return (dfs_file_ || mem_file_); } }; // A file abstraction for randomly reading the contents of a file. -class InMemoryRandomAccessFile :public RandomAccessFile{ -private: - RandomAccessFile* dfs_file_; - RandomAccessFile* mem_file_; - EnvOptions env_opt_; -public: - InMemoryRandomAccessFile(Env* mem_env, Env* dfs_env, const std::string& fname, - const EnvOptions& options) - :dfs_file_(NULL), mem_file_(NULL), env_opt_(options) { - Status s = mem_env->NewRandomAccessFile(fname, &mem_file_, env_opt_); - if (s.ok()) { - return; - } - mem_file_ = NULL; - s = dfs_env->NewRandomAccessFile(fname, &dfs_file_, env_opt_); - if (!s.ok()) { - return; - } - } - ~InMemoryRandomAccessFile() { - delete dfs_file_; - delete mem_file_; +class InMemoryRandomAccessFile : public RandomAccessFile { + private: + RandomAccessFile* dfs_file_; + RandomAccessFile* mem_file_; + EnvOptions env_opt_; + + public: + InMemoryRandomAccessFile(Env* mem_env, Env* dfs_env, const std::string& fname, + const EnvOptions& options) + : dfs_file_(NULL), mem_file_(NULL), env_opt_(options) { + Status s = mem_env->NewRandomAccessFile(fname, &mem_file_, env_opt_); + if (s.ok()) { + return; } - Status Read(uint64_t offset, size_t n, Slice* result, - char* scratch) const { - if (mem_file_) { - return mem_file_->Read(offset, n, result, scratch); - } - return dfs_file_->Read(offset, n, result, scratch); + mem_file_ = NULL; + s = dfs_env->NewRandomAccessFile(fname, &dfs_file_, env_opt_); + if (!s.ok()) { + return; } - bool isValid() { - return (dfs_file_ || mem_file_); + } + ~InMemoryRandomAccessFile() { + delete dfs_file_; + delete mem_file_; + } + Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const { + if (mem_file_) { + return mem_file_->Read(offset, n, result, scratch); } + return dfs_file_->Read(offset, n, result, scratch); + } + + bool isValid() { return (dfs_file_ || mem_file_); } }; // WritableFile -class InMemoryWritableFile: public WritableFile { -private: - WritableFile* dfs_file_; - WritableFile* mem_file_; -public: - InMemoryWritableFile(Env* mem_env, Env* dfs_env, const std::string& fname, const EnvOptions& options) - :dfs_file_(NULL), mem_file_(NULL) { - Status s = dfs_env->NewWritableFile(fname, &dfs_file_, options); - if (!s.ok()) { - return; - } - if (fname.rfind(".sst") != fname.size()-4) { - return; - } - s = mem_env->NewWritableFile(fname, &mem_file_, options); - assert(s.ok()); +class InMemoryWritableFile : public WritableFile { + private: + WritableFile* dfs_file_; + WritableFile* mem_file_; + + public: + InMemoryWritableFile(Env* mem_env, Env* dfs_env, const std::string& fname, + const EnvOptions& options) + : dfs_file_(NULL), mem_file_(NULL) { + Status s = dfs_env->NewWritableFile(fname, &dfs_file_, options); + if (!s.ok()) { + return; } - virtual ~InMemoryWritableFile() { - delete dfs_file_; - delete mem_file_; + if (fname.rfind(".sst") != fname.size() - 4) { + return; } - virtual Status Append(const Slice& data) { - Status s = dfs_file_->Append(data); - if (!s.ok()) { - return s; - } - if (mem_file_) { - s = mem_file_->Append(data); - assert(s.ok()); - } - return s; + s = mem_env->NewWritableFile(fname, &mem_file_, options); + assert(s.ok()); + } + virtual ~InMemoryWritableFile() { + delete dfs_file_; + delete mem_file_; + } + virtual Status Append(const Slice& data) { + Status s = dfs_file_->Append(data); + if (!s.ok()) { + return s; } - - bool isValid() { - return (dfs_file_ || mem_file_); + if (mem_file_) { + s = mem_file_->Append(data); + assert(s.ok()); } + return s; + } - virtual Status Flush() { - Status s = dfs_file_->Flush(); - if (!s.ok()) { - return s; - } - if (mem_file_) { - s = mem_file_->Flush(); - assert(s.ok()); - } - return s; + bool isValid() { return (dfs_file_ || mem_file_); } + + virtual Status Flush() { + Status s = dfs_file_->Flush(); + if (!s.ok()) { + return s; + } + if (mem_file_) { + s = mem_file_->Flush(); + assert(s.ok()); } + return s; + } - virtual Status Sync() { - Status s = dfs_file_->Sync(); - if (!s.ok()) { - return s; - } - if (mem_file_) { - s = mem_file_->Sync(); - assert(s.ok()); - } - return s; + virtual Status Sync() { + Status s = dfs_file_->Sync(); + if (!s.ok()) { + return s; + } + if (mem_file_) { + s = mem_file_->Sync(); + assert(s.ok()); } + return s; + } - virtual Status Close() { - if (mem_file_) { - Status s = mem_file_->Close(); - assert(s.ok()); - } - return dfs_file_->Close(); + virtual Status Close() { + if (mem_file_) { + Status s = mem_file_->Close(); + assert(s.ok()); } + return dfs_file_->Close(); + } }; -InMemoryEnv::InMemoryEnv(Env* base_env) : EnvWrapper(Env::Default()) -{ - dfs_env_ = base_env; - mem_env_ = NewMemEnv(dfs_env_); +InMemoryEnv::InMemoryEnv(Env* base_env) : EnvWrapper(Env::Default()) { + dfs_env_ = base_env; + mem_env_ = NewMemEnv(dfs_env_); } -InMemoryEnv::~InMemoryEnv() -{ - delete mem_env_; -} +InMemoryEnv::~InMemoryEnv() { delete mem_env_; } // SequentialFile -Status InMemoryEnv::NewSequentialFile(const std::string& fname, SequentialFile** result) -{ - InMemorySequentialFile* f = new InMemorySequentialFile(mem_env_, dfs_env_, fname); - if (!f->isValid()) { - delete f; - *result = NULL; - return IOError(fname, errno); - } - *result = f; - return Status::OK(); +Status InMemoryEnv::NewSequentialFile(const std::string& fname, SequentialFile** result) { + InMemorySequentialFile* f = new InMemorySequentialFile(mem_env_, dfs_env_, fname); + if (!f->isValid()) { + delete f; + *result = NULL; + return IOError(fname, errno); + } + *result = f; + return Status::OK(); } // random read file -Status InMemoryEnv::NewRandomAccessFile(const std::string& fname, - RandomAccessFile** result, - const EnvOptions& options) -{ - InMemoryRandomAccessFile* f = new InMemoryRandomAccessFile(mem_env_, dfs_env_, fname, options); - if (f == NULL || !f->isValid()) { - *result = NULL; - delete f; - return IOError(fname, errno); - } - *result = f; - return Status::OK(); +Status InMemoryEnv::NewRandomAccessFile(const std::string& fname, RandomAccessFile** result, + const EnvOptions& options) { + InMemoryRandomAccessFile* f = new InMemoryRandomAccessFile(mem_env_, dfs_env_, fname, options); + if (f == NULL || !f->isValid()) { + *result = NULL; + delete f; + return IOError(fname, errno); + } + *result = f; + return Status::OK(); } // writable -Status InMemoryEnv::NewWritableFile(const std::string& fname, - WritableFile** result, - const EnvOptions& options) -{ - Status s; - InMemoryWritableFile* f = new InMemoryWritableFile(mem_env_, dfs_env_, fname, options); - if (f == NULL || !f->isValid()) { - *result = NULL; - delete f; - return IOError(fname, errno); - } - *result = f; - return Status::OK(); +Status InMemoryEnv::NewWritableFile(const std::string& fname, WritableFile** result, + const EnvOptions& options) { + Status s; + InMemoryWritableFile* f = new InMemoryWritableFile(mem_env_, dfs_env_, fname, options); + if (f == NULL || !f->isValid()) { + *result = NULL; + delete f; + return IOError(fname, errno); + } + *result = f; + return Status::OK(); } // FileExists -Status InMemoryEnv::FileExists(const std::string& fname) -{ - return dfs_env_->FileExists(fname); -} +Status InMemoryEnv::FileExists(const std::string& fname) { return dfs_env_->FileExists(fname); } // -Status InMemoryEnv::GetChildren(const std::string& path, std::vector* result) -{ - return dfs_env_->GetChildren(path, result); +Status InMemoryEnv::GetChildren(const std::string& path, std::vector* result) { + return dfs_env_->GetChildren(path, result); } -Status InMemoryEnv::DeleteFile(const std::string& fname) -{ - mem_env_->DeleteFile(fname); - return dfs_env_->DeleteFile(fname); +Status InMemoryEnv::DeleteFile(const std::string& fname) { + mem_env_->DeleteFile(fname); + return dfs_env_->DeleteFile(fname); } -Status InMemoryEnv::CreateDir(const std::string& name) -{ - mem_env_->CreateDir(name); - return dfs_env_->CreateDir(name); +Status InMemoryEnv::CreateDir(const std::string& name) { + mem_env_->CreateDir(name); + return dfs_env_->CreateDir(name); }; -Status InMemoryEnv::DeleteDir(const std::string& name) -{ - mem_env_->DeleteDir(name); - return dfs_env_->DeleteDir(name); +Status InMemoryEnv::DeleteDir(const std::string& name) { + mem_env_->DeleteDir(name); + return dfs_env_->DeleteDir(name); }; -Status InMemoryEnv::GetFileSize(const std::string& fname, uint64_t* size) -{ - return dfs_env_->GetFileSize(fname, size); +Status InMemoryEnv::GetFileSize(const std::string& fname, uint64_t* size) { + return dfs_env_->GetFileSize(fname, size); } /// -Status InMemoryEnv::RenameFile(const std::string& src, const std::string& target) -{ - mem_env_->RenameFile(src, target); - return dfs_env_->RenameFile(src, target); +Status InMemoryEnv::RenameFile(const std::string& src, const std::string& target) { + mem_env_->RenameFile(src, target); + return dfs_env_->RenameFile(src, target); } -Status InMemoryEnv::LockFile(const std::string& fname, FileLock** lock) -{ - return dfs_env_->LockFile(fname, lock); +Status InMemoryEnv::LockFile(const std::string& fname, FileLock** lock) { + return dfs_env_->LockFile(fname, lock); } -Status InMemoryEnv::UnlockFile(FileLock* lock) -{ - return dfs_env_->UnlockFile(lock); -} +Status InMemoryEnv::UnlockFile(FileLock* lock) { return dfs_env_->UnlockFile(lock); } -Env* NewInMemoryEnv(Env* base_env) -{ - return new InMemoryEnv(base_env); -} +Env* NewInMemoryEnv(Env* base_env) { return new InMemoryEnv(base_env); } } // namespace leveldb diff --git a/src/leveldb/util/env_mock.cc b/src/leveldb/util/env_mock.cc index bf1ab3dd7..49cee186e 100644 --- a/src/leveldb/util/env_mock.cc +++ b/src/leveldb/util/env_mock.cc @@ -38,49 +38,36 @@ namespace leveldb { static std::string mock_path_prefix = "/tmp/mock-env/"; -static std::string MockPath(const std::string& o) -{ - return mock_path_prefix + o; -} +static std::string MockPath(const std::string& o) { return mock_path_prefix + o; } -void MockEnv::SetPrefix(const std::string& p) -{ - mock_path_prefix = p + "/"; -} +void MockEnv::SetPrefix(const std::string& p) { mock_path_prefix = p + "/"; } // Log error message -static Status IOError(const std::string& context, int err_number) -{ - if (err_number == EACCES) { - return Status::IOPermissionDenied(context, strerror(err_number)); - } - return Status::IOError(context, strerror(err_number)); +static Status IOError(const std::string& context, int err_number) { + if (err_number == EACCES) { + return Status::IOPermissionDenied(context, strerror(err_number)); + } + return Status::IOError(context, strerror(err_number)); } +MockEnv::MockEnv() : EnvWrapper(Env::Default()) {} -MockEnv::MockEnv() : EnvWrapper(Env::Default()) -{ -} - -MockEnv::~MockEnv() -{ -} +MockEnv::~MockEnv() {} static bool (*SequentialFileRead)(int32_t i, char* scratch, size_t* mock_size); static int32_t iSequentialFileRead; -void MockEnv::SetSequentialFileReadCallback(bool (*p)(int32_t i, char* scratch, size_t* mock_size)) -{ - SequentialFileRead = p; +void MockEnv::SetSequentialFileReadCallback(bool (*p)(int32_t i, char* scratch, + size_t* mock_size)) { + SequentialFileRead = p; } -class MockSequentialFile: public SequentialFile { +class MockSequentialFile : public SequentialFile { private: std::string filename_; FILE* file_; public: - MockSequentialFile(const std::string& fname, FILE* f) - : filename_(fname), file_(f) { } + MockSequentialFile(const std::string& fname, FILE* f) : filename_(fname), file_(f) {} virtual ~MockSequentialFile() { fclose(file_); } virtual Status Read(size_t n, Slice* result, char* scratch) { @@ -123,8 +110,7 @@ class MockWritableFile : public WritableFile { FILE* file_; public: - MockWritableFile(const std::string& fname, FILE* f) - : filename_(fname), file_(f) { } + MockWritableFile(const std::string& fname, FILE* f) : filename_(fname), file_(f) {} ~MockWritableFile() { if (file_ != NULL) { @@ -190,8 +176,7 @@ class MockWritableFile : public WritableFile { if (!s.ok()) { return s; } - if (fflush_unlocked(file_) != 0 || - fdatasync(fileno(file_)) != 0) { + if (fflush_unlocked(file_) != 0 || fdatasync(fileno(file_)) != 0) { s = Status::IOError(filename_, strerror(errno)); } return s; @@ -199,18 +184,16 @@ class MockWritableFile : public WritableFile { }; // pread() based random-access -class MockRandomAccessFile: public RandomAccessFile { +class MockRandomAccessFile : public RandomAccessFile { private: std::string filename_; int fd_; public: - MockRandomAccessFile(const std::string& fname, int fd) - : filename_(fname), fd_(fd) { } + MockRandomAccessFile(const std::string& fname, int fd) : filename_(fname), fd_(fd) {} virtual ~MockRandomAccessFile() { close(fd_); } - virtual Status Read(uint64_t offset, size_t n, Slice* result, - char* scratch) const { + virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const { Status s; ssize_t r = pread(fd_, scratch, n, static_cast(offset)); *result = Slice(scratch, (r < 0) ? 0 : r); @@ -225,197 +208,177 @@ class MockRandomAccessFile: public RandomAccessFile { static bool (*NewSequentialFileFailed)(int32_t i, const std::string& fname); static int32_t iNewSequentialFile; -void MockEnv::SetNewSequentialFileFailedCallback(bool (*p)(int32_t i, const std::string& fname)) -{ - NewSequentialFileFailed = p; +void MockEnv::SetNewSequentialFileFailedCallback(bool (*p)(int32_t i, const std::string& fname)) { + NewSequentialFileFailed = p; } // SequentialFile -Status MockEnv::NewSequentialFile(const std::string& fname, SequentialFile** result) -{ - iNewSequentialFile++; - - FILE* f = fopen(MockPath(fname).c_str(), "r"); - if (f == NULL) { - *result = NULL; - return IOError(MockPath(fname), errno); +Status MockEnv::NewSequentialFile(const std::string& fname, SequentialFile** result) { + iNewSequentialFile++; + + FILE* f = fopen(MockPath(fname).c_str(), "r"); + if (f == NULL) { + *result = NULL; + return IOError(MockPath(fname), errno); + } else { + if (NewSequentialFileFailed && NewSequentialFileFailed(iNewSequentialFile, fname)) { + std::cerr << "[mockenv] NewSequentialFile failed" << std::endl; + fclose(f); + return Status::IOError("open failed: " + fname); } else { - if (NewSequentialFileFailed && NewSequentialFileFailed(iNewSequentialFile, fname)) { - std::cerr << "[mockenv] NewSequentialFile failed" << std::endl; - fclose(f); - return Status::IOError("open failed: " + fname); - } else { - *result = new MockSequentialFile(MockPath(fname), f); - return Status::OK(); - } + *result = new MockSequentialFile(MockPath(fname), f); + return Status::OK(); } + } } // random read file Status MockEnv::NewRandomAccessFile(const std::string& fname, RandomAccessFile** result, - const EnvOptions&) -{ - *result = NULL; - Status s; - int fd = open(MockPath(fname).c_str(), O_RDONLY); - if (fd < 0) { - s = IOError(MockPath(fname), errno); - } else { - *result = new MockRandomAccessFile(MockPath(fname), fd); - } - return s; + const EnvOptions&) { + *result = NULL; + Status s; + int fd = open(MockPath(fname).c_str(), O_RDONLY); + if (fd < 0) { + s = IOError(MockPath(fname), errno); + } else { + *result = new MockRandomAccessFile(MockPath(fname), fd); + } + return s; } // writable -Status MockEnv::NewWritableFile(const std::string& fname, - WritableFile** result, - const EnvOptions&) -{ - Status s; - FILE* f = fopen(MockPath(fname).c_str(), "w"); - if (f == NULL) { - *result = NULL; - s = IOError(MockPath(fname), errno); - } else { - *result = new MockWritableFile(MockPath(fname), f); - } - return s; +Status MockEnv::NewWritableFile(const std::string& fname, WritableFile** result, + const EnvOptions&) { + Status s; + FILE* f = fopen(MockPath(fname).c_str(), "w"); + if (f == NULL) { + *result = NULL; + s = IOError(MockPath(fname), errno); + } else { + *result = new MockWritableFile(MockPath(fname), f); + } + return s; } // FileExists -Status MockEnv::FileExists(const std::string& fname) -{ - int32_t retval = access(MockPath(fname).c_str(), F_OK); - if (retval == 0) { - return Status::OK(); - } else if (errno == ENOENT) { - return Status::NotFound("filestatus", MockPath(fname)); - } else { - return Status::IOError(MockPath(fname)); - } +Status MockEnv::FileExists(const std::string& fname) { + int32_t retval = access(MockPath(fname).c_str(), F_OK); + if (retval == 0) { + return Status::OK(); + } else if (errno == ENOENT) { + return Status::NotFound("filestatus", MockPath(fname)); + } else { + return Status::IOError(MockPath(fname)); + } } Status MockEnv::CopyFile(const std::string& from, const std::string& to) { - abort(); - return Status::OK(); + abort(); + return Status::OK(); } static bool (*GetChildrenDrop)(int32_t i, const std::string& fname); static int32_t iGetChildren; -void MockEnv::SetGetChildrenCallback(bool (*p)(int32_t i, const std::string& fname)) -{ - GetChildrenDrop = p; +void MockEnv::SetGetChildrenCallback(bool (*p)(int32_t i, const std::string& fname)) { + GetChildrenDrop = p; } -Status MockEnv::GetChildren(const std::string& dir, std::vector* result) -{ - iGetChildren++; - - result->clear(); - DIR* d = opendir(MockPath(dir).c_str()); - if (d == NULL) { - abort(); - return IOError(MockPath(dir), errno); +Status MockEnv::GetChildren(const std::string& dir, std::vector* result) { + iGetChildren++; + + result->clear(); + DIR* d = opendir(MockPath(dir).c_str()); + if (d == NULL) { + abort(); + return IOError(MockPath(dir), errno); + } + struct dirent* entry; + while ((entry = readdir(d)) != NULL) { + if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) { + continue; } - struct dirent* entry; - while ((entry = readdir(d)) != NULL) { - if (strcmp(entry->d_name, ".") == 0 || - strcmp(entry->d_name, "..") == 0) { - continue; - } - if (GetChildrenDrop && GetChildrenDrop(iGetChildren, entry->d_name)) { - std::cerr << "[mockenv] GetChildren drop:" << entry->d_name << std::endl; - } else { - result->push_back(entry->d_name); - } + if (GetChildrenDrop && GetChildrenDrop(iGetChildren, entry->d_name)) { + std::cerr << "[mockenv] GetChildren drop:" << entry->d_name << std::endl; + } else { + result->push_back(entry->d_name); } - closedir(d); - return Status::OK(); + } + closedir(d); + return Status::OK(); } -Status MockEnv::DeleteFile(const std::string& fname) -{ - Status result; - if (unlink(MockPath(fname).c_str()) != 0) { - result = IOError(MockPath(fname), errno); - } - return result; +Status MockEnv::DeleteFile(const std::string& fname) { + Status result; + if (unlink(MockPath(fname).c_str()) != 0) { + result = IOError(MockPath(fname), errno); + } + return result; }; -Status MockEnv::CreateDir(const std::string& name) -{ - Status result; - std::vector items; - SplitString(MockPath(name), "/", &items); - std::string path; - if (MockPath(name)[0] == '/') { - path = "/"; - } - for (uint32_t i = 0; i < items.size() && result.ok(); ++i) { - path += items[i]; - if (mkdir(path.c_str(), 0755) != 0 && errno != EEXIST) { - result = IOError(path, errno); - } - path += "/"; +Status MockEnv::CreateDir(const std::string& name) { + Status result; + std::vector items; + SplitString(MockPath(name), "/", &items); + std::string path; + if (MockPath(name)[0] == '/') { + path = "/"; + } + for (uint32_t i = 0; i < items.size() && result.ok(); ++i) { + path += items[i]; + if (mkdir(path.c_str(), 0755) != 0 && errno != EEXIST) { + result = IOError(path, errno); } - return result; + path += "/"; + } + return result; }; -Status MockEnv::DeleteDir(const std::string& name) -{ - Status result; - if (rmdir(MockPath(name).c_str()) != 0) { - result = IOError(MockPath(name), errno); - } - return result; +Status MockEnv::DeleteDir(const std::string& name) { + Status result; + if (rmdir(MockPath(name).c_str()) != 0) { + result = IOError(MockPath(name), errno); + } + return result; }; -Status MockEnv::GetFileSize(const std::string& fname, uint64_t* size) -{ - Status s; - struct stat sbuf; - if (stat(MockPath(fname).c_str(), &sbuf) != 0) { - *size = 0; - s = IOError(MockPath(fname), errno); - } else { - *size = sbuf.st_size; - } - return s; +Status MockEnv::GetFileSize(const std::string& fname, uint64_t* size) { + Status s; + struct stat sbuf; + if (stat(MockPath(fname).c_str(), &sbuf) != 0) { + *size = 0; + s = IOError(MockPath(fname), errno); + } else { + *size = sbuf.st_size; + } + return s; } /// -Status MockEnv::RenameFile(const std::string& src, const std::string& target) -{ - Status result; - if (rename(MockPath(src).c_str(), MockPath(target).c_str()) != 0) { - result = IOError(MockPath(src), errno); - } - return result; +Status MockEnv::RenameFile(const std::string& src, const std::string& target) { + Status result; + if (rename(MockPath(src).c_str(), MockPath(target).c_str()) != 0) { + result = IOError(MockPath(src), errno); + } + return result; } -Status MockEnv::LockFile(const std::string& fname, FileLock** lock) -{ - *lock = NULL; - return Status::OK(); +Status MockEnv::LockFile(const std::string& fname, FileLock** lock) { + *lock = NULL; + return Status::OK(); } -Status MockEnv::UnlockFile(FileLock* lock) -{ - return Status::OK(); -} +Status MockEnv::UnlockFile(FileLock* lock) { return Status::OK(); } -void MockEnv::ResetMock() -{ - iGetChildren = 0; - GetChildrenDrop = NULL; +void MockEnv::ResetMock() { + iGetChildren = 0; + GetChildrenDrop = NULL; - iNewSequentialFile = 0; - NewSequentialFileFailed = NULL; + iNewSequentialFile = 0; + NewSequentialFileFailed = NULL; - iSequentialFileRead = 0; - SequentialFileRead = NULL; + iSequentialFileRead = 0; + SequentialFileRead = NULL; } - static pthread_once_t g_once = PTHREAD_ONCE_INIT; static Env* g_mock_env; static void InitMockEnv() { g_mock_env = new MockEnv; } diff --git a/src/leveldb/util/env_posix.cc b/src/leveldb/util/env_posix.cc index f31113b6d..0b6d9b123 100644 --- a/src/leveldb/util/env_posix.cc +++ b/src/leveldb/util/env_posix.cc @@ -31,7 +31,7 @@ #include "util/posix_logger.h" #include "util/string_ext.h" #include "util/thread_pool.h" -#include "../common/counter.h" +#include "common/counter.h" #include "util/env_posix.h" namespace leveldb { @@ -70,8 +70,7 @@ size_t GetLogicalBufferSize(int fd) { const int kBufferSize = 100; char path[kBufferSize]; char real_path[PATH_MAX + 1]; - snprintf(path, kBufferSize, "/sys/dev/block/%u:%u", major(buf.st_dev), - minor(buf.st_dev)); + snprintf(path, kBufferSize, "/sys/dev/block/%u:%u", major(buf.st_dev), minor(buf.st_dev)); if (realpath(path, real_path) == nullptr) { return kDefaultPageSize; } @@ -91,8 +90,11 @@ size_t GetLogicalBufferSize(int fd) { if (parent_begin == std::string::npos) { return kDefaultPageSize; } - if (device_dir.substr(parent_begin + 1, parent_end - parent_begin - 1) != - "block") { + + // This operates will fail on nvme ssd due to different queue path. + // However, this is not a bug, but a feature. Because on nvme ssd, it's better to + // read blocks with 4k Alignment(aka: kDefaultPageSize). + if (device_dir.substr(parent_begin + 1, parent_end - parent_begin - 1) != "block") { device_dir = device_dir.substr(0, parent_end); } std::string fname = device_dir + "/queue/logical_block_size"; @@ -113,7 +115,7 @@ size_t GetLogicalBufferSize(int fd) { } return kDefaultPageSize; } -} // namespace +} // namespace namespace { @@ -124,14 +126,13 @@ static Status IOError(const std::string& context, int err_number) { return Status::IOError(context, strerror(err_number)); } -class PosixSequentialFile: public SequentialFile { +class PosixSequentialFile : public SequentialFile { private: std::string filename_; FILE* file_; public: - PosixSequentialFile(const std::string& fname, FILE* f) - : filename_(fname), file_(f) { } + PosixSequentialFile(const std::string& fname, FILE* f) : filename_(fname), file_(f) {} virtual ~PosixSequentialFile() { fclose(file_); } virtual Status Read(size_t n, Slice* result, char* scratch) { @@ -162,7 +163,7 @@ class PosixSequentialFile: public SequentialFile { }; // pread() based random-access -class PosixRandomAccessFile: public RandomAccessFile { +class PosixRandomAccessFile : public RandomAccessFile { private: std::string filename_; int fd_; @@ -174,12 +175,11 @@ class PosixRandomAccessFile: public RandomAccessFile { : filename_(fname), fd_(fd), env_opt_(options), - logical_sector_size_(GetLogicalBufferSize(fd_)) { } + logical_sector_size_(GetLogicalBufferSize(fd_)) {} virtual ~PosixRandomAccessFile() { close(fd_); } - virtual Status Read(uint64_t offset, size_t n, Slice* result, - char* scratch) const { + virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const { posix_read_counter.Inc(); Status s; ssize_t r = pread(fd_, scratch, n, static_cast(offset)); @@ -191,15 +191,14 @@ class PosixRandomAccessFile: public RandomAccessFile { posix_read_size_counter.Add(r); } if (!env_opt_.use_direct_io_read) { - posix_fadvise(fd_, 0, 0, POSIX_FADV_DONTNEED); + posix_fadvise(fd_, 0, 0, POSIX_FADV_DONTNEED); } return s; } - virtual size_t GetRequiredBufferAlignment() const { - return logical_sector_size_; - } + virtual size_t GetRequiredBufferAlignment() const { return logical_sector_size_; } + std::string GetFileName() const override { return filename_; } }; // Helper class to limit mmap file usage so that we do not end up @@ -208,12 +207,12 @@ class PosixRandomAccessFile: public RandomAccessFile { class MmapLimiter { public: MmapLimiter() { - //Disable mmap in tera for reducing memory use. + // Disable mmap in tera for reducing memory use. SetAllowed(0); // Up to 1000 mmaps for 64-bit binaries; none for smaller pointer sizes. - //SetAllowed(sizeof(void*) >= 8 ? 1000 : 0); - //If you want to enable mmap, uncomment the line above. + // SetAllowed(sizeof(void*) >= 8 ? 1000 : 0); + // If you want to enable mmap, uncomment the line above. } // If another mmap slot is available, acquire it and return true. @@ -242,21 +241,17 @@ class MmapLimiter { port::Mutex mu_; port::AtomicPointer allowed_; - intptr_t GetAllowed() const { - return reinterpret_cast(allowed_.Acquire_Load()); - } + intptr_t GetAllowed() const { return reinterpret_cast(allowed_.Acquire_Load()); } // REQUIRES: mu_ must be held - void SetAllowed(intptr_t v) { - allowed_.Release_Store(reinterpret_cast(v)); - } + void SetAllowed(intptr_t v) { allowed_.Release_Store(reinterpret_cast(v)); } MmapLimiter(const MmapLimiter&); void operator=(const MmapLimiter&); }; // mmap() based random-access -class PosixMmapReadableFile: public RandomAccessFile { +class PosixMmapReadableFile : public RandomAccessFile { private: std::string filename_; void* mmapped_region_; @@ -265,19 +260,15 @@ class PosixMmapReadableFile: public RandomAccessFile { public: // base[0,length-1] contains the mmapped contents of the file. - PosixMmapReadableFile(const std::string& fname, void* base, size_t length, - MmapLimiter* limiter) - : filename_(fname), mmapped_region_(base), length_(length), - limiter_(limiter) { - } + PosixMmapReadableFile(const std::string& fname, void* base, size_t length, MmapLimiter* limiter) + : filename_(fname), mmapped_region_(base), length_(length), limiter_(limiter) {} virtual ~PosixMmapReadableFile() { munmap(mmapped_region_, length_); limiter_->Release(); } - virtual Status Read(uint64_t offset, size_t n, Slice* result, - char* scratch) const { + virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const { posix_read_counter.Inc(); Status s; if (offset + n > length_) { @@ -289,6 +280,8 @@ class PosixMmapReadableFile: public RandomAccessFile { } return s; } + + std::string GetFileName() const override { return filename_; } }; // We preallocate up to an extra megabyte and use memcpy to append new @@ -311,9 +304,7 @@ class PosixMmapFile : public WritableFile { bool pending_sync_; // Roundup x to a multiple of y - static size_t Roundup(size_t x, size_t y) { - return ((x + y - 1) / y) * y; - } + static size_t Roundup(size_t x, size_t y) { return ((x + y - 1) / y) * y; } size_t TruncateToPageBoundary(size_t s) { s -= (s & (page_size_ - 1)); @@ -338,7 +329,7 @@ class PosixMmapFile : public WritableFile { dst_ = NULL; // Increase the amount we map the next time, but capped at 1MB - if (map_size_ < (1<<20)) { + if (map_size_ < (1 << 20)) { map_size_ *= 2; } } @@ -350,8 +341,7 @@ class PosixMmapFile : public WritableFile { if (ftruncate(fd_, file_offset_ + map_size_) < 0) { return false; } - void* ptr = mmap(NULL, map_size_, PROT_READ | PROT_WRITE, MAP_SHARED, - fd_, file_offset_); + void* ptr = mmap(NULL, map_size_, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, file_offset_); if (ptr == MAP_FAILED) { return false; } @@ -377,7 +367,6 @@ class PosixMmapFile : public WritableFile { assert((page_size & (page_size - 1)) == 0); } - ~PosixMmapFile() { if (fd_ >= 0) { PosixMmapFile::Close(); @@ -393,8 +382,7 @@ class PosixMmapFile : public WritableFile { assert(dst_ <= limit_); size_t avail = limit_ - dst_; if (avail == 0) { - if (!UnmapCurrentRegion() || - !MapNewRegion()) { + if (!UnmapCurrentRegion() || !MapNewRegion()) { return IOError(filename_, errno); } } @@ -434,9 +422,7 @@ class PosixMmapFile : public WritableFile { return s; } - virtual Status Flush() { - return Status::OK(); - } + virtual Status Flush() { return Status::OK(); } virtual Status Sync() { posix_sync_counter.Inc(); @@ -462,9 +448,9 @@ class PosixMmapFile : public WritableFile { } return s; } -}; - + std::string GetFileName() const override { return filename_; } +}; static int LockOrUnlock(int fd, bool lock) { errno = 0; @@ -473,7 +459,7 @@ static int LockOrUnlock(int fd, bool lock) { f.l_type = (lock ? F_WRLCK : F_UNLCK); f.l_whence = SEEK_SET; f.l_start = 0; - f.l_len = 0; // Lock/unlock entire file + f.l_len = 0; // Lock/unlock entire file return fcntl(fd, F_SETLK, &f); } @@ -490,6 +476,7 @@ class PosixLockTable { private: port::Mutex mu_; std::set locked_files_; + public: bool Insert(const std::string& fname) { MutexLock l(&mu_); @@ -505,13 +492,12 @@ class PosixEnv : public Env { public: PosixEnv(); virtual ~PosixEnv() { - for (size_t i=0; id_name, ".") == 0 || - strcmp(entry->d_name, "..") == 0) { + if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) { continue; } result->push_back(entry->d_name); @@ -627,14 +609,14 @@ class PosixEnv : public Env { SplitString(name, "/", &items); std::string path; if (name[0] == '/') { - path = "/"; + path = "/"; } for (uint32_t i = 0; i < items.size() && result.ok(); ++i) { - path += items[i]; - if (mkdir(path.c_str(), 0755) != 0 && errno != EEXIST) { - result = IOError(path, errno); - } - path += "/"; + path += items[i]; + if (mkdir(path.c_str(), 0755) != 0 && errno != EEXIST) { + result = IOError(path, errno); + } + path += "/"; } return result; @@ -648,18 +630,18 @@ class PosixEnv : public Env { if (!s.ok()) { return s; } - for (size_t i=0; i(ts.tv_sec) * 1000000 + static_cast(ts.tv_nsec) / 1000; } - virtual void SleepForMicroseconds(int micros) { - usleep(micros); - } + virtual void SleepForMicroseconds(int micros) { usleep(micros); } // Allow increasing the number of worker threads. virtual int SetBackgroundThreads(int num) { @@ -820,6 +795,33 @@ class PosixEnv : public Env { return thread_pool_.GetThreadNumber(); } + virtual Status GetFileType(const std::string& path, SystemFileType* type) { + struct stat st; + if (stat(path.c_str(), &st) == -1) { + return IOError("Get file type failed.", errno); + } + if (S_ISDIR(st.st_mode)) { + *type = SystemFileType::kDir; + } else if (S_ISREG(st.st_mode)) { + *type = SystemFileType::kRegularFile; + } else { + *type = SystemFileType::kOthers; + } + return Status::OK(); + }; + + virtual Status IsSamePath(const std::string& path1, const std::string& path2, bool* same) { + struct stat st1, st2; + if (stat(path1.c_str(), &st1) == -1) { + return IOError("Get path1 file type failed.", errno); + } + if (stat(path2.c_str(), &st2) == -1) { + return IOError("Get path2 file type failed.", errno); + } + *same = (st1.st_ino == st2.st_ino); + return Status::OK(); + }; + private: static void PthreadCall(const char* label, int result) { if (result != 0) { @@ -845,7 +847,6 @@ class PosixEnv : public Env { pthread_mutex_t mu_; std::vector threads_to_join_; - }; Logger* PosixEnv::info_log_ = NULL; @@ -864,10 +865,10 @@ void PosixEnv::ReSchedule(int64_t id, double prio, int64_t millisec = -1) { } namespace { - struct StartThreadState { - void (*user_function)(void*); - void* arg; - }; +struct StartThreadState { + void (*user_function)(void*); + void* arg; +}; } static void* StartThreadWrapper(void* arg) { StartThreadState* state = reinterpret_cast(arg); @@ -881,8 +882,7 @@ void PosixEnv::StartThread(void (*function)(void* arg), void* arg) { StartThreadState* state = new StartThreadState; state->user_function = function; state->arg = arg; - PthreadCall("start thread", - pthread_create(&t, NULL, &StartThreadWrapper, state)); + PthreadCall("start thread", pthread_create(&t, NULL, &StartThreadWrapper, state)); PthreadCall("lock", pthread_mutex_lock(&mu_)); threads_to_join_.push_back(t); PthreadCall("unlock", pthread_mutex_unlock(&mu_)); @@ -899,27 +899,25 @@ Env* Env::Default() { return default_env; } -Env* NewPosixEnv() { - return new PosixEnv; -} +Env* NewPosixEnv() { return new PosixEnv; } -PosixWritableFile::PosixWritableFile(const std::string& fname, - int fd, - const EnvOptions& options) - : filename_(fname), - fd_(fd), - pos_(0), - is_dio_(options.use_direct_io_write), - buffer_size_(options.posix_write_buffer_size), - align_size_(GetLogicalBufferSize(fd_)) { - //Buffer size should never be set to zero +PosixWritableFile::PosixWritableFile(const std::string& fname, int fd, const EnvOptions& options) + : filename_(fname), + fd_(fd), + pos_(0), + is_dio_(options.use_direct_io_write), + buffer_size_(options.posix_write_buffer_size), + align_size_(GetLogicalBufferSize(fd_)) { + // Buffer size should never be set to zero assert(buffer_size_ > 0); if (is_dio_) { - //See format.cc:76 DirectIOAlign() about this code. - buffer_size_ = buffer_size_ % align_size_ == 0 ? buffer_size_ : - (buffer_size_ + align_size_ - 1) & (~(align_size_ - 1)); + // See format.cc:76 DirectIOAlign() about this code. + buffer_size_ = buffer_size_ % align_size_ == 0 + ? buffer_size_ + : (buffer_size_ + align_size_ - 1) & (~(align_size_ - 1)); buf_ = (char*)memalign(align_size_, buffer_size_); - //fprintf(stderr, "Dio: %s, Aligned Buffer Size: %lu\n", filename_.c_str(), buffer_size_); + // fprintf(stderr, "Dio: %s, Aligned Buffer Size: %lu\n", filename_.c_str(), + // buffer_size_); } else { buf_ = (char*)malloc(buffer_size_); } @@ -955,7 +953,8 @@ Status PosixWritableFile::Append(const Slice& data) { return s; } - // In DIO: Small writes go to buffer, large writes are appended again for aligned write. + // In DIO: Small writes go to buffer, large writes are appended again for + // aligned write. // Not In DIO: Small writes go to buffer, large writes are written directly. if (n < buffer_size_) { memcpy(buf_, p, n); @@ -973,6 +972,7 @@ Status PosixWritableFile::Append(const Slice& data) { Status PosixWritableFile::Close() { posix_close_counter.Inc(); Status result = FlushBuffered(); + result = Sync(); const int r = close(fd_); if (r < 0 && result.ok()) { result = IOError(filename_, errno); @@ -981,9 +981,7 @@ Status PosixWritableFile::Close() { return result; } -Status PosixWritableFile::Flush() { - return FlushBuffered(); -} +Status PosixWritableFile::Flush() { return FlushBuffered(); } Status PosixWritableFile::SyncDirIfManifest() { const char* f = filename_.c_str(); @@ -1030,11 +1028,11 @@ Status PosixWritableFile::Sync() { Status PosixWritableFile::LeaveDio() { int flags = fcntl(fd_, F_GETFL, 0); - if(flags < 0) { + if (flags < 0) { return IOError(filename_, errno); } flags &= ~O_DIRECT; - if(fcntl(fd_, F_SETFL, flags) < 0) { + if (fcntl(fd_, F_SETFL, flags) < 0) { return IOError(filename_, errno); } is_dio_ = false; @@ -1043,17 +1041,19 @@ Status PosixWritableFile::LeaveDio() { Status PosixWritableFile::FlushBuffered() { Status s; - // Once flushed with not aligned buffer_size_, + // Once flushed with not aligned buffer_size_, // we flush the last aligned buffer, and leave // dio mode, and never enter again. - if (is_dio_ && - (pos_ & (align_size_ - 1)) != 0) { - // For example, assume buffer_size_ is 524288 (aka 512kB), and align_size is 512 bytes. - // When user write 400000 bytes to buffer and call Flush(). We do followed steps: - // 1. get the maximum aligned size for a dio write by caculationg 400000 & (~(512 -1)) = 399872. + if (is_dio_ && (pos_ & (align_size_ - 1)) != 0) { + // For example, assume buffer_size_ is 524288 (aka 512kB), and align_size is + // 512 bytes. + // When user write 400000 bytes to buffer and call Flush(). We do followed + // steps: + // 1. get the maximum aligned size for a dio write by caculationg 400000 & + // (~(512 -1)) = 399872. size_t aligned_pos = pos_ & (~(align_size_ - 1)); if (aligned_pos != 0) { - // 2. Write this 399872 bytes to file in dio mode. + // 2. Write this 399872 bytes to file in dio mode. s = WriteRaw(buf_, aligned_pos); } // 3. Leave Dio Mode (never enter again). @@ -1086,5 +1086,4 @@ Status PosixWritableFile::WriteRaw(const char* p, size_t n) { } return Status::OK(); } - } // namespace leveldb diff --git a/src/leveldb/util/env_posix.h b/src/leveldb/util/env_posix.h index e4a66276f..b1ca4d823 100644 --- a/src/leveldb/util/env_posix.h +++ b/src/leveldb/util/env_posix.h @@ -17,7 +17,7 @@ size_t GetLogicalBufferSize(int fd); } class PosixWritableFile : public WritableFile { -private: + private: // buf_[0, pos_-1] contains data to be written to fd_. std::string filename_; int fd_; @@ -27,10 +27,8 @@ class PosixWritableFile : public WritableFile { size_t align_size_; char* buf_ = NULL; -public: - PosixWritableFile(const std::string& fname, - int fd, - const EnvOptions& options); + public: + PosixWritableFile(const std::string& fname, int fd, const EnvOptions& options); virtual ~PosixWritableFile(); @@ -46,9 +44,11 @@ class PosixWritableFile : public WritableFile { Status LeaveDio(); -private: + private: Status FlushBuffered(); Status WriteRaw(const char* p, size_t n); + + std::string GetFileName() const override { return filename_; } }; } \ No newline at end of file diff --git a/src/leveldb/util/env_test.cc b/src/leveldb/util/env_test.cc index b583dc927..88ba256db 100644 --- a/src/leveldb/util/env_test.cc +++ b/src/leveldb/util/env_test.cc @@ -6,135 +6,132 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "leveldb/env_flash_block_cache.h" - #include "leveldb/env.h" #include "leveldb/slog.h" #include "port/port.h" #include "util/string_ext.h" #include "util/testharness.h" #include "util/testutil.h" + +#define private public #include "util/env_posix.h" +#undef private namespace leveldb { static const int kDelayMicros = 100000; class PosixWritableFileTest { -public: - Env* env_; - PosixWritableFile* file_ = nullptr; - const std::string kTmpFileName = "/tmp/posix_writable_test_file"; + public: + Env* env_; + PosixWritableFile* file_ = nullptr; + const std::string kTmpFileName = "/tmp/posix_writable_test_file"; -public: - PosixWritableFileTest() : env_(Env::Default()) { - EnvOptions env_opt; - env_opt.use_direct_io_write = true; - env_opt.posix_write_buffer_size = 256<<10; - WritableFile* file; - env_->NewWritableFile(kTmpFileName, &file, env_opt); - if (file) { - file_ = dynamic_cast(file); - } + public: + PosixWritableFileTest() : env_(Env::Default()) { + EnvOptions env_opt; + env_opt.use_direct_io_write = true; + env_opt.posix_write_buffer_size = 256 << 10; + WritableFile* file; + env_->NewWritableFile(kTmpFileName, &file, env_opt); + if (file) { + file_ = dynamic_cast(file); } + } - ~PosixWritableFileTest() { - env_->DeleteFile(kTmpFileName); - } + ~PosixWritableFileTest() { env_->DeleteFile(kTmpFileName); } }; TEST(PosixWritableFileTest, PosixWriteTest) { - std::string content1((1 << 20) + 50, 'k'); - file_->Append(content1); - uint64_t size(0); - env_->GetFileSize(kTmpFileName, &size); - ASSERT_EQ(size, 1 << 20); - ASSERT_EQ(file_->pos_, 50); - ASSERT_TRUE(file_->is_dio_); - - std::string content2((1 << 20) + 80, 't'); - file_->Append(content2); - env_->GetFileSize(kTmpFileName, &size); - ASSERT_EQ(size, 1 << 21); - ASSERT_EQ(file_->pos_, 130); - ASSERT_TRUE(file_->is_dio_); - - std::string content3(file_->align_size_ - 130, 'p'); - file_->Append(content3); - env_->GetFileSize(kTmpFileName, &size); - ASSERT_EQ(size, 1 << 21); - ASSERT_EQ(file_->pos_, file_->align_size_); - ASSERT_TRUE(file_->is_dio_); - - file_->Flush(); - env_->GetFileSize(kTmpFileName, &size); - ASSERT_EQ(size, (1 << 21) + file_->align_size_); - ASSERT_EQ(file_->pos_, 0); - ASSERT_TRUE(file_->is_dio_); - - std::string content4(30, 'p'); - file_->Append(content4); - env_->GetFileSize(kTmpFileName, &size); - ASSERT_EQ(size, (1 << 21) + file_->align_size_); - ASSERT_EQ(file_->pos_, 30); - ASSERT_TRUE(file_->is_dio_); - - file_->Flush(); - env_->GetFileSize(kTmpFileName, &size); - ASSERT_EQ(size, (1 << 21) + file_->align_size_ + 30); - ASSERT_EQ(file_->pos_, 0); - ASSERT_TRUE(!file_->is_dio_); - - ASSERT_TRUE(file_->Close().ok()); - RandomAccessFile* rfile; - EnvOptions env_opt; - env_opt.use_direct_io_read = true; - env_->NewRandomAccessFile(kTmpFileName, &rfile, env_opt); - Slice result; - char* buf = new char[30 << 20]; - rfile->Read(0, 30 << 20, &result, buf); - ASSERT_EQ(content1.size() + content2.size() - + content3.size() + content4.size(), result.size()); - ASSERT_EQ(content1 + content2 + content3 + content4, result.ToString()); - - delete [] buf; + std::string content1((1 << 20) + 50, 'k'); + file_->Append(content1); + uint64_t size(0); + env_->GetFileSize(kTmpFileName, &size); + ASSERT_EQ(size, 1 << 20); + ASSERT_EQ(file_->pos_, 50); + ASSERT_TRUE(file_->is_dio_); + + std::string content2((1 << 20) + 80, 't'); + file_->Append(content2); + env_->GetFileSize(kTmpFileName, &size); + ASSERT_EQ(size, 1 << 21); + ASSERT_EQ(file_->pos_, 130); + ASSERT_TRUE(file_->is_dio_); + + std::string content3(file_->align_size_ - 130, 'p'); + file_->Append(content3); + env_->GetFileSize(kTmpFileName, &size); + ASSERT_EQ(size, 1 << 21); + ASSERT_EQ(file_->pos_, file_->align_size_); + ASSERT_TRUE(file_->is_dio_); + + file_->Flush(); + env_->GetFileSize(kTmpFileName, &size); + ASSERT_EQ(size, (1 << 21) + file_->align_size_); + ASSERT_EQ(file_->pos_, 0); + ASSERT_TRUE(file_->is_dio_); + + std::string content4(30, 'p'); + file_->Append(content4); + env_->GetFileSize(kTmpFileName, &size); + ASSERT_EQ(size, (1 << 21) + file_->align_size_); + ASSERT_EQ(file_->pos_, 30); + ASSERT_TRUE(file_->is_dio_); + + file_->Flush(); + env_->GetFileSize(kTmpFileName, &size); + ASSERT_EQ(size, (1 << 21) + file_->align_size_ + 30); + ASSERT_EQ(file_->pos_, 0); + ASSERT_TRUE(!file_->is_dio_); + + ASSERT_TRUE(file_->Close().ok()); + RandomAccessFile* rfile; + EnvOptions env_opt; + env_opt.use_direct_io_read = true; + env_->NewRandomAccessFile(kTmpFileName, &rfile, env_opt); + Slice result; + char* buf = new char[30 << 20]; + rfile->Read(0, 30 << 20, &result, buf); + ASSERT_EQ(content1.size() + content2.size() + content3.size() + content4.size(), result.size()); + ASSERT_EQ(content1 + content2 + content3 + content4, result.ToString()); + + delete[] buf; } class EnvPosixTest { -private: - port::Mutex mu_; - std::string events_; + private: + port::Mutex mu_; + std::string events_; -public: - Env* env_; - EnvPosixTest() : env_(Env::Default()) { } + public: + Env* env_; + EnvPosixTest() : env_(Env::Default()) {} }; static void SetBool(void* ptr) { - reinterpret_cast(ptr)->NoBarrier_Store(ptr); + reinterpret_cast(ptr)->NoBarrier_Store(ptr); } TEST(EnvPosixTest, RunImmediately) { - port::AtomicPointer called (NULL); + port::AtomicPointer called(NULL); env_->Schedule(&SetBool, &called); Env::Default()->SleepForMicroseconds(kDelayMicros); ASSERT_TRUE(called.NoBarrier_Load() != NULL); } TEST(EnvPosixTest, RunMany) { - port::AtomicPointer last_id (NULL); + port::AtomicPointer last_id(NULL); struct CB { - port::AtomicPointer* last_id_ptr; // Pointer to shared slot - uintptr_t id; // Order# for the execution of this callback + port::AtomicPointer* last_id_ptr; // Pointer to shared slot + uintptr_t id; // Order# for the execution of this callback - CB(port::AtomicPointer* p, int i) : last_id_ptr(p), id(i) { } + CB(port::AtomicPointer* p, int i) : last_id_ptr(p), id(i) {} static void Run(void* v) { CB* cb = reinterpret_cast(v); void* cur = cb->last_id_ptr->NoBarrier_Load(); - ASSERT_EQ(cb->id-1, reinterpret_cast(cur)); + ASSERT_EQ(cb->id - 1, reinterpret_cast(cur)); cb->last_id_ptr->Release_Store(reinterpret_cast(cb->id)); } }; @@ -158,140 +155,47 @@ TEST(EnvPosixTest, RunMany) { } struct State { - port::Mutex mu; - int val; - int num_running; + port::Mutex mu; + int val; + int num_running; }; static void ThreadBody(void* arg) { - State* s = reinterpret_cast(arg); - s->mu.Lock(); - s->val += 1; - s->num_running -= 1; - s->mu.Unlock(); + State* s = reinterpret_cast(arg); + s->mu.Lock(); + s->val += 1; + s->num_running -= 1; + s->mu.Unlock(); } TEST(EnvPosixTest, StartThread) { - State state; - state.val = 0; - state.num_running = 3; - for (int i = 0; i < 3; i++) { - env_->StartThread(&ThreadBody, &state); - } - while (true) { - state.mu.Lock(); - int num = state.num_running; - state.mu.Unlock(); - if (num == 0) { - break; - } - Env::Default()->SleepForMicroseconds(kDelayMicros); + State state; + state.val = 0; + state.num_running = 3; + for (int i = 0; i < 3; i++) { + env_->StartThread(&ThreadBody, &state); + } + while (true) { + state.mu.Lock(); + int num = state.num_running; + state.mu.Unlock(); + if (num == 0) { + break; } - ASSERT_EQ(state.val, 3); + Env::Default()->SleepForMicroseconds(kDelayMicros); + } + ASSERT_EQ(state.val, 3); } -#define TEST_DATA_SIZE 384 // will cross buffer boundary -#define TEST_DATA_NUM 500 +#define TEST_DATA_SIZE 384 // will cross buffer boundary +#define TEST_DATA_NUM 500 const std::string block_based_cache_paths = "./block_based_dir"; const std::string base_env_path = "./base_dir/"; -class FlashBlockCacheEnvTest { -public: - FlashBlockCacheEnvTest() { - // init env - Logger* logger; - Env::Default()->NewLogger("/tmp/block_cache.log", &logger); - Env::Default()->SetLogger(logger); - base_env = Env::Default(); - flash_block_cache_env = new leveldb::FlashBlockCacheEnv(base_env); - cache_dir = block_based_cache_paths; - base_dir = base_env_path; - test_sst_file = base_dir + "/00001234.sst"; - - // init block-based cache - leveldb::FlashBlockCacheOptions opts; - opts.block_size = 8192; - opts.blockset_size = 64UL << 20; - opts.fid_batch_num = 10000; - opts.cache_size = 512UL << 20; - opts.meta_block_cache_size = 2000; - opts.meta_table_cache_size = 500; - opts.write_buffer_size = 1048576UL; - reinterpret_cast(flash_block_cache_env)->LoadCache(opts, cache_dir + "/block_cache"); - - // prepare test data - SetupTestData(); - } - - void SetupTestData() { - fprintf(stderr, "SetupTestData()\n"); - Status s = flash_block_cache_env->CreateDir(base_dir); - flash_block_cache_env->DeleteFile(test_sst_file); - //ASSERT_TRUE(s.ok()) << ": status = " << s.ToString(); - - WritableFile* wf = NULL; - s = flash_block_cache_env->NewWritableFile(test_sst_file, &wf, EnvOptions()); - ASSERT_TRUE(s.ok()) << ": status = " << s.ToString(); - - Random rnd(test::RandomSeed()); - uint32_t succ_num = 0; - test_data.resize(TEST_DATA_NUM); - for (uint32_t i = 0; i < TEST_DATA_NUM; ++i) { - std::string key; - test::RandomString(&rnd, TEST_DATA_SIZE, &key); - s = wf->Append(key); - if (s.ok()) { - succ_num++; - } - test_data[i] = key; - } - ASSERT_EQ(succ_num, TEST_DATA_NUM); - delete wf; - } - - void CheckRandomRead(uint32_t idx, const std::string& tip = "") { - RandomAccessFile* rfile; - Status s = flash_block_cache_env->NewRandomAccessFile(test_sst_file, - TEST_DATA_SIZE * TEST_DATA_NUM, &rfile, EnvOptions()); - ASSERT_TRUE(s.ok()) << ": status = " << s.ToString(); - - Slice data; - char scratch[8 * 1024]; - s = rfile->Read(idx * TEST_DATA_SIZE, TEST_DATA_SIZE, &data, scratch); - ASSERT_TRUE(s.ok()) << ": status = " << s.ToString(); - ASSERT_TRUE(memcmp(data.data(), &test_data[idx][0], TEST_DATA_SIZE) == 0) - << tip << ": data = " << data.data() - << "\n vs. test_data[" << idx << "] = " << test_data[idx]; - delete rfile; - } - -public: - Env* base_env; - Env* flash_block_cache_env; - std::string cache_dir; - std::string base_dir; - std::string test_sst_file; - std::vector test_data; -}; - -TEST(FlashBlockCacheEnvTest, RandomReadTest) { - Random rnd(test::RandomSeed()); - for (uint32_t i = 0; i < TEST_DATA_NUM; ++i) { - uint32_t idx = rnd.Uniform(TEST_DATA_NUM); - CheckRandomRead(idx); - } -} - -TEST(FlashBlockCacheEnvTest, SequenceReadTest) { - for (uint32_t i = 0; i < TEST_DATA_NUM; ++i) { - CheckRandomRead(i); - } -} - // disable by anqin, because it needs HDFS env #if 0 -#define TEST_DATA_SIZE 384 // will cross buffer boundary -#define TEST_DATA_NUM 500 +#define TEST_DATA_SIZE 384 // will cross buffer boundary +#define TEST_DATA_NUM 500 const std::string cache_paths = "./cache_dir_1/;./cache_dir_2/"; const std::string cache_name = "tera.test_cache"; @@ -476,6 +380,4 @@ TEST(EnvCacheTest, ScheduleBlockOut) {} } // namespace leveldb -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} +int main(int argc, char** argv) { return leveldb::test::RunAllTests(); } diff --git a/src/leveldb/util/filter_policy.cc b/src/leveldb/util/filter_policy.cc index 42c5fb3d0..2e1189806 100644 --- a/src/leveldb/util/filter_policy.cc +++ b/src/leveldb/util/filter_policy.cc @@ -10,6 +10,6 @@ namespace leveldb { -FilterPolicy::~FilterPolicy() { } +FilterPolicy::~FilterPolicy() {} } // namespace leveldb diff --git a/src/leveldb/util/flash_block_cache_impl.cc b/src/leveldb/util/flash_block_cache_impl.cc deleted file mode 100644 index 16fbe9cd3..000000000 --- a/src/leveldb/util/flash_block_cache_impl.cc +++ /dev/null @@ -1,697 +0,0 @@ -// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "util/flash_block_cache_impl.h" - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "common/counter.h" -#include "db/table_cache.h" -#include "leveldb/db.h" -#include "leveldb/cache.h" -#include "leveldb/env.h" -#include "leveldb/env_flash_block_cache.h" -#include "leveldb/iterator.h" -#include "leveldb/options.h" -#include "leveldb/statistics.h" -#include "leveldb/status.h" -#include "leveldb/table_utils.h" -#include "leveldb/write_batch.h" -#include "port/port.h" -#include "util/coding.h" -#include "util/hash.h" -#include "util/mutexlock.h" -#include "util/string_ext.h" -#include "util/thread_pool.h" - -namespace leveldb { -::tera::Counter tera_flash_block_cache_evict_counter; - -// Each SSD will New a BlockCache -// block state -uint64_t kCacheBlockValid = 0x1; -uint64_t kCacheBlockLocked = 0x2; -uint64_t kCacheBlockDfsRead = 0x4; -uint64_t kCacheBlockCacheRead = 0x8; -uint64_t kCacheBlockCacheFill = 0x10; - -FlashBlockCacheImpl::FlashBlockCacheImpl(const FlashBlockCacheOptions& options) - : options_(options), - dfs_env_(options.env), - new_fid_(0), - prev_fid_(0), - block_set_cache_(nullptr), - meta_db_(nullptr) { - bg_fill_.SetBackgroundThreads(30); - bg_read_.SetBackgroundThreads(30); - bg_dfs_read_.SetBackgroundThreads(30); - bg_flush_.SetBackgroundThreads(30); - bg_control_.SetBackgroundThreads(2); - stat_ = CreateDBStatistics(); -} - -FlashBlockCacheImpl::~FlashBlockCacheImpl() { - delete stat_; -} - -void FlashBlockCacheImpl::BGControlThreadFunc(void* arg) { - reinterpret_cast(arg)->BGControlThread(); -} - -void FlashBlockCacheImpl::BGControlThread() { - stat_->MeasureTime(FLASH_BLOCK_CACHE_EVICT_NR, - tera_flash_block_cache_evict_counter.Clear()); - - Log("[%s] statistics: " - "%s, %s, %s, %s, %s, " - "%s, %s, %s, %s, %s, " - "%s, %s, %s, %s, %s\n", - this->WorkPath().c_str(), - stat_->GetBriefHistogramString(FLASH_BLOCK_CACHE_PREAD_QUEUE).c_str(), - stat_->GetBriefHistogramString(FLASH_BLOCK_CACHE_PREAD_SSD_READ).c_str(), - stat_->GetBriefHistogramString(FLASH_BLOCK_CACHE_PREAD_DFS_READ).c_str(), - stat_->GetBriefHistogramString(FLASH_BLOCK_CACHE_PREAD_SSD_WRITE).c_str(), - stat_->GetBriefHistogramString(FLASH_BLOCK_CACHE_PREAD_FILL_USER_DATA).c_str(), - - stat_->GetBriefHistogramString(FLASH_BLOCK_CACHE_PREAD_RELEASE_BLOCK).c_str(), - stat_->GetBriefHistogramString(FLASH_BLOCK_CACHE_LOCKMAP_BS_RELOAD_NR).c_str(), - stat_->GetBriefHistogramString(FLASH_BLOCK_CACHE_PREAD_GET_BLOCK).c_str(), - stat_->GetBriefHistogramString(FLASH_BLOCK_CACHE_PREAD_BLOCK_NR).c_str(), - stat_->GetBriefHistogramString(FLASH_BLOCK_CACHE_GET_BLOCK_SET).c_str(), - - stat_->GetBriefHistogramString(FLASH_BLOCK_CACHE_BS_LRU_LOOKUP).c_str(), - stat_->GetBriefHistogramString(FLASH_BLOCK_CACHE_PREAD_WAIT_UNLOCK).c_str(), - stat_->GetBriefHistogramString(FLASH_BLOCK_CACHE_ALLOC_FID).c_str(), - stat_->GetBriefHistogramString(FLASH_BLOCK_CACHE_GET_FID).c_str(), - stat_->GetBriefHistogramString(FLASH_BLOCK_CACHE_EVICT_NR).c_str()); - - Log("[%s] statistics(meta): " - "table_cache: %lf/%lu/%lu, " - "block_cache: %lf/%lu/%lu\n", - this->WorkPath().c_str(), - options_.opts.table_cache->HitRate(true), - options_.opts.table_cache->TableEntries(), - options_.opts.table_cache->ByteSize(), - options_.opts.block_cache->HitRate(true), - options_.opts.block_cache->Entries(), - options_.opts.block_cache->TotalCharge()); - - // resched after 6s - stat_->ClearHistogram(FLASH_BLOCK_CACHE_PREAD_QUEUE); - stat_->ClearHistogram(FLASH_BLOCK_CACHE_PREAD_SSD_READ); - stat_->ClearHistogram(FLASH_BLOCK_CACHE_PREAD_DFS_READ); - stat_->ClearHistogram(FLASH_BLOCK_CACHE_PREAD_SSD_WRITE); - stat_->ClearHistogram(FLASH_BLOCK_CACHE_PREAD_FILL_USER_DATA); - stat_->ClearHistogram(FLASH_BLOCK_CACHE_PREAD_RELEASE_BLOCK); - stat_->ClearHistogram(FLASH_BLOCK_CACHE_LOCKMAP_BS_RELOAD_NR); - stat_->ClearHistogram(FLASH_BLOCK_CACHE_PREAD_GET_BLOCK); - stat_->ClearHistogram(FLASH_BLOCK_CACHE_PREAD_BLOCK_NR); - stat_->ClearHistogram(FLASH_BLOCK_CACHE_GET_BLOCK_SET); - stat_->ClearHistogram(FLASH_BLOCK_CACHE_BS_LRU_LOOKUP); - stat_->ClearHistogram(FLASH_BLOCK_CACHE_PREAD_WAIT_UNLOCK); - stat_->ClearHistogram(FLASH_BLOCK_CACHE_ALLOC_FID); - stat_->ClearHistogram(FLASH_BLOCK_CACHE_GET_FID); - stat_->ClearHistogram(FLASH_BLOCK_CACHE_EVICT_NR); - bg_control_.Schedule(&FlashBlockCacheImpl::BGControlThreadFunc, this, 10, 6000); -} - -void FlashBlockCacheImpl::BlockDeleter(const Slice& key, void* v) { - CacheBlock* block = (CacheBlock*)v; - //Log("Evict blockcache: %s\n", block->ToString().c_str()); - delete block; - tera_flash_block_cache_evict_counter.Inc(); -} - -// if lock succ, put lock_val, else get newer value -Status FlashBlockCacheImpl::LockAndPut(LockContent* lc) { - mu_.AssertHeld(); - Status s; - std::string key; - if ((key = lc->Encode()) == "") { - return Status::NotSupported("key type error"); - } - //Log("[%s] trylock key: %s\n", - //this->WorkPath().c_str(), - //key.c_str()); - - Waiter* w = NULL; - LockKeyMap::iterator it = lock_key_.find(key); - if (it != lock_key_.end()) { - w = it->second; - w->wait_num ++; - mu_.Unlock(); - w->Wait(); - - s = GetContentAfterWait(lc); - mu_.Lock(); - if (--w->wait_num == 0) { - // last thread wait for open - lock_key_.erase(key); - //Log("[%s] wait done %s, delete cv\n", - //this->WorkPath().c_str(), - //key.c_str()); - delete w; - } else { - //Log("[%s] wait done %s, not last\n", - //this->WorkPath().c_str(), - //key.c_str()); - } - } else { - w = new Waiter; - w->wait_num = 1; - lock_key_[key] = w; - mu_.Unlock(); - - s = PutContentAfterLock(lc); - mu_.Lock(); - if (--w->wait_num == 0) { - lock_key_.erase(key); - //Log("[%s] put done %s, no wait thread\n", - //this->WorkPath().c_str(), - //key.c_str()); - delete w; - } else { - mu_.Unlock(); - //Log("[%s] put done %s, signal all wait thread\n", - //this->WorkPath().c_str(), - //key.c_str()); - w->SignalAll(); - - mu_.Lock(); - } - } - return s; -} - -Status FlashBlockCacheImpl::GetContentAfterWait(LockContent* lc) { - Status s; - std::string key = lc->Encode(); - - if (lc->type == LockKeyType::kDBKey) { - ReadOptions r_opts; - s = meta_db_->Get(r_opts, key, lc->db_val); - //Log("[%s] get lock key: %s, val: %s, status: %s\n", - //this->WorkPath().c_str(), - //key.c_str(), - //lc->db_val->c_str(), - //s.ToString().c_str()); - } else if (lc->type == LockKeyType::kBlockSetKey) { - std::string bs_key; - PutFixed64(&bs_key, lc->sid); - LRUHandle* bs_handle = (LRUHandle*)block_set_cache_->Lookup(bs_key); - lc->block_set = reinterpret_cast(block_set_cache_->Value((Cache::Handle*)bs_handle)); - assert(bs_handle == lc->block_set->handle); - //Log("[%s] get blockset sid: %lu\n", - //this->WorkPath().c_str(), - //lc->sid); - } - return s; -} - -Status FlashBlockCacheImpl::PutContentAfterLock(LockContent* lc) { - Status s; - std::string key = lc->Encode(); - - if (lc->type == LockKeyType::kDBKey) { - WriteOptions w_opts; - s = meta_db_->Put(w_opts, key, lc->db_lock_val); - if (s.ok()) { - lc->db_val->append(lc->db_lock_val.data(), lc->db_lock_val.size()); - } - //Log("[%s] Insert db key : %s, val %s, status %s\n", - //this->WorkPath().c_str(), - //lc->KeyToString().c_str(), - //lc->ValToString().c_str(), - //s.ToString().c_str()); - } else if (lc->type == LockKeyType::kDeleteDBKey) { - WriteOptions w_opts; - s = meta_db_->Delete(w_opts, key); - //Log("[%s] Delete db key : %s, val %s, status %s\n", - //this->WorkPath().c_str(), - //lc->KeyToString().c_str(), - //lc->ValToString().c_str(), - //s.ToString().c_str()); - } else if (lc->type == LockKeyType::kBlockSetKey) { // cannot double insert - std::string bs_key; - PutFixed64(&bs_key, lc->sid); - LRUHandle* bs_handle = (LRUHandle*)block_set_cache_->Lookup(bs_key); - if (bs_handle != NULL) { - lc->block_set = reinterpret_cast(block_set_cache_->Value((Cache::Handle*)bs_handle)); - assert(bs_handle == lc->block_set->handle); - } else { - s = ReloadBlockSet(lc); - } - } - return s; -} - -Status FlashBlockCacheImpl::ReloadBlockSet(LockContent* lc) { - Status s; - std::string key = lc->Encode(); - - lc->block_set = new BlockSet; - lc->block_set->block_cache = NewBlockBasedCache(options_.blocks_per_set);// number of blocks in BS - std::string file = options_.cache_dir + "/" + Uint64ToString(lc->sid); - lc->block_set->fd = open(file.c_str(), O_RDWR | O_CREAT, 0644); - assert(lc->block_set->fd > 0); - Log("[%s] New BlockSet %s, file: %s, nr_block: %lu, fd: %d\n", - this->WorkPath().c_str(), - lc->KeyToString().c_str(), - file.c_str(), options_.blocks_per_set, - lc->block_set->fd); - - // reload hash lru - uint64_t total_items = 0; - ReadOptions s_opts; - leveldb::Iterator* db_it = meta_db_->NewIterator(s_opts); - for (db_it->Seek(key); - db_it->Valid() && db_it->key().starts_with(kKeyPrefixBlockSet); - db_it->Next()) { - Slice lkey = db_it->key(); - CacheBlock* block = new CacheBlock; - - // decode key - block->DecodeDBKey(lkey); - if (block->sid != lc->sid) { - delete block; - block = nullptr; - break; - } - total_items++; - - // decode value - block->DecodeDBValue(db_it->value()); // get fid and block_idx - block->state = (block->Test(kCacheBlockValid)) ? kCacheBlockValid : 0; - //Log("[%s] Recovery %s, insert cacheblock into BlockBasedLRU, %s\n", - //this->WorkPath().c_str(), - //lc->KeyToString().c_str(), - //block->ToString().c_str()); - - LRUHandle* handle = (LRUHandle*)(lc->block_set->block_cache->Insert(block->CacheKey(), block, block->offset_in_blockset, &FlashBlockCacheImpl::BlockDeleter)); - assert((uint64_t)(lc->block_set->block_cache->Value((Cache::Handle*)handle)) == (uint64_t)block); - assert(handle->cache_id == block->offset_in_blockset); - block->handle = handle; - lc->block_set->block_cache->Release((Cache::Handle*)handle); - } - delete db_it; - stat_->MeasureTime(FLASH_BLOCK_CACHE_LOCKMAP_BS_RELOAD_NR, total_items); - - LRUHandle* bs_handle = (LRUHandle*)block_set_cache_->Insert(lc->block_set->CacheKey(lc->sid), lc->block_set, 1, NULL); - assert(bs_handle != NULL); - lc->block_set->handle = bs_handle; - return s; -} - -std::string FlashBlockCacheImpl::WorkPath() const { - return work_path_; -} - -Status FlashBlockCacheImpl::LoadCache() { - // open meta db - work_path_ = options_.cache_dir; - std::string dbname = options_.cache_dir + "/" + kMetaDBName; - options_.opts.env = options_.cache_env; // local write - options_.opts.filter_policy = NewBloomFilterPolicy(10); - options_.opts.block_cache = leveldb::NewLRUCache(options_.meta_block_cache_size * 1024UL * 1024); - options_.opts.table_cache = new leveldb::TableCache(options_.meta_table_cache_size * 1024UL * 1024); - options_.opts.write_buffer_size = options_.write_buffer_size; - options_.opts.info_log = Logger::DefaultLogger(); - - // give meta db's lg0 a seperate PosixEnv including a seperate ThreadPool - leveldb::LG_info* lg_info = new leveldb::LG_info(0); - lg_info->env = NewPosixEnv(); - lg_info->env->SetBackgroundThreads(5); - std::map* lg_info_list = new std::map; - (*lg_info_list)[0] = lg_info; - options_.opts.lg_info_list = lg_info_list; - - Log("[flash_block_cache %s] open meta db: block_cache: %lu, table_cache: %lu, write_buffer: %lu", - dbname.c_str(), - options_.meta_block_cache_size, - options_.meta_table_cache_size, - options_.write_buffer_size); - - Status s = DB::Open(options_.opts, dbname, &meta_db_); - assert(s.ok()); - - // recover fid - std::string key = FIDDBKey(); - std::string val; - ReadOptions r_opts; - s = meta_db_->Get(r_opts, key, &val); - if (!s.ok()) { - prev_fid_ = 0; - } else { - prev_fid_ = DecodeFixed64(val.c_str()); - } - new_fid_ = prev_fid_ + options_.fid_batch_num; - Log("[flash_block_cache %s]: recover fid: prev_fid: %lu, new_fid: %lu\n", - dbname.c_str(), prev_fid_, new_fid_); - - // recover cache size - key = kKeyConfCacheSize; - val = ""; - s = meta_db_->Get(r_opts, key, &val); - if (!s.ok() || options_.force_update_conf_enabled) { - // first load or need force update, use conf from FLAG file, - // and save conf to meta db - if (!s.ok()) { - // first load - Log("[flash_block_cache %s]: cache size not exist in meta db, load from FLAG file, cache size: %lu\n", dbname.c_str(), options_.cache_size); - } else { - // force update conf - Log("[flash_block_cache %s]: force update conf from FLAG file, cache size: %lu\n", dbname.c_str(), options_.cache_size); - } - val = ""; - PutFixed64(&val, options_.cache_size); - leveldb::WriteBatch batch; - batch.Put(key, val); - s = meta_db_->Write(leveldb::WriteOptions(), &batch); - if (s.ok()) { - Log("[flash_block_cache %s]: save cache size success, cache size: %lu\n", dbname.c_str(), options_.cache_size); - } else { - Log("[flash_block_cache %s]: save cache size fail, cache size: %lu\n", dbname.c_str(), options_.cache_size); - } - } else { - // load conf from meta db - uint64_t cache_size = DecodeFixed64(val.c_str()); - if (cache_size != options_.cache_size) { - Log("[flash_block_cache %s]: WARNING: ignore cache size in conf: %lu, set block_cache_force_update_conf_enabled true to update local db from conf file\n", dbname.c_str(), options_.cache_size); - } - options_.cache_size = cache_size; - Log("[flash_block_cache %s]: recover cache size from meta db: %lu\n", dbname.c_str(), cache_size); - } - - // recover blockset size - key = kKeyConfBlockSetSize; - val = ""; - s = meta_db_->Get(r_opts, key, &val); - if (!s.ok() || options_.force_update_conf_enabled) { - // first load or need force update, use conf from FLAG file, - // and save conf to meta db - if (!s.ok()) { - // first load - Log("[flash_block_cache %s]: blockset size not exist in meta db, load from FLAG file, blockset size: %lu\n", dbname.c_str(), options_.blockset_size); - } else { - // force update conf - Log("[flash_block_cache %s]: force update conf from FLAG file, blockset size: %lu\n", dbname.c_str(), options_.blockset_size); - } - val = ""; - PutFixed64(&val, options_.blockset_size); - leveldb::WriteBatch batch; - batch.Put(key, val); - s = meta_db_->Write(leveldb::WriteOptions(), &batch); - if (s.ok()) { - Log("[flash_block_cache %s]: save blockset size success, blockset size: %lu\n", dbname.c_str(), options_.blockset_size); - } else { - Log("[flash_block_cache %s]: save blockset size fail, blockset size: %lu\n", dbname.c_str(), options_.blockset_size); - } - } else { - // load conf from meta db - uint64_t blockset_size = DecodeFixed64(val.c_str()); - if (blockset_size != options_.blockset_size) { - Log("[flash_block_cache %s]: WARNING: ignore blockset size in conf: %lu, set block_cache_force_update_conf_enabled true to update local db from conf file\n", dbname.c_str(), options_.blockset_size); - } - options_.blockset_size = blockset_size; - Log("[flash_block_cache %s]: recover blockset size from meta db: %lu\n", dbname.c_str(), blockset_size); - } - - // recover block size - key = kKeyConfBlockSize; - val = ""; - s = meta_db_->Get(r_opts, key, &val); - if (!s.ok() || options_.force_update_conf_enabled) { - // first load or need force update, use conf from FLAG file, - // and save conf to meta db - if (!s.ok()) { - // first load - Log("[flash_block_cache %s]: block size not exist in meta db, load from FLAG file, block size: %lu\n", dbname.c_str(), options_.block_size); - } else { - // force update conf - Log("[flash_block_cache %s]: force update conf from FLAG file, block size: %lu\n", dbname.c_str(), options_.block_size); - } - val = ""; - PutFixed64(&val, options_.block_size); - leveldb::WriteBatch batch; - batch.Put(key, val); - s = meta_db_->Write(leveldb::WriteOptions(), &batch); - if (s.ok()) { - Log("[flash_block_cache %s]: save block size success, block size: %lu\n", dbname.c_str(), options_.block_size); - } else { - Log("[flash_block_cache %s]: save block size fail, block size: %lu\n", dbname.c_str(), options_.block_size); - } - } else { - // load conf from meta db - uint64_t block_size = DecodeFixed64(val.c_str()); - if (block_size != options_.block_size) { - Log("[flash_block_cache %s]: WARNING: ignore block size in conf: %lu, set block_cache_force_update_conf_enabled true to update local db from conf file\n", dbname.c_str(), options_.block_size); - } - options_.block_size = block_size; - Log("[flash_block_cache %s]: recover block size from meta db: %lu\n", dbname.c_str(), block_size); - } - - options_.blockset_num = options_.cache_size / options_.blockset_size + 1; - Log("[flash_block_cache %s]: blockset num: %lu\n", dbname.c_str(), options_.blockset_num); - - options_.blocks_per_set = options_.blockset_size / options_.block_size + 1; - Log("[flash_block_cache %s]: block num per blockset: %lu\n", dbname.c_str(), options_.blocks_per_set); - - block_set_cache_ = leveldb::NewBlockBasedCache(options_.blockset_num); - - bg_control_.Schedule(&FlashBlockCacheImpl::BGControlThreadFunc, this, 10, 6000); - s = Status::OK(); - return s; -} - -Status FlashBlockCacheImpl::FillCache(CacheBlock* block) { - uint64_t offset_in_blockset = block->offset_in_blockset; - BlockSet* bs = reinterpret_cast(block_set_cache_->Value((Cache::Handle*)block->bs_handle)); - int fd = bs->fd; - - // do io without lock - ssize_t res = pwrite(fd, block->data_block.data(), block->data_block.size(), - offset_in_blockset * options_.block_size); - - if (res < 0) { - Log("[%s] cache fill: sid %lu, blockset.fd %d, datablock size %lu, cb_idx %lu, %s, res %ld\n", - this->WorkPath().c_str(), block->sid, fd, block->data_block.size(), - offset_in_blockset, - block->ToString().c_str(), - res); - return Status::Corruption("FillCache error"); - } - return Status::OK(); -} - -Status FlashBlockCacheImpl::ReadCache(CacheBlock* block) { - uint64_t offset_in_blockset = block->offset_in_blockset; - BlockSet* bs = reinterpret_cast(block_set_cache_->Value((Cache::Handle*)block->bs_handle)); - int fd = bs->fd; - - // do io without lock - ssize_t res = pread(fd, (char*)block->data_block.data(), block->data_block.size(), - offset_in_blockset * options_.block_size); - if (res < 0) { - Log("[%s] cache read: sid %lu, blockset.fd %d, datablock size %lu, cb_idx %lu, %s, res %ld\n", - this->WorkPath().c_str(), block->sid, fd, block->data_block.size(), - offset_in_blockset, - block->ToString().c_str(), - res); - return Status::Corruption("ReadCache error"); - } - return Status::OK(); -} - -uint64_t FlashBlockCacheImpl::AllocFileId() { // no more than fid_batch_num - mu_.AssertHeld(); - uint64_t start_ts = options_.cache_env->NowMicros(); - uint64_t fid = ++new_fid_; - while (new_fid_ - prev_fid_ >= options_.fid_batch_num) { - std::string key = FIDDBKey(); - std::string lock_val; - PutFixed64(&lock_val, new_fid_); - std::string val; - - LockContent lc; - lc.type = LockKeyType::kDBKey; - lc.db_lock_key = key; - lc.db_lock_val = lock_val; - lc.db_val = &val; - Status s = LockAndPut(&lc); - if (s.ok()) { - prev_fid_ = DecodeFixed64(val.c_str()); - } - //Log("[%s] alloc fid: key %s, new_fid: %lu, prev_fid: %lu\n", - //this->WorkPath().c_str(), - //key.c_str(), - //new_fid_, - //prev_fid_); - } - stat_->MeasureTime(FLASH_BLOCK_CACHE_ALLOC_FID, - options_.cache_env->NowMicros() - start_ts); - return fid; -} - -uint64_t FlashBlockCacheImpl::FileId(const std::string& fname) { - uint64_t fid = 0; - std::string key = FNameDBKey(fname); - uint64_t start_ts = options_.cache_env->NowMicros(); - ReadOptions r_opts; - std::string val; - - Status s = meta_db_->Get(r_opts, key, &val); - if (!s.ok()) { // not exist - MutexLock l(&mu_); - fid = AllocFileId(); - std::string v; - PutFixed64(&val, fid); - - LockContent lc; - lc.type = LockKeyType::kDBKey; - lc.db_lock_key = key; - lc.db_lock_val = val; - lc.db_val = &v; - //Log("[%s] alloc fid: %lu, key: %s", - //this->WorkPath().c_str(), - //fid, key.c_str()); - s = LockAndPut(&lc); - assert(s.ok()); - fid = DecodeFixed64(v.c_str()); - } else { // fid in cache - fid = DecodeFixed64(val.c_str()); - } - - //Log("[%s] Fid: %lu, fname: %s\n", - //this->WorkPath().c_str(), - //fid, fname.c_str()); - stat_->MeasureTime(FLASH_BLOCK_CACHE_GET_FID, - options_.cache_env->NowMicros() - start_ts); - return fid; -} - -Status FlashBlockCacheImpl::DeleteFile(const std::string& fname) { - Status s; - std::string key = FNameDBKey(fname); - { - MutexLock l(&mu_); - LockContent lc; - lc.type = LockKeyType::kDeleteDBKey; - lc.db_lock_key = key; - s = LockAndPut(&lc); - } - return s; -} - -BlockSet* FlashBlockCacheImpl::GetBlockSet(uint64_t sid) { - std::string key; - PutFixed64(&key, sid); - BlockSet* block_set = NULL; - uint64_t start_ts = options_.cache_env->NowMicros(); - - LRUHandle* h = (LRUHandle*)block_set_cache_->Lookup(key); - if (h == NULL) { - MutexLock l(&mu_); - LockContent lc; - lc.type = LockKeyType::kBlockSetKey; - lc.sid = sid; - lc.block_set = NULL; - Status s = LockAndPut(&lc); - block_set = lc.block_set; - } else { - //Log("[%s] get blockset from memcache, sid %lu\n", - //this->WorkPath().c_str(), sid); - block_set = reinterpret_cast(block_set_cache_->Value((Cache::Handle*)h)); - assert(block_set->handle == h); - } - stat_->MeasureTime(FLASH_BLOCK_CACHE_GET_BLOCK_SET, - options_.cache_env->NowMicros() - start_ts); - return block_set; -} - -CacheBlock* FlashBlockCacheImpl::GetAndAllocBlock(uint64_t fid, uint64_t block_idx) { - std::string key; - PutFixed64(&key, fid); - PutFixed64(&key, block_idx); - uint32_t hash = Hash(key.c_str(), key.size(), 7); - uint64_t sid = hash % options_.blockset_num; - - //Log("[%s] alloc block, try get blockset, fid: %lu, block_idx: %lu, hash: %u, sid %lu, blockset_num: %lu\n", - //this->WorkPath().c_str(), fid, block_idx, hash, sid, options_.blockset_num); - CacheBlock* block = NULL; - BlockSet* bs = GetBlockSet(sid); // get and alloc bs - Cache* block_cache = bs->block_cache; - - uint64_t start_ts = options_.cache_env->NowMicros(); - bs->mu.Lock(); - LRUHandle* h = (LRUHandle*)block_cache->Lookup(key); - if (h == NULL) { - block = new CacheBlock; - block->fid = fid; - block->block_idx = block_idx; - block->sid = sid; - h = (LRUHandle*)block_cache->Insert(key, block, 0xffffffffffffffff, &FlashBlockCacheImpl::BlockDeleter); - if (h != NULL) { - assert((uint64_t)(block_cache->Value((Cache::Handle*)h)) == (uint64_t)block); - block->offset_in_blockset = h->cache_id; - block->handle = h; - block->bs_handle = bs->handle; - //Log("[%s] Alloc Block: %s, sid %lu, fid %lu, block_idx %lu, hash %u, usage: %lu/%lu\n", - //this->WorkPath().c_str(), - //block->ToString().c_str(), - //sid, fid, block_idx, hash, - //block_cache->TotalCharge(), - //options_.blocks_per_set); - } else { - delete block; - block = NULL; - assert(0); - } - } else { - block = reinterpret_cast(block_cache->Value((Cache::Handle*)h)); - block->bs_handle = block->bs_handle == NULL? bs->handle: block->bs_handle; - } - bs->mu.Unlock(); - - block_set_cache_->Release((Cache::Handle*)bs->handle); - stat_->MeasureTime(FLASH_BLOCK_CACHE_BS_LRU_LOOKUP, - options_.cache_env->NowMicros() - start_ts); - return block; -} - -Status FlashBlockCacheImpl::LogRecord(CacheBlock* block) { - leveldb::WriteBatch batch; - batch.Put(block->EncodeDBKey(), block->EncodeDBValue()); - return meta_db_->Write(leveldb::WriteOptions(), &batch); -} - -Status FlashBlockCacheImpl::ReleaseBlock(CacheBlock* block, bool need_sync) { - Status s; - if (need_sync) { // TODO: dump meta into memtable - s = LogRecord(block); - } - - block->mu.Lock(); - block->ReleaseDataBlock(); - block->s = Status::OK(); // clear io status - block->cv.SignalAll(); - block->mu.Unlock(); - - //Log("[%s] release block: %s\n", this->WorkPath().c_str(), block->ToString().c_str()); - LRUHandle* h = block->handle; - BlockSet* bs = reinterpret_cast(block_set_cache_->Value((Cache::Handle*)block->bs_handle)); - bs->block_cache->Release((Cache::Handle*)h); - return s; -} - -} // namespace leveldb - diff --git a/src/leveldb/util/flash_block_cache_impl.h b/src/leveldb/util/flash_block_cache_impl.h deleted file mode 100644 index 684e59c35..000000000 --- a/src/leveldb/util/flash_block_cache_impl.h +++ /dev/null @@ -1,375 +0,0 @@ -// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include - -#include "db/table_cache.h" -#include "leveldb/db.h" -#include "leveldb/cache.h" -#include "leveldb/env.h" -#include "leveldb/env_flash_block_cache.h" -#include "leveldb/options.h" -#include "leveldb/statistics.h" -#include "leveldb/status.h" -#include "leveldb/table_utils.h" -#include "port/port.h" -#include "util/coding.h" -#include "util/hash.h" -#include "util/mutexlock.h" -#include "util/string_ext.h" -#include "util/thread_pool.h" - -// We provide FlashBlockCacheImpl to manage each flash storage. Each flash's block-based cache -// devides into a set of block_set files, which managed by BlockSet. Each block_set devieds -// into a set of flash cache block, which managed by CacheBlock. - -namespace leveldb { - -static const std::string kMetaDBName = "meta_db"; -static const std::string kKeyPrefixBlockSet = "BS#"; -static const std::string kKeyPrefixFName = "FNAME#"; -static const std::string kKeyFID = "FID#"; -static const std::string kKeyConfCacheSize = "CONF#CACHE_SIZE"; -static const std::string kKeyConfBlockSetSize = "CONF#BLOCKSET_SIZE"; -static const std::string kKeyConfBlockSize = "CONF#BLOCK_SIZE"; - -// block state -extern uint64_t kCacheBlockValid; -extern uint64_t kCacheBlockLocked; -extern uint64_t kCacheBlockDfsRead; -extern uint64_t kCacheBlockCacheRead; -extern uint64_t kCacheBlockCacheFill; - -struct CacheBlock { - // file id alloced by FID alloctor, just like inode number - uint64_t fid; - - // block offset number in user's file - uint64_t block_idx; - - // block set's id - uint64_t sid; - - // block offset number in block_set's file - uint64_t offset_in_blockset; - - // block state bit, includes {valid, locked, dfs_read, cache_read, cache_write} - volatile uint64_t state; - - port::Mutex mu; - port::CondVar cv; - - // block data - Slice data_block; - bool data_block_alloc; // if true, delete data block by itself - uint64_t data_block_refs; - - // handle of this CacheBlock node in the LRU of block - LRUHandle* handle; - - // handle of the BlockSet node which this CacheBlock belongs to - LRUHandle* bs_handle; - - Status s; - - CacheBlock() - : fid(0), - block_idx(0), - sid(0xffffffffffffffff), - offset_in_blockset(0xffffffffffffffff), - state(0), - cv(&mu), - data_block_alloc(false), - data_block_refs(0), - handle(NULL), - bs_handle(NULL) { - } - - bool Test(uint64_t c_state) { - mu.AssertHeld(); - return (state & c_state) == c_state; - } - - void Clear(uint64_t c_state) { - mu.AssertHeld(); - state &= ~c_state; - } - - void Set(uint64_t c_state) { - mu.AssertHeld(); - state |= c_state; - } - - void WaitOnClear(uint64_t c_state) { // access in lock - mu.AssertHeld(); - while (Test(c_state)) { - cv.Wait(); - } - } - - // access in cache lock - void GetDataBlock(uint64_t block_size, Slice data) { - if (data_block_refs == 0) { // first one alloc mem - assert(data_block.size() == 0); - assert(data_block_alloc == false); - if (data.size() == 0) { - char* buf = new char[block_size]; - data = Slice(buf, block_size); - data_block_alloc = true; - } - data_block = data; - } - ++data_block_refs; - } - - // access in cache lock - void ReleaseDataBlock() { - --data_block_refs; - if (data_block_refs == 0) { - if (data_block_alloc) { - char* data = (char*)data_block.data(); - delete[] data; - data_block_alloc = false; - } - data_block = Slice(); - } - } - - // key of the CacheBlock in LRU - std::string CacheKey() { - std::string key; - PutFixed64(&key, fid); - PutFixed64(&key, block_idx); - return key; - } - - std::string EncodeDBKey() { - std::string key = kKeyPrefixBlockSet; - PutFixed64(&key, sid); - PutFixed64(&key, offset_in_blockset); - return key; - } - - void DecodeDBKey(Slice lkey) { - lkey.remove_prefix(kKeyPrefixBlockSet.size());// lkey = BS#, sid, offset - sid = DecodeFixed64(lkey.data()); - lkey.remove_prefix(sizeof(uint64_t)); - offset_in_blockset = DecodeFixed64(lkey.data()); - } - - void DecodeDBValue(Slice record) { - fid = DecodeFixed64(record.data()); - record.remove_prefix(sizeof(uint64_t)); - block_idx = DecodeFixed64(record.data()); - record.remove_prefix(sizeof(uint64_t)); - state = DecodeFixed64(record.data()); - return; - } - - std::string EncodeDBValue() { - std::string r; - PutFixed64(&r, fid); - PutFixed64(&r, block_idx); - PutFixed64(&r, state); - return r; - } - - std::string ToString() { - std::stringstream ss; - ss << "CacheBlock(" << (uint64_t)this << "): fid: " << fid << ", block_idx: " << block_idx - << ", sid: " << sid << ", offset_in_blockset: " << offset_in_blockset - << ", state " << state << ", status " << s.ToString(); - return ss.str(); - } -}; - -struct BlockSet { - // handle of this BlockSet node in the LRU of block set - LRUHandle* handle; - port::Mutex mu; - // LRU of block - Cache* block_cache; - int fd; - - BlockSet(): handle(NULL), block_cache(NULL), fd(-1) {} - - // key of the BlockSet in LRU - std::string CacheKey(uint64_t sid) { - std::string key; - PutFixed64(&key, sid); - return key; - } -}; - -class FlashBlockCacheImpl { -public: - explicit FlashBlockCacheImpl(const FlashBlockCacheOptions& options); - - ~FlashBlockCacheImpl(); - - std::string WorkPath() const; - - Status LoadCache(); // init cache - - static void BlockDeleter(const Slice& key, void* v); - - static void BGControlThreadFunc(void* arg); - - // alloc fid for sst file - uint64_t FileId(const std::string& fname); - - // delete fid from cache - Status DeleteFile(const std::string& fname); - - // alloc data block for sst file - CacheBlock* GetAndAllocBlock(uint64_t fid, uint64_t block_idx); - - Status ReleaseBlock(CacheBlock* block, bool need_sync); - - Status FillCache(CacheBlock* block); - - Status ReadCache(CacheBlock* block); - - Status LogRecord(CacheBlock* block); - -private: - friend struct BlockSet; - struct LockContent; - - Status LockAndPut(LockContent* lc); - - Status GetContentAfterWait(LockContent* lc); - - Status PutContentAfterLock(LockContent* lc); - - Status ReloadBlockSet(LockContent* lc); - - uint64_t AllocFileId(); // no more than fid_batch_num - - BlockSet* GetBlockSet(uint64_t sid); - - void BGControlThread(); - -public: - FlashBlockCacheOptions options_; - - Statistics* stat_; - - Env* dfs_env_; - - ThreadPool bg_fill_; - ThreadPool bg_read_; - ThreadPool bg_dfs_read_; - ThreadPool bg_flush_; - ThreadPool bg_control_; - -private: - std::string work_path_; - //Env* posix_env_; - - port::Mutex mu_; - // key lock list - struct Waiter { - int wait_num; // protected by FlashBlockCacheImpl.mu_ - - port::Mutex mu; - port::CondVar cv; - bool done; - Waiter(): wait_num(0), cv(&mu), done(false) {} - - void Wait() { - MutexLock l(&mu); - while (!done) { cv.Wait(); } - } - - void SignalAll() { - MutexLock l(&mu); - done = true; - cv.SignalAll(); - } - }; - typedef std::map LockKeyMap; - LockKeyMap lock_key_; - - uint64_t new_fid_; - uint64_t prev_fid_; - - enum class LockKeyType { - kDBKey = 0, - kBlockSetKey = 1, - kDeleteDBKey = 2, - }; - struct LockContent { - LockKeyType type; - - // DB key - Slice db_lock_key; - Slice db_lock_val; - std::string* db_val; - - // block set id - uint64_t sid; - BlockSet* block_set; - - std::string Encode() { - if (type == LockKeyType::kDBKey || type == LockKeyType::kDeleteDBKey) { - return db_lock_key.ToString(); - } else if (type == LockKeyType::kBlockSetKey) { - std::string key = kKeyPrefixBlockSet; - PutFixed64(&key, sid); - return key; - } - return ""; - } - - std::string KeyToString() { - if (type == LockKeyType::kDBKey || type == LockKeyType::kDeleteDBKey) { - return db_lock_key.ToString(); - } else if (type == LockKeyType::kBlockSetKey) { - std::stringstream ss; - ss << kKeyPrefixBlockSet << sid; - return ss.str(); - } else { - return ""; - } - } - - std::string ValToString() { - if (type == LockKeyType::kDBKey) { - uint64_t val = DecodeFixed64(db_lock_val.data()); - std::stringstream ss; - ss << val; - return ss.str(); - } - return ""; - } - }; - - std::string FNameDBKey(const std::string& fname) { - std::string key = kKeyPrefixFName + fname; - return key; - } - - std::string FIDDBKey() { - return kKeyFID; - } - - // LRU of block set - Cache* block_set_cache_; - - // store meta - DB* meta_db_; -}; - -} // namespace leveldb - diff --git a/src/leveldb/util/flash_block_cache_write_buffer.h b/src/leveldb/util/flash_block_cache_write_buffer.h deleted file mode 100644 index 5a8e80db0..000000000 --- a/src/leveldb/util/flash_block_cache_write_buffer.h +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#pragma once - -#include - -#include "common/counter.h" -#include "leveldb/env.h" -#include "leveldb/statistics.h" -#include "leveldb/status.h" -#include "port/port.h" -#include "util/mutexlock.h" - -namespace leveldb { - -class FlashBlockCacheWriteBuffer { -public: - FlashBlockCacheWriteBuffer(const std::string& path, - const std::string& file, - int block_size) - : offset_(0), - block_size_(block_size), - block_idx_(0), - storage_(NULL), - path_(path), - file_(file) { - } - - ~FlashBlockCacheWriteBuffer() { - assert(block_list_.empty()); - } - - uint32_t NumFullBlock() { // use for BGFlush - MutexLock l(&mu_); - if (block_list_.empty()) { - return 0; - } else if ((block_list_.back())->size() < block_size_) { - return block_list_.size() - 1; - } else { - return block_list_.size(); - } - } - - Status Append(const Slice& data) { - MutexLock l(&mu_); - if (storage_ == NULL) { - storage_ = new std::string(); - block_list_.push_back(storage_); - } - uint32_t begin = offset_ / block_size_; - uint32_t end = (offset_ + data.size()) / block_size_; - if (begin == end) { // in the same block - storage_->append(data.data(), data.size()); - } else { - uint32_t tmp_size = block_size_ - (offset_ % block_size_); - storage_->append(data.data(), tmp_size); - assert(storage_->size() == block_size_); - Slice buf(data.data() + tmp_size, data.size() - tmp_size); - for (uint32_t i = begin + 1; i <= end; ++i) { - storage_ = new std::string(); - block_list_.push_back(storage_); - if (i < end) { - storage_->append(buf.data(), block_size_); - buf.remove_prefix(block_size_); - } else { // last block - storage_->append(buf.data(), buf.size()); - buf.remove_prefix(buf.size()); - } - //Log("[%s] add tmp_storage %s: offset: %lu, buf_size: %lu, idx %u\n", - //path_.c_str(), - //file_.c_str(), - //offset_, - //buf.size(), i); - } - } - offset_ += data.size(); - //Log("[%s] add record: %s, begin: %u, end: %u, offset: %lu, data_size: %lu, block_size: %u\n", - //path_.c_str(), - //file_.c_str(), - //begin, end, - //offset_ - data.size() , data.size(), block_size_); - return Status::OK(); - } - - std::string* PopFrontBlock(uint64_t* block_idx) { - MutexLock l(&mu_); - if (block_list_.empty()) { - return NULL; - } - std::string* block = block_list_.front(); - assert(block->size() <= block_size_); - if (block->size() != block_size_) { - return NULL; - } - block_list_.pop_front(); - *block_idx = block_idx_; - block_idx_++; - return block; - } - - std::string* PopBackBlock(uint64_t* block_idx) { - MutexLock l(&mu_); - if (block_list_.empty()) { - return NULL; - } - std::string* block = block_list_.back(); - block_list_.pop_back(); - *block_idx = offset_ / block_size_; - return block; - } - - void ReleaseBlock(std::string* block) { - delete block; - } - -private: - port::Mutex mu_; - uint64_t offset_; - uint32_t block_size_; - uint64_t block_idx_; - std::string* storage_; - std::list block_list_; // kBlockSize - std::string path_; - std::string file_; -}; - -} // namespace leveldb - diff --git a/src/leveldb/util/hash.cc b/src/leveldb/util/hash.cc index 533607faa..0eb39ae6e 100644 --- a/src/leveldb/util/hash.cc +++ b/src/leveldb/util/hash.cc @@ -14,7 +14,9 @@ // between switch labels. The real definition should be provided externally. // This one is a fallback version for unsupported compilers. #ifndef FALLTHROUGH_INTENDED -#define FALLTHROUGH_INTENDED do { } while (0) +#define FALLTHROUGH_INTENDED \ + do { \ + } while (0) #endif namespace leveldb { @@ -52,5 +54,4 @@ uint32_t Hash(const char* data, size_t n, uint32_t seed) { return h; } - } // namespace leveldb diff --git a/src/leveldb/util/hash.h b/src/leveldb/util/hash.h index d3347680c..3cbcd4811 100644 --- a/src/leveldb/util/hash.h +++ b/src/leveldb/util/hash.h @@ -17,7 +17,6 @@ namespace leveldb { extern uint32_t Hash(const char* data, size_t n, uint32_t seed); - } #endif // STORAGE_LEVELDB_UTIL_HASH_H_ diff --git a/src/leveldb/util/hdfs.cc b/src/leveldb/util/hdfs.cc index 4a9721bc2..3b770bed4 100644 --- a/src/leveldb/util/hdfs.cc +++ b/src/leveldb/util/hdfs.cc @@ -9,7 +9,7 @@ #include "hdfs.h" #include "include/hdfs.h" #include "hdfs_util.h" -#include "../common/counter.h" +#include "common/counter.h" namespace leveldb { @@ -17,27 +17,22 @@ static hdfsFS (*hdfsConnect)(const char* nn, tPort port); static int (*hdfsDisconnect)(hdfsFS fs); static int (*hdfsCreateDirectory)(hdfsFS fs, const char* path); -static hdfsFileInfo* (*hdfsListDirectory)(hdfsFS fs, const char* path, - int *numEntries); +static hdfsFileInfo* (*hdfsListDirectory)(hdfsFS fs, const char* path, int* numEntries); static hdfsFileInfo* (*hdfsGetPathInfo)(hdfsFS fs, const char* path); -static void (*hdfsFreeFileInfo)(hdfsFileInfo *hdfsFileInfo, int numEntries); +static void (*hdfsFreeFileInfo)(hdfsFileInfo* hdfsFileInfo, int numEntries); static int (*hdfsDelete)(hdfsFS fs, const char* path); -static int (*hdfsExists)(hdfsFS fs, const char *path); +static int (*hdfsExists)(hdfsFS fs, const char* path); static int (*hdfsRename)(hdfsFS fs, const char* oldPath, const char* newPath); -static int (*hdfsCopy)(hdfsFS srcFS, const char* src, - hdfsFS dstFS, const char* dst); +static int (*hdfsCopy)(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst); -static hdfsFile (*hdfsOpenFile)(hdfsFS fs, const char* path, int flags, - int bufferSize, short replication, - tSize blocksize); +static hdfsFile (*hdfsOpenFile)(hdfsFS fs, const char* path, int flags, int bufferSize, + short replication, tSize blocksize); static int (*hdfsCloseFile)(hdfsFS fs, hdfsFile file); static tSize (*hdfsRead)(hdfsFS fs, hdfsFile file, void* buffer, tSize length); -static tSize (*hdfsPread)(hdfsFS fs, hdfsFile file, tOffset position, - void* buffer, tSize length); -static tSize (*hdfsWrite)(hdfsFS fs, hdfsFile file, const void* buffer, - tSize length); +static tSize (*hdfsPread)(hdfsFS fs, hdfsFile file, tOffset position, void* buffer, tSize length); +static tSize (*hdfsWrite)(hdfsFS fs, hdfsFile file, const void* buffer, tSize length); static int (*hdfsFlush)(hdfsFS fs, hdfsFile file); static int (*hdfsSync)(hdfsFS fs, hdfsFile file); static tOffset (*hdfsTell)(hdfsFS fs, hdfsFile file); @@ -83,9 +78,7 @@ bool Hdfs::LoadSymbol() { return true; } -HFile::HFile(void* fs, void* file, const std::string& name) - : fs_(fs), file_(file), name_(name) { -} +HFile::HFile(void* fs, void* file, const std::string& name) : fs_(fs), file_(file), name_(name) {} HFile::~HFile() { if (file_) { CloseFile(); @@ -95,12 +88,8 @@ HFile::~HFile() { int32_t HFile::Write(const char* buf, int32_t len) { return (*hdfsWrite)((hdfsFS)fs_, (hdfsFile)file_, buf, len); } -int32_t HFile::Flush() { - return (*hdfsFlush)((hdfsFS)fs_, (hdfsFile)file_); -} -int32_t HFile::Sync() { - return (*hdfsSync)((hdfsFS)fs_, (hdfsFile)file_); -} +int32_t HFile::Flush() { return (*hdfsFlush)((hdfsFS)fs_, (hdfsFile)file_); } +int32_t HFile::Sync() { return (*hdfsSync)((hdfsFS)fs_, (hdfsFile)file_); } int32_t HFile::Read(char* buf, int32_t len) { return (*hdfsRead)((hdfsFS)fs_, (hdfsFile)file_, buf, len); } @@ -111,9 +100,7 @@ int64_t HFile::Tell() { int64_t retval = (*hdfsTell)((hdfsFS)fs_, (hdfsFile)file_); return retval; } -int32_t HFile::Seek(int64_t offset) { - return (*hdfsSeek)((hdfsFS)fs_, (hdfsFile)file_, offset); -} +int32_t HFile::Seek(int64_t offset) { return (*hdfsSeek)((hdfsFS)fs_, (hdfsFile)file_, offset); } int32_t HFile::CloseFile() { int32_t retval = 0; @@ -140,7 +127,7 @@ Hdfs::Hdfs() { dl_init_ = true; } } - fs_ = (*hdfsConnect)("default", 0); + fs_ = (*hdfsConnect)("default", 0); } Hdfs::~Hdfs() { if (fs_) { @@ -180,12 +167,12 @@ int32_t Hdfs::Rename(const std::string& from, const std::string& to) { DfsFile* Hdfs::OpenFile(const std::string& filename, int32_t flags) { // fprintf(stderr, "OpenFile %s %d\n", filename.c_str(), flags); int32_t hflags = (flags == RDONLY ? O_RDONLY : O_WRONLY); - hdfsFile file = (*hdfsOpenFile)((hdfsFS)fs_, filename.c_str(), hflags, 0 ,0 ,0); + hdfsFile file = (*hdfsOpenFile)((hdfsFS)fs_, filename.c_str(), hflags, 0, 0, 0); if (!file) { if (hflags == O_WRONLY) { // open failed, delete and reopen it Delete(filename); - file = (*hdfsOpenFile)((hdfsFS)fs_, filename.c_str(), hflags, 0 ,0 ,0); + file = (*hdfsOpenFile)((hdfsFS)fs_, filename.c_str(), hflags, 0, 0, 0); if (!file) { return NULL; } @@ -200,8 +187,7 @@ int32_t Hdfs::Copy(const std::string& from, const std::string& to) { return (*hdfsCopy)((hdfsFS)fs_, from.c_str(), (hdfsFS)fs_, to.c_str()); } -int32_t Hdfs::ListDirectory(const std::string& path, - std::vector* result) { +int32_t Hdfs::ListDirectory(const std::string& path, std::vector* result) { int numEntries = 0; hdfsFileInfo* pHdfsFileInfo = 0; if (0 != Exists(path)) { @@ -233,7 +219,6 @@ int32_t Hdfs::UnlockDirectory(const std::string& path) { return -1; } - int32_t Hdfs::Stat(const std::string& filename, struct stat* fstat) { hdfsFileInfo* pFileInfo = (*hdfsGetPathInfo)((hdfsFS)fs_, filename.c_str()); if (pFileInfo != NULL) { @@ -248,6 +233,5 @@ int32_t Hdfs::ClearDirOwner(const std::string& path) { // hdfs has no dir owner, so we return succ directly return 0; } - } /* vim: set expandtab ts=2 sw=2 sts=2 tw=100: */ diff --git a/src/leveldb/util/hdfs.h b/src/leveldb/util/hdfs.h index ebf464f6b..5fa3676ce 100644 --- a/src/leveldb/util/hdfs.h +++ b/src/leveldb/util/hdfs.h @@ -4,8 +4,8 @@ // // Author: yanshiguang02@baidu.com -#ifndef TERA_LEVELDB_AHDFS_H_ -#define TERA_LEVELDB_AHDFS_H_ +#ifndef TERA_LEVELDB_AHDFS_H_ +#define TERA_LEVELDB_AHDFS_H_ #include #include @@ -16,7 +16,7 @@ namespace leveldb { class HFile : public DfsFile { -public: + public: HFile(void* fs, void* file, const std::string& name); ~HFile(); int32_t Write(const char* buf, int32_t len); @@ -28,14 +28,14 @@ class HFile : public DfsFile { int32_t Seek(int64_t offset); int32_t CloseFile(); -private: + private: void* fs_; void* file_; std::string name_; }; class Hdfs : public Dfs { -public: + public: Hdfs(); ~Hdfs(); int32_t CreateDirectory(const std::string& path); @@ -51,7 +51,8 @@ class Hdfs : public Dfs { int32_t ClearDirOwner(const std::string& path); DfsFile* OpenFile(const std::string& filename, int32_t flags); int32_t Stat(const std::string& filename, struct stat* fstat); -private: + + private: void* fs_; // for dynamic library @@ -61,7 +62,7 @@ class Hdfs : public Dfs { }; class H2File : public DfsFile { -public: + public: H2File(void* fs, void* file, const std::string& name); ~H2File(); int32_t Write(const char* buf, int32_t len); @@ -73,14 +74,14 @@ class H2File : public DfsFile { int32_t Seek(int64_t offset); int32_t CloseFile(); -private: + private: void* fs_; void* file_; std::string name_; }; class Hdfs2 : public Dfs { -public: + public: Hdfs2(const std::string& namenode_list); ~Hdfs2(); int32_t CreateDirectory(const std::string& path); @@ -97,7 +98,8 @@ class Hdfs2 : public Dfs { DfsFile* OpenFile(const std::string& filename, int32_t flags); int32_t Stat(const std::string& filename, struct stat* fstat); -private: + + private: void* GetFSHandle(const std::string& path); std::vector fs_list_; @@ -106,9 +108,8 @@ class Hdfs2 : public Dfs { static port::Mutex dl_mu_; static bool dl_init_; }; - } -#endif //TERA_LEVELDB_AHDFS_H_ +#endif // TERA_LEVELDB_AHDFS_H_ /* vim: set expandtab ts=2 sw=2 sts=2 tw=100: */ diff --git a/src/leveldb/util/hdfs2.cc b/src/leveldb/util/hdfs2.cc index 0eac0ecea..4b9e809c2 100644 --- a/src/leveldb/util/hdfs2.cc +++ b/src/leveldb/util/hdfs2.cc @@ -9,7 +9,7 @@ #include "include/hdfs2.h" #include "hdfs_util.h" #include "util/hash.h" -#include "../common/counter.h" +#include "common/counter.h" namespace leveldb { @@ -17,27 +17,22 @@ static hdfsFS (*hdfsConnect)(const char* nn, tPort port); static int (*hdfsDisconnect)(hdfsFS fs); static int (*hdfsCreateDirectory)(hdfsFS fs, const char* path); -static hdfsFileInfo* (*hdfsListDirectory)(hdfsFS fs, const char* path, - int *numEntries); +static hdfsFileInfo* (*hdfsListDirectory)(hdfsFS fs, const char* path, int* numEntries); static hdfsFileInfo* (*hdfsGetPathInfo)(hdfsFS fs, const char* path); -static void (*hdfsFreeFileInfo)(hdfsFileInfo *hdfsFileInfo, int numEntries); +static void (*hdfsFreeFileInfo)(hdfsFileInfo* hdfsFileInfo, int numEntries); static int (*hdfsDelete)(hdfsFS fs, const char* path, int recursive); -static int (*hdfsExists)(hdfsFS fs, const char *path); +static int (*hdfsExists)(hdfsFS fs, const char* path); static int (*hdfsRename)(hdfsFS fs, const char* oldPath, const char* newPath); -static int (*hdfsCopy)(hdfsFS srcFS, const char* src, - hdfsFS dstFS, const char* dst); +static int (*hdfsCopy)(hdfsFS srcFS, const char* src, hdfsFS dstFS, const char* dst); -static hdfsFile (*hdfsOpenFile)(hdfsFS fs, const char* path, int flags, - int bufferSize, short replication, - tSize blocksize); +static hdfsFile (*hdfsOpenFile)(hdfsFS fs, const char* path, int flags, int bufferSize, + short replication, tSize blocksize); static int (*hdfsCloseFile)(hdfsFS fs, hdfsFile file); static tSize (*hdfsRead)(hdfsFS fs, hdfsFile file, void* buffer, tSize length); -static tSize (*hdfsPread)(hdfsFS fs, hdfsFile file, tOffset position, - void* buffer, tSize length); -static tSize (*hdfsWrite)(hdfsFS fs, hdfsFile file, const void* buffer, - tSize length); +static tSize (*hdfsPread)(hdfsFS fs, hdfsFile file, tOffset position, void* buffer, tSize length); +static tSize (*hdfsWrite)(hdfsFS fs, hdfsFile file, const void* buffer, tSize length); static int (*hdfsFlush)(hdfsFS fs, hdfsFile file); static int (*hdfsHSync)(hdfsFS fs, hdfsFile file); static tOffset (*hdfsTell)(hdfsFS fs, hdfsFile file); @@ -82,9 +77,7 @@ bool Hdfs2::LoadSymbol() { return true; } -H2File::H2File(void* fs, void* file, const std::string& name) - : fs_(fs), file_(file), name_(name) { -} +H2File::H2File(void* fs, void* file, const std::string& name) : fs_(fs), file_(file), name_(name) {} H2File::~H2File() { if (file_) { CloseFile(); @@ -94,12 +87,8 @@ H2File::~H2File() { int32_t H2File::Write(const char* buf, int32_t len) { return (*hdfsWrite)((hdfsFS)fs_, (hdfsFile)file_, buf, len); } -int32_t H2File::Flush() { - return (*hdfsFlush)((hdfsFS)fs_, (hdfsFile)file_); -} -int32_t H2File::Sync() { - return (*hdfsHSync)((hdfsFS)fs_, (hdfsFile)file_); -} +int32_t H2File::Flush() { return (*hdfsFlush)((hdfsFS)fs_, (hdfsFile)file_); } +int32_t H2File::Sync() { return (*hdfsHSync)((hdfsFS)fs_, (hdfsFile)file_); } int32_t H2File::Read(char* buf, int32_t len) { return (*hdfsRead)((hdfsFS)fs_, (hdfsFile)file_, buf, len); } @@ -110,9 +99,7 @@ int64_t H2File::Tell() { int64_t retval = (*hdfsTell)((hdfsFS)fs_, (hdfsFile)file_); return retval; } -int32_t H2File::Seek(int64_t offset) { - return (*hdfsSeek)((hdfsFS)fs_, (hdfsFile)file_, offset); -} +int32_t H2File::Seek(int64_t offset) { return (*hdfsSeek)((hdfsFS)fs_, (hdfsFile)file_, offset); } int32_t H2File::CloseFile() { int32_t retval = 0; @@ -198,9 +185,9 @@ int32_t Hdfs2::Rename(const std::string& from, const std::string& to) { } DfsFile* Hdfs2::OpenFile(const std::string& filename, int32_t flags) { - //fprintf(stderr, "OpenFile %s %d\n", filename.c_str(), flags); + // fprintf(stderr, "OpenFile %s %d\n", filename.c_str(), flags); int32_t hflags = (flags == RDONLY ? O_RDONLY : O_WRONLY); - hdfsFile file = (*hdfsOpenFile)((hdfsFS)GetFSHandle(filename), filename.c_str(), hflags, 0 ,0 ,0); + hdfsFile file = (*hdfsOpenFile)((hdfsFS)GetFSHandle(filename), filename.c_str(), hflags, 0, 0, 0); if (!file) { return NULL; } else { @@ -212,8 +199,7 @@ int32_t Hdfs2::Copy(const std::string& from, const std::string& to) { return (*hdfsCopy)((hdfsFS)GetFSHandle(from), from.c_str(), (hdfsFS)GetFSHandle(to), to.c_str()); } -int32_t Hdfs2::ListDirectory(const std::string& path, - std::vector* result) { +int32_t Hdfs2::ListDirectory(const std::string& path, std::vector* result) { int numEntries = 0; hdfsFileInfo* pHdfsFileInfo = 0; pHdfsFileInfo = (*hdfsListDirectory)((hdfsFS)GetFSHandle(path), path.c_str(), &numEntries); @@ -270,10 +256,8 @@ int32_t Hdfs2::Stat(const std::string& filepath, struct stat* st) { return 0; } return -1; - } - -} // namespace leveldb +} // namespace leveldb /* vim: set expandtab ts=2 sw=2 sts=2 tw=100: */ diff --git a/src/leveldb/util/hdfs_util.h b/src/leveldb/util/hdfs_util.h index ba2eb720b..64a47a740 100644 --- a/src/leveldb/util/hdfs_util.h +++ b/src/leveldb/util/hdfs_util.h @@ -14,9 +14,10 @@ namespace leveldb { static void HdfsFileInfo2PosixFileStat(hdfsFileInfo* info, struct stat* st) { memset(st, 0, sizeof(struct stat)); - //by default: set to 0 to indicate not support for directory because we can not get this info + // by default: set to 0 to indicate not support for directory because we can + // not get this info st->st_nlink = (info->mKind == kObjectKindDirectory) ? 0 : 1; - uid_t owner_id = 99; // no body, magic number in linux + uid_t owner_id = 99; // no body, magic number in linux if (info->mOwner != NULL) { struct passwd passwd_info; struct passwd* result = NULL; @@ -28,9 +29,9 @@ static void HdfsFileInfo2PosixFileStat(hdfsFileInfo* info, struct stat* st) { owner_id = passwd_info.pw_uid; } } - delete [] pwbuf; + delete[] pwbuf; } - gid_t group_id = 99; // no body, magic number in posix + gid_t group_id = 99; // no body, magic number in posix if (info->mGroup != NULL) { struct group result; struct group* resultp; @@ -42,16 +43,16 @@ static void HdfsFileInfo2PosixFileStat(hdfsFileInfo* info, struct stat* st) { group_id = result.gr_gid; } } - delete [] group_buf; + delete[] group_buf; } - short file_mode = (info->mKind == kObjectKindDirectory) ? (S_IFDIR | 0777) : (S_IFREG | 0666); + short file_mode = (info->mKind == kObjectKindDirectory) ? (S_IFDIR | 0777) : (S_IFREG | 0666); if (info->mPermissions > 0) { - file_mode = (info->mKind == kObjectKindDirectory) ? S_IFDIR: S_IFREG; + file_mode = (info->mKind == kObjectKindDirectory) ? S_IFDIR : S_IFREG; file_mode |= info->mPermissions; } st->st_size = (info->mKind == kObjectKindDirectory) ? 4096 : info->mSize; - st->st_blksize = 512; // posix default block size - st->st_blocks = (st->st_size + st->st_blksize - 1)/st->st_blksize; + st->st_blksize = 512; // posix default block size + st->st_blocks = (st->st_size + st->st_blksize - 1) / st->st_blksize; st->st_mode = file_mode; st->st_uid = owner_id; st->st_gid = group_id; diff --git a/src/leveldb/util/histogram.cc b/src/leveldb/util/histogram.cc index d636f9c73..38d7927b1 100644 --- a/src/leveldb/util/histogram.cc +++ b/src/leveldb/util/histogram.cc @@ -14,29 +14,25 @@ namespace leveldb { const double Histogram::kBucketLimit[kNumBuckets] = { - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 45, - 50, 60, 70, 80, 90, 100, 120, 140, 160, 180, 200, 250, 300, 350, 400, 450, - 500, 600, 700, 800, 900, 1000, 1200, 1400, 1600, 1800, 2000, 2500, 3000, - 3500, 4000, 4500, 5000, 6000, 7000, 8000, 9000, 10000, 12000, 14000, - 16000, 18000, 20000, 25000, 30000, 35000, 40000, 45000, 50000, 60000, - 70000, 80000, 90000, 100000, 120000, 140000, 160000, 180000, 200000, - 250000, 300000, 350000, 400000, 450000, 500000, 600000, 700000, 800000, - 900000, 1000000, 1200000, 1400000, 1600000, 1800000, 2000000, 2500000, - 3000000, 3500000, 4000000, 4500000, 5000000, 6000000, 7000000, 8000000, - 9000000, 10000000, 12000000, 14000000, 16000000, 18000000, 20000000, - 25000000, 30000000, 35000000, 40000000, 45000000, 50000000, 60000000, - 70000000, 80000000, 90000000, 100000000, 120000000, 140000000, 160000000, - 180000000, 200000000, 250000000, 300000000, 350000000, 400000000, - 450000000, 500000000, 600000000, 700000000, 800000000, 900000000, - 1000000000, 1200000000, 1400000000, 1600000000, 1800000000, 2000000000, - 2500000000.0, 3000000000.0, 3500000000.0, 4000000000.0, 4500000000.0, - 5000000000.0, 6000000000.0, 7000000000.0, 8000000000.0, 9000000000.0, - 1e200, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100, + 120, 140, 160, 180, 200, 250, 300, 350, 400, 450, 500, 600, 700, 800, 900, 1000, 1200, 1400, + 1600, 1800, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 6000, 7000, 8000, 9000, 10000, 12000, + 14000, 16000, 18000, 20000, 25000, 30000, 35000, 40000, 45000, 50000, 60000, 70000, 80000, + 90000, 100000, 120000, 140000, 160000, 180000, 200000, 250000, 300000, 350000, 400000, 450000, + 500000, 600000, 700000, 800000, 900000, 1000000, 1200000, 1400000, 1600000, 1800000, 2000000, + 2500000, 3000000, 3500000, 4000000, 4500000, 5000000, 6000000, 7000000, 8000000, 9000000, + 10000000, 12000000, 14000000, 16000000, 18000000, 20000000, 25000000, 30000000, 35000000, + 40000000, 45000000, 50000000, 60000000, 70000000, 80000000, 90000000, 100000000, 120000000, + 140000000, 160000000, 180000000, 200000000, 250000000, 300000000, 350000000, 400000000, + 450000000, 500000000, 600000000, 700000000, 800000000, 900000000, 1000000000, 1200000000, + 1400000000, 1600000000, 1800000000, 2000000000, 2500000000.0, 3000000000.0, 3500000000.0, + 4000000000.0, 4500000000.0, 5000000000.0, 6000000000.0, 7000000000.0, 8000000000.0, + 9000000000.0, 1e200, }; void Histogram::Clear() { MutexLock lock(&mutex_); - min_ = kBucketLimit[kNumBuckets-1]; + min_ = kBucketLimit[kNumBuckets - 1]; max_ = 0; num_ = 0; sum_ = 0; @@ -73,9 +69,7 @@ void Histogram::Merge(const Histogram& other) { } } -double Histogram::Median() const { - return Percentile(50.0); -} +double Histogram::Median() const { return Percentile(50.0); } double Histogram::Percentile(double p) const { MutexLock lock(&mutex_); @@ -85,7 +79,7 @@ double Histogram::Percentile(double p) const { sum += buckets_[b]; if (sum >= threshold) { // Scale linearly within this bucket - double left_point = (b == 0) ? 0 : kBucketLimit[b-1]; + double left_point = (b == 0) ? 0 : kBucketLimit[b - 1]; double right_point = kBucketLimit[b]; double left_sum = sum - buckets_[b]; double right_sum = sum; @@ -115,13 +109,11 @@ double Histogram::StandardDeviation() const { std::string Histogram::ToString() const { std::string r; char buf[200]; - snprintf(buf, sizeof(buf), - "Count: %.0f Average: %.4f StdDev: %.2f\n", - num_, Average(), StandardDeviation()); + snprintf(buf, sizeof(buf), "Count: %.0f Average: %.4f StdDev: %.2f\n", num_, Average(), + StandardDeviation()); r.append(buf); - snprintf(buf, sizeof(buf), - "Min: %.4f Median: %.4f Max: %.4f\n", - (num_ == 0.0 ? 0.0 : min_), Median(), max_); + snprintf(buf, sizeof(buf), "Min: %.4f Median: %.4f Max: %.4f\n", (num_ == 0.0 ? 0.0 : min_), + Median(), max_); r.append(buf); r.append("------------------------------------------------------\n"); const double mult = 100.0 / num_; @@ -129,17 +121,16 @@ std::string Histogram::ToString() const { for (int b = 0; b < kNumBuckets; b++) { if (buckets_[b] <= 0.0) continue; sum += buckets_[b]; - snprintf(buf, sizeof(buf), - "[ %7.0f, %7.0f ) %7.0f %7.3f%% %7.3f%% ", - ((b == 0) ? 0.0 : kBucketLimit[b-1]), // left - kBucketLimit[b], // right - buckets_[b], // count - mult * buckets_[b], // percentage - mult * sum); // cumulative percentage + snprintf(buf, sizeof(buf), "[ %7.0f, %7.0f ) %7.0f %7.3f%% %7.3f%% ", + ((b == 0) ? 0.0 : kBucketLimit[b - 1]), // left + kBucketLimit[b], // right + buckets_[b], // count + mult * buckets_[b], // percentage + mult * sum); // cumulative percentage r.append(buf); // Add hash marks based on percentage; 20 marks for 100%. - int marks = static_cast(20*(buckets_[b] / num_) + 0.5); + int marks = static_cast(20 * (buckets_[b] / num_) + 0.5); r.append(marks, '#'); r.push_back('\n'); } diff --git a/src/leveldb/util/histogram.h b/src/leveldb/util/histogram.h index f60602efd..7d6f9ad76 100644 --- a/src/leveldb/util/histogram.h +++ b/src/leveldb/util/histogram.h @@ -18,7 +18,7 @@ namespace leveldb { class Histogram { public: Histogram() { Clear(); } - ~Histogram() { } + ~Histogram() {} void Clear(); void Add(double value); diff --git a/src/leveldb/util/lg_coding.cc b/src/leveldb/util/lg_coding.cc index 2b4db3fe5..e67d713f8 100644 --- a/src/leveldb/util/lg_coding.cc +++ b/src/leveldb/util/lg_coding.cc @@ -10,28 +10,28 @@ namespace leveldb { const std::string KG_PREFIX = "//LG_ID//"; -void PutFixed32LGId(std::string *dst, uint32_t lg_id) { - std::string lg_str; - PutLengthPrefixedSlice(&lg_str, KG_PREFIX); - PutVarint32(&lg_str, lg_id); - PutLengthPrefixedSlice(&lg_str, *dst); - *dst = lg_str; +void PutFixed32LGId(std::string* dst, uint32_t lg_id) { + std::string lg_str; + PutLengthPrefixedSlice(&lg_str, KG_PREFIX); + PutVarint32(&lg_str, lg_id); + PutLengthPrefixedSlice(&lg_str, *dst); + *dst = lg_str; } bool GetFixed32LGId(Slice* input, uint32_t* lg_id) { - Slice lg_str(*input); - Slice str; - if (!GetLengthPrefixedSlice(&lg_str, &str)) { - return false; - } else if (str != KG_PREFIX) { - return false; - } else if (!GetVarint32(&lg_str, lg_id)) { - return false; - } - if (!GetLengthPrefixedSlice(&lg_str, input)) { - return false; - } - return true; + Slice lg_str(*input); + Slice str; + if (!GetLengthPrefixedSlice(&lg_str, &str)) { + return false; + } else if (str != KG_PREFIX) { + return false; + } else if (!GetVarint32(&lg_str, lg_id)) { + return false; + } + if (!GetLengthPrefixedSlice(&lg_str, input)) { + return false; + } + return true; } -} // namespace leveldb +} // namespace leveldb diff --git a/src/leveldb/util/logging.cc b/src/leveldb/util/logging.cc index 0b53c64da..87611e67c 100644 --- a/src/leveldb/util/logging.cc +++ b/src/leveldb/util/logging.cc @@ -20,7 +20,7 @@ namespace leveldb { void AppendNumberTo(std::string* str, uint64_t num) { char buf[30]; - snprintf(buf, sizeof(buf), "%llu", (unsigned long long) num); + snprintf(buf, sizeof(buf), "%llu", (unsigned long long)num); str->append(buf); } @@ -31,8 +31,7 @@ void AppendEscapedStringTo(std::string* str, const Slice& value) { str->push_back(c); } else { char buf[10]; - snprintf(buf, sizeof(buf), "\\x%02x", - static_cast(c) & 0xff); + snprintf(buf, sizeof(buf), "\\x%02x", static_cast(c) & 0xff); str->append(buf); } } @@ -61,7 +60,7 @@ bool ConsumeChar(Slice* in, char c) { bool ConsumeDecimalNumber(Slice* in, uint64_t* val) { if (in->size() > 1 && (*in)[0] == 'H') { - return ConsumeHexDecimalNumber(in, val); + return ConsumeHexDecimalNumber(in, val); } uint64_t v = 0; int digits = 0; @@ -71,8 +70,8 @@ bool ConsumeDecimalNumber(Slice* in, uint64_t* val) { ++digits; const int delta = (c - '0'); static const uint64_t kMaxUint64 = ~static_cast(0); - if (v > kMaxUint64/10 || - (v == kMaxUint64/10 && static_cast(delta) > kMaxUint64%10)) { + if (v > kMaxUint64 / 10 || + (v == kMaxUint64 / 10 && static_cast(delta) > kMaxUint64 % 10)) { // Overflow return false; } @@ -87,20 +86,20 @@ bool ConsumeDecimalNumber(Slice* in, uint64_t* val) { } bool ConsumeHexDecimalNumber(Slice* in, uint64_t* val) { - char c = (*in)[0]; - if (c != 'H') { - return false; - } - in->remove_prefix(1); - std::string hex_str = in->ToString(); - std::string log_num_str; - SplitStringStart(hex_str, &log_num_str, NULL); - if (log_num_str.empty()) { - return false; - } - *val = StringToUint64(log_num_str, 16); - in->remove_prefix(log_num_str.length()); - return true; + char c = (*in)[0]; + if (c != 'H') { + return false; + } + in->remove_prefix(1); + std::string hex_str = in->ToString(); + std::string log_num_str; + SplitStringStart(hex_str, &log_num_str, NULL); + if (log_num_str.empty()) { + return false; + } + *val = StringToUint64(log_num_str, 16); + in->remove_prefix(log_num_str.length()); + return true; } } // namespace leveldb diff --git a/src/leveldb/util/mutexlock.h b/src/leveldb/util/mutexlock.h index 73e1c9f90..acfa8300b 100644 --- a/src/leveldb/util/mutexlock.h +++ b/src/leveldb/util/mutexlock.h @@ -24,22 +24,46 @@ namespace leveldb { // ... some complex code, possibly with multiple return paths ... // } +/* + * This class is designed for locking guard of a mutex. The right way to lock a critical region is + * that using the constructor to construct an object in the stack, and the automatic unlock will be + * done at the end of the critical region by the destructor. + * + * However, there are so many wrong uses in our code. We hope we can optimized these codes one by + * one from now on. And we will use this class in the right way when we add new codes in the + * future. + * + * Here, the Lock and Unlock member functions are designed just for the bad coded function + * DBImpl::GetProperty(). They are NOT recommended to use at any other places. + */ class SCOPED_LOCKABLE MutexLock { public: - explicit MutexLock(port::Mutex *mu) EXCLUSIVE_LOCK_FUNCTION(mu) - : mu_(mu) { - this->mu_->Lock(); + explicit MutexLock(port::Mutex *mu) EXCLUSIVE_LOCK_FUNCTION(mu) : mu_(mu), hold_mutex_(true) { + mu_->Lock(); + } + ~MutexLock() UNLOCK_FUNCTION() { + if (hold_mutex_) { + mu_->Unlock(); + } + } + + void Lock() { + hold_mutex_ = true; + mu_->Lock(); + } + void Unlock() { + hold_mutex_ = false; + mu_->Unlock(); } - ~MutexLock() UNLOCK_FUNCTION() { this->mu_->Unlock(); } private: port::Mutex *const mu_; + bool hold_mutex_; // No copying allowed - MutexLock(const MutexLock&); - void operator=(const MutexLock&); + MutexLock(const MutexLock &); + void operator=(const MutexLock &); }; } // namespace leveldb - #endif // STORAGE_LEVELDB_UTIL_MUTEXLOCK_H_ diff --git a/src/leveldb/util/nfs.cc b/src/leveldb/util/nfs.cc index 0c6219301..3b81598a7 100644 --- a/src/leveldb/util/nfs.cc +++ b/src/leveldb/util/nfs.cc @@ -12,8 +12,8 @@ #include "util/hash.h" #include "util/mutexlock.h" #include "util/string_ext.h" -#include "../common/timer.h" -#include "../common/counter.h" +#include "common/timer.h" +#include "common/counter.h" namespace leveldb { @@ -41,8 +41,7 @@ static int (*nfsClose)(nfs::NFSFILE* stream); static int (*nfsForceRelease)(const char* path); static ssize_t (*nfsRead)(nfs::NFSFILE* stream, void* ptr, size_t size); -static ssize_t (*nfsPRead)(nfs::NFSFILE* stream, void* ptr, size_t size, - uint64_t offset); +static ssize_t (*nfsPRead)(nfs::NFSFILE* stream, void* ptr, size_t size, uint64_t offset); static ssize_t (*nfsWrite)(nfs::NFSFILE* stream, const void* ptr, size_t size); static int (*nfsFsync)(nfs::NFSFILE* stream); @@ -57,26 +56,25 @@ void* ResolveSymbol(void* dl, const char* sym) { dlerror(); void* sym_ptr = dlsym(dl, sym); const char* error = dlerror(); - if (strcmp(sym,"SetAssignNamespaceIdFunc") == 0 && error != NULL) { - fprintf(stderr, "libnfs.so does not support federation\n"); - return NULL; + if (strcmp(sym, "SetAssignNamespaceIdFunc") == 0 && error != NULL) { + fprintf(stderr, "libnfs.so does not support federation\n"); + return NULL; } - if (strcmp(sym,"SetDirOwner") == 0 && error != NULL) { - fprintf(stderr, "libnfs.so does not support SetDirOwner\n"); - return NULL; + if (strcmp(sym, "SetDirOwner") == 0 && error != NULL) { + fprintf(stderr, "libnfs.so does not support SetDirOwner\n"); + return NULL; } - if (strcmp(sym,"ClearDirOwner") == 0 && error != NULL) { - fprintf(stderr, "libnfs.so does not support ClearDirOwner\n"); - return NULL; + if (strcmp(sym, "ClearDirOwner") == 0 && error != NULL) { + fprintf(stderr, "libnfs.so does not support ClearDirOwner\n"); + return NULL; } - if (strcmp(sym,"ForceRelease") == 0 && error != NULL) { - fprintf(stderr, "libnfs.so does not support ForceRelease\n"); - return NULL; + if (strcmp(sym, "ForceRelease") == 0 && error != NULL) { + fprintf(stderr, "libnfs.so does not support ForceRelease\n"); + return NULL; } if (error != NULL) { - fprintf(stderr, "resolve symbol %s from libnfs.so error: %s\n", - sym, error); + fprintf(stderr, "resolve symbol %s from libnfs.so error: %s\n", sym, error); abort(); } return sym_ptr; @@ -91,7 +89,7 @@ void Nfs::LoadSymbol() { } *(void**)(&printVersion) = ResolveSymbol(dl, "PrintNfsVersion"); - //fprintf(stderr, "libnfs.so version: \n%s\n\n", (*printVersion)()); + // fprintf(stderr, "libnfs.so version: \n%s\n\n", (*printVersion)()); *(void**)(&nfsInit) = ResolveSymbol(dl, "Init"); *(void**)(&nfsSetComlogLevel) = ResolveSymbol(dl, "SetComlogLevel"); @@ -120,9 +118,7 @@ void Nfs::LoadSymbol() { *(void**)(&nfsSetAssignNamespaceIdFunc) = ResolveSymbol(dl, "SetAssignNamespaceIdFunc"); } -NFile::NFile(nfs::NFSFILE* file, const std::string& name) - : file_(file), name_(name) { -} +NFile::NFile(nfs::NFSFILE* file, const std::string& name) : file_(file), name_(name) {} NFile::~NFile() { if (file_) { CloseFile(); @@ -196,27 +192,25 @@ port::Mutex Nfs::mu_; static Nfs* instance = NULL; int Nfs::CalcNamespaceId(const char* c_path, int max_namespaces) { - if (!c_path) { - fprintf(stderr, "null path for Nfs::CalcNamespaceId\n"); - return -1; - } - std::string path(c_path); - size_t pos = path.rfind("tablet"); - if (pos == std::string::npos) { - return 0; - } - size_t pos2 = path.find('/', pos); - if (pos2 == std::string::npos) { - pos2 = path.size(); - } - std::string hash_path = path.substr(pos, pos2 - pos); - uint32_t index = Hash(hash_path.c_str(), hash_path.size(), - 1984) % max_namespaces; - return index; + if (!c_path) { + fprintf(stderr, "null path for Nfs::CalcNamespaceId\n"); + return -1; + } + std::string path(c_path); + size_t pos = path.rfind("tablet"); + if (pos == std::string::npos) { + return 0; + } + size_t pos2 = path.find('/', pos); + if (pos2 == std::string::npos) { + pos2 = path.size(); + } + std::string hash_path = path.substr(pos, pos2 - pos); + uint32_t index = Hash(hash_path.c_str(), hash_path.size(), 1984) % max_namespaces; + return index; } -void Nfs::Init(const std::string& mountpoint, const std::string& conf_path) -{ +void Nfs::Init(const std::string& mountpoint, const std::string& conf_path) { MutexLock l(&mu_); if (!dl_init_) { LoadSymbol(); @@ -224,7 +218,7 @@ void Nfs::Init(const std::string& mountpoint, const std::string& conf_path) } (*nfsSetComlogLevel)(2); if (nfsSetAssignNamespaceIdFunc) { - nfsSetAssignNamespaceIdFunc(&CalcNamespaceId); + nfsSetAssignNamespaceIdFunc(&CalcNamespaceId); } if (0 != (*nfsInit)(mountpoint.c_str(), conf_path.c_str())) { char err[256]; @@ -258,7 +252,8 @@ int32_t Nfs::CreateDirectory(const std::string& name) { if (0 != (*nfsAccess)(path.c_str(), F_OK) && (*nfsGetErrno)() == ENOENT) { if (0 != (*nfsMkdir)(path.c_str()) && (*nfsGetErrno)() != EEXIST) { errno = (*nfsGetErrno)(); - fprintf(stderr, "[%s] Createdir %s fail: %d\n", tera::get_curtime_str().c_str(), name.c_str(), errno); + fprintf(stderr, "[%s] Createdir %s fail: %d\n", tera::get_curtime_str().c_str(), + name.c_str(), errno); return -1; } } @@ -270,7 +265,8 @@ int32_t Nfs::DeleteDirectory(const std::string& name) { int32_t retval = (*nfsRmdir)(name.c_str()); if (retval != 0) { errno = (*nfsGetErrno)(); - fprintf(stderr, "[%s] DeleteDirectory %s fail: %d\n", tera::get_curtime_str().c_str(), name.c_str(), errno); + fprintf(stderr, "[%s] DeleteDirectory %s fail: %d\n", tera::get_curtime_str().c_str(), + name.c_str(), errno); } return retval; } @@ -279,7 +275,8 @@ int32_t Nfs::Exists(const std::string& filename) { if (retval != 0) { errno = (*nfsGetErrno)(); int errno_saved = errno; - fprintf(stderr, "[%s] Exists %s fail: %d\n", tera::get_curtime_str().c_str(), filename.c_str(), errno); + fprintf(stderr, "[%s] Exists %s fail: %d\n", tera::get_curtime_str().c_str(), filename.c_str(), + errno); errno = errno_saved; } return retval; @@ -289,7 +286,8 @@ int32_t Nfs::Delete(const std::string& filename) { if (retval != 0) { errno = (*nfsGetErrno)(); if (errno != EISDIR) { - fprintf(stderr, "[%s] Delete %s fail: %d\n", tera::get_curtime_str().c_str(), filename.c_str(), errno); + fprintf(stderr, "[%s] Delete %s fail: %d\n", tera::get_curtime_str().c_str(), + filename.c_str(), errno); } } return retval; @@ -301,7 +299,8 @@ int32_t Nfs::GetFileSize(const std::string& filename, uint64_t* size) { *size = fileinfo.st_size; } else { errno = (*nfsGetErrno)(); - fprintf(stderr, "[%s] Getfilesize %s fail: %d\n", tera::get_curtime_str().c_str(), filename.c_str(), errno); + fprintf(stderr, "[%s] Getfilesize %s fail: %d\n", tera::get_curtime_str().c_str(), + filename.c_str(), errno); } return retval; } @@ -309,13 +308,14 @@ int32_t Nfs::Rename(const std::string& from, const std::string& to) { int32_t retval = (*nfsRename)(from.c_str(), to.c_str()); if (retval != 0) { errno = (*nfsGetErrno)(); - fprintf(stderr, "[%s] Rename %s to %s fail: %d\n", tera::get_curtime_str().c_str(), from.c_str(), to.c_str(), errno); + fprintf(stderr, "[%s] Rename %s to %s fail: %d\n", tera::get_curtime_str().c_str(), + from.c_str(), to.c_str(), errno); } return retval; } DfsFile* Nfs::OpenFile(const std::string& filename, int32_t flags) { - //fprintf(stderr, "OpenFile %s %d\n", filename.c_str(), flags); + // fprintf(stderr, "OpenFile %s %d\n", filename.c_str(), flags); nfs::NFSFILE* file = NULL; if (flags == RDONLY) { file = (*nfsOpen)(filename.c_str(), "r"); @@ -327,7 +327,8 @@ DfsFile* Nfs::OpenFile(const std::string& filename, int32_t flags) { return new NFile(file, filename); } errno = (*nfsGetErrno)(); - fprintf(stderr, "[%s] Openfile %s fail: %d\n", tera::get_curtime_str().c_str(), filename.c_str(), errno); + fprintf(stderr, "[%s] Openfile %s fail: %d\n", tera::get_curtime_str().c_str(), filename.c_str(), + errno); return NULL; } @@ -335,7 +336,8 @@ int32_t Nfs::Stat(const std::string& filename, struct stat* fstat) { int32_t retval = (*nfsStat)(filename.c_str(), fstat); if (retval != 0) { errno = (*nfsGetErrno)(); - //fprintf(stderr, "[%s] Stat %s fail: %d\n", tera::get_curtime_str().c_str(), filename.c_str(), errno); + // fprintf(stderr, "[%s] Stat %s fail: %d\n", + // tera::get_curtime_str().c_str(), filename.c_str(), errno); } return retval; } @@ -344,13 +346,13 @@ int32_t Nfs::Copy(const std::string& from, const std::string& to) { // not support return -1; } -int32_t Nfs::ListDirectory(const std::string& path, - std::vector* result) { +int32_t Nfs::ListDirectory(const std::string& path, std::vector* result) { nfs::NFSDIR* dir = (*nfsOpendir)(path.c_str()); if (NULL == dir) { errno = (*nfsGetErrno)(); int errno_saved = errno; - fprintf(stderr, "[%s] Opendir %s fail: %d\n", tera::get_curtime_str().c_str(), path.c_str(), errno); + fprintf(stderr, "[%s] Opendir %s fail: %d\n", tera::get_curtime_str().c_str(), path.c_str(), + errno); errno = errno_saved; return -1; } @@ -364,7 +366,8 @@ int32_t Nfs::ListDirectory(const std::string& path, errno = (*nfsGetErrno)(); int errno_saved = errno; if (0 != errno) { - fprintf(stderr, "[%s] List %s error: %d\n", tera::get_curtime_str().c_str(), path.c_str(), errno); + fprintf(stderr, "[%s] List %s error: %d\n", tera::get_curtime_str().c_str(), path.c_str(), + errno); (*nfsClosedir)(dir); errno = errno_saved; return -1; @@ -377,8 +380,7 @@ int32_t Nfs::LockDirectory(const std::string& path) { int ret = (*nfsSetDirOwner)(path.c_str()); if (ret != 0) { errno = (*nfsGetErrno)(); - fprintf(stderr, "[LockDirectory] lock dir %s fail, errno: %d\n", - path.c_str(), errno); + fprintf(stderr, "[LockDirectory] lock dir %s fail, errno: %d\n", path.c_str(), errno); return -1; } @@ -386,8 +388,7 @@ int32_t Nfs::LockDirectory(const std::string& path) { ret = ListDirectory(path, &files); if (ret != 0) { errno = (*nfsGetErrno)(); - fprintf(stderr, "[LockDirectory] list dir %s fail, errno: %d\n", - path.c_str(), errno); + fprintf(stderr, "[LockDirectory] list dir %s fail, errno: %d\n", path.c_str(), errno); return -1; } @@ -399,7 +400,7 @@ int32_t Nfs::LockDirectory(const std::string& path) { if (ret != 0) { errno = (*nfsGetErrno)(); fprintf(stderr, "[LockDirectory] force release file %s fail, errno: %d\n", - file_name.c_str(), errno); + file_name.c_str(), errno); return -1; } } @@ -407,13 +408,10 @@ int32_t Nfs::LockDirectory(const std::string& path) { return 0; } -int32_t Nfs::UnlockDirectory(const std::string& path) { - return (*nfsClearDirOwner)(path.c_str()); -} +int32_t Nfs::UnlockDirectory(const std::string& path) { return (*nfsClearDirOwner)(path.c_str()); } int32_t Nfs::ClearDirOwner(const std::string& path) { return (*nfsForceClearDirOwner)(path.c_str()); } - } /* vim: set expandtab ts=2 sw=2 sts=2 tw=100: */ diff --git a/src/leveldb/util/nfs.h b/src/leveldb/util/nfs.h index ab286d82b..fb20ec51a 100644 --- a/src/leveldb/util/nfs.h +++ b/src/leveldb/util/nfs.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_LEVELDB_NFS_H_ -#define TERA_LEVELDB_NFS_H_ +#ifndef TERA_LEVELDB_NFS_H_ +#define TERA_LEVELDB_NFS_H_ #include #include @@ -11,14 +11,14 @@ #include "port/port_posix.h" namespace nfs { - struct NFSFILE; - typedef int (*AssignNamespaceIdFunc)(const char* path, int max_namespaces); +struct NFSFILE; +typedef int (*AssignNamespaceIdFunc)(const char* path, int max_namespaces); } namespace leveldb { class NFile : public DfsFile { -public: + public: NFile(nfs::NFSFILE* file, const std::string& name); ~NFile(); int32_t Write(const char* buf, int32_t len); @@ -29,13 +29,14 @@ class NFile : public DfsFile { int64_t Tell(); int32_t Seek(int64_t offset); int32_t CloseFile(); -private: + + private: nfs::NFSFILE* file_; std::string name_; }; class Nfs : public Dfs { -public: + public: static void Init(const std::string& mountpoint, const std::string& conf_path); static int CalcNamespaceId(const char* c_path, int max_namespaces); static Nfs* GetInstance(); @@ -54,15 +55,15 @@ class Nfs : public Dfs { DfsFile* OpenFile(const std::string& filename, int32_t flags); int32_t Stat(const std::string& filename, struct stat* fstat); -private: + + private: Nfs(); static port::Mutex mu_; static void LoadSymbol(); static bool dl_init_; }; - } -#endif //TERA_LEVELDB_NFS_H_ +#endif // TERA_LEVELDB_NFS_H_ /* vim: set expandtab ts=2 sw=2 sts=2 tw=100: */ diff --git a/src/leveldb/util/nfs_version.h b/src/leveldb/util/nfs_version.h index 8bbad5fbf..865769025 100644 --- a/src/leveldb/util/nfs_version.h +++ b/src/leveldb/util/nfs_version.h @@ -2,7 +2,7 @@ #define STORAGE_LEVELDB_UTIL_NFS_VERSION_H_ #ifdef __cplusplus - extern "C" { +extern "C" { #endif extern const char kNfsSvnInfo[]; @@ -14,7 +14,7 @@ extern const char kNfsCompiler[]; const char* PrintNfsVersion(); #ifdef __cplusplus - } +} #endif -#endif // STORAGE_LEVELDB_UTIL_NFS_VERSION_H_ +#endif // STORAGE_LEVELDB_UTIL_NFS_VERSION_H_ diff --git a/src/leveldb/util/nfs_wrapper.h b/src/leveldb/util/nfs_wrapper.h index b51fe7cb6..e19512155 100644 --- a/src/leveldb/util/nfs_wrapper.h +++ b/src/leveldb/util/nfs_wrapper.h @@ -2,12 +2,12 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef STORAGE_LEVELDB_UTIL_NFS_WRAPPER_H_ -#define STORAGE_LEVELDB_UTIL_NFS_WRAPPER_H_ +#ifndef STORAGE_LEVELDB_UTIL_NFS_WRAPPER_H_ +#define STORAGE_LEVELDB_UTIL_NFS_WRAPPER_H_ #include "nfs_version.h" -#include "../include/nfs.h" +#include "include/nfs.h" namespace nfs { struct NFSFILE; @@ -15,7 +15,7 @@ struct NFSDIR; }; #ifdef __cplusplus -extern "C" { +extern "C" { #endif void InitKylinLog(int loglevel); @@ -77,4 +77,3 @@ void SetAssignNamespaceIdFunc(nfs::AssignNamespaceIdFunc func); #endif #endif // STORAGE_LEVELDB_UTIL_NFS_WRAPPER_H_ - diff --git a/src/leveldb/util/options.cc b/src/leveldb/util/options.cc index 534399229..ff1cae1fc 100644 --- a/src/leveldb/util/options.cc +++ b/src/leveldb/util/options.cc @@ -7,7 +7,6 @@ // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "leveldb/options.h" - #include "leveldb/comparator.h" #include "leveldb/env.h" @@ -38,7 +37,7 @@ Options::Options() write_log_time_out(5), flush_triggered_log_num(100000), flush_triggered_log_size(40 << 20), - manifest_switch_interval(60*60), + manifest_switch_interval(10 * 60), raw_key_format(kReadable), seek_latency(10000000), dump_mem_on_shutdown(true), @@ -59,23 +58,8 @@ Options::Options() max_sub_parallel_compaction(10), use_direct_io_read(false), use_direct_io_write(false), - posix_write_buffer_size(512<<10), + posix_write_buffer_size(512 << 10), table_builder_batch_write(false), - table_builder_batch_size(0) { } - -FlashBlockCacheOptions::FlashBlockCacheOptions() - : force_update_conf_enabled(false), - cache_size(350UL << 30), - blockset_size(1UL << 30), - block_size(8192), - fid_batch_num(100000), - meta_block_cache_size(2000), - meta_table_cache_size(500), - write_buffer_size(1048576UL), - env(NULL), - cache_env(NULL) { - blockset_num = cache_size / blockset_size + 1; - blocks_per_set = blockset_size / block_size + 1; -} - + table_builder_batch_size(0), + memtable_shard_num(0) {} } // namespace leveldb diff --git a/src/leveldb/util/posix_logger.h b/src/leveldb/util/posix_logger.h index 2f06e1da7..957ccd489 100644 --- a/src/leveldb/util/posix_logger.h +++ b/src/leveldb/util/posix_logger.h @@ -12,24 +12,99 @@ #ifndef STORAGE_LEVELDB_UTIL_POSIX_LOGGER_H_ #define STORAGE_LEVELDB_UTIL_POSIX_LOGGER_H_ -#include +#include +#include // for fcntl() #include +#include #include #include +#include // for getpid() + +#include +#include +#include // for std::setw() +#include +#include // for std::ostringstream +#include + #include "leveldb/env.h" namespace leveldb { class PosixLogger : public Logger { private: - FILE* file_; + std::string fname_; + LogOption opt_; uint64_t (*gettid_)(); // Return the thread id for the current thread + FILE* file_; + uint64_t file_length_; + uint64_t size_since_flush_; + uint64_t last_flush_time_ms_; + bool bg_flush_running_; + std::thread bg_flush_; + std::condition_variable flush_cv_; + std::mutex mutex_; + public: - PosixLogger(FILE* f, uint64_t (*gettid)()) : file_(f), gettid_(gettid) { } + PosixLogger(const std::string& fname, const LogOption& opt, uint64_t (*gettid)()) + : fname_(fname), + opt_(opt), + gettid_(gettid), + file_(nullptr), + file_length_(0), + size_since_flush_(0), + last_flush_time_ms_(0), + bg_flush_running_(true) { + assert(opt_.max_log_size > 0); + bg_flush_ = std::thread{&PosixLogger::BGFlushWork, this}; + } + virtual ~PosixLogger() { - fclose(file_); + { + std::lock_guard guard(mutex_); + if (bg_flush_running_ == false) { + return; + } + } + Exit(); + } + + void Exit() { + { + std::lock_guard guard(mutex_); + bg_flush_running_ = false; + } + flush_cv_.notify_one(); + bg_flush_.join(); + + if (file_ != nullptr) { + fflush(file_); + fclose(file_); + file_ = nullptr; + } } - virtual void Logv(const char* format, va_list ap) { + + virtual void Logv(const char* file, int64_t line, const char* format, va_list ap) { + std::lock_guard guard(mutex_); + + if (file_length_ >= opt_.max_log_size) { + if (file_ != nullptr) { + fflush(file_); + fclose(file_); + file_ = nullptr; + file_length_ = 0; + size_since_flush_ = 0; + } + } + + if (file_ == nullptr) { + if (!CreateLogFile()) { + return; + } + // TODO + // write a log header + } + const uint64_t thread_id = (*gettid_)(); // We try twice: the first time with a fixed-size stack allocated buffer, @@ -55,18 +130,13 @@ class PosixLogger : public Logger { localtime_r(&seconds, &t); p += snprintf(p, limit - p, #ifdef OS_LINUX - "%04d/%02d/%02d-%02d:%02d:%02d.%06d %llu ", + "%04d/%02d/%02d-%02d:%02d:%02d.%06d %llu %s:%ld] ", #else - "%04d/%02d/%02d-%02d:%02d:%02d.%06d %llx ", + "%04d/%02d/%02d-%02d:%02d:%02d.%06d %llx %s:%ld] ", #endif - t.tm_year + 1900, - t.tm_mon + 1, - t.tm_mday, - t.tm_hour, - t.tm_min, - t.tm_sec, + t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec, static_cast(now_tv.tv_usec), - static_cast(thread_id)); + static_cast(thread_id), FileBaseName(file), line); // Print the message if (p < limit) { @@ -79,7 +149,7 @@ class PosixLogger : public Logger { // Truncate to available space if necessary if (p >= limit) { if (iter == 0) { - continue; // Try again with larger buffer + continue; // Try again with larger buffer } else { p = limit - 1; } @@ -91,13 +161,141 @@ class PosixLogger : public Logger { } assert(p <= limit); + mutex_.unlock(); fwrite(base, 1, p - base, file_); - fflush(file_); + mutex_.lock(); + file_length_ += p - base; + size_since_flush_ += p - base; if (base != buffer) { delete[] base; } break; } + + if (size_since_flush_ >= opt_.flush_trigger_size) { + mutex_.unlock(); + flush_cv_.notify_one(); + mutex_.lock(); + } + } + + private: + bool NeedFlush(bool time_triggered) { + std::lock_guard guard(mutex_); + + if (size_since_flush_ == 0) { + return false; + } + + // triggered by size + if (size_since_flush_ >= opt_.flush_trigger_size) { + return true; + } + + // triggered by time interval + if (time_triggered) { + return true; + } + + return false; + } + + void Flush() { + { + std::lock_guard guard(mutex_); + size_since_flush_ = 0; + } + + if (file_ != nullptr) { + fflush(file_); + } + last_flush_time_ms_ = Env::Default()->NowMicros() / 1000; + } + + void BGFlushWork() { + while (bg_flush_running_) { + uint64_t next_flush_time_ms = last_flush_time_ms_ + opt_.flush_trigger_interval_ms; + uint64_t current_time_ms = Env::Default()->NowMicros() / 1000; + bool time_triggered = (current_time_ms >= next_flush_time_ms) ? true : false; + if (NeedFlush(time_triggered)) { + Flush(); + } else { + if (time_triggered) { + // time triggered, but size_since_flush_ == 0, no data to flush, just refresh the flush + // time, wait a flush_trigger_inteval + last_flush_time_ms_ = Env::Default()->NowMicros() / 1000; + } + uint64_t wait_timeout_ms = + last_flush_time_ms_ + opt_.flush_trigger_interval_ms - current_time_ms; + // size not enough, wait for (next_flush_time_ms - current_time_ms) at most + std::unique_lock lk(mutex_); + flush_cv_.wait_for(lk, std::chrono::milliseconds(wait_timeout_ms), [this] { + return size_since_flush_ >= opt_.flush_trigger_size || bg_flush_running_ == false; + }); + } + } + } + const char* FileBaseName(const char* filepath) { + const char* base = strrchr(filepath, '/'); + return base ? (base + 1) : filepath; + } + + bool CreateLogFile() { + std::string filename_string = fname_ + "." + GetTimePIDString(); + const char* filename = filename_string.c_str(); + int fd = open(filename, O_WRONLY | O_CREAT, 0664); + if (fd == -1) { + fprintf(stderr, "open failed! file:%s, errno:%d, err_msg:%s\n", filename, errno, + strerror(errno)); + return false; + } + + // Mark the file close-on-exec. We don't really care if this fails + fcntl(fd, F_SETFD, FD_CLOEXEC); + + file_ = fdopen(fd, "a"); // Make a FILE*. + if (file_ == nullptr) { // We're screwed! + fprintf(stderr, "fdopen failed! fd:%d, errno:%d, err_msg:%s\n", fd, errno, strerror(errno)); + close(fd); + unlink(filename); // Erase the half-baked evidence: an unusable log file + return false; + } + fprintf(stderr, "create a new log file:%s success\n", filename); + + // We try to create a symlink called fname_, + // which is easier to use. (Every time we create a new logfile, + // we destroy the old symlink and create a new one, so it always + // points to the latest logfile.) If it fails, we're sad but it's + // no error. + std::string linkpath = fname_; + unlink(linkpath.c_str()); // delete old one if it exists + + // We must have unistd.h. + // Make the symlink be relative (in the same dir) so that if the + // entire log directory gets relocated the link is still valid. + const char* slash = strrchr(filename, '/'); + const char* linkdest = slash ? (slash + 1) : filename; + if (symlink(linkdest, linkpath.c_str()) != 0) { + // silently ignore failures + } + fprintf(stderr, "create a link:%s pointed to:%s success\n", linkpath.c_str(), linkdest); + + return true; + } + + std::string GetTimePIDString() const { + time_t timestamp = time(nullptr); + struct ::tm tm_time; + localtime_r(×tamp, &tm_time); + std::ostringstream time_pid_stream; + + time_pid_stream.fill('0'); + time_pid_stream << 1900 + tm_time.tm_year << std::setw(2) << 1 + tm_time.tm_mon << std::setw(2) + << tm_time.tm_mday << '-' << std::setw(2) << tm_time.tm_hour << std::setw(2) + << tm_time.tm_min << std::setw(2) << tm_time.tm_sec << '.' + << static_cast(getpid()); + + return time_pid_stream.str(); } }; diff --git a/src/leveldb/util/random.cc b/src/leveldb/util/random.cc new file mode 100644 index 000000000..89fe6febd --- /dev/null +++ b/src/leveldb/util/random.cc @@ -0,0 +1,41 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "util/random.h" + +#include +#include +#include +#include + +#define STORAGE_DECL static __thread + +#if defined(__GNUC__) && __GNUC__ >= 4 +#define LIKELY(x) (__builtin_expect((x), 1)) +#define UNLIKELY(x) (__builtin_expect((x), 0)) +#else +#define LIKELY(x) (x) +#define UNLIKELY(x) (x) +#endif + +namespace leveldb { + +Random* Random::GetTLSInstance() { + STORAGE_DECL Random* tls_instance; + STORAGE_DECL std::aligned_storage::type tls_instance_bytes; + + auto rv = tls_instance; + if ((rv == nullptr)) { + size_t seed = std::hash()(std::this_thread::get_id()); + rv = new (&tls_instance_bytes) Random((uint32_t)seed); + tls_instance = rv; + } + return rv; +} + +} // namespace rocksdb diff --git a/src/leveldb/util/random.h b/src/leveldb/util/random.h index f821870ab..3666f2c65 100644 --- a/src/leveldb/util/random.h +++ b/src/leveldb/util/random.h @@ -9,6 +9,7 @@ #ifndef STORAGE_LEVELDB_UTIL_RANDOM_H_ #define STORAGE_LEVELDB_UTIL_RANDOM_H_ +#include #include namespace leveldb { @@ -19,11 +20,12 @@ namespace leveldb { class Random { private: uint32_t seed_; + public: - explicit Random(uint32_t s) : seed_(s & 0x7fffffffu) { } + explicit Random(uint32_t s) : seed_(s & 0x7fffffffu) {} uint32_t Next() { - static const uint32_t M = 2147483647L; // 2^31-1 - static const uint64_t A = 16807; // bits 14, 8, 7, 5, 2, 1, 0 + static const uint32_t M = 2147483647L; // 2^31-1 + static const uint64_t A = 16807; // bits 14, 8, 7, 5, 2, 1, 0 // We are computing // seed_ = (seed_ * A) % M, where M = 2^31-1 // @@ -53,11 +55,39 @@ class Random { // Skewed: pick "base" uniformly from range [0,max_log] and then // return "base" random bits. The effect is to pick a number in the // range [0,2^max_log-1] with exponential bias towards smaller numbers. - uint32_t Skewed(int max_log) { - return Uniform(1 << Uniform(max_log + 1)); - } + uint32_t Skewed(int max_log) { return Uniform(1 << Uniform(max_log + 1)); } + + // Returns a Random instance for use by the current thread without + // additional locking + static Random* GetTLSInstance(); }; +// A simple 64bit random number generator based on std::mt19937_64 +class Random64 { + private: + std::mt19937_64 generator_; + + public: + explicit Random64(uint64_t s) : generator_(s) {} + + // Generates the next random number + uint64_t Next() { return generator_(); } + + // Returns a uniformly distributed value in the range [0..n-1] + // REQUIRES: n > 0 + uint64_t Uniform(uint64_t n) { + return std::uniform_int_distribution(0, n - 1)(generator_); + } + + // Randomly returns true ~"1/n" of the time, and false otherwise. + // REQUIRES: n > 0 + bool OneIn(uint64_t n) { return Uniform(n) == 0; } + + // Skewed: pick "base" uniformly from range [0,max_log] and then + // return "base" random bits. The effect is to pick a number in the + // range [0,2^max_log-1] with exponential bias towards smaller numbers. + uint64_t Skewed(int max_log) { return Uniform(uint64_t(1) << Uniform(max_log + 1)); } +}; } // namespace leveldb #endif // STORAGE_LEVELDB_UTIL_RANDOM_H_ diff --git a/src/leveldb/util/raw_key_operator.cc b/src/leveldb/util/raw_key_operator.cc index 8ce699c5b..5e24578f1 100644 --- a/src/leveldb/util/raw_key_operator.cc +++ b/src/leveldb/util/raw_key_operator.cc @@ -7,105 +7,88 @@ #include #include "coding.h" -#include "../common/counter.h" +#include "common/counter.h" namespace leveldb { // performance test tera::Counter rawkey_compare_counter; -static inline void AppendTsAndType(std::string* tera_key, - int64_t timestamp, - TeraKeyType type) { - timestamp &= 0x00FFFFFFFFFFFFFF; - uint64_t n = ((1UL << 56) - 1 - timestamp) << 8 | (type & 0xFF); - char str[8]; - EncodeBigEndian(str, n); - tera_key->append(str, 8); +static inline void AppendTsAndType(std::string* tera_key, int64_t timestamp, TeraKeyType type) { + timestamp &= 0x00FFFFFFFFFFFFFF; + uint64_t n = ((1UL << 56) - 1 - timestamp) << 8 | (type & 0xFF); + char str[8]; + EncodeBigEndian(str, n); + tera_key->append(str, 8); } -static inline void ExtractTsAndType(const Slice& tera_key, - int64_t* timestamp, - TeraKeyType* type) { - uint64_t n = DecodeBigEndain(tera_key.data() + tera_key.size() - sizeof(uint64_t)); - if (type) { - *type = static_cast((n << 56) >> 56); - } - if (timestamp) { - *timestamp = (1L << 56) - 1 - (n >> 8); - } +static inline void ExtractTsAndType(const Slice& tera_key, int64_t* timestamp, TeraKeyType* type) { + uint64_t n = DecodeBigEndain(tera_key.data() + tera_key.size() - sizeof(uint64_t)); + if (type) { + *type = static_cast((n << 56) >> 56); + } + if (timestamp) { + *timestamp = (1L << 56) - 1 - (n >> 8); + } } -static inline void AppendRowQualifierLength(std::string* tera_key, - const std::string& row_key, +static inline void AppendRowQualifierLength(std::string* tera_key, const std::string& row_key, const std::string& qualifier) { - uint32_t rlen = row_key.size(); - uint32_t qlen = qualifier.size(); - uint32_t n = (rlen<<16) | qlen; - char str[4]; - EncodeBigEndian32(str, n); - tera_key->append(str, 4); + uint32_t rlen = row_key.size(); + uint32_t qlen = qualifier.size(); + uint32_t n = (rlen << 16) | qlen; + char str[4]; + EncodeBigEndian32(str, n); + tera_key->append(str, 4); } - /** * readable encoding format: * [rowkey\0|column\0|qualifier\0|type|timestamp] * [ rlen+1B| clen+1B| qlen+1B | 1B | 7B ] **/ class ReadableRawKeyOperatorImpl : public RawKeyOperator { -public: - virtual void EncodeTeraKey(const std::string& row_key, - const std::string& family, - const std::string& qualifier, - int64_t timestamp, - TeraKeyType type, - std::string* tera_key) const { - *tera_key = row_key; - tera_key->push_back('\0'); - tera_key->append(family); - tera_key->push_back('\0'); - tera_key->append(qualifier); - tera_key->push_back('\0'); - AppendTsAndType(tera_key, timestamp, type); + public: + virtual void EncodeTeraKey(const std::string& row_key, const std::string& family, + const std::string& qualifier, int64_t timestamp, TeraKeyType type, + std::string* tera_key) const { + *tera_key = row_key; + tera_key->push_back('\0'); + tera_key->append(family); + tera_key->push_back('\0'); + tera_key->append(qualifier); + tera_key->push_back('\0'); + AppendTsAndType(tera_key, timestamp, type); + } + + virtual bool ExtractTeraKey(const Slice& tera_key, Slice* row_key, Slice* family, + Slice* qualifier, int64_t* timestamp, TeraKeyType* type) const { + int key_len = strlen(tera_key.data()); + if (row_key) { + *row_key = Slice(tera_key.data(), key_len); } - virtual bool ExtractTeraKey(const Slice& tera_key, - Slice* row_key, - Slice* family, - Slice* qualifier, - int64_t* timestamp, - TeraKeyType* type) const { - int key_len = strlen(tera_key.data()); - if (row_key) { - *row_key = Slice(tera_key.data(), key_len); - } - - int family_len = strlen(tera_key.data() + key_len + 1); - Slice family_data(tera_key.data() + key_len + 1, family_len); - if (family) { - *family = family_data; - } - - int qualifier_len = strlen(family_data.data() + family_len + 1); - if (qualifier) { - *qualifier = Slice(family_data.data() + family_len + 1, qualifier_len); - } - - if (key_len + family_len + qualifier_len + 3 + sizeof(uint64_t) != tera_key.size()) { - return false; - } - ExtractTsAndType(tera_key, timestamp, type); - return true; + int family_len = strlen(tera_key.data() + key_len + 1); + Slice family_data(tera_key.data() + key_len + 1, family_len); + if (family) { + *family = family_data; } - virtual int Compare(const Slice& key1, const Slice& key2) const { - return key1.compare(key2); + int qualifier_len = strlen(family_data.data() + family_len + 1); + if (qualifier) { + *qualifier = Slice(family_data.data() + family_len + 1, qualifier_len); } - const char* Name() const { - return "tera.RawKeyOperator.readable"; + if (key_len + family_len + qualifier_len + 3 + sizeof(uint64_t) != tera_key.size()) { + return false; } + ExtractTsAndType(tera_key, timestamp, type); + return true; + } + + virtual int Compare(const Slice& key1, const Slice& key2) const { return key1.compare(key2); } + + const char* Name() const { return "tera.RawKeyOperator.readable"; } }; /** @@ -114,171 +97,154 @@ class ReadableRawKeyOperatorImpl : public RawKeyOperator { * [ rlen | clen+1B| qlen | 1B | 7B | 2B | 2B ] **/ class BinaryRawKeyOperatorImpl : public RawKeyOperator { -public: - virtual void EncodeTeraKey(const std::string& row_key, - const std::string& family, - const std::string& qualifier, - int64_t timestamp, - TeraKeyType type, - std::string* tera_key) const { - uint32_t rlen = row_key.size(); - uint32_t flen = family.size(); - uint32_t qlen = qualifier.size(); - - tera_key->resize(rlen + flen + qlen + 13); - char* key = (char*)(tera_key->data()); - - // fill rowkey segment - memcpy(key, row_key.data(), rlen); - int pos = rlen; - // fill column family segment - memcpy(key + pos, family.data(), flen); - pos += flen; - key[pos] = '\0'; - pos++; - - // fill qualifier segment - memcpy(key + pos, qualifier.data(), qlen); - pos += qlen; - - // fill timestamp&type segment - uint64_t n = ((1UL << 56) - 1 - timestamp) << 8 | (type & 0xFF); - EncodeBigEndian(key + pos, n); - pos += 8; - - // fill row len and qualifier len segment - uint32_t m = (rlen << 16) | (qlen & 0xFFFF); - EncodeBigEndian32(key + pos, m); + public: + virtual void EncodeTeraKey(const std::string& row_key, const std::string& family, + const std::string& qualifier, int64_t timestamp, TeraKeyType type, + std::string* tera_key) const { + uint32_t rlen = row_key.size(); + uint32_t flen = family.size(); + uint32_t qlen = qualifier.size(); + + tera_key->resize(rlen + flen + qlen + 13); + char* key = (char*)(tera_key->data()); + + // fill rowkey segment + memcpy(key, row_key.data(), rlen); + int pos = rlen; + // fill column family segment + memcpy(key + pos, family.data(), flen); + pos += flen; + key[pos] = '\0'; + pos++; + + // fill qualifier segment + memcpy(key + pos, qualifier.data(), qlen); + pos += qlen; + + // fill timestamp&type segment + uint64_t n = ((1UL << 56) - 1 - timestamp) << 8 | (type & 0xFF); + EncodeBigEndian(key + pos, n); + pos += 8; + + // fill row len and qualifier len segment + uint32_t m = (rlen << 16) | (qlen & 0xFFFF); + EncodeBigEndian32(key + pos, m); + } + + virtual bool ExtractTeraKey(const Slice& tera_key, Slice* row_key, Slice* family, + Slice* qualifier, int64_t* timestamp, TeraKeyType* type) const { + uint32_t len = DecodeBigEndain32(tera_key.data() + tera_key.size() - sizeof(uint32_t)); + int key_len = static_cast(len >> 16); + int family_len = strlen(tera_key.data() + key_len); + int qualifier_len = static_cast(len & 0xFFFF); + + if (key_len + family_len + qualifier_len + 1 + sizeof(uint64_t) + sizeof(uint32_t) != + tera_key.size()) { + return false; + } + + if (row_key) { + *row_key = Slice(tera_key.data(), key_len); + } + Slice family_data(tera_key.data() + key_len, family_len); + if (family) { + *family = family_data; + } + if (qualifier) { + *qualifier = Slice(family_data.data() + family_len + 1, qualifier_len); } - virtual bool ExtractTeraKey(const Slice& tera_key, - Slice* row_key, - Slice* family, - Slice* qualifier, - int64_t* timestamp, - TeraKeyType* type) const { - uint32_t len = DecodeBigEndain32(tera_key.data() + tera_key.size() - sizeof(uint32_t)); - int key_len = static_cast(len >> 16); - int family_len = strlen(tera_key.data() + key_len); - int qualifier_len = static_cast(len & 0xFFFF); - - if (key_len + family_len + qualifier_len + 1 + - sizeof(uint64_t) + sizeof(uint32_t) != tera_key.size()) { - return false; - } - - if (row_key) { - *row_key = Slice(tera_key.data(), key_len); - } - Slice family_data(tera_key.data() + key_len, family_len); - if (family) { - *family = family_data; - } - if (qualifier) { - *qualifier = Slice(family_data.data() + family_len + 1, qualifier_len); - } - - Slice internal_tera_key = Slice(tera_key.data(), tera_key.size() - sizeof(uint32_t)); - ExtractTsAndType(internal_tera_key, timestamp, type); - return true; + Slice internal_tera_key = Slice(tera_key.data(), tera_key.size() - sizeof(uint32_t)); + ExtractTsAndType(internal_tera_key, timestamp, type); + return true; + } + + virtual int Compare(const Slice& key1, const Slice& key2) const { + // for performance optimiztion + // rawkey_compare_counter.Inc(); + uint32_t len1, len2, rlen1, rlen2, clen1, clen2, qlen1, qlen2; + int ret; + const char* data1 = key1.data(); + const char* data2 = key2.data(); + int size1 = key1.size(); + int size2 = key2.size(); + + // decode rowlen and qualifierlen from raw key + len1 = DecodeBigEndain32(data1 + size1 - 4); + len2 = DecodeBigEndain32(data2 + size2 - 4); + + // rowkey compare, if ne, return + rlen1 = static_cast(len1 >> 16); + rlen2 = static_cast(len2 >> 16); + Slice row1(data1, rlen1); + Slice row2(data2, rlen2); + ret = row1.compare(row2); + if (ret != 0) { + return ret; } - virtual int Compare(const Slice& key1, const Slice& key2) const { - // for performance optimiztion - // rawkey_compare_counter.Inc(); - uint32_t len1, len2, rlen1, rlen2, clen1, clen2, qlen1, qlen2; - int ret; - const char* data1 = key1.data(); - const char* data2 = key2.data(); - int size1 = key1.size(); - int size2 = key2.size(); - - // decode rowlen and qualifierlen from raw key - len1 = DecodeBigEndain32(data1 + size1 - 4); - len2 = DecodeBigEndain32(data2 + size2 - 4); - - // rowkey compare, if ne, return - rlen1 = static_cast(len1 >> 16); - rlen2 = static_cast(len2 >> 16); - Slice row1(data1, rlen1); - Slice row2(data2, rlen2); - ret = row1.compare(row2); - if (ret != 0) { - return ret; - } - - // column family compare, if ne, return - qlen1 = static_cast(len1 & 0x00FF); - qlen2 = static_cast(len2 & 0x00FF); - clen1 = size1 - rlen1 - qlen1 - 13; - clen2 = size2 - rlen2 - qlen2 - 13; - Slice col1(data1 + rlen1, clen1); - Slice col2(data2 + rlen2, clen2); - ret = col1.compare(col2); - if (ret != 0) { - return ret; - } - - // qualifier compare, if ne, return - Slice qual1(data1 + size1 - qlen1 - 12, qlen1); - Slice qual2(data2 + size2 - qlen2 - 12, qlen2); - ret = qual1.compare(qual2); - if (ret != 0) { - return ret; - } - - // timestamp&type compared together - Slice ts_type1(data1 + size1 - 12, 8); - Slice ts_type2(data2 + size2 - 12, 8); - return ts_type1.compare(ts_type2); + // column family compare, if ne, return + qlen1 = static_cast(len1 & 0x00FF); + qlen2 = static_cast(len2 & 0x00FF); + clen1 = size1 - rlen1 - qlen1 - 13; + clen2 = size2 - rlen2 - qlen2 - 13; + Slice col1(data1 + rlen1, clen1); + Slice col2(data2 + rlen2, clen2); + ret = col1.compare(col2); + if (ret != 0) { + return ret; } - const char* Name() const { - return "tera.RawKeyOperator.binary"; + // qualifier compare, if ne, return + Slice qual1(data1 + size1 - qlen1 - 12, qlen1); + Slice qual2(data2 + size2 - qlen2 - 12, qlen2); + ret = qual1.compare(qual2); + if (ret != 0) { + return ret; } + + // timestamp&type compared together + Slice ts_type1(data1 + size1 - 12, 8); + Slice ts_type2(data2 + size2 - 12, 8); + return ts_type1.compare(ts_type2); + } + + const char* Name() const { return "tera.RawKeyOperator.binary"; } }; // support KV-pair with TTL, Key's format : // [row_key|expire_timestamp] // [rlen|4B] class KvRawKeyOperatorImpl : public RawKeyOperator { -public: - virtual void EncodeTeraKey(const std::string& row_key, - const std::string& family, - const std::string& qualifier, - int64_t timestamp, // must >= 0 - TeraKeyType type, - std::string* tera_key) const { - char expire_timestamp[8]; - EncodeBigEndian(expire_timestamp, timestamp); - tera_key->assign(row_key).append(expire_timestamp, 8); + public: + virtual void EncodeTeraKey(const std::string& row_key, const std::string& family, + const std::string& qualifier, + int64_t timestamp, // must >= 0 + TeraKeyType type, std::string* tera_key) const { + char expire_timestamp[8]; + EncodeBigEndian(expire_timestamp, timestamp); + tera_key->assign(row_key).append(expire_timestamp, 8); + } + + virtual bool ExtractTeraKey(const Slice& tera_key, Slice* row_key, Slice* family, + Slice* qualifier, int64_t* timestamp, TeraKeyType* type) const { + if (row_key) { + *row_key = Slice(tera_key.data(), tera_key.size() - sizeof(int64_t)); } - - virtual bool ExtractTeraKey(const Slice& tera_key, - Slice* row_key, - Slice* family, - Slice* qualifier, - int64_t* timestamp, - TeraKeyType* type) const { - if (row_key) { - *row_key = Slice(tera_key.data(), tera_key.size() - sizeof(int64_t)); - } - if (timestamp) { - *timestamp = DecodeBigEndain(tera_key.data() + tera_key.size() - sizeof(int64_t)); - } - return true; + if (timestamp) { + *timestamp = DecodeBigEndain(tera_key.data() + tera_key.size() - sizeof(int64_t)); } + return true; + } - // only compare row_key - virtual int Compare(const Slice& key1, const Slice& key2) const { - leveldb::Slice key1_rowkey(key1.data(), key1.size() - sizeof(int64_t)); - leveldb::Slice key2_rowkey(key2.data(), key2.size() - sizeof(int64_t)); - return key1_rowkey.compare(key2_rowkey); - } + // only compare row_key + virtual int Compare(const Slice& key1, const Slice& key2) const { + leveldb::Slice key1_rowkey(key1.data(), key1.size() - sizeof(int64_t)); + leveldb::Slice key2_rowkey(key2.data(), key2.size() - sizeof(int64_t)); + return key1_rowkey.compare(key2_rowkey); + } - const char* Name() const { - return "tera.RawKeyOperator.kv"; - } + const char* Name() const { return "tera.RawKeyOperator.kv"; } }; static pthread_once_t once = PTHREAD_ONCE_INIT; @@ -287,24 +253,24 @@ static const RawKeyOperator* binary_key; static const KvRawKeyOperatorImpl* kv_key; static void InitModule() { - readable_key = new ReadableRawKeyOperatorImpl; - binary_key = new BinaryRawKeyOperatorImpl; - kv_key = new KvRawKeyOperatorImpl; + readable_key = new ReadableRawKeyOperatorImpl; + binary_key = new BinaryRawKeyOperatorImpl; + kv_key = new KvRawKeyOperatorImpl; } const RawKeyOperator* ReadableRawKeyOperator() { - pthread_once(&once, InitModule); - return readable_key; + pthread_once(&once, InitModule); + return readable_key; } const RawKeyOperator* BinaryRawKeyOperator() { - pthread_once(&once, InitModule); - return binary_key; + pthread_once(&once, InitModule); + return binary_key; } const RawKeyOperator* KvRawKeyOperator() { - pthread_once(&once, InitModule); - return kv_key; + pthread_once(&once, InitModule); + return kv_key; } -} // namespace leveldb +} // namespace leveldb diff --git a/src/leveldb/util/raw_key_operator_test.cc b/src/leveldb/util/raw_key_operator_test.cc index 4288c3257..bad934b27 100644 --- a/src/leveldb/util/raw_key_operator_test.cc +++ b/src/leveldb/util/raw_key_operator_test.cc @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include"leveldb/raw_key_operator.h" +#include "leveldb/raw_key_operator.h" #include #include @@ -12,413 +12,395 @@ namespace leveldb { void print_bytes(const char* str, int len) { - for (int i = 0; i < len; ++i) { - printf("%x ", str[i]); - } - printf("\n"); + for (int i = 0; i < len; ++i) { + printf("%x ", str[i]); + } + printf("\n"); } int64_t get_micros() { - struct timespec ts; - clock_gettime(CLOCK_REALTIME, &ts); - return static_cast(ts.tv_sec) * 1000000 + static_cast(ts.tv_nsec) / 1000; + struct timespec ts; + clock_gettime(CLOCK_REALTIME, &ts); + return static_cast(ts.tv_sec) * 1000000 + static_cast(ts.tv_nsec) / 1000; } class RawKeyOperatorTest {}; TEST(RawKeyOperatorTest, ReadableEncodeTeraKey) { - const RawKeyOperator* key_operator = ReadableRawKeyOperator(); - std::string key("row_key"); - std::string column("column"); - std::string qualifier("qualifier"); - int64_t timestamp = 0x0001020304050607; + const RawKeyOperator* key_operator = ReadableRawKeyOperator(); + std::string key("row_key"); + std::string column("column"); + std::string qualifier("qualifier"); + int64_t timestamp = 0x0001020304050607; - std::string tera_key1; - std::string tera_key2; + std::string tera_key1; + std::string tera_key2; - key_operator->EncodeTeraKey(key, column, qualifier, timestamp, - TKT_VALUE, &tera_key1); - key_operator->EncodeTeraKey(key, column, qualifier, timestamp, - TKT_DEL, &tera_key2); + key_operator->EncodeTeraKey(key, column, qualifier, timestamp, TKT_VALUE, &tera_key1); + key_operator->EncodeTeraKey(key, column, qualifier, timestamp, TKT_DEL, &tera_key2); - size_t len = key.size() + column.size() + qualifier.size() + sizeof(timestamp) + 3; - ASSERT_EQ(tera_key1.size(), len); + size_t len = key.size() + column.size() + qualifier.size() + sizeof(timestamp) + 3; + ASSERT_EQ(tera_key1.size(), len); - std::string raw1("row_key\0column\0qualifier\0\xFE\xFD\xFC\xFB\xFA\xF9\xF8\x05", len); - ASSERT_TRUE(tera_key1 == raw1); -// print_bytes(tera_key1.data(), tera_key1.size()); -// print_bytes(raw1.data(), raw1.size()); + std::string raw1("row_key\0column\0qualifier\0\xFE\xFD\xFC\xFB\xFA\xF9\xF8\x05", len); + ASSERT_TRUE(tera_key1 == raw1); + // print_bytes(tera_key1.data(), tera_key1.size()); + // print_bytes(raw1.data(), raw1.size()); - std::string raw2("row_key\0column\0qualifier\0\xFE\xFD\xFC\xFB\xFA\xF9\xF8\x01", len); - ASSERT_TRUE(tera_key2 == raw2); + std::string raw2("row_key\0column\0qualifier\0\xFE\xFD\xFC\xFB\xFA\xF9\xF8\x01", len); + ASSERT_TRUE(tera_key2 == raw2); - ASSERT_TRUE(tera_key1.compare(tera_key2) > 0); + ASSERT_TRUE(tera_key1.compare(tera_key2) > 0); } TEST(RawKeyOperatorTest, ReadableExtractTeraKey) { - const RawKeyOperator* key_operator = ReadableRawKeyOperator(); - std::string tera_key1; - std::string row_key1 = "row"; - std::string column1 = "column"; - std::string qualifier1 = "qualifier"; - int64_t timestamp1 = time(NULL); - key_operator->EncodeTeraKey(row_key1, column1,qualifier1, - timestamp1, TKT_VALUE, &tera_key1); - - Slice row_key2; - Slice column2; - Slice qualifier2; - int64_t timestamp2; - TeraKeyType type2; - ASSERT_TRUE(key_operator->ExtractTeraKey(tera_key1, &row_key2, &column2, - &qualifier2, ×tamp2, &type2)); - - ASSERT_EQ(row_key1, row_key2.ToString()); - ASSERT_EQ(column1, column2.ToString()); - ASSERT_EQ(qualifier1, qualifier2.ToString()); - ASSERT_EQ(timestamp1, timestamp2); - ASSERT_EQ(type2, TKT_VALUE); + const RawKeyOperator* key_operator = ReadableRawKeyOperator(); + std::string tera_key1; + std::string row_key1 = "row"; + std::string column1 = "column"; + std::string qualifier1 = "qualifier"; + int64_t timestamp1 = time(NULL); + key_operator->EncodeTeraKey(row_key1, column1, qualifier1, timestamp1, TKT_VALUE, &tera_key1); + + Slice row_key2; + Slice column2; + Slice qualifier2; + int64_t timestamp2; + TeraKeyType type2; + ASSERT_TRUE(key_operator->ExtractTeraKey(tera_key1, &row_key2, &column2, &qualifier2, ×tamp2, + &type2)); + + ASSERT_EQ(row_key1, row_key2.ToString()); + ASSERT_EQ(column1, column2.ToString()); + ASSERT_EQ(qualifier1, qualifier2.ToString()); + ASSERT_EQ(timestamp1, timestamp2); + ASSERT_EQ(type2, TKT_VALUE); } TEST(RawKeyOperatorTest, BinaryEncodeTeraKey) { - const RawKeyOperator* key_operator = BinaryRawKeyOperator(); - std::string key("row_key"); - std::string column("column"); - std::string qualifier("qualifier"); - int64_t timestamp = 0x01020304050607; + const RawKeyOperator* key_operator = BinaryRawKeyOperator(); + std::string key("row_key"); + std::string column("column"); + std::string qualifier("qualifier"); + int64_t timestamp = 0x01020304050607; - std::string tera_key1; - std::string tera_key2; + std::string tera_key1; + std::string tera_key2; - key_operator->EncodeTeraKey(key, column, qualifier, timestamp, - TKT_VALUE, &tera_key1); - key_operator->EncodeTeraKey(key, column, qualifier, timestamp, - TKT_DEL, &tera_key2); + key_operator->EncodeTeraKey(key, column, qualifier, timestamp, TKT_VALUE, &tera_key1); + key_operator->EncodeTeraKey(key, column, qualifier, timestamp, TKT_DEL, &tera_key2); - size_t len = key.size() + column.size() + qualifier.size() + sizeof(timestamp) + 5; - ASSERT_EQ(tera_key1.size(), len); + size_t len = key.size() + column.size() + qualifier.size() + sizeof(timestamp) + 5; + ASSERT_EQ(tera_key1.size(), len); - std::string raw1("row_keycolumn\0qualifier\xFE\xFD\xFC\xFB\xFA\xF9\xF8\x5\x0\x7\x0\x9", len); - ASSERT_TRUE(tera_key1 == raw1); -// print_bytes(tera_key1.data(), tera_key1.size()); -// print_bytes(raw1.data(), raw1.size()); + std::string raw1("row_keycolumn\0qualifier\xFE\xFD\xFC\xFB\xFA\xF9\xF8\x5\x0\x7\x0\x9", len); + ASSERT_TRUE(tera_key1 == raw1); + // print_bytes(tera_key1.data(), tera_key1.size()); + // print_bytes(raw1.data(), raw1.size()); - std::string raw2("row_keycolumn\0qualifier\xFE\xFD\xFC\xFB\xFA\xF9\xF8\x01\x0\x7\x0\x9", len); - ASSERT_TRUE(tera_key2 == raw2); - - ASSERT_TRUE(tera_key1.compare(tera_key2) > 0); + std::string raw2("row_keycolumn\0qualifier\xFE\xFD\xFC\xFB\xFA\xF9\xF8\x01\x0\x7\x0\x9", len); + ASSERT_TRUE(tera_key2 == raw2); + ASSERT_TRUE(tera_key1.compare(tera_key2) > 0); } TEST(RawKeyOperatorTest, BinaryExtractTeraKey) { - const RawKeyOperator* key_operator = BinaryRawKeyOperator(); - std::string tera_key1; - std::string row_key1 = "row"; - std::string column1 = "column"; - std::string qualifier1 = "qualifier"; - key_operator->EncodeTeraKey(row_key1, column1,qualifier1, - 0, TKT_VALUE, &tera_key1); - - Slice row_key2; - Slice column2; - Slice qualifier2; - int64_t timestamp2; - TeraKeyType type2; - ASSERT_TRUE(key_operator->ExtractTeraKey(tera_key1, &row_key2, &column2, - &qualifier2, ×tamp2, &type2)); - - ASSERT_EQ(row_key1, row_key2.ToString()); - ASSERT_EQ(column1, column2.ToString()); - ASSERT_EQ(qualifier1, qualifier2.ToString()); - ASSERT_EQ(timestamp2, 0); - ASSERT_EQ(type2, TKT_VALUE); + const RawKeyOperator* key_operator = BinaryRawKeyOperator(); + std::string tera_key1; + std::string row_key1 = "row"; + std::string column1 = "column"; + std::string qualifier1 = "qualifier"; + key_operator->EncodeTeraKey(row_key1, column1, qualifier1, 0, TKT_VALUE, &tera_key1); + + Slice row_key2; + Slice column2; + Slice qualifier2; + int64_t timestamp2; + TeraKeyType type2; + ASSERT_TRUE(key_operator->ExtractTeraKey(tera_key1, &row_key2, &column2, &qualifier2, ×tamp2, + &type2)); + + ASSERT_EQ(row_key1, row_key2.ToString()); + ASSERT_EQ(column1, column2.ToString()); + ASSERT_EQ(qualifier1, qualifier2.ToString()); + ASSERT_EQ(timestamp2, 0); + ASSERT_EQ(type2, TKT_VALUE); } void GenTestString(int64_t len, std::string* output) { - for (int i = 0; i < len; ++i) { - output->append("a"); - } + for (int i = 0; i < len; ++i) { + output->append("a"); + } } TEST(RawKeyOperatorTest, TestBigRow) { - const RawKeyOperator* key_operator = BinaryRawKeyOperator(); - std::string test_str_60K; - GenTestString(60000, &test_str_60K); - - std::string tera_key1; - std::string row_key1 = test_str_60K; - std::string column1 = test_str_60K; - std::string qualifier1 = test_str_60K; - key_operator->EncodeTeraKey(row_key1, column1,qualifier1, - 0, TKT_VALUE, &tera_key1); - ASSERT_EQ(tera_key1.size(), 180013u); - - Slice row_key2; - Slice column2; - Slice qualifier2; - int64_t timestamp2; - TeraKeyType type2; - ASSERT_TRUE(key_operator->ExtractTeraKey(tera_key1, &row_key2, &column2, - &qualifier2, ×tamp2, &type2)); - - ASSERT_EQ(row_key1, row_key2.ToString()); - ASSERT_EQ(column1, column2.ToString()); - ASSERT_EQ(qualifier1, qualifier2.ToString()); - ASSERT_EQ(timestamp2, 0); - ASSERT_EQ(type2, TKT_VALUE); + const RawKeyOperator* key_operator = BinaryRawKeyOperator(); + std::string test_str_60K; + GenTestString(60000, &test_str_60K); + + std::string tera_key1; + std::string row_key1 = test_str_60K; + std::string column1 = test_str_60K; + std::string qualifier1 = test_str_60K; + key_operator->EncodeTeraKey(row_key1, column1, qualifier1, 0, TKT_VALUE, &tera_key1); + ASSERT_EQ(tera_key1.size(), 180013u); + + Slice row_key2; + Slice column2; + Slice qualifier2; + int64_t timestamp2; + TeraKeyType type2; + ASSERT_TRUE(key_operator->ExtractTeraKey(tera_key1, &row_key2, &column2, &qualifier2, ×tamp2, + &type2)); + + ASSERT_EQ(row_key1, row_key2.ToString()); + ASSERT_EQ(column1, column2.ToString()); + ASSERT_EQ(qualifier1, qualifier2.ToString()); + ASSERT_EQ(timestamp2, 0); + ASSERT_EQ(type2, TKT_VALUE); } TEST(RawKeyOperatorTest, Compare) { - const RawKeyOperator* key_operator = BinaryRawKeyOperator(); - std::string tera_key1, tera_key2; - std::string key1, key2; - std::string column1, column2; - std::string qualifier1, qualifier2; - int64_t ts1, ts2; - TeraKeyType type1, type2; - - key1 = "row"; - column1 = "column"; - qualifier1 = "qualifier"; - ts1 = 0; - type1 = TKT_VALUE; - key_operator->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key1); - - key2 = "row"; - column2 = "column"; - qualifier2 = "qualifier"; - ts2 = 0; - type2 = TKT_VALUE; - key_operator->EncodeTeraKey(key2, column2, qualifier2, ts2, type2, &tera_key2); - ASSERT_EQ(key_operator->Compare(tera_key1, tera_key2), 0); - - key2 = "row1"; - column2 = "column"; - qualifier2 = "qualifier"; - ts2 = 0; - type2 = TKT_VALUE; - key_operator->EncodeTeraKey(key2, column2, qualifier2, ts2, type2, &tera_key2); - ASSERT_LT(key_operator->Compare(tera_key1, tera_key2), 0); - - key2 = "ro"; - column2 = "column"; - qualifier2 = "qualifier"; - ts2 = 0; - type2 = TKT_VALUE; - key_operator->EncodeTeraKey(key2, column2, qualifier2, ts2, type2, &tera_key2); - ASSERT_GT(key_operator->Compare(tera_key1, tera_key2), 0); - - key2 = "row"; - column2 = "columny"; - qualifier2 = "qualifier"; - ts2 = 0; - type2 = TKT_VALUE; - key_operator->EncodeTeraKey(key2, column2, qualifier2, ts2, type2, &tera_key2); - ASSERT_LT(key_operator->Compare(tera_key1, tera_key2), 0); - - key2 = "row"; - column2 = "column"; - qualifier2 = "qualifierr"; - ts2 = 0; - type2 = TKT_VALUE; - key_operator->EncodeTeraKey(key2, column2, qualifier2, ts2, type2, &tera_key2); - ASSERT_LT(key_operator->Compare(tera_key1, tera_key2), 0); - - key2 = "row"; - column2 = "column"; - qualifier2 = "qualifier"; - ts2 = 1; - type2 = TKT_VALUE; - key_operator->EncodeTeraKey(key2, column2, qualifier2, ts2, type2, &tera_key2); - ASSERT_GT(key_operator->Compare(tera_key1, tera_key2), 0); - - key2 = "row"; - column2 = "column"; - qualifier2 = "qualifier"; - ts2 = 0; - type2 = TKT_DEL; - key_operator->EncodeTeraKey(key2, column2, qualifier2, ts2, type2, &tera_key2); - ASSERT_GT(key_operator->Compare(tera_key1, tera_key2), 0); - - // - type1 = TKT_DEL_COLUMN; - key_operator->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key1); - - key2 = "row"; - column2 = "column"; - qualifier2 = "qualifier"; - ts2 = 0; - type2 = TKT_VALUE; - key_operator->EncodeTeraKey(key2, column2, qualifier2, ts2, type2, &tera_key2); - ASSERT_LT(key_operator->Compare(tera_key1, tera_key2), 0); - - key2 = "row"; - column2 = "column"; - qualifier2 = "qualifier"; - ts2 = 1; - type2 = TKT_VALUE; - key_operator->EncodeTeraKey(key2, column2, qualifier2, ts2, type2, &tera_key2); - ASSERT_GT(key_operator->Compare(tera_key1, tera_key2), 0); + const RawKeyOperator* key_operator = BinaryRawKeyOperator(); + std::string tera_key1, tera_key2; + std::string key1, key2; + std::string column1, column2; + std::string qualifier1, qualifier2; + int64_t ts1, ts2; + TeraKeyType type1, type2; + + key1 = "row"; + column1 = "column"; + qualifier1 = "qualifier"; + ts1 = 0; + type1 = TKT_VALUE; + key_operator->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key1); + + key2 = "row"; + column2 = "column"; + qualifier2 = "qualifier"; + ts2 = 0; + type2 = TKT_VALUE; + key_operator->EncodeTeraKey(key2, column2, qualifier2, ts2, type2, &tera_key2); + ASSERT_EQ(key_operator->Compare(tera_key1, tera_key2), 0); + + key2 = "row1"; + column2 = "column"; + qualifier2 = "qualifier"; + ts2 = 0; + type2 = TKT_VALUE; + key_operator->EncodeTeraKey(key2, column2, qualifier2, ts2, type2, &tera_key2); + ASSERT_LT(key_operator->Compare(tera_key1, tera_key2), 0); + + key2 = "ro"; + column2 = "column"; + qualifier2 = "qualifier"; + ts2 = 0; + type2 = TKT_VALUE; + key_operator->EncodeTeraKey(key2, column2, qualifier2, ts2, type2, &tera_key2); + ASSERT_GT(key_operator->Compare(tera_key1, tera_key2), 0); + + key2 = "row"; + column2 = "columny"; + qualifier2 = "qualifier"; + ts2 = 0; + type2 = TKT_VALUE; + key_operator->EncodeTeraKey(key2, column2, qualifier2, ts2, type2, &tera_key2); + ASSERT_LT(key_operator->Compare(tera_key1, tera_key2), 0); + + key2 = "row"; + column2 = "column"; + qualifier2 = "qualifierr"; + ts2 = 0; + type2 = TKT_VALUE; + key_operator->EncodeTeraKey(key2, column2, qualifier2, ts2, type2, &tera_key2); + ASSERT_LT(key_operator->Compare(tera_key1, tera_key2), 0); + + key2 = "row"; + column2 = "column"; + qualifier2 = "qualifier"; + ts2 = 1; + type2 = TKT_VALUE; + key_operator->EncodeTeraKey(key2, column2, qualifier2, ts2, type2, &tera_key2); + ASSERT_GT(key_operator->Compare(tera_key1, tera_key2), 0); + + key2 = "row"; + column2 = "column"; + qualifier2 = "qualifier"; + ts2 = 0; + type2 = TKT_DEL; + key_operator->EncodeTeraKey(key2, column2, qualifier2, ts2, type2, &tera_key2); + ASSERT_GT(key_operator->Compare(tera_key1, tera_key2), 0); + + // + type1 = TKT_DEL_COLUMN; + key_operator->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key1); + + key2 = "row"; + column2 = "column"; + qualifier2 = "qualifier"; + ts2 = 0; + type2 = TKT_VALUE; + key_operator->EncodeTeraKey(key2, column2, qualifier2, ts2, type2, &tera_key2); + ASSERT_LT(key_operator->Compare(tera_key1, tera_key2), 0); + + key2 = "row"; + column2 = "column"; + qualifier2 = "qualifier"; + ts2 = 1; + type2 = TKT_VALUE; + key_operator->EncodeTeraKey(key2, column2, qualifier2, ts2, type2, &tera_key2); + ASSERT_GT(key_operator->Compare(tera_key1, tera_key2), 0); } -void EncodeTeraKeyPerformanceTest(const RawKeyOperator* key_operator, - const std::string& row, - const std::string& col, - const std::string& qual, - int64_t ts, - TeraKeyType type, - const std::string& desc) { - std::string tera_key; - int64_t start = get_micros(); - for (int i = 0; i < 10000000; ++i) { - key_operator->EncodeTeraKey(row, col, qual, ts, type, &tera_key); - } - int64_t end = get_micros(); - std::cout << "[Encode TeraKey Performance (" - << desc << ")] cost: " << (end - start) / 1000 << "ms\n"; +void EncodeTeraKeyPerformanceTest(const RawKeyOperator* key_operator, const std::string& row, + const std::string& col, const std::string& qual, int64_t ts, + TeraKeyType type, const std::string& desc) { + std::string tera_key; + int64_t start = get_micros(); + for (int i = 0; i < 10000000; ++i) { + key_operator->EncodeTeraKey(row, col, qual, ts, type, &tera_key); + } + int64_t end = get_micros(); + std::cout << "[Encode TeraKey Performance (" << desc << ")] cost: " << (end - start) / 1000 + << "ms\n"; } TEST(RawKeyOperatorTest, EncodeTeraKeyPerformace) { - const RawKeyOperator* keyop_bin = BinaryRawKeyOperator(); - std::string tera_key, row, col, qual; - int64_t ts; - TeraKeyType type; - row = "row"; - col = "col"; - qual = "qual"; - ts = 123456789; - type = TKT_VALUE; - - EncodeTeraKeyPerformanceTest(keyop_bin, row, col, qual, ts, type, "binary short"); - - row = "rowrowrowrowrowrowrowrowrowrowrowrowrowrowrowrowrowrowrowrow"; - col = "colcolcolcolcolcolcolcolcolcolcolcolcolcolcolcolcolcolcolcolcol"; - qual = "qualqualqualqualqualqualqualqualqualqualqualqualqualqualqualqual"; - EncodeTeraKeyPerformanceTest(keyop_bin, row, col, qual, ts, type, "binary long"); - EncodeTeraKeyPerformanceTest(keyop_bin, row, col, qual, ts, type, "binary long qualnull"); + const RawKeyOperator* keyop_bin = BinaryRawKeyOperator(); + std::string tera_key, row, col, qual; + int64_t ts; + TeraKeyType type; + row = "row"; + col = "col"; + qual = "qual"; + ts = 123456789; + type = TKT_VALUE; + + EncodeTeraKeyPerformanceTest(keyop_bin, row, col, qual, ts, type, "binary short"); + + row = "rowrowrowrowrowrowrowrowrowrowrowrowrowrowrowrowrowrowrowrow"; + col = "colcolcolcolcolcolcolcolcolcolcolcolcolcolcolcolcolcolcolcolcol"; + qual = "qualqualqualqualqualqualqualqualqualqualqualqualqualqualqualqual"; + EncodeTeraKeyPerformanceTest(keyop_bin, row, col, qual, ts, type, "binary long"); + EncodeTeraKeyPerformanceTest(keyop_bin, row, col, qual, ts, type, "binary long qualnull"); } -void ExtractTeraKeyPerformanceTest(const RawKeyOperator* key_operator, - const std::string& key, +void ExtractTeraKeyPerformanceTest(const RawKeyOperator* key_operator, const std::string& key, const std::string& desc) { - Slice row, col, qual; - int64_t ts; - TeraKeyType type; - int64_t start = get_micros(); - for (int i = 0; i < 10000000; ++i) { - key_operator->ExtractTeraKey(key, &row, &col, &qual, &ts, &type); - } - int64_t end = get_micros(); - std::cout << "[Extract TeraKey Performance (" - << desc << ")] cost: " << (end - start) / 1000 << "ms\n"; + Slice row, col, qual; + int64_t ts; + TeraKeyType type; + int64_t start = get_micros(); + for (int i = 0; i < 10000000; ++i) { + key_operator->ExtractTeraKey(key, &row, &col, &qual, &ts, &type); + } + int64_t end = get_micros(); + std::cout << "[Extract TeraKey Performance (" << desc << ")] cost: " << (end - start) / 1000 + << "ms\n"; } TEST(RawKeyOperatorTest, ExtractTeraKeyPerformace) { - const RawKeyOperator* keyop_bin = BinaryRawKeyOperator(); - std::string tera_key, row, col, qual; - row = "row"; - col = "col"; - qual = "qual"; - keyop_bin->EncodeTeraKey(row, col, qual, 0, TKT_VALUE, &tera_key); - ExtractTeraKeyPerformanceTest(keyop_bin, tera_key, "binary short"); - - row = "rowrowrowrowrowrowrowrowrowrowrowrowrowrowrowrowrowrowrowrow"; - col = "colcolcolcolcolcolcolcolcolcolcolcolcolcolcolcolcolcolcolcolcol"; - qual = "qualqualqualqualqualqualqualqualqualqualqualqualqualqualqualqual"; - keyop_bin->EncodeTeraKey(row, col, qual, 0, TKT_VALUE, &tera_key); - ExtractTeraKeyPerformanceTest(keyop_bin, tera_key, "binary long"); - - keyop_bin->EncodeTeraKey(row, col, "", 0, TKT_VALUE, &tera_key); - ExtractTeraKeyPerformanceTest(keyop_bin, tera_key, "binary long qualnull"); + const RawKeyOperator* keyop_bin = BinaryRawKeyOperator(); + std::string tera_key, row, col, qual; + row = "row"; + col = "col"; + qual = "qual"; + keyop_bin->EncodeTeraKey(row, col, qual, 0, TKT_VALUE, &tera_key); + ExtractTeraKeyPerformanceTest(keyop_bin, tera_key, "binary short"); + + row = "rowrowrowrowrowrowrowrowrowrowrowrowrowrowrowrowrowrowrowrow"; + col = "colcolcolcolcolcolcolcolcolcolcolcolcolcolcolcolcolcolcolcolcol"; + qual = "qualqualqualqualqualqualqualqualqualqualqualqualqualqualqualqual"; + keyop_bin->EncodeTeraKey(row, col, qual, 0, TKT_VALUE, &tera_key); + ExtractTeraKeyPerformanceTest(keyop_bin, tera_key, "binary long"); + + keyop_bin->EncodeTeraKey(row, col, "", 0, TKT_VALUE, &tera_key); + ExtractTeraKeyPerformanceTest(keyop_bin, tera_key, "binary long qualnull"); } -void ComparePerformanceTest(const RawKeyOperator* key_operator, - const std::string& key1, - const std::string& key2, - const std::string& desc) { - int64_t start = get_micros(); - for (int i = 0; i < 10000000; ++i) { - key_operator->Compare(key1, key2); - } - int64_t end = get_micros(); - std::cout << "[Compare Performance (" - << desc << ")] cost: " << (end - start) / 1000 << "ms\n"; +void ComparePerformanceTest(const RawKeyOperator* key_operator, const std::string& key1, + const std::string& key2, const std::string& desc) { + int64_t start = get_micros(); + for (int i = 0; i < 10000000; ++i) { + key_operator->Compare(key1, key2); + } + int64_t end = get_micros(); + std::cout << "[Compare Performance (" << desc << ")] cost: " << (end - start) / 1000 << "ms\n"; } TEST(RawKeyOperatorTest, ComparePerformace) { - const RawKeyOperator* keyop_bin = BinaryRawKeyOperator(); - const RawKeyOperator* keyop_read = ReadableRawKeyOperator(); - std::string tera_key1, tera_key2; - std::string key1, key2; - std::string column1, column2; - std::string qualifier1, qualifier2; - int64_t ts1, ts2; - TeraKeyType type1; - - key1 = "rowrowrowrowrowrowrowrowrowrowrowrowrowrowrowrow"; - column1 = "columncolumncolumncolumn"; - qualifier1 = "qualifierqualifierqualifier"; - ts1 = 123456789; - type1 = TKT_VALUE; - key2 = "row"; - column2 = "column"; - qualifier2 = "qualifier"; - ts2 = 987654321; - - keyop_bin->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key1); - keyop_bin->EncodeTeraKey(key2, column2, qualifier2, ts2, type1, &tera_key2); - ComparePerformanceTest(keyop_bin, tera_key1, tera_key2, "binary long same none"); - - keyop_bin->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key1); - keyop_bin->EncodeTeraKey(key1, column2, qualifier2, ts2, type1, &tera_key2); - ComparePerformanceTest(keyop_bin, tera_key1, tera_key2, "binary long same row"); - - keyop_bin->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key1); - keyop_bin->EncodeTeraKey(key1, column1, qualifier2, ts2, type1, &tera_key2); - ComparePerformanceTest(keyop_bin, tera_key1, tera_key2, "binary long same row/col"); - - keyop_bin->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key1); - keyop_bin->EncodeTeraKey(key1, column1, qualifier1, ts2, type1, &tera_key2); - ComparePerformanceTest(keyop_bin, tera_key1, tera_key2, "binary long same row/col/qu"); - - keyop_bin->EncodeTeraKey(key1, column1, "", ts1, type1, &tera_key1); - keyop_bin->EncodeTeraKey(key1, column1, "", ts2, type1, &tera_key2); - ComparePerformanceTest(keyop_bin, tera_key1, tera_key2, "binary long same row/col/null"); - - keyop_bin->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key1); - keyop_bin->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key2); - ComparePerformanceTest(keyop_bin, tera_key1, tera_key2, "binary long same all"); - - keyop_bin->EncodeTeraKey(key2, column2, qualifier2, ts1, type1, &tera_key1); - keyop_bin->EncodeTeraKey(key2, column2, qualifier2, ts1, type1, &tera_key2); - ComparePerformanceTest(keyop_bin, tera_key1, tera_key2, "binary short"); - - keyop_read->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key1); - keyop_read->EncodeTeraKey(key2, column2, qualifier2, ts2, type1, &tera_key2); - ComparePerformanceTest(keyop_read, tera_key1, tera_key2, "readable long same none"); - - keyop_read->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key1); - keyop_read->EncodeTeraKey(key1, column2, qualifier2, ts2, type1, &tera_key2); - ComparePerformanceTest(keyop_read, tera_key1, tera_key2, "readable long same row"); - - keyop_read->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key1); - keyop_read->EncodeTeraKey(key1, column1, qualifier2, ts2, type1, &tera_key2); - ComparePerformanceTest(keyop_read, tera_key1, tera_key2, "readable long same row/col"); - - keyop_read->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key1); - keyop_read->EncodeTeraKey(key1, column1, qualifier1, ts2, type1, &tera_key2); - ComparePerformanceTest(keyop_read, tera_key1, tera_key2, "readable long same row/col/qu"); - - keyop_read->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key1); - keyop_read->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key2); - ComparePerformanceTest(keyop_read, tera_key1, tera_key2, "readable long same all"); - - keyop_read->EncodeTeraKey(key2, column2, qualifier2, ts1, type1, &tera_key1); - keyop_read->EncodeTeraKey(key2, column2, qualifier2, ts1, type1, &tera_key2); - ComparePerformanceTest(keyop_read, tera_key1, tera_key2, "readable short"); + const RawKeyOperator* keyop_bin = BinaryRawKeyOperator(); + const RawKeyOperator* keyop_read = ReadableRawKeyOperator(); + std::string tera_key1, tera_key2; + std::string key1, key2; + std::string column1, column2; + std::string qualifier1, qualifier2; + int64_t ts1, ts2; + TeraKeyType type1; + + key1 = "rowrowrowrowrowrowrowrowrowrowrowrowrowrowrowrow"; + column1 = "columncolumncolumncolumn"; + qualifier1 = "qualifierqualifierqualifier"; + ts1 = 123456789; + type1 = TKT_VALUE; + key2 = "row"; + column2 = "column"; + qualifier2 = "qualifier"; + ts2 = 987654321; + + keyop_bin->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key1); + keyop_bin->EncodeTeraKey(key2, column2, qualifier2, ts2, type1, &tera_key2); + ComparePerformanceTest(keyop_bin, tera_key1, tera_key2, "binary long same none"); + + keyop_bin->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key1); + keyop_bin->EncodeTeraKey(key1, column2, qualifier2, ts2, type1, &tera_key2); + ComparePerformanceTest(keyop_bin, tera_key1, tera_key2, "binary long same row"); + + keyop_bin->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key1); + keyop_bin->EncodeTeraKey(key1, column1, qualifier2, ts2, type1, &tera_key2); + ComparePerformanceTest(keyop_bin, tera_key1, tera_key2, "binary long same row/col"); + + keyop_bin->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key1); + keyop_bin->EncodeTeraKey(key1, column1, qualifier1, ts2, type1, &tera_key2); + ComparePerformanceTest(keyop_bin, tera_key1, tera_key2, "binary long same row/col/qu"); + + keyop_bin->EncodeTeraKey(key1, column1, "", ts1, type1, &tera_key1); + keyop_bin->EncodeTeraKey(key1, column1, "", ts2, type1, &tera_key2); + ComparePerformanceTest(keyop_bin, tera_key1, tera_key2, "binary long same row/col/null"); + + keyop_bin->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key1); + keyop_bin->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key2); + ComparePerformanceTest(keyop_bin, tera_key1, tera_key2, "binary long same all"); + + keyop_bin->EncodeTeraKey(key2, column2, qualifier2, ts1, type1, &tera_key1); + keyop_bin->EncodeTeraKey(key2, column2, qualifier2, ts1, type1, &tera_key2); + ComparePerformanceTest(keyop_bin, tera_key1, tera_key2, "binary short"); + + keyop_read->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key1); + keyop_read->EncodeTeraKey(key2, column2, qualifier2, ts2, type1, &tera_key2); + ComparePerformanceTest(keyop_read, tera_key1, tera_key2, "readable long same none"); + + keyop_read->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key1); + keyop_read->EncodeTeraKey(key1, column2, qualifier2, ts2, type1, &tera_key2); + ComparePerformanceTest(keyop_read, tera_key1, tera_key2, "readable long same row"); + + keyop_read->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key1); + keyop_read->EncodeTeraKey(key1, column1, qualifier2, ts2, type1, &tera_key2); + ComparePerformanceTest(keyop_read, tera_key1, tera_key2, "readable long same row/col"); + + keyop_read->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key1); + keyop_read->EncodeTeraKey(key1, column1, qualifier1, ts2, type1, &tera_key2); + ComparePerformanceTest(keyop_read, tera_key1, tera_key2, "readable long same row/col/qu"); + + keyop_read->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key1); + keyop_read->EncodeTeraKey(key1, column1, qualifier1, ts1, type1, &tera_key2); + ComparePerformanceTest(keyop_read, tera_key1, tera_key2, "readable long same all"); + + keyop_read->EncodeTeraKey(key2, column2, qualifier2, ts1, type1, &tera_key1); + keyop_read->EncodeTeraKey(key2, column2, qualifier2, ts1, type1, &tera_key2); + ComparePerformanceTest(keyop_read, tera_key1, tera_key2, "readable short"); } } // namespace leveldb -int main(int argc, char* argv[]) { - return leveldb::test::RunAllTests(); -} +int main(int argc, char* argv[]) { return leveldb::test::RunAllTests(); } diff --git a/src/leveldb/util/slog.cc b/src/leveldb/util/slog.cc index a3cd2de89..d8704a4a6 100644 --- a/src/leveldb/util/slog.cc +++ b/src/leveldb/util/slog.cc @@ -14,31 +14,24 @@ namespace leveldb { static LogLevel s_log_level = ::leveldb::LOG_LEVEL_ERROR; -LogLevel GetLogLevel() { - return s_log_level; -} +LogLevel GetLogLevel() { return s_log_level; } -void SetLogLevel(LogLevel level) { - s_log_level = level; -} +void SetLogLevel(LogLevel level) { s_log_level = level; } + +void LogHandler(LogLevel level, const char* filename, int line, const char* fmt, ...) { + static const char* level_names[] = {"FATAL", "ERROR", "WARNNING", "INFO", "TRACE", "DEBUG"}; + char buf[1024]; + va_list ap; + va_start(ap, fmt); + vsnprintf(buf, 1024, fmt, ap); + va_end(ap); + + fprintf(stderr, "[LevelDB %s %s:%d] %s\n", level_names[level], filename, line, buf); + fflush(stderr); -void LogHandler(LogLevel level, const char* filename, int line, const char *fmt, ...) { - static const char* level_names[] = { "FATAL", "ERROR", "WARNNING", - "INFO", "TRACE", "DEBUG" }; - char buf[1024]; - va_list ap; - va_start(ap, fmt); - vsnprintf(buf, 1024, fmt, ap); - va_end(ap); - - fprintf(stderr, "[LevelDB %s %s:%d] %s\n", - level_names[level], - filename, line, buf); - fflush(stderr); - - if (level == ::leveldb::LOG_LEVEL_FATAL) { - abort(); - } + if (level == ::leveldb::LOG_LEVEL_FATAL) { + abort(); + } } -} // namespace leveldb +} // namespace leveldb diff --git a/src/leveldb/util/statistics.cc b/src/leveldb/util/statistics.cc deleted file mode 100644 index ea6332c8f..000000000 --- a/src/leveldb/util/statistics.cc +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "leveldb/statistics.h" - -#include - -#include "common/counter.h" -#include "util/histogram.h" - -namespace leveldb { - -class StatisticsImpl : public Statistics { -public: - StatisticsImpl() {} - - virtual ~StatisticsImpl() {} - - virtual int64_t GetTickerCount(uint32_t ticker_type) { - return counter_[ticker_type].Get(); - } - - virtual void RecordTick(uint32_t ticker_type, uint64_t count = 0) { - counter_[ticker_type].Add(count); - } - - virtual void SetTickerCount(uint32_t ticker_type, uint64_t count) { - counter_[ticker_type].Set(count); - } - - virtual void MeasureTime(uint32_t type, uint64_t time) { - hist_[type].Add(time); - } - - virtual void GetHistogramData(uint32_t type, - HistogramData* const data) { - data->median = hist_[type].Median(); - data->percentile95 = hist_[type].Percentile(95); - data->percentile99 = hist_[type].Percentile(99); - data->average = hist_[type].Average(); - data->standard_deviation = hist_[type].StandardDeviation(); - } - - virtual std::string GetHistogramString(uint32_t type) const { - return hist_[type].ToString(); - } - - virtual std::string GetBriefHistogramString(uint32_t type) { - assert(HistogramsNameMap[type].first == type); - - std::string res; - char buffer[200]; - HistogramData hData; - GetHistogramData(type, &hData); - snprintf(buffer, - 200, - "%s :=> %f(%f)", - HistogramsNameMap[type].second.c_str(), - hData.average, - hData.percentile99 - hData.median); - res.append(buffer); - res.shrink_to_fit(); - return res; - } - - void ClearHistogram(uint32_t type) { - hist_[type].Clear(); - } - - // String representation of the statistic object. - virtual std::string ToString() { - std::string res; - res.reserve(20000); - for (uint32_t i = 0; i < TickersNameMap.size(); i++) { - char buffer[200]; - snprintf(buffer, 200, "%s COUNT : %lu\n", - TickersNameMap[i].second.c_str(), GetTickerCount(TickersNameMap[i].first)); - res.append(buffer); - } - for (uint32_t i = 0; i < HistogramsNameMap.size(); i++) { - char buffer[200]; - HistogramData hData; - GetHistogramData(HistogramsNameMap[i].first, &hData); - snprintf(buffer, - 200, - "%s statistics Percentiles :=> 50 : %f 95 : %f 99 : %f\n", - HistogramsNameMap[i].second.c_str(), - hData.median, - hData.percentile95, - hData.percentile99); - res.append(buffer); - } - res.shrink_to_fit(); - return res; - } - - void ClearAll() { - for (uint32_t i = 0; i < TICKER_ENUM_MAX; i++) { - counter_[i].Clear(); - } - for (uint32_t i = 0; i < HISTOGRAM_ENUM_MAX; i++) { - hist_[i].Clear(); - } - } - -private: - tera::Counter counter_[TICKER_ENUM_MAX]; - Histogram hist_[HISTOGRAM_ENUM_MAX]; -}; - -Statistics* CreateDBStatistics() { - return new StatisticsImpl; -} - -} // namespace leveldb diff --git a/src/leveldb/util/status.cc b/src/leveldb/util/status.cc index 14b22f82e..6c11503a2 100644 --- a/src/leveldb/util/status.cc +++ b/src/leveldb/util/status.cc @@ -68,9 +68,11 @@ std::string Status::ToString() const { case kIOPermissionDenied: type = "IO Permission Denied: "; break; + case kReject: + type = "Reject: "; + break; default: - snprintf(tmp, sizeof(tmp), "Unknown code(%d): ", - static_cast(code())); + snprintf(tmp, sizeof(tmp), "Unknown code(%d): ", static_cast(code())); type = tmp; break; } diff --git a/src/leveldb/util/stop_watch.h b/src/leveldb/util/stop_watch.h new file mode 100644 index 000000000..d2f61a563 --- /dev/null +++ b/src/leveldb/util/stop_watch.h @@ -0,0 +1,42 @@ +// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#pragma once +#include "leveldb/env.h" + +namespace leveldb { +// Auto-scoped. +// Records the measure time into the corresponding histogram if statistics +// is not nullptr. It is also saved into *elapsed if the pointer is not nullptr +// and overwrite is true, it will be added to *elapsed if overwrite is false. + +// a nano second precision stopwatch +class StopWatchMicro { + public: + explicit StopWatchMicro(Env* const env, bool auto_start = false) : env_(env), start_(0) { + if (auto_start) { + Start(); + } + } + + void Start() { start_ = env_->NowMicros(); } + + uint64_t ElapsedMicros(bool reset = false) { + auto now = env_->NowMicros(); + auto elapsed = now - start_; + if (reset) { + start_ = now; + } + return elapsed; + } + + uint64_t ElapsedMicrosSafe(bool reset = false) { + return (env_ != nullptr) ? ElapsedMicros(reset) : 0U; + } + + private: + Env* const env_; + uint64_t start_; +}; + +} // namespace leveldb diff --git a/src/leveldb/util/string_ext.cc b/src/leveldb/util/string_ext.cc index 1f8c67510..0a8e7ea28 100644 --- a/src/leveldb/util/string_ext.cc +++ b/src/leveldb/util/string_ext.cc @@ -10,167 +10,159 @@ namespace leveldb { -void SplitString(const std::string& full, - const std::string& delim, +void SplitString(const std::string& full, const std::string& delim, std::vector* result) { - result->clear(); - if (full.empty()) { - return; + result->clear(); + if (full.empty()) { + return; + } + + std::string tmp; + std::string::size_type pos_begin = full.find_first_not_of(delim); + std::string::size_type comma_pos = 0; + + while (pos_begin != std::string::npos) { + comma_pos = full.find(delim, pos_begin); + if (comma_pos != std::string::npos) { + tmp = full.substr(pos_begin, comma_pos - pos_begin); + pos_begin = comma_pos + delim.length(); + } else { + tmp = full.substr(pos_begin); + pos_begin = comma_pos; } - std::string tmp; - std::string::size_type pos_begin = full.find_first_not_of(delim); - std::string::size_type comma_pos = 0; - - while (pos_begin != std::string::npos) { - comma_pos = full.find(delim, pos_begin); - if (comma_pos != std::string::npos) { - tmp = full.substr(pos_begin, comma_pos - pos_begin); - pos_begin = comma_pos + delim.length(); - } else { - tmp = full.substr(pos_begin); - pos_begin = comma_pos; - } - - if (!tmp.empty()) { - result->push_back(tmp); - tmp.clear(); - } + if (!tmp.empty()) { + result->push_back(tmp); + tmp.clear(); } + } } -void SplitStringEnd(const std::string& full, std::string* begin_part, - std::string* end_part, std::string delim) { - std::string::size_type pos = full.find_last_of(delim); - if (pos != std::string::npos && pos != 0) { - if (end_part) { - *end_part = full.substr(pos + 1); - } - if (begin_part) { - *begin_part = full.substr(0, pos); - } - } else { - if (end_part) { - *end_part = full; - } +void SplitStringEnd(const std::string& full, std::string* begin_part, std::string* end_part, + std::string delim) { + std::string::size_type pos = full.find_last_of(delim); + if (pos != std::string::npos && pos != 0) { + if (end_part) { + *end_part = full.substr(pos + 1); } -} - -void SplitStringStart(const std::string& full, std::string* begin_part, - std::string* end_part, std::string delim) { - std::string::size_type pos = full.find_first_of(delim); - if (pos == std::string::npos || (pos < full.size() - 1)) { - if (end_part) { - *end_part = full.substr(pos + 1); - } + if (begin_part) { + *begin_part = full.substr(0, pos); } - if (pos != std::string::npos && pos >= 1) { - if (begin_part) { - *begin_part = full.substr(0, pos); - } + } else { + if (end_part) { + *end_part = full; } + } } -std::string ReplaceString(const std::string& str, const std::string& src, - const std::string& dest) { - std::string ret; - - std::string::size_type pos_begin = 0; - std::string::size_type pos = str.find(src); - while (pos != std::string::npos) { - // cout <<"replacexxx:" << pos_begin <<" " << pos <<"\n"; - ret.append(str.data() + pos_begin, pos - pos_begin); - ret += dest; - pos_begin = pos + src.length(); - pos = str.find(src, pos_begin); +void SplitStringStart(const std::string& full, std::string* begin_part, std::string* end_part, + std::string delim) { + std::string::size_type pos = full.find_first_of(delim); + if (pos == std::string::npos || (pos < full.size() - 1)) { + if (end_part) { + *end_part = full.substr(pos + 1); } - if (pos_begin < str.length()) { - ret.append(str.begin() + pos_begin, str.end()); + } + if (pos != std::string::npos && pos >= 1) { + if (begin_part) { + *begin_part = full.substr(0, pos); } - return ret; + } +} + +std::string ReplaceString(const std::string& str, const std::string& src, const std::string& dest) { + std::string ret; + + std::string::size_type pos_begin = 0; + std::string::size_type pos = str.find(src); + while (pos != std::string::npos) { + // cout <<"replacexxx:" << pos_begin <<" " << pos <<"\n"; + ret.append(str.data() + pos_begin, pos - pos_begin); + ret += dest; + pos_begin = pos + src.length(); + pos = str.find(src, pos_begin); + } + if (pos_begin < str.length()) { + ret.append(str.begin() + pos_begin, str.end()); + } + return ret; } std::string TrimString(const std::string& str, const std::string& trim) { - std::string::size_type pos = str.find_first_not_of(trim); - if (pos == std::string::npos) { - return str; - } - std::string::size_type pos2 = str.find_last_not_of(trim); - if (pos2 != std::string::npos) { - return str.substr(pos, pos2 - pos + 1); - } - return str.substr(pos); + std::string::size_type pos = str.find_first_not_of(trim); + if (pos == std::string::npos) { + return str; + } + std::string::size_type pos2 = str.find_last_not_of(trim); + if (pos2 != std::string::npos) { + return str.substr(pos, pos2 - pos + 1); + } + return str.substr(pos); } bool StringEndsWith(const std::string& str, const std::string& sub_str) { - if (str.length() < sub_str.length()) { - return false; - } - if (str.substr(str.length() - sub_str.length()) != sub_str) { - return false; - } - return true; + if (str.length() < sub_str.length()) { + return false; + } + if (str.substr(str.length() - sub_str.length()) != sub_str) { + return false; + } + return true; } bool StringStartWith(const std::string& str, const std::string& sub_str) { - if (str.length() < sub_str.length()) { - return false; - } - if (str.substr(0, sub_str.length()) != sub_str) { - return false; - } - return true; + if (str.length() < sub_str.length()) { + return false; + } + if (str.substr(0, sub_str.length()) != sub_str) { + return false; + } + return true; } -char* StringAsArray(std::string* str) { - return str->empty() ? NULL : &*str->begin(); -} +char* StringAsArray(std::string* str) { return str->empty() ? NULL : &*str->begin(); } std::string Uint64ToString(uint64_t i, int base) { - std::stringstream ss; - if (base == 16) { - ss << std::hex << std::setfill('0') << std::setw(16) << i; - } else if (base == 8) { - ss << std::oct << std::setfill('0') << std::setw(8) << i; - } else { - ss << i; - } - return ss.str(); + std::stringstream ss; + if (base == 16) { + ss << std::hex << std::setfill('0') << std::setw(16) << i; + } else if (base == 8) { + ss << std::oct << std::setfill('0') << std::setw(8) << i; + } else { + ss << i; + } + return ss.str(); } uint64_t StringToUint64(const std::string& int_str, int base) { - uint64_t value; - std::istringstream buffer(int_str); - if (base == 16) { - buffer >> std::hex >> value; - } else if (base == 8) { - buffer >> std::oct >> value; - } else { - buffer >> value; - } - return value; + uint64_t value; + std::istringstream buffer(int_str); + if (base == 16) { + buffer >> std::hex >> value; + } else if (base == 8) { + buffer >> std::oct >> value; + } else { + buffer >> value; + } + return value; } -void SplitStringPath(const std::string& full_path, - std::string* dir_part, - std::string* file_part) { - std::string::size_type pos = full_path.rfind("/"); - if (pos != std::string::npos) { - if (dir_part) { - *dir_part = full_path.substr(0, pos); - } - if (file_part) { - *file_part = full_path.substr(pos + 1); - } - } else { - if (dir_part) { - *dir_part = full_path; - } +void SplitStringPath(const std::string& full_path, std::string* dir_part, std::string* file_part) { + std::string::size_type pos = full_path.rfind("/"); + if (pos != std::string::npos) { + if (dir_part) { + *dir_part = full_path.substr(0, pos); + } + if (file_part) { + *file_part = full_path.substr(pos + 1); } + } else { + if (dir_part) { + *dir_part = full_path; + } + } } -bool IsExist(const std::string& path) { - return access(path.c_str(), R_OK) == 0; -} +bool IsExist(const std::string& path) { return access(path.c_str(), R_OK) == 0; } -} // namespace leveldb +} // namespace leveldb diff --git a/src/leveldb/util/string_ext.h b/src/leveldb/util/string_ext.h index db15bb03c..ab9d75d04 100644 --- a/src/leveldb/util/string_ext.h +++ b/src/leveldb/util/string_ext.h @@ -12,33 +12,22 @@ namespace leveldb { -void SplitString(const std::string& full, - const std::string& delim, +void SplitString(const std::string& full, const std::string& delim, std::vector* result); -void SplitStringEnd(const std::string& full, - std::string* begin_part, - std::string* end_part, +void SplitStringEnd(const std::string& full, std::string* begin_part, std::string* end_part, std::string delim = "."); -void SplitStringStart(const std::string& full, - std::string* begin_part, - std::string* end_part, +void SplitStringStart(const std::string& full, std::string* begin_part, std::string* end_part, std::string delim = "."); -std::string ReplaceString(const std::string& str, - const std::string& src, - const std::string& dest); +std::string ReplaceString(const std::string& str, const std::string& src, const std::string& dest); +std::string TrimString(const std::string& str, const std::string& trim = " "); -std::string TrimString(const std::string& str, - const std::string& trim = " "); +bool StringEndsWith(const std::string& str, const std::string& sub_str); -bool StringEndsWith(const std::string& str, - const std::string& sub_str); - -bool StringStartWith(const std::string& str, - const std::string& sub_str); +bool StringStartWith(const std::string& str, const std::string& sub_str); char* StringAsArray(std::string* str); @@ -48,12 +37,10 @@ uint64_t StringToUint64(const std::string& int_str, int base = 10); // file path -void SplitStringPath(const std::string& full_path, - std::string* dir_part, - std::string* file_part); +void SplitStringPath(const std::string& full_path, std::string* dir_part, std::string* file_part); bool IsExist(const std::string& file_path); -} // namespace leveldb +} // namespace leveldb -#endif // TERA_LEVELDB_STRING_EXT_H +#endif // TERA_LEVELDB_STRING_EXT_H diff --git a/src/leveldb/util/tera_key.cc b/src/leveldb/util/tera_key.cc index 977b789cc..01ef262f5 100644 --- a/src/leveldb/util/tera_key.cc +++ b/src/leveldb/util/tera_key.cc @@ -13,115 +13,103 @@ namespace leveldb { bool TeraKey::IsTypeAllowUserSetTimestamp(TeraKeyType type) { - bool is_allow = false; - switch (type) { + bool is_allow = false; + switch (type) { case TKT_DEL: case TKT_DEL_COLUMN: case TKT_DEL_QUALIFIERS: case TKT_DEL_QUALIFIER: case TKT_VALUE: - is_allow = true; - break; + is_allow = true; + break; default: - break; - } - return is_allow; + break; + } + return is_allow; } TeraKey::TeraKey(const RawKeyOperator* op) - : operator_(op), - timestamp_(-1), - type_(TKT_FORSEEK), - is_empty_(true) { -} + : operator_(op), timestamp_(-1), type_(TKT_FORSEEK), is_empty_(true) {} TeraKey::TeraKey(const TeraKey& tk) { - *this = tk; - operator_->ExtractTeraKey(raw_key_, &key_, &column_, - &qualifier_, ×tamp_, &type_); + *this = tk; + operator_->ExtractTeraKey(raw_key_, &key_, &column_, &qualifier_, ×tamp_, &type_); } TeraKey::~TeraKey() {} bool TeraKey::Encode(const std::string& key, const std::string& column, - const std::string& qualifier, int64_t timestamp, - TeraKeyType type) { - is_empty_ = false; - operator_->EncodeTeraKey(key, column, qualifier, timestamp, type, &raw_key_); - return operator_->ExtractTeraKey(raw_key_, &key_, &column_, - &qualifier_, ×tamp_, &type_); + const std::string& qualifier, int64_t timestamp, TeraKeyType type) { + is_empty_ = false; + operator_->EncodeTeraKey(key, column, qualifier, timestamp, type, &raw_key_); + return operator_->ExtractTeraKey(raw_key_, &key_, &column_, &qualifier_, ×tamp_, &type_); } bool TeraKey::Decode(const Slice& raw_key) { - raw_key_ = raw_key.ToString(); - bool res = - operator_->ExtractTeraKey(raw_key_, &key_, &column_, &qualifier_, ×tamp_, &type_); - if (res) { - is_empty_ = false; - return true; - } else { - return false; - } + raw_key_ = raw_key.ToString(); + bool res = operator_->ExtractTeraKey(raw_key_, &key_, &column_, &qualifier_, ×tamp_, &type_); + if (res) { + is_empty_ = false; + return true; + } else { + return false; + } } -bool TeraKey::SameRow(const TeraKey& tk) { - return (key_.compare(tk.key()) == 0); -} +bool TeraKey::SameRow(const TeraKey& tk) { return (key_.compare(tk.key()) == 0); } bool TeraKey::SameColumn(const TeraKey& tk) { - return (key_.compare(tk.key()) == 0 - && column_.compare(tk.column()) == 0); + return (key_.compare(tk.key()) == 0 && column_.compare(tk.column()) == 0); } bool TeraKey::SameQualifier(const TeraKey& tk) { - return (key_.compare(tk.key()) == 0 - && column_.compare(tk.column()) == 0 - && qualifier_.compare(tk.qualifier()) == 0); + return (key_.compare(tk.key()) == 0 && column_.compare(tk.column()) == 0 && + qualifier_.compare(tk.qualifier()) == 0); } bool TeraKey::IsDel() { - switch (type_) { + switch (type_) { case TKT_DEL: case TKT_DEL_COLUMN: case TKT_DEL_QUALIFIERS: case TKT_DEL_QUALIFIER: - return true; + return true; default: - return false; - } + return false; + } } int TeraKey::Compare(const TeraKey& tk) { - int res = key_.compare(tk.key()); - if (res != 0) { - return res; - } - res = column_.compare(tk.column()); - if (res != 0) { - return res; - } - res = qualifier_.compare(tk.qualifier()); - if (res != 0) { - return res; - } - if (timestamp_ != tk.timestamp()) { - return timestamp_ > tk.timestamp() ? 1 : -1; - } - if (type_ != tk.type()) { - return type_ > tk.type() ? 1 : -1; - } else { - return 0; - } + int res = key_.compare(tk.key()); + if (res != 0) { + return res; + } + res = column_.compare(tk.column()); + if (res != 0) { + return res; + } + res = qualifier_.compare(tk.qualifier()); + if (res != 0) { + return res; + } + if (timestamp_ != tk.timestamp()) { + return timestamp_ > tk.timestamp() ? 1 : -1; + } + if (type_ != tk.type()) { + return type_ > tk.type() ? 1 : -1; + } else { + return 0; + } } std::string TeraKey::DebugString() { - std::string r; - r.append(EscapeString(key_) + " : "); - r.append(EscapeString(column_) + " : "); - r.append(EscapeString(qualifier_) + " : "); - AppendNumberTo(&r, timestamp_); - r.append(" : "); - AppendNumberTo(&r, type_); - return r; + std::string r; + r.append(EscapeString(key_) + " : "); + r.append(EscapeString(column_) + " : "); + r.append(EscapeString(qualifier_) + " : "); + AppendNumberTo(&r, timestamp_); + r.append(" : "); + AppendNumberTo(&r, type_); + return r; } -} // namespace leveldb +} // namespace leveldb diff --git a/src/leveldb/util/tera_key_test.cc b/src/leveldb/util/tera_key_test.cc index 6fc733bd7..e84ec437c 100644 --- a/src/leveldb/util/tera_key_test.cc +++ b/src/leveldb/util/tera_key_test.cc @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include"leveldb/tera_key.h" -#include"leveldb/raw_key_operator.h" +#include "leveldb/tera_key.h" +#include "leveldb/raw_key_operator.h" #include #include @@ -15,78 +15,76 @@ namespace leveldb { class TeraKeyTest {}; void TestFunc(const RawKeyOperator* op) { - TeraKey tk(op); - ASSERT_TRUE(tk.empty()); - std::string key("row_key"); - std::string column("column"); - std::string qualifier("qualifier"); - int64_t timestamp = 0x0001020304050607; - TeraKeyType type = TKT_VALUE; + TeraKey tk(op); + ASSERT_TRUE(tk.empty()); + std::string key("row_key"); + std::string column("column"); + std::string qualifier("qualifier"); + int64_t timestamp = 0x0001020304050607; + TeraKeyType type = TKT_VALUE; - ASSERT_TRUE(tk.Encode(key, column, qualifier, timestamp, type)); - ASSERT_TRUE(!tk.empty()); - ASSERT_EQ(tk.key().ToString(), key); - ASSERT_EQ(tk.column().ToString(), column); - ASSERT_EQ(tk.qualifier().ToString(), qualifier); - ASSERT_EQ(tk.timestamp(), timestamp); - ASSERT_EQ(tk.type(), type); - std::cout << tk.DebugString() << std::endl; + ASSERT_TRUE(tk.Encode(key, column, qualifier, timestamp, type)); + ASSERT_TRUE(!tk.empty()); + ASSERT_EQ(tk.key().ToString(), key); + ASSERT_EQ(tk.column().ToString(), column); + ASSERT_EQ(tk.qualifier().ToString(), qualifier); + ASSERT_EQ(tk.timestamp(), timestamp); + ASSERT_EQ(tk.type(), type); + std::cout << tk.DebugString() << std::endl; - std::string tera_key = tk.raw_key().ToString(); - TeraKey tk2(op); - ASSERT_TRUE(tk2.Decode(tera_key)); - ASSERT_EQ(tk2.Compare(tk), 0); - ASSERT_TRUE(!tk2.empty()); - ASSERT_EQ(tk2.key().ToString(), key); - ASSERT_EQ(tk2.column().ToString(), column); - ASSERT_EQ(tk2.qualifier().ToString(), qualifier); - ASSERT_EQ(tk2.timestamp(), timestamp); - ASSERT_EQ(tk2.type(), type); - std::cout << tk2.DebugString() << std::endl; + std::string tera_key = tk.raw_key().ToString(); + TeraKey tk2(op); + ASSERT_TRUE(tk2.Decode(tera_key)); + ASSERT_EQ(tk2.Compare(tk), 0); + ASSERT_TRUE(!tk2.empty()); + ASSERT_EQ(tk2.key().ToString(), key); + ASSERT_EQ(tk2.column().ToString(), column); + ASSERT_EQ(tk2.qualifier().ToString(), qualifier); + ASSERT_EQ(tk2.timestamp(), timestamp); + ASSERT_EQ(tk2.type(), type); + std::cout << tk2.DebugString() << std::endl; - ASSERT_TRUE(tk.SameRow(tk2)); - ASSERT_TRUE(tk.SameColumn(tk2)); - ASSERT_TRUE(tk.SameQualifier(tk2)); + ASSERT_TRUE(tk.SameRow(tk2)); + ASSERT_TRUE(tk.SameColumn(tk2)); + ASSERT_TRUE(tk.SameQualifier(tk2)); - ASSERT_TRUE(tk2.Encode("haha", column, qualifier, 0, TKT_VALUE)); - ASSERT_LT(tk2.Compare(tk), 0); - ASSERT_TRUE(!tk.SameRow(tk2)); - ASSERT_TRUE(!tk.SameColumn(tk2)); - ASSERT_TRUE(!tk.SameQualifier(tk2)); - std::cout << tk2.DebugString() << std::endl; + ASSERT_TRUE(tk2.Encode("haha", column, qualifier, 0, TKT_VALUE)); + ASSERT_LT(tk2.Compare(tk), 0); + ASSERT_TRUE(!tk.SameRow(tk2)); + ASSERT_TRUE(!tk.SameColumn(tk2)); + ASSERT_TRUE(!tk.SameQualifier(tk2)); + std::cout << tk2.DebugString() << std::endl; - ASSERT_TRUE(tk2.Encode(key, "hello", "world", 0, TKT_VALUE)); - ASSERT_GT(tk2.Compare(tk), 0); - ASSERT_TRUE(tk.SameRow(tk2)); - ASSERT_TRUE(!tk.SameColumn(tk2)); - ASSERT_TRUE(!tk.SameQualifier(tk2)); - std::cout << tk2.DebugString() << std::endl; + ASSERT_TRUE(tk2.Encode(key, "hello", "world", 0, TKT_VALUE)); + ASSERT_GT(tk2.Compare(tk), 0); + ASSERT_TRUE(tk.SameRow(tk2)); + ASSERT_TRUE(!tk.SameColumn(tk2)); + ASSERT_TRUE(!tk.SameQualifier(tk2)); + std::cout << tk2.DebugString() << std::endl; - ASSERT_TRUE(tk2.Encode(key, column, "world", 0, TKT_VALUE)); - ASSERT_GT(tk2.Compare(tk), 0); - ASSERT_TRUE(tk.SameRow(tk2)); - ASSERT_TRUE(tk.SameColumn(tk2)); - ASSERT_TRUE(!tk.SameQualifier(tk2)); - std::cout << tk2.DebugString() << std::endl; + ASSERT_TRUE(tk2.Encode(key, column, "world", 0, TKT_VALUE)); + ASSERT_GT(tk2.Compare(tk), 0); + ASSERT_TRUE(tk.SameRow(tk2)); + ASSERT_TRUE(tk.SameColumn(tk2)); + ASSERT_TRUE(!tk.SameQualifier(tk2)); + std::cout << tk2.DebugString() << std::endl; - TeraKey tk3(tk); - ASSERT_TRUE(tk.Encode("haha", "hello", "world", 0, TKT_VALUE)); - ASSERT_GT(tk3.Compare(tk), 0); - ASSERT_TRUE(!tk3.empty()); - ASSERT_EQ(tk3.key().ToString(), key); - ASSERT_EQ(tk3.column().ToString(), column); - ASSERT_EQ(tk3.qualifier().ToString(), qualifier); - ASSERT_EQ(tk3.timestamp(), timestamp); - ASSERT_EQ(tk3.type(), type); - std::cout << tk3.DebugString() << std::endl; + TeraKey tk3(tk); + ASSERT_TRUE(tk.Encode("haha", "hello", "world", 0, TKT_VALUE)); + ASSERT_GT(tk3.Compare(tk), 0); + ASSERT_TRUE(!tk3.empty()); + ASSERT_EQ(tk3.key().ToString(), key); + ASSERT_EQ(tk3.column().ToString(), column); + ASSERT_EQ(tk3.qualifier().ToString(), qualifier); + ASSERT_EQ(tk3.timestamp(), timestamp); + ASSERT_EQ(tk3.type(), type); + std::cout << tk3.DebugString() << std::endl; } TEST(TeraKeyTest, Readable) { - TestFunc(ReadableRawKeyOperator()); - TestFunc(BinaryRawKeyOperator()); + TestFunc(ReadableRawKeyOperator()); + TestFunc(BinaryRawKeyOperator()); } } // namespace leveldb -int main(int argc, char* argv[]) { - return leveldb::test::RunAllTests(); -} +int main(int argc, char* argv[]) { return leveldb::test::RunAllTests(); } diff --git a/src/leveldb/util/testharness.h b/src/leveldb/util/testharness.h index 7ff67739e..8560157f2 100644 --- a/src/leveldb/util/testharness.h +++ b/src/leveldb/util/testharness.h @@ -51,9 +51,7 @@ class Tester { std::stringstream ss_; public: - Tester(const char* f, int l) - : ok_(true), fname_(f), line_(l) { - } + Tester(const char* f, int l) : ok_(true), fname_(f), line_(l) {} ~Tester() { if (!ok_) { @@ -78,22 +76,22 @@ class Tester { return *this; } -#define BINARY_OP(name,op) \ - template \ - Tester& name(const X& x, const Y& y) { \ - if (! (x op y)) { \ - ss_ << " failed: " << x << (" " #op " ") << y; \ - ok_ = false; \ - } \ - return *this; \ +#define BINARY_OP(name, op) \ + template \ + Tester& name(const X& x, const Y& y) { \ + if (!(x op y)) { \ + ss_ << " failed: " << x << (" " #op " ") << y; \ + ok_ = false; \ + } \ + return *this; \ } - BINARY_OP(IsEq, ==) - BINARY_OP(IsNe, !=) - BINARY_OP(IsGe, >=) - BINARY_OP(IsGt, >) - BINARY_OP(IsLe, <=) - BINARY_OP(IsLt, <) + BINARY_OP(IsEq, == ) + BINARY_OP(IsNe, != ) + BINARY_OP(IsGe, >= ) + BINARY_OP(IsGt, > ) + BINARY_OP(IsLe, <= ) + BINARY_OP(IsLt, < ) #undef BINARY_OP // Attach the specified value to the error message if an error has occurred @@ -108,34 +106,33 @@ class Tester { #define ASSERT_TRUE(c) ::leveldb::test::Tester(__FILE__, __LINE__).Is((c), #c) #define ASSERT_OK(s) ::leveldb::test::Tester(__FILE__, __LINE__).IsOk((s)) -#define ASSERT_EQ(a,b) ::leveldb::test::Tester(__FILE__, __LINE__).IsEq((a),(b)) -#define ASSERT_NE(a,b) ::leveldb::test::Tester(__FILE__, __LINE__).IsNe((a),(b)) -#define ASSERT_GE(a,b) ::leveldb::test::Tester(__FILE__, __LINE__).IsGe((a),(b)) -#define ASSERT_GT(a,b) ::leveldb::test::Tester(__FILE__, __LINE__).IsGt((a),(b)) -#define ASSERT_LE(a,b) ::leveldb::test::Tester(__FILE__, __LINE__).IsLe((a),(b)) -#define ASSERT_LT(a,b) ::leveldb::test::Tester(__FILE__, __LINE__).IsLt((a),(b)) - -#define TCONCAT(a,b) TCONCAT1(a,b) -#define TCONCAT1(a,b) a##b - -#define TEST(base,name) \ -class TCONCAT(_Test_,name) : public base { \ - public: \ - void _Run(); \ - static void _RunIt() { \ - TCONCAT(_Test_,name) t; \ - t._Run(); \ - } \ -}; \ -bool TCONCAT(_Test_ignored_,name) = \ - ::leveldb::test::RegisterTest(#base, #name, &TCONCAT(_Test_,name)::_RunIt); \ -void TCONCAT(_Test_,name)::_Run() +#define ASSERT_EQ(a, b) ::leveldb::test::Tester(__FILE__, __LINE__).IsEq((a), (b)) +#define ASSERT_NE(a, b) ::leveldb::test::Tester(__FILE__, __LINE__).IsNe((a), (b)) +#define ASSERT_GE(a, b) ::leveldb::test::Tester(__FILE__, __LINE__).IsGe((a), (b)) +#define ASSERT_GT(a, b) ::leveldb::test::Tester(__FILE__, __LINE__).IsGt((a), (b)) +#define ASSERT_LE(a, b) ::leveldb::test::Tester(__FILE__, __LINE__).IsLe((a), (b)) +#define ASSERT_LT(a, b) ::leveldb::test::Tester(__FILE__, __LINE__).IsLt((a), (b)) + +#define TCONCAT(a, b) TCONCAT1(a, b) +#define TCONCAT1(a, b) a##b + +#define TEST(base, name) \ + class TCONCAT(_Test_, name) : public base { \ + public: \ + void _Run(); \ + static void _RunIt() { \ + TCONCAT(_Test_, name) t; \ + t._Run(); \ + } \ + }; \ + bool TCONCAT(_Test_ignored_, name) = \ + ::leveldb::test::RegisterTest(#base, #name, &TCONCAT(_Test_, name)::_RunIt); \ + void TCONCAT(_Test_, name)::_Run() // Register the specified test. Typically not used directly, but // invoked via the macro expansion of TEST. extern bool RegisterTest(const char* base, const char* name, void (*func)()); - } // namespace test } // namespace leveldb diff --git a/src/leveldb/util/testutil.cc b/src/leveldb/util/testutil.cc index 0d0ba8e9a..03f7851f9 100644 --- a/src/leveldb/util/testutil.cc +++ b/src/leveldb/util/testutil.cc @@ -16,7 +16,7 @@ namespace test { Slice RandomString(Random* rnd, int len, std::string* dst) { dst->resize(len); for (int i = 0; i < len; i++) { - (*dst)[i] = static_cast(' ' + rnd->Uniform(95)); // ' ' .. '~' + (*dst)[i] = static_cast(' ' + rnd->Uniform(95)); // ' ' .. '~' } return Slice(*dst); } @@ -24,9 +24,7 @@ Slice RandomString(Random* rnd, int len, std::string* dst) { std::string RandomKey(Random* rnd, int len) { // Make sure to generate a wide variety of characters so we // test the boundary conditions for short-key optimizations. - static const char kTestChars[] = { - '\0', '\1', 'a', 'b', 'c', 'd', 'e', '\xfd', '\xfe', '\xff' - }; + static const char kTestChars[] = {'\0', '\1', 'a', 'b', 'c', 'd', 'e', '\xfd', '\xfe', '\xff'}; std::string result; for (int i = 0; i < len; i++) { result += kTestChars[rnd->Uniform(sizeof(kTestChars))]; @@ -34,9 +32,8 @@ std::string RandomKey(Random* rnd, int len) { return result; } - -extern Slice CompressibleString(Random* rnd, double compressed_fraction, - int len, std::string* dst) { +extern Slice CompressibleString(Random* rnd, double compressed_fraction, int len, + std::string* dst) { int raw = static_cast(len * compressed_fraction); if (raw < 1) raw = 1; std::string raw_data; diff --git a/src/leveldb/util/testutil.h b/src/leveldb/util/testutil.h index a578f9ab9..1c8847bdf 100644 --- a/src/leveldb/util/testutil.h +++ b/src/leveldb/util/testutil.h @@ -27,8 +27,7 @@ extern std::string RandomKey(Random* rnd, int len); // Store in *dst a string of length "len" that will compress to // "N*compressed_fraction" bytes and return a Slice that references // the generated data. -extern Slice CompressibleString(Random* rnd, double compressed_fraction, - int len, std::string* dst); +extern Slice CompressibleString(Random* rnd, double compressed_fraction, int len, std::string* dst); // A wrapper that allows injection of errors. class ErrorEnv : public EnvWrapper { @@ -36,12 +35,10 @@ class ErrorEnv : public EnvWrapper { bool writable_file_error_; int num_writable_file_errors_; - ErrorEnv() : EnvWrapper(Env::Default()), - writable_file_error_(false), - num_writable_file_errors_(0) { } + ErrorEnv() + : EnvWrapper(Env::Default()), writable_file_error_(false), num_writable_file_errors_(0) {} - virtual Status NewWritableFile(const std::string& fname, - WritableFile** result, + virtual Status NewWritableFile(const std::string& fname, WritableFile** result, const EnvOptions& options) { if (writable_file_error_ && num_writable_file_errors_ < 10) { ++num_writable_file_errors_; diff --git a/src/leveldb/util/thread.cc b/src/leveldb/util/thread.cc index 2fc63ab09..4958ef3bd 100644 --- a/src/leveldb/util/thread.cc +++ b/src/leveldb/util/thread.cc @@ -6,55 +6,53 @@ namespace leveldb { -Thread::Thread() - : started_(false), - id_(0) {} +Thread::Thread() : started_(false), id_(0) {} Thread::~Thread() { - Cancel(); - Join(); + Cancel(); + Join(); } bool Thread::Start() { - { - MutexLock lock(&mutex_); - if (!started_) { - started_ = true; - } else { - return false; - } + { + MutexLock lock(&mutex_); + if (!started_) { + started_ = true; + } else { + return false; } + } - if (0 != pthread_create(&id_, NULL, StartRunner, this)) { - started_ = false; - return false; - } + if (0 != pthread_create(&id_, NULL, StartRunner, this)) { + started_ = false; + return false; + } - return true; + return true; } void Thread::Join() { - if (started_) { - pthread_join(id_, NULL); - } + if (started_) { + pthread_join(id_, NULL); + } } void Thread::Cancel() { - if (started_) { - pthread_cancel(id_); - } + if (started_) { + pthread_cancel(id_); + } } void Thread::Stop() { - MutexLock lock(&mutex_); - started_ = false; + MutexLock lock(&mutex_); + started_ = false; } void* Thread::StartRunner(void* params) { - Thread* runner = static_cast(params); - runner->Run(params); - runner->Stop(); - return NULL; + Thread* runner = static_cast(params); + runner->Run(params); + runner->Stop(); + return NULL; } -} // namespace leveldb +} // namespace leveldb diff --git a/src/leveldb/util/thread.h b/src/leveldb/util/thread.h index 65609a284..68341bfa7 100644 --- a/src/leveldb/util/thread.h +++ b/src/leveldb/util/thread.h @@ -12,37 +12,35 @@ namespace leveldb { class Thread { -public: - Thread(); - virtual ~Thread(); - - bool Start(); - void Join(); - void Cancel(); - pthread_t Id() const; - bool IsRunning() const; - - virtual void Run(void* params) = 0; - -private: - void Stop(); - static void* StartRunner(void* params); - -private: - bool started_; - pthread_t id_; - mutable port::Mutex mutex_; + public: + Thread(); + virtual ~Thread(); + + bool Start(); + void Join(); + void Cancel(); + pthread_t Id() const; + bool IsRunning() const; + + virtual void Run(void* params) = 0; + + private: + void Stop(); + static void* StartRunner(void* params); + + private: + bool started_; + pthread_t id_; + mutable port::Mutex mutex_; }; -inline pthread_t Thread::Id() const { - return id_; -} +inline pthread_t Thread::Id() const { return id_; } inline bool Thread::IsRunning() const { - MutexLock lock(&mutex_); - return started_; + MutexLock lock(&mutex_); + return started_; } -} // namespace leveldb +} // namespace leveldb -#endif // LEVELDB_UTIL_THREAD_H_ +#endif // LEVELDB_UTIL_THREAD_H_ diff --git a/src/leveldb/util/thread_pool.cc b/src/leveldb/util/thread_pool.cc index e28c08952..d4e45e5d9 100644 --- a/src/leveldb/util/thread_pool.cc +++ b/src/leveldb/util/thread_pool.cc @@ -4,6 +4,7 @@ // // Author: leiliyuan@baidu.com +#include "glog/logging.h" #include "leveldb/env.h" #include "util/thread_pool.h" #include "util/mutexlock.h" @@ -53,8 +54,8 @@ void ThreadPool::SetBackgroundThreads(int num) { total_threads_limit_ = num; } -int64_t ThreadPool::Schedule(void (*function)(void*), void* arg, - double priority, int64_t wait_time_millisec) { +int64_t ThreadPool::Schedule(void (*function)(void*), void* arg, double priority, + int64_t wait_time_millisec) { assert(wait_time_millisec >= 0); MutexLock lock(&mutex_); if (exit_all_threads_) { @@ -175,8 +176,8 @@ void ThreadPool::BGThread() { void* arg = bg_item.arg; latest_.erase(it); mutex_.Unlock(); - Log(info_log_, "[ThreadPool(%d/%d)] Do thread id = %ld score = %.2f", - active_number_, total_threads_limit_, bg_item.id, bg_item.priority); + LOG(INFO) << "[ThreadPool(" << active_number_ << "/" << total_threads_limit_ + << ")] Do thread id = " << bg_item.id << " score = " << bg_item.priority; (*function)(arg); mutex_.Lock(); } @@ -205,11 +206,10 @@ void ThreadPool::PutInQueue(BGItem& bg_item, int64_t wait_time_millisec) { } bool ThreadPool::IsLatest(const BGItem& latest, double priority, int64_t exe_time) { - double priority_diff = std::max(latest.priority - priority, - priority - latest.priority); + double priority_diff = std::max(latest.priority - priority, priority - latest.priority); bool same_time = (latest.exe_time == exe_time); bool in_pri_queue = latest.exe_time == 0; return (priority_diff < 1e-4) && (in_pri_queue || same_time); } -} // namespace leveldb +} // namespace leveldb diff --git a/src/leveldb/util/thread_pool.h b/src/leveldb/util/thread_pool.h index 85866bb83..d21f9037e 100644 --- a/src/leveldb/util/thread_pool.h +++ b/src/leveldb/util/thread_pool.h @@ -17,7 +17,7 @@ namespace leveldb { class ThreadPool { -public: + public: ThreadPool(); ~ThreadPool(); @@ -35,7 +35,7 @@ class ThreadPool { void SetLogger(Logger* info_log) { info_log_ = info_log; } int64_t GetPendingTaskNum(); -private: + private: struct BGItem { void* arg; void (*function)(void*); @@ -83,6 +83,6 @@ class ThreadPool { BGMap latest_; }; -} // namespace leveldb +} // namespace leveldb -#endif // LEVELDB_THREAD_POOL_H_ +#endif // LEVELDB_THREAD_POOL_H_ diff --git a/src/load_balancer/action.h b/src/load_balancer/action.h index 5814d69aa..593d4d1ae 100644 --- a/src/load_balancer/action.h +++ b/src/load_balancer/action.h @@ -12,40 +12,33 @@ namespace tera { namespace load_balancer { class Action { -public: - enum class Type { - ASSIGN, - MOVE, - SWAP, - EMPTY, - }; + public: + enum class Type { + ASSIGN, + MOVE, + SWAP, + EMPTY, + }; - Type GetType() const { - return type_; - } + Type GetType() const { return type_; } - std::string GetGeneratorName() const { - return generator_; - } + std::string GetGeneratorName() const { return generator_; } -public: - Action(Type t, const std::string& generator) : - type_(t), - generator_(generator) { - } + public: + Action(Type t, const std::string& generator) : type_(t), generator_(generator) {} - virtual ~Action() {} + virtual ~Action() {} - virtual Action* UndoAction() = 0; + virtual Action* UndoAction() = 0; - virtual std::string ToString() const = 0; + virtual std::string ToString() const = 0; -private: - Type type_; - std::string generator_; + private: + Type type_; + std::string generator_; }; -} // namespace load_balancer -} // namespace tera +} // namespace load_balancer +} // namespace tera -#endif // TERA_LOAD_BALANCER_ACTION_H_ +#endif // TERA_LOAD_BALANCER_ACTION_H_ diff --git a/src/load_balancer/action_generator.h b/src/load_balancer/action_generator.h index 77403bfe1..38bcb3b22 100644 --- a/src/load_balancer/action_generator.h +++ b/src/load_balancer/action_generator.h @@ -5,6 +5,7 @@ #ifndef TERA_LOAD_BALANCER_ACTION_GENERATOR_H_ #define TERA_LOAD_BALANCER_ACTION_GENERATOR_H_ +#include #include #include #include @@ -16,52 +17,150 @@ namespace tera { namespace load_balancer { -const uint32_t kInvalidNodeIndex = std::numeric_limits::max(); -const uint32_t kInvalidTabletIndex = std::numeric_limits::max(); - class ActionGenerator { -public: - virtual ~ActionGenerator() {} + public: + virtual ~ActionGenerator() {} + + virtual Action* Generate(const std::shared_ptr& cluster) = 0; + + virtual std::string Name() = 0; + + public: + static uint32_t PickRandomNode(const std::shared_ptr& cluster) { + if (cluster->tablet_node_num_ > 0) { + return Random::Rand(0, cluster->tablet_node_num_); + } else { + return kInvalidNodeIndex; + } + } + + static uint32_t PickRandomTabletFromSourceNode(const std::shared_ptr& cluster, + uint32_t node_index, + const std::function& is_proper) { + uint32_t tablet_num = cluster->tablets_per_node_[node_index].size(); + if (tablet_num < 1) { + return kInvalidTabletIndex; + } + + uint32_t tablet_index = kInvalidTabletIndex; + uint32_t cnt = 0; + while (true) { + ++cnt; + if (cnt > 5 * tablet_num) { + tablet_index = kInvalidTabletIndex; + break; + } + + uint32_t rand = Random::Rand(0, tablet_num); + tablet_index = cluster->tablets_per_node_[node_index][rand]; + if (is_proper(tablet_index)) { + break; + } + } + + return tablet_index; + } + + // pick a different node with the picked_node_index + static uint32_t PickRandomDestNode( + const std::shared_ptr& cluster, uint32_t picked_node_index, + uint32_t chosen_tablet_index, + const std::function& is_proper_location) { + if (cluster->tablet_node_num_ < 2) { + return kInvalidNodeIndex; + } - virtual Action* Generate(const std::shared_ptr& cluster) = 0; + uint32_t node_index = kInvalidNodeIndex; + uint32_t cnt = 0; + while (true) { + ++cnt; + if (cnt > 5 * cluster->tablet_node_num_) { + node_index = kInvalidNodeIndex; + break; + } - virtual std::string Name() = 0; + node_index = PickRandomNode(cluster); + if (node_index == picked_node_index) { + continue; + } + if (is_proper_location(chosen_tablet_index, node_index)) { + break; + } + } + + return node_index; + } - virtual uint32_t PickRandomNode(const std::shared_ptr& cluster) { - if (cluster->tablet_node_num_ > 0) { - return Random::Rand(0, cluster->tablet_node_num_); - } else { - return kInvalidNodeIndex; - } + static uint32_t PickLightestNode( + const std::shared_ptr& cluster, const std::vector& sorted_node_index, + uint32_t chosen_tablet_index, + const std::function& is_proper_location) { + uint32_t node_num = sorted_node_index.size(); + if (node_num < 1) { + return kInvalidNodeIndex; } - // pick a different node with the picked_index - virtual uint32_t PickOtherRandomNode(const std::shared_ptr& cluster, - const uint32_t picked_index) { - assert(cluster->tablet_node_num_ >= 2); - - while (true) { - uint32_t node_index = PickRandomNode(cluster); - if (node_index != picked_index) { - return node_index; - } - } + uint32_t i = 0; + while (!is_proper_location(chosen_tablet_index, sorted_node_index[i])) { + ++i; + if (i == node_num) { + return kInvalidNodeIndex; + } } - virtual uint32_t PickRandomTabletOfNode(const std::shared_ptr& cluster, - const uint32_t node_index) { - uint32_t tablet_num = cluster->tablets_per_node_[node_index].size(); + return sorted_node_index[i]; + } + + static uint32_t PickHeaviestNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index) { + uint32_t node_num = sorted_node_index.size(); + if (node_num < 1) { + return kInvalidNodeIndex; + } + + return sorted_node_index[node_num - 1]; + } + + static uint32_t PickHeaviestNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index, + const std::function& is_proper) { + uint32_t node_num = sorted_node_index.size(); + if (node_num < 1) { + return kInvalidNodeIndex; + } - if (tablet_num > 0) { - uint32_t rand = Random::Rand(0, tablet_num); - return cluster->tablets_per_node_[node_index][rand]; - } else { - return kInvalidTabletIndex; - } + uint32_t i = node_num - 1; + while (!is_proper(sorted_node_index[i])) { + if (i == 0) { + return kInvalidNodeIndex; + } + --i; } + + return sorted_node_index[i]; + } + + static uint32_t PickHeaviestTabletFromSourceNode(const std::shared_ptr& cluster, + const std::vector& sorted_tablet_index, + const std::function& is_proper) { + uint32_t tablet_num = sorted_tablet_index.size(); + if (tablet_num < 1) { + return kInvalidTabletIndex; + } + + uint32_t i = tablet_num - 1; + while (!is_proper(sorted_tablet_index[i])) { + if (i == 0) { + return kInvalidTabletIndex; + } + --i; + } + + return sorted_tablet_index[i]; + } }; -} // namespace load_balancer -} // namespace tera +} // namespace load_balancer +} // namespace tera -#endif // TERA_LOAD_BALANCER_ACTION_GENERATOR_H_ +#endif // TERA_LOAD_BALANCER_ACTION_GENERATOR_H_ diff --git a/src/load_balancer/action_generators.cc b/src/load_balancer/action_generators.cc index eeafd0d96..0b4a3eee2 100644 --- a/src/load_balancer/action_generators.cc +++ b/src/load_balancer/action_generators.cc @@ -2,428 +2,530 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +#include "load_balancer/action_generators.h" + #include #include #include "glog/logging.h" -#include "load_balancer/action_generators.h" #include "load_balancer/actions.h" #include "load_balancer/random.h" -#include namespace tera { namespace load_balancer { -RandomActionGenerator::RandomActionGenerator() : - name_("RandomActionGenerator") { -} +RandomActionGenerator::RandomActionGenerator() : name_("RandomActionGenerator") {} -RandomActionGenerator::~RandomActionGenerator() { -} +RandomActionGenerator::~RandomActionGenerator() {} Action* RandomActionGenerator::Generate(const std::shared_ptr& cluster) { - VLOG(20) << "[lb] RandomActionGenerator worked"; + VLOG(20) << "[lb] RandomActionGenerator worked"; - if (cluster->tablet_node_num_ < 2) { - return new EmptyAction(); - } + if (cluster->tablet_node_num_ < 2) { + return new EmptyAction(); + } - uint32_t source_node_index = PickRandomNode(cluster); - uint32_t dest_node_index = PickOtherRandomNode(cluster, source_node_index); - uint32_t tablet_index = PickRandomTabletOfNode(cluster, source_node_index); + uint32_t source_node_index = PickRandomNode(cluster); + if (source_node_index == kInvalidNodeIndex) { + return new EmptyAction(); + } - if (tablet_index == kInvalidTabletIndex || - source_node_index == kInvalidNodeIndex || - dest_node_index == kInvalidNodeIndex) { - return new EmptyAction(); - } + using std::placeholders::_1; + std::function is_proper = + std::bind(&Cluster::IsProperTargetTablet, cluster.get(), _1); + uint32_t tablet_index = PickRandomTabletFromSourceNode(cluster, source_node_index, is_proper); + if (tablet_index == kInvalidTabletIndex) { + return new EmptyAction(); + } - return new MoveAction(tablet_index, source_node_index, dest_node_index, Name()); -} + using std::placeholders::_2; + std::function is_proper_location = + std::bind(&Cluster::IsProperLocation, cluster.get(), _1, _2); + uint32_t dest_node_index = + PickRandomDestNode(cluster, source_node_index, tablet_index, is_proper_location); + if (dest_node_index == kInvalidNodeIndex || dest_node_index == source_node_index) { + return new EmptyAction(); + } -std::string RandomActionGenerator::Name() { - return name_; + return new MoveAction(tablet_index, source_node_index, dest_node_index, Name()); } -TabletCountActionGenerator::TabletCountActionGenerator() : - name_("TabletCountActionGenerator") { -} +std::string RandomActionGenerator::Name() { return name_; } -TabletCountActionGenerator::~TabletCountActionGenerator() { -} +TabletCountActionGenerator::TabletCountActionGenerator() : name_("TabletCountActionGenerator") {} + +TabletCountActionGenerator::~TabletCountActionGenerator() {} Action* TabletCountActionGenerator::Generate(const std::shared_ptr& cluster) { - VLOG(20) << "[lb] TabletCountActionGenerator worked"; + VLOG(20) << "[lb] TabletCountActionGenerator worked"; - if (cluster->tablet_node_num_ < 2) { - return new EmptyAction(); - } + if (cluster->tablet_node_num_ < 2) { + return new EmptyAction(); + } - cluster->SortNodesByTabletCount(); + std::vector sorted_node_index; + cluster->SortNodesByTabletCount(&sorted_node_index); - uint32_t source_node_index = PickMostTabletsNode(cluster); - uint32_t dest_node_index = PickLeastTabletsNode(cluster); - uint32_t tablet_index = PickRandomTabletOfNode(cluster, source_node_index); + uint32_t source_node_index = PickMostTabletsNode(cluster, sorted_node_index); + if (source_node_index == kInvalidNodeIndex) { + return new EmptyAction(); + } - if (tablet_index == kInvalidTabletIndex || - source_node_index == kInvalidNodeIndex || - dest_node_index == kInvalidNodeIndex || - source_node_index == dest_node_index) { - return new EmptyAction(); - } + using std::placeholders::_1; + std::function is_proper = + std::bind(&Cluster::IsProperTargetTablet, cluster.get(), _1); + uint32_t tablet_index = PickRandomTabletFromSourceNode(cluster, source_node_index, is_proper); + if (tablet_index == kInvalidTabletIndex) { + return new EmptyAction(); + } - return new MoveAction(tablet_index, source_node_index, dest_node_index, Name()); -} + uint32_t dest_node_index = PickLeastTabletsNode(cluster, sorted_node_index, tablet_index); + if (dest_node_index == kInvalidNodeIndex || dest_node_index == source_node_index) { + return new EmptyAction(); + } -uint32_t TabletCountActionGenerator::PickMostTabletsNode(const std::shared_ptr& cluster) { - if (cluster->node_index_sorted_by_tablet_count_.size() >= 1) { - return cluster->node_index_sorted_by_tablet_count_[cluster->node_index_sorted_by_tablet_count_.size() - 1]; - } else { - return kInvalidNodeIndex; - } + return new MoveAction(tablet_index, source_node_index, dest_node_index, Name()); } -uint32_t TabletCountActionGenerator::PickLeastTabletsNode(const std::shared_ptr& cluster) { - if (cluster->node_index_sorted_by_tablet_count_.size() >= 1) { - uint32_t index = 0; - if (cluster->lb_options_.meta_table_isolate_enabled) { - while (cluster->node_index_sorted_by_tablet_count_[index] == cluster->meta_table_node_index_) { - ++index; - if (index == cluster->node_index_sorted_by_tablet_count_.size()) { - return kInvalidNodeIndex; - } - } - } - return cluster->node_index_sorted_by_tablet_count_[index]; - } else { - return kInvalidNodeIndex; - } +uint32_t TabletCountActionGenerator::PickMostTabletsNode( + const std::shared_ptr& cluster, const std::vector& sorted_node_index) { + return PickHeaviestNode(cluster, sorted_node_index); } -std::string TabletCountActionGenerator::Name() { - return name_; +uint32_t TabletCountActionGenerator::PickLeastTabletsNode( + const std::shared_ptr& cluster, const std::vector& sorted_node_index, + uint32_t chosen_tablet_index) { + using std::placeholders::_1; + using std::placeholders::_2; + std::function is_proper_location = + std::bind(&Cluster::IsProperLocation, cluster.get(), _1, _2); + return PickLightestNode(cluster, sorted_node_index, chosen_tablet_index, is_proper_location); } -SizeActionGenerator::SizeActionGenerator() : - name_("SizeActionGenerator") { -} +std::string TabletCountActionGenerator::Name() { return name_; } -SizeActionGenerator::~SizeActionGenerator() { -} +SizeActionGenerator::SizeActionGenerator() : name_("SizeActionGenerator") {} + +SizeActionGenerator::~SizeActionGenerator() {} Action* SizeActionGenerator::Generate(const std::shared_ptr& cluster) { - VLOG(20) << "[lb] SizeActionGenerator worked"; + VLOG(20) << "[lb] SizeActionGenerator worked"; - if (cluster->tablet_node_num_ < 2) { - return new EmptyAction(); - } + if (cluster->tablet_node_num_ < 2) { + return new EmptyAction(); + } - cluster->SortNodesBySize(); + std::vector sorted_node_index; + cluster->SortNodesBySize(&sorted_node_index); - uint32_t source_node_index = PickLargestSizeNode(cluster); - uint32_t dest_node_index = PickSmallestSizeNode(cluster); - uint32_t tablet_index = PickRandomTabletOfNode(cluster, source_node_index); + uint32_t source_node_index = PickLargestSizeNode(cluster, sorted_node_index); + if (source_node_index == kInvalidNodeIndex) { + return new EmptyAction(); + } - if (tablet_index == kInvalidTabletIndex || - source_node_index == kInvalidNodeIndex || - dest_node_index == kInvalidNodeIndex || - source_node_index == dest_node_index) { - return new EmptyAction(); - } + using std::placeholders::_1; + std::function is_proper = + std::bind(&Cluster::IsProperTargetTablet, cluster.get(), _1); + uint32_t tablet_index = PickRandomTabletFromSourceNode(cluster, source_node_index, is_proper); + if (tablet_index == kInvalidTabletIndex) { + return new EmptyAction(); + } - return new MoveAction(tablet_index, source_node_index, dest_node_index, Name()); + uint32_t dest_node_index = PickSmallestSizeNode(cluster, sorted_node_index, tablet_index); + if (dest_node_index == kInvalidNodeIndex || dest_node_index == source_node_index) { + return new EmptyAction(); + } + + return new MoveAction(tablet_index, source_node_index, dest_node_index, Name()); } -uint32_t SizeActionGenerator::PickLargestSizeNode(const std::shared_ptr& cluster) { - if (cluster->node_index_sorted_by_size_.size() >= 1) { - return cluster->node_index_sorted_by_size_[cluster->node_index_sorted_by_size_.size() - 1]; - } else { - return kInvalidNodeIndex; - } +uint32_t SizeActionGenerator::PickLargestSizeNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index) { + return PickHeaviestNode(cluster, sorted_node_index); } -uint32_t SizeActionGenerator::PickSmallestSizeNode(const std::shared_ptr& cluster) { - if (cluster->node_index_sorted_by_size_.size() >= 1) { - uint32_t index = 0; - if (cluster->lb_options_.meta_table_isolate_enabled) { - while (cluster->node_index_sorted_by_size_[index] == cluster->meta_table_node_index_) { - ++index; - if (index == cluster->node_index_sorted_by_size_.size()) { - return kInvalidNodeIndex; - } - } - } - return cluster->node_index_sorted_by_size_[index]; - } else { - return kInvalidNodeIndex; - } +uint32_t SizeActionGenerator::PickSmallestSizeNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index, + uint32_t chosen_tablet_index) { + using std::placeholders::_1; + using std::placeholders::_2; + std::function is_proper_location = + std::bind(&Cluster::IsProperLocation, cluster.get(), _1, _2); + return PickLightestNode(cluster, sorted_node_index, chosen_tablet_index, is_proper_location); } -std::string SizeActionGenerator::Name() { - return name_; +std::string SizeActionGenerator::Name() { return name_; } + +FlashSizeActionGenerator::FlashSizeActionGenerator() : name_("FlashSizeActionGenerator") {} + +FlashSizeActionGenerator::~FlashSizeActionGenerator() {} + +Action* FlashSizeActionGenerator::Generate(const std::shared_ptr& cluster) { + VLOG(20) << "[lb] FlashSizeActionGenerator worked"; + + if (cluster->tablet_node_num_ < 2) { + return new EmptyAction(); + } + + std::vector sorted_node_index; + cluster->SortNodesByFlashSizePercent(&sorted_node_index); + + uint32_t source_node_index = PickHighestFlashSizePercentNode(cluster, sorted_node_index); + if (source_node_index == kInvalidNodeIndex) { + return new EmptyAction(); + } + + using std::placeholders::_1; + std::function is_proper = + std::bind(&Cluster::IsProperTargetTablet, cluster.get(), _1); + uint32_t tablet_index = PickRandomTabletFromSourceNode(cluster, source_node_index, is_proper); + if (tablet_index == kInvalidTabletIndex) { + return new EmptyAction(); + } + + uint32_t dest_node_index = + PickLowestFlashSizePercentNode(cluster, sorted_node_index, tablet_index); + if (dest_node_index == kInvalidNodeIndex || dest_node_index == source_node_index) { + return new EmptyAction(); + } + + return new MoveAction(tablet_index, source_node_index, dest_node_index, Name()); } -ReadLoadActionGenerator::ReadLoadActionGenerator() : - name_("ReadLoadActionGenerator") { +uint32_t FlashSizeActionGenerator::PickHighestFlashSizePercentNode( + const std::shared_ptr& cluster, const std::vector& sorted_node_index) { + return PickHeaviestNode(cluster, sorted_node_index); } -ReadLoadActionGenerator::~ReadLoadActionGenerator() { +uint32_t FlashSizeActionGenerator::PickLowestFlashSizePercentNode( + const std::shared_ptr& cluster, const std::vector& sorted_node_index, + uint32_t chosen_tablet_index) { + using std::placeholders::_1; + using std::placeholders::_2; + std::function is_proper_location = + std::bind(&Cluster::IsProperLocation, cluster.get(), _1, _2); + return PickLightestNode(cluster, sorted_node_index, chosen_tablet_index, is_proper_location); } +std::string FlashSizeActionGenerator::Name() { return name_; } + +ReadLoadActionGenerator::ReadLoadActionGenerator() : name_("ReadLoadActionGenerator") {} + +ReadLoadActionGenerator::~ReadLoadActionGenerator() {} + Action* ReadLoadActionGenerator::Generate(const std::shared_ptr& cluster) { - VLOG(20) << "[lb] ReadLoadActionGenerator worked"; + VLOG(20) << "[lb] ReadLoadActionGenerator worked"; - if (cluster->tablet_node_num_ < 2) { - return new EmptyAction(); - } + if (cluster->tablet_node_num_ < 2) { + return new EmptyAction(); + } - cluster->SortNodesByReadLoad(); + std::vector sorted_node_index; + cluster->SortNodesByReadLoad(&sorted_node_index); - uint32_t source_node_index = PickMostReadNodeWithPending(cluster); - uint32_t dest_node_index = PickLeastReadNode(cluster); - uint32_t tablet_index = PickRandomTabletOfNode(cluster, source_node_index); + uint32_t source_node_index = PickMostReadNode(cluster, sorted_node_index); + if (source_node_index == kInvalidNodeIndex) { + return new EmptyAction(); + } - if (tablet_index == kInvalidTabletIndex || - source_node_index == kInvalidNodeIndex || - dest_node_index == kInvalidNodeIndex || - source_node_index == dest_node_index) { - return new EmptyAction(); - } + std::vector sorted_tablet_index; + cluster->SortTabletsOfNodeByReadLoad(source_node_index, &sorted_tablet_index); - return new MoveAction(tablet_index, source_node_index, dest_node_index, Name()); -} + uint32_t tablet_index = PickMostReadTabletFromSourceNode(cluster, sorted_tablet_index); + if (tablet_index == kInvalidTabletIndex) { + return new EmptyAction(); + } -uint32_t ReadLoadActionGenerator::PickMostReadNode(const std::shared_ptr& cluster) { - if (cluster->node_index_sorted_by_read_load_.size() >= 1) { - return cluster->node_index_sorted_by_read_load_[cluster->node_index_sorted_by_read_load_.size() - 1]; - } else { - return kInvalidNodeIndex; - } -} + uint32_t dest_node_index = PickLeastReadNode(cluster, sorted_node_index, tablet_index); + if (dest_node_index == kInvalidNodeIndex || dest_node_index == source_node_index) { + return new EmptyAction(); + } -uint32_t ReadLoadActionGenerator::PickMostReadNodeWithPending(const std::shared_ptr& cluster) { - if (cluster->node_index_sorted_by_read_load_.size() >= 1) { - int i = cluster->node_index_sorted_by_read_load_.size() - 1; - // skip node without pending - while (i >= 0) { - if (cluster->read_pending_nodes_index_.empty() || - cluster->read_pending_nodes_index_.find(cluster->node_index_sorted_by_read_load_[i]) - == cluster->read_pending_nodes_index_.end()) { - --i; - } else { - break; - } - } - if (i >= 0) { - return cluster->node_index_sorted_by_read_load_[i]; - } else { - return kInvalidNodeIndex; - } - } else { - return kInvalidNodeIndex; - } + return new MoveAction(tablet_index, source_node_index, dest_node_index, Name()); } -uint32_t ReadLoadActionGenerator::PickLeastReadNode(const std::shared_ptr& cluster) { - if (cluster->node_index_sorted_by_read_load_.size() >= 1) { - size_t i = 0; - while (i < cluster->node_index_sorted_by_read_load_.size()) { - // skip meta table node and pending node - if ((cluster->lb_options_.meta_table_isolate_enabled && cluster->node_index_sorted_by_read_load_[i] == cluster->meta_table_node_index_) || - (!cluster->read_pending_nodes_index_.empty() && cluster->read_pending_nodes_index_.find(cluster->node_index_sorted_by_read_load_[i]) != cluster->read_pending_nodes_index_.end())) { - ++i; - } else { - break; - } - } - if (i < cluster->node_index_sorted_by_read_load_.size()) { - return cluster->node_index_sorted_by_read_load_[i]; - } else { - return kInvalidNodeIndex; - } - } else { - return kInvalidNodeIndex; - } -} +uint32_t ReadLoadActionGenerator::PickMostReadNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index) { + using std::placeholders::_1; + std::function is_read_pending_node = + std::bind(&Cluster::IsReadPendingNode, cluster.get(), _1); -std::string ReadLoadActionGenerator::Name() { - return name_; + return PickHeaviestNode(cluster, sorted_node_index, is_read_pending_node); } -WriteLoadActionGenerator::WriteLoadActionGenerator() : - name_("WriteLoadActionGenerator") { +uint32_t ReadLoadActionGenerator::PickMostReadTabletFromSourceNode( + const std::shared_ptr& cluster, const std::vector& sorted_tablet_index) { + using std::placeholders::_1; + std::function is_proper = + std::bind(&Cluster::IsProperTargetTablet, cluster.get(), _1); + return PickHeaviestTabletFromSourceNode(cluster, sorted_tablet_index, is_proper); } -WriteLoadActionGenerator::~WriteLoadActionGenerator() { +uint32_t ReadLoadActionGenerator::PickLeastReadNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index, + uint32_t chosen_tablet_index) { + using std::placeholders::_1; + using std::placeholders::_2; + std::function is_proper_location = + std::bind(&Cluster::IsProperLocation, cluster.get(), _1, _2); + return PickLightestNode(cluster, sorted_node_index, chosen_tablet_index, is_proper_location); } +std::string ReadLoadActionGenerator::Name() { return name_; } + +WriteLoadActionGenerator::WriteLoadActionGenerator() + : name_("WriteLoadActionGenerator"), last_chosen_dest_node_index_(kInvalidNodeIndex) {} + +WriteLoadActionGenerator::~WriteLoadActionGenerator() {} + Action* WriteLoadActionGenerator::Generate(const std::shared_ptr& cluster) { - VLOG(20) << "[lb] WriteLoadActionGenerator worked"; + VLOG(20) << "[lb] WriteLoadActionGenerator worked"; - if (cluster->tablet_node_num_ < 2) { - return new EmptyAction(); - } + if (cluster->tablet_node_num_ < 2) { + return new EmptyAction(); + } - cluster->SortNodesByWriteLoad(); + std::vector sorted_node_index; + cluster->SortNodesByWriteLoad(&sorted_node_index); - uint32_t source_node_index = PickMostWriteNodeWithPending(cluster); - uint32_t dest_node_index = PickLeastWriteNode(cluster); - uint32_t tablet_index = PickRandomTabletOfNode(cluster, source_node_index); + uint32_t source_node_index = PickMostWriteNode(cluster, sorted_node_index); + if (source_node_index == kInvalidNodeIndex) { + return new EmptyAction(); + } - if (tablet_index == kInvalidTabletIndex || - source_node_index == kInvalidNodeIndex || - dest_node_index == kInvalidNodeIndex || - source_node_index == dest_node_index) { - return new EmptyAction(); - } + std::vector sorted_tablet_index; + cluster->SortTabletsOfNodeByWriteLoad(source_node_index, &sorted_tablet_index); + + uint32_t tablet_index = PickMostWriteTabletFromSourceNode(cluster, sorted_tablet_index); + if (tablet_index == kInvalidTabletIndex) { + return new EmptyAction(); + } + + uint32_t dest_node_index = PickLeastWriteNode(cluster, sorted_node_index, tablet_index); + if (dest_node_index == kInvalidNodeIndex || dest_node_index == source_node_index) { + return new EmptyAction(); + } - return new MoveAction(tablet_index, source_node_index, dest_node_index, Name()); + return new MoveAction(tablet_index, source_node_index, dest_node_index, Name()); } -uint32_t WriteLoadActionGenerator::PickMostWriteNode(const std::shared_ptr& cluster) { - if (cluster->node_index_sorted_by_write_load_.size() >= 1) { - return cluster->node_index_sorted_by_write_load_[cluster->node_index_sorted_by_write_load_.size() - 1]; - } else { - return kInvalidNodeIndex; - } +uint32_t WriteLoadActionGenerator::PickMostWriteNode( + const std::shared_ptr& cluster, const std::vector& sorted_node_index) { + using std::placeholders::_1; + std::function is_write_pending_node = + std::bind(&Cluster::IsWritePendingNode, cluster.get(), _1); + + return PickHeaviestNode(cluster, sorted_node_index, is_write_pending_node); } -uint32_t WriteLoadActionGenerator::PickMostWriteNodeWithPending(const std::shared_ptr& cluster) { - if (cluster->node_index_sorted_by_write_load_.size() >= 1) { - int i = cluster->node_index_sorted_by_write_load_.size() - 1; - // skip node without pending - while (i >= 0) { - if (cluster->write_pending_nodes_index_.empty() || - cluster->write_pending_nodes_index_.find(cluster->node_index_sorted_by_write_load_[i]) - == cluster->write_pending_nodes_index_.end()) { - --i; - } else { - break; - } - } - if (i >= 0) { - return cluster->node_index_sorted_by_write_load_[i]; - } else { - return kInvalidNodeIndex; - } - } else { - return kInvalidNodeIndex; - } +uint32_t WriteLoadActionGenerator::PickMostWriteTabletFromSourceNode( + const std::shared_ptr& cluster, const std::vector& sorted_tablet_index) { + using std::placeholders::_1; + std::function is_proper = + std::bind(&Cluster::IsProperTargetTablet, cluster.get(), _1); + return PickHeaviestTabletFromSourceNode(cluster, sorted_tablet_index, is_proper); } -uint32_t WriteLoadActionGenerator::PickLeastWriteNode(const std::shared_ptr& cluster) { - if (cluster->node_index_sorted_by_write_load_.size() >= 1) { - size_t i = 0; - while (i < cluster->node_index_sorted_by_write_load_.size()) { - // skip meta table node and pending node - if ((cluster->lb_options_.meta_table_isolate_enabled && cluster->node_index_sorted_by_write_load_[i] == cluster->meta_table_node_index_) || - (!cluster->write_pending_nodes_index_.empty() && cluster->write_pending_nodes_index_.find(cluster->node_index_sorted_by_write_load_[i]) != cluster->write_pending_nodes_index_.end())) { - ++i; - } else { - break; - } - } - if (i < cluster->node_index_sorted_by_write_load_.size()) { - return cluster->node_index_sorted_by_write_load_[i]; - } else { - return kInvalidNodeIndex; - } - } else { - return kInvalidNodeIndex; - } +uint32_t WriteLoadActionGenerator::PickLeastWriteNode( + const std::shared_ptr& cluster, const std::vector& sorted_node_index, + uint32_t chosen_tablet_index) { + using std::placeholders::_1; + using std::placeholders::_2; + std::function is_proper_location = + std::bind(&Cluster::IsProperLocation, cluster.get(), _1, _2); + return PickLightestNode(cluster, sorted_node_index, chosen_tablet_index, is_proper_location); +} + +std::string WriteLoadActionGenerator::Name() { return name_; } + +ScanLoadActionGenerator::ScanLoadActionGenerator() : name_("ScanLoadActionGenerator") {} + +ScanLoadActionGenerator::~ScanLoadActionGenerator() {} + +Action* ScanLoadActionGenerator::Generate(const std::shared_ptr& cluster) { + VLOG(20) << "[lb] ScanLoadActionGenerator worked"; + + if (cluster->tablet_node_num_ < 2) { + return new EmptyAction(); + } + + std::vector sorted_node_index; + cluster->SortNodesByScanLoad(&sorted_node_index); + + uint32_t source_node_index = PickMostScanNode(cluster, sorted_node_index); + if (source_node_index == kInvalidNodeIndex) { + return new EmptyAction(); + } + + std::vector sorted_tablet_index; + cluster->SortTabletsOfNodeByScanLoad(source_node_index, &sorted_tablet_index); + + uint32_t tablet_index = PickMostScanTabletFromSourceNode(cluster, sorted_tablet_index); + if (tablet_index == kInvalidTabletIndex) { + return new EmptyAction(); + } + + uint32_t dest_node_index = PickLeastScanNode(cluster, sorted_node_index, tablet_index); + if (dest_node_index == kInvalidNodeIndex || dest_node_index == source_node_index) { + return new EmptyAction(); + } + + return new MoveAction(tablet_index, source_node_index, dest_node_index, Name()); } -std::string WriteLoadActionGenerator::Name() { - return name_; +uint32_t ScanLoadActionGenerator::PickMostScanNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index) { + using std::placeholders::_1; + std::function is_scan_pending_node = + std::bind(&Cluster::IsScanPendingNode, cluster.get(), _1); + + return PickHeaviestNode(cluster, sorted_node_index, is_scan_pending_node); } -ScanLoadActionGenerator::ScanLoadActionGenerator() : - name_("ScanLoadActionGenerator") { +uint32_t ScanLoadActionGenerator::PickMostScanTabletFromSourceNode( + const std::shared_ptr& cluster, const std::vector& sorted_tablet_index) { + using std::placeholders::_1; + std::function is_proper = + std::bind(&Cluster::IsProperTargetTablet, cluster.get(), _1); + return PickHeaviestTabletFromSourceNode(cluster, sorted_tablet_index, is_proper); } -ScanLoadActionGenerator::~ScanLoadActionGenerator() { +uint32_t ScanLoadActionGenerator::PickLeastScanNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index, + uint32_t chosen_tablet_index) { + using std::placeholders::_1; + using std::placeholders::_2; + std::function is_proper_location = + std::bind(&Cluster::IsProperLocation, cluster.get(), _1, _2); + return PickLightestNode(cluster, sorted_node_index, chosen_tablet_index, is_proper_location); } -Action* ScanLoadActionGenerator::Generate(const std::shared_ptr& cluster) { - VLOG(20) << "[lb] ScanLoadActionGenerator worked"; +std::string ScanLoadActionGenerator::Name() { return name_; } - if (cluster->tablet_node_num_ < 2) { - return new EmptyAction(); - } +LReadActionGenerator::LReadActionGenerator() : name_("LReadActionGenerator") {} - cluster->SortNodesByScanLoad(); +LReadActionGenerator::~LReadActionGenerator() {} - uint32_t source_node_index = PickMostScanNodeWithPending(cluster); - uint32_t dest_node_index = PickLeastScanNode(cluster); - uint32_t tablet_index = PickRandomTabletOfNode(cluster, source_node_index); +Action* LReadActionGenerator::Generate(const std::shared_ptr& cluster) { + VLOG(20) << "[lb] LReadActionGenerator worked"; - if (tablet_index == kInvalidTabletIndex || - source_node_index == kInvalidNodeIndex || - dest_node_index == kInvalidNodeIndex || - source_node_index == dest_node_index) { - return new EmptyAction(); - } + if (cluster->tablet_node_num_ < 2) { + return new EmptyAction(); + } - return new MoveAction(tablet_index, source_node_index, dest_node_index, Name()); + std::vector sorted_node_index; + cluster->SortNodesByLRead(&sorted_node_index); + + uint32_t source_node_index = PickMostLReadNode(cluster, sorted_node_index); + if (source_node_index == kInvalidNodeIndex) { + return new EmptyAction(); + } + + std::vector sorted_tablet_index; + cluster->SortTabletsOfNodeByLRead(source_node_index, &sorted_tablet_index); + + uint32_t tablet_index = PickMostLReadTabletFromSourceNode(cluster, sorted_tablet_index); + if (tablet_index == kInvalidTabletIndex) { + return new EmptyAction(); + } + + uint32_t dest_node_index = PickLeastLReadNode(cluster, sorted_node_index, tablet_index); + if (dest_node_index == kInvalidNodeIndex || dest_node_index == source_node_index) { + return new EmptyAction(); + } + + return new MoveAction(tablet_index, source_node_index, dest_node_index, Name()); } -uint32_t ScanLoadActionGenerator::PickMostScanNode(const std::shared_ptr& cluster) { - if (cluster->node_index_sorted_by_scan_load_.size() >= 1) { - return cluster->node_index_sorted_by_scan_load_[cluster->node_index_sorted_by_scan_load_.size() - 1]; - } else { - return kInvalidNodeIndex; - } +uint32_t LReadActionGenerator::PickMostLReadNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index) { + using std::placeholders::_1; + std::function is_heavy_lread_node = + std::bind(&Cluster::IsHeavyLReadNode, cluster.get(), _1); + + return PickHeaviestNode(cluster, sorted_node_index, is_heavy_lread_node); } -uint32_t ScanLoadActionGenerator::PickMostScanNodeWithPending(const std::shared_ptr& cluster) { - if (cluster->node_index_sorted_by_scan_load_.size() >= 1) { - int i = cluster->node_index_sorted_by_scan_load_.size() - 1; - // skip node without pending - while (i >= 0) { - if (cluster->scan_pending_nodes_index_.empty() || - cluster->scan_pending_nodes_index_.find(cluster->node_index_sorted_by_scan_load_[i]) - == cluster->scan_pending_nodes_index_.end()) { - --i; - } else { - break; - } - } - if (i >= 0) { - return cluster->node_index_sorted_by_scan_load_[i]; - } else { - return kInvalidNodeIndex; - } - } else { - return kInvalidNodeIndex; - } +uint32_t LReadActionGenerator::PickMostLReadTabletFromSourceNode( + const std::shared_ptr& cluster, const std::vector& sorted_tablet_index) { + using std::placeholders::_1; + std::function is_proper = + std::bind(&Cluster::IsProperTargetTablet, cluster.get(), _1); + return PickHeaviestTabletFromSourceNode(cluster, sorted_tablet_index, is_proper); +} + +uint32_t LReadActionGenerator::PickLeastLReadNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index, + uint32_t chosen_tablet_index) { + using std::placeholders::_1; + using std::placeholders::_2; + std::function is_proper_location = + std::bind(&Cluster::IsProperLocation, cluster.get(), _1, _2); + return PickLightestNode(cluster, sorted_node_index, chosen_tablet_index, is_proper_location); } -uint32_t ScanLoadActionGenerator::PickLeastScanNode(const std::shared_ptr& cluster) { - if (cluster->node_index_sorted_by_scan_load_.size() >= 1) { - size_t i = 0; - while (i < cluster->node_index_sorted_by_scan_load_.size()) { - // skip meta table node and pending node - if ((cluster->lb_options_.meta_table_isolate_enabled && cluster->node_index_sorted_by_scan_load_[i] == cluster->meta_table_node_index_) || - (!cluster->scan_pending_nodes_index_.empty() && cluster->scan_pending_nodes_index_.find(cluster->node_index_sorted_by_scan_load_[i]) != cluster->scan_pending_nodes_index_.end())) { - ++i; - } else { - break; - } - } - if (i < cluster->node_index_sorted_by_scan_load_.size()) { - return cluster->node_index_sorted_by_scan_load_[i]; - } else { - return kInvalidNodeIndex; - } - } else { - return kInvalidNodeIndex; +std::string LReadActionGenerator::Name() { return name_; } + +MetaIsolateActionGenerator::MetaIsolateActionGenerator() : name_("MetaIsolateActionGenerator") {} + +MetaIsolateActionGenerator::~MetaIsolateActionGenerator() {} + +Action* MetaIsolateActionGenerator::Generate(const std::shared_ptr& cluster) { + VLOG(20) << "[lb] MetaIsolateActionGenerator worked"; + + if (cluster->tablet_node_num_ < 2) { + return new EmptyAction(); + } + + std::vector sorted_node_index; + cluster->SortNodesByComplexLoad(&sorted_node_index); + + uint32_t source_node_index = cluster->meta_table_node_index_; + if (source_node_index == kInvalidNodeIndex) { + return new EmptyAction(); + } + + uint32_t tablet_index = PickRandomTabletOfMetaNode(cluster, source_node_index); + if (tablet_index == kInvalidTabletIndex) { + return new EmptyAction(); + } + + uint32_t dest_node_index = PickLeastComplexLoadNode(cluster, sorted_node_index, tablet_index); + if (dest_node_index == kInvalidNodeIndex || dest_node_index == source_node_index) { + return new EmptyAction(); + } + + return new MoveAction(tablet_index, source_node_index, dest_node_index, Name()); +} + +uint32_t MetaIsolateActionGenerator::PickRandomTabletOfMetaNode( + const std::shared_ptr& cluster, uint32_t source_node_index) { + uint32_t tablet_num = cluster->tablets_per_node_[source_node_index].size(); + if (tablet_num < 2) { + return kInvalidTabletIndex; + } + + while (true) { + uint32_t rand = Random::Rand(0, tablet_num); + uint32_t tablet_index = cluster->tablets_per_node_[source_node_index][rand]; + if (!cluster->IsMetaTablet(tablet_index)) { + return tablet_index; } + } } -std::string ScanLoadActionGenerator::Name() { - return name_; +uint32_t MetaIsolateActionGenerator::PickLeastComplexLoadNode( + const std::shared_ptr& cluster, const std::vector& sorted_node_index, + uint32_t chosen_tablet_index) { + using std::placeholders::_1; + using std::placeholders::_2; + std::function is_proper_location = + std::bind(&Cluster::IsProperLocation, cluster.get(), _1, _2); + return PickLightestNode(cluster, sorted_node_index, chosen_tablet_index, is_proper_location); } -} // namespace load_balancer -} // namespace tera +std::string MetaIsolateActionGenerator::Name() { return name_; } + +} // namespace load_balancer +} // namespace tera diff --git a/src/load_balancer/action_generators.h b/src/load_balancer/action_generators.h index b2089a705..78578d353 100644 --- a/src/load_balancer/action_generators.h +++ b/src/load_balancer/action_generators.h @@ -13,125 +13,189 @@ namespace tera { namespace load_balancer { -// move a random tablet of a random node to another random node class RandomActionGenerator : public ActionGenerator { -public: - RandomActionGenerator(); - virtual ~RandomActionGenerator(); + public: + RandomActionGenerator(); + virtual ~RandomActionGenerator(); - // generate a random move action - virtual Action* Generate(const std::shared_ptr& cluster) override; + virtual Action* Generate(const std::shared_ptr& cluster) override; - virtual std::string Name() override; + virtual std::string Name() override; -private: - std::string name_; + private: + std::string name_; }; -// move a tablet -// from the node holding most tablets -// to the node holding least tablets class TabletCountActionGenerator : public ActionGenerator { -public: - TabletCountActionGenerator(); - virtual ~TabletCountActionGenerator(); + public: + TabletCountActionGenerator(); + virtual ~TabletCountActionGenerator(); - virtual Action* Generate(const std::shared_ptr& cluster) override; + virtual Action* Generate(const std::shared_ptr& cluster) override; - virtual std::string Name() override; + virtual std::string Name() override; -private: - uint32_t PickMostTabletsNode(const std::shared_ptr& cluster); - uint32_t PickLeastTabletsNode(const std::shared_ptr& cluster); + private: + uint32_t PickMostTabletsNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index); + uint32_t PickLeastTabletsNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index, + uint32_t chosen_tablet_index); -private: - std::string name_; + private: + std::string name_; }; -// move a tablet -// from the node holding largest data size -// to the node holding smallest data size class SizeActionGenerator : public ActionGenerator { -public: - SizeActionGenerator(); - virtual ~SizeActionGenerator(); + public: + SizeActionGenerator(); + virtual ~SizeActionGenerator(); - virtual Action* Generate(const std::shared_ptr& cluster) override; + virtual Action* Generate(const std::shared_ptr& cluster) override; - virtual std::string Name() override; + virtual std::string Name() override; -private: - uint32_t PickLargestSizeNode(const std::shared_ptr& cluster); - uint32_t PickSmallestSizeNode(const std::shared_ptr& cluster); + private: + uint32_t PickLargestSizeNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index); + uint32_t PickSmallestSizeNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index, + uint32_t chosen_tablet_index); -private: - std::string name_; + private: + std::string name_; +}; + +class FlashSizeActionGenerator : public ActionGenerator { + public: + FlashSizeActionGenerator(); + virtual ~FlashSizeActionGenerator(); + + virtual Action* Generate(const std::shared_ptr& cluster) override; + + virtual std::string Name() override; + + private: + uint32_t PickHighestFlashSizePercentNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index); + uint32_t PickLowestFlashSizePercentNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index, + uint32_t chosen_tablet_index); + + private: + std::string name_; }; -// move a tablet -// from the node has most read load -// to the node has least read load class ReadLoadActionGenerator : public ActionGenerator { -public: - ReadLoadActionGenerator(); - virtual ~ReadLoadActionGenerator(); + public: + ReadLoadActionGenerator(); + virtual ~ReadLoadActionGenerator(); - virtual Action* Generate(const std::shared_ptr& cluster) override; + virtual Action* Generate(const std::shared_ptr& cluster) override; - virtual std::string Name() override; + virtual std::string Name() override; -private: - uint32_t PickMostReadNode(const std::shared_ptr& cluster); - uint32_t PickMostReadNodeWithPending(const std::shared_ptr& cluster); - uint32_t PickLeastReadNode(const std::shared_ptr& cluster); + private: + uint32_t PickMostReadNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index); + uint32_t PickMostReadTabletFromSourceNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index); + uint32_t PickLeastReadNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index, + uint32_t chosen_tablet_index); -private: - std::string name_; + private: + std::string name_; }; -// move a tablet -// from the node has most write load -// to the node has least write load class WriteLoadActionGenerator : public ActionGenerator { -public: - WriteLoadActionGenerator(); - virtual ~WriteLoadActionGenerator(); + public: + WriteLoadActionGenerator(); + virtual ~WriteLoadActionGenerator(); + + virtual Action* Generate(const std::shared_ptr& cluster) override; + + virtual std::string Name() override; + + private: + uint32_t PickMostWriteNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index); + uint32_t PickMostWriteTabletFromSourceNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index); + uint32_t PickLeastWriteNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index, + uint32_t chosen_tablet_index); + + private: + std::string name_; + uint32_t last_chosen_dest_node_index_; +}; + +class ScanLoadActionGenerator : public ActionGenerator { + public: + ScanLoadActionGenerator(); + virtual ~ScanLoadActionGenerator(); - virtual Action* Generate(const std::shared_ptr& cluster) override; + virtual Action* Generate(const std::shared_ptr& cluster) override; - virtual std::string Name() override; + virtual std::string Name() override; -private: - uint32_t PickMostWriteNode(const std::shared_ptr& cluster); - uint32_t PickMostWriteNodeWithPending(const std::shared_ptr& cluster); - uint32_t PickLeastWriteNode(const std::shared_ptr& cluster); + private: + uint32_t PickMostScanNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index); + uint32_t PickMostScanTabletFromSourceNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index); + uint32_t PickLeastScanNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index, + uint32_t chosen_tablet_index); -private: - std::string name_; + private: + std::string name_; }; -// move a tablet -// from the node has most scan load -// to the node has least scan load -class ScanLoadActionGenerator : public ActionGenerator { -public: - ScanLoadActionGenerator(); - virtual ~ScanLoadActionGenerator(); +class LReadActionGenerator : public ActionGenerator { + public: + LReadActionGenerator(); + virtual ~LReadActionGenerator(); + + virtual Action* Generate(const std::shared_ptr& cluster) override; + + virtual std::string Name() override; + + private: + uint32_t PickMostLReadNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index); + uint32_t PickMostLReadTabletFromSourceNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index); + uint32_t PickLeastLReadNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index, + uint32_t chosen_tablet_index); + + private: + std::string name_; +}; + +class MetaIsolateActionGenerator : public ActionGenerator { + public: + MetaIsolateActionGenerator(); + virtual ~MetaIsolateActionGenerator(); - virtual Action* Generate(const std::shared_ptr& cluster) override; + virtual Action* Generate(const std::shared_ptr& cluster) override; - virtual std::string Name() override; + virtual std::string Name() override; -private: - uint32_t PickMostScanNode(const std::shared_ptr& cluster); - uint32_t PickMostScanNodeWithPending(const std::shared_ptr& cluster); - uint32_t PickLeastScanNode(const std::shared_ptr& cluster); + private: + uint32_t PickRandomTabletOfMetaNode(const std::shared_ptr& cluster, + uint32_t source_node_index); + uint32_t PickLeastComplexLoadNode(const std::shared_ptr& cluster, + const std::vector& sorted_node_index, + uint32_t chosen_tablet_index); -private: - std::string name_; + private: + std::string name_; }; -} // namespace load_balancer -} // namespace tera +} // namespace load_balancer +} // namespace tera -#endif // TERA_LOAD_BALANCER_ACTION_GENERATORS_H_ +#endif // TERA_LOAD_BALANCER_ACTION_GENERATORS_H_ diff --git a/src/load_balancer/actions.cc b/src/load_balancer/actions.cc index e1541900f..ee75c3f56 100644 --- a/src/load_balancer/actions.cc +++ b/src/load_balancer/actions.cc @@ -9,43 +9,31 @@ namespace tera { namespace load_balancer { -EmptyAction::EmptyAction() : - Action(Action::Type::EMPTY, "") { -} +EmptyAction::EmptyAction() : Action(Action::Type::EMPTY, "") {} -EmptyAction::~EmptyAction() { -} +EmptyAction::~EmptyAction() {} -Action* EmptyAction::UndoAction() { - return new EmptyAction(); -} +Action* EmptyAction::UndoAction() { return new EmptyAction(); } -std::string EmptyAction::ToString() const { - return "EmptyAction"; -} +std::string EmptyAction::ToString() const { return "EmptyAction"; } -MoveAction::MoveAction(uint32_t tablet_index, - uint32_t source_node_index, - uint32_t dest_node_index, - const std::string& generator) : - Action(Action::Type::MOVE, generator), - tablet_index_(tablet_index), - source_node_index_(source_node_index), - dest_node_index_(dest_node_index) { -} +MoveAction::MoveAction(uint32_t tablet_index, uint32_t source_node_index, uint32_t dest_node_index, + const std::string& generator) + : Action(Action::Type::MOVE, generator), + tablet_index_(tablet_index), + source_node_index_(source_node_index), + dest_node_index_(dest_node_index) {} -MoveAction::~MoveAction() { -} +MoveAction::~MoveAction() {} Action* MoveAction::UndoAction() { - return new MoveAction(tablet_index_, dest_node_index_, source_node_index_, "UndoAction"); + return new MoveAction(tablet_index_, dest_node_index_, source_node_index_, "UndoAction"); } std::string MoveAction::ToString() const { - return "move " + std::to_string(tablet_index_) + " from " - + std::to_string(source_node_index_) + " to " + std::to_string(dest_node_index_) - + " generated by " + GetGeneratorName(); + return "move " + std::to_string(tablet_index_) + " from " + std::to_string(source_node_index_) + + " to " + std::to_string(dest_node_index_) + " generated by " + GetGeneratorName(); } -} // namespace load_balancer -} // namespace tera +} // namespace load_balancer +} // namespace tera diff --git a/src/load_balancer/actions.h b/src/load_balancer/actions.h index c6df36efd..e9c4df859 100644 --- a/src/load_balancer/actions.h +++ b/src/load_balancer/actions.h @@ -13,35 +13,33 @@ namespace tera { namespace load_balancer { class EmptyAction : public Action { -public: - EmptyAction(); - virtual ~EmptyAction(); + public: + EmptyAction(); + virtual ~EmptyAction(); - virtual Action* UndoAction() override; + virtual Action* UndoAction() override; - virtual std::string ToString() const override; + virtual std::string ToString() const override; }; class MoveAction : public Action { -public: - MoveAction(uint32_t tablet_index, - uint32_t source_node_index, - uint32_t dest_node_index, - const std::string& generator); + public: + MoveAction(uint32_t tablet_index, uint32_t source_node_index, uint32_t dest_node_index, + const std::string& generator); - virtual ~MoveAction(); + virtual ~MoveAction(); - virtual Action* UndoAction() override; + virtual Action* UndoAction() override; - virtual std::string ToString() const override; + virtual std::string ToString() const override; -public: - uint32_t tablet_index_; - uint32_t source_node_index_; - uint32_t dest_node_index_; + public: + uint32_t tablet_index_; + uint32_t source_node_index_; + uint32_t dest_node_index_; }; -} // namespace load_balancer -} // namespace tera +} // namespace load_balancer +} // namespace tera -#endif // TERA_LOAD_BALANCER_ACTIONS_H_ +#endif // TERA_LOAD_BALANCER_ACTIONS_H_ diff --git a/src/load_balancer/balancer.h b/src/load_balancer/balancer.h index 2ad1727ea..976fa414d 100644 --- a/src/load_balancer/balancer.h +++ b/src/load_balancer/balancer.h @@ -18,22 +18,22 @@ namespace tera { namespace load_balancer { class Balancer { -public: - virtual ~Balancer() {} - - // balance the whole cluster - virtual bool BalanceCluster( - const std::vector>& lb_nodes, - std::vector* plans) = 0; - - // balance for the specified table - virtual bool BalanceCluster( - const std::string& table_name, - const std::vector>& lb_nodes, - std::vector* plans) = 0; + public: + virtual ~Balancer() {} + + // balance the whole cluster + virtual bool BalanceCluster(const std::vector>& lb_nodes, + std::vector* plans) = 0; + + // balance for the specified table + virtual bool BalanceCluster(const std::string& table_name, + const std::vector>& lb_nodes, + std::vector* plans) = 0; + + virtual std::string GetName() = 0; }; -} // namespace load_balancer -} // namespace tera +} // namespace load_balancer +} // namespace tera -#endif // TERA_LOAD_BALANCER_BALANCER_H_ +#endif // TERA_LOAD_BALANCER_BALANCER_H_ diff --git a/src/load_balancer/cluster.cc b/src/load_balancer/cluster.cc index cbb9825ff..a48000f42 100644 --- a/src/load_balancer/cluster.cc +++ b/src/load_balancer/cluster.cc @@ -16,530 +16,713 @@ namespace tera { namespace load_balancer { Cluster::Cluster(const std::vector>& lb_nodes, - const LBOptions& options) : - meta_table_node_index_(std::numeric_limits::max()), - lb_options_(options) { - int64_t start_time_ns = get_micros(); - - for (const auto& node : lb_nodes) { - if (lb_options_.meta_table_isolate_enabled && - node->tablet_node_ptr->GetAddr() == lb_options_.meta_table_node_addr) { - VLOG(5) << "skip meta table node:" << lb_options_.meta_table_node_addr; - } else { - lb_nodes_.emplace_back(node); - } - } + const LBOptions& options, bool skip_meta_node) + : meta_table_node_index_(std::numeric_limits::max()), lb_options_(options) { + int64_t start_time_ns = get_micros(); + + lb_nodes_.reserve(lb_nodes.size()); + for (const auto& node : lb_nodes) { + if (skip_meta_node && node->tablet_node_ptr->GetAddr() == lb_options_.meta_table_node_addr) { + VLOG(5) << "skip meta table node:" << lb_options_.meta_table_node_addr; + } else { + lb_nodes_.emplace_back(node); + } + } + + table_num_ = 0; + tablet_node_num_ = 0; + tablet_num_ = 0; + tablet_moved_num_ = 0; + + for (const auto& node : lb_nodes_) { + uint32_t node_index = nodes_.size(); + nodes_[node_index] = node; + + std::string addr = node->tablet_node_ptr->GetAddr(); + assert(nodes_to_index_.find(addr) == nodes_to_index_.end()); + nodes_to_index_[addr] = node_index; + + tablets_per_node_[node_index].clear(); + initial_tablets_not_ready_per_node_[node_index].clear(); + size_per_node_[node_index] = 0; + flash_size_per_node_[node_index] = 0; + read_load_per_node_[node_index] = 0; + write_load_per_node_[node_index] = 0; + scan_load_per_node_[node_index] = 0; + lread_per_node_[node_index] = 0; + + uint64_t read_pending_num = node->tablet_node_ptr->GetReadPending(); + read_pending_per_node_[node_index] = read_pending_num; + read_load_per_node_[node_index] += lb_options_.read_pending_factor * read_pending_num; + + uint64_t write_pending_num = node->tablet_node_ptr->GetWritePending(); + write_pending_per_node_[node_index] = write_pending_num; + write_load_per_node_[node_index] += lb_options_.write_pending_factor * write_pending_num; + + uint64_t scan_pending_num = node->tablet_node_ptr->GetScanPending(); + scan_pending_per_node_[node_index] = scan_pending_num; + scan_load_per_node_[node_index] += lb_options_.scan_pending_factor * scan_pending_num; + + for (const auto& tablet : node->tablets) { + uint32_t tablet_index = tablets_.size(); + + RegisterTablet(tablet, tablet_index, node_index); + + tablets_per_node_[node_index].emplace_back(tablet_index); + if (tablets_[tablet_index]->tablet_ptr->GetStatus() != TabletMeta::kTabletReady) { + initial_tablets_not_ready_per_node_[node_index].emplace_back(tablet_index); + } + size_per_node_[node_index] += static_cast(tablet->tablet_ptr->GetDataSize()); + flash_size_per_node_[node_index] += + static_cast(tablet->tablet_ptr->GetDataSizeOnFlash()); + read_load_per_node_[node_index] += static_cast(tablet->tablet_ptr->GetReadQps()); + write_load_per_node_[node_index] += static_cast(tablet->tablet_ptr->GetWriteQps()); + scan_load_per_node_[node_index] += static_cast(tablet->tablet_ptr->GetScanQps()); + lread_per_node_[node_index] += static_cast(tablet->tablet_ptr->GetLRead()); + + ++tablet_num_; + } + + ++tablet_node_num_; + } + + // if not ready tablets' ratio is higher than option, the node is considered + // abnormal + for (uint32_t i = 0; i < tablets_per_node_.size(); ++i) { + if (tablets_per_node_[i].size() != 0) { + double note_ready_num = static_cast(initial_tablets_not_ready_per_node_[i].size()); + double total_num = static_cast(tablets_per_node_[i].size()); + if (note_ready_num / total_num >= lb_options_.abnormal_node_ratio) { + abnormal_nodes_index_.insert(i); + } + } + } + + assert(table_num_ == tables_.size()); + assert(tablet_node_num_ == nodes_.size()); + assert(tablet_num_ == tablets_.size()); + + assert(table_num_ == tables_to_index_.size()); + assert(tablet_node_num_ == nodes_to_index_.size()); + assert(tablet_num_ == tablets_to_index_.size()); + + assert(tablet_num_ == tablet_index_to_node_index_.size()); + assert(tablet_num_ == initial_tablet_index_to_node_index_.size()); + assert(tablet_num_ == tablet_index_to_table_index_.size()); + + assert(tablet_node_num_ == tablets_per_node_.size()); + assert(tablet_node_num_ == initial_tablets_not_ready_per_node_.size()); + assert(tablet_node_num_ == size_per_node_.size()); + assert(tablet_node_num_ == flash_size_per_node_.size()); + assert(tablet_node_num_ == read_load_per_node_.size()); + assert(tablet_node_num_ == write_load_per_node_.size()); + assert(tablet_node_num_ == scan_load_per_node_.size()); + assert(tablet_node_num_ == lread_per_node_.size()); + assert(tablet_node_num_ == read_pending_per_node_.size()); + assert(tablet_node_num_ == write_pending_per_node_.size()); + assert(tablet_node_num_ == scan_pending_per_node_.size()); + assert(tablet_node_num_ >= abnormal_nodes_index_.size()); + + VLOG(20) << "[lb] construct Cluster cost time(ms):" << (get_micros() - start_time_ns) / 1000; +} - table_num_ = 0; - tablet_node_num_ = 0; - tablet_num_ = 0; - tablet_moved_num_ = 0; - - for (const auto& node : lb_nodes_) { - uint32_t node_index = nodes_.size(); - nodes_[node_index] = node; - - std::string addr = node->tablet_node_ptr->GetAddr(); - assert(nodes_to_index_.find(addr) == nodes_to_index_.end()); - nodes_to_index_[addr] = node_index; - - tablets_per_node_[node_index].clear(); - initial_tablets_not_ready_per_node_[node_index].clear(); - size_per_node_[node_index] = 0; - read_load_per_node_[node_index] = 0; - write_load_per_node_[node_index] = 0; - scan_load_per_node_[node_index] = 0; - - node_index_sorted_by_tablet_count_.emplace_back(node_index); - node_index_sorted_by_size_.emplace_back(node_index); - node_index_sorted_by_read_load_.emplace_back(node_index); - node_index_sorted_by_write_load_.emplace_back(node_index); - node_index_sorted_by_scan_load_.emplace_back(node_index); - - uint64_t read_pending_num = node->tablet_node_ptr->GetReadPending(); - if (read_pending_num > 0) { - read_pending_nodes_index_.insert(node_index); - read_load_per_node_[node_index] += lb_options_.read_pending_factor * read_pending_num; - } - - uint64_t write_pending_num = node->tablet_node_ptr->GetWritePending(); - if (write_pending_num > 0) { - write_pending_nodes_index_.insert(node_index); - write_load_per_node_[node_index] += lb_options_.write_pending_factor * write_pending_num; - } - - uint64_t scan_pending_num = node->tablet_node_ptr->GetScanPending(); - if (scan_pending_num > 0) { - scan_pending_nodes_index_.insert(node_index); - scan_load_per_node_[node_index] += lb_options_.scan_pending_factor * scan_pending_num; - } - - for (const auto& tablet : node->tablets) { - uint32_t tablet_index = tablets_.size(); - - RegisterTablet(tablet, tablet_index, node_index); - - tablets_per_node_[node_index].emplace_back(tablet_index); - if (tablets_[tablet_index]->tablet_ptr->GetStatus() != TabletMeta::kTabletReady) { - initial_tablets_not_ready_per_node_[node_index].emplace_back(tablet_index); - } - size_per_node_[node_index] += static_cast(tablet->tablet_ptr->GetDataSize()); - read_load_per_node_[node_index] += static_cast(tablet->tablet_ptr->GetReadQps()); - write_load_per_node_[node_index] += static_cast(tablet->tablet_ptr->GetWriteQps()); - scan_load_per_node_[node_index] += static_cast(tablet->tablet_ptr->GetScanQps()); - - ++tablet_num_; - } - - ++ tablet_node_num_; - } +Cluster::~Cluster() {} + +void Cluster::DebugCluster() { + LOG(INFO) << ""; + LOG(INFO) << "DebugCluster begin -----"; + + LOG(INFO) << "table_num_:" << table_num_; + LOG(INFO) << "tablet_node_num_:" << tablet_node_num_; + LOG(INFO) << "tablet_num_:" << tablet_num_; + LOG(INFO) << "tablet_moved_num_:" << tablet_moved_num_; + + LOG(INFO) << "[table_index -> table]:"; + for (const auto& table : tables_) { + LOG(INFO) << table.first << " -> " << table.second; + } + + LOG(INFO) << "[node_index -> node]:"; + for (const auto& node : nodes_) { + LOG(INFO) << node.first << " -> " << node.second->tablet_node_ptr->GetAddr(); + } + LOG(INFO) << "meta_table_node_index_:" << meta_table_node_index_; + + LOG(INFO) << "[tablet_index -> tablet]:"; + for (const auto& tablet : tablets_) { + LOG(INFO) << tablet.first << " -> " << tablet.second->tablet_ptr->GetPath(); + } + + LOG(INFO) << "[table -> table_index]:"; + for (const auto& table : tables_to_index_) { + LOG(INFO) << table.first << " -> " << table.second; + } + + LOG(INFO) << "[node -> node_index]:"; + for (const auto& node : nodes_to_index_) { + LOG(INFO) << node.first << " -> " << node.second; + } + + LOG(INFO) << "[tablet -> tablet_index]:"; + for (const auto& tablet : tablets_to_index_) { + LOG(INFO) << tablet.first << " -> " << tablet.second; + } + + LOG(INFO) << "[tablet_index -> node_index]:"; + for (const auto& it : tablet_index_to_node_index_) { + LOG(INFO) << it.first << " -> " << it.second; + } + + LOG(INFO) << "[initial tablet_index -> node_index]:"; + for (const auto& it : initial_tablet_index_to_node_index_) { + LOG(INFO) << it.first << " -> " << it.second; + } + + LOG(INFO) << "[tablet_index -> table_index]:"; + for (const auto& it : tablet_index_to_table_index_) { + LOG(INFO) << it.first << " -> " << it.second; + } + + LOG(INFO) << "[node_index -> tablets index]:"; + for (const auto& it : tablets_per_node_) { + std::string line = std::to_string(it.first) + " ->"; + for (const auto tablet : it.second) { + line += " "; + line += std::to_string(tablet); + } + LOG(INFO) << line; + } + + LOG(INFO) << "[node_index -> data size]:"; + for (const auto& it : size_per_node_) { + LOG(INFO) << it.first << " -> " << it.second << "B"; + } + + LOG(INFO) << "[node_index -> flash data size]:"; + for (const auto& it : flash_size_per_node_) { + LOG(INFO) << it.first << " -> " << it.second << "B"; + } + + LOG(INFO) << "[node_index -> read load]:"; + for (const auto& it : read_load_per_node_) { + LOG(INFO) << it.first << " -> " << it.second; + } + + LOG(INFO) << "[node_index -> write load]:"; + for (const auto& it : write_load_per_node_) { + LOG(INFO) << it.first << " -> " << it.second; + } + + LOG(INFO) << "[node_index -> scan load]:"; + for (const auto& it : scan_load_per_node_) { + LOG(INFO) << it.first << " -> " << it.second; + } + + LOG(INFO) << "[node_index -> lread]:"; + for (const auto& it : lread_per_node_) { + LOG(INFO) << it.first << " -> " << it.second; + } + + LOG(INFO) << "[read pending nodes index]:"; + for (const auto& it : read_pending_per_node_) { + LOG(INFO) << it.first << " -> " << it.second; + } + + LOG(INFO) << "[write pending nodes index]:"; + for (const auto& it : write_pending_per_node_) { + LOG(INFO) << it.first << " -> " << it.second; + } + + LOG(INFO) << "[scan pending nodes index]:"; + for (const auto& it : scan_pending_per_node_) { + LOG(INFO) << it.first << " -> " << it.second; + } + + LOG(INFO) << "[node_index -> not ready tablets index]:"; + for (const auto& it : initial_tablets_not_ready_per_node_) { + std::string line = std::to_string(it.first) + " ->"; + for (const auto tablet : it.second) { + line += " "; + line += std::to_string(tablet); + } + LOG(INFO) << line; + } + + LOG(INFO) << "[abnormal nodes index]:"; + for (const auto& node : abnormal_nodes_index_) { + LOG(INFO) << node; + } + + LOG(INFO) << "DebugCluster end -----"; + LOG(INFO) << ""; +} + +bool Cluster::ValidAction(const std::shared_ptr& action) { + switch (action->GetType()) { + case Action::Type::EMPTY: + return false; + case Action::Type::ASSIGN: + return true; + case Action::Type::MOVE: + return true; + case Action::Type::SWAP: + return true; + default: + return false; + } +} + +void Cluster::DoAction(const std::shared_ptr& action) { + switch (action->GetType()) { + case Action::Type::EMPTY: + break; + case Action::Type::ASSIGN: + break; + case Action::Type::MOVE: { + MoveAction* move_action = dynamic_cast(action.get()); + VLOG(20) << "[lb] DoAction: " << move_action->ToString(); + assert(move_action->source_node_index_ != move_action->dest_node_index_); + + RemoveTablet(move_action->tablet_index_, move_action->source_node_index_); + AddTablet(move_action->tablet_index_, move_action->dest_node_index_); + MoveTablet(move_action->tablet_index_, move_action->source_node_index_, + move_action->dest_node_index_); + + break; + } + case Action::Type::SWAP: + break; + default: + break; + } +} + +void Cluster::SortNodesByTabletCount(std::vector* sorted_node_index) { + sorted_node_index->clear(); - // if not ready tablets' ratio is higher than option, the node is considered abnormal - for (uint32_t i = 0; i < tablets_per_node_.size(); ++i) { - if (tablets_per_node_[i].size() != 0) { - double note_ready_num = static_cast(initial_tablets_not_ready_per_node_[i].size()); - double total_num = static_cast(tablets_per_node_[i].size()); - if (note_ready_num / total_num >= lb_options_.abnormal_node_ratio) { - abnormal_nodes_index_.insert(i); - } - } + for (const auto& e : tablets_per_node_) { + sorted_node_index->emplace_back(e.first); + } + + std::sort(sorted_node_index->begin(), sorted_node_index->end(), [this](int a, int b) { + return tablets_per_node_[a].size() < tablets_per_node_[b].size(); + }); +} + +void Cluster::SortNodesBySize(std::vector* sorted_node_index) { + sorted_node_index->clear(); + + for (const auto& e : size_per_node_) { + sorted_node_index->emplace_back(e.first); + } + + std::sort(sorted_node_index->begin(), sorted_node_index->end(), + [this](int a, int b) { return size_per_node_[a] < size_per_node_[b]; }); +} + +void Cluster::SortNodesByFlashSizePercent(std::vector* sorted_node_index) { + sorted_node_index->clear(); + + for (const auto& e : flash_size_per_node_) { + if (nodes_[e.first]->tablet_node_ptr->GetPersistentCacheSize() <= 0) { + continue; } + sorted_node_index->emplace_back(e.first); + } + + std::sort(sorted_node_index->begin(), sorted_node_index->end(), [this](int a, int b) { + assert(nodes_[a]->tablet_node_ptr->GetPersistentCacheSize() > 0); + assert(nodes_[b]->tablet_node_ptr->GetPersistentCacheSize() > 0); + return static_cast(flash_size_per_node_[a]) / + nodes_[a]->tablet_node_ptr->GetPersistentCacheSize() < + static_cast(flash_size_per_node_[b]) / + nodes_[b]->tablet_node_ptr->GetPersistentCacheSize(); + }); +} - assert(table_num_ == tables_.size()); - assert(tablet_node_num_ == nodes_.size()); - assert(tablet_num_ == tablets_.size()); +void Cluster::SortNodesByReadLoad(std::vector* sorted_node_index) { + sorted_node_index->clear(); - assert(table_num_ == tables_to_index_.size()); - assert(tablet_node_num_ == nodes_to_index_.size()); - assert(tablet_num_ == tablets_to_index_.size()); + for (const auto& e : read_load_per_node_) { + sorted_node_index->emplace_back(e.first); + } - assert(tablet_num_ == tablet_index_to_node_index_.size()); - assert(tablet_num_ == initial_tablet_index_to_node_index_.size()); - assert(tablet_num_ == tablet_index_to_table_index_.size()); + std::sort(sorted_node_index->begin(), sorted_node_index->end(), + [this](int a, int b) { return read_load_per_node_[a] < read_load_per_node_[b]; }); +} - assert(tablet_node_num_ == tablets_per_node_.size()); - assert(tablet_node_num_ == initial_tablets_not_ready_per_node_.size()); - assert(tablet_node_num_ == size_per_node_.size()); - assert(tablet_node_num_ == read_load_per_node_.size()); - assert(tablet_node_num_ == write_load_per_node_.size()); - assert(tablet_node_num_ == scan_load_per_node_.size()); - assert(abnormal_nodes_index_.size() <= tablet_node_num_); - assert(read_pending_nodes_index_.size() <= tablet_node_num_); - assert(write_pending_nodes_index_.size() <= tablet_node_num_); - assert(scan_pending_nodes_index_.size() <= tablet_node_num_); +void Cluster::SortNodesByWriteLoad(std::vector* sorted_node_index) { + sorted_node_index->clear(); - assert(tablet_node_num_ == node_index_sorted_by_tablet_count_.size()); - assert(tablet_node_num_ == node_index_sorted_by_size_.size()); - assert(tablet_node_num_ == node_index_sorted_by_read_load_.size()); - assert(tablet_node_num_ == node_index_sorted_by_write_load_.size()); - assert(tablet_node_num_ == node_index_sorted_by_scan_load_.size()); + for (const auto& e : write_load_per_node_) { + sorted_node_index->emplace_back(e.first); + } - VLOG(20) << "[lb] construct Cluster cost time(ms):" << (get_micros() - start_time_ns) / 1000; + std::sort(sorted_node_index->begin(), sorted_node_index->end(), + [this](int a, int b) { return write_load_per_node_[a] < write_load_per_node_[b]; }); } -Cluster::~Cluster() { +void Cluster::SortNodesByScanLoad(std::vector* sorted_node_index) { + sorted_node_index->clear(); + + for (const auto& e : scan_load_per_node_) { + sorted_node_index->emplace_back(e.first); + } + + std::sort(sorted_node_index->begin(), sorted_node_index->end(), + [this](int a, int b) { return scan_load_per_node_[a] < scan_load_per_node_[b]; }); } -void Cluster::DebugCluster() { - LOG(INFO) << ""; - LOG(INFO) << "DebugCluster begin -----"; +void Cluster::SortNodesByLRead(std::vector* sorted_node_index) { + sorted_node_index->clear(); - LOG(INFO) << "table_num_:" << table_num_; - LOG(INFO) << "tablet_node_num_:" << tablet_node_num_; - LOG(INFO) << "tablet_num_:" << tablet_num_; - LOG(INFO) << "tablet_moved_num_:" << tablet_moved_num_; + for (const auto& e : lread_per_node_) { + sorted_node_index->emplace_back(e.first); + } - LOG(INFO) << "[table_index -> table]:"; - for (const auto& table : tables_) { - LOG(INFO) << table.first << " -> " << table.second; - } + std::sort(sorted_node_index->begin(), sorted_node_index->end(), + [this](int a, int b) { return lread_per_node_[a] < lread_per_node_[b]; }); +} - LOG(INFO) << "[node_index -> node]:"; - for (const auto& node : nodes_) { - LOG(INFO) << node.first << " -> " << node.second->tablet_node_ptr->GetAddr(); - } - LOG(INFO) << "meta_table_node_index_:" << meta_table_node_index_; +void Cluster::SortNodesByComplexLoad(std::vector* sorted_node_index) { + sorted_node_index->clear(); - LOG(INFO) << "[tablet_index -> tablet]:"; - for (const auto& tablet : tablets_) { - LOG(INFO) << tablet.first << " -> " << tablet.second->tablet_ptr->GetPath(); - } + for (const auto& e : size_per_node_) { + sorted_node_index->emplace_back(e.first); + } - LOG(INFO) << "[table -> table_index]:"; - for (const auto& table : tables_to_index_) { - LOG(INFO) << table.first << " -> " << table.second; - } + std::sort(sorted_node_index->begin(), sorted_node_index->end(), + [this](int a, int b) { return size_per_node_[a] < size_per_node_[b]; }); +} - LOG(INFO) << "[node -> node_index]:"; - for (const auto& node : nodes_to_index_) { - LOG(INFO) << node.first << " -> " << node.second; - } +void Cluster::SortTabletsOfNodeByReadLoad(uint32_t node_index, + std::vector* sorted_tablet_index) { + sorted_tablet_index->clear(); - LOG(INFO) << "[tablet -> tablet_index]:"; - for (const auto& tablet : tablets_to_index_) { - LOG(INFO) << tablet.first << " -> " << tablet.second; - } + assert(tablets_per_node_.find(node_index) != tablets_per_node_.end()); + for (const auto& e : tablets_per_node_[node_index]) { + sorted_tablet_index->emplace_back(e); + } - LOG(INFO) << "[tablet_index -> node_index]:"; - for (const auto& it : tablet_index_to_node_index_) { - LOG(INFO) << it.first << " -> " << it.second; - } + std::sort(sorted_tablet_index->begin(), sorted_tablet_index->end(), [this](int a, int b) { + return tablets_[a]->tablet_ptr->GetReadQps() < tablets_[b]->tablet_ptr->GetReadQps(); + }); +} - LOG(INFO) << "[initial tablet_index -> node_index]:"; - for (const auto& it : initial_tablet_index_to_node_index_) { - LOG(INFO) << it.first << " -> " << it.second; - } +void Cluster::SortTabletsOfNodeByWriteLoad(uint32_t node_index, + std::vector* sorted_tablet_index) { + sorted_tablet_index->clear(); - LOG(INFO) << "[tablet_index -> table_index]:"; - for (const auto& it : tablet_index_to_table_index_) { - LOG(INFO) << it.first << " -> " << it.second; - } + assert(tablets_per_node_.find(node_index) != tablets_per_node_.end()); + for (const auto& e : tablets_per_node_[node_index]) { + sorted_tablet_index->emplace_back(e); + } - LOG(INFO) << "[node_index -> tablets index]:"; - for (const auto& it : tablets_per_node_) { - std::string line = std::to_string(it.first) + " ->"; - for (const auto tablet : it.second) { - line += " "; - line += std::to_string(tablet); - } - LOG(INFO) << line; - } + std::sort(sorted_tablet_index->begin(), sorted_tablet_index->end(), [this](int a, int b) { + return tablets_[a]->tablet_ptr->GetWriteQps() < tablets_[b]->tablet_ptr->GetWriteQps(); + }); +} - LOG(INFO) << "[node_index -> data size]:"; - for (const auto& it : size_per_node_) { - LOG(INFO) << it.first << " -> " << it.second << "B"; - } +void Cluster::SortTabletsOfNodeByScanLoad(uint32_t node_index, + std::vector* sorted_tablet_index) { + sorted_tablet_index->clear(); - LOG(INFO) << "[node_index -> read load]:"; - for (const auto& it : read_load_per_node_) { - LOG(INFO) << it.first << " -> " << it.second; - } + assert(tablets_per_node_.find(node_index) != tablets_per_node_.end()); + for (const auto& e : tablets_per_node_[node_index]) { + sorted_tablet_index->emplace_back(e); + } - LOG(INFO) << "[node_index -> write load]:"; - for (const auto& it : write_load_per_node_) { - LOG(INFO) << it.first << " -> " << it.second; - } + std::sort(sorted_tablet_index->begin(), sorted_tablet_index->end(), [this](int a, int b) { + return tablets_[a]->tablet_ptr->GetScanQps() < tablets_[b]->tablet_ptr->GetScanQps(); + }); +} - LOG(INFO) << "[node_index -> scan load]:"; - for (const auto& it : scan_load_per_node_) { - LOG(INFO) << it.first << " -> " << it.second; - } +void Cluster::SortTabletsOfNodeByLRead(uint32_t node_index, + std::vector* sorted_tablet_index) { + sorted_tablet_index->clear(); - LOG(INFO) << "[tablets index of moved too frequently]:"; - for (const auto& tablet : tablets_moved_too_frequently_) { - LOG(INFO) << tablet; - } + assert(tablets_per_node_.find(node_index) != tablets_per_node_.end()); + for (const auto& e : tablets_per_node_[node_index]) { + sorted_tablet_index->emplace_back(e); + } - LOG(INFO) << "[node_index -> not ready tablets index]:"; - for (const auto& it : initial_tablets_not_ready_per_node_) { - std::string line = std::to_string(it.first) + " ->"; - for (const auto tablet : it.second) { - line += " "; - line += std::to_string(tablet); - } - LOG(INFO) << line; - } + std::sort(sorted_tablet_index->begin(), sorted_tablet_index->end(), [this](int a, int b) { + return tablets_[a]->tablet_ptr->GetLRead() < tablets_[b]->tablet_ptr->GetLRead(); + }); +} - LOG(INFO) << "[abnormal nodes index]:"; - for (const auto& node: abnormal_nodes_index_) { - LOG(INFO) << node; - } +bool Cluster::IsMetaNode(uint32_t node_index) { + assert(nodes_.find(node_index) != nodes_.end()); + return node_index == meta_table_node_index_; +} - LOG(INFO) << "[tablets index of moved to abnormal nodes]:"; - for (const auto& tablet : tablets_moved_to_abnormal_nodes_) { - LOG(INFO) << tablet; - } +bool Cluster::IsReadyNode(uint32_t node_index) { + assert(nodes_.find(node_index) != nodes_.end()); + if (nodes_[node_index]->tablet_node_ptr->GetState() == tera::master::kReady) { + return true; + } else { + return false; + } +} - LOG(INFO) << "[read pending nodes index]:"; - for (const auto& node: read_pending_nodes_index_) { - LOG(INFO) << node; - } +bool Cluster::IsReadPendingNode(uint32_t node_index) { + assert(read_pending_per_node_.find(node_index) != read_pending_per_node_.end()); + return read_pending_per_node_[node_index] > 0; +} - LOG(INFO) << "[tablets index of moved to read pending nodes]:"; - for (const auto& tablet : tablets_moved_to_read_pending_nodes_) { - LOG(INFO) << tablet; - } +bool Cluster::IsWritePendingNode(uint32_t node_index) { + assert(write_pending_per_node_.find(node_index) != write_pending_per_node_.end()); + return write_pending_per_node_[node_index] > 0; +} - LOG(INFO) << "[write pending nodes index]:"; - for (const auto& node: write_pending_nodes_index_) { - LOG(INFO) << node; - } +bool Cluster::IsScanPendingNode(uint32_t node_index) { + assert(scan_pending_per_node_.find(node_index) != scan_pending_per_node_.end()); + return scan_pending_per_node_[node_index] > 0; +} - LOG(INFO) << "[tablets index of moved to write pending nodes]:"; - for (const auto& tablet : tablets_moved_to_write_pending_nodes_) { - LOG(INFO) << tablet; - } +bool Cluster::IsPendingNode(uint32_t node_index) { + if (IsReadPendingNode(node_index) || IsWritePendingNode(node_index) || + IsScanPendingNode(node_index)) { + return true; + } else { + return false; + } +} - LOG(INFO) << "[scan pending nodes index]:"; - for (const auto& node: scan_pending_nodes_index_) { - LOG(INFO) << node; - } +bool Cluster::IsHeavyReadPendingNode(uint32_t node_index) { + assert(read_pending_per_node_.find(node_index) != read_pending_per_node_.end()); + return read_pending_per_node_[node_index] >= lb_options_.heavy_read_pending_threshold; +} - LOG(INFO) << "[tablets index of moved to scan pending nodes]:"; - for (const auto& tablet : tablets_moved_to_scan_pending_nodes_) { - LOG(INFO) << tablet; - } +bool Cluster::IsHeavyWritePendingNode(uint32_t node_index) { + assert(write_pending_per_node_.find(node_index) != write_pending_per_node_.end()); + return write_pending_per_node_[node_index] >= lb_options_.heavy_write_pending_threshold; +} - LOG(INFO) << "DebugCluster end -----"; - LOG(INFO) << ""; +bool Cluster::IsHeavyScanPendingNode(uint32_t node_index) { + assert(scan_pending_per_node_.find(node_index) != scan_pending_per_node_.end()); + return scan_pending_per_node_[node_index] >= lb_options_.heavy_scan_pending_threshold; } -bool Cluster::ValidAction(const std::shared_ptr& action) { - switch (action->GetType()) { - case Action::Type::EMPTY: - return false; - case Action::Type::ASSIGN: - return true; - case Action::Type::MOVE: { - MoveAction* move_action = dynamic_cast(action.get()); - if (tablets_[move_action->tablet_index_]->tablet_ptr->GetStatus() != TabletMeta::kTabletReady) { - VLOG(20) << "[lb] invalid action, reason:tablet not ready, tablet status:" - << StatusCodeToString(tablets_[move_action->tablet_index_]->tablet_ptr->GetStatus()); - return false; - } - - if (tables_[tablet_index_to_table_index_[move_action->tablet_index_]] == - lb_options_.meta_table_name) { - VLOG(20) << "[lb] invalid action, reason:move meta table"; - return false; - } - - if (lb_options_.meta_table_isolate_enabled && - move_action->dest_node_index_ == meta_table_node_index_) { - VLOG(20) << "[lb] invalid action, reason:move tablet to meta table node"; - return false; - } - - return true; - } - case Action::Type::SWAP: - return true;; - default: - return false; - } +bool Cluster::IsHeavyPendingNode(uint32_t node_index) { + if (IsHeavyReadPendingNode(node_index) || IsHeavyWritePendingNode(node_index) || + IsHeavyScanPendingNode(node_index)) { + return true; + } else { + return false; + } } -void Cluster::DoAction(const std::shared_ptr& action) { - switch (action->GetType()) { - case Action::Type::EMPTY: - break; - case Action::Type::ASSIGN: - break; - case Action::Type::MOVE: { - MoveAction* move_action = dynamic_cast(action.get()); - VLOG(20) << "[lb] DoAction: " << move_action->ToString(); - assert(move_action->source_node_index_ != move_action->dest_node_index_); - - RemoveTablet(move_action->tablet_index_, move_action->source_node_index_); - AddTablet(move_action->tablet_index_, move_action->dest_node_index_); - MoveTablet(move_action->tablet_index_, move_action->source_node_index_, move_action->dest_node_index_); - - break; - } - case Action::Type::SWAP: - break; - default: - break; +uint32_t Cluster::HeavyPendingNodeNum() { + uint32_t num = 0; + for (auto& it : nodes_) { + if (IsHeavyPendingNode(it.first)) { + ++num; } + } + return num; } -void Cluster::SortNodesByTabletCount() { - std::sort( - node_index_sorted_by_tablet_count_.begin(), - node_index_sorted_by_tablet_count_.end(), - [this](int a, int b) { - return tablets_per_node_[a].size() < tablets_per_node_[b].size(); - }); -} - -void Cluster::SortNodesBySize() { - std::sort( - node_index_sorted_by_size_.begin(), - node_index_sorted_by_size_.end(), - [this](int a, int b) { - return size_per_node_[a] < size_per_node_[b]; - }); -} - -void Cluster::SortNodesByReadLoad() { - std::sort( - node_index_sorted_by_read_load_.begin(), - node_index_sorted_by_read_load_.end(), - [this](int a, int b) { - return read_load_per_node_[a] < read_load_per_node_[b]; - }); -} - -void Cluster::SortNodesByWriteLoad() { - std::sort( - node_index_sorted_by_write_load_.begin(), - node_index_sorted_by_write_load_.end(), - [this](int a, int b) { - return write_load_per_node_[a] < write_load_per_node_[b]; - }); -} - -void Cluster::SortNodesByScanLoad() { - std::sort( - node_index_sorted_by_scan_load_.begin(), - node_index_sorted_by_scan_load_.end(), - [this](int a, int b) { - return scan_load_per_node_[a] < scan_load_per_node_[b]; - }); -} - -void Cluster::RegisterTablet(const std::shared_ptr& tablet, uint32_t tablet_index, uint32_t node_index) { - std::string table_name = tablet->tablet_ptr->GetTableName(); - if (tables_to_index_.find(table_name) == tables_to_index_.end()) { - uint32_t table_index = tables_.size(); - tables_[table_index] = table_name; - tables_to_index_[table_name] = table_index; - ++table_num_; - - if (table_name == lb_options_.meta_table_name) { - meta_table_node_index_ = node_index; - } - } +bool Cluster::IsHeavyLReadNode(uint32_t node_index) { + assert(lread_per_node_.find(node_index) != lread_per_node_.end()); + if (lread_per_node_[node_index] >= lb_options_.heavy_lread_threshold) { + return true; + } else { + return false; + } +} + +bool Cluster::IsAbnormalNode(uint32_t node_index) { + if (abnormal_nodes_index_.find(node_index) == abnormal_nodes_index_.end()) { + return false; + } else { + return true; + } +} + +bool Cluster::IsFlashSizeEnough(uint32_t tablet_index, uint32_t node_index) { + assert(tablets_.find(tablet_index) != tablets_.end()); + assert(nodes_.find(node_index) != nodes_.end()); - std::string path = tablet->tablet_ptr->GetPath(); - tablets_to_index_[path] = tablet_index; - tablets_[tablet_index] = tablet; + if (lb_options_.flash_size_cost_weight == 0) { + return true; + } - tablet_index_to_node_index_[tablet_index] = node_index; - initial_tablet_index_to_node_index_[tablet_index] = node_index; - tablet_index_to_table_index_[tablet_index] = tables_to_index_[table_name]; + if (tablets_[tablet_index]->tablet_ptr->HasFlashLg() && + nodes_[node_index]->tablet_node_ptr->GetPersistentCacheSize() == 0) { + return false; + } + + return true; } -void Cluster::AddTablet(uint32_t tablet_index, uint32_t to_node_index) { - tablets_per_node_[to_node_index].emplace_back(tablet_index); +bool Cluster::IsProperLocation(uint32_t tablet_index, uint32_t node_index) { + if (lb_options_.meta_table_isolate_enabled && IsMetaNode(node_index)) { + return false; + } + + if (!IsReadyNode(node_index)) { + return false; + } - size_per_node_[to_node_index] += static_cast( - tablets_[tablet_index]->tablet_ptr->GetDataSize()); - read_load_per_node_[to_node_index] += static_cast( - tablets_[tablet_index]->tablet_ptr->GetReadQps()); - write_load_per_node_[to_node_index] += static_cast( - tablets_[tablet_index]->tablet_ptr->GetWriteQps()); - scan_load_per_node_[to_node_index] += static_cast( - tablets_[tablet_index]->tablet_ptr->GetScanQps()); + if (IsPendingNode(node_index)) { + return false; + } + + if (IsHeavyLReadNode(node_index)) { + return false; + } + + if (IsAbnormalNode(node_index)) { + return false; + } + + if (!IsFlashSizeEnough(tablet_index, node_index)) { + return false; + } + + return true; } -void Cluster::RemoveTablet(uint32_t tablet_index, uint32_t from_node_index) { - if (tablets_per_node_.find(from_node_index) == tablets_per_node_.end()) { - return; - } - auto& tablets = tablets_per_node_[from_node_index]; - for (auto it = tablets.begin(); it != tablets.end();) { - if (*it == tablet_index) { - it = tablets.erase(it); - break; - } else { - ++it; - } - } +bool Cluster::IsMetaTablet(uint32_t tablet_index) { + assert(tablets_.find(tablet_index) != tablets_.end()); - size_per_node_[from_node_index] -= static_cast( - tablets_[tablet_index]->tablet_ptr->GetDataSize()); - read_load_per_node_[from_node_index] -= static_cast( - tablets_[tablet_index]->tablet_ptr->GetReadQps()); - write_load_per_node_[from_node_index] -= static_cast( - tablets_[tablet_index]->tablet_ptr->GetWriteQps()); - scan_load_per_node_[from_node_index] -= static_cast( - tablets_[tablet_index]->tablet_ptr->GetScanQps()); - - assert(size_per_node_[from_node_index] >= 0); - assert(read_load_per_node_[from_node_index] >= 0); - assert(write_load_per_node_[from_node_index] >= 0); - assert(scan_load_per_node_[from_node_index] >= 0); -} - -void Cluster::MoveTablet(uint32_t tablet_index, uint32_t source_node_index, uint32_t dest_node_index) { - tablet_index_to_node_index_[tablet_index] = dest_node_index; - - if (initial_tablet_index_to_node_index_[tablet_index] == source_node_index) { - ++tablet_moved_num_; - - int64_t last_move_time_us = tablets_[tablet_index]->tablet_ptr->LastMoveTime(); - int64_t current_time_us = get_micros(); - if (current_time_us - last_move_time_us < - 1000000 * static_cast(lb_options_.tablet_move_too_frequently_threshold_s)) { - tablets_moved_too_frequently_.insert(tablet_index); - VLOG(20) << "[lb] add tablet moved too frequently, tablet index: " << tablet_index - << ", last_move_time: " << last_move_time_us << ", current time: " << current_time_us - << ", tablets_moved_too_frequently_ size: " << tablets_moved_too_frequently_.size(); - } - } else if (initial_tablet_index_to_node_index_[tablet_index] == dest_node_index) { - // tablet moved back - --tablet_moved_num_; - assert(tablet_moved_num_ >= 0); - - if (tablets_moved_too_frequently_.find(tablet_index) != tablets_moved_too_frequently_.end()) { - tablets_moved_too_frequently_.erase(tablet_index); - VLOG(20) << "[lb] remove tablet moved too frequently, tablet index: " << tablet_index - << ", tablets_moved_too_frequently_ size: " << tablets_moved_too_frequently_.size(); - } - } else { - } + if (tables_[tablet_index_to_table_index_[tablet_index]] == lb_options_.meta_table_name) { + return true; + } else { + return false; + } +} - if (abnormal_nodes_index_.find(dest_node_index) != abnormal_nodes_index_.end() && - dest_node_index != initial_tablet_index_to_node_index_[tablet_index]) { - tablets_moved_to_abnormal_nodes_.insert(tablet_index); - VLOG(20) << "[lb] add tablet moved to abnormal node, tablet index: " << tablet_index - << ", node index: " << dest_node_index - << ", tablets_moved_to_abnormal_nodes_ size: " << tablets_moved_to_abnormal_nodes_.size(); - } else if (abnormal_nodes_index_.find(source_node_index) != abnormal_nodes_index_.end()) { - if (tablets_moved_to_abnormal_nodes_.find(tablet_index) != tablets_moved_to_abnormal_nodes_.end()) { - tablets_moved_to_abnormal_nodes_.erase(tablet_index); - VLOG(20) << "[lb] remove tablet moved to abnormal nodes, tablet index: " << tablet_index - << ", tablets_moved_to_abnormal_nodes_ size: " << tablets_moved_to_abnormal_nodes_.size(); - } - } else { - } +bool Cluster::IsReadyTablet(uint32_t tablet_index) { + assert(tablets_.find(tablet_index) != tablets_.end()); - if (read_pending_nodes_index_.find(dest_node_index) != read_pending_nodes_index_.end() && - dest_node_index != initial_tablet_index_to_node_index_[tablet_index]) { - tablets_moved_to_read_pending_nodes_.insert(tablet_index); - VLOG(20) << "[lb] add tablet moved to read pending node, tablet index: " << tablet_index - << ", node index: " << dest_node_index - << ", tablets_moved_to_read_pending_nodes_ size: " << tablets_moved_to_read_pending_nodes_.size(); - } else if (read_pending_nodes_index_.find(source_node_index) != read_pending_nodes_index_.end()) { - if (tablets_moved_to_read_pending_nodes_.find(tablet_index) != tablets_moved_to_read_pending_nodes_.end()) { - tablets_moved_to_read_pending_nodes_.erase(tablet_index); - VLOG(20) << "[lb] remove tablet moved to read pending nodes, tablet index: " << tablet_index - << ", tablets_moved_to_read_pending_nodes_ size: " << tablets_moved_to_read_pending_nodes_.size(); - } - } else { - } + if (tablets_[tablet_index]->tablet_ptr->GetStatus() == TabletMeta::kTabletReady) { + return true; + } else { + return false; + } +} - if (write_pending_nodes_index_.find(dest_node_index) != write_pending_nodes_index_.end() && - dest_node_index != initial_tablet_index_to_node_index_[tablet_index]) { - tablets_moved_to_write_pending_nodes_.insert(tablet_index); - VLOG(20) << "[lb] add tablet moved to write pending node, tablet index: " << tablet_index - << ", node index: " << dest_node_index - << ", tablets_moved_to_write_pending_nodes_ size: " << tablets_moved_to_write_pending_nodes_.size(); - } else if (write_pending_nodes_index_.find(source_node_index) != write_pending_nodes_index_.end()) { - if (tablets_moved_to_write_pending_nodes_.find(tablet_index) != tablets_moved_to_write_pending_nodes_.end()) { - tablets_moved_to_write_pending_nodes_.erase(tablet_index); - VLOG(20) << "[lb] remove tablet moved to write pending nodes, tablet index: " << tablet_index - << ", tablets_moved_to_write_pending_nodes_ size: " << tablets_moved_to_write_pending_nodes_.size(); - } - } else { +bool Cluster::IsTabletMoveTooFrequent(uint32_t tablet_index) { + assert(tablets_.find(tablet_index) != tablets_.end()); + + int64_t last_move_time_us = tablets_[tablet_index]->tablet_ptr->LastMoveTime(); + int64_t current_time_us = get_micros(); + if (current_time_us - last_move_time_us < + 1000000 * static_cast(lb_options_.tablet_move_too_frequently_threshold_s)) { + VLOG(20) << "[lb] tablet move too frequently, tablet index: " << tablet_index + << ", path: " << tablets_[tablet_index]->tablet_ptr->GetPath() + << ", last_move_time: " << last_move_time_us << ", current time: " << current_time_us; + return true; + } else { + return false; + } +} + +bool Cluster::IsProperTargetTablet(uint32_t tablet_index) { + if (IsMetaTablet(tablet_index)) { + return false; + } + + if (!IsReadyTablet(tablet_index)) { + return false; + } + + if (IsTabletMoveTooFrequent(tablet_index)) { + return false; + } + + return true; +} + +void Cluster::RegisterTablet(const std::shared_ptr& tablet, uint32_t tablet_index, + uint32_t node_index) { + std::string table_name = tablet->tablet_ptr->GetTableName(); + if (tables_to_index_.find(table_name) == tables_to_index_.end()) { + uint32_t table_index = tables_.size(); + tables_[table_index] = table_name; + tables_to_index_[table_name] = table_index; + ++table_num_; + + if (table_name == lb_options_.meta_table_name) { + meta_table_node_index_ = node_index; } + } + + std::string path = tablet->tablet_ptr->GetPath(); + tablets_to_index_[path] = tablet_index; + tablets_[tablet_index] = tablet; - if (scan_pending_nodes_index_.find(dest_node_index) != scan_pending_nodes_index_.end() && - dest_node_index != initial_tablet_index_to_node_index_[tablet_index]) { - tablets_moved_to_scan_pending_nodes_.insert(tablet_index); - VLOG(20) << "[lb] add tablet moved to scan pending node, tablet index: " << tablet_index - << ", node index: " << dest_node_index - << ", tablets_moved_to_scan_pending_nodes_ size: " << tablets_moved_to_scan_pending_nodes_.size(); - } else if (scan_pending_nodes_index_.find(source_node_index) != scan_pending_nodes_index_.end()) { - if (tablets_moved_to_scan_pending_nodes_.find(tablet_index) != tablets_moved_to_scan_pending_nodes_.end()) { - tablets_moved_to_scan_pending_nodes_.erase(tablet_index); - VLOG(20) << "[lb] remove tablet moved to scan pending nodes, tablet index: " << tablet_index - << ", tablets_moved_to_scan_pending_nodes_ size: " << tablets_moved_to_scan_pending_nodes_.size(); - } + tablet_index_to_node_index_[tablet_index] = node_index; + initial_tablet_index_to_node_index_[tablet_index] = node_index; + tablet_index_to_table_index_[tablet_index] = tables_to_index_[table_name]; +} + +void Cluster::AddTablet(uint32_t tablet_index, uint32_t to_node_index) { + tablets_per_node_[to_node_index].emplace_back(tablet_index); + + size_per_node_[to_node_index] += + static_cast(tablets_[tablet_index]->tablet_ptr->GetDataSize()); + flash_size_per_node_[to_node_index] += + static_cast(tablets_[tablet_index]->tablet_ptr->GetDataSizeOnFlash()); + read_load_per_node_[to_node_index] += + static_cast(tablets_[tablet_index]->tablet_ptr->GetReadQps()); + write_load_per_node_[to_node_index] += + static_cast(tablets_[tablet_index]->tablet_ptr->GetWriteQps()); + scan_load_per_node_[to_node_index] += + static_cast(tablets_[tablet_index]->tablet_ptr->GetScanQps()); + lread_per_node_[to_node_index] += + static_cast(tablets_[tablet_index]->tablet_ptr->GetLRead()); +} + +void Cluster::RemoveTablet(uint32_t tablet_index, uint32_t from_node_index) { + if (tablets_per_node_.find(from_node_index) == tablets_per_node_.end()) { + return; + } + auto& tablets = tablets_per_node_[from_node_index]; + for (auto it = tablets.begin(); it != tablets.end();) { + if (*it == tablet_index) { + it = tablets.erase(it); + break; } else { - } + ++it; + } + } + + size_per_node_[from_node_index] -= + static_cast(tablets_[tablet_index]->tablet_ptr->GetDataSize()); + flash_size_per_node_[from_node_index] -= + static_cast(tablets_[tablet_index]->tablet_ptr->GetDataSizeOnFlash()); + read_load_per_node_[from_node_index] -= + static_cast(tablets_[tablet_index]->tablet_ptr->GetReadQps()); + write_load_per_node_[from_node_index] -= + static_cast(tablets_[tablet_index]->tablet_ptr->GetWriteQps()); + scan_load_per_node_[from_node_index] -= + static_cast(tablets_[tablet_index]->tablet_ptr->GetScanQps()); + lread_per_node_[from_node_index] -= + static_cast(tablets_[tablet_index]->tablet_ptr->GetLRead()); + + assert(size_per_node_[from_node_index] >= 0); + assert(flash_size_per_node_[from_node_index] >= 0); + assert(read_load_per_node_[from_node_index] >= 0); + assert(write_load_per_node_[from_node_index] >= 0); + assert(scan_load_per_node_[from_node_index] >= 0); + assert(lread_per_node_[from_node_index] >= 0); +} + +void Cluster::MoveTablet(uint32_t tablet_index, uint32_t source_node_index, + uint32_t dest_node_index) { + tablet_index_to_node_index_[tablet_index] = dest_node_index; + + if (initial_tablet_index_to_node_index_[tablet_index] == source_node_index) { + ++tablet_moved_num_; + } else if (initial_tablet_index_to_node_index_[tablet_index] == dest_node_index) { + // tablet moved back + --tablet_moved_num_; + assert(tablet_moved_num_ >= 0); + } } -} // namespace load_balancer -} // namespace tera +} // namespace load_balancer +} // namespace tera diff --git a/src/load_balancer/cluster.h b/src/load_balancer/cluster.h index 8a22acd7c..c18395578 100644 --- a/src/load_balancer/cluster.h +++ b/src/load_balancer/cluster.h @@ -19,112 +19,141 @@ namespace tera { namespace load_balancer { +const uint32_t kInvalidNodeIndex = std::numeric_limits::max(); +const uint32_t kInvalidTabletIndex = std::numeric_limits::max(); + class Cluster { -public: - Cluster(const std::vector>& tablet_nodes, - const LBOptions& options); - - virtual ~Cluster(); - - void DebugCluster(); - - bool ValidAction(const std::shared_ptr& action); - - void DoAction(const std::shared_ptr& action); - - void SortNodesByTabletCount(); - - void SortNodesBySize(); - - void SortNodesByReadLoad(); - - void SortNodesByWriteLoad(); - - void SortNodesByScanLoad(); - -private: - void RegisterTablet(const std::shared_ptr& tablet, uint32_t tablet_index, uint32_t node_index); - void AddTablet(uint32_t tablet_index, uint32_t to_node_index); - void RemoveTablet(uint32_t tablet_index, uint32_t from_node_index); - void MoveTablet(uint32_t tablet_index, uint32_t source_node_index, uint32_t dest_node_index); - -// cluster info, use index to speed up the calculation -// make these info public also for speeding up -public: - uint32_t table_num_; - uint32_t tablet_node_num_; - uint32_t tablet_num_; - uint32_t tablet_moved_num_; - - // table_index -> table - std::map tables_; - // node_index -> node - std::map> nodes_; - // tablet_index -> tablet - std::map> tablets_; - - // table -> table_index - std::map tables_to_index_; - // node -> node_index - std::map nodes_to_index_; - // tablet -> tablet_index - std::map tablets_to_index_; - - // tablet_index -> node_index - std::map tablet_index_to_node_index_; - // initial tablet_index -> node_index, it's the initial cluster state - std::map initial_tablet_index_to_node_index_; - // tablet_index -> table_index - std::map tablet_index_to_table_index_; - - // node_index -> tablets index on the node - std::map> tablets_per_node_; - // node_index -> tablets index of not ready on the node - std::map> initial_tablets_not_ready_per_node_; - // abnormal nodes index - std::unordered_set abnormal_nodes_index_; - // index of tablets moved to abnormal nodes - std::unordered_set tablets_moved_to_abnormal_nodes_; - // read pending nodes index - std::unordered_set read_pending_nodes_index_; - // index of tablets moved to read pending nodes - std::unordered_set tablets_moved_to_read_pending_nodes_; - // write pending nodes index - std::unordered_set write_pending_nodes_index_; - // index of tablets moved to write pending nodes - std::unordered_set tablets_moved_to_write_pending_nodes_; - // scan pending nodes index - std::unordered_set scan_pending_nodes_index_; - // index of tablets moved to scan pending nodes - std::unordered_set tablets_moved_to_scan_pending_nodes_; - // node_index -> data size on the node - std::map size_per_node_; - // node_index -> read load on the node - std::map read_load_per_node_; - // node_index -> write load on the node - std::map write_load_per_node_; - // node_index -> scan load on the node - std::map scan_load_per_node_; - // tablets index of moved too frequently - std::unordered_set tablets_moved_too_frequently_; - - // meta table node index - uint32_t meta_table_node_index_; - - // for ActionGenerator - std::vector node_index_sorted_by_tablet_count_; - std::vector node_index_sorted_by_size_; - std::vector node_index_sorted_by_read_load_; - std::vector node_index_sorted_by_write_load_; - std::vector node_index_sorted_by_scan_load_; - - LBOptions lb_options_; - -private: - std::vector> lb_nodes_; + public: + Cluster(const std::vector>& tablet_nodes, const LBOptions& options, + bool skip_meta_node); + + virtual ~Cluster(); + + void DebugCluster(); + + bool ValidAction(const std::shared_ptr& action); + void DoAction(const std::shared_ptr& action); + + void SortNodesByTabletCount(std::vector* sorted_node_index); + void SortNodesBySize(std::vector* sorted_node_index); + void SortNodesByFlashSizePercent(std::vector* sorted_node_index); + void SortNodesByReadLoad(std::vector* sorted_node_index); + void SortNodesByWriteLoad(std::vector* sorted_node_index); + void SortNodesByScanLoad(std::vector* sorted_node_index); + void SortNodesByLRead(std::vector* sorted_node_index); + void SortNodesByComplexLoad(std::vector* sorted_node_index); + + void SortTabletsOfNodeByReadLoad(uint32_t node_index, std::vector* sorted_tablet_index); + void SortTabletsOfNodeByWriteLoad(uint32_t node_index, + std::vector* sorted_tablet_index); + void SortTabletsOfNodeByScanLoad(uint32_t node_index, std::vector* sorted_tablet_index); + void SortTabletsOfNodeByLRead(uint32_t node_index, std::vector* sorted_tablet_index); + + // return true if meta table is on this node, otherwise false + bool IsMetaNode(uint32_t node_index); + + bool IsReadyNode(uint32_t node_index); + + bool IsReadPendingNode(uint32_t node_index); + bool IsWritePendingNode(uint32_t node_index); + bool IsScanPendingNode(uint32_t node_index); + bool IsPendingNode(uint32_t node_index); + bool IsHeavyReadPendingNode(uint32_t node_index); + bool IsHeavyWritePendingNode(uint32_t node_index); + bool IsHeavyScanPendingNode(uint32_t node_index); + bool IsHeavyPendingNode(uint32_t node_index); + uint32_t HeavyPendingNodeNum(); + bool IsHeavyLReadNode(uint32_t node_index); + + bool IsAbnormalNode(uint32_t node_index); + + /* + * check whether a tablet has flash lg located on a node who does not has ssd + */ + bool IsFlashSizeEnough(uint32_t tablet_index, uint32_t node_index); + + /** + * check whether it is proper to locate the tablet on the node + */ + bool IsProperLocation(uint32_t tablet_index, uint32_t node_index); + + bool IsMetaTablet(uint32_t tablet_index); + bool IsReadyTablet(uint32_t tablet_index); + bool IsTabletMoveTooFrequent(uint32_t tablet_index); + + bool IsProperTargetTablet(uint32_t tablet_index); + + private: + void RegisterTablet(const std::shared_ptr& tablet, uint32_t tablet_index, + uint32_t node_index); + void AddTablet(uint32_t tablet_index, uint32_t to_node_index); + void RemoveTablet(uint32_t tablet_index, uint32_t from_node_index); + void MoveTablet(uint32_t tablet_index, uint32_t source_node_index, uint32_t dest_node_index); + + // cluster info, use index to speed up the calculation + // make these info public also for speeding up + public: + uint32_t table_num_; + uint32_t tablet_node_num_; + uint32_t tablet_num_; + uint32_t tablet_moved_num_; + + // table_index -> table + std::map tables_; + // node_index -> node + std::map> nodes_; + // tablet_index -> tablet + std::map> tablets_; + + // table -> table_index + std::map tables_to_index_; + // node -> node_index + std::map nodes_to_index_; + // tablet -> tablet_index + std::map tablets_to_index_; + + // tablet_index -> node_index + std::map tablet_index_to_node_index_; + // initial tablet_index -> node_index, it's the initial cluster state + std::map initial_tablet_index_to_node_index_; + // tablet_index -> table_index + std::map tablet_index_to_table_index_; + + // node_index -> tablets index on the node + std::map> tablets_per_node_; + // node_index -> tablets index of not ready on the node + std::map> initial_tablets_not_ready_per_node_; + // abnormal nodes index + std::unordered_set abnormal_nodes_index_; + // node_index -> data size on the node + std::map size_per_node_; + // node_index -> flash data size on the node + std::map flash_size_per_node_; + // node_index -> read load on the node + std::map read_load_per_node_; + // node_index -> write load on the node + std::map write_load_per_node_; + // node_index -> scan load on the node + std::map scan_load_per_node_; + // node_index -> lread on the node + std::map lread_per_node_; + // node_index -> read pending on the node + std::map read_pending_per_node_; + // node_index -> write pending on the node + std::map write_pending_per_node_; + // node_index -> scan pending on the node + std::map scan_pending_per_node_; + + // meta table node index + uint32_t meta_table_node_index_; + + LBOptions lb_options_; + + private: + std::vector> lb_nodes_; }; -} // namespace load_balancer -} // namespace tera +} // namespace load_balancer +} // namespace tera -#endif // TERA_LOAD_BALANCER_CLUSTER_H_ +#endif // TERA_LOAD_BALANCER_CLUSTER_H_ diff --git a/src/load_balancer/cost_function.h b/src/load_balancer/cost_function.h index 862b09285..328e40db3 100644 --- a/src/load_balancer/cost_function.h +++ b/src/load_balancer/cost_function.h @@ -21,105 +21,93 @@ namespace tera { namespace load_balancer { class CostFunction { -public: - CostFunction(const LBOptions& options, const std::string& name) : - lb_options_(options), - name_(name) { + public: + CostFunction(const LBOptions& options, const std::string& name) + : lb_options_(options), name_(name) {} - } + virtual ~CostFunction() {} - virtual ~CostFunction() {} + virtual double Cost() = 0; - virtual double Cost() = 0; + virtual void Init(const std::shared_ptr& cluster) { cluster_ = cluster; } - virtual void Init(const std::shared_ptr& cluster) { - cluster_ = cluster; - } + double GetWeight() const { return weight_; } - double GetWeight() const { - return weight_; - } + void SetWeight(double w) { weight_ = w; } - void SetWeight(double w) { - weight_ = w; - } + std::string Name() const { return name_; } - std::string Name() const { - return name_; + protected: + double Scale(double min, double max, double value) { + VLOG(20) << "[lb] Scale begin, min:" << min << " max:" << max << " value:" << value; + if (max <= min || value <= min) { + return 0.0; + } + if (max - min == 0) { + return 0.0; } -protected: - double Scale(double min, double max, double value) { - VLOG(20) << "[lb] Scale begin, min:" << min << " max:" << max << " value:" << value; - if (max <= min || value <= min) { - return 0.0; - } - if (max - min == 0) { - return 0.0; - } - - double scaled = std::max(0.0, std::min(1.0, (value - min) / (max - min))); - VLOG(20) << "[lb] Scale end, scaled:" << scaled; - return scaled; + double scaled = std::max(0.0, std::min(1.0, (value - min) / (max - min))); + VLOG(20) << "[lb] Scale end, scaled:" << scaled; + return scaled; + } + + double ScaleFromArray(const std::vector& stats) { + if (lb_options_.debug_mode_enabled) { + std::string line; + for (const auto& s : stats) { + line += std::to_string(s); + line += " "; + } + LOG(INFO) << "[lb] stats:" << line; } - double ScaleFromArray(const std::vector& stats) { - if (lb_options_.debug_mode_enabled) { - std::string line; - for (const auto& s : stats) { - line += std::to_string(s); - line += " "; - } - LOG(INFO) << "[lb] stats:" << line; - } - - double total_cost = 0; - double total = GetSum(stats); - - double count = stats.size(); - double mean = total/count; - - double max = ((count - 1) * mean) + (total - mean); - - double min; - if (count > total) { - min = ((count - total) * mean) + ((1 - mean) * total); - } else { - int num_high = (int) (total - (floor(mean) * count)); - int num_low = (int) (count - num_high); - - min = (num_high * (ceil(mean) - mean)) + (num_low * (mean - floor(mean))); - - } - min = std::max(0.0, min); - for (size_t i = 0; i < stats.size(); i++) { - double n = stats[i]; - double diff = std::abs(mean - n); - total_cost += diff; - } - - return Scale(min, max, total_cost); + double total_cost = 0; + double total = GetSum(stats); + + double count = stats.size(); + double mean = total / count; + + double max = ((count - 1) * mean) + (total - mean); + + double min; + if (count > total) { + min = ((count - total) * mean) + ((1 - mean) * total); + } else { + int num_high = (int)(total - (floor(mean) * count)); + int num_low = (int)(count - num_high); + + min = (num_high * (ceil(mean) - mean)) + (num_low * (mean - floor(mean))); } + min = std::max(0.0, min); + for (size_t i = 0; i < stats.size(); i++) { + double n = stats[i]; + double diff = std::abs(mean - n); + total_cost += diff; + } + + return Scale(min, max, total_cost); + } -private: - double GetSum(const std::vector& stats) { - double total = 0; - for (const auto& s : stats) { - total += s; - } - return total; + private: + double GetSum(const std::vector& stats) { + double total = 0; + for (const auto& s : stats) { + total += s; } + return total; + } -protected: - std::shared_ptr cluster_; + protected: + std::shared_ptr cluster_; -private: - double weight_; - LBOptions lb_options_; - std::string name_; + private: + double weight_; + LBOptions lb_options_; + std::string name_; }; -} // namespace load_balancer -} // namespace tera +} // namespace load_balancer +} // namespace tera -#endif // TERA_LOAD_BALANCER_COST_FUNCTION_H_ +#endif // TERA_LOAD_BALANCER_COST_FUNCTION_H_ diff --git a/src/load_balancer/cost_functions.cc b/src/load_balancer/cost_functions.cc index a1f00b3e0..a9cfe0c66 100644 --- a/src/load_balancer/cost_functions.cc +++ b/src/load_balancer/cost_functions.cc @@ -10,211 +10,144 @@ namespace tera { namespace load_balancer { -MoveCountCostFunction::MoveCountCostFunction (const LBOptions& options) : - CostFunction(options, "MoveCountCostFunction"), - kExpensiveCost(1000000), - tablet_max_move_num_(options.tablet_max_move_num) { - SetWeight(options.move_count_cost_weight); +MoveCountCostFunction::MoveCountCostFunction(const LBOptions& options) + : CostFunction(options, "MoveCountCostFunction"), + kExpensiveCost(1000000), + tablet_max_move_num_(options.tablet_max_move_num) { + SetWeight(options.move_count_cost_weight); } -MoveCountCostFunction::~MoveCountCostFunction() { -} +MoveCountCostFunction::~MoveCountCostFunction() {} double MoveCountCostFunction::Cost() { - double cost = cluster_->tablet_moved_num_; - if (cost > static_cast(tablet_max_move_num_)) { - // return an expensive cost - VLOG(20) << "[lb] reach max move num limit: " << tablet_max_move_num_; - return kExpensiveCost; - } - - return Scale(0, std::max(cluster_->tablet_num_, tablet_max_move_num_), cost); -} - -MoveFrequencyCostFunction::MoveFrequencyCostFunction(const LBOptions& options) : - CostFunction(options, "MoveFrequencyCostFunction"), - kExpensiveCost(100000) { - SetWeight(options.move_frequency_cost_weight); -} - -MoveFrequencyCostFunction::~MoveFrequencyCostFunction() { -} - -double MoveFrequencyCostFunction::Cost() { - if (cluster_->tablets_moved_too_frequently_.size() > 0) { - // there are tablets moved too frequently, return an expensive cost - VLOG(20) << "[lb] there are " << cluster_->tablets_moved_too_frequently_.size() - << " tablets moved too frequently"; - return kExpensiveCost; - } else { - return 0; - } -} + double cost = cluster_->tablet_moved_num_; + if (cost > static_cast(tablet_max_move_num_)) { + // return an expensive cost + VLOG(20) << "[lb] reach max move num limit: " << tablet_max_move_num_; + return kExpensiveCost; + } -AbnormalNodeCostFunction::AbnormalNodeCostFunction(const LBOptions& options) : - CostFunction(options, "AbnormalNodeCostFunction"), - kExpensiveCost(100000) { - SetWeight(options.abnormal_node_cost_weight); + return Scale(0, std::max(cluster_->tablet_num_, tablet_max_move_num_), cost); } -AbnormalNodeCostFunction::~AbnormalNodeCostFunction() { +TabletCountCostFunction::TabletCountCostFunction(const LBOptions& options) + : CostFunction(options, "TabletCountCostFunction") { + SetWeight(options.tablet_count_cost_weight); } -double AbnormalNodeCostFunction::Cost() { - if (cluster_->tablets_moved_to_abnormal_nodes_.size() > 0) { - // there are tablets moved to abnormal nodes, return an expensive cost - VLOG(20) << "[lb] there are " << cluster_->tablets_moved_to_abnormal_nodes_.size() - << " tablets moved to abnormal nodes"; - return kExpensiveCost; - } else { - return 0; - } -} - -ReadPendingNodeCostFunction::ReadPendingNodeCostFunction(const LBOptions& options) : - CostFunction(options, "ReadPendingNodeCostFunction"), - kExpensiveCost(10000) { - SetWeight(options.read_pending_node_cost_weight); -} - -ReadPendingNodeCostFunction::~ReadPendingNodeCostFunction() { -} +TabletCountCostFunction::~TabletCountCostFunction() {} -double ReadPendingNodeCostFunction::Cost() { - if (cluster_->tablets_moved_to_read_pending_nodes_.size() > 0) { - // there are tablets moved to read pending nodes, return an expensive cost - VLOG(20) << "[lb] there are " << cluster_->tablets_moved_to_read_pending_nodes_.size() - << " tablets moved to read pending nodes"; - return kExpensiveCost; - } else { - return 0; - } -} - -WritePendingNodeCostFunction::WritePendingNodeCostFunction(const LBOptions& options) : - CostFunction(options, "WritePendingNodeCostFunction"), - kExpensiveCost(10000) { - SetWeight(options.write_pending_node_cost_weight); -} +double TabletCountCostFunction::Cost() { + std::vector tablet_nums_per_node; + for (uint32_t i = 0; i < cluster_->tablet_node_num_; ++i) { + tablet_nums_per_node.emplace_back(cluster_->tablets_per_node_[i].size()); + } -WritePendingNodeCostFunction::~WritePendingNodeCostFunction() { + return ScaleFromArray(tablet_nums_per_node); } -double WritePendingNodeCostFunction::Cost() { - if (cluster_->tablets_moved_to_write_pending_nodes_.size() > 0) { - // there are tablets moved to write pending nodes, return an expensive cost - VLOG(20) << "[lb] there are " << cluster_->tablets_moved_to_write_pending_nodes_.size() - << " tablets moved to write pending nodes"; - return kExpensiveCost; - } else { - return 0; - } +SizeCostFunction::SizeCostFunction(const LBOptions& options) + : CostFunction(options, "SizeCostFunction") { + SetWeight(options.size_cost_weight); } -ScanPendingNodeCostFunction::ScanPendingNodeCostFunction(const LBOptions& options) : - CostFunction(options, "ScanPendingNodeCostFunction"), - kExpensiveCost(10000) { - SetWeight(options.scan_pending_node_cost_weight); -} +SizeCostFunction::~SizeCostFunction() {} -ScanPendingNodeCostFunction::~ScanPendingNodeCostFunction() { -} +double SizeCostFunction::Cost() { + std::vector size_per_node; + for (uint32_t i = 0; i < cluster_->tablet_node_num_; ++i) { + size_per_node.emplace_back(cluster_->size_per_node_[i]); + } -double ScanPendingNodeCostFunction::Cost() { - if (cluster_->tablets_moved_to_scan_pending_nodes_.size() > 0) { - // there are tablets moved to scan pending nodes, return an expensive cost - VLOG(20) << "[lb] there are " << cluster_->tablets_moved_to_scan_pending_nodes_.size() - << " tablets moved to scan pending nodes"; - return kExpensiveCost; - } else { - return 0; - } + return ScaleFromArray(size_per_node); } -TabletCountCostFunction::TabletCountCostFunction (const LBOptions& options) : - CostFunction(options, "TabletCountCostFunction") { - SetWeight(options.tablet_count_cost_weight); +FlashSizeCostFunction::FlashSizeCostFunction(const LBOptions& options) + : CostFunction(options, "FlashSizeCostFunction") { + SetWeight(options.flash_size_cost_weight); } -TabletCountCostFunction::~TabletCountCostFunction() { -} +FlashSizeCostFunction::~FlashSizeCostFunction() {} -double TabletCountCostFunction::Cost() { - std::vector tablet_nums_per_node; - for (uint32_t i = 0; i < cluster_->tablet_node_num_; ++i) { - tablet_nums_per_node.emplace_back(cluster_->tablets_per_node_[i].size()); +double FlashSizeCostFunction::Cost() { + std::vector flash_size_percent_per_node; + for (uint32_t i = 0; i < cluster_->tablet_node_num_; ++i) { + uint64_t node_flash_capacity = cluster_->nodes_[i]->tablet_node_ptr->GetPersistentCacheSize(); + if (node_flash_capacity == 0) { + // skip the node which does not has ssd + continue; } + assert(node_flash_capacity > 0); + flash_size_percent_per_node.emplace_back(100.0 * cluster_->flash_size_per_node_[i] / + node_flash_capacity); + } - return ScaleFromArray(tablet_nums_per_node); + return ScaleFromArray(flash_size_percent_per_node); } -SizeCostFunction::SizeCostFunction (const LBOptions& options) : - CostFunction(options, "SizeCostFunction") { - SetWeight(options.size_cost_weight); +ReadLoadCostFunction::ReadLoadCostFunction(const LBOptions& options) + : CostFunction(options, "ReadLoadCostFunction") { + SetWeight(options.read_load_cost_weight); } -SizeCostFunction::~SizeCostFunction() { -} +ReadLoadCostFunction::~ReadLoadCostFunction() {} -double SizeCostFunction::Cost() { - std::vector size_per_node; - for (uint32_t i = 0; i < cluster_->tablet_node_num_; ++i) { - size_per_node.emplace_back(cluster_->size_per_node_[i]); - } +double ReadLoadCostFunction::Cost() { + std::vector read_load_per_node; + for (uint32_t i = 0; i < cluster_->tablet_node_num_; ++i) { + read_load_per_node.emplace_back(cluster_->read_load_per_node_[i]); + } - return ScaleFromArray(size_per_node); + return ScaleFromArray(read_load_per_node); } -ReadLoadCostFunction::ReadLoadCostFunction (const LBOptions& options) : - CostFunction(options, "ReadLoadCostFunction") { - SetWeight(options.read_load_cost_weight); +WriteLoadCostFunction::WriteLoadCostFunction(const LBOptions& options) + : CostFunction(options, "WriteLoadCostFunction") { + SetWeight(options.write_load_cost_weight); } -ReadLoadCostFunction::~ReadLoadCostFunction() { -} +WriteLoadCostFunction::~WriteLoadCostFunction() {} -double ReadLoadCostFunction::Cost() { - std::vector read_load_per_node; - for (uint32_t i = 0; i < cluster_->tablet_node_num_; ++i) { - read_load_per_node.emplace_back(cluster_->read_load_per_node_[i]); - } +double WriteLoadCostFunction::Cost() { + std::vector write_load_per_node; + for (uint32_t i = 0; i < cluster_->tablet_node_num_; ++i) { + write_load_per_node.emplace_back(cluster_->write_load_per_node_[i]); + } - return ScaleFromArray(read_load_per_node); + return ScaleFromArray(write_load_per_node); } -WriteLoadCostFunction::WriteLoadCostFunction (const LBOptions& options) : - CostFunction(options, "WriteLoadCostFunction") { - SetWeight(options.write_load_cost_weight); +ScanLoadCostFunction::ScanLoadCostFunction(const LBOptions& options) + : CostFunction(options, "ScanLoadCostFunction") { + SetWeight(options.scan_load_cost_weight); } -WriteLoadCostFunction::~WriteLoadCostFunction() { -} +ScanLoadCostFunction::~ScanLoadCostFunction() {} -double WriteLoadCostFunction::Cost() { - std::vector write_load_per_node; - for (uint32_t i = 0; i < cluster_->tablet_node_num_; ++i) { - write_load_per_node.emplace_back(cluster_->write_load_per_node_[i]); - } +double ScanLoadCostFunction::Cost() { + std::vector scan_load_per_node; + for (uint32_t i = 0; i < cluster_->tablet_node_num_; ++i) { + scan_load_per_node.emplace_back(cluster_->scan_load_per_node_[i]); + } - return ScaleFromArray(write_load_per_node); + return ScaleFromArray(scan_load_per_node); } -ScanLoadCostFunction::ScanLoadCostFunction (const LBOptions& options) : - CostFunction(options, "ScanLoadCostFunction") { - SetWeight(options.scan_load_cost_weight); +LReadCostFunction::LReadCostFunction(const LBOptions& options) + : CostFunction(options, "LReadCostFunction") { + SetWeight(options.lread_cost_weight); } -ScanLoadCostFunction::~ScanLoadCostFunction() { -} +LReadCostFunction::~LReadCostFunction() {} -double ScanLoadCostFunction::Cost() { - std::vector scan_load_per_node; - for (uint32_t i = 0; i < cluster_->tablet_node_num_; ++i) { - scan_load_per_node.emplace_back(cluster_->scan_load_per_node_[i]); - } +double LReadCostFunction::Cost() { + std::vector lread_per_node; + for (uint32_t i = 0; i < cluster_->tablet_node_num_; ++i) { + lread_per_node.emplace_back(cluster_->lread_per_node_[i]); + } - return ScaleFromArray(scan_load_per_node); + return ScaleFromArray(lread_per_node); } -} // namespace load_balancer -} // namespace tera +} // namespace load_balancer +} // namespace tera diff --git a/src/load_balancer/cost_functions.h b/src/load_balancer/cost_functions.h index 8d7e0eb39..608ef0e6e 100644 --- a/src/load_balancer/cost_functions.h +++ b/src/load_balancer/cost_functions.h @@ -10,125 +10,75 @@ namespace tera { namespace load_balancer { -// moving too many tablets will cost high class MoveCountCostFunction : public CostFunction { -public: - MoveCountCostFunction(const LBOptions& options); - virtual ~MoveCountCostFunction(); + public: + MoveCountCostFunction(const LBOptions& options); + virtual ~MoveCountCostFunction(); - virtual double Cost() override; + virtual double Cost() override; -private: - const double kExpensiveCost; - uint32_t tablet_max_move_num_; + private: + const double kExpensiveCost; + uint32_t tablet_max_move_num_; }; -// moving tablet oo frequently will cost high -class MoveFrequencyCostFunction : public CostFunction { -public: - MoveFrequencyCostFunction(const LBOptions& options); - virtual ~MoveFrequencyCostFunction(); - - virtual double Cost() override; - -private: - const double kExpensiveCost; -}; - -// moving a tablet to an abnormal node will cost high -class AbnormalNodeCostFunction : public CostFunction { -public: - AbnormalNodeCostFunction(const LBOptions& options); - virtual ~AbnormalNodeCostFunction(); - - virtual double Cost() override; - -private: - const double kExpensiveCost; -}; - -// moving a tablet to a read pending node will cost high -class ReadPendingNodeCostFunction : public CostFunction { -public: - ReadPendingNodeCostFunction(const LBOptions& options); - virtual ~ReadPendingNodeCostFunction(); - - virtual double Cost() override; - -private: - const double kExpensiveCost; -}; - -// moving a tablet to a write pending node will cost high -class WritePendingNodeCostFunction : public CostFunction { -public: - WritePendingNodeCostFunction(const LBOptions& options); - virtual ~WritePendingNodeCostFunction(); - - virtual double Cost() override; - -private: - const double kExpensiveCost; -}; - -// moving a tablet to a scan pending node will cost high -class ScanPendingNodeCostFunction : public CostFunction { -public: - ScanPendingNodeCostFunction(const LBOptions& options); - virtual ~ScanPendingNodeCostFunction(); - - virtual double Cost() override; +class TabletCountCostFunction : public CostFunction { + public: + TabletCountCostFunction(const LBOptions& options); + virtual ~TabletCountCostFunction(); -private: - const double kExpensiveCost; + virtual double Cost() override; }; -// balance the tablets num for each tablet node -class TabletCountCostFunction : public CostFunction { -public: - TabletCountCostFunction(const LBOptions& options); - virtual ~TabletCountCostFunction(); +class SizeCostFunction : public CostFunction { + public: + SizeCostFunction(const LBOptions& options); + virtual ~SizeCostFunction(); - virtual double Cost() override; + virtual double Cost() override; }; -// banlance the data size for each tablet node -class SizeCostFunction : public CostFunction { -public: - SizeCostFunction(const LBOptions& options); - virtual ~SizeCostFunction(); +class FlashSizeCostFunction : public CostFunction { + public: + FlashSizeCostFunction(const LBOptions& options); + virtual ~FlashSizeCostFunction(); - virtual double Cost() override; + virtual double Cost() override; }; -// banlance the read load for each tablet node class ReadLoadCostFunction : public CostFunction { -public: - ReadLoadCostFunction(const LBOptions& options); - virtual ~ReadLoadCostFunction(); + public: + ReadLoadCostFunction(const LBOptions& options); + virtual ~ReadLoadCostFunction(); - virtual double Cost() override; + virtual double Cost() override; }; -// banlance the write load for each tablet node class WriteLoadCostFunction : public CostFunction { -public: - WriteLoadCostFunction(const LBOptions& options); - virtual ~WriteLoadCostFunction(); + public: + WriteLoadCostFunction(const LBOptions& options); + virtual ~WriteLoadCostFunction(); - virtual double Cost() override; + virtual double Cost() override; }; -// banlance the scan load for each tablet node class ScanLoadCostFunction : public CostFunction { -public: - ScanLoadCostFunction(const LBOptions& options); - virtual ~ScanLoadCostFunction(); + public: + ScanLoadCostFunction(const LBOptions& options); + virtual ~ScanLoadCostFunction(); + + virtual double Cost() override; +}; + +class LReadCostFunction : public CostFunction { + public: + LReadCostFunction(const LBOptions& options); + virtual ~LReadCostFunction(); - virtual double Cost() override; + virtual double Cost() override; }; -} // namespace load_balancer -} // namespace tera +} // namespace load_balancer +} // namespace tera -#endif // TERA_LOAD_BALANCER_COST_FUNCTIONS_H_ +#endif // TERA_LOAD_BALANCER_COST_FUNCTIONS_H_ diff --git a/src/load_balancer/lb_entry.cc b/src/load_balancer/lb_entry.cc index 1e11c05eb..ca67ea5da 100644 --- a/src/load_balancer/lb_entry.cc +++ b/src/load_balancer/lb_entry.cc @@ -17,58 +17,47 @@ DECLARE_string(tera_lb_server_addr); DECLARE_string(tera_lb_server_port); -std::string GetTeraEntryName() { - return "lb"; -} +std::string GetTeraEntryName() { return "lb"; } -tera::TeraEntry* GetTeraEntry() { - return new tera::load_balancer::LBEntry(); -} +tera::TeraEntry* GetTeraEntry() { return new tera::load_balancer::LBEntry(); } namespace tera { namespace load_balancer { -LBEntry::LBEntry() : - rpc_server_(nullptr), - lb_service_impl_(nullptr), - lb_impl_(nullptr) { - sofa::pbrpc::RpcServerOptions rpc_options; - rpc_server_.reset(new sofa::pbrpc::RpcServer(rpc_options)); +LBEntry::LBEntry() : rpc_server_(nullptr), lb_service_impl_(nullptr), lb_impl_(nullptr) { + sofa::pbrpc::RpcServerOptions rpc_options; + rpc_server_.reset(new sofa::pbrpc::RpcServer(rpc_options)); } -LBEntry::~LBEntry() { -} +LBEntry::~LBEntry() {} bool LBEntry::StartServer() { - IpAddress lb_addr(FLAGS_tera_lb_server_addr, FLAGS_tera_lb_server_port); - LOG(INFO) << "Start load balancer RPC server at: " << lb_addr.ToString(); + IpAddress lb_addr(FLAGS_tera_lb_server_addr, FLAGS_tera_lb_server_port); + LOG(INFO) << "Start load balancer RPC server at: " << lb_addr.ToString(); - lb_impl_.reset(new LBImpl()); - lb_service_impl_ = new LBServiceImpl(lb_impl_); + lb_impl_.reset(new LBImpl()); + lb_service_impl_ = new LBServiceImpl(lb_impl_); - if (!lb_impl_->Init()) { - return false; - } + if (!lb_impl_->Init()) { + return false; + } - rpc_server_->RegisterService(lb_service_impl_); - if (!rpc_server_->Start(lb_addr.ToString())) { - LOG(ERROR) << "start RPC server error"; - return false; - } + rpc_server_->RegisterService(lb_service_impl_); + if (!rpc_server_->Start(lb_addr.ToString())) { + LOG(ERROR) << "start RPC server error"; + return false; + } - LOG(INFO) << "finish starting load balancer server"; - return true; + LOG(INFO) << "finish starting load balancer server"; + return true; } bool LBEntry::Run() { - ThisThread::Sleep(1000); - return true; -} - -void LBEntry::ShutdownServer() { - rpc_server_->Stop(); + ThisThread::Sleep(1000); + return true; } -} // namespace load_balancer -} // namespace tera +void LBEntry::ShutdownServer() { rpc_server_->Stop(); } +} // namespace load_balancer +} // namespace tera diff --git a/src/load_balancer/lb_entry.h b/src/load_balancer/lb_entry.h index 03399bc00..5818cca80 100644 --- a/src/load_balancer/lb_entry.h +++ b/src/load_balancer/lb_entry.h @@ -9,7 +9,7 @@ #include "sofa/pbrpc/pbrpc.h" -#include "tera_entry.h" +#include "tera/tera_entry.h" namespace tera { namespace load_balancer { @@ -18,21 +18,21 @@ class LBServiceImpl; class LBImpl; class LBEntry : public TeraEntry { -public: - LBEntry(); - virtual ~LBEntry(); - - virtual bool StartServer(); - virtual bool Run(); - virtual void ShutdownServer(); - -private: - std::unique_ptr rpc_server_; - LBServiceImpl* lb_service_impl_; - std::shared_ptr lb_impl_; + public: + LBEntry(); + virtual ~LBEntry(); + + virtual bool StartServer(); + virtual bool Run(); + virtual void ShutdownServer(); + + private: + std::unique_ptr rpc_server_; + LBServiceImpl* lb_service_impl_; + std::shared_ptr lb_impl_; }; -} // namespace load_balancer -} // namespace tera +} // namespace load_balancer +} // namespace tera -#endif // TERA_LOAD_BALANCER_LB_ENTRY_H_ +#endif // TERA_LOAD_BALANCER_LB_ENTRY_H_ diff --git a/src/load_balancer/lb_flags.cc b/src/load_balancer/lb_flags.cc index 42cd0e6fd..2781ee735 100644 --- a/src/load_balancer/lb_flags.cc +++ b/src/load_balancer/lb_flags.cc @@ -13,36 +13,44 @@ DEFINE_int32(tera_lb_impl_thread_num, 1, "default load balancer impl thread pool DEFINE_string(tera_lb_meta_table_name, "meta_table", "the meta table name"); DEFINE_bool(tera_lb_meta_isolate_enabled, true, "enable master to reserve a tabletnode for meta"); +DEFINE_bool(tera_lb_by_table, false, "balance by table one by one"); +DEFINE_int32(tera_lb_meta_balance_period_s, 180, "default meta load balance period(s)"); +DEFINE_int32(tera_lb_meta_balance_max_move_num, 1, "default max move num for meta balance(s)"); DEFINE_int32(tera_lb_load_balance_period_s, 60, "default load balance period(s)"); -DEFINE_int32(tera_lb_max_compute_steps, 1000000, "default max compute steps for one balance procedure"); -DEFINE_int32(tera_lb_max_compute_steps_per_tablet, 1000, "default max compute steps per tablet for one balance procedure"); -DEFINE_int32(tera_lb_max_compute_time_ms, 30000, "default max compute time(ms) for one balance procedure"); -DEFINE_double(tera_lb_min_cost_need_balance, 0.05, "min cost needed for balance"); -DEFINE_double(tera_lb_bad_node_safemode_percent, 0.5, "if bad node num percent is higher than this, skip balance"); - -DEFINE_double(tera_lb_move_count_cost_weight, 10, "move cost weight"); -DEFINE_int32(tera_lb_tablet_max_move_num, 1, "default tablet max move num for one balance procedure"); - -DEFINE_double(tera_lb_move_frequency_cost_weight, 10, "move frequency cost weight"); -DEFINE_int32(tera_lb_tablet_move_too_frequently_threshold_s, 600, "if move a tablet in this threshold time(s) again, it's been moved too frequently"); - -DEFINE_double(tera_lb_abnormal_node_cost_weight, 10, "abnormal node cost weight"); +DEFINE_int32(tera_lb_max_compute_steps, 1000000, + "default max compute steps for one balance procedure"); +DEFINE_int32(tera_lb_max_compute_steps_per_tablet, 1000, + "default max compute steps per tablet for one balance procedure"); +DEFINE_int32(tera_lb_max_compute_time_ms, 30000, + "default max compute time(ms) for one balance procedure"); +DEFINE_double(tera_lb_min_cost_need_balance, 0.02, "min cost needed for balance"); +DEFINE_double(tera_lb_bad_node_safemode_percent, 0.5, + "if bad node num percent is higher than this, skip balance"); + +DEFINE_double(tera_lb_move_count_cost_weight, 1, "move cost weight"); +DEFINE_int32(tera_lb_tablet_max_move_num, 2, + "default tablet max move num for one balance procedure"); + +DEFINE_int32(tera_lb_tablet_move_too_frequently_threshold_s, 3600, + "if move a tablet in this threshold time(s) again, it's been " + "moved too frequently"); DEFINE_double(tera_lb_abnormal_node_ratio, 0.5, "abnormal node ratio"); -DEFINE_double(tera_lb_read_pending_node_cost_weight, 10, "read pending node cost weight"); -DEFINE_double(tera_lb_write_pending_node_cost_weight, 10, "write pending node cost weight"); -DEFINE_double(tera_lb_scan_pending_node_cost_weight, 10, "scan pending node cost weight"); - DEFINE_double(tera_lb_tablet_count_cost_weight, 100, "tablet count cost weight"); -DEFINE_double(tera_lb_size_cost_weight, 100, "size cost weight"); - -DEFINE_double(tera_lb_read_load_cost_weight, 20, "read load cost weight"); -DEFINE_double(tera_lb_write_load_cost_weight, 20, "write load cost weight"); -DEFINE_double(tera_lb_scan_load_cost_weight, 10, "scan load cost weight"); +DEFINE_double(tera_lb_size_cost_weight, 200, "size cost weight"); +DEFINE_double(tera_lb_flash_size_cost_weight, 0, "flash size cost weight"); + +DEFINE_double(tera_lb_read_load_cost_weight, 10, "read load cost weight"); +DEFINE_double(tera_lb_write_load_cost_weight, 10, "write load cost weight"); +DEFINE_double(tera_lb_scan_load_cost_weight, 1, "scan load cost weight"); +DEFINE_double(tera_lb_lread_cost_weight, 5, "lread cost weight"); +DEFINE_double(tera_lb_heavy_read_pending_threshold, 1000, "heavy read pending threshold"); +DEFINE_double(tera_lb_heavy_write_pending_threshold, 1000, "heavy write pending threshold"); +DEFINE_double(tera_lb_heavy_scan_pending_threshold, 1000, "heavy scan pending threshold"); +DEFINE_double(tera_lb_heavy_lread_threshold, 1000000, "heavy lread threshold"); DEFINE_double(tera_lb_read_pending_factor, 1, "read pending factor"); DEFINE_double(tera_lb_write_pending_factor, 1, "write pending factor"); DEFINE_double(tera_lb_scan_pending_factor, 1, "scan pending factor"); DEFINE_bool(tera_lb_debug_mode_enabled, false, "debug mode"); - diff --git a/src/load_balancer/lb_impl.cc b/src/load_balancer/lb_impl.cc index a93e7a5a9..117a4ed8e 100644 --- a/src/load_balancer/lb_impl.cc +++ b/src/load_balancer/lb_impl.cc @@ -4,6 +4,7 @@ #include "load_balancer/lb_impl.h" +#include #include #include #include @@ -12,6 +13,7 @@ #include "gflags/gflags.h" #include "glog/logging.h" +#include "load_balancer/meta_balancer.h" #include "load_balancer/unity_balancer.h" #include "proto/tabletnode.pb.h" #include "tera.h" @@ -20,6 +22,8 @@ DECLARE_bool(tera_lb_meta_isolate_enabled); DECLARE_string(tera_lb_meta_table_name); DECLARE_int32(tera_lb_impl_thread_num); +DECLARE_bool(tera_lb_by_table); +DECLARE_int32(tera_lb_meta_balance_period_s); DECLARE_int32(tera_lb_load_balance_period_s); DECLARE_int32(tera_lb_max_compute_steps); DECLARE_int32(tera_lb_max_compute_steps_per_tablet); @@ -27,19 +31,21 @@ DECLARE_int32(tera_lb_max_compute_time_ms); DECLARE_double(tera_lb_min_cost_need_balance); DECLARE_double(tera_lb_bad_node_safemode_percent); DECLARE_double(tera_lb_move_count_cost_weight); +DECLARE_int32(tera_lb_meta_balance_max_move_num); DECLARE_int32(tera_lb_tablet_max_move_num); -DECLARE_double(tera_lb_move_frequency_cost_weight); DECLARE_int32(tera_lb_tablet_move_too_frequently_threshold_s); -DECLARE_double(tera_lb_abnormal_node_cost_weight); DECLARE_double(tera_lb_abnormal_node_ratio); -DECLARE_double(tera_lb_read_pending_node_cost_weight); -DECLARE_double(tera_lb_write_pending_node_cost_weight); -DECLARE_double(tera_lb_scan_pending_node_cost_weight); DECLARE_double(tera_lb_tablet_count_cost_weight); DECLARE_double(tera_lb_size_cost_weight); +DECLARE_double(tera_lb_flash_size_cost_weight); DECLARE_double(tera_lb_read_load_cost_weight); DECLARE_double(tera_lb_write_load_cost_weight); DECLARE_double(tera_lb_scan_load_cost_weight); +DECLARE_double(tera_lb_lread_cost_weight); +DECLARE_double(tera_lb_heavy_read_pending_threshold); +DECLARE_double(tera_lb_heavy_write_pending_threshold); +DECLARE_double(tera_lb_heavy_scan_pending_threshold); +DECLARE_double(tera_lb_heavy_lread_threshold); DECLARE_double(tera_lb_read_pending_factor); DECLARE_double(tera_lb_write_pending_factor); DECLARE_double(tera_lb_scan_pending_factor); @@ -56,476 +62,653 @@ using tera::master::TabletNodePtr; namespace tera { namespace load_balancer { -LBImpl::LBImpl() : - thread_pool_(new ThreadPool(FLAGS_tera_lb_impl_thread_num)), - sdk_client_(nullptr), - safemode_(false), - round_(0), - lb_debug_mode_(FLAGS_tera_lb_debug_mode_enabled) { -} +LBImpl::LBImpl() + : thread_pool_(new ThreadPool(FLAGS_tera_lb_impl_thread_num)), + sdk_client_(nullptr), + safemode_(false), + lb_debug_mode_(FLAGS_tera_lb_debug_mode_enabled) {} -LBImpl::~LBImpl() { -} +LBImpl::~LBImpl() {} bool LBImpl::Init() { - if (lb_debug_mode_) { - LOG(INFO) << "[lb] debug mode enabled"; - } + if (lb_debug_mode_) { + LOG(INFO) << "[lb] debug mode enabled"; + } - // tera_entry has init glog - Client::SetGlogIsInitialized(); - uint32_t log_v = FLAGS_v; + // tera_entry has init glog + Client::SetGlogIsInitialized(); + uint32_t log_v = FLAGS_v; - sdk_client_.reset(Client::NewClient()); - if (!sdk_client_) { - LOG(ERROR) << "[lb] open sdk client fail"; - return false; - } + sdk_client_.reset(Client::NewClient()); + if (!sdk_client_) { + LOG(ERROR) << "[lb] init sdk client fail"; + return false; + } - // avoid sdk change log level of load balancer - FLAGS_v = log_v; + // avoid sdk change log level of load balancer + FLAGS_v = log_v; - ScheduleLoadBalance(); + InitOptions(); - return true; + if (FLAGS_tera_lb_meta_isolate_enabled) { + ScheduleMetaBalance(); + } + + ScheduleUnityBalance(); + + return true; } -void LBImpl::ScheduleLoadBalance() { - int schedule_period = FLAGS_tera_lb_load_balance_period_s * 1000; - VLOG(5) << "[lb] LoadBalance will be scheduled in: " << FLAGS_tera_lb_load_balance_period_s << "s"; - thread_pool_->DelayTask(schedule_period, - [this](int64_t) { - DoLoadBalance(); - ScheduleLoadBalance(); - } - ); +void LBImpl::InitOptions() { + MutexLock lock(&mutex_); + lb_options_.max_compute_steps = FLAGS_tera_lb_max_compute_steps; + lb_options_.max_compute_steps_per_tablet = FLAGS_tera_lb_max_compute_steps_per_tablet; + lb_options_.max_compute_time_ms = FLAGS_tera_lb_max_compute_time_ms; + lb_options_.min_cost_need_balance = FLAGS_tera_lb_min_cost_need_balance; + lb_options_.bad_node_safemode_percent = FLAGS_tera_lb_bad_node_safemode_percent; + lb_options_.move_count_cost_weight = FLAGS_tera_lb_move_count_cost_weight; + lb_options_.meta_balance_max_move_num = FLAGS_tera_lb_meta_balance_max_move_num; + lb_options_.tablet_max_move_num = FLAGS_tera_lb_tablet_max_move_num; + lb_options_.tablet_move_too_frequently_threshold_s = + FLAGS_tera_lb_tablet_move_too_frequently_threshold_s; + lb_options_.abnormal_node_ratio = FLAGS_tera_lb_abnormal_node_ratio; + lb_options_.tablet_count_cost_weight = FLAGS_tera_lb_tablet_count_cost_weight; + lb_options_.size_cost_weight = FLAGS_tera_lb_size_cost_weight; + lb_options_.flash_size_cost_weight = FLAGS_tera_lb_flash_size_cost_weight; + lb_options_.read_load_cost_weight = FLAGS_tera_lb_read_load_cost_weight; + lb_options_.write_load_cost_weight = FLAGS_tera_lb_write_load_cost_weight; + lb_options_.scan_load_cost_weight = FLAGS_tera_lb_scan_load_cost_weight; + lb_options_.lread_cost_weight = FLAGS_tera_lb_lread_cost_weight; + lb_options_.heavy_read_pending_threshold = FLAGS_tera_lb_heavy_read_pending_threshold; + lb_options_.heavy_write_pending_threshold = FLAGS_tera_lb_heavy_write_pending_threshold; + lb_options_.heavy_scan_pending_threshold = FLAGS_tera_lb_heavy_scan_pending_threshold; + lb_options_.heavy_lread_threshold = FLAGS_tera_lb_heavy_lread_threshold; + lb_options_.read_pending_factor = FLAGS_tera_lb_read_pending_factor; + lb_options_.write_pending_factor = FLAGS_tera_lb_write_pending_factor; + lb_options_.scan_pending_factor = FLAGS_tera_lb_scan_pending_factor; + lb_options_.meta_table_isolate_enabled = FLAGS_tera_lb_meta_isolate_enabled; + lb_options_.meta_table_name = FLAGS_tera_lb_meta_table_name; + lb_options_.meta_table_node_addr = meta_node_addr_; + lb_options_.debug_mode_enabled = lb_debug_mode_; } -void LBImpl::DoLoadBalance() { - ++round_; - VLOG(5) << "[lb] LoadBalance begin round: " << round_; - int64_t start_time = get_micros(); +void LBImpl::ScheduleMetaBalance() { + int schedule_period_ms = FLAGS_tera_lb_meta_balance_period_s * 1000; + VLOG(5) << "[lb] MetaBalance will be scheduled in: " << schedule_period_ms / 1000 << "s"; + thread_pool_->DelayTask(schedule_period_ms, [this](int64_t) { + DoMetaBalance(); + ScheduleMetaBalance(); + }); +} - std::vector tablet_nodes; - std::vector tables; - std::vector tablets; - if (!Collect(&tablet_nodes, &tables, &tablets)) { - return; - } +void LBImpl::ScheduleUnityBalance() { + int schedule_period = FLAGS_tera_lb_load_balance_period_s * 1000; + VLOG(5) << "[lb] UnityBalance will be scheduled in: " << FLAGS_tera_lb_load_balance_period_s + << "s"; + thread_pool_->DelayTask(schedule_period, [this](int64_t) { + DoUnityBalance(); + ScheduleUnityBalance(); + }); +} - if (lb_debug_mode_) { - DebugCollect(tablet_nodes, tables, tablets); - } +void LBImpl::DoMetaBalance() { + static uint64_t round = 0; + VLOG(5) << "[lb] MetaBalance begin round: " << ++round; + int64_t start_time = get_micros(); + + std::vector tablet_nodes; + std::vector tables; + std::vector tablets; + if (!Collect(&tablet_nodes, &tables, &tablets)) { + return; + } + + std::vector> lb_nodes; + CreateLBInput(tables, tablet_nodes, tablets, &lb_nodes); + + { + MutexLock lock(&mutex_); + lb_options_.meta_table_node_addr = meta_node_addr_; + } + + std::shared_ptr balancer = std::make_shared(lb_options_); + std::vector plans; + if (!balancer->BalanceCluster(lb_nodes, &plans)) { + LOG(WARNING) << "[lb] DoBalance failed"; + return; + } + + ExecutePlan(plans); + + int64_t cost_time = get_micros() - start_time; + VLOG(5) << "[lb] MetaBalance end round: " << round << ", cost: " << cost_time / 1000.0 << "ms"; +} + +void LBImpl::DoUnityBalance() { + static uint64_t round = 0; + VLOG(5) << "[lb] UnityBalance begin round: " << ++round; + int64_t start_time = get_micros(); + + std::vector tablet_nodes; + std::vector tables; + std::vector tablets; + if (!Collect(&tablet_nodes, &tables, &tablets)) { + return; + } + + { + MutexLock lock(&mutex_); + lb_options_.meta_table_node_addr = meta_node_addr_; + } + + std::shared_ptr balancer = std::make_shared(lb_options_); + std::vector plans; + if (FLAGS_tera_lb_by_table) { + std::map>> nodes_by_table; + CreateLBInputByTable(tables, tablet_nodes, tablets, &nodes_by_table); + if (!BlanceClusterByTable(balancer, nodes_by_table, &plans)) { + return; + } + } else { std::vector> lb_nodes; CreateLBInput(tables, tablet_nodes, tablets, &lb_nodes); - if (lb_debug_mode_) { - DebugLBNode(lb_nodes); + if (!balancer->BalanceCluster(lb_nodes, &plans)) { + LOG(WARNING) << "[lb] DoBalance failed"; + return; } + } - LBOptions options; - options.max_compute_steps = FLAGS_tera_lb_max_compute_steps; - options.max_compute_steps_per_tablet = FLAGS_tera_lb_max_compute_steps_per_tablet; - options.max_compute_time_ms = FLAGS_tera_lb_max_compute_time_ms; - options.min_cost_need_balance = FLAGS_tera_lb_min_cost_need_balance; - options.bad_node_safemode_percent = FLAGS_tera_lb_bad_node_safemode_percent; - options.move_count_cost_weight = FLAGS_tera_lb_move_count_cost_weight; - options.tablet_max_move_num = FLAGS_tera_lb_tablet_max_move_num; - options.move_frequency_cost_weight = FLAGS_tera_lb_move_frequency_cost_weight; - options.tablet_move_too_frequently_threshold_s = FLAGS_tera_lb_tablet_move_too_frequently_threshold_s; - options.abnormal_node_cost_weight = FLAGS_tera_lb_abnormal_node_cost_weight; - options.abnormal_node_ratio = FLAGS_tera_lb_abnormal_node_ratio; - options.read_pending_node_cost_weight = FLAGS_tera_lb_read_pending_node_cost_weight; - options.write_pending_node_cost_weight = FLAGS_tera_lb_write_pending_node_cost_weight; - options.scan_pending_node_cost_weight = FLAGS_tera_lb_scan_pending_node_cost_weight; - options.tablet_count_cost_weight = FLAGS_tera_lb_tablet_count_cost_weight; - options.size_cost_weight = FLAGS_tera_lb_size_cost_weight; - options.read_load_cost_weight = FLAGS_tera_lb_read_load_cost_weight; - options.write_load_cost_weight = FLAGS_tera_lb_write_load_cost_weight; - options.scan_load_cost_weight = FLAGS_tera_lb_scan_load_cost_weight; - options.meta_table_isolate_enabled = FLAGS_tera_lb_meta_isolate_enabled; - options.meta_table_name = FLAGS_tera_lb_meta_table_name; - options.meta_table_node_addr = GetMetaNodeAddr(); - options.debug_mode_enabled = lb_debug_mode_; - - std::unique_ptr balancer(new UnityBalancer(options)); - std::vector plans; - if (!balancer->BalanceCluster(lb_nodes, &plans)) { - LOG(WARNING) << "[lb] LoadBalance failed"; - return; + ExecutePlan(plans); + + int64_t cost_time = get_micros() - start_time; + VLOG(5) << "[lb] UnityBalance end round: " << round << ", cost: " << cost_time / 1000.0 << "ms"; +} + +bool LBImpl::BlanceClusterByTable( + const std::shared_ptr& balancer, + std::map>>& nodes_by_table, + std::vector* plans) { + std::vector tables; + for (const auto& pair : nodes_by_table) { + tables.emplace_back(pair.first); + } + std::random_shuffle(tables.begin(), tables.end()); + + for (const auto& table : tables) { + if (table == FLAGS_tera_lb_meta_table_name) { + continue; } + if (!balancer->BalanceCluster(table, nodes_by_table[table], plans)) { + LOG(WARNING) << "[lb] balance table " << table << " failed"; + return false; + } + } - DebugPlan(plans); + return true; +} - if (!IsSafemode()) { - bool master_safe_mode = true; - bool get_success = GetMasterSafemode(&master_safe_mode); - - if (get_success && !master_safe_mode) { - ExecutePlan(plans); - } else if (!get_success) { - VLOG(5) << "[lb] skip execute plan due to fail to get master safe mode"; - } else if (master_safe_mode) { - VLOG(5) << "[lb] skip execute plan due to master is in safe mode"; - } else { - } +bool LBImpl::CreateLBInput(const std::vector& tables, + const std::vector& nodes, + const std::vector& tablets, + std::vector>* lb_nodes) { + lb_nodes->clear(); + + std::map> nodes_map; + for (const auto& node : nodes) { + LBTabletNode* p_lb_node = new LBTabletNode(); + p_lb_node->tablet_node_ptr = node; + nodes_map[node->GetAddr()].reset(p_lb_node); + } + + for (const auto& tablet : tablets) { + std::string addr = tablet->GetServerAddr(); + if (nodes_map.find(addr) != nodes_map.end()) { + LBTablet* p_lb_tablet = new LBTablet(); + p_lb_tablet->tablet_ptr = tablet; + std::shared_ptr lb_tablet(p_lb_tablet); + nodes_map[addr]->tablets.emplace_back(lb_tablet); } else { - VLOG(5) << "[lb] skip execute plan in safe mode"; + // TODO + // unassigned tablet, skip now } + } - int64_t cost_time = get_micros() - start_time; - VLOG(5) << "[lb] LoadBalance end round: " << round_ - <<", cost: " << cost_time / 1000.0 << "ms"; + lb_nodes->reserve(nodes_map.size()); + for (const auto& pair : nodes_map) { + lb_nodes->emplace_back(pair.second); + } + + if (lb_debug_mode_) { + DebugLBNode(*lb_nodes); + } + + return true; } -bool LBImpl::CreateLBInput( - const std::vector& tables, - const std::vector& nodes, - const std::vector& tablets, - std::vector>* lb_nodes) { - lb_nodes->clear(); +bool LBImpl::CreateLBInputByTable( + const std::vector& tables, const std::vector& nodes, + const std::vector& tablets, + std::map>>* nodes_by_table) { + nodes_by_table->clear(); - std::map> nodes_map; + std::map>> nodes_by_table_intr; + for (const auto& table : tables) { + std::string table_name = table->GetTableName(); for (const auto& node : nodes) { - LBTabletNode* p_lb_node = new LBTabletNode(); - p_lb_node->tablet_node_ptr = node; - nodes_map[node->GetAddr()].reset(p_lb_node); + std::string addr = node->GetAddr(); + LBTabletNode* p_lb_node = new LBTabletNode(); + p_lb_node->tablet_node_ptr = node; + nodes_by_table_intr[table_name][addr].reset(p_lb_node); } + } - for (const auto& tablet : tablets) { - std::string addr = tablet->GetServerAddr(); - if (nodes_map.find(addr) != nodes_map.end()) { - LBTablet* p_lb_tablet = new LBTablet(); - p_lb_tablet->tablet_ptr = tablet; - std::shared_ptr lb_tablet(p_lb_tablet); - nodes_map[addr]->tablets.emplace_back(lb_tablet); - } else { - // TODO - // unassigned tablet, skip now - } + for (const auto& tablet : tablets) { + std::string table_name = tablet->GetTableName(); + std::string addr = tablet->GetServerAddr(); + if (nodes_by_table_intr.find(table_name) == nodes_by_table_intr.end()) { + LOG(WARNING) << "table " << table_name << " of tablet " << tablet->GetPath() + << " does not exist"; + continue; // skip tablet which has no table + } + if (nodes_by_table_intr[table_name].find(addr) == nodes_by_table_intr[table_name].end()) { + LOG(WARNING) << "server " << addr << " of tablet " << tablet->GetPath() << " does not exist"; + continue; // skip tablet which has no server } - for (const auto& pair : nodes_map) { - lb_nodes->emplace_back(pair.second); + LBTablet* p_lb_tablet = new LBTablet(); + p_lb_tablet->tablet_ptr = tablet; + nodes_by_table_intr[table_name][addr]->tablets.emplace_back(p_lb_tablet); + } + + for (const auto& table : nodes_by_table_intr) { + std::string table_name = table.first; + for (const auto& node : table.second) { + (*nodes_by_table)[table_name].emplace_back(node.second); } + } + + if (lb_debug_mode_) { + DebugLBNodeByTable(*nodes_by_table); + } - return true; + return true; } -bool LBImpl::Collect(std::vector* nodes, - std::vector* tables, +bool LBImpl::Collect(std::vector* nodes, std::vector* tables, std::vector* tablets) { - if (nodes == nullptr || tables == nullptr || tablets == nullptr) { - return false; - } - nodes->clear(); - tables->clear(); - tablets->clear(); + if (nodes == nullptr || tables == nullptr || tablets == nullptr) { + return false; + } + nodes->clear(); + tables->clear(); + tablets->clear(); - int64_t start_time = get_micros(); + int64_t start_time = get_micros(); - if (!CollectNodes(nodes)) { - LOG(ERROR) << "[lb] collect nodes fail"; - return false; - } + if (!CollectNodes(nodes)) { + LOG(ERROR) << "[lb] collect nodes fail"; + return false; + } - if (!CollectTablets(tables, tablets)) { - LOG(ERROR) << "[lb] collect tablets fail"; - return false; - } + if (!CollectTablets(tables, tablets)) { + LOG(ERROR) << "[lb] collect tablets fail"; + return false; + } + + int64_t cost_time = get_micros() - start_time; + VLOG(5) << "[lb] Collect cost: " << cost_time / 1000.0 << "ms"; - int64_t cost_time = get_micros() - start_time; - VLOG(5) << "[lb] Collect cost: " << cost_time / 1000.0 << "ms"; + if (lb_debug_mode_) { + DebugCollect(*nodes, *tables, *tablets); + } - return true; + return true; } bool LBImpl::CollectNodes(std::vector* nodes) { - std::shared_ptr client_impl((static_cast(sdk_client_.get()))->GetClientImpl()); - std::vector infos; - ErrorCode err; - if (!client_impl->ShowTabletNodesInfo(&infos, &err)) { - LOG(ERROR) << "[lb] fail to get TabletNodeInfo, err: " << err.ToString(); - return false; - } - - for (const auto& info : infos) { - TabletNodePtr node(new TabletNode()); - NodeInfoToNode(info, node); - nodes->push_back(node); - } - - VLOG(5) << "[lb] collected node size: " << nodes->size(); - - return true; + std::shared_ptr client_impl( + (static_cast(sdk_client_.get()))->GetClientImpl()); + std::vector infos; + ErrorCode err; + if (!client_impl->ShowTabletNodesInfo(&infos, &err)) { + LOG(ERROR) << "[lb] fail to get TabletNodeInfo, err: " << err.ToString(); + return false; + } + + for (const auto& info : infos) { + TabletNodePtr node(new TabletNode()); + NodeInfoToNode(info, node); + nodes->push_back(node); + } + + VLOG(5) << "[lb] collected node size: " << nodes->size(); + + return true; } bool LBImpl::NodeInfoToNode(const TabletNodeInfo& info, TabletNodePtr node) { - node->info_ = info; + node->info_ = info; - node->addr_ = info.addr(); - node->state_ = StringToNodeState(info.status_m()); - node->data_size_ = info.load(); - node->average_counter_.read_pending_ = info.read_pending(); - node->average_counter_.write_pending_ = info.write_pending(); - node->average_counter_.scan_pending_ = info.scan_pending(); + node->addr_ = info.addr(); + node->state_ = StringToNodeState(info.status_m()); + node->data_size_ = info.load(); + node->persistent_cache_size_ = info.persistent_cache_size(); + node->average_counter_.read_pending_ = info.read_pending(); + node->average_counter_.write_pending_ = info.write_pending(); + node->average_counter_.scan_pending_ = info.scan_pending(); - return true; + return true; } NodeState LBImpl::StringToNodeState(const std::string& str) { - if (str == "kReady") { - return tera::master::kReady; - } else if (str == "kOffLine") { - return tera::master::kOffLine; - } else if (str == "kOnKick") { - return tera::master::kOnKick; - } else if (str == "kWaitKick") { - return tera::master::kWaitKick; - } else { - return tera::master::kOffLine; - } + if (str == "kReady") { + return tera::master::kReady; + } else if (str == "kOffline" || str == "kOffLine") { + return tera::master::kOffline; + } else if (str == "kOnKick") { + return tera::master::kOnKick; + } else if (str == "kWaitKick") { + return tera::master::kWaitKick; + } else if (str == "kKicked") { + return tera::master::kKicked; + } else { + return tera::master::kOffline; + } } -bool LBImpl::CollectTablets(std::vector* tables, - std::vector* tablets) { - std::shared_ptr client_impl((static_cast(sdk_client_.get()))->GetClientImpl()); - TableMetaList table_list; - TabletMetaList tablet_list; - bool is_brief = false; - ErrorCode err; - if (!client_impl->ShowTablesInfo(&table_list, &tablet_list, is_brief, &err)) { - LOG(ERROR) << "[lb] fail to get tablets, err: " << err.ToString(); - return false; +bool LBImpl::CollectTablets(std::vector* tables, std::vector* tablets) { + std::shared_ptr client_impl( + (static_cast(sdk_client_.get()))->GetClientImpl()); + TableMetaList table_list; + TabletMetaList tablet_list; + bool is_brief = false; + ErrorCode err; + if (!client_impl->ShowTablesInfo(&table_list, &tablet_list, is_brief, &err)) { + LOG(ERROR) << "[lb] fail to get tablets, err: " << err.ToString(); + return false; + } + + std::map table_name_to_ptr; + + for (int i = 0; i < table_list.meta_size(); ++i) { + const TableMeta& meta = table_list.meta(i); + if (meta.status() != kTableEnable) { + VLOG(10) << "[lb] skip table:" << meta.table_name() + << ", status:" << StatusCodeToString(meta.status()); + continue; + } + const std::string& table_name = meta.table_name(); + TablePtr table(new tera::master::Table(table_name, meta.schema(), meta.status())); + tables->push_back(table); + + if (table_name_to_ptr.find(table_name) == table_name_to_ptr.end()) { + table_name_to_ptr[table_name] = table; + } + } + + if (tablet_list.meta_size() != tablet_list.counter_size()) { + LOG(ERROR) << "[lb] invalid TabletMetaList, meta size: " << tablet_list.meta_size() + << " counter size: " << tablet_list.counter_size(); + return false; + } + for (int i = 0; i < tablet_list.meta_size(); ++i) { + std::string table_name = tablet_list.meta(i).table_name(); + if (table_name_to_ptr.find(table_name) == table_name_to_ptr.end()) { + LOG(WARNING) << "[lb] tablet's table not exist " + << "tablet path: " << tablet_list.meta(i).path() << "table: " << table_name; + continue; + } + TabletPtr tablet(new tera::master::Tablet(tablet_list.meta(i), table_name_to_ptr[table_name])); + tablet->SetCounter(tablet_list.counter(i)); + if (tablet_list.meta(i).has_last_move_time_us()) { + tablet->SetLastMoveTime(tablet_list.meta(i).last_move_time_us()); + } else { + // !!! compatible with old master + // !!! set last move time to 0 will disable the MoveFrequencyCostFunction + // strategy + tablet->SetLastMoveTime(0); } - - std::map table_name_to_ptr; - - for (int i = 0; i < table_list.meta_size(); ++i) { - const TableMeta& meta = table_list.meta(i); - const std::string& table_name = meta.table_name(); - TablePtr table(new tera::master::Table(table_name, meta.schema(), meta.status())); - tables->push_back(table); - - if (table_name_to_ptr.find(table_name) == table_name_to_ptr.end()) { - table_name_to_ptr[table_name] = table; - } + if (tablet_list.meta(i).has_data_size_on_flash()) { + tablet->SetDataSizeOnFlash(tablet_list.meta(i).data_size_on_flash()); + } else { + tablet->SetDataSizeOnFlash(0); } + tablets->push_back(tablet); - if (tablet_list.meta_size() != tablet_list.counter_size()) { - LOG(ERROR) << "[lb] invalid TabletMetaList, meta size: " << tablet_list.meta_size() - << " counter size: " << tablet_list.counter_size(); - return false; - } - for (int i = 0; i < tablet_list.meta_size(); ++i) { - std::string table_name = tablet_list.meta(i).table_name(); - if (table_name_to_ptr.find(table_name) == table_name_to_ptr.end()) { - LOG(WARNING) << "[lb] tablet's table not exist " << "tablet path: " - << tablet_list.meta(i).path() << "table: " << table_name; - continue; - } - TabletPtr tablet(new tera::master::Tablet(tablet_list.meta(i), table_name_to_ptr[table_name])); - tablet->SetCounter(tablet_list.counter(i)); - if (tablet_list.meta(i).has_last_move_time_us()) { - tablet->SetLastMoveTime(tablet_list.meta(i).last_move_time_us()); - } else { - // !!! compatible with old master - // !!! set last move time to 0 will disable the MoveFrequencyCostFunction strategy - tablet->SetLastMoveTime(0); - } - tablets->push_back(tablet); - - if (table_name == FLAGS_tera_lb_meta_table_name) { - SetMetaNodeAddr(tablet->GetServerAddr()); - VLOG(5) << "[lb] meta table node addr: " << GetMetaNodeAddr(); - } + if (table_name == FLAGS_tera_lb_meta_table_name) { + SetMetaNodeAddr(tablet->GetServerAddr()); + VLOG(5) << "[lb] meta table node addr: " << GetMetaNodeAddr(); } + } - VLOG(5) << "[lb] collected table size: " << tables->size(); - VLOG(5) << "[lb] collected tablet size: " << tablets->size(); + VLOG(5) << "[lb] collected table size: " << tables->size(); + VLOG(5) << "[lb] collected tablet size: " << tablets->size(); - return true; + return true; } void LBImpl::DebugCollect(const std::vector& nodes, const std::vector& tables, const std::vector& tablets) { - LOG(INFO) << ""; - LOG(INFO) << "[lb] DebugCollect begin -----"; - - LOG(INFO) << "[lb] " << tables.size() << " table:" ; - for (const auto& table : tables) { - LOG(INFO) << "table:" + table->GetTableName() - << " status:" << StatusCodeToString(table->GetStatus()); - } + LOG(INFO) << ""; + LOG(INFO) << "[lb] DebugCollect begin -----"; + + LOG(INFO) << "[lb] " << tables.size() << " table:"; + for (const auto& table : tables) { + LOG(INFO) << "table:" + table->GetTableName() + << " status:" << StatusCodeToString(table->GetStatus()); + } + + LOG(INFO) << "[lb] " << nodes.size() << " node:"; + for (const auto& node : nodes) { + LOG(INFO) << "addr:" + node->GetAddr() + << " state:" << tera::master::NodeStateToString(node->GetState()) + << " size:" << node->GetSize() << "B" + << " persistent_cache_size:" << node->GetPersistentCacheSize() << "B" + << " r_pending:" << node->GetReadPending() << " w_pending:" << node->GetWritePending() + << " s_pending:" << node->GetScanPending() + << " lread:" << node->info_.low_read_cell(); + } + + LOG(INFO) << "[lb] " << tablets.size() << " tablet:"; + for (const auto& tablet : tablets) { + LOG(INFO) << "path:" << tablet->GetPath() + << " status:" << StatusCodeToString(tablet->GetStatus()) + << " server:" << tablet->GetServerAddr() << " table:" << tablet->GetTableName() + << " size:" << tablet->GetDataSize() << " flash size:" << tablet->GetDataSizeOnFlash() + << " lread:" << tablet->GetLRead() << " last_move_time_us:" << tablet->LastMoveTime(); + } + + LOG(INFO) << "[lb] DebugCollect end -----"; + LOG(INFO) << ""; +} - LOG(INFO) << "[lb] " << nodes.size() << " node:"; - for (const auto& node : nodes) { - LOG(INFO) << "addr:" + node->GetAddr() - << " state:" << tera::master::NodeStateToString(node->GetState()) - << " size:" << node->GetSize() << "B" - << " r_pending:" << node->GetReadPending() - << " w_pending:" << node->GetWritePending() - << " s_pending:" << node->GetScanPending(); - } +void LBImpl::DebugLBNode(const std::vector>& lb_nodes) { + LOG(INFO) << ""; + LOG(INFO) << "[lb] DebugLBNode begin -----"; + LOG(INFO) << "[lb] " << lb_nodes.size() << " lb_nodes:"; - LOG(INFO) << "[lb] " << tablets.size() << " tablet:"; - for (const auto& tablet : tablets) { - LOG(INFO) << "path:" + tablet->GetPath() - << " status:" << StatusCodeToString(tablet->GetStatus()) - << " server:" << tablet->GetServerAddr() - << " table:" << tablet->GetTableName() - << " last_move_time_us:" << tablet->LastMoveTime(); + for (const auto& node : lb_nodes) { + LOG(INFO) << "[lb] " << node->tablet_node_ptr->GetAddr() << ":"; + for (const auto& lb_tablet : node->tablets) { + LOG(INFO) << "[lb] " << lb_tablet->tablet_ptr->GetPath(); } + } - LOG(INFO) << "[lb] DebugCollect end -----"; - LOG(INFO) << ""; + LOG(INFO) << "[lb] DebugLBNode end -----"; + LOG(INFO) << ""; } -void LBImpl::DebugLBNode(const std::vector>& lb_nodes) { - LOG(INFO) << ""; - LOG(INFO) << "[lb] DebugLBNode begin -----"; - LOG(INFO) << "[lb] " << lb_nodes.size() << " lb_nodes:" ; - - for (const auto& node : lb_nodes) { - LOG(INFO) << "[lb] " << node->tablet_node_ptr->GetAddr() << ":"; - for (const auto& lb_tablet : node->tablets) { - LOG(INFO) << "[lb] " << lb_tablet->tablet_ptr->GetPath(); - } +void LBImpl::DebugLBNodeByTable( + const std::map>>& nodes_by_table) { + LOG(INFO) << ""; + LOG(INFO) << "[lb] DebugLBNodeByTable begin -----"; + + for (const auto& table : nodes_by_table) { + LOG(INFO) << "[lb] table " << table.first; + for (const auto& lb_node : table.second) { + LOG(INFO) << "[lb] node " << lb_node->tablet_node_ptr->GetAddr(); + for (const auto& tablet : lb_node->tablets) { + LOG(INFO) << "[lb] tablet " << tablet->tablet_ptr->GetPath(); + } } + } - LOG(INFO) << "[lb] DebugLBNode end -----"; - LOG(INFO) << ""; + LOG(INFO) << "[lb] DebugLBNodeByTable end -----"; + LOG(INFO) << ""; } void LBImpl::DebugPlan(const std::vector& plans) { - VLOG(5) << ""; - VLOG(5) << "[lb] DebugPlan begin ----"; - VLOG(5) << plans.size() << " plans:"; + VLOG(5) << ""; + VLOG(5) << "[lb] DebugPlan begin ----"; + VLOG(5) << plans.size() << " plans:"; - for (const auto& plan : plans) { - VLOG(5) << "[lb] " + plan.ToString(); - } + for (const auto& plan : plans) { + VLOG(5) << "[lb] " + plan.ToString(); + } - VLOG(5) << "[lb] DebugPlan end ----"; - VLOG(5) << ""; + VLOG(5) << "[lb] DebugPlan end ----"; + VLOG(5) << ""; } void LBImpl::ExecutePlan(const std::vector& plans) { - std::shared_ptr client_impl((static_cast(sdk_client_.get()))->GetClientImpl()); - for (const auto& plan : plans) { - std::string tablet_path = plan.TabletPath(); - std::string dest_addr = plan.DestAddr(); - - std::vector arg_list; - arg_list.emplace_back("move"); - arg_list.emplace_back(tablet_path); - arg_list.emplace_back(dest_addr); - - ErrorCode err; - if (!client_impl->CmdCtrl("tablet", arg_list, nullptr, nullptr, &err)) { - LOG(ERROR) << "[lb] fail to execute plan:" << plan.ToString() << err.ToString(); - } else { - VLOG(5) << "[lb] execute plan success:" << plan.ToString(); - } + if (lb_debug_mode_) { + DebugPlan(plans); + } + + if (IsSafemode()) { + VLOG(5) << "[lb] skip execute plan in safe mode"; + return; + } + + bool master_safe_mode = true; + bool get_success = GetMasterSafemode(&master_safe_mode); + if (!get_success) { + VLOG(5) << "[lb] skip execute plan due to fail to get master safe mode"; + return; + } else { + if (master_safe_mode) { + VLOG(5) << "[lb] skip execute plan due to master is in safe mode"; + return; + } else { + // will execute plan } -} + } -bool LBImpl::IsSafemode() const { - MutexLock lock(&mutex_); - return safemode_; -} + std::shared_ptr client_impl( + (static_cast(sdk_client_.get()))->GetClientImpl()); + for (const auto& plan : plans) { + std::string tablet_path = plan.TabletPath(); + std::string dest_addr = plan.DestAddr(); -bool LBImpl::SetSafemode(bool value) { - MutexLock lock(&mutex_); - safemode_ = value; + std::vector arg_list; + arg_list.emplace_back("move"); + arg_list.emplace_back(tablet_path); + arg_list.emplace_back(dest_addr); - if (value) { - LOG(INFO) << "[lb] LoadBanlacer enter safemode"; + ErrorCode err; + if (!client_impl->CmdCtrl("tablet", arg_list, nullptr, nullptr, &err)) { + LOG(ERROR) << "[lb] fail to execute plan:" << plan.ToString() << err.ToString(); } else { - LOG(INFO) << "[lb] LoadBanlacer leave safemode"; + VLOG(5) << "[lb] execute plan success:" << plan.ToString(); } + } +} - return true; +bool LBImpl::IsSafemode() const { + MutexLock lock(&mutex_); + return safemode_; } -bool LBImpl::GetMasterSafemode(bool* safe_mode) { - if (safe_mode == nullptr) { - return false; - } +bool LBImpl::SetSafemode(bool value) { + MutexLock lock(&mutex_); + safemode_ = value; - std::string op = "get"; - std::vector arg_list; - arg_list.push_back(op); + if (value) { + LOG(INFO) << "[lb] LoadBanlacer enter safemode"; + } else { + LOG(INFO) << "[lb] LoadBanlacer leave safemode"; + } - std::shared_ptr client_impl((static_cast(sdk_client_.get()))->GetClientImpl()); - ErrorCode err; - if (!client_impl->CmdCtrl("safemode", arg_list, safe_mode, NULL, &err)) { - LOG(ERROR) << "[lb] fail to " << op << " master safemode" << err.ToString(); - return false; - } + return true; +} - VLOG(20) << "[lb] master safemode: " << *safe_mode; - return true; +bool LBImpl::GetMasterSafemode(bool* safe_mode) { + if (safe_mode == nullptr) { + return false; + } + + std::string op = "get"; + std::vector arg_list; + arg_list.push_back(op); + + std::shared_ptr client_impl( + (static_cast(sdk_client_.get()))->GetClientImpl()); + ErrorCode err; + if (!client_impl->CmdCtrl("safemode", arg_list, safe_mode, NULL, &err)) { + LOG(ERROR) << "[lb] fail to " << op << " master safemode" << err.ToString(); + return false; + } + + VLOG(20) << "[lb] master safemode: " << *safe_mode; + return true; } std::string LBImpl::GetMetaNodeAddr() const { - MutexLock lock(&mutex_); - return meta_node_addr_; + MutexLock lock(&mutex_); + return meta_node_addr_; } bool LBImpl::SetMetaNodeAddr(const std::string& addr) { - MutexLock lock(&mutex_); - meta_node_addr_ = addr; - return true; + MutexLock lock(&mutex_); + meta_node_addr_ = addr; + return true; } -void LBImpl::CmdCtrl(const CmdCtrlRequest* request, - CmdCtrlResponse* response, +void LBImpl::CmdCtrl(const CmdCtrlRequest* request, CmdCtrlResponse* response, google::protobuf::Closure* done) { - std::string cmd_line; - for (int32_t i = 0; i < request->arg_list_size(); i++) { - cmd_line += request->arg_list(i); - if (i != request->arg_list_size() - 1) { - cmd_line += " "; - } + std::string cmd_line; + for (int32_t i = 0; i < request->arg_list_size(); i++) { + cmd_line += request->arg_list(i); + if (i != request->arg_list_size() - 1) { + cmd_line += " "; } - LOG(INFO) << "[lb] receive cmd: " << request->command() << " " << cmd_line; + } + LOG(INFO) << "[lb] receive cmd: " << request->command() << " " << cmd_line; - response->set_sequence_id(request->sequence_id()); + response->set_sequence_id(request->sequence_id()); - if (request->command() == "safemode") { - SafeModeCmdCtrl(request, response); - } else { - response->set_status(kInvalidArgument); - } + if (request->command() == "safemode") { + SafeModeCmdCtrl(request, response); + } else { + response->set_status(kInvalidArgument); + } - done->Run(); - return; + done->Run(); + return; } -void LBImpl::SafeModeCmdCtrl(const CmdCtrlRequest* request, - CmdCtrlResponse* response) { - if (request->arg_list_size() != 1) { - response->set_status(kInvalidArgument); - return; - } - - if (request->arg_list(0) == "enter") { - SetSafemode(true); - response->set_status(kLoadBalancerOk); - } else if (request->arg_list(0) == "leave") { - SetSafemode(false); - response->set_status(kLoadBalancerOk); - } else if (request->arg_list(0) == "get") { - response->set_bool_result(IsSafemode()); - response->set_status(kLoadBalancerOk); - } else { - response->set_status(kInvalidArgument); - } +void LBImpl::SafeModeCmdCtrl(const CmdCtrlRequest* request, CmdCtrlResponse* response) { + if (request->arg_list_size() != 1) { + response->set_status(kInvalidArgument); + return; + } + + if (request->arg_list(0) == "enter") { + SetSafemode(true); + response->set_status(kLoadBalancerOk); + } else if (request->arg_list(0) == "leave") { + SetSafemode(false); + response->set_status(kLoadBalancerOk); + } else if (request->arg_list(0) == "get") { + response->set_bool_result(IsSafemode()); + response->set_status(kLoadBalancerOk); + } else { + response->set_status(kInvalidArgument); + } } -} // namespace load_balancer -} // namespace tera - +} // namespace load_balancer +} // namespace tera diff --git a/src/load_balancer/lb_impl.h b/src/load_balancer/lb_impl.h index 33aa28905..6c8666d5f 100644 --- a/src/load_balancer/lb_impl.h +++ b/src/load_balancer/lb_impl.h @@ -12,7 +12,9 @@ #include "common/mutex.h" #include "common/thread_pool.h" +#include "load_balancer/balancer.h" #include "load_balancer/lb_node.h" +#include "load_balancer/options.h" #include "load_balancer/plan.h" #include "master/tablet_manager.h" #include "master/tabletnode_manager.h" @@ -23,70 +25,76 @@ namespace tera { namespace load_balancer { class LBImpl { -public: - LBImpl(); - virtual ~LBImpl(); - - bool Init(); - - void CmdCtrl(const CmdCtrlRequest* request, - CmdCtrlResponse* response, - google::protobuf::Closure* done); - -private: - void ScheduleLoadBalance(); - void DoLoadBalance(); - - bool CreateLBInput(const std::vector& tables, - const std::vector& nodes, - const std::vector& tablets, - std::vector>* lb_nodes); - - bool Collect(std::vector* nodes, - std::vector* tables, - std::vector* tablets); - - bool CollectNodes(std::vector* nodes); - bool NodeInfoToNode(const TabletNodeInfo& info, - tera::master::TabletNodePtr node); - tera::master::NodeState StringToNodeState(const std::string& str); - - bool CollectTablets(std::vector* tables, - std::vector* tablets); - - void ExecutePlan(const std::vector& plans); - - bool IsSafemode() const; - bool SetSafemode(bool value); - - bool GetMasterSafemode(bool* safe_mode); - - std::string GetMetaNodeAddr() const; - bool SetMetaNodeAddr(const std::string& addr); - - void DebugCollect(const std::vector& nodes, - const std::vector& tables, - const std::vector& tablets); - void DebugLBNode(const std::vector>& lb_nodes); - void DebugPlan(const std::vector& plans); - - void SafeModeCmdCtrl(const CmdCtrlRequest* request, - CmdCtrlResponse* response); - -private: - mutable Mutex mutex_; - - std::unique_ptr thread_pool_; - std::unique_ptr sdk_client_; - - bool safemode_; - uint64_t round_; - std::string meta_node_addr_; - - bool lb_debug_mode_; + public: + LBImpl(); + virtual ~LBImpl(); + + bool Init(); + + void CmdCtrl(const CmdCtrlRequest* request, CmdCtrlResponse* response, + google::protobuf::Closure* done); + + private: + void ScheduleMetaBalance(); + void ScheduleUnityBalance(); + void DoMetaBalance(); + void DoUnityBalance(); + bool BlanceClusterByTable( + const std::shared_ptr& balancer, + std::map>>& nodes_by_table, + std::vector* plans); + + void InitOptions(); + bool CreateLBInput(const std::vector& tables, + const std::vector& nodes, + const std::vector& tablets, + std::vector>* lb_nodes); + bool CreateLBInputByTable( + const std::vector& tables, + const std::vector& nodes, + const std::vector& tablets, + std::map>>* nodes_by_table); + bool Collect(std::vector* nodes, + std::vector* tables, + std::vector* tablets); + bool CollectNodes(std::vector* nodes); + bool NodeInfoToNode(const TabletNodeInfo& info, tera::master::TabletNodePtr node); + tera::master::NodeState StringToNodeState(const std::string& str); + bool CollectTablets(std::vector* tables, + std::vector* tablets); + + bool IsSafemode() const; + bool SetSafemode(bool value); + void SafeModeCmdCtrl(const CmdCtrlRequest* request, CmdCtrlResponse* response); + + bool GetMasterSafemode(bool* safe_mode); + void ExecutePlan(const std::vector& plans); + + std::string GetMetaNodeAddr() const; + bool SetMetaNodeAddr(const std::string& addr); + + void DebugCollect(const std::vector& nodes, + const std::vector& tables, + const std::vector& tablets); + void DebugLBNode(const std::vector>& lb_nodes); + void DebugLBNodeByTable( + const std::map>>& nodes_by_table); + void DebugPlan(const std::vector& plans); + + private: + mutable Mutex mutex_; + + std::unique_ptr thread_pool_; + std::unique_ptr sdk_client_; + LBOptions lb_options_; + + bool safemode_; + std::string meta_node_addr_; + + bool lb_debug_mode_; }; -} // namespace load_balancer -} // namespace tera +} // namespace load_balancer +} // namespace tera -#endif // TERA_LOAD_BALANCER_LB_IMPL_H_ +#endif // TERA_LOAD_BALANCER_LB_IMPL_H_ diff --git a/src/load_balancer/lb_node.h b/src/load_balancer/lb_node.h index b3b4430e2..8319091b1 100644 --- a/src/load_balancer/lb_node.h +++ b/src/load_balancer/lb_node.h @@ -16,15 +16,15 @@ namespace tera { namespace load_balancer { struct LBTablet { - tera::master::TabletPtr tablet_ptr; + tera::master::TabletPtr tablet_ptr; }; struct LBTabletNode { - tera::master::TabletNodePtr tablet_node_ptr; - std::vector> tablets; + tera::master::TabletNodePtr tablet_node_ptr; + std::vector> tablets; }; -} // namespace load_balancer -} // namespace tera +} // namespace load_balancer +} // namespace tera -#endif // TERA_LOAD_BALANCER_LB_NODE_H_ +#endif // TERA_LOAD_BALANCER_LB_NODE_H_ diff --git a/src/load_balancer/lb_service_impl.cc b/src/load_balancer/lb_service_impl.cc index e67759c1c..700e95ede 100644 --- a/src/load_balancer/lb_service_impl.cc +++ b/src/load_balancer/lb_service_impl.cc @@ -17,33 +17,27 @@ DECLARE_int32(tera_lb_server_thread_num); namespace tera { namespace load_balancer { -LBServiceImpl::LBServiceImpl(const std::shared_ptr& lb_impl) : - lb_impl_(lb_impl), - thread_pool_(new ThreadPool(FLAGS_tera_lb_server_thread_num)) { -} +LBServiceImpl::LBServiceImpl(const std::shared_ptr& lb_impl) + : lb_impl_(lb_impl), thread_pool_(new ThreadPool(FLAGS_tera_lb_server_thread_num)) {} -LBServiceImpl::~LBServiceImpl() { -} +LBServiceImpl::~LBServiceImpl() {} void LBServiceImpl::CmdCtrl(google::protobuf::RpcController* controller, - const CmdCtrlRequest* request, - CmdCtrlResponse* response, + const CmdCtrlRequest* request, CmdCtrlResponse* response, google::protobuf::Closure* done) { - VLOG(20) << "accept RPC (CmdCtrl) from: " << tera::utils::GetRemoteAddress(controller); - ThreadPool::Task task = - std::bind(&LBServiceImpl::DoCmdCtrl, this, controller, request, response, done); - thread_pool_->AddTask(task); + VLOG(20) << "accept RPC (CmdCtrl) from: " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task task = + std::bind(&LBServiceImpl::DoCmdCtrl, this, controller, request, response, done); + thread_pool_->AddTask(task); } void LBServiceImpl::DoCmdCtrl(google::protobuf::RpcController* controller, - const CmdCtrlRequest* request, - CmdCtrlResponse* response, + const CmdCtrlRequest* request, CmdCtrlResponse* response, google::protobuf::Closure* done) { - VLOG(20) << "run RPC (CmdCtrl)"; - lb_impl_->CmdCtrl(request, response, done); - VLOG(20) << "finish RPC (CmdCtrl)"; + VLOG(20) << "run RPC (CmdCtrl)"; + lb_impl_->CmdCtrl(request, response, done); + VLOG(20) << "finish RPC (CmdCtrl)"; } -} // namespace load_balancer -} // namespace tera - +} // namespace load_balancer +} // namespace tera diff --git a/src/load_balancer/lb_service_impl.h b/src/load_balancer/lb_service_impl.h index f0754bb6e..a5ed40bf1 100644 --- a/src/load_balancer/lb_service_impl.h +++ b/src/load_balancer/lb_service_impl.h @@ -15,28 +15,24 @@ namespace load_balancer { class LBImpl; -class LBServiceImpl: public LoadBalancerService { -public: - explicit LBServiceImpl(const std::shared_ptr& lb_impl); - virtual ~LBServiceImpl(); - - void CmdCtrl(google::protobuf::RpcController* controller, - const CmdCtrlRequest* request, - CmdCtrlResponse* response, - google::protobuf::Closure* done); - -private: - void DoCmdCtrl(google::protobuf::RpcController* controller, - const CmdCtrlRequest* request, - CmdCtrlResponse* response, - google::protobuf::Closure* done); - -private: - std::shared_ptr lb_impl_; - std::unique_ptr thread_pool_; +class LBServiceImpl : public LoadBalancerService { + public: + explicit LBServiceImpl(const std::shared_ptr& lb_impl); + virtual ~LBServiceImpl(); + + void CmdCtrl(google::protobuf::RpcController* controller, const CmdCtrlRequest* request, + CmdCtrlResponse* response, google::protobuf::Closure* done); + + private: + void DoCmdCtrl(google::protobuf::RpcController* controller, const CmdCtrlRequest* request, + CmdCtrlResponse* response, google::protobuf::Closure* done); + + private: + std::shared_ptr lb_impl_; + std::unique_ptr thread_pool_; }; -} // namespace load_balancer -} // namespace tera +} // namespace load_balancer +} // namespace tera -#endif // TERA_LOAD_BALANCER_LB_SERVICE_IMPL_H_ +#endif // TERA_LOAD_BALANCER_LB_SERVICE_IMPL_H_ diff --git a/src/load_balancer/meta_balancer.cc b/src/load_balancer/meta_balancer.cc new file mode 100644 index 000000000..8738a6f75 --- /dev/null +++ b/src/load_balancer/meta_balancer.cc @@ -0,0 +1,143 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "load_balancer/meta_balancer.h" + +#include +#include + +#include "glog/logging.h" +#include "load_balancer/random.h" +#include "common/timer.h" + +namespace tera { +namespace load_balancer { + +using tera::master::TabletNodePtr; +using tera::master::TabletPtr; + +static uint32_t s_max_compute_step = 100; + +MetaBalancer::MetaBalancer(const LBOptions& options) : lb_options_(options) { + action_generators_.emplace_back(new MetaIsolateActionGenerator()); +} + +MetaBalancer::~MetaBalancer() {} + +bool MetaBalancer::BalanceCluster(const std::vector>& lb_nodes, + std::vector* plans) { + return BalanceCluster("", lb_nodes, plans); +} + +bool MetaBalancer::BalanceCluster(const std::string& table_name, + const std::vector>& lb_nodes, + std::vector* plans) { + if (lb_nodes.size() <= 1 || plans == nullptr) { + return false; + } + + VLOG(5) << "[lb] balance meta node begin"; + + std::shared_ptr cluster = + std::make_shared(lb_nodes, lb_options_, false /*skip_meta_node*/); + + if (lb_options_.debug_mode_enabled) { + cluster->DebugCluster(); + } + + if (!NeedBalance(cluster)) { + return true; + } + + uint32_t step = 0; + uint32_t valid_action_count = 0; + while (true) { + ++step; + if (step > s_max_compute_step) { + break; + } + if (cluster->tablets_per_node_[cluster->meta_table_node_index_].size() == 1) { + break; + } + + std::shared_ptr action(NextAction(cluster)); + VLOG(20) << "[lb] action:" << action->ToString(); + + if (cluster->ValidAction(action)) { + ++valid_action_count; + } else { + continue; + } + + cluster->DoAction(action); + + if (lb_options_.debug_mode_enabled) { + cluster->DebugCluster(); + } + + if (valid_action_count >= lb_options_.meta_balance_max_move_num) { + break; + } + } + + CreatePlans(cluster, plans); + + return true; +} + +bool MetaBalancer::NeedBalance(const std::shared_ptr& cluster) { + if (!lb_options_.meta_table_isolate_enabled) { + VLOG(10) << "[lb] meta isolate is closed, no need to balance"; + return false; + } + + if (cluster->tablet_node_num_ == 0) { + LOG(INFO) << "[lb] empty cluster , no need to balance"; + return false; + } + + if (cluster->tablets_per_node_[cluster->meta_table_node_index_].size() == 1) { + LOG(INFO) << "[lb] meta node is isolated, no need to balance"; + return false; + } + + uint32_t heavy_pending_node_num = cluster->HeavyPendingNodeNum(); + uint32_t bad_node_num = cluster->abnormal_nodes_index_.size() + heavy_pending_node_num; + double bad_node_percent = + static_cast(bad_node_num) / static_cast(cluster->tablet_node_num_); + if (bad_node_percent >= lb_options_.bad_node_safemode_percent) { + LOG(INFO) << "[lb] bad node num: " << bad_node_num + << ", total node num: " << cluster->tablet_node_num_ + << ", bad node safemode percent: " << lb_options_.bad_node_safemode_percent + << ", too many bad nodes, skip balance"; + return false; + } + + return true; +} + +Action* MetaBalancer::NextAction(const std::shared_ptr& cluster) { + uint32_t rand = Random::Rand(0, action_generators_.size()); + return action_generators_[rand]->Generate(cluster); +} + +void MetaBalancer::CreatePlans(const std::shared_ptr& cluster, std::vector* plans) { + for (uint32_t i = 0; i < cluster->tablet_index_to_node_index_.size(); ++i) { + uint32_t initial_node_index = cluster->initial_tablet_index_to_node_index_[i]; + uint32_t new_node_index = cluster->tablet_index_to_node_index_[i]; + + if (initial_node_index != new_node_index) { + // tablet has been moved to another tablet node + Plan plan(cluster->tablets_[i]->tablet_ptr, + cluster->nodes_[initial_node_index]->tablet_node_ptr, + cluster->nodes_[new_node_index]->tablet_node_ptr); + plans->emplace_back(plan); + } + } +} + +std::string MetaBalancer::GetName() { return "MetaBalancer"; } + +} // namespace load_balancer +} // namespace tera diff --git a/src/load_balancer/meta_balancer.h b/src/load_balancer/meta_balancer.h new file mode 100644 index 000000000..f05b124f5 --- /dev/null +++ b/src/load_balancer/meta_balancer.h @@ -0,0 +1,51 @@ +#pragma once + +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include + +#include "load_balancer/action_generators.h" +#include "load_balancer/actions.h" +#include "load_balancer/balancer.h" +#include "load_balancer/cluster.h" +#include "load_balancer/cost_functions.h" + +namespace tera { +namespace load_balancer { + +class MetaBalancer : public Balancer { + public: + explicit MetaBalancer(const LBOptions& options); + virtual ~MetaBalancer(); + + virtual bool BalanceCluster(const std::vector>& lb_nodes, + std::vector* plans) override; + + // if table_name is empty, balance whole culster, + // otherwhise balance the specified table of table_name + virtual bool BalanceCluster(const std::string& table_name, + const std::vector>& lb_nodes, + std::vector* plans) override; + + bool NeedBalance(const std::shared_ptr& cluster); + + std::string GetName() override; + + protected: + Action* NextAction(const std::shared_ptr& cluster); + + // diff the initial cluster state with the current cluster state, then create + // plans + void CreatePlans(const std::shared_ptr& cluster, std::vector* plans); + + private: + std::vector> action_generators_; + + LBOptions lb_options_; +}; + +} // namespace load_balancer +} // namespace tera diff --git a/src/load_balancer/options.h b/src/load_balancer/options.h index 7202821c9..9eed7aa1d 100644 --- a/src/load_balancer/options.h +++ b/src/load_balancer/options.h @@ -11,98 +11,86 @@ namespace tera { namespace load_balancer { struct LBOptions { - // calculate - uint64_t max_compute_steps; - uint32_t max_compute_steps_per_tablet; - uint64_t max_compute_time_ms; - double min_cost_need_balance; - double bad_node_safemode_percent; - - // MoveCountCostFunction - double move_count_cost_weight; - uint32_t tablet_max_move_num; - - // MoveFrequencyCostFunction - double move_frequency_cost_weight; - uint32_t tablet_move_too_frequently_threshold_s; - - // AbnormalNodeCostFunction - double abnormal_node_cost_weight; - // if not ready tablets's ratio is hither than this value, - // the node in considered abnormal - double abnormal_node_ratio; - - // ReadPendingNodeCostFunction - double read_pending_node_cost_weight; - - // WritePendingNodeCostFunction - double write_pending_node_cost_weight; - - // ScanPendingNodeCostFunction - double scan_pending_node_cost_weight; - - // CountCostFunction - double tablet_count_cost_weight; - - // SizeCostFunction - double size_cost_weight; - - // LoadCostFunction - double read_load_cost_weight; - double write_load_cost_weight; - double scan_load_cost_weight; - - double read_pending_factor; - double write_pending_factor; - double scan_pending_factor; - - // meta table - bool meta_table_isolate_enabled; - std::string meta_table_name; - std::string meta_table_node_addr; - - // debug - bool debug_mode_enabled; - - LBOptions() : - max_compute_steps(1000000), - max_compute_steps_per_tablet(1000), - max_compute_time_ms(30 * 1000), - min_cost_need_balance(0.05), - bad_node_safemode_percent(0.5), - - move_count_cost_weight(10), - tablet_max_move_num(1), - - move_frequency_cost_weight(10), - tablet_move_too_frequently_threshold_s(600), - - abnormal_node_cost_weight(10), - abnormal_node_ratio(0.5), - - read_pending_node_cost_weight(10), - write_pending_node_cost_weight(10), - scan_pending_node_cost_weight(10), - - tablet_count_cost_weight(100), - size_cost_weight(100), - read_load_cost_weight(20), - write_load_cost_weight(20), - scan_load_cost_weight(20), - - read_pending_factor(100), - write_pending_factor(100), - scan_pending_factor(100), - - meta_table_isolate_enabled(true), - meta_table_name("meta_table"), - meta_table_node_addr(""), - - debug_mode_enabled(false) { - } + uint64_t max_compute_steps; + uint32_t max_compute_steps_per_tablet; + uint64_t max_compute_time_ms; + double min_cost_need_balance; + double bad_node_safemode_percent; + + double move_count_cost_weight; + uint32_t meta_balance_max_move_num; + uint32_t tablet_max_move_num; + + uint32_t tablet_move_too_frequently_threshold_s; + + /* + * if not ready tablets's ratio is higher than this value, + * the node is considered as an abnormal node + */ + double abnormal_node_ratio; + + double tablet_count_cost_weight; + double size_cost_weight; + double flash_size_cost_weight; + + double read_load_cost_weight; + double write_load_cost_weight; + double scan_load_cost_weight; + double lread_cost_weight; + double heavy_read_pending_threshold; + double heavy_write_pending_threshold; + double heavy_scan_pending_threshold; + double heavy_lread_threshold; + + double read_pending_factor; + double write_pending_factor; + double scan_pending_factor; + + bool meta_table_isolate_enabled; + std::string meta_table_name; + std::string meta_table_node_addr; + + bool debug_mode_enabled; + + LBOptions() + : max_compute_steps(1000000), + max_compute_steps_per_tablet(1000), + max_compute_time_ms(30 * 1000), + min_cost_need_balance(0.02), + bad_node_safemode_percent(0.5), + + move_count_cost_weight(1), + meta_balance_max_move_num(1), + tablet_max_move_num(1), + + tablet_move_too_frequently_threshold_s(600), + abnormal_node_ratio(0.5), + + tablet_count_cost_weight(100), + size_cost_weight(200), + flash_size_cost_weight(0), + + read_load_cost_weight(10), + write_load_cost_weight(10), + scan_load_cost_weight(5), + lread_cost_weight(10), + heavy_read_pending_threshold(1000), + heavy_write_pending_threshold(1000), + heavy_scan_pending_threshold(1000), + heavy_lread_threshold(1000000), + + read_pending_factor(1), + write_pending_factor(1), + scan_pending_factor(1), + + meta_table_isolate_enabled(true), + meta_table_name("meta_table"), + meta_table_node_addr(""), + + debug_mode_enabled(false) {} }; -} // namespace load_balancer -} // namespace tera +} // namespace load_balancer +} // namespace tera -#endif // TERA_LOAD_BALANCER_OPTIONS_H_ +#endif // TERA_LOAD_BALANCER_OPTIONS_H_ diff --git a/src/load_balancer/plan.h b/src/load_balancer/plan.h index 6e4ca41ae..f06e230f8 100644 --- a/src/load_balancer/plan.h +++ b/src/load_balancer/plan.h @@ -14,58 +14,57 @@ namespace tera { namespace load_balancer { class Plan { -public: - Plan() {} + public: + Plan() {} - Plan(const tera::master::TabletPtr& tablet, - const tera::master::TabletNodePtr& source, - const tera::master::TabletNodePtr& dest) { - tablet_ = tablet; - source_ = source; - dest_ = dest; - } + Plan(const tera::master::TabletPtr& tablet, const tera::master::TabletNodePtr& source, + const tera::master::TabletNodePtr& dest) { + tablet_ = tablet; + source_ = source; + dest_ = dest; + } - virtual ~Plan() {} + virtual ~Plan() {} - virtual std::string TabletPath() const { - if (tablet_) { - return tablet_->GetPath(); - } else { - return ""; - } + virtual std::string TabletPath() const { + if (tablet_) { + return tablet_->GetPath(); + } else { + return ""; } + } - virtual std::string SourceAddr() const { - if (source_) { - return source_->GetAddr(); - } else { - return ""; - } + virtual std::string SourceAddr() const { + if (source_) { + return source_->GetAddr(); + } else { + return ""; } + } - virtual std::string DestAddr() const { - if (dest_) { - return dest_->GetAddr(); - } else { - return ""; - } + virtual std::string DestAddr() const { + if (dest_) { + return dest_->GetAddr(); + } else { + return ""; } + } - virtual std::string ToString() const { - std::string str = "tablet:" + (tablet_ ? tablet_->GetPath() : "") - + " source:" + (source_ ? source_->GetAddr() : "") - + " dest:" + (dest_ ? dest_->GetAddr() : ""); + virtual std::string ToString() const { + std::string str = "tablet:" + (tablet_ ? tablet_->GetPath() : "") + " source:" + + (source_ ? source_->GetAddr() : "") + " dest:" + + (dest_ ? dest_->GetAddr() : ""); - return str; - } + return str; + } -private: - tera::master::TabletPtr tablet_; - tera::master::TabletNodePtr source_; - tera::master::TabletNodePtr dest_; + private: + tera::master::TabletPtr tablet_; + tera::master::TabletNodePtr source_; + tera::master::TabletNodePtr dest_; }; -} // namespace load_balancer -} // namespace tera +} // namespace load_balancer +} // namespace tera -#endif // TERA_LOAD_BALANCER_PLAN_H_ +#endif // TERA_LOAD_BALANCER_PLAN_H_ diff --git a/src/load_balancer/random.h b/src/load_balancer/random.h index 46a43008f..5ce23f6a0 100644 --- a/src/load_balancer/random.h +++ b/src/load_balancer/random.h @@ -16,58 +16,57 @@ namespace tera { namespace load_balancer { class Random { -public: - // random from [a, b) - // a < b should be ensured - // can generate negative number - // avg time cost: 25us - static int RandStd(int a, int b) { - assert(a < b); + public: + // random from [a, b) + // a < b should be ensured + // can generate negative number + // avg time cost: 25us + static int RandStd(int a, int b) { + assert(a < b); - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<> dis(a, b - 1); + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> dis(a, b - 1); - return dis(gen); - } + return dis(gen); + } - // random from [a, b) - // a < b should be ensured - // can not generate negative number - // avg time cost: 150ns - static uint32_t RandTime(uint32_t a, uint32_t b) { - assert(a < b); + // random from [a, b) + // a < b should be ensured + // can not generate negative number + // avg time cost: 150ns + static uint32_t RandTime(uint32_t a, uint32_t b) { + assert(a < b); - int64_t time_us = get_micros(); - return time_us % (b - a) + a; - } + int64_t time_us = get_micros(); + return time_us % (b - a) + a; + } - // random from [a, b) - // a < b should be ensured - // can not generate negative number - // avg time cost: 15ns - static uint32_t Rand(uint32_t a, uint32_t b) { - assert(a < b); + // random from [a, b) + // a < b should be ensured + // can not generate negative number + // avg time cost: 15ns + static uint32_t Rand(uint32_t a, uint32_t b) { + assert(a < b); - uint32_t rand = xorshift32(); - return rand % (b - a) + a; - } + uint32_t rand = xorshift32(); + return rand % (b - a) + a; + } -private: - /* The state word must be initialized to non-zero */ - static uint32_t xorshift32() { - /* Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs" */ - static uint32_t state = time(NULL); - uint32_t x = state; - x ^= x << 13; - x ^= x >> 17; - x ^= x << 5; - state = x; - return x; - } + private: + /* The state word must be initialized to non-zero */ + static uint32_t xorshift32() { + /* Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs" */ + static uint32_t state = time(NULL); + uint32_t x = state; + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + state = x; + return x; + } }; +} // namespace load_balancer +} // namespace tera -} // namespace load_balancer -} // namespace tera - -#endif // TERA_LOAD_BALANCER_RANDOM_H_ +#endif // TERA_LOAD_BALANCER_RANDOM_H_ diff --git a/src/load_balancer/test/action_generators_test.cc b/src/load_balancer/test/action_generators_test.cc index d9e1e36a5..34da94391 100644 --- a/src/load_balancer/test/action_generators_test.cc +++ b/src/load_balancer/test/action_generators_test.cc @@ -13,323 +13,271 @@ namespace tera { namespace load_balancer { -class RandomActionGeneratorTest : public ::testing::Test { -public: - virtual void SetUp() { - random_action_generator_.reset(new RandomActionGenerator()); - - std::vector> empty_lb_nodes; - LBOptions options; - cluster_.reset(new Cluster(empty_lb_nodes, options)); - } - - virtual void TearDown() { - } - -private: - std::shared_ptr random_action_generator_; - std::shared_ptr cluster_; +class ActionGeneratorTest : public ::testing::Test { + public: + virtual void SetUp() { + std::vector> empty_lb_nodes; + LBOptions options; + cluster_.reset(new Cluster(empty_lb_nodes, options, false)); + } + + virtual void TearDown() {} + + private: + std::shared_ptr cluster_; }; -class TabletCountActionGeneratorTest : public ::testing::Test { -public: - virtual void SetUp() { - tablet_count_action_generator_.reset(new TabletCountActionGenerator()); +class MetaIsolateActionGeneratorTest : public ::testing::Test { + public: + virtual void SetUp() { + meta_isolate_action_generator_.reset(new MetaIsolateActionGenerator()); - std::vector> empty_lb_nodes; - LBOptions options; - cluster_.reset(new Cluster(empty_lb_nodes, options)); - } + std::vector> empty_lb_nodes; + LBOptions options; + cluster_.reset(new Cluster(empty_lb_nodes, options, false)); + } - virtual void TearDown() { - } + virtual void TearDown() {} -private: - std::shared_ptr tablet_count_action_generator_; - std::shared_ptr cluster_; + private: + std::shared_ptr meta_isolate_action_generator_; + std::shared_ptr cluster_; }; -class SizeActionGeneratorTest : public ::testing::Test { -public: - virtual void SetUp() { - size_action_generator_.reset(new SizeActionGenerator()); - - std::vector> empty_lb_nodes; - LBOptions options; - cluster_.reset(new Cluster(empty_lb_nodes, options)); - } +TEST_F(ActionGeneratorTest, PickRandomNodeTest) { + cluster_->tablet_node_num_ = 10; - virtual void TearDown() { - } + for (uint32_t i = 0; i < 100; ++i) { + uint32_t index = ActionGenerator::PickRandomNode(cluster_); + ASSERT_GE(index, 0); + ASSERT_LT(index, cluster_->tablet_node_num_); + } +} -private: - std::shared_ptr size_action_generator_; - std::shared_ptr cluster_; -}; +TEST_F(ActionGeneratorTest, PickRandomTabletFromSourceNodeTest1) { + std::function is_proper = [](uint32_t tablet_index) -> bool { return true; }; + ASSERT_EQ(ActionGenerator::PickRandomTabletFromSourceNode(cluster_, 0, is_proper), + kInvalidTabletIndex); +} -class ReadLoadActionGeneratorTest : public ::testing::Test { -public: - virtual void SetUp() { - read_load_action_generator_.reset(new ReadLoadActionGenerator()); +TEST_F(ActionGeneratorTest, PickRandomTabletFromSourceNodeTest2) { + cluster_->tablets_per_node_[0].emplace_back(0); + std::function is_proper = [](uint32_t tablet_index) -> bool { return true; }; + ASSERT_EQ(ActionGenerator::PickRandomTabletFromSourceNode(cluster_, 0, is_proper), 0); +} - std::vector> empty_lb_nodes; - LBOptions options; - cluster_.reset(new Cluster(empty_lb_nodes, options)); - } +TEST_F(ActionGeneratorTest, PickRandomTabletFromSourceNodeTest3) { + cluster_->tablets_per_node_[0].emplace_back(0); + std::function is_not_proper = [](uint32_t tablet_index) -> bool { return false; }; + ASSERT_EQ(ActionGenerator::PickRandomTabletFromSourceNode(cluster_, 0, is_not_proper), + kInvalidTabletIndex); +} - virtual void TearDown() { +TEST_F(ActionGeneratorTest, PickRandomTabletFromSourceNodeTest4) { + cluster_->tablets_per_node_[0].emplace_back(0); + cluster_->tablets_per_node_[0].emplace_back(1); + std::function tablet0_is_not_proper = [](uint32_t tablet_index) -> bool { + if (tablet_index == 0) { + return false; + } else { + return true; } + }; + ASSERT_EQ(ActionGenerator::PickRandomTabletFromSourceNode(cluster_, 0, tablet0_is_not_proper), 1); +} -private: - std::shared_ptr read_load_action_generator_; - std::shared_ptr cluster_; -}; - -class WriteLoadActionGeneratorTest : public ::testing::Test { -public: - virtual void SetUp() { - write_load_action_generator_.reset(new WriteLoadActionGenerator()); +TEST_F(ActionGeneratorTest, PickRandomDestNodeTest1) { + cluster_->tablet_node_num_ = 0; + uint32_t source_node_index = 0; + uint32_t chosen_tablet_index = 0; + + std::function is_proper_location = + [](uint32_t tablet_index, uint32_t node_index) -> bool { return true; }; + uint32_t dest_node_index = ActionGenerator::PickRandomDestNode( + cluster_, source_node_index, chosen_tablet_index, is_proper_location); + ASSERT_EQ(dest_node_index, kInvalidNodeIndex); + + cluster_->tablet_node_num_ = 1; + dest_node_index = ActionGenerator::PickRandomDestNode(cluster_, source_node_index, + chosen_tablet_index, is_proper_location); + ASSERT_EQ(dest_node_index, kInvalidNodeIndex); +} - std::vector> empty_lb_nodes; - LBOptions options; - cluster_.reset(new Cluster(empty_lb_nodes, options)); - } +TEST_F(ActionGeneratorTest, PickRandomDestNodeTest2) { + cluster_->tablet_node_num_ = 2; + uint32_t source_node_index = 0; + uint32_t chosen_tablet_index = 0; - virtual void TearDown() { - } + std::function is_proper_location = + [](uint32_t tablet_index, uint32_t node_index) -> bool { return true; }; + uint32_t dest_node_index = ActionGenerator::PickRandomDestNode( + cluster_, source_node_index, chosen_tablet_index, is_proper_location); + ASSERT_EQ(dest_node_index, 1); +} -private: - std::shared_ptr write_load_action_generator_; - std::shared_ptr cluster_; -}; +TEST_F(ActionGeneratorTest, PickRandomDestNodeTest3) { + cluster_->tablet_node_num_ = 2; + uint32_t source_node_index = 0; + uint32_t chosen_tablet_index = 0; -class ScanLoadActionGeneratorTest : public ::testing::Test { -public: - virtual void SetUp() { - scan_load_action_generator_.reset(new ScanLoadActionGenerator()); + std::function is_not_proper_location = + [](uint32_t tablet_index, uint32_t node_index) -> bool { return false; }; + uint32_t dest_node_index = ActionGenerator::PickRandomDestNode( + cluster_, source_node_index, chosen_tablet_index, is_not_proper_location); + ASSERT_EQ(dest_node_index, kInvalidNodeIndex); +} - std::vector> empty_lb_nodes; - LBOptions options; - cluster_.reset(new Cluster(empty_lb_nodes, options)); - } +TEST_F(ActionGeneratorTest, PickLightestNodeTest1) { + std::vector sorted_node_index; + uint32_t chosen_tablet_index = 0; - virtual void TearDown() { - } + std::function is_proper_location = + [](uint32_t tablet_index, uint32_t node_index) -> bool { return true; }; + ASSERT_EQ(kInvalidNodeIndex, + ActionGenerator::PickLightestNode(cluster_, sorted_node_index, chosen_tablet_index, + is_proper_location)); +} -private: - std::shared_ptr scan_load_action_generator_; - std::shared_ptr cluster_; -}; +TEST_F(ActionGeneratorTest, PickLightestNodeTest2) { + uint32_t heavier_node_index = 0; + uint32_t lighter_node_index = 1; + uint32_t chosen_tablet_index = 0; -TEST_F(RandomActionGeneratorTest, PickNodeTest) { - cluster_->tablet_node_num_ = 10; + std::vector sorted_node_index; + sorted_node_index.emplace_back(lighter_node_index); + sorted_node_index.emplace_back(heavier_node_index); - uint32_t index = random_action_generator_->PickRandomNode(cluster_); - ASSERT_GE(index, 0); - ASSERT_LT(index, cluster_->tablet_node_num_); + std::function is_proper_location = + [](uint32_t tablet_index, uint32_t node_index) -> bool { return true; }; + ASSERT_EQ(lighter_node_index, + ActionGenerator::PickLightestNode(cluster_, sorted_node_index, chosen_tablet_index, + is_proper_location)); +} - uint32_t other_index = random_action_generator_->PickOtherRandomNode(cluster_, index); - ASSERT_GE(other_index, 0); - ASSERT_LT(other_index, cluster_->tablet_node_num_); - ASSERT_NE(index, other_index); +TEST_F(ActionGeneratorTest, PickLightestNodeTest3) { + uint32_t heavier_node_index = 0; + uint32_t lighter_node_index = 1; + uint32_t chosen_tablet_index = 0; + + std::vector sorted_node_index; + sorted_node_index.emplace_back(lighter_node_index); + sorted_node_index.emplace_back(heavier_node_index); + + std::function lighter_is_not_proper_location = + [lighter_node_index](uint32_t tablet_index, uint32_t node_index) -> bool { + if (node_index == lighter_node_index) { + return false; + } else { + return true; + } + }; + ASSERT_EQ(heavier_node_index, + ActionGenerator::PickLightestNode(cluster_, sorted_node_index, chosen_tablet_index, + lighter_is_not_proper_location)); } -TEST_F(RandomActionGeneratorTest, PickRandomTabletOfNodeTest) { - cluster_->tablet_node_num_ = 1; - ASSERT_EQ(random_action_generator_->PickRandomTabletOfNode(cluster_, 0), std::numeric_limits::max()); +TEST_F(ActionGeneratorTest, PickLightestNodeTest4) { + uint32_t heavier_node_index = 0; + uint32_t lighter_node_index = 1; + uint32_t chosen_tablet_index = 0; - cluster_->tablets_per_node_[0].emplace_back(0); - ASSERT_EQ(random_action_generator_->PickRandomTabletOfNode(cluster_, 0), 0); + std::vector sorted_node_index; + sorted_node_index.emplace_back(lighter_node_index); + sorted_node_index.emplace_back(heavier_node_index); + + std::function is_not_proper_location = + [](uint32_t tablet_index, uint32_t node_index) -> bool { return false; }; + ASSERT_EQ(kInvalidNodeIndex, + ActionGenerator::PickLightestNode(cluster_, sorted_node_index, chosen_tablet_index, + is_not_proper_location)); } -TEST_F(RandomActionGeneratorTest, GenerateTest) { - cluster_->tablet_node_num_ = 1; - std::shared_ptr action(random_action_generator_->Generate(cluster_)); - ASSERT_EQ(Action::Type::EMPTY, action->GetType()); +TEST_F(ActionGeneratorTest, PickHeaviestNodeTest1) { + std::vector sorted_node_index; + ASSERT_EQ(kInvalidNodeIndex, ActionGenerator::PickHeaviestNode(cluster_, sorted_node_index)); + + sorted_node_index.emplace_back(0); + ASSERT_EQ(0, ActionGenerator::PickHeaviestNode(cluster_, sorted_node_index)); - cluster_->tablet_node_num_ = 2; - cluster_->tablets_per_node_[0].emplace_back(0); - cluster_->tablets_per_node_[1].emplace_back(1); - std::shared_ptr action_0(random_action_generator_->Generate(cluster_)); - ASSERT_EQ(Action::Type::MOVE, action_0->GetType()); + sorted_node_index.emplace_back(1); + ASSERT_EQ(1, ActionGenerator::PickHeaviestNode(cluster_, sorted_node_index)); } -TEST_F(TabletCountActionGeneratorTest, GenerateTest) { - uint32_t more_tablets_node_index = 0; - uint32_t less_tablets_node_index = 1; - cluster_->tablets_per_node_[more_tablets_node_index].emplace_back(0); - cluster_->tablets_per_node_[more_tablets_node_index].emplace_back(1); - cluster_->tablets_per_node_[less_tablets_node_index].emplace_back(2); +TEST_F(ActionGeneratorTest, PickHeaviestNodeTest2) { + std::function is_proper = [](uint32_t node_index) -> bool { return true; }; - cluster_->tablet_node_num_ = 2; + std::vector sorted_node_index; + ASSERT_EQ(kInvalidNodeIndex, + ActionGenerator::PickHeaviestNode(cluster_, sorted_node_index, is_proper)); - cluster_->node_index_sorted_by_tablet_count_.emplace_back(more_tablets_node_index); - cluster_->node_index_sorted_by_tablet_count_.emplace_back(less_tablets_node_index); + sorted_node_index.emplace_back(0); + ASSERT_EQ(0, ActionGenerator::PickHeaviestNode(cluster_, sorted_node_index, is_proper)); - cluster_->SortNodesByTabletCount(); - ASSERT_EQ(more_tablets_node_index, tablet_count_action_generator_->PickMostTabletsNode(cluster_)); - ASSERT_EQ(less_tablets_node_index, tablet_count_action_generator_->PickLeastTabletsNode(cluster_)); + sorted_node_index.emplace_back(1); + ASSERT_EQ(1, ActionGenerator::PickHeaviestNode(cluster_, sorted_node_index, is_proper)); - std::shared_ptr action(tablet_count_action_generator_->Generate(cluster_)); - ASSERT_EQ(Action::Type::MOVE, action->GetType()); - MoveAction* move_action = dynamic_cast(action.get()); - ASSERT_EQ(more_tablets_node_index, move_action->source_node_index_); - ASSERT_EQ(less_tablets_node_index, move_action->dest_node_index_); + std::function node1_is_not_proper = [](uint32_t node_index) -> bool { + if (node_index == 1) { + return false; + } else { + return true; + } + }; + ASSERT_EQ(0, ActionGenerator::PickHeaviestNode(cluster_, sorted_node_index, node1_is_not_proper)); +} - cluster_->meta_table_node_index_ = less_tablets_node_index; - ASSERT_EQ(more_tablets_node_index, tablet_count_action_generator_->PickMostTabletsNode(cluster_)); - ASSERT_EQ(more_tablets_node_index, tablet_count_action_generator_->PickLeastTabletsNode(cluster_)); +TEST_F(ActionGeneratorTest, PickHeaviestTabletFromSourceNodeTest1) { + std::vector sorted_tablet_index; + std::function is_proper = [](uint32_t tablet_index) -> bool { return true; }; + ASSERT_EQ( + ActionGenerator::PickHeaviestTabletFromSourceNode(cluster_, sorted_tablet_index, is_proper), + kInvalidTabletIndex); } -TEST_F(SizeActionGeneratorTest, GenerateTest) { - uint32_t larger_size_node_index = 0; - uint32_t smaller_size_node_index = 1; - cluster_->size_per_node_[larger_size_node_index] = 20; - cluster_->size_per_node_[smaller_size_node_index] = 10; - - uint32_t tablet_index_on_larger_size_node = 0; - uint32_t tablet_index_on_smaller_size_node = 1; - cluster_->tablet_node_num_ = 2; - cluster_->tablets_per_node_[larger_size_node_index].emplace_back(tablet_index_on_larger_size_node); - cluster_->tablets_per_node_[smaller_size_node_index].emplace_back(tablet_index_on_smaller_size_node); - - cluster_->node_index_sorted_by_size_.emplace_back(larger_size_node_index); - cluster_->node_index_sorted_by_size_.emplace_back(smaller_size_node_index); - - cluster_->SortNodesBySize(); - ASSERT_EQ(larger_size_node_index, size_action_generator_->PickLargestSizeNode(cluster_)); - ASSERT_EQ(smaller_size_node_index, size_action_generator_->PickSmallestSizeNode(cluster_)); - - std::shared_ptr action(size_action_generator_->Generate(cluster_)); - ASSERT_EQ(Action::Type::MOVE, action->GetType()); - MoveAction* move_action = dynamic_cast(action.get()); - ASSERT_EQ(tablet_index_on_larger_size_node, move_action->tablet_index_); - ASSERT_EQ(larger_size_node_index, move_action->source_node_index_); - ASSERT_EQ(smaller_size_node_index, move_action->dest_node_index_); - - cluster_->meta_table_node_index_ = smaller_size_node_index; - ASSERT_EQ(larger_size_node_index, size_action_generator_->PickLargestSizeNode(cluster_)); - ASSERT_EQ(larger_size_node_index, size_action_generator_->PickSmallestSizeNode(cluster_)); +TEST_F(ActionGeneratorTest, PickHeaviestTabletFromSourceNodeTest2) { + std::vector sorted_tablet_index; + sorted_tablet_index.emplace_back(0); + + std::function is_proper = [](uint32_t tablet_index) -> bool { return true; }; + ASSERT_EQ( + ActionGenerator::PickHeaviestTabletFromSourceNode(cluster_, sorted_tablet_index, is_proper), + 0); } -TEST_F(ReadLoadActionGeneratorTest, GenerateTest) { - uint32_t more_read_node_index = 0; - uint32_t less_read_node_index = 1; - cluster_->read_load_per_node_[more_read_node_index] = 20; - cluster_->read_load_per_node_[less_read_node_index] = 10; - - uint32_t tablet_index_on_more_read_node = 0; - uint32_t tablet_index_on_less_read_node = 1; - cluster_->tablet_node_num_ = 2; - cluster_->tablets_per_node_[more_read_node_index].emplace_back(tablet_index_on_more_read_node); - cluster_->tablets_per_node_[less_read_node_index].emplace_back(tablet_index_on_less_read_node); - - cluster_->node_index_sorted_by_read_load_.emplace_back(more_read_node_index); - cluster_->node_index_sorted_by_read_load_.emplace_back(less_read_node_index); - - cluster_->SortNodesByReadLoad(); - ASSERT_EQ(more_read_node_index, read_load_action_generator_->PickMostReadNode(cluster_)); - ASSERT_EQ(less_read_node_index, read_load_action_generator_->PickLeastReadNode(cluster_)); - - cluster_->read_pending_nodes_index_.clear(); - ASSERT_EQ(kInvalidNodeIndex, read_load_action_generator_->PickMostReadNodeWithPending(cluster_)); - ASSERT_EQ(less_read_node_index, read_load_action_generator_->PickLeastReadNode(cluster_)); - - cluster_->read_pending_nodes_index_.clear(); - cluster_->read_pending_nodes_index_.insert(more_read_node_index); - ASSERT_EQ(more_read_node_index, read_load_action_generator_->PickMostReadNodeWithPending(cluster_)); - ASSERT_EQ(less_read_node_index, read_load_action_generator_->PickLeastReadNode(cluster_)); - - cluster_->read_pending_nodes_index_.clear(); - cluster_->read_pending_nodes_index_.insert(less_read_node_index); - ASSERT_EQ(less_read_node_index, read_load_action_generator_->PickMostReadNodeWithPending(cluster_)); - ASSERT_EQ(more_read_node_index, read_load_action_generator_->PickLeastReadNode(cluster_)); - cluster_->read_pending_nodes_index_.clear(); - - cluster_->meta_table_node_index_ = less_read_node_index; - ASSERT_EQ(more_read_node_index, read_load_action_generator_->PickMostReadNode(cluster_)); - ASSERT_EQ(more_read_node_index, read_load_action_generator_->PickLeastReadNode(cluster_)); +TEST_F(ActionGeneratorTest, PickHeaviestTabletFromSourceNodeTest3) { + std::vector sorted_tablet_index; + sorted_tablet_index.emplace_back(0); + + std::function is_not_proper = [](uint32_t tablet_index) -> bool { return false; }; + ASSERT_EQ(kInvalidNodeIndex, ActionGenerator::PickHeaviestTabletFromSourceNode( + cluster_, sorted_tablet_index, is_not_proper)); } -TEST_F(WriteLoadActionGeneratorTest, GenerateTest) { - uint32_t more_write_node_index = 0; - uint32_t less_write_node_index = 1; - cluster_->write_load_per_node_[more_write_node_index] = 20; - cluster_->write_load_per_node_[less_write_node_index] = 10; - - uint32_t tablet_index_on_more_write_node = 0; - uint32_t tablet_index_on_less_write_node = 1; - cluster_->tablet_node_num_ = 2; - cluster_->tablets_per_node_[more_write_node_index].emplace_back(tablet_index_on_more_write_node); - cluster_->tablets_per_node_[less_write_node_index].emplace_back(tablet_index_on_less_write_node); - - cluster_->node_index_sorted_by_write_load_.emplace_back(more_write_node_index); - cluster_->node_index_sorted_by_write_load_.emplace_back(less_write_node_index); - - cluster_->SortNodesByWriteLoad(); - ASSERT_EQ(more_write_node_index, write_load_action_generator_->PickMostWriteNode(cluster_)); - ASSERT_EQ(less_write_node_index, write_load_action_generator_->PickLeastWriteNode(cluster_)); - - cluster_->write_pending_nodes_index_.clear(); - ASSERT_EQ(kInvalidNodeIndex, write_load_action_generator_->PickMostWriteNodeWithPending(cluster_)); - ASSERT_EQ(less_write_node_index, write_load_action_generator_->PickLeastWriteNode(cluster_)); - - cluster_->write_pending_nodes_index_.clear(); - cluster_->write_pending_nodes_index_.insert(more_write_node_index); - ASSERT_EQ(more_write_node_index, write_load_action_generator_->PickMostWriteNodeWithPending(cluster_)); - ASSERT_EQ(less_write_node_index, write_load_action_generator_->PickLeastWriteNode(cluster_)); - - cluster_->write_pending_nodes_index_.clear(); - cluster_->write_pending_nodes_index_.insert(less_write_node_index); - ASSERT_EQ(less_write_node_index, write_load_action_generator_->PickMostWriteNodeWithPending(cluster_)); - ASSERT_EQ(more_write_node_index, write_load_action_generator_->PickLeastWriteNode(cluster_)); - cluster_->write_pending_nodes_index_.clear(); - - cluster_->meta_table_node_index_ = less_write_node_index; - ASSERT_EQ(more_write_node_index, write_load_action_generator_->PickMostWriteNode(cluster_)); - ASSERT_EQ(more_write_node_index, write_load_action_generator_->PickLeastWriteNode(cluster_)); +TEST_F(MetaIsolateActionGeneratorTest, PickRandomTabletOfMetaNodeTest1) { + ASSERT_EQ(meta_isolate_action_generator_->PickRandomTabletOfMetaNode(cluster_, 0), + kInvalidTabletIndex); + + cluster_->tablets_per_node_[0].emplace_back(0); + ASSERT_EQ(meta_isolate_action_generator_->PickRandomTabletOfMetaNode(cluster_, 0), + kInvalidTabletIndex); } -TEST_F(ScanLoadActionGeneratorTest, GenerateTest) { - uint32_t more_scan_node_index = 0; - uint32_t less_scan_node_index = 1; - cluster_->scan_load_per_node_[more_scan_node_index] = 20; - cluster_->scan_load_per_node_[less_scan_node_index] = 10; - - uint32_t tablet_index_on_more_scan_node = 0; - uint32_t tablet_index_on_less_scan_node = 1; - cluster_->tablet_node_num_ = 2; - cluster_->tablets_per_node_[more_scan_node_index].emplace_back(tablet_index_on_more_scan_node); - cluster_->tablets_per_node_[less_scan_node_index].emplace_back(tablet_index_on_less_scan_node); - - cluster_->node_index_sorted_by_scan_load_.emplace_back(more_scan_node_index); - cluster_->node_index_sorted_by_scan_load_.emplace_back(less_scan_node_index); - - cluster_->SortNodesByScanLoad(); - ASSERT_EQ(more_scan_node_index, scan_load_action_generator_->PickMostScanNode(cluster_)); - ASSERT_EQ(less_scan_node_index, scan_load_action_generator_->PickLeastScanNode(cluster_)); - - cluster_->scan_pending_nodes_index_.clear(); - ASSERT_EQ(kInvalidNodeIndex, scan_load_action_generator_->PickMostScanNodeWithPending(cluster_)); - ASSERT_EQ(less_scan_node_index, scan_load_action_generator_->PickLeastScanNode(cluster_)); - - cluster_->scan_pending_nodes_index_.clear(); - cluster_->scan_pending_nodes_index_.insert(more_scan_node_index); - ASSERT_EQ(more_scan_node_index, scan_load_action_generator_->PickMostScanNodeWithPending(cluster_)); - ASSERT_EQ(less_scan_node_index, scan_load_action_generator_->PickLeastScanNode(cluster_)); - - cluster_->scan_pending_nodes_index_.clear(); - cluster_->scan_pending_nodes_index_.insert(less_scan_node_index); - ASSERT_EQ(less_scan_node_index, scan_load_action_generator_->PickMostScanNodeWithPending(cluster_)); - ASSERT_EQ(more_scan_node_index, scan_load_action_generator_->PickLeastScanNode(cluster_)); - cluster_->scan_pending_nodes_index_.clear(); - - cluster_->meta_table_node_index_ = less_scan_node_index; - ASSERT_EQ(more_scan_node_index, scan_load_action_generator_->PickMostScanNode(cluster_)); - ASSERT_EQ(more_scan_node_index, scan_load_action_generator_->PickLeastScanNode(cluster_)); +TEST_F(MetaIsolateActionGeneratorTest, PickRandomTabletOfMetaNodeTest2) { + cluster_->tablets_per_node_[0].emplace_back(0); + cluster_->tablets_per_node_[0].emplace_back(1); + cluster_->tablets_[0]; + cluster_->tablets_[1]; + cluster_->tablet_index_to_table_index_[0] = 0; + cluster_->tablet_index_to_table_index_[1] = 1; + cluster_->tables_[0] = cluster_->lb_options_.meta_table_name; + cluster_->tables_[1] = "user_table"; + + ASSERT_TRUE(cluster_->IsMetaTablet(0)); + ASSERT_FALSE(cluster_->IsMetaTablet(1)); + ASSERT_EQ(meta_isolate_action_generator_->PickRandomTabletOfMetaNode(cluster_, 0), 1); } -} // namespace load_balancer -} // namespace tera +} // namespace load_balancer +} // namespace tera /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/load_balancer/test/actions_test.cc b/src/load_balancer/test/actions_test.cc index 8c1458a62..cfc17411f 100644 --- a/src/load_balancer/test/actions_test.cc +++ b/src/load_balancer/test/actions_test.cc @@ -11,19 +11,18 @@ namespace tera { namespace load_balancer { -class ActionsTest : public ::testing::Test { -}; +class ActionsTest : public ::testing::Test {}; TEST_F(ActionsTest, MoveActionTest) { - MoveAction move_action(0, 0, 1, ""); - std::shared_ptr undo_action(dynamic_cast(move_action.UndoAction())); + MoveAction move_action(0, 0, 1, ""); + std::shared_ptr undo_action(dynamic_cast(move_action.UndoAction())); - ASSERT_EQ(move_action.tablet_index_, undo_action->tablet_index_); - ASSERT_EQ(move_action.source_node_index_, undo_action->dest_node_index_); - ASSERT_EQ(move_action.dest_node_index_, undo_action->source_node_index_); + ASSERT_EQ(move_action.tablet_index_, undo_action->tablet_index_); + ASSERT_EQ(move_action.source_node_index_, undo_action->dest_node_index_); + ASSERT_EQ(move_action.dest_node_index_, undo_action->source_node_index_); } -} // namespace load_balancer -} // namespace tera +} // namespace load_balancer +} // namespace tera /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/load_balancer/test/balancer_test_main.cc b/src/load_balancer/test/balancer_test_main.cc index bdb7de51d..522116202 100644 --- a/src/load_balancer/test/balancer_test_main.cc +++ b/src/load_balancer/test/balancer_test_main.cc @@ -12,25 +12,25 @@ #include "master/master_env.h" int main(int argc, char** argv) { - ::google::InitGoogleLogging(argv[0]); - FLAGS_v = 16; - FLAGS_minloglevel=0; - FLAGS_log_dir = "./log"; - if (access(FLAGS_log_dir.c_str(), F_OK)) { - mkdir(FLAGS_log_dir.c_str(), 0777); - } - std::string pragram_name("load balancer"); - tera::utils::SetupLog(pragram_name); - ::google::ParseCommandLineFlags(&argc, &argv, true); - ::testing::InitGoogleTest(&argc, argv); + ::google::InitGoogleLogging(argv[0]); + FLAGS_v = 16; + FLAGS_minloglevel = 0; + FLAGS_log_dir = "./log"; + if (access(FLAGS_log_dir.c_str(), F_OK)) { + mkdir(FLAGS_log_dir.c_str(), 0777); + } + std::string pragram_name("load balancer"); + tera::utils::SetupLog(pragram_name); + ::google::ParseCommandLineFlags(&argc, &argv, true); + ::testing::InitGoogleTest(&argc, argv); - using tera::master::TabletAvailability; - tera::master::MasterEnv().Init(new tera::master::MasterImpl, nullptr, - nullptr, nullptr, nullptr, nullptr, nullptr, - std::shared_ptr(new TabletAvailability(nullptr)), - nullptr); + using tera::master::TabletAvailability; + tera::master::MasterEnv().Init( + new tera::master::MasterImpl(nullptr, nullptr), nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, + std::shared_ptr(new TabletAvailability(nullptr)), nullptr); - return RUN_ALL_TESTS(); + return RUN_ALL_TESTS(); } /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/load_balancer/test/cluster_test.cc b/src/load_balancer/test/cluster_test.cc index 367946e00..b7aa5fb5a 100644 --- a/src/load_balancer/test/cluster_test.cc +++ b/src/load_balancer/test/cluster_test.cc @@ -15,376 +15,682 @@ namespace tera { namespace load_balancer { class ClusterTest : public ::testing::Test { -public: - virtual void SetUp() { - std::vector> empty_lb_nodes; - LBOptions options; - cluster_.reset(new Cluster(empty_lb_nodes, options)); - } - - virtual void TearDown() { - } - -private: - std::shared_ptr cluster_; + public: + virtual void SetUp() { + std::vector> empty_lb_nodes; + LBOptions options; + cluster_.reset(new Cluster(empty_lb_nodes, options, false)); + } + + virtual void TearDown() {} + + private: + std::shared_ptr cluster_; }; TEST_F(ClusterTest, ValidActionTest) { - TabletMeta tablet_meta_meta; - TabletMeta tablet_meta_other; - tera::master::TabletPtr tablet_ptr_meta(new tera::master::Tablet(tablet_meta_meta)); - tera::master::TabletPtr tablet_ptr_other(new tera::master::Tablet(tablet_meta_other)); - std::shared_ptr lb_tablet_meta = std::make_shared(); - std::shared_ptr lb_tablet_other = std::make_shared(); - lb_tablet_meta->tablet_ptr = tablet_ptr_meta; - lb_tablet_other->tablet_ptr = tablet_ptr_other; - - cluster_->lb_options_.meta_table_name = "meta_table"; - uint32_t table_index_meta = 0; - uint32_t table_index_other= 1; - cluster_->tables_[table_index_meta] = "meta_table"; - cluster_->tables_[table_index_other] = "other_table"; - uint32_t tablet_index_meta = 0; - uint32_t tablet_index_other = 1; - cluster_->tablet_index_to_table_index_[tablet_index_meta] = table_index_meta; - cluster_->tablet_index_to_table_index_[tablet_index_other] = table_index_other; - cluster_->tablets_[tablet_index_meta] = lb_tablet_meta; - cluster_->tablets_[tablet_index_other] = lb_tablet_other; - - uint32_t meta_table_node_index = 0; - uint32_t other_node_index = 1; - cluster_->meta_table_node_index_ = meta_table_node_index; - - // empty action is invalid - std::shared_ptr empty_action(new EmptyAction()); - ASSERT_FALSE(cluster_->ValidAction(empty_action)); - - std::shared_ptr normal_move_action(new MoveAction(tablet_index_meta, 0, 1, "")); - // move not ready tablet is invalid - cluster_->tablets_[tablet_index_meta]->tablet_ptr->SetStatus(TabletMeta::kTabletOffline); - ASSERT_FALSE(cluster_->ValidAction(normal_move_action)); - - // move meta table is invalid - std::shared_ptr move_meta_table_action(new MoveAction(tablet_index_meta, 0, 1, "")); - cluster_->tablets_[tablet_index_meta]->tablet_ptr->SetStatus(TabletMeta::kTabletReady); - ASSERT_FALSE(cluster_->ValidAction(move_meta_table_action)); - // move nomal tablet is valid - std::shared_ptr move_other_table_action(new MoveAction(tablet_index_other, 0, 1, "")); - cluster_->tablets_[tablet_index_other]->tablet_ptr->SetStatus(TabletMeta::kTabletReady); - ASSERT_TRUE(cluster_->ValidAction(move_other_table_action)); - - std::shared_ptr move_to_meta_table_node_action(new MoveAction(tablet_index_other, 0, meta_table_node_index, "")); - std::shared_ptr move_to_other_node_action(new MoveAction(tablet_index_other, 0, other_node_index, "")); - cluster_->lb_options_.meta_table_isolate_enabled = true; - // move tablet to meta node is invalid if meta_table_isolate_enabled is true - ASSERT_FALSE(cluster_->ValidAction(move_to_meta_table_node_action)); - // move tablet to normal node is valid even if meta_table_isolate_enabled is true - ASSERT_TRUE(cluster_->ValidAction(move_to_other_node_action)); - cluster_->lb_options_.meta_table_isolate_enabled = false; - // move tablet to any node is valid if meta_table_isolate_enabled is true - ASSERT_TRUE(cluster_->ValidAction(move_to_meta_table_node_action)); - ASSERT_TRUE(cluster_->ValidAction(move_to_other_node_action)); + std::shared_ptr empty_action(new EmptyAction()); + ASSERT_FALSE(cluster_->ValidAction(empty_action)); } -TEST_F(ClusterTest, RegisterTabletTest) { - TabletMeta tablet_meta_meta; - tablet_meta_meta.set_table_name("meta_table"); - tablet_meta_meta.set_path("path/meta_table"); - tera::master::TabletPtr tablet_ptr_meta(new tera::master::Tablet(tablet_meta_meta)); - std::shared_ptr lb_tablet_meta = std::make_shared(); - lb_tablet_meta->tablet_ptr = tablet_ptr_meta; +TEST_F(ClusterTest, SortNodesByTabletCount) { + cluster_->tablets_per_node_[0].emplace_back(0); + cluster_->tablets_per_node_[0].emplace_back(1); + cluster_->tablets_per_node_[1].emplace_back(2); + cluster_->tablets_per_node_[2].emplace_back(3); + cluster_->tablets_per_node_[2].emplace_back(4); + cluster_->tablets_per_node_[2].emplace_back(5); + + std::vector sorted_node_index; + cluster_->SortNodesByTabletCount(&sorted_node_index); + + ASSERT_EQ(1, sorted_node_index[0]); + ASSERT_EQ(0, sorted_node_index[1]); + ASSERT_EQ(2, sorted_node_index[2]); +} - uint32_t tablet_index_0 = 0; - uint32_t node_index_0 = 0; - cluster_->RegisterTablet(lb_tablet_meta, tablet_index_0, node_index_0); +TEST_F(ClusterTest, SortNodesBySizeTest) { + cluster_->size_per_node_[0] = 20; + cluster_->size_per_node_[1] = 10; + cluster_->size_per_node_[2] = 30; - ASSERT_EQ(1, cluster_->table_num_); - ASSERT_EQ(1, cluster_->tables_.size()); - ASSERT_STREQ("meta_table", cluster_->tables_[0].c_str()); - ASSERT_EQ(0, cluster_->tables_to_index_["meta_table"]); + std::vector sorted_node_index; + cluster_->SortNodesBySize(&sorted_node_index); - ASSERT_EQ(tablet_index_0, cluster_->tablets_to_index_["path/meta_table"]); + ASSERT_EQ(1, sorted_node_index[0]); + ASSERT_EQ(0, sorted_node_index[1]); + ASSERT_EQ(2, sorted_node_index[2]); +} - ASSERT_EQ(node_index_0, cluster_->tablet_index_to_node_index_[tablet_index_0]); - ASSERT_EQ(node_index_0, cluster_->initial_tablet_index_to_node_index_[tablet_index_0]); - ASSERT_EQ(0, cluster_->tablet_index_to_table_index_[tablet_index_0]); +TEST_F(ClusterTest, SortNodesByFlashSizePercentTest1) { + cluster_->flash_size_per_node_[0] = 20; + cluster_->flash_size_per_node_[1] = 10; + cluster_->flash_size_per_node_[2] = 30; + + tera::master::TabletNodePtr tablet_node_ptr_0(new tera::master::TabletNode()); + tablet_node_ptr_0->persistent_cache_size_ = 1; + std::shared_ptr lb_node_0 = std::make_shared(); + lb_node_0->tablet_node_ptr = tablet_node_ptr_0; + cluster_->nodes_[0] = lb_node_0; + + tera::master::TabletNodePtr tablet_node_ptr_1(new tera::master::TabletNode()); + tablet_node_ptr_1->persistent_cache_size_ = 1; + std::shared_ptr lb_node_1 = std::make_shared(); + lb_node_1->tablet_node_ptr = tablet_node_ptr_1; + cluster_->nodes_[1] = lb_node_1; + + tera::master::TabletNodePtr tablet_node_ptr_2(new tera::master::TabletNode()); + tablet_node_ptr_2->persistent_cache_size_ = 1; + std::shared_ptr lb_node_2 = std::make_shared(); + lb_node_2->tablet_node_ptr = tablet_node_ptr_2; + cluster_->nodes_[2] = lb_node_2; + + std::vector sorted_node_index; + cluster_->SortNodesByFlashSizePercent(&sorted_node_index); + + ASSERT_EQ(1, sorted_node_index[0]); + ASSERT_EQ(0, sorted_node_index[1]); + ASSERT_EQ(2, sorted_node_index[2]); } -TEST_F(ClusterTest, AddTabletTest) { - TabletMeta tablet_meta_meta; - tablet_meta_meta.set_size(10); - tera::master::TabletPtr tablet_ptr_meta(new tera::master::Tablet(tablet_meta_meta)); - tablet_ptr_meta->average_counter_.set_read_rows(20); - tablet_ptr_meta->average_counter_.set_write_rows(30); - tablet_ptr_meta->average_counter_.set_scan_rows(40); - std::shared_ptr lb_tablet_meta = std::make_shared(); - lb_tablet_meta->tablet_ptr = tablet_ptr_meta; +TEST_F(ClusterTest, SortNodesByFlashSizePercentTest2) { + cluster_->flash_size_per_node_[0] = 20; + cluster_->flash_size_per_node_[1] = 10; + cluster_->flash_size_per_node_[2] = 30; + + tera::master::TabletNodePtr tablet_node_ptr_0(new tera::master::TabletNode()); + tablet_node_ptr_0->persistent_cache_size_ = 1; + std::shared_ptr lb_node_0 = std::make_shared(); + lb_node_0->tablet_node_ptr = tablet_node_ptr_0; + cluster_->nodes_[0] = lb_node_0; + + tera::master::TabletNodePtr tablet_node_ptr_1(new tera::master::TabletNode()); + tablet_node_ptr_1->persistent_cache_size_ = 1; + std::shared_ptr lb_node_1 = std::make_shared(); + lb_node_1->tablet_node_ptr = tablet_node_ptr_1; + cluster_->nodes_[1] = lb_node_1; + + tera::master::TabletNodePtr tablet_node_ptr_2(new tera::master::TabletNode()); + tablet_node_ptr_2->persistent_cache_size_ = 0; + std::shared_ptr lb_node_2 = std::make_shared(); + lb_node_2->tablet_node_ptr = tablet_node_ptr_2; + cluster_->nodes_[2] = lb_node_2; + + std::vector sorted_node_index; + cluster_->SortNodesByFlashSizePercent(&sorted_node_index); + ASSERT_EQ(sorted_node_index.size(), 2); + + ASSERT_EQ(1, sorted_node_index[0]); + ASSERT_EQ(0, sorted_node_index[1]); +} + +TEST_F(ClusterTest, SortNodesByReadLoad) { + cluster_->read_load_per_node_[0] = 20; + cluster_->read_load_per_node_[1] = 10; + cluster_->read_load_per_node_[2] = 30; + + std::vector sorted_node_index; + cluster_->SortNodesByReadLoad(&sorted_node_index); - uint32_t tablet_index = 0; - cluster_->tablets_[tablet_index] = lb_tablet_meta; + ASSERT_EQ(1, sorted_node_index[0]); + ASSERT_EQ(0, sorted_node_index[1]); + ASSERT_EQ(2, sorted_node_index[2]); +} - uint32_t node_index = 0; - cluster_->size_per_node_[node_index] = 0; - cluster_->read_load_per_node_[node_index] = 0; - cluster_->write_load_per_node_[node_index] = 0; - cluster_->scan_load_per_node_[node_index] = 0; +TEST_F(ClusterTest, SortNodesByWriteLoad) { + cluster_->write_load_per_node_[0] = 20; + cluster_->write_load_per_node_[1] = 10; + cluster_->write_load_per_node_[2] = 30; - cluster_->AddTablet(tablet_index, node_index); + std::vector sorted_node_index; + cluster_->SortNodesByWriteLoad(&sorted_node_index); - ASSERT_EQ(1, cluster_->tablets_per_node_.size()); - ASSERT_EQ(10, cluster_->size_per_node_[node_index]); - ASSERT_EQ(20, cluster_->read_load_per_node_[node_index]); - ASSERT_EQ(30, cluster_->write_load_per_node_[node_index]); - ASSERT_EQ(40, cluster_->scan_load_per_node_[node_index]); + ASSERT_EQ(1, sorted_node_index[0]); + ASSERT_EQ(0, sorted_node_index[1]); + ASSERT_EQ(2, sorted_node_index[2]); } -TEST_F(ClusterTest, RemoveTabletTest) { - TabletMeta tablet_meta_meta; - tablet_meta_meta.set_size(10); - tera::master::TabletPtr tablet_ptr_meta(new tera::master::Tablet(tablet_meta_meta)); - tablet_ptr_meta->average_counter_.set_read_rows(20); - tablet_ptr_meta->average_counter_.set_write_rows(30); - tablet_ptr_meta->average_counter_.set_scan_rows(40); - std::shared_ptr lb_tablet_meta = std::make_shared(); - lb_tablet_meta->tablet_ptr = tablet_ptr_meta; +TEST_F(ClusterTest, SortNodesByScanLoad) { + cluster_->scan_load_per_node_[0] = 20; + cluster_->scan_load_per_node_[1] = 10; + cluster_->scan_load_per_node_[2] = 30; - uint32_t tablet_index = 0; - cluster_->tablets_[tablet_index] = lb_tablet_meta; + std::vector sorted_node_index; + cluster_->SortNodesByScanLoad(&sorted_node_index); - uint32_t node_index = 0; - cluster_->tablets_per_node_[node_index].emplace_back(tablet_index); + ASSERT_EQ(1, sorted_node_index[0]); + ASSERT_EQ(0, sorted_node_index[1]); + ASSERT_EQ(2, sorted_node_index[2]); +} - cluster_->size_per_node_[node_index] = 10; - cluster_->read_load_per_node_[node_index] = 20; - cluster_->write_load_per_node_[node_index] = 30; - cluster_->scan_load_per_node_[node_index] = 40; +TEST_F(ClusterTest, SortNodesByLRead) { + cluster_->lread_per_node_[0] = 20; + cluster_->lread_per_node_[1] = 10; + cluster_->lread_per_node_[2] = 30; - cluster_->RemoveTablet(tablet_index, node_index); + std::vector sorted_node_index; + cluster_->SortNodesByLRead(&sorted_node_index); - ASSERT_EQ(0, cluster_->tablets_per_node_[node_index].size()); - ASSERT_EQ(0, cluster_->size_per_node_[node_index]); - ASSERT_EQ(0, cluster_->read_load_per_node_[node_index]); - ASSERT_EQ(0, cluster_->write_load_per_node_[node_index]); - ASSERT_EQ(0, cluster_->scan_load_per_node_[node_index]); + ASSERT_EQ(1, sorted_node_index[0]); + ASSERT_EQ(0, sorted_node_index[1]); + ASSERT_EQ(2, sorted_node_index[2]); } -TEST_F(ClusterTest, MoveTabletTest) { - TabletMeta tablet_meta_meta; - tablet_meta_meta.set_size(10); - tera::master::TabletPtr tablet_ptr_meta(new tera::master::Tablet(tablet_meta_meta)); - tablet_ptr_meta->average_counter_.set_read_rows(20); - tablet_ptr_meta->average_counter_.set_write_rows(30); - tablet_ptr_meta->average_counter_.set_scan_rows(40); - std::shared_ptr lb_tablet_meta = std::make_shared(); - lb_tablet_meta->tablet_ptr = tablet_ptr_meta; - - uint32_t tablet_index = 0; - uint32_t first_node_index = 0; - uint32_t second_node_index = 1; - uint32_t third_node_index = 2; - - cluster_->tablets_[tablet_index] = lb_tablet_meta; - cluster_->tablet_moved_num_ = 0; - cluster_->initial_tablet_index_to_node_index_[tablet_index] = first_node_index; - cluster_->tablet_index_to_node_index_[tablet_index] = first_node_index; - cluster_->abnormal_nodes_index_.insert(second_node_index); - cluster_->read_pending_nodes_index_.insert(second_node_index); - cluster_->write_pending_nodes_index_.insert(second_node_index); - cluster_->scan_pending_nodes_index_.insert(second_node_index); - - ASSERT_EQ(0, cluster_->tablets_moved_too_frequently_.size()); - ASSERT_EQ(0, cluster_->tablets_moved_to_abnormal_nodes_.size()); - ASSERT_EQ(0, cluster_->tablets_moved_to_read_pending_nodes_.size()); - ASSERT_EQ(0, cluster_->tablets_moved_to_write_pending_nodes_.size()); - ASSERT_EQ(0, cluster_->tablets_moved_to_scan_pending_nodes_.size()); - int64_t current_time_us = tera::get_micros(); - cluster_->lb_options_.tablet_move_too_frequently_threshold_s = 600; - cluster_->tablets_[tablet_index]->tablet_ptr->last_move_time_us_ = current_time_us; - - cluster_->MoveTablet(tablet_index, first_node_index, second_node_index); - ASSERT_EQ(first_node_index, cluster_->initial_tablet_index_to_node_index_[tablet_index]); - ASSERT_EQ(second_node_index, cluster_->tablet_index_to_node_index_[tablet_index]); - ASSERT_EQ(1, cluster_->tablet_moved_num_); - ASSERT_EQ(1, cluster_->tablets_moved_too_frequently_.size()); - ASSERT_EQ(1, cluster_->tablets_moved_to_abnormal_nodes_.size()); - ASSERT_EQ(1, cluster_->tablets_moved_to_read_pending_nodes_.size()); - ASSERT_EQ(1, cluster_->tablets_moved_to_write_pending_nodes_.size()); - ASSERT_EQ(1, cluster_->tablets_moved_to_scan_pending_nodes_.size()); - - cluster_->MoveTablet(tablet_index, second_node_index, third_node_index); - ASSERT_EQ(first_node_index, cluster_->initial_tablet_index_to_node_index_[tablet_index]); - ASSERT_EQ(third_node_index, cluster_->tablet_index_to_node_index_[tablet_index]); - ASSERT_EQ(1, cluster_->tablet_moved_num_); - ASSERT_EQ(1, cluster_->tablets_moved_too_frequently_.size()); - ASSERT_EQ(0, cluster_->tablets_moved_to_abnormal_nodes_.size()); - ASSERT_EQ(0, cluster_->tablets_moved_to_read_pending_nodes_.size()); - ASSERT_EQ(0, cluster_->tablets_moved_to_write_pending_nodes_.size()); - ASSERT_EQ(0, cluster_->tablets_moved_to_scan_pending_nodes_.size()); - - cluster_->MoveTablet(tablet_index, third_node_index, first_node_index); - ASSERT_EQ(first_node_index, cluster_->initial_tablet_index_to_node_index_[tablet_index]); - ASSERT_EQ(first_node_index, cluster_->tablet_index_to_node_index_[tablet_index]); - ASSERT_EQ(0, cluster_->tablet_moved_num_); - ASSERT_EQ(0, cluster_->tablets_moved_too_frequently_.size()); - ASSERT_EQ(0, cluster_->tablets_moved_to_abnormal_nodes_.size()); - ASSERT_EQ(0, cluster_->tablets_moved_to_read_pending_nodes_.size()); - ASSERT_EQ(0, cluster_->tablets_moved_to_write_pending_nodes_.size()); - ASSERT_EQ(0, cluster_->tablets_moved_to_scan_pending_nodes_.size()); - - cluster_->tablets_[tablet_index]->tablet_ptr->last_move_time_us_ = current_time_us - 2 * cluster_->lb_options_.tablet_move_too_frequently_threshold_s * 1000000; - cluster_->MoveTablet(tablet_index, first_node_index, second_node_index); - ASSERT_EQ(0, cluster_->tablets_moved_too_frequently_.size()); - ASSERT_EQ(1, cluster_->tablets_moved_to_abnormal_nodes_.size()); - ASSERT_EQ(1, cluster_->tablets_moved_to_read_pending_nodes_.size()); - ASSERT_EQ(1, cluster_->tablets_moved_to_write_pending_nodes_.size()); - ASSERT_EQ(1, cluster_->tablets_moved_to_scan_pending_nodes_.size()); -} - -TEST_F(ClusterTest, AbnormalNodeConstructTest) { - TabletMeta tablet_meta_0; - tablet_meta_0.set_path("path/meta_0"); - tera::master::TabletPtr tablet_ptr_0(new tera::master::Tablet(tablet_meta_0)); - std::shared_ptr lb_tablet_0 = std::make_shared(); - lb_tablet_0->tablet_ptr = tablet_ptr_0; - - TabletMeta tablet_meta_1; - tablet_meta_1.set_path("path/meta_1"); - tera::master::TabletPtr tablet_ptr_1(new tera::master::Tablet(tablet_meta_1)); - std::shared_ptr lb_tablet_1 = std::make_shared(); - lb_tablet_1->tablet_ptr = tablet_ptr_1; - - TabletMeta tablet_meta_2; - tablet_meta_2.set_path("path/meta_2"); - tera::master::TabletPtr tablet_ptr_2(new tera::master::Tablet(tablet_meta_2)); - std::shared_ptr lb_tablet_2 = std::make_shared(); - lb_tablet_2->tablet_ptr = tablet_ptr_2; - - tera::master::TabletNodePtr tablet_node_ptr(new tera::master::TabletNode()); - tablet_node_ptr->addr_ = "127.0.0.1:2200"; - std::shared_ptr lb_node = std::make_shared(); - lb_node->tablet_node_ptr = tablet_node_ptr; - lb_node->tablets.emplace_back(lb_tablet_0); - lb_node->tablets.emplace_back(lb_tablet_1); - lb_node->tablets.emplace_back(lb_tablet_2); - - std::vector> lb_nodes; - lb_nodes.emplace_back(lb_node); +TEST_F(ClusterTest, SortNodesByComplexLoadTest) { + cluster_->size_per_node_[0] = 20; + cluster_->size_per_node_[1] = 10; + cluster_->size_per_node_[2] = 30; - LBOptions options; - options.abnormal_node_ratio = 0.5; + std::vector sorted_node_index; + cluster_->SortNodesByComplexLoad(&sorted_node_index); + + ASSERT_EQ(1, sorted_node_index[0]); + ASSERT_EQ(0, sorted_node_index[1]); + ASSERT_EQ(2, sorted_node_index[2]); +} + +TEST_F(ClusterTest, SortTabletsOfNodeByReadTest) { + TabletMeta tablet_meta_low; + TabletMeta tablet_meta_middle; + TabletMeta tablet_meta_high; + + tera::master::TabletPtr tablet_ptr_low(new tera::master::Tablet(tablet_meta_low)); + tera::master::TabletPtr tablet_ptr_middle(new tera::master::Tablet(tablet_meta_middle)); + tera::master::TabletPtr tablet_ptr_high(new tera::master::Tablet(tablet_meta_high)); + + tablet_ptr_low->average_counter_.set_read_rows(10); + tablet_ptr_middle->average_counter_.set_read_rows(20); + tablet_ptr_high->average_counter_.set_read_rows(30); + + std::shared_ptr lb_tablet_low = std::make_shared(); + std::shared_ptr lb_tablet_middle = std::make_shared(); + std::shared_ptr lb_tablet_high = std::make_shared(); - tablet_ptr_0->SetStatus(TabletMeta::kTabletReady); - tablet_ptr_1->SetStatus(TabletMeta::kTabletReady); - tablet_ptr_2->SetStatus(TabletMeta::kTabletReady); - cluster_.reset(new Cluster(lb_nodes, options)); - ASSERT_EQ(0, cluster_->initial_tablets_not_ready_per_node_[0].size()); - ASSERT_EQ(0, cluster_->abnormal_nodes_index_.size()); + lb_tablet_low->tablet_ptr = tablet_ptr_low; + lb_tablet_middle->tablet_ptr = tablet_ptr_middle; + lb_tablet_high->tablet_ptr = tablet_ptr_high; - tablet_ptr_0->SetStatus(TabletMeta::kTabletOffline); - cluster_.reset(new Cluster(lb_nodes, options)); - ASSERT_EQ(1, cluster_->initial_tablets_not_ready_per_node_[0].size()); - ASSERT_EQ(0, cluster_->abnormal_nodes_index_.size()); + uint32_t tablet_index_low = 0; + uint32_t tablet_index_middle = 1; + uint32_t tablet_index_high = 2; - tablet_ptr_1->SetStatus(TabletMeta::kTabletOffline); - cluster_.reset(new Cluster(lb_nodes, options)); - ASSERT_EQ(2, cluster_->initial_tablets_not_ready_per_node_[0].size()); - ASSERT_EQ(1, cluster_->abnormal_nodes_index_.size()); + cluster_->tablets_[tablet_index_low] = lb_tablet_low; + cluster_->tablets_[tablet_index_middle] = lb_tablet_middle; + cluster_->tablets_[tablet_index_high] = lb_tablet_high; + + uint32_t node_index = 0; + cluster_->tablets_per_node_[node_index].emplace_back(tablet_index_low); + cluster_->tablets_per_node_[node_index].emplace_back(tablet_index_high); + cluster_->tablets_per_node_[node_index].emplace_back(tablet_index_middle); + + std::vector sorted_tablet_index; + cluster_->SortTabletsOfNodeByReadLoad(node_index, &sorted_tablet_index); + + ASSERT_EQ(tablet_index_low, sorted_tablet_index[0]); + ASSERT_EQ(tablet_index_middle, sorted_tablet_index[1]); + ASSERT_EQ(tablet_index_high, sorted_tablet_index[2]); } -TEST_F(ClusterTest, SortNodesByTabletCount) { - cluster_->tablets_per_node_[0].emplace_back(0); - cluster_->tablets_per_node_[0].emplace_back(1); - cluster_->tablets_per_node_[1].emplace_back(2); - cluster_->tablets_per_node_[2].emplace_back(3); - cluster_->tablets_per_node_[2].emplace_back(4); - cluster_->tablets_per_node_[2].emplace_back(5); - - cluster_->node_index_sorted_by_tablet_count_.emplace_back(0); - cluster_->node_index_sorted_by_tablet_count_.emplace_back(1); - cluster_->node_index_sorted_by_tablet_count_.emplace_back(2); - ASSERT_EQ(0, cluster_->node_index_sorted_by_tablet_count_[0]); - ASSERT_EQ(1, cluster_->node_index_sorted_by_tablet_count_[1]); - ASSERT_EQ(2, cluster_->node_index_sorted_by_tablet_count_[2]); - - cluster_->SortNodesByTabletCount(); - ASSERT_EQ(1, cluster_->node_index_sorted_by_tablet_count_[0]); - ASSERT_EQ(0, cluster_->node_index_sorted_by_tablet_count_[1]); - ASSERT_EQ(2, cluster_->node_index_sorted_by_tablet_count_[2]); +TEST_F(ClusterTest, SortTabletsOfNodeByWriteLoadTest) { + TabletMeta tablet_meta_low; + TabletMeta tablet_meta_middle; + TabletMeta tablet_meta_high; + + tera::master::TabletPtr tablet_ptr_low(new tera::master::Tablet(tablet_meta_low)); + tera::master::TabletPtr tablet_ptr_middle(new tera::master::Tablet(tablet_meta_middle)); + tera::master::TabletPtr tablet_ptr_high(new tera::master::Tablet(tablet_meta_high)); + + tablet_ptr_low->average_counter_.set_write_rows(10); + tablet_ptr_middle->average_counter_.set_write_rows(20); + tablet_ptr_high->average_counter_.set_write_rows(30); + + std::shared_ptr lb_tablet_low = std::make_shared(); + std::shared_ptr lb_tablet_middle = std::make_shared(); + std::shared_ptr lb_tablet_high = std::make_shared(); + + lb_tablet_low->tablet_ptr = tablet_ptr_low; + lb_tablet_middle->tablet_ptr = tablet_ptr_middle; + lb_tablet_high->tablet_ptr = tablet_ptr_high; + + uint32_t tablet_index_low = 0; + uint32_t tablet_index_middle = 1; + uint32_t tablet_index_high = 2; + + cluster_->tablets_[tablet_index_low] = lb_tablet_low; + cluster_->tablets_[tablet_index_middle] = lb_tablet_middle; + cluster_->tablets_[tablet_index_high] = lb_tablet_high; + + uint32_t node_index = 0; + cluster_->tablets_per_node_[node_index].emplace_back(tablet_index_low); + cluster_->tablets_per_node_[node_index].emplace_back(tablet_index_high); + cluster_->tablets_per_node_[node_index].emplace_back(tablet_index_middle); + + std::vector sorted_tablet_index; + cluster_->SortTabletsOfNodeByWriteLoad(node_index, &sorted_tablet_index); + + ASSERT_EQ(tablet_index_low, sorted_tablet_index[0]); + ASSERT_EQ(tablet_index_middle, sorted_tablet_index[1]); + ASSERT_EQ(tablet_index_high, sorted_tablet_index[2]); } -TEST_F(ClusterTest, SortNodesBySizeTest) { - cluster_->size_per_node_[0] = 20; - cluster_->size_per_node_[1] = 10; - cluster_->size_per_node_[2] = 30; +TEST_F(ClusterTest, SortTabletsOfNodeByScanLoad) { + TabletMeta tablet_meta_low; + TabletMeta tablet_meta_middle; + TabletMeta tablet_meta_high; + + tera::master::TabletPtr tablet_ptr_low(new tera::master::Tablet(tablet_meta_low)); + tera::master::TabletPtr tablet_ptr_middle(new tera::master::Tablet(tablet_meta_middle)); + tera::master::TabletPtr tablet_ptr_high(new tera::master::Tablet(tablet_meta_high)); + + tablet_ptr_low->average_counter_.set_scan_rows(10); + tablet_ptr_middle->average_counter_.set_scan_rows(20); + tablet_ptr_high->average_counter_.set_scan_rows(30); + + std::shared_ptr lb_tablet_low = std::make_shared(); + std::shared_ptr lb_tablet_middle = std::make_shared(); + std::shared_ptr lb_tablet_high = std::make_shared(); + + lb_tablet_low->tablet_ptr = tablet_ptr_low; + lb_tablet_middle->tablet_ptr = tablet_ptr_middle; + lb_tablet_high->tablet_ptr = tablet_ptr_high; + + uint32_t tablet_index_low = 0; + uint32_t tablet_index_middle = 1; + uint32_t tablet_index_high = 2; + + cluster_->tablets_[tablet_index_low] = lb_tablet_low; + cluster_->tablets_[tablet_index_middle] = lb_tablet_middle; + cluster_->tablets_[tablet_index_high] = lb_tablet_high; - cluster_->node_index_sorted_by_size_.emplace_back(0); - cluster_->node_index_sorted_by_size_.emplace_back(1); - cluster_->node_index_sorted_by_size_.emplace_back(2); - ASSERT_EQ(0, cluster_->node_index_sorted_by_size_[0]); - ASSERT_EQ(1, cluster_->node_index_sorted_by_size_[1]); - ASSERT_EQ(2, cluster_->node_index_sorted_by_size_[2]); + uint32_t node_index = 0; + cluster_->tablets_per_node_[node_index].emplace_back(tablet_index_low); + cluster_->tablets_per_node_[node_index].emplace_back(tablet_index_high); + cluster_->tablets_per_node_[node_index].emplace_back(tablet_index_middle); - cluster_->SortNodesBySize(); - ASSERT_EQ(1, cluster_->node_index_sorted_by_size_[0]); - ASSERT_EQ(0, cluster_->node_index_sorted_by_size_[1]); - ASSERT_EQ(2, cluster_->node_index_sorted_by_size_[2]); + std::vector sorted_tablet_index; + cluster_->SortTabletsOfNodeByScanLoad(node_index, &sorted_tablet_index); + + ASSERT_EQ(tablet_index_low, sorted_tablet_index[0]); + ASSERT_EQ(tablet_index_middle, sorted_tablet_index[1]); + ASSERT_EQ(tablet_index_high, sorted_tablet_index[2]); } -TEST_F(ClusterTest, SortNodesByReadLoad) { - cluster_->read_load_per_node_[0] = 20; - cluster_->read_load_per_node_[1] = 10; - cluster_->read_load_per_node_[2] = 30; +TEST_F(ClusterTest, SortTabletsOfNodeByLRead) { + TabletMeta tablet_meta_low; + TabletMeta tablet_meta_middle; + TabletMeta tablet_meta_high; + + tera::master::TabletPtr tablet_ptr_low(new tera::master::Tablet(tablet_meta_low)); + tera::master::TabletPtr tablet_ptr_middle(new tera::master::Tablet(tablet_meta_middle)); + tera::master::TabletPtr tablet_ptr_high(new tera::master::Tablet(tablet_meta_high)); + + tablet_ptr_low->average_counter_.set_low_read_cell(10); + tablet_ptr_middle->average_counter_.set_low_read_cell(20); + tablet_ptr_high->average_counter_.set_low_read_cell(30); + + std::shared_ptr lb_tablet_low = std::make_shared(); + std::shared_ptr lb_tablet_middle = std::make_shared(); + std::shared_ptr lb_tablet_high = std::make_shared(); + + lb_tablet_low->tablet_ptr = tablet_ptr_low; + lb_tablet_middle->tablet_ptr = tablet_ptr_middle; + lb_tablet_high->tablet_ptr = tablet_ptr_high; - cluster_->node_index_sorted_by_read_load_.emplace_back(0); - cluster_->node_index_sorted_by_read_load_.emplace_back(1); - cluster_->node_index_sorted_by_read_load_.emplace_back(2); - ASSERT_EQ(0, cluster_->node_index_sorted_by_read_load_[0]); - ASSERT_EQ(1, cluster_->node_index_sorted_by_read_load_[1]); - ASSERT_EQ(2, cluster_->node_index_sorted_by_read_load_[2]); + uint32_t tablet_index_low = 0; + uint32_t tablet_index_middle = 1; + uint32_t tablet_index_high = 2; - cluster_->SortNodesByReadLoad(); - ASSERT_EQ(1, cluster_->node_index_sorted_by_read_load_[0]); - ASSERT_EQ(0, cluster_->node_index_sorted_by_read_load_[1]); - ASSERT_EQ(2, cluster_->node_index_sorted_by_read_load_[2]); + cluster_->tablets_[tablet_index_low] = lb_tablet_low; + cluster_->tablets_[tablet_index_middle] = lb_tablet_middle; + cluster_->tablets_[tablet_index_high] = lb_tablet_high; + + uint32_t node_index = 0; + cluster_->tablets_per_node_[node_index].emplace_back(tablet_index_low); + cluster_->tablets_per_node_[node_index].emplace_back(tablet_index_high); + cluster_->tablets_per_node_[node_index].emplace_back(tablet_index_middle); + + std::vector sorted_tablet_index; + cluster_->SortTabletsOfNodeByLRead(node_index, &sorted_tablet_index); + + ASSERT_EQ(tablet_index_low, sorted_tablet_index[0]); + ASSERT_EQ(tablet_index_middle, sorted_tablet_index[1]); + ASSERT_EQ(tablet_index_high, sorted_tablet_index[2]); } -TEST_F(ClusterTest, SortNodesByWriteLoad) { - cluster_->write_load_per_node_[0] = 20; - cluster_->write_load_per_node_[1] = 10; - cluster_->write_load_per_node_[2] = 30; +TEST_F(ClusterTest, IsMetaNodeTest) { + cluster_->meta_table_node_index_ = 0; + cluster_->nodes_[0]; + cluster_->nodes_[1]; + ASSERT_TRUE(cluster_->IsMetaNode(0)); + ASSERT_FALSE(cluster_->IsMetaNode(1)); +} - cluster_->node_index_sorted_by_write_load_.emplace_back(0); - cluster_->node_index_sorted_by_write_load_.emplace_back(1); - cluster_->node_index_sorted_by_write_load_.emplace_back(2); - ASSERT_EQ(0, cluster_->node_index_sorted_by_write_load_[0]); - ASSERT_EQ(1, cluster_->node_index_sorted_by_write_load_[1]); - ASSERT_EQ(2, cluster_->node_index_sorted_by_write_load_[2]); +TEST_F(ClusterTest, IsReadPendingNodeTest) { + cluster_->read_pending_per_node_[0] = 0; + ASSERT_FALSE(cluster_->IsReadPendingNode(0)); + cluster_->read_pending_per_node_[0] = 10; + ASSERT_TRUE(cluster_->IsReadPendingNode(0)); +} - cluster_->SortNodesByWriteLoad(); - ASSERT_EQ(1, cluster_->node_index_sorted_by_write_load_[0]); - ASSERT_EQ(0, cluster_->node_index_sorted_by_write_load_[1]); - ASSERT_EQ(2, cluster_->node_index_sorted_by_write_load_[2]); +TEST_F(ClusterTest, IsWritePendingNodeTest) { + cluster_->write_pending_per_node_[0] = 0; + ASSERT_FALSE(cluster_->IsWritePendingNode(0)); + cluster_->write_pending_per_node_[0] = 10; + ASSERT_TRUE(cluster_->IsWritePendingNode(0)); } -TEST_F(ClusterTest, SortNodesByScanLoad) { - cluster_->scan_load_per_node_[0] = 20; - cluster_->scan_load_per_node_[1] = 10; - cluster_->scan_load_per_node_[2] = 30; +TEST_F(ClusterTest, IsScanPendingNodeTest) { + cluster_->scan_pending_per_node_[0] = 0; + ASSERT_FALSE(cluster_->IsScanPendingNode(0)); + cluster_->scan_pending_per_node_[0] = 10; + ASSERT_TRUE(cluster_->IsScanPendingNode(0)); +} + +TEST_F(ClusterTest, IsHeavyReadPendingNodeTest) { + cluster_->read_pending_per_node_[0] = 10; + ASSERT_FALSE(cluster_->IsHeavyReadPendingNode(0)); + cluster_->read_pending_per_node_[0] = 1000; + ASSERT_TRUE(cluster_->IsHeavyReadPendingNode(0)); +} + +TEST_F(ClusterTest, IsHeavyWritePendingNodeTest) { + cluster_->write_pending_per_node_[0] = 10; + ASSERT_FALSE(cluster_->IsHeavyWritePendingNode(0)); + cluster_->write_pending_per_node_[0] = 1000; + ASSERT_TRUE(cluster_->IsHeavyWritePendingNode(0)); +} + +TEST_F(ClusterTest, IsHeavyScanPendingNodeTest) { + cluster_->scan_pending_per_node_[0] = 10; + ASSERT_FALSE(cluster_->IsHeavyScanPendingNode(0)); + cluster_->scan_pending_per_node_[0] = 1000; + ASSERT_TRUE(cluster_->IsHeavyScanPendingNode(0)); +} + +TEST_F(ClusterTest, HeavyPendingNodeNumTest) { + cluster_->nodes_[0]; + cluster_->nodes_[1]; + cluster_->nodes_[2]; + cluster_->read_pending_per_node_[0] = 10; + cluster_->write_pending_per_node_[0] = 0; + cluster_->scan_pending_per_node_[0] = 0; + + cluster_->read_pending_per_node_[1] = 0; + cluster_->write_pending_per_node_[1] = 1000; + cluster_->scan_pending_per_node_[1] = 0; + + cluster_->read_pending_per_node_[2] = 0; + cluster_->write_pending_per_node_[2] = 0; + cluster_->scan_pending_per_node_[2] = 2000; - cluster_->node_index_sorted_by_scan_load_.emplace_back(0); - cluster_->node_index_sorted_by_scan_load_.emplace_back(1); - cluster_->node_index_sorted_by_scan_load_.emplace_back(2); - ASSERT_EQ(0, cluster_->node_index_sorted_by_scan_load_[0]); - ASSERT_EQ(1, cluster_->node_index_sorted_by_scan_load_[1]); - ASSERT_EQ(2, cluster_->node_index_sorted_by_scan_load_[2]); + ASSERT_EQ(2, cluster_->HeavyPendingNodeNum()); +} + +TEST_F(ClusterTest, IsHeavyLReadNodeTest) { + uint32_t node_index = 0; + cluster_->lread_per_node_[node_index] = 1000; + ASSERT_FALSE(cluster_->IsHeavyLReadNode(node_index)); + cluster_->lread_per_node_[node_index] = 1000000; + ASSERT_TRUE(cluster_->IsHeavyLReadNode(node_index)); +} + +TEST_F(ClusterTest, IsAbnormalNodeTest) { + cluster_->abnormal_nodes_index_.insert(0); + ASSERT_TRUE(cluster_->IsAbnormalNode(0)); + ASSERT_FALSE(cluster_->IsAbnormalNode(1)); +} + +TEST_F(ClusterTest, IsFlashSizeEnoughTest1) { + cluster_->lb_options_.flash_size_cost_weight = 100; + TableSchema schema; + LocalityGroupSchema *lg_schema = schema.add_locality_groups(); + lg_schema->set_store_type(MemoryStore); + tera::master::TablePtr table_ptr(new tera::master::Table("", schema, kTableEnable)); + + TabletMeta tablet_meta; + tera::master::TabletPtr tablet_ptr(new tera::master::Tablet(tablet_meta, table_ptr)); + std::shared_ptr lb_tablet = std::make_shared(); + lb_tablet->tablet_ptr = tablet_ptr; + cluster_->tablets_[0] = lb_tablet; + + tera::master::TabletNodePtr tablet_node_ptr(new tera::master::TabletNode()); + tablet_node_ptr->persistent_cache_size_ = 1024; + std::shared_ptr lb_node = std::make_shared(); + lb_node->tablet_node_ptr = tablet_node_ptr; + cluster_->nodes_[0] = lb_node; + + ASSERT_TRUE(cluster_->IsFlashSizeEnough(0, 0)); +} + +TEST_F(ClusterTest, IsFlashSizeEnoughTest2) { + cluster_->lb_options_.flash_size_cost_weight = 100; + TableSchema schema; + LocalityGroupSchema *lg_schema = schema.add_locality_groups(); + lg_schema->set_store_type(FlashStore); + tera::master::TablePtr table_ptr(new tera::master::Table("", schema, kTableEnable)); + + TabletMeta tablet_meta; + tera::master::TabletPtr tablet_ptr(new tera::master::Tablet(tablet_meta, table_ptr)); + std::shared_ptr lb_tablet = std::make_shared(); + lb_tablet->tablet_ptr = tablet_ptr; + cluster_->tablets_[0] = lb_tablet; + + tera::master::TabletNodePtr tablet_node_ptr(new tera::master::TabletNode()); + tablet_node_ptr->persistent_cache_size_ = 1024; + std::shared_ptr lb_node = std::make_shared(); + lb_node->tablet_node_ptr = tablet_node_ptr; + cluster_->nodes_[0] = lb_node; + + ASSERT_TRUE(cluster_->IsFlashSizeEnough(0, 0)); +} - cluster_->SortNodesByScanLoad(); - ASSERT_EQ(1, cluster_->node_index_sorted_by_scan_load_[0]); - ASSERT_EQ(0, cluster_->node_index_sorted_by_scan_load_[1]); - ASSERT_EQ(2, cluster_->node_index_sorted_by_scan_load_[2]); +TEST_F(ClusterTest, IsFlashSizeEnoughTest3) { + cluster_->lb_options_.flash_size_cost_weight = 100; + TableSchema schema; + LocalityGroupSchema *lg_schema = schema.add_locality_groups(); + lg_schema->set_store_type(FlashStore); + tera::master::TablePtr table_ptr(new tera::master::Table("", schema, kTableEnable)); + + TabletMeta tablet_meta; + tera::master::TabletPtr tablet_ptr(new tera::master::Tablet(tablet_meta, table_ptr)); + std::shared_ptr lb_tablet = std::make_shared(); + lb_tablet->tablet_ptr = tablet_ptr; + cluster_->tablets_[0] = lb_tablet; + + tera::master::TabletNodePtr tablet_node_ptr(new tera::master::TabletNode()); + tablet_node_ptr->persistent_cache_size_ = 0; + std::shared_ptr lb_node = std::make_shared(); + lb_node->tablet_node_ptr = tablet_node_ptr; + cluster_->nodes_[0] = lb_node; + + ASSERT_FALSE(cluster_->IsFlashSizeEnough(0, 0)); +} + +TEST_F(ClusterTest, IsFlashSizeEnoughTest4) { + cluster_->lb_options_.flash_size_cost_weight = 100; + TableSchema schema; + LocalityGroupSchema *lg_schema = schema.add_locality_groups(); + lg_schema->set_store_type(DiskStore); + tera::master::TablePtr table_ptr(new tera::master::Table("", schema, kTableEnable)); + + TabletMeta tablet_meta; + tera::master::TabletPtr tablet_ptr(new tera::master::Tablet(tablet_meta, table_ptr)); + std::shared_ptr lb_tablet = std::make_shared(); + lb_tablet->tablet_ptr = tablet_ptr; + cluster_->tablets_[0] = lb_tablet; + + tera::master::TabletNodePtr tablet_node_ptr(new tera::master::TabletNode()); + tablet_node_ptr->persistent_cache_size_ = 0; + std::shared_ptr lb_node = std::make_shared(); + lb_node->tablet_node_ptr = tablet_node_ptr; + cluster_->nodes_[0] = lb_node; + + ASSERT_TRUE(cluster_->IsFlashSizeEnough(0, 0)); +} + +TEST_F(ClusterTest, IsMetaTabletTest1) { + cluster_->tablets_[0]; + ASSERT_FALSE(cluster_->IsMetaTablet(0)); +} + +TEST_F(ClusterTest, IsMetaTabletTest2) { + cluster_->lb_options_.meta_table_name = "meta_table"; + cluster_->tables_[0] = cluster_->lb_options_.meta_table_name; + cluster_->tables_[1] = "user_table"; + cluster_->tablet_index_to_table_index_[0] = 0; + cluster_->tablet_index_to_table_index_[1] = 1; + cluster_->tablets_[0]; + cluster_->tablets_[1]; + ASSERT_TRUE(cluster_->IsMetaTablet(0)); + ASSERT_FALSE(cluster_->IsMetaTablet(1)); +} + +TEST_F(ClusterTest, IsTabletMoveTooFrequentTest1) { + cluster_->lb_options_.tablet_move_too_frequently_threshold_s = 300; + + TabletMeta tablet_meta; + tera::master::TabletPtr tablet_ptr(new tera::master::Tablet(tablet_meta)); + tablet_ptr->SetLastMoveTime(0); + std::shared_ptr lb_tablet = std::make_shared(); + lb_tablet->tablet_ptr = tablet_ptr; + cluster_->tablets_[0] = lb_tablet; + + ASSERT_FALSE(cluster_->IsTabletMoveTooFrequent(0)); +} + +TEST_F(ClusterTest, IsTabletMoveTooFrequentTest2) { + cluster_->lb_options_.tablet_move_too_frequently_threshold_s = 300; + + TabletMeta tablet_meta; + tera::master::TabletPtr tablet_ptr(new tera::master::Tablet(tablet_meta)); + tablet_ptr->SetLastMoveTime(get_micros()); + std::shared_ptr lb_tablet = std::make_shared(); + lb_tablet->tablet_ptr = tablet_ptr; + cluster_->tablets_[0] = lb_tablet; + + ASSERT_TRUE(cluster_->IsTabletMoveTooFrequent(0)); +} +TEST_F(ClusterTest, RegisterTabletTest) { + TabletMeta tablet_meta_meta; + tablet_meta_meta.set_table_name("meta_table"); + tablet_meta_meta.set_path("path/meta_table"); + tera::master::TabletPtr tablet_ptr_meta(new tera::master::Tablet(tablet_meta_meta)); + std::shared_ptr lb_tablet_meta = std::make_shared(); + lb_tablet_meta->tablet_ptr = tablet_ptr_meta; + + uint32_t tablet_index_0 = 0; + uint32_t node_index_0 = 0; + cluster_->RegisterTablet(lb_tablet_meta, tablet_index_0, node_index_0); + + ASSERT_EQ(1, cluster_->table_num_); + ASSERT_EQ(1, cluster_->tables_.size()); + ASSERT_STREQ("meta_table", cluster_->tables_[0].c_str()); + ASSERT_EQ(0, cluster_->tables_to_index_["meta_table"]); + + ASSERT_EQ(tablet_index_0, cluster_->tablets_to_index_["path/meta_table"]); + + ASSERT_EQ(node_index_0, cluster_->tablet_index_to_node_index_[tablet_index_0]); + ASSERT_EQ(node_index_0, cluster_->initial_tablet_index_to_node_index_[tablet_index_0]); + ASSERT_EQ(0, cluster_->tablet_index_to_table_index_[tablet_index_0]); +} + +TEST_F(ClusterTest, AddTabletTest) { + TabletMeta tablet_meta_meta; + tablet_meta_meta.set_size(10); + tera::master::TabletPtr tablet_ptr_meta(new tera::master::Tablet(tablet_meta_meta)); + tablet_ptr_meta->SetDataSizeOnFlash(1024); + tablet_ptr_meta->average_counter_.set_read_rows(20); + tablet_ptr_meta->average_counter_.set_write_rows(30); + tablet_ptr_meta->average_counter_.set_scan_rows(40); + std::shared_ptr lb_tablet_meta = std::make_shared(); + lb_tablet_meta->tablet_ptr = tablet_ptr_meta; + + uint32_t tablet_index = 0; + cluster_->tablets_[tablet_index] = lb_tablet_meta; + + uint32_t node_index = 0; + cluster_->size_per_node_[node_index] = 0; + cluster_->flash_size_per_node_[node_index] = 0; + cluster_->read_load_per_node_[node_index] = 0; + cluster_->write_load_per_node_[node_index] = 0; + cluster_->scan_load_per_node_[node_index] = 0; + + cluster_->AddTablet(tablet_index, node_index); + + ASSERT_EQ(1, cluster_->tablets_per_node_.size()); + ASSERT_EQ(10, cluster_->size_per_node_[node_index]); + ASSERT_EQ(1024, cluster_->flash_size_per_node_[node_index]); + ASSERT_EQ(20, cluster_->read_load_per_node_[node_index]); + ASSERT_EQ(30, cluster_->write_load_per_node_[node_index]); + ASSERT_EQ(40, cluster_->scan_load_per_node_[node_index]); +} + +TEST_F(ClusterTest, RemoveTabletTest) { + TabletMeta tablet_meta_meta; + tablet_meta_meta.set_size(10); + tera::master::TabletPtr tablet_ptr_meta(new tera::master::Tablet(tablet_meta_meta)); + tablet_ptr_meta->SetDataSizeOnFlash(1024); + tablet_ptr_meta->average_counter_.set_read_rows(20); + tablet_ptr_meta->average_counter_.set_write_rows(30); + tablet_ptr_meta->average_counter_.set_scan_rows(40); + std::shared_ptr lb_tablet_meta = std::make_shared(); + lb_tablet_meta->tablet_ptr = tablet_ptr_meta; + + uint32_t tablet_index = 0; + cluster_->tablets_[tablet_index] = lb_tablet_meta; + + uint32_t node_index = 0; + cluster_->tablets_per_node_[node_index].emplace_back(tablet_index); + + cluster_->size_per_node_[node_index] = 10; + cluster_->flash_size_per_node_[node_index] = 1024; + cluster_->read_load_per_node_[node_index] = 20; + cluster_->write_load_per_node_[node_index] = 30; + cluster_->scan_load_per_node_[node_index] = 40; + + cluster_->RemoveTablet(tablet_index, node_index); + + ASSERT_EQ(0, cluster_->tablets_per_node_[node_index].size()); + ASSERT_EQ(0, cluster_->size_per_node_[node_index]); + ASSERT_EQ(0, cluster_->flash_size_per_node_[node_index]); + ASSERT_EQ(0, cluster_->read_load_per_node_[node_index]); + ASSERT_EQ(0, cluster_->write_load_per_node_[node_index]); + ASSERT_EQ(0, cluster_->scan_load_per_node_[node_index]); +} + +TEST_F(ClusterTest, MoveTabletTest) { + TabletMeta tablet_meta_meta; + tera::master::TabletPtr tablet_ptr_meta(new tera::master::Tablet(tablet_meta_meta)); + std::shared_ptr lb_tablet_meta = std::make_shared(); + lb_tablet_meta->tablet_ptr = tablet_ptr_meta; + + uint32_t tablet_index = 0; + uint32_t first_node_index = 0; + uint32_t second_node_index = 1; + uint32_t third_node_index = 2; + + cluster_->tablets_[tablet_index] = lb_tablet_meta; + cluster_->tablet_moved_num_ = 0; + cluster_->initial_tablet_index_to_node_index_[tablet_index] = first_node_index; + cluster_->tablet_index_to_node_index_[tablet_index] = first_node_index; + + cluster_->MoveTablet(tablet_index, first_node_index, second_node_index); + ASSERT_EQ(first_node_index, cluster_->initial_tablet_index_to_node_index_[tablet_index]); + ASSERT_EQ(second_node_index, cluster_->tablet_index_to_node_index_[tablet_index]); + ASSERT_EQ(1, cluster_->tablet_moved_num_); + + cluster_->MoveTablet(tablet_index, second_node_index, third_node_index); + ASSERT_EQ(first_node_index, cluster_->initial_tablet_index_to_node_index_[tablet_index]); + ASSERT_EQ(third_node_index, cluster_->tablet_index_to_node_index_[tablet_index]); + ASSERT_EQ(1, cluster_->tablet_moved_num_); + + cluster_->MoveTablet(tablet_index, third_node_index, first_node_index); + ASSERT_EQ(first_node_index, cluster_->initial_tablet_index_to_node_index_[tablet_index]); + ASSERT_EQ(first_node_index, cluster_->tablet_index_to_node_index_[tablet_index]); + ASSERT_EQ(0, cluster_->tablet_moved_num_); } -} // namespace load_balancer -} // namespace tera +} // namespace load_balancer +} // namespace tera /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/load_balancer/test/cost_functions_test.cc b/src/load_balancer/test/cost_functions_test.cc index bf67655ca..ad6bbbe90 100644 --- a/src/load_balancer/test/cost_functions_test.cc +++ b/src/load_balancer/test/cost_functions_test.cc @@ -13,163 +13,155 @@ namespace tera { namespace load_balancer { class CostFunctionTest : public ::testing::Test { -public: - virtual void SetUp() { - move_cost_function_.reset(new MoveCountCostFunction(lb_options_)); - } + public: + virtual void SetUp() { move_cost_function_.reset(new MoveCountCostFunction(lb_options_)); } - virtual void TearDown() { - } + virtual void TearDown() {} -private: - LBOptions lb_options_; - std::shared_ptr move_cost_function_; + private: + LBOptions lb_options_; + std::shared_ptr move_cost_function_; }; class MoveCountCostFunctionTest : public ::testing::Test { -public: - virtual void SetUp() { - move_cost_function_.reset(new MoveCountCostFunction(lb_options_)); + public: + virtual void SetUp() { + move_cost_function_.reset(new MoveCountCostFunction(lb_options_)); - std::vector> empty_lb_nodes; - LBOptions options; - cluster_.reset(new Cluster(empty_lb_nodes, options)); + std::vector> empty_lb_nodes; + LBOptions options; + cluster_.reset(new Cluster(empty_lb_nodes, options, false)); - move_cost_function_->Init(cluster_); - } + move_cost_function_->Init(cluster_); + } - virtual void TearDown() { - } + virtual void TearDown() {} -private: - LBOptions lb_options_; - std::shared_ptr move_cost_function_; - std::shared_ptr cluster_; + private: + LBOptions lb_options_; + std::shared_ptr move_cost_function_; + std::shared_ptr cluster_; }; class TabletCountCostFunctionTest : public ::testing::Test { -public: - virtual void SetUp() { - tablet_count_cost_function_.reset(new TabletCountCostFunction(lb_options_)); + public: + virtual void SetUp() { + tablet_count_cost_function_.reset(new TabletCountCostFunction(lb_options_)); - std::vector> empty_lb_nodes; - LBOptions options; - cluster_.reset(new Cluster(empty_lb_nodes, options)); + std::vector> empty_lb_nodes; + LBOptions options; + cluster_.reset(new Cluster(empty_lb_nodes, options, false)); - tablet_count_cost_function_->Init(cluster_); - } + tablet_count_cost_function_->Init(cluster_); + } - virtual void TearDown() { - } + virtual void TearDown() {} -private: - LBOptions lb_options_; - std::shared_ptr tablet_count_cost_function_; - std::shared_ptr cluster_; + private: + LBOptions lb_options_; + std::shared_ptr tablet_count_cost_function_; + std::shared_ptr cluster_; }; class SizeCostFunctionTest : public ::testing::Test { -public: - virtual void SetUp() { - size_cost_function_.reset(new SizeCostFunction(lb_options_)); + public: + virtual void SetUp() { + size_cost_function_.reset(new SizeCostFunction(lb_options_)); - std::vector> empty_lb_nodes; - LBOptions options; - cluster_.reset(new Cluster(empty_lb_nodes, options)); + std::vector> empty_lb_nodes; + LBOptions options; + cluster_.reset(new Cluster(empty_lb_nodes, options, false)); - size_cost_function_->Init(cluster_); - } + size_cost_function_->Init(cluster_); + } - virtual void TearDown() { - } + virtual void TearDown() {} -private: - LBOptions lb_options_; - std::shared_ptr size_cost_function_; - std::shared_ptr cluster_; + private: + LBOptions lb_options_; + std::shared_ptr size_cost_function_; + std::shared_ptr cluster_; }; TEST_F(CostFunctionTest, WeightTest) { - double w = 3.14; - move_cost_function_->SetWeight(w); - ASSERT_DOUBLE_EQ(w, move_cost_function_->GetWeight()); + double w = 3.14; + move_cost_function_->SetWeight(w); + ASSERT_DOUBLE_EQ(w, move_cost_function_->GetWeight()); } TEST_F(CostFunctionTest, SumTest) { - std::vector stats = {1, 2, 3}; - ASSERT_DOUBLE_EQ(6, move_cost_function_->GetSum(stats)); + std::vector stats = {1, 2, 3}; + ASSERT_DOUBLE_EQ(6, move_cost_function_->GetSum(stats)); } TEST_F(CostFunctionTest, ScaleTest) { - // value <= min - ASSERT_DOUBLE_EQ(0, move_cost_function_->Scale(0, 10, -1)); - ASSERT_DOUBLE_EQ(0, move_cost_function_->Scale(0, 10, 0)); - - // max <= min - ASSERT_DOUBLE_EQ(0, move_cost_function_->Scale(0, 0, 5)); - ASSERT_DOUBLE_EQ(0, move_cost_function_->Scale(0, -1, 5)); - - // normal case - ASSERT_DOUBLE_EQ(0, move_cost_function_->Scale(0, 10, 0)); - ASSERT_DOUBLE_EQ(0.5, move_cost_function_->Scale(0, 10, 5)); - ASSERT_DOUBLE_EQ(1, move_cost_function_->Scale(0, 10, 10)); - - // random case - size_t times = 100; - int min = 0; - int max = 10; - for (size_t i = 0; i < times; ++i) { - int value = Random::Rand(min, max + 1); - ASSERT_TRUE(move_cost_function_->Scale(min, max, value) >= 0); - ASSERT_TRUE(move_cost_function_->Scale(min, max, value) <= 1); - } + // value <= min + ASSERT_DOUBLE_EQ(0, move_cost_function_->Scale(0, 10, -1)); + ASSERT_DOUBLE_EQ(0, move_cost_function_->Scale(0, 10, 0)); + + // max <= min + ASSERT_DOUBLE_EQ(0, move_cost_function_->Scale(0, 0, 5)); + ASSERT_DOUBLE_EQ(0, move_cost_function_->Scale(0, -1, 5)); + + // normal case + ASSERT_DOUBLE_EQ(0, move_cost_function_->Scale(0, 10, 0)); + ASSERT_DOUBLE_EQ(0.5, move_cost_function_->Scale(0, 10, 5)); + ASSERT_DOUBLE_EQ(1, move_cost_function_->Scale(0, 10, 10)); + + // random case + size_t times = 100; + int min = 0; + int max = 10; + for (size_t i = 0; i < times; ++i) { + int value = Random::Rand(min, max + 1); + ASSERT_TRUE(move_cost_function_->Scale(min, max, value) >= 0); + ASSERT_TRUE(move_cost_function_->Scale(min, max, value) <= 1); + } } TEST_F(CostFunctionTest, ScaleFromArrayTest) { - std::vector stats_0 = {0, 0}; - ASSERT_DOUBLE_EQ(0, move_cost_function_->ScaleFromArray(stats_0)); - - std::vector stats_1 = {10, 10}; - ASSERT_DOUBLE_EQ(0, move_cost_function_->ScaleFromArray(stats_0)); - - int begin = 0; - int end = 100; - size_t times = 100; - std::vector stats_2; - for (size_t i = 0; i < times; ++i) { - stats_2.clear(); - stats_2.emplace_back(Random::Rand(begin, end)); - stats_2.emplace_back(Random::Rand(begin, end)); - - ASSERT_TRUE(move_cost_function_->ScaleFromArray(stats_2) >= 0); - ASSERT_TRUE(move_cost_function_->ScaleFromArray(stats_2) <= 1); - } + std::vector stats_0 = {0, 0}; + ASSERT_DOUBLE_EQ(0, move_cost_function_->ScaleFromArray(stats_0)); + + std::vector stats_1 = {10, 10}; + ASSERT_DOUBLE_EQ(0, move_cost_function_->ScaleFromArray(stats_0)); + + int begin = 0; + int end = 100; + size_t times = 100; + std::vector stats_2; + for (size_t i = 0; i < times; ++i) { + stats_2.clear(); + stats_2.emplace_back(Random::Rand(begin, end)); + stats_2.emplace_back(Random::Rand(begin, end)); + + ASSERT_TRUE(move_cost_function_->ScaleFromArray(stats_2) >= 0); + ASSERT_TRUE(move_cost_function_->ScaleFromArray(stats_2) <= 1); + } } TEST_F(MoveCountCostFunctionTest, CostTest) { - move_cost_function_->tablet_max_move_num_ = 10; - cluster_->tablet_num_ = 10; + move_cost_function_->tablet_max_move_num_ = 10; + cluster_->tablet_num_ = 10; - cluster_->tablet_moved_num_ = 1; - ASSERT_DOUBLE_EQ(0.1, move_cost_function_->Cost()); + cluster_->tablet_moved_num_ = 1; + ASSERT_DOUBLE_EQ(0.1, move_cost_function_->Cost()); - cluster_->tablet_moved_num_ = 6; - ASSERT_DOUBLE_EQ(0.6, move_cost_function_->Cost()); + cluster_->tablet_moved_num_ = 6; + ASSERT_DOUBLE_EQ(0.6, move_cost_function_->Cost()); - cluster_->tablet_moved_num_ = 10; - ASSERT_DOUBLE_EQ(1, move_cost_function_->Cost()); + cluster_->tablet_moved_num_ = 10; + ASSERT_DOUBLE_EQ(1, move_cost_function_->Cost()); - cluster_->tablet_moved_num_ = 11; - ASSERT_DOUBLE_EQ(move_cost_function_->kExpensiveCost, move_cost_function_->Cost()); + cluster_->tablet_moved_num_ = 11; + ASSERT_DOUBLE_EQ(move_cost_function_->kExpensiveCost, move_cost_function_->Cost()); } -TEST_F(TabletCountCostFunctionTest, CostTest) { -} +TEST_F(TabletCountCostFunctionTest, CostTest) {} -TEST_F(SizeCostFunctionTest, CostTest) { -} +TEST_F(SizeCostFunctionTest, CostTest) {} -} // namespace load_balancer -} // namespace tera +} // namespace load_balancer +} // namespace tera /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/load_balancer/test/random_test.cc b/src/load_balancer/test/random_test.cc index 385b76877..d84aad9fb 100644 --- a/src/load_balancer/test/random_test.cc +++ b/src/load_balancer/test/random_test.cc @@ -11,34 +11,33 @@ namespace tera { namespace load_balancer { -class RandomTest : public ::testing::Test { -}; +class RandomTest : public ::testing::Test {}; TEST_F(RandomTest, CommonTest) { - int start = 0; - int end = 3; - size_t times = 100; - - for (size_t i = 0; i < times; ++i) { - int rand = Random::Rand(start, end); - ASSERT_TRUE(rand >= start); - ASSERT_TRUE(rand < end); - } + int start = 0; + int end = 3; + size_t times = 100; + + for (size_t i = 0; i < times; ++i) { + int rand = Random::Rand(start, end); + ASSERT_TRUE(rand >= start); + ASSERT_TRUE(rand < end); + } } TEST_F(RandomTest, NegativeTest) { - int start = -10; - int end = 10; - size_t times = 100; - - for (size_t i = 0; i < times; ++i) { - int rand = Random::RandStd(start, end); - ASSERT_TRUE(rand >= start); - ASSERT_TRUE(rand < end); - } + int start = -10; + int end = 10; + size_t times = 100; + + for (size_t i = 0; i < times; ++i) { + int rand = Random::RandStd(start, end); + ASSERT_TRUE(rand >= start); + ASSERT_TRUE(rand < end); + } } -} // namespace load_balancer -} // namespace tera +} // namespace load_balancer +} // namespace tera /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/load_balancer/unity_balancer.cc b/src/load_balancer/unity_balancer.cc index c432f57cc..a6bf398a9 100644 --- a/src/load_balancer/unity_balancer.cc +++ b/src/load_balancer/unity_balancer.cc @@ -17,263 +17,256 @@ namespace load_balancer { using tera::master::TabletNodePtr; using tera::master::TabletPtr; -UnityBalancer::UnityBalancer(const LBOptions& options) : - lb_options_(options) { - // cost functions - if (lb_options_.move_count_cost_weight > 0) { - cost_functions_.emplace_back(new MoveCountCostFunction(options)); - VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; - } - if (lb_options_.move_frequency_cost_weight > 0){ - cost_functions_.emplace_back(new MoveFrequencyCostFunction(options)); - VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; - } - if (lb_options_.abnormal_node_cost_weight > 0) { - cost_functions_.emplace_back(new AbnormalNodeCostFunction(options)); - VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; - } - if (lb_options_.read_pending_node_cost_weight > 0) { - cost_functions_.emplace_back(new ReadPendingNodeCostFunction(options)); - VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; - } - if (lb_options_.write_pending_node_cost_weight > 0) { - cost_functions_.emplace_back(new WritePendingNodeCostFunction(options)); - VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; - } - if (lb_options_.scan_pending_node_cost_weight > 0) { - cost_functions_.emplace_back(new ScanPendingNodeCostFunction(options)); - VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; - } - if (lb_options_.tablet_count_cost_weight > 0) { - cost_functions_.emplace_back(new TabletCountCostFunction(options)); - VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; - } - if (lb_options_.size_cost_weight > 0) { - cost_functions_.emplace_back(new SizeCostFunction(options)); - VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; - } - if (lb_options_.read_load_cost_weight > 0) { - cost_functions_.emplace_back(new ReadLoadCostFunction(options)); - VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; - } - if (lb_options_.write_load_cost_weight > 0) { - cost_functions_.emplace_back(new WriteLoadCostFunction(options)); - VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; - } - if (lb_options_.scan_load_cost_weight > 0) { - cost_functions_.emplace_back(new ScanLoadCostFunction(options)); - VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; - } - - // action generators - action_generators_.emplace_back(new RandomActionGenerator()); +UnityBalancer::UnityBalancer(const LBOptions& options) : lb_options_(options) { + if (lb_options_.move_count_cost_weight > 0) { + cost_functions_.emplace_back(new MoveCountCostFunction(options)); + VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; + } + if (lb_options_.tablet_count_cost_weight > 0) { + cost_functions_.emplace_back(new TabletCountCostFunction(options)); + VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; + action_generators_.emplace_back(new TabletCountActionGenerator()); VLOG(20) << "[lb] " << action_generators_[action_generators_.size() - 1]->Name() << " enabled"; - if (lb_options_.tablet_count_cost_weight > 0) { - action_generators_.emplace_back(new TabletCountActionGenerator()); - VLOG(20) << "[lb] " << action_generators_[action_generators_.size() - 1]->Name() << " enabled"; - } - if (lb_options_.size_cost_weight > 0) { - action_generators_.emplace_back(new SizeActionGenerator()); - VLOG(20) << "[lb] " << action_generators_[action_generators_.size() - 1]->Name() << " enabled"; - } - if (lb_options_.read_load_cost_weight > 0) { - action_generators_.emplace_back(new ReadLoadActionGenerator()); - VLOG(20) << "[lb] " << action_generators_[action_generators_.size() - 1]->Name() << " enabled"; - } - if (lb_options_.write_load_cost_weight > 0) { - action_generators_.emplace_back(new WriteLoadActionGenerator()); - VLOG(20) << "[lb] " << action_generators_[action_generators_.size() - 1]->Name() << " enabled"; - } - if (lb_options_.scan_load_cost_weight > 0) { - action_generators_.emplace_back(new ScanLoadActionGenerator()); - VLOG(20) << "[lb] " << action_generators_[action_generators_.size() - 1]->Name() << " enabled"; - } + } + if (lb_options_.size_cost_weight > 0) { + cost_functions_.emplace_back(new SizeCostFunction(options)); + VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; + action_generators_.emplace_back(new SizeActionGenerator()); + VLOG(20) << "[lb] " << action_generators_[action_generators_.size() - 1]->Name() << " enabled"; + } + if (lb_options_.flash_size_cost_weight > 0) { + cost_functions_.emplace_back(new FlashSizeCostFunction(options)); + VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; + action_generators_.emplace_back(new FlashSizeActionGenerator()); + VLOG(20) << "[lb] " << action_generators_[action_generators_.size() - 1]->Name() << " enabled"; + } + if (lb_options_.read_load_cost_weight > 0) { + cost_functions_.emplace_back(new ReadLoadCostFunction(options)); + VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; + action_generators_.emplace_back(new ReadLoadActionGenerator()); + VLOG(20) << "[lb] " << action_generators_[action_generators_.size() - 1]->Name() << " enabled"; + } + if (lb_options_.write_load_cost_weight > 0) { + cost_functions_.emplace_back(new WriteLoadCostFunction(options)); + VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; + action_generators_.emplace_back(new WriteLoadActionGenerator()); + VLOG(20) << "[lb] " << action_generators_[action_generators_.size() - 1]->Name() << " enabled"; + } + if (lb_options_.scan_load_cost_weight > 0) { + cost_functions_.emplace_back(new ScanLoadCostFunction(options)); + VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; + action_generators_.emplace_back(new ScanLoadActionGenerator()); + VLOG(20) << "[lb] " << action_generators_[action_generators_.size() - 1]->Name() << " enabled"; + } + if (lb_options_.lread_cost_weight > 0) { + cost_functions_.emplace_back(new LReadCostFunction(options)); + VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; + action_generators_.emplace_back(new LReadActionGenerator()); + VLOG(20) << "[lb] " << action_generators_[action_generators_.size() - 1]->Name() << " enabled"; + } } -UnityBalancer::~UnityBalancer() { -} +UnityBalancer::~UnityBalancer() {} -bool UnityBalancer::BalanceCluster( - const std::vector>& lb_nodes, - std::vector* plans) { - return BalanceCluster("", lb_nodes, plans); +bool UnityBalancer::BalanceCluster(const std::vector>& lb_nodes, + std::vector* plans) { + return BalanceCluster("", lb_nodes, plans); } -bool UnityBalancer::BalanceCluster( - const std::string& table_name, - const std::vector>& lb_nodes, - std::vector* plans) { - if (lb_nodes.size() <= 1 || plans == nullptr) { - return false; - } +bool UnityBalancer::BalanceCluster(const std::string& table_name, + const std::vector>& lb_nodes, + std::vector* plans) { + if (lb_nodes.size() <= 1 || plans == nullptr) { + return false; + } - VLOG (5) << "[lb] BalanceCluster for table:" << table_name << " begin"; + VLOG(5) << "[lb] BalanceCluster for table:" << table_name << " begin"; - std::shared_ptr cluster = std::make_shared(lb_nodes, lb_options_); + bool skip_meta_node = lb_options_.meta_table_isolate_enabled ? true : false; + std::shared_ptr cluster = + std::make_shared(lb_nodes, lb_options_, skip_meta_node); - if (lb_options_.debug_mode_enabled) { - cluster->DebugCluster(); - } + if (lb_options_.debug_mode_enabled) { + cluster->DebugCluster(); + } - InitCostFunctions(cluster); - - if (!NeedBalance(cluster)) { - return true; - } + InitCostFunctions(cluster); - uint64_t max_steps = std::min(lb_options_.max_compute_steps, static_cast(lb_options_.max_compute_steps_per_tablet * cluster->tablet_num_)); - double init_cost = ComputeCost(std::numeric_limits::max()); - double current_cost = init_cost; - - VLOG(5) << "[lb] compute begin, max_steps:" << max_steps << " init_cost:" << init_cost; - - int64_t start_time_ns = get_micros(); - int64_t cost_time_ms = 0; - uint64_t step = 0; - for (step = 0; step < max_steps; ++step) { - std::shared_ptr action(NextAction(cluster)); - VLOG(20) << "[lb] step:" << step << " action:" << action->ToString(); - - if (!cluster->ValidAction(action)) { - continue; - } - - cluster->DoAction(action); - - if (lb_options_.debug_mode_enabled) { - cluster->DebugCluster(); - } - - double new_cost = ComputeCost(current_cost); - if (new_cost < current_cost) { - VLOG(10) << "[lb] got lower cost by " << action->GetGeneratorName(); - current_cost = new_cost; - } else { - std::shared_ptr undo_action(action->UndoAction()); - VLOG(20) << "[lb] undo action:" << undo_action->ToString(); - cluster->DoAction(undo_action); - - if (lb_options_.debug_mode_enabled) { - cluster->DebugCluster(); - } - } - - cost_time_ms = (get_micros() - start_time_ns) / 1000; - if (static_cast(cost_time_ms) > lb_options_.max_compute_time_ms) { - VLOG(5) << "[lb] stop computing since time reach to max_compute_time_ms_:" - << lb_options_.max_compute_time_ms; - break; - } + if (!NeedBalance(cluster)) { + return true; + } + + uint64_t max_steps = std::min( + lb_options_.max_compute_steps, + static_cast(lb_options_.max_compute_steps_per_tablet * cluster->tablet_num_)); + double init_cost = ComputeCost(std::numeric_limits::max()); + double current_cost = init_cost; + + VLOG(5) << "[lb] compute begin, max_steps:" << max_steps << " init total cost:" << init_cost; + + int64_t start_time_ns = get_micros(); + int64_t cost_time_ms = 0; + uint64_t step = 0; + uint32_t success_step = 0; + for (step = 0; step < max_steps; ++step) { + std::shared_ptr action(NextAction(cluster)); + VLOG(20) << "[lb] step:" << step << " action:" << action->ToString(); + + if (!cluster->ValidAction(action)) { + continue; } - VLOG(5) << "[lb] compute end, compute time(ms):" << cost_time_ms - << " compute steps:" << step - << " init cost:" << init_cost - << " new cost:" << current_cost; + cluster->DoAction(action); - if (current_cost < init_cost) { - CreatePlans(cluster, plans); - VLOG(5) << "[lb] balance plan size:" << plans->size(); - } else { - VLOG(5) << "[lb] no better balance plan"; + if (lb_options_.debug_mode_enabled) { + cluster->DebugCluster(); } - VLOG (5) << "[lb] BalanceCluster for table:" << table_name << " end"; + double new_cost = ComputeCost(current_cost); + if (new_cost < current_cost) { + VLOG(10) << "[lb] step " << step << " got lower cost " << new_cost << " by " + << action->GetGeneratorName(); + current_cost = new_cost; + ++success_step; + } else { + std::shared_ptr undo_action(action->UndoAction()); + VLOG(20) << "[lb] undo action:" << undo_action->ToString(); + cluster->DoAction(undo_action); - return true; -} + if (lb_options_.debug_mode_enabled) { + cluster->DebugCluster(); + } + } -bool UnityBalancer::NeedBalance(const std::shared_ptr& cluster) { - if (cluster->tablet_node_num_ == 0) { - LOG(INFO) << "[lb] empty cluster , no need to balance"; - return false; + if (success_step >= lb_options_.tablet_max_move_num) { + VLOG(5) << "[lb] stop computing since success_step reach to " + "tablet_max_move_num:" << lb_options_.tablet_max_move_num; + break; } - uint32_t bad_node_num = cluster->abnormal_nodes_index_.size() + cluster->read_pending_nodes_index_.size() - + cluster->write_pending_nodes_index_.size() + cluster->scan_pending_nodes_index_.size(); - double bad_node_percent = static_cast(bad_node_num) / static_cast(cluster->tablet_node_num_); - if (bad_node_percent >= lb_options_.bad_node_safemode_percent) { - LOG(INFO) << "[lb] bad node num: " << bad_node_num << ", total node num: " - << cluster->tablet_node_num_ << ", bad node safemode percent: " - << lb_options_.bad_node_safemode_percent << ", too many bad nodes, skip balance"; - return false; + cost_time_ms = (get_micros() - start_time_ns) / 1000; + if (static_cast(cost_time_ms) > lb_options_.max_compute_time_ms) { + VLOG(5) << "[lb] stop computing since time reach to max_compute_time_ms:" + << lb_options_.max_compute_time_ms; + break; } + } - double total_cost = 0.0; - double total_weight = 0.0; + VLOG(5) << "[lb] compute end, compute time(ms):" << cost_time_ms << " compute steps:" << step + << " init total cost:" << init_cost << " new total cost:" << current_cost; - for (const auto& cost_func : cost_functions_) { - double weight = cost_func->GetWeight(); - if (weight <= 0) { - continue; - } + if (current_cost < init_cost) { + CreatePlans(cluster, plans); + VLOG(5) << "[lb] balance plan size:" << plans->size(); + } else { + VLOG(5) << "[lb] no better balance plan"; + } - total_weight += weight; - total_cost += cost_func->Cost() * weight; - } - double cost = total_weight == 0 ? 0 : total_cost / total_weight; + VLOG(5) << "[lb] BalanceCluster for table:" << table_name << " end"; - VLOG(5) << "[lb] NeedBalance compute, total_cost:" << total_cost - << " total_weight:" << total_weight - << " cost:" << cost - << " min_cost_need_balance:" << lb_options_.min_cost_need_balance; + return true; +} - if (total_cost <= 0 || total_weight <= 0 || cost < lb_options_.min_cost_need_balance) { - LOG(INFO) << "[lb] cluster is well balanced, no need to balance"; - return false; - } else { - return true; +bool UnityBalancer::NeedBalance(const std::shared_ptr& cluster) { + if (cluster->tablet_node_num_ <= 1) { + LOG(INFO) << "[lb] no enough nodes to balance"; + return false; + } + + uint32_t heavy_pending_node_num = cluster->HeavyPendingNodeNum(); + uint32_t bad_node_num = cluster->abnormal_nodes_index_.size() + heavy_pending_node_num; + double bad_node_percent = + static_cast(bad_node_num) / static_cast(cluster->tablet_node_num_); + if (bad_node_percent >= lb_options_.bad_node_safemode_percent) { + LOG(INFO) << "[lb] bad node num: " << bad_node_num + << ", total node num: " << cluster->tablet_node_num_ + << ", bad node safemode percent: " << lb_options_.bad_node_safemode_percent + << ", too many bad nodes, skip balance"; + return false; + } + + if (heavy_pending_node_num > 0) { + LOG(INFO) << "[lb] cluster has " << heavy_pending_node_num + << " heavy pending nodes, need balance"; + return true; + } + + double total_cost = 0.0; + double total_weight = 0.0; + + for (const auto& cost_func : cost_functions_) { + double weight = cost_func->GetWeight(); + if (weight <= 0) { + continue; } + double cost = cost_func->Cost(); + VLOG(5) << "[lb] init cost of " << cost_func->Name() << ": " << cost << " * " << weight; + + total_weight += weight; + total_cost += cost * weight; + } + double cost = total_weight == 0 ? 0 : total_cost / total_weight; + + VLOG(5) << "[lb] NeedBalance compute, total_cost:" << total_cost + << " total_weight:" << total_weight << " cost:" << cost + << " min_cost_need_balance:" << lb_options_.min_cost_need_balance; + + if (total_cost <= 0 || total_weight <= 0 || cost < lb_options_.min_cost_need_balance) { + LOG(INFO) << "[lb] cluster is well balanced, no need to balance"; + return false; + } else { + return true; + } } void UnityBalancer::InitCostFunctions(const std::shared_ptr& cluster) { - for (const auto& cost_func : cost_functions_) { - cost_func->Init(cluster); - } + for (const auto& cost_func : cost_functions_) { + cost_func->Init(cluster); + } } double UnityBalancer::ComputeCost(double previous_cost) { - VLOG(20) << "[lb] ComputeCost begin, previous cost:" << previous_cost; - double total_cost = 0.0; - - for (const auto& cost_func : cost_functions_) { - double weight = cost_func->GetWeight(); - if (weight <= 0) { - continue; - } - double cost = cost_func->Cost(); - total_cost += cost * weight; - VLOG(20) << "[lb] " << cost_func->Name() << " cost:" << cost << " weight:" << weight; - if (total_cost > previous_cost) { - break; - } + VLOG(20) << "[lb] ComputeCost begin, previous total cost:" << previous_cost; + double total_cost = 0.0; + + for (const auto& cost_func : cost_functions_) { + double weight = cost_func->GetWeight(); + if (weight <= 0) { + continue; + } + double cost = cost_func->Cost(); + total_cost += cost * weight; + VLOG(20) << "[lb] " << cost_func->Name() << " cost:" << cost << " weight:" << weight; + if (total_cost > previous_cost) { + break; } + } - VLOG(20) << "[lb] ComputeCost end, new cost:" << total_cost; - return total_cost; + VLOG(20) << "[lb] ComputeCost end, new total cost:" << total_cost; + return total_cost; } Action* UnityBalancer::NextAction(const std::shared_ptr& cluster) { - uint32_t rand = Random::Rand(0, action_generators_.size()); - return action_generators_[rand]->Generate(cluster); + uint32_t rand = Random::Rand(0, action_generators_.size()); + return action_generators_[rand]->Generate(cluster); } void UnityBalancer::CreatePlans(const std::shared_ptr& cluster, std::vector* plans) { - plans->clear(); - - for (uint32_t i = 0; i < cluster->tablet_index_to_node_index_.size(); ++i) { - uint32_t initial_node_index = cluster->initial_tablet_index_to_node_index_[i]; - uint32_t new_node_index = cluster->tablet_index_to_node_index_[i]; - - if (initial_node_index != new_node_index) { - // tablet has been moved to another tablet node - Plan plan(cluster->tablets_[i]->tablet_ptr, - cluster->nodes_[initial_node_index]->tablet_node_ptr, - cluster->nodes_[new_node_index]->tablet_node_ptr); - plans->emplace_back(plan); - } + for (uint32_t i = 0; i < cluster->tablet_index_to_node_index_.size(); ++i) { + uint32_t initial_node_index = cluster->initial_tablet_index_to_node_index_[i]; + uint32_t new_node_index = cluster->tablet_index_to_node_index_[i]; + + if (initial_node_index != new_node_index) { + // tablet has been moved to another tablet node + Plan plan(cluster->tablets_[i]->tablet_ptr, + cluster->nodes_[initial_node_index]->tablet_node_ptr, + cluster->nodes_[new_node_index]->tablet_node_ptr); + plans->emplace_back(plan); } + } } -} // namespace load_balancer -} // namespace tera +std::string UnityBalancer::GetName() { return "UnityBalancer"; } + +} // namespace load_balancer +} // namespace tera diff --git a/src/load_balancer/unity_balancer.h b/src/load_balancer/unity_balancer.h index 522acabff..c06c88147 100644 --- a/src/load_balancer/unity_balancer.h +++ b/src/load_balancer/unity_balancer.h @@ -18,41 +18,42 @@ namespace tera { namespace load_balancer { class UnityBalancer : public Balancer { -public: - explicit UnityBalancer(const LBOptions& options); - virtual ~UnityBalancer(); + public: + explicit UnityBalancer(const LBOptions& options); + virtual ~UnityBalancer(); - virtual bool BalanceCluster( - const std::vector>& lb_nodes, - std::vector* plans) override; + virtual bool BalanceCluster(const std::vector>& lb_nodes, + std::vector* plans) override; - // if table_name is empty, balance whole culster, - // otherwhise balance the specified table of table_name - virtual bool BalanceCluster( - const std::string& table_name, - const std::vector>& lb_nodes, - std::vector* plans) override; + // if table_name is empty, balance whole culster, + // otherwhise balance the specified table of table_name + virtual bool BalanceCluster(const std::string& table_name, + const std::vector>& lb_nodes, + std::vector* plans) override; - virtual bool NeedBalance(const std::shared_ptr& cluster); + bool NeedBalance(const std::shared_ptr& cluster); -protected: - virtual void InitCostFunctions(const std::shared_ptr& cluster); + virtual std::string GetName() override; - virtual double ComputeCost(double previous_cost); + protected: + void InitCostFunctions(const std::shared_ptr& cluster); - virtual Action* NextAction(const std::shared_ptr& cluster); + double ComputeCost(double previous_cost); - // diff the initial cluster state with the current cluster state, then create plans - virtual void CreatePlans(const std::shared_ptr& cluster, std::vector* plans); + Action* NextAction(const std::shared_ptr& cluster); -private: - std::vector> cost_functions_; - std::vector> action_generators_; + // diff the initial cluster state with the current cluster state, then create + // plans + void CreatePlans(const std::shared_ptr& cluster, std::vector* plans); - LBOptions lb_options_; + private: + std::vector> cost_functions_; + std::vector> action_generators_; + + LBOptions lb_options_; }; -} // namespace load_balancer -} // namespace tera +} // namespace load_balancer +} // namespace tera -#endif // TERA_LOAD_BALANCER_UNITY_BALANCER_H_ +#endif // TERA_LOAD_BALANCER_UNITY_BALANCER_H_ diff --git a/src/master/abnormal_node_mgr.cc b/src/master/abnormal_node_mgr.cc new file mode 100644 index 000000000..cd21525bf --- /dev/null +++ b/src/master/abnormal_node_mgr.cc @@ -0,0 +1,132 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include + +#include "common/timer.h" +#include "master/abnormal_node_mgr.h" + +namespace tera { + +namespace master { + +AbnormalNodeMgr::AbnormalNodeMgr() {} + +AbnormalNodeMgr::~AbnormalNodeMgr() {} + +void AbnormalNodeMgr::RecordNodeDelete(const std::string& addr, const int64_t delete_time) { + if (FLAGS_abnormal_node_trigger_count < 1) { + return; + } + VLOG(30) << "append node delete time, node: " << addr << ", time: " << get_time_str(delete_time); + + MutexLock lock(&mutex_); + NodeAbnormalInfo& info = nodes_abnormal_infos_[addr]; + info.deleted_times.emplace_back(delete_time); + if (info.deleted_times.size() > static_cast(FLAGS_abnormal_node_trigger_count)) { + info.deleted_times.erase(info.deleted_times.begin()); + } + + if (DeleteTooFrequent(info.deleted_times)) { + int64_t last_delete_time = info.deleted_times[info.deleted_times.size() - 1]; + // avoiding overflow, 30 is large enough + int64_t recovery_wait_time = FLAGS_abnormal_node_auto_recovery_period_s + << (info.abnormal_count > 30 ? 30 : info.abnormal_count); + if (recovery_wait_time > 24 * 3600) { // no more than 24 hours + recovery_wait_time = 24 * 3600; + } + info.recovery_time = last_delete_time + recovery_wait_time; + + ++info.abnormal_count; + info.deleted_times.clear(); + + VLOG(30) << "delete too frequent(delete " << FLAGS_abnormal_node_trigger_count << " times in " + << FLAGS_abnormal_node_check_period_s << "s), abnormal count: " << info.abnormal_count + << ", recovery_time: " << get_time_str(info.recovery_time); + } + + if (delay_add_nodes_.find(addr) != delay_add_nodes_.end()) { + delay_add_nodes_.erase(addr); + abnormal_nodes_count_.Set(delay_add_nodes_.size()); + VLOG(30) << "cancel delay add node, addr: " << addr; + } +} + +bool AbnormalNodeMgr::IsAbnormalNode(const std::string& addr, const std::string& uuid) { + MutexLock lock(&mutex_); + if (nodes_abnormal_infos_.find(addr) == nodes_abnormal_infos_.end()) { + return false; + } + + NodeAbnormalInfo& info = nodes_abnormal_infos_[addr]; + if (get_micros() / 1000000 >= info.recovery_time) { + return false; + } else { + DelayAddNode(addr, uuid); + return true; + } +} + +std::string AbnormalNodeMgr::GetNodeInfo(const std::string& addr) { + MutexLock lock(&mutex_); + std::string ret = ""; + if (nodes_abnormal_infos_.find(addr) != nodes_abnormal_infos_.end()) { + ret = ret + "abnormal node: " + addr + ", abnormal count: " + + std::to_string(nodes_abnormal_infos_[addr].abnormal_count) + ", recovery time: " + + get_time_str(nodes_abnormal_infos_[addr].recovery_time); + } + return ret; +} + +void AbnormalNodeMgr::ConsumeRecoveredNodes(std::unordered_map* nodes) { + MutexLock lock(&mutex_); + for (const auto& node : delay_add_nodes_) { + if (get_micros() / 1000000 >= node.second.recovery_time) { + nodes->emplace(node.first, node.second.uuid); + } + } + + for (const auto& node : *nodes) { + delay_add_nodes_.erase(node.first); + } + abnormal_nodes_count_.Set(delay_add_nodes_.size()); +} + +void AbnormalNodeMgr::GetDelayAddNodes(std::unordered_map* nodes) { + MutexLock lock(&mutex_); + for (const auto& node : delay_add_nodes_) { + nodes->emplace(node.first, node.second.uuid); + } +} + +void AbnormalNodeMgr::DelayAddNode(const std::string& addr, const std::string& uuid) { + VLOG(30) << "delay add node, addr: " << addr << ", uuid: " << uuid; + mutex_.AssertHeld(); + DelayAddNodeInfo& info = delay_add_nodes_[addr]; + info.uuid = uuid; + assert(nodes_abnormal_infos_.find(addr) != nodes_abnormal_infos_.end()); + info.recovery_time = nodes_abnormal_infos_[addr].recovery_time; + abnormal_nodes_count_.Set(delay_add_nodes_.size()); +} + +bool AbnormalNodeMgr::DeleteTooFrequent(const std::vector& times) { + if (FLAGS_abnormal_node_trigger_count < 2) { + return false; + } + assert(times.size() <= static_cast(FLAGS_abnormal_node_trigger_count)); + if (times.size() < static_cast(FLAGS_abnormal_node_trigger_count)) { + return false; + } + + if (times[FLAGS_abnormal_node_trigger_count - 1] - times[0] <= + FLAGS_abnormal_node_check_period_s) { + return true; + } else { + return false; + } +} + +} // namespace master + +} // namespace tera diff --git a/src/master/abnormal_node_mgr.h b/src/master/abnormal_node_mgr.h new file mode 100644 index 000000000..3150a181c --- /dev/null +++ b/src/master/abnormal_node_mgr.h @@ -0,0 +1,69 @@ +#pragma once + +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include +#include + +#include + +#include "common/metric/metric_counter.h" +#include "common/mutex.h" + +DECLARE_int64(abnormal_node_check_period_s); +DECLARE_int32(abnormal_node_trigger_count); +DECLARE_int64(abnormal_node_auto_recovery_period_s); + +namespace tera { + +namespace master { + +class AbnormalNodeMgr { + public: + AbnormalNodeMgr(); + ~AbnormalNodeMgr(); + + void RecordNodeDelete(const std::string& addr, const int64_t delete_time); + + bool IsAbnormalNode(const std::string& addr, const std::string& uuid); + + std::string GetNodeInfo(const std::string& addr); + + void ConsumeRecoveredNodes(std::unordered_map* nodes); + + void GetDelayAddNodes(std::unordered_map* nodes); + + private: + struct NodeAbnormalInfo { + std::vector deleted_times; + int64_t abnormal_count; + int64_t recovery_time; + + NodeAbnormalInfo() : abnormal_count(0), recovery_time(0) {} + }; + + std::unordered_map nodes_abnormal_infos_; + + struct DelayAddNodeInfo { + std::string uuid; + int64_t recovery_time; + }; + + std::unordered_map delay_add_nodes_; + + mutable Mutex mutex_; + MetricCounter abnormal_nodes_count_{ + "tera_master_abnormal_nodes_count", {SubscriberType::LATEST}, false}; + + private: + void DelayAddNode(const std::string& addr, const std::string& uuid); + + bool DeleteTooFrequent(const std::vector& times); +}; + +} // namespace master + +} // namespace tera diff --git a/src/master/availability.cc b/src/master/availability.cc index a747acf86..42552c96f 100644 --- a/src/master/availability.cc +++ b/src/master/availability.cc @@ -13,10 +13,14 @@ #include "common/timer.h" #include "utils/string_util.h" -DEFINE_bool(tera_master_availability_show_details_enabled, false, "whether show details of not-ready tablets"); -DEFINE_int64(tera_master_availability_error_threshold, 600, "10 minutes, the threshold (in s) of error availability"); -DEFINE_int64(tera_master_availability_fatal_threshold, 3600, "1 hour, the threshold (in s) of fatal availability"); -DEFINE_int64(tera_master_availability_warning_threshold, 60, "1 minute, the threshold (in s) of warning availability"); +DEFINE_bool(tera_master_availability_show_details_enabled, false, + "whether show details of not-ready tablets"); +DEFINE_int64(tera_master_availability_error_threshold, 600, + "10 minutes, the threshold (in s) of error availability"); +DEFINE_int64(tera_master_availability_fatal_threshold, 3600, + "1 hour, the threshold (in s) of fatal availability"); +DEFINE_int64(tera_master_availability_warning_threshold, 60, + "1 minute, the threshold (in s) of warning availability"); DEFINE_int64(tera_master_not_available_threshold, 0, "the threshold (in s) of not available"); DECLARE_string(tera_master_meta_table_name); DECLARE_string(tera_master_meta_table_path); @@ -25,197 +29,191 @@ namespace tera { namespace master { static std::string GetNameFromPath(const std::string& path) { - if (path == FLAGS_tera_master_meta_table_path) { - return FLAGS_tera_master_meta_table_name; - } - std::vector t; - SplitString(path, "/", &t); // table_name/tablet00...001 - if (!t.empty()) { - return t[0]; - } else { - return ""; - } + if (path == FLAGS_tera_master_meta_table_path) { + return FLAGS_tera_master_meta_table_name; + } + std::vector t; + SplitString(path, "/", &t); // table_name/tablet00...001 + if (!t.empty()) { + return t[0]; + } else { + return ""; + } } - -TabletAvailability::TabletAvailability(std::shared_ptr t) - : tablet_manager_(t) { - start_ts_ = get_micros(); +TabletAvailability::TabletAvailability(std::shared_ptr t) : tablet_manager_(t) { + start_ts_ = get_micros(); } void TabletAvailability::AddNotReadyTablet(const std::string& path, const TabletMeta::TabletStatus& tablet_status) { - if (tablet_status == TabletMeta::kTabletReady || tablet_status == TabletMeta::kTabletDisable) { - return; - } - - MutexLock lock(&mutex_); - int64_t ts = get_micros(); - tablets_.insert(std::pair(path, ts)); - auto iter = not_ready_tablet_metrics_.emplace( - path, - MetricCounter{ - metric_name_, - "table:" + GetNameFromPath(path) + ",tablet:" + path, - {SubscriberType::LATEST}, - false - }); - - if (iter.second) { - VLOG(12) << "[Add NotReady To Metric]: " << static_cast(TabletErrorStatus::kNotReady); - iter.first->second.Set(static_cast(TabletErrorStatus::kNotReady)); - } else { - VLOG(12) << "[Add NotReady To Metric Failed]: " << static_cast(TabletErrorStatus::kNotReady); - } - - if (tablets_hist_cost_[path].start_ts > 0) { - VLOG(10) << "notready again " << path; - return; - } - - tablets_hist_cost_[path].start_ts = ts; - tablets_hist_cost_[path].notready_num++; - VLOG(10) << "addnotready " << path - << ", total_cost " << tablets_hist_cost_[path].total - << ", start_ts " << tablets_hist_cost_[path].start_ts - << ", notready " << tablets_hist_cost_[path].notready_num - << ", reready " << tablets_hist_cost_[path].reready_num; + if (tablet_status == TabletMeta::kTabletReady || tablet_status == TabletMeta::kTabletDisable) { + return; + } + + MutexLock lock(&mutex_); + int64_t ts = get_micros(); + tablets_.insert(std::pair(path, ts)); + auto iter = not_ready_tablet_metrics_.emplace( + path, MetricCounter{metric_name_, + "table:" + GetNameFromPath(path) + ",tablet:" + path, + {SubscriberType::LATEST}, + false}); + + if (iter.second) { + VLOG(12) << "[Add NotReady To Metric]: " << static_cast(TabletErrorStatus::kNotReady); + iter.first->second.Set(static_cast(TabletErrorStatus::kNotReady)); + } else { + VLOG(12) << "[Add NotReady To Metric Failed]: " + << static_cast(TabletErrorStatus::kNotReady); + } + + if (tablets_hist_cost_[path].start_ts > 0) { + VLOG(10) << "notready again " << path; + return; + } + + tablets_hist_cost_[path].start_ts = ts; + tablets_hist_cost_[path].notready_num++; + VLOG(10) << "addnotready " << path << ", total_cost " << tablets_hist_cost_[path].total + << ", start_ts " << tablets_hist_cost_[path].start_ts << ", notready " + << tablets_hist_cost_[path].notready_num << ", reready " + << tablets_hist_cost_[path].reready_num; } void TabletAvailability::EraseNotReadyTablet(const std::string& path) { - MutexLock lock(&mutex_); - tablets_.erase(path); - not_ready_tablet_metrics_.erase(path); + MutexLock lock(&mutex_); + tablets_.erase(path); + not_ready_tablet_metrics_.erase(path); + + if (tablets_hist_cost_.find(path) == tablets_hist_cost_.end() || + tablets_hist_cost_[path].start_ts == 0) { + VLOG(10) << "reready again " << path; + return; + } + + int64_t ts = get_micros(); + if (tablets_hist_cost_[path].start_ts > 0) { + tablets_hist_cost_[path].total += ts - tablets_hist_cost_[path].start_ts; + } + tablets_hist_cost_[path].start_ts = 0; + tablets_hist_cost_[path].reready_num++; + VLOG(10) << "delnotready " << path << ", total_cost " << tablets_hist_cost_[path].total + << ", start_ts " << tablets_hist_cost_[path].start_ts << ", notready " + << tablets_hist_cost_[path].notready_num << ", reready " + << tablets_hist_cost_[path].reready_num; +} - if (tablets_hist_cost_.find(path) == tablets_hist_cost_.end() || - tablets_hist_cost_[path].start_ts == 0) { - VLOG(10) << "reready again " << path; - return; +void TabletAvailability::LogAvailability() { + int64_t not_avai_count = 0; + int64_t not_avai_warning = 0; + int64_t not_avai_error = 0; + int64_t not_avai_fatal = 0; + int64_t start = get_micros(); + std::map tablets_snapshot; + std::map::iterator it; + std::set ignore_tables; + { + MutexLock lock(&mutex_); + tablets_snapshot = tablets_; + } + for (it = tablets_snapshot.begin(); it != tablets_snapshot.end(); ++it) { + std::string table_name = GetNameFromPath(it->first); + TablePtr table; + if (!tablet_manager_->FindTable(table_name, &table)) { + LOG(ERROR) << "[availability] unknown table:" << table_name; + ignore_tables.insert(it->first); + continue; + } + if (table->GetStatus() != kTableEnable) { + ignore_tables.insert(it->first); } + } + int64_t all_tablets = tablet_manager_->GetAllTabletsCount(); - int64_t ts = get_micros(); - if (tablets_hist_cost_[path].start_ts > 0) { - tablets_hist_cost_[path].total += ts - tablets_hist_cost_[path].start_ts; + MutexLock lock(&mutex_); + for (it = tablets_.begin(); it != tablets_.end(); ++it) { + if (ignore_tables.find(it->first) != ignore_tables.end()) { + continue; } - tablets_hist_cost_[path].start_ts = 0; - tablets_hist_cost_[path].reready_num++; - VLOG(10) << "delnotready " << path - << ", total_cost " << tablets_hist_cost_[path].total - << ", start_ts " << tablets_hist_cost_[path].start_ts - << ", notready " << tablets_hist_cost_[path].notready_num - << ", reready " << tablets_hist_cost_[path].reready_num; -} -void TabletAvailability::LogAvailability() { - int64_t not_avai_count = 0; - int64_t not_avai_warning = 0; - int64_t not_avai_error = 0; - int64_t not_avai_fatal = 0; - int64_t start = get_micros(); - std::map tablets_snapshot; - std::map::iterator it; - std::set ignore_tables; - { - MutexLock lock(&mutex_); - tablets_snapshot = tablets_; + auto metric_iter = not_ready_tablet_metrics_.find(it->first); + assert(metric_iter != not_ready_tablet_metrics_.end()); + + if ((start - it->second) > FLAGS_tera_master_not_available_threshold * 1000 * 1000LL) { + VLOG(12) << "[availability] not available:" << it->first; + not_avai_count++; } - for (it = tablets_snapshot.begin(); it != tablets_snapshot.end(); ++it) { - std::string table_name = GetNameFromPath(it->first); - TablePtr table; - if (!tablet_manager_->FindTable(table_name, &table)) { - LOG(ERROR) << "[availability] unknown table:" << table_name; - ignore_tables.insert(it->first); - continue; - } - if (table->GetStatus() != kTableEnable) { - ignore_tables.insert(it->first); - } + if ((start - it->second) > FLAGS_tera_master_availability_fatal_threshold * 1000 * 1000LL) { + not_avai_fatal++; + metric_iter->second.Set(static_cast(TabletErrorStatus::kFatal)); + if (FLAGS_tera_master_availability_show_details_enabled) { + LOG(INFO) << "[availability] fatal-tablet:" << it->first; + } + } else if ((start - it->second) > + FLAGS_tera_master_availability_error_threshold * 1000 * 1000LL) { + not_avai_error++; + metric_iter->second.Set(static_cast(TabletErrorStatus::kError)); + if (FLAGS_tera_master_availability_show_details_enabled) { + LOG(INFO) << "[availability] error-tablet:" << it->first; + } + } else if ((start - it->second) > + FLAGS_tera_master_availability_warning_threshold * 1000 * 1000LL) { + not_avai_warning++; + metric_iter->second.Set(static_cast(TabletErrorStatus::kWarning)); } - int64_t all_tablets = tablet_manager_->GetAllTabletsCount(); - - MutexLock lock(&mutex_); - for (it = tablets_.begin(); it != tablets_.end(); ++it) { - if (ignore_tables.find(it->first) != ignore_tables.end() ) { - continue; - } - - auto metric_iter = not_ready_tablet_metrics_.find(it->first); - assert(metric_iter != not_ready_tablet_metrics_.end()); - - if ((start - it->second) > FLAGS_tera_master_not_available_threshold * 1000 * 1000LL) { - VLOG(12) << "[availability] not available:" << it->first; - not_avai_count++; - } - if ((start - it->second) > FLAGS_tera_master_availability_fatal_threshold * 1000 * 1000LL) { - not_avai_fatal++; - metric_iter->second.Set(static_cast(TabletErrorStatus::kFatal)); - if (FLAGS_tera_master_availability_show_details_enabled) { - LOG(INFO) << "[availability] fatal-tablet:" << it->first; - } - } else if ((start - it->second) > FLAGS_tera_master_availability_error_threshold * 1000 * 1000LL) { - not_avai_error++; - metric_iter->second.Set(static_cast(TabletErrorStatus::kError)); - if (FLAGS_tera_master_availability_show_details_enabled) { - LOG(INFO) << "[availability] error-tablet:" << it->first; - } - } else if ((start - it->second) > FLAGS_tera_master_availability_warning_threshold * 1000 * 1000LL) { - not_avai_warning++; - metric_iter->second.Set(static_cast(TabletErrorStatus::kWarning)); - } + } + + LOG(INFO) << "[availability][current-status] fatal=" << not_avai_fatal + << " f-ratio=" << RoundNumberToNDecimalPlaces((double)not_avai_fatal / all_tablets, 6) + << ", error=" << not_avai_error + << " e-ratio=" << RoundNumberToNDecimalPlaces((double)not_avai_error / all_tablets, 6) + << ", warn=" << not_avai_warning << " w-ratio=" + << RoundNumberToNDecimalPlaces((double)not_avai_warning / all_tablets, 6); + + LOG(INFO) << "[availability][current-status] (not-available/not-ready/all-tablets: " + << not_avai_count << "/" << tablets_.size() << "/" << all_tablets << ")" + << " available tablets percentage: " << 1 - not_avai_count / (double)all_tablets; + + int64_t total_time = 0, all_time = start - start_ts_; + start_ts_ = start; + int64_t total_notready = 0, total_reready = 0; + std::map::iterator stat_it; + for (stat_it = tablets_hist_cost_.begin(); stat_it != tablets_hist_cost_.end();) { + if (stat_it->second.start_ts > 0) { + stat_it->second.total += start - stat_it->second.start_ts; } - LOG(INFO) << "[availability][current-status] fatal=" << not_avai_fatal - << " f-ratio=" << RoundNumberToNDecimalPlaces((double)not_avai_fatal/all_tablets, 6) - << ", error=" << not_avai_error - << " e-ratio=" << RoundNumberToNDecimalPlaces((double)not_avai_error/all_tablets, 6) - << ", warn=" << not_avai_warning - << " w-ratio=" << RoundNumberToNDecimalPlaces((double)not_avai_warning/all_tablets, 6); - - LOG(INFO) << "[availability][current-status] (not-available/not-ready/all-tablets: " - << not_avai_count << "/" << tablets_.size() << "/" << all_tablets << ")" - << " available tablets percentage: " << 1 - not_avai_count/(double)all_tablets; - - int64_t total_time = 0, all_time = start - start_ts_; - start_ts_ = start; - int64_t total_notready = 0, total_reready = 0; - std::map::iterator stat_it; - for (stat_it = tablets_hist_cost_.begin(); - stat_it != tablets_hist_cost_.end();) { - if (stat_it->second.start_ts > 0) { - stat_it->second.total += start - stat_it->second.start_ts; - } - - total_time += stat_it->second.total; - total_notready += stat_it->second.notready_num; - total_reready += stat_it->second.reready_num; - - if (stat_it->second.start_ts > 0) { - stat_it->second.total = 0; - stat_it->second.start_ts = start; - stat_it->second.notready_num = 1; - stat_it->second.reready_num = 0; - ++stat_it; - } else { - tablets_hist_cost_.erase(stat_it++); - } + total_time += stat_it->second.total; + total_notready += stat_it->second.notready_num; + total_reready += stat_it->second.reready_num; + + if (stat_it->second.start_ts > 0) { + stat_it->second.total = 0; + stat_it->second.start_ts = start; + stat_it->second.notready_num = 1; + stat_it->second.reready_num = 0; + ++stat_it; + } else { + tablets_hist_cost_.erase(stat_it++); } - int64_t nr_notready_tablets = tablets_hist_cost_.size(); - double time_percent = 1.0 - (double)total_time / (all_time * all_tablets + 1); - ready_time_percent.Set(static_cast(time_percent * 10000)); - - LOG(INFO) << "[availability][tablet_staticstic] time_interval: " << all_time / 1000 - << ", notready_time: " << total_time / 1000 - << ", total_time: " << (all_time * all_tablets) / 1000 - << ", ready_time_percent: " << RoundNumberToNDecimalPlaces(1.0 - (double)total_time / (all_time * all_tablets + 1), 6) - << ", notready_tablets: " << nr_notready_tablets - << ", total_tabltes: " << all_tablets - << ", ready_tablets_percent: " << RoundNumberToNDecimalPlaces(1.0 - (double)nr_notready_tablets / (all_tablets + 1), 6) - << ", notready_count: " << total_notready - << ", reready_count: " << total_reready; - - int64_t cost = get_micros() - start; - LOG(INFO) << "[availability] cost time:" << cost/1000 << " ms"; + } + int64_t nr_notready_tablets = tablets_hist_cost_.size(); + double time_percent = 1.0 - (double)total_time / (all_time * all_tablets + 1); + ready_time_percent.Set(static_cast(time_percent * 10000)); + + LOG(INFO) << "[availability][tablet_staticstic] time_interval: " << all_time / 1000 + << ", notready_time: " << total_time / 1000 + << ", total_time: " << (all_time * all_tablets) / 1000 << ", ready_time_percent: " + << RoundNumberToNDecimalPlaces(1.0 - (double)total_time / (all_time * all_tablets + 1), + 6) << ", notready_tablets: " << nr_notready_tablets + << ", total_tabltes: " << all_tablets << ", ready_tablets_percent: " + << RoundNumberToNDecimalPlaces(1.0 - (double)nr_notready_tablets / (all_tablets + 1), 6) + << ", notready_count: " << total_notready << ", reready_count: " << total_reready; + + int64_t cost = get_micros() - start; + LOG(INFO) << "[availability] cost time:" << cost / 1000 << " ms"; } -} // master -} // tera +} // master +} // tera diff --git a/src/master/availability.h b/src/master/availability.h index 82b39eeae..95fcc905c 100644 --- a/src/master/availability.h +++ b/src/master/availability.h @@ -17,44 +17,36 @@ namespace tera { namespace master { struct TimeStatistic { - int64_t total; - int64_t start_ts; - int64_t notready_num; - int64_t reready_num; - TimeStatistic() : total(0), start_ts(0), notready_num(0), reready_num(0) {} + int64_t total; + int64_t start_ts; + int64_t notready_num; + int64_t reready_num; + TimeStatistic() : total(0), start_ts(0), notready_num(0), reready_num(0) {} }; class TabletAvailability { -public: - TabletAvailability(std::shared_ptr t); - void LogAvailability(); - void AddNotReadyTablet(const std::string& path, - const TabletMeta::TabletStatus& tablet_status); - void EraseNotReadyTablet(const std::string& id); - -private: - - enum class TabletErrorStatus { - kNotReady = 1, - kFatal = 2, - kError = 3, - kWarning = 4 - }; - - Mutex mutex_; - std::shared_ptr tablet_manager_; - - std::map tablets_; - std::map not_ready_tablet_metrics_; - MetricCounter ready_time_percent{"tera_master_tablet_ready_time_percent", - {SubscriberType::LATEST}, - false}; - - int64_t start_ts_; - std::map tablets_hist_cost_; - const std::string metric_name_{"tera_master_tablet_availability"}; + public: + TabletAvailability(std::shared_ptr t); + void LogAvailability(); + void AddNotReadyTablet(const std::string& path, const TabletMeta::TabletStatus& tablet_status); + void EraseNotReadyTablet(const std::string& id); + + private: + enum class TabletErrorStatus { kNotReady = 1, kFatal = 2, kError = 3, kWarning = 4 }; + + Mutex mutex_; + std::shared_ptr tablet_manager_; + + std::map tablets_; + std::map not_ready_tablet_metrics_; + MetricCounter ready_time_percent{ + "tera_master_tablet_ready_time_percent", {SubscriberType::LATEST}, false}; + + int64_t start_ts_; + std::map tablets_hist_cost_; + const std::string metric_name_{"tera_master_tablet_availability"}; }; -} // master -} // tera +} // master +} // tera -#endif // TERA_MASTER_TABLET_AVAILABILITY_H_ +#endif // TERA_MASTER_TABLET_AVAILABILITY_H_ diff --git a/src/master/create_table_procedure.cc b/src/master/create_table_procedure.cc index 0fc3b31a6..f3cadf0a5 100644 --- a/src/master/create_table_procedure.cc +++ b/src/master/create_table_procedure.cc @@ -20,192 +20,195 @@ DECLARE_bool(tera_only_root_create_table); namespace tera { namespace master { -std::map CreateTableProcedure::phase_handlers_ { - {CreateTablePhase::kPrepare, std::bind(&CreateTableProcedure::PreCheckHandler, _1, _2)}, - {CreateTablePhase::kUpdateMeta, std::bind(&CreateTableProcedure::UpdateMetaHandler, _1, _2)}, - {CreateTablePhase::kLoadTablets, std::bind(&CreateTableProcedure::LoadTabletsHandler, _1, _2)}, - {CreateTablePhase::kEofPhase, std::bind(&CreateTableProcedure::EofHandler, _1, _2)} -}; - -CreateTableProcedure::CreateTableProcedure(const CreateTableRequest* request, - CreateTableResponse* response, +std::map CreateTableProcedure::phase_handlers_{ + {CreateTablePhase::kPrepare, std::bind(&CreateTableProcedure::PreCheckHandler, _1, _2)}, + {CreateTablePhase::kUpdateMeta, std::bind(&CreateTableProcedure::UpdateMetaHandler, _1, _2)}, + {CreateTablePhase::kLoadTablets, std::bind(&CreateTableProcedure::LoadTabletsHandler, _1, _2)}, + {CreateTablePhase::kEofPhase, std::bind(&CreateTableProcedure::EofHandler, _1, _2)}}; + +CreateTableProcedure::CreateTableProcedure(const CreateTableRequest* request, + CreateTableResponse* response, google::protobuf::Closure* closure, - ThreadPool* thread_pool) : - request_(request), - response_(response), - rpc_closure_(closure), - table_name_(request_->table_name()), - update_meta_(false), - done_(false), - thread_pool_(thread_pool) { - PROC_LOG(INFO) << "create table: " << table_name_ << " begin"; - SetNextPhase(CreateTablePhase::kPrepare); + ThreadPool* thread_pool) + : request_(request), + response_(response), + rpc_closure_(closure), + table_name_(request_->table_name()), + update_meta_(false), + done_(false), + thread_pool_(thread_pool) { + PROC_LOG(INFO) << "create table: " << table_name_ << " begin"; + SetNextPhase(CreateTablePhase::kPrepare); } std::string CreateTableProcedure::ProcId() const { - static std::string prefix("CreateTable:"); - return prefix + std::string(table_name_); + static std::string prefix("CreateTable:"); + return prefix + std::string(table_name_); } void CreateTableProcedure::RunNextStage() { - CreateTablePhase phase = GetCurrentPhase(); - auto it = phase_handlers_.find(phase); - PROC_CHECK(it != phase_handlers_.end()) << "illegal phase: " << phase << ", table: " << table_name_; - CreateTablePhaseHandler handler = it->second; - handler(this, phase); + CreateTablePhase phase = GetCurrentPhase(); + auto it = phase_handlers_.find(phase); + PROC_CHECK(it != phase_handlers_.end()) << "illegal phase: " << phase + << ", table: " << table_name_; + CreateTablePhaseHandler handler = it->second; + handler(this, phase); } void CreateTableProcedure::PreCheckHandler(const CreateTablePhase&) { - { - TablePtr table; - if (MasterEnv().GetTabletManager()->FindTable(request_->table_name(), &table)) { - PROC_LOG(ERROR) << "Fail to create table: " << request_->table_name() - << ", table already exist"; - EnterEofPhaseWithResponseStatus(kTableExist); - return; - } - if (FLAGS_tera_acl_enabled && - !MasterEnv().GetMaster()->IsRootUser(request_->user_token()) && - FLAGS_tera_only_root_create_table) { - EnterEofPhaseWithResponseStatus(kNotPermission); - return; - } + { + TablePtr table; + if (MasterEnv().GetTabletManager()->FindTable(request_->table_name(), &table)) { + PROC_LOG(ERROR) << "Fail to create table: " << request_->table_name() + << ", table already exist"; + EnterEofPhaseWithResponseStatus(kTableExist); + return; } - - // try clean env, if there is a dir same as table_name, delete it first - if (!io::MoveEnvDirToTrash(request_->table_name())) { - PROC_LOG(ERROR) << "Fail to create table: " << request_->table_name() - << ", cannot move old table dir to trash"; - EnterEofPhaseWithResponseStatus(kTableExist); - return; + if (FLAGS_tera_acl_enabled && !MasterEnv().GetMaster()->IsRootUser(request_->user_token()) && + FLAGS_tera_only_root_create_table) { + EnterEofPhaseWithResponseStatus(kNotPermission); + return; } - - int32_t tablet_num = request_->delimiters_size() + 1; - bool delivalid = true; - for (int32_t i = 1; i < tablet_num - 1; i++) { - // TODO: Use user defined comparator - if (request_->delimiters(i) <= request_->delimiters(i-1)) { - delivalid = false; - break; - } + } + + // try clean env, if there is a dir same as table_name, delete it first + if (!io::MoveEnvDirToTrash(request_->table_name())) { + PROC_LOG(ERROR) << "Fail to create table: " << request_->table_name() + << ", cannot move old table dir to trash"; + EnterEofPhaseWithResponseStatus(kTableExist); + return; + } + + int32_t tablet_num = request_->delimiters_size() + 1; + bool delivalid = true; + for (int32_t i = 1; i < tablet_num - 1; i++) { + // TODO: Use user defined comparator + if (request_->delimiters(i) <= request_->delimiters(i - 1)) { + delivalid = false; + break; } - if (tablet_num > FLAGS_tera_max_pre_assign_tablet_num || !delivalid - || request_->schema().locality_groups_size() < 1) { - if (tablet_num > FLAGS_tera_max_pre_assign_tablet_num) { - PROC_LOG(WARNING) << "Too many pre-create tablets " << tablet_num; - } else if (!delivalid) { - PROC_LOG(WARNING) << "Invalid delimiters for " << request_->table_name(); - } else { - PROC_LOG(WARNING) << "No LocalityGroupSchema for " << request_->table_name(); - } - EnterEofPhaseWithResponseStatus(kInvalidArgument); - return; + } + if (tablet_num > FLAGS_tera_max_pre_assign_tablet_num || !delivalid || + request_->schema().locality_groups_size() < 1) { + if (tablet_num > FLAGS_tera_max_pre_assign_tablet_num) { + PROC_LOG(WARNING) << "Too many pre-create tablets " << tablet_num; + } else if (!delivalid) { + PROC_LOG(WARNING) << "Invalid delimiters for " << request_->table_name(); + } else { + PROC_LOG(WARNING) << "No LocalityGroupSchema for " << request_->table_name(); } - - const std::string& table_name = request_->table_name(); - StatusCode status = kMasterOk; - tablets_.reserve(tablet_num); - meta_records_.reserve(tablet_num + 1); - - table_ = TabletManager::CreateTable(table_name, request_->schema(), kTableEnable); - table_->LockTransition(); - MasterEnv().GetTabletManager()->AddTable(table_, &status); - PackMetaWriteRecords(table_, false, meta_records_); - for (int32_t i = 1; i <= tablet_num; ++i) { - std::string path = leveldb::GetTabletPathFromNum(request_->table_name(), i); - const std::string& start_key = (i == 1) ? "" : request_->delimiters(i - 2); - const std::string& end_key = (i == tablet_num) ? "" : request_->delimiters(i - 1); - TabletMeta meta; - TabletManager::PackTabletMeta(&meta, table_name, start_key, end_key, path, "", - TabletMeta::kTabletOffline, FLAGS_tera_tablet_write_block_size * 1024); - TabletPtr tablet = TabletManager::CreateTablet(meta); - tablet->LockTransition(); - if (!table_->AddTablet(tablet, &status)) { - PROC_LOG(WARNING) << "add tablet failed" << tablet->GetPath(); - EnterEofPhaseWithResponseStatus(status); - MasterEnv().GetTabletManager()->DeleteTable(table_name_, &status); - return; - - } - PackMetaWriteRecords(tablet, false, meta_records_); - tablets_.emplace_back(tablet); + EnterEofPhaseWithResponseStatus(kInvalidArgument); + return; + } + + const std::string& table_name = request_->table_name(); + StatusCode status = kMasterOk; + tablets_.reserve(tablet_num); + meta_records_.reserve(tablet_num + 1); + + table_ = TabletManager::CreateTable(table_name, request_->schema(), kTableEnable); + table_->LockTransition(); + PackMetaWriteRecords(table_, false, meta_records_); + for (int32_t i = 1; i <= tablet_num; ++i) { + std::string path = leveldb::GetTabletPathFromNum(request_->table_name(), i); + const std::string& start_key = (i == 1) ? "" : request_->delimiters(i - 2); + const std::string& end_key = (i == tablet_num) ? "" : request_->delimiters(i - 1); + TabletMeta meta; + TabletManager::PackTabletMeta(&meta, table_name, start_key, end_key, path, "", + TabletMeta::kTabletOffline, + FLAGS_tera_tablet_write_block_size * 1024); + TabletPtr tablet = table_->AddTablet(meta, &status); + if (!tablet) { + PROC_LOG(WARNING) << "add tablet failed" << meta.path() + << ", errcode: " << StatusCodeToString(status); + EnterEofPhaseWithResponseStatus(status); + MasterEnv().GetTabletManager()->DeleteTable(table_name_, &status); + return; } - const LocalityGroupSchema& lg0 = request_->schema().locality_groups(0); - PROC_LOG(INFO) << "Begin to create table: " << request_->table_name() - << ", store_medium: " << lg0.store_type() - << ", compress: " << lg0.compress_type() - << ", raw_key: " << request_->schema().raw_key() - << ", has " << tablet_num << " tablets, schema: " - << request_->schema().ShortDebugString(); - SetNextPhase(CreateTablePhase::kUpdateMeta); + tablet->LockTransition(); + PackMetaWriteRecords(tablet, false, meta_records_); + tablets_.emplace_back(tablet); + } + if (!MasterEnv().GetTabletManager()->AddTable(table_, &status)) { + PROC_LOG(ERROR) << "Fail to create table: " << request_->table_name() + << ", table already exist"; + EnterEofPhaseWithResponseStatus(kTableExist); + return; + } + + const LocalityGroupSchema& lg0 = request_->schema().locality_groups(0); + PROC_LOG(INFO) << "Begin to create table: " << request_->table_name() + << ", store_medium: " << lg0.store_type() << ", compress: " << lg0.compress_type() + << ", raw_key: " << request_->schema().raw_key() << ", has " << tablet_num + << " tablets, schema: " << request_->schema().ShortDebugString(); + SetNextPhase(CreateTablePhase::kUpdateMeta); } void CreateTableProcedure::UpdateMetaHandler(const CreateTablePhase&) { - if (update_meta_) { - return; - } - update_meta_.store(true); - PROC_LOG(INFO) << "table: " << table_name_ << " begin to update meta"; - UpdateMetaClosure closure = std::bind(&CreateTableProcedure::UpdateMetaDone, this, _1); - MasterEnv().BatchWriteMetaTableAsync(meta_records_, closure, FLAGS_tera_master_meta_retry_times); + if (update_meta_) { + return; + } + update_meta_.store(true); + PROC_LOG(INFO) << "table: " << table_name_ << " begin to update meta"; + UpdateMetaClosure closure = std::bind(&CreateTableProcedure::UpdateMetaDone, this, _1); + MasterEnv().BatchWriteMetaTableAsync(meta_records_, closure, FLAGS_tera_master_meta_retry_times); } void CreateTableProcedure::UpdateMetaDone(bool succ) { - if (!succ) { - PROC_LOG(WARNING) << "fail to update meta"; - EnterEofPhaseWithResponseStatus(kMetaTabletError); - return; - } - LOG(INFO) << "create table " << table_->GetTableName() << " update meta success"; - EnterPhaseAndResponseStatus(kMasterOk, CreateTablePhase::kLoadTablets); + if (!succ) { + PROC_LOG(WARNING) << "fail to update meta"; + EnterEofPhaseWithResponseStatus(kMetaTabletError); + return; + } + LOG(INFO) << "create table " << table_->GetTableName() << " update meta success"; + EnterPhaseAndResponseStatus(kMasterOk, CreateTablePhase::kLoadTablets); } void CreateTableProcedure::LoadTabletsHandler(const CreateTablePhase&) { - Scheduler* size_scheduler = MasterEnv().GetSizeScheduler().get(); - for (size_t i = 0; i < tablets_.size(); i++) { - CHECK(tablets_[i]->GetStatus() == TabletMeta::kTabletOffline) << tablets_[i]->GetPath(); - TabletNodePtr dest_node; - if (!MasterEnv().GetTabletNodeManager() - ->ScheduleTabletNodeOrWait(size_scheduler, table_name_, false, &dest_node)) { - LOG(ERROR) << "no available tabletnode, abort load tablet: " << tablets_[i]; - continue; - } - std::shared_ptr proc( - new LoadTabletProcedure(tablets_[i], dest_node, thread_pool_)); - MasterEnv().GetExecutor()->AddProcedure(proc); + Scheduler* size_scheduler = MasterEnv().GetSizeScheduler().get(); + for (size_t i = 0; i < tablets_.size(); i++) { + CHECK(tablets_[i]->GetStatus() == TabletMeta::kTabletOffline) << tablets_[i]->GetPath(); + TabletNodePtr dest_node; + if (!MasterEnv().GetTabletNodeManager()->ScheduleTabletNodeOrWait( + size_scheduler, table_name_, tablets_[i], false, &dest_node)) { + LOG(ERROR) << "no available tabletnode, abort load tablet: " << tablets_[i]; + continue; } - SetNextPhase(CreateTablePhase::kEofPhase); + std::shared_ptr proc( + new LoadTabletProcedure(tablets_[i], dest_node, thread_pool_)); + MasterEnv().GetExecutor()->AddProcedure(proc); + } + SetNextPhase(CreateTablePhase::kEofPhase); } void CreateTableProcedure::EofHandler(const CreateTablePhase&) { - PROC_LOG(INFO) << "create table: " << table_name_ << " finish"; - done_.store(true); - // unlike DisableTableProcedure, here we do not ensure that all tablets been loaded successfully - // and just finish the CreateTableProcedure early as all LoadTabletProcedure been added to ProcedureExecutor. - if (table_ && table_->InTransition()) { - table_->UnlockTransition(); - } - rpc_closure_->Run(); -} - -bool CreateTableProcedure::Done() { - return done_; + PROC_LOG(INFO) << "create table: " << table_name_ << " finish"; + done_.store(true); + // unlike DisableTableProcedure, here we do not ensure that all tablets been + // loaded successfully + // and just finish the CreateTableProcedure early as all LoadTabletProcedure + // been added to ProcedureExecutor. + if (table_ && table_->InTransition()) { + table_->UnlockTransition(); + } + rpc_closure_->Run(); } -std::ostream& operator<< (std::ostream& o, const CreateTablePhase& phase) { - static const char* msg[] = {"CreateTablePhase::kPrepare", - "CreateTablePhase::kUpdateMeta", - "CreateTablePhase::kLoadTablets", - "CreateTablePhase::kEofPhase", - "CreateTablePhase::kUnknown"}; - static uint32_t msg_size = sizeof(msg) / sizeof(const char*); - typedef std::underlying_type::type UnderType; - uint32_t index = static_cast(phase) - static_cast(CreateTablePhase::kPrepare); - index = index < msg_size ? index : msg_size - 1; - o << msg[index]; - return o; +bool CreateTableProcedure::Done() { return done_; } + +std::ostream& operator<<(std::ostream& o, const CreateTablePhase& phase) { + static const char* msg[] = {"CreateTablePhase::kPrepare", "CreateTablePhase::kUpdateMeta", + "CreateTablePhase::kLoadTablets", "CreateTablePhase::kEofPhase", + "CreateTablePhase::kUnknown"}; + static uint32_t msg_size = sizeof(msg) / sizeof(const char*); + typedef std::underlying_type::type UnderType; + uint32_t index = + static_cast(phase) - static_cast(CreateTablePhase::kPrepare); + index = index < msg_size ? index : msg_size - 1; + o << msg[index]; + return o; } - } } diff --git a/src/master/create_table_procedure.h b/src/master/create_table_procedure.h index 92dbbf0ac..65c7057d4 100644 --- a/src/master/create_table_procedure.h +++ b/src/master/create_table_procedure.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "proto/master_rpc.pb.h" #include "master/procedure.h" @@ -16,67 +17,67 @@ namespace tera { namespace master { -enum class CreateTablePhase { - kPrepare, - kUpdateMeta, - kLoadTablets, - kEofPhase -}; +enum class CreateTablePhase { kPrepare, kUpdateMeta, kLoadTablets, kEofPhase }; -std::ostream& operator<< (std::ostream& o, const CreateTablePhase& phase); +std::ostream& operator<<(std::ostream& o, const CreateTablePhase& phase); class CreateTableProcedure : public Procedure { -public: - CreateTableProcedure(const CreateTableRequest* request, - CreateTableResponse* response, - google::protobuf::Closure* closure, - ThreadPool* thread_pool); - - virtual std::string ProcId() const; - - virtual void RunNextStage(); - - virtual ~CreateTableProcedure() {} - - virtual bool Done(); - -private: - typedef std::function CreateTablePhaseHandler; - - void SetNextPhase(const CreateTablePhase& phase) {phases_.emplace_back(phase);} - - CreateTablePhase GetCurrentPhase() {return phases_.back();} - - void EnterPhaseAndResponseStatus(StatusCode status, const CreateTablePhase& phase) { - response_->set_status(status); - SetNextPhase(phase); - } - - void EnterEofPhaseWithResponseStatus(StatusCode status) { - EnterPhaseAndResponseStatus(status, CreateTablePhase::kEofPhase); - } - - void PreCheckHandler(const CreateTablePhase&); - void UpdateMetaHandler(const CreateTablePhase&); - void LoadTabletsHandler(const CreateTablePhase&); - void EofHandler(const CreateTablePhase&); - - void UpdateMetaDone(bool succ); - -private: - const CreateTableRequest* request_; - CreateTableResponse* response_; - google::protobuf::Closure* rpc_closure_; - std::string table_name_; - TablePtr table_; - std::vector tablets_; - std::vector meta_records_; - std::atomic update_meta_; - std::atomic done_ ; - std::vector phases_; - static std::map phase_handlers_; - ThreadPool* thread_pool_; + public: + CreateTableProcedure(const CreateTableRequest* request, CreateTableResponse* response, + google::protobuf::Closure* closure, ThreadPool* thread_pool); + + virtual std::string ProcId() const; + + virtual void RunNextStage(); + + virtual ~CreateTableProcedure() {} + + virtual bool Done(); + + private: + typedef std::function + CreateTablePhaseHandler; + + void SetNextPhase(const CreateTablePhase& phase) { + std::lock_guard lock_guard(phase_mutex_); + phases_.emplace_back(phase); + } + + CreateTablePhase GetCurrentPhase() { + std::lock_guard lock_guard(phase_mutex_); + return phases_.back(); + } + + void EnterPhaseAndResponseStatus(StatusCode status, const CreateTablePhase& phase) { + response_->set_status(status); + SetNextPhase(phase); + } + + void EnterEofPhaseWithResponseStatus(StatusCode status) { + EnterPhaseAndResponseStatus(status, CreateTablePhase::kEofPhase); + } + + void PreCheckHandler(const CreateTablePhase&); + void UpdateMetaHandler(const CreateTablePhase&); + void LoadTabletsHandler(const CreateTablePhase&); + void EofHandler(const CreateTablePhase&); + + void UpdateMetaDone(bool succ); + + private: + const CreateTableRequest* request_; + CreateTableResponse* response_; + google::protobuf::Closure* rpc_closure_; + std::string table_name_; + TablePtr table_; + std::vector tablets_; + std::vector meta_records_; + std::atomic update_meta_; + std::atomic done_; + std::mutex phase_mutex_; + std::vector phases_; + static std::map phase_handlers_; + ThreadPool* thread_pool_; }; - } } diff --git a/src/master/delete_table_procedure.cc b/src/master/delete_table_procedure.cc index 302154f9f..8038c2aec 100644 --- a/src/master/delete_table_procedure.cc +++ b/src/master/delete_table_procedure.cc @@ -10,125 +10,123 @@ DECLARE_int32(tera_master_meta_retry_times); namespace tera { namespace master { -std::map - DeleteTableProcedure::phase_handlers_ { - {DeleteTablePhase::kPrepare, - std::bind(&DeleteTableProcedure::PrepareHandler, _1, _2)}, - {DeleteTablePhase::kDeleteTable, - std::bind(&DeleteTableProcedure::DeleteTableHandler, _1, _2)}, - {DeleteTablePhase::kUpdateMeta, - std::bind(&DeleteTableProcedure::UpdateMetaHandler, _1, _2)}, - {DeleteTablePhase::kEofPhase, - std::bind(&DeleteTableProcedure::EofPhaseHandler, _1, _2)} -}; - -DeleteTableProcedure::DeleteTableProcedure(TablePtr table, - const DeleteTableRequest* request, - DeleteTableResponse* response, - google::protobuf::Closure* closure, - ThreadPool* thread_pool) : - table_(table), - request_(request), - response_(response), - rpc_closure_(closure), - update_meta_(false), - done_(false), - thread_pool_(thread_pool) { - PROC_LOG(INFO) << "begin delete table: " << table_->GetTableName(); - SetNextPhase(DeleteTablePhase::kPrepare); +std::map DeleteTableProcedure::phase_handlers_{ + {DeleteTablePhase::kPrepare, std::bind(&DeleteTableProcedure::PrepareHandler, _1, _2)}, + {DeleteTablePhase::kDeleteTable, std::bind(&DeleteTableProcedure::DeleteTableHandler, _1, _2)}, + {DeleteTablePhase::kUpdateMeta, std::bind(&DeleteTableProcedure::UpdateMetaHandler, _1, _2)}, + {DeleteTablePhase::kEofPhase, std::bind(&DeleteTableProcedure::EofPhaseHandler, _1, _2)}}; + +DeleteTableProcedure::DeleteTableProcedure(TablePtr table, const DeleteTableRequest* request, + DeleteTableResponse* response, + google::protobuf::Closure* closure, + ThreadPool* thread_pool) + : table_(table), + request_(request), + response_(response), + rpc_closure_(closure), + update_meta_(false), + done_(false), + thread_pool_(thread_pool) { + PROC_LOG(INFO) << "begin delete table: " << table_->GetTableName(); + SetNextPhase(DeleteTablePhase::kPrepare); } std::string DeleteTableProcedure::ProcId() const { - static std::string prefix("DeleteTable:"); - return prefix + table_->GetTableName(); + static std::string prefix("DeleteTable:"); + return prefix + table_->GetTableName(); } void DeleteTableProcedure::RunNextStage() { - DeleteTablePhase phase = GetCurrentPhase(); - auto it = phase_handlers_.find(phase); - PROC_CHECK(it != phase_handlers_.end()) - << "illegal phase:" << phase << ", table: " << table_->GetTableName(); - DeleteTablePhaseHandler handler = it->second; - handler(this, phase); + DeleteTablePhase phase = GetCurrentPhase(); + auto it = phase_handlers_.find(phase); + PROC_CHECK(it != phase_handlers_.end()) << "illegal phase:" << phase + << ", table: " << table_->GetTableName(); + DeleteTablePhaseHandler handler = it->second; + handler(this, phase); } void DeleteTableProcedure::PrepareHandler(const DeleteTablePhase&) { - if (!MasterEnv().GetMaster()->HasPermission(request_, table_, "delete table")) { - EnterPhaseWithResponseStatus(kNotPermission, DeleteTablePhase::kEofPhase); - return; - } - SetNextPhase(DeleteTablePhase::kDeleteTable); + if (!MasterEnv().GetMaster()->HasPermission(request_, table_, "delete table")) { + EnterPhaseWithResponseStatus(kNotPermission, DeleteTablePhase::kEofPhase); + return; + } + SetNextPhase(DeleteTablePhase::kDeleteTable); } void DeleteTableProcedure::DeleteTableHandler(const DeleteTablePhase&) { - std::vector tablets; - table_->GetTablet(&tablets); - for (size_t i = 0; i < tablets.size(); ++i) { - TabletPtr tablet = tablets[i]; - if (tablet->GetStatus() != TabletMeta::kTabletDisable) { - PROC_LOG(WARNING) << "tablet: " << tablet - << " not in disabled status, " << StatusCodeToString(tablet->GetStatus()); - EnterPhaseWithResponseStatus(StatusCode(tablet->GetStatus()), DeleteTablePhase::kEofPhase); - return; - } - PackMetaWriteRecords(tablet, true, meta_records_); - } - if (!table_->DoStateTransition(TableEvent::kDeleteTable)) { - PROC_LOG(WARNING) << "table: " << table_->GetTableName() - << ", current status: " << StatusCodeToString(table_->GetStatus()); - EnterPhaseWithResponseStatus(kTableNotSupport, DeleteTablePhase::kEofPhase); - return; + std::vector tablets; + table_->GetTablet(&tablets); + for (size_t i = 0; i < tablets.size(); ++i) { + TabletPtr tablet = tablets[i]; + if (tablet->GetStatus() != TabletMeta::kTabletDisable) { + PROC_LOG(WARNING) << "tablet: " << tablet << " not in disabled status, " + << StatusCodeToString(tablet->GetStatus()); + EnterPhaseWithResponseStatus(StatusCode(tablet->GetStatus()), DeleteTablePhase::kEofPhase); + return; } - PackMetaWriteRecords(table_, true, meta_records_); - SetNextPhase(DeleteTablePhase::kUpdateMeta); + PackMetaWriteRecords(tablet, true, meta_records_); + } + if (!table_->DoStateTransition(TableEvent::kDeleteTable)) { + PROC_LOG(WARNING) << "table: " << table_->GetTableName() + << ", current status: " << StatusCodeToString(table_->GetStatus()); + EnterPhaseWithResponseStatus(kTableNotSupport, DeleteTablePhase::kEofPhase); + return; + } + PackMetaWriteRecords(table_, true, meta_records_); + + // delete quota setting store in meta table + quota::MasterQuotaHelper::PackDeleteQuotaRecords(table_->GetTableName(), meta_records_); + + SetNextPhase(DeleteTablePhase::kUpdateMeta); } void DeleteTableProcedure::UpdateMetaHandler(const DeleteTablePhase&) { - if (update_meta_) { - return; - } - update_meta_.store(true); - PROC_LOG(INFO) << "table: " << table_->GetTableName() << "begin to update meta"; - UpdateMetaClosure closure = std::bind(&DeleteTableProcedure::UpdateMetaDone, this, _1); - MasterEnv().BatchWriteMetaTableAsync(meta_records_, closure, FLAGS_tera_master_meta_retry_times); + if (update_meta_) { + return; + } + update_meta_.store(true); + PROC_LOG(INFO) << "table: " << table_->GetTableName() << "begin to update meta"; + UpdateMetaClosure closure = std::bind(&DeleteTableProcedure::UpdateMetaDone, this, _1); + MasterEnv().BatchWriteMetaTableAsync(meta_records_, closure, FLAGS_tera_master_meta_retry_times); } void DeleteTableProcedure::EofPhaseHandler(const DeleteTablePhase&) { - done_.store(true); - if (table_ && table_->InTransition()) { - table_->UnlockTransition(); - } - PROC_LOG(INFO) << "delete table: " << table_->GetTableName() << " finish"; - rpc_closure_->Run(); + done_.store(true); + if (table_->InTransition()) { + table_->UnlockTransition(); + } + PROC_LOG(INFO) << "delete table: " << table_->GetTableName() << " finish"; + rpc_closure_->Run(); } void DeleteTableProcedure::UpdateMetaDone(bool succ) { - if (!succ) { - PROC_LOG(WARNING) << "table: " << table_->GetTableName() << " update meta fail"; - EnterPhaseWithResponseStatus(kMetaTabletError, DeleteTablePhase::kEofPhase); - return; - } - PROC_LOG(INFO) << "table: " << table_->GetTableName() << " update meta succ"; - StatusCode code; - MasterEnv().GetTabletManager()->DeleteTable(table_->GetTableName(), &code); - EnterPhaseWithResponseStatus(kMasterOk, DeleteTablePhase::kEofPhase); + if (!succ) { + PROC_LOG(WARNING) << "table: " << table_->GetTableName() << " update meta fail"; + EnterPhaseWithResponseStatus(kMetaTabletError, DeleteTablePhase::kEofPhase); + return; + } + PROC_LOG(INFO) << "table: " << table_->GetTableName() << " update meta succ"; + if (!MasterEnv().GetQuotaEntry()->DelRecord(table_->GetTableName())) { + PROC_LOG(WARNING) << "table: " << table_->GetTableName() + << " delete master memory quota cache failed"; + } + StatusCode code; + MasterEnv().GetTabletManager()->DeleteTable(table_->GetTableName(), &code); + EnterPhaseWithResponseStatus(kMasterOk, DeleteTablePhase::kEofPhase); } -std::ostream& operator<< (std::ostream& o, const DeleteTablePhase& phase) { - static const char* msg[] = {"DeleteTablePhase::kPrepare", - "DeleteTablePhase::kDeleteTable", - "DeleteTablePhase::kUpdateMeta", - "DeleteTablePhase::kEofPhase", - "DeleteTablePhase::kUnknown"}; - static uint32_t msg_size = sizeof(msg) / sizeof(const char*); - typedef std::underlying_type::type UnderType; - uint32_t index = static_cast(phase) - static_cast(DeleteTablePhase::kPrepare); - index = index < msg_size ? index : msg_size - 1; - o << msg[index]; - return o; +std::ostream& operator<<(std::ostream& o, const DeleteTablePhase& phase) { + static const char* msg[] = {"DeleteTablePhase::kPrepare", "DeleteTablePhase::kDeleteTable", + "DeleteTablePhase::kUpdateMeta", "DeleteTablePhase::kEofPhase", + "DeleteTablePhase::kUnknown"}; + static uint32_t msg_size = sizeof(msg) / sizeof(const char*); + typedef std::underlying_type::type UnderType; + uint32_t index = + static_cast(phase) - static_cast(DeleteTablePhase::kPrepare); + index = index < msg_size ? index : msg_size - 1; + o << msg[index]; + return o; } - - } } - diff --git a/src/master/delete_table_procedure.h b/src/master/delete_table_procedure.h index 546635bbe..a4b144eb2 100644 --- a/src/master/delete_table_procedure.h +++ b/src/master/delete_table_procedure.h @@ -16,61 +16,67 @@ namespace tera { namespace master { enum class DeleteTablePhase { - kPrepare, - kDeleteTable, - kUpdateMeta, - kEofPhase, + kPrepare, + kDeleteTable, + kUpdateMeta, + kEofPhase, }; -std::ostream& operator<< (std::ostream& o, const DeleteTablePhase& phase); +std::ostream& operator<<(std::ostream& o, const DeleteTablePhase& phase); class DeleteTableProcedure : public Procedure { -public: - DeleteTableProcedure(TablePtr table, - const DeleteTableRequest* request, - DeleteTableResponse* response, - google::protobuf::Closure* closure, - ThreadPool* thread_pool); - - virtual ~DeleteTableProcedure() {} - - virtual std::string ProcId() const; - - virtual void RunNextStage(); - - virtual bool Done() {return done_.load();} - -private: - typedef std::function DeleteTablePhaseHandler; - - void SetNextPhase(const DeleteTablePhase& phase) {phases_.push_back(phase);} - DeleteTablePhase GetCurrentPhase() {return phases_.back();} - - void EnterPhaseWithResponseStatus(StatusCode status, DeleteTablePhase phase) { - response_->set_status(status); - SetNextPhase(phase); - } - - void PrepareHandler(const DeleteTablePhase& phase); - void DeleteTableHandler(const DeleteTablePhase& phase); - void UpdateMetaHandler(const DeleteTablePhase& phase); - void EofPhaseHandler(const DeleteTablePhase& phase); - - void UpdateMetaDone(bool succ); - -private: - TablePtr table_; - const DeleteTableRequest* request_; - DeleteTableResponse* response_; - google::protobuf::Closure* rpc_closure_; - std::string table_name_; - std::atomic update_meta_; - std::atomic done_; - std::vector meta_records_; - std::vector phases_; - static std::map phase_handlers_; - ThreadPool* thread_pool_; + public: + DeleteTableProcedure(TablePtr table, const DeleteTableRequest* request, + DeleteTableResponse* response, google::protobuf::Closure* closure, + ThreadPool* thread_pool); + + virtual ~DeleteTableProcedure() {} + + virtual std::string ProcId() const; + + virtual void RunNextStage(); + + virtual bool Done() { return done_.load(); } + + private: + typedef std::function + DeleteTablePhaseHandler; + + void SetNextPhase(const DeleteTablePhase& phase) { + std::lock_guard lock_guard(phase_mutex_); + phases_.push_back(phase); + } + + DeleteTablePhase GetCurrentPhase() { + std::lock_guard lock_guard(phase_mutex_); + return phases_.back(); + } + + void EnterPhaseWithResponseStatus(StatusCode status, DeleteTablePhase phase) { + response_->set_status(status); + SetNextPhase(phase); + } + + void PrepareHandler(const DeleteTablePhase& phase); + void DeleteTableHandler(const DeleteTablePhase& phase); + void UpdateMetaHandler(const DeleteTablePhase& phase); + void EofPhaseHandler(const DeleteTablePhase& phase); + + void UpdateMetaDone(bool succ); + + private: + TablePtr table_; + const DeleteTableRequest* request_; + DeleteTableResponse* response_; + google::protobuf::Closure* rpc_closure_; + std::string table_name_; + std::atomic update_meta_; + std::atomic done_; + std::vector meta_records_; + std::mutex phase_mutex_; + std::vector phases_; + static std::map phase_handlers_; + ThreadPool* thread_pool_; }; - } } diff --git a/src/master/disable_table_procedure.cc b/src/master/disable_table_procedure.cc index dfeceed10..c9aa9569e 100644 --- a/src/master/disable_table_procedure.cc +++ b/src/master/disable_table_procedure.cc @@ -12,144 +12,140 @@ DECLARE_int32(tera_master_meta_retry_times); namespace tera { namespace master { -std::map DisableTableProcedure::phase_handlers_ { - {DisableTablePhase::kPrepare, - std::bind(&DisableTableProcedure::PrepareHandler, _1, _2)}, - {DisableTablePhase::kDisableTable, - std::bind(&DisableTableProcedure::DisableTableHandler, _1, _2)}, - {DisableTablePhase::kUpdateMeta, - std::bind(&DisableTableProcedure::UpdateMetaHandler, _1, _2)}, - {DisableTablePhase::kDisableTablets, - std::bind(&DisableTableProcedure::DisableTabletsHandler, _1, _2)}, - {DisableTablePhase::kEofPhase, - std::bind(&DisableTableProcedure::EofPhaseHandler, _1, _2)} -}; +std::map DisableTableProcedure::phase_handlers_{ + {DisableTablePhase::kPrepare, std::bind(&DisableTableProcedure::PrepareHandler, _1, _2)}, + {DisableTablePhase::kDisableTable, + std::bind(&DisableTableProcedure::DisableTableHandler, _1, _2)}, + {DisableTablePhase::kUpdateMeta, std::bind(&DisableTableProcedure::UpdateMetaHandler, _1, _2)}, + {DisableTablePhase::kDisableTablets, + std::bind(&DisableTableProcedure::DisableTabletsHandler, _1, _2)}, + {DisableTablePhase::kEofPhase, std::bind(&DisableTableProcedure::EofPhaseHandler, _1, _2)}}; -DisableTableProcedure::DisableTableProcedure(TablePtr table, - const DisableTableRequest* request, DisableTableResponse* response, - google::protobuf::Closure* closure, ThreadPool* thread_pool) : - table_(table), - request_(request), - response_(response), - rpc_closure_(closure), - update_meta_(false), - done_(false), - thread_pool_(thread_pool) { - PROC_LOG(INFO) << "begin disable table: " << table_->GetTableName(); - SetNextPhase(DisableTablePhase::kPrepare); +DisableTableProcedure::DisableTableProcedure(TablePtr table, const DisableTableRequest* request, + DisableTableResponse* response, + google::protobuf::Closure* closure, + ThreadPool* thread_pool) + : table_(table), + request_(request), + response_(response), + rpc_closure_(closure), + update_meta_(false), + done_(false), + thread_pool_(thread_pool) { + PROC_LOG(INFO) << "begin disable table: " << table_->GetTableName(); + SetNextPhase(DisableTablePhase::kPrepare); } std::string DisableTableProcedure::ProcId() const { - static std::string prefix("DisableTable:"); - return prefix + table_->GetTableName(); + static std::string prefix("DisableTable:"); + return prefix + table_->GetTableName(); } void DisableTableProcedure::RunNextStage() { - DisableTablePhase phase = GetCurrentPhase(); - auto it = phase_handlers_.find(phase); - PROC_CHECK(it != phase_handlers_.end()) << "illegal phase: " << phase << ", table: " << table_; - DisableTablePhaseHandler handler = it->second; - handler(this, phase); + DisableTablePhase phase = GetCurrentPhase(); + auto it = phase_handlers_.find(phase); + PROC_CHECK(it != phase_handlers_.end()) << "illegal phase: " << phase << ", table: " << table_; + DisableTablePhaseHandler handler = it->second; + handler(this, phase); } void DisableTableProcedure::PrepareHandler(const DisableTablePhase&) { - if (!MasterEnv().GetMaster()->HasPermission(request_, table_, "disable table")) { - PROC_LOG(WARNING) << "disable table: " << table_->GetTableName() << " abort, permission denied"; - EnterPhaseWithResponseStatus(kNotPermission, DisableTablePhase::kEofPhase); - return; - } - SetNextPhase(DisableTablePhase::kDisableTable); + if (!MasterEnv().GetMaster()->HasPermission(request_, table_, "disable table")) { + PROC_LOG(WARNING) << "disable table: " << table_->GetTableName() << " abort, permission denied"; + EnterPhaseWithResponseStatus(kNotPermission, DisableTablePhase::kEofPhase); + return; + } + SetNextPhase(DisableTablePhase::kDisableTable); } void DisableTableProcedure::DisableTableHandler(const DisableTablePhase&) { - if (!table_->DoStateTransition(TableEvent::kDisableTable)) { - PROC_LOG(WARNING) << table_->GetTableName() - << " current state: " << table_->GetStatus() << ", disable failed"; - EnterPhaseWithResponseStatus( - static_cast(table_->GetStatus()), DisableTablePhase::kEofPhase); - return; - } - SetNextPhase(DisableTablePhase::kUpdateMeta); + if (!table_->DoStateTransition(TableEvent::kDisableTable)) { + PROC_LOG(WARNING) << table_->GetTableName() << " current state: " << table_->GetStatus() + << ", disable failed"; + EnterPhaseWithResponseStatus(static_cast(table_->GetStatus()), + DisableTablePhase::kEofPhase); + return; + } + SetNextPhase(DisableTablePhase::kUpdateMeta); } void DisableTableProcedure::UpdateMetaHandler(const DisableTablePhase&) { - if (!update_meta_) { - update_meta_.store(true); - MetaWriteRecord record = PackMetaWriteRecord(table_, false); - PROC_LOG(INFO) << "table: " << table_->GetTableName() - << " begin to update table disable info to meta"; - UpdateMetaClosure closure = std::bind(&DisableTableProcedure::UpdateMetaDone, this, _1); - MasterEnv().BatchWriteMetaTableAsync(record, closure, FLAGS_tera_master_meta_retry_times); - } + if (!update_meta_) { + update_meta_.store(true); + MetaWriteRecord record = PackMetaWriteRecord(table_, false); + PROC_LOG(INFO) << "table: " << table_->GetTableName() + << " begin to update table disable info to meta"; + UpdateMetaClosure closure = std::bind(&DisableTableProcedure::UpdateMetaDone, this, _1); + MasterEnv().BatchWriteMetaTableAsync(record, closure, FLAGS_tera_master_meta_retry_times); + } } void DisableTableProcedure::UpdateMetaDone(bool succ) { - if (!succ) { - // disable failed because meta write fail, revert table's status to kTableEnable - PROC_CHECK(table_->DoStateTransition(TableEvent::kEnableTable)); - PROC_LOG(WARNING) << "fail to update meta"; - EnterPhaseWithResponseStatus(kMetaTabletError, DisableTablePhase::kEofPhase); - return; - } - PROC_LOG(INFO) << "update disable table info to meta succ"; - EnterPhaseWithResponseStatus(kMasterOk, DisableTablePhase::kDisableTablets); + if (!succ) { + // disable failed because meta write fail, revert table's status to + // kTableEnable + PROC_CHECK(table_->DoStateTransition(TableEvent::kEnableTable)); + PROC_LOG(WARNING) << "fail to update meta"; + EnterPhaseWithResponseStatus(kMetaTabletError, DisableTablePhase::kEofPhase); + return; + } + PROC_LOG(INFO) << "update disable table info to meta succ"; + EnterPhaseWithResponseStatus(kMasterOk, DisableTablePhase::kDisableTablets); } void DisableTableProcedure::DisableTabletsHandler(const DisableTablePhase&) { - std::vector tablet_meta_list; - table_->GetTablet(&tablet_meta_list); - int in_transition_tablet_cnt = 0; - for (uint32_t i = 0; i < tablet_meta_list.size(); ++i) { - TabletPtr tablet = tablet_meta_list[i]; - if (tablet->GetStatus() == TabletMeta::kTabletDisable) { - continue; - } - if (tablet->LockTransition()) { - if (tablet->GetStatus() == TabletMeta::kTabletOffline || - tablet->GetStatus() == TabletMeta::kTabletLoadFail) { - tablet->DoStateTransition(TabletEvent::kTableDisable); - tablet->UnlockTransition(); - continue; - } - std::shared_ptr proc(new UnloadTabletProcedure(tablet, thread_pool_, false)); - MasterEnv().GetExecutor()->AddProcedure(proc); - in_transition_tablet_cnt++; - } - else { - in_transition_tablet_cnt++; - } + std::vector tablet_meta_list; + table_->GetTablet(&tablet_meta_list); + int in_transition_tablet_cnt = 0; + for (uint32_t i = 0; i < tablet_meta_list.size(); ++i) { + TabletPtr tablet = tablet_meta_list[i]; + if (tablet->GetStatus() == TabletMeta::kTabletDisable) { + continue; } - PROC_VLOG(23) << "table: " << table_->GetTableName() - << ", in transition num: " << in_transition_tablet_cnt; - if (in_transition_tablet_cnt == 0) { - SetNextPhase(DisableTablePhase::kEofPhase); - return; + if (tablet->LockTransition()) { + if (tablet->GetStatus() == TabletMeta::kTabletOffline || + tablet->GetStatus() == TabletMeta::kTabletLoadFail) { + tablet->DoStateTransition(TabletEvent::kTableDisable); + tablet->UnlockTransition(); + continue; + } + std::shared_ptr proc(new UnloadTabletProcedure(tablet, thread_pool_, false)); + MasterEnv().GetExecutor()->AddProcedure(proc); + in_transition_tablet_cnt++; + } else { + in_transition_tablet_cnt++; } + } + PROC_VLOG(23) << "table: " << table_->GetTableName() + << ", in transition num: " << in_transition_tablet_cnt; + if (in_transition_tablet_cnt == 0) { + SetNextPhase(DisableTablePhase::kEofPhase); + return; + } } void DisableTableProcedure::EofPhaseHandler(const DisableTablePhase&) { - done_.store(true); - if (table_ && table_->InTransition()) { - table_->UnlockTransition(); - } - PROC_LOG(INFO) << "disable table: " << table_->GetTableName() << " finish"; - rpc_closure_->Run(); + done_.store(true); + if (table_ && table_->InTransition()) { + table_->UnlockTransition(); + } + PROC_LOG(INFO) << "disable table: " << table_->GetTableName() << " finish"; + rpc_closure_->Run(); } -std::ostream& operator<< (std::ostream& o, const DisableTablePhase& phase) { - static const char* msg[] = {"DisableTablePhase::kPrepare", - "DisableTablePhase::kDisableTable", - "DisableTablePhase::kUpdateMeta", - "DisableTablePhase::kDisableTablets", - "DisableTablePhase::kEofPhase", - "DisableTablePhase::kUnknown"}; - static uint32_t msg_size = sizeof(msg) / sizeof(const char*); - typedef std::underlying_type::type UnderType; - uint32_t index = static_cast(phase) - static_cast(DisableTablePhase::kPrepare); - index = index < msg_size ? index : msg_size - 1; - o << msg[index]; - return o; +std::ostream& operator<<(std::ostream& o, const DisableTablePhase& phase) { + static const char* msg[] = { + "DisableTablePhase::kPrepare", "DisableTablePhase::kDisableTable", + "DisableTablePhase::kUpdateMeta", "DisableTablePhase::kDisableTablets", + "DisableTablePhase::kEofPhase", "DisableTablePhase::kUnknown"}; + static uint32_t msg_size = sizeof(msg) / sizeof(const char*); + typedef std::underlying_type::type UnderType; + uint32_t index = + static_cast(phase) - static_cast(DisableTablePhase::kPrepare); + index = index < msg_size ? index : msg_size - 1; + o << msg[index]; + return o; } - } } diff --git a/src/master/disable_table_procedure.h b/src/master/disable_table_procedure.h index 4e961228b..521a67a52 100644 --- a/src/master/disable_table_procedure.h +++ b/src/master/disable_table_procedure.h @@ -7,6 +7,7 @@ #include #include #include +#include #include "master/procedure.h" #include "master/tablet_manager.h" #include "proto/master_rpc.pb.h" @@ -14,63 +15,62 @@ namespace tera { namespace master { -enum class DisableTablePhase { - kPrepare, - kDisableTable, - kUpdateMeta, - kDisableTablets, - kEofPhase -}; +enum class DisableTablePhase { kPrepare, kDisableTable, kUpdateMeta, kDisableTablets, kEofPhase }; -std::ostream& operator<< (std::ostream& o, const DisableTablePhase& phase); +std::ostream& operator<<(std::ostream& o, const DisableTablePhase& phase); class DisableTableProcedure : public Procedure { -public: - DisableTableProcedure(TablePtr table_, - const DisableTableRequest* request_, - DisableTableResponse* response, - google::protobuf::Closure* closure, - ThreadPool* thread_pool); - - virtual std::string ProcId() const; - - virtual void RunNextStage(); - - virtual bool Done() {return done_.load();} - - virtual ~DisableTableProcedure() {} -private: - typedef std::function DisableTablePhaseHandler; - - void SetNextPhase(const DisableTablePhase& phase) {phases_.push_back(phase);} - - void EnterPhaseWithResponseStatus(StatusCode code, DisableTablePhase phase) { - response_->set_status(code); - SetNextPhase(phase); - } - - DisableTablePhase GetCurrentPhase() {return phases_.back();} - - void PrepareHandler(const DisableTablePhase&); - void DisableTableHandler(const DisableTablePhase&); - void UpdateMetaHandler(const DisableTablePhase&); - void DisableTabletsHandler(const DisableTablePhase&); - void EofPhaseHandler(const DisableTablePhase&); - - void UpdateMetaDone(bool succ); - -private: - TablePtr table_; - const DisableTableRequest* request_; - DisableTableResponse* response_; - google::protobuf::Closure* rpc_closure_; - std::atomic update_meta_; - std::vector phases_; - std::atomic done_; - static std::map phase_handlers_; - ThreadPool* thread_pool_; - + public: + DisableTableProcedure(TablePtr table_, const DisableTableRequest* request_, + DisableTableResponse* response, google::protobuf::Closure* closure, + ThreadPool* thread_pool); + + virtual std::string ProcId() const; + + virtual void RunNextStage(); + + virtual bool Done() { return done_.load(); } + + virtual ~DisableTableProcedure() {} + + private: + typedef std::function + DisableTablePhaseHandler; + + void SetNextPhase(const DisableTablePhase& phase) { + std::lock_guard lock_guard(phase_mutex_); + phases_.push_back(phase); + } + + void EnterPhaseWithResponseStatus(StatusCode code, DisableTablePhase phase) { + response_->set_status(code); + SetNextPhase(phase); + } + + DisableTablePhase GetCurrentPhase() { + std::lock_guard lock_guard(phase_mutex_); + return phases_.back(); + } + + void PrepareHandler(const DisableTablePhase&); + void DisableTableHandler(const DisableTablePhase&); + void UpdateMetaHandler(const DisableTablePhase&); + void DisableTabletsHandler(const DisableTablePhase&); + void EofPhaseHandler(const DisableTablePhase&); + + void UpdateMetaDone(bool succ); + + private: + TablePtr table_; + const DisableTableRequest* request_; + DisableTableResponse* response_; + google::protobuf::Closure* rpc_closure_; + std::atomic update_meta_; + std::mutex phase_mutex_; + std::vector phases_; + std::atomic done_; + static std::map phase_handlers_; + ThreadPool* thread_pool_; }; - } } diff --git a/src/master/enable_table_procedure.cc b/src/master/enable_table_procedure.cc index 4a322f00d..928cdeb16 100644 --- a/src/master/enable_table_procedure.cc +++ b/src/master/enable_table_procedure.cc @@ -11,131 +11,123 @@ DECLARE_int32(tera_master_meta_retry_times); namespace tera { namespace master { -std::map EnableTableProcedure::phase_handlers_ { - {EnableTablePhase::kPrepare, - std::bind(&EnableTableProcedure::PrepareHandler, _1, _2)}, - {EnableTablePhase::kEnableTable, - std::bind(&EnableTableProcedure::EnableTableHandler, _1, _2)}, - {EnableTablePhase::kUpdateMeta, - std::bind(&EnableTableProcedure::UpdateMetaHandler, _1, _2)}, - {EnableTablePhase::kEnableTablets, - std::bind(&EnableTableProcedure::EnableTabletsHandler, _1, _2)}, - {EnableTablePhase::kEofPhase, - std::bind(&EnableTableProcedure::EofPhaseHandler, _1, _2)}, +std::map EnableTableProcedure::phase_handlers_{ + {EnableTablePhase::kPrepare, std::bind(&EnableTableProcedure::PrepareHandler, _1, _2)}, + {EnableTablePhase::kEnableTable, std::bind(&EnableTableProcedure::EnableTableHandler, _1, _2)}, + {EnableTablePhase::kUpdateMeta, std::bind(&EnableTableProcedure::UpdateMetaHandler, _1, _2)}, + {EnableTablePhase::kEnableTablets, + std::bind(&EnableTableProcedure::EnableTabletsHandler, _1, _2)}, + {EnableTablePhase::kEofPhase, std::bind(&EnableTableProcedure::EofPhaseHandler, _1, _2)}, }; -EnableTableProcedure::EnableTableProcedure(TablePtr table, - const EnableTableRequest* request, - EnableTableResponse* response, - google::protobuf::Closure* closure, - ThreadPool* thread_pool) : - table_(table), - request_(request), - response_(response), - rpc_closure_(closure), - update_meta_(false), - done_(false), - thread_pool_(thread_pool) { - PROC_LOG(INFO) << "enable table: " << table_->GetTableName() << " begin"; - SetNextPhase(EnableTablePhase::kPrepare); +EnableTableProcedure::EnableTableProcedure(TablePtr table, const EnableTableRequest* request, + EnableTableResponse* response, + google::protobuf::Closure* closure, + ThreadPool* thread_pool) + : table_(table), + request_(request), + response_(response), + rpc_closure_(closure), + update_meta_(false), + done_(false), + thread_pool_(thread_pool) { + PROC_LOG(INFO) << "enable table: " << table_->GetTableName() << " begin"; + SetNextPhase(EnableTablePhase::kPrepare); } std::string EnableTableProcedure::ProcId() const { - std::string prefix("EnableTable:"); - return prefix + table_->GetTableName(); + std::string prefix("EnableTable:"); + return prefix + table_->GetTableName(); } void EnableTableProcedure::RunNextStage() { - EnableTablePhase phase = GetCurrentPhase(); - auto it = phase_handlers_.find(phase); - PROC_CHECK(it != phase_handlers_.end()) - << "illegeal phase:" << phase << ", table:" << table_->GetTableName(); - EnableTablePhaseHandler handler = it->second; - handler(this, phase); + EnableTablePhase phase = GetCurrentPhase(); + auto it = phase_handlers_.find(phase); + PROC_CHECK(it != phase_handlers_.end()) << "illegeal phase:" << phase + << ", table:" << table_->GetTableName(); + EnableTablePhaseHandler handler = it->second; + handler(this, phase); } void EnableTableProcedure::PrepareHandler(const EnableTablePhase&) { - if (!MasterEnv().GetMaster()->HasPermission(request_, table_, "enable table")) { - PROC_LOG(WARNING) << "enable table: " << table_->GetTableName() << "abort, permission denied"; - EnterPhaseWithResponseStatus(kNotPermission, EnableTablePhase::kEofPhase); - return; - } - SetNextPhase(EnableTablePhase::kEnableTable); + if (!MasterEnv().GetMaster()->HasPermission(request_, table_, "enable table")) { + PROC_LOG(WARNING) << "enable table: " << table_->GetTableName() << "abort, permission denied"; + EnterPhaseWithResponseStatus(kNotPermission, EnableTablePhase::kEofPhase); + return; + } + SetNextPhase(EnableTablePhase::kEnableTable); } void EnableTableProcedure::EnableTableHandler(const EnableTablePhase&) { - if (!table_->DoStateTransition(TableEvent::kEnableTable)) { - PROC_LOG(WARNING) << table_->GetTableName() - << "current state: " << table_->GetStatus() << ", enable failed"; - EnterPhaseWithResponseStatus( - static_cast(table_->GetStatus()), EnableTablePhase::kEofPhase); - return; - } - SetNextPhase(EnableTablePhase::kUpdateMeta); + if (!table_->DoStateTransition(TableEvent::kEnableTable)) { + PROC_LOG(WARNING) << table_->GetTableName() << "current state: " << table_->GetStatus() + << ", enable failed"; + EnterPhaseWithResponseStatus(static_cast(table_->GetStatus()), + EnableTablePhase::kEofPhase); + return; + } + SetNextPhase(EnableTablePhase::kUpdateMeta); } void EnableTableProcedure::UpdateMetaHandler(const EnableTablePhase&) { - if (update_meta_) { - return; - } - update_meta_.store(true); - MetaWriteRecord record = PackMetaWriteRecord(table_, false); - PROC_LOG(INFO) << "table: " << table_->GetTableName() - << " begin to update table enable info to meta"; - UpdateMetaClosure closure = std::bind(&EnableTableProcedure::UpdateMetaDone, this, _1); - MasterEnv().BatchWriteMetaTableAsync(record, closure, FLAGS_tera_master_meta_retry_times); + if (update_meta_) { + return; + } + update_meta_.store(true); + MetaWriteRecord record = PackMetaWriteRecord(table_, false); + PROC_LOG(INFO) << "table: " << table_->GetTableName() + << " begin to update table enable info to meta"; + UpdateMetaClosure closure = std::bind(&EnableTableProcedure::UpdateMetaDone, this, _1); + MasterEnv().BatchWriteMetaTableAsync(record, closure, FLAGS_tera_master_meta_retry_times); } void EnableTableProcedure::EnableTabletsHandler(const EnableTablePhase&) { - std::vector tablets; - table_->GetTablet(&tablets); - for (std::size_t i = 0; i < tablets.size(); ++i) { - TabletPtr tablet = tablets[i]; - PROC_CHECK(tablet->LockTransition()) << tablet->GetPath() << " in another tansition"; - PROC_CHECK(tablet->DoStateTransition(TabletEvent::kTableEnable)) - << tablet->GetPath() << ", current status: " << tablet->GetStatus(); - std::shared_ptr proc(new LoadTabletProcedure(tablet, tablet->GetTabletNode(), thread_pool_)); - MasterEnv().GetExecutor()->AddProcedure(proc); - } - SetNextPhase(EnableTablePhase::kEofPhase); + std::vector tablets; + table_->GetTablet(&tablets); + for (std::size_t i = 0; i < tablets.size(); ++i) { + TabletPtr tablet = tablets[i]; + PROC_CHECK(tablet->LockTransition()) << tablet->GetPath() << " in another tansition"; + PROC_CHECK(tablet->DoStateTransition(TabletEvent::kTableEnable)) + << tablet->GetPath() << ", current status: " << tablet->GetStatus(); + std::shared_ptr proc( + new LoadTabletProcedure(tablet, tablet->GetTabletNode(), thread_pool_)); + MasterEnv().GetExecutor()->AddProcedure(proc); + } + SetNextPhase(EnableTablePhase::kEofPhase); } void EnableTableProcedure::EofPhaseHandler(const EnableTablePhase&) { - done_.store(true); - PROC_LOG(INFO) << "table: " << table_->GetTableName() << ", status: " << table_->GetStatus(); - if (table_ && table_->InTransition()) { - table_->UnlockTransition(); - } - rpc_closure_->Run(); + done_.store(true); + PROC_LOG(INFO) << "table: " << table_->GetTableName() << ", status: " << table_->GetStatus(); + if (table_ && table_->InTransition()) { + table_->UnlockTransition(); + } + rpc_closure_->Run(); } void EnableTableProcedure::UpdateMetaDone(bool succ) { - if (!succ) { - PROC_LOG(WARNING) << "enable table: " << table_->GetTableName() << " update meta fail"; - PROC_CHECK(table_->DoStateTransition(TableEvent::kDisableTable)); - EnterPhaseWithResponseStatus(kMetaTabletError, EnableTablePhase::kEofPhase); - return; - } - PROC_LOG(INFO) << "update enable table info to meta succ"; - EnterPhaseWithResponseStatus(kMasterOk, EnableTablePhase::kEnableTablets); + if (!succ) { + PROC_LOG(WARNING) << "enable table: " << table_->GetTableName() << " update meta fail"; + PROC_CHECK(table_->DoStateTransition(TableEvent::kDisableTable)); + EnterPhaseWithResponseStatus(kMetaTabletError, EnableTablePhase::kEofPhase); + return; + } + PROC_LOG(INFO) << "update enable table info to meta succ"; + EnterPhaseWithResponseStatus(kMasterOk, EnableTablePhase::kEnableTablets); } -std::ostream& operator<< (std::ostream& o, const EnableTablePhase& phase) { - static const char* msg[] = {"EnableTablePhase::kPrepare", - "EnableTablePhase::kEnableTable", - "EnableTablePhase::kUpdateMeta", - "EnableTablePhase::kEnableTablets", - "EnableTablePhase::kEofPhase", - "EnableTablePhase::kUnknown"}; - static uint32_t msg_size = sizeof(msg) / sizeof(const char*); - typedef std::underlying_type::type UnderType; - uint32_t index = static_cast(phase) - static_cast(EnableTablePhase::kPrepare); - index = index < msg_size ? index : msg_size - 1; - o << msg[index]; - return o; - +std::ostream& operator<<(std::ostream& o, const EnableTablePhase& phase) { + static const char* msg[] = {"EnableTablePhase::kPrepare", "EnableTablePhase::kEnableTable", + "EnableTablePhase::kUpdateMeta", "EnableTablePhase::kEnableTablets", + "EnableTablePhase::kEofPhase", "EnableTablePhase::kUnknown"}; + static uint32_t msg_size = sizeof(msg) / sizeof(const char*); + typedef std::underlying_type::type UnderType; + uint32_t index = + static_cast(phase) - static_cast(EnableTablePhase::kPrepare); + index = index < msg_size ? index : msg_size - 1; + o << msg[index]; + return o; } - - } } diff --git a/src/master/enable_table_procedure.h b/src/master/enable_table_procedure.h index 27880e036..b3b10d072 100644 --- a/src/master/enable_table_procedure.h +++ b/src/master/enable_table_procedure.h @@ -16,64 +16,63 @@ namespace tera { namespace master { enum class EnableTablePhase { - kPrepare, - kEnableTable, - kUpdateMeta, - kEnableTablets, - kEofPhase, + kPrepare, + kEnableTable, + kUpdateMeta, + kEnableTablets, + kEofPhase, }; -std::ostream& operator<< (std::ostream& o, const EnableTablePhase& phase); +std::ostream& operator<<(std::ostream& o, const EnableTablePhase& phase); class EnableTableProcedure : public Procedure { -public: - EnableTableProcedure(TablePtr table, - const EnableTableRequest* request, - EnableTableResponse* response, - google::protobuf::Closure* closure, - ThreadPool* thread_pool); - virtual ~EnableTableProcedure() {} - - virtual std::string ProcId() const; - - virtual void RunNextStage(); - - virtual bool Done() {return done_.load();} -private: - typedef std::function EnableTablePhaseHandler; - - void SetNextPhase(const EnableTablePhase& phase) {phases_.emplace_back(phase);} - - void EnterPhaseWithResponseStatus(StatusCode status, EnableTablePhase phase) { - response_->set_status(status); - SetNextPhase(phase); - } - - EnableTablePhase GetCurrentPhase() {return phases_.back();} - - void PrepareHandler(const EnableTablePhase&); - - void EnableTableHandler(const EnableTablePhase&); - - void UpdateMetaHandler(const EnableTablePhase&); - - void EnableTabletsHandler(const EnableTablePhase&); - - void EofPhaseHandler(const EnableTablePhase&); - - void UpdateMetaDone(bool succ); - -private: - TablePtr table_; - const EnableTableRequest* request_; - EnableTableResponse* response_; - google::protobuf::Closure* rpc_closure_; - std::atomic update_meta_; - std::atomic done_; - std::vector phases_; - static std::map phase_handlers_; - ThreadPool* thread_pool_; -}; + public: + EnableTableProcedure(TablePtr table, const EnableTableRequest* request, + EnableTableResponse* response, google::protobuf::Closure* closure, + ThreadPool* thread_pool); + virtual ~EnableTableProcedure() {} + + virtual std::string ProcId() const; + + virtual void RunNextStage(); + + virtual bool Done() { return done_.load(); } + + private: + typedef std::function + EnableTablePhaseHandler; + + void SetNextPhase(const EnableTablePhase& phase) { phases_.emplace_back(phase); } + + void EnterPhaseWithResponseStatus(StatusCode status, EnableTablePhase phase) { + response_->set_status(status); + SetNextPhase(phase); + } + + EnableTablePhase GetCurrentPhase() { return phases_.back(); } + void PrepareHandler(const EnableTablePhase&); + + void EnableTableHandler(const EnableTablePhase&); + + void UpdateMetaHandler(const EnableTablePhase&); + + void EnableTabletsHandler(const EnableTablePhase&); + + void EofPhaseHandler(const EnableTablePhase&); + + void UpdateMetaDone(bool succ); + + private: + TablePtr table_; + const EnableTableRequest* request_; + EnableTableResponse* response_; + google::protobuf::Closure* rpc_closure_; + std::atomic update_meta_; + std::atomic done_; + std::vector phases_; + static std::map phase_handlers_; + ThreadPool* thread_pool_; +}; } } diff --git a/src/master/gc_strategy.cc b/src/master/gc_strategy.cc deleted file mode 100644 index 5d8b9e358..000000000 --- a/src/master/gc_strategy.cc +++ /dev/null @@ -1,270 +0,0 @@ -// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "master/gc_strategy.h" - -#include - -#include "db/filename.h" -#include "io/utils_leveldb.h" -#include "leveldb/env_dfs.h" - -DECLARE_string(tera_tabletnode_path_prefix); -DECLARE_string(tera_master_meta_table_name); -DECLARE_int32(tera_garbage_collect_debug_log); -DECLARE_string(tera_leveldb_env_type); -namespace tera { -namespace master { - -BatchGcStrategy::BatchGcStrategy (std::shared_ptr tablet_manager) - : tablet_manager_(tablet_manager), - file_total_num_(0), - file_delete_num_(0) {} - -bool BatchGcStrategy::PreQuery () { - int64_t start_ts = get_micros(); - gc_live_files_.clear(); - gc_tablets_.clear(); - - std::vector tables; - - tablet_manager_->ShowTable(&tables, NULL); - for (size_t i = 0; i < tables.size(); ++i) { - if (tables[i]->GetStatus() != kTableEnable || - tables[i]->GetTableName() == FLAGS_tera_master_meta_table_name) { - // table not ready and skip metatable - continue; - } - GcTabletSet& tablet_set = gc_tablets_[tables[i]->GetTableName()]; - if (!tables[i]->GetTabletsForGc(&tablet_set.first, &tablet_set.second, false)) { - // tablet not ready or there is none dead tablets - gc_tablets_.erase(tables[i]->GetTableName()); - continue; - } - } - - file_total_num_ = 0; - CollectDeadTabletsFiles(); - - LOG(INFO) << "[gc] DoTabletNodeGc: collect all files, total:" << file_total_num_ - << ", cost: " << (get_micros() - start_ts) / 1000 << "ms."; - - if (gc_tablets_.size() == 0) { - LOG(INFO) << "[gc] do not need gc this time."; - return false; - } - return true; -} - -void BatchGcStrategy::PostQuery () { - bool is_success = true; - std::map::iterator it = gc_tablets_.begin(); - for (; it != gc_tablets_.end(); ++it) { - if (it->second.first.size() != 0) { - VLOG(10) << "[gc] there are tablet not ready: " << it->first; - is_success = false; - break; - } - } - if (!is_success) { - LOG(INFO) << "[gc] gc not success, try next time."; - return; - } - - file_delete_num_ = 0; - int64_t start_ts = get_micros(); - DeleteObsoleteFiles(); - LOG(INFO) << "[gc] DoTabletNodeGcPhase2 finished, total:" << file_delete_num_ - << ", cost:" << (get_micros() - start_ts) / 1000 << "ms. list_times " << list_count_.Get(); - list_count_.Clear(); -} - -void BatchGcStrategy::Clear(std::string tablename) { - LOG(INFO) << "[gc] Clear do nothing (BatchGcStrategy) " << tablename; -} - -void BatchGcStrategy::ProcessQueryCallbackForGc(QueryResponse* response) { - MutexLock lock(&gc_mutex_); - std::set gc_table_set; - for (int i = 0; i < response->inh_live_files_size(); ++i) { - const InheritedLiveFiles& live = response->inh_live_files(i); - gc_table_set.insert(live.table_name()); - } - - for (int i = 0; i < response->tabletmeta_list().meta_size(); ++i) { - const TabletMeta& meta = response->tabletmeta_list().meta(i); - VLOG(10) << "[gc] try erase live tablet: " << meta.path() - << ", tablename: " << meta.table_name(); - if (gc_tablets_.find(meta.table_name()) != gc_tablets_.end() && - gc_table_set.find(meta.table_name()) != gc_table_set.end()) { - // erase live tablet - VLOG(10) << "[gc] erase live tablet: " << meta.path(); - uint64_t tabletnum = leveldb::GetTabletNumFromPath(meta.path()); - gc_tablets_[meta.table_name()].first.erase(tabletnum); - } - } - - // erase inherited live files - for (int i = 0; i < response->inh_live_files_size(); ++i) { - const InheritedLiveFiles& live = response->inh_live_files(i); - if (gc_live_files_.find(live.table_name()) == gc_live_files_.end()) { - VLOG(10) << "[gc] table: " << live.table_name() << " skip gc."; - continue; - } - GcFileSet& file_set = gc_live_files_[live.table_name()]; - int lg_num = live.lg_live_files_size(); - CHECK(static_cast(lg_num) == file_set.size()) - << "lg_num should eq " << file_set.size(); - for (int lg = 0; lg < lg_num; ++lg) { - const LgInheritedLiveFiles& lg_live_files = live.lg_live_files(lg); - for (int f = 0; f < lg_live_files.file_number_size(); ++f) { - std::string file_path = leveldb::BuildTableFilePath( - live.table_name(), lg, lg_live_files.file_number(f)); - VLOG(10) << "[gc] " << " erase live file: " << file_path; - file_set[lg].erase(lg_live_files.file_number(f)); - } - } - } -} - -void BatchGcStrategy::CollectDeadTabletsFiles() { - std::map::iterator table_it = gc_tablets_.begin(); - for (; table_it != gc_tablets_.end(); ++table_it) { - std::set& dead_tablets = table_it->second.second; - std::set::iterator tablet_it = dead_tablets.begin(); - for (; tablet_it != dead_tablets.end(); ++tablet_it) { - CollectSingleDeadTablet(table_it->first, *tablet_it); - } - } -} - -bool BatchGcStrategy::CollectSingleDeadTablet(const std::string& tablename, uint64_t tabletnum) { - std::string tablepath = FLAGS_tera_tabletnode_path_prefix + "/" + tablename; - std::string tablet_path = leveldb::GetTabletPathFromNum(tablepath, tabletnum); - leveldb::Env* env = io::LeveldbBaseEnv(); - std::vector children; - env->GetChildren(tablet_path, &children); - list_count_.Inc(); - if (children.size() == 0) { - leveldb::FileLock* file_lock = nullptr; - // NEVER remove the trailing character '/', otherwise you will lock the parent directory - leveldb::Status s = env->LockFile(tablet_path + "/", &file_lock); - if (!s.ok()) { - LOG(WARNING) << "lock path failed, path: " << tablet_path << ", status: " << s.ToString(); - } - - delete file_lock; - - env->DeleteDir(tablet_path); - return false; - } - for (size_t lg = 0; lg < children.size(); ++lg) { - std::string lg_path = tablet_path + "/" + children[lg]; - leveldb::FileType type = leveldb::kUnknown; - uint64_t number = 0; - if (ParseFileName(children[lg], &number, &type)) { - LOG(INFO) << "[gc] delete: " << lg_path; - - leveldb::FileLock* file_lock = nullptr; - // NEVER remove the trailing character '/', otherwise you will lock the parent directory - leveldb::Status s = env->LockFile(tablet_path + "/", &file_lock); - if (!s.ok()) { - LOG(WARNING) << "lock path failed, path: " << tablet_path << ", status: " << s.ToString(); - } - - env->DeleteFile(lg_path); - continue; - } - - leveldb::Slice rest(children[lg]); - uint64_t lg_num = 0; - if (!leveldb::ConsumeDecimalNumber(&rest, &lg_num)) { - LOG(ERROR) << "[gc] skip unknown dir: " << lg_path; - continue; - } - - std::vector files; - env->GetChildren(lg_path, &files); - list_count_.Inc(); - if (files.size() == 0) { - LOG(INFO) << "[gc] delete empty lg dir: " << lg_path; - leveldb::FileLock* file_lock = nullptr; - // NEVER remove the trailing character '/', otherwise you will lock the parent directory - leveldb::Status s = env->LockFile(tablet_path + "/", &file_lock); - if (!s.ok()) { - LOG(WARNING) << "lock path failed, path: " << tablet_path << ", status: " << s.ToString(); - } - delete file_lock; - env->DeleteDir(lg_path); - continue; - } - file_total_num_ += files.size(); - for (size_t f = 0; f < files.size(); ++f) { - std::string file_path = lg_path + "/" + files[f]; - type = leveldb::kUnknown; - number = 0; - if (!ParseFileName(files[f], &number, &type) || - type != leveldb::kTableFile) { - // only keep sst, delete rest files - leveldb::FileLock* file_lock = nullptr; - // NEVER remove the trailing character '/', otherwise you will lock the parent directory - leveldb::Status s = env->LockFile(lg_path + "/", &file_lock); - if (!s.ok()) { - LOG(WARNING) << "lock path failed, path: " << lg_path << ", status: " << s.ToString(); - } - delete file_lock; - io::DeleteEnvDir(file_path); - continue; - } - - uint64_t full_number = leveldb::BuildFullFileNumber(lg_path, number); - GcFileSet& file_set = gc_live_files_[tablename]; - if (file_set.size() == 0) { - TablePtr table; - CHECK(tablet_manager_->FindTable(tablename, &table)); - file_set.resize(table->GetSchema().locality_groups_size()); - VLOG(10) << "[gc] resize : " << tablename - << " fileset lg size: " << file_set.size(); - } - VLOG(10) << "[gc] " << tablename << " insert live file: " << file_path; - CHECK(lg_num < file_set.size()); - file_set[lg_num].insert(full_number); - } - } - return true; -} - -void BatchGcStrategy::DeleteObsoleteFiles() { - leveldb::Env* env = io::LeveldbBaseEnv(); - std::map::iterator table_it = gc_live_files_.begin(); - for (; table_it != gc_live_files_.end(); ++table_it) { - std::string tablepath = FLAGS_tera_tabletnode_path_prefix + "/" + table_it->first; - GcFileSet& file_set = table_it->second; - for (size_t lg = 0; lg < file_set.size(); ++lg) { - std::set::iterator it = file_set[lg].begin(); - for (; it != file_set[lg].end(); ++it) { - uint64_t tablet = 0; - uint64_t number = 0; - leveldb::ParseFullFileNumber(*it, &tablet, &number); - std::string file_path = leveldb::BuildTableFilePath(tablepath, tablet, lg, number); - std::string lg_path = leveldb::BuildTabletLgPath(tablepath, tablet, lg); - - leveldb::FileLock* file_lock = nullptr; - // NEVER remove the trailing character '/', otherwise you will lock the parent directory - leveldb::Status s = env->LockFile(lg_path + "/", &file_lock); - if (!s.ok()) { - LOG(WARNING) << "lock path failed, path: " << lg_path << ", status: " << s.ToString(); - } - delete file_lock; - - LOG(INFO) << "[gc] delete: " << file_path; - env->DeleteFile(file_path); - file_delete_num_++; - } - } - } -} - -} // namespace master -} // namespace tera diff --git a/src/master/gc_strategy.h b/src/master/gc_strategy.h deleted file mode 100644 index c68364502..000000000 --- a/src/master/gc_strategy.h +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. -#ifndef TERA_MASTER_GC_STRATEGY_H_ -#define TERA_MASTER_GC_STRATEGY_H_ - -#include "master/tablet_manager.h" -#include "proto/tabletnode_client.h" -#include "types.h" -#include "common/counter.h" - -namespace tera { -namespace master { - -class TabletManager; -class TabletNodeManager; - -class GcStrategy { -public: - virtual ~GcStrategy() {} - - // get file system image before query - // return true if need to triger gc (gc query & post query) - virtual bool PreQuery () = 0; - - // process gc query results - virtual void ProcessQueryCallbackForGc(QueryResponse* response) = 0; - - // delete useless files - virtual void PostQuery () = 0; - - // clear memory when table is deleted - virtual void Clear(std::string tablename) = 0; -}; - -class BatchGcStrategy : public GcStrategy { -public: - BatchGcStrategy (std::shared_ptr tablet_manager); - virtual ~BatchGcStrategy() {} - - // get file system image before query - virtual bool PreQuery (); - - // compute dead files - virtual void ProcessQueryCallbackForGc(QueryResponse* response); - - // delete dead files - virtual void PostQuery (); - - virtual void Clear(std::string tablename); - -private: - void CollectDeadTabletsFiles(); - bool CollectSingleDeadTablet(const std::string& tablename, uint64_t tabletnum); - void DeleteObsoleteFiles(); - - std::shared_ptr tablet_manager_; - - // tabletnode garbage clean - // first: live tablet, second: dead tablet - typedef std::pair, std::set > GcTabletSet; - typedef std::vector > GcFileSet; - mutable Mutex gc_mutex_; - std::map gc_tablets_; - std::map gc_live_files_; - int64_t file_total_num_; - int64_t file_delete_num_; - tera::Counter list_count_; -}; - -} // namespace master -} // namespace tera - -#endif // TERA_MASTER_GC_STRATEGY_H_ diff --git a/src/master/load_tablet_procedure.cc b/src/master/load_tablet_procedure.cc index 15f94bab9..7b2bb2467 100644 --- a/src/master/load_tablet_procedure.cc +++ b/src/master/load_tablet_procedure.cc @@ -12,433 +12,482 @@ DECLARE_string(tera_master_meta_table_path); DECLARE_int32(tera_master_load_rpc_timeout); DECLARE_int32(tera_master_control_tabletnode_retry_period); DECLARE_int32(tera_master_impl_retry_times); -DECLARE_int32(tera_master_load_slow_retry_times); DECLARE_bool(tera_stat_table_enabled); -DEFINE_int32(tablet_load_max_tried_ts, 3, "max number of tabletnodes " - "a tablet can try to load on before it finally enter status kTableLoadFail"); +DEFINE_int32(tablet_load_max_tried_ts, 3, + "max number of tabletnodes " + "a tablet can try to load on before it finally enter status " + "kTableLoadFail"); namespace tera { namespace master { -std::map LoadTabletProcedure::event_handlers_ { - {TabletEvent::kTsOffline, std::bind(&LoadTabletProcedure::TabletNodeOffLineHandler, _1, _2)}, - {TabletEvent::kTsRestart, std::bind(&LoadTabletProcedure::TabletNodeRestartHandler, _1, _2)}, - {TabletEvent::kTsLoadBusy, std::bind(&LoadTabletProcedure::TabletNodeBusyHandler, _1, _2)}, - {TabletEvent::kTsDelayOffline, std::bind(&LoadTabletProcedure::TabletPendOffLineHandler, _1, _2)}, - {TabletEvent::kUpdateMeta, std::bind(&LoadTabletProcedure::UpdateMetaHandler, _1, _2)}, - {TabletEvent::kLoadTablet, std::bind(&LoadTabletProcedure::LoadTabletHandler, _1, _2)}, - {TabletEvent::kWaitRpcResponse, std::bind(&LoadTabletProcedure::WaitRpcResponseHandler, _1, _2)}, - {TabletEvent::kTsLoadSucc, std::bind(&LoadTabletProcedure::TabletNodeLoadSuccHandler, _1, _2)}, - {TabletEvent::kTsLoadFail, std::bind(&LoadTabletProcedure::TabletNodeLoadFailHandler, _1, _2)}, - {TabletEvent::kTabletLoadFail, std::bind(&LoadTabletProcedure::TabletLoadFailHandler, _1, _2)}, - {TabletEvent::kEofEvent, std::bind(&LoadTabletProcedure::EOFHandler, _1, _2)} -}; - -LoadTabletProcedure::LoadTabletProcedure(TabletPtr tablet, ThreadPool* thread_pool) : - LoadTabletProcedure(tablet, TabletNodePtr(nullptr), thread_pool) { -} +std::map LoadTabletProcedure::event_handlers_{ + {TabletEvent::kTsOffline, std::bind(&LoadTabletProcedure::TabletNodeOfflineHandler, _1, _2)}, + {TabletEvent::kTsRestart, std::bind(&LoadTabletProcedure::TabletNodeRestartHandler, _1, _2)}, + {TabletEvent::kTsLoadBusy, std::bind(&LoadTabletProcedure::TabletNodeBusyHandler, _1, _2)}, + {TabletEvent::kTsDelayOffline, + std::bind(&LoadTabletProcedure::TabletPendOfflineHandler, _1, _2)}, + {TabletEvent::kUpdateMeta, std::bind(&LoadTabletProcedure::UpdateMetaHandler, _1, _2)}, + {TabletEvent::kLoadTablet, std::bind(&LoadTabletProcedure::LoadTabletHandler, _1, _2)}, + {TabletEvent::kWaitRpcResponse, + std::bind(&LoadTabletProcedure::WaitRpcResponseHandler, _1, _2)}, + {TabletEvent::kTsLoadSucc, std::bind(&LoadTabletProcedure::TabletNodeLoadSuccHandler, _1, _2)}, + {TabletEvent::kTsLoadFail, std::bind(&LoadTabletProcedure::TabletNodeLoadFailHandler, _1, _2)}, + {TabletEvent::kTabletLoadFail, std::bind(&LoadTabletProcedure::TabletLoadFailHandler, _1, _2)}, + {TabletEvent::kEofEvent, std::bind(&LoadTabletProcedure::EOFHandler, _1, _2)}}; + +LoadTabletProcedure::LoadTabletProcedure(TabletPtr tablet, ThreadPool* thread_pool) + : LoadTabletProcedure(tablet, TabletNodePtr(nullptr), thread_pool, false) {} LoadTabletProcedure::LoadTabletProcedure(TabletPtr tablet, TabletNodePtr dest_node, - ThreadPool* thread_pool) : - id_(std::string("LoadTablet:") + tablet->GetPath() + ":" + TimeStamp()), - tablet_(tablet), - dest_node_(dest_node), - done_(false), - load_request_dispatching_(false), - load_retrys_(0), - slow_load_retrys_(0), - update_meta_done_(false), - thread_pool_(thread_pool) { - PROC_LOG(INFO) << "load tablet begin, tablet: " << tablet_->GetPath(); - PROC_CHECK(tablet_->GetStatus() == TabletMeta::kTabletOffline || - tablet_->GetStatus() == TabletMeta::kTabletDelayOffline || - tablet_->GetStatus() == TabletMeta::kTabletLoadFail); - // corrupted DB need recovery, set its's status from kTabletLoadFail to kTabletOffline thus - // we can recover it from status kTabletOffline - if (tablet_->GetStatus() == TabletMeta::kTabletLoadFail && tablet_->HasErrorIgnoredLGs()) { - tablet_->SetStatus(TabletMeta::kTabletOffline); - } - if (dest_node_) { - BindTabletToTabletNode(tablet_, dest_node_); - } + ThreadPool* thread_pool) + : LoadTabletProcedure(tablet, dest_node, thread_pool, false) {} + +LoadTabletProcedure::LoadTabletProcedure(TabletPtr tablet, TabletNodePtr dest_node, + ThreadPool* thread_pool, bool is_sub_proc) + : id_(std::string("LoadTablet:") + tablet->GetPath() + ":" + TimeStamp()), + tablet_(tablet), + dest_node_(dest_node), + done_(false), + load_request_dispatching_(false), + load_retrys_(0), + slow_load_retrys_(0), + update_meta_done_(false), + is_sub_proc_(is_sub_proc), + thread_pool_(thread_pool) { + PROC_LOG(INFO) << "load tablet begin, tablet: " << tablet_->GetPath(); + PROC_CHECK(tablet_->GetStatus() == TabletMeta::kTabletOffline || + tablet_->GetStatus() == TabletMeta::kTabletDelayOffline || + tablet_->GetStatus() == TabletMeta::kTabletLoadFail); + // corrupted DB need recovery, set its's status from kTabletLoadFail to + // kTabletOffline thus + // we can recover it from status kTabletOffline + if (tablet_->GetStatus() == TabletMeta::kTabletLoadFail && tablet_->HasErrorIgnoredLGs()) { + tablet_->SetStatus(TabletMeta::kTabletOffline); + } + if (dest_node_) { + BindTabletToTabletNode(tablet_, dest_node_); + } } -// UpdateMeta & LoadTabletAsync is regard as an undivided process phase and we issued LoadTabletAsync -// immediately in UpdateMetaDone callback. -// As currently we ensure that UpdateMeta will succ finally by +// UpdateMeta & LoadTabletAsync is regard as an undivided process phase and we +// issued LoadTabletAsync +// immediately in UpdateMetaDone callback. +// As currently we ensure that UpdateMeta will succ finally by void LoadTabletProcedure::UpdateMetaDone(bool) { - PROC_LOG(INFO) << "update meta tablet success, " << tablet_; - update_meta_done_.store(true); + PROC_LOG(INFO) << "update meta tablet success, " << tablet_; + update_meta_done_.store(true); } -void LoadTabletProcedure::LoadTabletAsyncWrapper(std::weak_ptr weak_proc, TabletNodePtr dest_node) { - auto proc = weak_proc.lock(); - if (!proc) { - LOG(WARNING) << "weak_ptr expired, giveup this loadtabletasync try"; - return; - } - return proc->LoadTabletAsync(dest_node); +void LoadTabletProcedure::LoadTabletAsyncWrapper(std::weak_ptr weak_proc, + TabletNodePtr dest_node) { + auto proc = weak_proc.lock(); + if (!proc) { + LOG(WARNING) << "weak_ptr expired, giveup this loadtabletasync try"; + return; + } + return proc->LoadTabletAsync(dest_node); } void LoadTabletProcedure::LoadTabletAsync(TabletNodePtr dest_node) { - if (dest_node->NodeDown()) { - LOG(WARNING) << "dest node offline, giveup this load try node: " << dest_node->GetAddr(); - return; - } - - tabletnode::TabletNodeClient node_client(thread_pool_, dest_node->GetAddr(), - FLAGS_tera_master_load_rpc_timeout); - LoadTabletRequest* request = new LoadTabletRequest; - LoadTabletResponse* response = new LoadTabletResponse; - request->set_tablet_name(tablet_->GetTableName()); - request->set_sequence_id(MasterEnv().SequenceId().Inc()); - request->mutable_key_range()->set_key_start(tablet_->GetKeyStart()); - request->mutable_key_range()->set_key_end(tablet_->GetKeyEnd()); - request->set_path(tablet_->GetPath()); - request->mutable_schema()->CopyFrom(tablet_->GetSchema()); - request->set_session_id(dest_node->uuid_); - - TablePtr table = tablet_->GetTable(); - TabletMeta meta; - tablet_->ToMeta(&meta); - PROC_CHECK(meta.parent_tablets_size() <= 2) - << "too many parents tablets: " << meta.parent_tablets_size(); - for (int32_t i = 0; i < meta.parent_tablets_size(); ++i) { - request->add_parent_tablets(meta.parent_tablets(i)); - } - - std::vector ignore_err_lgs; - tablet_->GetErrorIgnoredLGs(&ignore_err_lgs); - for (uint32_t i = 0; i < ignore_err_lgs.size(); ++i) { - PROC_VLOG(6) << "tablet:" << tablet_->GetPath() << " Add ignore err lg to request :" << ignore_err_lgs[i]; - request->add_ignore_err_lgs(ignore_err_lgs[i]); - } - tablet_->SetErrorIgnoredLGs(); // clean error lg, only for this request once - - PROC_LOG(INFO) << "LoadTabletAsync id: " << request->sequence_id() << ", " - << tablet_; - // Bind "dest_node" to the first parameter of LoadTabletCallback as dest_node may decay to kOffLine state - // and we initiate RPC to a new started TabletNode on the same IP:PORT before we apperceive it - // under which case we may have a success rpc with status code being "kIllegalAccess" - //NOTICE: explicit cast from shared_ptr to weak_ptr is necessary! - LoadClosure done = std::bind(&LoadTabletProcedure::LoadTabletCallbackWrapper, - std::weak_ptr(shared_from_this()), dest_node, _1, _2, _3, _4); - node_client.LoadTablet(request, response, done); + if (dest_node->NodeDown()) { + LOG(WARNING) << "dest node offline, giveup this load try node: " << dest_node->GetAddr(); + return; + } + + tabletnode::TabletNodeClient node_client(thread_pool_, dest_node->GetAddr(), + FLAGS_tera_master_load_rpc_timeout); + LoadTabletRequest* request = new LoadTabletRequest; + LoadTabletResponse* response = new LoadTabletResponse; + request->set_tablet_name(tablet_->GetTableName()); + request->set_sequence_id(MasterEnv().SequenceId().Inc()); + request->mutable_key_range()->set_key_start(tablet_->GetKeyStart()); + request->mutable_key_range()->set_key_end(tablet_->GetKeyEnd()); + request->set_path(tablet_->GetPath()); + request->mutable_schema()->CopyFrom(tablet_->GetSchema()); + request->set_session_id(dest_node->uuid_); + request->set_create_time(tablet_->CreateTime()); + request->set_version(tablet_->Version()); + TablePtr table = tablet_->GetTable(); + TabletMeta meta; + tablet_->ToMeta(&meta); + PROC_CHECK(meta.parent_tablets_size() <= 2) + << "too many parents tablets: " << meta.parent_tablets_size(); + for (int32_t i = 0; i < meta.parent_tablets_size(); ++i) { + request->add_parent_tablets(meta.parent_tablets(i)); + } + + std::vector ignore_err_lgs; + tablet_->GetErrorIgnoredLGs(&ignore_err_lgs); + for (uint32_t i = 0; i < ignore_err_lgs.size(); ++i) { + PROC_VLOG(6) << "tablet:" << tablet_->GetPath() + << " Add ignore err lg to request :" << ignore_err_lgs[i]; + request->add_ignore_err_lgs(ignore_err_lgs[i]); + } + tablet_->SetErrorIgnoredLGs(); // clean error lg, only for this request once + + PROC_LOG(INFO) << "LoadTabletAsync id: " << request->sequence_id() << ", " << tablet_; + // Bind "dest_node" to the first parameter of LoadTabletCallback as dest_node + // may decay to kOffline state + // and we initiate RPC to a new started TabletNode on the same IP:PORT before + // we apperceive it + // under which case we may have a success rpc with status code being + // "kIllegalAccess" + // NOTICE: explicit cast from shared_ptr to weak_ptr is necessary! + LoadClosure done = + std::bind(&LoadTabletProcedure::LoadTabletCallbackWrapper, + std::weak_ptr(shared_from_this()), dest_node, _1, _2, _3, _4); + node_client.LoadTablet(request, response, done); } void LoadTabletProcedure::LoadTabletCallbackWrapper(std::weak_ptr weak_proc, - TabletNodePtr node, - LoadTabletRequest* request, - LoadTabletResponse* response, - bool failed, - int error_code) { - auto proc = weak_proc.lock(); - if (!proc) { - LOG(WARNING) << "weak_ptr expired, giveup this loadtabletcallback"; - return; - } - return proc->LoadTabletCallback(node, request, response, failed, error_code); + TabletNodePtr node, LoadTabletRequest* request, + LoadTabletResponse* response, bool failed, + int error_code) { + auto proc = weak_proc.lock(); + if (!proc) { + LOG(WARNING) << "weak_ptr expired, giveup this loadtabletcallback"; + return; + } + return proc->LoadTabletCallback(node, request, response, failed, error_code); } - -//NOTICE: please do not process tabletserver down/restart event in this CallBack as this CallBack should only -//focus on RPC callback related logic -void LoadTabletProcedure::LoadTabletCallback(TabletNodePtr node, - LoadTabletRequest* request, - LoadTabletResponse* response, - bool failed, - int error_code) { - std::unique_ptr request_holder(request); - std::unique_ptr response_holder(response); - PROC_VLOG(23) << "load tablet: " << tablet_ << " callback: " << request_holder->sequence_id(); - if (tablet_->GetStatus() != TabletMeta::kTabletLoading) { - load_request_dispatching_.store(false); - PROC_LOG(WARNING) << "tablet: " << tablet_ << " fall into state: " - << StatusCodeToString(tablet_->GetStatus()) <<", discard this load tablet callback"; - return; - } - - CHECK(tablet_->GetStatus() == TabletMeta::kTabletLoading); - StatusCode status = response->status(); - // dest_node is still alive && rpc succ && return status is OK, then this load attempts is regarded as success - if (!node->NodeDown() && !failed && (status == kTabletNodeOk || status == kTabletReady)) { - PROC_LOG(INFO) << "id: " << request_holder->sequence_id() << "load tablet success, " << tablet_; - load_request_dispatching_.store(false); - if (FLAGS_tera_stat_table_enabled) { - MasterEnv().GetStatTable()->ErasureTabletCorrupt(request_holder->path()); - } - return; - } - // load failed - // TabletNode is down. Once the "node" is down, we regard the load attempt as fail and return immedialtely. - // No load retry will be issued unless a new TabletNode restarted on the same IP:PORT soonly or a - // new TabletNode with different IP:PORT is scheduled for this LoadProcedure - if (node->NodeDown()) { - PROC_LOG(INFO) << "id: " << request_holder->sequence_id() << - "load tablet: " << tablet_ << "dest_node is down: " << node->GetAddr(); - load_retrys_++; - return; - } - - // TabletNode is not down but RPC failed: - // 1. RPC failed. - // rpc connect failed, timeout, .etc. Or, - // 2. tabletserver return inappropriate status such as - // kIOError, nfs error or tablet db corruption - // kTableOnLoad, tablet load very slowly with the first load failed with RPC_ERROR_REQUEST_TIMEOUT - // and succeeding load request return with kTableOnLoad until the tablet loaded successful finally. etc - if (failed) { - PROC_LOG(WARNING) << "load tablet: "<< load_retrys_ << "th try failed, error: " - << sofa::pbrpc::RpcErrorCodeToString(error_code) << ", " << tablet_; - load_retrys_++; - } else if (status == kTabletOnLoad) { - // 10 times for slow tablet - if (++slow_load_retrys_ % 10 == 0) { - PROC_LOG(ERROR) << kSms << "slow load, retry: " << load_retrys_ << ", " << tablet_; - load_retrys_++; - } - } else { // maybe IOError as error happens when TabletNode access nfs - PROC_LOG(WARNING) << "load tablet " << load_retrys_ << "th try failed, error: " - << StatusCodeToString(status) << ", " << tablet_; - load_retrys_++; - } - // record detail fail msg to stat table - if (response_holder->has_detail_fail_msg() && FLAGS_tera_stat_table_enabled) { - std::string msg = response_holder->detail_fail_msg(); - MasterEnv().GetStatTable()->RecordTabletCorrupt(request_holder->path(), msg); - } - - if (load_retrys_ > FLAGS_tera_master_impl_retry_times) { - PROC_LOG(WARNING) << kSms << "load tablet: " << tablet_ << " failed at tabletnode: " << node->GetAddr(); - return; +// NOTICE: please do not process tabletserver down/restart event in this +// CallBack as this CallBack should only +// focus on RPC callback related logic +void LoadTabletProcedure::LoadTabletCallback(TabletNodePtr node, LoadTabletRequest* request, + LoadTabletResponse* response, bool failed, + int error_code) { + std::unique_ptr request_holder(request); + std::unique_ptr response_holder(response); + uint64_t sequence_id = request_holder->sequence_id(); + PROC_VLOG(23) << "load tablet: " << tablet_ << " callback: " << sequence_id; + if (tablet_->GetStatus() != TabletMeta::kTabletLoading) { + load_request_dispatching_.store(false); + PROC_LOG(WARNING) << "tablet: " << tablet_ + << " fall into state: " << StatusCodeToString(tablet_->GetStatus()) + << ", discard this load tablet callback"; + return; + } + + CHECK(tablet_->GetStatus() == TabletMeta::kTabletLoading); + StatusCode status = response->status(); + // dest_node is still alive && rpc succ && return status is OK, then this load + // attempts is regarded as success + if (!node->NodeDown() && !failed && (status == kTabletNodeOk || status == kTabletReady)) { + PROC_LOG(INFO) << "id: " << sequence_id << " load tablet success, " << tablet_; + load_request_dispatching_.store(false); + if (FLAGS_tera_stat_table_enabled) { + MasterEnv().GetStatTable()->ErasureTabletCorrupt(request_holder->path()); } - - //NOTICE: bind parameter "node" rather than member variable "dest_node_" to LoadTabletAsync - //because we should only send retry request to the previous TS which we communicated failed and - //succedent retries should be abandoned once TS is down. But dest_node_ maybe modified by - //TabletNodeOffLineHandler, leading unappropriate RPC retries to "dest_node_" - //if we pass "dest_node_" to LoadTabletAsync - //NOTICE: explicit cast from shared_ptr to weak_ptr is necessary! - ThreadPool::Task task = std::bind(&LoadTabletProcedure::LoadTabletAsyncWrapper, - std::weak_ptr(shared_from_this()), node); - MasterEnv().GetThreadPool()->DelayTask(FLAGS_tera_master_control_tabletnode_retry_period, task); return; + } + // load failed + // TabletNode is down. Once the "node" is down, we regard the load attempt as + // fail and return immedialtely. + // No load retry will be issued unless a new TabletNode restarted on the same + // IP:PORT soonly or a + // new TabletNode with different IP:PORT is scheduled for this LoadProcedure + if (node->NodeDown()) { + PROC_LOG(INFO) << "id: " << sequence_id << "load tablet: " << tablet_ + << "dest_node is down: " << node->GetAddr(); + load_retrys_++; + return; + } + + // record detail fail msg to stat table + if (response_holder->has_detail_fail_msg() && FLAGS_tera_stat_table_enabled) { + std::string msg = response_holder->detail_fail_msg(); + MasterEnv().GetStatTable()->RecordTabletCorrupt(request_holder->path(), msg); + } + + // TabletNode is not down but RPC failed: + // 1. RPC failed. + // generally, RPC_ERROR_REQUEST_TIMEOUT caused by tablet slow load is a + // common case + // If this happen, we will issue a load retry request immediately to query + // tablet's load at the end of this function + // 2. tabletserver return inappropriate status such as + // kIOError, nfs error or tablet db corruption + // kTableOnLoad, tablet load very slowly with the first load failed with + // RPC_ERROR_REQUEST_TIMEOUT + // and succeeding load request return with kTableOnLoad until the tablet + // loaded successful finally. etc + // for this case, we will delay issuing load retry request + if (failed) { + PROC_LOG(WARNING) << "id: " << sequence_id << ", load tablet: " << load_retrys_ + << "th try failed, error: " << sofa::pbrpc::RpcErrorCodeToString(error_code) + << ", " << tablet_; + load_retrys_++; + } else if (status == kTabletNodeIsBusy) { + VLOG(5) << "id: " << sequence_id << ", ts is too busy, load request is " + "rejected and will retry later, " + "tablet: " << tablet_; + } else if (status == kTabletWaitLoad || status == kTabletOnLoad) { + if (++slow_load_retrys_ % 10 == 0) { + load_retrys_++; + } + VLOG_IF(5, status == kTabletWaitLoad) << "id: " << sequence_id + << ", tablet load pended at ts, tablet: " << tablet_; + VLOG_IF(5, status == kTabletOnLoad) + << "id" << sequence_id << "slow load tablet: " << tablet_ + << ", tablet status query from ts: " << StatusCodeToString(status); + } else { + // maybe IOError as error happens when TabletNode access nfs + PROC_LOG(WARNING) << "id: " << sequence_id << ", load tablet " << load_retrys_ + << "th try failed, error: " << StatusCodeToString(status) << ", " << tablet_; + load_retrys_++; + } + + if (load_retrys_ > FLAGS_tera_master_impl_retry_times) { + PROC_LOG(WARNING) << kSms << "load tablet: " << tablet_ + << " failed at tabletnode: " << node->GetAddr(); + return; + } + // NOTICE: bind parameter "node" rather than member variable "dest_node_" to + // LoadTabletAsync + // because we should only send retry request to the previous TS which we + // communicated failed and + // succedent retries should be abandoned once TS is down. But dest_node_ maybe + // modified by + // TabletNodeOfflineHandler, leading unappropriate RPC retries to "dest_node_" + // if we pass "dest_node_" to LoadTabletAsync + // NOTICE: explicit cast from shared_ptr to weak_ptr is necessary! + ThreadPool::Task retry_task = + std::bind(&LoadTabletProcedure::LoadTabletAsyncWrapper, + std::weak_ptr(shared_from_this()), node); + MasterEnv().GetThreadPool()->DelayTask(FLAGS_tera_master_control_tabletnode_retry_period, + retry_task); } TabletEvent LoadTabletProcedure::GenerateEvent() { - if (tablet_->GetStatus() == TabletMeta::kTabletReady || - tablet_->GetStatus() == TabletMeta::kTabletLoadFail) { - return TabletEvent::kEofEvent; - } - // we regard kTabletPending as a special status equivalent to kTableOffLine only with extra - // consideration for cache locality, so they both call GenrateTabletOffLineEvent() - if (tablet_->GetStatus() == TabletMeta::kTabletOffline || - tablet_->GetStatus() == TabletMeta::kTabletDelayOffline) { - return GenerateTabletOffLineEvent(); - } - if (tablet_->GetStatus() == TabletMeta::kTabletLoading) { - return GenerateTabletOnLoadEvent(); - } + if (tablet_->GetStatus() == TabletMeta::kTabletReady || + tablet_->GetStatus() == TabletMeta::kTabletLoadFail) { return TabletEvent::kEofEvent; + } + // we regard kTabletPending as a special status equivalent to kTableOffline + // only with extra + // consideration for cache locality, so they both call + // GenrateTabletOfflineEvent() + if (tablet_->GetStatus() == TabletMeta::kTabletOffline || + tablet_->GetStatus() == TabletMeta::kTabletDelayOffline) { + return GenerateTabletOfflineEvent(); + } + if (tablet_->GetStatus() == TabletMeta::kTabletLoading) { + return GenerateTabletOnLoadEvent(); + } + return TabletEvent::kEofEvent; } -TabletEvent LoadTabletProcedure::GenerateTabletOffLineEvent() { - if (!dest_node_) { - return TabletEvent::kTsOffline; +TabletEvent LoadTabletProcedure::GenerateTabletOfflineEvent() { + if (!dest_node_) { + return TabletEvent::kTsOffline; + } + if (dest_node_->NodeDown()) { + return GenerateTsDownEvent(); + } + // dest TS is alive, and load concurrency allows the tablet to be load, so + // generate load event + else { + // meta table always load immediately + if (tablet_->GetTableName() == FLAGS_tera_master_meta_table_name) { + return TabletEvent::kLoadTablet; } - if (dest_node_->NodeDown()) { - return GenerateTsDownEvent(); + if (!update_meta_done_) { + return TabletEvent::kUpdateMeta; } - // dest TS is alive, and load concurrency allows the tablet to be load, so generate load event - else { - // meta table always load immediately - if (tablet_->GetTableName() == FLAGS_tera_master_meta_table_name) { - return TabletEvent::kLoadTablet; - } - if (!update_meta_done_) { - return TabletEvent::kUpdateMeta; - } - if (dest_node_->TryLoad(tablet_)) { - return TabletEvent::kLoadTablet; - } - return TabletEvent::kTsLoadBusy; + if (dest_node_->TryLoad(tablet_)) { + return TabletEvent::kLoadTablet; } + return TabletEvent::kTsLoadBusy; + } } TabletEvent LoadTabletProcedure::GenerateTabletOnLoadEvent() { - // all load attempts on the same TS (IP:PORT) all failed, return TS_LOAD_FAIL - // should clear load_retrys_, dest_node_, loading_ and change tablet's state to kTableOffLine - if (load_retrys_ > FLAGS_tera_master_impl_retry_times) { - return TabletEvent::kTsLoadFail; - } - // - if (dest_node_->NodeDown()) { - return GenerateTsDownEvent(); - } - if (load_request_dispatching_) { - return TabletEvent::kWaitRpcResponse; - } - else { - return TabletEvent::kTsLoadSucc; - } + // all load attempts on the same TS (IP:PORT) all failed, return TS_LOAD_FAIL + // should clear load_retrys_, dest_node_, loading_ and change tablet's state + // to kTableOffline + if (load_retrys_ > FLAGS_tera_master_impl_retry_times) { + return TabletEvent::kTsLoadFail; + } + // + if (dest_node_->NodeDown()) { + return GenerateTsDownEvent(); + } + if (load_request_dispatching_) { + return TabletEvent::kWaitRpcResponse; + } else { + return TabletEvent::kTsLoadSucc; + } } TabletEvent LoadTabletProcedure::GenerateTsDownEvent() { - // if meta tablet's dest_node down, always return kTsOffLine, - // thus we can reschedule an available ts for meta table immediate - if (tablet_->GetTableName() == FLAGS_tera_master_meta_table_name) { - return TabletEvent::kTsOffline; - } - if (MasterEnv().GetTabletNodeManager()->FindTabletNode(dest_node_->GetAddr(), &restarted_dest_node_)) { - return TabletEvent::kTsRestart; - } - if (dest_node_->GetState() == kPendingOffLine) { - return TabletEvent::kTsDelayOffline; - } - // we should schedule an avail tabletnode for the tablet + // if meta tablet's dest_node down, always return kTsOffline, + // thus we can reschedule an available ts for meta table immediate + if (tablet_->GetTableName() == FLAGS_tera_master_meta_table_name) { return TabletEvent::kTsOffline; + } + if (MasterEnv().GetTabletNodeManager()->FindTabletNode(dest_node_->GetAddr(), + &restarted_dest_node_)) { + return TabletEvent::kTsRestart; + } + if (dest_node_->GetState() == kPendingOffline) { + return TabletEvent::kTsDelayOffline; + } + // we should schedule an avail tabletnode for the tablet + return TabletEvent::kTsOffline; } bool LoadTabletProcedure::IsNewEvent(TabletEvent event) { - // except TabletEvent::kTsRestart, other two continuous equal events is indeed the same event - // thus we can deal only once - if (events_.empty() || events_.back() != event || - event == TabletEvent::kTsRestart || event == TabletEvent::kTsOffline) { - if (!dest_node_) { - PROC_VLOG(23) << "tablet: " << tablet_->GetPath() << ", event: " << event; - } - else { - PROC_VLOG(23) << "tablet: " << tablet_->GetPath() - << ", event: " << event << ", dest_node: " << dest_node_->GetAddr(); - } - events_.emplace_back(event); - return true; - } - return false; + // except TabletEvent::kTsRestart, other two continuous equal events is indeed + // the same event + // thus we can deal only once + if (events_.empty() || events_.back() != event || event == TabletEvent::kTsRestart || + event == TabletEvent::kTsOffline) { + if (!dest_node_) { + PROC_VLOG(23) << "tablet: " << tablet_->GetPath() << ", event: " << event; + } else { + PROC_VLOG(23) << "tablet: " << tablet_->GetPath() << ", event: " << event + << ", dest_node: " << dest_node_->GetAddr(); + } + events_.emplace_back(event); + return true; + } + return false; } void LoadTabletProcedure::RunNextStage() { - if (!IsNewEvent(GenerateEvent())) { - return; - } - TabletEvent event = events_.back(); - auto it = event_handlers_.find(event); - PROC_LOG_IF(FATAL, it == event_handlers_.end()) << "illegal event: " << event << ", tablet: " << tablet_; - TabletLoadEventHandler handler = it->second; - handler(this, event); + if (!IsNewEvent(GenerateEvent())) { + return; + } + TabletEvent event = events_.back(); + auto it = event_handlers_.find(event); + PROC_LOG_IF(FATAL, it == event_handlers_.end()) << "illegal event: " << event + << ", tablet: " << tablet_; + TabletLoadEventHandler handler = it->second; + handler(this, event); } -void LoadTabletProcedure::TabletNodeOffLineHandler(const TabletEvent& event) { - tablet_->DoStateTransition(event); - load_request_dispatching_.store(false); - PROC_LOG(INFO) << "try schedule a tabletnode for tablet: " << tablet_->GetPath(); - const std::string table_name = tablet_->GetTableName(); - TabletNodePtr node; - Scheduler* size_scheduler = MasterEnv().GetSizeScheduler().get(); - while (!node) { - if (!MasterEnv().GetTabletNodeManager()-> - ScheduleTabletNodeOrWait(size_scheduler, table_name, false, &node)) { - PROC_LOG(ERROR) << kSms << "fatal, cannot schedule tabletnode for tablet: " << tablet_; - continue; - } - if (node->GetState() == kOffLine) { - continue; - } - } - dest_node_.swap(node); - PROC_LOG(INFO) << "tablet: " << tablet_->GetPath() << ", pick destnode: " << dest_node_->GetAddr(); - BindTabletToTabletNode(tablet_, dest_node_); - // reset - load_retrys_ = 0; - slow_load_retrys_ = 0; - update_meta_done_.store(false); +void LoadTabletProcedure::TabletNodeOfflineHandler(const TabletEvent& event) { + tablet_->DoStateTransition(event); + load_request_dispatching_.store(false); + PROC_LOG(INFO) << "try schedule a tabletnode for tablet: " << tablet_->GetPath(); + const std::string table_name = tablet_->GetTableName(); + TabletNodePtr node; + Scheduler* size_scheduler = MasterEnv().GetSizeScheduler().get(); + while (!node) { + if (!MasterEnv().GetTabletNodeManager()->ScheduleTabletNodeOrWait(size_scheduler, table_name, + tablet_, false, &node)) { + PROC_LOG(ERROR) << kSms << "fatal, cannot schedule tabletnode for tablet: " << tablet_; + continue; + } + if (node->GetState() == kOffline) { + continue; + } + } + dest_node_.swap(node); + PROC_LOG(INFO) << "tablet: " << tablet_->GetPath() + << ", pick destnode: " << dest_node_->GetAddr(); + BindTabletToTabletNode(tablet_, dest_node_); + // reset + load_retrys_ = 0; + slow_load_retrys_ = 0; + update_meta_done_.store(false); } void LoadTabletProcedure::TabletNodeRestartHandler(const TabletEvent& event) { - tablet_->DoStateTransition(event); - load_request_dispatching_.store(false); - dest_node_.swap(restarted_dest_node_); - BindTabletToTabletNode(tablet_, dest_node_); + tablet_->DoStateTransition(event); + load_request_dispatching_.store(false); + dest_node_.swap(restarted_dest_node_); + BindTabletToTabletNode(tablet_, dest_node_); } void LoadTabletProcedure::TabletNodeBusyHandler(const TabletEvent& event) { - PROC_LOG(INFO) << "tabletnode: " << dest_node_->GetAddr() - << ", delay load tablet: " << tablet_->GetPath(); + PROC_LOG(INFO) << "tabletnode: " << dest_node_->GetAddr() + << ", delay load tablet: " << tablet_->GetPath(); } -void LoadTabletProcedure::TabletPendOffLineHandler(const TabletEvent& event) { - PROC_LOG(INFO) << "tablet: " << tablet_->GetPath() << ", current event: " << event - << " considering cache locality, tabletnode: " << dest_node_->GetAddr(); - tablet_->DoStateTransition(event); +void LoadTabletProcedure::TabletPendOfflineHandler(const TabletEvent& event) { + PROC_LOG(INFO) << "tablet: " << tablet_->GetPath() << ", current event: " << event + << " considering cache locality, tabletnode: " << dest_node_->GetAddr(); + tablet_->DoStateTransition(event); } void LoadTabletProcedure::UpdateMetaHandler(const TabletEvent&) { - MetaWriteRecord record = PackMetaWriteRecord(tablet_, false); - UpdateMetaClosure done = std::bind(&LoadTabletProcedure::UpdateMetaDone, this, _1); - MasterEnv().BatchWriteMetaTableAsync(record, done, -1); + MetaWriteRecord record = PackMetaWriteRecord(tablet_, false); + UpdateMetaClosure done = std::bind(&LoadTabletProcedure::UpdateMetaDone, this, _1); + MasterEnv().BatchWriteMetaTableAsync(record, done, -1); } void LoadTabletProcedure::LoadTabletHandler(const TabletEvent& event) { - tablet_->DoStateTransition(event); - load_request_dispatching_.store(true); - PROC_LOG(INFO) << "load tablet: " << tablet_->GetPath() << ", destnode: " << dest_node_->GetAddr(); - LoadTabletAsync(dest_node_); + tablet_->DoStateTransition(event); + load_request_dispatching_.store(true); + PROC_LOG(INFO) << "load tablet: " << tablet_->GetPath() + << ", destnode: " << dest_node_->GetAddr(); + LoadTabletAsync(dest_node_); } -void LoadTabletProcedure::WaitRpcResponseHandler(const TabletEvent&) { - return; -} +void LoadTabletProcedure::WaitRpcResponseHandler(const TabletEvent&) { return; } void LoadTabletProcedure::TabletNodeLoadSuccHandler(const TabletEvent& event) { - tablet_->DoStateTransition(event); - dest_node_->FinishLoad(tablet_); - tablet_->ClearLoadFailedCount(); + tablet_->DoStateTransition(event); + dest_node_->FinishLoad(tablet_); + tablet_->ClearLoadFailedCount(); } void LoadTabletProcedure::TabletNodeLoadFailHandler(const TabletEvent& event) { - tablet_->DoStateTransition(event); - dest_node_->FinishLoad(tablet_); - tablet_->IncLoadFailedCount(); + tablet_->DoStateTransition(event); + dest_node_->FinishLoad(tablet_); + tablet_->IncLoadFailedCount(); } void LoadTabletProcedure::TabletLoadFailHandler(const TabletEvent& event) { - tablet_->DoStateTransition(event); - PROC_LOG(ERROR) << "tablet: " << tablet_ - << "event: "<< event << ", exhausted all load retries, abort load" ; + tablet_->DoStateTransition(event); + PROC_LOG(ERROR) << "tablet: " << tablet_ << "event: " << event + << ", exhausted all load retries, abort load"; } void LoadTabletProcedure::EOFHandler(const TabletEvent& event) { - PROC_LOG(INFO) << "tablet: " << tablet_ << "," << event; - if (tablet_->LoadFailedCount() >= FLAGS_tablet_load_max_tried_ts) { - PROC_LOG(ERROR) << "tablet: " << tablet_ - << "event: "<< event << ", exhausted all load retries, abort load" ; - } - // we still have opportunities to load the failed tablet on other tabletnodes, so we generate - // a new MoveTabletProcedure and finish current LoadTabletProcedure. - // Notice do not unlock tablet TransitionLock if a MoveTabletProcedure is generated. - if (tablet_->GetStatus() != TabletMeta::kTabletReady && - tablet_->LoadFailedCount() < FLAGS_tablet_load_max_tried_ts) { - // Since LoadTablet is executed asynchronously on TS, master cannot get tablet's exact status - // on the remote TS when master got a LoadTablet failed RPC response. For example, master will - // tread tablet as loaded failed with RPC error code "RPC_TIMEOUT" if LoadTablet RPC request is - // delayed by TS for a long time, but TS may finally execute LoadTablet successfully without master - // perception. So here master tread the loaded failed tablet as Ready and issue a Move action - // (which consists of an Unload and a following Load sub actions) for the tablet. - PROC_LOG(INFO) << "try move to recover the load failed tablet: " << tablet_; - tablet_->SetStatus(TabletMeta::kTabletReady); - std::shared_ptr move_proc(new MoveTabletProcedure(tablet_, - TabletNodePtr(nullptr), MasterEnv().GetThreadPool().get())); - if (!MasterEnv().GetExecutor()->AddProcedure(move_proc)) { - PROC_LOG(WARNING) << "add move tablet procedure failed, tablet: " << tablet_; - } - } - else { - tablet_->UnlockTransition(); - } - done_.store(true); - - + if (tablet_->LoadFailedCount() >= FLAGS_tablet_load_max_tried_ts) { + PROC_LOG(ERROR) << "tablet: " << tablet_ << "event: " << event + << ", exhausted all load retries, abort load"; + } + // we still have opportunities to load the failed tablet on other tabletnodes, + // so we generate + // a new MoveTabletProcedure and finish current LoadTabletProcedure. + // Notice do not unlock tablet TransitionLock if a MoveTabletProcedure is + // generated. + if (tablet_->GetStatus() != TabletMeta::kTabletReady && + tablet_->LoadFailedCount() < FLAGS_tablet_load_max_tried_ts) { + // Since LoadTablet is executed asynchronously on TS, master cannot get + // tablet's exact status + // on the remote TS when master got a LoadTablet failed RPC response. For + // example, master will + // tread tablet as loaded failed with RPC error code "RPC_TIMEOUT" if + // LoadTablet RPC request is + // delayed by TS for a long time, but TS may finally execute LoadTablet + // successfully without master + // perception. So here master tread the loaded failed tablet as Ready and + // issue a Move action + // (which consists of an Unload and a following Load sub actions) for the + // tablet. + PROC_LOG(INFO) << "try move to recover the load failed tablet: " << tablet_; + tablet_->SetStatus(TabletMeta::kTabletReady); + std::shared_ptr move_proc(new MoveTabletProcedure( + tablet_, TabletNodePtr(nullptr), MasterEnv().GetThreadPool().get())); + if (!MasterEnv().GetExecutor()->AddProcedure(move_proc)) { + PROC_LOG(WARNING) << "add move tablet procedure failed, tablet: " << tablet_; + } + } else { + if (!!MasterEnv().GetQuotaEntry()) { + MasterEnv().GetQuotaEntry()->CaculateDeltaQuota(tablet_->GetTableName()); + } + tablet_->UnlockTransition(); + } + PROC_LOG(INFO) << "tablet: " << tablet_ << "," << event + << ", tablet transition state: " << tablet_->InTransition(); + done_.store(true); } - } } diff --git a/src/master/load_tablet_procedure.h b/src/master/load_tablet_procedure.h index 620e956cd..201cc7ce4 100644 --- a/src/master/load_tablet_procedure.h +++ b/src/master/load_tablet_procedure.h @@ -14,89 +14,95 @@ namespace tera { namespace master { -class LoadTabletProcedure : public Procedure, public std::enable_shared_from_this { -public: - LoadTabletProcedure(TabletPtr tablet, ThreadPool* thread_pool); - - LoadTabletProcedure(TabletPtr tablet, TabletNodePtr dest_node, - ThreadPool* thread_pool); - - virtual std::string ProcId() const { - //std::string id = std::string("LoadTablet:") + tablet_->GetPath(); - return id_; - } - virtual void RunNextStage(); - - virtual ~LoadTabletProcedure() {} +class LoadTabletProcedure : public Procedure, + public std::enable_shared_from_this { + public: + LoadTabletProcedure(TabletPtr tablet, ThreadPool* thread_pool); + + LoadTabletProcedure(TabletPtr tablet, TabletNodePtr dest_node, ThreadPool* thread_pool); + + LoadTabletProcedure(TabletPtr tablet, TabletNodePtr dest_node, ThreadPool* thread_pool, + bool is_sub_proc); + + virtual std::string ProcId() const { + // std::string id = std::string("LoadTablet:") + tablet_->GetPath(); + return id_; + } + virtual void RunNextStage(); + + virtual ~LoadTabletProcedure() {} - virtual bool Done() { - return done_.load(); + virtual bool Done() { return done_.load(); } + + virtual ProcedureLimiter::LockType GetLockType() override { + if (is_sub_proc_) { + return ProcedureLimiter::LockType::kNoLimit; + } else { + return ProcedureLimiter::LockType::kLoad; } + } -private: - typedef std::function TabletLoadEventHandler; - - typedef std::function LoadClosure; - - TabletEvent GenerateEvent(); - - TabletEvent GenerateTabletOffLineEvent(); - - TabletEvent GenerateTabletOnLoadEvent(); - - TabletEvent GenerateTsDownEvent(); - - // unique events we need to process - bool IsNewEvent(TabletEvent event); - - void UpdateMetaDone(bool); - - static void LoadTabletAsyncWrapper(std::weak_ptr weak_proc, TabletNodePtr dest_node); - - static void LoadTabletCallbackWrapper(std::weak_ptr weak_proc, - TabletNodePtr node, - LoadTabletRequest* request, - LoadTabletResponse* response, - bool failed, - int error_code); - - void LoadTabletAsync(TabletNodePtr dest_node); - - void LoadTabletCallback(TabletNodePtr node, - LoadTabletRequest* request, - LoadTabletResponse* response, - bool failed, - int error_code); - - // EventHandlers - void TabletNodeOffLineHandler(const TabletEvent& event); - void TabletNodeRestartHandler(const TabletEvent& event); - void TabletNodeBusyHandler(const TabletEvent& event); - void TabletPendOffLineHandler(const TabletEvent& event); - void UpdateMetaHandler(const TabletEvent& event); - void LoadTabletHandler(const TabletEvent& event); - void WaitRpcResponseHandler(const TabletEvent& event); - void TabletNodeLoadSuccHandler(const TabletEvent& event); - void TabletNodeLoadFailHandler(const TabletEvent& event); - void TabletLoadFailHandler(const TabletEvent& event); - void EOFHandler(const TabletEvent& event); - -private: - const std::string id_; - TabletPtr tablet_; - TabletNodePtr dest_node_; - // following counters or flags way be accessed from different threads concurrently, - // so std::atomic is used to ensure ordered access to those variables from different threads - std::atomic done_; - std::atomic load_request_dispatching_; - std::atomic load_retrys_; - std::atomic slow_load_retrys_; - std::atomic update_meta_done_; - std::vector events_; - TabletNodePtr restarted_dest_node_; - static std::map event_handlers_; - ThreadPool* thread_pool_; -}; + private: + typedef std::function TabletLoadEventHandler; + + typedef std::function LoadClosure; + + TabletEvent GenerateEvent(); + + TabletEvent GenerateTabletOfflineEvent(); + TabletEvent GenerateTabletOnLoadEvent(); + + TabletEvent GenerateTsDownEvent(); + + // unique events we need to process + bool IsNewEvent(TabletEvent event); + + void UpdateMetaDone(bool); + + static void LoadTabletAsyncWrapper(std::weak_ptr weak_proc, + TabletNodePtr dest_node); + + static void LoadTabletCallbackWrapper(std::weak_ptr weak_proc, + TabletNodePtr node, LoadTabletRequest* request, + LoadTabletResponse* response, bool failed, int error_code); + + void LoadTabletAsync(TabletNodePtr dest_node); + + void LoadTabletCallback(TabletNodePtr node, LoadTabletRequest* request, + LoadTabletResponse* response, bool failed, int error_code); + + // EventHandlers + void TabletNodeOfflineHandler(const TabletEvent& event); + void TabletNodeRestartHandler(const TabletEvent& event); + void TabletNodeBusyHandler(const TabletEvent& event); + void TabletPendOfflineHandler(const TabletEvent& event); + void UpdateMetaHandler(const TabletEvent& event); + void LoadTabletHandler(const TabletEvent& event); + void WaitRpcResponseHandler(const TabletEvent& event); + void TabletNodeLoadSuccHandler(const TabletEvent& event); + void TabletNodeLoadFailHandler(const TabletEvent& event); + void TabletLoadFailHandler(const TabletEvent& event); + void EOFHandler(const TabletEvent& event); + + private: + const std::string id_; + TabletPtr tablet_; + TabletNodePtr dest_node_; + // following counters or flags way be accessed from different threads + // concurrently, + // so std::atomic is used to ensure ordered access to those variables from + // different threads + std::atomic done_; + std::atomic load_request_dispatching_; + std::atomic load_retrys_; + std::atomic slow_load_retrys_; + std::atomic update_meta_done_; + std::vector events_; + TabletNodePtr restarted_dest_node_; + bool is_sub_proc_; + static std::map event_handlers_; + ThreadPool* thread_pool_; +}; } } diff --git a/src/master/master_entry.cc b/src/master/master_entry.cc index d468e8e60..d81c69885 100644 --- a/src/master/master_entry.cc +++ b/src/master/master_entry.cc @@ -6,11 +6,12 @@ #include #include - #include "common/metric/collector_report.h" #include "common/net/ip_address.h" #include "master/master_impl.h" +#include "master/multi_tenancy_service_impl.h" #include "master/remote_master.h" +#include "master/remote_multi_tenancy_service.h" #include "utils/utils_cmd.h" DEFINE_bool(tera_metric_http_server_enable, true, "enable metric http server, enable as default"); @@ -21,82 +22,95 @@ DECLARE_int32(tera_master_rpc_server_max_inflow); DECLARE_int32(tera_master_rpc_server_max_outflow); DECLARE_bool(tera_metric_http_server_enable); DECLARE_int32(tera_metric_http_server_listen_port); +DECLARE_int32(tera_master_thread_max_num); +DECLARE_string(tera_auth_policy); -std::string GetTeraEntryName() { - return "master"; -} +DECLARE_string(tera_cluster_name); -tera::TeraEntry* GetTeraEntry() { - return new tera::master::MasterEntry(); -} +std::string GetTeraEntryName() { return "master"; } + +tera::TeraEntry* GetTeraEntry() { return new tera::master::MasterEntry(); } namespace tera { namespace master { MasterEntry::MasterEntry() - : master_impl_(NULL), - remote_master_(NULL), - rpc_server_(NULL), + : master_impl_(nullptr), + multi_tenancy_service_impl_(nullptr), + thread_pool_(nullptr), + remote_master_(nullptr), + remote_multi_tenancy_service_(nullptr), + rpc_server_(nullptr), metric_http_server_(new tera::MetricHttpServer()) { - sofa::pbrpc::RpcServerOptions rpc_options; - rpc_options.max_throughput_in = FLAGS_tera_master_rpc_server_max_inflow; - rpc_options.max_throughput_out = FLAGS_tera_master_rpc_server_max_outflow; - rpc_options.keep_alive_time = 7200; - rpc_server_.reset(new sofa::pbrpc::RpcServer(rpc_options)); + sofa::pbrpc::RpcServerOptions rpc_options; + rpc_options.max_throughput_in = FLAGS_tera_master_rpc_server_max_inflow; + rpc_options.max_throughput_out = FLAGS_tera_master_rpc_server_max_outflow; + rpc_options.keep_alive_time = 7200; + rpc_server_.reset(new sofa::pbrpc::RpcServer(rpc_options)); } MasterEntry::~MasterEntry() {} bool MasterEntry::StartServer() { - // start metric http server - if (FLAGS_tera_metric_http_server_enable) { - if(!metric_http_server_->Start(FLAGS_tera_metric_http_server_listen_port)) { - LOG(ERROR) << "Start metric http server failed."; - return false; - } - } else { - LOG(INFO) << "Metric http server is disabled."; + // start metric http server + if (FLAGS_tera_metric_http_server_enable) { + if (!metric_http_server_->Start(FLAGS_tera_metric_http_server_listen_port)) { + LOG(ERROR) << "Start metric http server failed."; + return false; } - - IpAddress master_addr("0.0.0.0", FLAGS_tera_master_port); - LOG(INFO) << "Start master RPC server at: " << master_addr.ToString(); - - master_impl_.reset(new MasterImpl()); - remote_master_ = new RemoteMaster(master_impl_.get()); - - if (!master_impl_->Init()) { - return false; - } - - rpc_server_->RegisterService(remote_master_); - if (!rpc_server_->Start(master_addr.ToString())) { - LOG(ERROR) << "start RPC server error"; - return false; - } - - LOG(INFO) << "finish starting master server"; - - return true; + } else { + LOG(INFO) << "Metric http server is disabled."; + } + + IpAddress master_addr("0.0.0.0", FLAGS_tera_master_port); + LOG(INFO) << "Start master RPC server at: " << master_addr.ToString(); + + std::shared_ptr access_entry = + std::make_shared(FLAGS_tera_auth_policy); + std::shared_ptr quota_entry = + std::make_shared(); + master_impl_.reset(new MasterImpl(access_entry, quota_entry)); + multi_tenancy_service_impl_.reset(new MultiTenacyServiceImpl(access_entry, quota_entry)); + thread_pool_.reset(new ThreadPool(FLAGS_tera_master_thread_max_num)); + remote_master_ = new RemoteMaster(master_impl_.get(), thread_pool_); + remote_multi_tenancy_service_ = + new RemoteMultiTenancyService(multi_tenancy_service_impl_.get(), thread_pool_); + + if (!master_impl_->Init()) { + return false; + } + + rpc_server_->RegisterService(remote_master_); + rpc_server_->RegisterService(remote_multi_tenancy_service_); + if (!rpc_server_->Start(master_addr.ToString())) { + LOG(ERROR) << "start RPC server error"; + return false; + } + + LOG(INFO) << "finish starting master server for cluster: " << FLAGS_tera_cluster_name; + + return true; } bool MasterEntry::Run() { - CollectorReportPublisher::GetInstance().Refresh(); - static int64_t timer_ticks = 0; - ++timer_ticks; + CollectorReportPublisher::GetInstance().Refresh(); + static int64_t timer_ticks = 0; + ++timer_ticks; - if (timer_ticks % 10 == 0) { - LOG(INFO) << "[ThreadPool schd/task/cnt] " << master_impl_->ProfilingLog(); - } + if (timer_ticks % 10 == 0) { + LOG(INFO) << "[ThreadPool schd/task/cnt] " << master_impl_->ProfilingLog(); + } - ThisThread::Sleep(1000); - return true; + ThisThread::Sleep(1000); + return true; } void MasterEntry::ShutdownServer() { - metric_http_server_->Stop(); - rpc_server_->Stop(); - master_impl_.reset(); + metric_http_server_->Stop(); + rpc_server_->Stop(); + master_impl_.reset(); + thread_pool_.reset(); } -} // namespace master -} // namespace tera +} // namespace master +} // namespace tera diff --git a/src/master/master_entry.h b/src/master/master_entry.h index 919da4928..499df0884 100644 --- a/src/master/master_entry.h +++ b/src/master/master_entry.h @@ -5,39 +5,47 @@ #ifndef TERA_MASTER_MASTER_ENTRY_H_ #define TERA_MASTER_MASTER_ENTRY_H_ +#include + #include #include "common/base/scoped_ptr.h" #include "common/metric/metric_http_server.h" -#include "tera_entry.h" +#include "common/thread_pool.h" +#include "tera/tera_entry.h" namespace tera { namespace master { class MasterImpl; class RemoteMaster; +class RemoteMultiTenancyService; +class MultiTenacyServiceImpl; class MasterEntry : public TeraEntry { -public: - MasterEntry(); - ~MasterEntry(); - - bool StartServer(); - bool Run(); - void ShutdownServer(); - -private: - bool InitZKAdaptor(); - -private: - scoped_ptr master_impl_; - // scoped_ptr remote_master_; - RemoteMaster* remote_master_; - scoped_ptr rpc_server_; - scoped_ptr metric_http_server_; + public: + MasterEntry(); + ~MasterEntry(); + + bool StartServer(); + bool Run(); + void ShutdownServer(); + + private: + bool InitZKAdaptor(); + + private: + scoped_ptr master_impl_; + scoped_ptr multi_tenancy_service_impl_; + std::shared_ptr thread_pool_; + // scoped_ptr remote_master_; + RemoteMaster* remote_master_; + RemoteMultiTenancyService* remote_multi_tenancy_service_; + scoped_ptr rpc_server_; + scoped_ptr metric_http_server_; }; -} // namespace master -} // namespace tera +} // namespace master +} // namespace tera -#endif // TERA_MASTER_MASTER_ENTRY_H_ +#endif // TERA_MASTER_MASTER_ENTRY_H_ diff --git a/src/master/master_env.cc b/src/master/master_env.cc index 91f590cf3..fe8b4e4e7 100644 --- a/src/master/master_env.cc +++ b/src/master/master_env.cc @@ -18,213 +18,205 @@ DECLARE_string(tera_master_meta_table_name); using namespace std::placeholders; -namespace tera{ -namespace master{ +namespace tera { +namespace master { -enum MetaTaskType { - kWrite = 0, - kScan, - kRepair -}; +enum MetaTaskType { kWrite = 0, kScan, kRepair }; struct MetaTask { - MetaTaskType type_; + MetaTaskType type_; }; struct WriteTask { - MetaTaskType type_; - UpdateMetaClosure done_; - std::vector meta_entries_; - int32_t left_try_times_; + MetaTaskType type_; + UpdateMetaClosure done_; + std::vector meta_entries_; + int32_t left_try_times_; }; struct ScanTask { - MetaTaskType type_; - TeraMasterEnv::ScanClosure done_; - std::string table_name_; - std::string tablet_key_start_; - std::string tablet_key_end_; + MetaTaskType type_; + TeraMasterEnv::ScanClosure done_; + std::string table_name_; + std::string tablet_key_start_; + std::string tablet_key_end_; }; std::mutex TeraMasterEnv::meta_task_mutex_; std::queue TeraMasterEnv::meta_task_queue_; Counter TeraMasterEnv::sequence_id_; - -void TeraMasterEnv::BatchWriteMetaTableAsync(MetaWriteRecord record, UpdateMetaClosure done, int32_t left_try_times) { - std::vector meta_entries; - meta_entries.push_back(record); - BatchWriteMetaTableAsync(meta_entries, done, left_try_times); +void TeraMasterEnv::BatchWriteMetaTableAsync(MetaWriteRecord record, UpdateMetaClosure done, + int32_t left_try_times) { + std::vector meta_entries; + meta_entries.push_back(record); + BatchWriteMetaTableAsync(meta_entries, done, left_try_times); } void TeraMasterEnv::BatchWriteMetaTableAsync(std::vector meta_entries, - UpdateMetaClosure done, int32_t left_try_times) { - std::string meta_addr; - if (!MasterEnv().GetTabletManager()->GetMetaTabletAddr(&meta_addr)) { - SuspendMetaOperation(meta_entries, done, left_try_times); - return; - } - - WriteTabletRequest* request = new WriteTabletRequest; - WriteTabletResponse* response = new WriteTabletResponse; - request->set_sequence_id(SequenceId().Inc()); - request->set_tablet_name(FLAGS_tera_master_meta_table_name); - request->set_is_sync(true); - request->set_is_instant(true); - for (size_t i = 0; i < meta_entries.size(); ++i) { - std::string packed_key = meta_entries[i].key; - std::string packed_value = meta_entries[i].value; - bool is_delete = meta_entries[i].is_delete; - //bool is_delete = meta_entries[i](&packed_key, &packed_value); - RowMutationSequence* mu_seq = request->add_row_list(); - mu_seq->set_row_key(packed_key); - Mutation* mutation = mu_seq->add_mutation_sequence(); - if (!is_delete) { - mutation->set_type(kPut); - mutation->set_value(packed_value); - } else { - mutation->set_type(kDeleteRow); - } - } - if (request->row_list_size() == 0) { - delete request; - delete response; - return; + UpdateMetaClosure done, int32_t left_try_times) { + std::string meta_addr; + if (!MasterEnv().GetTabletManager()->GetMetaTabletAddr(&meta_addr)) { + SuspendMetaOperation(meta_entries, done, left_try_times); + return; + } + + WriteTabletRequest* request = new WriteTabletRequest; + WriteTabletResponse* response = new WriteTabletResponse; + request->set_sequence_id(SequenceId().Inc()); + request->set_tablet_name(FLAGS_tera_master_meta_table_name); + request->set_is_sync(true); + request->set_is_instant(true); + MasterEnv().GetAccessBuilder()->BuildInternalGroupRequest(request); + for (size_t i = 0; i < meta_entries.size(); ++i) { + std::string packed_key = meta_entries[i].key; + std::string packed_value = meta_entries[i].value; + bool is_delete = meta_entries[i].is_delete; + // bool is_delete = meta_entries[i](&packed_key, &packed_value); + RowMutationSequence* mu_seq = request->add_row_list(); + mu_seq->set_row_key(packed_key); + Mutation* mutation = mu_seq->add_mutation_sequence(); + if (!is_delete) { + mutation->set_type(kPut); + mutation->set_value(packed_value); } else { - LOG(INFO) << "WriteMetaTableAsync id: " << request->sequence_id(); + mutation->set_type(kDeleteRow); } - - WriteClosure meta_done = - std::bind(TeraMasterEnv::UpdateMetaCallback, meta_entries, done, left_try_times, _1, _2, _3, _4); - - tabletnode::TabletNodeClient meta_node_client(MasterEnv().GetThreadPool().get(), meta_addr); - meta_node_client.WriteTablet(request, response, meta_done); + } + if (request->row_list_size() == 0) { + delete request; + delete response; + return; + } else { + LOG(INFO) << "WriteMetaTableAsync id: " << request->sequence_id(); + } + + WriteClosure meta_done = std::bind(TeraMasterEnv::UpdateMetaCallback, meta_entries, done, + left_try_times, _1, _2, _3, _4); + + tabletnode::TabletNodeClient meta_node_client(MasterEnv().GetThreadPool().get(), meta_addr); + meta_node_client.WriteTablet(request, response, meta_done); } -void TeraMasterEnv::UpdateMetaCallback(std::vector records, - UpdateMetaClosure done, - int32_t left_try_times, - WriteTabletRequest* request, - WriteTabletResponse* response, - bool failed, - int error_code) { - StatusCode status = response->status(); - if (!failed && status == kTabletNodeOk) { - // all the row status should be the same - CHECK_GT(response->row_status_list_size(), 0); - status = response->row_status_list(0); - } - std::unique_ptr request_holder(request); - std::unique_ptr response_holder(response); - if (failed || status != kTabletNodeOk) { - std::string errmsg = - failed ? sofa::pbrpc::RpcErrorCodeToString(error_code) : StatusCodeToString(status); - LOG(ERROR) << "fail to update meta tablet: error_msg: " << errmsg << ", will retry later"; - for (auto it = records.begin(); it != records.end(); ++it) { - std::string op = (it->is_delete ? "DEL" : "PUT"); - LOG(WARNING) << "update meta records suspended and retry later, " << "OP: " - << op << ", key: " << DebugString(it->key) << ", value: " << DebugString(it->value); - } - if (left_try_times == 0) { - done(false); - } - left_try_times = left_try_times > 0 ? left_try_times - 1 : left_try_times; - MasterEnv().SuspendMetaOperation(records, done, left_try_times); - return; - } +void TeraMasterEnv::UpdateMetaCallback(std::vector records, UpdateMetaClosure done, + int32_t left_try_times, WriteTabletRequest* request, + WriteTabletResponse* response, bool failed, int error_code) { + StatusCode status = response->status(); + if (!failed && status == kTabletNodeOk) { + // all the row status should be the same + CHECK_GT(response->row_status_list_size(), 0); + status = response->row_status_list(0); + } + std::unique_ptr request_holder(request); + std::unique_ptr response_holder(response); + if (failed || status != kTabletNodeOk) { + std::string errmsg = + failed ? sofa::pbrpc::RpcErrorCodeToString(error_code) : StatusCodeToString(status); + LOG(ERROR) << "fail to update meta tablet: error_msg: " << errmsg << ", will retry later"; for (auto it = records.begin(); it != records.end(); ++it) { - std::string op = (it->is_delete ? "DEL" : "PUT"); - LOG(INFO) << "update meta tablet succ, " << "OP: " << op - << ", key: " << DebugString(it->key) << ", value: " << DebugString(it->value); + std::string op = (it->is_delete ? "DEL" : "PUT"); + LOG(WARNING) << "update meta records suspended and retry later, " + << "OP: " << op << ", key: " << DebugString(it->key) + << ", value: " << DebugString(it->value); } - done(true); + if (left_try_times == 0) { + done(false); + return; + } + left_try_times = left_try_times > 0 ? left_try_times - 1 : left_try_times; + MasterEnv().SuspendMetaOperation(records, done, left_try_times); + return; + } + for (auto it = records.begin(); it != records.end(); ++it) { + std::string op = (it->is_delete ? "DEL" : "PUT"); + LOG(INFO) << "update meta tablet succ, " + << "OP: " << op << ", key: " << DebugString(it->key) + << ", value: " << DebugString(it->value); + } + done(true); } void TeraMasterEnv::ScanMetaTableAsync(const std::string& table_name, - const std::string& tablet_key_start, - const std::string& tablet_key_end, - ScanClosure done) { - std::string meta_addr; - if (MasterEnv().GetTabletManager()->GetMetaTabletAddr(&meta_addr)) { - SuspendScanMetaOperation(table_name, tablet_key_start, tablet_key_end, done); - return; - } - - ScanTabletRequest* request = new ScanTabletRequest; - ScanTabletResponse* response = new ScanTabletResponse; - request->set_sequence_id(SequenceId().Inc()); - request->set_table_name(FLAGS_tera_master_meta_table_name); - std::string scan_key_start, scan_key_end; - MetaTableScanRange(table_name, tablet_key_start, tablet_key_end, - &scan_key_start, &scan_key_end); - request->set_start(scan_key_start); - request->set_end(scan_key_end); - - LOG(INFO) << "ScanMetaTableAsync id: " << request->sequence_id() << ", " - << "table: " << table_name << ", range: [" - << DebugString(tablet_key_start) << ", " << DebugString(tablet_key_end); - tabletnode::TabletNodeClient meta_node_client(MasterEnv().GetThreadPool().get(), meta_addr); - meta_node_client.ScanTablet(request, response, done); + const std::string& tablet_key_start, + const std::string& tablet_key_end, ScanClosure done) { + std::string meta_addr; + if (MasterEnv().GetTabletManager()->GetMetaTabletAddr(&meta_addr)) { + SuspendScanMetaOperation(table_name, tablet_key_start, tablet_key_end, done); + return; + } + + ScanTabletRequest* request = new ScanTabletRequest; + ScanTabletResponse* response = new ScanTabletResponse; + request->set_sequence_id(SequenceId().Inc()); + request->set_table_name(FLAGS_tera_master_meta_table_name); + std::string scan_key_start, scan_key_end; + MetaTableScanRange(table_name, tablet_key_start, tablet_key_end, &scan_key_start, &scan_key_end); + request->set_start(scan_key_start); + request->set_end(scan_key_end); + + MasterEnv().GetAccessBuilder()->BuildInternalGroupRequest(request); + + LOG(INFO) << "ScanMetaTableAsync id: " << request->sequence_id() << ", " + << "table: " << table_name << ", range: [" << DebugString(tablet_key_start) << ", " + << DebugString(tablet_key_end); + tabletnode::TabletNodeClient meta_node_client(MasterEnv().GetThreadPool().get(), meta_addr); + meta_node_client.ScanTablet(request, response, done); } -void TeraMasterEnv::SuspendScanMetaOperation(const std::string& table_name, - const std::string& tablet_key_start, - const std::string& tablet_key_end, - ScanClosure done) { - ScanTask* task = new ScanTask; - task->type_ = kScan; - task->done_ = done; - task->table_name_ = table_name; - task->tablet_key_start_ = tablet_key_start; - task->tablet_key_end_ = tablet_key_end; - PushToMetaPendingQueue((MetaTask*)task); +void TeraMasterEnv::SuspendScanMetaOperation(const std::string& table_name, + const std::string& tablet_key_start, + const std::string& tablet_key_end, ScanClosure done) { + ScanTask* task = new ScanTask; + task->type_ = kScan; + task->done_ = done; + task->table_name_ = table_name; + task->tablet_key_start_ = tablet_key_start; + task->tablet_key_end_ = tablet_key_end; + PushToMetaPendingQueue((MetaTask*)task); } -void TeraMasterEnv::SuspendMetaOperation(MetaWriteRecord record, UpdateMetaClosure done, int32_t left_try_times) { - std::vector meta_entries; - meta_entries.push_back(record); - SuspendMetaOperation(meta_entries, done, left_try_times); +void TeraMasterEnv::SuspendMetaOperation(MetaWriteRecord record, UpdateMetaClosure done, + int32_t left_try_times) { + std::vector meta_entries; + meta_entries.push_back(record); + SuspendMetaOperation(meta_entries, done, left_try_times); } -void TeraMasterEnv::SuspendMetaOperation(std::vector meta_entries, UpdateMetaClosure done, int32_t left_try_times) { - WriteTask* task = new WriteTask; - task->type_ = kWrite; - task->done_ = done; - task->meta_entries_ = meta_entries; - task->left_try_times_ = left_try_times; - PushToMetaPendingQueue((MetaTask*)task); +void TeraMasterEnv::SuspendMetaOperation(std::vector meta_entries, + UpdateMetaClosure done, int32_t left_try_times) { + WriteTask* task = new WriteTask; + task->type_ = kWrite; + task->done_ = done; + task->meta_entries_ = meta_entries; + task->left_try_times_ = left_try_times; + PushToMetaPendingQueue((MetaTask*)task); } void TeraMasterEnv::PushToMetaPendingQueue(MetaTask* task) { - std::lock_guard lock(meta_task_mutex_); - meta_task_queue_.push(task); - if (meta_task_queue_.size() == 1) { - TabletPtr meta_tablet; - MasterEnv().GetTabletManager()->FindTablet(FLAGS_tera_master_meta_table_name, "", - &meta_tablet); - TryMoveTablet(meta_tablet); - } + std::lock_guard lock(meta_task_mutex_); + meta_task_queue_.push(task); + if (meta_task_queue_.size() == 1) { + TabletPtr meta_tablet; + MasterEnv().GetTabletManager()->FindTablet(FLAGS_tera_master_meta_table_name, "", &meta_tablet); + MasterEnv().GetMaster()->TryMoveTablet(meta_tablet); + } } void TeraMasterEnv::ResumeMetaOperation() { - meta_task_mutex_.lock(); - while (!meta_task_queue_.empty()) { - MetaTask* task = meta_task_queue_.front(); - if (task->type_ == kWrite) { - WriteTask* write_task = (WriteTask*)task; - BatchWriteMetaTableAsync(write_task->meta_entries_, write_task->done_, write_task->left_try_times_); - delete write_task; - } else if (task->type_ == kScan) { - ScanTask* scan_task = (ScanTask*)task; - ScanMetaTableAsync(scan_task->table_name_, - scan_task->tablet_key_start_, - scan_task->tablet_key_end_, scan_task->done_); - delete scan_task; - } - meta_task_queue_.pop(); + meta_task_mutex_.lock(); + while (!meta_task_queue_.empty()) { + MetaTask* task = meta_task_queue_.front(); + if (task->type_ == kWrite) { + WriteTask* write_task = (WriteTask*)task; + BatchWriteMetaTableAsync(write_task->meta_entries_, write_task->done_, + write_task->left_try_times_); + delete write_task; + } else if (task->type_ == kScan) { + ScanTask* scan_task = (ScanTask*)task; + ScanMetaTableAsync(scan_task->table_name_, scan_task->tablet_key_start_, + scan_task->tablet_key_end_, scan_task->done_); + delete scan_task; } - meta_task_mutex_.unlock(); - + meta_task_queue_.pop(); + } + meta_task_mutex_.unlock(); } - } } - - diff --git a/src/master/master_env.h b/src/master/master_env.h index 3ef8152d8..3049c6b6f 100644 --- a/src/master/master_env.h +++ b/src/master/master_env.h @@ -26,152 +26,140 @@ class TabletAvailability; class Scheduler; class SizeScheduler; class LoadScheduler; -class MasterImpl; +class MasterImpl; class MetaTask; -typedef std::function UpdateMetaClosure; +typedef std::function UpdateMetaClosure; struct MetaWriteRecord { - MetaWriteRecord() {}; - MetaWriteRecord(std::string key_, std::string value_, bool is_delete_) : - key(key_), value(value_), is_delete(is_delete_) {} - std::string key; - std::string value; - bool is_delete; + MetaWriteRecord(){}; + MetaWriteRecord(const std::string& key, const std::string& value, bool is_delete) + : key(key), value(value), is_delete(is_delete) {} + std::string key; + std::string value; + bool is_delete; }; inline MetaWriteRecord PackMetaWriteRecord(TablePtr table, bool is_delete) { - std::string key, value; - table->ToMetaTableKeyValue(&key, &value); - return MetaWriteRecord(key, value, is_delete); + std::string key, value; + table->ToMetaTableKeyValue(&key, &value); + return MetaWriteRecord(key, value, is_delete); } inline MetaWriteRecord PackMetaWriteRecord(TabletPtr tablet, bool is_delete) { - std::string key, value; - tablet->ToMetaTableKeyValue(&key, &value); - return MetaWriteRecord(key, value, is_delete); + std::string key, value; + tablet->ToMetaTableKeyValue(&key, &value); + return MetaWriteRecord(key, value, is_delete); } -inline void PackMetaWriteRecords(TablePtr table, bool is_delete, std::vector& records) { - records.emplace_back(PackMetaWriteRecord(table, is_delete)); +inline void PackMetaWriteRecords(TablePtr table, bool is_delete, + std::vector& records) { + records.emplace_back(PackMetaWriteRecord(table, is_delete)); } -inline void PackMetaWriteRecords(TabletPtr tablet, bool is_delete, std::vector& records) { - records.emplace_back(PackMetaWriteRecord(tablet, is_delete)); +inline void PackMetaWriteRecords(TabletPtr tablet, bool is_delete, + std::vector& records) { + records.emplace_back(PackMetaWriteRecord(tablet, is_delete)); } -class TeraMasterEnv{ -public: - TeraMasterEnv() : master_(nullptr) {} - void Init(MasterImpl* master, - std::shared_ptr tabletnode_manager, - std::shared_ptr tablet_manager, - std::shared_ptr size_scheduler, - std::shared_ptr load_scheduler, - std::shared_ptr thread_pool, - std::shared_ptr executor, - std::shared_ptr tablet_availability, - std::shared_ptr stat_table) { - master_ = master; - tabletnode_manager_ = tabletnode_manager; - tablet_manager_ = tablet_manager; - size_scheduler_ = size_scheduler; - load_scheduler_ = load_scheduler; - thread_pool_ = thread_pool; - executor_ = executor; - tablet_availability_ = tablet_availability; - stat_table_ = stat_table; - } - - MasterImpl* GetMaster() { - return master_; - } - - std::shared_ptr& GetTabletNodeManager() { - return tabletnode_manager_; - } - std::shared_ptr& GetTabletManager() { - return tablet_manager_; - } - std::shared_ptr& GetSizeScheduler() { - return size_scheduler_; - } - std::shared_ptr& GetLoadScheduler() { - return load_scheduler_; - } - - std::shared_ptr& GetThreadPool() { - return thread_pool_; - } - - std::shared_ptr GetExecutor() { - return executor_; - } - - std::shared_ptr GetTabletAvailability() { - return tablet_availability_; - } - - std::shared_ptr GetStatTable() { - return stat_table_; - } - - static Counter& SequenceId() { - return sequence_id_; - } - - typedef std::function QueryClosure; - typedef std::function UpdateClosure; - typedef std::function LoadClosure; - typedef std::function UnloadClosure; - typedef std::function SplitClosure; - typedef std::function WriteClosure; - typedef std::function ScanClosure; - - static void BatchWriteMetaTableAsync(MetaWriteRecord record, UpdateMetaClosure done, int32_t left_try_times = -1); - static void BatchWriteMetaTableAsync(std::vector meta_entries, UpdateMetaClosure done, int32_t left_try_times = -1); - - static void UpdateMetaCallback(std::vector records, - UpdateMetaClosure done, - int32_t left_try_times, - WriteTabletRequest* request, - WriteTabletResponse* response, bool failed, int error_code); - - static void ScanMetaTableAsync(const std::string& table_name, - const std::string& tablet_key_start, - const std::string& tablet_key_end, - ScanClosure done); - - static void SuspendMetaOperation(MetaWriteRecord record, UpdateMetaClosure done, int32_t left_try_times); - static void SuspendMetaOperation(std::vector meta_entries, UpdateMetaClosure done, int32_t left_try_times); - - static void SuspendScanMetaOperation(const std::string& table_name, - const std::string& tablet_start_key, - const std::string& tablet_end_key, - ScanClosure done); - - static void PushToMetaPendingQueue(MetaTask* task); - - static void ResumeMetaOperation(); - -private: - MasterImpl* master_; - std::shared_ptr tabletnode_manager_; - std::shared_ptr tablet_manager_; - std::shared_ptr size_scheduler_; - std::shared_ptr load_scheduler_; - std::shared_ptr thread_pool_; - std::shared_ptr executor_; - std::shared_ptr tablet_availability_; - std::shared_ptr stat_table_; - static std::mutex meta_task_mutex_; - static std::queue meta_task_queue_; - static Counter sequence_id_; +class TeraMasterEnv { + public: + TeraMasterEnv() : master_(nullptr) {} + void Init(MasterImpl* master, const std::shared_ptr& tabletnode_manager, + const std::shared_ptr& tablet_manager, + const std::shared_ptr& access_builder, + const std::shared_ptr& quota_entry, + const std::shared_ptr& size_scheduler, + const std::shared_ptr& load_scheduler, + const std::shared_ptr& thread_pool, + const std::shared_ptr& executor, + const std::shared_ptr& tablet_availability, + const std::shared_ptr& stat_table) { + master_ = master; + tabletnode_manager_ = tabletnode_manager; + tablet_manager_ = tablet_manager; + access_builder_ = access_builder; + quota_entry_ = quota_entry; + size_scheduler_ = size_scheduler; + load_scheduler_ = load_scheduler; + thread_pool_ = thread_pool; + executor_ = executor; + tablet_availability_ = tablet_availability; + stat_table_ = stat_table; + } + + MasterImpl* GetMaster() { return master_; } + + std::shared_ptr& GetTabletNodeManager() { return tabletnode_manager_; } + std::shared_ptr& GetTabletManager() { return tablet_manager_; } + + std::shared_ptr& GetAccessBuilder() { return access_builder_; } + std::shared_ptr& GetQuotaEntry() { return quota_entry_; } + + std::shared_ptr& GetSizeScheduler() { return size_scheduler_; } + std::shared_ptr& GetLoadScheduler() { return load_scheduler_; } + + std::shared_ptr& GetThreadPool() { return thread_pool_; } + + std::shared_ptr GetExecutor() { return executor_; } + + std::shared_ptr GetTabletAvailability() { return tablet_availability_; } + + std::shared_ptr GetStatTable() { return stat_table_; } + + static Counter& SequenceId() { return sequence_id_; } + + typedef std::function QueryClosure; + typedef std::function UpdateClosure; + typedef std::function LoadClosure; + typedef std::function UnloadClosure; + typedef std::function SplitClosure; + typedef std::function WriteClosure; + typedef std::function ScanClosure; + + static void BatchWriteMetaTableAsync(MetaWriteRecord record, UpdateMetaClosure done, + int32_t left_try_times = -1); + static void BatchWriteMetaTableAsync(std::vector meta_entries, + UpdateMetaClosure done, int32_t left_try_times = -1); + + static void UpdateMetaCallback(std::vector records, UpdateMetaClosure done, + int32_t left_try_times, WriteTabletRequest* request, + WriteTabletResponse* response, bool failed, int error_code); + + static void ScanMetaTableAsync(const std::string& table_name, const std::string& tablet_key_start, + const std::string& tablet_key_end, ScanClosure done); + + static void SuspendMetaOperation(MetaWriteRecord record, UpdateMetaClosure done, + int32_t left_try_times); + static void SuspendMetaOperation(std::vector meta_entries, + UpdateMetaClosure done, int32_t left_try_times); + + static void SuspendScanMetaOperation(const std::string& table_name, + const std::string& tablet_start_key, + const std::string& tablet_end_key, ScanClosure done); + + static void PushToMetaPendingQueue(MetaTask* task); + + static void ResumeMetaOperation(); + + private: + MasterImpl* master_; + std::shared_ptr tabletnode_manager_; + std::shared_ptr tablet_manager_; + std::shared_ptr access_builder_; + std::shared_ptr quota_entry_; + std::shared_ptr size_scheduler_; + std::shared_ptr load_scheduler_; + std::shared_ptr thread_pool_; + std::shared_ptr executor_; + std::shared_ptr tablet_availability_; + std::shared_ptr stat_table_; + static std::mutex meta_task_mutex_; + static std::queue meta_task_queue_; + static Counter sequence_id_; }; inline TeraMasterEnv& MasterEnv() { - static TeraMasterEnv master_env; - return master_env; + static TeraMasterEnv master_env; + return master_env; } - } } - diff --git a/src/master/master_flags.cc b/src/master/master_flags.cc index fcc0e2cca..e0f84c0d8 100644 --- a/src/master/master_flags.cc +++ b/src/master/master_flags.cc @@ -8,13 +8,17 @@ DEFINE_string(tera_master_port, "10000", "the master port of tera system"); DEFINE_int32(tera_master_connect_retry_times, 5, "the max retry times when connect to master"); -DEFINE_int32(tera_master_connect_retry_period, 1000, "the retry period (in ms) between two master connection"); -DEFINE_int32(tera_master_connect_timeout_period, 5000, "the timeout period (in ms) for each master connection"); -DEFINE_int32(tera_master_query_tabletnode_period, 10000, "the period (in ms) for query tabletnode status" ); -DEFINE_int32(tera_master_common_retry_period, 1000, "the period (in ms) for common operation" ); +DEFINE_int32(tera_master_connect_retry_period, 1000, + "the retry period (in ms) between two master connection"); +DEFINE_int32(tera_master_connect_timeout_period, 5000, + "the timeout period (in ms) for each master connection"); +DEFINE_int32(tera_master_query_tabletnode_period, 10000, + "the period (in ms) for query tabletnode status"); +DEFINE_int32(tera_master_common_retry_period, 1000, "the period (in ms) for common operation"); DEFINE_int32(tera_master_meta_retry_times, 5, "the max retry times when master read/write meta"); DEFINE_bool(tera_master_meta_recovery_enabled, false, "whether recovery meta tablet at startup"); -DEFINE_string(tera_master_meta_recovery_file, "../data/meta.bak", "path of meta table recovery file"); +DEFINE_string(tera_master_meta_recovery_file, "../data/meta.bak", + "path of meta table recovery file"); DEFINE_bool(tera_master_cache_check_enabled, true, "enable the periodic check & release cache"); DEFINE_int32(tera_master_cache_release_period, 180, "the period (in sec) to try release cache"); @@ -22,62 +26,114 @@ DEFINE_int32(tera_master_cache_keep_min, 512, "the minimal free cache size (in M DEFINE_int32(tera_master_thread_min_num, 1, "the min thread number of master server"); DEFINE_int32(tera_master_thread_max_num, 10, "the max thread number of master server"); -DEFINE_int32(tera_master_impl_thread_max_num, 20, "the max thread number for master impl operations"); -DEFINE_int32(tera_master_impl_query_thread_num, 20, "the thread number for master impl query tabletnodes"); -DEFINE_int32(tera_master_impl_retry_times, 5, "the max retry times when master impl operation fail"); - -DEFINE_double(tera_master_workload_merge_threshold, 1.0, "if workload(wwl) < 1.0, enable merge on this tablet"); -DEFINE_double(tera_master_workload_split_threshold, 9.9, "if workload(wwl) > 9.9, trigger split by workload"); +DEFINE_int32(tera_master_impl_thread_max_num, 20, + "the max thread number for master impl operations"); +DEFINE_int32(tera_master_impl_query_thread_num, 20, + "the thread number for master impl query tabletnodes"); +DEFINE_int32(tera_master_impl_retry_times, 5, + "the max retry times when master impl operation fail"); + +DEFINE_int64(tera_master_max_tablet_size_M, 16384, "max tablet size"); +DEFINE_int64(tera_master_disable_merge_ttl_s, 604800, + "master will enable merge after this time(s)"); +DEFINE_double(tera_master_workload_merge_threshold, 1.0, + "if workload(wwl) < 1.0, enable merge on this tablet"); +DEFINE_double(tera_master_workload_split_threshold, 9999, + "if workload(wwl) is higher then this value, trigger split by workload"); DEFINE_int64(tera_master_min_split_size, 64, "the size (in MB) of tablet to trigger split"); -DEFINE_double(tera_master_min_split_ratio, 0.5, "min ratio of split size of tablet schema to trigger split"); +DEFINE_double(tera_master_min_split_ratio, 0.5, + "min ratio of split size of tablet schema to trigger split"); DEFINE_int64(tera_master_split_history_time_interval, 600000, "minimal split time interval(ms)"); -DEFINE_string(tera_master_gc_strategy, "trackable", "gc strategy, [default, trackable]"); -DEFINE_int32(tera_master_max_split_concurrency, 1, "the max concurrency of tabletnode for split tablet"); -DEFINE_int32(tera_master_max_load_concurrency, 20, "the max concurrency of tabletnode for load tablet"); +DEFINE_int32(tera_master_max_split_concurrency, 1, + "the max concurrency of tabletnode for split tablet"); +DEFINE_int32(tera_master_max_load_concurrency, 20, + "the max concurrency of tabletnode for load tablet"); DEFINE_int32(tera_master_max_move_concurrency, 50, "the max concurrency for move tablet"); DEFINE_int32(tera_master_max_unload_concurrency, 50, "the max concurrency for unload tablet"); DEFINE_int32(tera_master_load_interval, 300, "the delay interval (in sec) for load tablet"); -DEFINE_int32(tera_master_schema_update_retry_period, 1, "the period (in second) to poll schema update"); -DEFINE_int32(tera_master_schema_update_retry_times, 60000, "the max retry times of syncing new schema to ts"); +DEFINE_int32(tera_master_schema_update_retry_period, 1, + "the period (in second) to poll schema update"); +DEFINE_int32(tera_master_schema_update_retry_times, 60000, + "the max retry times of syncing new schema to ts"); +DEFINE_int32(tera_master_write_meta_retry_times, 5, "the max retry times for write meta"); DEFINE_int32(tera_garbage_collect_debug_log, 0, "garbage collect debug log"); // load balance DEFINE_bool(tera_master_move_tablet_enabled, false, "enable master to auto move tablet"); -DEFINE_bool(tera_master_meta_isolate_enabled, false, "enable master to reserve a tabletnode for meta"); -DEFINE_bool(tera_master_load_balance_table_grained, true, "whether the load balance policy only consider the specified table"); -DEFINE_double(tera_master_load_balance_size_ratio_trigger, 1.2, "ratio of heaviest node size to lightest to trigger load balance"); -DEFINE_int32(tera_master_load_balance_ts_load_threshold, 1000000000, "threshold of one tabletnode in QPS load-balance decision"); -DEFINE_int64(tera_master_load_balance_ts_size_threshold, 0, "threshold of one tabletnode in Size load-balance decision"); +DEFINE_bool(tera_master_meta_isolate_enabled, false, + "enable master to reserve a tabletnode for meta"); +DEFINE_bool(tera_master_load_balance_table_grained, true, + "whether the load balance policy only consider the specified table"); +DEFINE_double(tera_master_load_balance_size_ratio_trigger, 1.2, + "ratio of heaviest node size to lightest to trigger load balance"); +DEFINE_int32(tera_master_load_balance_ts_load_threshold, 1000000000, + "threshold of one tabletnode in QPS load-balance decision"); +DEFINE_int64(tera_master_load_balance_ts_size_threshold, 0, + "threshold of one tabletnode in Size load-balance decision"); DEFINE_int32(tera_master_load_balance_scan_weight, 300, "scan weight in load-balance decision"); -DEFINE_double(tera_safemode_tablet_locality_ratio, 0.9, "the tablet locality ratio threshold of safemode"); +DEFINE_int32(safemode_ttl_minutes, 2880, "safemode ttl default value in minutes"); + +DEFINE_double(tera_safemode_tablet_locality_ratio, 0.9, + "the tablet locality ratio threshold of safemode"); DEFINE_bool(tera_master_kick_tabletnode_enabled, true, "enable master to kick tabletnode"); -DEFINE_int32(tera_master_kick_tabletnode_query_fail_times, 10, "the number of query fail to kick tabletnode"); -DEFINE_int32(tera_master_control_tabletnode_retry_period, 60000, "the retry period (in ms) for master control tabletnode"); +DEFINE_int32(tera_master_kick_tabletnode_query_fail_times, 10, + "the number of query fail to kick tabletnode"); +DEFINE_int32(tera_master_control_tabletnode_retry_period, 60000, + "the retry period (in ms) for master control tabletnode"); DEFINE_int32(tera_master_load_rpc_timeout, 60000, "the timeout period (in ms) for load rpc"); DEFINE_int32(tera_master_unload_rpc_timeout, 60000, "the timeout period (in ms) for unload rpc"); DEFINE_int32(tera_master_split_rpc_timeout, 120000, "the timeout period (in ms) for split rpc"); -DEFINE_int32(tera_master_tabletnode_timeout, 60000, "the timeout period (in ms) for move tablet after tabletnode down"); -DEFINE_int32(tera_master_collect_info_timeout, 3000, "the timeout period (in ms) for collect tabletnode info"); -DEFINE_int32(tera_master_collect_info_retry_period, 3000, "the retry period (in ms) for collect tabletnode info"); -DEFINE_int32(tera_master_collect_info_retry_times, 10, "the max retry times for collect tabletnode info"); -DEFINE_int32(tera_master_load_slow_retry_times, 60, "the max retry times when master load very slow tablet"); - -DEFINE_int32(tera_master_rpc_server_max_inflow, -1, "the max input flow (in MB/s) for master rpc-server, -1 means no limit"); -DEFINE_int32(tera_master_rpc_server_max_outflow, -1, "the max input flow (in MB/s) for master rpc-server, -1 means no limit"); +DEFINE_int32(tera_master_tabletnode_timeout, 60000, + "the timeout period (in ms) for move tablet after tabletnode down"); +DEFINE_int32(tera_master_collect_info_timeout, 3000, + "the timeout period (in ms) for collect tabletnode info"); +DEFINE_int32(tera_master_collect_info_retry_period, 3000, + "the retry period (in ms) for collect tabletnode info"); +DEFINE_int32(tera_master_collect_info_retry_times, 10, + "the max retry times for collect tabletnode info"); +DEFINE_int32(tera_master_load_slow_retry_times, 60, + "the max retry times when master load very slow tablet"); + +DEFINE_int32(tera_master_rpc_server_max_inflow, -1, + "the max input flow (in MB/s) for master rpc-server, -1 means no limit"); +DEFINE_int32(tera_master_rpc_server_max_outflow, -1, + "the max input flow (in MB/s) for master rpc-server, -1 means no limit"); DEFINE_int32(tera_max_pre_assign_tablet_num, 100000, "max num of pre-assign tablets per table"); -DEFINE_bool(tera_delete_obsolete_tabledir_enabled, true, "move table dir to trash when dropping table"); +DEFINE_bool(tera_delete_obsolete_tabledir_enabled, true, + "move table dir to trash when dropping table"); DEFINE_int32(tera_master_gc_period, 60000, "the period (in ms) for master gc"); DEFINE_bool(tera_master_gc_trash_enabled, true, "enable master gc trash"); DEFINE_int64(tera_master_gc_trash_clean_period_s, 3600, "period (in second) for clean gc trash"); -DEFINE_bool(tera_master_availability_check_enabled, true, "whether execute availability check"); // reload config safety -DEFINE_int64(tera_master_availability_check_period, 60, "the period (in s) of availability check"); // reload config safety +DEFINE_bool(tera_master_availability_check_enabled, true, + "whether execute availability check"); // reload config safety +DEFINE_int64(tera_master_availability_check_period, 60, + "the period (in s) of availability check"); // reload config safety DEFINE_bool(tera_master_update_split_meta, true, "[split] update child tablets meta from master"); - +DEFINE_int64(abnormal_node_check_period_s, 600, "period (in second) for abnormal node check"); +DEFINE_int32(abnormal_node_trigger_count, 3, "trigger count for abnormal node check"); +DEFINE_int64(abnormal_node_auto_recovery_period_s, 600, + "period (in second) for abnormal node's auto recovery"); +DEFINE_int64(delay_add_node_schedule_period_s, 60, + "schedule period (in second) for delay add abnormal node"); +DEFINE_string(tera_cluster_name, "anonymous", "name of tera cluster for prometheus query"); + +DEFINE_bool(tera_master_support_isomerism, false, "tera master support isomerism"); +DEFINE_int64(tera_master_dfs_write_bytes_quota_in_MB, -1, + "Total cluster dfs write quota, which will trigger slowdown write mode when exceeded"); +DEFINE_int64(tera_master_dfs_qps_quota, -1, + "Total cluster dfs qps quota(open,create,delete,close), which will trigger slowdown " + "write mode when exceeded"); + +DEFINE_int32(master_merge_procedure_limit, 10, "merge procedure limit"); +DEFINE_int32(master_split_procedure_limit, 10, "split procedure limit"); +DEFINE_int32(master_move_procedure_limit, 100, "move procedure limit"); +DEFINE_int32(master_load_procedure_limit, 300, "load procedure limit"); +DEFINE_int32(master_unload_procedure_limit, 100, "unload procedure limit"); diff --git a/src/master/master_impl.cc b/src/master/master_impl.cc index 2a6611a83..6c213fab2 100644 --- a/src/master/master_impl.cc +++ b/src/master/master_impl.cc @@ -13,12 +13,24 @@ #include #include +#include "common/timer.h" #include "db/filename.h" #include "io/io_utils.h" #include "io/utils_leveldb.h" #include "leveldb/status.h" +#include "master/create_table_procedure.h" +#include "master/delete_table_procedure.h" +#include "master/disable_table_procedure.h" +#include "master/enable_table_procedure.h" +#include "master/load_tablet_procedure.h" #include "master/master_zk_adapter.h" +#include "master/merge_tablet_procedure.h" +#include "master/move_tablet_procedure.h" +#include "master/split_tablet_procedure.h" +#include "master/unload_tablet_procedure.h" +#include "master/update_table_procedure.h" #include "master/workload_scheduler.h" +#include "master_env.h" #include "proto/kv_helper.h" #include "proto/master_client.h" #include "proto/proto_helper.h" @@ -27,19 +39,7 @@ #include "utils/config_utils.h" #include "utils/schema_utils.h" #include "utils/string_util.h" -#include "common/timer.h" #include "utils/utils_cmd.h" -#include "master_env.h" -#include "master/create_table_procedure.h" -#include "master/delete_table_procedure.h" -#include "master/disable_table_procedure.h" -#include "master/enable_table_procedure.h" -#include "master/load_tablet_procedure.h" -#include "master/move_tablet_procedure.h" -#include "master/merge_tablet_procedure.h" -#include "master/split_tablet_procedure.h" -#include "master/unload_tablet_procedure.h" -#include "master/update_table_procedure.h" DECLARE_string(tera_master_port); DECLARE_bool(tera_master_meta_recovery_enabled); @@ -53,6 +53,7 @@ DECLARE_int32(tera_master_impl_query_thread_num); DECLARE_int32(tera_master_impl_retry_times); DECLARE_int32(tera_master_query_tabletnode_period); +DECLARE_int32(tera_master_tabletnode_timeout); DECLARE_string(tera_master_meta_table_name); DECLARE_string(tera_master_meta_table_path); @@ -62,6 +63,8 @@ DECLARE_string(tera_coord_type); DECLARE_bool(tera_zk_enabled); DECLARE_bool(tera_mock_zk_enabled); +DECLARE_int64(tera_master_max_tablet_size_M); +DECLARE_int64(tera_master_disable_merge_ttl_s); DECLARE_double(tera_master_workload_split_threshold); DECLARE_double(tera_master_workload_merge_threshold); DECLARE_int64(tera_master_split_tablet_size); @@ -72,6 +75,7 @@ DECLARE_bool(tera_master_kick_tabletnode_enabled); DECLARE_int32(tera_master_kick_tabletnode_query_fail_times); DECLARE_double(tera_safemode_tablet_locality_ratio); +DECLARE_int32(safemode_ttl_minutes); DECLARE_int32(tera_master_collect_info_timeout); DECLARE_int32(tera_master_collect_info_retry_period); DECLARE_int32(tera_master_collect_info_retry_times); @@ -90,6 +94,7 @@ DECLARE_int64(tera_tablet_write_block_size); DECLARE_int32(tera_master_gc_period); DECLARE_bool(tera_master_gc_trash_enabled); DECLARE_int64(tera_master_gc_trash_clean_period_s); +DECLARE_int64(delay_add_node_schedule_period_s); DECLARE_string(tera_tabletnode_path_prefix); DECLARE_string(tera_leveldb_env_type); @@ -104,7 +109,6 @@ DECLARE_int64(tera_sdk_perf_counter_log_interval); DECLARE_bool(tera_acl_enabled); DECLARE_bool(tera_only_root_create_table); -DECLARE_string(tera_master_gc_strategy); DECLARE_string(flagfile); DECLARE_bool(tera_online_schema_update_enabled); @@ -115,14 +119,18 @@ DECLARE_int64(tera_master_availability_check_period); DECLARE_bool(tera_master_availability_check_enabled); DECLARE_bool(tera_stat_table_enabled); +DECLARE_string(tera_auth_policy); +DECLARE_int64(tera_master_dfs_write_bytes_quota_in_MB); +DECLARE_int64(tera_master_dfs_qps_quota); using namespace std::placeholders; namespace tera { namespace master { -MasterImpl::MasterImpl() - : status_(kNotInited), +MasterImpl::MasterImpl(const std::shared_ptr &access_entry, + const std::shared_ptr "a_entry) + : state_machine_(MasterStatus::kIsSecondary), thread_pool_(new ThreadPool(FLAGS_tera_master_impl_thread_max_num)), restored_(false), tablet_manager_(new TabletManager(&this_sequence_id_, this, thread_pool_.get())), @@ -144,1622 +152,1781 @@ MasterImpl::MasterImpl() gc_timer_id_(kInvalidTimerId), gc_query_enable_(false), executor_(new ProcedureExecutor), - tablet_availability_(new TabletAvailability(tablet_manager_)) { - if (FLAGS_tera_master_cache_check_enabled) { - EnableReleaseCacheTimer(); - } - if (FLAGS_tera_local_addr == "") { - local_addr_ = utils::GetLocalHostName()+ ":" + FLAGS_tera_master_port; - } else { - local_addr_ = FLAGS_tera_local_addr + ":" + FLAGS_tera_master_port; - } - - if (FLAGS_tera_master_gc_strategy == "default") { - LOG(INFO) << "[gc] gc strategy is BatchGcStrategy"; - gc_strategy_ = std::shared_ptr(new BatchGcStrategy(tablet_manager_)); - } else if (FLAGS_tera_master_gc_strategy == "trackable") { - LOG(INFO) << "[gc] gc strategy is Trackable"; - } else { - LOG(ERROR) << "Unknown gc strategy: " << FLAGS_tera_master_gc_strategy - << ", exit"; - exit(EXIT_FAILURE); - } - executor_->Start(); - if (FLAGS_tera_stat_table_enabled) { - stat_table_.reset(new sdk::StatTable(thread_pool_.get(), - sdk::StatTableCustomer::kMaster, - local_addr_)); - } + tablet_availability_(new TabletAvailability(tablet_manager_)), + access_entry_(access_entry), + access_builder_(new auth::AccessBuilder(FLAGS_tera_auth_policy)), + quota_entry_(quota_entry), + abnormal_node_mgr_(new AbnormalNodeMgr()) { + if (FLAGS_tera_master_cache_check_enabled) { + EnableReleaseCacheTimer(); + } + if (FLAGS_tera_local_addr == "") { + local_addr_ = utils::GetLocalHostName() + ":" + FLAGS_tera_master_port; + } else { + local_addr_ = FLAGS_tera_local_addr + ":" + FLAGS_tera_master_port; + } + + executor_->Start(); + access_builder_->Login(auth::kInternalGroup, "", nullptr); + if (FLAGS_tera_stat_table_enabled) { + stat_table_.reset(new sdk::StatTable(thread_pool_.get(), access_builder_, + sdk::StatTableCustomer::kMaster, local_addr_)); + } + if (!!quota_entry_) { + quota_entry_->SetTabletManager(tablet_manager_); + quota_entry_->SetTabletNodeManager(tabletnode_manager_); + quota_entry_->SetDfsWriteSizeQuota(FLAGS_tera_master_dfs_write_bytes_quota_in_MB << 20); + quota_entry_->SetDfsQpsQuota(FLAGS_tera_master_dfs_qps_quota); + } } MasterImpl::~MasterImpl() { - LOG(INFO) << "begin destory impl"; - executor_->Stop(); - executor_.reset(); - stat_table_.reset(); - zk_adapter_.reset(); - LOG(INFO) << "end destory impl"; + LOG(INFO) << "begin destory impl"; + executor_->Stop(); + executor_.reset(); + stat_table_.reset(); + zk_adapter_.reset(); + abnormal_node_mgr_.reset(); + LOG(INFO) << "end destory impl"; } bool MasterImpl::Init() { - if (FLAGS_tera_leveldb_env_type != "local") { - io::InitDfsEnv(); - } - if (FLAGS_tera_coord_type.empty()) { - LOG(ERROR) << "Note: We don't recommend that use '" - << "--tera_[zk|ins|mock_zk|mock_ins]_enabled' flag for your cluster coord" - << " replace by '--tera_coord_type=[zk|ins|mock_zk|mock_ins|fake_zk]'" - << " flag is usually recommended."; - } - if (FLAGS_tera_coord_type == "zk" - || (FLAGS_tera_coord_type.empty() && FLAGS_tera_zk_enabled)) { - zk_adapter_.reset(new MasterZkAdapter(this, local_addr_)); - } else if (FLAGS_tera_coord_type == "ins" - || (FLAGS_tera_coord_type.empty() && FLAGS_tera_ins_enabled)) { - LOG(INFO) << "ins mode" ; - zk_adapter_.reset(new InsMasterZkAdapter(this, local_addr_)); - } else if (FLAGS_tera_coord_type == "mock_zk" - || (FLAGS_tera_coord_type.empty() && FLAGS_tera_mock_zk_enabled)) { - LOG(INFO) << "mock zk mode" ; - zk_adapter_.reset(new MockMasterZkAdapter(this, local_addr_)); - } else if (FLAGS_tera_coord_type == "mock_ins" - || (FLAGS_tera_coord_type.empty() && FLAGS_tera_mock_ins_enabled)) { - LOG(INFO) << "mock ins mode" ; - zk_adapter_.reset(new MockInsMasterZkAdapter(this, local_addr_)); - } else if (FLAGS_tera_coord_type == "fake_zk" - || FLAGS_tera_coord_type.empty()) { - LOG(INFO) << "fake zk mode!"; - zk_adapter_.reset(new FakeMasterZkAdapter(this, local_addr_)); - } - - MasterEnv().Init(this, - tabletnode_manager_, - tablet_manager_, - size_scheduler_, - load_scheduler_, - thread_pool_, - executor_, - tablet_availability_, - stat_table_); - - LOG(INFO) << "[acl] " << (FLAGS_tera_acl_enabled ? "enabled" : "disabled"); - SetMasterStatus(kIsSecondary); - thread_pool_->AddTask(std::bind(&MasterImpl::InitAsync, this)); - return true; + if (FLAGS_tera_leveldb_env_type != "local") { + io::InitDfsEnv(); + } + if (FLAGS_tera_coord_type.empty()) { + LOG(ERROR) << "Note: We don't recommend that use '" + << "--tera_[zk|ins|mock_zk|mock_ins]_enabled' flag for your cluster " + "coord" + << " replace by '--tera_coord_type=[zk|ins|mock_zk|mock_ins|fake_zk]'" + << " flag is usually recommended."; + } + if (FLAGS_tera_coord_type == "zk" || (FLAGS_tera_coord_type.empty() && FLAGS_tera_zk_enabled)) { + zk_adapter_.reset(new MasterZkAdapter(this, local_addr_)); + } else if (FLAGS_tera_coord_type == "ins" || + (FLAGS_tera_coord_type.empty() && FLAGS_tera_ins_enabled)) { + LOG(INFO) << "ins mode"; + zk_adapter_.reset(new InsMasterZkAdapter(this, local_addr_)); + } else if (FLAGS_tera_coord_type == "mock_zk" || + (FLAGS_tera_coord_type.empty() && FLAGS_tera_mock_zk_enabled)) { + LOG(INFO) << "mock zk mode"; + zk_adapter_.reset(new MockMasterZkAdapter(this, local_addr_)); + } else if (FLAGS_tera_coord_type == "mock_ins" || + (FLAGS_tera_coord_type.empty() && FLAGS_tera_mock_ins_enabled)) { + LOG(INFO) << "mock ins mode"; + zk_adapter_.reset(new MockInsMasterZkAdapter(this, local_addr_)); + } else if (FLAGS_tera_coord_type == "fake_zk" || FLAGS_tera_coord_type.empty()) { + LOG(INFO) << "fake zk mode!"; + zk_adapter_.reset(new FakeMasterZkAdapter(this, local_addr_)); + } + + MasterEnv().Init(this, tabletnode_manager_, tablet_manager_, access_builder_, quota_entry_, + size_scheduler_, load_scheduler_, thread_pool_, executor_, tablet_availability_, + stat_table_); + + LOG(INFO) << "[acl] " << (FLAGS_tera_acl_enabled ? "enabled" : "disabled"); + thread_pool_->AddTask(std::bind(&MasterImpl::InitAsync, this)); + return true; } void MasterImpl::InitAsync() { - std::string meta_tablet_addr; - std::map tabletnode_list; - bool safe_mode = false; + std::string meta_tablet_addr; + std::map tabletnode_list; + bool safe_mode = false; - // Make sure tabletnode_list will not change - // during restore process. - MutexLock lock(&tabletnode_mutex_); + // Make sure tabletnode_list will not change + // during restore process. + MutexLock lock(&tabletnode_mutex_); - while (!zk_adapter_->Init(&meta_tablet_addr, &tabletnode_list, - &safe_mode)) { - LOG(ERROR) << kSms << "zookeeper error, please check!"; - } + while (!zk_adapter_->Init(&meta_tablet_addr, &tabletnode_list, &safe_mode)) { + LOG(ERROR) << kSms << "zookeeper error, please check!"; + } - Restore(tabletnode_list); -} + DoStateTransition(MasterEvent::kGetMasterLock); -bool MasterImpl::Restore(const std::map& tabletnode_list) { - tabletnode_mutex_.AssertHeld(); - CHECK(!restored_); + Restore(tabletnode_list); - if (tabletnode_list.size() == 0) { - SetMasterStatus(kOnWait); - LOG(ERROR) << kSms << "no available tabletnode"; - return false; - } + ScheduleDelayAddNode(); +} + +bool MasterImpl::Restore(const std::map &tabletnode_list) { + tabletnode_mutex_.AssertHeld(); + CHECK(!restored_); - SetMasterStatus(kOnRestore); + if (tabletnode_list.size() == 0) { + DoStateTransition(MasterEvent::kNoAvailTs); + LOG(ERROR) << kSms << "no available tabletnode"; + return false; + } - std::vector tablet_list; - CollectAllTabletInfo(tabletnode_list, &tablet_list); + std::vector tablet_list; + CollectAllTabletInfo(tabletnode_list, &tablet_list); - if (!RestoreMetaTablet(tablet_list)) { - SetMasterStatus(kOnWait); - return false; - } + if (!RestoreMetaTablet(tablet_list)) { + DoStateTransition(MasterEvent::kNoAvailTs); + return false; + } - SetMasterStatus(kIsReadonly); + DoStateTransition(MasterEvent::kMetaRestored); - user_manager_->SetupRootUser(); + user_manager_->SetupRootUser(); - RestoreUserTablet(tablet_list); + RestoreUserTablet(tablet_list); - TryLeaveSafeMode(); - EnableAvailabilityCheck(); - RefreshTableCounter(); + TryLeaveSafeMode(); + EnableAvailabilityCheck(); + RefreshTableCounter(); - // restore success - restored_ = true; - return true; + // restore success + restored_ = true; + return true; } -void MasterImpl::CollectAllTabletInfo(const std::map& tabletnode_list, - std::vector* tablet_list) { - Mutex mutex; - sem_t finish_counter; - sem_init(&finish_counter, 0, 0); - tablet_list->clear(); - uint32_t tabletnode_num = tabletnode_list.size(); - std::map::const_iterator it = tabletnode_list.begin(); - for (; it != tabletnode_list.end(); ++it) { - const std::string& addr = it->first; - const std::string& uuid = it->second; - tabletnode_manager_->AddTabletNode(addr, uuid); - - QueryClosure done = - std::bind(&MasterImpl::CollectTabletInfoCallback, this, addr, - tablet_list, &finish_counter, &mutex, _1, _2, _3, _4); - QueryTabletNodeAsync(addr, FLAGS_tera_master_collect_info_timeout, false, done); - } - - uint32_t i = 0; - while (i++ < tabletnode_num) { - sem_wait(&finish_counter); - } - sem_destroy(&finish_counter); -} - -bool MasterImpl::RestoreMetaTablet(const std::vector& tablet_list) { - //std::string* meta_tablet_addr) { - // find the unique loaded complete meta tablet - // if meta_tablet is loaded by more than one tabletnode, unload them all - // if meta_tablet is incomplete (not from "" to ""), unload it - bool loaded_twice = false; - bool loaded = false; - TabletMeta meta_tablet_meta; - std::vector::const_iterator it = tablet_list.begin(); - for (; it != tablet_list.end(); ++it) { - StatusCode status = kTabletNodeOk; - const TabletMeta& meta = *it; - if (meta.table_name() == FLAGS_tera_master_meta_table_name) { - const std::string& key_start = meta.key_range().key_start(); - const std::string& key_end = meta.key_range().key_end(); - if (loaded_twice) { - if (!UnloadTabletSync(FLAGS_tera_master_meta_table_name, - key_start, key_end, meta.server_addr(), - &status)) { - TryKickTabletNode(meta.server_addr()); - } - } else if (!key_start.empty() || !key_end.empty()) { - // unload incomplete meta tablet - if (!UnloadTabletSync(FLAGS_tera_master_meta_table_name, - key_start, key_end, meta.server_addr(), - &status)) { - TryKickTabletNode(meta.server_addr()); - } - } else if (loaded) { - // more than one meta tablets are loaded - loaded_twice = true; - if (!UnloadTabletSync(FLAGS_tera_master_meta_table_name, - key_start, key_end, meta.server_addr(), - &status)) { - TryKickTabletNode(meta.server_addr()); - } - if (!UnloadTabletSync(FLAGS_tera_master_meta_table_name, "", "", - meta_tablet_meta.server_addr(), &status)) { - TryKickTabletNode(meta.server_addr()); - } - } else { - loaded = true; - meta_tablet_meta.CopyFrom(meta); - } - } - } - std::string meta_tablet_addr; - if (loaded && !loaded_twice) { - meta_tablet_addr.assign(meta_tablet_meta.server_addr()); - } else if (!LoadMetaTablet(&meta_tablet_addr)) { - return false; - } - // meta table has been loaded up by now - if (FLAGS_tera_master_meta_recovery_enabled) { - const std::string& filename = FLAGS_tera_master_meta_recovery_file; - while (!LoadMetaTableFromFile(filename)) { - LOG(ERROR) << kSms << "fail to recovery meta table from backup"; - ThisThread::Sleep(60 * 1000); - } - // load MetaTablet, clear all data in MetaTablet and dump current memory snapshot to MetaTable - while (!tablet_manager_->ClearMetaTable(meta_tablet_addr) - || !tablet_manager_->DumpMetaTable(meta_tablet_addr)) { - TryKickTabletNode(meta_tablet_addr); - if (!LoadMetaTablet(&meta_tablet_addr)) { - return false; - } - } - TabletNodePtr meta_node = tabletnode_manager_->FindTabletNode(meta_tablet_addr, NULL); - meta_tablet_ = tablet_manager_->AddMetaTablet(meta_node, zk_adapter_); - LOG(INFO) << "recovery meta table from backup file success"; - return true; - } +void MasterImpl::CollectAllTabletInfo(const std::map &tabletnode_list, + std::vector *tablet_list) { + Mutex mutex; + sem_t finish_counter; + sem_init(&finish_counter, 0, 0); + tablet_list->clear(); + uint32_t tabletnode_num = tabletnode_list.size(); + std::map::const_iterator it = tabletnode_list.begin(); + for (; it != tabletnode_list.end(); ++it) { + const std::string &addr = it->first; + const std::string &uuid = it->second; + tabletnode_manager_->AddTabletNode(addr, uuid); + QueryClosure done = std::bind(&MasterImpl::CollectTabletInfoCallback, this, addr, tablet_list, + &finish_counter, &mutex, _1, _2, _3, _4); + QueryTabletNodeAsync(addr, FLAGS_tera_master_collect_info_timeout, false, done); + } + + uint32_t i = 0; + while (i++ < tabletnode_num) { + sem_wait(&finish_counter); + } + sem_destroy(&finish_counter); +} + +bool MasterImpl::RestoreMetaTablet(const std::vector &tablet_list) { + // std::string* meta_tablet_addr) { + // find the unique loaded complete meta tablet + // if meta_tablet is loaded by more than one tabletnode, unload them all + // if meta_tablet is incomplete (not from "" to ""), unload it + bool loaded_twice = false; + bool loaded = false; + TabletMeta meta_tablet_meta; + std::vector::const_iterator it = tablet_list.begin(); + for (; it != tablet_list.end(); ++it) { StatusCode status = kTabletNodeOk; - while (!LoadMetaTable(meta_tablet_addr, &status)) { - TryKickTabletNode(meta_tablet_addr); - if (!LoadMetaTablet(&meta_tablet_addr)) { - return false; - } + const TabletMeta &meta = *it; + if (meta.table_name() == FLAGS_tera_master_meta_table_name) { + const std::string &key_start = meta.key_range().key_start(); + const std::string &key_end = meta.key_range().key_end(); + if (loaded_twice) { + if (!UnloadTabletSync(FLAGS_tera_master_meta_table_name, key_start, key_end, + meta.server_addr(), &status)) { + TryKickTabletNode(meta.server_addr()); + } + } else if (!key_start.empty() || !key_end.empty()) { + // unload incomplete meta tablet + if (!UnloadTabletSync(FLAGS_tera_master_meta_table_name, key_start, key_end, + meta.server_addr(), &status)) { + TryKickTabletNode(meta.server_addr()); + } + } else if (loaded) { + // more than one meta tablets are loaded + loaded_twice = true; + if (!UnloadTabletSync(FLAGS_tera_master_meta_table_name, key_start, key_end, + meta.server_addr(), &status)) { + TryKickTabletNode(meta.server_addr()); + } + if (!UnloadTabletSync(FLAGS_tera_master_meta_table_name, "", "", + meta_tablet_meta.server_addr(), &status)) { + TryKickTabletNode(meta.server_addr()); + } + } else { + loaded = true; + meta_tablet_meta.CopyFrom(meta); + } + } + } + std::string meta_tablet_addr; + if (loaded && !loaded_twice) { + meta_tablet_addr.assign(meta_tablet_meta.server_addr()); + } else if (!LoadMetaTablet(&meta_tablet_addr)) { + return false; + } + // meta table has been loaded up by now + if (FLAGS_tera_master_meta_recovery_enabled) { + const std::string &filename = FLAGS_tera_master_meta_recovery_file; + while (!LoadMetaTableFromFile(filename)) { + LOG(ERROR) << kSms << "fail to recovery meta table from backup"; + ThisThread::Sleep(60 * 1000); + } + // load MetaTablet, clear all data in MetaTablet and dump current memory + // snapshot to MetaTable + while (!tablet_manager_->ClearMetaTable(meta_tablet_addr) || + !tablet_manager_->DumpMetaTable(meta_tablet_addr)) { + TryKickTabletNode(meta_tablet_addr); + if (!LoadMetaTablet(&meta_tablet_addr)) { + return false; + } } + TabletNodePtr meta_node = tabletnode_manager_->FindTabletNode(meta_tablet_addr, NULL); + meta_tablet_ = tablet_manager_->AddMetaTablet(meta_node, zk_adapter_); + LOG(INFO) << "recovery meta table from backup file success"; return true; -} + } + + StatusCode status = kTabletNodeOk; + while (!LoadMetaTable(meta_tablet_addr, &status)) { + TryKickTabletNode(meta_tablet_addr); + if (!LoadMetaTablet(&meta_tablet_addr)) { + return false; + } + } + return true; +} + +void MasterImpl::RestoreUserTablet(const std::vector &report_meta_list) { + std::vector::const_iterator meta_it = report_meta_list.begin(); + std::set disabled_tables; + for (; meta_it != report_meta_list.end(); ++meta_it) { + const TabletMeta &meta = *meta_it; + const std::string &table_name = meta.table_name(); + if (table_name == FLAGS_tera_master_meta_table_name) { + continue; + } + const std::string &key_start = meta.key_range().key_start(); + const std::string &key_end = meta.key_range().key_end(); + const std::string &path = meta.path(); + const std::string &server_addr = meta.server_addr(); + TabletNodePtr node = tabletnode_manager_->FindTabletNode(meta.server_addr(), NULL); + CompactStatus compact_status = meta.compact_status(); + TabletMeta::TabletStatus status = meta.status(); -void MasterImpl::RestoreUserTablet(const std::vector& report_meta_list) { - std::vector::const_iterator meta_it = report_meta_list.begin(); - std::set disabled_tables; - for (; meta_it != report_meta_list.end(); ++meta_it) { - const TabletMeta& meta = *meta_it; - const std::string& table_name = meta.table_name(); - if (table_name == FLAGS_tera_master_meta_table_name) { - continue; - } - const std::string& key_start = meta.key_range().key_start(); - const std::string& key_end = meta.key_range().key_end(); - const std::string& path = meta.path(); - const std::string& server_addr = meta.server_addr(); - TabletNodePtr node = tabletnode_manager_->FindTabletNode(meta.server_addr(), NULL); - CompactStatus compact_status = meta.compact_status(); - TabletMeta::TabletStatus status = meta.status(); - - TabletPtr tablet; - if (!tablet_manager_->FindTablet(table_name, key_start, &tablet) - || !tablet->Verify(table_name, key_start, key_end, path, server_addr)) { - LOG(INFO) << "unload unexpected table: " << path << ", server: " - << server_addr; - TabletMeta unknown_meta = meta; - unknown_meta.set_status(TabletMeta::kTabletUnloading); - TabletPtr unknown_tablet(new Tablet(unknown_meta)); - unknown_tablet->AssignTabletNode(node); - TryUnloadTablet(unknown_tablet); - } else { - tablet->AssignTabletNode(node); - tablet->SetStatus(TabletMeta::kTabletReady); - // tablets of a table may be partially disabled before master deaded, so we need try disable - // the table once more on master restarted - if (tablet->GetTable()->GetStatus() == kTableDisable) { - disabled_tables.insert(tablet->GetTable()); - continue; - } - tablet->UpdateSize(meta); - tablet->SetCompactStatus(compact_status); - // if the actual status of a tablet reported by ts is unloading, try move it to make sure it be loaded finally - if (status == TabletMeta::kTabletUnloading || status == TabletMeta::kTabletUnloading2) { - TryMoveTablet(tablet); - continue; - } - } + TabletPtr tablet; + if (!tablet_manager_->FindTablet(table_name, key_start, &tablet) || + !tablet->Verify(table_name, key_start, key_end, path, server_addr)) { + LOG(INFO) << "unload unexpected table: " << path << ", server: " << server_addr; + TabletMeta unknown_meta = meta; + unknown_meta.set_status(TabletMeta::kTabletReady); + TabletPtr unknown_tablet(new UnknownTablet(unknown_meta)); + BindTabletToTabletNode(unknown_tablet, node); + TryUnloadTablet(unknown_tablet); + } else { + BindTabletToTabletNode(tablet, node); + // tablets of a table may be partially disabled before master deaded, so + // we need try disable + // the table once more on master restarted + if (tablet->GetTable()->GetStatus() == kTableDisable) { + disabled_tables.insert(tablet->GetTable()); + continue; + } + tablet->UpdateSize(meta); + tablet->SetCompactStatus(compact_status); + // if the actual status of a tablet reported by ts is unloading, try move + // it to make sure it be loaded finally + if (status == TabletMeta::kTabletUnloading || status == TabletMeta::kTabletUnloading2) { + tablet->SetStatus(TabletMeta::kTabletReady); + TryMoveTablet(tablet); + continue; + } + if (status == TabletMeta::kTabletReady) { + tablet->SetStatus(TabletMeta::kTabletReady); + } + // treat kTabletLoading reported from TS as kTabletOffline, thus we will + // try to load it in subsequent lines + } + } + + std::vector all_tablet_list; + tablet_manager_->ShowTable(NULL, &all_tablet_list); + std::vector::iterator it; + for (it = all_tablet_list.begin(); it != all_tablet_list.end(); ++it) { + TabletPtr tablet = *it; + if (tablet->GetTableName() == FLAGS_tera_master_meta_table_name) { + continue; + } + // there may exists in transition tablets here as we may have a + // MoveTabletProcedure for it + // if its reported status is unloading + if (tablet->InTransition()) { + LOG(WARNING) << "give up restore in transition tablet, tablet: " << tablet; + continue; + } + const std::string &server_addr = tablet->GetServerAddr(); + if (tablet->GetStatus() == TabletMeta::kTabletReady) { + VLOG(8) << "READY Tablet, " << tablet; + continue; + } + if (tablet->GetStatus() != TabletMeta::kTabletOffline) { + LOG(ERROR) << kSms << "tablet " << tablet + << ", unexpected status: " << StatusCodeToString(tablet->GetStatus()); + continue; + } + if (tablet->GetTable()->GetStatus() == kTableDisable) { + disabled_tables.insert(tablet->GetTable()); + continue; } - std::vector all_tablet_list; - tablet_manager_->ShowTable(NULL, &all_tablet_list); - std::vector::iterator it; - for (it = all_tablet_list.begin(); it != all_tablet_list.end(); ++it) { - TabletPtr tablet = *it; - if (tablet->GetTableName() == FLAGS_tera_master_meta_table_name) { - continue; - } - // there may exists in transition tablets here as we may have a MoveTabletProcedure for it - // if its reported status is unloading - if (tablet->InTransition()) { - LOG(WARNING) << "give up restore in transition tablet, tablet: " << tablet; - continue; - } - const std::string& server_addr = tablet->GetServerAddr(); - if (tablet->GetStatus() == TabletMeta::kTabletReady) { - VLOG(8) << "READY Tablet, " << tablet; - continue; - } - if (tablet->GetStatus() != TabletMeta::kTabletOffline) { - LOG(ERROR) << kSms << "tablet " << tablet - << ", unexpected status: " << StatusCodeToString(tablet->GetStatus()); - continue; - } - if (tablet->GetTable()->GetStatus() == kTableDisable) { - disabled_tables.insert(tablet->GetTable()); - continue; - } - - TabletNodePtr node; - if (server_addr.empty()) { - VLOG(8) << "OFFLINE Tablet with empty addr, " << tablet; - } else if (!tabletnode_manager_->FindTabletNode(server_addr, &node)) { - VLOG(8) << "OFFLINE Tablet of Dead TS, " << tablet; - } else if (node->state_ == kReady) { - VLOG(8) << "OFFLINE Tablet of Alive TS, " << tablet; - TryLoadTablet(tablet, node); - } else { - // Ts not response, we count its tablets as Ready and wait for it to be kicked. - tablet->SetStatus(TabletMeta::kTabletReady); - VLOG(8) << "UNKNOWN Tablet of No-Response TS, " << tablet; - } + TabletNodePtr node; + if (server_addr.empty()) { + VLOG(8) << "OFFLINE Tablet with empty addr, " << tablet; + } else if (!tabletnode_manager_->FindTabletNode(server_addr, &node)) { + VLOG(8) << "OFFLINE Tablet of Dead TS, " << tablet; + } else if (node->state_ == kReady) { + VLOG(8) << "OFFLINE Tablet of Alive TS, " << tablet; + TryLoadTablet(tablet, node); + } else { + // Ts not response, try load it + TryLoadTablet(tablet, node); + VLOG(8) << "UNKNOWN Tablet of No-Response TS, try load it" << tablet; } - for (auto& table : disabled_tables) { - if (table->LockTransition()) { - DisableAllTablets(table); - } + } + for (auto &table : disabled_tables) { + if (table->LockTransition()) { + DisableAllTablets(table); } + } } void MasterImpl::DisableAllTablets(TablePtr table) { - std::vector tablet_meta_list; - table->GetTablet(&tablet_meta_list); - int in_transition_tablet_cnt = 0; - for (uint32_t i = 0; i < tablet_meta_list.size(); ++i) { - TabletPtr tablet = tablet_meta_list[i]; - if (tablet->GetStatus() == TabletMeta::kTabletDisable) { - continue; - } - if (tablet->LockTransition()) { - if (tablet->GetStatus() == TabletMeta::kTabletOffline) { - tablet->DoStateTransition(TabletEvent::kTableDisable); - tablet->UnlockTransition(); - continue; - } - tablet->UnlockTransition(); - if (TryUnloadTablet(tablet)) { - in_transition_tablet_cnt++; - } - } - else { - in_transition_tablet_cnt++; - } - } - VLOG(23) << "table: " << table->GetTableName() << ", in transition num: " << in_transition_tablet_cnt; - if (in_transition_tablet_cnt == 0) { - table->UnlockTransition(); - return; + std::vector tablet_meta_list; + table->GetTablet(&tablet_meta_list); + int in_transition_tablet_cnt = 0; + for (uint32_t i = 0; i < tablet_meta_list.size(); ++i) { + TabletPtr tablet = tablet_meta_list[i]; + if (tablet->GetStatus() == TabletMeta::kTabletDisable) { + continue; + } + if (tablet->LockTransition()) { + if (tablet->GetStatus() == TabletMeta::kTabletOffline) { + tablet->DoStateTransition(TabletEvent::kTableDisable); + tablet->UnlockTransition(); + continue; + } + tablet->UnlockTransition(); + if (TryUnloadTablet(tablet)) { + in_transition_tablet_cnt++; + } + } else { + in_transition_tablet_cnt++; } - ThreadPool::Task task = std::bind(&MasterImpl::DisableAllTablets, this, table); - thread_pool_->DelayTask(500, task); // magic number 500ms -} - -bool MasterImpl::LoadMetaTablet(std::string* server_addr) { - TabletMeta meta; - meta.set_table_name(FLAGS_tera_master_meta_table_name); - meta.set_path(FLAGS_tera_master_meta_table_path); - meta.mutable_key_range()->set_key_start(""); - meta.mutable_key_range()->set_key_end(""); - TableSchema schema; - schema.set_name(FLAGS_tera_master_meta_table_name); - schema.set_kv_only(true); - LocalityGroupSchema* lg_schema = schema.add_locality_groups(); - lg_schema->set_compress_type(false); - lg_schema->set_store_type(MemoryStore); - - TabletNodePtr node; - while (tabletnode_manager_->ScheduleTabletNode(size_scheduler_.get(), "", - false, &node)) { - *server_addr = node->GetAddr(); - meta.set_server_addr(*server_addr); - StatusCode status = kTabletNodeOk; - if (LoadTabletSync(meta, schema, &status)) { - LOG(INFO) << "load meta tablet on node: " << *server_addr; - return true; - } - LOG(ERROR) << "fail to load meta tablet on node: " << *server_addr - << ", status: " << StatusCodeToString(status); - TryKickTabletNode(*server_addr); + } + VLOG(23) << "table: " << table->GetTableName() + << ", in transition num: " << in_transition_tablet_cnt; + if (in_transition_tablet_cnt == 0) { + table->UnlockTransition(); + return; + } + ThreadPool::Task task = std::bind(&MasterImpl::DisableAllTablets, this, table); + thread_pool_->DelayTask(500, task); // magic number 500ms +} + +bool MasterImpl::LoadMetaTablet(std::string *server_addr) { + TabletMeta meta; + meta.set_table_name(FLAGS_tera_master_meta_table_name); + meta.set_path(FLAGS_tera_master_meta_table_path); + meta.mutable_key_range()->set_key_start(""); + meta.mutable_key_range()->set_key_end(""); + TableSchema schema; + schema.set_name(FLAGS_tera_master_meta_table_name); + schema.set_kv_only(true); + LocalityGroupSchema *lg_schema = schema.add_locality_groups(); + lg_schema->set_compress_type(false); + lg_schema->set_store_type(MemoryStore); + + TabletNodePtr node; + TabletPtr tablet(new Tablet( + meta, TablePtr(new Table(FLAGS_tera_master_meta_table_name, schema, kTableEnable)))); + while (tabletnode_manager_->ScheduleTabletNode(size_scheduler_.get(), "", tablet, false, &node)) { + *server_addr = node->GetAddr(); + meta.set_server_addr(*server_addr); + StatusCode status = kTabletNodeOk; + if (LoadTabletSync(meta, schema, &status)) { + LOG(INFO) << "load meta tablet on node: " << *server_addr; + return true; } - LOG(ERROR) << "no live node to load meta tablet"; - return false; + LOG(ERROR) << "fail to load meta tablet on node: " << *server_addr + << ", status: " << StatusCodeToString(status); + TryKickTabletNode(*server_addr); + } + LOG(ERROR) << "no live node to load meta tablet"; + return false; } -void MasterImpl::UnloadMetaTablet(const std::string& server_addr) { - StatusCode status = kTabletNodeOk; - if (!UnloadTabletSync(FLAGS_tera_master_meta_table_name, "", "", server_addr, - &status)) { - LOG(ERROR) << "fail to unload meta tablet on node: " << server_addr; - TryKickTabletNode(server_addr); - } +void MasterImpl::UnloadMetaTablet(const std::string &server_addr) { + StatusCode status = kTabletNodeOk; + if (!UnloadTabletSync(FLAGS_tera_master_meta_table_name, "", "", server_addr, &status)) { + LOG(ERROR) << "fail to unload meta tablet on node: " << server_addr; + TryKickTabletNode(server_addr); + } } -bool MasterImpl::IsRootUser(const std::string& token) { - return user_manager_->UserNameToToken("root") == token; +bool MasterImpl::IsRootUser(const std::string &token) { + return user_manager_->UserNameToToken("root") == token; } // user is admin or user is in admin_group -bool MasterImpl::CheckUserPermissionOnTable(const std::string& token, TablePtr table) { - std::string group_name = table->GetSchema().admin_group(); - std::string user_name = user_manager_->TokenToUserName(token); - return (user_manager_->IsUserInGroup(user_name, group_name) - || (table->GetSchema().admin() == user_manager_->TokenToUserName(token))); -} - -bool MasterImpl::LoadMetaTable(const std::string& meta_tablet_addr, - StatusCode* ret_status) { - tablet_manager_->ClearTableList(); - ScanTabletRequest request; - ScanTabletResponse response; - request.set_sequence_id(this_sequence_id_.Inc()); - request.set_table_name(FLAGS_tera_master_meta_table_name); - request.set_start(""); +bool MasterImpl::CheckUserPermissionOnTable(const std::string &token, TablePtr table) { + std::string group_name = table->GetSchema().admin_group(); + std::string user_name = user_manager_->TokenToUserName(token); + return (user_manager_->IsUserInGroup(user_name, group_name) || + (table->GetSchema().admin() == user_manager_->TokenToUserName(token))); +} + +bool MasterImpl::LoadMetaTable(const std::string &meta_tablet_addr, StatusCode *ret_status) { + tablet_manager_->ClearTableList(); + ScanTabletRequest request; + ScanTabletResponse response; + request.set_sequence_id(this_sequence_id_.Inc()); + request.set_table_name(FLAGS_tera_master_meta_table_name); + request.set_start(""); + request.set_end(""); + TabletNodePtr meta_node = tabletnode_manager_->FindTabletNode(meta_tablet_addr, NULL); + meta_tablet_ = tablet_manager_->AddMetaTablet(meta_node, zk_adapter_); + access_builder_->BuildInternalGroupRequest(&request); + tabletnode::TabletNodeClient meta_node_client(thread_pool_.get(), meta_tablet_addr); + while (meta_node_client.ScanTablet(&request, &response)) { + if (response.status() != kTabletNodeOk) { + SetStatusCode(response.status(), ret_status); + LOG(ERROR) << "fail to load meta table: " << StatusCodeToString(response.status()); + tablet_manager_->ClearTableList(); + return false; + } + if (response.results().key_values_size() <= 0) { + LOG(INFO) << "load meta table success"; + return true; + } + uint32_t record_size = response.results().key_values_size(); + LOG(INFO) << "load meta table: " << record_size << " records"; + + std::string last_record_key; + for (uint32_t i = 0; i < record_size; i++) { + const KeyValuePair &record = response.results().key_values(i); + last_record_key = record.key(); + char first_key_char = record.key()[0]; + if (first_key_char == '~') { + user_manager_->LoadUserMeta(record.key(), record.value()); + } else if (first_key_char == '|') { + if (record.key().length() < 2) { + LOG(ERROR) << "multi tenancy meta key format wrong [key : " << record.key() + << ", value : " << record.value() << "]"; + continue; + } + char second_key_char = record.key()[1]; + if (second_key_char == '0') { + /* The auth data stores in meta_table + * |00User => Passwd, [role1, role2, ...] + * |01role1 => [Permission1, Permission2, ...] + */ + access_entry_->GetAccessUpdater().AddRecord(record.key(), record.value()); + } else if (second_key_char == '1') { + // The quota data stores in meta_table + // |10TableName => TableQuota (pb format) + quota_entry_->AddRecord(record.key(), record.value()); + } else { + LOG(ERROR) << "multi tenancy meta key format wrong [key : " << record.key() + << ", value : " << record.value() << "]"; + continue; + } + } else if (first_key_char == '@') { + tablet_manager_->LoadTableMeta(record.key(), record.value()); + } else if (first_key_char > '@') { + tablet_manager_->LoadTabletMeta(record.key(), record.value()); + } else { + continue; + } + } + std::string next_record_key = NextKey(last_record_key); + request.set_start(next_record_key); request.set_end(""); - tabletnode::TabletNodeClient meta_node_client(thread_pool_.get(), meta_tablet_addr); - while (meta_node_client.ScanTablet(&request, &response)) { - if (response.status() != kTabletNodeOk) { - SetStatusCode(response.status(), ret_status); - LOG(ERROR) << "fail to load meta table: " - << StatusCodeToString(response.status()); - tablet_manager_->ClearTableList(); - return false; - } - if (response.results().key_values_size() <= 0) { - LOG(INFO) << "load meta table success"; - TabletNodePtr meta_node = tabletnode_manager_->FindTabletNode(meta_tablet_addr, NULL); - meta_tablet_ = tablet_manager_->AddMetaTablet(meta_node, zk_adapter_); - return true; - } - uint32_t record_size = response.results().key_values_size(); - LOG(INFO) << "load meta table: " << record_size << " records"; - - std::string last_record_key; - for (uint32_t i = 0; i < record_size; i++) { - const KeyValuePair& record = response.results().key_values(i); - last_record_key = record.key(); - char first_key_char = record.key()[0]; - if (first_key_char == '~') { - user_manager_->LoadUserMeta(record.key(), record.value()); - } else if (first_key_char == '@') { - tablet_manager_->LoadTableMeta(record.key(), record.value()); - } else if (first_key_char > '@') { - tablet_manager_->LoadTabletMeta(record.key(), record.value()); - } else { - continue; - } - } - std::string next_record_key = NextKey(last_record_key); - request.set_start(next_record_key); - request.set_end(""); - request.set_sequence_id(this_sequence_id_.Inc()); - response.Clear(); - } - SetStatusCode(kRPCError, ret_status); - LOG(ERROR) << "fail to load meta table: " << StatusCodeToString(kRPCError); - tablet_manager_->ClearTableList(); + request.set_sequence_id(this_sequence_id_.Inc()); + response.Clear(); + } + SetStatusCode(kRPCError, ret_status); + LOG(ERROR) << "fail to load meta table: " << StatusCodeToString(kRPCError); + tablet_manager_->ClearTableList(); + return false; +} + +bool MasterImpl::LoadMetaTableFromFile(const std::string &filename, StatusCode *ret_status) { + tablet_manager_->ClearTableList(); + std::ifstream ifs(filename.c_str(), std::ofstream::binary); + if (!ifs.is_open()) { + LOG(ERROR) << "fail to open file " << filename << " for read"; + SetStatusCode(kIOError, ret_status); return false; -} - -bool MasterImpl::LoadMetaTableFromFile(const std::string& filename, - StatusCode* ret_status) { - tablet_manager_->ClearTableList(); - std::ifstream ifs(filename.c_str(), std::ofstream::binary); - if (!ifs.is_open()) { - LOG(ERROR) << "fail to open file " << filename << " for read"; - SetStatusCode(kIOError, ret_status); - return false; + } + + uint64_t count = 0; + std::string key, value; + while (ReadFromStream(ifs, &key, &value)) { + if (key.empty()) { + return true; + } + + char first_key_char = key[0]; + if (first_key_char == '~') { + user_manager_->LoadUserMeta(key, value); + } else if (first_key_char == '|') { + // user&passwd&role&permission + } else if (first_key_char == '@') { + tablet_manager_->LoadTableMeta(key, value); + } else if (first_key_char > '@') { + tablet_manager_->LoadTabletMeta(key, value); + } else { + continue; } - uint64_t count = 0; - std::string key, value; - while (ReadFromStream(ifs, &key, &value)) { - if (key.empty()) { - return true; - } - - char first_key_char = key[0]; - if (first_key_char == '~') { - user_manager_->LoadUserMeta(key, value); - } else if (first_key_char == '@') { - tablet_manager_->LoadTableMeta(key, value); - } else if (first_key_char > '@') { - tablet_manager_->LoadTabletMeta(key, value); - } else { - continue; - } - - count++; - } - tablet_manager_->ClearTableList(); - SetStatusCode(kIOError, ret_status); - LOG(ERROR) << "fail to load meta table: " << StatusCodeToString(kIOError); - return false; + count++; + } + tablet_manager_->ClearTableList(); + SetStatusCode(kIOError, ret_status); + LOG(ERROR) << "fail to load meta table: " << StatusCodeToString(kIOError); + return false; } -bool MasterImpl::ReadFromStream(std::ifstream& ifs, - std::string* key, - std::string* value) { - uint32_t key_size = 0, value_size = 0; - ifs.read((char*)&key_size, sizeof(key_size)); - if (ifs.eof() && ifs.gcount() == 0) { - key->clear(); - value->clear(); - return true; - } - key->resize(key_size); - ifs.read((char*)key->data(), key_size); - if (ifs.fail()) { - return false; - } - ifs.read((char*)&value_size, sizeof(value_size)); - if (ifs.fail()) { - return false; - } - value->resize(value_size); - ifs.read((char*)value->data(), value_size); - if (ifs.fail()) { - return false; - } +bool MasterImpl::ReadFromStream(std::ifstream &ifs, std::string *key, std::string *value) { + uint32_t key_size = 0, value_size = 0; + ifs.read((char *)&key_size, sizeof(key_size)); + if (ifs.eof() && ifs.gcount() == 0) { + key->clear(); + value->clear(); return true; + } + key->resize(key_size); + ifs.read((char *)key->data(), key_size); + if (ifs.fail()) { + return false; + } + ifs.read((char *)&value_size, sizeof(value_size)); + if (ifs.fail()) { + return false; + } + value->resize(value_size); + ifs.read((char *)value->data(), value_size); + if (ifs.fail()) { + return false; + } + return true; } ///////////// RPC interface ////////////// -void MasterImpl::CreateTable(const CreateTableRequest* request, - CreateTableResponse* response, - google::protobuf::Closure* done) { - response->set_sequence_id(request->sequence_id()); - MasterStatus master_status = GetMasterStatus(); - if (master_status != kIsRunning) { - LOG(ERROR) << "master is not ready, status_ = " - << StatusCodeToString(static_cast(master_status)); - response->set_status(static_cast(master_status)); - done->Run(); - return; - } - - std::shared_ptr proc(new CreateTableProcedure(request, response, done, thread_pool_.get())); - MasterEnv().GetExecutor()->AddProcedure(proc); -} +void MasterImpl::CreateTable(const CreateTableRequest *request, CreateTableResponse *response, + google::protobuf::Closure *done) { + response->set_sequence_id(request->sequence_id()); + MasterStatus master_status = GetMasterStatus(); + if (master_status != kIsRunning) { + LOG(ERROR) << "master is not ready, status_ = " + << StatusCodeToString(static_cast(master_status)); + response->set_status(static_cast(master_status)); + done->Run(); + return; + } -void MasterImpl::DeleteTable(const DeleteTableRequest* request, - DeleteTableResponse* response, - google::protobuf::Closure* done) { - response->set_sequence_id(request->sequence_id()); - MasterStatus master_status = GetMasterStatus(); - if (master_status != kIsRunning) { - LOG(ERROR) << "master is not ready, status_ = " - << StatusCodeToString(static_cast(master_status)); - response->set_status(static_cast(master_status)); - done->Run(); - return; - } + // auth VerifyAndAuthorize + if (!access_entry_->VerifyAndAuthorize(request, response)) { + VLOG(20) << "CreateTable VerifyAndAuthorize failed"; + done->Run(); + return; + } - TablePtr table; - if (!tablet_manager_->FindTable(request->table_name(), &table) || !table->LockTransition()) { - LOG_IF(ERROR, !table) << "fail to delete table: " << request->table_name() - << ", table not exist"; - LOG_IF(ERROR, table) << "fail to delete table: " << request->table_name() - << ", current in another state transition"; - StatusCode code = !table ? kTableNotFound : kTableNotSupport; - response->set_status(code); - done->Run(); - return; - } - std::shared_ptr proc(new DeleteTableProcedure(table, request, response, done, thread_pool_.get())); - MasterEnv().GetExecutor()->AddProcedure(proc); + std::shared_ptr proc( + new CreateTableProcedure(request, response, done, thread_pool_.get())); + MasterEnv().GetExecutor()->AddProcedure(proc); } -void MasterImpl::DisableTable(const DisableTableRequest* request, - DisableTableResponse* response, - google::protobuf::Closure* done) { - response->set_sequence_id(request->sequence_id()); - MasterStatus master_status = GetMasterStatus(); - if (master_status != kIsRunning) { - LOG(ERROR) << "master is not ready, status_ = " - << StatusCodeToString(static_cast(master_status)); - response->set_status(static_cast(master_status)); - done->Run(); - return; - } - TablePtr table; - if (!tablet_manager_->FindTable(request->table_name(), &table) || !table->LockTransition()) { - LOG_IF(ERROR, !table) << "fail to disable table: " << request->table_name() - << ", table not exist"; - LOG_IF(ERROR, table) << "fail to disable table: " << request->table_name() - << ", current in another state transition"; - StatusCode code = !table ? kTableNotFound : kTableNotSupport; - response->set_status(code); - done->Run(); - return; - } +void MasterImpl::DeleteTable(const DeleteTableRequest *request, DeleteTableResponse *response, + google::protobuf::Closure *done) { + response->set_sequence_id(request->sequence_id()); + MasterStatus master_status = GetMasterStatus(); + if (master_status != kIsRunning) { + LOG(ERROR) << "master is not ready, status_ = " + << StatusCodeToString(static_cast(master_status)); + response->set_status(static_cast(master_status)); + done->Run(); + return; + } - std::shared_ptr proc(new DisableTableProcedure(table, request, response, done, thread_pool_.get())); - MasterEnv().GetExecutor()->AddProcedure(proc); -} + // auth VerifyAndAuthorize + if (!access_entry_->VerifyAndAuthorize(request, response)) { + VLOG(20) << "DeleteTable VerifyAndAuthorize failed"; + done->Run(); + return; + } + + TablePtr table; + if (!tablet_manager_->FindTable(request->table_name(), &table) || !table->LockTransition()) { + LOG_IF(ERROR, !table) << "fail to delete table: " << request->table_name() + << ", table not exist"; + LOG_IF(ERROR, table) << "fail to delete table: " << request->table_name() + << ", current in another state transition"; + StatusCode code = !table ? kTableNotFound : kTableNotSupport; + response->set_status(code); + done->Run(); + return; + } + std::shared_ptr proc( + new DeleteTableProcedure(table, request, response, done, thread_pool_.get())); + MasterEnv().GetExecutor()->AddProcedure(proc); +} + +void MasterImpl::DisableTable(const DisableTableRequest *request, DisableTableResponse *response, + google::protobuf::Closure *done) { + response->set_sequence_id(request->sequence_id()); + MasterStatus master_status = GetMasterStatus(); + if (master_status != kIsRunning) { + LOG(ERROR) << "master is not ready, status_ = " + << StatusCodeToString(static_cast(master_status)); + response->set_status(static_cast(master_status)); + done->Run(); + return; + } -void MasterImpl::EnableTable(const EnableTableRequest* request, - EnableTableResponse* response, - google::protobuf::Closure* done) { - response->set_sequence_id(request->sequence_id()); - MasterStatus master_status = GetMasterStatus(); - if (master_status != kIsRunning) { - LOG(ERROR) << "master is not ready, status_ = " - << StatusCodeToString(static_cast(master_status)); - response->set_status(static_cast(master_status)); - done->Run(); - return; - } + // auth VerifyAndAuthorize + if (!access_entry_->VerifyAndAuthorize(request, response)) { + VLOG(20) << "DisableTable VerifyAndAuthorize failed"; + done->Run(); + return; + } + + TablePtr table; + if (!tablet_manager_->FindTable(request->table_name(), &table) || !table->LockTransition()) { + LOG_IF(ERROR, !table) << "fail to disable table: " << request->table_name() + << ", table not exist"; + LOG_IF(ERROR, table) << "fail to disable table: " << request->table_name() + << ", current in another state transition"; + StatusCode code = !table ? kTableNotFound : kTableNotSupport; + response->set_status(code); + done->Run(); + return; + } - TablePtr table; - if (!tablet_manager_->FindTable(request->table_name(), &table) || !table->LockTransition()) { - LOG_IF(ERROR, !table) << "fail to enable table: " << request->table_name() - << ", table not exist"; - LOG_IF(ERROR, table) << "fail to enable table: " << request->table_name() - << ", current in another state transition"; - StatusCode code = !table ? kTableNotFound : kTableNotSupport; - response->set_status(code); - done->Run(); - return; - } - std::shared_ptr proc(new EnableTableProcedure(table, request, response, done, thread_pool_.get())); - MasterEnv().GetExecutor()->AddProcedure(proc); + std::shared_ptr proc( + new DisableTableProcedure(table, request, response, done, thread_pool_.get())); + MasterEnv().GetExecutor()->AddProcedure(proc); } -void MasterImpl::UpdateCheck(const UpdateCheckRequest* request, - UpdateCheckResponse* response, - google::protobuf::Closure* done) { - response->set_sequence_id(request->sequence_id()); - TablePtr table; - if (!tablet_manager_->FindTable(request->table_name(), &table)) { - LOG(ERROR) << "[update] fail to update-check table: " << request->table_name() - << ", table not exist"; - response->set_status(kTableNotExist); - done->Run(); - return; - } - if (!HasPermission(request, table, "update-check table")) { - response->set_status(kNotPermission); - done->Run(); - return; - } - if (!FLAGS_tera_online_schema_update_enabled) { - LOG(INFO) << "[update] online-schema-change is disabled"; - response->set_status(kInvalidArgument); - } else if (table->GetSchemaIsSyncing()) { - response->set_done(false); - response->set_status(kMasterOk); - } else { - response->set_done(true); - response->set_status(kMasterOk); - } +void MasterImpl::EnableTable(const EnableTableRequest *request, EnableTableResponse *response, + google::protobuf::Closure *done) { + response->set_sequence_id(request->sequence_id()); + MasterStatus master_status = GetMasterStatus(); + if (master_status != kIsRunning) { + LOG(ERROR) << "master is not ready, status_ = " + << StatusCodeToString(static_cast(master_status)); + response->set_status(static_cast(master_status)); done->Run(); -} + return; + } -void MasterImpl::UpdateTable(const UpdateTableRequest* request, - UpdateTableResponse* response, - google::protobuf::Closure* done) { - response->set_sequence_id(request->sequence_id()); - MasterStatus master_status = GetMasterStatus(); - if (master_status != kIsRunning) { - LOG(ERROR) << "master is not ready, status_ = " - << StatusCodeToString(static_cast(master_status)); - response->set_status(static_cast(master_status)); - done->Run(); - return; - } + // auth VerifyAndAuthorize + if (!access_entry_->VerifyAndAuthorize(request, response)) { + VLOG(20) << "EnableTable VerifyAndAuthorize failed"; + done->Run(); + return; + } + + TablePtr table; + if (!tablet_manager_->FindTable(request->table_name(), &table) || !table->LockTransition()) { + LOG_IF(ERROR, !table) << "fail to enable table: " << request->table_name() + << ", table not exist"; + LOG_IF(ERROR, table) << "fail to enable table: " << request->table_name() + << ", current in another state transition"; + StatusCode code = !table ? kTableNotFound : kTableNotSupport; + response->set_status(code); + done->Run(); + return; + } + std::shared_ptr proc( + new EnableTableProcedure(table, request, response, done, thread_pool_.get())); + MasterEnv().GetExecutor()->AddProcedure(proc); +} + +void MasterImpl::UpdateCheck(const UpdateCheckRequest *request, UpdateCheckResponse *response, + google::protobuf::Closure *done) { + response->set_sequence_id(request->sequence_id()); + TablePtr table; + if (!tablet_manager_->FindTable(request->table_name(), &table)) { + LOG(ERROR) << "[update] fail to update-check table: " << request->table_name() + << ", table not exist"; + response->set_status(kTableNotExist); + done->Run(); + return; + } + if (!HasPermission(request, table, "update-check table")) { + response->set_status(kNotPermission); + done->Run(); + return; + } + if (!FLAGS_tera_online_schema_update_enabled) { + LOG(INFO) << "[update] online-schema-change is disabled"; + response->set_status(kInvalidArgument); + } else if (table->GetSchemaIsSyncing()) { + response->set_done(false); + response->set_status(kMasterOk); + } else { + response->set_done(true); + response->set_status(kMasterOk); + } + done->Run(); +} + +void MasterImpl::UpdateTable(const UpdateTableRequest *request, UpdateTableResponse *response, + google::protobuf::Closure *done) { + response->set_sequence_id(request->sequence_id()); + MasterStatus master_status = GetMasterStatus(); + if (master_status != kIsRunning) { + LOG(ERROR) << "master is not ready, status_ = " + << StatusCodeToString(static_cast(master_status)); + response->set_status(static_cast(master_status)); + done->Run(); + return; + } - TablePtr table; - if (!tablet_manager_->FindTable(request->table_name(), &table) || !table->LockTransition()) { - LOG_IF(ERROR, !table) << "fail to update table: " << request->table_name() - << ", table not exist"; - LOG_IF(ERROR, table) << "fail to update table: " << request->table_name() - << ", current in another state transition"; - StatusCode code = !table ? kTableNotFound : kTableNotSupport; - response->set_status(code); - done->Run(); - return; - } + // auth UpdateTable + if (!access_entry_->VerifyAndAuthorize(request, response)) { + VLOG(20) << "UpdateTable VerifyAndAuthorize failed"; + done->Run(); + return; + } + + TablePtr table; + if (!tablet_manager_->FindTable(request->table_name(), &table) || !table->LockTransition()) { + LOG_IF(ERROR, !table) << "fail to update table: " << request->table_name() + << ", table not exist"; + LOG_IF(ERROR, table) << "fail to update table: " << request->table_name() + << ", current in another state transition"; + StatusCode code = !table ? kTableNotFound : kTableNotSupport; + response->set_status(code); + done->Run(); + return; + } - std::shared_ptr proc(new UpdateTableProcedure(table, request, response, done, thread_pool_.get())); - MasterEnv().GetExecutor()->AddProcedure(proc); + std::shared_ptr proc( + new UpdateTableProcedure(table, request, response, done, thread_pool_.get())); + MasterEnv().GetExecutor()->AddProcedure(proc); } -void MasterImpl::SearchTable(const SearchTableRequest* request, - SearchTableResponse* response, - google::protobuf::Closure* done) { - response->set_sequence_id(request->sequence_id()); - MasterStatus master_status = GetMasterStatus(); - if (master_status != kIsRunning) { - LOG(ERROR) << "master is not ready, status_ = " - << StatusCodeToString(static_cast(master_status)); - response->set_status(static_cast(master_status)); - done->Run(); - return; - } +void MasterImpl::SearchTable(const SearchTableRequest *request, SearchTableResponse *response, + google::protobuf::Closure *done) { + response->set_sequence_id(request->sequence_id()); + MasterStatus master_status = GetMasterStatus(); + if (master_status != kIsRunning) { + LOG(ERROR) << "master is not ready, status_ = " + << StatusCodeToString(static_cast(master_status)); + response->set_status(static_cast(master_status)); + done->Run(); + return; + } + + std::string start_table_name = request->prefix_table_name(); + if (request->has_start_table_name()) { + start_table_name = request->start_table_name(); + } + std::string start_tablet_key; + if (request->has_start_tablet_key()) { + start_tablet_key = request->start_tablet_key(); + } + uint32_t max_found = std::numeric_limits::max(); + if (request->has_max_num()) { + max_found = request->max_num(); + } + StatusCode status = kMasterOk; + std::vector tablet_list; + int64_t found_num = + tablet_manager_->SearchTable(&tablet_list, request->prefix_table_name(), start_table_name, + start_tablet_key, max_found, &status); + if (found_num >= 0) { + TabletMetaList *ret_meta_list = response->mutable_meta_list(); + for (uint32_t i = 0; i < tablet_list.size(); ++i) { + TabletPtr tablet = tablet_list[i]; + tablet->ToMeta(ret_meta_list->add_meta()); + } + response->set_is_more(found_num == max_found); + } else { + LOG(ERROR) << "fail to find tablet meta for: " << request->prefix_table_name() + << ", status_: " << StatusCodeToString(status); + } + + response->set_status(status); + done->Run(); +} + +void MasterImpl::CopyTableMetaToUser(TablePtr table, TableMeta *meta_ptr) { + TableSchema old_schema; + if (table->GetOldSchema(&old_schema)) { + TableMeta meta; + table->ToMeta(&meta); + meta.mutable_schema()->CopyFrom(old_schema); + meta_ptr->CopyFrom(meta); + } else { + table->ToMeta(meta_ptr); + } + meta_ptr->set_create_time(table->CreateTime() / 1000000); +} + +void MasterImpl::ShowTables(const ShowTablesRequest *request, ShowTablesResponse *response, + google::protobuf::Closure *done) { + response->set_sequence_id(request->sequence_id()); + MasterStatus master_status = GetMasterStatus(); + if (master_status != kIsRunning && master_status != kIsReadonly) { + LOG(ERROR) << "master is not ready, status_ = " + << StatusCodeToString(static_cast(master_status)); + response->set_status(static_cast(master_status)); + done->Run(); + return; + } + + std::string start_table_name; + if (request->has_start_table_name()) { + start_table_name = request->start_table_name(); + } + std::string start_tablet_key; + if (request->has_start_tablet_key()) { + start_tablet_key = request->start_tablet_key(); + } + uint32_t max_table_found = std::numeric_limits::max(); + if (request->has_max_table_num()) { + max_table_found = request->max_table_num(); + } + uint32_t max_tablet_found = std::numeric_limits::max(); + if (request->has_max_tablet_num()) { + max_tablet_found = request->max_tablet_num(); + } + + StatusCode status = kMasterOk; + std::vector table_list; + std::vector tablet_list; + bool is_more = false; + bool ret = + tablet_manager_->ShowTable(&table_list, &tablet_list, start_table_name, start_tablet_key, + max_table_found, max_tablet_found, &is_more, &status); + if (ret) { + TableMetaList *table_meta_list = response->mutable_table_meta_list(); + for (uint32_t i = 0; i < table_list.size(); ++i) { + TablePtr table = table_list[i]; + CopyTableMetaToUser(table, table_meta_list->add_meta()); + } + TabletMetaList *tablet_meta_list = response->mutable_tablet_meta_list(); + for (uint32_t i = 0; i < tablet_list.size(); ++i) { + TabletPtr tablet = tablet_list[i]; + TabletMeta meta; + tablet->ToMeta(&meta); + meta.set_last_move_time_us(tablet->LastMoveTime()); + meta.set_data_size_on_flash(tablet->GetDataSizeOnFlash()); + tablet_meta_list->add_meta()->CopyFrom(meta); + tablet_meta_list->add_counter()->CopyFrom(tablet->GetCounter()); + tablet_meta_list->add_timestamp(tablet->UpdateTime()); + } + response->set_is_more(is_more); + } else { + LOG(ERROR) << "fail to show all tables, status_: " << StatusCodeToString(status); + } + + response->set_status(status); + done->Run(); +} + +void MasterImpl::ShowTablesBrief(const ShowTablesRequest *request, ShowTablesResponse *response, + google::protobuf::Closure *done) { + response->set_sequence_id(request->sequence_id()); + MasterStatus master_status = GetMasterStatus(); + if (master_status != kIsRunning && master_status != kIsReadonly) { + LOG(ERROR) << "master is not ready, status_ = " + << StatusCodeToString(static_cast(master_status)); + response->set_status(static_cast(master_status)); + done->Run(); + return; + } + + std::vector table_list; + tablet_manager_->ShowTable(&table_list, NULL); + + TableMetaList *table_meta_list = response->mutable_table_meta_list(); + for (uint32_t i = 0; i < table_list.size(); ++i) { + TablePtr table = table_list[i]; + table->ToMeta(table_meta_list->add_meta()); + table_meta_list->add_counter()->CopyFrom(table->GetCounter()); + } + + response->set_all_brief(true); + response->set_status(kMasterOk); + done->Run(); +} + +void MasterImpl::ShowTabletNodes(const ShowTabletNodesRequest *request, + ShowTabletNodesResponse *response, + google::protobuf::Closure *done) { + response->set_sequence_id(request->sequence_id()); + MasterStatus master_status = GetMasterStatus(); + if (master_status != kIsRunning && master_status != kIsReadonly) { + LOG(ERROR) << "master is not ready, status_ = " + << StatusCodeToString(static_cast(master_status)); + response->set_status(static_cast(master_status)); + done->Run(); + return; + } - std::string start_table_name = request->prefix_table_name(); - if (request->has_start_table_name()) { - start_table_name = request->start_table_name(); - } - std::string start_tablet_key; - if (request->has_start_tablet_key()) { - start_tablet_key = request->start_tablet_key(); + if (request->has_is_showall() && request->is_showall()) { + // show all tabletnodes + std::vector tabletnode_array; + tabletnode_manager_->GetAllTabletNodeInfo(&tabletnode_array); + for (size_t i = 0; i < tabletnode_array.size(); ++i) { + response->add_tabletnode_info()->CopyFrom(tabletnode_array[i]->GetInfo()); } - uint32_t max_found = std::numeric_limits::max(); - if (request->has_max_num()) { - max_found = request->max_num(); + response->set_status(kMasterOk); + done->Run(); + return; + } else { + TabletNodePtr tabletnode; + if (!tabletnode_manager_->FindTabletNode(request->addr(), &tabletnode)) { + response->set_status(kTabletNodeNotRegistered); + done->Run(); + return; } - StatusCode status = kMasterOk; + response->add_tabletnode_info()->CopyFrom(tabletnode->GetInfo()); std::vector tablet_list; - int64_t found_num = tablet_manager_->SearchTable(&tablet_list, - request->prefix_table_name(), start_table_name, - start_tablet_key, max_found, &status); - if (found_num >= 0) { - TabletMetaList* ret_meta_list = response->mutable_meta_list(); - for (uint32_t i = 0; i < tablet_list.size(); ++i) { - TabletPtr tablet = tablet_list[i]; - tablet->ToMeta(ret_meta_list->add_meta()); - } - response->set_is_more(found_num == max_found); - } else { - LOG(ERROR) << "fail to find tablet meta for: " << request->prefix_table_name() - << ", status_: " << StatusCodeToString(status); + tablet_manager_->FindTablet(request->addr(), &tablet_list, + false); // don't need disabled tables/tablets + for (size_t i = 0; i < tablet_list.size(); ++i) { + TabletMeta *meta = response->mutable_tabletmeta_list()->add_meta(); + TabletCounter *counter = response->mutable_tabletmeta_list()->add_counter(); + tablet_list[i]->ToMeta(meta); + counter->CopyFrom(tablet_list[i]->GetCounter()); } - response->set_status(status); + response->set_status(kMasterOk); done->Run(); + return; + } } -void MasterImpl::CopyTableMetaToUser(TablePtr table, TableMeta* meta_ptr) { - TableSchema old_schema; - if (table->GetOldSchema(&old_schema)) { - TableMeta meta; - table->ToMeta(&meta); - meta.mutable_schema()->CopyFrom(old_schema); - meta_ptr->CopyFrom(meta); - } else { - table->ToMeta(meta_ptr); - } -} +void MasterImpl::KickTabletNodeCmdCtrl(const CmdCtrlRequest *request, CmdCtrlResponse *response) { + if (request->arg_list_size() != 1) { + response->set_status(kInvalidArgument); + return; + } + std::string node_addr = request->arg_list(0); + TabletNodePtr node; + if (!tabletnode_manager_->FindTabletNode(node_addr, &node)) { + response->set_status(kInvalidArgument); + return; + } -void MasterImpl::ShowTables(const ShowTablesRequest* request, - ShowTablesResponse* response, - google::protobuf::Closure* done) { - response->set_sequence_id(request->sequence_id()); - MasterStatus master_status = GetMasterStatus(); - if (master_status != kIsRunning && master_status != kIsReadonly) { - LOG(ERROR) << "master is not ready, status_ = " - << StatusCodeToString(static_cast(master_status)); - response->set_status(static_cast(master_status)); - done->Run(); - return; - } + std::string operation = request->command(); + if (request->command() == "forcekick") { + std::lock_guard lock(kick_mutex_); + zk_adapter_->KickTabletServer(node->addr_, node->uuid_); + response->set_status(kMasterOk); + return; + } - std::string start_table_name; - if (request->has_start_table_name()) { - start_table_name = request->start_table_name(); - } - std::string start_tablet_key; - if (request->has_start_tablet_key()) { - start_tablet_key = request->start_tablet_key(); + if (request->arg_list_size() == 1) { + StatusCode status = kMasterOk; + if (!TryKickTabletNode(node)) { + status = static_cast(GetMasterStatus()); } - uint32_t max_table_found = std::numeric_limits::max(); - if (request->has_max_table_num()) { - max_table_found = request->max_table_num(); + response->set_status(status); + return; + } +} + +void MasterImpl::CmdCtrl(const CmdCtrlRequest *request, CmdCtrlResponse *response) { + std::string cmd_line; + for (int32_t i = 0; i < request->arg_list_size(); i++) { + cmd_line += request->arg_list(i); + if (i != request->arg_list_size() - 1) { + cmd_line += " "; + } + } + LOG(INFO) << "receive cmd: " << request->command() << " " << cmd_line; + + response->set_sequence_id(request->sequence_id()); + if (request->command() == "safemode") { + SafeModeCmdCtrl(request, response); + } else if (request->command() == "tablet") { + TabletCmdCtrl(request, response); + } else if (request->command() == "meta") { + MetaCmdCtrl(request, response); + } else if (request->command() == "reload config") { + ReloadConfig(response); + } else if (request->command() == "kick" || request->command() == "forcekick") { + KickTabletNodeCmdCtrl(request, response); + } else if (request->command() == "table") { + TableCmdCtrl(request, response); + } else if (request->command() == "dfs-hard-limit") { + DfsHardLimitCmdCtrl(request, response); + } else if (request->command() == "procedure-limit") { + ProcedureLimitCmdCtrl(request, response); + } else { + response->set_status(kInvalidArgument); + } +} + +void MasterImpl::AddUserInfoToMetaCallback(UserPtr user_ptr, const OperateUserRequest *rpc_request, + OperateUserResponse *rpc_response, + google::protobuf::Closure *rpc_done, bool succ) { + if (!succ) { + rpc_response->set_status(kMetaTabletError); + rpc_done->Run(); + return; + } + rpc_response->set_status(kMasterOk); + rpc_done->Run(); + LOG(INFO) << "[user-manager] write user info to meta table done: " + << user_ptr->GetUserInfo().user_name(); + std::string user_name = user_ptr->GetUserInfo().user_name(); + UserOperateType op_type = rpc_request->op_type(); + if (op_type == kDeleteUser) { + user_manager_->DeleteUser(user_name); + } else if (op_type == kCreateUser) { + user_manager_->AddUser(user_name, user_ptr->GetUserInfo()); + } else if (op_type == kChangePwd) { + user_manager_->SetUserInfo(user_name, user_ptr->GetUserInfo()); + } else if (op_type == kAddToGroup) { + user_manager_->SetUserInfo(user_name, user_ptr->GetUserInfo()); + } else if (op_type == kDeleteFromGroup) { + user_manager_->SetUserInfo(user_name, user_ptr->GetUserInfo()); + } else { + LOG(ERROR) << "[user-manager] unknown operate type: " << op_type; + } + LOG(INFO) << "[user-manager] " << user_ptr->DebugString(); +} + +void MasterImpl::OperateUser(const OperateUserRequest *request, OperateUserResponse *response, + google::protobuf::Closure *done) { + response->set_sequence_id(request->sequence_id()); + MasterStatus master_status = GetMasterStatus(); + if (master_status != kIsRunning) { + LOG(ERROR) << "master is not ready, status_ = " + << StatusCodeToString(static_cast(master_status)); + response->set_status(static_cast(master_status)); + done->Run(); + return; + } + if (!request->has_user_info() || !request->user_info().has_user_name() || + !request->has_op_type()) { + response->set_status(kInvalidArgument); + done->Run(); + return; + } + /* + * for (change password), (add user to group), (delete user from group), + * we get the original UserInfo(including token & group), + * do some modification according to the RPC request on the original UserInfo, + * and rewrite it to meta table. + */ + UserInfo operated_user = request->user_info(); + std::string user_name = operated_user.user_name(); + std::string token; // caller of this request + token = request->has_user_token() ? request->user_token() : ""; + UserOperateType op_type = request->op_type(); + bool is_delete = false; + bool is_invalid = false; + if (op_type == kCreateUser) { + if (!operated_user.has_user_name() || !operated_user.has_token() || + !user_manager_->IsValidForCreate(token, user_name)) { + is_invalid = true; + } + } else if (op_type == kDeleteUser) { + if (!operated_user.has_user_name() || !user_manager_->IsValidForDelete(token, user_name)) { + is_invalid = true; + } else { + is_delete = true; } - uint32_t max_tablet_found = std::numeric_limits::max(); - if (request->has_max_tablet_num()) { - max_tablet_found = request->max_tablet_num(); + } else if (op_type == kChangePwd) { + if (!operated_user.has_user_name() || !operated_user.has_token() || + !user_manager_->IsValidForChangepwd(token, user_name)) { + is_invalid = true; + } else { + operated_user = user_manager_->GetUserInfo(user_name); + operated_user.set_token(request->user_info().token()); } - - StatusCode status = kMasterOk; - std::vector table_list; - std::vector tablet_list; - bool is_more = false; - bool ret = - tablet_manager_->ShowTable(&table_list, &tablet_list, - start_table_name, start_tablet_key, - max_table_found, max_tablet_found, - &is_more, &status); - if (ret) { - TableMetaList* table_meta_list = response->mutable_table_meta_list(); - for (uint32_t i = 0; i < table_list.size(); ++i) { - TablePtr table = table_list[i]; - CopyTableMetaToUser(table, table_meta_list->add_meta()); - } - TabletMetaList* tablet_meta_list = response->mutable_tablet_meta_list(); - for (uint32_t i = 0; i < tablet_list.size(); ++i) { - TabletPtr tablet = tablet_list[i]; - TabletMeta meta; - tablet->ToMeta(&meta); - meta.set_last_move_time_us(tablet->LastMoveTime()); - tablet_meta_list->add_meta()->CopyFrom(meta); - tablet_meta_list->add_counter()->CopyFrom(tablet->GetCounter()); - tablet_meta_list->add_timestamp(tablet->UpdateTime()); - } - response->set_is_more(is_more); + } else if (op_type == kAddToGroup) { + if (!operated_user.has_user_name() || operated_user.group_name_size() != 1 || + !user_manager_->IsValidForAddToGroup(token, user_name, operated_user.group_name(0))) { + is_invalid = true; + } else { + std::string group = operated_user.group_name(0); + operated_user = user_manager_->GetUserInfo(user_name); + operated_user.add_group_name(group); + } + } else if (op_type == kDeleteFromGroup) { + if (!operated_user.has_user_name() || operated_user.group_name_size() != 1 || + !user_manager_->IsValidForDeleteFromGroup(token, user_name, operated_user.group_name(0))) { + is_invalid = true; } else { - LOG(ERROR) << "fail to show all tables, status_: " - << StatusCodeToString(status); + std::string group = operated_user.group_name(0); + operated_user = user_manager_->GetUserInfo(user_name); + user_manager_->DeleteGroupFromUserInfo(operated_user, group); } - - response->set_status(status); + } else if (op_type == kShowUser) { + UserInfo *user_info = response->mutable_user_info(); + *user_info = user_manager_->GetUserInfo(user_name); + response->set_status(kMasterOk); + done->Run(); + return; + } else { + LOG(ERROR) << "[user-manager] unknown operate type: " << op_type; + is_invalid = true; + } + if (is_invalid) { + response->set_status(kInvalidArgument); done->Run(); + return; + } + UserPtr user_ptr(new User(user_name, operated_user)); + + std::string key, value; + user_ptr->ToMetaTableKeyValue(&key, &value); + MetaWriteRecord record{key, value, is_delete}; + UpdateMetaClosure closure = std::bind(&MasterImpl::AddUserInfoToMetaCallback, this, user_ptr, + request, response, done, _1); + MasterEnv().BatchWriteMetaTableAsync(record, closure, FLAGS_tera_master_meta_retry_times); } -void MasterImpl::ShowTablesBrief(const ShowTablesRequest* request, - ShowTablesResponse* response, - google::protobuf::Closure* done) { - response->set_sequence_id(request->sequence_id()); - MasterStatus master_status = GetMasterStatus(); - if (master_status != kIsRunning && master_status != kIsReadonly) { - LOG(ERROR) << "master is not ready, status_ = " - << StatusCodeToString(static_cast(master_status)); - response->set_status(static_cast(master_status)); - done->Run(); - return; - } +void MasterImpl::SafeModeCmdCtrl(const CmdCtrlRequest *request, CmdCtrlResponse *response) { + if (request->arg_list_size() < 1) { + response->set_status(kInvalidArgument); + return; + } - std::vector table_list; - tablet_manager_->ShowTable(&table_list, NULL); + StatusCode status; - TableMetaList* table_meta_list = response->mutable_table_meta_list(); - for (uint32_t i = 0; i < table_list.size(); ++i) { - TablePtr table = table_list[i]; - table->ToMeta(table_meta_list->add_meta()); - table_meta_list->add_counter()->CopyFrom(table->GetCounter()); - } + int32_t minutes = -1; + if (request->arg_list_size() == 2) { + minutes = std::atoi(request->arg_list(1).c_str()); + } - response->set_all_brief(true); + if (request->arg_list(0) == "enter") { + if (EnterSafeMode(MasterEvent::kEnterSafemode, &status) || status == kMasterIsReadonly) { + SetSafeModeTTLTask(minutes); + response->set_status(kMasterOk); + } else { + response->set_status(status); + } + } else if (request->arg_list(0) == "leave") { + // int32_t running_guard_time = -1; + if (request->arg_list_size() == 2) { + // running_guard_time = std::atoi(request->arg_list(1).c_str()); + if (IsInSafeMode() && minutes > 0) { + LOG(INFO) << "master will leave safemode and keep in running state for " << minutes + << " minutes"; + running_guard_timestamp_ = get_millis() + minutes * 60 * 1000; + } + } + + if (LeaveSafeMode(MasterEvent::kLeaveSafemode, &status) || status == kMasterIsRunning) { + CancelSafeModeTTLTask(); + response->set_status(kMasterOk); + } else { + response->set_status(status); + } + } else if (request->arg_list(0) == "get") { + response->set_bool_result(kIsReadonly == GetMasterStatus()); + response->set_str_result(StatusCodeToString(static_cast(GetMasterStatus()))); response->set_status(kMasterOk); - done->Run(); + } else { + response->set_status(kInvalidArgument); + } } -void MasterImpl::ShowTabletNodes(const ShowTabletNodesRequest* request, - ShowTabletNodesResponse* response, - google::protobuf::Closure* done) { - response->set_sequence_id(request->sequence_id()); - MasterStatus master_status = GetMasterStatus(); - if (master_status != kIsRunning && master_status != kIsReadonly) { - LOG(ERROR) << "master is not ready, status_ = " - << StatusCodeToString(static_cast(master_status)); - response->set_status(static_cast(master_status)); - done->Run(); - return; - } +void MasterImpl::SetSafeModeTTLTask(int64_t delay_minutes) { + delay_minutes = delay_minutes > 0 ? delay_minutes : FLAGS_safemode_ttl_minutes; + LOG(INFO) << "master will keep in safemode for next " << delay_minutes << " minutes"; + int64_t delay_ms = delay_minutes * 60 * 1000; - if (request->has_is_showall() && request->is_showall()) { - // show all tabletnodes - std::vector tabletnode_array; - tabletnode_manager_->GetAllTabletNodeInfo(&tabletnode_array); - for (size_t i = 0; i < tabletnode_array.size(); ++i) { - response->add_tabletnode_info()->CopyFrom(tabletnode_array[i]->GetInfo()); - } - response->set_status(kMasterOk); - done->Run(); - return; - } else { - TabletNodePtr tabletnode; - if (!tabletnode_manager_->FindTabletNode(request->addr(), &tabletnode)) { - response->set_status(kTabletNodeNotRegistered); - done->Run(); - return; - } - response->add_tabletnode_info()->CopyFrom(tabletnode->GetInfo()); - std::vector tablet_list; - tablet_manager_->FindTablet(request->addr(), - &tablet_list, - false); // don't need disabled tables/tablets - for (size_t i = 0; i < tablet_list.size(); ++i) { - TabletMeta* meta = response->mutable_tabletmeta_list()->add_meta(); - TabletCounter* counter = response->mutable_tabletmeta_list()->add_counter(); - tablet_list[i]->ToMeta(meta); - counter->CopyFrom(tablet_list[i]->GetCounter()); - } - - response->set_status(kMasterOk); - done->Run(); - return; - } + ThreadPool::Task delay_task = std::bind(&MasterImpl::TryLeaveSafeMode, this); + if (safemode_ttl_taskid_ > 0) { + thread_pool_->CancelTask(safemode_ttl_taskid_); + } + safemode_ttl_taskid_ = thread_pool_->DelayTask(delay_ms, delay_task); } -void MasterImpl::KickTabletNodeCmdCtrl(const CmdCtrlRequest* request, - CmdCtrlResponse* response) { - if (request->arg_list_size() == 1) { - TryKickTabletNode(request->arg_list(0)); - response->set_status(kMasterOk); - return; - } else { - response->set_status(kInvalidArgument); - return; - } +void MasterImpl::CancelSafeModeTTLTask() { + if (safemode_ttl_taskid_ > 0) { + thread_pool_->CancelTask(safemode_ttl_taskid_); + safemode_ttl_taskid_ = -1; + } } -void MasterImpl::CmdCtrl(const CmdCtrlRequest* request, - CmdCtrlResponse* response) { - std::string cmd_line; - for (int32_t i = 0; i < request->arg_list_size(); i++) { - cmd_line += request->arg_list(i); - if (i != request->arg_list_size() - 1) { - cmd_line += " "; - } - } - LOG(INFO) << "receive cmd: " << request->command() << " " << cmd_line; - - response->set_sequence_id(request->sequence_id()); - if (request->command() == "safemode") { - SafeModeCmdCtrl(request, response); - } else if (request->command() == "tablet") { - TabletCmdCtrl(request, response); - } else if (request->command() == "meta") { - MetaCmdCtrl(request, response); - } else if (request->command() == "reload config") { - ReloadConfig(response); - } else if (request->command() == "kick") { - KickTabletNodeCmdCtrl(request, response); - } else if (request->command() == "table") { - TableCmdCtrl(request, response); - } else { - response->set_status(kInvalidArgument); - } +void MasterImpl::ReloadConfig(CmdCtrlResponse *response) { + if (utils::LoadFlagFile(FLAGS_flagfile)) { + LOG(INFO) << "[reload config] done"; + response->set_status(kMasterOk); + } else { + LOG(ERROR) << "[reload config] config file not found"; + response->set_status(kInvalidArgument); + } } -void MasterImpl::AddUserInfoToMetaCallback(UserPtr user_ptr, - const OperateUserRequest* rpc_request, - OperateUserResponse* rpc_response, - google::protobuf::Closure* rpc_done, - bool succ) { - if (!succ) { - rpc_response->set_status(kMetaTabletError); - rpc_done->Run(); - return; - } - rpc_response->set_status(kMasterOk); - rpc_done->Run(); - LOG(INFO) << "[user-manager] write user info to meta table done: " - << user_ptr->GetUserInfo().user_name(); - std::string user_name = user_ptr->GetUserInfo().user_name(); - UserOperateType op_type = rpc_request->op_type(); - if (op_type == kDeleteUser) { - user_manager_->DeleteUser(user_name); - } else if (op_type == kCreateUser){ - user_manager_->AddUser(user_name, user_ptr->GetUserInfo()); - } else if (op_type == kChangePwd) { - user_manager_->SetUserInfo(user_name, user_ptr->GetUserInfo()); - } else if (op_type == kAddToGroup) { - user_manager_->SetUserInfo(user_name, user_ptr->GetUserInfo()); - } else if (op_type == kDeleteFromGroup) { - user_manager_->SetUserInfo(user_name, user_ptr->GetUserInfo()); - } else { - LOG(ERROR) << "[user-manager] unknown operate type: " << op_type; - } - LOG(INFO) << "[user-manager] " << user_ptr->DebugString(); -} +void MasterImpl::TableCmdCtrl(const CmdCtrlRequest *request, CmdCtrlResponse *response) { + if (request->arg_list_size() < 2) { + response->set_status(kInvalidArgument); + return; + } -void MasterImpl::OperateUser(const OperateUserRequest* request, - OperateUserResponse* response, - google::protobuf::Closure* done) { - response->set_sequence_id(request->sequence_id()); - MasterStatus master_status = GetMasterStatus(); - if (master_status != kIsRunning) { - LOG(ERROR) << "master is not ready, status_ = " - << StatusCodeToString(static_cast(master_status)); - response->set_status(static_cast(master_status)); - done->Run(); - return; - } - if (!request->has_user_info() - || !request->user_info().has_user_name() - || !request->has_op_type()) { - response->set_status(kInvalidArgument); - done->Run(); - return; - } - /* - * for (change password), (add user to group), (delete user from group), - * we get the original UserInfo(including token & group), - * do some modification according to the RPC request on the original UserInfo, - * and rewrite it to meta table. - */ - UserInfo operated_user = request->user_info(); - std::string user_name = operated_user.user_name(); - std::string token; // caller of this request - token = request->has_user_token() ? request->user_token() : ""; - UserOperateType op_type = request->op_type(); - bool is_delete = false; - bool is_invalid = false; - if (op_type == kCreateUser) { - if (!operated_user.has_user_name() || !operated_user.has_token() - || !user_manager_->IsValidForCreate(token, user_name)) { - is_invalid = true; - } - } else if (op_type == kDeleteUser) { - if (!operated_user.has_user_name() - || !user_manager_->IsValidForDelete(token, user_name)) { - is_invalid = true; - } else { - is_delete = true; - } - } else if (op_type == kChangePwd) { - if (!operated_user.has_user_name() || !operated_user.has_token() - || !user_manager_->IsValidForChangepwd(token, user_name)) { - is_invalid = true; - } else { - operated_user = user_manager_->GetUserInfo(user_name); - operated_user.set_token(request->user_info().token()); - } - } else if (op_type == kAddToGroup) { - if (!operated_user.has_user_name() || operated_user.group_name_size() != 1 - || !user_manager_->IsValidForAddToGroup(token, user_name, - operated_user.group_name(0))) { - is_invalid = true; - } else { - std::string group = operated_user.group_name(0); - operated_user = user_manager_->GetUserInfo(user_name); - operated_user.add_group_name(group); - } - } else if (op_type == kDeleteFromGroup) { - if (!operated_user.has_user_name() || operated_user.group_name_size() != 1 - || !user_manager_->IsValidForDeleteFromGroup(token, user_name, - operated_user.group_name(0))) { - is_invalid = true; - } else { - std::string group = operated_user.group_name(0); - operated_user = user_manager_->GetUserInfo(user_name); - user_manager_->DeleteGroupFromUserInfo(operated_user, group); - } - } else if (op_type == kShowUser) { - UserInfo* user_info = response->mutable_user_info(); - *user_info = user_manager_->GetUserInfo(user_name); - response->set_status(kMasterOk); - done->Run(); - return; - } else { - LOG(ERROR) << "[user-manager] unknown operate type: " << op_type; - is_invalid = true; - } - if (is_invalid) { + if (request->arg_list(0) == "split") { + TabletPtr tablet; + StatusCode status; + for (int32_t i = 2; i < request->arg_list_size(); i++) { + if (!tablet_manager_->SearchTablet(request->arg_list(1), request->arg_list(i), &tablet, + &status)) { response->set_status(kInvalidArgument); - done->Run(); return; + } + VLOG(10) << "table split: key " << request->arg_list(i) << ", " << tablet; + // TrySplitTablet(tablet, request->arg_list(i)); } - UserPtr user_ptr(new User(user_name, operated_user)); - - std::string key, value; - user_ptr->ToMetaTableKeyValue(&key, &value); - MetaWriteRecord record {key, value, is_delete}; - UpdateMetaClosure closure = std::bind( - &MasterImpl::AddUserInfoToMetaCallback, this, user_ptr, request, response, done, _1); - MasterEnv().BatchWriteMetaTableAsync(record, closure, FLAGS_tera_master_meta_retry_times); + response->set_status(kMasterOk); + } else { + response->set_status(kInvalidArgument); + } + return; } -void MasterImpl::SafeModeCmdCtrl(const CmdCtrlRequest* request, - CmdCtrlResponse* response) { - if (request->arg_list_size() != 1) { - response->set_status(kInvalidArgument); - return; - } - - StatusCode status; - if (request->arg_list(0) == "enter") { - if (EnterSafeMode(&status) || status == kMasterIsReadonly) { - response->set_status(kMasterOk); - } else { - response->set_status(status); - } - } else if (request->arg_list(0) == "leave") { - if (LeaveSafeMode(&status) || status == kMasterIsRunning) { - response->set_status(kMasterOk); - } else { - response->set_status(status); - } - } else if (request->arg_list(0) == "get") { - response->set_bool_result(kIsReadonly == GetMasterStatus()); - response->set_status(kMasterOk); - } else { - response->set_status(kInvalidArgument); - } -} +void MasterImpl::TabletCmdCtrl(const CmdCtrlRequest *request, CmdCtrlResponse *response) { + int32_t request_argc = request->arg_list_size(); + if (request_argc < 2) { + response->set_status(kInvalidArgument); + return; + } + const std::string &op = request->arg_list(0); + const std::string &tablet_id = request->arg_list(1); + TabletPtr tablet; + bool found = false; + std::vector all_tablet_list; + tablet_manager_->ShowTable(NULL, &all_tablet_list); + std::vector::iterator it = all_tablet_list.begin(); + for (; it != all_tablet_list.end(); ++it) { + TabletPtr t = *it; + if (tablet_id == t->GetPath()) { + tablet = t; + found = true; + } + } + if (!found) { + response->set_status(kInvalidArgument); + return; + } -void MasterImpl::ReloadConfig(CmdCtrlResponse* response) { - if (utils::LoadFlagFile(FLAGS_flagfile)) { - LOG(INFO) << "[reload config] done"; - response->set_status(kMasterOk); - } else { - LOG(ERROR) << "[reload config] config file not found"; - response->set_status(kInvalidArgument); + if (op == "reload" && request_argc == 2) { + TabletNodePtr current_tablet_node = tablet->GetTabletNode(); + TryMoveTablet(tablet, current_tablet_node); + response->set_status(kMasterOk); + } else if (op == "reloadx" && request_argc == 3 && + tablet->SetErrorIgnoredLGs(request->arg_list(2))) { + TabletNodePtr current_tablet_node = tablet->GetTabletNode(); + TryMoveTablet(tablet, current_tablet_node); + response->set_status(kMasterOk); + } else if (op == "move" && request_argc == 3) { + std::string expect_server_addr = request->arg_list(2); + TabletNodePtr dest_node; + if (!expect_server_addr.empty() && + !tabletnode_manager_->FindTabletNode(expect_server_addr, &dest_node)) { + response->set_status(kInvalidArgument); + return; + } + TryMoveTablet(tablet, dest_node); + response->set_status(kMasterOk); + } else if (op == "movex" && request_argc == 4 && + tablet->SetErrorIgnoredLGs(request->arg_list(3))) { + std::string expect_server_addr = request->arg_list(2); + TabletNodePtr dest_node; + if (!expect_server_addr.empty() && + !tabletnode_manager_->FindTabletNode(expect_server_addr, &dest_node)) { + response->set_status(kInvalidArgument); + return; + } + TryMoveTablet(tablet, dest_node); + response->set_status(kMasterOk); + } else if (op == "split" && (request_argc == 2 || request_argc == 3)) { + std::string split_key; + if (request_argc == 3) { + split_key = request->arg_list(2); + LOG(INFO) << "User specified split key: " << split_key; } + TrySplitTablet(tablet, split_key); + response->set_status(kMasterOk); + } else if (op == "merge" && request_argc == 2) { + TryMergeTablet(tablet); + response->set_status(kMasterOk); + } else { + response->set_status(kInvalidArgument); + } } -void MasterImpl::TableCmdCtrl(const CmdCtrlRequest* request, - CmdCtrlResponse* response) { - if (request->arg_list_size() < 2) { - response->set_status(kInvalidArgument); - return; - } - - if (request->arg_list(0) == "split") { - TabletPtr tablet; - StatusCode status; - for (int32_t i = 2; i < request->arg_list_size(); i++) { - if (!tablet_manager_->SearchTablet(request->arg_list(1), - request->arg_list(i), - &tablet, &status)) { - response->set_status(kInvalidArgument); - return; - } - VLOG(10) << "table split: key " << request->arg_list(i) - << ", " << tablet; - //TrySplitTablet(tablet, request->arg_list(i)); - } - response->set_status(kMasterOk); - } else { - response->set_status(kInvalidArgument); - } +void MasterImpl::MetaCmdCtrl(const CmdCtrlRequest *request, CmdCtrlResponse *response) { + if (request->arg_list_size() != 2) { + response->set_status(kInvalidArgument); return; -} + } -void MasterImpl::TabletCmdCtrl(const CmdCtrlRequest* request, - CmdCtrlResponse* response) { - int32_t request_argc = request->arg_list_size(); - if (request_argc < 2) { - response->set_status(kInvalidArgument); - return; - } - const std::string& op = request->arg_list(0); - const std::string& tablet_id = request->arg_list(1); - TabletPtr tablet; - bool found = false; - std::vector all_tablet_list; - tablet_manager_->ShowTable(NULL, &all_tablet_list); - std::vector::iterator it = all_tablet_list.begin(); - for (; it != all_tablet_list.end(); ++it) { - TabletPtr t = *it; - if (tablet_id == t->GetPath()) { - tablet = t; - found = true; - } - } - if (!found) { - response->set_status(kInvalidArgument); - return; - } - - if (op == "reload" && request_argc == 2) { - TabletNodePtr current_tablet_node = tablet->GetTabletNode(); - TryMoveTablet(tablet, current_tablet_node); - response->set_status(kMasterOk); - } else if (op == "reloadx" && request_argc == 3 - && tablet->SetErrorIgnoredLGs(request->arg_list(2))) { - TabletNodePtr current_tablet_node = tablet->GetTabletNode(); - TryMoveTablet(tablet, current_tablet_node); - response->set_status(kMasterOk); - } else if (op == "move" && request_argc == 3) { - std::string expect_server_addr = request->arg_list(2); - TabletNodePtr dest_node; - if (!expect_server_addr.empty() && - !tabletnode_manager_->FindTabletNode(expect_server_addr, &dest_node)) { - response->set_status(kInvalidArgument); - return; - } - TryMoveTablet(tablet, dest_node); - response->set_status(kMasterOk); - } else if (op == "movex" && request_argc == 4 - && tablet->SetErrorIgnoredLGs(request->arg_list(3))) { - std::string expect_server_addr = request->arg_list(2); - TabletNodePtr dest_node; - if (!expect_server_addr.empty() && - !tabletnode_manager_->FindTabletNode(expect_server_addr, &dest_node)) { - response->set_status(kInvalidArgument); - return; - } - TryMoveTablet(tablet, dest_node); - response->set_status(kMasterOk); - } else if (op == "split" && (request_argc == 2 || request_argc == 3)) { - std::string split_key; - if (request_argc == 3) { - split_key = request->arg_list(2); - LOG(INFO) << "User specified split key: " << split_key; - } - TrySplitTablet(tablet, split_key); - response->set_status(kMasterOk); - } else if (op == "merge" && request_argc == 2) { - TryMergeTablet(tablet); - response->set_status(kMasterOk); + if (request->arg_list(0) == "backup") { + const std::string &filename = request->arg_list(1); + StatusCode status = kMasterOk; + if (tablet_manager_->DumpMetaTableToFile(filename, &status)) { + response->set_status(kMasterOk); } else { - response->set_status(kInvalidArgument); + response->set_status(status); } + } else { + response->set_status(kInvalidArgument); + } } -void MasterImpl::MetaCmdCtrl(const CmdCtrlRequest* request, - CmdCtrlResponse* response) { - if (request->arg_list_size() != 2) { - response->set_status(kInvalidArgument); - return; +void MasterImpl::ProcedureLimitCmdCtrl(const CmdCtrlRequest *request, CmdCtrlResponse *response) { + if (request->arg_list_size() == 1) { + if (request->arg_list(0) != "get") { + response->set_status(kInvalidArgument); + return; } - - if (request->arg_list(0) == "backup") { - const std::string& filename = request->arg_list(1); - StatusCode status = kMasterOk; - if (tablet_manager_->DumpMetaTableToFile(filename, &status)) { - response->set_status(kMasterOk); - } else { - response->set_status(status); - } + response->set_bool_result(true); + response->set_str_result(ProcedureLimiter::Instance().GetSummary()); + response->set_status(kMasterOk); + } else if (request->arg_list_size() == 3) { + if (request->arg_list(0) != "set") { + response->set_status(kInvalidArgument); + return; + } + std::string type = request->arg_list(1); + std::string limit_str = request->arg_list(2); + try { + std::stoul(limit_str); + } catch (...) { + response->set_status(kInvalidArgument); + return; + } + uint32_t limit = static_cast(std::stoul(limit_str)); + if (type == "kMerge") { + ProcedureLimiter::Instance().SetLockLimit(ProcedureLimiter::LockType::kMerge, limit); + } else if (type == "kSplit") { + ProcedureLimiter::Instance().SetLockLimit(ProcedureLimiter::LockType::kSplit, limit); + } else if (type == "kMove") { + ProcedureLimiter::Instance().SetLockLimit(ProcedureLimiter::LockType::kMove, limit); + } else if (type == "kLoad") { + ProcedureLimiter::Instance().SetLockLimit(ProcedureLimiter::LockType::kLoad, limit); + } else if (type == "kUnload") { + ProcedureLimiter::Instance().SetLockLimit(ProcedureLimiter::LockType::kUnload, limit); } else { - response->set_status(kInvalidArgument); + response->set_status(kInvalidArgument); + return; } + response->set_bool_result(true); + response->set_str_result(ProcedureLimiter::Instance().GetSummary()); + response->set_status(kMasterOk); + } else { + response->set_status(kInvalidArgument); + return; + } } /////////// common //////////// -bool MasterImpl::SetMasterStatus(const MasterStatus& new_status, - MasterStatus* old_status) { - MutexLock lock(&status_mutex_); - if (old_status != NULL) { - *old_status = status_; - } - if (CheckStatusSwitch(status_, new_status)) { - LOG(INFO) << "master status switch " - << StatusCodeToString(static_cast(status_)) - << " to " << StatusCodeToString(static_cast(new_status)); - status_ = new_status; - return true; - } +bool MasterImpl::DoStateTransition(const MasterEvent event, MasterStatus *old_status) { + MutexLock lock(&status_mutex_); + if (!state_machine_.DoStateTransition(event, old_status)) { + LOG(WARNING) << "not support master status switch, event: " << event << ", curr_status: " + << StatusCodeToString(static_cast(state_machine_.GetState())); return false; + } + LOG(INFO) << "master status switched, event: " << event + << ", from: " << StatusCodeToString(static_cast(*old_status)) + << " to: " << StatusCodeToString(static_cast(state_machine_.GetState())); + std::string mode = StatusCodeToString(static_cast(state_machine_.GetState())); + return true; } -MasterImpl::MasterStatus MasterImpl::GetMasterStatus() { - MutexLock lock(&status_mutex_); - return status_; +bool MasterImpl::DoStateTransition(const MasterEvent event) { + MasterStatus old_status; + return DoStateTransition(event, &old_status); } -bool MasterImpl::CheckStatusSwitch(MasterStatus old_status, - MasterStatus new_status) { - switch (old_status) { - case kNotInited: - if (new_status == kIsSecondary) { - return true; - } - break; - case kIsSecondary: - if (new_status == kOnRestore || new_status == kOnWait) { - return true; - } - break; - case kOnWait: - if (new_status == kOnRestore) { - return true; - } - break; - case kOnRestore: - if (new_status == kIsRunning || new_status == kIsReadonly - || new_status == kOnWait) { - return true; - } - break; - case kIsRunning: - if (new_status == kIsReadonly) { - return true; - } - break; - case kIsReadonly: - if (new_status == kIsRunning) { - return true; - } - break; - default: - break; - } +bool MasterImpl::IsInSafeMode() { + MutexLock lock(&status_mutex_); + MasterStatus status = state_machine_.GetState(); + return (status == kIsReadonly); +} - LOG(ERROR) << "not support master status switch " - << StatusCodeToString(static_cast(old_status)) << " to " - << StatusCodeToString(static_cast(new_status)); - return false; +MasterStatus MasterImpl::GetMasterStatus() { + MutexLock lock(&status_mutex_); + return state_machine_.GetState(); } -bool MasterImpl::GetMetaTabletAddr(std::string* addr) { - return (restored_ && tablet_manager_->GetMetaTabletAddr(addr)); +bool MasterImpl::GetMetaTabletAddr(std::string *addr) { + return (restored_ && tablet_manager_->GetMetaTabletAddr(addr)); } /////////// load balance ////////// void MasterImpl::QueryTabletNode() { - bool gc_query_enable = false; - { - MutexLock locker(&mutex_); - if (!query_enabled_) { - query_tabletnode_timer_id_ = kInvalidTimerId; - return; - } - if (gc_query_enable_) { - gc_query_enable_ = false; - gc_query_enable = true; - } - } - - start_query_time_ = get_micros(); - std::vector tabletnode_array; - tabletnode_manager_->GetAllTabletNodeInfo(&tabletnode_array); - LOG(INFO) << "query tabletnodes: " << tabletnode_array.size() - << ", id " << query_tabletnode_timer_id_; - - if (FLAGS_tera_stat_table_enabled) { - stat_table_->OpenStatTable(); + bool gc_query_enable = false; + { + MutexLock locker(&mutex_); + if (!query_enabled_) { + query_tabletnode_timer_id_ = kInvalidTimerId; + return; + } + if (gc_query_enable_) { + gc_query_enable_ = false; + gc_query_enable = true; + } + } + + start_query_time_ = get_micros(); + std::vector tabletnode_array; + tabletnode_manager_->GetAllTabletNodeInfo(&tabletnode_array); + LOG(INFO) << "query tabletnodes: " << tabletnode_array.size() << ", id " + << query_tabletnode_timer_id_; + + if (FLAGS_tera_stat_table_enabled) { + stat_table_->OpenStatTable(); + } + + CHECK_EQ(query_pending_count_.Get(), 0); + CHECK_EQ(update_auth_pending_count_.Get(), 0); + CHECK_EQ(update_quota_pending_count_.Get(), 0); + query_pending_count_.Inc(); + + std::vector::iterator it = tabletnode_array.begin(); + for (; it != tabletnode_array.end(); ++it) { + TabletNodePtr tabletnode = *it; + if (tabletnode->state_ != kReady && tabletnode->state_ != kWaitKick) { + VLOG(20) << "will not query tabletnode: " << tabletnode->addr_; + continue; } - - CHECK_EQ(query_pending_count_.Get(), 0); query_pending_count_.Inc(); - std::vector::iterator it = tabletnode_array.begin(); - for (; it != tabletnode_array.end(); ++it) { - TabletNodePtr tabletnode = *it; - if (tabletnode->state_ != kReady) { - VLOG(20) << "will not query tabletnode: " << tabletnode->addr_; - continue; - } - query_pending_count_.Inc(); - QueryClosure done = - std::bind(&MasterImpl::QueryTabletNodeCallback, this, tabletnode->addr_, - _1, _2, _3, _4); - QueryTabletNodeAsync(tabletnode->addr_, - FLAGS_tera_master_query_tabletnode_period, - gc_query_enable, done); - } - - if (0 == query_pending_count_.Dec()) { - { - MutexLock locker(&mutex_); - if (query_enabled_) { - ScheduleQueryTabletNode(); - } else { - query_tabletnode_timer_id_ = kInvalidTimerId; - } - } - if (gc_query_enable) { - DoTabletNodeGcPhase2(); - } + update_auth_pending_count_.Inc(); + update_quota_pending_count_.Inc(); + QueryClosure done = + std::bind(&MasterImpl::QueryTabletNodeCallback, this, tabletnode->addr_, _1, _2, _3, _4); + QueryTabletNodeAsync(tabletnode->addr_, FLAGS_tera_master_query_tabletnode_period, + gc_query_enable, done); + } + + if (0 == query_pending_count_.Dec()) { + LOG(INFO) << "query tabletnodes finish, id " << query_tabletnode_timer_id_ + << ", update auth failed ts count " << update_auth_pending_count_.Get() + << ", update quota failed ts count " << update_quota_pending_count_.Get() << ", cost " + << (get_micros() - start_query_time_) / 1000 << "ms."; + (update_auth_pending_count_.Get() == 0) + ? access_entry_->GetAccessUpdater().SyncUgiVersion(false) + : access_entry_->GetAccessUpdater().SyncUgiVersion(true); + // If ClearDeltaQuota failed, then means still need to sync version. + if (update_quota_pending_count_.Get() == 0 && quota_entry_->ClearDeltaQuota()) { + quota_entry_->SyncVersion(false); + } else { + quota_entry_->SyncVersion(true); + } + update_auth_pending_count_.Set(0); + update_quota_pending_count_.Set(0); + quota_entry_->RefreshClusterFlowControlStatus(); + quota_entry_->RefreshDfsHardLimit(); + { + MutexLock locker(&mutex_); + if (query_enabled_) { + ScheduleQueryTabletNode(); + } else { + query_tabletnode_timer_id_ = kInvalidTimerId; + } + } + if (gc_query_enable) { + DoTabletNodeGcPhase2(); } + } } void MasterImpl::ScheduleQueryTabletNode() { - mutex_.AssertHeld(); - int schedule_delay = FLAGS_tera_master_query_tabletnode_period; + mutex_.AssertHeld(); + int schedule_delay = FLAGS_tera_master_query_tabletnode_period; - LOG(INFO) << "schedule query tabletnodes after " << schedule_delay << "ms."; + LOG(INFO) << "schedule query tabletnodes after " << schedule_delay << "ms."; - ThreadPool::Task task = std::bind(&MasterImpl::QueryTabletNode, this); - query_tabletnode_timer_id_ = thread_pool_->DelayTask(schedule_delay, task); + ThreadPool::Task task = std::bind(&MasterImpl::QueryTabletNode, this); + query_tabletnode_timer_id_ = thread_pool_->DelayTask(schedule_delay, task); } void MasterImpl::EnableQueryTabletNodeTimer() { - MutexLock locker(&mutex_); - if (query_tabletnode_timer_id_ == kInvalidTimerId) { - ScheduleQueryTabletNode(); - } - query_enabled_ = true; + MutexLock locker(&mutex_); + if (query_tabletnode_timer_id_ == kInvalidTimerId) { + ScheduleQueryTabletNode(); + } + query_enabled_ = true; } void MasterImpl::DisableQueryTabletNodeTimer() { - MutexLock locker(&mutex_); - if (query_tabletnode_timer_id_ != kInvalidTimerId) { - bool non_block = true; - if (thread_pool_->CancelTask(query_tabletnode_timer_id_, non_block)) { - query_tabletnode_timer_id_ = kInvalidTimerId; - } + MutexLock locker(&mutex_); + if (query_tabletnode_timer_id_ != kInvalidTimerId) { + bool non_block = true; + if (thread_pool_->CancelTask(query_tabletnode_timer_id_, non_block)) { + query_tabletnode_timer_id_ = kInvalidTimerId; } - query_enabled_ = false; + } + query_enabled_ = false; } void MasterImpl::ScheduleLoadBalance() { - { - MutexLock locker(&mutex_); - if (!load_balance_enabled_) { - return; - } - if (load_balance_scheduled_) { - return; - } - load_balance_scheduled_ = true; + { + MutexLock locker(&mutex_); + if (!load_balance_enabled_) { + return; + } + if (load_balance_scheduled_) { + return; } + load_balance_scheduled_ = true; + } - ThreadPool::Task task = - std::bind(static_cast(&MasterImpl::LoadBalance), this); - thread_pool_->AddTask(task); + ThreadPool::Task task = + std::bind(static_cast(&MasterImpl::LoadBalance), this); + thread_pool_->AddTask(task); } void MasterImpl::EnableLoadBalance() { - MutexLock locker(&mutex_); - load_balance_enabled_ = true; + MutexLock locker(&mutex_); + load_balance_enabled_ = true; } void MasterImpl::DisableLoadBalance() { - MutexLock locker(&mutex_); - load_balance_enabled_ = false; + MutexLock locker(&mutex_); + load_balance_enabled_ = false; } void MasterImpl::LoadBalance() { - { - MutexLock locker(&mutex_); - if (!load_balance_enabled_) { - load_balance_scheduled_ = false; - return; - } - } - - LOG(INFO) << "LoadBalance start"; - int64_t start_time = get_micros(); - - std::vector all_table_list; - std::vector all_tablet_list; - tablet_manager_->ShowTable(&all_table_list, &all_tablet_list); - - std::vector all_node_list; - tabletnode_manager_->GetAllTabletNodeInfo(&all_node_list); - - // Make a constant copy of tablet nodes to make sure that the returned value - // of GetSize, GetQps, ... remain unchanged every time they are called during - // the load balance process so as not to cause exceptions of std::sort(). - std::vector all_node_list_copy; - for (size_t i = 0; i < all_node_list.size(); i++) { - TabletNodePtr node_copy(new TabletNode(*all_node_list[i])); - all_node_list_copy.push_back(node_copy); - } - - uint32_t max_move_num = FLAGS_tera_master_max_move_concurrency; - - // Run qps-based-sheduler first, then size-based-scheduler - // If read_pending occured, process it first - max_move_num -= LoadBalance(load_scheduler_.get(), max_move_num, 1, - all_node_list_copy, all_tablet_list); - - if (FLAGS_tera_master_load_balance_table_grained) { - for (size_t i = 0; i < all_table_list.size(); ++i) { - TablePtr table = all_table_list[i]; - if (table->GetStatus() != kTableEnable) { - continue; - } - if (table->GetTableName() == FLAGS_tera_master_meta_table_name) { - continue; - } - - std::vector tablet_list; - table->GetTablet(&tablet_list); - max_move_num -= LoadBalance(size_scheduler_.get(), max_move_num, 3, - all_node_list_copy, tablet_list, table->GetTableName()); - } - } else { - max_move_num -= LoadBalance(size_scheduler_.get(), max_move_num, 3, - all_node_list_copy, all_tablet_list); - } - - int64_t cost_time = get_micros() - start_time; - LOG(INFO) << "LoadBalance finish, cost " << cost_time / 1000000.0 << "s"; - - { - MutexLock locker(&mutex_); - load_balance_scheduled_ = false; - } -} - -uint32_t MasterImpl::LoadBalance(Scheduler* scheduler, - uint32_t max_move_num, uint32_t max_round_num, - std::vector& tabletnode_list_copy, - std::vector& tablet_list, - const std::string& table_name) { - std::map > node_tablet_list; - std::vector::iterator it = tablet_list.begin(); - for (; it != tablet_list.end(); ++it) { - TabletPtr tablet = *it; - node_tablet_list[tablet->GetServerAddr()].push_back(tablet); - } - - if (!scheduler->NeedSchedule(tabletnode_list_copy, table_name)) { - return 0; - } - - // descending sort the node according to workload, - // so that the node with heaviest workload will be scheduled first - scheduler->DescendingSort(tabletnode_list_copy, table_name); - - uint32_t round_count = 0; - uint32_t total_move_count = 0; - while (round_count < max_round_num) { - VLOG(20) << "LoadBalance (" << scheduler->Name() << ") " << table_name - << " round " << round_count << " start"; - - uint32_t round_move_count = 0; - std::vector::iterator node_copy_it = tabletnode_list_copy.begin(); - while (total_move_count < max_move_num && node_copy_it != tabletnode_list_copy.end()) { - TabletNodePtr node; - if (tabletnode_manager_->FindTabletNode((*node_copy_it)->GetAddr(), &node) - && (*node_copy_it)->GetId() == node->GetId() - && node->GetState() == kReady) { - const std::vector& tablet_list = node_tablet_list[node->GetAddr()]; - if (TabletNodeLoadBalance(node, scheduler, tablet_list, table_name)) { - round_move_count++; - total_move_count++; - } - } - ++node_copy_it; - } - - VLOG(20) << "LoadBalance (" << scheduler->Name() << ") " << table_name - << " round " << round_count << " move " << round_move_count; - - round_count++; - if (round_move_count == 0) { - break; - } - } - - if (total_move_count != 0) { - LOG(INFO) << "LoadBalance (" << scheduler->Name() << ") " << table_name - << " total round " << round_count << " total move " << total_move_count; - } - return total_move_count; -} - -bool MasterImpl::TabletNodeLoadBalance(TabletNodePtr tabletnode, Scheduler* scheduler, - const std::vector& tablet_list, - const std::string& table_name) { - VLOG(7) << "TabletNodeLoadBalance() " << tabletnode->GetAddr() << " " - << scheduler->Name() << " " << table_name; - if (tablet_list.size() < 1) { - return false; + { + MutexLock locker(&mutex_); + if (!load_balance_enabled_) { + load_balance_scheduled_ = false; + return; + } + } + + LOG(INFO) << "LoadBalance start"; + int64_t start_time = get_micros(); + + std::vector all_table_list; + std::vector all_tablet_list; + tablet_manager_->ShowTable(&all_table_list, &all_tablet_list); + + std::vector all_node_list; + tabletnode_manager_->GetAllTabletNodeInfo(&all_node_list); + + // Make a constant copy of tablet nodes to make sure that the returned value + // of GetSize, GetQps, ... remain unchanged every time they are called during + // the load balance process so as not to cause exceptions of std::sort(). + std::vector all_node_list_copy; + for (size_t i = 0; i < all_node_list.size(); i++) { + TabletNodePtr node_copy(new TabletNode(*all_node_list[i])); + all_node_list_copy.push_back(node_copy); + } + + uint32_t max_move_num = FLAGS_tera_master_max_move_concurrency; + + // Run qps-based-sheduler first, then size-based-scheduler + // If read_pending occured, process it first + max_move_num -= + LoadBalance(load_scheduler_.get(), max_move_num, 1, all_node_list_copy, all_tablet_list); + + if (FLAGS_tera_master_load_balance_table_grained) { + for (size_t i = 0; i < all_table_list.size(); ++i) { + TablePtr table = all_table_list[i]; + if (table->GetStatus() != kTableEnable) { + continue; + } + if (table->GetTableName() == FLAGS_tera_master_meta_table_name) { + continue; + } + + std::vector tablet_list; + table->GetTablet(&tablet_list); + max_move_num -= LoadBalance(size_scheduler_.get(), max_move_num, 3, all_node_list_copy, + tablet_list, table->GetTableName()); + } + } else { + max_move_num -= + LoadBalance(size_scheduler_.get(), max_move_num, 3, all_node_list_copy, all_tablet_list); + } + + int64_t cost_time = get_micros() - start_time; + LOG(INFO) << "LoadBalance finish, cost " << cost_time / 1000000.0 << "s"; + + { + MutexLock locker(&mutex_); + load_balance_scheduled_ = false; + } +} + +uint32_t MasterImpl::LoadBalance(Scheduler *scheduler, uint32_t max_move_num, + uint32_t max_round_num, + std::vector &tabletnode_list_copy, + std::vector &tablet_list, + const std::string &table_name) { + std::map> node_tablet_list; + std::vector::iterator it = tablet_list.begin(); + for (; it != tablet_list.end(); ++it) { + TabletPtr tablet = *it; + node_tablet_list[tablet->GetServerAddr()].push_back(tablet); + } + + if (!scheduler->NeedSchedule(tabletnode_list_copy, table_name)) { + return 0; + } + + // descending sort the node according to workload, + // so that the node with heaviest workload will be scheduled first + scheduler->DescendingSort(tabletnode_list_copy, table_name); + + uint32_t round_count = 0; + uint32_t total_move_count = 0; + while (round_count < max_round_num) { + VLOG(20) << "LoadBalance (" << scheduler->Name() << ") " << table_name << " round " + << round_count << " start"; + + uint32_t round_move_count = 0; + std::vector::iterator node_copy_it = tabletnode_list_copy.begin(); + while (total_move_count < max_move_num && node_copy_it != tabletnode_list_copy.end()) { + TabletNodePtr node; + if (tabletnode_manager_->FindTabletNode((*node_copy_it)->GetAddr(), &node) && + (*node_copy_it)->GetId() == node->GetId() && node->GetState() == kReady) { + const std::vector &tablet_list = node_tablet_list[node->GetAddr()]; + if (TabletNodeLoadBalance(node, scheduler, tablet_list, table_name)) { + round_move_count++; + total_move_count++; + } + } + ++node_copy_it; + } + + VLOG(20) << "LoadBalance (" << scheduler->Name() << ") " << table_name << " round " + << round_count << " move " << round_move_count; + + round_count++; + if (round_move_count == 0) { + break; + } + } + + if (total_move_count != 0) { + LOG(INFO) << "LoadBalance (" << scheduler->Name() << ") " << table_name << " total round " + << round_count << " total move " << total_move_count; + } + return total_move_count; +} + +bool MasterImpl::TabletNodeLoadBalance(TabletNodePtr tabletnode, Scheduler *scheduler, + const std::vector &tablet_list, + const std::string &table_name) { + VLOG(7) << "TabletNodeLoadBalance() " << tabletnode->GetAddr() << " " << scheduler->Name() << " " + << table_name; + if (tablet_list.size() < 1) { + return false; + } + + bool any_tablet_split = false; + std::vector tablet_candidates; + + std::vector::const_iterator it; + for (it = tablet_list.begin(); it != tablet_list.end(); ++it) { + TabletPtr tablet = *it; + if (tablet->GetStatus() != TabletMeta::kTabletReady || + tablet->GetTableName() == FLAGS_tera_master_meta_table_name) { + continue; + } + double write_workload = tablet->GetCounter().write_workload(); + int64_t split_size = FLAGS_tera_master_split_tablet_size; + if (tablet->GetSchema().has_split_size() && tablet->GetSchema().split_size() > 0) { + split_size = tablet->GetSchema().split_size(); + } + if (write_workload > FLAGS_tera_master_workload_split_threshold) { + if (split_size > FLAGS_tera_master_min_split_size) { + split_size = std::max(FLAGS_tera_master_min_split_size, + static_cast(split_size * FLAGS_tera_master_min_split_ratio)); + } + VLOG(6) << tablet->GetPath() << ", trigger workload split, write_workload: " << write_workload + << ", split it by size(M): " << split_size; + } + int64_t merge_size = FLAGS_tera_master_merge_tablet_size; + if (tablet->GetSchema().has_merge_size() && tablet->GetSchema().merge_size() > 0) { + merge_size = tablet->GetSchema().merge_size(); + } + if (merge_size == 0) { + int64_t current_time_s = static_cast(time(NULL)); + int64_t table_create_time_s = + static_cast(tablet->GetTable()->CreateTime() / 1000000); + if (current_time_s - table_create_time_s >= FLAGS_tera_master_disable_merge_ttl_s && + tablet->GetTable()->LockTransition()) { + int64_t new_split_size = tablet->GetSchema().split_size(); + if (new_split_size > FLAGS_tera_master_max_tablet_size_M) { + new_split_size = FLAGS_tera_master_max_tablet_size_M; + } + int64_t new_merge_size = new_split_size >> 2; + UpdateTableRequest *request = new UpdateTableRequest(); + UpdateTableResponse *response = new UpdateTableResponse(); + TableSchema *schema = request->mutable_schema(); + schema->CopyFrom(tablet->GetSchema()); + schema->set_split_size(new_split_size); + schema->set_merge_size(new_merge_size); + google::protobuf::Closure *closure = UpdateDoneClosure::NewInstance(request, response); + std::shared_ptr proc(new UpdateTableProcedure( + tablet->GetTable(), request, response, closure, thread_pool_.get())); + MasterEnv().GetExecutor()->AddProcedure(proc); + + merge_size = new_merge_size; + VLOG(6) << "table: " << tablet->GetTableName() + << " enable merge after ttl_s: " << FLAGS_tera_master_disable_merge_ttl_s + << " current_time_s: " << current_time_s + << " table_create_time_s: " << table_create_time_s + << " try set split size(M) to be: " << new_split_size + << " try set merge size(M) to be: " << merge_size; + } else { + VLOG(20) << "table: " << tablet->GetTableName() + << " remain disable merge in ttl_s: " << FLAGS_tera_master_disable_merge_ttl_s + << " current_time_s: " << current_time_s + << " table_create_time_s: " << table_create_time_s; + } + } + if (tablet->GetDataSize() < 0) { + // tablet size is error, skip it + continue; + } else if (tablet->GetDataSize() > (split_size << 20) && + tablet->TestAndSetSplitTimeStamp(get_micros())) { + TrySplitTablet(tablet); + any_tablet_split = true; + continue; + } else if (tablet->GetDataSize() < (merge_size << 20)) { + if (!tablet->IsBusy() && write_workload < FLAGS_tera_master_workload_merge_threshold) { + TryMergeTablet(tablet); + } else { + VLOG(6) << "[merge] skip high workload tablet: " << tablet->GetPath() << ", write_workload " + << write_workload; + } + continue; } - - bool any_tablet_split = false; - std::vector tablet_candidates; - - std::vector::const_iterator it; - for (it = tablet_list.begin(); it != tablet_list.end(); ++it) { - TabletPtr tablet = *it; - if (tablet->GetStatus() != TabletMeta::kTabletReady - || tablet->GetTableName() == FLAGS_tera_master_meta_table_name) { - continue; - } - double write_workload = tablet->GetCounter().write_workload(); - int64_t split_size = FLAGS_tera_master_split_tablet_size; - if (tablet->GetSchema().has_split_size() && tablet->GetSchema().split_size() > 0) { - split_size = tablet->GetSchema().split_size(); - } - if (write_workload > FLAGS_tera_master_workload_split_threshold) { - if (split_size > FLAGS_tera_master_min_split_size) { - split_size = std::max(FLAGS_tera_master_min_split_size, - static_cast(split_size * FLAGS_tera_master_min_split_ratio)); - } - VLOG(6) << tablet->GetPath() << ", trigger workload split, write_workload: " << write_workload - << ", split it by size(M): " << split_size; - } - int64_t merge_size = FLAGS_tera_master_merge_tablet_size; - if (tablet->GetSchema().has_merge_size() && tablet->GetSchema().merge_size() > 0) { - merge_size = tablet->GetSchema().merge_size(); - } - if (tablet->GetDataSize() < 0) { - // tablet size is error, skip it - continue; - } else if (tablet->GetDataSize() > (split_size << 20) && - tablet->TestAndSetSplitTimeStamp(get_micros())) { - TrySplitTablet(tablet); - any_tablet_split = true; - continue; - } else if (tablet->GetDataSize() < (merge_size << 20)) { - if (!tablet->IsBusy() && - write_workload < FLAGS_tera_master_workload_merge_threshold) { - TryMergeTablet(tablet); - } else { - VLOG(6) << "[merge] skip high workload tablet: " - << tablet->GetPath() << ", write_workload " << write_workload; - } - continue; - } - if (tablet->GetStatus() == TabletMeta::kTabletReady) { - tablet_candidates.push_back(tablet); - } + if (tablet->GetStatus() == TabletMeta::kTabletReady) { + tablet_candidates.push_back(tablet); } + } - // if any tablet is splitting, no need to move tablet - if (!FLAGS_tera_master_move_tablet_enabled || any_tablet_split) { - return false; - } - - TabletNodePtr dest_tabletnode; - size_t tablet_index = 0; - if (scheduler->MayMoveOut(tabletnode, table_name) - && tabletnode_manager_->ScheduleTabletNode(scheduler, table_name, true, &dest_tabletnode) - && tabletnode_manager_->ShouldMoveData(scheduler, table_name, tabletnode, - dest_tabletnode, tablet_candidates, - &tablet_index) - && dest_tabletnode->GetState() == kReady) { - TryMoveTablet(tablet_candidates[tablet_index], dest_tabletnode); - return true; - } + // if any tablet is splitting, no need to move tablet + if (!FLAGS_tera_master_move_tablet_enabled || any_tablet_split) { return false; + } + + TabletNodePtr dest_tabletnode; + size_t tablet_index = 0; + if (scheduler->MayMoveOut(tabletnode, table_name) && + tabletnode_manager_->ScheduleTabletNode(scheduler, table_name, nullptr, true, + &dest_tabletnode) && + tabletnode_manager_->ShouldMoveData(scheduler, table_name, tabletnode, dest_tabletnode, + tablet_candidates, &tablet_index) && + dest_tabletnode->GetState() == kReady) { + TryMoveTablet(tablet_candidates[tablet_index], dest_tabletnode); + return true; + } + return false; } /////////// cache release ////////// @@ -1789,1006 +1956,1132 @@ void MasterImpl::TryReleaseCache(bool enbaled_debug) { } void MasterImpl::ReleaseCacheWrapper() { - MutexLock locker(&mutex_); + MutexLock locker(&mutex_); - TryReleaseCache(); + TryReleaseCache(); - release_cache_timer_id_ = kInvalidTimerId; - EnableReleaseCacheTimer(); + release_cache_timer_id_ = kInvalidTimerId; + EnableReleaseCacheTimer(); } void MasterImpl::EnableReleaseCacheTimer() { - assert(release_cache_timer_id_ == kInvalidTimerId); - ThreadPool::Task task = - std::bind(&MasterImpl::ReleaseCacheWrapper, this); - int64_t timeout_period = 1000LL * - FLAGS_tera_master_cache_release_period; - release_cache_timer_id_ = thread_pool_->DelayTask( - timeout_period, task); + assert(release_cache_timer_id_ == kInvalidTimerId); + ThreadPool::Task task = std::bind(&MasterImpl::ReleaseCacheWrapper, this); + int64_t timeout_period = 1000LL * FLAGS_tera_master_cache_release_period; + release_cache_timer_id_ = thread_pool_->DelayTask(timeout_period, task); } void MasterImpl::DisableReleaseCacheTimer() { - if (release_cache_timer_id_ != kInvalidTimerId) { - thread_pool_->CancelTask(release_cache_timer_id_); - release_cache_timer_id_ = kInvalidTimerId; - } + if (release_cache_timer_id_ != kInvalidTimerId) { + thread_pool_->CancelTask(release_cache_timer_id_); + release_cache_timer_id_ = kInvalidTimerId; + } } ////////// ts operation //////////// -void MasterImpl::RefreshTabletNodeList(const std::map& new_ts_list) { - MutexLock lock(&tabletnode_mutex_); - - std::map del_ts_list; - std::map add_ts_list; - - std::map old_ts_list; - tabletnode_manager_->GetAllTabletNodeId(&old_ts_list); - - std::map::const_iterator old_it = old_ts_list.begin(); - std::map::const_iterator new_it = new_ts_list.begin(); - while (old_it != old_ts_list.end() && new_it != new_ts_list.end()) { - const std::string& old_addr = old_it->first; - const std::string& new_addr = new_it->first; - const std::string& old_uuid = old_it->second; - const std::string& new_uuid = new_it->second; - int cmp_ret = old_addr.compare(new_addr); - if (cmp_ret == 0) { - if (old_uuid != new_uuid) { - LOG(INFO) << "tabletnode " << old_addr << " restart: " - << old_uuid << " -> " << new_uuid; - del_ts_list[old_addr] = old_uuid; - add_ts_list[new_addr] = new_uuid; - } - ++old_it; - ++new_it; - } else if (cmp_ret < 0) { - del_ts_list[old_addr] = old_uuid; - ++old_it; - } else { - add_ts_list[new_addr] = new_uuid; - ++new_it; - } - } - for (; old_it != old_ts_list.end(); ++old_it) { - const std::string& old_addr = old_it->first; - const std::string& old_uuid = old_it->second; +void MasterImpl::RefreshTabletNodeList(const std::map &new_ts_list) { + MutexLock lock(&tabletnode_mutex_); + + std::map del_ts_list; + std::map add_ts_list; + + std::map old_ts_list; + tabletnode_manager_->GetAllTabletNodeId(&old_ts_list); + std::unordered_map delay_add_nodes; + abnormal_node_mgr_->GetDelayAddNodes(&delay_add_nodes); + for (const auto &node : delay_add_nodes) { + old_ts_list.emplace(node.first, node.second); + } + + std::map::const_iterator old_it = old_ts_list.begin(); + std::map::const_iterator new_it = new_ts_list.begin(); + while (old_it != old_ts_list.end() && new_it != new_ts_list.end()) { + const std::string &old_addr = old_it->first; + const std::string &new_addr = new_it->first; + const std::string &old_uuid = old_it->second; + const std::string &new_uuid = new_it->second; + int cmp_ret = old_addr.compare(new_addr); + if (cmp_ret == 0) { + if (old_uuid != new_uuid) { + LOG(INFO) << "tabletnode " << old_addr << " restart: " << old_uuid << " -> " << new_uuid; del_ts_list[old_addr] = old_uuid; - } - for (; new_it != new_ts_list.end(); ++new_it) { - const std::string& new_addr = new_it->first; - const std::string& new_uuid = new_it->second; add_ts_list[new_addr] = new_uuid; - } - - std::map::iterator it; - for (it = del_ts_list.begin(); it != del_ts_list.end(); ++it) { - const std::string& old_addr = it->first; - DeleteTabletNode(old_addr, it->second); - } - - if (add_ts_list.size() > 0 && !restored_) { - CHECK(GetMasterStatus() == kOnWait); - Restore(new_ts_list); - return; - } + } + ++old_it; + ++new_it; + } else if (cmp_ret < 0) { + VLOG(30) << "delete node, addr: " << old_addr << ", uuid: " << old_uuid; + del_ts_list[old_addr] = old_uuid; + ++old_it; + } else { + VLOG(30) << "add node, addr: " << new_addr << ", uuid: " << new_uuid; + add_ts_list[new_addr] = new_uuid; + ++new_it; + } + } + for (; old_it != old_ts_list.end(); ++old_it) { + const std::string &old_addr = old_it->first; + const std::string &old_uuid = old_it->second; + del_ts_list[old_addr] = old_uuid; + } + for (; new_it != new_ts_list.end(); ++new_it) { + const std::string &new_addr = new_it->first; + const std::string &new_uuid = new_it->second; + add_ts_list[new_addr] = new_uuid; + } + + std::map::iterator it; + for (it = del_ts_list.begin(); it != del_ts_list.end(); ++it) { + const std::string &old_addr = it->first; + DeleteTabletNode(old_addr, it->second); + } + + if (add_ts_list.size() > 0 && !restored_) { + CHECK(GetMasterStatus() == kOnWait); + DoStateTransition(MasterEvent::kAvailTs); + Restore(new_ts_list); + return; + } - for (it = add_ts_list.begin(); it != add_ts_list.end(); ++it) { - const std::string& new_addr = it->first; - const std::string& new_uuid = it->second; - AddTabletNode(new_addr, new_uuid); - } + for (it = add_ts_list.begin(); it != add_ts_list.end(); ++it) { + const std::string &new_addr = it->first; + const std::string &new_uuid = it->second; + AddTabletNode(new_addr, new_uuid); + } } -void MasterImpl::AddTabletNode(const std::string& tabletnode_addr, - const std::string& tabletnode_uuid) { - TabletNodePtr node = tabletnode_manager_->AddTabletNode(tabletnode_addr, tabletnode_uuid); - if (!node) { - return; - } - CHECK(node->GetState() == kReady); - // update tabletnode info - timeval update_time; - gettimeofday(&update_time, NULL); - TabletNode state; - state.addr_ = tabletnode_addr; - state.report_status_ = kTabletNodeReady; - state.info_.set_addr(tabletnode_addr); - state.data_size_ = 0; - state.qps_ = 0; - state.update_time_ = update_time.tv_sec * 1000 + update_time.tv_usec / 1000; - - tabletnode_manager_->UpdateTabletNode(tabletnode_addr, state); +void MasterImpl::AddTabletNode(const std::string &tabletnode_addr, + const std::string &tabletnode_uuid) { + if (abnormal_node_mgr_->IsAbnormalNode(tabletnode_addr, tabletnode_uuid)) { + LOG(WARNING) << abnormal_node_mgr_->GetNodeInfo(tabletnode_addr); + return; + } - // If all tabletnodes restart in one zk callback, - // master will not enter restore/wait state; - // meta table must be scheduled to load from here. - if (meta_tablet_->GetStatus() == TabletMeta::kTabletOffline) { - TryLoadTablet(meta_tablet_); - } - // load offline tablets - // update tabletnode - std::vector tablet_list; - tablet_manager_->FindTablet(tabletnode_addr, - &tablet_list, - false); // need disabled table/tablets - std::vector::iterator it = tablet_list.begin(); - for (; it != tablet_list.end(); ++it) { - TabletPtr tablet = *it; - if (tablet->LockTransition()) { - if (tablet->GetStatus() != TabletMeta::kTabletOffline) { - tablet->UnlockTransition(); - LOG(WARNING) << "tablet cannot deal TsOffline event, tablet: " << tablet; - continue; - } - std::shared_ptr load(new LoadTabletProcedure(tablet, node, thread_pool_.get())); - if (MasterEnv().GetExecutor()->AddProcedure(load) == 0) { - LOG(WARNING) << "add to procedure_executor fail, may duplicated procid: " << load->ProcId(); - tablet->UnlockTransition(); - } - } + TabletNodePtr node = tabletnode_manager_->AddTabletNode(tabletnode_addr, tabletnode_uuid); + if (!node) { + return; + } + CHECK(node->GetState() == kReady); + // update tabletnode info + timeval update_time; + gettimeofday(&update_time, NULL); + TabletNode state; + state.addr_ = tabletnode_addr; + state.report_status_ = kTabletNodeReady; + state.info_.set_addr(tabletnode_addr); + state.data_size_ = 0; + state.qps_ = 0; + state.update_time_ = update_time.tv_sec * 1000 + update_time.tv_usec / 1000; + + tabletnode_manager_->UpdateTabletNode(tabletnode_addr, state); + + // If all tabletnodes restart in one zk callback, + // master will not enter restore/wait state; + // meta table must be scheduled to load from here. + if (meta_tablet_->GetStatus() == TabletMeta::kTabletOffline) { + TryLoadTablet(meta_tablet_); + } + + int64_t reconn_taskid = tabletnode_manager_->PopTabletNodeReconnectTaskID(tabletnode_addr); + if (reconn_taskid > 0) { + thread_pool_->CancelTask(reconn_taskid); + LOG(INFO) << "tabletnode reconnected, cancel reconn tiemout task, id: " << reconn_taskid; + } + + // load offline tablets + // update tabletnode + std::vector tablet_list; + tablet_manager_->FindTablet(tabletnode_addr, &tablet_list, false); // need disabled table/tablets + std::vector::iterator it = tablet_list.begin(); + for (; it != tablet_list.end(); ++it) { + TabletPtr tablet = *it; + if (tablet->LockTransition()) { + if (tablet->GetStatus() == TabletMeta::kTabletDelayOffline) { + tablet->DoStateTransition(TabletEvent::kTsOffline); + } + if (tablet->GetStatus() != TabletMeta::kTabletOffline) { + tablet->UnlockTransition(); + LOG(WARNING) << "tablet cannot deal TsOffline event, tablet: " << tablet; + continue; + } + std::shared_ptr load(new LoadTabletProcedure(tablet, node, thread_pool_.get())); + if (MasterEnv().GetExecutor()->AddProcedure(load) == 0) { + LOG(WARNING) << "add to procedure_executor fail, may duplicated procid: " << load->ProcId(); + tablet->UnlockTransition(); + } } - TryLeaveSafeMode(); + } + // safemode must be manual checked and manual leave } -void MasterImpl::DeleteTabletNode(const std::string& tabletnode_addr, const std::string& uuid) { - TabletNodePtr node = tabletnode_manager_->DelTabletNode(tabletnode_addr); - if (!node) { - return; - } - // possible status: running, readonly, wait. - if (GetMasterStatus() == kOnWait) { - return; - } - TryEnterSafeMode(); +void MasterImpl::DeleteTabletNode(const std::string &tabletnode_addr, const std::string &uuid) { + abnormal_node_mgr_->RecordNodeDelete(tabletnode_addr, get_micros() / 1000000); + TabletNodePtr node = tabletnode_manager_->DelTabletNode(tabletnode_addr); + if (!node || node->uuid_ != uuid) { + LOG(INFO) << "invalid node and uuid: addr: " << tabletnode_addr << ", uuid: " << uuid; + return; + } - std::vector tablet_list; - tablet_manager_->FindTablet(tabletnode_addr, &tablet_list, false); - for (auto it = tablet_list.begin(); it != tablet_list.end(); ++it) { - TabletPtr tablet = *it; - if (tablet->LockTransition()) { - // tablet maybe already been updated by another async TabletXxxProcedure (for example a - // manual triggered MoveTabletProcedure), leading tablet info got through FindTablet - // is stale. skip these kinds of tablets - if (tablet->GetTabletNode() && tablet->GetTabletNode()->GetId() != uuid) { - LOG(WARNING) << "stale tablet info, tablet: " << tablet << " has already been resumed @ [" - << tablet->GetTabletNode()->GetAddr() << ", " << tablet->GetTabletNode()->GetId(); - tablet->UnlockTransition(); - continue; - } - if (tablet->GetStatus() == TabletMeta::kTabletLoadFail) { - LOG(INFO) << "try reload tablet in status kTabletLoadFail as ts down, tablet:" << tablet; - tablet->DoStateTransition(TabletEvent::kTsOffline); - } - std::shared_ptr move(new MoveTabletProcedure(tablet, node, thread_pool_.get())); - if (MasterEnv().GetExecutor()->AddProcedure(move) == 0) { - LOG(WARNING) << "add to procedure_executor fail, may duplicated procid: " << move->ProcId(); - tablet->UnlockTransition(); - } - } + // possible status: running, readonly, wait. + if (GetMasterStatus() == kOnWait) { + return; + } + + std::vector tablet_list; + tablet_manager_->FindTablet(tabletnode_addr, &tablet_list, false); + + if (meta_tablet_->GetTabletNode() && meta_tablet_->GetTabletNode()->uuid_ == uuid) { + LOG(INFO) << " try move meta tablet immediately: "; + TryMoveTablet(meta_tablet_); + auto pend = std::remove(tablet_list.begin(), tablet_list.end(), + std::dynamic_pointer_cast(meta_tablet_)); + tablet_list.erase(pend, tablet_list.end()); + } + + bool in_safemode = TryEnterSafeMode(); + TabletEvent event = in_safemode ? TabletEvent::kTsOffline : TabletEvent::kTsDelayOffline; + for (auto it = tablet_list.begin(); it != tablet_list.end(); ++it) { + TabletPtr tablet = *it; + if ((tablet->GetTabletNode() && tablet->GetTabletNode()->uuid_ == uuid) && + tablet->LockTransition()) { + tablet->DoStateTransition(event); + tablet->UnlockTransition(); + } + } + if (in_safemode) { + LOG(WARNING) << "master is in safemode, will not recover user tablet at ts: " + << tabletnode_addr; + return; + } + int64_t wait_time = FLAGS_tera_master_tabletnode_timeout; + wait_time = wait_time ? wait_time : 3 * FLAGS_tera_master_query_tabletnode_period; + ThreadPool::Task task = + std::bind(&MasterImpl::MoveTabletOnDeadTabletNode, this, tablet_list, node); + int64_t reconnect_timeout_task_id = thread_pool_->DelayTask(wait_time, task); + tabletnode_manager_->WaitTabletNodeReconnect(tabletnode_addr, uuid, reconnect_timeout_task_id); +} + +void MasterImpl::MoveTabletOnDeadTabletNode(const std::vector &tablet_list, + TabletNodePtr dead_node) { + const std::string uuid = dead_node->GetId(); + for (auto it = tablet_list.begin(); it != tablet_list.end(); ++it) { + TabletPtr tablet = *it; + if (tablet->LockTransition()) { + // tablet maybe already been updated by another async TabletXxxProcedure + // (for example a + // manual triggered MoveTabletProcedure), leading tablet info got through + // FindTablet + // is stale. skip these kinds of tablets + if (tablet->GetTabletNode() && tablet->GetTabletNode()->GetId() != uuid) { + LOG(WARNING) << "stale tablet info, tablet: " << tablet << " has already been resumed @ [" + << tablet->GetTabletNode()->GetAddr() << ", " + << tablet->GetTabletNode()->GetId(); + tablet->UnlockTransition(); + continue; + } + tablet->DoStateTransition(TabletEvent::kTsOffline); + // if meta is at this dead TS, move it always + std::shared_ptr move( + new MoveTabletProcedure(tablet, dead_node, thread_pool_.get())); + if (MasterEnv().GetExecutor()->AddProcedure(move) == 0) { + LOG(WARNING) << "add to procedure_executor fail, may duplicated procid: " << move->ProcId(); + tablet->UnlockTransition(); + } } + } } -void MasterImpl::TryEnterSafeMode() { - if (GetMasterStatus() != kIsRunning) { - return; - } - double tablet_locality_ratio = LiveNodeTabletRatio(); - LOG(INFO) << "tablet locality ratio: " << tablet_locality_ratio; - if (tablet_locality_ratio < FLAGS_tera_safemode_tablet_locality_ratio) { - EnterSafeMode(); - } -} +bool MasterImpl::TryEnterSafeMode() { + if (IsInSafeMode()) { + return true; + } + double tablet_locality_ratio = LiveNodeTabletRatio(); + LOG(INFO) << "tablet locality ratio: " << tablet_locality_ratio; + if (tablet_locality_ratio < FLAGS_tera_safemode_tablet_locality_ratio) { + return EnterSafeMode(MasterEvent::kEnterSafemode); + } + return false; +} + +bool MasterImpl::EnterSafeMode(const MasterEvent event, StatusCode *status) { + if (GetMasterStatus() == kIsRunning && get_millis() < running_guard_timestamp_) { + LOG(WARNING) << "refuse to enter safemode until after : " + << (running_guard_timestamp_ - get_millis()) << "(ms) later"; + return false; + } -bool MasterImpl::EnterSafeMode(StatusCode* status) { - MasterStatus old_status; - if (!SetMasterStatus(kIsReadonly, &old_status)) { - SetStatusCode(static_cast(old_status), status); - return false; - } + MasterStatus old_status; + if (!DoStateTransition(event, &old_status)) { + SetStatusCode(static_cast(old_status), status); + return false; + } - LOG(WARNING) << kSms << "enter safemode"; - if (!zk_adapter_->MarkSafeMode()) { - SetStatusCode(kZKError, status); - return false; - } + LOG(WARNING) << kSms << "enter safemode"; - tablet_manager_->Stop(); - DisableTabletNodeGcTimer(); - DisableLoadBalance(); - DisableGcTrashCleanTimer(); - return true; -} + if (!zk_adapter_->MarkSafeMode()) { + SetStatusCode(kZKError, status); + return false; + } -void MasterImpl::TryLeaveSafeMode() { - if (GetMasterStatus() != kIsReadonly) { - return; - } - double tablet_locality_ratio = LiveNodeTabletRatio(); - LOG(INFO) << "tablet locality ratio: " << tablet_locality_ratio; - if (tablet_locality_ratio >= FLAGS_tera_safemode_tablet_locality_ratio) { - LeaveSafeMode(); - } + tablet_manager_->Stop(); + DisableTabletNodeGcTimer(); + DisableLoadBalance(); + DisableGcTrashCleanTimer(); + return true; } -bool MasterImpl::LeaveSafeMode(StatusCode* status) { - MasterStatus old_status; - if (!SetMasterStatus(kIsRunning, &old_status)) { - SetStatusCode(static_cast(old_status), status); - return false; - } - - LOG(WARNING) << kSms << "leave safemode"; - if (!zk_adapter_->UnmarkSafeMode()) { - SetStatusCode(kZKError, status); - return false; - } +void MasterImpl::TryLeaveSafeMode() { + if (GetMasterStatus() != kIsReadonly) { + return; + } + double tablet_locality_ratio = LiveNodeTabletRatio(); + LOG(INFO) << "tablet locality ratio: " << tablet_locality_ratio; + if (tablet_locality_ratio >= FLAGS_tera_safemode_tablet_locality_ratio) { + LeaveSafeMode(MasterEvent::kLeaveSafemode); + } +} + +bool MasterImpl::LeaveSafeMode(const MasterEvent event, StatusCode *status) { + safemode_ttl_taskid_ = -1; + MasterStatus old_status; + if (!DoStateTransition(event, &old_status)) { + SetStatusCode(static_cast(old_status), status); + return false; + } - LoadAllDeadNodeTablets(); + LOG(WARNING) << kSms << "leave safemode"; - tablet_manager_->Init(); - EnableQueryTabletNodeTimer(); - EnableTabletNodeGcTimer(); - EnableLoadBalance(); - EnableGcTrashCleanTimer(); + if (zk_adapter_->HasSafeModeNode() && !zk_adapter_->UnmarkSafeMode()) { + SetStatusCode(kZKError, status); + return false; + } - std::vector node_array; - tabletnode_manager_->GetAllTabletNodeInfo(&node_array); - for (uint32_t i = 0; i < node_array.size(); i++) { - TabletNodePtr node = node_array[i]; - if (node->GetState() == kWaitKick) { - KickTabletNode(node); - } - } + LoadAllDeadNodeTablets(); + tablet_manager_->Init(); + EnableQueryTabletNodeTimer(); + EnableTabletNodeGcTimer(); + EnableLoadBalance(); + EnableGcTrashCleanTimer(); - return true; + return true; } void MasterImpl::LoadAllDeadNodeTablets() { - std::vector all_tablet_list; - tablet_manager_->ShowTable(NULL, &all_tablet_list); + std::vector all_tablet_list; + tablet_manager_->ShowTable(NULL, &all_tablet_list); - std::vector::iterator it; - for (it = all_tablet_list.begin(); it != all_tablet_list.end(); ++it) { - TabletPtr tablet = *it; - if (tablet->GetStatus() != TabletMeta::kTabletOffline) { - continue; - } - TabletNodePtr node; - if (tabletnode_manager_->FindTabletNode(tablet->GetServerAddr(), &node) - && node->GetState() == kReady) { - continue; - } - LOG(INFO) << "try load tablets in dead node, " << tablet; - TryLoadTablet(tablet); + std::vector::iterator it; + for (it = all_tablet_list.begin(); it != all_tablet_list.end(); ++it) { + TabletPtr tablet = *it; + if (tablet->GetStatus() != TabletMeta::kTabletOffline) { + continue; } -} -void MasterImpl::TryKickTabletNode(const std::string& tabletnode_addr) { - if (!FLAGS_tera_master_kick_tabletnode_enabled) { - LOG(INFO) << "will not kick tabletnode " << tabletnode_addr; - return; - } - LOG(INFO) << "try kick tabletnode " << tabletnode_addr << " ..."; + TabletNodePtr node; + if (tabletnode_manager_->FindTabletNode(tablet->GetServerAddr(), &node) && + node->GetState() == kReady) { + continue; + } + LOG(INFO) << "try load tablets in dead node, " << tablet; + TryLoadTablet(tablet); + } +} + +bool MasterImpl::TryKickTabletNode(TabletNodePtr node) { + // concurrently kicking is not allowed + std::lock_guard lock(kick_mutex_); + if (node->NodeKicked()) { + VLOG(6) << "node has already been kicked, addr: " << node->GetAddr() + << ", uuid: " << node->GetId() + << ", state: " << StatusCodeToString((StatusCode)node->GetState()); + return true; + } + if (!FLAGS_tera_master_kick_tabletnode_enabled) { + VLOG(6) << "kick is disabled, addr: " << node->GetAddr() << ", uuid: " << node->GetId(); + return false; + } - TabletNodePtr tabletnode; - if (!tabletnode_manager_->FindTabletNode(tabletnode_addr, &tabletnode)) { - LOG(WARNING) << "cancel kick tabletnode " << tabletnode_addr << " has been removed"; - return; - } + if (IsInSafeMode()) { + VLOG(6) << "cancel kick ts, master is in safemode, addr: " << node->GetAddr() + << ", uuid: " << node->GetId(); + return false; + } - NodeState old_state; - if (!tabletnode->SetState(kWaitKick, &old_state)) { - LOG(WARNING) << "cancel kick tabletnode " << tabletnode_addr - << " state: " << StatusCodeToString(static_cast(old_state)); - return; - } - KickTabletNode(tabletnode); -} + if (!node->DoStateTransition(NodeEvent::kPrepareKickTs)) { + return false; + } + + double tablet_locality_ratio = LiveNodeTabletRatio(); + LOG(INFO) << "tablet locality ratio: " << tablet_locality_ratio; + if (tablet_locality_ratio < FLAGS_tera_safemode_tablet_locality_ratio) { + node->DoStateTransition(NodeEvent::kCancelKickTs); + LOG(WARNING) << "tablet live ratio will fall to: " << tablet_locality_ratio + << ", cancel kick ts: " << node->GetAddr(); + return false; + } -void MasterImpl::KickTabletNode(TabletNodePtr node) { - // avoid massive kick - static Mutex mutex; - { - MutexLock lock(&mutex); - MasterStatus status = GetMasterStatus(); - if (status == kIsReadonly) { - LOG(WARNING) << "cancel kick tabletnode " << node->addr_ - << ", master state: " << StatusCodeToString(static_cast(status)); - return; - } - TryEnterSafeMode(); - } + if (!zk_adapter_->KickTabletServer(node->addr_, node->uuid_)) { + LOG(WARNING) << "kick tabletnode fail, node: " << node->addr_ << "," << node->uuid_; + node->DoStateTransition(NodeEvent::kCancelKickTs); + // revert node status; + return false; + } + node->DoStateTransition(NodeEvent::kZkKickNodeCreated); + return true; +} - NodeState old_state; - if (!node->SetState(kOnKick, &old_state)) { - LOG(WARNING) << "cancel kick, tabletnode " << node->addr_ - << " state: " << StatusCodeToString(static_cast(old_state)); - return; - } - if (!zk_adapter_->KickTabletServer(node->addr_, node->uuid_)) { - LOG(FATAL) << "Unable to kick tabletnode: " << node->addr_; - } +bool MasterImpl::TryKickTabletNode(const std::string &tabletnode_addr) { + TabletNodePtr node; + if (!tabletnode_manager_->FindTabletNode(tabletnode_addr, &node)) { + LOG(WARNING) << "tabletnode not exist: addr: " << tabletnode_addr; + return false; + } + return TryKickTabletNode(node); } double MasterImpl::LiveNodeTabletRatio() { - std::vector all_tablet_list; - tablet_manager_->ShowTable(NULL, &all_tablet_list); - uint64_t tablet_num = all_tablet_list.size(); - if (tablet_num == 0) { - return 1.0; - } - - std::map > node_tablet_list; - std::vector::iterator it; - for (it = all_tablet_list.begin(); it != all_tablet_list.end(); ++it) { - TabletPtr tablet = *it; - node_tablet_list[tablet->GetServerAddr()].push_back(tablet); - } - - uint64_t live_tablet_num = 0; - std::vector all_node_list; - tabletnode_manager_->GetAllTabletNodeInfo(&all_node_list); - std::vector::iterator node_it = all_node_list.begin(); - for (; node_it != all_node_list.end(); ++node_it) { - TabletNodePtr node = *node_it; - if (node->GetState() != kReady) { - continue; - } - const std::string& addr = node->GetAddr(); - const std::vector& tablet_list = node_tablet_list[addr]; - live_tablet_num += tablet_list.size(); - } - return (double)live_tablet_num / tablet_num; + std::vector all_tablet_list; + tablet_manager_->ShowTable(NULL, &all_tablet_list); + uint64_t tablet_num = all_tablet_list.size(); + if (tablet_num == 0) { + return 1.0; + } + + std::map> node_tablet_list; + std::vector::iterator it; + for (it = all_tablet_list.begin(); it != all_tablet_list.end(); ++it) { + TabletPtr tablet = *it; + node_tablet_list[tablet->GetServerAddr()].push_back(tablet); + } + + uint64_t live_tablet_num = 0; + std::vector all_node_list; + tabletnode_manager_->GetAllTabletNodeInfo(&all_node_list); + std::vector::iterator node_it = all_node_list.begin(); + for (; node_it != all_node_list.end(); ++node_it) { + TabletNodePtr node = *node_it; + if (node->GetState() != kReady) { + continue; + } + const std::string &addr = node->GetAddr(); + const std::vector &tablet_list = node_tablet_list[addr]; + live_tablet_num += tablet_list.size(); + } + return (double)live_tablet_num / tablet_num; } ////////// table operation //////////// -bool MasterImpl::LoadTabletSync(const TabletMeta& meta, - const TableSchema& schema, - StatusCode* status) { - TabletNodePtr node; - if (!tabletnode_manager_->FindTabletNode(meta.server_addr(), &node)) { - SetStatusCode(kTabletNodeOffLine, status); - return false; - } - - tabletnode::TabletNodeClient node_client(thread_pool_.get(), meta.server_addr(), - FLAGS_tera_master_load_rpc_timeout); - - LoadTabletRequest request; - LoadTabletResponse response; - request.set_tablet_name(meta.table_name()); - request.set_sequence_id(this_sequence_id_.Inc()); - request.mutable_key_range()->CopyFrom(meta.key_range()); - request.set_path(meta.path()); - request.mutable_schema()->CopyFrom(schema); - request.set_session_id(node->uuid_); - - if (node_client.LoadTablet(&request, &response) - && response.status() == kTabletNodeOk) { - return true; - } - SetStatusCode(response.status(), status); +bool MasterImpl::LoadTabletSync(const TabletMeta &meta, const TableSchema &schema, + StatusCode *status) { + TabletNodePtr node; + if (!tabletnode_manager_->FindTabletNode(meta.server_addr(), &node)) { + SetStatusCode(kTabletNodeOffline, status); return false; -} - -bool MasterImpl::UnloadTabletSync(const std::string& table_name, - const std::string& key_start, - const std::string& key_end, - const std::string& server_addr, - StatusCode* status) { - VLOG(5) << "UnloadTabletSync() for " << table_name << " [" - << DebugString(key_start) << ", " << DebugString(key_end) << "]"; - tabletnode::TabletNodeClient node_client(thread_pool_.get(), server_addr, - FLAGS_tera_master_unload_rpc_timeout); + } - UnloadTabletRequest request; - UnloadTabletResponse response; - request.set_sequence_id(this_sequence_id_.Inc()); - request.set_tablet_name(table_name); - request.mutable_key_range()->set_key_start(key_start); - request.mutable_key_range()->set_key_end(key_end); + tabletnode::TabletNodeClient node_client(thread_pool_.get(), meta.server_addr(), + FLAGS_tera_master_load_rpc_timeout); + LoadTabletRequest request; + LoadTabletResponse response; + request.set_tablet_name(meta.table_name()); + request.set_sequence_id(this_sequence_id_.Inc()); + request.mutable_key_range()->CopyFrom(meta.key_range()); + request.set_path(meta.path()); + request.mutable_schema()->CopyFrom(schema); + request.set_session_id(node->uuid_); - if (!node_client.UnloadTablet(&request, &response) - || response.status() != kTabletNodeOk) { - SetStatusCode(response.status(), status); - LOG(ERROR) << "fail to unload table: " << table_name << " [" - << DebugString(key_start) << ", " << DebugString(key_end) << "]" - << ", status_: " << StatusCodeToString(response.status()); - return false; - } + if (node_client.LoadTablet(&request, &response) && response.status() == kTabletNodeOk) { return true; -} - - -void MasterImpl::QueryTabletNodeAsync(std::string addr, int32_t timeout, - bool is_gc, QueryClosure done) { - tabletnode::TabletNodeClient node_client(thread_pool_.get(), addr, timeout); - - QueryRequest* request = new QueryRequest; - QueryResponse* response = new QueryResponse; - request->set_sequence_id(this_sequence_id_.Inc()); - - if (is_gc) { - request->set_is_gc_query(true); + } + SetStatusCode(response.status(), status); + return false; +} + +bool MasterImpl::UnloadTabletSync(const std::string &table_name, const std::string &key_start, + const std::string &key_end, const std::string &server_addr, + StatusCode *status) { + VLOG(5) << "UnloadTabletSync() for " << table_name << " [" << DebugString(key_start) << ", " + << DebugString(key_end) << "]"; + tabletnode::TabletNodeClient node_client(thread_pool_.get(), server_addr, + FLAGS_tera_master_unload_rpc_timeout); + + UnloadTabletRequest request; + UnloadTabletResponse response; + request.set_sequence_id(this_sequence_id_.Inc()); + request.set_tablet_name(table_name); + request.mutable_key_range()->set_key_start(key_start); + request.mutable_key_range()->set_key_end(key_end); + + if (!node_client.UnloadTablet(&request, &response) || response.status() != kTabletNodeOk) { + SetStatusCode(response.status(), status); + LOG(ERROR) << "fail to unload table: " << table_name << " [" << DebugString(key_start) << ", " + << DebugString(key_end) << "]" + << ", status_: " << StatusCodeToString(response.status()); + return false; + } + return true; +} + +void MasterImpl::QueryTabletNodeAsync(std::string addr, int32_t timeout, bool is_gc, + QueryClosure done) { + tabletnode::TabletNodeClient node_client(thread_pool_.get(), addr, timeout); + + QueryRequest *request = new QueryRequest; + QueryResponse *response = new QueryResponse; + request->set_sequence_id(this_sequence_id_.Inc()); + + if (is_gc) { + request->set_is_gc_query(true); + } + + // Set update info in access_checker + access_entry_->GetAccessUpdater().BuildReq(request); + quota_entry_->BuildReq(request, addr); + + VLOG(20) << "QueryAsync id: " << request->sequence_id() << ", " + << "server: " << addr; + node_client.Query(query_thread_pool_.get(), request, response, done); +} + +void MasterImpl::QueryTabletNodeCallback(std::string addr, QueryRequest *req, QueryResponse *res, + bool failed, int error_code) { + std::unique_ptr request{req}; + std::unique_ptr response{res}; + bool in_safemode = IsInSafeMode(); + int64_t query_callback_start = get_micros(); + const int64_t fuzzy_time = FLAGS_tera_master_query_tabletnode_period * 1000; + TabletNodePtr node; + if (!tabletnode_manager_->FindTabletNode(addr, &node)) { + LOG(WARNING) << "fail to query: server down, id: " << request->sequence_id() + << ", server: " << addr; + } else if (failed || response->status() != kTabletNodeOk) { + LOG_IF(WARNING, failed) << "fail to query: " << sofa::pbrpc::RpcErrorCodeToString(error_code) + << ", id: " << request->sequence_id() << ", server: " << addr; + LOG_IF(WARNING, !failed) << "fail to query: " << StatusCodeToString(response->status()) + << ", id: " << request->sequence_id() << ", server: " << addr; + int32_t fail_count = node->IncQueryFailCount(); + if (fail_count >= FLAGS_tera_master_kick_tabletnode_query_fail_times) { + LOG(ERROR) << kSms << "fail to query " << addr << " for " << fail_count << " times"; + TryKickTabletNode(addr); + } + } else { + // update tablet meta + uint32_t meta_num = response->tabletmeta_list().meta_size(); + std::map tablet_map; + for (uint32_t i = 0; i < meta_num; i++) { + const TabletMeta &meta = response->tabletmeta_list().meta(i); + const TabletCounter &counter = response->tabletmeta_list().counter(i); + const std::string &table_name = meta.table_name(); + const std::string &key_start = meta.key_range().key_start(); + const std::string &key_end = meta.key_range().key_end(); + int64_t create_time = meta.create_time(); + uint64_t version = meta.version(); + + TablePtr table; + if (!tablet_manager_->FindTable(table_name, &table)) { + LOG(WARNING) << "[query] table not exist, tablet: " << meta.path() << " [" + << DebugString(key_start) << ", " << DebugString(key_end) << "] @ " + << meta.server_addr() << " status: " << meta.status(); + continue; + } + + if (create_time > 0 && create_time < table->CreateTime()) { + LOG(WARNING) << "[query] stale tablet of newly create table, tablet: " << meta.path(); + continue; + } + + std::vector tablets; + if (!table->FindOverlappedTablets(key_start, key_end, &tablets)) { + LOG(WARNING) << "[query] key range hole find for table: " << table_name + << ", hole tablet: " << meta.path() << ", keyrange: [" << key_start << ", " + << key_end << "]"; + continue; + } + + if (tablets.size() > 1) { + bool splitted_tablet = true; + for (uint32_t j = 0; j < tablets.size(); ++j) { + if (version > tablets[j]->Version()) { + LOG(FATAL) << "[query] tablet version error: " << tablets[j]; + splitted_tablet &= false; + } + } + if (splitted_tablet) { + TabletPtr stale_tablet(new StaleTablet(meta)); + BindTabletToTabletNode(stale_tablet, node); + TryUnloadTablet(stale_tablet); + } + continue; + } + + CHECK_EQ(tablets.size(), 1u); + TabletPtr tablet = tablets[0]; + if (version > 0 && version < tablet->Version()) { + if (in_safemode) { + LOG(ERROR) << "[query] stale tablet: " << meta.path() << " @ " << meta.server_addr() + << ", keyrange: [" << DebugString(key_start) << ", " << DebugString(key_end) + << "]" + << ", vs tablet: " << tablet; + continue; + } + TabletPtr stale_tablet(new StaleTablet(meta)); + LOG(WARNING) << "[query] try unload stale tablet: " << stale_tablet; + BindTabletToTabletNode(stale_tablet, node); + TryUnloadTablet(stale_tablet); + } + + if (tablet->ReadyTime() >= start_query_time_) { + VLOG(20) << "[query] ignore mutable tablet: " << meta.path() << " [" + << DebugString(key_start) << ", " << DebugString(key_end) << "] @ " + << meta.server_addr() << " status: " << StatusCodeToString(meta.status()); + } else if (tablet->GetKeyStart() != key_start || tablet->GetKeyEnd() != key_end) { + LOG(ERROR) << "[query] range error tablet: " << meta.path() << " [" + << DebugString(key_start) << ", " << DebugString(key_end) << "] @ " + << meta.server_addr(); + } else if (tablet->GetPath() != meta.path()) { + LOG(ERROR) << "[query] path error tablet: " << meta.path() << "] @ " << meta.server_addr() + << " should be " << tablet->GetPath(); + } else if (TabletMeta::kTabletReady != meta.status()) { + LOG(ERROR) << "[query] status error tablet: " << meta.path() << "] @ " << meta.server_addr() + << "query status: " << StatusCodeToString(meta.status()) + << " should be kTabletReady"; + } else if (tablet->GetServerAddr() != meta.server_addr()) { + LOG(ERROR) << "[query] address tablet: " << meta.path() << " @ " << meta.server_addr() + << " should @ " << tablet->GetServerAddr(); + } else if (tablet->GetTable()->GetStatus() == kTableDisable) { + LOG(INFO) << "table disabled: " << tablet->GetPath(); + } else { + VLOG(20) << "[query] OK tablet: " << meta.path() << "] @ " << meta.server_addr(); + tablet->SetUpdateTime(query_callback_start); + tablet->UpdateSize(meta); + tablet->SetCounter(counter); + tablet->SetCompactStatus(meta.compact_status()); + } } - VLOG(20) << "QueryAsync id: " << request->sequence_id() << ", " - << "server: " << addr; - node_client.Query(query_thread_pool_.get(), request, response, done); -} - -void MasterImpl::QueryTabletNodeCallback(std::string addr, QueryRequest* request, - QueryResponse* response, bool failed, - int error_code) { - int64_t query_callback_start = get_micros(); - TabletNodePtr node; - if (!tabletnode_manager_->FindTabletNode(addr, &node)) { - LOG(WARNING) << "fail to query: server down, id: " - << request->sequence_id() << ", server: " << addr; - } else if (failed || response->status() != kTabletNodeOk) { - if (failed) { - LOG(WARNING) << "fail to query: " - << sofa::pbrpc::RpcErrorCodeToString(error_code) - << ", id: " << request->sequence_id() << ", server: " << addr; + // update tabletnode info + timeval update_time; + gettimeofday(&update_time, NULL); + TabletNode state; + state.addr_ = addr; + state.report_status_ = response->tabletnode_info().status_t(); + state.info_ = response->tabletnode_info(); + state.info_.set_addr(addr); + state.load_ = response->tabletnode_info().load(); + state.persistent_cache_size_ = response->tabletnode_info().persistent_cache_size(); + state.data_size_ = 0; + state.qps_ = 0; + state.update_time_ = update_time.tv_sec * 1000 + update_time.tv_usec / 1000; + // calculate data_size of tabletnode + // count both Ready/OnLoad and OffLine tablet + std::vector tablet_list; + tablet_manager_->FindTablet(addr, &tablet_list, false); // don't need disabled tables/tablets + std::vector::iterator it; + for (it = tablet_list.begin(); it != tablet_list.end(); ++it) { + TabletPtr tablet = *it; + if (tablet->UpdateTime() != query_callback_start) { + if (tablet->GetStatus() == TabletMeta::kTabletUnloadFail && !in_safemode) { + LOG(WARNING) << "[query] missed previous unload fail tablet, try move it: " << tablet; + LOG(ERROR) << "[query] missed tablet, try move it: " << tablet; + TryMoveTablet(tablet, tablet->GetTabletNode()); + } + if (tablet->GetStatus() == TabletMeta::kTabletReady && + tablet->ReadyTime() + fuzzy_time < start_query_time_) { + LOG(ERROR) << "[query] missed tablet, try move it: " << tablet; + TryMoveTablet(tablet, tablet->GetTabletNode()); + } + } + + TabletMeta::TabletStatus tablet_status = tablet->GetStatus(); + if (tablet_status == TabletMeta::kTabletReady || + tablet_status == TabletMeta::kTabletLoading || + tablet_status == TabletMeta::kTabletOffline) { + state.data_size_ += tablet->GetDataSize(); + state.qps_ += tablet->GetQps(); + if (state.table_size_.find(tablet->GetTableName()) != state.table_size_.end()) { + state.table_size_[tablet->GetTableName()] += tablet->GetDataSize(); + state.table_qps_[tablet->GetTableName()] += tablet->GetQps(); } else { - LOG(WARNING) << "fail to query: " - << StatusCodeToString(response->status()) - << ", id: " << request->sequence_id() << ", server: " << addr; - } - int32_t fail_count = node->IncQueryFailCount(); - if (fail_count >= FLAGS_tera_master_kick_tabletnode_query_fail_times) { - LOG(ERROR) << kSms << "fail to query " << addr - << " for " << fail_count << " times"; - TryKickTabletNode(addr); - } + state.table_size_[tablet->GetTableName()] = tablet->GetDataSize(); + state.table_qps_[tablet->GetTableName()] = tablet->GetQps(); + } + } + } + tabletnode_manager_->UpdateTabletNode(addr, state); + node->ResetQueryFailCount(); + + for (int32_t i = 0; i < response->tablet_background_errors_size(); i++) { + const TabletBackgroundErrorInfo &background_error = response->tablet_background_errors(i); + if (FLAGS_tera_stat_table_enabled) { + stat_table_->RecordTabletCorrupt(background_error.tablet_name(), + background_error.detail_info()); + } + } + VLOG(20) << "query tabletnode [" << addr + << "], status_: " << StatusCodeToString(state.report_status_); + } + + // if this is a gc query, process it + if (request->is_gc_query()) { + for (int32_t i = 0; i < response->tablet_inh_file_infos_size(); i++) { + const TabletInheritedFileInfo &tablet_inh_info = response->tablet_inh_file_infos(i); + TablePtr table_ptr; + if (tablet_manager_->FindTable(tablet_inh_info.table_name(), &table_ptr)) { + table_ptr->GarbageCollect(tablet_inh_info); + } + } + } + + // Must check master version equal to response's version or not + // Maybe update version and set SyncUgiVersion true (cause by user update auth) while dispatch + // query, at the same tiem if all query update success then set SyncUgiVersion false, + // this update auth won't dispatch before next user update auth. + if (response->has_version() && + access_entry_->GetAccessUpdater().IsSameVersion(response->version())) { + update_auth_pending_count_.Dec(); + } + + // Keep SyncVersion() to set true or false, in case ts version different with master, + // master should trigger the query dispatch. + if (response->has_quota_version() && quota_entry_->IsSameVersion(response->quota_version())) { + update_quota_pending_count_.Dec(); + } + + if (0 == query_pending_count_.Dec()) { + LOG(INFO) << "query tabletnodes finish, id " << query_tabletnode_timer_id_ + << ", update auth failed ts count " << update_auth_pending_count_.Get() + << ", update quota failed ts count " << update_quota_pending_count_.Get() << ", cost " + << (get_micros() - start_query_time_) / 1000 << "ms."; + (update_auth_pending_count_.Get() == 0) + ? access_entry_->GetAccessUpdater().SyncUgiVersion(false) + : access_entry_->GetAccessUpdater().SyncUgiVersion(true); + if (update_quota_pending_count_.Get() == 0 && quota_entry_->ClearDeltaQuota()) { + quota_entry_->SyncVersion(false); } else { - // update tablet meta - uint32_t meta_num = response->tabletmeta_list().meta_size(); - std::map tablet_map; - for (uint32_t i = 0; i < meta_num; i++) { - const TabletMeta& meta = response->tabletmeta_list().meta(i); - const TabletCounter& counter = response->tabletmeta_list().counter(i); - const std::string& table_name = meta.table_name(); - const std::string& key_start = meta.key_range().key_start(); - const std::string& key_end = meta.key_range().key_end(); - - std::vector tablets; - if (!tablet_manager_->FindOverlappedTablets(table_name, key_start, key_end, &tablets)) { - LOG(WARNING) << "[query] table not exist, tablet: " << meta.path() - << " [" << DebugString(key_start) - << ", " << DebugString(key_end) - << "] @ " << meta.server_addr() - << " status: " << meta.status(); - continue; - } - - if (tablets.size() > 1) { - bool any_tablet_load_before_query = false; - for (uint32_t j = 0; j < tablets.size(); ++j) { - if (tablets[j]->ReadyTime() < start_query_time_) { - any_tablet_load_before_query = true; - break; - } - } - if (any_tablet_load_before_query) { - LOG(ERROR) << "[query] range error tablet: " << meta.path() - << " [" << DebugString(key_start) - << ", " << DebugString(key_end) - << "] @ " << meta.server_addr() - << " status: " << meta.status(); - } else { - VLOG(20) << "[query] ignore mutable tablet: " << meta.path() - << " [" << DebugString(key_start) - << ", " << DebugString(key_end) - << "] @ " << meta.server_addr() - << " status: " << meta.status(); - } - continue; - } - - CHECK_EQ(tablets.size(), 1u); - TabletPtr tablet = tablets[0]; - if (tablet->ReadyTime() >= start_query_time_) { - VLOG(20) << "[query] ignore mutable tablet: " << meta.path() - << " [" << DebugString(key_start) - << ", " << DebugString(key_end) - << "] @ " << meta.server_addr() - << " status: " << meta.status(); - } else if (tablet->GetKeyStart() != key_start || tablet->GetKeyEnd() != key_end) { - LOG(ERROR) << "[query] range error tablet: " << meta.path() - << " [" << DebugString(key_start) - << ", " << DebugString(key_end) - << "] @ " << meta.server_addr(); - } else if (tablet->GetPath() != meta.path()) { - LOG(ERROR) << "[query] path error tablet: " << meta.path() - << "] @ " << meta.server_addr() - << " should be " << tablet->GetPath(); - } else if (TabletMeta::kTabletReady != meta.status()) { - LOG(ERROR) << "[query] status error tablet: " << meta.path() - << "] @ " << meta.server_addr() - << " should be kTabletReady"; - } else if (tablet->GetServerAddr() != meta.server_addr()) { - LOG(ERROR) << "[query] addr error tablet: " << meta.path() - << " @ " << meta.server_addr() - << " should @ " << tablet->GetServerAddr(); - } else if (tablet->GetTable()->GetStatus() == kTableDisable) { - LOG(INFO) << "table disabled: " << tablet->GetPath(); - } else { - VLOG(20) << "[query] OK tablet: " << meta.path() - << "] @ " << meta.server_addr(); - tablet->SetUpdateTime(query_callback_start); - tablet->UpdateSize(meta); - tablet->SetCounter(counter); - tablet->SetCompactStatus(meta.compact_status()); - } - } - - // update tabletnode info - timeval update_time; - gettimeofday(&update_time, NULL); - TabletNode state; - state.addr_ = addr; - state.report_status_ = response->tabletnode_info().status_t(); - state.info_ = response->tabletnode_info(); - state.info_.set_addr(addr); - state.load_ = response->tabletnode_info().load(); - state.data_size_ = 0; - state.qps_ = 0; - state.update_time_ = update_time.tv_sec * 1000 + update_time.tv_usec / 1000; - // calculate data_size of tabletnode - // count both Ready/OnLoad and OffLine tablet - std::vector tablet_list; - tablet_manager_->FindTablet(addr, - &tablet_list, - false); // don't need disabled tables/tablets - std::vector::iterator it; - for (it = tablet_list.begin(); it != tablet_list.end(); ++it) { - TabletPtr tablet = *it; - if (tablet->UpdateTime() != query_callback_start) { - if (tablet->ReadyTime() < start_query_time_) { - LOG(ERROR) << "[query] missed tablet: " << tablet; - } else { - VLOG(20) << "[query] ignore mutable missed tablet: " << tablet; - } - } - - TabletMeta::TabletStatus tablet_status = tablet->GetStatus(); - if (tablet_status == TabletMeta::kTabletReady || tablet_status == TabletMeta::kTabletLoading - || tablet_status == TabletMeta::kTabletOffline) { - state.data_size_ += tablet->GetDataSize(); - state.qps_ += tablet->GetQps(); - if (state.table_size_.find(tablet->GetTableName()) != state.table_size_.end()) { - state.table_size_[tablet->GetTableName()] += tablet->GetDataSize(); - state.table_qps_[tablet->GetTableName()] += tablet->GetQps(); - } else { - state.table_size_[tablet->GetTableName()] = tablet->GetDataSize(); - state.table_qps_[tablet->GetTableName()] = tablet->GetQps(); - } - } - } - tabletnode_manager_->UpdateTabletNode(addr, state); - node->ResetQueryFailCount(); - - for (int32_t i = 0; i < response->tablet_background_errors_size(); i++) { - const TabletBackgroundErrorInfo& background_error = - response->tablet_background_errors(i); - if (FLAGS_tera_stat_table_enabled) { - stat_table_->RecordTabletCorrupt(background_error.tablet_name(), - background_error.detail_info()); - } - } - VLOG(20) << "query tabletnode [" << addr << "], status_: " - << StatusCodeToString(state.report_status_); + quota_entry_->SyncVersion(true); } - - // if this is a gc query, process it - if (request->is_gc_query()) { - if (FLAGS_tera_master_gc_strategy == "trackable") { - for (int32_t i = 0; i < response->tablet_inh_file_infos_size(); i++) { - const TabletInheritedFileInfo& tablet_inh_info = response->tablet_inh_file_infos(i); - TablePtr table_ptr; - if (tablet_manager_->FindTable(tablet_inh_info.table_name(), &table_ptr)) { - table_ptr->GarbageCollect(tablet_inh_info); - } - } - } else { - gc_strategy_->ProcessQueryCallbackForGc(response); - } + update_auth_pending_count_.Set(0); + update_quota_pending_count_.Set(0); + quota_entry_->RefreshClusterFlowControlStatus(); + quota_entry_->RefreshDfsHardLimit(); + { + MutexLock locker(&mutex_); + if (query_enabled_) { + ScheduleQueryTabletNode(); + } else { + query_tabletnode_timer_id_ = kInvalidTimerId; + } } - if (0 == query_pending_count_.Dec()) { - LOG(INFO) << "query tabletnodes finish, id " - << query_tabletnode_timer_id_ - << ", cost " << (get_micros() - start_query_time_) / 1000 << "ms." ; - { - MutexLock locker(&mutex_); - if (query_enabled_) { - ScheduleQueryTabletNode(); - } else { - query_tabletnode_timer_id_ = kInvalidTimerId; - } - } - - ScheduleLoadBalance(); + ScheduleLoadBalance(); - if (request->is_gc_query()) { - DoTabletNodeGcPhase2(); - } + if (request->is_gc_query()) { + DoTabletNodeGcPhase2(); } + } - delete request; - delete response; - VLOG(20) << "query tabletnode finish " << addr - << ", id " << query_tabletnode_timer_id_ - << ", callback cost " << (get_micros() - query_callback_start) / 1000 << "ms."; + VLOG(20) << "query tabletnode finish " << addr << ", id " << query_tabletnode_timer_id_ + << ", callback cost " << (get_micros() - query_callback_start) / 1000 << "ms."; } -void MasterImpl::CollectTabletInfoCallback(std::string addr, - std::vector* tablet_list, - sem_t* finish_counter, Mutex* mutex, - QueryRequest* request, - QueryResponse* response, +void MasterImpl::CollectTabletInfoCallback(std::string addr, std::vector *tablet_list, + sem_t *finish_counter, Mutex *mutex, + QueryRequest *request, QueryResponse *response, bool failed, int error_code) { - std::unique_ptr request_holder(request); - std::unique_ptr response_holder(response); - TabletNodePtr node; - if (!tabletnode_manager_->FindTabletNode(addr, &node)) { - LOG(WARNING) << "fail to query: server down, id: " - << request->sequence_id() << ", server: " << addr; - } else if (!failed && response->status() == kTabletNodeOk) { - mutex->Lock(); - uint32_t meta_num = response->tabletmeta_list().meta_size(); - for (uint32_t i = 0; i < meta_num; i++) { - const TabletMeta& meta = response->tabletmeta_list().meta(i); - tablet_list->push_back(meta); - } - mutex->Unlock(); - - // update tabletnode info - timeval update_time; - gettimeofday(&update_time, NULL); - TabletNode state; - state.addr_ = addr; - state.report_status_ = response->tabletnode_info().status_t(); - state.info_ = response->tabletnode_info(); - state.info_.set_addr(addr); - state.load_ = response->tabletnode_info().load(); - state.data_size_ = 0; - state.qps_ = 0; - state.update_time_ = update_time.tv_sec * 1000 + update_time.tv_usec / 1000; - // calculate data_size of tabletnode - for (uint32_t i = 0; i < meta_num; i++) { - const TabletMeta& meta = response->tabletmeta_list().meta(i); - state.data_size_ += meta.size(); - if (state.table_size_.find(meta.table_name()) != state.table_size_.end()) { - state.table_size_[meta.table_name()] += meta.size(); - } else { - state.table_size_[meta.table_name()] = meta.size(); - } - } - //NodeState old_state; - tabletnode_manager_->UpdateTabletNode(addr, state); - node->ResetQueryFailCount(); - LOG(INFO) << "query tabletnode [" << addr << "], status_: " - << StatusCodeToString(response->tabletnode_info().status_t()); + std::unique_ptr request_holder(request); + std::unique_ptr response_holder(response); + TabletNodePtr node; + if (!tabletnode_manager_->FindTabletNode(addr, &node)) { + LOG(WARNING) << "fail to query: server down, id: " << request->sequence_id() + << ", server: " << addr; + } else if (!failed && response->status() == kTabletNodeOk) { + mutex->Lock(); + uint32_t meta_num = response->tabletmeta_list().meta_size(); + for (uint32_t i = 0; i < meta_num; i++) { + const TabletMeta &meta = response->tabletmeta_list().meta(i); + tablet_list->push_back(meta); + } + mutex->Unlock(); + + // update tabletnode info + timeval update_time; + gettimeofday(&update_time, NULL); + TabletNode state; + state.addr_ = addr; + state.report_status_ = response->tabletnode_info().status_t(); + state.info_ = response->tabletnode_info(); + state.info_.set_addr(addr); + state.load_ = response->tabletnode_info().load(); + state.persistent_cache_size_ = response->tabletnode_info().persistent_cache_size(); + state.data_size_ = 0; + state.qps_ = 0; + state.update_time_ = update_time.tv_sec * 1000 + update_time.tv_usec / 1000; + // calculate data_size of tabletnode + for (uint32_t i = 0; i < meta_num; i++) { + const TabletMeta &meta = response->tabletmeta_list().meta(i); + state.data_size_ += meta.size(); + if (state.table_size_.find(meta.table_name()) != state.table_size_.end()) { + state.table_size_[meta.table_name()] += meta.size(); + } else { + state.table_size_[meta.table_name()] = meta.size(); + } + } + // NodeState old_state; + tabletnode_manager_->UpdateTabletNode(addr, state); + node->ResetQueryFailCount(); + LOG(INFO) << "query tabletnode [" << addr + << "], status_: " << StatusCodeToString(response->tabletnode_info().status_t()); + } else { + if (failed) { + LOG(WARNING) << "fail to query: " << sofa::pbrpc::RpcErrorCodeToString(error_code) + << ", id: " << request->sequence_id() << ", server: " << addr; } else { - if (failed) { - LOG(WARNING) << "fail to query: " - << sofa::pbrpc::RpcErrorCodeToString(error_code) - << ", id: " << request->sequence_id() << ", server: " << addr; - } else { - LOG(WARNING) << "fail to query: " - << StatusCodeToString(response->status()) - << ", id: " << request->sequence_id() << ", server: " << addr; - } - int32_t fail_count = node->IncQueryFailCount(); - if (fail_count >= FLAGS_tera_master_collect_info_retry_times) { - LOG(ERROR) << kSms << "fail to query " << addr - << " for " << fail_count << " times"; - TryKickTabletNode(addr); - } else { - ThreadPool::Task task = - std::bind(&MasterImpl::RetryCollectTabletInfo, this, addr, - tablet_list, finish_counter, mutex); - thread_pool_->DelayTask(FLAGS_tera_master_collect_info_retry_period, - task); - return; - } + LOG(WARNING) << "fail to query: " << StatusCodeToString(response->status()) + << ", id: " << request->sequence_id() << ", server: " << addr; } - sem_post(finish_counter); -} - -void MasterImpl::RetryCollectTabletInfo(std::string addr, - std::vector* tablet_list, - sem_t* finish_counter, Mutex* mutex) { - QueryClosure done = - std::bind(&MasterImpl::CollectTabletInfoCallback, this, addr, - tablet_list, finish_counter, mutex, _1, _2, _3, _4); - QueryTabletNodeAsync(addr, FLAGS_tera_master_collect_info_timeout, false, done); -} - - -void MasterImpl::AddMetaCallback(std::vector tablets, - CreateTableResponse* rpc_response, - google::protobuf::Closure* rpc_done, - bool succ) { - if (!succ) { - LOG(INFO) << "create table " << tablets[0]->GetTableName() << " failed"; - rpc_response->set_status(kMetaTabletError); - rpc_done->Run(); - return; + int32_t fail_count = node->IncQueryFailCount(); + if (fail_count >= FLAGS_tera_master_collect_info_retry_times) { + LOG(ERROR) << kSms << "fail to query " << addr << " for " << fail_count << " times"; + TryKickTabletNode(addr); + } else { + ThreadPool::Task task = std::bind(&MasterImpl::RetryCollectTabletInfo, this, addr, + tablet_list, finish_counter, mutex); + thread_pool_->DelayTask(FLAGS_tera_master_collect_info_retry_period, task); + return; } + } + sem_post(finish_counter); +} - rpc_response->set_status(kMasterOk); - rpc_done->Run(); - LOG(INFO) << "create table " << tablets[0]->GetTableName() << " success"; - for (size_t i = 0; i < tablets.size(); i++) { - TryLoadTablet(tablets[i]); - } +void MasterImpl::RetryCollectTabletInfo(std::string addr, std::vector *tablet_list, + sem_t *finish_counter, Mutex *mutex) { + QueryClosure done = std::bind(&MasterImpl::CollectTabletInfoCallback, this, addr, tablet_list, + finish_counter, mutex, _1, _2, _3, _4); + QueryTabletNodeAsync(addr, FLAGS_tera_master_collect_info_timeout, false, done); } void MasterImpl::ScheduleTabletNodeGc() { - mutex_.AssertHeld(); - LOG(INFO) << "[gc] ScheduleTabletNodeGcTimer"; - ThreadPool::Task task = - std::bind(&MasterImpl::DoTabletNodeGc, this); - gc_timer_id_ = thread_pool_->DelayTask( - FLAGS_tera_master_gc_period, task); + mutex_.AssertHeld(); + LOG(INFO) << "[gc] ScheduleTabletNodeGcTimer"; + ThreadPool::Task task = std::bind(&MasterImpl::DoTabletNodeGc, this); + gc_timer_id_ = thread_pool_->DelayTask(FLAGS_tera_master_gc_period, task); } void MasterImpl::EnableTabletNodeGcTimer() { - MutexLock lock(&mutex_); - if (gc_timer_id_ == kInvalidTimerId) { - ScheduleTabletNodeGc(); - } - gc_enabled_ = true; + MutexLock lock(&mutex_); + if (gc_timer_id_ == kInvalidTimerId) { + ScheduleTabletNodeGc(); + } + gc_enabled_ = true; } void MasterImpl::DoGcTrashClean() { - { - MutexLock lock(&mutex_); - if (!gc_trash_clean_enabled_) { - gc_trash_clean_timer_id_ = kInvalidTimerId; - return; - } + { + MutexLock lock(&mutex_); + if (!gc_trash_clean_enabled_) { + gc_trash_clean_timer_id_ = kInvalidTimerId; + return; } + } - int64_t start_ts = get_micros(); - io::CleanTrackableGcTrash(); - LOG(INFO) << "[gc] clean trackable gc trash, cost: " - << (get_micros() - start_ts) / 1000 << " ms"; + int64_t start_ts = get_micros(); + io::CleanTrackableGcTrash(); + LOG(INFO) << "[gc] clean trackable gc trash, cost: " << (get_micros() - start_ts) / 1000 << " ms"; - MutexLock lock(&mutex_); - ScheduleGcTrashClean(); + MutexLock lock(&mutex_); + ScheduleGcTrashClean(); } void MasterImpl::ScheduleGcTrashClean() { - mutex_.AssertHeld(); - VLOG(10) << "[gc] ScheduleGcTrashClean"; - ThreadPool::Task task = - std::bind(&MasterImpl::DoGcTrashClean, this); - gc_timer_id_ = thread_pool_->DelayTask( - FLAGS_tera_master_gc_trash_clean_period_s * 1000, task); + mutex_.AssertHeld(); + VLOG(10) << "[gc] ScheduleGcTrashClean"; + ThreadPool::Task task = std::bind(&MasterImpl::DoGcTrashClean, this); + gc_trash_clean_timer_id_ = + thread_pool_->DelayTask(FLAGS_tera_master_gc_trash_clean_period_s * 1000, task); } void MasterImpl::EnableGcTrashCleanTimer() { - if (!FLAGS_tera_master_gc_trash_enabled) { - return; - } + if (!FLAGS_tera_master_gc_trash_enabled) { + return; + } - MutexLock lock(&mutex_); - if (gc_trash_clean_timer_id_ == kInvalidTimerId) { - ScheduleGcTrashClean(); - } - gc_trash_clean_enabled_ = true; + MutexLock lock(&mutex_); + if (gc_trash_clean_timer_id_ == kInvalidTimerId) { + ScheduleGcTrashClean(); + } + gc_trash_clean_enabled_ = true; } void MasterImpl::DisableGcTrashCleanTimer() { - if (!FLAGS_tera_master_gc_trash_enabled) { - return; - } + if (!FLAGS_tera_master_gc_trash_enabled) { + return; + } - MutexLock lock(&mutex_); - if (gc_trash_clean_timer_id_ != kInvalidTimerId) { - bool non_block = true; - if (thread_pool_->CancelTask(gc_timer_id_, non_block)) { - gc_trash_clean_timer_id_ = kInvalidTimerId; - } + MutexLock lock(&mutex_); + if (gc_trash_clean_timer_id_ != kInvalidTimerId) { + bool non_block = true; + if (thread_pool_->CancelTask(gc_trash_clean_timer_id_, non_block)) { + gc_trash_clean_timer_id_ = kInvalidTimerId; } - gc_trash_clean_enabled_ = false; + } + gc_trash_clean_enabled_ = false; +} + +void MasterImpl::DoDelayAddNode() { + int64_t start_ts = get_micros(); + + std::unordered_map nodes; + abnormal_node_mgr_->ConsumeRecoveredNodes(&nodes); + for (const auto &node : nodes) { + AddTabletNode(node.first, node.second); + } + + VLOG(30) << "delay add node cost: " << (get_micros() - start_ts) / 1000 << " ms"; +} + +void MasterImpl::ScheduleDelayAddNode() { + VLOG(30) << "DelayAddNode will be scheduled in: " << FLAGS_delay_add_node_schedule_period_s + << "s"; + int schedule_period = FLAGS_delay_add_node_schedule_period_s * 1000; + thread_pool_->DelayTask(schedule_period, [this](int64_t) { + DoDelayAddNode(); + ScheduleDelayAddNode(); + }); } void MasterImpl::DoAvailableCheck() { - MutexLock lock(&mutex_); - if (FLAGS_tera_master_availability_check_enabled) { - tablet_availability_->LogAvailability(); - } - ScheduleAvailableCheck(); + MutexLock lock(&mutex_); + if (FLAGS_tera_master_availability_check_enabled) { + tablet_availability_->LogAvailability(); + } + ScheduleAvailableCheck(); } void MasterImpl::ScheduleAvailableCheck() { - mutex_.AssertHeld(); - ThreadPool::Task task = - std::bind(&MasterImpl::DoAvailableCheck, this); - thread_pool_->DelayTask( - FLAGS_tera_master_availability_check_period * 1000, task); + mutex_.AssertHeld(); + ThreadPool::Task task = std::bind(&MasterImpl::DoAvailableCheck, this); + thread_pool_->DelayTask(FLAGS_tera_master_availability_check_period * 1000, task); } void MasterImpl::EnableAvailabilityCheck() { - MutexLock lock(&mutex_); - ScheduleAvailableCheck(); + MutexLock lock(&mutex_); + ScheduleAvailableCheck(); } void MasterImpl::DisableTabletNodeGcTimer() { - MutexLock lock(&mutex_); - if (gc_timer_id_ != kInvalidTimerId) { - bool non_block = true; - if (thread_pool_->CancelTask(gc_timer_id_, non_block)) { - gc_timer_id_ = kInvalidTimerId; - } + MutexLock lock(&mutex_); + if (gc_timer_id_ != kInvalidTimerId) { + bool non_block = true; + if (thread_pool_->CancelTask(gc_timer_id_, non_block)) { + gc_timer_id_ = kInvalidTimerId; } - gc_enabled_ = false; + } + gc_enabled_ = false; } void MasterImpl::DoTabletNodeGc() { - { - MutexLock lock(&mutex_); - if (!gc_enabled_) { - gc_timer_id_ = kInvalidTimerId; - return; - } + { + MutexLock lock(&mutex_); + if (!gc_enabled_) { + gc_timer_id_ = kInvalidTimerId; + return; } + } - bool need_gc = true; - if (FLAGS_tera_master_gc_strategy == "trackable") { - std::vector table_list; - tablet_manager_->ShowTable(&table_list, NULL); - for (uint32_t i = 0; i < table_list.size(); ++i) { - table_list[i]->TryCollectInheritedFile(); - } - } else { - need_gc = gc_strategy_->PreQuery(); - } + std::vector table_list; + tablet_manager_->ShowTable(&table_list, NULL); + for (uint32_t i = 0; i < table_list.size(); ++i) { + table_list[i]->TryCollectInheritedFile(); + } - MutexLock lock(&mutex_); - if (!need_gc) { - if (gc_enabled_) { - ScheduleTabletNodeGc(); - } else { - gc_timer_id_ = kInvalidTimerId; - } - return; - } - gc_query_enable_ = true; + MutexLock lock(&mutex_); + gc_query_enable_ = true; } void MasterImpl::DoTabletNodeGcPhase2() { - if (FLAGS_tera_master_gc_strategy == "trackable") { - std::vector table_list; - tablet_manager_->ShowTable(&table_list, NULL); - for (uint32_t i = 0; i < table_list.size(); ++i) { - table_list[i]->CleanObsoleteFile(); - } - } else { - gc_strategy_->PostQuery(); - } - - LOG(INFO) << "[gc] try clean trash dir."; - int64_t start = get_micros(); - io::CleanTrashDir(); - int64_t cost = (get_micros() - start) / 1000; - LOG(INFO) << "[gc] clean trash dir done, cost: " << cost << "ms."; - - MutexLock lock(&mutex_); - if (gc_enabled_) { - ScheduleTabletNodeGc(); - } else { - gc_timer_id_ = kInvalidTimerId; - } + std::vector table_list; + tablet_manager_->ShowTable(&table_list, NULL); + for (uint32_t i = 0; i < table_list.size(); ++i) { + table_list[i]->CleanObsoleteFile(); + } + + LOG(INFO) << "[gc] try clean trash dir."; + int64_t start = get_micros(); + io::CleanTrashDir(); + int64_t cost = (get_micros() - start) / 1000; + LOG(INFO) << "[gc] clean trash dir done, cost: " << cost << "ms."; + + MutexLock lock(&mutex_); + if (gc_enabled_) { + ScheduleTabletNodeGc(); + } else { + gc_timer_id_ = kInvalidTimerId; + } } void MasterImpl::RefreshTableCounter() { - int64_t start = get_micros(); - std::vector table_list; - tablet_manager_->ShowTable(&table_list, NULL); - for (uint32_t i = 0; i < table_list.size(); ++i) { - table_list[i]->RefreshCounter(); - } + int64_t start = get_micros(); + std::vector table_list; + tablet_manager_->ShowTable(&table_list, NULL); + for (uint32_t i = 0; i < table_list.size(); ++i) { + table_list[i]->RefreshCounter(); + } - // Set refresh interval as query-interval / 2, because each table counter - // changed after query callback reached. - ThreadPool::Task task = std::bind(&MasterImpl::RefreshTableCounter, this); - thread_pool_->DelayTask(FLAGS_tera_master_query_tabletnode_period / 2, task); - LOG(INFO) << "RefreshTableCounter, cost: " - << ((get_micros() - start) / 1000) << "ms."; + // Set refresh interval as query-interval / 2, because each table counter + // changed after query callback reached. + ThreadPool::Task task = std::bind(&MasterImpl::RefreshTableCounter, this); + thread_pool_->DelayTask(FLAGS_tera_master_query_tabletnode_period / 2, task); + LOG(INFO) << "RefreshTableCounter, cost: " << ((get_micros() - start) / 1000) << "ms."; } std::string MasterImpl::ProfilingLog() { - return "[main : " + thread_pool_->ProfilingLog() + "] [query : " - + query_thread_pool_->ProfilingLog() + "]"; + return "[main : " + thread_pool_->ProfilingLog() + "] [query : " + + query_thread_pool_->ProfilingLog() + "]"; } -bool TryLoadTablet(TabletPtr tablet, TabletNodePtr node) { - if (!tablet->LockTransition()) { - LOG(WARNING) << "tablet: " << tablet->GetPath() << "is in transition, giveup this load try"; - return false; - } - std::shared_ptr load(new LoadTabletProcedure(tablet, node, MasterEnv().GetThreadPool().get())); - if (MasterEnv().GetExecutor()->AddProcedure(load) == 0) { - LOG(WARNING) << "add to procedure_executor fail, may duplicated procid: " << load->ProcId(); - tablet->UnlockTransition(); - return false; - } - return true; +bool MasterImpl::TryLoadTablet(TabletPtr tablet, TabletNodePtr node) { + if (!tablet->LockTransition()) { + LOG(WARNING) << "tablet: " << tablet->GetPath() << "is in transition, giveup this load try"; + return false; + } + std::shared_ptr load( + new LoadTabletProcedure(tablet, node, MasterEnv().GetThreadPool().get())); + if (MasterEnv().GetExecutor()->AddProcedure(load) == 0) { + LOG(WARNING) << "add to procedure_executor fail, may duplicated procid: " << load->ProcId(); + tablet->UnlockTransition(); + return false; + } + return true; } -bool TryUnloadTablet(TabletPtr tablet) { - if (!tablet->LockTransition()) { - LOG(WARNING) << "tablet: " << tablet->GetPath() << "is in transition, giveup this unload try"; - return false; - } - std::shared_ptr unload(new UnloadTabletProcedure(tablet, MasterEnv().GetThreadPool().get(), false)); - if (MasterEnv().GetExecutor()->AddProcedure(unload) == 0) { - LOG(WARNING) << "add to procedure_executor fail, may duplicated procid: " << unload->ProcId(); - tablet->UnlockTransition(); - return false; - } - return true; +bool MasterImpl::TryUnloadTablet(TabletPtr tablet) { + if (!tablet->LockTransition()) { + LOG(WARNING) << "tablet: " << tablet->GetPath() << "is in transition, giveup this unload try"; + return false; + } + std::shared_ptr unload( + new UnloadTabletProcedure(tablet, MasterEnv().GetThreadPool().get(), false)); + if (MasterEnv().GetExecutor()->AddProcedure(unload) == 0) { + LOG(WARNING) << "add to procedure_executor fail, may duplicated procid: " << unload->ProcId(); + tablet->UnlockTransition(); + return false; + } + return true; } -bool TryMoveTablet(TabletPtr tablet, TabletNodePtr node) { - if (!tablet->LockTransition()) { - LOG(WARNING) << "tablet: " << tablet->GetPath() << "is in transition, giveup this move try"; - return false; - } - std::shared_ptr move(new MoveTabletProcedure(tablet, node, MasterEnv().GetThreadPool().get())); - if (MasterEnv().GetExecutor()->AddProcedure(move) == 0) { - LOG(WARNING) << "add to procedure_executor fail, may duplicated procid: " << move->ProcId(); - tablet->UnlockTransition(); - return false; - } - return true; +bool MasterImpl::TryMoveTablet(TabletPtr tablet, TabletNodePtr node) { + if (!tablet->LockTransition()) { + LOG(WARNING) << "tablet: " << tablet->GetPath() << "is in transition, giveup this move try"; + return false; + } + std::shared_ptr move( + new MoveTabletProcedure(tablet, node, MasterEnv().GetThreadPool().get())); + if (MasterEnv().GetExecutor()->AddProcedure(move) == 0) { + LOG(WARNING) << "add to procedure_executor fail, may duplicated procid: " << move->ProcId(); + tablet->UnlockTransition(); + return false; + } + return true; } -bool TryMergeTablet(TabletPtr tablet) { - TabletPtr peer; - if (!MasterEnv().GetTabletManager()->PickMergeTablet(tablet, &peer)) { - LOG(WARNING) << "merge abort, cannot get proper merge peer, tablet: " << tablet; - return false; - } - if (!tablet->LockTransition()) { - LOG(WARNING) << "tablet: " << tablet->GetPath() << "is in transition, giveup this merge try"; - return false; - } - if (!peer->LockTransition()) { - tablet->UnlockTransition(); - LOG(WARNING) << "merge peer is in transition, give up this merge try: " << peer; - return false; - } - std::shared_ptr merge(new MergeTabletProcedure(tablet, peer, MasterEnv().GetThreadPool().get())); - if(MasterEnv().GetExecutor()->AddProcedure(merge) == 0) { - LOG(WARNING) << "add to procedure_executor fail, may duplicated procid: " << merge->ProcId(); - tablet->UnlockTransition(); - peer->UnlockTransition(); - return false; - } - return true; +bool MasterImpl::TryMergeTablet(TabletPtr tablet) { + TabletPtr peer; + if (!MasterEnv().GetTabletManager()->PickMergeTablet(tablet, &peer)) { + VLOG(13) << "merge abort, cannot get proper merge peer, tablet: " << tablet; + return false; + } + if (!tablet->LockTransition()) { + VLOG(13) << "tablet: " << tablet->GetPath() << "is in transition, giveup this merge try"; + return false; + } + if (!peer->LockTransition()) { + tablet->UnlockTransition(); + VLOG(13) << "merge peer is in transition, give up this merge try: " << peer; + return false; + } + std::shared_ptr merge( + new MergeTabletProcedure(tablet, peer, MasterEnv().GetThreadPool().get())); + if (MasterEnv().GetExecutor()->AddProcedure(merge) == 0) { + LOG(WARNING) << "add to procedure_executor fail, may duplicated procid: " << merge->ProcId(); + tablet->UnlockTransition(); + peer->UnlockTransition(); + return false; + } + return true; } -bool TrySplitTablet(TabletPtr tablet, std::string split_key) { - if (!tablet->LockTransition()) { - LOG(WARNING) << "tablet: " << tablet->GetPath() << "is in transition, giveup this split try"; - return false; - } - std::shared_ptr split(new SplitTabletProcedure(tablet, split_key, MasterEnv().GetThreadPool().get())); - if(MasterEnv().GetExecutor()->AddProcedure(split) == 0) { - LOG(WARNING) << "add to procedure_executor fail, may duplicated procid: " << split->ProcId(); - tablet->UnlockTransition(); - return false; - } - return true; +bool MasterImpl::TrySplitTablet(TabletPtr tablet, std::string split_key) { + if (!tablet->LockTransition()) { + LOG(WARNING) << "tablet: " << tablet->GetPath() << "is in transition, giveup this split try"; + return false; + } + std::shared_ptr split( + new SplitTabletProcedure(tablet, split_key, MasterEnv().GetThreadPool().get())); + if (MasterEnv().GetExecutor()->AddProcedure(split) == 0) { + LOG(WARNING) << "add to procedure_executor fail, may duplicated procid: " << split->ProcId(); + tablet->UnlockTransition(); + return false; + } + return true; +} + +void MasterImpl::DfsHardLimitCmdCtrl(const CmdCtrlRequest *request, CmdCtrlResponse *response) { + if (request->arg_list_size() < 1 || request->arg_list_size() > 2) { + response->set_status(kInvalidArgument); + return; + } + + std::string str_result; + if (!quota_entry_) { + response->set_status(kMasterOk); + str_result.append("Quota entry is not inited"); + response->set_str_result(std::move(str_result)); + return; + } + + if (request->arg_list_size() == 1 && request->arg_list(0) == "get") { + response->set_status(kMasterOk); + int64_t tmp_val; + str_result.append("Dfs write hard limit: "); + tmp_val = quota_entry_->GetDfsWriteThroughputHardLimit(); + str_result.append(tmp_val > 0 ? std::to_string(tmp_val) : "No Limit"); + str_result.append(".\n"); + str_result.append("Dfs read hard limit: "); + tmp_val = quota_entry_->GetDfsReadThroughputHardLimit(); + str_result.append(tmp_val > 0 ? std::to_string(tmp_val) : "No Limit"); + str_result.append("."); + response->set_str_result(std::move(str_result)); + return; + } + + auto &op = request->arg_list(0); + auto &limit = request->arg_list(1); + int64_t numeric_limit; + + if (op != "write" && op != "read") { + response->set_status(kInvalidArgument); + return; + } + + try { + numeric_limit = std::stol(limit); + } catch (...) { + response->set_status(kInvalidArgument); + return; + } + + if (op == "write") { + quota_entry_->SetDfsWriteThroughputHardLimit(numeric_limit); + } else { + quota_entry_->SetDfsReadThroughputHardLimit(numeric_limit); + } + + str_result.assign("Set dfs " + op + " hard limit to: " + std::to_string(numeric_limit) + + " success."); + response->set_status(kMasterOk); + response->set_str_result(std::move(str_result)); } -} // namespace master -} // namespace tera +} // namespace master +} // namespace tera diff --git a/src/master/master_impl.h b/src/master/master_impl.h index d1eb32f6c..c39c7a7c4 100644 --- a/src/master/master_impl.h +++ b/src/master/master_impl.h @@ -16,8 +16,9 @@ #include "common/thread_pool.h" #include "gflags/gflags.h" -#include "master/gc_strategy.h" +#include "master/abnormal_node_mgr.h" #include "master/availability.h" +#include "master/master_state_machine.h" #include "master/tablet_manager.h" #include "master/tabletnode_manager.h" #include "master/user_manager.h" @@ -27,11 +28,17 @@ #include "sdk/stat_table.h" #include "sdk/table_impl.h" +#include "access/access_entry.h" +#include "access/access_builder.h" + +#include "quota/master_quota_entry.h" + #include "tablet_state_machine.h" #include "procedure_executor.h" DECLARE_int32(tera_master_impl_retry_times); DECLARE_bool(tera_acl_enabled); + namespace tera { class LoadTabletRequest; @@ -56,328 +63,297 @@ class MetaTable; class Scheduler; class TabletManager; class TabletNodeManager; +class TeraMasterEnv; class MasterImpl { -public: - enum MasterStatus { - kNotInited = kMasterNotInited, - kIsBusy = kMasterIsBusy, - kIsSecondary = kMasterIsSecondary, - kIsReadonly = kMasterIsReadonly, - kIsRunning = kMasterIsRunning, - kOnRestore = kMasterOnRestore, - kOnWait = kMasterOnWait - }; - - MasterImpl(); - virtual ~MasterImpl(); - - bool Init(); - - bool Restore(const std::map& tabletnode_list); - - void CreateTable(const CreateTableRequest* request, - CreateTableResponse* response, - google::protobuf::Closure* done); - - void DeleteTable(const DeleteTableRequest* request, - DeleteTableResponse* response, - google::protobuf::Closure* done); - - void DisableTable(const DisableTableRequest* request, - DisableTableResponse* response, - google::protobuf::Closure* done); - - void EnableTable(const EnableTableRequest* request, - EnableTableResponse* response, - google::protobuf::Closure* done); - - void UpdateTable(const UpdateTableRequest* request, - UpdateTableResponse* response, - google::protobuf::Closure* done); - - void UpdateCheck(const UpdateCheckRequest* request, - UpdateCheckResponse* response, - google::protobuf::Closure* done); - - void SearchTable(const SearchTableRequest* request, - SearchTableResponse* response, - google::protobuf::Closure* done); - - void ShowTables(const ShowTablesRequest* request, - ShowTablesResponse* response, + friend class TeraMasterEnv; + + public: + MasterImpl(const std::shared_ptr& access_entry, + const std::shared_ptr& quota_entry); + virtual ~MasterImpl(); + + bool Init(); + + bool Restore(const std::map& tabletnode_list); + + void CreateTable(const CreateTableRequest* request, CreateTableResponse* response, + google::protobuf::Closure* done); + + void DeleteTable(const DeleteTableRequest* request, DeleteTableResponse* response, + google::protobuf::Closure* done); + + void DisableTable(const DisableTableRequest* request, DisableTableResponse* response, google::protobuf::Closure* done); - void ShowTablesBrief(const ShowTablesRequest* request, - ShowTablesResponse* response, - google::protobuf::Closure* done); + void EnableTable(const EnableTableRequest* request, EnableTableResponse* response, + google::protobuf::Closure* done); - void ShowTabletNodes(const ShowTabletNodesRequest* request, - ShowTabletNodesResponse* response, - google::protobuf::Closure* done); + void UpdateTable(const UpdateTableRequest* request, UpdateTableResponse* response, + google::protobuf::Closure* done); - void CmdCtrl(const CmdCtrlRequest* request, - CmdCtrlResponse* response); - void OperateUser(const OperateUserRequest* request, - OperateUserResponse* response, - google::protobuf::Closure* done); + void UpdateCheck(const UpdateCheckRequest* request, UpdateCheckResponse* response, + google::protobuf::Closure* done); - void RefreshTabletNodeList(const std::map& ts_node_list); + void SearchTable(const SearchTableRequest* request, SearchTableResponse* response, + google::protobuf::Closure* done); - bool SetMasterStatus(const MasterStatus& new_status, - MasterStatus* old_status = NULL); - MasterStatus GetMasterStatus(); + void ShowTables(const ShowTablesRequest* request, ShowTablesResponse* response, + google::protobuf::Closure* done); - void EnableQueryTabletNodeTimer(); - void DisableQueryTabletNodeTimer(); + void ShowTablesBrief(const ShowTablesRequest* request, ShowTablesResponse* response, + google::protobuf::Closure* done); - bool GetMetaTabletAddr(std::string* addr); + void ShowTabletNodes(const ShowTabletNodesRequest* request, ShowTabletNodesResponse* response, + google::protobuf::Closure* done); - void TryKickTabletNode(const std::string& tabletnode_addr); + void CmdCtrl(const CmdCtrlRequest* request, CmdCtrlResponse* response); + void OperateUser(const OperateUserRequest* request, OperateUserResponse* response, + google::protobuf::Closure* done); - std::string ProfilingLog(); + void RefreshTabletNodeList(const std::map& ts_node_list); - bool IsRootUser(const std::string& token); + bool DoStateTransition(const MasterEvent, MasterStatus* old_status); - template - bool HasPermission(const Request* request, TablePtr table, const char* operate) { - if (!FLAGS_tera_acl_enabled || - IsRootUser(request->user_token()) || - ((table->GetSchema().admin_group() == "") && (table->GetSchema().admin() == "")) || - (request->has_user_token() && CheckUserPermissionOnTable(request->user_token(), table))) { - return true; - } else { - std::string token = request->has_user_token() ? request->user_token() : ""; - LOG(WARNING) << "[acl]" << user_manager_->TokenToUserName(token) - << ":" << token << "fail to " << operate; - return false; - } - } + bool DoStateTransition(const MasterEvent event); + + bool IsInSafeMode(); + + MasterStatus GetMasterStatus(); + std::shared_ptr GetAccessEntry() { return access_entry_; } + + void EnableQueryTabletNodeTimer(); + void DisableQueryTabletNodeTimer(); + + bool GetMetaTabletAddr(std::string* addr); + + bool TryKickTabletNode(TabletNodePtr node); -private: - typedef std::function QueryClosure; - typedef std::function UpdateClosure; - - void SafeModeCmdCtrl(const CmdCtrlRequest* request, - CmdCtrlResponse* response); - void ReloadConfig(CmdCtrlResponse* response); - void KickTabletNodeCmdCtrl(const CmdCtrlRequest* request, - CmdCtrlResponse* response); - void TabletCmdCtrl(const CmdCtrlRequest* request, - CmdCtrlResponse* response); - void TableCmdCtrl(const CmdCtrlRequest* request, - CmdCtrlResponse* response); - void MetaCmdCtrl(const CmdCtrlRequest* request, - CmdCtrlResponse* response); - - bool LoadTabletSync(const TabletMeta& meta, - const TableSchema& schema, + bool TryKickTabletNode(const std::string& tabletnode_addr); + + std::string ProfilingLog(); + + bool IsRootUser(const std::string& token); + + template + bool HasPermission(const Request* request, TablePtr table, const char* operate) { + if (!FLAGS_tera_acl_enabled || IsRootUser(request->user_token()) || + ((table->GetSchema().admin_group() == "") && (table->GetSchema().admin() == "")) || + (request->has_user_token() && CheckUserPermissionOnTable(request->user_token(), table))) { + return true; + } else { + std::string token = request->has_user_token() ? request->user_token() : ""; + LOG(WARNING) << "[acl]" << user_manager_->TokenToUserName(token) << ":" << token << "fail to " + << operate; + return false; + } + } + + private: + typedef std::function QueryClosure; + typedef std::function UpdateClosure; + + void SafeModeCmdCtrl(const CmdCtrlRequest* request, CmdCtrlResponse* response); + void ReloadConfig(CmdCtrlResponse* response); + void KickTabletNodeCmdCtrl(const CmdCtrlRequest* request, CmdCtrlResponse* response); + void TabletCmdCtrl(const CmdCtrlRequest* request, CmdCtrlResponse* response); + void TableCmdCtrl(const CmdCtrlRequest* request, CmdCtrlResponse* response); + void MetaCmdCtrl(const CmdCtrlRequest* request, CmdCtrlResponse* response); + void DfsHardLimitCmdCtrl(const CmdCtrlRequest* request, CmdCtrlResponse* response); + void ProcedureLimitCmdCtrl(const CmdCtrlRequest* request, CmdCtrlResponse* response); + + bool LoadTabletSync(const TabletMeta& meta, const TableSchema& schema, StatusCode* status); + bool UnloadTabletSync(const std::string& table_name, const std::string& key_start, + const std::string& key_end, const std::string& server_addr, StatusCode* status); - bool UnloadTabletSync(const std::string& table_name, - const std::string& key_start, - const std::string& key_end, - const std::string& server_addr, StatusCode* status); - - - void TryReleaseCache(bool enbaled_debug = false); - void ReleaseCacheWrapper(); - void EnableReleaseCacheTimer(); - void DisableReleaseCacheTimer(); - void EnableLoadBalance(); - void DisableLoadBalance(); - - void InitAsync(); - - bool CreateAndLoadTable(const std::string& table_name, - bool compress, StoreMedium store, StatusCode* status); - - bool RemoveTablet(const TabletMeta& meta, StatusCode* status); - - void ScheduleLoadBalance(); - void LoadBalance(); - uint32_t LoadBalance(Scheduler* scheduler, - uint32_t max_move_num, uint32_t max_round_num, - std::vector& tabletnode_list, - std::vector& tablet_list, - const std::string& table_name = ""); - bool TabletNodeLoadBalance(TabletNodePtr tabletnode, Scheduler* scheduler, - const std::vector& tablet_list, - const std::string& table_name = ""); - - void ScheduleQueryTabletNode(); - void QueryTabletNode(); - void QueryTabletNodeAsync(std::string addr, int32_t timeout, - bool is_gc, QueryClosure done); - - void QueryTabletNodeCallback(std::string addr, QueryRequest* request, - QueryResponse* response, bool failed, - int error_code); - void CollectTabletInfoCallback(std::string addr, - std::vector* tablet_list, - sem_t* finish_counter, Mutex* mutex, - QueryRequest* request, - QueryResponse* response, bool failed, - int error_code); - void RetryCollectTabletInfo(std::string addr, - std::vector* tablet_list, - sem_t* finish_counter, Mutex* mutex); - - void AddMetaCallback(std::vector tablets, - CreateTableResponse* rpc_response, - google::protobuf::Closure* rpc_done, - bool succ); - void AddUserInfoToMetaCallback(UserPtr user_ptr, - const OperateUserRequest* rpc_request, - OperateUserResponse* rpc_response, - google::protobuf::Closure* rpc_done, - bool succ); - - void UpdateTableRecordForEnableCallback(TablePtr table, - EnableTableResponse* rpc_response, - google::protobuf::Closure* rpc_done, - bool succ); - - void UpdateTableRecordForUpdateCallback(TablePtr table, - UpdateTableResponse* rpc_response, - google::protobuf::Closure* rpc_done, - bool succ); - - void DisableAllTablets(TablePtr table); - - void UpdateSchemaCallback(std::string table_name, - std::string tablet_path, - std::string start_key, - std::string end_key, - int32_t retry_times, - UpdateRequest* request, - UpdateResponse* response, - bool rpc_failed, int status_code); - void NoticeTabletNodeSchemaUpdatedAsync(TabletPtr tablet, - UpdateClosure done); - void NoticeTabletNodeSchemaUpdated(TablePtr table); - void NoticeTabletNodeSchemaUpdated(TabletPtr tablet); - - // load metabale to master memory - bool LoadMetaTable(const std::string& meta_tablet_addr, - StatusCode* ret_status); - bool LoadMetaTableFromFile(const std::string& filename, - StatusCode* ret_status = NULL); - bool ReadFromStream(std::ifstream& ifs, - std::string* key, - std::string* value); - - // load metatable on a tabletserver - bool LoadMetaTablet(std::string* server_addr); - void UnloadMetaTablet(const std::string& server_addr); - void RestartTabletNode(const std::string& addr, const std::string& uuid); - - void AddTabletNode(const std::string& tabletnode_addr, - const std::string& tabletnode_id); - void DeleteTabletNode(const std::string& tabletnode_addr, const std::string& uuid); - void KickTabletNode(TabletNodePtr node); - void TryEnterSafeMode(); - void TryLeaveSafeMode(); - bool EnterSafeMode(StatusCode* status = NULL); - bool LeaveSafeMode(StatusCode* status = NULL); - void TryMovePendingTablet(TabletPtr tablet); - double LiveNodeTabletRatio(); - void LoadAllDeadNodeTablets(); - - void CollectAllTabletInfo(const std::map& tabletnode_list, - std::vector* tablet_list); - bool RestoreMetaTablet(const std::vector& tablet_list); - - void RestoreUserTablet(const std::vector& report_tablet_list); - - - bool CheckStatusSwitch(MasterStatus old_status, MasterStatus new_status); - - // garbage clean - void EnableGcTrashCleanTimer(); - void DisableGcTrashCleanTimer(); - void ScheduleGcTrashClean(); - void DoGcTrashClean(); - void EnableTabletNodeGcTimer(); - void DisableTabletNodeGcTimer(); - void ScheduleTabletNodeGc(); - void DoTabletNodeGc(); - void DoTabletNodeGcPhase2(); - - bool CheckUserPermissionOnTable(const std::string& token, TablePtr table); - - - void RefreshTableCounter(); - - void DoAvailableCheck(); - void ScheduleAvailableCheck(); - void EnableAvailabilityCheck(); - void DeleteTablet(TabletPtr tablet); - void CopyTableMetaToUser(TablePtr table, TableMeta* meta_ptr); - //bool IsUpdateCf(TablePtr table); - -private: - mutable Mutex status_mutex_; - MasterStatus status_; - std::string local_addr_; - - std::shared_ptr thread_pool_; - - mutable Mutex tabletnode_mutex_; - bool restored_; - std::shared_ptr tablet_manager_; - std::shared_ptr tabletnode_manager_; - std::shared_ptr user_manager_; - std::shared_ptr zk_adapter_; - std::shared_ptr size_scheduler_; - std::shared_ptr load_scheduler_; - - Mutex mutex_; - int64_t release_cache_timer_id_; - Counter this_sequence_id_; - - bool query_enabled_; - scoped_ptr query_thread_pool_; - int64_t start_query_time_; - int64_t query_tabletnode_timer_id_; - Counter query_pending_count_; - - bool load_balance_scheduled_; - bool load_balance_enabled_; - - mutable Mutex tabletnode_timer_mutex_; - std::map tabletnode_timer_id_map_; - - mutable Mutex tablet_mutex_; - - MetaTabletPtr meta_tablet_; - - // stat table - std::shared_ptr stat_table_; - - // tabletnode garbage clean - bool gc_trash_clean_enabled_; - int64_t gc_trash_clean_timer_id_; - bool gc_enabled_; - int64_t gc_timer_id_; - bool gc_query_enable_; - std::shared_ptr gc_strategy_; - - - std::shared_ptr executor_; - std::shared_ptr tablet_availability_; -}; -bool TryLoadTablet(TabletPtr tablet, TabletNodePtr node = TabletNodePtr(nullptr)); + void TryReleaseCache(bool enbaled_debug = false); + void ReleaseCacheWrapper(); + void EnableReleaseCacheTimer(); + void DisableReleaseCacheTimer(); + void EnableLoadBalance(); + void DisableLoadBalance(); + + void InitAsync(); + + bool CreateAndLoadTable(const std::string& table_name, bool compress, StoreMedium store, + StatusCode* status); + + bool RemoveTablet(const TabletMeta& meta, StatusCode* status); + + void ScheduleLoadBalance(); + void LoadBalance(); + uint32_t LoadBalance(Scheduler* scheduler, uint32_t max_move_num, uint32_t max_round_num, + std::vector& tabletnode_list, + std::vector& tablet_list, const std::string& table_name = ""); + bool TabletNodeLoadBalance(TabletNodePtr tabletnode, Scheduler* scheduler, + const std::vector& tablet_list, + const std::string& table_name = ""); + + void ScheduleQueryTabletNode(); + void QueryTabletNode(); + void QueryTabletNodeAsync(std::string addr, int32_t timeout, bool is_gc, QueryClosure done); + + void QueryTabletNodeCallback(std::string addr, QueryRequest* req, QueryResponse* res, bool failed, + int error_code); + void CollectTabletInfoCallback(std::string addr, std::vector* tablet_list, + sem_t* finish_counter, Mutex* mutex, QueryRequest* request, + QueryResponse* response, bool failed, int error_code); + void RetryCollectTabletInfo(std::string addr, std::vector* tablet_list, + sem_t* finish_counter, Mutex* mutex); + + void AddUserInfoToMetaCallback(UserPtr user_ptr, const OperateUserRequest* rpc_request, + OperateUserResponse* rpc_response, + google::protobuf::Closure* rpc_done, bool succ); + + void UpdateTableRecordForEnableCallback(TablePtr table, EnableTableResponse* rpc_response, + google::protobuf::Closure* rpc_done, bool succ); + + void UpdateTableRecordForUpdateCallback(TablePtr table, UpdateTableResponse* rpc_response, + google::protobuf::Closure* rpc_done, bool succ); + + void DisableAllTablets(TablePtr table); + + void UpdateSchemaCallback(std::string table_name, std::string tablet_path, std::string start_key, + std::string end_key, int32_t retry_times, UpdateRequest* request, + UpdateResponse* response, bool rpc_failed, int status_code); + void NoticeTabletNodeSchemaUpdatedAsync(TabletPtr tablet, UpdateClosure done); + void NoticeTabletNodeSchemaUpdated(TablePtr table); + void NoticeTabletNodeSchemaUpdated(TabletPtr tablet); + + // load metabale to master memory + bool LoadMetaTable(const std::string& meta_tablet_addr, StatusCode* ret_status); + bool LoadMetaTableFromFile(const std::string& filename, StatusCode* ret_status = NULL); + bool ReadFromStream(std::ifstream& ifs, std::string* key, std::string* value); + + // load metatable on a tabletserver + bool LoadMetaTablet(std::string* server_addr); + void UnloadMetaTablet(const std::string& server_addr); + void RestartTabletNode(const std::string& addr, const std::string& uuid); -bool TryUnloadTablet(TabletPtr tablet); + void AddTabletNode(const std::string& tabletnode_addr, const std::string& tabletnode_id); + void DeleteTabletNode(const std::string& tabletnode_addr, const std::string& uuid); -bool TryMoveTablet(TabletPtr tablet, TabletNodePtr node = TabletNodePtr(nullptr)); + void MoveTabletOnDeadTabletNode(const std::vector& tablet_list, + TabletNodePtr dead_node); -bool TryMergeTablet(TabletPtr tablet); + bool TryEnterSafeMode(); -bool TrySplitTablet(TabletPtr tablet, std::string split_key = ""); + void TryLeaveSafeMode(); + bool EnterSafeMode(const MasterEvent event, StatusCode* status = NULL); + bool LeaveSafeMode(const MasterEvent event, StatusCode* status = NULL); + + void SetSafeModeTTLTask(int64_t delay_minute); + void CancelSafeModeTTLTask(); + + void TryMovePendingTablet(TabletPtr tablet); + double LiveNodeTabletRatio(); + void LoadAllDeadNodeTablets(); + + void CollectAllTabletInfo(const std::map& tabletnode_list, + std::vector* tablet_list); + bool RestoreMetaTablet(const std::vector& tablet_list); + + void RestoreUserTablet(const std::vector& report_tablet_list); + + // garbage clean + void EnableGcTrashCleanTimer(); + void DisableGcTrashCleanTimer(); + void ScheduleGcTrashClean(); + void DoGcTrashClean(); + void EnableTabletNodeGcTimer(); + void DisableTabletNodeGcTimer(); + void ScheduleTabletNodeGc(); + void DoTabletNodeGc(); + void DoTabletNodeGcPhase2(); + + bool CheckUserPermissionOnTable(const std::string& token, TablePtr table); + + void RefreshTableCounter(); + + void DoAvailableCheck(); + void ScheduleAvailableCheck(); + void EnableAvailabilityCheck(); + void DeleteTablet(TabletPtr tablet); + void CopyTableMetaToUser(TablePtr table, TableMeta* meta_ptr); + + void ScheduleDelayAddNode(); + void DoDelayAddNode(); + + bool TryLoadTablet(TabletPtr tablet, TabletNodePtr node = TabletNodePtr(nullptr)); + + bool TryUnloadTablet(TabletPtr tablet); + + bool TryMoveTablet(TabletPtr tablet, TabletNodePtr node = TabletNodePtr(nullptr)); + + bool TryMergeTablet(TabletPtr tablet); + + bool TrySplitTablet(TabletPtr tablet, std::string split_key = ""); + + private: + mutable Mutex status_mutex_; + MasterStateMachine state_machine_; + + // MasterStatus status_; + std::string local_addr_; + + std::shared_ptr thread_pool_; + + mutable Mutex tabletnode_mutex_; + bool restored_; + std::shared_ptr tablet_manager_; + std::shared_ptr tabletnode_manager_; + std::shared_ptr user_manager_; + std::shared_ptr zk_adapter_; + std::shared_ptr size_scheduler_; + std::shared_ptr load_scheduler_; + + Mutex mutex_; + int64_t release_cache_timer_id_; + Counter this_sequence_id_; + + bool query_enabled_; + scoped_ptr query_thread_pool_; + int64_t start_query_time_; + int64_t query_tabletnode_timer_id_; + Counter query_pending_count_; + Counter update_auth_pending_count_; + Counter update_quota_pending_count_; + + bool load_balance_scheduled_; + bool load_balance_enabled_; + + mutable Mutex tabletnode_timer_mutex_; + std::map tabletnode_timer_id_map_; + + mutable Mutex tablet_mutex_; + + MetaTabletPtr meta_tablet_; + + std::mutex kick_mutex_; + + // stat table + std::shared_ptr stat_table_; + + // tabletnode garbage clean + bool gc_trash_clean_enabled_; + int64_t gc_trash_clean_timer_id_; + bool gc_enabled_; + int64_t gc_timer_id_; + bool gc_query_enable_; + + std::shared_ptr executor_; + std::shared_ptr tablet_availability_; + + std::shared_ptr access_entry_; + std::shared_ptr access_builder_; + std::shared_ptr quota_entry_; + std::unique_ptr abnormal_node_mgr_; + int64_t running_guard_timestamp_ = 0; + int64_t safemode_ttl_taskid_ = -1; +}; -} // namespace master -} // namespace tera +} // namespace master +} // namespace tera -#endif // TERA_MASTER_MASTER_IMPL_H_ +#endif // TERA_MASTER_MASTER_IMPL_H_ diff --git a/src/master/master_state_machine.cc b/src/master/master_state_machine.cc new file mode 100644 index 000000000..305f0e345 --- /dev/null +++ b/src/master/master_state_machine.cc @@ -0,0 +1,41 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "master/master_state_machine.h" + +namespace tera { +namespace master { + +static MasterStateMachine::MasterStateTransitionRulesType s_master_state_transition_rules; + +const MasterStateMachine::MasterStateTransitionRulesType MasterStateMachine::state_transitions_( + std::move(s_master_state_transition_rules.AddTransitionRule(kIsSecondary, + MasterEvent::kGetMasterLock, + kOnRestore) + .AddTransitionRule(kOnRestore, MasterEvent::kNoAvailTs, kOnWait) + .AddTransitionRule(kOnRestore, MasterEvent::kMetaRestored, kIsReadonly) + .AddTransitionRule(kOnRestore, MasterEvent::kLostMasterLock, kIsSecondary) + .AddTransitionRule(kOnWait, MasterEvent::kAvailTs, kOnRestore) + .AddTransitionRule(kOnWait, MasterEvent::kLostMasterLock, kIsSecondary) + .AddTransitionRule(kIsReadonly, MasterEvent::kLeaveSafemode, kIsRunning) + .AddTransitionRule(kIsReadonly, MasterEvent::kLostMasterLock, kIsSecondary) + .AddTransitionRule(kIsRunning, MasterEvent::kEnterSafemode, kIsReadonly) + .AddTransitionRule(kIsRunning, MasterEvent::kLostMasterLock, kIsSecondary) + .AddTransitionRule(kIsReadonly, MasterEvent::kLostMasterLock, kIsSecondary))); + +std::ostream& operator<<(std::ostream& o, const MasterEvent event) { + static const char* msg[] = {"MasterEvent::kGetMasterLock", "MasterEvent::kLostMasterLock", + "MasterEvent::kNoAvailTs", "MasterEvent::kAvailTs", + "MasterEvent::kMetaRestored", "MasterEvent::kEnterSafemode", + "MasterEvent::kLeaveSafemode", "MasterEvent::kUnknown"}; + static uint32_t msg_size = sizeof(msg) / sizeof(const char*); + typedef std::underlying_type::type UnderType; + uint32_t index = + static_cast(event) - static_cast(MasterEvent::kGetMasterLock); + index = index < msg_size ? index : msg_size - 1; + o << msg[index]; + return o; +} +} +} diff --git a/src/master/master_state_machine.h b/src/master/master_state_machine.h new file mode 100644 index 000000000..5de77584f --- /dev/null +++ b/src/master/master_state_machine.h @@ -0,0 +1,63 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include +#include +#include "master/state_machine.h" +#include "proto/status_code.pb.h" + +namespace tera { +namespace master { + +enum MasterStatus { + kIsSecondary = kMasterIsSecondary, + kIsRunning = kMasterIsRunning, + kOnRestore = kMasterOnRestore, + kOnWait = kMasterOnWait, + kIsReadonly = kMasterIsReadonly, +}; + +enum class MasterEvent { + kGetMasterLock, + kLostMasterLock, + kNoAvailTs, + kAvailTs, + kMetaRestored, + kEnterSafemode, + kLeaveSafemode +}; + +std::ostream& operator<<(std::ostream& o, const MasterEvent event); + +class MasterStateMachine { + public: + MasterStateMachine(MasterStatus init_status) : curr_status_(init_status) {} + ~MasterStateMachine() {} + + bool DoStateTransition(const MasterEvent event, MasterStatus* status) { + *status = curr_status_; + return DoStateTransition(event); + } + + bool DoStateTransition(const MasterEvent event) { + MasterStatus post_status; + if (!state_transitions_.DoStateTransition(curr_status_, event, &post_status)) { + return false; + } + curr_status_ = post_status; + return true; + } + + MasterStatus GetState() const { return curr_status_; } + + typedef StateTransitionRules MasterStateTransitionRulesType; + + private: + MasterStatus curr_status_; + const static MasterStateTransitionRulesType state_transitions_; +}; +} +} diff --git a/src/master/master_zk_adapter.cc b/src/master/master_zk_adapter.cc index 1ebbbf0d4..e49c02a56 100644 --- a/src/master/master_zk_adapter.cc +++ b/src/master/master_zk_adapter.cc @@ -22,756 +22,706 @@ DECLARE_int64(tera_master_ins_session_timeout); namespace tera { namespace master { -MasterZkAdapter::MasterZkAdapter(MasterImpl * master_impl, - const std::string& server_addr) - : master_impl_(master_impl), server_addr_(server_addr) { -} +MasterZkAdapter::MasterZkAdapter(MasterImpl* master_impl, const std::string& server_addr) + : master_impl_(master_impl), server_addr_(server_addr) {} -MasterZkAdapter::~MasterZkAdapter() { -} +MasterZkAdapter::~MasterZkAdapter() {} bool MasterZkAdapter::Init(std::string* root_tablet_addr, - std::map* tabletnode_list, - bool* safe_mode) { - MutexLock lock(&mutex_); + std::map* tabletnode_list, bool* safe_mode) { + MutexLock lock(&mutex_); - if (!Setup()) { - return false; - } + if (!Setup()) { + return false; + } - if (!LockMasterLock()) { - Reset(); - return false; - } + if (!LockMasterLock()) { + Reset(); + return false; + } - if (!WatchMasterLock()) { - UnlockMasterLock(); - Reset(); - return false; - } + if (!WatchMasterLock()) { + UnlockMasterLock(); + Reset(); + return false; + } - if (!CreateMasterNode()) { - UnlockMasterLock(); - Reset(); - return false; - } + if (!CreateMasterNode()) { + UnlockMasterLock(); + Reset(); + return false; + } - bool root_tablet_node_exist = false; - if (!WatchRootTabletNode(&root_tablet_node_exist, root_tablet_addr)) { - DeleteMasterNode(); - UnlockMasterLock(); - Reset(); - return false; - } + bool root_tablet_node_exist = false; + if (!WatchRootTabletNode(&root_tablet_node_exist, root_tablet_addr)) { + DeleteMasterNode(); + UnlockMasterLock(); + Reset(); + return false; + } - if (!WatchSafeModeMark(safe_mode)) { - DeleteMasterNode(); - UnlockMasterLock(); - Reset(); - return false; - } + if (!WatchSafeModeMark(safe_mode)) { + DeleteMasterNode(); + UnlockMasterLock(); + Reset(); + return false; + } - if (!WatchTabletNodeList(tabletnode_list)) { - DeleteMasterNode(); - UnlockMasterLock(); - Reset(); - return false; - } + if (!WatchTabletNodeList(tabletnode_list)) { + DeleteMasterNode(); + UnlockMasterLock(); + Reset(); + return false; + } - return true; + return true; } bool MasterZkAdapter::Setup() { - LOG(INFO) << "try init zk..."; - int zk_errno = zk::ZE_OK; - int32_t retry_count = 0; - while (!ZooKeeperAdapter::Init(FLAGS_tera_zk_addr_list, - FLAGS_tera_zk_root_path, - FLAGS_tera_zk_timeout, - server_addr_, &zk_errno)) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to init zk: " << zk::ZkErrnoToString(zk_errno); - return false; - } - LOG(ERROR) << "init zk fail: " << zk::ZkErrnoToString(zk_errno) - << ". retry in " << FLAGS_tera_zk_retry_period << " ms, retry: " - << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; - } - LOG(INFO) << "init zk success"; - return true; + LOG(INFO) << "try init zk..."; + int zk_errno = zk::ZE_OK; + int32_t retry_count = 0; + while (!ZooKeeperAdapter::Init(FLAGS_tera_zk_addr_list, FLAGS_tera_zk_root_path, + FLAGS_tera_zk_timeout, server_addr_, &zk_errno)) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to init zk: " << zk::ZkErrnoToString(zk_errno); + return false; + } + LOG(ERROR) << "init zk fail: " << zk::ZkErrnoToString(zk_errno) << ". retry in " + << FLAGS_tera_zk_retry_period << " ms, retry: " << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + LOG(INFO) << "init zk success"; + return true; } -void MasterZkAdapter::Reset() { - Finalize(); -} +void MasterZkAdapter::Reset() { Finalize(); } bool MasterZkAdapter::LockMasterLock() { - LOG(INFO) << "try lock master-lock..."; - int32_t retry_count = 0; - int zk_errno = zk::ZE_OK; - while (!SyncLock(kMasterLockPath, &zk_errno, -1)) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to acquire master lock " << zk::ZkErrnoToString(zk_errno); - return false; - } - LOG(ERROR) << "retry lock master-lock in " - << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; - } - LOG(INFO) << "acquire master lock success"; - return true; + LOG(INFO) << "try lock master-lock..."; + int32_t retry_count = 0; + int zk_errno = zk::ZE_OK; + while (!SyncLock(kMasterLockPath, &zk_errno, -1)) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to acquire master lock " << zk::ZkErrnoToString(zk_errno); + return false; + } + LOG(ERROR) << "retry lock master-lock in " << FLAGS_tera_zk_retry_period + << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + LOG(INFO) << "acquire master lock success"; + return true; } bool MasterZkAdapter::UnlockMasterLock() { - LOG(INFO) << "try release master-lock..."; - int32_t retry_count = 0; - int zk_errno = zk::ZE_OK; - while (!Unlock(kMasterLockPath, &zk_errno)) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to release master-lock"; - return false; - } - LOG(ERROR) << "retry unlock master-lock in " - << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; - } - LOG(INFO) << "release master-lock success"; - return true; + LOG(INFO) << "try release master-lock..."; + int32_t retry_count = 0; + int zk_errno = zk::ZE_OK; + while (!Unlock(kMasterLockPath, &zk_errno)) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to release master-lock"; + return false; + } + LOG(ERROR) << "retry unlock master-lock in " << FLAGS_tera_zk_retry_period + << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + LOG(INFO) << "release master-lock success"; + return true; } bool MasterZkAdapter::CreateMasterNode() { - LOG(INFO) << "try create master node..."; - int32_t retry_count = 0; - int zk_errno = zk::ZE_OK; - while (!CreateEphemeralNode(kMasterNodePath, server_addr_, &zk_errno)) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to create master node " << zk::ZkErrnoToString(zk_errno); - return false; - } - LOG(ERROR) << "retry create master node in " - << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; - } - LOG(INFO) << "create master node success"; - return true; + LOG(INFO) << "try create master node..."; + int32_t retry_count = 0; + int zk_errno = zk::ZE_OK; + while (!CreateEphemeralNode(kMasterNodePath, server_addr_, &zk_errno)) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to create master node " << zk::ZkErrnoToString(zk_errno); + return false; + } + LOG(ERROR) << "retry create master node in " << FLAGS_tera_zk_retry_period + << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + LOG(INFO) << "create master node success"; + return true; } bool MasterZkAdapter::DeleteMasterNode() { - LOG(INFO) << "try delete master node..."; - int32_t retry_count = 0; - int zk_errno = zk::ZE_OK; - while (!DeleteNode(kMasterNodePath, &zk_errno) - && zk_errno != zk::ZE_NOT_EXIST) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to delete master node " << zk::ZkErrnoToString(zk_errno); - return false; - } - LOG(ERROR) << "retry delete master node in " - << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; - } - LOG(INFO) << "delete master node success"; - return true; + LOG(INFO) << "try delete master node..."; + int32_t retry_count = 0; + int zk_errno = zk::ZE_OK; + while (!DeleteNode(kMasterNodePath, &zk_errno) && zk_errno != zk::ZE_NOT_EXIST) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to delete master node " << zk::ZkErrnoToString(zk_errno); + return false; + } + LOG(ERROR) << "retry delete master node in " << FLAGS_tera_zk_retry_period + << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + LOG(INFO) << "delete master node success"; + return true; +} + +bool MasterZkAdapter::KickTabletServer(const std::string& ts_host, const std::string& ts_zk_id) { + MutexLock lock(&mutex_); + int32_t retry_count = 0; + int zk_errno = zk::ZE_OK; + while (!CreatePersistentNode(kKickPath + "/" + ts_zk_id, ts_host, &zk_errno) && + zk_errno != zk::ZE_EXIST) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to kick ts [" << ts_host << "] " << zk::ZkErrnoToString(zk_errno); + return false; + } + LOG(ERROR) << "retry kick ts in " << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + LOG(INFO) << "kick ts [" << ts_host << "] success"; + return true; } -bool MasterZkAdapter::KickTabletServer(const std::string& ts_host, - const std::string& ts_zk_id) { - MutexLock lock(&mutex_); - int32_t retry_count = 0; - int zk_errno = zk::ZE_OK; - while (!CreatePersistentNode(kKickPath + "/" + ts_zk_id, ts_host, &zk_errno) - && zk_errno != zk::ZE_EXIST) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to kick ts [" << ts_host << "] " - << zk::ZkErrnoToString(zk_errno); - return false; - } - LOG(ERROR) << "retry kick ts in " - << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; - } - LOG(INFO) << "kick ts [" << ts_host << "] success"; - return true; +bool MasterZkAdapter::MarkSafeMode() { + MutexLock lock(&mutex_); + LOG(INFO) << "try mark safemode..."; + int32_t retry_count = 0; + int zk_errno = zk::ZE_OK; + while (!CreatePersistentNode(kSafeModeNodePath, "safemode", &zk_errno) && + zk_errno != zk::ZE_EXIST) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to mark safemode " << zk::ZkErrnoToString(zk_errno); + return false; + } + LOG(ERROR) << "retry mark safemode in " << FLAGS_tera_zk_retry_period + << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + LOG(INFO) << "mark safemode success"; + return true; } -bool MasterZkAdapter::MarkSafeMode() { - MutexLock lock(&mutex_); - LOG(INFO) << "try mark safemode..."; - int32_t retry_count = 0; - int zk_errno = zk::ZE_OK; - while (!CreatePersistentNode(kSafeModeNodePath, "safemode", &zk_errno) - && zk_errno != zk::ZE_EXIST) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to mark safemode " << zk::ZkErrnoToString(zk_errno); - return false; - } - LOG(ERROR) << "retry mark safemode in " - << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; - } - LOG(INFO) << "mark safemode success"; - return true; +bool MasterZkAdapter::HasSafeModeNode() { + MutexLock lock(&mutex_); + std::string value; + bool is_exist = false; + int error_code; + CHECK(CheckExist(kSafeModeNodePath, &is_exist, &error_code)); + return is_exist; } bool MasterZkAdapter::UnmarkSafeMode() { - MutexLock lock(&mutex_); - LOG(INFO) << "try unmark safemode..."; - int zk_errno = zk::ZE_OK; - int32_t retry_count = 0; - while (!DeleteNode(kSafeModeNodePath, &zk_errno) - && zk_errno != zk::ZE_NOT_EXIST) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to unmark safemode " << zk::ZkErrnoToString(zk_errno); - return false; - } - LOG(ERROR) << "retry unmark safemode in " - << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; - } - LOG(INFO) << "unmark safemode success"; - return true; + MutexLock lock(&mutex_); + LOG(INFO) << "try unmark safemode..."; + int zk_errno = zk::ZE_OK; + int32_t retry_count = 0; + while (!DeleteNode(kSafeModeNodePath, &zk_errno) && zk_errno != zk::ZE_NOT_EXIST) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to unmark safemode " << zk::ZkErrnoToString(zk_errno); + return false; + } + LOG(ERROR) << "retry unmark safemode in " << FLAGS_tera_zk_retry_period + << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + LOG(INFO) << "unmark safemode success"; + return true; } bool MasterZkAdapter::UpdateRootTabletNode(const std::string& root_tablet_addr) { - MutexLock lock(&mutex_); - LOG(INFO) << "try update root node..."; - int32_t retry_count = 0; - int zk_errno = zk::ZE_OK; - while (!WriteNode(kRootTabletNodePath, root_tablet_addr, &zk_errno) - && zk_errno != zk::ZE_NOT_EXIST) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(INFO) << "fail to update root node " << zk::ZkErrnoToString(zk_errno); - return false; - } - LOG(ERROR) << "retry update root node in " - << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; - } - if (zk_errno == zk::ZE_OK) { - LOG(INFO) << "update root node success"; - return true; - } - - LOG(INFO) << "root node not exist, try create root node..."; - retry_count = 0; + MutexLock lock(&mutex_); + LOG(INFO) << "try update root node..."; + int32_t retry_count = 0; + int zk_errno = zk::ZE_OK; + while (!WriteNode(kRootTabletNodePath, root_tablet_addr, &zk_errno) && + zk_errno != zk::ZE_NOT_EXIST) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(INFO) << "fail to update root node " << zk::ZkErrnoToString(zk_errno); + return false; + } + LOG(ERROR) << "retry update root node in " << FLAGS_tera_zk_retry_period + << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); zk_errno = zk::ZE_OK; - while (!CreatePersistentNode(kRootTabletNodePath, root_tablet_addr, - &zk_errno)) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to create root node " << zk::ZkErrnoToString(zk_errno); - return false; - } - LOG(ERROR) << "retry create root node in " - << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; - } - LOG(INFO) << "create root node success"; + } + if (zk_errno == zk::ZE_OK) { + LOG(INFO) << "update root node success"; return true; -} - -bool MasterZkAdapter::WatchRootTabletNode(bool* is_exist, - std::string* root_tablet_addr) { - LOG(INFO) << "try check root node exist..."; - int32_t retry_count = 0; - int zk_errno = zk::ZE_OK; - while (!CheckAndWatchExist(kRootTabletNodePath, is_exist, &zk_errno)) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to check root node exist " << zk::ZkErrnoToString(zk_errno); - return false; - } - LOG(ERROR) << "retry check root node exist in " - << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; - } - if (!*is_exist) { - LOG(INFO) << "root node not exist"; - return true; - } - - LOG(INFO) << "root node exist, try read root node..."; - retry_count = 0; + } + + LOG(INFO) << "root node not exist, try create root node..."; + retry_count = 0; + zk_errno = zk::ZE_OK; + while (!CreatePersistentNode(kRootTabletNodePath, root_tablet_addr, &zk_errno)) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to create root node " << zk::ZkErrnoToString(zk_errno); + return false; + } + LOG(ERROR) << "retry create root node in " << FLAGS_tera_zk_retry_period + << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); zk_errno = zk::ZE_OK; - while (!ReadAndWatchNode(kRootTabletNodePath, root_tablet_addr, &zk_errno) - && zk_errno != zk::ZE_NOT_EXIST) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to read root node " << zk::ZkErrnoToString(zk_errno); - return false; - } - LOG(ERROR) << "retry read root node in " - << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; - } - if (zk_errno == zk::ZE_NOT_EXIST) { - *is_exist = false; - LOG(INFO) << "root node not exist"; - return true; - } - LOG(INFO) << "root node value=[" << *root_tablet_addr << "]"; + } + LOG(INFO) << "create root node success"; + return true; +} + +bool MasterZkAdapter::WatchRootTabletNode(bool* is_exist, std::string* root_tablet_addr) { + LOG(INFO) << "try check root node exist..."; + int32_t retry_count = 0; + int zk_errno = zk::ZE_OK; + while (!CheckAndWatchExist(kRootTabletNodePath, is_exist, &zk_errno)) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to check root node exist " << zk::ZkErrnoToString(zk_errno); + return false; + } + LOG(ERROR) << "retry check root node exist in " << FLAGS_tera_zk_retry_period + << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + if (!*is_exist) { + LOG(INFO) << "root node not exist"; return true; + } + + LOG(INFO) << "root node exist, try read root node..."; + retry_count = 0; + zk_errno = zk::ZE_OK; + while (!ReadAndWatchNode(kRootTabletNodePath, root_tablet_addr, &zk_errno) && + zk_errno != zk::ZE_NOT_EXIST) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to read root node " << zk::ZkErrnoToString(zk_errno); + return false; + } + LOG(ERROR) << "retry read root node in " << FLAGS_tera_zk_retry_period + << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + if (zk_errno == zk::ZE_NOT_EXIST) { + *is_exist = false; + LOG(INFO) << "root node not exist"; + return true; + } + LOG(INFO) << "root node value=[" << *root_tablet_addr << "]"; + return true; } bool MasterZkAdapter::WatchSafeModeMark(bool* is_safemode) { - LOG(INFO) << "try watch safemode mark..."; - int32_t retry_count = 0; - int zk_errno = zk::ZE_OK; - while (!CheckAndWatchExist(kSafeModeNodePath, is_safemode, &zk_errno)) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to watch safemode mark" << zk::ZkErrnoToString(zk_errno); - return false; - } - LOG(ERROR) << "retry watch safe mode mark in " - << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; - } - LOG(INFO) << "watch safemode success"; - return true; + LOG(INFO) << "try watch safemode mark..."; + int32_t retry_count = 0; + int zk_errno = zk::ZE_OK; + while (!CheckAndWatchExist(kSafeModeNodePath, is_safemode, &zk_errno)) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to watch safemode mark" << zk::ZkErrnoToString(zk_errno); + return false; + } + LOG(ERROR) << "retry watch safe mode mark in " << FLAGS_tera_zk_retry_period + << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + LOG(INFO) << "watch safemode success"; + return true; } bool MasterZkAdapter::WatchTabletNodeList(std::map* tabletnode_list) { - LOG(INFO) << "try watch tabletnode list..."; - std::vector name_list; - std::vector data_list; - int32_t retry_count = 0; - int zk_errno = zk::ZE_OK; - while (!ListAndWatchChildren(kTsListPath, &name_list, &data_list, - &zk_errno)) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to watch tabletnode list " << zk::ZkErrnoToString(zk_errno); - return false; - } - LOG(ERROR) << "retry watch tabletnode list in " - << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; - } - size_t list_count = name_list.size(); - for (size_t i = 0; i < list_count; i++) { - const std::string& name = name_list[i]; - const std::string& data = data_list[i]; - int seq_num = zk::ZooKeeperUtil::GetSequenceNo(name); - if (seq_num < 0) { - LOG(ERROR) << "ignore non-sequential node"; - continue; - } - if (data == "") { - LOG(ERROR) << "cannot get value of child : " << name; - continue; - } - // keep larger(newer) sequence id - std::map::iterator it = tabletnode_list->find(data); - if (it != tabletnode_list->end()) { - int prev_seq_num = zk::ZooKeeperUtil::GetSequenceNo(it->second); - if (prev_seq_num > seq_num) { - VLOG(5) << "ignore old node: " << data << " " << name; - continue; - } - } - // TODO: check value - (*tabletnode_list)[data] = name; - } - LOG(INFO) << "watch tabletnode list success"; - return true; + LOG(INFO) << "try watch tabletnode list..."; + std::vector name_list; + std::vector data_list; + int32_t retry_count = 0; + int zk_errno = zk::ZE_OK; + while (!ListAndWatchChildren(kTsListPath, &name_list, &data_list, &zk_errno)) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to watch tabletnode list " << zk::ZkErrnoToString(zk_errno); + return false; + } + LOG(ERROR) << "retry watch tabletnode list in " << FLAGS_tera_zk_retry_period + << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + size_t list_count = name_list.size(); + for (size_t i = 0; i < list_count; i++) { + const std::string& name = name_list[i]; + const std::string& data = data_list[i]; + int seq_num = zk::ZooKeeperUtil::GetSequenceNo(name); + if (seq_num < 0) { + LOG(ERROR) << "ignore non-sequential node"; + continue; + } + if (data == "") { + LOG(ERROR) << "cannot get value of child : " << name; + continue; + } + // keep larger(newer) sequence id + std::map::iterator it = tabletnode_list->find(data); + if (it != tabletnode_list->end()) { + int prev_seq_num = zk::ZooKeeperUtil::GetSequenceNo(it->second); + if (prev_seq_num > seq_num) { + VLOG(5) << "ignore old node: " << data << " " << name; + continue; + } + } + // TODO: check value + (*tabletnode_list)[data] = name; + } + LOG(INFO) << "watch tabletnode list success"; + return true; } bool MasterZkAdapter::WatchMasterLock() { - LOG(INFO) << "watch master lock ..."; - int32_t retry_count = 0; - int zk_errno = zk::ZE_OK; - while (!WatchZkLock(kMasterLockPath, &zk_errno)) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to watch master lock " << zk::ZkErrnoToString(zk_errno); - return false; - } - LOG(ERROR) << "retry watch master-lock in " - << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; - } - LOG(INFO) << "watch master lock success"; - return true; + LOG(INFO) << "watch master lock ..."; + int32_t retry_count = 0; + int zk_errno = zk::ZE_OK; + while (!WatchZkLock(kMasterLockPath, &zk_errno)) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to watch master lock " << zk::ZkErrnoToString(zk_errno); + return false; + } + LOG(ERROR) << "retry watch master-lock in " << FLAGS_tera_zk_retry_period + << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + LOG(INFO) << "watch master lock success"; + return true; } -void MasterZkAdapter::OnSafeModeMarkCreated() { - LOG(ERROR) << "safemode mark node is created"; -} +void MasterZkAdapter::OnSafeModeMarkCreated() { LOG(ERROR) << "safemode mark node is created"; } -void MasterZkAdapter::OnSafeModeMarkDeleted() { - LOG(ERROR) << "safemode mark node is deleted"; -} +void MasterZkAdapter::OnSafeModeMarkDeleted() { LOG(ERROR) << "safemode mark node is deleted"; } void MasterZkAdapter::OnTabletNodeListDeleted() { - LOG(ERROR) << "ts dir node is deleted"; - if (!MarkSafeMode()) { - master_impl_->SetMasterStatus(MasterImpl::kIsSecondary); - master_impl_->DisableQueryTabletNodeTimer(); - DeleteMasterNode(); - UnlockMasterLock(); - Reset(); - } + LOG(ERROR) << "ts dir node is deleted"; + if (!MarkSafeMode()) { + // zookeeper node is not allowed to be deleted unless it has no child node, + // thus once + // ts dir is deleted, master must be in kOnWait status + master_impl_->DisableQueryTabletNodeTimer(); + DeleteMasterNode(); + UnlockMasterLock(); + Reset(); + } } void MasterZkAdapter::OnRootTabletNodeDeleted() { - LOG(ERROR) << "root tablet node is deleted"; - std::string root_tablet_addr; - if (master_impl_->GetMetaTabletAddr(&root_tablet_addr)) { - if (!UpdateRootTabletNode(root_tablet_addr)) { - master_impl_->SetMasterStatus(MasterImpl::kIsSecondary); - master_impl_->DisableQueryTabletNodeTimer(); - DeleteMasterNode(); - UnlockMasterLock(); - Reset(); - } - } else { - LOG(ERROR) << "root tablet not loaded, will not update zk"; - } + LOG(ERROR) << "root tablet node is deleted"; + std::string root_tablet_addr; + if (master_impl_->GetMetaTabletAddr(&root_tablet_addr)) { + if (!UpdateRootTabletNode(root_tablet_addr)) { + // let master behavoir as it lost MasterLock + master_impl_->DoStateTransition(MasterEvent::kLostMasterLock); + master_impl_->DisableQueryTabletNodeTimer(); + DeleteMasterNode(); + UnlockMasterLock(); + Reset(); + } + } else { + LOG(ERROR) << "root tablet not loaded, will not update zk"; + } } void MasterZkAdapter::OnMasterNodeDeleted() { - LOG(ERROR) << "master node deleted"; - // TODO: not support from kRuning to secondary - master_impl_->SetMasterStatus(MasterImpl::kIsSecondary); - master_impl_->DisableQueryTabletNodeTimer(); - UnlockMasterLock(); - Reset(); + LOG(ERROR) << "master node deleted"; + master_impl_->DoStateTransition(MasterEvent::kLostMasterLock); + master_impl_->DisableQueryTabletNodeTimer(); + UnlockMasterLock(); + Reset(); } void MasterZkAdapter::OnZkLockDeleted() { - LOG(ERROR) << "master lock deleted, kill-self"; - master_impl_->DisableQueryTabletNodeTimer(); - Reset(); - _Exit(EXIT_FAILURE); + LOG(ERROR) << "master lock deleted, kill-self"; + master_impl_->DisableQueryTabletNodeTimer(); + Reset(); + _Exit(EXIT_FAILURE); } -void MasterZkAdapter::OnTabletServerKickMarkCreated() { -} +void MasterZkAdapter::OnTabletServerKickMarkCreated() {} -void MasterZkAdapter::OnTabletServerKickMarkDeleted() { -} +void MasterZkAdapter::OnTabletServerKickMarkDeleted() {} -void MasterZkAdapter::OnTabletServerStart(const std::string& ts_host) { -} +void MasterZkAdapter::OnTabletServerStart(const std::string& ts_host) {} -void MasterZkAdapter::OnTabletServerExist(const std::string& ts_host) { -} +void MasterZkAdapter::OnTabletServerExist(const std::string& ts_host) {} void MasterZkAdapter::OnChildrenChanged(const std::string& path, const std::vector& name_list, const std::vector& data_list) { - VLOG(5) << "OnChilerenChanged: path=[" << path << "]"; - if (path.compare(kTsListPath) != 0) { - return; - } - std::map ts_node_list; - - mutex_.Lock(); - size_t list_count = name_list.size(); - for (size_t i = 0; i < list_count; i++) { - const std::string& name = name_list[i]; - const std::string& data = data_list[i]; - int seq_num = zk::ZooKeeperUtil::GetSequenceNo(name); - if (seq_num < 0) { - LOG(ERROR) << "ignore non-sequential node"; - continue; - } - if (data == "") { - LOG(ERROR) << "cannot get value of child : " << name; - continue; - } - // keep larger(newer) sequence id - std::map::iterator it = ts_node_list.find(data); - if (it != ts_node_list.end()) { - int prev_seq_num = zk::ZooKeeperUtil::GetSequenceNo(it->second); - if (prev_seq_num > seq_num) { - VLOG(5) << "ignore old node: " << data << " " << name; - continue; - } - } - // TODO: check value - ts_node_list[data] = name; - } - mutex_.Unlock(); - master_impl_->RefreshTabletNodeList(ts_node_list); -} - -void MasterZkAdapter::OnNodeValueChanged(const std::string& path, - const std::string& value) { - VLOG(5) << "OnNodeValueChanged: path=[" << path << "], value=[" - << value << "]"; - MutexLock lock(&mutex_); + VLOG(5) << "OnChilerenChanged: path=[" << path << "]"; + if (path.compare(kTsListPath) != 0) { + return; + } + std::map ts_node_list; + + mutex_.Lock(); + size_t list_count = name_list.size(); + for (size_t i = 0; i < list_count; i++) { + const std::string& name = name_list[i]; + const std::string& data = data_list[i]; + int seq_num = zk::ZooKeeperUtil::GetSequenceNo(name); + if (seq_num < 0) { + LOG(ERROR) << "ignore non-sequential node"; + continue; + } + if (data == "") { + LOG(ERROR) << "cannot get value of child : " << name; + continue; + } + // keep larger(newer) sequence id + std::map::iterator it = ts_node_list.find(data); + if (it != ts_node_list.end()) { + int prev_seq_num = zk::ZooKeeperUtil::GetSequenceNo(it->second); + if (prev_seq_num > seq_num) { + VLOG(5) << "ignore old node: " << data << " " << name; + continue; + } + } + // TODO: check value + ts_node_list[data] = name; + } + mutex_.Unlock(); + master_impl_->RefreshTabletNodeList(ts_node_list); +} + +void MasterZkAdapter::OnNodeValueChanged(const std::string& path, const std::string& value) { + VLOG(5) << "OnNodeValueChanged: path=[" << path << "], value=[" << value << "]"; + MutexLock lock(&mutex_); } void MasterZkAdapter::OnNodeCreated(const std::string& path) { - VLOG(5) << "OnNodeCreated: path=[" << path << "]"; - MutexLock lock(&mutex_); + VLOG(5) << "OnNodeCreated: path=[" << path << "]"; + MutexLock lock(&mutex_); } void MasterZkAdapter::OnNodeDeleted(const std::string& path) { - VLOG(5) << "OnNodeDeleted: path=[" << path << "]"; - - MutexLock lock(&mutex_); - if (path.compare(kSafeModeNodePath) == 0) { - OnSafeModeMarkDeleted(); - } else if (path.compare(kTsListPath) == 0) { - OnTabletNodeListDeleted(); - } else if (path.compare(kRootTabletNodePath) == 0) { - OnRootTabletNodeDeleted(); - } else if (path.compare(kMasterNodePath) == 0) { - OnMasterNodeDeleted(); - } else { - } -} + VLOG(5) << "OnNodeDeleted: path=[" << path << "]"; -void MasterZkAdapter::OnWatchFailed(const std::string& path, int watch_type, - int err) { - LOG(ERROR) << "OnWatchFailed: path=[" << path << "], watch_type=" - << watch_type << ", err=" << err; - // MutexLock lock(&mutex_); - _Exit(EXIT_FAILURE); + // MutexLock lock(&mutex_); + if (path.compare(kSafeModeNodePath) == 0) { + OnSafeModeMarkDeleted(); + } else if (path.compare(kTsListPath) == 0) { + OnTabletNodeListDeleted(); + } else if (path.compare(kRootTabletNodePath) == 0) { + OnRootTabletNodeDeleted(); + } else if (path.compare(kMasterNodePath) == 0) { + OnMasterNodeDeleted(); + } else { + } } +void MasterZkAdapter::OnWatchFailed(const std::string& path, int watch_type, int err) { + LOG(ERROR) << "OnWatchFailed: path=[" << path << "], watch_type=" << watch_type + << ", err=" << err; + // MutexLock lock(&mutex_); + _Exit(EXIT_FAILURE); +} void MasterZkAdapter::OnSessionTimeout() { - LOG(ERROR) << "zk session timeout!"; - _Exit(EXIT_FAILURE); + LOG(ERROR) << "zk session timeout!"; + _Exit(EXIT_FAILURE); } -FakeMasterZkAdapter::FakeMasterZkAdapter(MasterImpl * master_impl, - const std::string& server_addr) +FakeMasterZkAdapter::FakeMasterZkAdapter(MasterImpl* master_impl, const std::string& server_addr) : master_impl_(master_impl), server_addr_(server_addr) { - fake_path_ = FLAGS_tera_fake_zk_path_prefix + "/"; + fake_path_ = FLAGS_tera_fake_zk_path_prefix + "/"; } -FakeMasterZkAdapter::~FakeMasterZkAdapter() { -} +FakeMasterZkAdapter::~FakeMasterZkAdapter() {} bool FakeMasterZkAdapter::Init(std::string* root_tablet_addr, std::map* tabletnode_list, bool* safe_mode) { - std::string master_lock = fake_path_ + kMasterLockPath; - std::string master_path = fake_path_ + kMasterNodePath; - std::string ts_list_path = fake_path_ + kTsListPath; - std::string kick_path = fake_path_ + kKickPath; - std::string root_path = fake_path_ + kRootTabletNodePath; - - // setup master-lock - if (!IsEmpty(master_lock)) { - LOG(ERROR) << "fake zk error: " << master_lock; - _Exit(EXIT_FAILURE); - } - if (!zk::FakeZkUtil::WriteNode(master_lock + "/0", server_addr_)) { - LOG(ERROR) << "fake zk error: " << master_lock + "/0, " - << server_addr_; - _Exit(EXIT_FAILURE); - } - if (!zk::FakeZkUtil::WriteNode(master_path, server_addr_)) { - LOG(ERROR) << "fake zk error: " << master_path + ", " - << server_addr_; - _Exit(EXIT_FAILURE); - } + std::string master_lock = fake_path_ + kMasterLockPath; + std::string master_path = fake_path_ + kMasterNodePath; + std::string ts_list_path = fake_path_ + kTsListPath; + std::string kick_path = fake_path_ + kKickPath; + std::string root_path = fake_path_ + kRootTabletNodePath; + + // setup master-lock + if (!IsEmpty(master_lock)) { + LOG(ERROR) << "fake zk error: " << master_lock; + _Exit(EXIT_FAILURE); + } + if (!zk::FakeZkUtil::WriteNode(master_lock + "/0", server_addr_)) { + LOG(ERROR) << "fake zk error: " << master_lock + "/0, " << server_addr_; + _Exit(EXIT_FAILURE); + } + if (!zk::FakeZkUtil::WriteNode(master_path, server_addr_)) { + LOG(ERROR) << "fake zk error: " << master_path + ", " << server_addr_; + _Exit(EXIT_FAILURE); + } - // get all ts - std::vector allts; - if (!zk::FakeZkUtil::ListNodes(ts_list_path, &allts) && allts.size() == 0) { - LOG(ERROR) << "fake zk error: " << ts_list_path; - _Exit(EXIT_FAILURE); - } - for (size_t i = 0; i < allts.size(); ++i) { - std::string value; - std::string node_path = ts_list_path + "/" + allts[i]; - if (!zk::FakeZkUtil::ReadNode(node_path, &value)) { - LOG(ERROR) << "fake zk error: " << allts[i]; - _Exit(EXIT_FAILURE); - } - (*tabletnode_list)[value] = allts[i]; + // get all ts + std::vector allts; + if (!zk::FakeZkUtil::ListNodes(ts_list_path, &allts) && allts.size() == 0) { + LOG(ERROR) << "fake zk error: " << ts_list_path; + _Exit(EXIT_FAILURE); + } + for (size_t i = 0; i < allts.size(); ++i) { + std::string value; + std::string node_path = ts_list_path + "/" + allts[i]; + if (!zk::FakeZkUtil::ReadNode(node_path, &value)) { + LOG(ERROR) << "fake zk error: " << allts[i]; + _Exit(EXIT_FAILURE); } + (*tabletnode_list)[value] = allts[i]; + } - return true; + return true; } bool FakeMasterZkAdapter::KickTabletServer(const std::string& ts_host, const std::string& ts_zk_id) { - return true; + return true; } -bool FakeMasterZkAdapter::MarkSafeMode() { - return true; -} +bool FakeMasterZkAdapter::MarkSafeMode() { return true; } -bool FakeMasterZkAdapter::UnmarkSafeMode() { - return true; -} +bool FakeMasterZkAdapter::UnmarkSafeMode() { return true; } bool FakeMasterZkAdapter::UpdateRootTabletNode(const std::string& root_tablet_addr) { - std::string root_table = fake_path_ + kRootTabletNodePath; - if (!zk::FakeZkUtil::WriteNode(root_table, root_tablet_addr)) { - LOG(ERROR) << "fake zk error: " << root_table - << ", " << root_tablet_addr; - _Exit(EXIT_FAILURE); - } - LOG(INFO) << "update fake root_table_addr: " << root_tablet_addr; - return true; + std::string root_table = fake_path_ + kRootTabletNodePath; + if (!zk::FakeZkUtil::WriteNode(root_table, root_tablet_addr)) { + LOG(ERROR) << "fake zk error: " << root_table << ", " << root_tablet_addr; + _Exit(EXIT_FAILURE); + } + LOG(INFO) << "update fake root_table_addr: " << root_tablet_addr; + return true; } void FakeMasterZkAdapter::OnChildrenChanged(const std::string& path, const std::vector& name_list, - const std::vector& data_list) { - -} + const std::vector& data_list) {} -void FakeMasterZkAdapter::OnNodeValueChanged(const std::string& path, - const std::string& value) { +void FakeMasterZkAdapter::OnNodeValueChanged(const std::string& path, const std::string& value) {} -} +void FakeMasterZkAdapter::OnNodeCreated(const std::string& path) {} -void FakeMasterZkAdapter::OnNodeCreated(const std::string& path) { -} +void FakeMasterZkAdapter::OnNodeDeleted(const std::string& path) {} -void FakeMasterZkAdapter::OnNodeDeleted(const std::string& path) { +void FakeMasterZkAdapter::OnWatchFailed(const std::string& path, int watch_type, int err) {} -} +void FakeMasterZkAdapter::OnSessionTimeout() {} -void FakeMasterZkAdapter::OnWatchFailed(const std::string& path, - int watch_type, - int err) { -} - -void FakeMasterZkAdapter::OnSessionTimeout() { -} - - - -InsMasterZkAdapter::InsMasterZkAdapter(MasterImpl * master_impl, - const std::string& server_addr) - : master_impl_(master_impl), server_addr_(server_addr), ins_sdk_(NULL) { - -} +InsMasterZkAdapter::InsMasterZkAdapter(MasterImpl* master_impl, const std::string& server_addr) + : master_impl_(master_impl), server_addr_(server_addr), ins_sdk_(NULL) {} InsMasterZkAdapter::~InsMasterZkAdapter() { - if (ins_sdk_) { - std::string root_path = FLAGS_tera_ins_root_path; - std::string master_lock = root_path + kMasterLockPath; - galaxy::ins::sdk::SDKError err; - ins_sdk_->UnLock(master_lock, &err); - } + if (ins_sdk_) { + std::string root_path = FLAGS_tera_ins_root_path; + std::string master_lock = root_path + kMasterLockPath; + galaxy::ins::sdk::SDKError err; + ins_sdk_->UnLock(master_lock, &err); + } } static void InsOnTsChange(const galaxy::ins::sdk::WatchParam& param, galaxy::ins::sdk::SDKError error) { - LOG(INFO) << "ts on ins changed event" ; - InsMasterZkAdapter* ins_adp = static_cast(param.context); - ins_adp->RefreshTabletNodeList(); + LOG(INFO) << "ts on ins changed event"; + InsMasterZkAdapter* ins_adp = static_cast(param.context); + ins_adp->RefreshTabletNodeList(); } static void InsOnLockChange(const galaxy::ins::sdk::WatchParam& param, galaxy::ins::sdk::SDKError error) { - InsMasterZkAdapter* ins_adp = static_cast(param.context); - ins_adp->OnLockChange(param.value, param.deleted); + InsMasterZkAdapter* ins_adp = static_cast(param.context); + ins_adp->OnLockChange(param.value, param.deleted); } -static void InsOnSessionTimeout(void * context) { - InsMasterZkAdapter* ins_adp = static_cast(context); - ins_adp->OnSessionTimeout(); +static void InsOnSessionTimeout(void* context) { + InsMasterZkAdapter* ins_adp = static_cast(context); + ins_adp->OnSessionTimeout(); } bool InsMasterZkAdapter::Init(std::string* root_tablet_addr, - std::map* tabletnode_list, - bool* safe_mode) { - MutexLock lock(&mutex_); - ins_sdk_ = new galaxy::ins::sdk::InsSDK(FLAGS_tera_ins_addr_list); - ins_sdk_->SetTimeoutTime(FLAGS_tera_master_ins_session_timeout); - std::string root_path = FLAGS_tera_ins_root_path; - std::string master_lock = root_path + kMasterLockPath; - std::string master_path = root_path + kMasterNodePath; - std::string ts_list_path = root_path + kTsListPath; - galaxy::ins::sdk::SDKError err; - CHECK(ins_sdk_->Lock(master_lock, &err)) << "lock master_lock fail"; - CHECK(ins_sdk_->Put(master_path, server_addr_, &err)) << "writer master fail"; - CHECK(ins_sdk_->Watch(master_lock, InsOnLockChange, this, &err)) - << "watch master-lock fail"; - CHECK(ins_sdk_->Watch(ts_list_path, &InsOnTsChange, this, &err)) - << "watch ts list failed"; - galaxy::ins::sdk::ScanResult* result = ins_sdk_->Scan(ts_list_path+"/!", - ts_list_path+"/~"); - while (!result->Done()) { - CHECK_EQ(result->Error(), galaxy::ins::sdk::kOK); - std::string session_id = result->Value(); - std::string key = result->Key(); - size_t preifx_len = (ts_list_path + "/").size(); - std::string ts_addr = key.substr(preifx_len); - (*tabletnode_list)[ts_addr] = session_id; - result->Next(); - } - delete result; - ins_sdk_->RegisterSessionTimeout(InsOnSessionTimeout, this); - return true; + std::map* tabletnode_list, + bool* safe_mode) { + MutexLock lock(&mutex_); + ins_sdk_ = new galaxy::ins::sdk::InsSDK(FLAGS_tera_ins_addr_list); + ins_sdk_->SetTimeoutTime(FLAGS_tera_master_ins_session_timeout); + std::string root_path = FLAGS_tera_ins_root_path; + std::string master_lock = root_path + kMasterLockPath; + std::string master_path = root_path + kMasterNodePath; + std::string ts_list_path = root_path + kTsListPath; + galaxy::ins::sdk::SDKError err; + CHECK(ins_sdk_->Lock(master_lock, &err)) << "lock master_lock fail"; + CHECK(ins_sdk_->Put(master_path, server_addr_, &err)) << "writer master fail"; + CHECK(ins_sdk_->Watch(master_lock, InsOnLockChange, this, &err)) << "watch master-lock fail"; + CHECK(ins_sdk_->Watch(ts_list_path, &InsOnTsChange, this, &err)) << "watch ts list failed"; + galaxy::ins::sdk::ScanResult* result = ins_sdk_->Scan(ts_list_path + "/!", ts_list_path + "/~"); + while (!result->Done()) { + CHECK_EQ(result->Error(), galaxy::ins::sdk::kOK); + std::string session_id = result->Value(); + std::string key = result->Key(); + size_t preifx_len = (ts_list_path + "/").size(); + std::string ts_addr = key.substr(preifx_len); + (*tabletnode_list)[ts_addr] = session_id; + result->Next(); + } + delete result; + ins_sdk_->RegisterSessionTimeout(InsOnSessionTimeout, this); + return true; } void InsMasterZkAdapter::RefreshTabletNodeList() { - std::string root_path = FLAGS_tera_ins_root_path; - std::string ts_list_path = root_path + kTsListPath; - galaxy::ins::sdk::SDKError err; - CHECK(ins_sdk_->Watch(ts_list_path, &InsOnTsChange, - this, &err)) << "watch ts failed"; - galaxy::ins::sdk::ScanResult* result = ins_sdk_->Scan(ts_list_path+"/!", - ts_list_path+"/~"); - - std::map tabletnode_list; - while (!result->Done()) { - CHECK_EQ(result->Error(), galaxy::ins::sdk::kOK); - std::string session_id = result->Value(); - std::string key = result->Key(); - size_t preifx_len = (ts_list_path + "/").size(); - std::string ts_addr = key.substr(preifx_len); - tabletnode_list[ts_addr] = session_id; - result->Next(); - } - delete result; - master_impl_->RefreshTabletNodeList(tabletnode_list); + std::string root_path = FLAGS_tera_ins_root_path; + std::string ts_list_path = root_path + kTsListPath; + galaxy::ins::sdk::SDKError err; + CHECK(ins_sdk_->Watch(ts_list_path, &InsOnTsChange, this, &err)) << "watch ts failed"; + galaxy::ins::sdk::ScanResult* result = ins_sdk_->Scan(ts_list_path + "/!", ts_list_path + "/~"); + + std::map tabletnode_list; + while (!result->Done()) { + CHECK_EQ(result->Error(), galaxy::ins::sdk::kOK); + std::string session_id = result->Value(); + std::string key = result->Key(); + size_t preifx_len = (ts_list_path + "/").size(); + std::string ts_addr = key.substr(preifx_len); + tabletnode_list[ts_addr] = session_id; + result->Next(); + } + delete result; + master_impl_->RefreshTabletNodeList(tabletnode_list); } void InsMasterZkAdapter::OnLockChange(std::string session_id, bool deleted) { - if (deleted || session_id != ins_sdk_->GetSessionID()) { - LOG(ERROR) << "master lock lost"; - exit(1); - } + if (deleted || session_id != ins_sdk_->GetSessionID()) { + LOG(ERROR) << "master lock lost"; + exit(1); + } } -bool InsMasterZkAdapter::KickTabletServer(const std::string& ts_host, - const std::string& ts_zk_id) { - std::string root_path = FLAGS_tera_ins_root_path; - std::string kick_path = root_path + kKickPath; - galaxy::ins::sdk::SDKError err; - bool ret = ins_sdk_->Put(kick_path + "/" + ts_zk_id, ts_host, &err); - return ret; +bool InsMasterZkAdapter::KickTabletServer(const std::string& ts_host, const std::string& ts_zk_id) { + std::string root_path = FLAGS_tera_ins_root_path; + std::string kick_path = root_path + kKickPath; + galaxy::ins::sdk::SDKError err; + bool ret = ins_sdk_->Put(kick_path + "/" + ts_zk_id, ts_host, &err); + return ret; } bool InsMasterZkAdapter::UpdateRootTabletNode(const std::string& root_tablet_addr) { - std::string root_path = FLAGS_tera_ins_root_path; - std::string meta_path = root_path + kRootTabletNodePath; - galaxy::ins::sdk::SDKError err; - bool ret = ins_sdk_->Put(meta_path, root_tablet_addr, &err); - return ret; + std::string root_path = FLAGS_tera_ins_root_path; + std::string meta_path = root_path + kRootTabletNodePath; + galaxy::ins::sdk::SDKError err; + bool ret = ins_sdk_->Put(meta_path, root_tablet_addr, &err); + return ret; } void InsMasterZkAdapter::OnSessionTimeout() { - MutexLock lock(&mutex_); - LOG(ERROR) << "ins sessiont timeout"; - _Exit(EXIT_FAILURE); + MutexLock lock(&mutex_); + LOG(ERROR) << "ins sessiont timeout"; + _Exit(EXIT_FAILURE); } -} // namespace master -} // namespace tera +} // namespace master +} // namespace tera diff --git a/src/master/master_zk_adapter.h b/src/master/master_zk_adapter.h index 777ce143f..6e90f70ff 100644 --- a/src/master/master_zk_adapter.h +++ b/src/master/master_zk_adapter.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_MASTER_MASTER_ZK_ADAPTER_H_ -#define TERA_MASTER_MASTER_ZK_ADAPTER_H_ +#ifndef TERA_MASTER_MASTER_ZK_ADAPTER_H_ +#define TERA_MASTER_MASTER_ZK_ADAPTER_H_ #include #include @@ -11,10 +11,10 @@ #include "master/master_impl.h" #include "zk/zk_adapter.h" -namespace galaxy{ -namespace ins{ +namespace galaxy { +namespace ins { namespace sdk { - class InsSDK; +class InsSDK; } } } @@ -23,81 +23,75 @@ namespace tera { namespace master { class MasterZkAdapterBase : public zk::ZooKeeperAdapter { -public: - virtual ~MasterZkAdapterBase() {}; - virtual bool Init(std::string* root_tablet_addr, - std::map* tabletnode_list, - bool* safe_mode) = 0; - - virtual bool KickTabletServer(const std::string& ts_host, - const std::string& ts_zk_id) = 0; - virtual bool MarkSafeMode() = 0; - virtual bool UnmarkSafeMode() = 0; - virtual bool UpdateRootTabletNode(const std::string& root_tablet_addr) = 0; + public: + virtual ~MasterZkAdapterBase(){}; + virtual bool Init(std::string* root_tablet_addr, + std::map* tabletnode_list, bool* safe_mode) = 0; + + virtual bool KickTabletServer(const std::string& ts_host, const std::string& ts_zk_id) = 0; + virtual bool MarkSafeMode() = 0; + virtual bool UnmarkSafeMode() = 0; + virtual bool HasSafeModeNode() = 0; + virtual bool UpdateRootTabletNode(const std::string& root_tablet_addr) = 0; }; class MasterZkAdapter : public MasterZkAdapterBase { -public: - MasterZkAdapter(MasterImpl* master_impl, - const std::string & server_addr); - virtual ~MasterZkAdapter(); - virtual bool Init(std::string* root_tablet_addr, - std::map* tabletnode_list, - bool* safe_mode); - - virtual bool KickTabletServer(const std::string& ts_host, - const std::string& ts_zk_id); - virtual bool MarkSafeMode(); - virtual bool UnmarkSafeMode(); - virtual bool UpdateRootTabletNode(const std::string& root_tablet_addr); - -protected: - virtual bool Setup(); - virtual void Reset(); - - virtual bool LockMasterLock(); - virtual bool UnlockMasterLock(); - virtual bool WatchMasterLock(); - virtual bool CreateMasterNode(); - virtual bool DeleteMasterNode(); - - virtual bool WatchRootTabletNode(bool* is_exist, std::string* root_tablet_addr); - virtual bool WatchSafeModeMark(bool* is_safemode); - virtual bool WatchTabletNodeList(std::map* tabletnode_list); - - virtual void OnSafeModeMarkCreated(); - virtual void OnSafeModeMarkDeleted(); - virtual void OnTabletNodeListDeleted(); - virtual void OnRootTabletNodeDeleted(); - virtual void OnMasterNodeDeleted(); - virtual void OnZkLockDeleted(); - virtual void OnTabletServerKickMarkCreated(); - virtual void OnTabletServerKickMarkDeleted(); - virtual void OnTabletServerStart(const std::string& ts_host); - virtual void OnTabletServerExist(const std::string& ts_host); - - virtual void OnChildrenChanged(const std::string& path, - const std::vector& name_list, - const std::vector& data_list); - virtual void OnNodeValueChanged(const std::string& path, - const std::string& value); - virtual void OnNodeCreated(const std::string& path); - virtual void OnNodeDeleted(const std::string& path); - virtual void OnWatchFailed(const std::string& path, int watch_type, - int err); - virtual void OnSessionTimeout(); - -private: - mutable Mutex mutex_; - MasterImpl * master_impl_; - std::string server_addr_; + public: + MasterZkAdapter(MasterImpl* master_impl, const std::string& server_addr); + virtual ~MasterZkAdapter(); + virtual bool Init(std::string* root_tablet_addr, + std::map* tabletnode_list, bool* safe_mode); + + virtual bool KickTabletServer(const std::string& ts_host, const std::string& ts_zk_id); + virtual bool MarkSafeMode(); + virtual bool HasSafeModeNode(); + virtual bool UnmarkSafeMode(); + virtual bool UpdateRootTabletNode(const std::string& root_tablet_addr); + + protected: + virtual bool Setup(); + virtual void Reset(); + + virtual bool LockMasterLock(); + virtual bool UnlockMasterLock(); + virtual bool WatchMasterLock(); + virtual bool CreateMasterNode(); + virtual bool DeleteMasterNode(); + + virtual bool WatchRootTabletNode(bool* is_exist, std::string* root_tablet_addr); + virtual bool WatchSafeModeMark(bool* is_safemode); + virtual bool WatchTabletNodeList(std::map* tabletnode_list); + + virtual void OnSafeModeMarkCreated(); + virtual void OnSafeModeMarkDeleted(); + virtual void OnTabletNodeListDeleted(); + virtual void OnRootTabletNodeDeleted(); + virtual void OnMasterNodeDeleted(); + virtual void OnZkLockDeleted(); + virtual void OnTabletServerKickMarkCreated(); + virtual void OnTabletServerKickMarkDeleted(); + virtual void OnTabletServerStart(const std::string& ts_host); + virtual void OnTabletServerExist(const std::string& ts_host); + + virtual void OnChildrenChanged(const std::string& path, const std::vector& name_list, + const std::vector& data_list); + virtual void OnNodeValueChanged(const std::string& path, const std::string& value); + virtual void OnNodeCreated(const std::string& path); + virtual void OnNodeDeleted(const std::string& path); + virtual void OnWatchFailed(const std::string& path, int watch_type, int err); + virtual void OnSessionTimeout(); + + private: + mutable Mutex mutex_; + MasterImpl* master_impl_; + std::string server_addr_; }; class MockMasterZkAdapter : public MasterZkAdapter { -public: - MockMasterZkAdapter(MasterImpl* master_impl, const std::string & server_addr) : - MasterZkAdapter(master_impl, server_addr) {} - virtual ~MockMasterZkAdapter() {} + public: + MockMasterZkAdapter(MasterImpl* master_impl, const std::string& server_addr) + : MasterZkAdapter(master_impl, server_addr) {} + virtual ~MockMasterZkAdapter() {} }; /* @@ -106,83 +100,74 @@ class MockMasterZkAdapter : public MasterZkAdapter { * This is implemented through local file system. * Not support watching. */ -class FakeMasterZkAdapter: public MasterZkAdapterBase { -public: - FakeMasterZkAdapter(MasterImpl * master_impl, - const std::string & server_addr); - virtual ~FakeMasterZkAdapter(); - virtual bool Init(std::string* root_tablet_addr, - std::map* tabletnode_list, - bool* safe_mode); - - virtual bool KickTabletServer(const std::string& ts_host, - const std::string& ts_zk_id); - virtual bool MarkSafeMode(); - virtual bool UnmarkSafeMode(); - virtual bool UpdateRootTabletNode(const std::string& root_tablet_addr); - -private: - virtual void OnChildrenChanged(const std::string& path, - const std::vector& name_list, - const std::vector& data_list); - virtual void OnNodeValueChanged(const std::string& path, - const std::string& value); - virtual void OnNodeCreated(const std::string& path); - virtual void OnNodeDeleted(const std::string& path); - virtual void OnWatchFailed(const std::string& path, int watch_type, - int err); - virtual void OnSessionTimeout(); - -private: - mutable Mutex mutex_; - MasterImpl * master_impl_; - std::string server_addr_; - std::string fake_path_; +class FakeMasterZkAdapter : public MasterZkAdapterBase { + public: + FakeMasterZkAdapter(MasterImpl* master_impl, const std::string& server_addr); + virtual ~FakeMasterZkAdapter(); + virtual bool Init(std::string* root_tablet_addr, + std::map* tabletnode_list, bool* safe_mode); + + virtual bool KickTabletServer(const std::string& ts_host, const std::string& ts_zk_id); + virtual bool MarkSafeMode(); + virtual bool HasSafeModeNode() { return false; } + virtual bool UnmarkSafeMode(); + virtual bool UpdateRootTabletNode(const std::string& root_tablet_addr); + + private: + virtual void OnChildrenChanged(const std::string& path, const std::vector& name_list, + const std::vector& data_list); + virtual void OnNodeValueChanged(const std::string& path, const std::string& value); + virtual void OnNodeCreated(const std::string& path); + virtual void OnNodeDeleted(const std::string& path); + virtual void OnWatchFailed(const std::string& path, int watch_type, int err); + virtual void OnSessionTimeout(); + + private: + mutable Mutex mutex_; + MasterImpl* master_impl_; + std::string server_addr_; + std::string fake_path_; }; - -class InsMasterZkAdapter: public MasterZkAdapterBase { -public: - InsMasterZkAdapter(MasterImpl * master_impl, - const std::string & server_addr); - virtual ~InsMasterZkAdapter(); - virtual bool Init(std::string* root_tablet_addr, - std::map* tabletnode_list, - bool* safe_mode); - - virtual bool KickTabletServer(const std::string& ts_host, - const std::string& ts_zk_id); - virtual bool MarkSafeMode() {return true;} - virtual bool UnmarkSafeMode() {return true;} - virtual bool UpdateRootTabletNode(const std::string& root_tablet_addr); - void RefreshTabletNodeList(); - void OnLockChange(std::string session_id, bool deleted); - void OnSessionTimeout(); -private: - virtual void OnChildrenChanged(const std::string& path, - const std::vector& name_list, - const std::vector& data_list) {} - virtual void OnNodeValueChanged(const std::string& path, - const std::string& value) {} - virtual void OnNodeCreated(const std::string& path) {} - virtual void OnNodeDeleted(const std::string& path) {} - virtual void OnWatchFailed(const std::string& path, int watch_type, - int err) {} -private: - mutable Mutex mutex_; - MasterImpl * master_impl_; - std::string server_addr_; - galaxy::ins::sdk::InsSDK* ins_sdk_; +class InsMasterZkAdapter : public MasterZkAdapterBase { + public: + InsMasterZkAdapter(MasterImpl* master_impl, const std::string& server_addr); + virtual ~InsMasterZkAdapter(); + virtual bool Init(std::string* root_tablet_addr, + std::map* tabletnode_list, bool* safe_mode); + + virtual bool KickTabletServer(const std::string& ts_host, const std::string& ts_zk_id); + virtual bool MarkSafeMode() { return true; } + virtual bool HasSafeModeNode() { return false; } + virtual bool UnmarkSafeMode() { return true; } + virtual bool UpdateRootTabletNode(const std::string& root_tablet_addr); + void RefreshTabletNodeList(); + void OnLockChange(std::string session_id, bool deleted); + void OnSessionTimeout(); + + private: + virtual void OnChildrenChanged(const std::string& path, const std::vector& name_list, + const std::vector& data_list) {} + virtual void OnNodeValueChanged(const std::string& path, const std::string& value) {} + virtual void OnNodeCreated(const std::string& path) {} + virtual void OnNodeDeleted(const std::string& path) {} + virtual void OnWatchFailed(const std::string& path, int watch_type, int err) {} + + private: + mutable Mutex mutex_; + MasterImpl* master_impl_; + std::string server_addr_; + galaxy::ins::sdk::InsSDK* ins_sdk_; }; class MockInsMasterZkAdapter : public InsMasterZkAdapter { -public: - MockInsMasterZkAdapter(MasterImpl* master_impl, const std::string& server_addr) : - InsMasterZkAdapter(master_impl, server_addr) {} - virtual ~MockInsMasterZkAdapter() {} + public: + MockInsMasterZkAdapter(MasterImpl* master_impl, const std::string& server_addr) + : InsMasterZkAdapter(master_impl, server_addr) {} + virtual ~MockInsMasterZkAdapter() {} }; -} // namespace master -} // namespace tera +} // namespace master +} // namespace tera -#endif // TERA_MASTER_MASTER_ZK_ADAPTER_H_ +#endif // TERA_MASTER_MASTER_ZK_ADAPTER_H_ diff --git a/src/master/merge_tablet_procedure.cc b/src/master/merge_tablet_procedure.cc index 7756dd039..a76fcc4b0 100644 --- a/src/master/merge_tablet_procedure.cc +++ b/src/master/merge_tablet_procedure.cc @@ -18,232 +18,244 @@ DECLARE_string(tera_tabletnode_path_prefix); namespace tera { namespace master { -std::map MergeTabletProcedure::phase_handlers_ { - {MergeTabletPhase::kUnLoadTablets, std::bind(&MergeTabletProcedure::UnloadTabletsPhaseHandler, _1, _2)}, - {MergeTabletPhase::kPostUnLoadTablets, std::bind(&MergeTabletProcedure::PostUnloadTabletsPhaseHandler, _1, _2)}, - {MergeTabletPhase::kUpdateMeta, std::bind(&MergeTabletProcedure::UpdateMetaPhaseHandler, _1, _2)}, - {MergeTabletPhase::kLoadMergedTablet, std::bind(&MergeTabletProcedure::LoadMergedTabletPhaseHandler, _1, _2)}, - {MergeTabletPhase::kFaultRecover, std::bind(&MergeTabletProcedure::FaultRecoverPhaseHandler, _1, _2)}, - {MergeTabletPhase::kEofPhase, std::bind(&MergeTabletProcedure::EOFPhaseHandler, _1, _2)} -}; - -MergeTabletProcedure::MergeTabletProcedure(TabletPtr first, TabletPtr second, ThreadPool* thread_pool) : - id_(std::string("MergeTablet:") + first->GetPath() + ":" + TimeStamp()), - tablets_{first, second}, thread_pool_(thread_pool) -{ - PROC_LOG_IF(INFO, tablets_[1]) << "merge tablet begin, tablets, first: " - << tablets_[0] << ", second: " << tablets_[1]; - if (tablets_[0]->GetStatus() != TabletMeta::kTabletReady || - tablets_[1]->GetStatus() != TabletMeta::kTabletReady) { - PROC_LOG(WARNING) << "tablets not ready, giveup this merge"; - SetNextPhase(MergeTabletPhase::kEofPhase); - return; - } - // check KeyRange - if (tablets_[0]->GetKeyEnd() != tablets_[1]->GetKeyStart() && - tablets_[1]->GetKeyEnd() != tablets_[0]->GetKeyStart()) { - PROC_LOG(WARNING) << "invalid merge peers: first: " << tablets_[0] << ", second: " << tablets_[1]; - SetNextPhase(MergeTabletPhase::kEofPhase); - return; - } - SetNextPhase(MergeTabletPhase::kUnLoadTablets); -} +std::map + MergeTabletProcedure::phase_handlers_{ + {MergeTabletPhase::kUnLoadTablets, + std::bind(&MergeTabletProcedure::UnloadTabletsPhaseHandler, _1, _2)}, + {MergeTabletPhase::kPostUnLoadTablets, + std::bind(&MergeTabletProcedure::PostUnloadTabletsPhaseHandler, _1, _2)}, + {MergeTabletPhase::kUpdateMeta, + std::bind(&MergeTabletProcedure::UpdateMetaPhaseHandler, _1, _2)}, + {MergeTabletPhase::kLoadMergedTablet, + std::bind(&MergeTabletProcedure::LoadMergedTabletPhaseHandler, _1, _2)}, + {MergeTabletPhase::kFaultRecover, + std::bind(&MergeTabletProcedure::FaultRecoverPhaseHandler, _1, _2)}, + {MergeTabletPhase::kEofPhase, std::bind(&MergeTabletProcedure::EOFPhaseHandler, _1, _2)}}; -std::string MergeTabletProcedure::ProcId() const { - //std::string id("MergeTablet:"); - //id = id + tablets_[0]->GetPath(); - return id_; +MergeTabletProcedure::MergeTabletProcedure(TabletPtr first, TabletPtr second, + ThreadPool* thread_pool) + : Procedure(ProcedureLimiter::LockType::kMerge), + id_(std::string("MergeTablet:") + first->GetPath() + ":" + second->GetPath() + ":" + + TimeStamp()), + tablets_{first, second}, + thread_pool_(thread_pool) { + PROC_LOG(INFO) << "merge tablet begin, tablets, first: " << tablets_[0] + << ", second: " << tablets_[1]; + if (tablets_[0]->GetStatus() != TabletMeta::kTabletReady || + tablets_[1]->GetStatus() != TabletMeta::kTabletReady) { + PROC_LOG(WARNING) << "tablets not ready, giveup this merge"; + SetNextPhase(MergeTabletPhase::kEofPhase); + return; + } + // check KeyRange + if (tablets_[0]->GetKeyEnd() != tablets_[1]->GetKeyStart() && + tablets_[1]->GetKeyEnd() != tablets_[0]->GetKeyStart()) { + PROC_LOG(WARNING) << "invalid merge peers: first: " << tablets_[0] + << ", second: " << tablets_[1]; + SetNextPhase(MergeTabletPhase::kEofPhase); + return; + } + SetNextPhase(MergeTabletPhase::kUnLoadTablets); } +std::string MergeTabletProcedure::ProcId() const { return id_; } + void MergeTabletProcedure::RunNextStage() { - MergeTabletPhase phase = GetCurrentPhase(); - auto it = phase_handlers_.find(phase); - PROC_CHECK(it != phase_handlers_.end()) << "illegal phase: " << phase << ", tablet: " << tablets_[0]; - MergeTabletPhaseHandler handler = it->second; - handler(this, phase); + MergeTabletPhase phase = GetCurrentPhase(); + auto it = phase_handlers_.find(phase); + PROC_CHECK(it != phase_handlers_.end()) << "illegal phase: " << phase + << ", tablet: " << tablets_[0]; + MergeTabletPhaseHandler handler = it->second; + handler(this, phase); } void MergeTabletProcedure::UnloadTabletsPhaseHandler(const MergeTabletPhase&) { - if (!unload_procs_[0] && !unload_procs_[1]) { - unload_procs_[0].reset(new UnloadTabletProcedure(tablets_[0], thread_pool_, true)); - unload_procs_[1].reset(new UnloadTabletProcedure(tablets_[1], thread_pool_, true)); - PROC_LOG(INFO) << "Generate UnloadTablet SubProcedure1: " - << unload_procs_[0]->ProcId() << " SubProcedure2: " << unload_procs_[1]->ProcId(); - MasterEnv().GetExecutor()->AddProcedure(unload_procs_[0]); - MasterEnv().GetExecutor()->AddProcedure(unload_procs_[1]); - } - // both unload_procs_[0] and unload_procs_[1] should not be nullptr - PROC_CHECK(unload_procs_[0] && unload_procs_[1]); - // wait both tablets unload finish - if (!unload_procs_[0]->Done() || !unload_procs_[1]->Done()){ - return; - } - TabletMeta::TabletStatus status0 = tablets_[0]->GetStatus(); - TabletMeta::TabletStatus status1 = tablets_[1]->GetStatus(); - if (status0 != TabletMeta::kTabletOffline || status1 != TabletMeta::kTabletOffline) { - PROC_LOG(WARNING) << "unload tablets not ok, tablet: " << tablets_[0] - << ", status: " << StatusCodeToString(status0) << ", tablet: " - << tablets_[1] << ", status: " << StatusCodeToString(status1); - SetNextPhase(MergeTabletPhase::kEofPhase); - } - else { - SetNextPhase(MergeTabletPhase::kPostUnLoadTablets); - } + if (!unload_procs_[0] && !unload_procs_[1]) { + unload_procs_[0].reset(new UnloadTabletProcedure(tablets_[0], thread_pool_, true)); + unload_procs_[1].reset(new UnloadTabletProcedure(tablets_[1], thread_pool_, true)); + PROC_LOG(INFO) << "Generate UnloadTablet SubProcedure1: " << unload_procs_[0]->ProcId() + << " SubProcedure2: " << unload_procs_[1]->ProcId(); + MasterEnv().GetExecutor()->AddProcedure(unload_procs_[0]); + MasterEnv().GetExecutor()->AddProcedure(unload_procs_[1]); + } + // both unload_procs_[0] and unload_procs_[1] should not be nullptr + PROC_CHECK(unload_procs_[0] && unload_procs_[1]); + // wait both tablets unload finish + if (!unload_procs_[0]->Done() || !unload_procs_[1]->Done()) { + return; + } + TabletMeta::TabletStatus status0 = tablets_[0]->GetStatus(); + TabletMeta::TabletStatus status1 = tablets_[1]->GetStatus(); + if (status0 != TabletMeta::kTabletOffline || status1 != TabletMeta::kTabletOffline) { + PROC_LOG(WARNING) << "unload tablets not ok, tablet: " << tablets_[0] + << ", status: " << StatusCodeToString(status0) << ", tablet: " << tablets_[1] + << ", status: " << StatusCodeToString(status1); + SetNextPhase(MergeTabletPhase::kEofPhase); + } else { + SetNextPhase(MergeTabletPhase::kPostUnLoadTablets); + } } void MergeTabletProcedure::PostUnloadTabletsPhaseHandler(const MergeTabletPhase&) { - if (!TabletStateCheck()) { - SetNextPhase(MergeTabletPhase::kFaultRecover); - return; - } - SetNextPhase(MergeTabletPhase::kUpdateMeta); + if (!TabletStateCheck()) { + SetNextPhase(MergeTabletPhase::kFaultRecover); + return; + } + SetNextPhase(MergeTabletPhase::kUpdateMeta); } void MergeTabletProcedure::UpdateMetaPhaseHandler(const MergeTabletPhase&) { - // update meta asynchronously - if (!merged_) { - UpdateMeta(); - } + // update meta asynchronously + if (!merged_) { + UpdateMeta(); + } } void MergeTabletProcedure::LoadMergedTabletPhaseHandler(const MergeTabletPhase&) { - if (!load_proc_) { - load_proc_.reset(new LoadTabletProcedure(merged_, dest_node_, thread_pool_)); - PROC_LOG(INFO) << "Generate LoadTablet SubProcedure: " << load_proc_->ProcId() - << "merged: " << merged_ << ", destnode: " << dest_node_->GetAddr(); - MasterEnv().GetExecutor()->AddProcedure(load_proc_); - } - SetNextPhase(MergeTabletPhase::kEofPhase); + if (!load_proc_) { + load_proc_.reset(new LoadTabletProcedure(merged_, dest_node_, thread_pool_, true)); + PROC_LOG(INFO) << "Generate LoadTablet SubProcedure: " << load_proc_->ProcId() + << "merged: " << merged_ << ", destnode: " << dest_node_->GetAddr(); + MasterEnv().GetExecutor()->AddProcedure(load_proc_); + } + SetNextPhase(MergeTabletPhase::kEofPhase); } void MergeTabletProcedure::FaultRecoverPhaseHandler(const MergeTabletPhase&) { - PROC_CHECK(phases_.size() >= 2 && GetCurrentPhase() == MergeTabletPhase::kFaultRecover); - if (!recover_procs_[0]) { - recover_procs_[0].reset(new LoadTabletProcedure(tablets_[0], - tablets_[0]->GetTabletNode(), thread_pool_)); - recover_procs_[1].reset(new LoadTabletProcedure(tablets_[1], - tablets_[1]->GetTabletNode(), thread_pool_)); - MasterEnv().GetExecutor()->AddProcedure(recover_procs_[0]); - MasterEnv().GetExecutor()->AddProcedure(recover_procs_[1]); - PROC_LOG(INFO) << "[merge] rollback " << tablets_[0] << ", SubProcedure: " << recover_procs_[0]->ProcId(); - PROC_LOG(INFO) << "[merge] rollback " << tablets_[1] << ", SubProcedure: " << recover_procs_[1]->ProcId(); - return; - } - SetNextPhase(MergeTabletPhase::kEofPhase); + PROC_CHECK(phases_.size() >= 2 && GetCurrentPhase() == MergeTabletPhase::kFaultRecover); + if (!recover_procs_[0]) { + recover_procs_[0].reset( + new LoadTabletProcedure(tablets_[0], tablets_[0]->GetTabletNode(), thread_pool_, true)); + recover_procs_[1].reset( + new LoadTabletProcedure(tablets_[1], tablets_[1]->GetTabletNode(), thread_pool_, true)); + MasterEnv().GetExecutor()->AddProcedure(recover_procs_[0]); + MasterEnv().GetExecutor()->AddProcedure(recover_procs_[1]); + PROC_LOG(INFO) << "[merge] rollback " << tablets_[0] + << ", SubProcedure: " << recover_procs_[0]->ProcId(); + PROC_LOG(INFO) << "[merge] rollback " << tablets_[1] + << ", SubProcedure: " << recover_procs_[1]->ProcId(); + return; + } + SetNextPhase(MergeTabletPhase::kEofPhase); } void MergeTabletProcedure::EOFPhaseHandler(const MergeTabletPhase&) { - if (!recover_procs_[0]) { - tablets_[0]->UnlockTransition(); - } - if (!recover_procs_[1]) { - tablets_[1]->UnlockTransition(); - } - PROC_LOG(INFO) << "MergeTabletProcedure done"; - done_ = true; + if (!recover_procs_[0]) { + tablets_[0]->UnlockTransition(); + } + if (!recover_procs_[1]) { + tablets_[1]->UnlockTransition(); + } + PROC_LOG_IF(INFO, !merged_) << "merge finished abort, first: " << tablets_[0] + << ", second: " << tablets_[1]; + PROC_LOG_IF(INFO, merged_) << "merge finished done, merged: " << merged_ + << "first: " << tablets_[0] << ", second: " << tablets_[1]; + done_ = true; } bool MergeTabletProcedure::TabletStateCheck() { - leveldb::Env* env = io::LeveldbBaseEnv(); - for (size_t i = 0; i < sizeof(tablets_) / sizeof (TabletPtr); ++i) { - std::vector children; - std::string tablet_path = FLAGS_tera_tabletnode_path_prefix + "/" + tablets_[i]->GetPath(); - // NOTICE: - env->GetChildren(tablet_path, &children); - leveldb::Status status = env->GetChildren(tablet_path, &children); - if (!status.ok()) { - PROC_LOG(WARNING) << "[merge] abort, " << tablets_[i] - << ", tablet status check error: " << status.ToString(); - return false; - } - for (size_t j = 0; j < children.size(); ++j) { - leveldb::FileType type = leveldb::kUnknown; - uint64_t number = 0; - if (ParseFileName(children[j], &number, &type) && - type == leveldb::kLogFile) { - PROC_LOG(WARNING) << "[merge] abort, " << tablets_[i] << ", tablet log not clear."; - return false; - } - } + leveldb::Env* env = io::LeveldbBaseEnv(); + for (size_t i = 0; i < sizeof(tablets_) / sizeof(TabletPtr); ++i) { + std::vector children; + std::string tablet_path = FLAGS_tera_tabletnode_path_prefix + "/" + tablets_[i]->GetPath(); + // NOTICE: + env->GetChildren(tablet_path, &children); + leveldb::Status status = env->GetChildren(tablet_path, &children); + if (!status.ok()) { + PROC_LOG(WARNING) << "[merge] abort, " << tablets_[i] + << ", tablet status check error: " << status.ToString(); + return false; } - return true; + for (size_t j = 0; j < children.size(); ++j) { + leveldb::FileType type = leveldb::kUnknown; + uint64_t number = 0; + if (ParseFileName(children[j], &number, &type) && type == leveldb::kLogFile) { + PROC_LOG(WARNING) << "[merge] abort, " << tablets_[i] << ", tablet log not clear."; + return false; + } + } + } + return true; } void MergeTabletProcedure::UpdateMeta() { - std::vector records; - PackMetaWriteRecords(tablets_[0], true, records); - PackMetaWriteRecords(tablets_[1], true, records); - - TabletMeta new_meta; - // - if (tablets_[0]->GetKeyStart() == tablets_[1]->GetKeyEnd() && tablets_[0]->GetKeyStart() != "") { - tablets_[1]->ToMeta(&new_meta); - new_meta.mutable_key_range()->set_key_end(tablets_[0]->GetKeyEnd()); - new_meta.clear_parent_tablets(); - new_meta.add_parent_tablets(leveldb::GetTabletNumFromPath(tablets_[1]->GetPath())); - new_meta.add_parent_tablets(leveldb::GetTabletNumFromPath(tablets_[0]->GetPath())); - } - // - else if (tablets_[0]->GetKeyEnd() == tablets_[1]->GetKeyStart()){ - tablets_[0]->ToMeta(&new_meta); - new_meta.mutable_key_range()->set_key_end(tablets_[1]->GetKeyEnd()); - new_meta.clear_parent_tablets(); - new_meta.add_parent_tablets(leveldb::GetTabletNumFromPath(tablets_[0]->GetPath())); - new_meta.add_parent_tablets(leveldb::GetTabletNumFromPath(tablets_[1]->GetPath())); - } - else { - PROC_LOG(FATAL) << "tablet range error, cannot be merged" << tablets_[0] << ", " << tablets_[1]; - } + std::vector records; + PackMetaWriteRecords(tablets_[0], true, records); + PackMetaWriteRecords(tablets_[1], true, records); - new_meta.set_status(TabletMeta::kTabletOffline); - std::string new_path = leveldb::GetChildTabletPath(tablets_[0]->GetPath(), - tablets_[0]->GetTable()->GetNextTabletNo()); - new_meta.set_path(new_path); - new_meta.set_size(tablets_[0]->GetDataSize() + tablets_[1]->GetDataSize()); - merged_.reset(new Tablet(new_meta, tablets_[0]->GetTable())); - - dest_node_ = (tablets_[0]->GetDataSize() > tablets_[1]->GetDataSize() ? - tablets_[0]->GetTabletNode() : tablets_[1]->GetTabletNode()); - PackMetaWriteRecords(merged_, false, records); - UpdateMetaClosure done = std::bind(&MergeTabletProcedure::MergeUpdateMetaDone, this, _1); - PROC_LOG(INFO) << "[merge] update meta, tablet: [" << tablets_[0]->GetPath() << ", " << tablets_[1]->GetPath() << "]"; - // update meta table asynchronously until meta write ok. - MasterEnv().BatchWriteMetaTableAsync(records, done, -1); + TabletMeta new_meta; + // + if (tablets_[0]->GetKeyStart() == tablets_[1]->GetKeyEnd() && tablets_[0]->GetKeyStart() != "") { + tablets_[1]->ToMeta(&new_meta); + new_meta.mutable_key_range()->set_key_end(tablets_[0]->GetKeyEnd()); + new_meta.clear_parent_tablets(); + new_meta.add_parent_tablets(leveldb::GetTabletNumFromPath(tablets_[1]->GetPath())); + new_meta.add_parent_tablets(leveldb::GetTabletNumFromPath(tablets_[0]->GetPath())); + } + // + else if (tablets_[0]->GetKeyEnd() == tablets_[1]->GetKeyStart()) { + tablets_[0]->ToMeta(&new_meta); + new_meta.mutable_key_range()->set_key_end(tablets_[1]->GetKeyEnd()); + new_meta.clear_parent_tablets(); + new_meta.add_parent_tablets(leveldb::GetTabletNumFromPath(tablets_[0]->GetPath())); + new_meta.add_parent_tablets(leveldb::GetTabletNumFromPath(tablets_[1]->GetPath())); + } else { + PROC_LOG(FATAL) << "tablet range error, cannot be merged" << tablets_[0] << ", " << tablets_[1]; + } + + new_meta.set_status(TabletMeta::kTabletOffline); + std::string new_path = leveldb::GetChildTabletPath(tablets_[0]->GetPath(), + tablets_[0]->GetTable()->GetNextTabletNo()); + new_meta.set_path(new_path); + new_meta.set_size(tablets_[0]->GetDataSize() + tablets_[1]->GetDataSize()); + uint64_t version = tablets_[0]->Version() > tablets_[1]->Version() ? tablets_[0]->Version() + : tablets_[1]->Version(); + new_meta.set_version(version + 1); + merged_.reset(new Tablet(new_meta, tablets_[0]->GetTable())); + + dest_node_ = + (tablets_[0]->GetDataSize() > tablets_[1]->GetDataSize() ? tablets_[0]->GetTabletNode() + : tablets_[1]->GetTabletNode()); + PackMetaWriteRecords(merged_, false, records); + UpdateMetaClosure done = std::bind(&MergeTabletProcedure::MergeUpdateMetaDone, this, _1); + PROC_LOG(INFO) << "[merge] update meta, tablet: [" << tablets_[0]->GetPath() << ", " + << tablets_[1]->GetPath() << "]"; + // update meta table asynchronously until meta write ok. + MasterEnv().BatchWriteMetaTableAsync(records, done, -1); } -// will be called when update meta finish successfully, and set next process phase be LOAD_MERGED_TABLET +// will be called when update meta finish successfully, and set next process +// phase be LOAD_MERGED_TABLET void MergeTabletProcedure::MergeUpdateMetaDone(bool) { - TabletMeta new_meta; - merged_->ToMeta(&new_meta); - TablePtr table = merged_->GetTable(); - merged_->LockTransition(); - if (tablets_[0]->GetKeyStart() == merged_->GetKeyStart()) { - // - table->MergeTablets(tablets_[0], tablets_[1], new_meta, &merged_); - } - else { - // - table->MergeTablets(tablets_[1], tablets_[0], new_meta, &merged_); - } - SetNextPhase(MergeTabletPhase::kLoadMergedTablet); + tablets_[0]->DoStateTransition(TabletEvent::kFinishMergeTablet); + tablets_[1]->DoStateTransition(TabletEvent::kFinishMergeTablet); + TabletMeta new_meta; + merged_->ToMeta(&new_meta); + TablePtr table = merged_->GetTable(); + merged_->LockTransition(); + if (tablets_[0]->GetKeyStart() == merged_->GetKeyStart()) { + // + table->MergeTablets(tablets_[0], tablets_[1], new_meta, &merged_); + } else { + // + table->MergeTablets(tablets_[1], tablets_[0], new_meta, &merged_); + } + SetNextPhase(MergeTabletPhase::kLoadMergedTablet); } -std::ostream& operator<< (std::ostream& o, const MergeTabletPhase& phase) { - static const char* msg[] = {"MergeTabletPhase::kUnLoadTablets", - "MergeTabletPhase::kPostUnLoadTablets", - "MergeTabletPhase::kUpdateMeta", - "MergeTabletPhase::kLoadMergedTablet", - "MergeTabletPhase::kFaultRecover", - "MergeTabletPhase::kEofPhase", - "MergeTabletPhase::UNKNOWN"}; - static uint32_t msg_size = sizeof(msg) / sizeof(const char*); - typedef std::underlying_type::type UnderType; - uint32_t index = static_cast(phase) - static_cast(MergeTabletPhase::kUnLoadTablets); - index = index < msg_size ? index : msg_size - 1; - o << msg[index]; - return o; +std::ostream& operator<<(std::ostream& o, const MergeTabletPhase& phase) { + static const char* msg[] = { + "MergeTabletPhase::kUnLoadTablets", "MergeTabletPhase::kPostUnLoadTablets", + "MergeTabletPhase::kUpdateMeta", "MergeTabletPhase::kLoadMergedTablet", + "MergeTabletPhase::kFaultRecover", "MergeTabletPhase::kEofPhase", + "MergeTabletPhase::UNKNOWN"}; + static uint32_t msg_size = sizeof(msg) / sizeof(const char*); + typedef std::underlying_type::type UnderType; + uint32_t index = + static_cast(phase) - static_cast(MergeTabletPhase::kUnLoadTablets); + index = index < msg_size ? index : msg_size - 1; + o << msg[index]; + return o; } - } } - diff --git a/src/master/merge_tablet_procedure.h b/src/master/merge_tablet_procedure.h index 64f17c512..c5035b3dc 100644 --- a/src/master/merge_tablet_procedure.h +++ b/src/master/merge_tablet_procedure.h @@ -13,76 +13,74 @@ namespace tera { namespace master { - -enum class MergeTabletPhase{ - kUnLoadTablets, - kPostUnLoadTablets, - kUpdateMeta, - kLoadMergedTablet, - kFaultRecover, - kEofPhase, + +enum class MergeTabletPhase { + kUnLoadTablets, + kPostUnLoadTablets, + kUpdateMeta, + kLoadMergedTablet, + kFaultRecover, + kEofPhase, }; -std::ostream& operator<< (std::ostream& o, const MergeTabletPhase& phase); +std::ostream& operator<<(std::ostream& o, const MergeTabletPhase& phase); class MergeTabletProcedure : public Procedure { -public: - - MergeTabletProcedure(TabletPtr left, TabletPtr right, ThreadPool* thread_pool); + public: + MergeTabletProcedure(TabletPtr left, TabletPtr right, ThreadPool* thread_pool); - virtual ~MergeTabletProcedure() {} + virtual ~MergeTabletProcedure() {} - virtual std::string ProcId() const; + virtual std::string ProcId() const; - virtual void RunNextStage(); + virtual void RunNextStage(); - virtual bool Done() {return done_;} + virtual bool Done() { return done_; } -private: - typedef std::function MergeTabletPhaseHandler; + private: + typedef std::function + MergeTabletPhaseHandler; - MergeTabletPhase GetCurrentPhase() { - std::lock_guard lock(mutex_); - return phases_.back(); - } + MergeTabletPhase GetCurrentPhase() { + std::lock_guard lock(mutex_); + return phases_.back(); + } - void SetNextPhase(MergeTabletPhase phase) { - std::lock_guard lock(mutex_); - phases_.emplace_back(phase); - } + void SetNextPhase(MergeTabletPhase phase) { + std::lock_guard lock(mutex_); + phases_.emplace_back(phase); + } - bool TabletStateCheck(); - - void UpdateMetaTable(); + bool TabletStateCheck(); - void UpdateMeta(); + void UpdateMetaTable(); - void MergeUpdateMetaDone(bool); - - void UnloadTabletsPhaseHandler(const MergeTabletPhase&); - void PostUnloadTabletsPhaseHandler(const MergeTabletPhase&); - void UpdateMetaPhaseHandler(const MergeTabletPhase&); - void LoadMergedTabletPhaseHandler(const MergeTabletPhase&); - void FaultRecoverPhaseHandler(const MergeTabletPhase&); - void EOFPhaseHandler(const MergeTabletPhase&); + void UpdateMeta(); -private: - const std::string id_; - std::mutex mutex_; - bool done_ = false; - TabletPtr tablets_[2]; - TabletPtr merged_; - TabletNodePtr dest_node_; + void MergeUpdateMetaDone(bool); - std::shared_ptr unload_procs_[2]; - std::shared_ptr load_proc_; + void UnloadTabletsPhaseHandler(const MergeTabletPhase&); + void PostUnloadTabletsPhaseHandler(const MergeTabletPhase&); + void UpdateMetaPhaseHandler(const MergeTabletPhase&); + void LoadMergedTabletPhaseHandler(const MergeTabletPhase&); + void FaultRecoverPhaseHandler(const MergeTabletPhase&); + void EOFPhaseHandler(const MergeTabletPhase&); - std::vector phases_; - std::shared_ptr recover_procs_[2]; - static std::map phase_handlers_; - ThreadPool* thread_pool_; -}; + private: + const std::string id_; + std::mutex mutex_; + bool done_ = false; + TabletPtr tablets_[2]; + TabletPtr merged_; + TabletNodePtr dest_node_; + + std::shared_ptr unload_procs_[2]; + std::shared_ptr load_proc_; + std::vector phases_; + std::shared_ptr recover_procs_[2]; + static std::map phase_handlers_; + ThreadPool* thread_pool_; +}; } } - diff --git a/src/master/move_tablet_procedure.cc b/src/master/move_tablet_procedure.cc index 557cdba3b..37858813c 100644 --- a/src/master/move_tablet_procedure.cc +++ b/src/master/move_tablet_procedure.cc @@ -13,103 +13,112 @@ namespace tera { namespace master { -std::map MoveTabletProcedure::phase_handlers_ { - {MoveTabletPhase::kUnLoadTablet, std::bind(&MoveTabletProcedure::UnloadTabletPhaseHandler, _1, _2)}, - {MoveTabletPhase::kLoadTablet, std::bind(&MoveTabletProcedure::LoadTabletPhaseHandler, _1, _2)}, - {MoveTabletPhase::kEofPhase, std::bind(&MoveTabletProcedure::EOFPhaseHandler, _1, _2)} -}; +std::map MoveTabletProcedure::phase_handlers_{ + {MoveTabletPhase::kUnLoadTablet, + std::bind(&MoveTabletProcedure::UnloadTabletPhaseHandler, _1, _2)}, + {MoveTabletPhase::kLoadTablet, std::bind(&MoveTabletProcedure::LoadTabletPhaseHandler, _1, _2)}, + {MoveTabletPhase::kEofPhase, std::bind(&MoveTabletProcedure::EOFPhaseHandler, _1, _2)}}; -MoveTabletProcedure::MoveTabletProcedure(TabletPtr tablet, TabletNodePtr node, ThreadPool* thread_pool) : - id_(std::string("MoveTablet:") + tablet->GetPath() + ":" + TimeStamp()), - tablet_(tablet), - dest_node_(node), - done_(false), - thread_pool_(thread_pool) { - PROC_LOG(INFO) << "move tablet begin, tablet: " << tablet_->GetPath(); - if (dest_node_) { - // PlanToMoveIn field should be removed in the future as it is the LoadBalanceModule's duty - // to avoid move many tablets to the same TS. - dest_node_->PlanToMoveIn(); - } - if (tablet_->GetStatus() == TabletMeta::kTabletReady) { - SetNextPhase(MoveTabletPhase::kUnLoadTablet); - } - else if (tablet_->GetStatus() == TabletMeta::kTabletOffline || - tablet_->GetStatus() == TabletMeta::kTabletLoadFail) { - SetNextPhase(MoveTabletPhase::kLoadTablet); - } - else { - SetNextPhase(MoveTabletPhase::kEofPhase); - } +MoveTabletProcedure::MoveTabletProcedure(TabletPtr tablet, TabletNodePtr node, + ThreadPool* thread_pool) + : Procedure(ProcedureLimiter::LockType::kMove), + id_(std::string("MoveTablet:") + tablet->GetPath() + ":" + TimeStamp()), + tablet_(tablet), + dest_node_(node), + done_(false), + thread_pool_(thread_pool) { + PROC_LOG(INFO) << "move tablet begin, tablet: " << tablet_->GetPath(); + if (dest_node_) { + // PlanToMoveIn field should be removed in the future as it is the + // LoadBalanceModule's duty + // to avoid move many tablets to the same TS. + dest_node_->PlanToMoveIn(); + } + if (tablet_->GetStatus() == TabletMeta::kTabletReady || + tablet_->GetStatus() == TabletMeta::kTabletUnloadFail) { + SetNextPhase(MoveTabletPhase::kUnLoadTablet); + } else if (tablet_->GetStatus() == TabletMeta::kTabletOffline || + tablet_->GetStatus() == TabletMeta::kTabletLoadFail) { + SetNextPhase(MoveTabletPhase::kLoadTablet); + } else { + LOG(INFO) << "tablet status not support move, tablet:" << tablet_; + SetNextPhase(MoveTabletPhase::kEofPhase); + } } -std::string MoveTabletProcedure::ProcId() const { - return id_; -} +std::string MoveTabletProcedure::ProcId() const { return id_; } void MoveTabletProcedure::RunNextStage() { - MoveTabletPhase phase = GetCurrentPhase(); - auto it = phase_handlers_.find(phase); - PROC_CHECK(it != phase_handlers_.end()) << "illegal phase: " << phase << ", tablet: " << tablet_; - MoveTabletPhaseHandler handler = it->second; - handler(this, phase); + MoveTabletPhase phase = GetCurrentPhase(); + auto it = phase_handlers_.find(phase); + PROC_CHECK(it != phase_handlers_.end()) << "illegal phase: " << phase << ", tablet: " << tablet_; + MoveTabletPhaseHandler handler = it->second; + handler(this, phase); } void MoveTabletProcedure::UnloadTabletPhaseHandler(const MoveTabletPhase&) { - if (!unload_proc_) { - PROC_LOG(INFO) << "MoveTablet: Unload: " << tablet_; - unload_proc_.reset(new UnloadTabletProcedure(tablet_, thread_pool_, true)); - MasterEnv().GetExecutor()->AddProcedure(unload_proc_); - } - // currently tablet unloading operation has not finished yet, return and check - // status in next schedule cycle - if (!unload_proc_->Done()) { - return; - } - if (tablet_->GetStatus() != TabletMeta::kTabletOffline) { - // currently if unload fail, we directory abort the MoveTabletProcedure. U should also - // notice that if master_kick_tabletnode is enabled, we will never fall into unload - // fail position because we can always unload the tablet succ by kick off the TS - // TODO: if dfs directory lock is enabled, we can enter LOAD_TABLET phase directly - // as directory lock ensures we can avoid multi-load problem - SetNextPhase(MoveTabletPhase::kEofPhase); - return; - } - SetNextPhase(MoveTabletPhase::kLoadTablet); + if (!unload_proc_) { + PROC_LOG(INFO) << "MoveTablet: Unload: " << tablet_; + unload_proc_.reset(new UnloadTabletProcedure(tablet_, thread_pool_, true)); + MasterEnv().GetExecutor()->AddProcedure(unload_proc_); + } + // currently tablet unloading operation has not finished yet, return and check + // status in next schedule cycle + if (!unload_proc_->Done()) { + return; + } + if (tablet_->GetStatus() != TabletMeta::kTabletOffline) { + // currently if unload fail, we directly abort the MoveTabletProcedure. U + // should also + // notice that if master_kick_tabletnode is enabled, we will never fall into + // unload + // fail position because we can always unload the tablet succ by kick off + // the TS + // TODO: if dfs directory lock is enabled, we can enter LOAD_TABLET phase + // directly + // as directory lock ensures we can avoid multi-load problem + SetNextPhase(MoveTabletPhase::kEofPhase); + return; + } + SetNextPhase(MoveTabletPhase::kLoadTablet); } void MoveTabletProcedure::LoadTabletPhaseHandler(const MoveTabletPhase&) { - if (!load_proc_) { - load_proc_.reset(new LoadTabletProcedure(tablet_, dest_node_, thread_pool_)); - PROC_LOG(INFO) << "MoveTablet: generate async LoadTabletProcedure: " - << load_proc_->ProcId() << "tablet " << tablet_; - MasterEnv().GetExecutor()->AddProcedure(load_proc_); - } - SetNextPhase(MoveTabletPhase::kEofPhase); + if (!load_proc_) { + tablet_->IncVersion(); + load_proc_.reset(new LoadTabletProcedure(tablet_, dest_node_, thread_pool_, true)); + PROC_LOG(INFO) << "MoveTablet: generate async LoadTabletProcedure: " << load_proc_->ProcId() + << "tablet " << tablet_; + MasterEnv().GetExecutor()->AddProcedure(load_proc_); + } + SetNextPhase(MoveTabletPhase::kEofPhase); } void MoveTabletProcedure::EOFPhaseHandler(const MoveTabletPhase&) { - PROC_LOG(INFO) << "tablet: " << tablet_->GetPath() << "move EOF_PHASE"; - if (dest_node_) { - dest_node_->DoneMoveIn(); - } - // record last move time, avoiding move a tablet too frequently - tablet_->SetLastMoveTime(get_micros()); - done_ = true; + PROC_LOG(INFO) << "tablet: " << tablet_->GetPath() << "move EOF_PHASE"; + if (dest_node_) { + dest_node_->DoneMoveIn(); + } + // record last move time, avoiding move a tablet too frequently + tablet_->SetLastMoveTime(get_micros()); + if (!load_proc_) { + LOG(WARNING) << "move tablet finished without load procedure, tablet: " << tablet_; + tablet_->UnlockTransition(); + } + done_ = true; } -std::ostream& operator<< (std::ostream& o, const MoveTabletPhase& phase) { - static const char* msg[] = {"MoveTabletPhase::kUnLoadTablet", - "MoveTabletPhase::kLoadTablet", - "MoveTabletPhase::kEofPhase", - "MoveTabletPhase::Unknown"}; - static uint32_t msg_size = sizeof(msg) / sizeof(const char*); - typedef std::underlying_type::type UnderType; - uint32_t index = static_cast(phase) - static_cast(MoveTabletPhase::kUnLoadTablet); - index = index < msg_size ? index : msg_size - 1; - o << msg[index]; - return o; +std::ostream& operator<<(std::ostream& o, const MoveTabletPhase& phase) { + static const char* msg[] = {"MoveTabletPhase::kUnLoadTablet", "MoveTabletPhase::kLoadTablet", + "MoveTabletPhase::kEofPhase", "MoveTabletPhase::Unknown"}; + static uint32_t msg_size = sizeof(msg) / sizeof(const char*); + typedef std::underlying_type::type UnderType; + uint32_t index = + static_cast(phase) - static_cast(MoveTabletPhase::kUnLoadTablet); + index = index < msg_size ? index : msg_size - 1; + o << msg[index]; + return o; } - } } diff --git a/src/master/move_tablet_procedure.h b/src/master/move_tablet_procedure.h index 02bfaeeef..00906aead 100644 --- a/src/master/move_tablet_procedure.h +++ b/src/master/move_tablet_procedure.h @@ -16,56 +16,50 @@ namespace tera { namespace master { enum class MoveTabletPhase { - kUnLoadTablet, - kLoadTablet, - kEofPhase, + kUnLoadTablet, + kLoadTablet, + kEofPhase, }; +std::ostream& operator<<(std::ostream& o, const MoveTabletPhase& phase); -std::ostream& operator<< (std::ostream& o, const MoveTabletPhase& phase); - -// Notice that MoveTabletProcedure is splitted into a UnloadTabletProcedure and a LoadTabletProcedure, +// Notice that MoveTabletProcedure is splitted into a UnloadTabletProcedure and +// a LoadTabletProcedure, // so MoveTabletProcedure do not deal with tablet state transition directly class MoveTabletProcedure : public Procedure { -public: - MoveTabletProcedure(TabletPtr tablet, TabletNodePtr node, ThreadPool* thread_pool); - - virtual ~MoveTabletProcedure() {} - - virtual std::string ProcId() const; + public: + MoveTabletProcedure(TabletPtr tablet, TabletNodePtr node, ThreadPool* thread_pool); - virtual void RunNextStage(); + virtual ~MoveTabletProcedure() {} - virtual bool Done() {return done_;} + virtual std::string ProcId() const; -private: - typedef std::function MoveTabletPhaseHandler; - - MoveTabletPhase GetCurrentPhase() { - return phases_.back(); - } - - void SetNextPhase(const MoveTabletPhase& phase) { - phases_.emplace_back(phase); - } + virtual void RunNextStage(); - void UnloadTabletPhaseHandler(const MoveTabletPhase& phase); - void LoadTabletPhaseHandler(const MoveTabletPhase& phase); - void EOFPhaseHandler(const MoveTabletPhase& phase); + virtual bool Done() { return done_; } -private: - const std::string id_; - std::mutex mutex_; - TabletPtr tablet_; - TabletNodePtr dest_node_; - bool done_; - std::shared_ptr unload_proc_; - std::shared_ptr load_proc_; - std::vector phases_; - static std::map phase_handlers_; - ThreadPool* thread_pool_; -}; + private: + typedef std::function MoveTabletPhaseHandler; + + MoveTabletPhase GetCurrentPhase() { return phases_.back(); } + + void SetNextPhase(const MoveTabletPhase& phase) { phases_.emplace_back(phase); } + void UnloadTabletPhaseHandler(const MoveTabletPhase& phase); + void LoadTabletPhaseHandler(const MoveTabletPhase& phase); + void EOFPhaseHandler(const MoveTabletPhase& phase); + + private: + const std::string id_; + std::mutex mutex_; + TabletPtr tablet_; + TabletNodePtr dest_node_; + bool done_; + std::shared_ptr unload_proc_; + std::shared_ptr load_proc_; + std::vector phases_; + static std::map phase_handlers_; + ThreadPool* thread_pool_; +}; } } - diff --git a/src/master/multi_tenancy_service_impl.cc b/src/master/multi_tenancy_service_impl.cc new file mode 100644 index 000000000..458aec943 --- /dev/null +++ b/src/master/multi_tenancy_service_impl.cc @@ -0,0 +1,220 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "master/multi_tenancy_service_impl.h" +#include +#include +#include "access/access_entry.h" +#include "quota/quota_entry.h" +#include "master/master_env.h" +#include "master/update_auth_procedure.h" +#include "master/set_quota_procedure.h" +#include "access/helpers/access_utils.h" +#include "quota/helpers/quota_utils.h" + +namespace tera { +namespace master { + +MultiTenacyServiceImpl::MultiTenacyServiceImpl( + const std::shared_ptr& access_entry, + const std::shared_ptr& quota_entry) + : access_entry_(access_entry), + quota_entry_(quota_entry), + thread_pool_(MasterEnv().GetThreadPool()) {} + +void MultiTenacyServiceImpl::UpdateUgi(const UpdateUgiRequest* request, UpdateUgiResponse* response, + google::protobuf::Closure* done) { + response->set_sequence_id(request->sequence_id()); + MasterStatus master_status = MasterEnv().GetMaster()->GetMasterStatus(); + if (master_status != kIsRunning) { + LOG(ERROR) << "master is not ready, status_ = " + << StatusCodeToString(static_cast(master_status)); + response->set_status(static_cast(master_status)); + done->Run(); + return; + } + + // TODO: doesn't need access verify at first + + // Parse ugi info to MetaWriteRecord + std::unique_ptr meta_write_record( + auth::AccessUtils::NewMetaRecord(access_entry_, request->update_info())); + if (!meta_write_record) { + response->set_status(kMismatchAuthType); + done->Run(); + return; + } + std::shared_ptr proc(new UpdateAuthProcedure( + request, response, done, thread_pool_.get(), access_entry_, meta_write_record, + auth::AccessUpdateType::UpdateUgi)); + MasterEnv().GetExecutor()->AddProcedure(proc); +} + +void MultiTenacyServiceImpl::ShowUgi(const ShowUgiRequest* request, ShowUgiResponse* response, + google::protobuf::Closure* done) { + response->set_sequence_id(request->sequence_id()); + MasterStatus master_status = MasterEnv().GetMaster()->GetMasterStatus(); + if (master_status != kIsRunning) { + LOG(ERROR) << "master is not ready, status_ = " + << StatusCodeToString(static_cast(master_status)); + response->set_status(static_cast(master_status)); + done->Run(); + return; + } + + // TODO: doesn't need access verify at first + + access_entry_->GetAccessUpdater().ShowUgiInfo(response); + response->set_status(kMasterOk); + done->Run(); +} + +void MultiTenacyServiceImpl::UpdateAuth(const UpdateAuthRequest* request, + UpdateAuthResponse* response, + google::protobuf::Closure* done) { + response->set_sequence_id(request->sequence_id()); + MasterStatus master_status = MasterEnv().GetMaster()->GetMasterStatus(); + if (master_status != kIsRunning) { + LOG(ERROR) << "master is not ready, status_ = " + << StatusCodeToString(static_cast(master_status)); + response->set_status(static_cast(master_status)); + done->Run(); + return; + } + + // TODO: doesn't need access verify at first + + // Update Auth + std::unique_ptr meta_write_record( + auth::AccessUtils::NewMetaRecord(access_entry_, request->update_info())); + if (!meta_write_record) { + response->set_status(kMismatchAuthType); + done->Run(); + return; + } + std::shared_ptr proc(new UpdateAuthProcedure( + request, response, done, thread_pool_.get(), access_entry_, meta_write_record, + auth::AccessUpdateType::UpdateAuth)); + MasterEnv().GetExecutor()->AddProcedure(proc); +} + +void MultiTenacyServiceImpl::ShowAuth(const ShowAuthRequest* request, ShowAuthResponse* response, + google::protobuf::Closure* done) { + response->set_sequence_id(request->sequence_id()); + MasterStatus master_status = MasterEnv().GetMaster()->GetMasterStatus(); + if (master_status != kIsRunning) { + LOG(ERROR) << "master is not ready, status_ = " + << StatusCodeToString(static_cast(master_status)); + response->set_status(static_cast(master_status)); + done->Run(); + return; + } + + // TODO: doesn't need access verify at first + access_entry_->GetAccessUpdater().ShowAuthInfo(response); + response->set_status(kMasterOk); + done->Run(); +} + +void MultiTenacyServiceImpl::SetAuthPolicy(const SetAuthPolicyRequest* request, + SetAuthPolicyResponse* response, + google::protobuf::Closure* done) { + MasterStatus master_status = MasterEnv().GetMaster()->GetMasterStatus(); + if (master_status != kIsRunning) { + LOG(ERROR) << "master is not ready, status_ = " + << StatusCodeToString(static_cast(master_status)); + response->set_status(static_cast(master_status)); + done->Run(); + return; + } + response->set_status(kMasterOk); + done->Run(); +} + +void MultiTenacyServiceImpl::ShowAuthPolicy(const ShowAuthPolicyRequest* request, + ShowAuthPolicyResponse* response, + google::protobuf::Closure* done) { + MasterStatus master_status = MasterEnv().GetMaster()->GetMasterStatus(); + if (master_status != kIsRunning) { + LOG(ERROR) << "master is not ready, status_ = " + << StatusCodeToString(static_cast(master_status)); + response->set_status(static_cast(master_status)); + done->Run(); + return; + } + + // TODO: doesn't need access verify at first + + response->set_status(kMasterOk); + done->Run(); +} + +void MultiTenacyServiceImpl::SetQuota(const SetQuotaRequest* request, SetQuotaResponse* response, + google::protobuf::Closure* done) { + response->set_sequence_id(request->sequence_id()); + MasterStatus master_status = MasterEnv().GetMaster()->GetMasterStatus(); + if (master_status != kIsRunning) { + LOG(ERROR) << "master is not ready, status_ = " + << StatusCodeToString(static_cast(master_status)); + response->set_status(static_cast(master_status)); + done->Run(); + return; + } + + // TODO: doesn't need access verify at first + + // build meta_write_record for quota + // Should get quota in master memory and then merge the TableQuota + std::unique_ptr target_table_quota(new TableQuota); + if (!quota_entry_->GetTableQuota(request->table_quota().table_name(), target_table_quota.get())) { + // brand new table_quota, should set all default value + target_table_quota->set_table_name(request->table_quota().table_name()); + target_table_quota->set_type(TableQuota::kSetQuota); + // peroid = 1, limit = -1 + quota::MasterQuotaHelper::SetDefaultQuotaInfo(target_table_quota->add_quota_infos(), + kQuotaWriteReqs); + quota::MasterQuotaHelper::SetDefaultQuotaInfo(target_table_quota->add_quota_infos(), + kQuotaWriteBytes); + quota::MasterQuotaHelper::SetDefaultQuotaInfo(target_table_quota->add_quota_infos(), + kQuotaReadReqs); + quota::MasterQuotaHelper::SetDefaultQuotaInfo(target_table_quota->add_quota_infos(), + kQuotaReadBytes); + quota::MasterQuotaHelper::SetDefaultQuotaInfo(target_table_quota->add_quota_infos(), + kQuotaScanReqs); + quota::MasterQuotaHelper::SetDefaultQuotaInfo(target_table_quota->add_quota_infos(), + kQuotaScanBytes); + } + quota::MasterQuotaHelper::MergeTableQuota(request->table_quota(), target_table_quota.get()); + + std::unique_ptr meta_write_record( + quota::MasterQuotaHelper::NewMetaRecordFromQuota(*target_table_quota)); + if (!meta_write_record) { + response->set_status(kQuotaInvalidArg); + done->Run(); + return; + } + std::shared_ptr proc(new SetQuotaProcedure(request, response, done, thread_pool_.get(), + quota_entry_, meta_write_record)); + MasterEnv().GetExecutor()->AddProcedure(proc); +} + +void MultiTenacyServiceImpl::ShowQuota(const ShowQuotaRequest* request, ShowQuotaResponse* response, + google::protobuf::Closure* done) { + response->set_sequence_id(request->sequence_id()); + MasterStatus master_status = MasterEnv().GetMaster()->GetMasterStatus(); + if (master_status != kIsRunning) { + LOG(ERROR) << "master is not ready, status_ = " + << StatusCodeToString(static_cast(master_status)); + response->set_status(static_cast(master_status)); + done->Run(); + return; + } + + // TODO: doesn't need access verify at first + quota_entry_->ShowQuotaInfo(response, request->brief_show()); + response->set_status(kMasterOk); + done->Run(); +} +} +} diff --git a/src/master/multi_tenancy_service_impl.h b/src/master/multi_tenancy_service_impl.h new file mode 100644 index 000000000..a30267ae1 --- /dev/null +++ b/src/master/multi_tenancy_service_impl.h @@ -0,0 +1,53 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once +#include +#include "common/thread_pool.h" +#include "proto/master_rpc.pb.h" + +namespace tera { + +namespace auth { +class AccessEntry; +} + +namespace quota { +class MasterQuotaEntry; +} + +namespace master { + +class MultiTenacyServiceImpl { + public: + explicit MultiTenacyServiceImpl(const std::shared_ptr& access_entry, + const std::shared_ptr& quota_entry); + virtual ~MultiTenacyServiceImpl() {} + void UpdateUgi(const UpdateUgiRequest* request, UpdateUgiResponse* response, + google::protobuf::Closure* done); + void ShowUgi(const ShowUgiRequest* request, ShowUgiResponse* response, + google::protobuf::Closure* done); + + void UpdateAuth(const UpdateAuthRequest* request, UpdateAuthResponse* response, + google::protobuf::Closure* done); + void ShowAuth(const ShowAuthRequest* request, ShowAuthResponse* response, + google::protobuf::Closure* done); + + void SetAuthPolicy(const SetAuthPolicyRequest* request, SetAuthPolicyResponse* response, + google::protobuf::Closure* done); + void ShowAuthPolicy(const ShowAuthPolicyRequest* request, ShowAuthPolicyResponse* response, + google::protobuf::Closure* done); + + void SetQuota(const SetQuotaRequest* request, SetQuotaResponse* response, + google::protobuf::Closure* done); + void ShowQuota(const ShowQuotaRequest* request, ShowQuotaResponse* response, + google::protobuf::Closure* done); + + private: + std::shared_ptr access_entry_; + std::shared_ptr quota_entry_; + std::shared_ptr thread_pool_; +}; +} +} diff --git a/src/master/procedure.h b/src/master/procedure.h index df5daf823..27b975591 100644 --- a/src/master/procedure.h +++ b/src/master/procedure.h @@ -1,46 +1,62 @@ -// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #pragma once + #include #include + +#include "master/procedure_limiter.h" + namespace tera { namespace master { class Procedure { -public: - // ProcId, Should be unique for each Procedure - virtual std::string ProcId() const = 0; - // the whole lifecycle of a Procedure may be consist of several different stages, - // and RunNextState() may be called several times before Procedure is Done, so some status info should - // be saved in Your Procedure subclass at the end of each stage, and you should do detrmine - // what to do according the lastest status saved each time RunNextState() is called - virtual void RunNextStage() = 0; - // whether the Procedure is finished - virtual bool Done() = 0; - virtual ~Procedure() {} - -protected: - static std::string TimeStamp() { - int64_t ts = get_micros(); - char buf[128] = {0}; - snprintf(buf, 128, "%ld", ts); - return buf; - } + public: + // ProcId, Should be unique for each Procedure + virtual std::string ProcId() const = 0; + // the whole lifecycle of a Procedure may be consist of several different + // stages, + // and RunNextState() may be called several times before Procedure is Done, so + // some status info should + // be saved in Your Procedure subclass at the end of each stage, and you + // should do detrmine + // what to do according the lastest status saved each time RunNextState() is + // called + virtual void RunNextStage() = 0; + // whether the Procedure is finished + virtual bool Done() = 0; + + virtual ProcedureLimiter::LockType GetLockType() { return type_; } + + Procedure() : type_(ProcedureLimiter::LockType::kNoLimit) {} + Procedure(const ProcedureLimiter::LockType& type) : type_(type) {} + + virtual ~Procedure() {} + + protected: + static std::string TimeStamp() { + int64_t ts = get_micros(); + char buf[128] = {0}; + snprintf(buf, 128, "%ld", ts); + return buf; + } + + private: + ProcedureLimiter::LockType type_; }; // below macros should be only used inside subclasses of Procedure #ifndef TEST #define PROC_ID (!this ? std::string("") : ProcId()) -#else +#else #define PROC_ID std::string("test") #endif #define PROC_LOG(level) LOG(level) << "[" << PROC_ID << "] " #define PROC_VLOG(level) VLOG(level) << "[" << PROC_ID << "] " #define PROC_LOG_IF(level, condition) LOG_IF(level, condition) << "[" << PROC_ID << "] " -#define PROC_VLOG_IF(level, condition) VLOG_IF(level, condition) << "[" << PROC_ID<< "] " +#define PROC_VLOG_IF(level, condition) VLOG_IF(level, condition) << "[" << PROC_ID << "] " #define PROC_CHECK(condition) CHECK(condition) << "[" << PROC_ID << "] " } } - diff --git a/src/master/procedure_executor.cc b/src/master/procedure_executor.cc index 0fd4d187a..08ff0a2df 100644 --- a/src/master/procedure_executor.cc +++ b/src/master/procedure_executor.cc @@ -13,101 +13,98 @@ DEFINE_int32(procedure_executor_thread_num, 10, "procedure executor thread pool namespace tera { namespace master { - void ProcedureWrapper::RunNextStage(std::shared_ptr proc_executor) { - proc_->RunNextStage(); - scheduling_.store(false); - if (Done()) { - VLOG(23) << "procedure executor remove procedure: " << ProcId(); - proc_executor->RemoveProcedure(ProcId()); - } + proc_->RunNextStage(); + scheduling_.store(false); + if (Done()) { + ProcedureLimiter::Instance().ReleaseLock(proc_->GetLockType()); + VLOG(23) << "procedure executor remove procedure: " << ProcId(); + proc_executor->RemoveProcedure(ProcId()); + } } -ProcedureExecutor::ProcedureExecutor() : - running_(false), - proc_index_(0), - thread_pool_(new ThreadPool(FLAGS_procedure_executor_thread_num)){ - -} +ProcedureExecutor::ProcedureExecutor() + : running_(false), + proc_index_(0), + thread_pool_(new ThreadPool(FLAGS_procedure_executor_thread_num)) {} bool ProcedureExecutor::Start() { - std::unique_lock lock(mutex_); - if (running_) { - return false; - } - running_ = true; - schedule_thread_ = std::move(std::thread(&ProcedureExecutor::ScheduleProcedures, this)); - return true; + std::unique_lock lock(mutex_); + if (running_) { + return false; + } + running_ = true; + schedule_thread_ = std::move(std::thread(&ProcedureExecutor::ScheduleProcedures, this)); + return true; } void ProcedureExecutor::Stop() { - mutex_.lock(); - if (!running_) { - mutex_.unlock(); - return; - } - running_ = false; - cv_.notify_all(); - // it may takes a long time to join threads, so unlock mutex_ manually to minimize race condition + mutex_.lock(); + if (!running_) { mutex_.unlock(); + return; + } + running_ = false; + cv_.notify_all(); + // it may takes a long time to join threads, so unlock mutex_ manually to + // minimize race condition + mutex_.unlock(); - schedule_thread_.join(); - - thread_pool_->Stop(true); + schedule_thread_.join(); + thread_pool_->Stop(true); } uint64_t ProcedureExecutor::AddProcedure(std::shared_ptr proc) { - std::lock_guard lock(mutex_); - if (!running_) { - return 0; - } - std::string proc_id = proc->ProcId(); - if (procedure_indexs_.find(proc_id) != procedure_indexs_.end()) { - return 0; - } - procedure_indexs_.emplace(proc_id, ++proc_index_); - procedures_.emplace(proc_index_, - std::shared_ptr(new ProcedureWrapper(proc))); - cv_.notify_all(); - return proc_index_; + std::lock_guard lock(mutex_); + if (!running_) { + return 0; + } + std::string proc_id = proc->ProcId(); + if (procedure_indexs_.find(proc_id) != procedure_indexs_.end()) { + VLOG(23) << "Error in AddProcedure : " << proc_id << " has existed!"; + return 0; + } + procedure_indexs_.emplace(proc_id, ++proc_index_); + procedures_.emplace(proc_index_, std::shared_ptr(new ProcedureWrapper(proc))); + cv_.notify_all(); + return proc_index_; } bool ProcedureExecutor::RemoveProcedure(const std::string& proc_id) { - std::unique_lock lock(mutex_); - auto it = procedure_indexs_.find(proc_id); - if (it == procedure_indexs_.end()) { - return false; - } - procedures_.erase(it->second); - procedure_indexs_.erase(it); - return true; + std::unique_lock lock(mutex_); + auto it = procedure_indexs_.find(proc_id); + if (it == procedure_indexs_.end()) { + return false; + } + procedures_.erase(it->second); + procedure_indexs_.erase(it); + return true; } void ProcedureExecutor::ScheduleProcedures() { - while (running_){ - std::map> procedures; - { - std::unique_lock lock(mutex_); - while (procedures_.empty() && running_) { - cv_.wait(lock); - } - procedures = procedures_; - } - - for (auto it = procedures.begin(); it != procedures.end(); ++it) { - auto proc = it->second; - const std::string proc_id = proc->ProcId(); - if (proc->TrySchedule()) { - ThreadPool::Task task = std::bind(&ProcedureWrapper::RunNextStage, proc, shared_from_this()); - thread_pool_->AddTask(task); - } - } - // sleep 10ms before start next schedule round - usleep(10 * 1000); + while (running_) { + std::map> procedures; + { + std::unique_lock lock(mutex_); + while (procedures_.empty() && running_) { + cv_.wait(lock); + } + procedures = procedures_; } -} - + for (auto it = procedures.begin(); it != procedures.end(); ++it) { + auto proc = it->second; + const std::string proc_id = proc->ProcId(); + if (proc->TrySchedule()) { + ThreadPool::Task task = + std::bind(&ProcedureWrapper::RunNextStage, proc, shared_from_this()); + thread_pool_->AddTask(task); + } + } + // sleep 10ms before start next schedule round + usleep(10 * 1000); + } +} } } diff --git a/src/master/procedure_executor.h b/src/master/procedure_executor.h index b3ff2fbfb..bd78badd9 100644 --- a/src/master/procedure_executor.h +++ b/src/master/procedure_executor.h @@ -24,69 +24,77 @@ namespace master { class ProcedureExecutor; class ProcedureWrapper { -public: - explicit ProcedureWrapper(std::shared_ptr proc) : scheduling_(false), proc_(proc) {} - void RunNextStage(std::shared_ptr proc_executor); - - bool Done() { - return proc_->Done(); - } + public: + explicit ProcedureWrapper(std::shared_ptr proc) + : scheduling_(false), proc_(proc), got_lock_(false) {} + void RunNextStage(std::shared_ptr proc_executor); + + bool Done() { return proc_->Done(); } - std::string ProcId() { - return proc_->ProcId(); + std::string ProcId() { return proc_->ProcId(); } + + bool TrySchedule() { + if (!TryGetLock()) { + return false; + } + if (proc_->Done() || scheduling_) { + return false; } + scheduling_.store(true); + return true; + } - bool TrySchedule() { - if (proc_->Done() || scheduling_) { - return false; - } - scheduling_.store(true); - return true; + bool TryGetLock() { + if (got_lock_) { + return true; } + if (!ProcedureLimiter::Instance().GetLock(proc_->GetLockType())) { + return false; + } + got_lock_ = true; + return true; + } - std::atomic scheduling_; - std::shared_ptr proc_; + std::atomic scheduling_; + std::shared_ptr proc_; + bool got_lock_; }; class ProcedureExecutor : public std::enable_shared_from_this { -public: - ProcedureExecutor(); + public: + ProcedureExecutor(); - ~ProcedureExecutor() { - Stop(); - } - - bool Start(); - void Stop(); - - uint64_t AddProcedure(std::shared_ptr proc); - - void ScheduleProcedures(); - -private: - bool RemoveProcedure(const std::string& proc_id); - friend class ProcedureWrapper; - -private: - std::mutex mutex_; - std::condition_variable cv_; - bool running_; - - uint64_t proc_index_; - std::map procedure_indexs_; - // use integer as map key thus we can schedule Procedures - // according to the order as they are added to the map - std::map> procedures_; - - // ThreadPool used to run - std::shared_ptr thread_pool_; - // polling all Procedures and add Procedure can be scheduled to thread_pool, running the - // Procedure background in thread_pool_ - // A procedure may be scheduled several times until the Procedure is Done - std::thread schedule_thread_; + ~ProcedureExecutor() { Stop(); } -}; + bool Start(); + void Stop(); + uint64_t AddProcedure(std::shared_ptr proc); -} + void ScheduleProcedures(); + + private: + bool RemoveProcedure(const std::string& proc_id); + friend class ProcedureWrapper; + + private: + std::mutex mutex_; + std::condition_variable cv_; + std::atomic running_; + + uint64_t proc_index_; + std::map procedure_indexs_; + // use integer as map key thus we can schedule Procedures + // according to the order as they are added to the map + std::map> procedures_; + + // ThreadPool used to run + std::shared_ptr thread_pool_; + // polling all Procedures and add Procedure can be scheduled to thread_pool, + // running the + // Procedure background in thread_pool_ + // A procedure may be scheduled several times until the Procedure is Done + std::thread schedule_thread_; +}; +} } diff --git a/src/master/procedure_limiter.h b/src/master/procedure_limiter.h new file mode 100644 index 000000000..d1614fa18 --- /dev/null +++ b/src/master/procedure_limiter.h @@ -0,0 +1,152 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include + +#include +#include +#include +#include +#include + +#include +#include + +DECLARE_int32(master_merge_procedure_limit); +DECLARE_int32(master_split_procedure_limit); +DECLARE_int32(master_move_procedure_limit); +DECLARE_int32(master_load_procedure_limit); +DECLARE_int32(master_unload_procedure_limit); + +namespace tera { +namespace master { + +class ProcedureLimiter final { + public: + enum class LockType { kNoLimit = 0, kMerge, kSplit, kMove, kLoad, kUnload }; + + friend std::ostream& operator<<(std::ostream& os, const ProcedureLimiter::LockType& type) { + static const std::vector msg = {"kNoLimit", "kMerge", "kSplit", + "kMove", "kLoad", "kUnload"}; + size_t index = static_cast::type>(type); + assert(index < msg.size()); + os << msg[index]; + return os; + } + + public: + static ProcedureLimiter& Instance() { + static ProcedureLimiter instance; + return instance; + } + + bool GetLock(const LockType& type) { + if (type == LockType::kNoLimit) { + VLOG(20) << "[ProcedureLimiter] get lock for type:" << type << " success"; + return true; + } + assert(limit_.find(type) != limit_.end()); + assert(in_use_.find(type) != in_use_.end()); + + std::lock_guard guard(mutex_); + if (in_use_[type] >= limit_[type]) { + VLOG(20) << "[ProcedureLimiter] get lock for type:" << type + << " fail, reason: lock exhaust, lock limit:" << limit_[type] + << ", in use:" << in_use_[type]; + return false; + } + ++in_use_[type]; + VLOG(20) << "[ProcedureLimiter] get lock for type:" << type + << " success, lock limit:" << limit_[type] << ", in use:" << in_use_[type]; + return true; + } + + void ReleaseLock(const LockType& type) { + if (type == LockType::kNoLimit) { + VLOG(20) << "[ProcedureLimiter] release lock for type:" << type << " success"; + return; + } + assert(limit_.find(type) != limit_.end()); + assert(in_use_.find(type) != in_use_.end()); + + std::lock_guard guard(mutex_); + assert(in_use_[type] > 0); + --in_use_[type]; + VLOG(20) << "[ProcedureLimiter] release lock for type:" << type + << " success, lock limit:" << limit_[type] << ", in use:" << in_use_[type]; + } + + void SetLockLimit(const LockType& type, uint32_t num) { + std::lock_guard guard(mutex_); + limit_[type] = num; + VLOG(20) << "[ProcedureLimiter] set lock type:" << type << " with lock limit:" << num; + } + + uint32_t GetLockLimit(const LockType& type) const { + assert(limit_.find(type) != limit_.end()); + std::lock_guard guard(mutex_); + return limit_[type]; + } + + uint32_t GetLockInUse(const LockType& type) const { + assert(in_use_.find(type) != in_use_.end()); + std::lock_guard guard(mutex_); + return in_use_[type]; + } + + std::string GetSummary() const { + std::ostringstream res_ss; + res_ss << "[kMerge, limit:" << GetLockLimit(LockType::kMerge) + << ", in_use:" << GetLockInUse(LockType::kMerge) + << "]\n[kSplit, limit:" << GetLockLimit(LockType::kSplit) + << ", in_use:" << GetLockInUse(LockType::kSplit) + << "]\n[kMove, limit:" << GetLockLimit(LockType::kMove) + << ", in_use:" << GetLockInUse(LockType::kMove) + << "]\n[kLoad, limit:" << GetLockLimit(LockType::kLoad) + << ", in_use:" << GetLockInUse(LockType::kLoad) + << "]\n[kUnload, limit:" << GetLockLimit(LockType::kUnload) + << ", in_use:" << GetLockInUse(LockType::kUnload) << "]"; + return res_ss.str(); + } + + private: + struct LockTypeHash { + template + std::size_t operator()(const T& t) const { + return static_cast(t); + } + }; + mutable std::unordered_map limit_; + mutable std::unordered_map in_use_; + mutable std::mutex mutex_; + + private: + ProcedureLimiter() { + SetLockLimit(LockType::kMerge, static_cast(FLAGS_master_merge_procedure_limit)); + SetLockLimit(LockType::kSplit, static_cast(FLAGS_master_split_procedure_limit)); + SetLockLimit(LockType::kMove, static_cast(FLAGS_master_move_procedure_limit)); + SetLockLimit(LockType::kLoad, static_cast(FLAGS_master_load_procedure_limit)); + SetLockLimit(LockType::kUnload, static_cast(FLAGS_master_unload_procedure_limit)); + { + std::lock_guard guard(mutex_); + in_use_[LockType::kMerge] = 0; + in_use_[LockType::kSplit] = 0; + in_use_[LockType::kMove] = 0; + in_use_[LockType::kLoad] = 0; + in_use_[LockType::kUnload] = 0; + } + } + + ~ProcedureLimiter() = default; + + ProcedureLimiter(const ProcedureLimiter&) = delete; + ProcedureLimiter& operator=(const ProcedureLimiter&) = delete; + ProcedureLimiter(ProcedureLimiter&&) = delete; + ProcedureLimiter& operator=(ProcedureLimiter&&) = delete; +}; + +} // namespace master +} // namespace tera diff --git a/src/master/remote_master.cc b/src/master/remote_master.cc index a87b82733..fbe8433ab 100644 --- a/src/master/remote_master.cc +++ b/src/master/remote_master.cc @@ -20,227 +20,206 @@ DECLARE_string(tera_master_stat_table_name); namespace tera { namespace master { -RemoteMaster::RemoteMaster(MasterImpl* master_impl) - : master_impl_(master_impl), - thread_pool_(new ThreadPool(FLAGS_tera_master_thread_max_num)) {} +RemoteMaster::RemoteMaster(MasterImpl* master_impl, std::shared_ptr thread_pool) + : master_impl_(master_impl), thread_pool_(thread_pool) {} RemoteMaster::~RemoteMaster() {} void RemoteMaster::CreateTable(google::protobuf::RpcController* controller, - const CreateTableRequest* request, - CreateTableResponse* response, + const CreateTableRequest* request, CreateTableResponse* response, google::protobuf::Closure* done) { - LOG(INFO) << "accept RPC (CreateTable): " << tera::utils::GetRemoteAddress(controller); - ThreadPool::Task callback = - std::bind(&RemoteMaster::DoCreateTable, this, controller, request, response, done); - thread_pool_->AddTask(callback); + LOG(INFO) << "accept RPC (CreateTable): " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task callback = + std::bind(&RemoteMaster::DoCreateTable, this, controller, request, response, done); + thread_pool_->AddTask(callback); } void RemoteMaster::DeleteTable(google::protobuf::RpcController* controller, - const DeleteTableRequest* request, - DeleteTableResponse* response, + const DeleteTableRequest* request, DeleteTableResponse* response, google::protobuf::Closure* done) { - LOG(INFO) << "accept RPC (DeleteTable): " << tera::utils::GetRemoteAddress(controller); - ThreadPool::Task callback = - std::bind(&RemoteMaster::DoDeleteTable, this, controller, request, response, done); - thread_pool_->AddTask(callback); + LOG(INFO) << "accept RPC (DeleteTable): " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task callback = + std::bind(&RemoteMaster::DoDeleteTable, this, controller, request, response, done); + thread_pool_->AddTask(callback); } void RemoteMaster::DisableTable(google::protobuf::RpcController* controller, - const DisableTableRequest* request, - DisableTableResponse* response, + const DisableTableRequest* request, DisableTableResponse* response, google::protobuf::Closure* done) { - LOG(INFO) << "accept RPC (DisableTable): " << tera::utils::GetRemoteAddress(controller); - ThreadPool::Task callback = - std::bind(&RemoteMaster::DoDisableTable, this, controller, request, response, done); - thread_pool_->AddTask(callback); + LOG(INFO) << "accept RPC (DisableTable): " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task callback = + std::bind(&RemoteMaster::DoDisableTable, this, controller, request, response, done); + thread_pool_->AddTask(callback); } void RemoteMaster::EnableTable(google::protobuf::RpcController* controller, - const EnableTableRequest* request, - EnableTableResponse* response, + const EnableTableRequest* request, EnableTableResponse* response, google::protobuf::Closure* done) { - LOG(INFO) << "accept RPC (EnableTable): " << tera::utils::GetRemoteAddress(controller); - ThreadPool::Task callback = - std::bind(&RemoteMaster::DoEnableTable, this, controller, request, response, done); - thread_pool_->AddTask(callback); + LOG(INFO) << "accept RPC (EnableTable): " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task callback = + std::bind(&RemoteMaster::DoEnableTable, this, controller, request, response, done); + thread_pool_->AddTask(callback); } void RemoteMaster::UpdateTable(google::protobuf::RpcController* controller, - const UpdateTableRequest* request, - UpdateTableResponse* response, + const UpdateTableRequest* request, UpdateTableResponse* response, google::protobuf::Closure* done) { - LOG(INFO) << "accept RPC (UpdateTable): " << tera::utils::GetRemoteAddress(controller); - ThreadPool::Task callback = - std::bind(&RemoteMaster::DoUpdateTable, this, controller, request, response, done); - thread_pool_->AddTask(callback); + LOG(INFO) << "accept RPC (UpdateTable): " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task callback = + std::bind(&RemoteMaster::DoUpdateTable, this, controller, request, response, done); + thread_pool_->AddTask(callback); } void RemoteMaster::UpdateCheck(google::protobuf::RpcController* controller, - const UpdateCheckRequest* request, - UpdateCheckResponse* response, + const UpdateCheckRequest* request, UpdateCheckResponse* response, google::protobuf::Closure* done) { - LOG(INFO) << "accept RPC (UpdateCheck): " << tera::utils::GetRemoteAddress(controller); - ThreadPool::Task callback = - std::bind(&RemoteMaster::DoUpdateCheck, this, controller, request, response, done); - thread_pool_->AddTask(callback); + LOG(INFO) << "accept RPC (UpdateCheck): " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task callback = + std::bind(&RemoteMaster::DoUpdateCheck, this, controller, request, response, done); + thread_pool_->AddTask(callback); } void RemoteMaster::SearchTable(google::protobuf::RpcController* controller, - const SearchTableRequest* request, - SearchTableResponse* response, + const SearchTableRequest* request, SearchTableResponse* response, google::protobuf::Closure* done) { - LOG(INFO) << "accept RPC (SearchTable): " << tera::utils::GetRemoteAddress(controller); - ThreadPool::Task callback = - std::bind(&RemoteMaster::DoSearchTable, this, controller, request, response, done); - thread_pool_->AddTask(callback); + LOG(INFO) << "accept RPC (SearchTable): " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task callback = + std::bind(&RemoteMaster::DoSearchTable, this, controller, request, response, done); + thread_pool_->AddTask(callback); } void RemoteMaster::ShowTables(google::protobuf::RpcController* controller, - const ShowTablesRequest* request, - ShowTablesResponse* response, + const ShowTablesRequest* request, ShowTablesResponse* response, google::protobuf::Closure* done) { - LOG(INFO) << "accept RPC (ShowTables): " << tera::utils::GetRemoteAddress(controller); - ThreadPool::Task callback = - std::bind(&RemoteMaster::DoShowTables, this, controller, request, response, done); - thread_pool_->AddTask(callback); + LOG(INFO) << "accept RPC (ShowTables): " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task callback = + std::bind(&RemoteMaster::DoShowTables, this, controller, request, response, done); + thread_pool_->AddTask(callback); } void RemoteMaster::ShowTabletNodes(google::protobuf::RpcController* controller, const ShowTabletNodesRequest* request, ShowTabletNodesResponse* response, google::protobuf::Closure* done) { - LOG(INFO) << "accept RPC (ShowTabletNodes): " << tera::utils::GetRemoteAddress(controller); - ThreadPool::Task callback = - std::bind(&RemoteMaster::DoShowTabletNodes, this, controller, request, response, done); - thread_pool_->AddTask(callback); + LOG(INFO) << "accept RPC (ShowTabletNodes): " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task callback = + std::bind(&RemoteMaster::DoShowTabletNodes, this, controller, request, response, done); + thread_pool_->AddTask(callback); } void RemoteMaster::CmdCtrl(google::protobuf::RpcController* controller, - const CmdCtrlRequest* request, - CmdCtrlResponse* response, + const CmdCtrlRequest* request, CmdCtrlResponse* response, google::protobuf::Closure* done) { - LOG(INFO) << "accept RPC (CmdCtrl): " << tera::utils::GetRemoteAddress(controller); - ThreadPool::Task callback = - std::bind(&RemoteMaster::DoCmdCtrl, this, controller, request, response, done); - thread_pool_->AddTask(callback); + LOG(INFO) << "accept RPC (CmdCtrl): " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task callback = + std::bind(&RemoteMaster::DoCmdCtrl, this, controller, request, response, done); + thread_pool_->AddTask(callback); } void RemoteMaster::OperateUser(google::protobuf::RpcController* controller, - const OperateUserRequest* request, - OperateUserResponse* response, + const OperateUserRequest* request, OperateUserResponse* response, google::protobuf::Closure* done) { - LOG(INFO) << "accept RPC (OperateUser): " << tera::utils::GetRemoteAddress(controller); - ThreadPool::Task callback = - std::bind(&RemoteMaster::DoOperateUser, this, controller, request, response, done); - thread_pool_->AddTask(callback); + LOG(INFO) << "accept RPC (OperateUser): " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task callback = + std::bind(&RemoteMaster::DoOperateUser, this, controller, request, response, done); + thread_pool_->AddTask(callback); } // internal void RemoteMaster::DoCreateTable(google::protobuf::RpcController* controller, - const CreateTableRequest* request, - CreateTableResponse* response, + const CreateTableRequest* request, CreateTableResponse* response, google::protobuf::Closure* done) { - LOG(INFO) << "run RPC (CreateTable)"; - master_impl_->CreateTable(request, response, done); - LOG(INFO) << "finish RPC (CreateTable)"; + LOG(INFO) << "run RPC (CreateTable)"; + master_impl_->CreateTable(request, response, done); + LOG(INFO) << "finish RPC (CreateTable)"; } void RemoteMaster::DoDeleteTable(google::protobuf::RpcController* controller, - const DeleteTableRequest* request, - DeleteTableResponse* response, + const DeleteTableRequest* request, DeleteTableResponse* response, google::protobuf::Closure* done) { - LOG(INFO) << "run RPC (DeleteTable)"; - master_impl_->DeleteTable(request, response, done); - LOG(INFO) << "finish RPC (DeleteTable)"; + LOG(INFO) << "run RPC (DeleteTable)"; + master_impl_->DeleteTable(request, response, done); + LOG(INFO) << "finish RPC (DeleteTable)"; } void RemoteMaster::DoDisableTable(google::protobuf::RpcController* controller, const DisableTableRequest* request, - DisableTableResponse* response, - google::protobuf::Closure* done) { - LOG(INFO) << "run RPC (DisableTable)"; - master_impl_->DisableTable(request, response, done); - LOG(INFO) << "finish RPC (DisableTable)"; + DisableTableResponse* response, google::protobuf::Closure* done) { + LOG(INFO) << "run RPC (DisableTable)"; + master_impl_->DisableTable(request, response, done); + LOG(INFO) << "finish RPC (DisableTable)"; } void RemoteMaster::DoEnableTable(google::protobuf::RpcController* controller, - const EnableTableRequest* request, - EnableTableResponse* response, + const EnableTableRequest* request, EnableTableResponse* response, google::protobuf::Closure* done) { - LOG(INFO) << "run RPC (EnableTable)"; - master_impl_->EnableTable(request, response, done); - LOG(INFO) << "finish RPC (EnableTable)"; + LOG(INFO) << "run RPC (EnableTable)"; + master_impl_->EnableTable(request, response, done); + LOG(INFO) << "finish RPC (EnableTable)"; } void RemoteMaster::DoUpdateTable(google::protobuf::RpcController* controller, - const UpdateTableRequest* request, - UpdateTableResponse* response, + const UpdateTableRequest* request, UpdateTableResponse* response, google::protobuf::Closure* done) { - LOG(INFO) << "run RPC (UpdateTable)"; - master_impl_->UpdateTable(request, response, done); - LOG(INFO) << "finish RPC (UpdateTable)"; + LOG(INFO) << "run RPC (UpdateTable)"; + master_impl_->UpdateTable(request, response, done); + LOG(INFO) << "finish RPC (UpdateTable)"; } void RemoteMaster::DoUpdateCheck(google::protobuf::RpcController* controller, - const UpdateCheckRequest* request, - UpdateCheckResponse* response, + const UpdateCheckRequest* request, UpdateCheckResponse* response, google::protobuf::Closure* done) { - LOG(INFO) << "accept RPC (UpdateCheck)"; - master_impl_->UpdateCheck(request, response, done); - LOG(INFO) << "finish RPC (UpdateCheck)"; + LOG(INFO) << "accept RPC (UpdateCheck)"; + master_impl_->UpdateCheck(request, response, done); + LOG(INFO) << "finish RPC (UpdateCheck)"; } void RemoteMaster::DoSearchTable(google::protobuf::RpcController* controller, - const SearchTableRequest* request, - SearchTableResponse* response, + const SearchTableRequest* request, SearchTableResponse* response, google::protobuf::Closure* done) { - LOG(INFO) << "run RPC (SearchTable)"; - master_impl_->SearchTable(request, response, done); - LOG(INFO) << "finish RPC (SearchTable)"; + LOG(INFO) << "run RPC (SearchTable)"; + master_impl_->SearchTable(request, response, done); + LOG(INFO) << "finish RPC (SearchTable)"; } void RemoteMaster::DoShowTables(google::protobuf::RpcController* controller, - const ShowTablesRequest* request, - ShowTablesResponse* response, + const ShowTablesRequest* request, ShowTablesResponse* response, google::protobuf::Closure* done) { - LOG(INFO) << "run RPC (ShowTables)"; - if (request->has_all_brief() && request->all_brief()) { - master_impl_->ShowTablesBrief(request, response, done); - } else { - master_impl_->ShowTables(request, response, done); - } - LOG(INFO) << "finish RPC (ShowTables)"; + LOG(INFO) << "run RPC (ShowTables)"; + if (request->has_all_brief() && request->all_brief()) { + master_impl_->ShowTablesBrief(request, response, done); + } else { + master_impl_->ShowTables(request, response, done); + } + LOG(INFO) << "finish RPC (ShowTables)"; } void RemoteMaster::DoShowTabletNodes(google::protobuf::RpcController* controller, const ShowTabletNodesRequest* request, ShowTabletNodesResponse* response, google::protobuf::Closure* done) { - LOG(INFO) << "run RPC (ShowTabletNodes)"; - master_impl_->ShowTabletNodes(request, response, done); - LOG(INFO) << "finish RPC (ShowTabletNodes)"; + LOG(INFO) << "run RPC (ShowTabletNodes)"; + master_impl_->ShowTabletNodes(request, response, done); + LOG(INFO) << "finish RPC (ShowTabletNodes)"; } void RemoteMaster::DoCmdCtrl(google::protobuf::RpcController* controller, - const CmdCtrlRequest* request, - CmdCtrlResponse* response, + const CmdCtrlRequest* request, CmdCtrlResponse* response, google::protobuf::Closure* done) { - LOG(INFO) << "run RPC (CmdCtrl)"; - master_impl_->CmdCtrl(request, response); - LOG(INFO) << "finish RPC (CmdCtrl)"; + LOG(INFO) << "run RPC (CmdCtrl)"; + master_impl_->CmdCtrl(request, response); + LOG(INFO) << "finish RPC (CmdCtrl)"; - done->Run(); + done->Run(); } void RemoteMaster::DoOperateUser(google::protobuf::RpcController* controller, - const OperateUserRequest* request, - OperateUserResponse* response, + const OperateUserRequest* request, OperateUserResponse* response, google::protobuf::Closure* done) { - LOG(INFO) << "run RPC (OperateUser)"; - master_impl_->OperateUser(request, response, done); - LOG(INFO) << "finish RPC (OperateUser)"; + LOG(INFO) << "run RPC (OperateUser)"; + master_impl_->OperateUser(request, response, done); + LOG(INFO) << "finish RPC (OperateUser)"; } -} // namespace master -} // namespace tera +} // namespace master +} // namespace tera diff --git a/src/master/remote_master.h b/src/master/remote_master.h index c826041ad..7c39f2c62 100644 --- a/src/master/remote_master.h +++ b/src/master/remote_master.h @@ -5,9 +5,9 @@ #ifndef TERA_MASTER_REMOTE_MASTER_H_ #define TERA_MASTER_REMOTE_MASTER_H_ +#include #include "common/base/scoped_ptr.h" #include "common/thread_pool.h" - #include "proto/master_rpc.pb.h" namespace tera { @@ -16,127 +16,86 @@ namespace master { class MasterImpl; class RemoteMaster : public MasterServer { -public: - explicit RemoteMaster(MasterImpl* master_impl); - ~RemoteMaster(); - - void CreateTable(google::protobuf::RpcController* controller, - const CreateTableRequest* request, - CreateTableResponse* response, - google::protobuf::Closure* done); - - void DeleteTable(google::protobuf::RpcController* controller, - const DeleteTableRequest* request, - DeleteTableResponse* response, - google::protobuf::Closure* done); - - void DisableTable(google::protobuf::RpcController* controller, - const DisableTableRequest* request, - DisableTableResponse* response, - google::protobuf::Closure* done); - - void EnableTable(google::protobuf::RpcController* controller, - const EnableTableRequest* request, - EnableTableResponse* response, - google::protobuf::Closure* done); - - void UpdateTable(google::protobuf::RpcController* controller, - const UpdateTableRequest* request, - UpdateTableResponse* response, - google::protobuf::Closure* done); - - void UpdateCheck(google::protobuf::RpcController* controller, - const UpdateCheckRequest* request, - UpdateCheckResponse* response, - google::protobuf::Closure* done); - - void SearchTable(google::protobuf::RpcController* controller, - const SearchTableRequest* request, - SearchTableResponse* response, - google::protobuf::Closure* done); - - void ShowTables(google::protobuf::RpcController* controller, - const ShowTablesRequest* request, - ShowTablesResponse* response, - google::protobuf::Closure* done); - - void ShowTabletNodes(google::protobuf::RpcController* controller, - const ShowTabletNodesRequest* request, - ShowTabletNodesResponse* response, - google::protobuf::Closure* done); + public: + explicit RemoteMaster(MasterImpl* master_impl, std::shared_ptr thread_pool); + ~RemoteMaster(); - void CmdCtrl(google::protobuf::RpcController* controller, - const CmdCtrlRequest* request, - CmdCtrlResponse* response, - google::protobuf::Closure* done); + void CreateTable(google::protobuf::RpcController* controller, const CreateTableRequest* request, + CreateTableResponse* response, google::protobuf::Closure* done); - void OperateUser(google::protobuf::RpcController* controller, - const OperateUserRequest* request, - OperateUserResponse* response, - google::protobuf::Closure* done); + void DeleteTable(google::protobuf::RpcController* controller, const DeleteTableRequest* request, + DeleteTableResponse* response, google::protobuf::Closure* done); -private: - void DoCreateTable(google::protobuf::RpcController* controller, - const CreateTableRequest* request, - CreateTableResponse* response, - google::protobuf::Closure* done); + void DisableTable(google::protobuf::RpcController* controller, const DisableTableRequest* request, + DisableTableResponse* response, google::protobuf::Closure* done); - void DoDeleteTable(google::protobuf::RpcController* controller, - const DeleteTableRequest* request, - DeleteTableResponse* response, - google::protobuf::Closure* done); + void EnableTable(google::protobuf::RpcController* controller, const EnableTableRequest* request, + EnableTableResponse* response, google::protobuf::Closure* done); - void DoDisableTable(google::protobuf::RpcController* controller, - const DisableTableRequest* request, - DisableTableResponse* response, - google::protobuf::Closure* done); + void UpdateTable(google::protobuf::RpcController* controller, const UpdateTableRequest* request, + UpdateTableResponse* response, google::protobuf::Closure* done); - void DoEnableTable(google::protobuf::RpcController* controller, - const EnableTableRequest* request, - EnableTableResponse* response, - google::protobuf::Closure* done); + void UpdateCheck(google::protobuf::RpcController* controller, const UpdateCheckRequest* request, + UpdateCheckResponse* response, google::protobuf::Closure* done); - void DoUpdateTable(google::protobuf::RpcController* controller, - const UpdateTableRequest* request, - UpdateTableResponse* response, - google::protobuf::Closure* done); + void SearchTable(google::protobuf::RpcController* controller, const SearchTableRequest* request, + SearchTableResponse* response, google::protobuf::Closure* done); - void DoUpdateCheck(google::protobuf::RpcController* controller, - const UpdateCheckRequest* request, - UpdateCheckResponse* response, - google::protobuf::Closure* done); + void ShowTables(google::protobuf::RpcController* controller, const ShowTablesRequest* request, + ShowTablesResponse* response, google::protobuf::Closure* done); - void DoSearchTable(google::protobuf::RpcController* controller, - const SearchTableRequest* request, - SearchTableResponse* response, + void ShowTabletNodes(google::protobuf::RpcController* controller, + const ShowTabletNodesRequest* request, ShowTabletNodesResponse* response, google::protobuf::Closure* done); - void DoShowTables(google::protobuf::RpcController* controller, - const ShowTablesRequest* request, - ShowTablesResponse* response, + void CmdCtrl(google::protobuf::RpcController* controller, const CmdCtrlRequest* request, + CmdCtrlResponse* response, google::protobuf::Closure* done); + + void OperateUser(google::protobuf::RpcController* controller, const OperateUserRequest* request, + OperateUserResponse* response, google::protobuf::Closure* done); + + private: + void DoCreateTable(google::protobuf::RpcController* controller, const CreateTableRequest* request, + CreateTableResponse* response, google::protobuf::Closure* done); + + void DoDeleteTable(google::protobuf::RpcController* controller, const DeleteTableRequest* request, + DeleteTableResponse* response, google::protobuf::Closure* done); + + void DoDisableTable(google::protobuf::RpcController* controller, + const DisableTableRequest* request, DisableTableResponse* response, google::protobuf::Closure* done); - void DoShowTabletNodes(google::protobuf::RpcController* controller, - const ShowTabletNodesRequest* request, - ShowTabletNodesResponse* response, - google::protobuf::Closure* done); + void DoEnableTable(google::protobuf::RpcController* controller, const EnableTableRequest* request, + EnableTableResponse* response, google::protobuf::Closure* done); - void DoCmdCtrl(google::protobuf::RpcController* controller, - const CmdCtrlRequest* request, - CmdCtrlResponse* response, - google::protobuf::Closure* done); + void DoUpdateTable(google::protobuf::RpcController* controller, const UpdateTableRequest* request, + UpdateTableResponse* response, google::protobuf::Closure* done); - void DoOperateUser(google::protobuf::RpcController* controller, - const OperateUserRequest* request, - OperateUserResponse* response, - google::protobuf::Closure* done); -private: - MasterImpl* master_impl_; - scoped_ptr thread_pool_; -}; + void DoUpdateCheck(google::protobuf::RpcController* controller, const UpdateCheckRequest* request, + UpdateCheckResponse* response, google::protobuf::Closure* done); + + void DoSearchTable(google::protobuf::RpcController* controller, const SearchTableRequest* request, + SearchTableResponse* response, google::protobuf::Closure* done); + + void DoShowTables(google::protobuf::RpcController* controller, const ShowTablesRequest* request, + ShowTablesResponse* response, google::protobuf::Closure* done); + void DoShowTabletNodes(google::protobuf::RpcController* controller, + const ShowTabletNodesRequest* request, ShowTabletNodesResponse* response, + google::protobuf::Closure* done); + + void DoCmdCtrl(google::protobuf::RpcController* controller, const CmdCtrlRequest* request, + CmdCtrlResponse* response, google::protobuf::Closure* done); + + void DoOperateUser(google::protobuf::RpcController* controller, const OperateUserRequest* request, + OperateUserResponse* response, google::protobuf::Closure* done); + + private: + MasterImpl* master_impl_; + std::shared_ptr thread_pool_; +}; -} // namespace master -} // namespace tera +} // namespace master +} // namespace tera -#endif // TERA_MASTER_REMOTE_MASTER_H_ +#endif // TERA_MASTER_REMOTE_MASTER_H_ diff --git a/src/master/remote_multi_tenancy_service.cc b/src/master/remote_multi_tenancy_service.cc new file mode 100644 index 000000000..916bbc9e1 --- /dev/null +++ b/src/master/remote_multi_tenancy_service.cc @@ -0,0 +1,171 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "master/remote_multi_tenancy_service.h" +#include +#include +#include +#include "master/multi_tenancy_service_impl.h" +#include "utils/network_utils.h" + +namespace tera { +namespace master { + +RemoteMultiTenancyService::RemoteMultiTenancyService( + MultiTenacyServiceImpl* multi_tenancy_service_impl, std::shared_ptr thread_pool) + : multi_tenancy_service_impl_(multi_tenancy_service_impl), thread_pool_(thread_pool) {} + +RemoteMultiTenancyService::~RemoteMultiTenancyService() {} + +void RemoteMultiTenancyService::UpdateUgi(google::protobuf::RpcController* controller, + const UpdateUgiRequest* request, + UpdateUgiResponse* response, + google::protobuf::Closure* done) { + LOG(INFO) << "accept RPC (UpdateUgi): " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task callback = + std::bind(&RemoteMultiTenancyService::DoUpdateUgi, this, controller, request, response, done); + thread_pool_->AddTask(callback); +} + +void RemoteMultiTenancyService::ShowUgi(google::protobuf::RpcController* controller, + const ShowUgiRequest* request, ShowUgiResponse* response, + google::protobuf::Closure* done) { + LOG(INFO) << "accept RPC (ShowUgi): " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task callback = + std::bind(&RemoteMultiTenancyService::DoShowUgi, this, controller, request, response, done); + thread_pool_->AddTask(callback); +} + +void RemoteMultiTenancyService::UpdateAuth(google::protobuf::RpcController* controller, + const UpdateAuthRequest* request, + UpdateAuthResponse* response, + google::protobuf::Closure* done) { + LOG(INFO) << "accept RPC (UpdateAuth): " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task callback = std::bind(&RemoteMultiTenancyService::DoUpdateAuth, this, controller, + request, response, done); + thread_pool_->AddTask(callback); +} + +void RemoteMultiTenancyService::ShowAuth(google::protobuf::RpcController* controller, + const ShowAuthRequest* request, ShowAuthResponse* response, + google::protobuf::Closure* done) { + LOG(INFO) << "accept RPC (ShowAuth): " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task callback = + std::bind(&RemoteMultiTenancyService::DoShowAuth, this, controller, request, response, done); + thread_pool_->AddTask(callback); +} + +void RemoteMultiTenancyService::SetAuthPolicy(google::protobuf::RpcController* controller, + const SetAuthPolicyRequest* request, + SetAuthPolicyResponse* response, + google::protobuf::Closure* done) { + LOG(INFO) << "accept RPC (SetAuthPolicy): " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task callback = std::bind(&RemoteMultiTenancyService::DoSetAuthPolicy, this, + controller, request, response, done); + thread_pool_->AddTask(callback); +} + +void RemoteMultiTenancyService::ShowAuthPolicy(google::protobuf::RpcController* controller, + const ShowAuthPolicyRequest* request, + ShowAuthPolicyResponse* response, + google::protobuf::Closure* done) { + LOG(INFO) << "accept RPC (ShowAuthPolicy): " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task callback = std::bind(&RemoteMultiTenancyService::DoShowAuthPolicy, this, + controller, request, response, done); + thread_pool_->AddTask(callback); +} + +void RemoteMultiTenancyService::SetQuota(google::protobuf::RpcController* controller, + const SetQuotaRequest* request, SetQuotaResponse* response, + google::protobuf::Closure* done) { + LOG(INFO) << "accept RPC (SetQuota): " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task callback = + std::bind(&RemoteMultiTenancyService::DoSetQuota, this, controller, request, response, done); + thread_pool_->AddTask(callback); +} + +void RemoteMultiTenancyService::ShowQuota(google::protobuf::RpcController* controller, + const ShowQuotaRequest* request, + ShowQuotaResponse* response, + google::protobuf::Closure* done) { + LOG(INFO) << "accept RPC (ShowQuota): " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task callback = + std::bind(&RemoteMultiTenancyService::DoShowQuota, this, controller, request, response, done); + thread_pool_->AddTask(callback); +} + +// private + +void RemoteMultiTenancyService::DoUpdateUgi(google::protobuf::RpcController* controller, + const UpdateUgiRequest* request, + UpdateUgiResponse* response, + google::protobuf::Closure* done) { + LOG(INFO) << "run RPC (UpdateUgi)"; + multi_tenancy_service_impl_->UpdateUgi(request, response, done); + LOG(INFO) << "finish RPC (UpdateUgi)"; +} + +void RemoteMultiTenancyService::DoShowUgi(google::protobuf::RpcController* controller, + const ShowUgiRequest* request, ShowUgiResponse* response, + google::protobuf::Closure* done) { + LOG(INFO) << "run RPC (ShowUgi)"; + multi_tenancy_service_impl_->ShowUgi(request, response, done); + LOG(INFO) << "finish RPC (ShowUgi)"; +} + +void RemoteMultiTenancyService::DoUpdateAuth(google::protobuf::RpcController* controller, + const UpdateAuthRequest* request, + UpdateAuthResponse* response, + google::protobuf::Closure* done) { + LOG(INFO) << "run RPC (UpdateAuth)"; + multi_tenancy_service_impl_->UpdateAuth(request, response, done); + LOG(INFO) << "finish RPC (UpdateAuth)"; +} + +void RemoteMultiTenancyService::DoShowAuth(google::protobuf::RpcController* controller, + const ShowAuthRequest* request, + ShowAuthResponse* response, + google::protobuf::Closure* done) { + LOG(INFO) << "run RPC (ShowAuth)"; + multi_tenancy_service_impl_->ShowAuth(request, response, done); + LOG(INFO) << "finish RPC (ShowAuth)"; +} + +void RemoteMultiTenancyService::DoSetAuthPolicy(google::protobuf::RpcController* controller, + const SetAuthPolicyRequest* request, + SetAuthPolicyResponse* response, + google::protobuf::Closure* done) { + LOG(INFO) << "run RPC (SetAuthPolicy)"; + multi_tenancy_service_impl_->SetAuthPolicy(request, response, done); + LOG(INFO) << "finish RPC (SetAuthPolicy)"; +} + +void RemoteMultiTenancyService::DoShowAuthPolicy(google::protobuf::RpcController* controller, + const ShowAuthPolicyRequest* request, + ShowAuthPolicyResponse* response, + google::protobuf::Closure* done) { + LOG(INFO) << "run RPC (ShowAuthPolicy)"; + multi_tenancy_service_impl_->ShowAuthPolicy(request, response, done); + LOG(INFO) << "finish RPC (ShowAuthPolicy)"; +} + +void RemoteMultiTenancyService::DoSetQuota(google::protobuf::RpcController* controller, + const SetQuotaRequest* request, + SetQuotaResponse* response, + google::protobuf::Closure* done) { + LOG(INFO) << "run RPC (SetQuota)"; + multi_tenancy_service_impl_->SetQuota(request, response, done); + LOG(INFO) << "finish RPC (SetQuota)"; +} + +void RemoteMultiTenancyService::DoShowQuota(google::protobuf::RpcController* controller, + const ShowQuotaRequest* request, + ShowQuotaResponse* response, + google::protobuf::Closure* done) { + LOG(INFO) << "run RPC (ShowQuota)"; + multi_tenancy_service_impl_->ShowQuota(request, response, done); + LOG(INFO) << "finish RPC (ShowQuota)"; +} +} +} diff --git a/src/master/remote_multi_tenancy_service.h b/src/master/remote_multi_tenancy_service.h new file mode 100644 index 000000000..e21d27a1d --- /dev/null +++ b/src/master/remote_multi_tenancy_service.h @@ -0,0 +1,73 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include +#include "common/thread_pool.h" +#include "proto/master_rpc.pb.h" + +namespace tera { +namespace master { + +class MultiTenacyServiceImpl; + +class RemoteMultiTenancyService : public MasterMultiTenancyService { + public: + explicit RemoteMultiTenancyService(MultiTenacyServiceImpl* multi_tenancy_service_impl, + std::shared_ptr thread_pool); + ~RemoteMultiTenancyService(); + + void UpdateUgi(google::protobuf::RpcController* controller, const UpdateUgiRequest* request, + UpdateUgiResponse* response, google::protobuf::Closure* done); + + void ShowUgi(google::protobuf::RpcController* controller, const ShowUgiRequest* request, + ShowUgiResponse* response, google::protobuf::Closure* done); + + void UpdateAuth(google::protobuf::RpcController* controller, const UpdateAuthRequest* request, + UpdateAuthResponse* response, google::protobuf::Closure* done); + + void ShowAuth(google::protobuf::RpcController* controller, const ShowAuthRequest* request, + ShowAuthResponse* response, google::protobuf::Closure* done); + + void SetAuthPolicy(google::protobuf::RpcController* controller, + const SetAuthPolicyRequest* request, SetAuthPolicyResponse* response, + google::protobuf::Closure* done); + + void ShowAuthPolicy(google::protobuf::RpcController* controller, + const ShowAuthPolicyRequest* request, ShowAuthPolicyResponse* response, + google::protobuf::Closure* done); + + void SetQuota(google::protobuf::RpcController* controller, const SetQuotaRequest* request, + SetQuotaResponse* response, google::protobuf::Closure* done); + + void ShowQuota(google::protobuf::RpcController* controller, const ShowQuotaRequest* request, + ShowQuotaResponse* response, google::protobuf::Closure* done); + + private: + void DoUpdateUgi(google::protobuf::RpcController* controller, const UpdateUgiRequest* request, + UpdateUgiResponse* response, google::protobuf::Closure* done); + void DoShowUgi(google::protobuf::RpcController* controller, const ShowUgiRequest* request, + ShowUgiResponse* response, google::protobuf::Closure* done); + void DoUpdateAuth(google::protobuf::RpcController* controller, const UpdateAuthRequest* request, + UpdateAuthResponse* response, google::protobuf::Closure* done); + void DoShowAuth(google::protobuf::RpcController* controller, const ShowAuthRequest* request, + ShowAuthResponse* response, google::protobuf::Closure* done); + void DoSetAuthPolicy(google::protobuf::RpcController* controller, + const SetAuthPolicyRequest* request, SetAuthPolicyResponse* response, + google::protobuf::Closure* done); + void DoShowAuthPolicy(google::protobuf::RpcController* controller, + const ShowAuthPolicyRequest* request, ShowAuthPolicyResponse* response, + google::protobuf::Closure* done); + void DoSetQuota(google::protobuf::RpcController* controller, const SetQuotaRequest* request, + SetQuotaResponse* response, google::protobuf::Closure* done); + void DoShowQuota(google::protobuf::RpcController* controller, const ShowQuotaRequest* request, + ShowQuotaResponse* response, google::protobuf::Closure* done); + + private: + MultiTenacyServiceImpl* multi_tenancy_service_impl_; + std::shared_ptr thread_pool_; +}; +} +} diff --git a/src/master/scheduler.h b/src/master/scheduler.h index 93c8bce54..391c0d1bd 100644 --- a/src/master/scheduler.h +++ b/src/master/scheduler.h @@ -14,31 +14,26 @@ namespace tera { namespace master { class Scheduler { -public: - virtual ~Scheduler() {} + public: + virtual ~Scheduler() {} - virtual bool MayMoveOut(const TabletNodePtr& node, + virtual bool MayMoveOut(const TabletNodePtr& node, const std::string& table_name) = 0; + virtual bool FindBestNode(const std::vector& node_list, + const std::string& table_name, size_t* best_index) = 0; + virtual bool FindBestTablet(const TabletNodePtr& src_node, const TabletNodePtr& dst_node, + const std::vector& table_list, + const std::string& table_name, size_t* best_index) = 0; + + virtual bool NeedSchedule(std::vector& node_list, const std::string& table_name) = 0; - virtual bool FindBestNode(const std::vector& node_list, - const std::string& table_name, - size_t* best_index) = 0; - virtual bool FindBestTablet(const TabletNodePtr& src_node, - const TabletNodePtr& dst_node, - const std::vector& table_list, - const std::string& table_name, - size_t* best_index) = 0; - - virtual bool NeedSchedule(std::vector& node_list, - const std::string& table_name) = 0; - virtual void AscendingSort(std::vector& node_list, - const std::string& table_name) = 0; - virtual void DescendingSort(std::vector& node_list, - const std::string& table_name) = 0; + virtual void AscendingSort(std::vector& node_list, + const std::string& table_name) = 0; + virtual void DescendingSort(std::vector& node_list, + const std::string& table_name) = 0; - virtual const char* Name() = 0; + virtual const char* Name() = 0; }; -} // namespace master -} // namespace tera - +} // namespace master +} // namespace tera diff --git a/src/master/set_quota_procedure.cc b/src/master/set_quota_procedure.cc new file mode 100644 index 000000000..f1f7ce918 --- /dev/null +++ b/src/master/set_quota_procedure.cc @@ -0,0 +1,92 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "master/set_quota_procedure.h" +#include "quota/helpers/quota_utils.h" + +DECLARE_int32(tera_master_meta_retry_times); + +namespace tera { +namespace master { + +std::map SetQuotaProcedure::phase_handlers_{ + {SetQuotaPhase::kSetMeta, std::bind(&SetQuotaProcedure::SetMetaHandler, _1, _2)}, + {SetQuotaPhase::kEofPhase, std::bind(&SetQuotaProcedure::EofPhaseHandler, _1, _2)}, +}; + +SetQuotaProcedure::SetQuotaProcedure(const SetQuotaRequest* request, SetQuotaResponse* response, + google::protobuf::Closure* closure, ThreadPool* thread_pool, + const std::shared_ptr& quota_entry, + std::unique_ptr& meta_write_record) + : request_(request), + response_(response), + rpc_closure_(closure), + thread_pool_(thread_pool), + done_(false), + update_meta_(false), + quota_entry_(quota_entry), + meta_write_record_(std::move(meta_write_record)) { + PROC_LOG(INFO) << "begin quota update prepare"; + SetNextPhase(SetQuotaPhase::kSetMeta); +} + +std::string SetQuotaProcedure::ProcId() const { + std::string prefix = std::string("SetQuota:"); + return prefix + quota::MasterQuotaHelper::GetTableNameFromMetaKey(meta_write_record_->key); +} + +void SetQuotaProcedure::RunNextStage() { + SetQuotaPhase phase = GetCurrentPhase(); + auto it = phase_handlers_.find(phase); + PROC_CHECK(it != phase_handlers_.end()) << "illegal phase: " << phase + << ", table_name: " << meta_write_record_->key; + SetQuotaPhaseHandler handler = it->second; + handler(this, phase); +} + +void SetQuotaProcedure::SetMetaHandler(const SetQuotaPhase& phase) { + if (update_meta_) { + return; + } + update_meta_.store(true); + PROC_LOG(INFO) << "set quota meta begin [table : " << meta_write_record_->key << "]"; + UpdateMetaClosure closure = std::bind(&SetQuotaProcedure::SetMetaDone, this, _1); + MasterEnv().BatchWriteMetaTableAsync(*meta_write_record_, closure, + FLAGS_tera_master_meta_retry_times); +} + +void SetQuotaProcedure::SetMetaDone(bool succ) { + if (!succ) { + PROC_LOG(ERROR) << "update meta failed"; + EnterPhaseWithResponseStatus(kMetaTabletError, SetQuotaPhase::kEofPhase); + return; + } + // update master mem quota info at last + PROC_LOG(INFO) << "set quota info to meta succ"; + + // meta_write_record_->is_delete will allways be false. Quota doesn't have delete + if (!quota_entry_->AddRecord(meta_write_record_->key, meta_write_record_->value)) { + PROC_LOG(ERROR) << "Set quota failed!"; + } + EnterPhaseWithResponseStatus(kMasterOk, SetQuotaPhase::kEofPhase); +} + +void SetQuotaProcedure::EofPhaseHandler(const SetQuotaPhase&) { + done_.store(true); + PROC_LOG(INFO) << "set quota finish"; + rpc_closure_->Run(); +} + +std::ostream& operator<<(std::ostream& o, const SetQuotaPhase& phase) { + static const char* msg[] = {"SetQuotaPhase::kSetMeta", "SetQuotaPhase::kEofPhase", + "SetQuotaPhase::kUnknown"}; + static uint32_t msg_size = sizeof(msg) / sizeof(const char*); + typedef std::underlying_type::type UnderType; + uint32_t index = static_cast(phase) - static_cast(SetQuotaPhase::kSetMeta); + index = index < msg_size ? index : msg_size - 1; + o << msg[index]; + return o; +} +} +} diff --git a/src/master/set_quota_procedure.h b/src/master/set_quota_procedure.h new file mode 100644 index 000000000..d71d63e71 --- /dev/null +++ b/src/master/set_quota_procedure.h @@ -0,0 +1,76 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include +#include +#include +#include +#include +#include "common/mutex.h" +#include "master/master_env.h" +#include "master/procedure.h" +#include "master/tablet_manager.h" +#include "master/tabletnode_manager.h" +#include "proto/quota.pb.h" +#include "proto/master_rpc.pb.h" +#include "proto/status_code.pb.h" +#include "quota/master_quota_entry.h" + +namespace tera { +namespace master { + +enum class SetQuotaPhase { kSetMeta, kEofPhase }; + +std::ostream& operator<<(std::ostream& o, const SetQuotaPhase& phase); + +class SetQuotaProcedure : public Procedure { + public: + SetQuotaProcedure(const SetQuotaRequest* request, SetQuotaResponse* response, + google::protobuf::Closure* closure, ThreadPool* thread_pool, + const std::shared_ptr& quota_entry, + std::unique_ptr& meta_write_record); + virtual ~SetQuotaProcedure() {} + virtual std::string ProcId() const; + virtual void RunNextStage(); + virtual bool Done() { return done_.load(); } + + private: + using SetQuotaPhaseHandler = std::function; + + void SetNextPhase(const SetQuotaPhase& phase) { + MutexLock l(&phase_mutex_); + phases_.emplace_back(phase); + } + + SetQuotaPhase GetCurrentPhase() { + MutexLock l(&phase_mutex_); + return phases_.back(); + } + + void EnterPhaseWithResponseStatus(StatusCode code, SetQuotaPhase phase) { + response_->set_status(code); + SetNextPhase(phase); + } + void PrepareHandler(const SetQuotaPhase& phase); + void SetMetaHandler(const SetQuotaPhase& phase); + void SetMetaDone(bool succ); + void EofPhaseHandler(const SetQuotaPhase&); + + private: + const SetQuotaRequest* request_; + SetQuotaResponse* response_; + google::protobuf::Closure* rpc_closure_; + ThreadPool* thread_pool_; + std::atomic done_; + std::atomic update_meta_; + std::vector phases_; + mutable Mutex phase_mutex_; + static std::map phase_handlers_; + std::shared_ptr quota_entry_; + std::unique_ptr meta_write_record_; +}; +} +} diff --git a/src/master/split_tablet_procedure.cc b/src/master/split_tablet_procedure.cc index dcb2dab73..d3925efe0 100644 --- a/src/master/split_tablet_procedure.cc +++ b/src/master/split_tablet_procedure.cc @@ -19,250 +19,249 @@ DECLARE_string(tera_tabletnode_path_prefix); namespace tera { namespace master { -std::map - SplitTabletProcedure::phase_handlers_ { - {SplitTabletPhase::kPreSplitTablet, std::bind(&SplitTabletProcedure::PreSplitTabletPhaseHandler, _1, _2)}, - {SplitTabletPhase::kUnLoadTablet, std::bind(&SplitTabletProcedure::UnloadTabletPhaseHandler, _1, _2)}, - {SplitTabletPhase::kPostUnLoadTablet, std::bind(&SplitTabletProcedure::PostUnloadTabletPhaseHandler, _1, _2)}, - {SplitTabletPhase::kUpdateMeta, std::bind(&SplitTabletProcedure::UpdateMetaPhaseHandler, _1, _2)}, - {SplitTabletPhase::kLoadTablets, std::bind(&SplitTabletProcedure::LoadTabletsPhaseHandler, _1, _2)}, - {SplitTabletPhase::kFaultRecover, std::bind(&SplitTabletProcedure::FaultRecoverPhaseHandler, _1, _2)}, - {SplitTabletPhase::kEofPhase, std::bind(&SplitTabletProcedure::EOFPhaseHandler, _1, _2)} -}; +std::map + SplitTabletProcedure::phase_handlers_{ + {SplitTabletPhase::kPreSplitTablet, + std::bind(&SplitTabletProcedure::PreSplitTabletPhaseHandler, _1, _2)}, + {SplitTabletPhase::kUnLoadTablet, + std::bind(&SplitTabletProcedure::UnloadTabletPhaseHandler, _1, _2)}, + {SplitTabletPhase::kPostUnLoadTablet, + std::bind(&SplitTabletProcedure::PostUnloadTabletPhaseHandler, _1, _2)}, + {SplitTabletPhase::kUpdateMeta, + std::bind(&SplitTabletProcedure::UpdateMetaPhaseHandler, _1, _2)}, + {SplitTabletPhase::kLoadTablets, + std::bind(&SplitTabletProcedure::LoadTabletsPhaseHandler, _1, _2)}, + {SplitTabletPhase::kFaultRecover, + std::bind(&SplitTabletProcedure::FaultRecoverPhaseHandler, _1, _2)}, + {SplitTabletPhase::kEofPhase, std::bind(&SplitTabletProcedure::EOFPhaseHandler, _1, _2)}}; -SplitTabletProcedure::SplitTabletProcedure(TabletPtr tablet, std::string split_key, ThreadPool* thread_pool) : - id_(std::string("SplitTablet:") + tablet->GetPath() + ":" + TimeStamp()), - tablet_(tablet), split_key_(split_key), thread_pool_(thread_pool) { - PROC_LOG(INFO) << "split tablet begin, tablet: " << tablet_->GetPath(); - if (tablet_->GetStatus() != TabletMeta::kTabletReady) { - SetNextPhase(SplitTabletPhase::kEofPhase); - PROC_LOG(WARNING) << "tablet is not ready, give up split, tablet: " << tablet_; - return; - } - SetNextPhase(SplitTabletPhase::kPreSplitTablet); +SplitTabletProcedure::SplitTabletProcedure(TabletPtr tablet, std::string split_key, + ThreadPool* thread_pool) + : Procedure(ProcedureLimiter::LockType::kSplit), + id_(std::string("SplitTablet:") + tablet->GetPath() + ":" + TimeStamp()), + tablet_(tablet), + split_key_(split_key), + thread_pool_(thread_pool) { + PROC_LOG(INFO) << "split tablet begin, tablet: " << tablet_->GetPath(); + if (tablet_->GetStatus() != TabletMeta::kTabletReady) { + SetNextPhase(SplitTabletPhase::kEofPhase); + PROC_LOG(WARNING) << "tablet is not ready, give up split, tablet: " << tablet_; + return; + } + SetNextPhase(SplitTabletPhase::kPreSplitTablet); } -std::string SplitTabletProcedure::ProcId() const { - return id_; -} +std::string SplitTabletProcedure::ProcId() const { return id_; } void SplitTabletProcedure::RunNextStage() { - SplitTabletPhase phase = GetCurrentPhase(); - auto it = phase_handlers_.find(phase); - PROC_CHECK (it != phase_handlers_.end()) << "illegal phase: " << phase << ", tablet: " << tablet_; - SplitTabletPhaseHandler handler = it->second; - handler(this, phase); + SplitTabletPhase phase = GetCurrentPhase(); + auto it = phase_handlers_.find(phase); + PROC_CHECK(it != phase_handlers_.end()) << "illegal phase: " << phase << ", tablet: " << tablet_; + SplitTabletPhaseHandler handler = it->second; + handler(this, phase); } void SplitTabletProcedure::PreSplitTabletPhaseHandler(const SplitTabletPhase&) { - if (!split_key_.empty()) { - if ((!tablet_->GetKeyStart().empty() && split_key_ <= tablet_->GetKeyStart()) || - (!tablet_->GetKeyEnd().empty() && split_key_ >= tablet_->GetKeyEnd())) { - PROC_LOG(WARNING) << "invalid split key: " << split_key_ << ", tablet: " << tablet_; - SetNextPhase(SplitTabletPhase::kEofPhase); - return; - } - SetNextPhase(SplitTabletPhase::kUnLoadTablet); - } - else if (dispatch_split_key_request_) { - // waiting RPC response - return; + if (!split_key_.empty()) { + if ((!tablet_->GetKeyStart().empty() && split_key_ <= tablet_->GetKeyStart()) || + (!tablet_->GetKeyEnd().empty() && split_key_ >= tablet_->GetKeyEnd())) { + PROC_LOG(WARNING) << "invalid split key: " << split_key_ << ", tablet: " << tablet_; + SetNextPhase(SplitTabletPhase::kEofPhase); + return; } - else { - dispatch_split_key_request_ = true; - ComputeSplitKeyAsync(); - } + SetNextPhase(SplitTabletPhase::kUnLoadTablet); + } else if (dispatch_split_key_request_) { + // waiting RPC response + return; + } else { + dispatch_split_key_request_ = true; + ComputeSplitKeyAsync(); + } } void SplitTabletProcedure::UnloadTabletPhaseHandler(const SplitTabletPhase&) { - if (!unload_proc_) { - unload_proc_.reset(new UnloadTabletProcedure(tablet_, thread_pool_, true)); - PROC_LOG(INFO) << "Generate UnloadTablet SubProcedure: " << unload_proc_->ProcId(); - MasterEnv().GetExecutor()->AddProcedure(unload_proc_); - } - if (!unload_proc_->Done()) { - return; - } - TabletNodePtr node = tablet_->GetTabletNode(); - if (tablet_->GetStatus() == TabletMeta::kTabletOffline) { - SetNextPhase(SplitTabletPhase::kPostUnLoadTablet); - } - else { - SetNextPhase(SplitTabletPhase::kEofPhase); - } + if (!unload_proc_) { + unload_proc_.reset(new UnloadTabletProcedure(tablet_, thread_pool_, true)); + PROC_LOG(INFO) << "Generate UnloadTablet SubProcedure: " << unload_proc_->ProcId(); + MasterEnv().GetExecutor()->AddProcedure(unload_proc_); + } + if (!unload_proc_->Done()) { + return; + } + TabletNodePtr node = tablet_->GetTabletNode(); + if (tablet_->GetStatus() == TabletMeta::kTabletOffline) { + SetNextPhase(SplitTabletPhase::kPostUnLoadTablet); + } else { + SetNextPhase(SplitTabletPhase::kEofPhase); + } } void SplitTabletProcedure::PostUnloadTabletPhaseHandler(const SplitTabletPhase&) { - if (!TabletStatusCheck()) { - SetNextPhase(SplitTabletPhase::kFaultRecover); - } - else { - SetNextPhase(SplitTabletPhase::kUpdateMeta); - } + if (!TabletStatusCheck()) { + SetNextPhase(SplitTabletPhase::kFaultRecover); + } else { + SetNextPhase(SplitTabletPhase::kUpdateMeta); + } } void SplitTabletProcedure::UpdateMetaPhaseHandler(const SplitTabletPhase&) { - if (!child_tablets_[0]) { - UpdateMeta(); - } + if (!child_tablets_[0]) { + UpdateMeta(); + } } void SplitTabletProcedure::LoadTabletsPhaseHandler(const SplitTabletPhase&) { - if (!load_procs_[0] && !load_procs_[1]) { - TabletNodePtr node = tablet_->GetTabletNode(); - // try load tablet at the origin tabletnode considering cache locality - load_procs_[0].reset(new LoadTabletProcedure(child_tablets_[0], node, thread_pool_/*, true*/)); - load_procs_[1].reset(new LoadTabletProcedure(child_tablets_[1], node, thread_pool_/*, true*/)); - PROC_LOG(INFO) << "Generate LoadTablet SubProcedure1: " << load_procs_[0]->ProcId(); - PROC_LOG(INFO) << "Generate LoadTablet SubProcedure2, " << load_procs_[1]->ProcId(); - MasterEnv().GetExecutor()->AddProcedure(load_procs_[0]); - MasterEnv().GetExecutor()->AddProcedure(load_procs_[1]); - } - PROC_CHECK(load_procs_[0] && load_procs_[1]); - SetNextPhase(SplitTabletPhase::kEofPhase); + if (!load_procs_[0] && !load_procs_[1]) { + TabletNodePtr node = tablet_->GetTabletNode(); + // try load tablet at the origin tabletnode considering cache locality + load_procs_[0].reset(new LoadTabletProcedure(child_tablets_[0], node, thread_pool_ /*, true*/)); + load_procs_[1].reset(new LoadTabletProcedure(child_tablets_[1], node, thread_pool_ /*, true*/)); + PROC_LOG(INFO) << "Generate LoadTablet SubProcedure1: " << load_procs_[0]->ProcId(); + PROC_LOG(INFO) << "Generate LoadTablet SubProcedure2, " << load_procs_[1]->ProcId(); + MasterEnv().GetExecutor()->AddProcedure(load_procs_[0]); + MasterEnv().GetExecutor()->AddProcedure(load_procs_[1]); + } + PROC_CHECK(load_procs_[0] && load_procs_[1]); + SetNextPhase(SplitTabletPhase::kEofPhase); } void SplitTabletProcedure::FaultRecoverPhaseHandler(const SplitTabletPhase&) { - PROC_CHECK(phases_.size() >= 2 && GetCurrentPhase() == SplitTabletPhase::kFaultRecover); - SplitTabletPhase fault_phase = phases_.at(phases_.size() - 2); - PROC_CHECK(fault_phase == SplitTabletPhase::kPostUnLoadTablet); - if (!recover_proc_) { - recover_proc_.reset(new LoadTabletProcedure(tablet_, tablet_->GetTabletNode(), thread_pool_/*, true*/)); - MasterEnv().GetExecutor()->AddProcedure(recover_proc_); - return; - } - SetNextPhase(SplitTabletPhase::kEofPhase); + PROC_CHECK(phases_.size() >= 2 && GetCurrentPhase() == SplitTabletPhase::kFaultRecover); + SplitTabletPhase fault_phase = phases_.at(phases_.size() - 2); + PROC_CHECK(fault_phase == SplitTabletPhase::kPostUnLoadTablet); + if (!recover_proc_) { + recover_proc_.reset( + new LoadTabletProcedure(tablet_, tablet_->GetTabletNode(), thread_pool_ /*, true*/)); + MasterEnv().GetExecutor()->AddProcedure(recover_proc_); + return; + } + SetNextPhase(SplitTabletPhase::kEofPhase); } void SplitTabletProcedure::EOFPhaseHandler(const SplitTabletPhase&) { - PROC_LOG(INFO) << "split tablet finish, tablet: " << tablet_->GetPath(); - // If parent tablet not in transition, unlock it's transition lock - if (!recover_proc_) { - tablet_->UnlockTransition(); - } - done_ = true; + PROC_LOG(INFO) << "split tablet finish, tablet: " << tablet_; + // If parent tablet not in transition, unlock it's transition lock + if (!recover_proc_) { + tablet_->UnlockTransition(); + } + + done_ = true; } void SplitTabletProcedure::ComputeSplitKeyAsync() { - SplitTabletRequest* request = new SplitTabletRequest; - SplitTabletResponse* response = new SplitTabletResponse; - - request->set_sequence_id(MasterEnv().SequenceId().Inc()); - request->set_tablet_name(tablet_->GetTableName()); - request->mutable_key_range()->set_key_start(tablet_->GetKeyStart()); - request->mutable_key_range()->set_key_end(tablet_->GetKeyEnd()); - tabletnode::TabletNodeClient node_client(thread_pool_, tablet_->GetServerAddr(), - FLAGS_tera_master_split_rpc_timeout); - PROC_LOG(INFO) << "ComputeSplitKeyAsync id: " << request->sequence_id() << ", " << tablet_; - ComputeSplitKeyClosure done = - std::bind(&SplitTabletProcedure::ComputeSplitKeyCallback, this, _1, _2, _3, _4); - node_client.ComputeSplitKey(request, response, done); + SplitTabletRequest* request = new SplitTabletRequest; + SplitTabletResponse* response = new SplitTabletResponse; + + request->set_sequence_id(MasterEnv().SequenceId().Inc()); + request->set_tablet_name(tablet_->GetTableName()); + request->mutable_key_range()->set_key_start(tablet_->GetKeyStart()); + request->mutable_key_range()->set_key_end(tablet_->GetKeyEnd()); + tabletnode::TabletNodeClient node_client(thread_pool_, tablet_->GetServerAddr(), + FLAGS_tera_master_split_rpc_timeout); + PROC_LOG(INFO) << "ComputeSplitKeyAsync id: " << request->sequence_id() << ", " << tablet_; + ComputeSplitKeyClosure done = + std::bind(&SplitTabletProcedure::ComputeSplitKeyCallback, this, _1, _2, _3, _4); + node_client.ComputeSplitKey(request, response, done); } -void SplitTabletProcedure::ComputeSplitKeyCallback(SplitTabletRequest* request, - SplitTabletResponse* response, - bool failed, - int error_code) { - std::unique_ptr request_deleter(request); - std::unique_ptr response_deleter(response); - StatusCode status = response->status(); - if (failed || status != kTabletNodeOk) { - std::string errmsg = (failed ? - sofa::pbrpc::RpcErrorCodeToString(error_code) : StatusCodeToString(status)); - PROC_LOG(WARNING) << "cannot get split key from ts, abort tablet split, " - << tablet_ << ", error: " << errmsg; - SetNextPhase(SplitTabletPhase::kEofPhase); - return; - } - split_key_ = response->split_keys(0); - SetNextPhase(SplitTabletPhase::kUnLoadTablet); +void SplitTabletProcedure::ComputeSplitKeyCallback(SplitTabletRequest* request, + SplitTabletResponse* response, bool failed, + int error_code) { + std::unique_ptr request_deleter(request); + std::unique_ptr response_deleter(response); + StatusCode status = response->status(); + if (failed || status != kTabletNodeOk) { + std::string errmsg = + (failed ? sofa::pbrpc::RpcErrorCodeToString(error_code) : StatusCodeToString(status)); + PROC_LOG(WARNING) << "cannot get split key from ts, abort tablet split, " << tablet_ + << ", error: " << errmsg; + SetNextPhase(SplitTabletPhase::kEofPhase); + return; + } + split_key_ = response->split_keys(0); + SetNextPhase(SplitTabletPhase::kUnLoadTablet); } void SplitTabletProcedure::UpdateMeta() { - std::vector records; - - std::string parent_path = tablet_->GetPath(); - TablePtr table = tablet_->GetTable(); - std::string child_key_start = tablet_->GetKeyStart(); - std::string child_key_end = split_key_; - for (int i = 0; i < 2; ++i) { - TabletMeta child_meta; - tablet_->ToMeta(&child_meta); - child_meta.clear_parent_tablets(); - child_meta.set_status(TabletMeta::kTabletOffline); - child_meta.add_parent_tablets(leveldb::GetTabletNumFromPath(parent_path)); - child_meta.set_path(leveldb::GetChildTabletPath(parent_path, table->GetNextTabletNo())); - child_meta.mutable_key_range()->set_key_start(child_key_start); - child_meta.mutable_key_range()->set_key_end(child_key_end); - child_meta.set_size(tablet_->GetDataSize() / 2); - child_tablets_[i].reset(new Tablet(child_meta, table)); - child_key_start = child_key_end; - child_key_end = tablet_->GetKeyEnd(); - PackMetaWriteRecords(child_tablets_[i], false, records); - } - - UpdateMetaClosure done = std::bind(&SplitTabletProcedure::UpdateMetaDone, this, _1); - PROC_LOG(INFO) << "[split] update meta async: " << tablet_ ; - MasterEnv().BatchWriteMetaTableAsync(records, done, -1); + std::vector records; + std::string parent_path = tablet_->GetPath(); + TablePtr table = tablet_->GetTable(); + std::string child_key_start = tablet_->GetKeyStart(); + std::string child_key_end = split_key_; + for (int i = 0; i < 2; ++i) { + TabletMeta child_meta; + tablet_->ToMeta(&child_meta); + child_meta.clear_parent_tablets(); + child_meta.set_status(TabletMeta::kTabletOffline); + child_meta.add_parent_tablets(leveldb::GetTabletNumFromPath(parent_path)); + child_meta.set_path(leveldb::GetChildTabletPath(parent_path, table->GetNextTabletNo())); + child_meta.mutable_key_range()->set_key_start(child_key_start); + child_meta.mutable_key_range()->set_key_end(child_key_end); + child_meta.set_size(tablet_->GetDataSize() / 2); + child_meta.set_version(tablet_->Version() + 1); + child_tablets_[i].reset(new Tablet(child_meta, table)); + child_key_start = child_key_end; + child_key_end = tablet_->GetKeyEnd(); + PackMetaWriteRecords(child_tablets_[i], false, records); + } + + UpdateMetaClosure done = std::bind(&SplitTabletProcedure::UpdateMetaDone, this, _1); + PROC_LOG(INFO) << "[split] update meta async: " << tablet_; + MasterEnv().BatchWriteMetaTableAsync(records, done, -1); } void SplitTabletProcedure::UpdateMetaDone(bool) { - TabletMeta first_meta, second_meta; - child_tablets_[0]->ToMeta(&first_meta); - first_meta.set_status(TabletMeta::kTabletOffline); - child_tablets_[1]->ToMeta(&second_meta); - second_meta.set_status(TabletMeta::kTabletOffline); - TablePtr table = tablet_->GetTable(); - child_tablets_[0]->LockTransition(); - child_tablets_[1]->LockTransition(); - - table->SplitTablet(tablet_, first_meta, second_meta, &child_tablets_[0], &child_tablets_[1]); - PROC_LOG(INFO) << "split finish, " << tablet_ << ", try load child tablet," - << "\nfirst: " << child_tablets_[0] - << "\nsecond: " << child_tablets_[1]; - SetNextPhase(SplitTabletPhase::kLoadTablets); + TabletMeta first_meta, second_meta; + child_tablets_[0]->ToMeta(&first_meta); + first_meta.set_status(TabletMeta::kTabletOffline); + child_tablets_[1]->ToMeta(&second_meta); + second_meta.set_status(TabletMeta::kTabletOffline); + TablePtr table = tablet_->GetTable(); + child_tablets_[0]->LockTransition(); + child_tablets_[1]->LockTransition(); + + tablet_->DoStateTransition(TabletEvent::kFinishSplitTablet); + table->SplitTablet(tablet_, first_meta, second_meta, &child_tablets_[0], &child_tablets_[1]); + PROC_LOG(INFO) << "split finish, " << tablet_ << ", try load child tablet," + << "\nfirst: " << child_tablets_[0] << "\nsecond: " << child_tablets_[1]; + SetNextPhase(SplitTabletPhase::kLoadTablets); } bool SplitTabletProcedure::TabletStatusCheck() { - leveldb::Env* env = io::LeveldbBaseEnv(); - std::vector children; - std::string tablet_path = FLAGS_tera_tabletnode_path_prefix + "/" + tablet_->GetPath(); - leveldb::Status status = env->GetChildren(tablet_path, &children); - if (!status.ok()) { - PROC_LOG(WARNING) << "[split] abort, " << tablet_ - << ", tablet status check error: " << status.ToString(); - return false; + leveldb::Env* env = io::LeveldbBaseEnv(); + std::vector children; + std::string tablet_path = FLAGS_tera_tabletnode_path_prefix + "/" + tablet_->GetPath(); + leveldb::Status status = env->GetChildren(tablet_path, &children); + if (!status.ok()) { + PROC_LOG(WARNING) << "[split] abort, " << tablet_ + << ", tablet status check error: " << status.ToString(); + return false; + } + for (size_t i = 0; i < children.size(); ++i) { + leveldb::FileType type = leveldb::kUnknown; + uint64_t number = 0; + if (ParseFileName(children[i], &number, &type) && type == leveldb::kLogFile) { + PROC_LOG(WARNING) << "[split] abort, " << tablet_ << ", tablet log not clear."; + return false; } - for (size_t i = 0; i < children.size(); ++i) { - leveldb::FileType type = leveldb::kUnknown; - uint64_t number = 0; - if (ParseFileName(children[i], &number, &type) && - type == leveldb::kLogFile) { - PROC_LOG(WARNING) << "[split] abort, " << tablet_ << ", tablet log not clear."; - return false; - } - } - return true; + } + return true; } -std::ostream& operator<< (std::ostream& o, const SplitTabletPhase& phase) { - static const char* msg[] = {"SplitTabletPhase::kPreSplitTablet", - "SplitTabletPhase::kUnLoadTablet", - "SplitTabletPhase::kPostUnLoadTablet", - "SplitTabletPhase::kUpdateMeta", - "SplitTabletPhase::kLoadTablets", - "SplitTabletPhase::kFaultRecover", - "SplitTabletPhase::kEofPhase", - "SplitTabletPhase::UNKNOWN"}; - static uint32_t msg_size = sizeof(msg) / sizeof(const char*); - typedef std::underlying_type::type UnderType; - uint32_t index = static_cast(phase) - static_cast(SplitTabletPhase::kPreSplitTablet); - index = index < msg_size ? index : msg_size - 1; - o << msg[index]; - return o; +std::ostream& operator<<(std::ostream& o, const SplitTabletPhase& phase) { + static const char* msg[] = { + "SplitTabletPhase::kPreSplitTablet", "SplitTabletPhase::kUnLoadTablet", + "SplitTabletPhase::kPostUnLoadTablet", "SplitTabletPhase::kUpdateMeta", + "SplitTabletPhase::kLoadTablets", "SplitTabletPhase::kFaultRecover", + "SplitTabletPhase::kEofPhase", "SplitTabletPhase::UNKNOWN"}; + static uint32_t msg_size = sizeof(msg) / sizeof(const char*); + typedef std::underlying_type::type UnderType; + uint32_t index = + static_cast(phase) - static_cast(SplitTabletPhase::kPreSplitTablet); + index = index < msg_size ? index : msg_size - 1; + o << msg[index]; + return o; } - - } } diff --git a/src/master/split_tablet_procedure.h b/src/master/split_tablet_procedure.h index de9f545f1..852b27873 100644 --- a/src/master/split_tablet_procedure.h +++ b/src/master/split_tablet_procedure.h @@ -15,82 +15,83 @@ namespace tera { namespace master { enum class SplitTabletPhase { - kPreSplitTablet, - kUnLoadTablet, - kPostUnLoadTablet, - kUpdateMeta, - kLoadTablets, - kFaultRecover, - kEofPhase, + kPreSplitTablet, + kUnLoadTablet, + kPostUnLoadTablet, + kUpdateMeta, + kLoadTablets, + kFaultRecover, + kEofPhase, }; -std::ostream& operator<< (std::ostream& o, const SplitTabletPhase& phase); +std::ostream& operator<<(std::ostream& o, const SplitTabletPhase& phase); -typedef std::function ComputeSplitKeyClosure; +typedef std::function + ComputeSplitKeyClosure; class SplitTabletProcedure : public Procedure { -public: - - explicit SplitTabletProcedure(TabletPtr tablet, ThreadPool* thread_pool) - : SplitTabletProcedure(tablet, std::string(""), thread_pool) {} - - explicit SplitTabletProcedure(TabletPtr tablet, std::string, ThreadPool* thread_pool); - - virtual ~SplitTabletProcedure() {} - - virtual std::string ProcId() const; - - virtual bool Done() {return done_;} - - virtual void RunNextStage(); - -private: - typedef std::function SplitTabletPhaseHandler; - - SplitTabletPhase GetCurrentPhase() { - std::lock_guard lock(mutex_); - return phases_.back(); - } - - void SetNextPhase(SplitTabletPhase phase) { - std::lock_guard lock(mutex_); - phases_.emplace_back(phase); - } - - void PreSplitTabletPhaseHandler(const SplitTabletPhase&); - void UnloadTabletPhaseHandler(const SplitTabletPhase&); - void PostUnloadTabletPhaseHandler(const SplitTabletPhase&); - void UpdateMetaPhaseHandler(const SplitTabletPhase&); - void LoadTabletsPhaseHandler(const SplitTabletPhase&); - void FaultRecoverPhaseHandler(const SplitTabletPhase&); - void EOFPhaseHandler(const SplitTabletPhase&); - - void ComputeSplitKeyAsync(); - - void ComputeSplitKeyCallback(SplitTabletRequest* request, SplitTabletResponse* response, bool failed, int error_code); - - bool TabletStatusCheck(); - - void UpdateMeta(); - void UpdateMetaDone(bool); - -private: - const std::string id_; - std::mutex mutex_; - TabletPtr tablet_; - bool done_ = false; - std::string split_key_; - bool dispatch_split_key_request_ = false; - std::shared_ptr unload_proc_; - - TabletPtr child_tablets_[2]; - std::shared_ptr load_procs_[2]; - std::vector phases_; - - std::shared_ptr recover_proc_; - static std::map phase_handlers_; - ThreadPool* thread_pool_; -}; + public: + explicit SplitTabletProcedure(TabletPtr tablet, ThreadPool* thread_pool) + : SplitTabletProcedure(tablet, std::string(""), thread_pool) {} + + explicit SplitTabletProcedure(TabletPtr tablet, std::string, ThreadPool* thread_pool); + + virtual ~SplitTabletProcedure() {} + + virtual std::string ProcId() const; + + virtual bool Done() { return done_; } + + virtual void RunNextStage(); + + private: + typedef std::function + SplitTabletPhaseHandler; + + SplitTabletPhase GetCurrentPhase() { + std::lock_guard lock(mutex_); + return phases_.back(); + } + + void SetNextPhase(SplitTabletPhase phase) { + std::lock_guard lock(mutex_); + phases_.emplace_back(phase); + } + void PreSplitTabletPhaseHandler(const SplitTabletPhase&); + void UnloadTabletPhaseHandler(const SplitTabletPhase&); + void PostUnloadTabletPhaseHandler(const SplitTabletPhase&); + void UpdateMetaPhaseHandler(const SplitTabletPhase&); + void LoadTabletsPhaseHandler(const SplitTabletPhase&); + void FaultRecoverPhaseHandler(const SplitTabletPhase&); + void EOFPhaseHandler(const SplitTabletPhase&); + + void ComputeSplitKeyAsync(); + + void ComputeSplitKeyCallback(SplitTabletRequest* request, SplitTabletResponse* response, + bool failed, int error_code); + + bool TabletStatusCheck(); + + void UpdateMeta(); + void UpdateMetaDone(bool); + + private: + const std::string id_; + std::mutex mutex_; + TabletPtr tablet_; + bool done_ = false; + std::string split_key_; + bool dispatch_split_key_request_ = false; + std::shared_ptr unload_proc_; + + TabletPtr child_tablets_[2]; + std::shared_ptr load_procs_[2]; + std::vector phases_; + + std::shared_ptr recover_proc_; + static std::map phase_handlers_; + ThreadPool* thread_pool_; +}; } } diff --git a/src/master/state_machine.h b/src/master/state_machine.h index 375b87f23..92b2e8632 100644 --- a/src/master/state_machine.h +++ b/src/master/state_machine.h @@ -3,61 +3,65 @@ // found in the LICENSE file. #pragma once +#include namespace tera { namespace master { -template +template class StateTransitionRules { -public: - StateTransitionRules() {}; - ~StateTransitionRules() {}; - StateTransitionRules(const StateTransitionRules&) = delete; - StateTransitionRules& operator=(const StateTransitionRules&) = delete; - - StateTransitionRules(StateTransitionRules&& transitions) noexcept { - transition_rules_.swap(transitions.transition_rules_); - } - - StateTransitionRules& operator=(StateTransitionRules&& transitions) { - if (this != &transitions) { - transition_rules_.swap(transitions.transition_rules_); - } - return *this; - } - // add a new transition rule representing object's state transfer from "curr_state" to "dest_state" driven by "event" - StateTransitionRules& AddTransitionRule(const StateType& curr_state, - const EventType& event, - const StateType& dest_state) { - transition_rules_[curr_state][event] = dest_state; - return *this; + public: + StateTransitionRules(){}; + ~StateTransitionRules(){}; + StateTransitionRules(const StateTransitionRules&) = delete; + StateTransitionRules& operator=(const StateTransitionRules&) = delete; + + StateTransitionRules(StateTransitionRules&& transitions) noexcept { + transition_rules_.swap(transitions.transition_rules_); + } + + StateTransitionRules& operator=(StateTransitionRules&& transitions) { + if (this != &transitions) { + transition_rules_.swap(transitions.transition_rules_); } - - // return true and the associated post_state if there is a valid transition rule for - // else return false - bool DoStateTransition(const StateType& curr_state, const EventType& event, StateType* post_state) const; - -private: - // this map is used to save all transition rules - // key represent PrevState, value of type std::map represent all events supported by PreState - // and the associated PostState PreState will transfered to driven by event - std::map> transition_rules_; + return *this; + } + // add a new transition rule representing object's state transfer from + // "curr_state" to "dest_state" driven by "event" + StateTransitionRules& AddTransitionRule(const StateType& curr_state, const EventType& event, + const StateType& dest_state) { + transition_rules_[curr_state][event] = dest_state; + return *this; + } + + // return true and the associated post_state if there is a valid transition + // rule for + // else return false + bool DoStateTransition(const StateType& curr_state, const EventType& event, + StateType* post_state) const; + + private: + // this map is used to save all transition rules + // key represent PrevState, value of type std::map + // represent all events supported by PreState + // and the associated PostState PreState will transfered to driven by event + std::map> transition_rules_; }; -template -bool StateTransitionRules:: - DoStateTransition(const StateType& curr_state, const EventType& event, StateType* post_state) const { - auto transition_rule = transition_rules_.find(curr_state); - if (transition_rule == transition_rules_.end()) { - return false; - } - auto transition = transition_rule->second.find(event); - if (transition == transition_rule->second.end()) { - return false; - } - *post_state = transition->second; - return true; +template +bool StateTransitionRules::DoStateTransition(const StateType& curr_state, + const EventType& event, + StateType* post_state) const { + auto transition_rule = transition_rules_.find(curr_state); + if (transition_rule == transition_rules_.end()) { + return false; + } + auto transition = transition_rule->second.find(event); + if (transition == transition_rule->second.end()) { + return false; + } + *post_state = transition->second; + return true; } - } } diff --git a/src/master/table_state_machine.cc b/src/master/table_state_machine.cc index 02a1aca10..68856913c 100644 --- a/src/master/table_state_machine.cc +++ b/src/master/table_state_machine.cc @@ -10,27 +10,21 @@ namespace master { static TableStateMachine::TableStateTransitionRulesType s_table_transition_rules; const TableStateMachine::TableStateTransitionRulesType TableStateMachine::state_transitions_( - std::move( - s_table_transition_rules - .AddTransitionRule(kTableEnable, TableEvent::kDisableTable, kTableDisable) - .AddTransitionRule(kTableDisable, TableEvent::kEnableTable, kTableEnable) - .AddTransitionRule(kTableDisable, TableEvent::kDeleteTable, kTableDeleting) - .AddTransitionRule(kTableDeleting, TableEvent::kDisableTable, kTableDisable) - )); + std::move(s_table_transition_rules.AddTransitionRule(kTableEnable, TableEvent::kDisableTable, + kTableDisable) + .AddTransitionRule(kTableDisable, TableEvent::kEnableTable, kTableEnable) + .AddTransitionRule(kTableDisable, TableEvent::kDeleteTable, kTableDeleting) + .AddTransitionRule(kTableDeleting, TableEvent::kDisableTable, kTableDisable))); -std::ostream& operator<< (std::ostream& o, const TableEvent event) { - static const char* msg[] = {"TableEvent::kEnableTable", - "TableEvent::kDisableTable", - "TableEvent::kDeleteTable", - "TableEvent::kUnknown"}; - static uint32_t msg_size = sizeof(msg) / sizeof(const char*); - typedef std::underlying_type::type UnderType; - uint32_t index = static_cast(event) - static_cast(TableEvent::kEnableTable); - index = index < msg_size ? index : msg_size - 1; - o << msg[index]; - return o; +std::ostream& operator<<(std::ostream& o, const TableEvent event) { + static const char* msg[] = {"TableEvent::kEnableTable", "TableEvent::kDisableTable", + "TableEvent::kDeleteTable", "TableEvent::kUnknown"}; + static uint32_t msg_size = sizeof(msg) / sizeof(const char*); + typedef std::underlying_type::type UnderType; + uint32_t index = static_cast(event) - static_cast(TableEvent::kEnableTable); + index = index < msg_size ? index : msg_size - 1; + o << msg[index]; + return o; } - } } - diff --git a/src/master/table_state_machine.h b/src/master/table_state_machine.h index 669f7f474..213d99924 100644 --- a/src/master/table_state_machine.h +++ b/src/master/table_state_machine.h @@ -14,36 +14,35 @@ namespace tera { namespace master { enum class TableEvent { - kEnableTable, - kDisableTable, - kDeleteTable, + kEnableTable, + kDisableTable, + kDeleteTable, }; -std::ostream& operator<< (std::ostream& o, const TableEvent event); +std::ostream& operator<<(std::ostream& o, const TableEvent event); class TableStateMachine { -public: - TableStateMachine(TableStatus init_status) : curr_status_(init_status) {} - ~TableStateMachine() {} - - bool DoStateTransition(const TableEvent event) { - TableStatus post_status; - if (state_transitions_.DoStateTransition(curr_status_, event, &post_status)) { - curr_status_ = post_status; - return true; - } - return false; - }; - - TableStatus GetStatus() { return curr_status_; } - void SetStatus(TableStatus status) { curr_status_ = status; } - - typedef StateTransitionRules TableStateTransitionRulesType; -private: - TableStatus curr_status_; - const static TableStateTransitionRulesType state_transitions_; - + public: + TableStateMachine(TableStatus init_status) : curr_status_(init_status) {} + ~TableStateMachine() {} + + bool DoStateTransition(const TableEvent event) { + TableStatus post_status; + if (state_transitions_.DoStateTransition(curr_status_, event, &post_status)) { + curr_status_ = post_status; + return true; + } + return false; + }; + + TableStatus GetStatus() { return curr_status_; } + void SetStatus(TableStatus status) { curr_status_ = status; } + + typedef StateTransitionRules TableStateTransitionRulesType; + + private: + TableStatus curr_status_; + const static TableStateTransitionRulesType state_transitions_; }; } } - diff --git a/src/master/tablet_manager.cc b/src/master/tablet_manager.cc index 8553de35a..3f9867d7c 100644 --- a/src/master/tablet_manager.cc +++ b/src/master/tablet_manager.cc @@ -33,9 +33,9 @@ DECLARE_string(tera_master_meta_table_path); DECLARE_string(tera_master_meta_table_name); -DECLARE_string(tera_master_gc_strategy); DECLARE_bool(tera_master_gc_trash_enabled); DECLARE_int32(tera_master_impl_retry_times); +DECLARE_int32(tera_master_write_meta_retry_times); DECLARE_bool(tera_delete_obsolete_tabledir_enabled); @@ -46,412 +46,475 @@ DECLARE_double(tera_master_workload_merge_threshold); namespace tera { namespace master { -std::ostream& operator << (std::ostream& o, const TabletFile& file) { - o << file.tablet_id << "-" << file.lg_id << "-" << file.file_id; - return o; -} - -std::ostream& operator << (std::ostream& o, const Tablet& tablet) { - MutexLock lock(&tablet.mutex_); - o << tablet.meta_.path() - << ", status: " << StatusCodeToString(tablet.meta_.status()) << ", key range: [" - << DebugString(tablet.meta_.key_range().key_start()) << ", " - << DebugString(tablet.meta_.key_range().key_end()) << "] @ " - << tablet.meta_.server_addr() << "/" << tablet.server_id_; - return o; -} - -std::ostream& operator << (std::ostream& o, const TabletPtr& tablet) { - o << *tablet; - return o; -} - -Tablet::Tablet(const TabletMeta& meta) : - meta_(meta), - state_machine_(meta.status()), - update_time_(get_micros()), - last_move_time_us_(0), - merge_param_(NULL), - gc_reported_(false), - load_fail_cnt_(0) {} - -Tablet::Tablet(const TabletMeta& meta, TablePtr table) : - meta_(meta), - state_machine_(meta.status()), - table_(table), - update_time_(get_micros()), - last_move_time_us_(0), - merge_param_(NULL), - gc_reported_(false), - load_fail_cnt_(0) { -} - -Tablet::~Tablet() { - table_.reset(); -} +std::ostream& operator<<(std::ostream& o, const TabletFile& file) { + o << file.tablet_id << "-" << file.lg_id << "-" << file.file_id; + return o; +} + +std::ostream& operator<<(std::ostream& o, const Tablet& tablet) { + MutexLock lock(&tablet.mutex_); + o << tablet.meta_.path() << ", status: " << StatusCodeToString(tablet.meta_.status()) + << ", version: " << tablet.meta_.version() << ", ctime: " << tablet.create_time_ + << ", key range: [" << DebugString(tablet.meta_.key_range().key_start()) << ", " + << DebugString(tablet.meta_.key_range().key_end()) << "] @ " << tablet.meta_.server_addr() + << "/" << tablet.server_id_; + return o; +} + +std::ostream& operator<<(std::ostream& o, const TabletPtr& tablet) { + o << *tablet; + return o; +} + +Tablet::Tablet(const TabletMeta& meta) + : meta_(meta), + state_machine_(meta.status()), + update_time_(get_micros()), + last_move_time_us_(0), + data_size_on_flash_(0), + merge_param_(NULL), + gc_reported_(false), + load_fail_cnt_(0), + create_time_(get_micros()) {} + +Tablet::Tablet(const TabletMeta& meta, TablePtr table) + : meta_(meta), + state_machine_(meta.status()), + table_(table), + update_time_(get_micros()), + last_move_time_us_(0), + data_size_on_flash_(0), + merge_param_(NULL), + gc_reported_(false), + load_fail_cnt_(0), + create_time_(get_micros()) { + MutexLock lock(&mutex_); + if (meta_.has_create_time() && meta_.create_time() != 0) { + const_cast(create_time_) = meta_.create_time(); + } + if (create_time_ < table_->CreateTime()) { + const_cast(create_time_) = table_->CreateTime() + 1; + } + meta_.set_create_time(create_time_); + if (!meta_.has_version()) { + meta_.set_version(1); + } +} + +Tablet::~Tablet() { table_.reset(); } void Tablet::ToMeta(TabletMeta* meta) { - MutexLock lock(&mutex_); - meta->CopyFrom(meta_); + MutexLock lock(&mutex_); + meta->CopyFrom(meta_); } const std::string& Tablet::GetTableName() { - MutexLock lock(&mutex_); - return meta_.table_name(); + MutexLock lock(&mutex_); + return meta_.table_name(); } const std::string& Tablet::GetServerAddr() { - MutexLock lock(&mutex_); - return meta_.server_addr(); + MutexLock lock(&mutex_); + return meta_.server_addr(); } std::string Tablet::GetServerId() { - MutexLock lock(&mutex_); - return node_->uuid_; + MutexLock lock(&mutex_); + return node_->uuid_; } const std::string& Tablet::GetPath() { - MutexLock lock(&mutex_); - return meta_.path(); + MutexLock lock(&mutex_); + return meta_.path(); } int64_t Tablet::GetDataSize() { - MutexLock lock(&mutex_); - return meta_.size(); + MutexLock lock(&mutex_); + return meta_.size(); } void Tablet::GetDataSize(int64_t* size, std::vector* lg_size) { - MutexLock lock(&mutex_); - if (size) { - *size = meta_.size(); - } - if (lg_size) { - lg_size->clear(); - for (int64_t i = 0; i < meta_.lg_size_size(); ++i) { - lg_size->push_back(meta_.lg_size(i)); - } + MutexLock lock(&mutex_); + if (size) { + *size = meta_.size(); + } + if (lg_size) { + lg_size->clear(); + for (int64_t i = 0; i < meta_.lg_size_size(); ++i) { + lg_size->push_back(meta_.lg_size(i)); } + } +} + +int64_t Tablet::GetDataSizeOnFlash() { + MutexLock lock(&mutex_); + return data_size_on_flash_; +} + +void Tablet::SetDataSizeOnFlash(int64_t size) { + MutexLock lock(&mutex_); + data_size_on_flash_ = size; } int64_t Tablet::GetQps() { - MutexLock lock(&mutex_); - return average_counter_.read_rows() + average_counter_.write_rows() - + average_counter_.scan_rows(); + MutexLock lock(&mutex_); + return average_counter_.read_rows() + average_counter_.write_rows() + + average_counter_.scan_rows(); } int64_t Tablet::GetReadQps() { - MutexLock lock(&mutex_); - return average_counter_.read_rows(); + MutexLock lock(&mutex_); + return average_counter_.read_rows(); } int64_t Tablet::GetWriteQps() { - MutexLock lock(&mutex_); - return average_counter_.write_rows(); + MutexLock lock(&mutex_); + return average_counter_.write_rows(); } int64_t Tablet::GetScanQps() { - MutexLock lock(&mutex_); - return average_counter_.scan_rows(); + MutexLock lock(&mutex_); + return average_counter_.scan_rows(); +} + +int64_t Tablet::GetLRead() { + MutexLock lock(&mutex_); + return average_counter_.low_read_cell(); +} + +uint64_t Tablet::Version() { + MutexLock lock(&mutex_); + return meta_.version(); +} + +uint64_t Tablet::IncVersion() { + MutexLock lock(&mutex_); + meta_.set_version(meta_.version() + 1); + return meta_.version(); } const std::string& Tablet::GetKeyStart() { - MutexLock lock(&mutex_); - return meta_.key_range().key_start(); + MutexLock lock(&mutex_); + return meta_.key_range().key_start(); } const std::string& Tablet::GetKeyEnd() { - MutexLock lock(&mutex_); - return meta_.key_range().key_end(); + MutexLock lock(&mutex_); + return meta_.key_range().key_end(); } const KeyRange& Tablet::GetKeyRange() { - MutexLock lock(&mutex_); - return meta_.key_range(); + MutexLock lock(&mutex_); + return meta_.key_range(); } -const TableSchema& Tablet::GetSchema() { - return table_->GetSchema(); -} +const TableSchema& Tablet::GetSchema() { return table_->GetSchema(); } const TabletCounter& Tablet::GetCounter() { - MutexLock lock(&mutex_); - if (counter_list_.size() > 0) { - return counter_list_.back(); - } else { - return average_counter_; - } + MutexLock lock(&mutex_); + if (counter_list_.size() > 0) { + return counter_list_.back(); + } else { + return average_counter_; + } } const TabletCounter& Tablet::GetAverageCounter() { - MutexLock lock(&mutex_); - return average_counter_; + MutexLock lock(&mutex_); + return average_counter_; } TabletMeta::TabletStatus Tablet::GetStatus() { - MutexLock lock(&mutex_); - return state_machine_.GetStatus(); + MutexLock lock(&mutex_); + return state_machine_.GetStatus(); } CompactStatus Tablet::GetCompactStatus() { - MutexLock lock(&mutex_); - return meta_.compact_status(); + MutexLock lock(&mutex_); + return meta_.compact_status(); } -TablePtr Tablet::GetTable() { - return table_; -} +TablePtr Tablet::GetTable() { return table_; } bool Tablet::IsBusy() { - MutexLock lock(&mutex_); - if (counter_list_.size() > 0) { - return counter_list_.back().is_on_busy(); - } else { - return average_counter_.is_on_busy(); - } -} - -bool Tablet::TestAndSetSplitTimeStamp(int64_t ts) { // timestamp in us - ts /= 1000; // transalte into ms - //MutexLock lock(&mutex_); - if (split_history_.last_split_ts < (ts - FLAGS_tera_master_split_history_time_interval)) { - split_history_.last_split_ts = ts; - return true; - } - return false; + MutexLock lock(&mutex_); + if (counter_list_.size() > 0) { + return counter_list_.back().is_on_busy(); + } else { + return average_counter_.is_on_busy(); + } +} + +bool Tablet::TestAndSetSplitTimeStamp(int64_t ts) { // timestamp in us + ts /= 1000; // transalte into ms + // MutexLock lock(&mutex_); + if (split_history_.last_split_ts < (ts - FLAGS_tera_master_split_history_time_interval)) { + split_history_.last_split_ts = ts; + return true; + } + return false; } void Tablet::AssignTabletNode(TabletNodePtr node) { - MutexLock lock(&mutex_); - node_ = node; - // set server addr to TabletMeta - meta_.set_server_addr(node_->GetAddr()); + MutexLock lock(&mutex_); + node_ = node; + // set server addr to TabletMeta + meta_.set_server_addr(node_->GetAddr()); } bool Tablet::HasErrorIgnoredLGs() const { - MutexLock lock(&mutex_); - return !ignore_err_lgs_.empty(); + MutexLock lock(&mutex_); + return !ignore_err_lgs_.empty(); } void Tablet::GetErrorIgnoredLGs(std::vector* lgs) { - MutexLock lock(&mutex_); - *lgs = ignore_err_lgs_; + MutexLock lock(&mutex_); + *lgs = ignore_err_lgs_; } bool Tablet::SetErrorIgnoredLGs(const std::string& lg_list_str) { - if (lg_list_str.empty()) { - MutexLock lock(&mutex_); - ignore_err_lgs_.clear(); - return true; - } - std::vector lgs; - SplitString(lg_list_str, ":", &lgs); - const TableSchema& schema = GetSchema(); - std::set lg_schema_set; - for (int i = 0; i < schema.locality_groups_size(); ++i) { - lg_schema_set.insert(schema.locality_groups(i).name()); - } - for (const auto& lg : lgs) { - if (lg_schema_set.find(lg) == lg_schema_set.end()) { - LOG(WARNING) << "set error ignored locality group ["<< lg << "] failed."; - return false; - } - } + if (lg_list_str.empty()) { MutexLock lock(&mutex_); - ignore_err_lgs_ = lgs; + ignore_err_lgs_.clear(); return true; + } + std::vector lgs; + SplitString(lg_list_str, ":", &lgs); + const TableSchema& schema = GetSchema(); + std::set lg_schema_set; + for (int i = 0; i < schema.locality_groups_size(); ++i) { + lg_schema_set.insert(schema.locality_groups(i).name()); + } + for (const auto& lg : lgs) { + if (lg_schema_set.find(lg) == lg_schema_set.end()) { + LOG(WARNING) << "set error ignored locality group [" << lg << "] failed."; + return false; + } + } + MutexLock lock(&mutex_); + ignore_err_lgs_ = lgs; + return true; } std::string Tablet::DebugString() { - MutexLock lock(&mutex_); - return meta_.DebugString(); + MutexLock lock(&mutex_); + return meta_.DebugString(); } void Tablet::SetCounter(const TabletCounter& counter) { - MutexLock lock(&mutex_); - average_counter_.set_low_read_cell( - CounterWeightedSum(counter.low_read_cell(), average_counter_.low_read_cell())); - average_counter_.set_scan_rows( - CounterWeightedSum(counter.scan_rows(), average_counter_.scan_rows())); - average_counter_.set_scan_kvs( - CounterWeightedSum(counter.scan_kvs(), average_counter_.scan_kvs())); - average_counter_.set_scan_size( - CounterWeightedSum(counter.scan_size(), average_counter_.scan_size())); - average_counter_.set_read_rows( - CounterWeightedSum(counter.read_rows(), average_counter_.read_rows())); - average_counter_.set_read_kvs( - CounterWeightedSum(counter.read_kvs(), average_counter_.read_kvs())); - average_counter_.set_read_size( - CounterWeightedSum(counter.read_size(), average_counter_.read_size())); - average_counter_.set_write_rows( - CounterWeightedSum(counter.write_rows(), average_counter_.write_rows())); - average_counter_.set_write_kvs( - CounterWeightedSum(counter.write_kvs(), average_counter_.write_kvs())); - average_counter_.set_write_size( - CounterWeightedSum(counter.write_size(), average_counter_.write_size())); - average_counter_.set_write_workload(counter.write_workload()); - average_counter_.set_is_on_busy(counter.is_on_busy()); - average_counter_.set_db_status(counter.db_status()); + MutexLock lock(&mutex_); + average_counter_.set_low_read_cell( + CounterWeightedSum(counter.low_read_cell(), average_counter_.low_read_cell())); + average_counter_.set_scan_rows( + CounterWeightedSum(counter.scan_rows(), average_counter_.scan_rows())); + average_counter_.set_scan_kvs( + CounterWeightedSum(counter.scan_kvs(), average_counter_.scan_kvs())); + average_counter_.set_scan_size( + CounterWeightedSum(counter.scan_size(), average_counter_.scan_size())); + average_counter_.set_read_rows( + CounterWeightedSum(counter.read_rows(), average_counter_.read_rows())); + average_counter_.set_read_kvs( + CounterWeightedSum(counter.read_kvs(), average_counter_.read_kvs())); + average_counter_.set_read_size( + CounterWeightedSum(counter.read_size(), average_counter_.read_size())); + average_counter_.set_write_rows( + CounterWeightedSum(counter.write_rows(), average_counter_.write_rows())); + average_counter_.set_write_kvs( + CounterWeightedSum(counter.write_kvs(), average_counter_.write_kvs())); + average_counter_.set_write_size( + CounterWeightedSum(counter.write_size(), average_counter_.write_size())); + average_counter_.set_write_workload(counter.write_workload()); + average_counter_.set_is_on_busy(counter.is_on_busy()); + average_counter_.set_db_status(counter.db_status()); } void Tablet::UpdateSize(const TabletMeta& meta) { - MutexLock lock(&mutex_); - meta_.set_size(meta.size()); - meta_.mutable_lg_size()->CopyFrom(meta.lg_size()); + const TableSchema schema = GetSchema(); + MutexLock lock(&mutex_); + meta_.set_size(meta.size()); + meta_.mutable_lg_size()->CopyFrom(meta.lg_size()); + + int64_t size = 0; + for (int i = 0; i < meta_.lg_size_size(); ++i) { + if (schema.locality_groups(i).store_type() == FlashStore || + schema.locality_groups(i).store_type() == MemoryStore) { + size += meta_.lg_size(i); + } + } + data_size_on_flash_ = size; +} + +bool Tablet::HasFlashLg() { + MutexLock lock(&mutex_); + assert(table_); + bool has_flash_lg = false; + for (int i = 0; i < table_->schema_.locality_groups_size(); ++i) { + if (table_->schema_.locality_groups(i).store_type() == FlashStore || + table_->schema_.locality_groups(i).store_type() == MemoryStore) { + has_flash_lg = true; + break; + } + } + + return has_flash_lg; } void Tablet::SetCompactStatus(CompactStatus compact_status) { - MutexLock lock(&mutex_); - meta_.set_compact_status(compact_status); + MutexLock lock(&mutex_); + meta_.set_compact_status(compact_status); } bool Tablet::DoStateTransition(const TabletEvent event) { - MutexLock lock(&mutex_); - return DoStateTransitionUnSafe(event); + MutexLock lock(&mutex_); + return DoStateTransitionUnSafe(event); } bool Tablet::DoStateTransitionUnSafe(const TabletEvent event) { - TabletMeta::TabletStatus curr_status = state_machine_.GetStatus(); - if (!state_machine_.DoStateTransition(event)) { - LOG(WARNING) << "tablet: " << meta_.path() << ", not support state transition, curr_state: " - << StatusCodeToString(curr_status) << ", event: " << event; - return false; - } - TabletMeta::TabletStatus post_status = state_machine_.GetStatus(); - LOG(INFO) << "tablet: " << meta_.path() << ", prev_state: " << StatusCodeToString(curr_status) - << ", event: " << event << ", post_state: " << StatusCodeToString(post_status); - meta_.set_status(post_status); - // do some post actions after StateTransition, such as do tablet avability statistics - if (post_status == TabletMeta::kTabletReady || post_status == TabletMeta::kTabletDisable) { - MasterEnv().GetTabletAvailability()->EraseNotReadyTablet(meta_.path()); - } - else { - MasterEnv().GetTabletAvailability()->AddNotReadyTablet(meta_.path(), post_status); - } - return true; + TabletMeta::TabletStatus curr_status = state_machine_.GetStatus(); + if (!state_machine_.DoStateTransition(event)) { + LOG(WARNING) << "tablet: " << meta_.path() << ", not support state transition, curr_state: " + << StatusCodeToString(curr_status) << ", event: " << event; + return false; + } + TabletMeta::TabletStatus post_status = state_machine_.GetStatus(); + LOG(INFO) << "tablet: " << meta_.path() << ", prev_state: " << StatusCodeToString(curr_status) + << ", event: " << event << ", post_state: " << StatusCodeToString(post_status); + meta_.set_status(post_status); + // do some post actions after StateTransition, such as do tablet avability + // statistics + if (post_status == TabletMeta::kTabletReady || post_status == TabletMeta::kTabletDisable) { + MasterEnv().GetTabletAvailability()->EraseNotReadyTablet(meta_.path()); + } else { + MasterEnv().GetTabletAvailability()->AddNotReadyTablet(meta_.path(), post_status); + } + return true; +} + +bool UnknownTablet::DoStateTransition(const TabletEvent event) { + MutexLock lock(&mutex_); + TabletMeta::TabletStatus curr_status = state_machine_.GetStatus(); + if (!state_machine_.DoStateTransition(event)) { + LOG(WARNING) << "tablet: " << meta_.path() << ", not support state transition, curr_state: " + << StatusCodeToString(curr_status) << ", event: " << event; + return false; + } + TabletMeta::TabletStatus post_status = state_machine_.GetStatus(); + LOG(INFO) << "tablet: " << meta_.path() << ", prev_state: " << StatusCodeToString(curr_status) + << ", event: " << event << ", post_state: " << StatusCodeToString(post_status); + meta_.set_status(post_status); + return true; } bool MetaTablet::DoStateTransition(const TabletEvent event) { - bool root_tablet_addr_updated = false; - { - MutexLock lock(&mutex_); - if (!DoStateTransitionUnSafe(event)) { - return false; - } - // MetaTablet changed to kTableReady, we need update it's address to zk/nexus and - // resume all suspended meta operations - if (state_machine_.GetStatus() == TabletMeta::kTabletReady) { - root_tablet_addr_updated = UpdateRootTabletAddr(); - LOG_IF(INFO, root_tablet_addr_updated) << "update meta tablet addr: " <UpdateRootTabletNode(meta_.server_addr()); + return zk_adapter_->UpdateRootTabletNode(meta_.server_addr()); } void Tablet::SetStatus(const TabletMeta::TabletStatus status) { - MutexLock lock(&mutex_); - state_machine_.SetStatus(status); - meta_.set_status(status); - if (status == TabletMeta::kTabletReady || status == TabletMeta::kTabletDisable) { - MasterEnv().GetTabletAvailability()->EraseNotReadyTablet(meta_.path()); - } - else { - MasterEnv().GetTabletAvailability()->AddNotReadyTablet(meta_.path(), status); - } + MutexLock lock(&mutex_); + state_machine_.SetStatus(status); + meta_.set_status(status); + if (status == TabletMeta::kTabletReady || status == TabletMeta::kTabletDisable) { + MasterEnv().GetTabletAvailability()->EraseNotReadyTablet(meta_.path()); + } else { + MasterEnv().GetTabletAvailability()->AddNotReadyTablet(meta_.path(), status); + } } int64_t Tablet::UpdateTime() { - MutexLock lock(&mutex_); - return update_time_; + MutexLock lock(&mutex_); + return update_time_; } int64_t Tablet::SetUpdateTime(int64_t timestamp) { - MutexLock lock(&mutex_); - int64_t ts = update_time_; - update_time_ = timestamp; - return ts; + MutexLock lock(&mutex_); + int64_t ts = update_time_; + update_time_ = timestamp; + return ts; } int64_t Tablet::ReadyTime() { - MutexLock lock(&mutex_); - return state_machine_.ReadyTime(); + MutexLock lock(&mutex_); + return state_machine_.ReadyTime(); } int64_t Tablet::LastMoveTime() const { - MutexLock lock(&mutex_); - return last_move_time_us_; + MutexLock lock(&mutex_); + return last_move_time_us_; } void Tablet::SetLastMoveTime(int64_t time) { - MutexLock lock(&mutex_); - last_move_time_us_ = time; -} - -bool Tablet::IsBound() { - TablePtr null_ptr; - if (table_ != null_ptr) { - return true; - } - return false; + MutexLock lock(&mutex_); + last_move_time_us_ = time; } bool Tablet::Verify(const std::string& table_name, const std::string& key_start, - const std::string& key_end, const std::string& path, - const std::string& server_addr, StatusCode* ret_status) { - MutexLock lock(&mutex_); - if (meta_.table_name() != table_name - || meta_.key_range().key_start() != key_start - || meta_.key_range().key_end() != key_end - || meta_.path() != path - || meta_.server_addr() != server_addr) { - SetStatusCode(kTableInvalidArg, ret_status); - LOG(WARNING) << "tablet verify failed [" - << meta_.table_name() << "," - << meta_.key_range().key_start() << "," - << meta_.key_range().key_end() << "," - << meta_.path() << "," - << meta_.server_addr() << "] vs [" - << table_name << "," - << key_start << "," - << key_end << "," - << path << "," - << server_addr << "]."; - return false; - } - return true; + const std::string& key_end, const std::string& path, + const std::string& server_addr, StatusCode* ret_status) { + MutexLock lock(&mutex_); + if (meta_.table_name() != table_name || meta_.key_range().key_start() != key_start || + meta_.key_range().key_end() != key_end || meta_.path() != path || + meta_.server_addr() != server_addr) { + SetStatusCode(kTableInvalidArg, ret_status); + LOG(WARNING) << "tablet verify failed [" << meta_.table_name() << "," + << meta_.key_range().key_start() << "," << meta_.key_range().key_end() << "," + << meta_.path() << "," << meta_.server_addr() << "] vs [" << table_name << "," + << key_start << "," << key_end << "," << path << "," << server_addr << "]."; + return false; + } + return true; } -void Tablet::ToMetaTableKeyValue(std::string* packed_key, - std::string* packed_value) { - MutexLock lock(&mutex_); - MakeMetaTableKeyValue(meta_, packed_key, packed_value); +void Tablet::ToMetaTableKeyValue(std::string* packed_key, std::string* packed_value) { + MutexLock lock(&mutex_); + MakeMetaTableKeyValue(meta_, packed_key, packed_value); } -std::ostream& operator << (std::ostream& o, const Table& table) { - MutexLock lock(&table.mutex_); - o << "table: " << table.name_ << ", schema: " - << table.schema_.ShortDebugString(); - return o; +std::ostream& operator<<(std::ostream& o, const Table& table) { + MutexLock lock(&table.mutex_); + o << "table: " << table.name_ << ", schema: " << table.schema_.ShortDebugString(); + return o; } -std::ostream& operator << (std::ostream& o, const TablePtr& table) { - o << *table; - return o; +std::ostream& operator<<(std::ostream& o, const TablePtr& table) { + o << *table; + return o; } -Table::Table(const std::string& table_name, const TableMeta& meta) : - Table(table_name, meta.schema(), meta.status()) { - // reset table's create time - if (meta.has_create_time() && meta.create_time() > 0) { - create_time_ = meta.create_time(); - } +Table::Table(const std::string& table_name, const TableMeta& meta) + : Table(table_name, meta.schema(), meta.status()) { + // reset table's create time + MutexLock lock(&mutex_); + if (meta.has_create_time() && meta.create_time() > 0) { + const_cast(create_time_) = meta.create_time(); + } + // newly created table will has create_time_ in us, but some old table may has create_time in + // seconds, + // adjust old tables create_time_ here + if (create_time_ < time(NULL)) { + const_cast(create_time_) = create_time_ * 1000000; + } } Table::Table(const std::string& table_name, const TableSchema& schema, const TableStatus status) @@ -459,1600 +522,1523 @@ Table::Table(const std::string& table_name, const TableSchema& schema, const Tab schema_(schema), deleted_tablet_num_(0), max_tablet_no_(0), - create_time_((int64_t)time(NULL)), + create_time_((int64_t)get_micros()), metric_(table_name), schema_is_syncing_(false), - rangefragment_(NULL), - update_rpc_response_(NULL), - update_rpc_done_(NULL), old_schema_(NULL), reported_live_tablets_num_(0), state_machine_(status) { + if (name_ == FLAGS_tera_master_meta_table_name) { + // meta table always has epoch create time + const_cast(create_time_) = 1; + } } bool Table::FindTablet(const std::string& key_start, TabletPtr* tablet) { - MutexLock lock(&mutex_); - Table::TabletList::iterator it2 = tablets_list_.find(key_start); - if (it2 == tablets_list_.end()) { - return false; - } - *tablet = it2->second; - return true; + MutexLock lock(&mutex_); + Table::TabletList::iterator it2 = tablets_list_.find(key_start); + if (it2 == tablets_list_.end()) { + return false; + } + *tablet = it2->second; + return true; } -void Table::FindTablet(const std::string& server_addr, - std::vector* tablet_meta_list) { - MutexLock lock(&mutex_); - Table::TabletList::iterator it2 = tablets_list_.begin(); - for (; it2 != tablets_list_.end(); ++it2) { - TabletPtr tablet = it2->second; - tablet->mutex_.Lock(); - if (tablet->meta_.server_addr() == server_addr) { - tablet_meta_list->push_back(tablet); - } - tablet->mutex_.Unlock(); +void Table::FindTablet(const std::string& server_addr, std::vector* tablet_meta_list) { + MutexLock lock(&mutex_); + Table::TabletList::iterator it2 = tablets_list_.begin(); + for (; it2 != tablets_list_.end(); ++it2) { + TabletPtr tablet = it2->second; + tablet->mutex_.Lock(); + if (tablet->meta_.server_addr() == server_addr) { + tablet_meta_list->push_back(tablet); } + tablet->mutex_.Unlock(); + } } void Table::GetTablet(std::vector* tablet_meta_list) { - MutexLock lock(&mutex_); - Table::TabletList::iterator it2 = tablets_list_.begin(); - for (; it2 != tablets_list_.end(); ++it2) { - TabletPtr tablet = it2->second; - tablet_meta_list->push_back(tablet); - } + MutexLock lock(&mutex_); + Table::TabletList::iterator it2 = tablets_list_.begin(); + for (; it2 != tablets_list_.end(); ++it2) { + TabletPtr tablet = it2->second; + tablet_meta_list->push_back(tablet); + } } const std::string& Table::GetTableName() { - MutexLock lock(&mutex_); - return name_; + MutexLock lock(&mutex_); + return name_; } TableStatus Table::GetStatus() { - MutexLock lock(&mutex_); - return state_machine_.GetStatus(); + MutexLock lock(&mutex_); + return state_machine_.GetStatus(); } const TableSchema& Table::GetSchema() { - MutexLock lock(&mutex_); - return schema_; + MutexLock lock(&mutex_); + return schema_; } void Table::SetSchema(const TableSchema& schema) { - MutexLock lock(&mutex_); - schema_.CopyFrom(schema); + MutexLock lock(&mutex_); + schema_.CopyFrom(schema); } const TableCounter& Table::GetCounter() { - MutexLock lock(&mutex_); - return counter_; + MutexLock lock(&mutex_); + return counter_; } int64_t Table::GetTabletsCount() { - MutexLock lock(&mutex_); - return tablets_list_.size(); + MutexLock lock(&mutex_); + return tablets_list_.size(); +} + +void Table::GetTsAddrTabletsCount(std::map* ts_addr_tablets_count) { + MutexLock lock(&mutex_); + for (auto it = tablets_list_.begin(); it != tablets_list_.end(); ++it) { + const std::string& ts_addr = it->second->meta_.server_addr(); + auto ts_addr_it = ts_addr_tablets_count->find(ts_addr); + if (ts_addr_it == ts_addr_tablets_count->end()) { + (*ts_addr_tablets_count)[ts_addr] = 1; + } else { + ts_addr_it->second += 1; + } + } } void Table::AddDeleteTabletCount() { - MutexLock lock(&mutex_); - deleted_tablet_num_++; + MutexLock lock(&mutex_); + deleted_tablet_num_++; } bool Table::NeedDelete() { - MutexLock lock(&mutex_); - if (deleted_tablet_num_ == tablets_list_.size()) { - return true; - } - return false; + MutexLock lock(&mutex_); + if (deleted_tablet_num_ == tablets_list_.size()) { + return true; + } + return false; } -void Table::ToMetaTableKeyValue(std::string* packed_key, - std::string* packed_value) { - MutexLock lock(&mutex_); - TableMeta meta; - ToMeta(&meta); - MakeMetaTableKeyValue(meta, packed_key, packed_value); +void Table::ToMetaTableKeyValue(std::string* packed_key, std::string* packed_value) { + MutexLock lock(&mutex_); + TableMeta meta; + ToMeta(&meta); + MakeMetaTableKeyValue(meta, packed_key, packed_value); } bool Table::PrepareUpdate(const TableSchema& schema) { - if (!GetSchemaSyncLock()) { - return false; - } - TableSchema* origin_schema = new TableSchema; - origin_schema->CopyFrom(GetSchema()); - SetOldSchema(origin_schema); - SetSchema(schema); - return true; + if (!GetSchemaSyncLock()) { + return false; + } + TableSchema* origin_schema = new TableSchema; + origin_schema->CopyFrom(GetSchema()); + SetOldSchema(origin_schema); + SetSchema(schema); + return true; } void Table::AbortUpdate() { - TableSchema old_schema; - if (GetOldSchema(&old_schema)) { - SetSchema(old_schema); - ClearOldSchema(); - } - ClearSchemaSyncLock(); + TableSchema old_schema; + if (GetOldSchema(&old_schema)) { + SetSchema(old_schema); + ClearOldSchema(); + } + ClearSchemaSyncLock(); } void Table::CommitUpdate() { - ClearOldSchema(); - ClearSchemaSyncLock(); + ClearOldSchema(); + ClearSchemaSyncLock(); } void Table::ToMeta(TableMeta* meta) { - meta->set_table_name(name_); - meta->set_status(state_machine_.GetStatus()); - meta->mutable_schema()->CopyFrom(schema_); - meta->set_create_time(create_time_); + meta->set_table_name(name_); + meta->set_status(state_machine_.GetStatus()); + meta->mutable_schema()->CopyFrom(schema_); + meta->set_create_time(create_time_); } uint64_t Table::GetNextTabletNo() { - MutexLock lock(&mutex_); - max_tablet_no_++; - LOG(INFO) << "generate new tablet number: " << max_tablet_no_; - return max_tablet_no_; + MutexLock lock(&mutex_); + max_tablet_no_++; + LOG(INFO) << "generate new tablet number: " << max_tablet_no_; + return max_tablet_no_; } bool Table::GetSchemaIsSyncing() { - MutexLock lock(&mutex_); - return schema_is_syncing_; + MutexLock lock(&mutex_); + return schema_is_syncing_; } bool Table::GetSchemaSyncLock() { - MutexLock lock(&mutex_); - if (schema_is_syncing_) { - return false; - } - schema_is_syncing_ = true; - return true; + MutexLock lock(&mutex_); + if (schema_is_syncing_) { + return false; + } + schema_is_syncing_ = true; + return true; } void Table::ClearSchemaSyncLock() { - MutexLock lock(&mutex_); - schema_is_syncing_ = false; + MutexLock lock(&mutex_); + schema_is_syncing_ = false; } void Table::SetOldSchema(TableSchema* schema) { - MutexLock lock(&mutex_); - delete old_schema_; - old_schema_ = schema; + MutexLock lock(&mutex_); + delete old_schema_; + old_schema_ = schema; } bool Table::GetOldSchema(TableSchema* schema) { - MutexLock lock(&mutex_); - if ((schema != NULL) && (old_schema_ != NULL)) { - schema->CopyFrom(*old_schema_); - return true; - } - return false; + MutexLock lock(&mutex_); + if ((schema != NULL) && (old_schema_ != NULL)) { + schema->CopyFrom(*old_schema_); + return true; + } + return false; } void Table::ClearOldSchema() { - MutexLock lock(&mutex_); - delete old_schema_; - old_schema_ = NULL; -} - -void Table::ResetRangeFragment() { - MutexLock lock(&mutex_); - delete rangefragment_; - rangefragment_ = new RangeFragment; -} - -RangeFragment* Table::GetRangeFragment() { - MutexLock lock(&mutex_); - return rangefragment_; -} - -bool Table::AddToRange(const std::string& start, const std::string& end) { - MutexLock lock(&mutex_); - return rangefragment_->AddToRange(start, end); -} - -bool Table::IsCompleteRange() const { - MutexLock lock(&mutex_); - return rangefragment_->IsCompleteRange(); -} - -bool Table::IsSchemaSyncedAtRange(const std::string& start, const std::string& end) { - MutexLock lock(&mutex_); - return rangefragment_->IsCoverRange(start, end); -} - -void Table::StoreUpdateRpc(UpdateTableResponse* response, google::protobuf::Closure* done) { - MutexLock lock(&mutex_); - update_rpc_response_ = response; - update_rpc_done_ = done; -} - -void Table::UpdateRpcDone() { - MutexLock lock(&mutex_); - if (update_rpc_response_ != NULL) { - update_rpc_response_->set_status(kMasterOk); - update_rpc_done_->Run(); - - update_rpc_response_ = NULL; - update_rpc_done_ = NULL; - } + MutexLock lock(&mutex_); + delete old_schema_; + old_schema_ = NULL; } void Table::RefreshCounter() { - MutexLock lock(&mutex_); - int64_t size = 0; - int64_t tablet_num = 0; - int64_t notready = 0; - int64_t lread = 0; - int64_t read = 0; - int64_t rmax = 0; - int64_t rspeed = 0; - int64_t write = 0; - int64_t wmax = 0; - int64_t wspeed = 0; - int64_t scan = 0; - int64_t smax = 0; - int64_t sspeed = 0; - int64_t corrupt_num = 0; - size_t lg_num = 0; - std::vector lg_size; - - std::vector tablet_list; - Table::TabletList::iterator it = tablets_list_.begin(); - for (; it != tablets_list_.end(); ++it) { - tablet_num++; - TabletPtr tablet = it->second; - if (tablet->GetStatus() != TabletMeta::kTabletReady) { - notready++; - } - int64_t size_tmp; - std::vector lg_size_tmp; - tablet->GetDataSize(&size_tmp, &lg_size_tmp); - - size += size_tmp; - if (lg_num == 0) { - lg_num = lg_size_tmp.size(); - lg_size.resize(lg_num, 0); - } - for (size_t l = 0; l < lg_num; ++l) { - if (lg_size_tmp.size() > l) { - lg_size[l] += lg_size_tmp[l]; - } - } - - const TabletCounter& counter = tablet->GetCounter(); - lread += counter.low_read_cell(); - read += counter.read_rows(); - if (counter.read_rows() > rmax) { - rmax = counter.read_rows(); - } - rspeed += counter.read_size(); - write += counter.write_rows(); - if (counter.write_rows() > wmax) { - wmax = counter.write_rows(); - } - wspeed += counter.write_size(); - scan += counter.scan_rows(); - if (counter.scan_rows() > smax) { - smax = counter.scan_rows(); - } - sspeed += counter.scan_size(); - if (counter.db_status() == TabletMeta::kTabletCorruption) { - ++corrupt_num; - } + MutexLock lock(&mutex_); + int64_t size = 0; + int64_t tablet_num = 0; + int64_t notready = 0; + int64_t lread = 0; + int64_t read = 0; + int64_t rmax = 0; + int64_t rspeed = 0; + int64_t write = 0; + int64_t wmax = 0; + int64_t wspeed = 0; + int64_t scan = 0; + int64_t smax = 0; + int64_t sspeed = 0; + int64_t corrupt_num = 0; + size_t lg_num = 0; + std::vector lg_size; + + std::vector tablet_list; + Table::TabletList::iterator it = tablets_list_.begin(); + for (; it != tablets_list_.end(); ++it) { + tablet_num++; + TabletPtr tablet = it->second; + if (tablet->GetStatus() != TabletMeta::kTabletReady) { + notready++; + } + int64_t size_tmp; + std::vector lg_size_tmp; + tablet->GetDataSize(&size_tmp, &lg_size_tmp); + + size += size_tmp; + if (lg_num == 0) { + lg_num = lg_size_tmp.size(); + lg_size.resize(lg_num, 0); } - - metric_.SetTableSize(size); - metric_.SetTabletNum(tablet_num); - metric_.SetNotReady(notready); - metric_.SetCorruptNum(corrupt_num); - - counter_.set_size(size); - counter_.set_tablet_num(tablet_num); - counter_.set_notready_num(notready); - counter_.set_lread(lread); - counter_.set_read_rows(read); - counter_.set_read_max(rmax); - counter_.set_read_size(rspeed); - counter_.set_write_rows(write); - counter_.set_write_max(wmax); - counter_.set_write_size(wspeed); - counter_.set_scan_rows(scan); - counter_.set_scan_max(smax); - counter_.set_scan_size(sspeed); - counter_.clear_lg_size(); for (size_t l = 0; l < lg_num; ++l) { - counter_.add_lg_size(lg_size[l]); - } + if (lg_size_tmp.size() > l) { + lg_size[l] += lg_size_tmp[l]; + } + } + + const TabletCounter& counter = tablet->GetCounter(); + lread += counter.low_read_cell(); + read += counter.read_rows(); + if (counter.read_rows() > rmax) { + rmax = counter.read_rows(); + } + rspeed += counter.read_size(); + write += counter.write_rows(); + if (counter.write_rows() > wmax) { + wmax = counter.write_rows(); + } + wspeed += counter.write_size(); + scan += counter.scan_rows(); + if (counter.scan_rows() > smax) { + smax = counter.scan_rows(); + } + sspeed += counter.scan_size(); + if (counter.db_status() == TabletMeta::kTabletCorruption) { + ++corrupt_num; + } + } + + metric_.SetTableSize(size); + metric_.SetTabletNum(tablet_num); + metric_.SetNotReady(notready); + metric_.SetCorruptNum(corrupt_num); + + counter_.set_size(size); + counter_.set_tablet_num(tablet_num); + counter_.set_notready_num(notready); + counter_.set_lread(lread); + counter_.set_read_rows(read); + counter_.set_read_max(rmax); + counter_.set_read_size(rspeed); + counter_.set_write_rows(write); + counter_.set_write_max(wmax); + counter_.set_write_size(wspeed); + counter_.set_scan_rows(scan); + counter_.set_scan_max(smax); + counter_.set_scan_size(sspeed); + counter_.clear_lg_size(); + for (size_t l = 0; l < lg_num; ++l) { + counter_.add_lg_size(lg_size[l]); + } } void Table::MergeTablets(TabletPtr first_tablet, TabletPtr second_tablet, const TabletMeta& merged_meta, TabletPtr* merged_tablet) { - CHECK_EQ(first_tablet->GetKeyStart(), merged_meta.key_range().key_start()); - CHECK_EQ(second_tablet->GetKeyEnd(), merged_meta.key_range().key_end()); - CHECK_EQ(first_tablet->GetKeyEnd(), second_tablet->GetKeyStart()); - - MutexLock lock(&mutex_); - uint64_t tablet_num = leveldb::GetTabletNumFromPath(merged_meta.path()); - if (max_tablet_no_ < tablet_num) { - max_tablet_no_ = tablet_num; - } - - if (FLAGS_tera_master_gc_strategy == "trackable") { - uint64_t tablet_num1 = leveldb::GetTabletNumFromPath(first_tablet->GetPath()); - std::multiset::iterator it = first_tablet->inh_files_.begin(); - for (; it != first_tablet->inh_files_.end(); ++it) { - const TabletFile& file = *it; - InheritedFileInfo& file_info = useful_inh_files_[file.tablet_id][file]; - CHECK_GT(file_info.ref, 0u); - VLOG(10) << "[gc] [" << name_ << "] file " << file << " inherited by " << tablet_num1 - << " pass to " << tablet_num << " ref is " << file_info.ref; - (*merged_tablet)->inh_files_.insert(file); - } - uint64_t tablet_num2 = leveldb::GetTabletNumFromPath(second_tablet->GetPath()); - it = second_tablet->inh_files_.begin(); - // ref: +1 for add child tablets, -1 for del parent tablets - for (; it != second_tablet->inh_files_.end(); ++it) { - const TabletFile& file = *it; - InheritedFileInfo& file_info = useful_inh_files_[file.tablet_id][file]; - CHECK_GT(file_info.ref, 0u); - VLOG(10) << "[gc] [" << name_ << "] file " << file << " inherited by " << tablet_num2 - << " pass to " << tablet_num << " ref is " << file_info.ref; - (*merged_tablet)->inh_files_.insert(file); - } - - if (first_tablet->gc_reported_) { - --reported_live_tablets_num_; - } - if (second_tablet->gc_reported_) { - --reported_live_tablets_num_; - } - } - - tablets_list_.erase(first_tablet->GetKeyStart()); - tablets_list_.erase(second_tablet->GetKeyStart()); - MasterEnv().GetTabletAvailability()->EraseNotReadyTablet(first_tablet->GetPath()); - MasterEnv().GetTabletAvailability()->EraseNotReadyTablet(second_tablet->GetPath()); - tablets_list_[merged_meta.key_range().key_start()] = *merged_tablet; -} - -void Table::SplitTablet(TabletPtr splited_tablet, - const TabletMeta& first_half, const TabletMeta& second_half, - TabletPtr* first_tablet, TabletPtr* second_tablet) { - CHECK_EQ(splited_tablet->GetKeyStart(), first_half.key_range().key_start()); - CHECK_EQ(splited_tablet->GetKeyEnd(), second_half.key_range().key_end()); - CHECK_EQ(first_half.key_range().key_end(), second_half.key_range().key_start()); - - MutexLock lock(&mutex_); - uint64_t tablet_num1 = leveldb::GetTabletNumFromPath(first_half.path()); - if (max_tablet_no_ < tablet_num1) { - max_tablet_no_ = tablet_num1; - } - uint64_t tablet_num2 = leveldb::GetTabletNumFromPath(second_half.path()); - if (max_tablet_no_ < tablet_num2) { - max_tablet_no_ = tablet_num2; - } - - if (FLAGS_tera_master_gc_strategy == "trackable") { - uint64_t tablet_num = leveldb::GetTabletNumFromPath(splited_tablet->GetPath()); - (*first_tablet)->inh_files_ = splited_tablet->inh_files_; - (*second_tablet)->inh_files_ = splited_tablet->inh_files_; - std::multiset::iterator it = splited_tablet->inh_files_.begin(); - for (; it != splited_tablet->inh_files_.end(); ++it) { - const TabletFile& file = *it; - InheritedFileInfo& file_info = useful_inh_files_[file.tablet_id][file]; - CHECK_GT(file_info.ref, 0u); - file_info.ref++; // ref: +2 for add child tablets, -1 for del parent tablets - VLOG(10) << "[gc] [" << name_ << "] file " << file << " inherited by " << tablet_num - << " pass to " << tablet_num1 << " and " << tablet_num2 - << " ref increment to " << file_info.ref; - } - - if (splited_tablet->gc_reported_) { - --reported_live_tablets_num_; - } - } - - MasterEnv().GetTabletAvailability()->EraseNotReadyTablet(splited_tablet->GetPath()); - tablets_list_.erase(first_half.key_range().key_start()); - tablets_list_[first_half.key_range().key_start()] = *first_tablet; - tablets_list_[second_half.key_range().key_start()] = *second_tablet; + CHECK_EQ(first_tablet->GetKeyStart(), merged_meta.key_range().key_start()); + CHECK_EQ(second_tablet->GetKeyEnd(), merged_meta.key_range().key_end()); + CHECK_EQ(first_tablet->GetKeyEnd(), second_tablet->GetKeyStart()); + + MutexLock lock(&mutex_); + uint64_t tablet_num = leveldb::GetTabletNumFromPath(merged_meta.path()); + if (max_tablet_no_ < tablet_num) { + max_tablet_no_ = tablet_num; + } + + { + uint64_t tablet_num1 = leveldb::GetTabletNumFromPath(first_tablet->GetPath()); + std::multiset::iterator it = first_tablet->inh_files_.begin(); + for (; it != first_tablet->inh_files_.end(); ++it) { + const TabletFile& file = *it; + InheritedFileInfo& file_info = useful_inh_files_[file.tablet_id][file]; + CHECK_GT(file_info.ref, 0u); + VLOG(10) << "[gc] [" << name_ << "] file " << file << " inherited by " << tablet_num1 + << " pass to " << tablet_num << " ref is " << file_info.ref; + (*merged_tablet)->inh_files_.insert(file); + } + uint64_t tablet_num2 = leveldb::GetTabletNumFromPath(second_tablet->GetPath()); + it = second_tablet->inh_files_.begin(); + // ref: +1 for add child tablets, -1 for del parent tablets + for (; it != second_tablet->inh_files_.end(); ++it) { + const TabletFile& file = *it; + InheritedFileInfo& file_info = useful_inh_files_[file.tablet_id][file]; + CHECK_GT(file_info.ref, 0u); + VLOG(10) << "[gc] [" << name_ << "] file " << file << " inherited by " << tablet_num2 + << " pass to " << tablet_num << " ref is " << file_info.ref; + (*merged_tablet)->inh_files_.insert(file); + } + + if (first_tablet->gc_reported_) { + --reported_live_tablets_num_; + } + if (second_tablet->gc_reported_) { + --reported_live_tablets_num_; + } + } + + tablets_list_.erase(first_tablet->GetKeyStart()); + tablets_list_.erase(second_tablet->GetKeyStart()); + MasterEnv().GetTabletAvailability()->EraseNotReadyTablet(first_tablet->GetPath()); + MasterEnv().GetTabletAvailability()->EraseNotReadyTablet(second_tablet->GetPath()); + tablets_list_[merged_meta.key_range().key_start()] = *merged_tablet; +} + +void Table::SplitTablet(TabletPtr splited_tablet, const TabletMeta& first_half, + const TabletMeta& second_half, TabletPtr* first_tablet, + TabletPtr* second_tablet) { + CHECK_EQ(splited_tablet->GetKeyStart(), first_half.key_range().key_start()); + CHECK_EQ(splited_tablet->GetKeyEnd(), second_half.key_range().key_end()); + CHECK_EQ(first_half.key_range().key_end(), second_half.key_range().key_start()); + + MutexLock lock(&mutex_); + uint64_t tablet_num1 = leveldb::GetTabletNumFromPath(first_half.path()); + if (max_tablet_no_ < tablet_num1) { + max_tablet_no_ = tablet_num1; + } + uint64_t tablet_num2 = leveldb::GetTabletNumFromPath(second_half.path()); + if (max_tablet_no_ < tablet_num2) { + max_tablet_no_ = tablet_num2; + } + + { + uint64_t tablet_num = leveldb::GetTabletNumFromPath(splited_tablet->GetPath()); + (*first_tablet)->inh_files_ = splited_tablet->inh_files_; + (*second_tablet)->inh_files_ = splited_tablet->inh_files_; + std::multiset::iterator it = splited_tablet->inh_files_.begin(); + for (; it != splited_tablet->inh_files_.end(); ++it) { + const TabletFile& file = *it; + InheritedFileInfo& file_info = useful_inh_files_[file.tablet_id][file]; + CHECK_GT(file_info.ref, 0u); + file_info.ref++; // ref: +2 for add child tablets, -1 for del parent tablets + VLOG(10) << "[gc] [" << name_ << "] file " << file << " inherited by " << tablet_num + << " pass to " << tablet_num1 << " and " << tablet_num2 << " ref increment to " + << file_info.ref; + } + + if (splited_tablet->gc_reported_) { + --reported_live_tablets_num_; + } + } + + MasterEnv().GetTabletAvailability()->EraseNotReadyTablet(splited_tablet->GetPath()); + tablets_list_.erase(first_half.key_range().key_start()); + tablets_list_[first_half.key_range().key_start()] = *first_tablet; + tablets_list_[second_half.key_range().key_start()] = *second_tablet; } void Table::GarbageCollect(const TabletInheritedFileInfo& tablet_inh_info) { - // sort reported files - std::multiset report_inh_files; - for (int32_t i = 0; i < tablet_inh_info.lg_inh_files_size(); i++) { - const LgInheritedLiveFiles& lg_inh_files = tablet_inh_info.lg_inh_files(i); - struct TabletFile inh_file; - inh_file.lg_id = lg_inh_files.lg_no(); - for (int32_t j = 0; j < lg_inh_files.file_number_size(); j++) { - leveldb::ParseFullFileNumber(lg_inh_files.file_number(j), - &inh_file.tablet_id, - &inh_file.file_id); - report_inh_files.insert(inh_file); - } - } - - MutexLock l(&mutex_); - Table::TabletList::iterator tablet_it = tablets_list_.find(tablet_inh_info.key_start()); - if (tablet_it == tablets_list_.end()) { - return; - } - TabletPtr tablet = tablet_it->second; - if (tablet->GetKeyEnd() != tablet_inh_info.key_end()) { - return; + // sort reported files + std::multiset report_inh_files; + for (int32_t i = 0; i < tablet_inh_info.lg_inh_files_size(); i++) { + const LgInheritedLiveFiles& lg_inh_files = tablet_inh_info.lg_inh_files(i); + struct TabletFile inh_file; + inh_file.lg_id = lg_inh_files.lg_no(); + for (int32_t j = 0; j < lg_inh_files.file_number_size(); j++) { + leveldb::ParseFullFileNumber(lg_inh_files.file_number(j), &inh_file.tablet_id, + &inh_file.file_id); + report_inh_files.insert(inh_file); + } + } + + MutexLock l(&mutex_); + Table::TabletList::iterator tablet_it = tablets_list_.find(tablet_inh_info.key_start()); + if (tablet_it == tablets_list_.end()) { + return; + } + TabletPtr tablet = tablet_it->second; + if (tablet->GetKeyEnd() != tablet_inh_info.key_end()) { + return; + } + + // insert a MAX element to simplify two sets' comparason + struct TabletFile max = {UINT64_MAX, INT32_MAX, UINT64_MAX}; + report_inh_files.insert(max); + tablet->inh_files_.insert(max); + std::multiset::iterator old_it = tablet->inh_files_.begin(); + std::multiset::iterator new_it = report_inh_files.begin(); + while (old_it != tablet->inh_files_.end() && new_it != report_inh_files.end()) { + if (*old_it == *new_it) { + ++old_it; + ++new_it; + } else if (*old_it < *new_it) { + VLOG(10) << "[gc] " << tablet->GetPath() << " release file " << *old_it; + ReleaseInheritedFile(*old_it); + old_it = tablet->inh_files_.erase(old_it); // desc ref for tablet->inh_files_ + } else if (!tablet->gc_reported_) { + VLOG(10) << "[gc] " << tablet->GetPath() << " report file " << *new_it; + AddInheritedFile(*new_it, true); // inc ref for tablet->inh_files_ + tablet->inh_files_.insert(*new_it); + ++new_it; + } else { + LOG(WARNING) << "[gc] ignore(query error) " << tablet->GetPath() << " report new file " + << *new_it; + ++new_it; } + } + tablet->inh_files_.erase(max); - // insert a MAX element to simplify two sets' comparason - struct TabletFile max = {UINT64_MAX, INT32_MAX, UINT64_MAX}; - report_inh_files.insert(max); - tablet->inh_files_.insert(max); - std::multiset::iterator old_it = tablet->inh_files_.begin(); - std::multiset::iterator new_it = report_inh_files.begin(); - while (old_it != tablet->inh_files_.end() && new_it != report_inh_files.end()) { - if (*old_it == *new_it) { - ++old_it; - ++new_it; - } else if (*old_it < *new_it) { - VLOG(10) << "[gc] " << tablet->GetPath() << " release file " << *old_it; - ReleaseInheritedFile(*old_it); - old_it = tablet->inh_files_.erase(old_it); // desc ref for tablet->inh_files_ - } else if (!tablet->gc_reported_) { - VLOG(10) << "[gc] " << tablet->GetPath() << " report file " << *new_it; - AddInheritedFile(*new_it, true); // inc ref for tablet->inh_files_ - tablet->inh_files_.insert(*new_it); - ++new_it; - } else { - LOG(WARNING) << "[gc] ignore(query error) " << tablet->GetPath() << " report new file " << *new_it; - ++new_it; - } - } - tablet->inh_files_.erase(max); - - if (!tablet->gc_reported_) { - tablet->gc_reported_ = true; - if (++reported_live_tablets_num_ == tablets_list_.size()) { - // now all live tablets report finish - std::set::iterator it = gc_disabled_dead_tablets_.begin(); - for (; it != gc_disabled_dead_tablets_.end(); ++it) { - EnableDeadTabletGarbageCollect(*it); - } - gc_disabled_dead_tablets_.clear(); - } + if (!tablet->gc_reported_) { + tablet->gc_reported_ = true; + if (++reported_live_tablets_num_ == tablets_list_.size()) { + // now all live tablets report finish + std::set::iterator it = gc_disabled_dead_tablets_.begin(); + for (; it != gc_disabled_dead_tablets_.end(); ++it) { + EnableDeadTabletGarbageCollect(*it); + } + gc_disabled_dead_tablets_.clear(); } + } } void Table::EnableDeadTabletGarbageCollect(uint64_t tablet_id) { - mutex_.AssertHeld(); - LOG(INFO) << "[gc] [" << name_ << "] enable gc dir " << tablet_id; - std::map& dead_tablet_files = useful_inh_files_[tablet_id]; - std::map::iterator it = dead_tablet_files.begin(); - while (it != dead_tablet_files.end()) { - const TabletFile& file = it->first; - InheritedFileInfo& file_info = it->second; - CHECK_GT(file_info.ref, 0u); - VLOG(10) << "[gc] [" << name_ << "] file " << file << " ref decrement to " << file_info.ref - 1; - if (--file_info.ref == 0) { // desc refs for gc_disabled_dead_tablets_ - // delete file - obsolete_inh_files_.push(file); - it = dead_tablet_files.erase(it); - } else { - ++it; - } - } - if (dead_tablet_files.size() == 0) { - // delete tablet dir - VLOG(10) << "[gc] [" << name_ << "] dir " << tablet_id << " has no useful file"; - TabletFile tablet_dir = {tablet_id, 0, 0}; - obsolete_inh_files_.push(tablet_dir); - useful_inh_files_.erase(tablet_id); + mutex_.AssertHeld(); + LOG(INFO) << "[gc] [" << name_ << "] enable gc dir " << tablet_id; + std::map& dead_tablet_files = useful_inh_files_[tablet_id]; + std::map::iterator it = dead_tablet_files.begin(); + while (it != dead_tablet_files.end()) { + const TabletFile& file = it->first; + InheritedFileInfo& file_info = it->second; + CHECK_GT(file_info.ref, 0u); + VLOG(10) << "[gc] [" << name_ << "] file " << file << " ref decrement to " << file_info.ref - 1; + if (--file_info.ref == 0) { // desc refs for gc_disabled_dead_tablets_ + // delete file + obsolete_inh_files_.push(file); + it = dead_tablet_files.erase(it); + } else { + ++it; } + } + if (dead_tablet_files.size() == 0) { + // delete tablet dir + VLOG(10) << "[gc] [" << name_ << "] dir " << tablet_id << " has no useful file"; + TabletFile tablet_dir = {tablet_id, 0, 0}; + obsolete_inh_files_.push(tablet_dir); + useful_inh_files_.erase(tablet_id); + } } void Table::ReleaseInheritedFile(const TabletFile& file) { - mutex_.AssertHeld(); - - InheritedFiles::iterator it = useful_inh_files_.find(file.tablet_id); - CHECK(it != useful_inh_files_.end()); - std::map& dead_tablet_files = it->second; - - std::map::iterator it2 = dead_tablet_files.find(file); - CHECK(it2 != dead_tablet_files.end()); - InheritedFileInfo& inh_file = it2->second; - - CHECK_GT(inh_file.ref, 0u); - VLOG(10) << "[gc] [" << name_ << "] file " << file << " ref decrement to " << inh_file.ref - 1; - if (--inh_file.ref == 0) { - // delete file - obsolete_inh_files_.push(file); - dead_tablet_files.erase(it2); - if (dead_tablet_files.size() == 0) { - // delete tablet dir - VLOG(10) << "[gc] [" << name_ << "] dir " << file.tablet_id << " has no useful file"; - TabletFile tablet_dir = {file.tablet_id, 0, 0}; - obsolete_inh_files_.push(tablet_dir); - useful_inh_files_.erase(it); - } + mutex_.AssertHeld(); + + InheritedFiles::iterator it = useful_inh_files_.find(file.tablet_id); + CHECK(it != useful_inh_files_.end()); + std::map& dead_tablet_files = it->second; + + std::map::iterator it2 = dead_tablet_files.find(file); + CHECK(it2 != dead_tablet_files.end()); + InheritedFileInfo& inh_file = it2->second; + + CHECK_GT(inh_file.ref, 0u); + VLOG(10) << "[gc] [" << name_ << "] file " << file << " ref decrement to " << inh_file.ref - 1; + if (--inh_file.ref == 0) { + // delete file + obsolete_inh_files_.push(file); + dead_tablet_files.erase(it2); + if (dead_tablet_files.size() == 0) { + // delete tablet dir + VLOG(10) << "[gc] [" << name_ << "] dir " << file.tablet_id << " has no useful file"; + TabletFile tablet_dir = {file.tablet_id, 0, 0}; + obsolete_inh_files_.push(tablet_dir); + useful_inh_files_.erase(it); } + } } bool Table::TryCollectInheritedFile() { - if (GetTableName() == FLAGS_tera_master_meta_table_name) { - return false; - } - - std::set live_tablets, dead_tablets; - GetTabletsForGc(&live_tablets, &dead_tablets, true); - - std::set::iterator it = dead_tablets.begin(); - for (; it != dead_tablets.end(); ++it) { - std::vector tablet_files; - CollectInheritedFileFromFilesystem(name_, *it, &tablet_files); - - if (tablet_files.empty()) { - MutexLock l(&mutex_); - AddEmptyDeadTablet(*it); - } else { - for (uint32_t i = 0; i < tablet_files.size(); i++) { - MutexLock l(&mutex_); - AddInheritedFile(tablet_files[i], false); - } - } - } - return dead_tablets.size() > 0; -} + if (GetTableName() == FLAGS_tera_master_meta_table_name) { + return false; + } -bool Table::CollectInheritedFileFromFilesystem(const std::string& tablename, - uint64_t tablet_num, - std::vector* tablet_files) { - std::string tablepath = FLAGS_tera_tabletnode_path_prefix + "/" + tablename; - std::string tablet_path = leveldb::GetTabletPathFromNum(tablepath, tablet_num); - leveldb::Env* env = io::LeveldbBaseEnv(); - - // list lg dir - std::vector children; - env->GetChildren(tablet_path, &children); - for (size_t lg = 0; lg < children.size(); ++lg) { - std::string lg_path = tablet_path + "/" + children[lg]; - leveldb::FileType type = leveldb::kUnknown; - uint64_t number = 0; - if (ParseFileName(children[lg], &number, &type)) { - LOG(INFO) << "[gc] parent tablet has log_file: " << lg_path; - continue; - } + std::set live_tablets, dead_tablets; + GetTabletsForGc(&live_tablets, &dead_tablets, true); - leveldb::Slice rest(children[lg]); - uint64_t lg_num = 0; - if (!leveldb::ConsumeDecimalNumber(&rest, &lg_num)) { - LOG(ERROR) << "[gc] skip unknown dir(not log_num nor lg_num): " << lg_path; - continue; - } + std::set::iterator it = dead_tablets.begin(); + for (; it != dead_tablets.end(); ++it) { + std::vector tablet_files; + CollectInheritedFileFromFilesystem(name_, *it, &tablet_files); - // collector sst file - std::vector files; - env->GetChildren(lg_path, &files); - for (size_t f = 0; f < files.size(); ++f) { - std::string file_path = lg_path + "/" + files[f]; - type = leveldb::kUnknown; - number = 0; - if (ParseFileName(files[f], &number, &type) && - type == leveldb::kTableFile) { - struct TabletFile tablet_file = {tablet_num, (uint32_t)lg_num, number}; - tablet_files->push_back(tablet_file); - } - } + if (tablet_files.empty()) { + MutexLock l(&mutex_); + AddEmptyDeadTablet(*it); + } else { + for (uint32_t i = 0; i < tablet_files.size(); i++) { + MutexLock l(&mutex_); + AddInheritedFile(tablet_files[i], false); + } } - return true; + } + return dead_tablets.size() > 0; } -bool Table::GetTabletsForGc(std::set* live_tablets, - std::set* dead_tablets, +bool Table::CollectInheritedFileFromFilesystem(const std::string& tablename, uint64_t tablet_num, + std::vector* tablet_files) { + std::string tablepath = FLAGS_tera_tabletnode_path_prefix + "/" + tablename; + std::string tablet_path = leveldb::GetTabletPathFromNum(tablepath, tablet_num); + leveldb::Env* env = io::LeveldbBaseEnv(); + + // list lg dir + std::vector children; + env->GetChildren(tablet_path, &children); + for (size_t lg = 0; lg < children.size(); ++lg) { + std::string lg_path = tablet_path + "/" + children[lg]; + leveldb::FileType type = leveldb::kUnknown; + uint64_t number = 0; + if (ParseFileName(children[lg], &number, &type)) { + LOG(INFO) << "[gc] parent tablet has log_file: " << lg_path; + continue; + } + + leveldb::Slice rest(children[lg]); + uint64_t lg_num = 0; + if (!leveldb::ConsumeDecimalNumber(&rest, &lg_num)) { + LOG(ERROR) << "[gc] skip unknown dir(not log_num nor lg_num): " << lg_path; + continue; + } + + // collector sst file + std::vector files; + env->GetChildren(lg_path, &files); + for (size_t f = 0; f < files.size(); ++f) { + std::string file_path = lg_path + "/" + files[f]; + type = leveldb::kUnknown; + number = 0; + if (ParseFileName(files[f], &number, &type) && type == leveldb::kTableFile) { + struct TabletFile tablet_file = {tablet_num, (uint32_t)lg_num, number}; + tablet_files->push_back(tablet_file); + } + } + } + return true; +} + +bool Table::GetTabletsForGc(std::set* live_tablets, std::set* dead_tablets, bool ignore_not_ready) { - MutexLock lock(&mutex_); - - std::vector children; - leveldb::Env* env = io::LeveldbBaseEnv(); - std::string table_path = FLAGS_tera_tabletnode_path_prefix + "/" + (name_ == FLAGS_tera_master_meta_table_name ? FLAGS_tera_master_meta_table_path : name_); - mutex_.Unlock(); - - leveldb::Status s = env->GetChildren(table_path, &children); - mutex_.Lock(); - if (!s.ok()) { - LOG(ERROR) << "[gc] fail to list directory: " << table_path; + MutexLock lock(&mutex_); + + std::vector children; + leveldb::Env* env = io::LeveldbBaseEnv(); + std::string table_path = + FLAGS_tera_tabletnode_path_prefix + "/" + + (name_ == FLAGS_tera_master_meta_table_name ? FLAGS_tera_master_meta_table_path : name_); + mutex_.Unlock(); + + leveldb::Status s = env->GetChildren(table_path, &children); + mutex_.Lock(); + if (!s.ok()) { + LOG(ERROR) << "[gc] fail to list directory: " << table_path; + return false; + } + + std::vector tablet_list; + Table::TabletList::iterator it = tablets_list_.begin(); + for (; it != tablets_list_.end(); ++it) { + TabletPtr tablet = it->second; + if (tablet->GetStatus() != TabletMeta::kTabletReady) { + if (!ignore_not_ready) { + // any tablet not ready, stop gc return false; + } } + const std::string& path = tablet->GetPath(); + live_tablets->insert(leveldb::GetTabletNumFromPath(path)); + VLOG(20) << "[gc] add live tablet: " << path; + } - std::vector tablet_list; - Table::TabletList::iterator it = tablets_list_.begin(); - for (; it != tablets_list_.end(); ++it) { - TabletPtr tablet = it->second; - if (tablet->GetStatus() != TabletMeta::kTabletReady) { - if (!ignore_not_ready) { - // any tablet not ready, stop gc - return false; - } - } - const std::string& path = tablet->GetPath(); - live_tablets->insert(leveldb::GetTabletNumFromPath(path)); - VLOG(20) << "[gc] add live tablet: " << path; + for (size_t i = 0; i < children.size(); ++i) { + if (children[i].size() < 5) { + // skip directory . and .. + continue; } - - for (size_t i = 0; i < children.size(); ++i) { - if (children[i].size() < 5) { - // skip directory . and .. - continue; - } - std::string path = table_path + "/" + children[i]; - uint64_t tabletnum = leveldb::GetTabletNumFromPath(path); - if (live_tablets->find(tabletnum) == live_tablets->end()) { - VLOG(10) << "[gc] add dead tablet: " << path; - dead_tablets->insert(tabletnum); - } - - if (0 == tabletnum) { - LOG(WARNING) << "[gc] invalid tablet path found: <" << path << ">"; - } + std::string path = table_path + "/" + children[i]; + uint64_t tabletnum = leveldb::GetTabletNumFromPath(path); + if (live_tablets->find(tabletnum) == live_tablets->end()) { + VLOG(10) << "[gc] add dead tablet: " << path; + dead_tablets->insert(tabletnum); } - if (dead_tablets->size() == 0) { - VLOG(10) << "[gc] there is none dead tablets: " << name_; - return false; + + if (0 == tabletnum) { + LOG(WARNING) << "[gc] invalid tablet path found: <" << path << ">"; } - return true; + } + if (dead_tablets->size() == 0) { + VLOG(10) << "[gc] there is none dead tablets: " << name_; + return false; + } + return true; } void Table::AddInheritedFile(const TabletFile& file, bool need_ref) { - mutex_.AssertHeld(); - - bool is_gc_disabled = false; - if (useful_inh_files_.find(file.tablet_id) == useful_inh_files_.end()) { - LOG(INFO) << "[gc] [" << name_ << "] new report dir " << file.tablet_id << ", gc disabled"; - gc_disabled_dead_tablets_.insert(file.tablet_id); - } - if (gc_disabled_dead_tablets_.find(file.tablet_id) != gc_disabled_dead_tablets_.end()) { - is_gc_disabled = true; - } - - InheritedFileInfo& file_info = useful_inh_files_[file.tablet_id][file]; - if (is_gc_disabled && file_info.ref == 0) { - VLOG(10) << "[gc] [" << name_ << "] new report file " << file; - file_info.ref = 1; // gc_disabled_dead_tablets_ ref it - } - if (need_ref) { - ++file_info.ref; - } - VLOG(10) << "[gc] [" << name_ << "] file " << file << " ref increment to " << file_info.ref; + mutex_.AssertHeld(); + + bool is_gc_disabled = false; + if (useful_inh_files_.find(file.tablet_id) == useful_inh_files_.end()) { + VLOG(10) << "[gc] [" << name_ << "] new report dir " << file.tablet_id << ", gc disabled"; + gc_disabled_dead_tablets_.insert(file.tablet_id); + } + if (gc_disabled_dead_tablets_.find(file.tablet_id) != gc_disabled_dead_tablets_.end()) { + is_gc_disabled = true; + } + + InheritedFileInfo& file_info = useful_inh_files_[file.tablet_id][file]; + if (is_gc_disabled && file_info.ref == 0) { + VLOG(10) << "[gc] [" << name_ << "] new report file " << file; + file_info.ref = 1; // gc_disabled_dead_tablets_ ref it + } + if (need_ref) { + ++file_info.ref; + } + VLOG(10) << "[gc] [" << name_ << "] file " << file << " ref increment to " << file_info.ref; } void Table::AddEmptyDeadTablet(uint64_t tablet_id) { - mutex_.AssertHeld(); + mutex_.AssertHeld(); - if (useful_inh_files_.find(tablet_id) == useful_inh_files_.end()) { - LOG(INFO) << "[gc] [" << name_ << "] new empty dead tablet " - << tablet_id << ", gc disabled"; - gc_disabled_dead_tablets_.insert(tablet_id); - useful_inh_files_[tablet_id]; - } + if (useful_inh_files_.find(tablet_id) == useful_inh_files_.end()) { + LOG(INFO) << "[gc] [" << name_ << "] new empty dead tablet " << tablet_id << ", gc disabled"; + gc_disabled_dead_tablets_.insert(tablet_id); + useful_inh_files_[tablet_id]; + } } uint64_t Table::CleanObsoleteFile() { - leveldb::Env* env = io::LeveldbBaseEnv(); - std::string table_path = FLAGS_tera_tabletnode_path_prefix + "/" + name_; - uint64_t delete_file_num = 0; - int64_t start_ts = get_micros(); - - MutexLock l(&mutex_); - while (!obsolete_inh_files_.empty()) { - TabletFile file = obsolete_inh_files_.front(); - mutex_.Unlock(); - - if (GetStatus() == kTableDeleting) { - LOG(INFO) << "[gc] [" << name_ << "] table deleted, give up clean"; - mutex_.Lock(); - break; - } + leveldb::Env* env = io::LeveldbBaseEnv(); + std::string table_path = FLAGS_tera_tabletnode_path_prefix + "/" + name_; + uint64_t delete_file_num = 0; + int64_t start_ts = get_micros(); + + MutexLock l(&mutex_); + while (!obsolete_inh_files_.empty()) { + TabletFile file = obsolete_inh_files_.front(); + mutex_.Unlock(); - std::string path; - leveldb::Status s; - if (file.lg_id == 0 && file.file_id == 0) { - std::string path = leveldb::BuildTabletPath(table_path, file.tablet_id); - leveldb::FileLock* file_lock = nullptr; - // NEVER remove the trailing character '/', otherwise you will lock the parent directory - s = env->LockFile(path + "/", &file_lock); - if (!s.ok()) { - LOG(WARNING) << "lock path failed, path: " << path << ", status: " << s.ToString(); - } - delete file_lock; - - LOG(INFO) << "[gc] [" << name_ << "] delete dir " << path; - s = io::DeleteEnvDir(path); //safely delete dir and all file in it - } else { - std::string lg_path = leveldb::BuildTabletLgPath(table_path, file.tablet_id, file.lg_id); - leveldb::FileLock* file_lock = nullptr; - // NEVER remove the trailing character '/', otherwise you will lock the parent directory - s = env->LockFile(lg_path + "/", &file_lock); - if (!s.ok()) { - LOG(WARNING) << "lock path failed, path: " << lg_path << ", status: " << s.ToString(); - } - delete file_lock; - - std::string path = leveldb::BuildTableFilePath(table_path, file.tablet_id, - file.lg_id, file.file_id); - if (FLAGS_tera_master_gc_trash_enabled) { - LOG(INFO) << "[gc] [" << name_ << "] move file to trash, file: " - << file << ", path: " << path; - // move sst to trackable gc trash instead of deleting it directly - s = io::MoveSstToTrackableGcTrash(name_, file.tablet_id, file.lg_id, file.file_id); - } else { - LOG(INFO) << "[gc] [" << name_ << "] delete file " << file << " path " << path; - s = env->DeleteFile(path); - } - } - mutex_.Lock(); - if (!s.ok()) { - LOG(WARNING) << "[gc] fail to delete: " << path << " status: " << s.ToString(); - break; - } - delete_file_num++; - obsolete_inh_files_.pop(); + if (GetStatus() == kTableDeleting) { + LOG(INFO) << "[gc] [" << name_ << "] table deleted, give up clean"; + mutex_.Lock(); + break; + } + + std::string path; + leveldb::Status s; + if (file.lg_id == 0 && file.file_id == 0) { + std::string path = leveldb::BuildTabletPath(table_path, file.tablet_id); + leveldb::FileLock* file_lock = nullptr; + // NEVER remove the trailing character '/', otherwise you will lock the + // parent directory + s = env->LockFile(path + "/", &file_lock); + if (!s.ok()) { + LOG(WARNING) << "lock path failed, path: " << path << ", status: " << s.ToString(); + } + delete file_lock; + + LOG(INFO) << "[gc] [" << name_ << "] delete dir " << path; + s = io::DeleteEnvDir(path); // safely delete dir and all file in it + } else { + std::string lg_path = leveldb::BuildTabletLgPath(table_path, file.tablet_id, file.lg_id); + leveldb::FileLock* file_lock = nullptr; + // NEVER remove the trailing character '/', otherwise you will lock the + // parent directory + s = env->LockFile(lg_path + "/", &file_lock); + if (!s.ok()) { + LOG(WARNING) << "lock path failed, path: " << lg_path << ", status: " << s.ToString(); + } + delete file_lock; + + std::string path = + leveldb::BuildTableFilePath(table_path, file.tablet_id, file.lg_id, file.file_id); + if (FLAGS_tera_master_gc_trash_enabled) { + LOG(INFO) << "[gc] [" << name_ << "] move file to trash, file: " << file + << ", path: " << path; + // move sst to trackable gc trash instead of deleting it directly + s = io::MoveSstToTrackableGcTrash(name_, file.tablet_id, file.lg_id, file.file_id); + } else { + LOG(INFO) << "[gc] [" << name_ << "] delete file " << file << " path " << path; + s = env->DeleteFile(path); + } + } + mutex_.Lock(); + if (s.ok()) { + delete_file_num++; + } else { + LOG(WARNING) << "[gc] fail to delete: " << path << " status: " << s.ToString(); } - LOG(INFO) << "[gc] [" << name_ << "] clean obsolete file/dir, total: " << delete_file_num - << ", cost: " << (get_micros() - start_ts) / 1000 << " ms"; - return delete_file_num; + obsolete_inh_files_.pop(); + } + LOG(INFO) << "[gc] [" << name_ << "] clean obsolete file/dir, total: " << delete_file_num + << ", cost: " << (get_micros() - start_ts) / 1000 << " ms"; + return delete_file_num; } bool Table::DoStateTransition(const TableEvent event) { - MutexLock lock(&mutex_); - TableStatus curr_status = state_machine_.GetStatus(); - bool ret = state_machine_.DoStateTransition(event); - LOG_IF(WARNING, !ret) << "table: " << name_ << ", not support state transition, " - "curr_status: " << StatusCodeToString(curr_status) << ", event: " << event; - LOG_IF(INFO, ret) << "table: " << name_ << ", state transition prev_status: " - << StatusCodeToString(curr_status) << ", event: " << event - << ", post_status: " << StatusCodeToString(state_machine_.GetStatus()); - return ret; + MutexLock lock(&mutex_); + TableStatus curr_status = state_machine_.GetStatus(); + bool ret = state_machine_.DoStateTransition(event); + LOG_IF(WARNING, !ret) << "table: " << name_ << ", not support state transition, " + "curr_status: " << StatusCodeToString(curr_status) + << ", event: " << event; + LOG_IF(INFO, ret) << "table: " << name_ + << ", state transition prev_status: " << StatusCodeToString(curr_status) + << ", event: " << event + << ", post_status: " << StatusCodeToString(state_machine_.GetStatus()); + return ret; } MetaTablet::MetaTablet(const TabletMeta& meta, TablePtr table, - std::shared_ptr zk_adapter) : Tablet(meta, table), zk_adapter_(zk_adapter) {} + std::shared_ptr zk_adapter) + : Tablet(meta, table), zk_adapter_(zk_adapter) {} -TabletManager::TabletManager(Counter* sequence_id, - MasterImpl* master_impl, - ThreadPool* thread_pool) - : this_sequence_id_(sequence_id), - master_impl_(master_impl), - thread_pool_(thread_pool) {} +TabletManager::TabletManager(Counter* sequence_id, MasterImpl* master_impl, ThreadPool* thread_pool) + : this_sequence_id_(sequence_id), master_impl_(master_impl), thread_pool_(thread_pool) {} -TabletManager::~TabletManager() { - ClearTableList(); -} +TabletManager::~TabletManager() { ClearTableList(); } -void TabletManager::Init() { -} +void TabletManager::Init() {} -void TabletManager::Stop() { -} +void TabletManager::Stop() {} TablePtr TabletManager::CreateTable(const TableMeta& meta) { - return TablePtr(new Table(meta.table_name(), meta)); -} + TablePtr table(new Table(meta.table_name(), meta)); -TablePtr TabletManager::CreateTable(const std::string& name, const TableSchema& schema, const TableStatus& status) { - return TablePtr(new Table(name, schema, status)); + if (!meta.has_create_time() || (int64_t)meta.create_time() < time(nullptr)) { + AutoResetEvent ev; + auto f = [&ev](bool) mutable { ev.Set(); }; + MetaWriteRecord record = PackMetaWriteRecord(table, false); + MasterEnv().BatchWriteMetaTableAsync(record, f); + ev.Wait(); + } + return table; } -TabletPtr TabletManager::CreateTablet(const TabletMeta& meta) { - return TabletPtr(new Tablet(meta)); +TablePtr TabletManager::CreateTable(const std::string& name, const TableSchema& schema, + const TableStatus& status) { + return TablePtr(new Table(name, schema, status)); } bool TabletManager::AddTable(TablePtr& table, StatusCode* ret_status) { - MutexLock lock(&mutex_); - std::pair ret = - all_tables_.insert(std::pair(table->GetTableName(), table)); - TableList::iterator it = ret.first; - if (!ret.second) { - LOG(WARNING) << "table: " << table->GetTableName() << " exist"; - SetStatusCode(kTableExist, ret_status); - return false; - } - return true; -} - -bool Table::AddTablet(TabletPtr& tablet, StatusCode* ret_status) { - MutexLock lock(&mutex_); - if (tablets_list_.end() != tablets_list_.find(tablet->GetKeyStart())) { - LOG(WARNING) << "table: " << tablet->GetTableName() << ", start: [" - << DebugString(tablet->GetKeyStart()) << "] exist"; - SetStatusCode(kTableExist, ret_status); - return false; - } - tablet->table_ = shared_from_this(); - uint64_t tablet_num = leveldb::GetTabletNumFromPath(tablet->GetPath()); - if (max_tablet_no_ < tablet_num) { - max_tablet_no_ = tablet_num; - } - tablets_list_[tablet->GetKeyStart()] = tablet; - CHECK(tablet->GetStatus() == TabletMeta::kTabletOffline); - MasterEnv().GetTabletAvailability()->AddNotReadyTablet(tablet->GetPath(), tablet->GetStatus()); - return true; -} - -TabletPtr TabletManager::CreateTablet(TablePtr table, const TabletMeta& meta) { - return TabletPtr(new Tablet(meta, table)); -} - -MetaTabletPtr TabletManager::AddMetaTablet(TabletNodePtr node, std::shared_ptr zk_adapter) { - MutexLock lock(&mutex_); - if (meta_tablet_) { - LOG(WARNING) << "meta tablet has already added"; - return meta_tablet_; - } - - TableSchema schema; - schema.set_kv_only(true); - LocalityGroupSchema* lg = schema.add_locality_groups(); - schema.set_name(FLAGS_tera_master_meta_table_name); - lg->set_name("lg_meta"); - lg->set_compress_type(false); - lg->set_store_type(MemoryStore); - TabletMeta meta; - meta.set_table_name(FLAGS_tera_master_meta_table_name); - meta.set_path(FLAGS_tera_master_meta_table_path); - meta.set_server_addr(node->GetAddr()); - meta.set_size(0); - KeyRange* key_range = meta.mutable_key_range(); - const std::string start_key(""); - const std::string end_key(""); - key_range->set_key_start(start_key); - key_range->set_key_end(end_key); - - TablePtr meta_table(new Table(meta.table_name(), schema, kTableEnable)); - meta_tablet_.reset(new MetaTablet(meta, meta_table, zk_adapter)); - // meta table will be added with inital status kTableReady - meta_tablet_->SetStatus(TabletMeta::kTabletReady); - CHECK(meta_tablet_->GetStatus() == TabletMeta::kTabletReady); - meta_tablet_->AssignTabletNode(node); - meta_tablet_->UpdateRootTabletAddr(); - meta_table->tablets_list_[start_key] = meta_tablet_; - all_tables_[FLAGS_tera_master_meta_table_name] = meta_table; + MutexLock lock(&mutex_); + std::pair ret = + all_tables_.insert(std::pair(table->GetTableName(), table)); + TableList::iterator it = ret.first; + if (!ret.second) { + LOG(WARNING) << "table: " << table->GetTableName() << " exist"; + SetStatusCode(kTableExist, ret_status); + return false; + } + return true; +} + +TabletPtr Table::AddTablet(const TabletMeta& meta, StatusCode* ret_status) { + TabletPtr tablet(new Tablet(meta, shared_from_this())); + MutexLock lock(&mutex_); + if (tablets_list_.end() != tablets_list_.find(tablet->GetKeyStart())) { + LOG(WARNING) << "table: " << tablet->GetTableName() << ", start: [" + << DebugString(tablet->GetKeyStart()) << "] exist"; + SetStatusCode(kTableExist, ret_status); + return TabletPtr(nullptr); + } + uint64_t tablet_num = leveldb::GetTabletNumFromPath(tablet->GetPath()); + if (max_tablet_no_ < tablet_num) { + max_tablet_no_ = tablet_num; + } + tablets_list_[tablet->GetKeyStart()] = tablet; + CHECK(tablet->GetStatus() == TabletMeta::kTabletOffline); + MasterEnv().GetTabletAvailability()->AddNotReadyTablet(tablet->GetPath(), tablet->GetStatus()); + return tablet; +} + +MetaTabletPtr TabletManager::AddMetaTablet(TabletNodePtr node, + std::shared_ptr zk_adapter) { + MutexLock lock(&mutex_); + if (meta_tablet_) { + LOG(WARNING) << "meta tablet has already added"; return meta_tablet_; + } + + TableSchema schema; + schema.set_kv_only(true); + LocalityGroupSchema* lg = schema.add_locality_groups(); + schema.set_name(FLAGS_tera_master_meta_table_name); + lg->set_name("lg_meta"); + lg->set_compress_type(false); + lg->set_store_type(MemoryStore); + TabletMeta meta; + meta.set_table_name(FLAGS_tera_master_meta_table_name); + meta.set_path(FLAGS_tera_master_meta_table_path); + meta.set_server_addr(node->GetAddr()); + meta.set_size(0); + KeyRange* key_range = meta.mutable_key_range(); + const std::string start_key(""); + const std::string end_key(""); + key_range->set_key_start(start_key); + key_range->set_key_end(end_key); + + TablePtr meta_table(new Table(meta.table_name(), schema, kTableEnable)); + meta_tablet_.reset(new MetaTablet(meta, meta_table, zk_adapter)); + // meta table will be added with inital status kTableReady + meta_tablet_->SetStatus(TabletMeta::kTabletReady); + CHECK(meta_tablet_->GetStatus() == TabletMeta::kTabletReady); + BindTabletToTabletNode(meta_tablet_, node); + // meta_tablet_->AssignTabletNode(node); + meta_tablet_->UpdateRootTabletAddr(); + meta_table->tablets_list_[start_key] = meta_tablet_; + all_tables_[FLAGS_tera_master_meta_table_name] = meta_table; + return meta_tablet_; } - int64_t TabletManager::GetAllTabletsCount() { - MutexLock lock(&mutex_); - int64_t count = 0; - TableList::iterator it; - for (it = all_tables_.begin(); it != all_tables_.end(); ++it) { - count += it->second->GetTabletsCount(); - } - return count; + MutexLock lock(&mutex_); + int64_t count = 0; + TableList::iterator it; + for (it = all_tables_.begin(); it != all_tables_.end(); ++it) { + count += it->second->GetTabletsCount(); + } + return count; } -bool TabletManager::FindTablet(const std::string& table_name, - const std::string& key_start, +bool TabletManager::FindTablet(const std::string& table_name, const std::string& key_start, TabletPtr* tablet, StatusCode* ret_status) { - // lock table list - mutex_.Lock(); - // search table - TableList::iterator it = all_tables_.find(table_name); - if (it == all_tables_.end()) { - mutex_.Unlock(); - VLOG(5) << "tablet: " << table_name << " [start: " - << DebugString(key_start) << "] not exist"; - SetStatusCode(kTableNotFound, ret_status); - return false; - } - Table& table = *it->second; - - // lock table - table.mutex_.Lock(); + // lock table list + mutex_.Lock(); + // search table + TableList::iterator it = all_tables_.find(table_name); + if (it == all_tables_.end()) { mutex_.Unlock(); + VLOG(5) << "tablet: " << table_name << " [start: " << DebugString(key_start) << "] not exist"; + SetStatusCode(kTableNotFound, ret_status); + return false; + } + Table& table = *it->second; - // search tablet - Table::TabletList::iterator it2 = table.tablets_list_.find(key_start); - if (it2 == table.tablets_list_.end()) { - table.mutex_.Unlock(); - VLOG(5) << "table: " << table_name << "[start: " - << DebugString(key_start) << "] not exist"; - SetStatusCode(kTableNotFound, ret_status); - return false; - } - *tablet = it2->second; + // lock table + table.mutex_.Lock(); + mutex_.Unlock(); + + // search tablet + Table::TabletList::iterator it2 = table.tablets_list_.find(key_start); + if (it2 == table.tablets_list_.end()) { table.mutex_.Unlock(); - return true; + VLOG(5) << "table: " << table_name << "[start: " << DebugString(key_start) << "] not exist"; + SetStatusCode(kTableNotFound, ret_status); + return false; + } + *tablet = it2->second; + table.mutex_.Unlock(); + return true; } void TabletManager::FindTablet(const std::string& server_addr, std::vector* tablet_meta_list, bool need_disabled_tables) { - mutex_.Lock(); - TableList::iterator it = all_tables_.begin(); - for (; it != all_tables_.end(); ++it) { - Table& table = *it->second; - table.mutex_.Lock(); - if (table.state_machine_.GetStatus() == kTableDisable && !need_disabled_tables) { - VLOG(10) << "FindTablet skip disable table: " << table.name_; - table.mutex_.Unlock(); - continue; - } - Table::TabletList::iterator it2 = table.tablets_list_.begin(); - for (; it2 != table.tablets_list_.end(); ++it2) { - TabletPtr tablet = it2->second; - tablet->mutex_.Lock(); - if (tablet->meta_.server_addr() == server_addr) { - tablet_meta_list->push_back(tablet); - } - tablet->mutex_.Unlock(); - } - table.mutex_.Unlock(); - } - mutex_.Unlock(); -} - -bool TabletManager::FindOverlappedTablets(const std::string& table_name, - const std::string& key_start, - const std::string& key_end, - std::vector* tablets, - StatusCode* ret_status) { - // lock table list - mutex_.Lock(); - - // search table - TableList::iterator it = all_tables_.find(table_name); - if (it == all_tables_.end()) { - mutex_.Unlock(); - VLOG(5) << "table: " << table_name << " not exist"; - SetStatusCode(kTableNotFound, ret_status); - return false; - } + mutex_.Lock(); + TableList::iterator it = all_tables_.begin(); + for (; it != all_tables_.end(); ++it) { Table& table = *it->second; - - // lock table table.mutex_.Lock(); - mutex_.Unlock(); - - // search tablet - Table::TabletList::iterator it2 = table.tablets_list_.upper_bound(key_start); - CHECK(it2 != table.tablets_list_.begin()); - --it2; - while (it2 != table.tablets_list_.end() && - (key_end.empty() || it2->second->meta_.key_range().key_start() < key_end)) { - tablets->push_back(it2->second); - ++it2; + if (table.state_machine_.GetStatus() == kTableDisable && !need_disabled_tables) { + VLOG(10) << "FindTablet skip disable table: " << table.name_; + table.mutex_.Unlock(); + continue; } - table.mutex_.Unlock(); - CHECK_GT(tablets->size(), 0u); - return true; -} - -bool TabletManager::SearchTablet(const std::string& table_name, - const std::string& key, - TabletPtr* tablet, - StatusCode* ret_status) { - // lock table list - mutex_.Lock(); - - // search table - TableList::iterator it = all_tables_.find(table_name); - if (it == all_tables_.end()) { - mutex_.Unlock(); - VLOG(5) << "table: " << table_name << " not exist"; - SetStatusCode(kTableNotFound, ret_status); - return false; + Table::TabletList::iterator it2 = table.tablets_list_.begin(); + for (; it2 != table.tablets_list_.end(); ++it2) { + TabletPtr tablet = it2->second; + tablet->mutex_.Lock(); + if (tablet->meta_.server_addr() == server_addr) { + tablet_meta_list->push_back(tablet); + } + tablet->mutex_.Unlock(); } - Table& table = *it->second; - - // lock table - table.mutex_.Lock(); + table.mutex_.Unlock(); + } + mutex_.Unlock(); +} + +bool Table::FindOverlappedTablets(const std::string& key_start, const std::string& key_end, + std::vector* tablets) { + MutexLock lock(&mutex_); + TabletList::iterator it2 = tablets_list_.upper_bound(key_start); + CHECK(it2 != tablets_list_.begin()); + --it2; + while (it2 != tablets_list_.end() && + (key_end.empty() || it2->second->meta_.key_range().key_start() < key_end)) { + tablets->push_back(it2->second); + ++it2; + } + return !tablets->empty(); +} + +bool TabletManager::SearchTablet(const std::string& table_name, const std::string& key, + TabletPtr* tablet, StatusCode* ret_status) { + // lock table list + mutex_.Lock(); + + // search table + TableList::iterator it = all_tables_.find(table_name); + if (it == all_tables_.end()) { mutex_.Unlock(); + VLOG(5) << "table: " << table_name << " not exist"; + SetStatusCode(kTableNotFound, ret_status); + return false; + } + Table& table = *it->second; - // search tablet - Table::TabletList::reverse_iterator rit2 = table.tablets_list_.rbegin(); - for (; rit2 != table.tablets_list_.rend(); ++rit2) { - if (rit2->first <= key) { - *tablet = rit2->second; - break; - } + // lock table + table.mutex_.Lock(); + mutex_.Unlock(); + + // search tablet + Table::TabletList::reverse_iterator rit2 = table.tablets_list_.rbegin(); + for (; rit2 != table.tablets_list_.rend(); ++rit2) { + if (rit2->first <= key) { + *tablet = rit2->second; + break; } + } - table.mutex_.Unlock(); - return true; + table.mutex_.Unlock(); + return true; } bool TabletManager::FindTable(const std::string& table_name, - std::vector* tablet_meta_list, - StatusCode* ret_status) { - // lock table list - mutex_.Lock(); - - // search table - TableList::iterator it = all_tables_.find(table_name); - if (it == all_tables_.end()) { - mutex_.Unlock(); - LOG(WARNING) << "table: " << table_name << " not exist"; - SetStatusCode(kTableNotFound, ret_status); - return false; - } - Table& table = *it->second; + std::vector* tablet_meta_list, StatusCode* ret_status) { + // lock table list + mutex_.Lock(); - // lock table - table.mutex_.Lock(); + // search table + TableList::iterator it = all_tables_.find(table_name); + if (it == all_tables_.end()) { mutex_.Unlock(); + LOG(WARNING) << "table: " << table_name << " not exist"; + SetStatusCode(kTableNotFound, ret_status); + return false; + } + Table& table = *it->second; - // search tablet - Table::TabletList::iterator it2 = table.tablets_list_.begin(); - for (; it2 != table.tablets_list_.end(); ++it2) { - TabletPtr tablet = it2->second; - tablet_meta_list->push_back(tablet); - } + // lock table + table.mutex_.Lock(); + mutex_.Unlock(); - table.mutex_.Unlock(); - return true; + // search tablet + Table::TabletList::iterator it2 = table.tablets_list_.begin(); + for (; it2 != table.tablets_list_.end(); ++it2) { + TabletPtr tablet = it2->second; + tablet_meta_list->push_back(tablet); + } + + table.mutex_.Unlock(); + return true; } -bool TabletManager::FindTable(const std::string& table_name, TablePtr* tablet) { - mutex_.Lock(); - TableList::iterator it = all_tables_.find(table_name); - if (it == all_tables_.end()) { - mutex_.Unlock(); - VLOG(5) << "table: " << table_name << " not exist"; - return false; - } - *tablet = it->second; +bool TabletManager::FindTable(const std::string& table_name, TablePtr* table) { + mutex_.Lock(); + TableList::iterator it = all_tables_.find(table_name); + if (it == all_tables_.end()) { mutex_.Unlock(); - return true; + VLOG(5) << "table: " << table_name << " not exist"; + return false; + } + *table = it->second; + mutex_.Unlock(); + return true; } int64_t TabletManager::SearchTable(std::vector* tablet_meta_list, const std::string& prefix_table_name, const std::string& start_table_name, - const std::string& start_tablet_key, - uint32_t max_found, StatusCode* ret_status) { - if (max_found == 0) { - return 0; - } - if (start_table_name.find(prefix_table_name) != 0) { - return 0; - } - - mutex_.Lock(); + const std::string& start_tablet_key, uint32_t max_found, + StatusCode* ret_status) { + if (max_found == 0) { + return 0; + } + if (start_table_name.find(prefix_table_name) != 0) { + return 0; + } + + mutex_.Lock(); + + TableList::iterator lower_it = all_tables_.lower_bound(start_table_name); + TableList::iterator upper_it = all_tables_.upper_bound(prefix_table_name + "\xFF"); + if (upper_it == all_tables_.begin() || lower_it == all_tables_.end()) { + mutex_.Unlock(); + SetStatusCode(kTableNotFound, ret_status); + return -1; + } - TableList::iterator lower_it = all_tables_.lower_bound(start_table_name); - TableList::iterator upper_it = all_tables_.upper_bound(prefix_table_name + "\xFF"); - if (upper_it == all_tables_.begin() || lower_it == all_tables_.end()) { - mutex_.Unlock(); - SetStatusCode(kTableNotFound, ret_status); - return -1; + uint32_t found_num = 0; + for (TableList::iterator it = lower_it; it != upper_it; ++it) { + Table& table = *it->second; + Table::TabletList::iterator it2; + table.mutex_.Lock(); + if (start_table_name == it->first) { + it2 = table.tablets_list_.lower_bound(start_tablet_key); + } else { + it2 = table.tablets_list_.begin(); } - uint32_t found_num = 0; - for (TableList::iterator it = lower_it; it != upper_it; ++it) { - Table& table = *it->second; - Table::TabletList::iterator it2; - table.mutex_.Lock(); - if (start_table_name == it->first) { - it2 = table.tablets_list_.lower_bound(start_tablet_key); - } else { - it2 = table.tablets_list_.begin(); - } - - for (; it2 != table.tablets_list_.end(); ++it2) { - TabletPtr tablet = it2->second; - tablet_meta_list->push_back(tablet); - if (++found_num >= max_found) { - break; - } - } - table.mutex_.Unlock(); - if (found_num >= max_found) { - break; - } + for (; it2 != table.tablets_list_.end(); ++it2) { + TabletPtr tablet = it2->second; + tablet_meta_list->push_back(tablet); + if (++found_num >= max_found) { + break; + } } + table.mutex_.Unlock(); + if (found_num >= max_found) { + break; + } + } - mutex_.Unlock(); - return found_num; + mutex_.Unlock(); + return found_num; } bool TabletManager::ShowTable(std::vector* table_meta_list, std::vector* tablet_meta_list, const std::string& start_table_name, - const std::string& start_tablet_key, - uint32_t max_table_found, - uint32_t max_tablet_found, - bool* is_more, StatusCode* ret_status) { - // lock table list - mutex_.Lock(); - - TableList::iterator it = all_tables_.lower_bound(start_table_name); - if (it == all_tables_.end()) { - mutex_.Unlock(); - LOG(ERROR) << "table not found: " << start_table_name; - SetStatusCode(kTableNotFound, ret_status); - return false; - } - - uint32_t table_found_num = 0; - uint32_t tablet_found_num = 0; - for (; it != all_tables_.end(); ++it) { - TablePtr table = it->second; - Table::TabletList::iterator it2; - - table->mutex_.Lock(); - if (table_meta_list != NULL) { - table_meta_list->push_back(table); - } - table_found_num++; - if (it->first == start_table_name) { - it2 = table->tablets_list_.lower_bound(start_tablet_key); - } else { - it2 = table->tablets_list_.begin(); - } - for (; it2 != table->tablets_list_.end(); ++it2) { - if (tablet_found_num >= max_tablet_found) { - break; - } - TabletPtr tablet = it2->second; - tablet_found_num++; - if (tablet_meta_list != NULL) { - tablet_meta_list->push_back(tablet); - } - } - table->mutex_.Unlock(); - if (table_found_num >= max_table_found) { - break; - } - } + const std::string& start_tablet_key, uint32_t max_table_found, + uint32_t max_tablet_found, bool* is_more, StatusCode* ret_status) { + // lock table list + mutex_.Lock(); + TableList::iterator it = all_tables_.lower_bound(start_table_name); + if (it == all_tables_.end()) { mutex_.Unlock(); - return true; -} + LOG(ERROR) << "table not found: " << start_table_name; + SetStatusCode(kTableNotFound, ret_status); + return false; + } -bool TabletManager::DeleteTable(const std::string& table_name, - StatusCode* ret_status) { - // lock table list - MutexLock lock(&mutex_); + uint32_t table_found_num = 0; + uint32_t tablet_found_num = 0; + for (; it != all_tables_.end(); ++it) { + TablePtr table = it->second; + Table::TabletList::iterator it2; - // search table - TableList::iterator it = all_tables_.find(table_name); - if (it == all_tables_.end()) { - LOG(WARNING) << "table: " << table_name << " not exist"; - SetStatusCode(kTableNotFound, ret_status); - return true; + table->mutex_.Lock(); + if (table_meta_list != NULL) { + table_meta_list->push_back(table); } - Table& table = *it->second; - - // make sure no other thread ref this table - table.mutex_.Lock(); - table.mutex_.Unlock(); - - table.tablets_list_.clear(); - all_tables_.erase(it); - // clean up specific table dir in file system - if (FLAGS_tera_delete_obsolete_tabledir_enabled && - !io::MoveEnvDirToTrash(table.GetTableName())) { - LOG(ERROR) << "fail to move droped table to trash dir, tablename: " - << table.GetTableName(); + table_found_num++; + if (it->first == start_table_name) { + it2 = table->tablets_list_.lower_bound(start_tablet_key); + } else { + it2 = table->tablets_list_.begin(); + } + for (; it2 != table->tablets_list_.end(); ++it2) { + if (tablet_found_num >= max_tablet_found) { + break; + } + TabletPtr tablet = it2->second; + tablet_found_num++; + if (tablet_meta_list != NULL) { + tablet_meta_list->push_back(tablet); + } } - return true; -} - -bool TabletManager::DeleteTablet(const std::string& table_name, - const std::string& key_start, - StatusCode* ret_status) { - // lock table list - MutexLock lock(&mutex_); - - // search table - TableList::iterator it = all_tables_.find(table_name); - if (it == all_tables_.end()) { - LOG(WARNING) << "table: " << table_name << " [start: " - << DebugString(key_start) << "] not exist"; - SetStatusCode(kTableNotFound, ret_status); - return true; + table->mutex_.Unlock(); + if (table_found_num >= max_table_found) { + break; } - Table& table = *it->second; - - // make sure no other thread ref this table - table.mutex_.Lock(); - table.mutex_.Unlock(); + } - // search tablet - Table::TabletList::iterator it2 = table.tablets_list_.find(key_start); - if (it2 == table.tablets_list_.end()) { - LOG(WARNING) << "table: " << table_name << " [start: " - << DebugString(key_start) << "] not exist"; - SetStatusCode(kTableNotFound, ret_status); - return true; - } -// Tablet& tablet = *it2->second; -// // make sure no other thread ref this tablet -// tablet.mutex_.Lock(); -// tablet.mutex_.Unlock(); -// delete &tablet; - MasterEnv().GetTabletAvailability()->EraseNotReadyTablet(it2->second->GetPath()); - table.tablets_list_.erase(it2); - - if (table.tablets_list_.empty()) { - // clean up specific table dir in file system - if (FLAGS_tera_delete_obsolete_tabledir_enabled && - !io::MoveEnvDirToTrash(table.GetTableName())) { - LOG(ERROR) << "fail to move droped table to trash dir, tablename: " - << table.GetTableName(); - } - // delete &table; - all_tables_.erase(it); - } - return true; + mutex_.Unlock(); + return true; } -void TabletManager::WriteToStream(std::ofstream& ofs, - const std::string& key, - const std::string& value) { - uint32_t key_size = key.size(); - uint32_t value_size = value.size(); - ofs.write((char*)&key_size, sizeof(key_size)); - ofs.write(key.data(), key_size); - ofs.write((char*)&value_size, sizeof(value_size)); - ofs.write(value.data(), value_size); -} - -bool TabletManager::DumpMetaTableToFile(const std::string& filename, - StatusCode* status) { - std::ofstream ofs(filename.c_str(), std::ofstream::binary | std::ofstream::trunc); - if (!ofs.is_open()) { - LOG(WARNING) << "fail to open file " << filename << " for write"; - SetStatusCode(kIOError, status); - return false; - } - - // get all table and tablet meta - std::vector table_list; - std::vector tablet_list; - ShowTable(&table_list, &tablet_list); - - // dump table meta - for (size_t i = 0; i < table_list.size(); i++) { - TablePtr table = table_list[i]; - std::string key, value; - table->ToMetaTableKeyValue(&key, &value); - WriteToStream(ofs, key, value); - } - - // dump tablet meta - for (size_t i = 0; i < tablet_list.size(); i++) { - TabletPtr tablet = tablet_list[i]; - std::string key, value; - tablet->ToMetaTableKeyValue(&key, &value); - WriteToStream(ofs, key, value); - } +bool TabletManager::DeleteTable(const std::string& table_name, StatusCode* ret_status) { + // lock table list + MutexLock lock(&mutex_); - if (ofs.fail()) { - LOG(WARNING) << "fail to write to file " << filename; - SetStatusCode(kIOError, status); - return false; - } - ofs.close(); + // search table + TableList::iterator it = all_tables_.find(table_name); + if (it == all_tables_.end()) { + LOG(WARNING) << "table: " << table_name << " not exist"; + SetStatusCode(kTableNotFound, ret_status); return true; -} + } + Table& table = *it->second; -void TabletManager::LoadTableMeta(const std::string& key, - const std::string& value) { - TableMeta meta; - ParseMetaTableKeyValue(key, value, &meta); - TablePtr table = CreateTable(meta); - StatusCode ret_status = kTabletNodeOk; - if (meta.table_name() == FLAGS_tera_master_meta_table_name) { - LOG(INFO) << "ignore meta table record in meta table"; - } else if (!AddTable(table, &ret_status)) { - LOG(ERROR) << "duplicate table in meta table: table=" - << meta.table_name(); - // TODO: try correct invalid record - } else { - VLOG(5) << "load table record: " << table; - } + // make sure no other thread ref this table + table.mutex_.Lock(); + table.tablets_list_.clear(); + table.mutex_.Unlock(); + + all_tables_.erase(it); + // clean up specific table dir in file system + if (FLAGS_tera_delete_obsolete_tabledir_enabled && !io::MoveEnvDirToTrash(table.GetTableName())) { + LOG(ERROR) << "fail to move droped table to trash dir, tablename: " << table.GetTableName(); + } + return true; } -void TabletManager::LoadTabletMeta(const std::string& key, - const std::string& value) { - TabletMeta meta; - ParseMetaTableKeyValue(key, value, &meta); - meta.set_status(TabletMeta::kTabletOffline); - StatusCode ret_status = kTabletNodeOk; - if (meta.table_name() == FLAGS_tera_master_meta_table_name) { - LOG(INFO) << "ignore meta tablet record in meta table"; - } else { - TablePtr table; - if (!FindTable(meta.table_name(), &table)) { - LOG(WARNING) << "table schema not exist, skip this tablet: " - << meta.path(); - return; - } - TabletPtr tablet = CreateTablet(meta); - if (!table->AddTablet(tablet, &ret_status)) { - LOG(ERROR) << "duplicate tablet in meta table: table=" << meta.table_name() - << " start=" << DebugString(meta.key_range().key_start()); - // TODO: try correct invalid record - } +bool TabletManager::DeleteTablet(const std::string& table_name, const std::string& key_start, + StatusCode* ret_status) { + // lock table list + MutexLock lock(&mutex_); + + // search table + TableList::iterator it = all_tables_.find(table_name); + if (it == all_tables_.end()) { + LOG(WARNING) << "table: " << table_name << " [start: " << DebugString(key_start) + << "] not exist"; + SetStatusCode(kTableNotFound, ret_status); + return true; + } + Table& table = *it->second; + + // make sure no other thread ref this table + table.mutex_.Lock(); + table.mutex_.Unlock(); + + // search tablet + Table::TabletList::iterator it2 = table.tablets_list_.find(key_start); + if (it2 == table.tablets_list_.end()) { + LOG(WARNING) << "table: " << table_name << " [start: " << DebugString(key_start) + << "] not exist"; + SetStatusCode(kTableNotFound, ret_status); + return true; + } + // Tablet& tablet = *it2->second; + // // make sure no other thread ref this tablet + // tablet.mutex_.Lock(); + // tablet.mutex_.Unlock(); + // delete &tablet; + MasterEnv().GetTabletAvailability()->EraseNotReadyTablet(it2->second->GetPath()); + table.tablets_list_.erase(it2); + + if (table.tablets_list_.empty()) { + // clean up specific table dir in file system + if (FLAGS_tera_delete_obsolete_tabledir_enabled && + !io::MoveEnvDirToTrash(table.GetTableName())) { + LOG(ERROR) << "fail to move droped table to trash dir, tablename: " << table.GetTableName(); } + // delete &table; + all_tables_.erase(it); + } + return true; } -bool TabletManager::ClearMetaTable(const std::string& meta_tablet_addr, - StatusCode* ret_status) { - WriteTabletRequest write_request; - WriteTabletResponse write_response; - - ScanTabletRequest scan_request; - ScanTabletResponse scan_response; - scan_request.set_sequence_id(this_sequence_id_->Inc()); - scan_request.set_table_name(FLAGS_tera_master_meta_table_name); - scan_request.set_start(""); +void TabletManager::WriteToStream(std::ofstream& ofs, const std::string& key, + const std::string& value) { + uint32_t key_size = key.size(); + uint32_t value_size = value.size(); + ofs.write((char*)&key_size, sizeof(key_size)); + ofs.write(key.data(), key_size); + ofs.write((char*)&value_size, sizeof(value_size)); + ofs.write(value.data(), value_size); +} + +bool TabletManager::DumpMetaTableToFile(const std::string& filename, StatusCode* status) { + std::ofstream ofs(filename.c_str(), std::ofstream::binary | std::ofstream::trunc); + if (!ofs.is_open()) { + LOG(WARNING) << "fail to open file " << filename << " for write"; + SetStatusCode(kIOError, status); + return false; + } + + // get all table and tablet meta + std::vector table_list; + std::vector tablet_list; + ShowTable(&table_list, &tablet_list); + + // dump table meta + for (size_t i = 0; i < table_list.size(); i++) { + TablePtr table = table_list[i]; + std::string key, value; + table->ToMetaTableKeyValue(&key, &value); + WriteToStream(ofs, key, value); + } + + // dump tablet meta + for (size_t i = 0; i < tablet_list.size(); i++) { + TabletPtr tablet = tablet_list[i]; + std::string key, value; + tablet->ToMetaTableKeyValue(&key, &value); + WriteToStream(ofs, key, value); + } + + if (ofs.fail()) { + LOG(WARNING) << "fail to write to file " << filename; + SetStatusCode(kIOError, status); + return false; + } + ofs.close(); + return true; +} + +void TabletManager::LoadTableMeta(const std::string& key, const std::string& value) { + TableMeta meta; + ParseMetaTableKeyValue(key, value, &meta); + TablePtr table = CreateTable(meta); + StatusCode ret_status = kTabletNodeOk; + if (meta.table_name() == FLAGS_tera_master_meta_table_name) { + LOG(INFO) << "ignore meta table record in meta table"; + } else if (!AddTable(table, &ret_status)) { + LOG(ERROR) << "duplicate table in meta table: table=" << meta.table_name(); + // TODO: try correct invalid record + } else { + VLOG(5) << "load table record: " << table; + } +} + +void TabletManager::LoadTabletMeta(const std::string& key, const std::string& value) { + TabletMeta meta; + ParseMetaTableKeyValue(key, value, &meta); + meta.set_status(TabletMeta::kTabletOffline); + StatusCode ret_status = kTabletNodeOk; + if (meta.table_name() == FLAGS_tera_master_meta_table_name) { + LOG(INFO) << "ignore meta tablet record in meta table"; + } else { + TablePtr table; + if (!FindTable(meta.table_name(), &table)) { + LOG(WARNING) << "table schema not exist, skip this tablet: " << meta.path(); + return; + } + TabletPtr tablet = table->AddTablet(meta, &ret_status); + if (!tablet) { + LOG(ERROR) << "duplicate tablet in meta table: table=" << meta.table_name() + << " start=" << DebugString(meta.key_range().key_start()); + // TODO: try correct invalid record + } + } +} + +bool TabletManager::ClearMetaTable(const std::string& meta_tablet_addr, StatusCode* ret_status) { + WriteTabletRequest write_request; + WriteTabletResponse write_response; + + ScanTabletRequest scan_request; + ScanTabletResponse scan_response; + scan_request.set_sequence_id(this_sequence_id_->Inc()); + scan_request.set_table_name(FLAGS_tera_master_meta_table_name); + scan_request.set_start(""); + scan_request.set_end(""); + + tabletnode::TabletNodeClient meta_node_client(thread_pool_, meta_tablet_addr); + + bool scan_success = false; + while (meta_node_client.ScanTablet(&scan_request, &scan_response)) { + if (scan_response.status() != kTabletNodeOk) { + SetStatusCode(scan_response.status(), ret_status); + LOG(WARNING) << "fail to scan meta table: " << StatusCodeToString(scan_response.status()); + return false; + } + if (scan_response.results().key_values_size() <= 0) { + LOG(INFO) << "scan meta table success"; + scan_success = true; + break; + } + uint32_t record_size = scan_response.results().key_values_size(); + std::string last_record_key; + for (uint32_t i = 0; i < record_size; i++) { + const KeyValuePair& record = scan_response.results().key_values(i); + last_record_key = record.key(); + RowMutationSequence* mu_seq = write_request.add_row_list(); + mu_seq->set_row_key(record.key()); + Mutation* mutation = mu_seq->add_mutation_sequence(); + mutation->set_type(kDeleteRow); + } + std::string next_record_key = NextKey(last_record_key); + scan_request.set_start(next_record_key); scan_request.set_end(""); + scan_request.set_sequence_id(this_sequence_id_->Inc()); + scan_response.Clear(); + } - tabletnode::TabletNodeClient meta_node_client(thread_pool_, meta_tablet_addr); + if (!scan_success) { + SetStatusCode(kRPCError, ret_status); + LOG(WARNING) << "fail to scan meta table: " << StatusCodeToString(kRPCError); + return false; + } - bool scan_success = false; - while (meta_node_client.ScanTablet(&scan_request, &scan_response)) { - if (scan_response.status() != kTabletNodeOk) { - SetStatusCode(scan_response.status(), ret_status); - LOG(WARNING) << "fail to scan meta table: " - << StatusCodeToString(scan_response.status()); - return false; + write_request.set_sequence_id(this_sequence_id_->Inc()); + write_request.set_tablet_name(FLAGS_tera_master_meta_table_name); + if (!meta_node_client.WriteTablet(&write_request, &write_response)) { + SetStatusCode(kRPCError, ret_status); + LOG(WARNING) << "fail to clear meta tablet: " << StatusCodeToString(kRPCError); + return false; + } + StatusCode status = write_response.status(); + if (status == kTabletNodeOk && write_response.row_status_list_size() > 0) { + status = write_response.row_status_list(0); + } + if (status != kTabletNodeOk) { + SetStatusCode(status, ret_status); + LOG(WARNING) << "fail to clear meta tablet: " << StatusCodeToString(status); + return false; + } + + LOG(INFO) << "clear meta tablet"; + return true; +} + +bool TabletManager::DumpMetaTable(const std::string& meta_tablet_addr, StatusCode* ret_status) { + std::vector tables; + std::vector tablets; + ShowTable(&tables, &tablets); + + WriteTabletRequest request; + WriteTabletResponse response; + request.set_sequence_id(this_sequence_id_->Inc()); + request.set_tablet_name(FLAGS_tera_master_meta_table_name); + request.set_is_sync(true); + request.set_is_instant(true); + // dump table record + for (size_t i = 0; i < tables.size(); i++) { + std::string packed_key; + std::string packed_value; + tables[i]->ToMetaTableKeyValue(&packed_key, &packed_value); + RowMutationSequence* mu_seq = request.add_row_list(); + mu_seq->set_row_key(packed_key); + Mutation* mutation = mu_seq->add_mutation_sequence(); + mutation->set_type(kPut); + mutation->set_value(packed_value); + } + // dump tablet record + uint64_t request_size = 0; + for (size_t i = 0; i < tablets.size(); i++) { + std::string packed_key; + std::string packed_value; + if (tablets[i]->GetPath().empty()) { + std::string path = leveldb::GetTabletPathFromNum(tablets[i]->GetTableName(), + tablets[i]->GetTable()->GetNextTabletNo()); + tablets[i]->meta_.set_path(path); + } + tablets[i]->ToMetaTableKeyValue(&packed_key, &packed_value); + RowMutationSequence* mu_seq = request.add_row_list(); + mu_seq->set_row_key(packed_key); + Mutation* mutation = mu_seq->add_mutation_sequence(); + mutation->set_type(kPut); + mutation->set_value(packed_value); + request_size += mu_seq->ByteSize(); + + if (i == tablets.size() - 1 || request_size >= kMaxMetaWriteSize) { + tabletnode::TabletNodeClient meta_node_client(thread_pool_, meta_tablet_addr); + int32_t retry_times = 0; + int32_t wait_time_ms = 10; + for (; retry_times < FLAGS_tera_master_write_meta_retry_times; ++retry_times) { + if (!meta_node_client.WriteTablet(&request, &response)) { + SetStatusCode(kRPCError, ret_status); + LOG(WARNING) << "fail to dump meta tablet: " << StatusCodeToString(kRPCError); + ThisThread::Sleep(wait_time_ms); + wait_time_ms = std::min(wait_time_ms << 1, 10000); + continue; } - if (scan_response.results().key_values_size() <= 0) { - LOG(INFO) << "scan meta table success"; - scan_success = true; - break; + StatusCode status = response.status(); + if (status == kTabletNodeOk && response.row_status_list_size() > 0) { + status = response.row_status_list(0); } - uint32_t record_size = scan_response.results().key_values_size(); - std::string last_record_key; - for (uint32_t i = 0; i < record_size; i++) { - const KeyValuePair& record = scan_response.results().key_values(i); - last_record_key = record.key(); - RowMutationSequence* mu_seq = write_request.add_row_list(); - mu_seq->set_row_key(record.key()); - Mutation* mutation = mu_seq->add_mutation_sequence(); - mutation->set_type(kDeleteRow); + if (status != kTabletNodeOk) { + SetStatusCode(status, ret_status); + LOG(WARNING) << "fail to dump meta tablet: " << StatusCodeToString(status); + ThisThread::Sleep(wait_time_ms); + wait_time_ms = std::min(wait_time_ms << 1, 10000); + continue; } - std::string next_record_key = NextKey(last_record_key); - scan_request.set_start(next_record_key); - scan_request.set_end(""); - scan_request.set_sequence_id(this_sequence_id_->Inc()); - scan_response.Clear(); - } - - if (!scan_success) { - SetStatusCode(kRPCError, ret_status); - LOG(WARNING) << "fail to scan meta table: " - << StatusCodeToString(kRPCError); - return false; - } - - write_request.set_sequence_id(this_sequence_id_->Inc()); - write_request.set_tablet_name(FLAGS_tera_master_meta_table_name); - if (!meta_node_client.WriteTablet(&write_request, &write_response)) { - SetStatusCode(kRPCError, ret_status); - LOG(WARNING) << "fail to clear meta tablet: " - << StatusCodeToString(kRPCError); - return false; - } - StatusCode status = write_response.status(); - if (status == kTabletNodeOk && write_response.row_status_list_size() > 0) { - status = write_response.row_status_list(0); - } - if (status != kTabletNodeOk) { - SetStatusCode(status, ret_status); - LOG(WARNING) << "fail to clear meta tablet: " - << StatusCodeToString(status); + break; + } + if (retry_times == FLAGS_tera_master_write_meta_retry_times) { return false; + } + request.clear_row_list(); + response.Clear(); + request_size = 0; } + } - LOG(INFO) << "clear meta tablet"; - return true; -} - -bool TabletManager::DumpMetaTable(const std::string& meta_tablet_addr, - StatusCode* ret_status) { - std::vector tables; - std::vector tablets; - ShowTable(&tables, &tablets); - - WriteTabletRequest request; - WriteTabletResponse response; - request.set_sequence_id(this_sequence_id_->Inc()); - request.set_tablet_name(FLAGS_tera_master_meta_table_name); - request.set_is_sync(true); - request.set_is_instant(true); - // dump table record - for (size_t i = 0; i < tables.size(); i++) { - std::string packed_key; - std::string packed_value; - tables[i]->ToMetaTableKeyValue(&packed_key, &packed_value); - RowMutationSequence* mu_seq = request.add_row_list(); - mu_seq->set_row_key(packed_key); - Mutation* mutation = mu_seq->add_mutation_sequence(); - mutation->set_type(kPut); - mutation->set_value(packed_value); - } - // dump tablet record - uint64_t request_size = 0; - for (size_t i = 0; i < tablets.size(); i++) { - std::string packed_key; - std::string packed_value; - if (tablets[i]->GetPath().empty()) { - std::string path = leveldb::GetTabletPathFromNum(tablets[i]->GetTableName(), - tablets[i]->GetTable()->GetNextTabletNo()); - tablets[i]->meta_.set_path(path); - } - tablets[i]->ToMetaTableKeyValue(&packed_key, &packed_value); - RowMutationSequence* mu_seq = request.add_row_list(); - mu_seq->set_row_key(packed_key); - Mutation* mutation = mu_seq->add_mutation_sequence(); - mutation->set_type(kPut); - mutation->set_value(packed_value); - request_size += mu_seq->ByteSize(); - - if (i == tablets.size() - 1 || request_size >= kMaxRpcSize) { - tabletnode::TabletNodeClient meta_node_client(thread_pool_, meta_tablet_addr); - if (!meta_node_client.WriteTablet(&request, &response)) { - SetStatusCode(kRPCError, ret_status); - LOG(WARNING) << "fail to dump meta tablet: " - << StatusCodeToString(kRPCError); - return false; - } - StatusCode status = response.status(); - if (status == kTabletNodeOk && response.row_status_list_size() > 0) { - status = response.row_status_list(0); - } - if (status != kTabletNodeOk) { - SetStatusCode(status, ret_status); - LOG(WARNING) << "fail to dump meta tablet: " - << StatusCodeToString(status); - return false; - } - request.clear_row_list(); - response.Clear(); - request_size = 0; - } - } - - LOG(INFO) << "dump meta tablet"; - return true; + LOG(INFO) << "dump meta tablet"; + return true; } void TabletManager::ClearTableList() { - MutexLock lock(&mutex_); - TableList::iterator it = all_tables_.begin(); - for (; it != all_tables_.end(); ++it) { - Table& table = *it->second; - table.mutex_.Lock(); - table.mutex_.Unlock(); - table.tablets_list_.clear(); - //delete &table; - } - all_tables_.clear(); -} - -void TabletManager::PackTabletMeta(TabletMeta* meta, - const std::string& table_name, - const std::string& key_start, - const std::string& key_end, - const std::string& path, - const std::string& server_addr, - const TabletMeta::TabletStatus& table_status, - int64_t data_size) { - meta->set_table_name(table_name); - meta->set_path(path); - meta->set_server_addr(server_addr); - meta->set_status(table_status); - meta->set_size(data_size); - - KeyRange* key_range = meta->mutable_key_range(); - key_range->set_key_start(key_start); - key_range->set_key_end(key_end); + MutexLock lock(&mutex_); + TableList::iterator it = all_tables_.begin(); + for (; it != all_tables_.end(); ++it) { + Table& table = *it->second; + table.mutex_.Lock(); + table.mutex_.Unlock(); + table.tablets_list_.clear(); + // delete &table; + } + all_tables_.clear(); +} + +void TabletManager::PackTabletMeta(TabletMeta* meta, const std::string& table_name, + const std::string& key_start, const std::string& key_end, + const std::string& path, const std::string& server_addr, + const TabletMeta::TabletStatus& table_status, int64_t data_size, + uint64_t version) { + meta->set_table_name(table_name); + meta->set_path(path); + meta->set_server_addr(server_addr); + meta->set_status(table_status); + meta->set_size(data_size); + KeyRange* key_range = meta->mutable_key_range(); + key_range->set_key_start(key_start); + key_range->set_key_end(key_end); + meta->set_version(1); } bool TabletManager::GetMetaTabletAddr(std::string* addr) { - if (meta_tablet_ && meta_tablet_->GetStatus() == TabletMeta::kTabletReady) { - *addr = meta_tablet_->GetServerAddr(); - return true; - } - VLOG(5) << "fail to get meta addr"; - return false; + if (meta_tablet_ && meta_tablet_->GetStatus() == TabletMeta::kTabletReady) { + *addr = meta_tablet_->GetServerAddr(); + return true; + } + VLOG(5) << "fail to get meta addr"; + return false; } bool TabletManager::PickMergeTablet(TabletPtr& tablet, TabletPtr* tablet2) { - std::string table_name = tablet->GetTableName(); - TabletNodePtr node = tablet->GetTabletNode(); - if (tablet->IsBusy() || node->NodeDown()) { - LOG(WARNING) << "invalid merge candidate, tablet: " << tablet->GetPath() - << ", isbusy:" << tablet->IsBusy() << ", isdown: " << node->NodeDown(); - return false; - } - mutex_.Lock(); - // search table - TableList::iterator it = all_tables_.find(table_name); - if (it == all_tables_.end()) { - mutex_.Unlock(); - LOG(ERROR) << "[merge] table: " << table_name << " not exist"; - return false; - } - Table& table = *it->second; - MutexLock table_lock(&table.mutex_); + std::string table_name = tablet->GetTableName(); + TabletNodePtr node = tablet->GetTabletNode(); + if (tablet->IsBusy() || node->NodeDown()) { + LOG(WARNING) << "invalid merge candidate, tablet: " << tablet->GetPath() + << ", isbusy:" << tablet->IsBusy() << ", isdown: " << node->NodeDown(); + return false; + } + mutex_.Lock(); + // search table + TableList::iterator it = all_tables_.find(table_name); + if (it == all_tables_.end()) { mutex_.Unlock(); + LOG(ERROR) << "[merge] table: " << table_name << " not exist"; + return false; + } + Table& table = *it->second; + MutexLock table_lock(&table.mutex_); + mutex_.Unlock(); - if (table.tablets_list_.size() < 2) { - VLOG(20) << "[merge] table: " << table_name << " only have 1 tablet."; - return false; - } - - // search tablet - Table::TabletList::iterator it2 = table.tablets_list_.find(tablet->GetKeyStart()); - if (it2 == table.tablets_list_.end()) { - LOG(ERROR) << "[merge] table: " << table_name << " [start: " - << DebugString(tablet->GetKeyStart()) << "] not exist"; - return false; - } - TabletPtr prev, next, peer; - if (it2 == table.tablets_list_.begin()) { - peer = (++it2)->second; - } - else if (++it2 == table.tablets_list_.end()) { - --it2; - peer = (--it2)->second; - } - else { - next = it2->second; - --it2; - prev = (--it2)->second; - peer = prev->GetDataSize() > next->GetDataSize() ? next : prev; - } + if (table.tablets_list_.size() < 2) { + VLOG(20) << "[merge] table: " << table_name << " only have 1 tablet."; + return false; + } - if (peer->GetDataSize() < 0 || - peer->GetStatus() != TabletMeta::kTabletReady || - peer->IsBusy() || - peer->GetCounter().write_workload() >= FLAGS_tera_master_workload_merge_threshold || - peer->InTransition()) { - LOG(WARNING) << "[merge] no proper peer tablet. peer: " << peer - << " data size: " << peer->GetDataSize() - << " status: " << StatusCodeToString(peer->GetStatus()) - << " isbusy: " << peer->IsBusy() - << " write workload: " << peer->GetCounter().write_workload() - << " in transition: " << peer->InTransition(); - return false; - } - *tablet2 = peer; - return true; + // search tablet + Table::TabletList::iterator it2 = table.tablets_list_.find(tablet->GetKeyStart()); + if (it2 == table.tablets_list_.end()) { + LOG(ERROR) << "[merge] table: " << table_name + << " [start: " << DebugString(tablet->GetKeyStart()) << "] not exist"; + return false; + } + TabletPtr prev, next, peer; + if (it2 == table.tablets_list_.begin()) { + peer = (++it2)->second; + } else if (++it2 == table.tablets_list_.end()) { + --it2; + peer = (--it2)->second; + } else { + next = it2->second; + --it2; + prev = (--it2)->second; + peer = prev->GetDataSize() > next->GetDataSize() ? next : prev; + } + + if (peer->GetDataSize() < 0 || peer->GetStatus() != TabletMeta::kTabletReady || peer->IsBusy() || + peer->GetCounter().write_workload() >= FLAGS_tera_master_workload_merge_threshold || + peer->InTransition()) { + VLOG(13) << "[merge] no proper peer tablet. peer: " << peer + << " data size: " << peer->GetDataSize() + << " status: " << StatusCodeToString(peer->GetStatus()) + << " isbusy: " << peer->IsBusy() + << " write workload: " << peer->GetCounter().write_workload() + << " in transition: " << peer->InTransition(); + return false; + } + *tablet2 = peer; + return true; } double TabletManager::OfflineTabletRatio() { - uint32_t offline_tablet_count = 0, tablet_count = 0; - mutex_.Lock(); - TableList::iterator it = all_tables_.begin(); - for (; it != all_tables_.end(); ++it) { - Table& table = *it->second; - table.mutex_.Lock(); - Table::TabletList::iterator it2 = table.tablets_list_.begin(); - for (; it2 != table.tablets_list_.end(); ++it2) { - TabletPtr tablet = it2->second; - if (tablet->GetStatus() == TabletMeta::kTabletOffline) { - offline_tablet_count++; - } - tablet_count++; - } - table.mutex_.Unlock(); + uint32_t offline_tablet_count = 0, tablet_count = 0; + mutex_.Lock(); + TableList::iterator it = all_tables_.begin(); + for (; it != all_tables_.end(); ++it) { + Table& table = *it->second; + table.mutex_.Lock(); + Table::TabletList::iterator it2 = table.tablets_list_.begin(); + for (; it2 != table.tablets_list_.end(); ++it2) { + TabletPtr tablet = it2->second; + if (tablet->GetStatus() == TabletMeta::kTabletOffline) { + offline_tablet_count++; + } + tablet_count++; } - mutex_.Unlock(); + table.mutex_.Unlock(); + } + mutex_.Unlock(); - if (tablet_count == 0) { - return 0; - } - return (double)offline_tablet_count / tablet_count; + if (tablet_count == 0) { + return 0; + } + return (double)offline_tablet_count / tablet_count; } int64_t CounterWeightedSum(int64_t a1, int64_t a2) { - const int64_t w1 = 2; - const int64_t w2 = 1; - return (a1 * w1 + a2 * w2) / (w1 + w2); + const int64_t w1 = 2; + const int64_t w2 = 1; + return (a1 * w1 + a2 * w2) / (w1 + w2); } -} // namespace master -} // namespace tera +} // namespace master +} // namespace tera diff --git a/src/master/tablet_manager.h b/src/master/tablet_manager.h index f484f1267..f738176a1 100644 --- a/src/master/tablet_manager.h +++ b/src/master/tablet_manager.h @@ -6,6 +6,7 @@ #define TERA_MASTER_TABLET_MANAGER_H_ #include +#include #include #include #include @@ -39,26 +40,24 @@ class MasterZkAdapterBase; typedef std::shared_ptr TabletNodePtr; struct TabletFile { - uint64_t tablet_id; - uint32_t lg_id; - uint64_t file_id; + uint64_t tablet_id; + uint32_t lg_id; + uint64_t file_id; - bool operator <(const TabletFile& f) const { - return tablet_id < f.tablet_id || - (tablet_id == f.tablet_id && + bool operator<(const TabletFile& f) const { + return tablet_id < f.tablet_id || + (tablet_id == f.tablet_id && (lg_id < f.lg_id || (lg_id == f.lg_id && file_id < f.file_id))); - } + } - bool operator ==(const TabletFile& f) const { - return tablet_id == f.tablet_id && - lg_id == f.lg_id && - file_id == f.file_id; - } + bool operator==(const TabletFile& f) const { + return tablet_id == f.tablet_id && lg_id == f.lg_id && file_id == f.file_id; + } }; struct InheritedFileInfo { - uint32_t ref; - InheritedFileInfo() : ref(0) {} + uint32_t ref; + InheritedFileInfo() : ref(0) {} }; class MasterImpl; @@ -66,451 +65,430 @@ class Table; typedef std::shared_ptr
TablePtr; class Tablet { - friend class TabletManager; - friend class Table; - friend std::ostream& operator << (std::ostream& o, const Tablet& tablet); - -public: - Tablet() = delete; - Tablet(const Tablet&) = delete; - Tablet& operator=(const Tablet&) = delete; - explicit Tablet(const TabletMeta& meta); - Tablet(const TabletMeta& meta, TablePtr table); - ~Tablet(); - - void ToMeta(TabletMeta* meta); - const std::string& GetTableName(); - const std::string& GetServerAddr(); - std::string GetServerId(); - const std::string& GetPath(); - int64_t GetDataSize(); - void GetDataSize(int64_t* size, std::vector* lg_size); - int64_t GetQps(); - int64_t GetReadQps(); - int64_t GetWriteQps(); - int64_t GetScanQps(); - - const std::string& GetKeyStart(); - const std::string& GetKeyEnd(); - const KeyRange& GetKeyRange(); - const TableSchema& GetSchema(); - const TabletCounter& GetCounter(); - const TabletCounter& GetAverageCounter(); - TabletMeta::TabletStatus GetStatus(); - CompactStatus GetCompactStatus(); - TablePtr GetTable(); - bool IsBusy(); - std::string DebugString(); - - void UpdateSize(const TabletMeta& meta); - void SetCounter(const TabletCounter& counter); - void SetCompactStatus(CompactStatus compact_status); - void SetStatus(const TabletMeta::TabletStatus status); - - TableStatus GetTableStatus(); - - // is belong to a table? - bool IsBound(); - - bool Verify(const std::string& table_name, const std::string& key_start, - const std::string& key_end, const std::string& path, - const std::string& server_addr, StatusCode* ret_status = NULL); - - void ToMetaTableKeyValue(std::string* packed_key = NULL, - std::string* packed_value = NULL); - bool GetSchemaIsSyncing(); - - int64_t UpdateTime(); - int64_t SetUpdateTime(int64_t timestamp); - int64_t ReadyTime(); - int64_t LastMoveTime() const; - void SetLastMoveTime(int64_t time); - - bool TestAndSetSplitTimeStamp(int64_t ts); - - void AssignTabletNode(TabletNodePtr node); - TabletNodePtr GetTabletNode() {return node_;} - - virtual bool DoStateTransition(const TabletEvent event); - bool DoStateTransitionUnSafe(const TabletEvent event); - - // Will set a flag to ignore lost file error when tabletserver load tablet. - // We should set specific locality_groups that avoid missing some of the - // exceptions in others locality_groups. - bool HasErrorIgnoredLGs() const; - void GetErrorIgnoredLGs(std::vector* lgs); - bool SetErrorIgnoredLGs(const std::string& lg_list_str = ""); - - bool LockTransition() { - MutexLock lock(&mutex_); - if (in_transition_ == true) { - return false; - } - in_transition_ = true; - return true; - } - - void UnlockTransition() { - MutexLock lock(&mutex_); - in_transition_ = false; - } - bool InTransition() { - MutexLock lock(&mutex_); - return in_transition_; + friend class TabletManager; + friend class Table; + friend std::ostream& operator<<(std::ostream& o, const Tablet& tablet); + + public: + Tablet() = delete; + Tablet(const Tablet&) = delete; + Tablet& operator=(const Tablet&) = delete; + Tablet(const TabletMeta& meta, TablePtr table); + virtual ~Tablet(); + + void ToMeta(TabletMeta* meta); + const std::string& GetTableName(); + const std::string& GetServerAddr(); + std::string GetServerId(); + const std::string& GetPath(); + int64_t GetDataSize(); + void GetDataSize(int64_t* size, std::vector* lg_size); + int64_t GetDataSizeOnFlash(); + void SetDataSizeOnFlash(int64_t size); + int64_t GetQps(); + int64_t GetReadQps(); + int64_t GetWriteQps(); + int64_t GetScanQps(); + int64_t GetLRead(); + int64_t CreateTime() const { return create_time_; } + uint64_t IncVersion(); + uint64_t Version(); + + const std::string& GetKeyStart(); + const std::string& GetKeyEnd(); + const KeyRange& GetKeyRange(); + const TableSchema& GetSchema(); + const TabletCounter& GetCounter(); + const TabletCounter& GetAverageCounter(); + TabletMeta::TabletStatus GetStatus(); + CompactStatus GetCompactStatus(); + TablePtr GetTable(); + bool IsBusy(); + std::string DebugString(); + + void UpdateSize(const TabletMeta& meta); + bool HasFlashLg(); + + void SetCounter(const TabletCounter& counter); + void SetCompactStatus(CompactStatus compact_status); + void SetStatus(const TabletMeta::TabletStatus status); + + TableStatus GetTableStatus(); + + bool Verify(const std::string& table_name, const std::string& key_start, + const std::string& key_end, const std::string& path, const std::string& server_addr, + StatusCode* ret_status = NULL); + + void ToMetaTableKeyValue(std::string* packed_key = NULL, std::string* packed_value = NULL); + bool GetSchemaIsSyncing(); + + int64_t UpdateTime(); + int64_t SetUpdateTime(int64_t timestamp); + int64_t ReadyTime(); + int64_t LastMoveTime() const; + void SetLastMoveTime(int64_t time); + + bool TestAndSetSplitTimeStamp(int64_t ts); + + void AssignTabletNode(TabletNodePtr node); + TabletNodePtr GetTabletNode() { return node_; } + + virtual bool DoStateTransition(const TabletEvent event); + bool DoStateTransitionUnSafe(const TabletEvent event); + + // Will set a flag to ignore lost file error when tabletserver load tablet. + // We should set specific locality_groups that avoid missing some of the + // exceptions in others locality_groups. + bool HasErrorIgnoredLGs() const; + void GetErrorIgnoredLGs(std::vector* lgs); + bool SetErrorIgnoredLGs(const std::string& lg_list_str = ""); + + bool LockTransition() { + MutexLock lock(&mutex_); + if (in_transition_ == true) { + return false; } + in_transition_ = true; + return true; + } + + void UnlockTransition() { + MutexLock lock(&mutex_); + in_transition_ = false; + } + bool InTransition() { + MutexLock lock(&mutex_); + return in_transition_; + } + + int LoadFailedCount() { return load_fail_cnt_; } + int IncLoadFailedCount() { return ++load_fail_cnt_; } + void ClearLoadFailedCount() { load_fail_cnt_ = 0; } + + protected: + explicit Tablet(const TabletMeta& meta); // only used by subclass + mutable Mutex mutex_; + TabletMeta meta_; + TabletStateMachine state_machine_; + + private: + TabletNodePtr node_; + TablePtr table_; + int64_t update_time_; + int64_t last_move_time_us_; + int64_t data_size_on_flash_; + std::string server_id_; + + std::vector ignore_err_lgs_; // lg array for ignore_err_ + std::list counter_list_; + TabletCounter average_counter_; + struct TabletAccumulateCounter { + uint64_t low_read_cell; + uint64_t scan_rows; + uint64_t scan_kvs; + uint64_t scan_size; + uint64_t read_rows; + uint64_t read_kvs; + uint64_t read_size; + uint64_t write_rows; + uint64_t write_kvs; + uint64_t write_size; + + TabletAccumulateCounter() { memset(this, 0, sizeof(TabletAccumulateCounter)); } + } accumu_counter_; + void* merge_param_; + + // Tablet Split History Tracing + struct TabletSplitHistory { + int64_t last_split_ts; + + TabletSplitHistory() : last_split_ts(0) {} + } split_history_; + + bool in_transition_ = false; + + // protected by Table::mutex_ + bool gc_reported_; + std::multiset inh_files_; + + // sucessive load failed count, will be cleared on tablet load succeed + std::atomic load_fail_cnt_; + const int64_t create_time_; +}; - int LoadFailedCount() { return load_fail_cnt_; } - int IncLoadFailedCount() { return ++load_fail_cnt_; } - void ClearLoadFailedCount() { load_fail_cnt_ = 0; } - -protected: - mutable Mutex mutex_; - TabletMeta meta_; - TabletStateMachine state_machine_; - -private: - - TabletNodePtr node_; - TablePtr table_; - int64_t update_time_; - int64_t last_move_time_us_; - std::string server_id_; - - std::vector ignore_err_lgs_; // lg array for ignore_err_ - std::list counter_list_; - TabletCounter average_counter_; - struct TabletAccumulateCounter { - uint64_t low_read_cell; - uint64_t scan_rows; - uint64_t scan_kvs; - uint64_t scan_size; - uint64_t read_rows; - uint64_t read_kvs; - uint64_t read_size; - uint64_t write_rows; - uint64_t write_kvs; - uint64_t write_size; - - TabletAccumulateCounter() { - memset(this, 0, sizeof(TabletAccumulateCounter)); - } - } accumu_counter_; - void* merge_param_; - - // Tablet Split History Tracing - struct TabletSplitHistory { - int64_t last_split_ts; - - TabletSplitHistory() - : last_split_ts(0) {} - } split_history_; - - bool in_transition_ = false; - - // protected by Table::mutex_ - bool gc_reported_; - std::multiset inh_files_; - - // sucessive load failed count, will be cleared on tablet load succeed - std::atomic load_fail_cnt_; +class UnknownTablet : public Tablet { + public: + explicit UnknownTablet(const TabletMeta& meta) : Tablet(meta) {} + bool DoStateTransition(const TabletEvent); +}; +class StaleTablet : public UnknownTablet { + public: + explicit StaleTablet(const TabletMeta& meta) : UnknownTablet(meta) { + MutexLock lock(&mutex_); + meta_.set_status(TabletMeta::kTabletReady); + state_machine_.SetStatus(TabletMeta::kTabletReady); + } }; class MetaTablet : public Tablet { -public: - MetaTablet(const TabletMeta& meta, TablePtr table, std::shared_ptr zk_adapter); - // MetaTablet overwrite virtual function DoStateTransition() as it should update - // MetaTablet's address to zk/nexus when it's status switched to kTableReady - bool DoStateTransition(const TabletEvent event); - bool UpdateRootTabletAddr(); - -private: - std::shared_ptr zk_adapter_; - + public: + MetaTablet(const TabletMeta& meta, TablePtr table, + std::shared_ptr zk_adapter); + // MetaTablet overwrite virtual function DoStateTransition() as it should + // update + // MetaTablet's address to zk/nexus when it's status switched to kTableReady + bool DoStateTransition(const TabletEvent event); + bool UpdateRootTabletAddr(); + + private: + std::shared_ptr zk_adapter_; }; typedef class std::shared_ptr TabletPtr; typedef class std::shared_ptr MetaTabletPtr; -std::ostream& operator << (std::ostream& o, const TabletPtr& tablet); -std::ostream& operator << (std::ostream& o, const TablePtr& table); +std::ostream& operator<<(std::ostream& o, const TabletPtr& tablet); +std::ostream& operator<<(std::ostream& o, const TablePtr& table); class Table : public std::enable_shared_from_this
{ - - class TableMetric { - public: - TableMetric(const std::string& name): - table_name_(name), - tablet_num_("tera_master_tablet_num", GetTableNameLabel(), - {SubscriberType::LATEST}, false), - not_ready_("tera_master_tablet_not_ready_num", GetTableNameLabel(), - {SubscriberType::LATEST}, false), - table_size_("tera_master_table_size", GetTableNameLabel(), - {SubscriberType::LATEST}, false), - corrupt_num_("tera_master_table_corrupt", GetTableNameLabel(), - {SubscriberType::LATEST}, false) - {} - - void SetTabletNum(int64_t tablet_num) { - tablet_num_.Set(tablet_num); - } - - void SetNotReady(int64_t not_ready) { - not_ready_.Set(not_ready); - } - - void SetTableSize(int64_t table_size) { - table_size_.Set(table_size); - } - - void SetCorruptNum(int64_t corrupt_num) { - corrupt_num_.Set(corrupt_num); - } - - private: - std::string GetTableNameLabel() { - return "table:" + table_name_; - } - - const std::string table_name_; - tera::MetricCounter tablet_num_; - tera::MetricCounter not_ready_; - tera::MetricCounter table_size_; - tera::MetricCounter corrupt_num_; - }; - - friend class Tablet; - friend class TabletManager; - friend std::ostream& operator << (std::ostream& o, const Table& tablet); -public: - Table(const std::string& table_name, const TableSchema& schema, const TableStatus status); - Table(const std::string& table_name, const TableMeta& meta); - - bool AddTablet(TabletPtr& tablet, StatusCode* ret_code); - - bool FindTablet(const std::string& key_start, TabletPtr* tablet); - void FindTablet(const std::string& server_addr, - std::vector* tablet_meta_list); - void GetTablet(std::vector* tablet_meta_list); - const std::string& GetTableName(); - TableStatus GetStatus(); - - const TableSchema& GetSchema(); - void SetSchema(const TableSchema& schema); - const TableCounter& GetCounter(); - void AddDeleteTabletCount(); - bool NeedDelete(); - void ToMetaTableKeyValue(std::string* packed_key = NULL, - std::string* packed_value = NULL); - void ToMeta(TableMeta* meta); - uint64_t GetNextTabletNo(); - void RefreshCounter(); - int64_t GetTabletsCount(); - bool GetSchemaIsSyncing(); - void SetSchemaIsSyncing(bool flag); - bool GetSchemaSyncLock(); - void ClearSchemaSyncLock(); - void ResetRangeFragment(); - bool AddToRange(const std::string& start, const std::string& end); - bool IsCompleteRange() const; - RangeFragment* GetRangeFragment(); - void UpdateRpcDone(); - void StoreUpdateRpc(UpdateTableResponse* response, google::protobuf::Closure* done); - bool IsSchemaSyncedAtRange(const std::string& start, const std::string& end); - void SetOldSchema(TableSchema* schema); - bool GetOldSchema(TableSchema* schema); - void ClearOldSchema(); - bool PrepareUpdate(const TableSchema& schema); - void AbortUpdate(); - void CommitUpdate(); - - bool TryCollectInheritedFile(); - bool GetTabletsForGc(std::set* live_tablets, - std::set* dead_tablets, - bool ignore_not_ready); - bool CollectInheritedFileFromFilesystem(const std::string& tablename, - uint64_t tablet_num, - std::vector* tablet_files); - void MergeTablets(TabletPtr first_tablet, TabletPtr second_tablet, - const TabletMeta& merged_meta, TabletPtr* merged_tablet); - void SplitTablet(TabletPtr splited_tablet, - const TabletMeta& first_half, const TabletMeta& second_half, - TabletPtr* first_tablet, TabletPtr* second_tablet); - void GarbageCollect(const TabletInheritedFileInfo& tablet_inh_info); - void EnableDeadTabletGarbageCollect(uint64_t tablet_id); - void ReleaseInheritedFile(const TabletFile& file); - void AddInheritedFile(const TabletFile& file, bool need_ref); - void AddEmptyDeadTablet(uint64_t tablet_id); - uint64_t CleanObsoleteFile(); - - bool LockTransition() { - MutexLock lock(&mutex_); - if (in_transition_ == true) { - return false; - } - in_transition_ = true; - return true; + class TableMetric { + public: + TableMetric(const std::string& name) + : table_name_(name), + tablet_num_("tera_master_tablet_num", GetTableNameLabel(), {SubscriberType::LATEST}, + false), + not_ready_("tera_master_tablet_not_ready_num", GetTableNameLabel(), + {SubscriberType::LATEST}, false), + table_size_("tera_master_table_size", GetTableNameLabel(), {SubscriberType::LATEST}, + false), + corrupt_num_("tera_master_table_corrupt", GetTableNameLabel(), {SubscriberType::LATEST}, + false) {} + + void SetTabletNum(int64_t tablet_num) { tablet_num_.Set(tablet_num); } + + void SetNotReady(int64_t not_ready) { not_ready_.Set(not_ready); } + + void SetTableSize(int64_t table_size) { table_size_.Set(table_size); } + + void SetCorruptNum(int64_t corrupt_num) { corrupt_num_.Set(corrupt_num); } + + private: + std::string GetTableNameLabel() { return "table:" + table_name_; } + + const std::string table_name_; + tera::MetricCounter tablet_num_; + tera::MetricCounter not_ready_; + tera::MetricCounter table_size_; + tera::MetricCounter corrupt_num_; + }; + + friend class Tablet; + friend class TabletManager; + friend std::ostream& operator<<(std::ostream& o, const Table& tablet); + + public: + Table(const std::string& table_name, const TableSchema& schema, const TableStatus status); + Table(const std::string& table_name, const TableMeta& meta); + + TabletPtr AddTablet(const TabletMeta& meta, StatusCode* ret_code); + + bool FindTablet(const std::string& key_start, TabletPtr* tablet); + void FindTablet(const std::string& server_addr, std::vector* tablet_meta_list); + + bool FindOverlappedTablets(const std::string& key_start, const std::string& key_end, + std::vector* tablets); + + void GetTablet(std::vector* tablet_meta_list); + const std::string& GetTableName(); + TableStatus GetStatus(); + + const TableSchema& GetSchema(); + void SetSchema(const TableSchema& schema); + const TableCounter& GetCounter(); + void AddDeleteTabletCount(); + bool NeedDelete(); + void ToMetaTableKeyValue(std::string* packed_key = NULL, std::string* packed_value = NULL); + void ToMeta(TableMeta* meta); + uint64_t GetNextTabletNo(); + void RefreshCounter(); + int64_t GetTabletsCount(); + void GetTsAddrTabletsCount(std::map* ts_addr_tablets_count); + + bool GetSchemaIsSyncing(); + void SetSchemaIsSyncing(bool flag); + bool GetSchemaSyncLock(); + void ClearSchemaSyncLock(); + void SetOldSchema(TableSchema* schema); + bool GetOldSchema(TableSchema* schema); + void ClearOldSchema(); + bool PrepareUpdate(const TableSchema& schema); + void AbortUpdate(); + void CommitUpdate(); + + bool TryCollectInheritedFile(); + bool GetTabletsForGc(std::set* live_tablets, std::set* dead_tablets, + bool ignore_not_ready); + bool CollectInheritedFileFromFilesystem(const std::string& tablename, uint64_t tablet_num, + std::vector* tablet_files); + void MergeTablets(TabletPtr first_tablet, TabletPtr second_tablet, const TabletMeta& merged_meta, + TabletPtr* merged_tablet); + void SplitTablet(TabletPtr splited_tablet, const TabletMeta& first_half, + const TabletMeta& second_half, TabletPtr* first_tablet, + TabletPtr* second_tablet); + void GarbageCollect(const TabletInheritedFileInfo& tablet_inh_info); + void EnableDeadTabletGarbageCollect(uint64_t tablet_id); + void ReleaseInheritedFile(const TabletFile& file); + void AddInheritedFile(const TabletFile& file, bool need_ref); + void AddEmptyDeadTablet(uint64_t tablet_id); + uint64_t CleanObsoleteFile(); + + bool LockTransition() { + MutexLock lock(&mutex_); + if (in_transition_ == true) { + return false; } - - void UnlockTransition() { - MutexLock lock(&mutex_); - in_transition_ = false; - } - bool InTransition() { - MutexLock lock(&mutex_); - return in_transition_; - } - - bool DoStateTransition(const TableEvent event); - -private: - Table(const Table&) = delete; - Table& operator=(const Table&) = delete; - typedef std::map TabletList; - TabletList tablets_list_; - mutable Mutex mutex_; - std::string name_; - TableSchema schema_; - std::vector snapshot_list_; - std::vector rollback_names_; - uint32_t deleted_tablet_num_; - uint64_t max_tablet_no_; - int64_t create_time_; - TableCounter counter_; - TableMetric metric_; - bool schema_is_syncing_; // is schema syncing to all ts(all tablets) - RangeFragment* rangefragment_; - UpdateTableResponse* update_rpc_response_; - google::protobuf::Closure* update_rpc_done_; - TableSchema* old_schema_; - - // map from dead tablet's ID to its inherited files set - typedef std::map > InheritedFiles; - InheritedFiles useful_inh_files_; - std::queue obsolete_inh_files_; - // If there is any live tablet hasn't reported since a tablet died, - // this dead tablet cannot GC. - std::set gc_disabled_dead_tablets_; - uint32_t reported_live_tablets_num_; // realtime live tablets num, which already reported - - TableStateMachine state_machine_; - bool in_transition_ = false; + in_transition_ = true; + return true; + } + + void UnlockTransition() { + MutexLock lock(&mutex_); + + in_transition_ = false; + } + bool InTransition() { + MutexLock lock(&mutex_); + return in_transition_; + } + + bool DoStateTransition(const TableEvent event); + + int64_t CreateTime() const { return create_time_; } + + private: + Table(const Table&) = delete; + Table& operator=(const Table&) = delete; + typedef std::map TabletList; + TabletList tablets_list_; + mutable Mutex mutex_; + std::string name_; + TableSchema schema_; + std::vector snapshot_list_; + std::vector rollback_names_; + uint32_t deleted_tablet_num_; + uint64_t max_tablet_no_; + const int64_t create_time_; + TableCounter counter_; + TableMetric metric_; + bool schema_is_syncing_; // is schema syncing to all ts(all tablets) + TableSchema* old_schema_; + + // map from dead tablet's ID to its inherited files set + typedef std::map > InheritedFiles; + InheritedFiles useful_inh_files_; + std::queue obsolete_inh_files_; + // If there is any live tablet hasn't reported since a tablet died, + // this dead tablet cannot GC. + std::set gc_disabled_dead_tablets_; + uint32_t reported_live_tablets_num_; // realtime live tablets num, which + // already reported + + TableStateMachine state_machine_; + bool in_transition_ = false; }; class TabletManager { -public: - static TablePtr CreateTable(const TableMeta& meta); - - static TablePtr CreateTable(const std::string& table_name, const TableSchema& schema, const TableStatus& status); + public: + static TablePtr CreateTable(const TableMeta& meta); - static TabletPtr CreateTablet(const TabletMeta& meta); + static TablePtr CreateTable(const std::string& table_name, const TableSchema& schema, + const TableStatus& status); - static TabletPtr CreateTablet(TablePtr table, const TabletMeta& meta); - static void PackTabletMeta(TabletMeta* meta, const std::string& table_name, - const std::string& key_start = "", - const std::string& key_end = "", - const std::string& path = "", - const std::string& server_addr = "", - const TabletMeta::TabletStatus& table_status = TabletMeta::kTabletOffline, - int64_t data_size = 0); + static void PackTabletMeta( + TabletMeta* meta, const std::string& table_name, const std::string& key_start = "", + const std::string& key_end = "", const std::string& path = "", + const std::string& server_addr = "", + const TabletMeta::TabletStatus& table_status = TabletMeta::kTabletOffline, + int64_t data_size = 0, uint64_t version = 1); + typedef std::function FindCondCallback; - typedef std::function FindCondCallback; + TabletManager(Counter* sequence_id, MasterImpl* master_impl, ThreadPool* thread_pool); + ~TabletManager(); - TabletManager(Counter* sequence_id, MasterImpl* master_impl, ThreadPool* thread_pool); - ~TabletManager(); + void Init(); + void Stop(); - void Init(); - void Stop(); + bool DumpMetaTable(const std::string& addr, StatusCode* ret_status = NULL); + bool ClearMetaTable(const std::string& addr, StatusCode* ret_status = NULL); - bool DumpMetaTable(const std::string& addr, StatusCode* ret_status = NULL); - bool ClearMetaTable(const std::string& addr, StatusCode* ret_status = NULL); + bool DumpMetaTableToFile(const std::string& filename, StatusCode* ret_status = NULL); - bool DumpMetaTableToFile(const std::string& filename, - StatusCode* ret_status = NULL); + bool AddTable(TablePtr& table, StatusCode* ret_status); - bool AddTable(TablePtr& table, StatusCode* ret_status); + MetaTabletPtr AddMetaTablet(TabletNodePtr node, std::shared_ptr zk_adapter); - MetaTabletPtr AddMetaTablet(TabletNodePtr node, std::shared_ptr zk_adapter); + bool DeleteTable(const std::string& table_name, StatusCode* ret_status = NULL); - bool DeleteTable(const std::string& table_name, - StatusCode* ret_status = NULL); - - bool DeleteTablet(const std::string& table_name, - const std::string& key_start, - StatusCode* ret_status = NULL); - - bool FindTablet(const std::string& table_name, - const std::string& key_start, TabletPtr* tablet, + bool DeleteTablet(const std::string& table_name, const std::string& key_start, StatusCode* ret_status = NULL); - void FindTablet(const std::string& server_addr, - std::vector* tablet_meta_list, - bool need_disabled_tables); - - bool FindOverlappedTablets(const std::string& table_name, - const std::string& key_start, - const std::string& key_end, - std::vector* tablets, - StatusCode* ret_status = NULL); + bool FindTablet(const std::string& table_name, const std::string& key_start, TabletPtr* tablet, + StatusCode* ret_status = NULL); - bool FindTable(const std::string& table_name, - std::vector* tablet_meta_list, - StatusCode* ret_status = NULL); + void FindTablet(const std::string& server_addr, std::vector* tablet_meta_list, + bool need_disabled_tables); - bool SearchTablet(const std::string& table_name, - const std::string& key, - TabletPtr* tablet, - StatusCode* ret_status); + bool FindTable(const std::string& table_name, std::vector* tablet_meta_list, + StatusCode* ret_status = NULL); - bool FindTable(const std::string& table_name, TablePtr* tablet); + bool SearchTablet(const std::string& table_name, const std::string& key, TabletPtr* tablet, + StatusCode* ret_status); - int64_t SearchTable(std::vector* tablet_meta_list, - const std::string& prefix_table_name, - const std::string& start_table_name = "", - const std::string& start_tablet_key = "", - uint32_t max_found = std::numeric_limits::max(), - StatusCode* ret_status = NULL); + bool FindTable(const std::string& table_name, TablePtr* table); - bool ShowTable(std::vector* table_meta_list, - std::vector* tablet_meta_list, + int64_t SearchTable(std::vector* tablet_meta_list, + const std::string& prefix_table_name, const std::string& start_table_name = "", const std::string& start_tablet_key = "", - uint32_t max_table_found = std::numeric_limits::max(), - uint32_t max_tablet_found = std::numeric_limits::max(), - bool* is_more = NULL, + uint32_t max_found = std::numeric_limits::max(), StatusCode* ret_status = NULL); - bool GetMetaTabletAddr(std::string* addr); + bool ShowTable(std::vector* table_meta_list, std::vector* tablet_meta_list, + const std::string& start_table_name = "", const std::string& start_tablet_key = "", + uint32_t max_table_found = std::numeric_limits::max(), + uint32_t max_tablet_found = std::numeric_limits::max(), + bool* is_more = NULL, StatusCode* ret_status = NULL); - void ClearTableList(); + bool GetMetaTabletAddr(std::string* addr); - double OfflineTabletRatio(); + void ClearTableList(); - bool PickMergeTablet(TabletPtr& tablet, TabletPtr* tablet2); + double OfflineTabletRatio(); - void LoadTableMeta(const std::string& key, const std::string& value); - void LoadTabletMeta(const std::string& key, const std::string& value); + bool PickMergeTablet(TabletPtr& tablet, TabletPtr* tablet2); - int64_t GetAllTabletsCount(); + void LoadTableMeta(const std::string& key, const std::string& value); + void LoadTabletMeta(const std::string& key, const std::string& value); -private: + int64_t GetAllTabletsCount(); - void WriteToStream(std::ofstream& ofs, const std::string& key, - const std::string& value); + private: + void WriteToStream(std::ofstream& ofs, const std::string& key, const std::string& value); -private: - typedef std::map TableList; - TableList all_tables_; - mutable Mutex mutex_; - Counter* this_sequence_id_; - MasterImpl* master_impl_; - MetaTabletPtr meta_tablet_; - ThreadPool* thread_pool_; + private: + typedef std::map TableList; + TableList all_tables_; + mutable Mutex mutex_; + Counter* this_sequence_id_; + MasterImpl* master_impl_; + MetaTabletPtr meta_tablet_; + ThreadPool* thread_pool_; }; int64_t CounterWeightedSum(int64_t a1, int64_t a2); -} // namespace master -} // namespace tera +} // namespace master +} // namespace tera -#endif // TERA_MASTER_TABLET_MANAGER_H_ +#endif // TERA_MASTER_TABLET_MANAGER_H_ diff --git a/src/master/tablet_state_machine.cc b/src/master/tablet_state_machine.cc index 54e7b1353..a120ce801 100644 --- a/src/master/tablet_state_machine.cc +++ b/src/master/tablet_state_machine.cc @@ -6,100 +6,113 @@ #include "master/tablet_manager.h" #include "tablet_state_machine.h" -namespace tera{ -namespace master{ +namespace tera { +namespace master { static TabletStateMachine::TabletStateTransitionRulesType s_tablet_transition_rules; const TabletStateMachine::TabletStateTransitionRulesType TabletStateMachine::state_transitions_( - std::move( - s_tablet_transition_rules - // state transition rules from kTableOffLine - .AddTransitionRule(TabletMeta::kTabletOffline, TabletEvent::kLoadTablet, TabletMeta::kTabletLoading) - .AddTransitionRule(TabletMeta::kTabletOffline, TabletEvent::kTsDelayOffline, TabletMeta::kTabletDelayOffline) - .AddTransitionRule(TabletMeta::kTabletOffline, TabletEvent::kTabletLoadFail, TabletMeta::kTabletLoadFail) - .AddTransitionRule(TabletMeta::kTabletOffline, TabletEvent::kTableDisable, TabletMeta::kTabletDisable) - // state transition rules from kTabletPending - .AddTransitionRule(TabletMeta::kTabletDelayOffline, TabletEvent::kTsOffline, TabletMeta::kTabletOffline) - .AddTransitionRule(TabletMeta::kTabletDelayOffline, TabletEvent::kTsRestart, TabletMeta::kTabletOffline) - // state transition rules from kTableOnLoad - .AddTransitionRule(TabletMeta::kTabletLoading, TabletEvent::kTsLoadSucc, TabletMeta::kTabletReady) - .AddTransitionRule(TabletMeta::kTabletLoading, TabletEvent::kTsLoadFail, TabletMeta::kTabletLoadFail) - .AddTransitionRule(TabletMeta::kTabletLoading, TabletEvent::kTsDelayOffline, TabletMeta::kTabletDelayOffline) - .AddTransitionRule(TabletMeta::kTabletLoading, TabletEvent::kTsOffline, TabletMeta::kTabletOffline) - .AddTransitionRule(TabletMeta::kTabletLoading, TabletEvent::kTsRestart, TabletMeta::kTabletOffline) - // state transition rules from kTableUnLoading - .AddTransitionRule(TabletMeta::kTabletUnloading, TabletEvent::kTsUnLoadSucc, TabletMeta::kTabletOffline) - .AddTransitionRule(TabletMeta::kTabletUnloading, TabletEvent::kTsUnLoadFail, TabletMeta::kTabletUnloadFail) - .AddTransitionRule(TabletMeta::kTabletUnloading, TabletEvent::kTsOffline, TabletMeta::kTabletOffline) - // state transition rules from kTableReady - .AddTransitionRule(TabletMeta::kTabletReady, TabletEvent::kUnLoadTablet, TabletMeta::kTabletUnloading) - .AddTransitionRule(TabletMeta::kTabletReady, TabletEvent::kTsOffline, TabletMeta::kTabletOffline) - // state transition rules from kTableUnLoadFail - .AddTransitionRule(TabletMeta::kTabletUnloadFail, TabletEvent::kTsOffline, TabletMeta::kTabletOffline) - // state transition rules from kTableLoadFail - .AddTransitionRule(TabletMeta::kTabletLoadFail, TabletEvent::kLoadTablet, TabletMeta::kTabletLoading) - .AddTransitionRule(TabletMeta::kTabletLoadFail, TabletEvent::kTsOffline, TabletMeta::kTabletOffline) - .AddTransitionRule(TabletMeta::kTabletLoadFail, TabletEvent::kTableDisable, TabletMeta::kTabletDisable) - // state transition rules from kTabletDisable - .AddTransitionRule(TabletMeta::kTabletDisable, TabletEvent::kTableEnable, TabletMeta::kTabletOffline) - )); + std::move(s_tablet_transition_rules + // state transition rules from kTableOffLine + .AddTransitionRule(TabletMeta::kTabletOffline, TabletEvent::kLoadTablet, + TabletMeta::kTabletLoading) + .AddTransitionRule(TabletMeta::kTabletOffline, TabletEvent::kTsDelayOffline, + TabletMeta::kTabletDelayOffline) + .AddTransitionRule(TabletMeta::kTabletOffline, TabletEvent::kTabletLoadFail, + TabletMeta::kTabletLoadFail) + .AddTransitionRule(TabletMeta::kTabletOffline, TabletEvent::kTsOffline, + TabletMeta::kTabletOffline) + .AddTransitionRule(TabletMeta::kTabletOffline, TabletEvent::kTableDisable, + TabletMeta::kTabletDisable) + .AddTransitionRule(TabletMeta::kTabletOffline, TabletEvent::kFinishSplitTablet, + TabletMeta::kTabletSplitted) + .AddTransitionRule(TabletMeta::kTabletOffline, TabletEvent::kFinishMergeTablet, + TabletMeta::kTabletMerged) + // state transition rules from kTabletPending + .AddTransitionRule(TabletMeta::kTabletDelayOffline, TabletEvent::kTsOffline, + TabletMeta::kTabletOffline) + .AddTransitionRule(TabletMeta::kTabletDelayOffline, TabletEvent::kTsRestart, + TabletMeta::kTabletOffline) + // state transition rules from kTableOnLoad + .AddTransitionRule(TabletMeta::kTabletLoading, TabletEvent::kTsLoadSucc, + TabletMeta::kTabletReady) + .AddTransitionRule(TabletMeta::kTabletLoading, TabletEvent::kTsLoadFail, + TabletMeta::kTabletLoadFail) + .AddTransitionRule(TabletMeta::kTabletLoading, TabletEvent::kTsDelayOffline, + TabletMeta::kTabletDelayOffline) + .AddTransitionRule(TabletMeta::kTabletLoading, TabletEvent::kTsOffline, + TabletMeta::kTabletOffline) + .AddTransitionRule(TabletMeta::kTabletLoading, TabletEvent::kTsRestart, + TabletMeta::kTabletOffline) + // state transition rules from kTableUnloading + .AddTransitionRule(TabletMeta::kTabletUnloading, TabletEvent::kTsUnLoadSucc, + TabletMeta::kTabletOffline) + .AddTransitionRule(TabletMeta::kTabletUnloading, TabletEvent::kTsUnLoadFail, + TabletMeta::kTabletUnloadFail) + .AddTransitionRule(TabletMeta::kTabletUnloading, TabletEvent::kTsOffline, + TabletMeta::kTabletOffline) + // state transition rules from kTableReady + .AddTransitionRule(TabletMeta::kTabletReady, TabletEvent::kUnLoadTablet, + TabletMeta::kTabletUnloading) + .AddTransitionRule(TabletMeta::kTabletReady, TabletEvent::kTsOffline, + TabletMeta::kTabletOffline) + .AddTransitionRule(TabletMeta::kTabletReady, TabletEvent::kTsDelayOffline, + TabletMeta::kTabletDelayOffline) + // state transition rules from kTableUnLoadFail + .AddTransitionRule(TabletMeta::kTabletUnloadFail, TabletEvent::kTsOffline, + TabletMeta::kTabletOffline) + // state transition rules from kTableLoadFail + .AddTransitionRule(TabletMeta::kTabletLoadFail, TabletEvent::kLoadTablet, + TabletMeta::kTabletLoading) + .AddTransitionRule(TabletMeta::kTabletLoadFail, TabletEvent::kTsOffline, + TabletMeta::kTabletOffline) + .AddTransitionRule(TabletMeta::kTabletLoadFail, TabletEvent::kTableDisable, + TabletMeta::kTabletDisable) + // state transition rules from kTabletDisable + .AddTransitionRule(TabletMeta::kTabletDisable, TabletEvent::kTableEnable, + TabletMeta::kTabletOffline))); -TabletStateMachine::TabletStateMachine(const TabletMeta::TabletStatus init_state) : - curr_state_(init_state), ready_time_(std::numeric_limits::max()) { -} +TabletStateMachine::TabletStateMachine(const TabletMeta::TabletStatus init_state) + : curr_state_(init_state), ready_time_(std::numeric_limits::max()) {} bool TabletStateMachine::DoStateTransition(const TabletEvent& event) { - TabletMeta::TabletStatus post_state; - if (!state_transitions_.DoStateTransition(curr_state_, event, &post_state)) { - return false; - } - curr_state_ = post_state; - ready_time_ = (curr_state_ == TabletMeta::kTabletReady ? - get_micros() : std::numeric_limits::max()); - return true; + TabletMeta::TabletStatus post_state; + if (!state_transitions_.DoStateTransition(curr_state_, event, &post_state)) { + return false; + } + curr_state_ = post_state; + ready_time_ = (curr_state_ == TabletMeta::kTabletReady ? get_micros() + : std::numeric_limits::max()); + return true; } void TabletStateMachine::SetStatus(const TabletMeta::TabletStatus state) { - curr_state_ = state; - ready_time_ = (curr_state_ == TabletMeta::kTabletReady ? - get_micros() : std::numeric_limits::max()); + curr_state_ = state; + ready_time_ = (curr_state_ == TabletMeta::kTabletReady ? get_micros() + : std::numeric_limits::max()); } -TabletMeta::TabletStatus TabletStateMachine::GetStatus() const { - return curr_state_; -} +TabletMeta::TabletStatus TabletStateMachine::GetStatus() const { return curr_state_; } -int64_t TabletStateMachine::ReadyTime() { - return ready_time_; -} +int64_t TabletStateMachine::ReadyTime() { return ready_time_; } std::ostream& operator<<(std::ostream& o, const TabletEvent& event) { - static const char* msg[] = {"TabletEvent::kLoadTablet", - "TabletEvent::kUnLoadTablet", - "TabletEvent::kUpdateMeta", - "TabletEvent::kTsLoadSucc", - "TabletEvent::kTsLoadFail", - "TabletEvent::kTsLoadBusy", - "TabletEvent::kTsUnloadBusy", - "TabletEvent::kTsUnLoadSucc", - "TabletEvent::kTsUnLoadFail", - "TabletEvent::kTsOffline", - "TabletEvent::kTsRestart", - "TabletEvent::kWaitRpcResponse", - "TabletEvent::kTabletLoadFail", - "TabletEvent::kTsDelayOffline", - "TabletEvent::kTableDisable", - "TabletEvent::kTableEnable", - "TabletEvent::kEofEvent", - "TabletEvent::kUnknown"}; - static uint32_t msg_size = sizeof(msg) / sizeof(const char*); - typedef std::underlying_type::type UnderType; - uint32_t index = static_cast(event) - static_cast(TabletEvent::kLoadTablet); - index = index < msg_size ? index : (msg_size - 1); - o << msg[index]; - return o; + static const char* msg[] = {"TabletEvent::kLoadTablet", "TabletEvent::kUnLoadTablet", + "TabletEvent::kFinishSplitTablet", "TabletEvent::kFinishMergeTablet", + "TabletEvent::kUpdateMeta", "TabletEvent::kTsLoadSucc", + "TabletEvent::kTsLoadFail", "TabletEvent::kTsLoadBusy", + "TabletEvent::kTsUnloadBusy", "TabletEvent::kTsUnLoadSucc", + "TabletEvent::kTsUnLoadFail", "TabletEvent::kTsOffline", + "TabletEvent::kTsRestart", "TabletEvent::kWaitRpcResponse", + "TabletEvent::kTabletLoadFail", "TabletEvent::kTsDelayOffline", + "TabletEvent::kTableDisable", "TabletEvent::kTableEnable", + "TabletEvent::kEofEvent", "TabletEvent::kUnknown"}; + static uint32_t msg_size = sizeof(msg) / sizeof(const char*); + typedef std::underlying_type::type UnderType; + uint32_t index = static_cast(event) - static_cast(TabletEvent::kLoadTablet); + index = index < msg_size ? index : (msg_size - 1); + o << msg[index]; + return o; } - } } diff --git a/src/master/tablet_state_machine.h b/src/master/tablet_state_machine.h index 1d0990f4f..d301f61e9 100644 --- a/src/master/tablet_state_machine.h +++ b/src/master/tablet_state_machine.h @@ -11,57 +11,65 @@ #include "master/state_machine.h" #include "proto/table_meta.pb.h" -namespace tera{ -namespace master{ +namespace tera { +namespace master { enum class TabletEvent { - kLoadTablet, - kUnLoadTablet, - kUpdateMeta, // update tablet info to meta table - kTsLoadSucc, // tabletnode load tablet succ - kTsLoadFail, // tabletnode load tablet fail - kTsLoadBusy, // tabletnode has reached its max load concurrency - kTsUnloadBusy, // tabletnode has reached its max unload cuncurrency - kTsUnLoadSucc, // tabletnode unload tablet succ - kTsUnLoadFail, // tabletnode unload tablet fail - kTsOffline, // tabletnode offline - // tabletnode restarted, notice that we may apperceive TS_RESTART rather than - // TS_OFFLINE because of the zk session timeout configuration - kTsRestart, - kWaitRpcResponse, // dispatch rpc - kTabletLoadFail, // tablet load failed finally exhausting all load attempts - // a special event somewhat equivalent to TS_OFFLINE with consideration for cache locality - kTsDelayOffline, - kTableDisable, // enable table - kTableEnable, // disable table - kEofEvent, // end-marker event, some cleanup work maybe triggered by this event + kLoadTablet, + kUnLoadTablet, + kFinishSplitTablet, + kFinishMergeTablet, + kUpdateMeta, // update tablet info to meta table + kTsLoadSucc, // tabletnode load tablet succ + kTsLoadFail, // tabletnode load tablet fail + kTsLoadBusy, // tabletnode has reached its max load concurrency + kTsUnloadBusy, // tabletnode has reached its max unload cuncurrency + kTsUnLoadSucc, // tabletnode unload tablet succ + kTsUnLoadFail, // tabletnode unload tablet fail + kTsOffline, // tabletnode offline + // tabletnode restarted, notice that we may apperceive TS_RESTART rather than + // TS_OFFLINE because of the zk session timeout configuration + kTsRestart, + kWaitRpcResponse, // dispatch rpc + kTabletLoadFail, // tablet load failed finally exhausting all load attempts + // a special event somewhat equivalent to TS_OFFLINE with consideration for + // cache locality + kTsDelayOffline, + kTableDisable, // enable table + kTableEnable, // disable table + kEofEvent, // end-marker event, some cleanup work maybe triggered by this + // event }; std::ostream& operator<<(std::ostream& o, const TabletEvent& event); class TabletStateMachine { -public: - TabletStateMachine(const TabletMeta::TabletStatus init_state); + public: + TabletStateMachine(const TabletMeta::TabletStatus init_state); - // return true and update curr_state_ to the associated PostState corresponding to - // pair if there is a valid transition rule for pair, - // else return false; - bool DoStateTransition(const TabletEvent& event); + // return true and update curr_state_ to the associated PostState + // corresponding to + // pair if there is a valid transition rule for + // pair, + // else return false; + bool DoStateTransition(const TabletEvent& event); - TabletMeta::TabletStatus GetStatus() const; - // update tablet's current status bypass TabletStateTransitionRule - void SetStatus(const TabletMeta::TabletStatus status); - // get tablet's readytime in us, return int64::max() if tablet not in kTableReady status - int64_t ReadyTime(); + TabletMeta::TabletStatus GetStatus() const; + // update tablet's current status bypass TabletStateTransitionRule + void SetStatus(const TabletMeta::TabletStatus status); + // get tablet's readytime in us, return int64::max() if tablet not in + // kTableReady status + int64_t ReadyTime(); - typedef StateTransitionRules TabletStateTransitionRulesType; -private: - TabletMeta::TabletStatus curr_state_; - // timestamp of tablet status changed to kTableReady, in us - int64_t ready_time_; - // TabletStatus transition rules - const static TabletStateTransitionRulesType state_transitions_; -}; + typedef StateTransitionRules + TabletStateTransitionRulesType; + private: + TabletMeta::TabletStatus curr_state_; + // timestamp of tablet status changed to kTableReady, in us + int64_t ready_time_; + // TabletStatus transition rules + const static TabletStateTransitionRulesType state_transitions_; +}; } } diff --git a/src/master/tabletnode_manager.cc b/src/master/tabletnode_manager.cc index 4776a2f7a..92051b251 100644 --- a/src/master/tabletnode_manager.cc +++ b/src/master/tabletnode_manager.cc @@ -15,573 +15,625 @@ DECLARE_int32(tera_master_load_interval); DECLARE_bool(tera_master_meta_isolate_enabled); DECLARE_int32(tera_master_tabletnode_timeout); DECLARE_int32(tera_master_max_unload_concurrency); +DECLARE_bool(tera_master_support_isomerism); namespace tera { namespace master { -void BindTabletToTabletNode(TabletPtr tablet, TabletNodePtr node) { - tablet->AssignTabletNode(node); - node->UpdateSize(tablet); -} +static TabletNode::TSStateTransitionRulesType s_ts_state_transition_rules; -TabletNode::TabletNode() : state_(kOffLine), - report_status_(kTabletNodeIsRunning), data_size_(0), qps_(0), load_(0), - update_time_(0), query_fail_count_(0), onload_count_(0), unloading_count_(0), - onsplit_count_(0), plan_move_in_count_(0) { - info_.set_addr(""); - info_.set_status_m(NodeStateToString(state_)); - info_.set_timestamp(get_micros()); - timestamp_ = get_millis(); - //ref_count_.Inc(); +const TabletNode::TSStateTransitionRulesType TabletNode::state_transitions_(std::move( + s_ts_state_transition_rules.AddTransitionRule(kOffline, NodeEvent::kZkNodeCreated, kReady) + .AddTransitionRule(kReady, NodeEvent::kZkSessionTimeout, kOffline) + .AddTransitionRule(kReady, NodeEvent::kPrepareKickTs, kWaitKick) + .AddTransitionRule(kWaitKick, NodeEvent::kCancelKickTs, kReady) + .AddTransitionRule(kWaitKick, NodeEvent::kZkKickNodeCreated, kKicked) + .AddTransitionRule(kWaitKick, NodeEvent::kZkSessionTimeout, kOffline) + .AddTransitionRule(kKicked, NodeEvent::kZkSessionTimeout, kOffline))); +void BindTabletToTabletNode(TabletPtr tablet, TabletNodePtr node) { + tablet->AssignTabletNode(node); + node->UpdateSize(tablet); +} + +TabletNode::TabletNode() + : state_(kOffline), + report_status_(kTabletNodeIsRunning), + data_size_(0), + qps_(0), + load_(0), + persistent_cache_size_(0), + update_time_(0), + query_fail_count_(0), + onload_count_(0), + unloading_count_(0), + onsplit_count_(0), + plan_move_in_count_(0) { + info_.set_addr(""); + info_.set_status_m(NodeStateToString(state_)); + info_.set_timestamp(get_micros()); + timestamp_ = get_millis(); + // ref_count_.Inc(); } TabletNode::TabletNode(const std::string& addr, const std::string& uuid) - : addr_(addr), uuid_(uuid), state_(kOffLine), - report_status_(kTabletNodeIsRunning), data_size_(0), qps_(0), load_(0), - update_time_(0), query_fail_count_(0), onload_count_(0), unloading_count_(0), - onsplit_count_(0), plan_move_in_count_(0) { - info_.set_addr(addr); - info_.set_status_m(NodeStateToString(state_)); - info_.set_timestamp(get_micros()); - timestamp_ = get_millis(); + : addr_(addr), + uuid_(uuid), + state_(kOffline), + report_status_(kTabletNodeIsRunning), + data_size_(0), + qps_(0), + load_(0), + persistent_cache_size_(0), + update_time_(0), + query_fail_count_(0), + onload_count_(0), + unloading_count_(0), + onsplit_count_(0), + plan_move_in_count_(0) { + info_.set_addr(addr); + info_.set_status_m(NodeStateToString(state_)); + info_.set_timestamp(get_micros()); + timestamp_ = get_millis(); } TabletNode::TabletNode(const TabletNode& t) { - MutexLock lock(&t.mutex_); - addr_ = t.addr_; - state_ = kOffLine; - uuid_ = t.uuid_; - state_ = t.state_; - timestamp_ = t.timestamp_; - report_status_ = t.report_status_; - info_ = t.info_; - data_size_ = t.data_size_; - qps_ = t.qps_; - load_ = t.load_; - update_time_ = t.update_time_; - table_size_ = t.table_size_; - table_qps_ = t.table_qps_; - average_counter_ = t.average_counter_; - accumulate_counter_ = t.accumulate_counter_; - counter_list_ = t.counter_list_; - query_fail_count_ = t.query_fail_count_; - onload_count_ = t.onload_count_; - unloading_count_ = t.unloading_count_; - onsplit_count_ = t.onsplit_count_; - plan_move_in_count_ = t.plan_move_in_count_; - recent_load_time_list_ = t.recent_load_time_list_; -} - -TabletNode::~TabletNode() { - -} + MutexLock lock(&t.mutex_); + addr_ = t.addr_; + // state_ = kOffLine; + uuid_ = t.uuid_; + state_ = t.state_; + timestamp_ = t.timestamp_; + report_status_ = t.report_status_; + info_ = t.info_; + data_size_ = t.data_size_; + qps_ = t.qps_; + load_ = t.load_; + persistent_cache_size_ = t.persistent_cache_size_; + update_time_ = t.update_time_; + table_size_ = t.table_size_; + table_qps_ = t.table_qps_; + average_counter_ = t.average_counter_; + accumulate_counter_ = t.accumulate_counter_; + counter_list_ = t.counter_list_; + query_fail_count_ = t.query_fail_count_; + onload_count_ = t.onload_count_; + unloading_count_ = t.unloading_count_; + onsplit_count_ = t.onsplit_count_; + plan_move_in_count_ = t.plan_move_in_count_; + recent_load_time_list_ = t.recent_load_time_list_; +} + +TabletNode::~TabletNode() {} TabletNodeInfo TabletNode::GetInfo() { - MutexLock lock(&mutex_); - return info_; + MutexLock lock(&mutex_); + return info_; } -const std::string& TabletNode::GetAddr() { - return addr_; -} +const std::string& TabletNode::GetAddr() { return addr_; } -const std::string& TabletNode::GetId() { - return uuid_; -} +const std::string& TabletNode::GetId() { return uuid_; } uint64_t TabletNode::GetSize(const std::string& table_name) { - MutexLock lock(&mutex_); - if (table_name.empty()) { - return data_size_; - } - uint64_t table_size = 0; - std::map::iterator it = table_size_.find(table_name); - if (it != table_size_.end()) { - table_size = it->second; - } - return table_size; + MutexLock lock(&mutex_); + if (table_name.empty()) { + return data_size_; + } + uint64_t table_size = 0; + std::map::iterator it = table_size_.find(table_name); + if (it != table_size_.end()) { + table_size = it->second; + } + return table_size; } uint64_t TabletNode::GetQps(const std::string& table_name) { - MutexLock lock(&mutex_); - if (table_name.empty()) { - return qps_; - } - uint64_t table_qps = 0; - std::map::iterator it = table_qps_.find(table_name); - if (it != table_qps_.end()) { - table_qps = it->second; - } - return table_qps; + MutexLock lock(&mutex_); + if (table_name.empty()) { + return qps_; + } + uint64_t table_qps = 0; + std::map::iterator it = table_qps_.find(table_name); + if (it != table_qps_.end()) { + table_qps = it->second; + } + return table_qps; } uint64_t TabletNode::GetReadPending() { - MutexLock lock(&mutex_); - return average_counter_.read_pending_; + MutexLock lock(&mutex_); + return average_counter_.read_pending_; } uint64_t TabletNode::GetWritePending() { - MutexLock lock(&mutex_); - return average_counter_.write_pending_; + MutexLock lock(&mutex_); + return average_counter_.write_pending_; } uint64_t TabletNode::GetScanPending() { - MutexLock lock(&mutex_); - return average_counter_.scan_pending_; + MutexLock lock(&mutex_); + return average_counter_.scan_pending_; } uint64_t TabletNode::GetRowReadDelay() { - MutexLock lock(&mutex_); - return average_counter_.row_read_delay_; + MutexLock lock(&mutex_); + return average_counter_.row_read_delay_; +} + +uint64_t TabletNode::GetPersistentCacheSize() { + MutexLock lock(&mutex_); + return persistent_cache_size_; } uint32_t TabletNode::GetPlanToMoveInCount() { - MutexLock lock(&mutex_); - VLOG(16) << "GetPlanToMoveInCount: " << addr_ << " " << plan_move_in_count_; - return plan_move_in_count_; + MutexLock lock(&mutex_); + VLOG(16) << "GetPlanToMoveInCount: " << addr_ << " " << plan_move_in_count_; + return plan_move_in_count_; } void TabletNode::PlanToMoveIn() { - MutexLock lock(&mutex_); - plan_move_in_count_++; - VLOG(16) << "PlanToMoveIn: " << addr_ << " " << plan_move_in_count_; + MutexLock lock(&mutex_); + plan_move_in_count_++; + VLOG(16) << "PlanToMoveIn: " << addr_ << " " << plan_move_in_count_; } void TabletNode::DoneMoveIn() { - MutexLock lock(&mutex_); - // TODO (likang): If node restart just before a tablet move in, - // this count will be reset to 0. So we have to make sure it is greater - // than 0 before dec. - if (plan_move_in_count_ > 0) { - plan_move_in_count_--; - } - VLOG(16) << "DoneMoveIn: " << addr_ << " " << plan_move_in_count_; + MutexLock lock(&mutex_); + // TODO (likang): If node restart just before a tablet move in, + // this count will be reset to 0. So we have to make sure it is greater + // than 0 before dec. + if (plan_move_in_count_ > 0) { + plan_move_in_count_--; + } + VLOG(16) << "DoneMoveIn: " << addr_ << " " << plan_move_in_count_; } bool TabletNode::MayLoadNow() { - MutexLock lock(&mutex_); - if (recent_load_time_list_.size() < static_cast(FLAGS_tera_master_max_load_concurrency)) { - return true; - } - if (recent_load_time_list_.front() + FLAGS_tera_master_load_interval * 1000000 - <= get_micros()) { - return true; - } - VLOG(16) << "MayLoadNow() " << addr_ << " last load time: " - << (get_micros() - recent_load_time_list_.front()) / 1000000 << " seconds ago"; - return false; + MutexLock lock(&mutex_); + if (recent_load_time_list_.size() < + static_cast(FLAGS_tera_master_max_load_concurrency)) { + return true; + } + if (recent_load_time_list_.front() + FLAGS_tera_master_load_interval * 1000000 <= get_micros()) { + return true; + } + VLOG(16) << "MayLoadNow() " << addr_ + << " last load time: " << (get_micros() - recent_load_time_list_.front()) / 1000000 + << " seconds ago"; + return false; } void TabletNode::UpdateSize(TabletPtr tablet) { - MutexLock lock(&mutex_); - data_size_ += tablet->GetDataSize(); - if (table_size_.find(tablet->GetTableName()) != table_size_.end()) { - table_size_[tablet->GetTableName()] += tablet->GetDataSize(); - } else { - table_size_[tablet->GetTableName()] = tablet->GetDataSize(); - } - qps_ += tablet->GetQps(); - if (table_qps_.find(tablet->GetTableName()) != table_qps_.end()) { - table_qps_[tablet->GetTableName()] += tablet->GetQps(); - } else { - table_qps_[tablet->GetTableName()] = tablet->GetQps(); - } - + MutexLock lock(&mutex_); + data_size_ += tablet->GetDataSize(); + if (table_size_.find(tablet->GetTableName()) != table_size_.end()) { + table_size_[tablet->GetTableName()] += tablet->GetDataSize(); + } else { + table_size_[tablet->GetTableName()] = tablet->GetDataSize(); + } + qps_ += tablet->GetQps(); + if (table_qps_.find(tablet->GetTableName()) != table_qps_.end()) { + table_qps_[tablet->GetTableName()] += tablet->GetQps(); + } else { + table_qps_[tablet->GetTableName()] = tablet->GetQps(); + } } bool TabletNode::TryLoad(TabletPtr tablet) { - MutexLock lock(&mutex_); - if (onload_count_ < static_cast(FLAGS_tera_master_max_load_concurrency)) { - BeginLoad(); - return true; - } - return false; + MutexLock lock(&mutex_); + if (onload_count_ < static_cast(FLAGS_tera_master_max_load_concurrency)) { + BeginLoad(); + return true; + } + return false; } void TabletNode::BeginLoad() { - ++onload_count_; - recent_load_time_list_.push_back(get_micros()); - uint32_t list_size = recent_load_time_list_.size(); - if (list_size > static_cast(FLAGS_tera_master_max_load_concurrency)) { - CHECK_EQ(list_size - 1, static_cast(FLAGS_tera_master_max_load_concurrency)); - recent_load_time_list_.pop_front(); - } + ++onload_count_; + recent_load_time_list_.push_back(get_micros()); + uint32_t list_size = recent_load_time_list_.size(); + if (list_size > static_cast(FLAGS_tera_master_max_load_concurrency)) { + CHECK_EQ(list_size - 1, static_cast(FLAGS_tera_master_max_load_concurrency)); + recent_load_time_list_.pop_front(); + } } bool TabletNode::FinishLoad(TabletPtr tablet) { - MutexLock lock(&mutex_); - //assert(onload_count_ > 0); - if (onload_count_ > 0) { - --onload_count_; - } - return true; + MutexLock lock(&mutex_); + // assert(onload_count_ > 0); + if (onload_count_ > 0) { + --onload_count_; + } + return true; } bool TabletNode::TrySplit(TabletPtr tablet, const std::string& split_key) { - MutexLock lock(&mutex_); - // data_size_ should be modified by LoadTabletProcedure UnloadTabletProcedure and QueryCallback - // should not be modified by TrySplit - if (onsplit_count_ < static_cast(FLAGS_tera_master_max_split_concurrency)) { - ++onsplit_count_; - data_size_ -= tablet->GetDataSize(); - return true; - } - return false; + MutexLock lock(&mutex_); + // data_size_ should be modified by LoadTabletProcedure UnloadTabletProcedure + // and QueryCallback + // should not be modified by TrySplit + if (onsplit_count_ < static_cast(FLAGS_tera_master_max_split_concurrency)) { + ++onsplit_count_; + data_size_ -= tablet->GetDataSize(); + return true; + } + return false; } bool TabletNode::FinishSplit() { - MutexLock lock(&mutex_); - --onsplit_count_; - return true; + MutexLock lock(&mutex_); + --onsplit_count_; + return true; } bool TabletNode::CanUnload() { - MutexLock lock(&mutex_); - if (unloading_count_ < static_cast(FLAGS_tera_master_max_unload_concurrency)) { - ++unloading_count_; - return true; - } - return false; + MutexLock lock(&mutex_); + if (unloading_count_ < static_cast(FLAGS_tera_master_max_unload_concurrency)) { + ++unloading_count_; + return true; + } + return false; } void TabletNode::FinishUnload() { - MutexLock lock(&mutex_); - --unloading_count_; + MutexLock lock(&mutex_); + --unloading_count_; } NodeState TabletNode::GetState() { - MutexLock lock(&mutex_); - if (state_ == kOffLine && get_millis() - timestamp_ < FLAGS_tera_master_tabletnode_timeout) { - return kPendingOffLine; - } - return state_; -} - -bool TabletNode::SetState(NodeState new_state, NodeState* old_state) { - MutexLock lock(&mutex_); - if (NULL != old_state) { - *old_state = state_; - } - if (CheckStateSwitch(state_, new_state)) { - LOG(INFO) << addr_ << " state switch " - << StatusCodeToString(static_cast(state_)) << " to " - << StatusCodeToString(static_cast(new_state)); - state_ = new_state; - info_.set_status_m(NodeStateToString(state_)); - timestamp_ = get_millis(); - return true; - } - VLOG(5) << addr_ << " not support state switch " - << StatusCodeToString(static_cast(state_)) << " to " - << StatusCodeToString(static_cast(new_state)); - return false; -} - - -bool TabletNode::CheckStateSwitch(NodeState old_state, NodeState new_state) { - switch (old_state) { - case kReady: - if (new_state == kOffLine || new_state == kWaitKick) { - return true; - } - break; - case kOffLine: - if (new_state == kReady) { - return true; - } - break; - case kWaitKick: - if (new_state == kOnKick || new_state == kOffLine) { - return true; - } - break; - case kOnKick: - if (new_state == kOffLine) { - return true; - } - break; - default: - break; - } + MutexLock lock(&mutex_); + if (state_ == kOffline && get_millis() - timestamp_ < FLAGS_tera_master_tabletnode_timeout) { + return kPendingOffline; + } + return state_; +} + +bool TabletNode::DoStateTransition(NodeEvent event) { + MutexLock lock(&mutex_); + NodeState post_state; + if (!state_transitions_.DoStateTransition(state_, event, &post_state)) { + LOG(WARNING) << "node: " << addr_ << ", uuid: " << uuid_ + << ", illegal transition state: " << StatusCodeToString((StatusCode)state_) + << ", event: " << event; return false; + } + LOG(INFO) << "node: " << addr_ << ", uuid: " << uuid_ << ", state switch: " + << ", event: " << event << StatusCodeToString((StatusCode)state_) + << ", post state: " << StatusCodeToString((StatusCode)post_state); + state_ = post_state; + return true; } uint32_t TabletNode::GetQueryFailCount() { - MutexLock lock(&mutex_); - return query_fail_count_; + MutexLock lock(&mutex_); + return query_fail_count_; } uint32_t TabletNode::IncQueryFailCount() { - MutexLock lock(&mutex_); - return ++query_fail_count_; + MutexLock lock(&mutex_); + return ++query_fail_count_; } void TabletNode::ResetQueryFailCount() { - MutexLock lock(&mutex_); - query_fail_count_ = 0; + MutexLock lock(&mutex_); + query_fail_count_ = 0; } TabletNodeManager::TabletNodeManager(MasterImpl* master_impl) : tabletnode_added_(&mutex_), master_impl_(master_impl) {} -TabletNodeManager::~TabletNodeManager() { - MutexLock lock(&mutex_); -} - -TabletNodePtr TabletNodeManager::AddTabletNode(const std::string& addr, - const std::string& uuid) { - MutexLock lock(&mutex_); - TabletNodePtr null_ptr; - std::pair ret = tabletnode_list_.insert( - std::pair(addr, null_ptr)); - TabletNodePtr& state = ret.first->second; - // already has one TS at the same IP:PORT addr, return the existing TabletNodePtr - if (!ret.second) { - TabletNodePtr existing_node = ret.first->second; - LOG(ERROR) << "tabletnode [" << addr << " exist, existing uuid: " - << existing_node->uuid_ << ", to be added uuid: " << uuid; - return existing_node; - } - else { - LOG(INFO) << "add tabletnode : " << addr << ", id : " << uuid; - state.reset(new TabletNode(addr, uuid)); - } - // kReady represent heartbeat status - state->SetState(kReady, NULL); - tabletnode_added_.Broadcast(); - return state; +TabletNodeManager::~TabletNodeManager() { MutexLock lock(&mutex_); } + +TabletNodePtr TabletNodeManager::AddTabletNode(const std::string& addr, const std::string& uuid) { + MutexLock lock(&mutex_); + TabletNodePtr null_ptr; + std::pair ret = + tabletnode_list_.insert(std::pair(addr, null_ptr)); + TabletNodePtr& state = ret.first->second; + // already has one TS at the same IP:PORT addr, return the existing + // TabletNodePtr + if (!ret.second) { + TabletNodePtr existing_node = ret.first->second; + LOG(ERROR) << "tabletnode [" << addr << " exist, existing uuid: " << existing_node->uuid_ + << ", to be added uuid: " << uuid; + return existing_node; + } else { + LOG(INFO) << "add tabletnode : " << addr << ", id : " << uuid; + state.reset(new TabletNode(addr, uuid)); + } + // kReady represent heartbeat status + // state->SetState(kReady, NULL); + state->DoStateTransition(NodeEvent::kZkNodeCreated); + tabletnode_added_.Broadcast(); + return state; } TabletNodePtr TabletNodeManager::DelTabletNode(const std::string& addr) { - TabletNodePtr state(nullptr); - - MutexLock lock(&mutex_); - TabletNodeList::iterator it = tabletnode_list_.find(addr); - if (it == tabletnode_list_.end()) { - LOG(ERROR) << "tabletnode [" << addr << "] does not exist"; - return state; - } - state = it->second; - state->SetState(kOffLine, NULL); - tabletnode_list_.erase(it); - - // delete node may block, so we'd better release the mutex before that - LOG(INFO) << "delete tabletnode: " << addr << ", uuid: " << state->uuid_; - return state; -} + TabletNodePtr state(nullptr); -void TabletNodeManager::UpdateTabletNode(const std::string& addr, - const TabletNode& state) { - MutexLock lock(&mutex_); - TabletNodeList::iterator it = tabletnode_list_.find(addr); - if (it == tabletnode_list_.end()) { - LOG(ERROR) << "tabletnode [" << addr << "] does not exist"; - return; - } - TabletNode* node = it->second.get(); - MutexLock node_lock(&node->mutex_); - node->report_status_ = state.report_status_; - node->data_size_ = state.data_size_; - node->qps_ = state.qps_; - node->info_ = state.info_; - node->info_.set_addr(addr); - node->load_ = state.load_; - node->update_time_ = state.update_time_; - node->table_size_ = state.table_size_; - node->table_qps_ = state.table_qps_; - - node->info_.set_status_m(NodeStateToString(node->state_)); - node->info_.set_tablet_onload(node->onload_count_); - node->info_.set_tablet_onsplit(node->onsplit_count_); - node->info_.set_tablet_unloading(node->unloading_count_); - - node->average_counter_.read_pending_ = - CounterWeightedSum(state.info_.read_pending(), - node->average_counter_.read_pending_); - node->average_counter_.write_pending_ = - CounterWeightedSum(state.info_.write_pending(), - node->average_counter_.write_pending_); - node->average_counter_.scan_pending_ = - CounterWeightedSum(state.info_.scan_pending(), - node->average_counter_.scan_pending_); - node->average_counter_.row_read_delay_ = - CounterWeightedSum(state.info_.extra_info_size() > 1 ? state.info_.extra_info(1).value() : 0, - node->average_counter_.row_read_delay_); - VLOG(15) << "update tabletnode : " << addr; + MutexLock lock(&mutex_); + TabletNodeList::iterator it = tabletnode_list_.find(addr); + if (it == tabletnode_list_.end()) { + LOG(ERROR) << "tabletnode [" << addr << "] does not exist"; + return state; + } + state = it->second; + state->DoStateTransition(NodeEvent::kZkSessionTimeout); + // state->SetState(kOffLine, NULL); + tabletnode_list_.erase(it); + + // delete node may block, so we'd better release the mutex before that + LOG(INFO) << "delete tabletnode: " << addr << ", uuid: " << state->uuid_; + return state; +} + +void TabletNodeManager::UpdateTabletNode(const std::string& addr, const TabletNode& state) { + MutexLock lock(&mutex_); + TabletNodeList::iterator it = tabletnode_list_.find(addr); + if (it == tabletnode_list_.end()) { + LOG(ERROR) << "tabletnode [" << addr << "] does not exist"; + return; + } + TabletNode* node = it->second.get(); + MutexLock node_lock(&node->mutex_); + node->report_status_ = state.report_status_; + node->data_size_ = state.data_size_; + node->qps_ = state.qps_; + node->info_ = state.info_; + node->info_.set_addr(addr); + node->load_ = state.load_; + node->persistent_cache_size_ = state.persistent_cache_size_; + node->update_time_ = state.update_time_; + node->table_size_ = state.table_size_; + node->table_qps_ = state.table_qps_; + + node->info_.set_status_m(NodeStateToString(node->state_)); + node->info_.set_tablet_onload(node->onload_count_); + node->info_.set_tablet_onsplit(node->onsplit_count_); + node->info_.set_tablet_unloading(node->unloading_count_); + + node->average_counter_.read_pending_ = + CounterWeightedSum(state.info_.read_pending(), node->average_counter_.read_pending_); + node->average_counter_.write_pending_ = + CounterWeightedSum(state.info_.write_pending(), node->average_counter_.write_pending_); + node->average_counter_.scan_pending_ = + CounterWeightedSum(state.info_.scan_pending(), node->average_counter_.scan_pending_); + node->average_counter_.row_read_delay_ = + CounterWeightedSum(state.info_.extra_info_size() > 1 ? state.info_.extra_info(1).value() : 0, + node->average_counter_.row_read_delay_); + VLOG(15) << "update tabletnode : " << addr; } void TabletNodeManager::GetAllTabletNodeAddr(std::vector* addr_array) { - MutexLock lock(&mutex_); - TabletNodeList::iterator it = tabletnode_list_.begin(); - for (; it != tabletnode_list_.end(); ++it) { - addr_array->push_back(it->first); - } + MutexLock lock(&mutex_); + TabletNodeList::iterator it = tabletnode_list_.begin(); + for (; it != tabletnode_list_.end(); ++it) { + addr_array->push_back(it->first); + } } void TabletNodeManager::GetAllTabletNodeId(std::map* id_map) { - MutexLock lock(&mutex_); - TabletNodeList::iterator it = tabletnode_list_.begin(); - for (; it != tabletnode_list_.end(); ++it) { - TabletNodePtr node = it->second; - MutexLock lock2(&node->mutex_); - (*id_map)[it->first] = node->uuid_; - } + MutexLock lock(&mutex_); + TabletNodeList::iterator it = tabletnode_list_.begin(); + for (; it != tabletnode_list_.end(); ++it) { + TabletNodePtr node = it->second; + MutexLock lock2(&node->mutex_); + (*id_map)[it->first] = node->uuid_; + } } void TabletNodeManager::GetAllTabletNodeInfo(std::vector* array) { - MutexLock lock(&mutex_); - TabletNodeList::iterator it = tabletnode_list_.begin(); - for (; it != tabletnode_list_.end(); ++it) { - array->push_back(it->second); - } -} - -TabletNodePtr TabletNodeManager::FindTabletNode(const std::string& addr, - TabletNodePtr* state) { - TabletNodePtr node; - MutexLock lock(&mutex_); - TabletNodeList::iterator it = tabletnode_list_.find(addr); - if (it == tabletnode_list_.end()) { - //LOG(WARNING) << "tabletnode [" << addr << "] does not exist"; - return node; - } - node = it->second; - if (NULL != state) { - *state = it->second; - } + MutexLock lock(&mutex_); + TabletNodeList::iterator it = tabletnode_list_.begin(); + for (; it != tabletnode_list_.end(); ++it) { + array->push_back(it->second); + } +} + +TabletNodePtr TabletNodeManager::FindTabletNode(const std::string& addr, TabletNodePtr* state) { + TabletNodePtr node; + MutexLock lock(&mutex_); + TabletNodeList::iterator it = tabletnode_list_.find(addr); + if (it == tabletnode_list_.end()) { + // LOG(WARNING) << "tabletnode [" << addr << "] does not exist"; return node; + } + node = it->second; + if (NULL != state) { + *state = it->second; + } + return node; } -bool TabletNodeManager::ScheduleTabletNodeOrWait(Scheduler* scheduler, - const std::string& table_name, bool is_move, TabletNodePtr* node) { - return ScheduleTabletNode(scheduler, table_name, is_move, node, true); +bool TabletNodeManager::ScheduleTabletNodeOrWait(Scheduler* scheduler, + const std::string& table_name, + const TabletPtr& tablet, bool is_move, + TabletNodePtr* node) { + return ScheduleTabletNode(scheduler, table_name, tablet, is_move, true, node); } bool TabletNodeManager::ScheduleTabletNode(Scheduler* scheduler, const std::string& table_name, - bool is_move, TabletNodePtr* node) { - return ScheduleTabletNode(scheduler, table_name, is_move, node, false); + const TabletPtr& tablet, bool is_move, + TabletNodePtr* node) { + return ScheduleTabletNode(scheduler, table_name, tablet, is_move, false, node); } -bool TabletNodeManager::ScheduleTabletNode(Scheduler* scheduler, const std::string& table_name, - bool is_move, TabletNodePtr* node, bool wait) { - MutexLock lock(&mutex_); - std::string meta_node_addr; - master_impl_->GetMetaTabletAddr(&meta_node_addr); +void TabletNodeManager::WaitTabletNodeReconnect(const std::string& addr, const std::string& uuid, + int64_t reconn_timeout_taskid) { + MutexLock lock(&mutex_); + CHECK(reconnecting_ts_list_.find(addr) == reconnecting_ts_list_.end()); - TabletNodePtr null_ptr, meta_node; - std::vector candidates; - std::vector slow_candidates; - while (tabletnode_list_.empty()) { - if (!wait) { - LOG(WARNING) << "currently no available tabletnode"; - return false; - } - // If tabletnode_list is empty, we should hang and wait TabletNodeManager::AddTabletNode wake us - LOG(WARNING) << "currently no available tabletnode, ScheduleTabletNode suspended"; - tabletnode_added_.Wait(); - } + reconnecting_ts_list_[addr] = reconn_timeout_taskid; + LOG(INFO) << "tabletnode addr: " << addr << " wait reconnect taskid : " << reconn_timeout_taskid; +} - TabletNodeList::const_iterator it = tabletnode_list_.begin(); - for (; it != tabletnode_list_.end(); ++it) { - TabletNodePtr tablet_node = it->second; - if (tablet_node->state_ != kReady) { - continue; - } - if (FLAGS_tera_master_meta_isolate_enabled - && tablet_node->addr_ == meta_node_addr) { - meta_node = tablet_node; - continue; - } - if (is_move) { - if (!tablet_node->MayLoadNow()) { - continue; - } - if (tablet_node->GetPlanToMoveInCount() > 0) { - continue; - } - } - if (tablet_node->average_counter_.read_pending_ < 100) { - candidates.push_back(tablet_node); - } else { - slow_candidates.push_back(tablet_node); - } - } - if (candidates.size() == 0) { - candidates = slow_candidates; - } - if (candidates.size() == 0) { - if (meta_node != null_ptr) { - *node = meta_node; - return true; - } else { - return false; - } - } +int64_t TabletNodeManager::PopTabletNodeReconnectTaskID(const std::string& addr) { + MutexLock lock(&mutex_); + if (reconnecting_ts_list_.find(addr) == reconnecting_ts_list_.end()) { + return 0; + } + int64_t task_id = reconnecting_ts_list_[addr]; + reconnecting_ts_list_.erase(addr); + return task_id; +} - size_t best_index = 0; - if (scheduler->FindBestNode(candidates, table_name, &best_index)) { - *node = candidates[best_index]; - return true; - } +bool TabletNodeManager::IsFlashSizeEnough(const TabletPtr& tablet, const TabletNodePtr& node) { + if (!FLAGS_tera_master_support_isomerism) { + return true; + } + + if (tablet->GetTableName() == FLAGS_tera_master_meta_table_name && + node->GetPersistentCacheSize() == 0) { + return false; + } + + if (tablet->HasFlashLg() && node->GetPersistentCacheSize() == 0) { return false; + } + + return true; +} + +bool TabletNodeManager::ScheduleTabletNode(Scheduler* scheduler, const std::string& table_name, + const TabletPtr& tablet, bool is_move, bool wait, + TabletNodePtr* node) { + MutexLock lock(&mutex_); + std::string meta_node_addr; + master_impl_->GetMetaTabletAddr(&meta_node_addr); + + TabletNodePtr null_ptr, meta_node; + std::vector candidates; + std::vector slow_candidates; + while (tabletnode_list_.empty()) { + if (!wait) { + LOG(WARNING) << "currently no available tabletnode"; + return false; + } + // If tabletnode_list is empty, we should hang and wait + // TabletNodeManager::AddTabletNode wake us + LOG(WARNING) << "currently no available tabletnode, ScheduleTabletNode suspended"; + tabletnode_added_.Wait(); + } + + TabletNodeList::const_iterator it = tabletnode_list_.begin(); + for (; it != tabletnode_list_.end(); ++it) { + TabletNodePtr tablet_node = it->second; + if (tablet_node->GetState() != kReady) { + continue; + } + if (FLAGS_tera_master_meta_isolate_enabled && tablet_node->addr_ == meta_node_addr) { + meta_node = tablet_node; + continue; + } + + if (tablet && !IsFlashSizeEnough(tablet, tablet_node)) { + continue; + } + + if (is_move) { + if (!tablet_node->MayLoadNow()) { + continue; + } + if (tablet_node->GetPlanToMoveInCount() > 0) { + continue; + } + } + if (tablet_node->average_counter_.read_pending_ < 100) { + candidates.push_back(tablet_node); + } else { + slow_candidates.push_back(tablet_node); + } + } + if (candidates.size() == 0) { + candidates = slow_candidates; + } + if (candidates.size() == 0) { + if (meta_node != null_ptr) { + *node = meta_node; + return true; + } else { + return false; + } + } + + size_t best_index = 0; + if (scheduler->FindBestNode(candidates, table_name, &best_index)) { + *node = candidates[best_index]; + return true; + } + return false; } bool TabletNodeManager::ShouldMoveData(Scheduler* scheduler, const std::string& table_name, TabletNodePtr src_node, TabletNodePtr dst_node, const std::vector& tablet_candidates, size_t* tablet_index) { - VLOG(16) << "ShouldMoveData()"; - MutexLock lock(&mutex_); - if (tablet_candidates.size() == 0) { - return false; - } - if (src_node == dst_node) { - return false; - } - if (dst_node->GetState() != kReady) { - return false; - } - if (dst_node->average_counter_.read_pending_ > 100) { - return false; - } - if (!dst_node->MayLoadNow()) { - return false; - } - if (dst_node->GetPlanToMoveInCount() > 0) { - return false; + VLOG(16) << "ShouldMoveData()"; + MutexLock lock(&mutex_); + if (tablet_candidates.size() == 0) { + return false; + } + if (src_node == dst_node) { + return false; + } + if (dst_node->GetState() != kReady) { + return false; + } + if (dst_node->average_counter_.read_pending_ > 100) { + return false; + } + if (!dst_node->MayLoadNow()) { + return false; + } + if (dst_node->GetPlanToMoveInCount() > 0) { + return false; + } + if (FLAGS_tera_master_meta_isolate_enabled) { + std::string meta_node_addr; + master_impl_->GetMetaTabletAddr(&meta_node_addr); + if (dst_node->GetAddr() == meta_node_addr) { + return false; } - if (FLAGS_tera_master_meta_isolate_enabled) { - std::string meta_node_addr; - master_impl_->GetMetaTabletAddr(&meta_node_addr); - if (dst_node->GetAddr() == meta_node_addr) { - return false; - } - if (src_node->GetAddr() == meta_node_addr) { - *tablet_index = 0; - return true; - } + if (src_node->GetAddr() == meta_node_addr) { + *tablet_index = 0; + return true; } - return scheduler->FindBestTablet(src_node, dst_node, tablet_candidates, - table_name, tablet_index); + } + return scheduler->FindBestTablet(src_node, dst_node, tablet_candidates, table_name, tablet_index); } std::string NodeStateToString(NodeState state) { - switch (state) { - case kReady: - return "kReady"; - case kOffLine: - return "kOffLine"; - case kPendingOffLine: - return "kPendingOffLine"; - case kOnKick: - return "kOnKick"; - case kWaitKick: - return "kWaitKick"; - default: - return ""; - } + switch (state) { + case kReady: + return "kReady"; + case kOffline: + return "kOffline"; + case kPendingOffline: + return "kPendingOffline"; + case kOnKick: + return "kOnKick"; + case kWaitKick: + return "kWaitKick"; + case kKicked: + return "kKicked"; + default: + return ""; + } +} + +std::ostream& operator<<(std::ostream& o, const NodeEvent event) { + static const char* msg[] = {"NodeEvent::kZkNodeCreated", "NodeEvent::kZkSessionTimeout", + "NodeEvent::kPrepareKickTs", "NodeEvent::kCancelKickTs", + "NodeEvent::kZkKickNodeCreated", "NodeEvent::kUnknown"}; + static uint32_t msg_size = sizeof(msg) / sizeof(const char*); + typedef std::underlying_type::type UnderType; + uint32_t index = + static_cast(event) - static_cast(NodeEvent::kZkNodeCreated); + index = index < msg_size ? index : msg_size - 1; + o << msg[index]; + return o; } -} // namespace master -} // namespace tera +} // namespace master +} // namespace tera diff --git a/src/master/tabletnode_manager.h b/src/master/tabletnode_manager.h index 1c56920d2..b9d86e721 100644 --- a/src/master/tabletnode_manager.h +++ b/src/master/tabletnode_manager.h @@ -13,6 +13,7 @@ #include "common/mutex.h" #include "common/thread_pool.h" +#include "master/state_machine.h" #include "master/tablet_manager.h" #include "proto/proto_helper.h" @@ -23,116 +24,131 @@ class Tablet; typedef std::shared_ptr TabletPtr; enum NodeState { - kReady = kTabletNodeReady, - kOffLine = kTabletNodeOffLine, // before first query succe - kPendingOffLine = kTabletNodePendingOffLine, - kOnKick = kTabletNodeOnKick, - kWaitKick = kTabletNodeWaitKick + kReady = kTabletNodeReady, + kOffline = kTabletNodeOffline, // before first query succe + kPendingOffline = kTabletNodePendingOffline, + kOnKick = kTabletNodeOnKick, // not used, but kept for compatible + kWaitKick = kTabletNodeWaitKick, + kKicked = kTabletNodeKicked, }; +enum class NodeEvent { + kZkNodeCreated, + kZkSessionTimeout, + kPrepareKickTs, + kCancelKickTs, + kZkKickNodeCreated, +}; + +std::ostream& operator<<(std::ostream& o, const NodeEvent event); std::string NodeStateToString(NodeState state); struct TabletNode { - mutable Mutex mutex_; - std::string addr_; - std::string uuid_; - NodeState state_; - // state timestamp - int64_t timestamp_; - - // updated by query - StatusCode report_status_; - TabletNodeInfo info_; - uint64_t data_size_; - uint64_t qps_; - uint64_t load_; - uint64_t update_time_; - std::map table_size_; - std::map table_qps_; - - struct MutableCounter { - uint64_t read_pending_; - uint64_t write_pending_; - uint64_t scan_pending_; - uint64_t row_read_delay_; // micros - - MutableCounter() { - memset(this, 0, sizeof(MutableCounter)); - } - }; - MutableCounter average_counter_; - MutableCounter accumulate_counter_; - std::list counter_list_; - - uint32_t query_fail_count_; - uint32_t onload_count_; - uint32_t unloading_count_; - uint32_t onsplit_count_; - uint32_t plan_move_in_count_; - //std::list wait_load_list_; - //std::list > wait_split_list_; // (tablet, split_key) - - // The start time of recent load operation. - // Used to tell if node load too many tablets within short time. - // Keep FLAGS_tera_master_max_load_concurrency items at maximum. - std::list recent_load_time_list_; - - TabletNode(); - TabletNode(const std::string& addr, const std::string& uuid); - TabletNode(const TabletNode& t); - ~TabletNode(); - - TabletNodeInfo GetInfo(); - const std::string& GetAddr(); - const std::string& GetId(); - - // table_name == "" means all tables - uint64_t GetSize(const std::string& table_name = ""); - uint64_t GetQps(const std::string& table_name = ""); - uint64_t GetReadPending(); - uint64_t GetWritePending(); - uint64_t GetScanPending(); - uint64_t GetRowReadDelay(); - - uint32_t GetPlanToMoveInCount(); - void PlanToMoveIn(); - void DoneMoveIn(); - - // To tell if node load too many tablets within short time. - bool MayLoadNow(); - - void UpdateSize(TabletPtr tablet); - - bool TryLoad(TabletPtr tablet); - void BeginLoad(); - bool FinishLoad(TabletPtr tablet); - - bool TrySplit(TabletPtr tablet, const std::string& split_key = ""); - bool FinishSplit(); - - bool CanUnload(); - void FinishUnload(); - - NodeState GetState(); - bool NodeDown() { - MutexLock lock(&mutex_); - if (state_ == kOffLine) { - return true; - } - return false; - } - - bool SetState(NodeState new_state, NodeState* old_state); - bool CheckStateSwitch(NodeState old_state, NodeState new_state); - - uint32_t GetQueryFailCount(); - uint32_t IncQueryFailCount(); - void ResetQueryFailCount(); - int64_t GetTimeStamp() {return timestamp_;} - -private: - TabletNode& operator=(const TabletNode& t); + mutable Mutex mutex_; + std::string addr_; + std::string uuid_; + NodeState state_; + // state timestamp + int64_t timestamp_; + + // updated by query + StatusCode report_status_; + TabletNodeInfo info_; + uint64_t data_size_; + uint64_t qps_; + uint64_t load_; + uint64_t persistent_cache_size_; + uint64_t update_time_; + std::map table_size_; + std::map table_qps_; + + struct MutableCounter { + uint64_t read_pending_; + uint64_t write_pending_; + uint64_t scan_pending_; + uint64_t row_read_delay_; // micros + + MutableCounter() { memset(this, 0, sizeof(MutableCounter)); } + }; + MutableCounter average_counter_; + MutableCounter accumulate_counter_; + std::list counter_list_; + + uint32_t query_fail_count_; + uint32_t onload_count_; + uint32_t unloading_count_; + uint32_t onsplit_count_; + uint32_t plan_move_in_count_; + // std::list wait_load_list_; + // std::list > wait_split_list_; // (tablet, + // split_key) + + // The start time of recent load operation. + // Used to tell if node load too many tablets within short time. + // Keep FLAGS_tera_master_max_load_concurrency items at maximum. + std::list recent_load_time_list_; + + TabletNode(); + TabletNode(const std::string& addr, const std::string& uuid); + TabletNode(const TabletNode& t); + ~TabletNode(); + + TabletNodeInfo GetInfo(); + const std::string& GetAddr(); + const std::string& GetId(); + + // table_name == "" means all tables + uint64_t GetSize(const std::string& table_name = ""); + uint64_t GetQps(const std::string& table_name = ""); + uint64_t GetReadPending(); + uint64_t GetWritePending(); + uint64_t GetScanPending(); + uint64_t GetRowReadDelay(); + uint64_t GetPersistentCacheSize(); + + uint32_t GetPlanToMoveInCount(); + void PlanToMoveIn(); + void DoneMoveIn(); + + // To tell if node load too many tablets within short time. + bool MayLoadNow(); + + void UpdateSize(TabletPtr tablet); + + bool TryLoad(TabletPtr tablet); + void BeginLoad(); + bool FinishLoad(TabletPtr tablet); + + bool TrySplit(TabletPtr tablet, const std::string& split_key = ""); + bool FinishSplit(); + + bool CanUnload(); + void FinishUnload(); + + NodeState GetState(); + + bool NodeDown() { + MutexLock lock(&mutex_); + return (state_ == kOffline); + } + + bool NodeKicked() { + MutexLock lock(&mutex_); + return (state_ == kKicked || state_ == kOffline); + } + + bool DoStateTransition(NodeEvent event); + + uint32_t GetQueryFailCount(); + uint32_t IncQueryFailCount(); + void ResetQueryFailCount(); + + typedef StateTransitionRules TSStateTransitionRulesType; + + private: + TabletNode& operator=(const TabletNode& t); + const static TSStateTransitionRulesType state_transitions_; }; typedef std::shared_ptr TabletNodePtr; @@ -144,44 +160,50 @@ class Scheduler; class MasterImpl; class TabletNodeManager { -public: - explicit TabletNodeManager(MasterImpl* master_impl); - ~TabletNodeManager(); - - TabletNodePtr AddTabletNode(const std::string& addr, const std::string& uuid); - - // return the deleted tabletnode, if not exists return TabletNodePtr(nullptr) - TabletNodePtr DelTabletNode(const std::string& addr); - void UpdateTabletNode(const std::string& addr, const TabletNode& info); - TabletNodePtr FindTabletNode(const std::string& addr, TabletNodePtr* info); - void GetAllTabletNodeAddr(std::vector* addr_array); - void GetAllTabletNodeId(std::map* id_map); - void GetAllTabletNodeInfo(std::vector* info_array); - bool ScheduleTabletNode(Scheduler* scheduler, const std::string& table_name, - bool is_move, TabletNodePtr* node); - bool ScheduleTabletNodeOrWait(Scheduler* scheduler, const std::string& table_name, - bool is_move, TabletNodePtr* node); - bool ShouldMoveData(Scheduler* scheduler, const std::string& table_name, - TabletNodePtr src_node, TabletNodePtr dst_node, - const std::vector& tablet_candidates, - size_t* tablet_index); - bool CheckStateSwitch(NodeState old_state, NodeState new_state); - - void GetTablets(const std::string& server_addr, std::vector* tablet_list); - -private: - bool ScheduleTabletNode(Scheduler* scheduler, const std::string& table_name, - bool is_move, TabletNodePtr* node, bool wait); - - mutable Mutex mutex_; - CondVar tabletnode_added_; - MasterImpl* master_impl_; - - typedef std::map TabletNodeList; - TabletNodeList tabletnode_list_; + public: + explicit TabletNodeManager(MasterImpl* master_impl); + ~TabletNodeManager(); + + TabletNodePtr AddTabletNode(const std::string& addr, const std::string& uuid); + + // return the deleted tabletnode, if not exists return TabletNodePtr(nullptr) + TabletNodePtr DelTabletNode(const std::string& addr); + void UpdateTabletNode(const std::string& addr, const TabletNode& info); + TabletNodePtr FindTabletNode(const std::string& addr, TabletNodePtr* info); + void GetAllTabletNodeAddr(std::vector* addr_array); + void GetAllTabletNodeId(std::map* id_map); + void GetAllTabletNodeInfo(std::vector* info_array); + bool ScheduleTabletNode(Scheduler* scheduler, const std::string& table_name, + const TabletPtr& tablet, bool is_move, TabletNodePtr* node); + bool ScheduleTabletNodeOrWait(Scheduler* scheduler, const std::string& table_name, + const TabletPtr& tablet, bool is_move, TabletNodePtr* node); + bool ShouldMoveData(Scheduler* scheduler, const std::string& table_name, TabletNodePtr src_node, + TabletNodePtr dst_node, const std::vector& tablet_candidates, + size_t* tablet_index); + bool CheckStateSwitch(NodeState old_state, NodeState new_state); + + void WaitTabletNodeReconnect(const std::string& addr, const std::string& uuid, + int64_t reconn_timeout_taskid); + + int64_t PopTabletNodeReconnectTaskID(const std::string& addr); + + void GetTablets(const std::string& server_addr, std::vector* tablet_list); + + private: + bool IsFlashSizeEnough(const TabletPtr& tablet, const TabletNodePtr& node); + + bool ScheduleTabletNode(Scheduler* scheduler, const std::string& table_name, + const TabletPtr& tablet, bool is_move, bool wait, TabletNodePtr* node); + + mutable Mutex mutex_; + CondVar tabletnode_added_; + MasterImpl* master_impl_; + typedef std::map TabletNodeList; + TabletNodeList tabletnode_list_; + std::map reconnecting_ts_list_; }; -} // namespace master -} // namespace tera +} // namespace master +} // namespace tera -#endif // TERA_MASTER_TABLETNODE_MANAGER_H_ +#endif // TERA_MASTER_TABLETNODE_MANAGER_H_ diff --git a/src/master/test/abnormal_node_test.cc b/src/master/test/abnormal_node_test.cc new file mode 100644 index 000000000..b14b4ad64 --- /dev/null +++ b/src/master/test/abnormal_node_test.cc @@ -0,0 +1,110 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "gtest/gtest.h" +#include "master/abnormal_node_mgr.h" + +namespace tera { +namespace master { +namespace test { + +class AbnormalNodeTest : public ::testing::Test { + public: + AbnormalNodeTest() {} + + virtual ~AbnormalNodeTest() {} +}; + +TEST_F(AbnormalNodeTest, TestRecoredNodeDelete) { + FLAGS_abnormal_node_check_period_s = 10; + FLAGS_abnormal_node_trigger_count = 3; + FLAGS_abnormal_node_auto_recovery_period_s = 30; + AbnormalNodeMgr mgr; + const std::string addr = "host0"; + + mgr.RecordNodeDelete(addr, get_micros() / 1000000); + ASSERT_EQ(1, mgr.nodes_abnormal_infos_[addr].deleted_times.size()); + ASSERT_EQ(0, mgr.nodes_abnormal_infos_[addr].abnormal_count); + + mgr.RecordNodeDelete(addr, get_micros() / 1000000); + ASSERT_EQ(2, mgr.nodes_abnormal_infos_[addr].deleted_times.size()); + ASSERT_EQ(0, mgr.nodes_abnormal_infos_[addr].abnormal_count); + + int64_t t1 = get_micros() / 1000000; + mgr.RecordNodeDelete(addr, t1); + ASSERT_EQ(0, mgr.nodes_abnormal_infos_[addr].deleted_times.size()); + ASSERT_EQ(1, mgr.nodes_abnormal_infos_[addr].abnormal_count); + ASSERT_EQ(mgr.nodes_abnormal_infos_[addr].recovery_time, + t1 + FLAGS_abnormal_node_auto_recovery_period_s); + + mgr.RecordNodeDelete(addr, get_micros() / 1000000); + ASSERT_EQ(1, mgr.nodes_abnormal_infos_[addr].deleted_times.size()); + ASSERT_EQ(1, mgr.nodes_abnormal_infos_[addr].abnormal_count); + + mgr.RecordNodeDelete(addr, get_micros() / 1000000); + ASSERT_EQ(2, mgr.nodes_abnormal_infos_[addr].deleted_times.size()); + ASSERT_EQ(1, mgr.nodes_abnormal_infos_[addr].abnormal_count); + + int64_t t2 = get_micros() / 1000000; + mgr.RecordNodeDelete(addr, t2); + ASSERT_EQ(0, mgr.nodes_abnormal_infos_[addr].deleted_times.size()); + ASSERT_EQ(2, mgr.nodes_abnormal_infos_[addr].abnormal_count); + ASSERT_EQ(mgr.nodes_abnormal_infos_[addr].recovery_time, + t2 + (FLAGS_abnormal_node_auto_recovery_period_s << 1)); +} + +TEST_F(AbnormalNodeTest, TestIsAbnormalNode) { + AbnormalNodeMgr mgr; + const std::string addr = "host0"; + const std::string uuid = "host0:uuid"; + FLAGS_abnormal_node_check_period_s = 10; + FLAGS_abnormal_node_trigger_count = 3; + FLAGS_abnormal_node_auto_recovery_period_s = 30; + + // empty node info + ASSERT_FALSE(mgr.IsAbnormalNode(addr, uuid)); + + mgr.RecordNodeDelete(addr, get_micros() / 1000000); + std::vector& times = mgr.nodes_abnormal_infos_[addr].deleted_times; + mgr.RecordNodeDelete(addr, get_micros() / 1000000); + + // has't trigger delete too frequent + ASSERT_FALSE(mgr.IsAbnormalNode(addr, uuid)); + + int64_t t1 = get_micros() / 1000000; + mgr.RecordNodeDelete(addr, t1); + + // trigger delete too frequent + ASSERT_TRUE(mgr.IsAbnormalNode(addr, uuid)); + + // not recovery + ASSERT_TRUE(mgr.IsAbnormalNode(addr, uuid)); + + mgr.nodes_abnormal_infos_[addr].recovery_time = t1; + + // auto recovery + ASSERT_FALSE(mgr.IsAbnormalNode(addr, uuid)); +} + +TEST_F(AbnormalNodeTest, TestDeleteTooFrequent) { + AbnormalNodeMgr mgr; + const std::string addr = "host0"; + FLAGS_abnormal_node_check_period_s = 10; + FLAGS_abnormal_node_trigger_count = 3; + + std::vector& times = mgr.nodes_abnormal_infos_[addr].deleted_times; + ASSERT_FALSE(mgr.DeleteTooFrequent(times)); + + times.emplace_back(1); + ASSERT_FALSE(mgr.DeleteTooFrequent(times)); + + times.emplace_back(2); + ASSERT_FALSE(mgr.DeleteTooFrequent(times)); + + times.emplace_back(3); + ASSERT_TRUE(mgr.DeleteTooFrequent(times)); +} +} +} +} diff --git a/src/master/test/create_table_procedure_test.cc b/src/master/test/create_table_procedure_test.cc index c4fc69896..2474beb9f 100644 --- a/src/master/test/create_table_procedure_test.cc +++ b/src/master/test/create_table_procedure_test.cc @@ -17,66 +17,66 @@ namespace master { namespace test { class CreateTableProcedureTest : public ::testing::Test { -public: - CreateTableProcedureTest() : tablet_manager_(new TabletManager(nullptr, nullptr, nullptr)), - proc_executor_(new ProcedureExecutor) {} + public: + CreateTableProcedureTest() + : tablet_manager_(new TabletManager(nullptr, nullptr, nullptr)), + proc_executor_(new ProcedureExecutor) {} - virtual ~CreateTableProcedureTest() {} + virtual ~CreateTableProcedureTest() {} - virtual void SetUp() { - FLAGS_tera_leveldb_env_type.assign("local"); - FLAGS_tera_tabletnode_path_prefix.assign("./create_table_procedure"); - InitMasterEnv(); - TableSchema schema; - StatusCode ret_code; - //table_ = TabletManager::CreateTable("test", schema, kTableEnable); - //EXPECT_TRUE(table_); - request_.reset(new CreateTableRequest); - response_.reset(new CreateTableResponse); - request_->set_table_name("test"); - create_proc_.reset(new CreateTableProcedure(request_.get(), response_.get(), nullptr, nullptr)); + virtual void SetUp() { + FLAGS_tera_leveldb_env_type.assign("local"); + FLAGS_tera_tabletnode_path_prefix.assign("./create_table_procedure"); + InitMasterEnv(); + TableSchema schema; + StatusCode ret_code; + // table_ = TabletManager::CreateTable("test", schema, kTableEnable); + // EXPECT_TRUE(table_); + request_.reset(new CreateTableRequest); + response_.reset(new CreateTableResponse); + request_->set_table_name("test"); + create_proc_.reset(new CreateTableProcedure(request_.get(), response_.get(), nullptr, nullptr)); - const ::testing::TestInfo* test_case = ::testing::UnitTest::GetInstance()->current_test_info(); - std::string test_name(test_case->name()); - //if (test_name.find("ObsoleteOldTableDir") != std::string::npos) { - InitFileSystem(); - //} - } + const ::testing::TestInfo* test_case = ::testing::UnitTest::GetInstance()->current_test_info(); + std::string test_name(test_case->name()); + // if (test_name.find("ObsoleteOldTableDir") != std::string::npos) { + InitFileSystem(); + //} + } -private: - void InitFileSystem() { - fs_env_ = io::LeveldbBaseEnv(); - } - - void InitMasterEnv() { - MasterEnv().Init(nullptr, nullptr, tablet_manager_, - nullptr, nullptr, std::shared_ptr(new ThreadPool), proc_executor_, - std::shared_ptr(new TabletAvailability(tablet_manager_)), nullptr); - } + private: + void InitFileSystem() { fs_env_ = io::LeveldbBaseEnv(); } -private: - TablePtr table_; - std::unique_ptr request_; - std::unique_ptr response_; - std::shared_ptr create_proc_; - std::shared_ptr tablet_manager_; - std::shared_ptr proc_executor_; - leveldb::Env* fs_env_; + void InitMasterEnv() { + MasterEnv().Init(nullptr, nullptr, tablet_manager_, access_builder_, nullptr, nullptr, nullptr, + std::shared_ptr(new ThreadPool), proc_executor_, + std::shared_ptr(new TabletAvailability(tablet_manager_)), + nullptr); + } + + private: + TablePtr table_; + std::unique_ptr request_; + std::unique_ptr response_; + std::shared_ptr create_proc_; + std::shared_ptr tablet_manager_; + std::shared_ptr access_builder_; + std::shared_ptr proc_executor_; + leveldb::Env* fs_env_; }; TEST_F(CreateTableProcedureTest, ObsoleteOldTableDir) { - std::string table_path = FLAGS_tera_tabletnode_path_prefix + "/test"; - std::string trash_path = FLAGS_tera_tabletnode_path_prefix + "/#trash"; - EXPECT_TRUE(fs_env_->CreateDir(table_path).ok()); - EXPECT_TRUE(fs_env_->CreateDir(trash_path).ok()); - int ret = chmod(trash_path.c_str(), 0600); - create_proc_->PreCheckHandler(CreateTablePhase::kPrepare); - EXPECT_EQ(response_->status(), kTableExist); - chmod(trash_path.c_str(), 0700); - create_proc_->PreCheckHandler(CreateTablePhase::kPrepare); - EXPECT_NE(access(table_path.c_str(), F_OK), 0); + std::string table_path = FLAGS_tera_tabletnode_path_prefix + "/test"; + std::string trash_path = FLAGS_tera_tabletnode_path_prefix + "/#trash"; + EXPECT_TRUE(fs_env_->CreateDir(table_path).ok()); + EXPECT_TRUE(fs_env_->CreateDir(trash_path).ok()); + int ret = chmod(trash_path.c_str(), 0600); + create_proc_->PreCheckHandler(CreateTablePhase::kPrepare); + EXPECT_EQ(response_->status(), kTableExist); + chmod(trash_path.c_str(), 0700); + create_proc_->PreCheckHandler(CreateTablePhase::kPrepare); + EXPECT_NE(access(table_path.c_str(), F_OK), 0); } - } } } diff --git a/src/master/test/load_tablet_procedure_test.cc b/src/master/test/load_tablet_procedure_test.cc index ee5fd54bb..fc8a9b2c8 100644 --- a/src/master/test/load_tablet_procedure_test.cc +++ b/src/master/test/load_tablet_procedure_test.cc @@ -2,7 +2,7 @@ #include "gtest/gtest.h" #include "master/load_tablet_procedure.h" #include "master/master_env.h" -#include "master/master_zk_adapter.h" +#include "master/test/mock_master_zk_adapter.h" #include "common/thread_pool.h" DECLARE_int32(tera_master_tabletnode_timeout); @@ -14,329 +14,338 @@ DECLARE_int32(tablet_load_max_tried_ts); namespace tera { namespace master { - +namespace test { class LoadTabletProcedureTest : public ::testing::Test { -public: - LoadTabletProcedureTest() : tablet_manager_(new TabletManager(nullptr, nullptr, nullptr)), - ts_manager_(new TabletNodeManager(new MasterImpl)), - tablet_availability_(new TabletAvailability(tablet_manager_)) {} - - virtual ~LoadTabletProcedureTest() {} - - virtual void SetUp() { - InitMasterEnv(); - - TableSchema schema; - StatusCode ret_code; - table_ = TabletManager::CreateTable("test", schema, kTableEnable); - EXPECT_TRUE(table_); - EXPECT_TRUE(tablet_manager_->AddTable(table_, &ret_code)); - - TabletMeta tablet_meta; - TabletManager::PackTabletMeta(&tablet_meta, - "test", "", "", "test/tablet00000001", "", TabletMeta::kTabletOffline, 0); - tablet_ = TabletManager::CreateTablet(table_, tablet_meta); - EXPECT_TRUE(table_->AddTablet(tablet_, &ret_code)); - - load_proc_ = std::shared_ptr( - new LoadTabletProcedure(tablet_, MasterEnv().GetThreadPool().get())); - EXPECT_TRUE(load_proc_); - } - - virtual void TearDown() {} - - static void SetUpTestCase() {}; - static void TearDownTestCase() {}; - -private: - void InitLoadTabletCallbackParameters(StatusCode status) { - request_ = new LoadTabletRequest; - response_ = new LoadTabletResponse; - response_->set_status(status); - load_proc_->load_request_dispatching_ = true; - load_proc_->load_retrys_ = 0; - load_proc_->slow_load_retrys_ = 0; - } - - void InitMasterEnv(); - TablePtr table_; - TabletPtr tablet_; - std::shared_ptr load_proc_; - std::shared_ptr tablet_manager_; - std::shared_ptr ts_manager_; - std::shared_ptr tablet_availability_; - - LoadTabletRequest* request_; - LoadTabletResponse* response_; - + public: + LoadTabletProcedureTest() + : tablet_manager_(new TabletManager(nullptr, nullptr, nullptr)), + ts_manager_(new TabletNodeManager(new MasterImpl(nullptr, nullptr))), + tablet_availability_(new TabletAvailability(tablet_manager_)) {} + + virtual ~LoadTabletProcedureTest() {} + + virtual void SetUp() { + InitMasterEnv(); + + TableSchema schema; + StatusCode ret_code; + table_ = TabletManager::CreateTable("test", schema, kTableEnable); + EXPECT_TRUE(table_); + EXPECT_TRUE(tablet_manager_->AddTable(table_, &ret_code)); + + TabletMeta tablet_meta; + TabletManager::PackTabletMeta(&tablet_meta, "test", "", "", "test/tablet00000001", "", + TabletMeta::kTabletOffline, 0); + tablet_ = table_->AddTablet(tablet_meta, &ret_code); + EXPECT_TRUE(table_); + + load_proc_ = std::shared_ptr( + new LoadTabletProcedure(tablet_, MasterEnv().GetThreadPool().get())); + EXPECT_TRUE(load_proc_); + } + + virtual void TearDown() {} + + static void SetUpTestCase(){}; + static void TearDownTestCase(){}; + + private: + void InitLoadTabletCallbackParameters(StatusCode status) { + request_ = new LoadTabletRequest; + response_ = new LoadTabletResponse; + response_->set_status(status); + load_proc_->load_request_dispatching_ = true; + load_proc_->load_retrys_ = 0; + load_proc_->slow_load_retrys_ = 0; + } + + void InitMasterEnv(); + TablePtr table_; + TabletPtr tablet_; + std::shared_ptr load_proc_; + std::shared_ptr tablet_manager_; + std::shared_ptr access_builder_; + std::shared_ptr ts_manager_; + std::shared_ptr tablet_availability_; + + LoadTabletRequest* request_; + LoadTabletResponse* response_; }; void LoadTabletProcedureTest::InitMasterEnv() { - - MasterEnv().Init(new MasterImpl, ts_manager_, tablet_manager_, - std::shared_ptr(new SizeScheduler), nullptr, - std::shared_ptr(new ThreadPool), - std::shared_ptr(new ProcedureExecutor), tablet_availability_, - std::shared_ptr(new tera::sdk::StatTable(nullptr, sdk::StatTableCustomer::kMaster))); + MasterEnv().Init(new MasterImpl(nullptr, nullptr), ts_manager_, tablet_manager_, access_builder_, + nullptr, std::shared_ptr(new SizeScheduler), nullptr, + std::shared_ptr(new ThreadPool), + std::shared_ptr(new ProcedureExecutor), tablet_availability_, + std::shared_ptr(new tera::sdk::StatTable( + nullptr, access_builder_, sdk::StatTableCustomer::kMaster))); } -TEST_F(LoadTabletProcedureTest, GenerateEventForUserTabletInStatusOffLine) { - // events to be processed with tablet in status kTableOffLine - EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletOffline); - EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kTsOffline); - TabletNodePtr tabletnode(new TabletNode("127.0.0.1:2000", "1234567")); - load_proc_->dest_node_ = tabletnode; - // dest_node down without tera_master_tabletnode_timeout - tabletnode->SetState(kOffLine, NULL); - FLAGS_tera_master_tabletnode_timeout = 0; - EXPECT_EQ(tabletnode->GetState(), kOffLine); - EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kTsOffline); - // dest_node down with tera_master_tabletnode_timeout - FLAGS_tera_master_tabletnode_timeout = 100; - EXPECT_EQ(tabletnode->GetState(), kPendingOffLine); - EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kTsDelayOffline); - usleep(100 * 1000); - EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kTsOffline); - - // dest_node restarted - ts_manager_->AddTabletNode("127.0.0.1:2000", "1234570"); - EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kTsRestart); - - tabletnode->SetState(kReady, NULL); - EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kUpdateMeta); - - load_proc_->update_meta_done_ = true; - EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kLoadTablet); - EXPECT_EQ(tabletnode->onload_count_, 1); - FLAGS_tera_master_max_load_concurrency = 1; - EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kTsLoadBusy); +TEST_F(LoadTabletProcedureTest, GenerateEventForUserTabletInStatusOffline) { + // events to be processed with tablet in status kTableOffline + EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletOffline); + EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kTsOffline); + TabletNodePtr tabletnode(new TabletNode("127.0.0.1:2000", "1234567")); + load_proc_->dest_node_ = tabletnode; + // dest_node down without tera_master_tabletnode_timeout + tabletnode->state_ = kOffline; + FLAGS_tera_master_tabletnode_timeout = 0; + EXPECT_EQ(tabletnode->GetState(), kOffline); + EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kTsOffline); + // dest_node down with tera_master_tabletnode_timeout + FLAGS_tera_master_tabletnode_timeout = 100; + EXPECT_EQ(tabletnode->GetState(), kPendingOffline); + EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kTsDelayOffline); + usleep(100 * 1000); + EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kTsOffline); + + // dest_node restarted + ts_manager_->AddTabletNode("127.0.0.1:2000", "1234570"); + EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kTsRestart); + + tabletnode->state_ = kReady; + EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kUpdateMeta); + + load_proc_->update_meta_done_ = true; + EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kLoadTablet); + EXPECT_EQ(tabletnode->onload_count_, 1); + FLAGS_tera_master_max_load_concurrency = 1; + EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kTsLoadBusy); } TEST_F(LoadTabletProcedureTest, GenerateEventForUserTabletInStatusOnLoad) { - // events to be processed with tablet in status kTableOnLoad - tablet_->SetStatus(TabletMeta::kTabletLoading); - FLAGS_tera_master_impl_retry_times = 2; - TabletNodePtr tabletnode(new TabletNode("127.0.0.1:2000", "1234567")); - tabletnode->SetState(kOffLine, NULL); - load_proc_->dest_node_ = tabletnode; - FLAGS_tera_master_tabletnode_timeout = 0; - EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kTsOffline); - FLAGS_tera_master_tabletnode_timeout = 100; - EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kTsDelayOffline); - ts_manager_->AddTabletNode("127.0.0.1:2000", "1234570"); - EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kTsRestart); - - tabletnode->SetState(kReady, NULL); - load_proc_->load_request_dispatching_ = true; - EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kWaitRpcResponse); - - load_proc_->load_request_dispatching_ = false; - EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kTsLoadSucc); - - load_proc_->load_retrys_ = FLAGS_tera_master_impl_retry_times + 1; - EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kTsLoadFail); - - tablet_->SetStatus(TabletMeta::kTabletReady); - EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kEofEvent); - tablet_->SetStatus(TabletMeta::kTabletOffline); - tablet_->SetStatus(TabletMeta::kTabletLoadFail); - EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kEofEvent); + // events to be processed with tablet in status kTableOnLoad + tablet_->SetStatus(TabletMeta::kTabletLoading); + FLAGS_tera_master_impl_retry_times = 2; + TabletNodePtr tabletnode(new TabletNode("127.0.0.1:2000", "1234567")); + tabletnode->state_ = kOffline; + load_proc_->dest_node_ = tabletnode; + FLAGS_tera_master_tabletnode_timeout = 0; + EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kTsOffline); + FLAGS_tera_master_tabletnode_timeout = 100; + EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kTsDelayOffline); + ts_manager_->AddTabletNode("127.0.0.1:2000", "1234570"); + EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kTsRestart); + + tabletnode->state_ = kReady; + load_proc_->load_request_dispatching_ = true; + EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kWaitRpcResponse); + + load_proc_->load_request_dispatching_ = false; + EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kTsLoadSucc); + + load_proc_->load_retrys_ = FLAGS_tera_master_impl_retry_times + 1; + EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kTsLoadFail); + + tablet_->SetStatus(TabletMeta::kTabletReady); + EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kEofEvent); + tablet_->SetStatus(TabletMeta::kTabletOffline); + tablet_->SetStatus(TabletMeta::kTabletLoadFail); + EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kEofEvent); } -class TestZkAdapter : public MasterZkAdapter { -public: - TestZkAdapter() : MasterZkAdapter(nullptr, std::string("")) {} - virtual bool UpdateRootTabletNode(const std::string& addr) { - return true; - } -}; - TEST_F(LoadTabletProcedureTest, GenerateEventForMetaTablet) { - TabletNodePtr tabletnode(new TabletNode("127.0.0.1:2000", "1234567")); - MetaTabletPtr meta_tablet = tablet_manager_->AddMetaTablet(tabletnode, - std::shared_ptr(new TestZkAdapter)); - load_proc_->tablet_ = meta_tablet; - EXPECT_EQ(meta_tablet->GetStatus(), TabletMeta::kTabletReady); - meta_tablet->SetStatus(TabletMeta::kTabletOffline); - load_proc_->dest_node_ = tabletnode; - FLAGS_tera_master_tabletnode_timeout = 100; - EXPECT_EQ(tabletnode->GetState(), kPendingOffLine); - EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kTsOffline); - // meta tablet always load immediate discard tabletnode concurrency loading limits - tabletnode->SetState(kReady, NULL); - tabletnode->onload_count_ = 3; - FLAGS_tera_master_max_load_concurrency = 2; - EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kLoadTablet); + TabletNodePtr tabletnode(new TabletNode("127.0.0.1:2000", "1234567")); + MetaTabletPtr meta_tablet = tablet_manager_->AddMetaTablet( + tabletnode, std::shared_ptr(new TestZkAdapter)); + load_proc_->tablet_ = meta_tablet; + EXPECT_EQ(meta_tablet->GetStatus(), TabletMeta::kTabletReady); + meta_tablet->SetStatus(TabletMeta::kTabletOffline); + load_proc_->dest_node_ = tabletnode; + FLAGS_tera_master_tabletnode_timeout = 100; + EXPECT_EQ(tabletnode->GetState(), kPendingOffline); + EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kTsOffline); + // meta tablet always load immediate discard tabletnode concurrency loading + // limits + tabletnode->state_ = kReady; + tabletnode->onload_count_ = 3; + FLAGS_tera_master_max_load_concurrency = 2; + EXPECT_EQ(load_proc_->GenerateEvent(), TabletEvent::kLoadTablet); } TEST_F(LoadTabletProcedureTest, IsNeweEvent) { - EXPECT_TRUE(load_proc_->events_.empty()); - EXPECT_TRUE(load_proc_->IsNewEvent(TabletEvent::kLoadTablet)); - // we should never got two successive TabletEvent::kLoadTablet - EXPECT_FALSE(load_proc_->IsNewEvent(TabletEvent::kLoadTablet)); - EXPECT_EQ(load_proc_->events_.size(), 1); - EXPECT_EQ(load_proc_->events_.back(), TabletEvent::kLoadTablet); - EXPECT_TRUE(load_proc_->IsNewEvent(TabletEvent::kTsRestart)); - EXPECT_EQ(load_proc_->events_.size(), 2); - EXPECT_EQ(load_proc_->events_.back(), TabletEvent::kTsRestart); - load_proc_->dest_node_ = TabletNodePtr(new TabletNode("127.0.0.1:2000", "1234567")); - EXPECT_TRUE(load_proc_->IsNewEvent(TabletEvent::kTsRestart)); - EXPECT_EQ(load_proc_->events_.size(), 3); - EXPECT_EQ(load_proc_->events_.back(), TabletEvent::kTsRestart); + EXPECT_TRUE(load_proc_->events_.empty()); + EXPECT_TRUE(load_proc_->IsNewEvent(TabletEvent::kLoadTablet)); + // we should never got two successive TabletEvent::kLoadTablet + EXPECT_FALSE(load_proc_->IsNewEvent(TabletEvent::kLoadTablet)); + EXPECT_EQ(load_proc_->events_.size(), 1); + EXPECT_EQ(load_proc_->events_.back(), TabletEvent::kLoadTablet); + EXPECT_TRUE(load_proc_->IsNewEvent(TabletEvent::kTsRestart)); + EXPECT_EQ(load_proc_->events_.size(), 2); + EXPECT_EQ(load_proc_->events_.back(), TabletEvent::kTsRestart); + load_proc_->dest_node_ = TabletNodePtr(new TabletNode("127.0.0.1:2000", "1234567")); + EXPECT_TRUE(load_proc_->IsNewEvent(TabletEvent::kTsRestart)); + EXPECT_EQ(load_proc_->events_.size(), 3); + EXPECT_EQ(load_proc_->events_.back(), TabletEvent::kTsRestart); } TEST_F(LoadTabletProcedureTest, LoadTabletCallback) { - tablet_->SetStatus(TabletMeta::kTabletLoading); - TabletNodePtr tabletnode = ts_manager_->AddTabletNode("127.0.0.1:2000", "1234567"); - EXPECT_EQ(tabletnode->GetState(), kReady); - InitLoadTabletCallbackParameters(kTabletNodeOk); - load_proc_->LoadTabletCallback(tabletnode, request_, response_, false, 0); - EXPECT_FALSE(load_proc_->load_request_dispatching_); - EXPECT_EQ(load_proc_->load_retrys_, 0); - EXPECT_EQ(load_proc_->slow_load_retrys_, 0); - - InitLoadTabletCallbackParameters(kTabletReady); - load_proc_->LoadTabletCallback(tabletnode, request_, response_, false, 0); - EXPECT_FALSE(load_proc_->load_request_dispatching_); - EXPECT_EQ(load_proc_->load_retrys_, 0); - EXPECT_EQ(load_proc_->slow_load_retrys_, 0); - - InitLoadTabletCallbackParameters(kTabletOnLoad); - load_proc_->slow_load_retrys_ = 0; - load_proc_->LoadTabletCallback(tabletnode, request_, response_, false, 0); - EXPECT_TRUE(load_proc_->load_request_dispatching_); - EXPECT_EQ(load_proc_->load_retrys_, 0); - EXPECT_EQ(load_proc_->slow_load_retrys_, 1); - - InitLoadTabletCallbackParameters(kTabletOnLoad); - load_proc_->slow_load_retrys_ = 9; - load_proc_->LoadTabletCallback(tabletnode, request_, response_, false, 0); - EXPECT_TRUE(load_proc_->load_request_dispatching_); - EXPECT_EQ(load_proc_->load_retrys_, 1); - EXPECT_EQ(load_proc_->slow_load_retrys_, 10); - // rpc succ with unexpect response->status - InitLoadTabletCallbackParameters(kTabletNotInit); - load_proc_->load_retrys_ = FLAGS_tera_master_impl_retry_times; - load_proc_->LoadTabletCallback(tabletnode, request_, response_, false, 0); - EXPECT_TRUE(load_proc_->load_request_dispatching_); - EXPECT_EQ(load_proc_->load_retrys_, FLAGS_tera_master_impl_retry_times + 1); - - // rpc fail - InitLoadTabletCallbackParameters(kTabletNodeOk); - load_proc_->LoadTabletCallback(tabletnode, request_, response_, true, 1); - EXPECT_TRUE(load_proc_->load_request_dispatching_); - EXPECT_EQ(load_proc_->load_retrys_, 1); - EXPECT_EQ(load_proc_->slow_load_retrys_, 0); - // tabletnode down - InitLoadTabletCallbackParameters(kTabletNodeOk); - tabletnode->SetState(kOffLine, NULL); - load_proc_->LoadTabletCallback(tabletnode, request_, response_, false, 0); - EXPECT_EQ(load_proc_->load_retrys_, 1); - EXPECT_EQ(load_proc_->slow_load_retrys_, 0); + tablet_->SetStatus(TabletMeta::kTabletLoading); + TabletNodePtr tabletnode = ts_manager_->AddTabletNode("127.0.0.1:2000", "1234567"); + EXPECT_EQ(tabletnode->GetState(), kReady); + InitLoadTabletCallbackParameters(kTabletNodeOk); + load_proc_->LoadTabletCallback(tabletnode, request_, response_, false, 0); + EXPECT_FALSE(load_proc_->load_request_dispatching_); + EXPECT_EQ(load_proc_->load_retrys_, 0); + EXPECT_EQ(load_proc_->slow_load_retrys_, 0); + + InitLoadTabletCallbackParameters(kTabletReady); + load_proc_->LoadTabletCallback(tabletnode, request_, response_, false, 0); + EXPECT_FALSE(load_proc_->load_request_dispatching_); + EXPECT_EQ(load_proc_->load_retrys_, 0); + EXPECT_EQ(load_proc_->slow_load_retrys_, 0); + + load_proc_->slow_load_retrys_ = 0; + load_proc_->load_retrys_ = 0; + InitLoadTabletCallbackParameters(kTabletNodeIsBusy); + load_proc_->LoadTabletCallback(tabletnode, request_, response_, false, 0); + InitLoadTabletCallbackParameters(kTabletNodeIsBusy); + load_proc_->LoadTabletCallback(tabletnode, request_, response_, false, 0); + EXPECT_EQ(load_proc_->load_retrys_, 0); + EXPECT_EQ(load_proc_->slow_load_retrys_, 0); + + InitLoadTabletCallbackParameters(kTabletOnLoad); + load_proc_->slow_load_retrys_ = 0; + load_proc_->LoadTabletCallback(tabletnode, request_, response_, false, 0); + EXPECT_TRUE(load_proc_->load_request_dispatching_); + EXPECT_EQ(load_proc_->load_retrys_, 0); + EXPECT_EQ(load_proc_->slow_load_retrys_, 1); + + InitLoadTabletCallbackParameters(kTabletWaitLoad); + load_proc_->slow_load_retrys_ = 0; + load_proc_->LoadTabletCallback(tabletnode, request_, response_, false, 0); + EXPECT_EQ(load_proc_->slow_load_retrys_, 1); + + InitLoadTabletCallbackParameters(kTabletOnLoad); + load_proc_->slow_load_retrys_ = 9; + load_proc_->LoadTabletCallback(tabletnode, request_, response_, false, 0); + EXPECT_TRUE(load_proc_->load_request_dispatching_); + EXPECT_EQ(load_proc_->load_retrys_, 1); + EXPECT_EQ(load_proc_->slow_load_retrys_, 10); + // rpc succ with unexpect response->status + InitLoadTabletCallbackParameters(kTabletNotInit); + load_proc_->load_retrys_ = FLAGS_tera_master_impl_retry_times; + load_proc_->LoadTabletCallback(tabletnode, request_, response_, false, 0); + EXPECT_TRUE(load_proc_->load_request_dispatching_); + EXPECT_EQ(load_proc_->load_retrys_, FLAGS_tera_master_impl_retry_times + 1); + + // rpc fail + InitLoadTabletCallbackParameters(kTabletNodeOk); + load_proc_->LoadTabletCallback(tabletnode, request_, response_, true, 1); + EXPECT_TRUE(load_proc_->load_request_dispatching_); + EXPECT_EQ(load_proc_->load_retrys_, 1); + EXPECT_EQ(load_proc_->slow_load_retrys_, 0); + // tabletnode down + InitLoadTabletCallbackParameters(kTabletNodeOk); + tabletnode->state_ = kOffline; + load_proc_->LoadTabletCallback(tabletnode, request_, response_, false, 0); + EXPECT_EQ(load_proc_->load_retrys_, 1); + EXPECT_EQ(load_proc_->slow_load_retrys_, 0); } TEST_F(LoadTabletProcedureTest, TestEventHandlers) { - TabletNodePtr node1 = ts_manager_->AddTabletNode("127.0.0.1:2000", "1"); - TabletNodePtr node2 = ts_manager_->AddTabletNode("127.0.0.1:2001", "2"); - TabletNodePtr node3 = ts_manager_->AddTabletNode("127.0.0.1:2002", "3"); - - node1->table_size_[tablet_->GetTableName()] = 20; - node2->SetState(kOffLine, NULL); - node3->data_size_ = 10; - load_proc_->TabletNodeOffLineHandler(TabletEvent::kTsOffline); - EXPECT_EQ(load_proc_->dest_node_->uuid_, node3->uuid_); - TabletNodePtr del_node = ts_manager_->DelTabletNode("127.0.0.1:2002"); - load_proc_->restarted_dest_node_ = ts_manager_->AddTabletNode("127.0.0.1:2002", "4"); - load_proc_->TabletNodeRestartHandler(TabletEvent::kTsRestart); - EXPECT_NE(load_proc_->dest_node_->uuid_, "3"); - EXPECT_EQ(del_node, node3); - EXPECT_TRUE(del_node->NodeDown()); - EXPECT_EQ(load_proc_->dest_node_->uuid_, "4"); - EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletOffline); - - load_proc_->TabletNodeBusyHandler(TabletEvent::kTsLoadBusy); - load_proc_->TabletPendOffLineHandler(TabletEvent::kTsDelayOffline); - EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletDelayOffline); - - tablet_->SetStatus(TabletMeta::kTabletLoading); - load_proc_->WaitRpcResponseHandler(TabletEvent::kWaitRpcResponse); - load_proc_->TabletNodeLoadSuccHandler(TabletEvent::kTsLoadSucc); - EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletReady); - - tablet_->SetStatus(TabletMeta::kTabletLoading); - load_proc_->TabletNodeLoadFailHandler(TabletEvent::kTsLoadFail); - EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletLoadFail); - tablet_->SetStatus(TabletMeta::kTabletOffline); - load_proc_->TabletLoadFailHandler(TabletEvent::kTabletLoadFail); - EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletLoadFail); - - // tablet can only transite to kTableOnLoad from kTableOffLine, so set status to kTableOffLine first - tablet_->SetStatus(TabletMeta::kTabletOffline); - load_proc_->dest_node_ = node1; - //load_proc_->LoadTabletHandler(TabletEvent::kLoadTablet); - //EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletLoading); - - tablet_->SetStatus(TabletMeta::kTabletReady); - load_proc_->EOFHandler(TabletEvent::kEofEvent); + TabletNodePtr node1 = ts_manager_->AddTabletNode("127.0.0.1:2000", "1"); + TabletNodePtr node2 = ts_manager_->AddTabletNode("127.0.0.1:2001", "2"); + TabletNodePtr node3 = ts_manager_->AddTabletNode("127.0.0.1:2002", "3"); + + node1->table_size_[tablet_->GetTableName()] = 20; + node2->state_ = kOffline; + node3->data_size_ = 10; + load_proc_->TabletNodeOfflineHandler(TabletEvent::kTsOffline); + EXPECT_EQ(load_proc_->dest_node_->uuid_, node3->uuid_); + TabletNodePtr del_node = ts_manager_->DelTabletNode("127.0.0.1:2002"); + load_proc_->restarted_dest_node_ = ts_manager_->AddTabletNode("127.0.0.1:2002", "4"); + load_proc_->TabletNodeRestartHandler(TabletEvent::kTsRestart); + EXPECT_NE(load_proc_->dest_node_->uuid_, "3"); + EXPECT_EQ(del_node, node3); + EXPECT_TRUE(del_node->NodeDown()); + EXPECT_EQ(load_proc_->dest_node_->uuid_, "4"); + EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletOffline); + + load_proc_->TabletNodeBusyHandler(TabletEvent::kTsLoadBusy); + load_proc_->TabletPendOfflineHandler(TabletEvent::kTsDelayOffline); + EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletDelayOffline); + + tablet_->SetStatus(TabletMeta::kTabletLoading); + load_proc_->WaitRpcResponseHandler(TabletEvent::kWaitRpcResponse); + load_proc_->TabletNodeLoadSuccHandler(TabletEvent::kTsLoadSucc); + EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletReady); + + tablet_->SetStatus(TabletMeta::kTabletLoading); + load_proc_->TabletNodeLoadFailHandler(TabletEvent::kTsLoadFail); + EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletLoadFail); + tablet_->SetStatus(TabletMeta::kTabletOffline); + load_proc_->TabletLoadFailHandler(TabletEvent::kTabletLoadFail); + EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletLoadFail); + + // tablet can only transite to kTableOnLoad from kTableOffline, so set status + // to kTableOffline first + tablet_->SetStatus(TabletMeta::kTabletOffline); + load_proc_->dest_node_ = node1; + // load_proc_->LoadTabletHandler(TabletEvent::kLoadTablet); + // EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletLoading); + + tablet_->SetStatus(TabletMeta::kTabletReady); + load_proc_->EOFHandler(TabletEvent::kEofEvent); } TEST_F(LoadTabletProcedureTest, EOFPhaseHandler) { - std::shared_ptr proc_executor = MasterEnv().GetExecutor(); - - tablet_->load_fail_cnt_ = FLAGS_tablet_load_max_tried_ts; - tablet_->LockTransition(); - load_proc_->EOFHandler(TabletEvent::kEofEvent); - EXPECT_FALSE(tablet_->InTransition()); - EXPECT_EQ(proc_executor->procedures_.size(), 0); - - tablet_->load_fail_cnt_ = FLAGS_tablet_load_max_tried_ts + 1; - tablet_->LockTransition(); - load_proc_->EOFHandler(TabletEvent::kEofEvent); - EXPECT_FALSE(tablet_->InTransition()); - EXPECT_EQ(proc_executor->procedures_.size(), 0); - - tablet_->load_fail_cnt_ = FLAGS_tablet_load_max_tried_ts - 1; - tablet_->LockTransition(); - tablet_->SetStatus(TabletMeta::kTabletReady); - load_proc_->EOFHandler(TabletEvent::kEofEvent); - EXPECT_EQ(proc_executor->procedures_.size(), 0); - EXPECT_FALSE(tablet_->InTransition()); - - proc_executor->Start(); - tablet_->SetStatus(TabletMeta::kTabletLoadFail); - tablet_->LockTransition(); - load_proc_->EOFHandler(TabletEvent::kEofEvent); - EXPECT_EQ(proc_executor->procedures_.size(), 1); - EXPECT_TRUE(tablet_->InTransition()); - proc_executor->Stop(); + std::shared_ptr proc_executor = MasterEnv().GetExecutor(); + + tablet_->load_fail_cnt_ = FLAGS_tablet_load_max_tried_ts; + tablet_->LockTransition(); + load_proc_->EOFHandler(TabletEvent::kEofEvent); + EXPECT_FALSE(tablet_->InTransition()); + EXPECT_EQ(proc_executor->procedures_.size(), 0); + + tablet_->load_fail_cnt_ = FLAGS_tablet_load_max_tried_ts + 1; + tablet_->LockTransition(); + load_proc_->EOFHandler(TabletEvent::kEofEvent); + EXPECT_FALSE(tablet_->InTransition()); + EXPECT_EQ(proc_executor->procedures_.size(), 0); + + tablet_->load_fail_cnt_ = FLAGS_tablet_load_max_tried_ts - 1; + tablet_->LockTransition(); + tablet_->SetStatus(TabletMeta::kTabletReady); + load_proc_->EOFHandler(TabletEvent::kEofEvent); + EXPECT_EQ(proc_executor->procedures_.size(), 0); + EXPECT_FALSE(tablet_->InTransition()); + + proc_executor->Start(); + tablet_->SetStatus(TabletMeta::kTabletLoadFail); + tablet_->LockTransition(); + load_proc_->EOFHandler(TabletEvent::kEofEvent); + EXPECT_EQ(proc_executor->procedures_.size(), 1); + EXPECT_TRUE(tablet_->InTransition()); + proc_executor->Stop(); } -// those AsycLifeCycle* test cases should cause core dump. Since load_proc_ is dectructed, we cannot +// those AsycLifeCycle* test cases should cause core dump. Since load_proc_ is +// dectructed, we cannot // check its member fields TEST_F(LoadTabletProcedureTest, AsynLifeCycle_CallbackExceedProcedureCycle) { - tablet_->SetStatus(TabletMeta::kTabletReady); - TabletNodePtr tabletnode(new TabletNode("127.0.0.1:2000", "1234567")); - tabletnode->SetState(kReady, NULL); - load_proc_->dest_node_ = tabletnode; - load_proc_->LoadTabletAsync(tabletnode); - load_proc_.reset(); + tablet_->SetStatus(TabletMeta::kTabletReady); + TabletNodePtr tabletnode(new TabletNode("127.0.0.1:2000", "1234567")); + tabletnode->state_ = kReady; + load_proc_->dest_node_ = tabletnode; + load_proc_->LoadTabletAsync(tabletnode); + load_proc_.reset(); } TEST_F(LoadTabletProcedureTest, AsyncLifeCycleTest_DelayedLoadTaskExeedProcedureCycle) { - TabletNodePtr tabletnode(new TabletNode("127.0.0.1:2000", "1234567")); - tabletnode->SetState(kReady, NULL); - load_proc_->dest_node_ = tabletnode; - ThreadPool::Task task = std::bind(&LoadTabletProcedure::LoadTabletAsyncWrapper, - std::weak_ptr(load_proc_), tabletnode); - MasterEnv().GetThreadPool()->DelayTask(100, task); - MasterEnv().GetThreadPool()->Start(); - EXPECT_EQ(MasterEnv().GetThreadPool()->latest_.size(), 1); - load_proc_.reset(); - usleep(200 * 1000); - EXPECT_EQ(MasterEnv().GetThreadPool()->latest_.size(), 0); - MasterEnv().GetThreadPool()->Stop(false); + TabletNodePtr tabletnode(new TabletNode("127.0.0.1:2000", "1234567")); + tabletnode->state_ = kReady; + load_proc_->dest_node_ = tabletnode; + ThreadPool::Task task = std::bind(&LoadTabletProcedure::LoadTabletAsyncWrapper, + std::weak_ptr(load_proc_), tabletnode); + MasterEnv().GetThreadPool()->DelayTask(100, task); + MasterEnv().GetThreadPool()->Start(); + EXPECT_EQ(MasterEnv().GetThreadPool()->latest_.size(), 1); + load_proc_.reset(); + usleep(200 * 1000); + EXPECT_EQ(MasterEnv().GetThreadPool()->latest_.size(), 0); + MasterEnv().GetThreadPool()->Stop(false); +} } - - } } diff --git a/src/master/test/master_impl_test.cc b/src/master/test/master_impl_test.cc new file mode 100644 index 000000000..23445efd0 --- /dev/null +++ b/src/master/test/master_impl_test.cc @@ -0,0 +1,115 @@ +#include +#include "gflags/gflags.h" +#include "gtest/gtest.h" +#include "master/master_impl.h" +#include "master/test/mock_master_zk_adapter.h" + +DECLARE_double(tera_safemode_tablet_locality_ratio); + +namespace tera { +namespace master { +namespace test { + +class MasterImplTest : public ::testing::Test { + public: + MasterImplTest() { + master_impl_ = new MasterImpl(nullptr, nullptr); + master_impl_->state_machine_.curr_status_ = kIsRunning; + } + ~MasterImplTest() { + if (master_impl_) { + delete master_impl_; + master_impl_ = nullptr; + } + } + + void InitTabletNodesAndTablets(); + + private: + MasterImpl* master_impl_; + TabletNodePtr tabletnodes_[2]; + TablePtr table_; + TabletPtr tablets_[4]; +}; + +void MasterImplTest::InitTabletNodesAndTablets() { + master_impl_->zk_adapter_.reset(new TestZkAdapter); + + tabletnodes_[0] = master_impl_->tabletnode_manager_->AddTabletNode("127.0.0.1:1", "10000"); + tabletnodes_[1] = master_impl_->tabletnode_manager_->AddTabletNode("127.0.0.1:2", "20000"); + + TableSchema schema; + StatusCode status; + table_ = TabletManager::CreateTable("test", schema, kTableEnable); + EXPECT_TRUE(table_); + EXPECT_TRUE(master_impl_->tablet_manager_->AddTable(table_, &status)); + + TabletMeta tablet_meta; + TabletManager::PackTabletMeta(&tablet_meta, "test", "", "b", "test/tablet00000001", "", + TabletMeta::kTabletOffline, 0); + tablets_[0] = table_->AddTablet(tablet_meta, &status); + EXPECT_TRUE(tablets_[0]); + BindTabletToTabletNode(tablets_[0], tabletnodes_[0]); + tablets_[0]->SetStatus(TabletMeta::kTabletReady); + + TabletManager::PackTabletMeta(&tablet_meta, "test", "b", "d", "test/tablet00000002", "", + TabletMeta::kTabletOffline, 0); + tablets_[1] = table_->AddTablet(tablet_meta, &status); + EXPECT_TRUE(tablets_[1]); + BindTabletToTabletNode(tablets_[1], tabletnodes_[0]); + tablets_[1]->SetStatus(TabletMeta::kTabletReady); + + TabletManager::PackTabletMeta(&tablet_meta, "test", "d", "f", "test/tablet00000003", "", + TabletMeta::kTabletOffline, 0); + tablets_[2] = table_->AddTablet(tablet_meta, &status); + EXPECT_TRUE(tablets_[2]); + BindTabletToTabletNode(tablets_[2], tabletnodes_[1]); + tablets_[2]->SetStatus(TabletMeta::kTabletReady); + + TabletManager::PackTabletMeta(&tablet_meta, "test", "f", "", "test/tablet00000004", "", + TabletMeta::kTabletOffline, 0); + tablets_[3] = table_->AddTablet(tablet_meta, &status); + EXPECT_TRUE(tablets_[3]); + BindTabletToTabletNode(tablets_[3], tabletnodes_[1]); + tablets_[3]->SetStatus(TabletMeta::kTabletReady); + + master_impl_->meta_tablet_ = + master_impl_->tablet_manager_->AddMetaTablet(tabletnodes_[0], master_impl_->zk_adapter_); +} + +TEST_F(MasterImplTest, DeleteTsNotEnterSafemode) { + InitTabletNodesAndTablets(); + FLAGS_tera_safemode_tablet_locality_ratio = 0.3; + master_impl_->DeleteTabletNode("127.0.0.1:1", "10000"); + EXPECT_EQ(master_impl_->GetMasterStatus(), kIsRunning); + EXPECT_EQ(master_impl_->tabletnode_manager_->reconnecting_ts_list_.size(), 1); + EXPECT_EQ(tablets_[0]->GetStatus(), TabletMeta::kTabletDelayOffline); + EXPECT_EQ(tablets_[1]->GetStatus(), TabletMeta::kTabletDelayOffline); +} + +TEST_F(MasterImplTest, DeleteTsEnterSafemode) { + InitTabletNodesAndTablets(); + FLAGS_tera_safemode_tablet_locality_ratio = 0.8; + master_impl_->DeleteTabletNode("127.0.0.1:1", "10000"); + EXPECT_EQ(master_impl_->GetMasterStatus(), kIsReadonly); + EXPECT_EQ(master_impl_->tabletnode_manager_->reconnecting_ts_list_.size(), 0); + EXPECT_EQ(tablets_[0]->GetStatus(), TabletMeta::kTabletOffline); + EXPECT_EQ(tablets_[1]->GetStatus(), TabletMeta::kTabletOffline); +} + +TEST_F(MasterImplTest, TabletNodeReconnect) { + InitTabletNodesAndTablets(); + FLAGS_tera_safemode_tablet_locality_ratio = 0.3; + master_impl_->DeleteTabletNode("127.0.0.1:2", "20000"); + EXPECT_EQ(master_impl_->tabletnode_manager_->reconnecting_ts_list_.size(), 1); + EXPECT_EQ(master_impl_->GetMasterStatus(), kIsRunning); + EXPECT_EQ(tablets_[2]->GetStatus(), TabletMeta::kTabletDelayOffline); + EXPECT_EQ(tablets_[3]->GetStatus(), TabletMeta::kTabletDelayOffline); + master_impl_->AddTabletNode("127.0.0.1:2", "20001"); // reconnect with new uuid + EXPECT_EQ(master_impl_->tabletnode_manager_->reconnecting_ts_list_.size(), 0); + EXPECT_EQ(tablets_[2]->GetStatus(), TabletMeta::kTabletOffline); + EXPECT_EQ(tablets_[3]->GetStatus(), TabletMeta::kTabletOffline); +} +} +} +} diff --git a/src/master/test/master_state_machine_test.cc b/src/master/test/master_state_machine_test.cc new file mode 100644 index 000000000..407a28565 --- /dev/null +++ b/src/master/test/master_state_machine_test.cc @@ -0,0 +1,53 @@ +#include +#include "gflags/gflags.h" +#include "gtest/gtest.h" +#include "master/master_state_machine.h" + +namespace tera { +namespace master { +namespace test { + +class MasterStateMachineTest : public ::testing::Test { + public: + MasterStateMachineTest() : state_machine_(kIsSecondary) {} + virtual ~MasterStateMachineTest() {} + + virtual void SetUp() {} + virtual void TearDown() {} + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + private: + bool TransitFromState(const MasterStatus status, const MasterEvent event) { + state_machine_.curr_status_ = status; + return state_machine_.DoStateTransition(event); + } + + MasterStateMachine state_machine_; +}; + +TEST_F(MasterStateMachineTest, LegalTransition) { + EXPECT_TRUE(TransitFromState(kIsSecondary, MasterEvent::kGetMasterLock)); + EXPECT_EQ(state_machine_.GetState(), kOnRestore); + EXPECT_TRUE(TransitFromState(kOnRestore, MasterEvent::kNoAvailTs)); + EXPECT_EQ(state_machine_.GetState(), kOnWait); + EXPECT_TRUE(TransitFromState(kOnRestore, MasterEvent::kMetaRestored)); + EXPECT_EQ(state_machine_.GetState(), kIsReadonly); + EXPECT_TRUE(TransitFromState(kOnRestore, MasterEvent::kLostMasterLock)); + EXPECT_EQ(state_machine_.GetState(), kIsSecondary); + EXPECT_TRUE(TransitFromState(kOnWait, MasterEvent::kAvailTs)); + EXPECT_EQ(state_machine_.GetState(), kOnRestore); + EXPECT_TRUE(TransitFromState(kOnWait, MasterEvent::kLostMasterLock)); + EXPECT_EQ(state_machine_.GetState(), kIsSecondary); + EXPECT_TRUE(TransitFromState(kIsReadonly, MasterEvent::kLeaveSafemode)); + EXPECT_EQ(state_machine_.GetState(), kIsRunning); + EXPECT_TRUE(TransitFromState(kIsReadonly, MasterEvent::kLostMasterLock)); + EXPECT_EQ(state_machine_.GetState(), kIsSecondary); + EXPECT_TRUE(TransitFromState(kIsRunning, MasterEvent::kEnterSafemode)); + EXPECT_EQ(state_machine_.GetState(), kIsReadonly); + EXPECT_TRUE(TransitFromState(kIsReadonly, MasterEvent::kLostMasterLock)); + EXPECT_EQ(state_machine_.GetState(), kIsSecondary); +} +} +} +} diff --git a/src/master/test/master_test.cc b/src/master/test/master_test.cc index d0ecfb87f..0aff08333 100644 --- a/src/master/test/master_test.cc +++ b/src/master/test/master_test.cc @@ -11,12 +11,11 @@ DECLARE_string(tera_leveldb_env_type); int main(int argc, char** argv) { - ::google::ParseCommandLineFlags(&argc, &argv, true); - ::google::InitGoogleLogging(argv[0]); - tera::utils::SetupLog("master_test"); - FLAGS_tera_leveldb_env_type = "local"; - ::testing::InitGoogleTest(&argc, argv); + ::google::ParseCommandLineFlags(&argc, &argv, true); + ::google::InitGoogleLogging(argv[0]); + tera::utils::SetupLog("master_test"); + FLAGS_tera_leveldb_env_type = "local"; + ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + return RUN_ALL_TESTS(); } - diff --git a/src/master/test/merge_tablet_procedure_test.cc b/src/master/test/merge_tablet_procedure_test.cc index 9e00de7d8..c66b5f452 100644 --- a/src/master/test/merge_tablet_procedure_test.cc +++ b/src/master/test/merge_tablet_procedure_test.cc @@ -23,238 +23,236 @@ namespace test { using leveldb::EnvOptions; class MergeTabletProcedureTest : public ::testing::Test { -public: - MergeTabletProcedureTest() : tablet_manager_(new TabletManager(nullptr, nullptr, nullptr)), - ts_manager_(new TabletNodeManager(nullptr)), - proc_executor_(new ProcedureExecutor) {} - - virtual ~MergeTabletProcedureTest() {} + public: + MergeTabletProcedureTest() + : tablet_manager_(new TabletManager(nullptr, nullptr, nullptr)), + ts_manager_(new TabletNodeManager(nullptr)), + proc_executor_(new ProcedureExecutor) {} - virtual void SetUp() { - FLAGS_tera_leveldb_env_type.assign("local"); - FLAGS_tera_tabletnode_path_prefix.assign("./merge_tablet_procedure/"); - InitMasterEnv(); - TableSchema schema; - StatusCode ret_code; - table_ = TabletManager::CreateTable("test", schema, kTableEnable); - EXPECT_TRUE(table_); - EXPECT_TRUE(tablet_manager_->AddTable(table_, &ret_code)); - TabletMeta tablet_meta; - TabletManager::PackTabletMeta(&tablet_meta, "test", "a", "b", "test/tablet00000001", "", TabletMeta::kTabletOffline, 10); - tablets_[0] = TabletManager::CreateTablet(table_, tablet_meta); - TabletManager::PackTabletMeta(&tablet_meta, "test", "b", "c", "test/tablet00000002", "", TabletMeta::kTabletOffline, 10); - tablets_[1] = TabletManager::CreateTablet(table_, tablet_meta); - EXPECT_TRUE(tablets_[0]); - EXPECT_TRUE(tablets_[1]); - EXPECT_TRUE(table_->AddTablet(tablets_[0], &ret_code)); - EXPECT_TRUE(table_->AddTablet(tablets_[1], &ret_code)); + virtual ~MergeTabletProcedureTest() {} + virtual void SetUp() { + FLAGS_tera_leveldb_env_type.assign("local"); + FLAGS_tera_tabletnode_path_prefix.assign("./merge_tablet_procedure/"); + InitMasterEnv(); + TableSchema schema; + StatusCode ret_code; + table_ = TabletManager::CreateTable("test", schema, kTableEnable); + EXPECT_TRUE(table_); + EXPECT_TRUE(tablet_manager_->AddTable(table_, &ret_code)); + TabletMeta tablet_meta; + TabletManager::PackTabletMeta(&tablet_meta, "test", "a", "b", "test/tablet00000001", "", + TabletMeta::kTabletOffline, 10); + tablets_[0] = table_->AddTablet(tablet_meta, &ret_code); + TabletManager::PackTabletMeta(&tablet_meta, "test", "b", "c", "test/tablet00000002", "", + TabletMeta::kTabletOffline, 10); + tablets_[1] = table_->AddTablet(tablet_meta, &ret_code); + EXPECT_TRUE(tablets_[0]); + EXPECT_TRUE(tablets_[1]); + proc_executor_->running_ = true; -// EXPECT_TRUE(tablet_manager_->AddTablet("test", -// "a", "b", "test/tablet00000001", "", schema, 10, &tablets_[0], &ret_code)); -// EXPECT_TRUE(tablet_manager_->AddTablet("test", -// "b", "c", "test/tablet00000002", "", schema, 10, &tablets_[1], &ret_code)); - proc_executor_->running_ = true; - - const ::testing::TestInfo* test_case = ::testing::UnitTest::GetInstance()->current_test_info(); - std::string test_name(test_case->name()); - if (test_name.find("PostUnloadTabletsPhase") != std::string::npos) { - InitFileSystemForMerge(); - } - - } - virtual void TearDown() { - proc_executor_->running_ = false; + const ::testing::TestInfo* test_case = ::testing::UnitTest::GetInstance()->current_test_info(); + std::string test_name(test_case->name()); + if (test_name.find("PostUnloadTabletsPhase") != std::string::npos) { + InitFileSystemForMerge(); } + } + virtual void TearDown() { proc_executor_->running_ = false; } - static void SetUpTestCase() {} - static void TearDownTestCase() { - - } -private: - void InitMasterEnv() { - MasterEnv().Init(nullptr, ts_manager_, tablet_manager_, - nullptr, nullptr, std::shared_ptr(new ThreadPool), proc_executor_, - std::shared_ptr(new TabletAvailability(tablet_manager_)), nullptr); - // push one element to the queue, avoiding call TryMoveTablet while call SuspendMetaOperation - MasterEnv().meta_task_queue_.push(nullptr); - } + static void SetUpTestCase() {} + static void TearDownTestCase() {} - void InitFileSystemForMerge() { - fs_env_ = io::LeveldbBaseEnv(); - std::string table_path = FLAGS_tera_tabletnode_path_prefix + tablets_[0]->GetTableName(); - EXPECT_TRUE(fs_env_->CreateDir(table_path).ok()); - std::string tablet1_path = FLAGS_tera_tabletnode_path_prefix + tablets_[0]->GetPath(); - EXPECT_TRUE(fs_env_->CreateDir(tablet1_path).ok()); - std::string tablet2_path = FLAGS_tera_tabletnode_path_prefix + tablets_[1]->GetPath(); - EXPECT_TRUE(fs_env_->CreateDir(tablet2_path).ok()); - tablets_[0]->SetStatus(TabletMeta::kTabletReady); - tablets_[1]->SetStatus(TabletMeta::kTabletReady); - merge_proc_.reset(new MergeTabletProcedure(tablets_[0], tablets_[1], MasterEnv().GetThreadPool().get())); - } + private: + void InitMasterEnv() { + MasterEnv().Init(nullptr, ts_manager_, tablet_manager_, access_builder_, nullptr, nullptr, + nullptr, std::shared_ptr(new ThreadPool), proc_executor_, + std::shared_ptr(new TabletAvailability(tablet_manager_)), + nullptr); + // push one element to the queue, avoiding call TryMoveTablet while call + // SuspendMetaOperation + MasterEnv().meta_task_queue_.push(nullptr); + } -private: - TablePtr table_; - TabletPtr tablets_[2]; - std::shared_ptr merge_proc_; - std::shared_ptr tablet_manager_; - std::shared_ptr ts_manager_; - std::shared_ptr proc_executor_; - leveldb::Env* fs_env_; + void InitFileSystemForMerge() { + fs_env_ = io::LeveldbBaseEnv(); + std::string table_path = FLAGS_tera_tabletnode_path_prefix + tablets_[0]->GetTableName(); + EXPECT_TRUE(fs_env_->CreateDir(table_path).ok()); + std::string tablet1_path = FLAGS_tera_tabletnode_path_prefix + tablets_[0]->GetPath(); + EXPECT_TRUE(fs_env_->CreateDir(tablet1_path).ok()); + std::string tablet2_path = FLAGS_tera_tabletnode_path_prefix + tablets_[1]->GetPath(); + EXPECT_TRUE(fs_env_->CreateDir(tablet2_path).ok()); + tablets_[0]->SetStatus(TabletMeta::kTabletReady); + tablets_[1]->SetStatus(TabletMeta::kTabletReady); + merge_proc_.reset( + new MergeTabletProcedure(tablets_[0], tablets_[1], MasterEnv().GetThreadPool().get())); + } + + private: + TablePtr table_; + TabletPtr tablets_[2]; + std::shared_ptr merge_proc_; + std::shared_ptr tablet_manager_; + std::shared_ptr access_builder_; + std::shared_ptr ts_manager_; + std::shared_ptr proc_executor_; + leveldb::Env* fs_env_; }; TEST_F(MergeTabletProcedureTest, MergeTabletProcedureInit) { - tablets_[0]->SetStatus(TabletMeta::kTabletReady); - tablets_[1]->SetStatus(TabletMeta::kTabletReady); - merge_proc_.reset(new MergeTabletProcedure(tablets_[0], tablets_[1], MasterEnv().GetThreadPool().get())); - EXPECT_EQ(merge_proc_->phases_.back(), MergeTabletPhase::kUnLoadTablets); - merge_proc_.reset(new MergeTabletProcedure(tablets_[1], tablets_[0], MasterEnv().GetThreadPool().get())); - EXPECT_EQ(merge_proc_->phases_.back(), MergeTabletPhase::kUnLoadTablets); - TabletMeta tablet_meta; - TabletManager::PackTabletMeta(&tablet_meta, "test", "ab", "c", "test/tablet00000001", "", TabletMeta::kTabletOffline, 10); - StatusCode ret_code; - TabletPtr tablet = TabletManager::CreateTablet(table_, tablet_meta); - EXPECT_TRUE(table_->AddTablet(tablet, &ret_code)); - tablet->SetStatus(TabletMeta::kTabletReady); - merge_proc_.reset(new MergeTabletProcedure(tablets_[0], tablet, MasterEnv().GetThreadPool().get())); - EXPECT_EQ(merge_proc_->phases_.back(), MergeTabletPhase::kEofPhase); - std::cout << merge_proc_->ProcId() << std::endl; + tablets_[0]->SetStatus(TabletMeta::kTabletReady); + tablets_[1]->SetStatus(TabletMeta::kTabletReady); + merge_proc_.reset( + new MergeTabletProcedure(tablets_[0], tablets_[1], MasterEnv().GetThreadPool().get())); + EXPECT_EQ(merge_proc_->phases_.back(), MergeTabletPhase::kUnLoadTablets); + merge_proc_.reset( + new MergeTabletProcedure(tablets_[1], tablets_[0], MasterEnv().GetThreadPool().get())); + EXPECT_EQ(merge_proc_->phases_.back(), MergeTabletPhase::kUnLoadTablets); + TabletMeta tablet_meta; + TabletManager::PackTabletMeta(&tablet_meta, "test", "ab", "c", "test/tablet00000001", "", + TabletMeta::kTabletOffline, 10); + StatusCode ret_code; + TabletPtr tablet = table_->AddTablet(tablet_meta, &ret_code); + EXPECT_TRUE(tablet); + tablet->SetStatus(TabletMeta::kTabletReady); + merge_proc_.reset( + new MergeTabletProcedure(tablets_[0], tablet, MasterEnv().GetThreadPool().get())); + EXPECT_EQ(merge_proc_->phases_.back(), MergeTabletPhase::kEofPhase); + std::cout << merge_proc_->ProcId() << std::endl; } TEST_F(MergeTabletProcedureTest, UpdateMeta) { - tablets_[0]->SetStatus(TabletMeta::kTabletReady); - tablets_[1]->SetStatus(TabletMeta::kTabletReady); - merge_proc_.reset(new MergeTabletProcedure(tablets_[0], tablets_[1], MasterEnv().GetThreadPool().get())); - TabletNodePtr node = ts_manager_->AddTabletNode("127.0.0.1:2000", "1234567"); - tablets_[0]->AssignTabletNode(node); - tablets_[1]->AssignTabletNode(node); - EXPECT_FALSE(merge_proc_->merged_); - merge_proc_->UpdateMeta(); - EXPECT_TRUE(merge_proc_->merged_); - EXPECT_FALSE(merge_proc_->merged_->InTransition()); - EXPECT_EQ(merge_proc_->merged_->GetKeyStart(), tablets_[0]->GetKeyStart()); - EXPECT_EQ(merge_proc_->merged_->GetKeyEnd(), tablets_[1]->GetKeyEnd()); + tablets_[0]->SetStatus(TabletMeta::kTabletReady); + tablets_[1]->SetStatus(TabletMeta::kTabletReady); + merge_proc_.reset( + new MergeTabletProcedure(tablets_[0], tablets_[1], MasterEnv().GetThreadPool().get())); + TabletNodePtr node = ts_manager_->AddTabletNode("127.0.0.1:2000", "1234567"); + tablets_[0]->AssignTabletNode(node); + tablets_[1]->AssignTabletNode(node); + EXPECT_FALSE(merge_proc_->merged_); + merge_proc_->UpdateMeta(); + EXPECT_TRUE(merge_proc_->merged_); + EXPECT_FALSE(merge_proc_->merged_->InTransition()); + EXPECT_EQ(merge_proc_->merged_->GetKeyStart(), tablets_[0]->GetKeyStart()); + EXPECT_EQ(merge_proc_->merged_->GetKeyEnd(), tablets_[1]->GetKeyEnd()); - merge_proc_.reset(new MergeTabletProcedure(tablets_[1], tablets_[0], MasterEnv().GetThreadPool().get())); - merge_proc_->UpdateMeta(); - EXPECT_EQ(merge_proc_->merged_->GetKeyStart(), tablets_[0]->GetKeyStart()); - EXPECT_EQ(merge_proc_->merged_->GetKeyEnd(), tablets_[1]->GetKeyEnd()); + merge_proc_.reset( + new MergeTabletProcedure(tablets_[1], tablets_[0], MasterEnv().GetThreadPool().get())); + merge_proc_->UpdateMeta(); + EXPECT_EQ(merge_proc_->merged_->GetKeyStart(), tablets_[0]->GetKeyStart()); + EXPECT_EQ(merge_proc_->merged_->GetKeyEnd(), tablets_[1]->GetKeyEnd()); - merge_proc_->MergeUpdateMetaDone(true); - EXPECT_EQ(merge_proc_->phases_.back(), MergeTabletPhase::kLoadMergedTablet); + merge_proc_->MergeUpdateMetaDone(true); + EXPECT_EQ(merge_proc_->phases_.back(), MergeTabletPhase::kLoadMergedTablet); } TEST_F(MergeTabletProcedureTest, UnloadTabletsPhase) { - TabletNodePtr node = ts_manager_->AddTabletNode("127.0.0.1:2000", "1234567"); - node->SetState(kReady, NULL); - tablets_[0]->SetStatus(TabletMeta::kTabletReady); - tablets_[1]->SetStatus(TabletMeta::kTabletReady); - tablets_[0]->AssignTabletNode(node); - tablets_[1]->AssignTabletNode(node); - merge_proc_.reset(new MergeTabletProcedure(tablets_[0], tablets_[1], MasterEnv().GetThreadPool().get())); - EXPECT_FALSE(merge_proc_->unload_procs_[0]); - EXPECT_FALSE(merge_proc_->unload_procs_[1]); + TabletNodePtr node = ts_manager_->AddTabletNode("127.0.0.1:2000", "1234567"); + node->state_ = kReady; + tablets_[0]->SetStatus(TabletMeta::kTabletReady); + tablets_[1]->SetStatus(TabletMeta::kTabletReady); + tablets_[0]->AssignTabletNode(node); + tablets_[1]->AssignTabletNode(node); + merge_proc_.reset( + new MergeTabletProcedure(tablets_[0], tablets_[1], MasterEnv().GetThreadPool().get())); + EXPECT_FALSE(merge_proc_->unload_procs_[0]); + EXPECT_FALSE(merge_proc_->unload_procs_[1]); - merge_proc_->UnloadTabletsPhaseHandler(MergeTabletPhase::kUnLoadTablets); - EXPECT_TRUE(merge_proc_->unload_procs_[0]); - EXPECT_TRUE(merge_proc_->unload_procs_[1]); - EXPECT_FALSE(merge_proc_->unload_procs_[0]->Done()); - EXPECT_FALSE(merge_proc_->unload_procs_[1]->Done()); - EXPECT_EQ(proc_executor_->procedures_.size(), 2); - EXPECT_EQ(proc_executor_->procedures_[1]->proc_, merge_proc_->unload_procs_[0]); - EXPECT_EQ(proc_executor_->procedures_[2]->proc_, merge_proc_->unload_procs_[1]); - std::shared_ptr unload_proc_0 = - std::dynamic_pointer_cast(merge_proc_->unload_procs_[0]); - std::shared_ptr unload_proc_1 = - std::dynamic_pointer_cast(merge_proc_->unload_procs_[1]); - unload_proc_0->done_ = true; - merge_proc_->UnloadTabletsPhaseHandler(MergeTabletPhase::kUnLoadTablets); - unload_proc_1->done_ = true; - merge_proc_->UnloadTabletsPhaseHandler(MergeTabletPhase::kUnLoadTablets); - EXPECT_EQ(merge_proc_->phases_.back(), MergeTabletPhase::kEofPhase); - - tablets_[0]->SetStatus(TabletMeta::kTabletOffline); - tablets_[1]->SetStatus(TabletMeta::kTabletOffline); - merge_proc_->UnloadTabletsPhaseHandler(MergeTabletPhase::kUnLoadTablets); - EXPECT_EQ(merge_proc_->phases_.back(), MergeTabletPhase::kPostUnLoadTablets); + merge_proc_->UnloadTabletsPhaseHandler(MergeTabletPhase::kUnLoadTablets); + EXPECT_TRUE(merge_proc_->unload_procs_[0]); + EXPECT_TRUE(merge_proc_->unload_procs_[1]); + EXPECT_FALSE(merge_proc_->unload_procs_[0]->Done()); + EXPECT_FALSE(merge_proc_->unload_procs_[1]->Done()); + EXPECT_EQ(proc_executor_->procedures_.size(), 2); + EXPECT_EQ(proc_executor_->procedures_[1]->proc_, merge_proc_->unload_procs_[0]); + EXPECT_EQ(proc_executor_->procedures_[2]->proc_, merge_proc_->unload_procs_[1]); + std::shared_ptr unload_proc_0 = + std::dynamic_pointer_cast(merge_proc_->unload_procs_[0]); + std::shared_ptr unload_proc_1 = + std::dynamic_pointer_cast(merge_proc_->unload_procs_[1]); + unload_proc_0->done_ = true; + merge_proc_->UnloadTabletsPhaseHandler(MergeTabletPhase::kUnLoadTablets); + unload_proc_1->done_ = true; + merge_proc_->UnloadTabletsPhaseHandler(MergeTabletPhase::kUnLoadTablets); + EXPECT_EQ(merge_proc_->phases_.back(), MergeTabletPhase::kEofPhase); + tablets_[0]->SetStatus(TabletMeta::kTabletOffline); + tablets_[1]->SetStatus(TabletMeta::kTabletOffline); + merge_proc_->UnloadTabletsPhaseHandler(MergeTabletPhase::kUnLoadTablets); + EXPECT_EQ(merge_proc_->phases_.back(), MergeTabletPhase::kPostUnLoadTablets); } TEST_F(MergeTabletProcedureTest, LoadMergedTabletPhase) { - TabletNodePtr node = ts_manager_->AddTabletNode("127.0.0.1:2000", "1234567"); - node->SetState(kReady, NULL); - tablets_[0]->SetStatus(TabletMeta::kTabletReady); - tablets_[1]->SetStatus(TabletMeta::kTabletReady); - tablets_[0]->AssignTabletNode(node); - tablets_[1]->AssignTabletNode(node); - merge_proc_.reset(new MergeTabletProcedure(tablets_[0], tablets_[1], MasterEnv().GetThreadPool().get())); - merge_proc_->UpdateMeta(); - EXPECT_TRUE(merge_proc_->merged_); - EXPECT_FALSE(merge_proc_->load_proc_); + TabletNodePtr node = ts_manager_->AddTabletNode("127.0.0.1:2000", "1234567"); + node->state_ = kReady; + tablets_[0]->SetStatus(TabletMeta::kTabletReady); + tablets_[1]->SetStatus(TabletMeta::kTabletReady); + tablets_[0]->AssignTabletNode(node); + tablets_[1]->AssignTabletNode(node); + merge_proc_.reset( + new MergeTabletProcedure(tablets_[0], tablets_[1], MasterEnv().GetThreadPool().get())); + merge_proc_->UpdateMeta(); + EXPECT_TRUE(merge_proc_->merged_); + EXPECT_FALSE(merge_proc_->load_proc_); - merge_proc_->LoadMergedTabletPhaseHandler(MergeTabletPhase::kLoadMergedTablet); - EXPECT_TRUE(merge_proc_->load_proc_); - EXPECT_EQ(merge_proc_->load_proc_, proc_executor_->procedures_[1]->proc_); - std::shared_ptr load_proc = - std::dynamic_pointer_cast(merge_proc_->load_proc_); - EXPECT_FALSE(merge_proc_->load_proc_->Done()); - EXPECT_EQ(merge_proc_->load_proc_, proc_executor_->procedures_[1]->proc_); - EXPECT_EQ(proc_executor_->procedures_.size(), 1); - load_proc->done_ = true; - merge_proc_->LoadMergedTabletPhaseHandler(MergeTabletPhase::kLoadMergedTablet); - EXPECT_EQ(merge_proc_->phases_.back(), MergeTabletPhase::kEofPhase); + merge_proc_->LoadMergedTabletPhaseHandler(MergeTabletPhase::kLoadMergedTablet); + EXPECT_TRUE(merge_proc_->load_proc_); + EXPECT_EQ(merge_proc_->load_proc_, proc_executor_->procedures_[1]->proc_); + std::shared_ptr load_proc = + std::dynamic_pointer_cast(merge_proc_->load_proc_); + EXPECT_FALSE(merge_proc_->load_proc_->Done()); + EXPECT_EQ(merge_proc_->load_proc_, proc_executor_->procedures_[1]->proc_); + EXPECT_EQ(proc_executor_->procedures_.size(), 1); + load_proc->done_ = true; + merge_proc_->LoadMergedTabletPhaseHandler(MergeTabletPhase::kLoadMergedTablet); + EXPECT_EQ(merge_proc_->phases_.back(), MergeTabletPhase::kEofPhase); } TEST_F(MergeTabletProcedureTest, EOFPhaseHandler) { - tablets_[0]->SetStatus(TabletMeta::kTabletReady); - tablets_[1]->SetStatus(TabletMeta::kTabletReady); - merge_proc_.reset(new MergeTabletProcedure(tablets_[0], tablets_[1], MasterEnv().GetThreadPool().get())); - //TableSchema schema; - StatusCode ret_code; - TabletMeta meta; - meta.set_table_name("test"); - meta.set_path("test/tablet00000003"); - meta.mutable_key_range()->set_key_start("a"); - meta.mutable_key_range()->set_key_end("c"); - merge_proc_->merged_.reset(new Tablet(meta, tablets_[0]->GetTable())); - //EXPECT_TRUE(tablet_manager_->AddTablet("test", - // "a", "c", "test/tablet00000003", "", schema, 1, &merge_proc_->merged_, &ret_code)); - EXPECT_FALSE(merge_proc_->merged_->InTransition()); - merge_proc_->merged_->LockTransition(); - EXPECT_TRUE(merge_proc_->merged_->InTransition()); - EXPECT_FALSE(merge_proc_->done_); - merge_proc_->EOFPhaseHandler(MergeTabletPhase::kEofPhase); - EXPECT_TRUE(merge_proc_->merged_->InTransition()); - EXPECT_TRUE(merge_proc_->done_); + tablets_[0]->SetStatus(TabletMeta::kTabletReady); + tablets_[1]->SetStatus(TabletMeta::kTabletReady); + merge_proc_.reset( + new MergeTabletProcedure(tablets_[0], tablets_[1], MasterEnv().GetThreadPool().get())); + // TableSchema schema; + StatusCode ret_code; + TabletMeta meta; + meta.set_table_name("test"); + meta.set_path("test/tablet00000003"); + meta.mutable_key_range()->set_key_start("a"); + meta.mutable_key_range()->set_key_end("c"); + merge_proc_->merged_.reset(new Tablet(meta, tablets_[0]->GetTable())); + EXPECT_FALSE(merge_proc_->merged_->InTransition()); + merge_proc_->merged_->LockTransition(); + EXPECT_TRUE(merge_proc_->merged_->InTransition()); + EXPECT_FALSE(merge_proc_->done_); + merge_proc_->EOFPhaseHandler(MergeTabletPhase::kEofPhase); + EXPECT_TRUE(merge_proc_->merged_->InTransition()); + EXPECT_TRUE(merge_proc_->done_); } TEST_F(MergeTabletProcedureTest, PostUnloadTabletsPhaseCheck_Ok) { - - merge_proc_->PostUnloadTabletsPhaseHandler(MergeTabletPhase::kPostUnLoadTablets); - EXPECT_EQ(merge_proc_->phases_.back(), MergeTabletPhase::kUpdateMeta); + merge_proc_->PostUnloadTabletsPhaseHandler(MergeTabletPhase::kPostUnLoadTablets); + EXPECT_EQ(merge_proc_->phases_.back(), MergeTabletPhase::kUpdateMeta); } TEST_F(MergeTabletProcedureTest, PostUnloadTabletsPhaseCheck_FirstTabletNotOk) { - std::string log_path =FLAGS_tera_tabletnode_path_prefix + - "/" + leveldb::LogHexFileName(tablets_[0]->GetPath(), 123); - leveldb::WritableFile* log_file; - EXPECT_TRUE(fs_env_->NewWritableFile(log_path, &log_file, EnvOptions()).ok()); - delete log_file; - merge_proc_->PostUnloadTabletsPhaseHandler(MergeTabletPhase::kPostUnLoadTablets); - EXPECT_EQ(merge_proc_->phases_.back(), MergeTabletPhase::kFaultRecover); + std::string log_path = FLAGS_tera_tabletnode_path_prefix + "/" + + leveldb::LogHexFileName(tablets_[0]->GetPath(), 123); + leveldb::WritableFile* log_file; + EXPECT_TRUE(fs_env_->NewWritableFile(log_path, &log_file, EnvOptions()).ok()); + delete log_file; + merge_proc_->PostUnloadTabletsPhaseHandler(MergeTabletPhase::kPostUnLoadTablets); + EXPECT_EQ(merge_proc_->phases_.back(), MergeTabletPhase::kFaultRecover); } TEST_F(MergeTabletProcedureTest, PostUnloadTabletsPhaseCheck_SecondTabletNotOk) { - std::string log_path =FLAGS_tera_tabletnode_path_prefix + - "/" + leveldb::LogHexFileName(tablets_[1]->GetPath(), 123); - leveldb::WritableFile* log_file; - EXPECT_TRUE(fs_env_->NewWritableFile(log_path, &log_file, EnvOptions()).ok()); - delete log_file; - merge_proc_->PostUnloadTabletsPhaseHandler(MergeTabletPhase::kPostUnLoadTablets); - EXPECT_EQ(merge_proc_->phases_.back(), MergeTabletPhase::kFaultRecover); + std::string log_path = FLAGS_tera_tabletnode_path_prefix + "/" + + leveldb::LogHexFileName(tablets_[1]->GetPath(), 123); + leveldb::WritableFile* log_file; + EXPECT_TRUE(fs_env_->NewWritableFile(log_path, &log_file, EnvOptions()).ok()); + delete log_file; + merge_proc_->PostUnloadTabletsPhaseHandler(MergeTabletPhase::kPostUnLoadTablets); + EXPECT_EQ(merge_proc_->phases_.back(), MergeTabletPhase::kFaultRecover); } - - } } } diff --git a/src/master/test/mock_master_zk_adapter.h b/src/master/test/mock_master_zk_adapter.h new file mode 100644 index 000000000..ffdd76f16 --- /dev/null +++ b/src/master/test/mock_master_zk_adapter.h @@ -0,0 +1,16 @@ +#include "master/master_zk_adapter.h" + +namespace tera { +namespace master { +namespace test { + +class TestZkAdapter : public MasterZkAdapter { + public: + TestZkAdapter() : MasterZkAdapter(nullptr, std::string("")) {} + virtual bool UpdateRootTabletNode(const std::string& addr) { return true; } + + bool MarkSafeMode() { return true; } +}; +} +} +} diff --git a/src/master/test/move_tablet_procedure_test.cc b/src/master/test/move_tablet_procedure_test.cc index 2e25f2a35..592614db0 100644 --- a/src/master/test/move_tablet_procedure_test.cc +++ b/src/master/test/move_tablet_procedure_test.cc @@ -14,140 +14,156 @@ namespace tera { namespace master { namespace test { class MoveTabletProcedureTest : public ::testing::Test { -public: - MoveTabletProcedureTest() : tablet_manager_(new TabletManager(nullptr, nullptr, nullptr)), - ts_manager_(new TabletNodeManager(new MasterImpl)), - proc_executor_(new ProcedureExecutor), - tablet_availability_(new TabletAvailability(tablet_manager_)) {} - virtual ~MoveTabletProcedureTest() {} - - virtual void SetUp() { - InitMasterEnv(); - TableSchema schema; - StatusCode ret_code; - table_ = TabletManager::CreateTable("test", schema, kTableEnable); - EXPECT_TRUE(table_); - EXPECT_TRUE(tablet_manager_->AddTable(table_, &ret_code)); - TabletMeta tablet_meta; - TabletManager::PackTabletMeta(&tablet_meta, "test", "", "", "test/tablet00000001", "", TabletMeta::kTabletOffline, 0); - tablet_ = TabletManager::CreateTablet(table_, tablet_meta); - EXPECT_TRUE(tablet_); - EXPECT_TRUE(table_->AddTablet(tablet_, &ret_code)); - //EXPECT_TRUE(tablet_manager_->AddTablet("test", "", "", "test/tablet00000001", "", schema, 0, &tablet_, &ret_code)); - src_node_ = ts_manager_->AddTabletNode("127.0.0.1:2000", "1234567"); - EXPECT_EQ(src_node_->GetState(), kReady); - dest_node_= ts_manager_->AddTabletNode("127.0.0.2:2000", "1234568"); - EXPECT_EQ(dest_node_->GetState(), kReady); - // pretend proc_executor is running - proc_executor_->running_ = true; - } - virtual void TearDown() { - proc_executor_->running_ = false; - } - - static void SetUpTestCase() {} - static void TearDownTestCase() {} -private: - void InitMasterEnv(); - -private: - TablePtr table_; - TabletPtr tablet_; - TabletNodePtr src_node_; - TabletNodePtr dest_node_; - std::shared_ptr move_proc_; - std::shared_ptr tablet_manager_; - std::shared_ptr ts_manager_; - std::shared_ptr tablet_availability_; - std::shared_ptr proc_executor_; + public: + MoveTabletProcedureTest() + : tablet_manager_(new TabletManager(nullptr, nullptr, nullptr)), + ts_manager_(new TabletNodeManager(new MasterImpl(nullptr, nullptr))), + proc_executor_(new ProcedureExecutor), + tablet_availability_(new TabletAvailability(tablet_manager_)) {} + virtual ~MoveTabletProcedureTest() {} + + virtual void SetUp() { + InitMasterEnv(); + TableSchema schema; + StatusCode ret_code; + table_ = TabletManager::CreateTable("test", schema, kTableEnable); + EXPECT_TRUE(table_); + EXPECT_TRUE(tablet_manager_->AddTable(table_, &ret_code)); + TabletMeta tablet_meta; + TabletManager::PackTabletMeta(&tablet_meta, "test", "", "", "test/tablet00000001", "", + TabletMeta::kTabletOffline, 0); + tablet_ = table_->AddTablet(tablet_meta, &ret_code); + EXPECT_TRUE(tablet_); + src_node_ = ts_manager_->AddTabletNode("127.0.0.1:2000", "1234567"); + EXPECT_EQ(src_node_->GetState(), kReady); + dest_node_ = ts_manager_->AddTabletNode("127.0.0.2:2000", "1234568"); + EXPECT_EQ(dest_node_->GetState(), kReady); + // pretend proc_executor is running + proc_executor_->running_ = true; + } + virtual void TearDown() { proc_executor_->running_ = false; } + + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + private: + void InitMasterEnv(); + + private: + TablePtr table_; + TabletPtr tablet_; + TabletNodePtr src_node_; + TabletNodePtr dest_node_; + std::shared_ptr move_proc_; + std::shared_ptr tablet_manager_; + std::shared_ptr access_builder_; + std::shared_ptr ts_manager_; + std::shared_ptr tablet_availability_; + std::shared_ptr proc_executor_; }; void MoveTabletProcedureTest::InitMasterEnv() { - MasterEnv().Init(new MasterImpl, ts_manager_, tablet_manager_, - std::shared_ptr(new SizeScheduler), nullptr, - std::shared_ptr(new ThreadPool), proc_executor_, tablet_availability_, nullptr); + MasterEnv().Init(new MasterImpl(nullptr, nullptr), ts_manager_, tablet_manager_, access_builder_, + nullptr, std::shared_ptr(new SizeScheduler), nullptr, + std::shared_ptr(new ThreadPool), proc_executor_, + tablet_availability_, nullptr); } TEST_F(MoveTabletProcedureTest, MoveProcedureInit) { - tablet_->SetStatus(TabletMeta::kTabletReady); - move_proc_ = std::shared_ptr(new MoveTabletProcedure(tablet_, nullptr, MasterEnv().GetThreadPool().get())); - EXPECT_EQ(move_proc_->phases_.back(), MoveTabletPhase::kUnLoadTablet); - tablet_->SetStatus(TabletMeta::kTabletOffline); - move_proc_ = std::shared_ptr(new MoveTabletProcedure(tablet_, nullptr, MasterEnv().GetThreadPool().get())); - EXPECT_EQ(move_proc_->phases_.back(), MoveTabletPhase::kLoadTablet); - tablet_->SetStatus(TabletMeta::kTabletLoadFail); - move_proc_ = std::shared_ptr(new MoveTabletProcedure(tablet_, nullptr, MasterEnv().GetThreadPool().get())); - EXPECT_EQ(move_proc_->phases_.back(), MoveTabletPhase::kLoadTablet); - - tablet_->SetStatus(TabletMeta::kTabletUnloading); - move_proc_ = std::shared_ptr(new MoveTabletProcedure(tablet_, nullptr, MasterEnv().GetThreadPool().get())); - EXPECT_EQ(move_proc_->phases_.back(), MoveTabletPhase::kEofPhase); - std::cout << move_proc_->ProcId(); - std::cout << MoveTabletPhase::kUnLoadTablet; - + tablet_->SetStatus(TabletMeta::kTabletReady); + move_proc_ = std::shared_ptr( + new MoveTabletProcedure(tablet_, nullptr, MasterEnv().GetThreadPool().get())); + EXPECT_EQ(move_proc_->phases_.back(), MoveTabletPhase::kUnLoadTablet); + tablet_->SetStatus(TabletMeta::kTabletOffline); + move_proc_ = std::shared_ptr( + new MoveTabletProcedure(tablet_, nullptr, MasterEnv().GetThreadPool().get())); + EXPECT_EQ(move_proc_->phases_.back(), MoveTabletPhase::kLoadTablet); + tablet_->SetStatus(TabletMeta::kTabletLoadFail); + move_proc_ = std::shared_ptr( + new MoveTabletProcedure(tablet_, nullptr, MasterEnv().GetThreadPool().get())); + EXPECT_EQ(move_proc_->phases_.back(), MoveTabletPhase::kLoadTablet); + + tablet_->SetStatus(TabletMeta::kTabletUnloading); + move_proc_ = std::shared_ptr( + new MoveTabletProcedure(tablet_, nullptr, MasterEnv().GetThreadPool().get())); + EXPECT_EQ(move_proc_->phases_.back(), MoveTabletPhase::kEofPhase); + std::cout << move_proc_->ProcId(); + std::cout << MoveTabletPhase::kUnLoadTablet; + + tablet_->SetStatus(TabletMeta::kTabletSplitted); + move_proc_ = std::shared_ptr( + new MoveTabletProcedure(tablet_, nullptr, MasterEnv().GetThreadPool().get())); + EXPECT_EQ(move_proc_->phases_.back(), MoveTabletPhase::kEofPhase); + tablet_->SetStatus(TabletMeta::kTabletMerged); + move_proc_ = std::shared_ptr( + new MoveTabletProcedure(tablet_, nullptr, MasterEnv().GetThreadPool().get())); + EXPECT_EQ(move_proc_->phases_.back(), MoveTabletPhase::kEofPhase); } TEST_F(MoveTabletProcedureTest, UnLoadTabletPhaseHandler) { - tablet_->SetStatus(TabletMeta::kTabletReady); - tablet_->AssignTabletNode(src_node_); - move_proc_ = std::shared_ptr(new MoveTabletProcedure(tablet_, nullptr, MasterEnv().GetThreadPool().get())); - EXPECT_FALSE(move_proc_->unload_proc_); - move_proc_->UnloadTabletPhaseHandler(MoveTabletPhase::kUnLoadTablet); - EXPECT_TRUE(move_proc_->unload_proc_); - std::shared_ptr unload_proc = - std::dynamic_pointer_cast(move_proc_->unload_proc_); - EXPECT_FALSE(move_proc_->unload_proc_->Done()); - EXPECT_EQ(proc_executor_->procedures_.size(), 1); - EXPECT_EQ(proc_executor_->procedures_[1]->proc_, move_proc_->unload_proc_); - - move_proc_->UnloadTabletPhaseHandler(MoveTabletPhase::kUnLoadTablet); - EXPECT_FALSE(move_proc_->unload_proc_->Done()); - EXPECT_EQ(move_proc_->phases_.back(), MoveTabletPhase::kUnLoadTablet); - EXPECT_EQ(proc_executor_->procedures_.size(), 1); - EXPECT_EQ(proc_executor_->procedures_[1]->proc_, move_proc_->unload_proc_); - - unload_proc->done_ = true; - move_proc_->UnloadTabletPhaseHandler(MoveTabletPhase::kUnLoadTablet); - // as the procedure_executor is not really running, tablet_ status will not change - EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletReady); - EXPECT_EQ(move_proc_->phases_.back(), MoveTabletPhase::kEofPhase); - - tablet_->SetStatus(TabletMeta::kTabletOffline); - move_proc_->UnloadTabletPhaseHandler(MoveTabletPhase::kUnLoadTablet); - EXPECT_EQ(move_proc_->phases_.back(), MoveTabletPhase::kLoadTablet); + tablet_->SetStatus(TabletMeta::kTabletReady); + tablet_->AssignTabletNode(src_node_); + move_proc_ = std::shared_ptr( + new MoveTabletProcedure(tablet_, nullptr, MasterEnv().GetThreadPool().get())); + EXPECT_FALSE(move_proc_->unload_proc_); + move_proc_->UnloadTabletPhaseHandler(MoveTabletPhase::kUnLoadTablet); + EXPECT_TRUE(move_proc_->unload_proc_); + std::shared_ptr unload_proc = + std::dynamic_pointer_cast(move_proc_->unload_proc_); + EXPECT_FALSE(move_proc_->unload_proc_->Done()); + EXPECT_EQ(proc_executor_->procedures_.size(), 1); + EXPECT_EQ(proc_executor_->procedures_[1]->proc_, move_proc_->unload_proc_); + + move_proc_->UnloadTabletPhaseHandler(MoveTabletPhase::kUnLoadTablet); + EXPECT_FALSE(move_proc_->unload_proc_->Done()); + EXPECT_EQ(move_proc_->phases_.back(), MoveTabletPhase::kUnLoadTablet); + EXPECT_EQ(proc_executor_->procedures_.size(), 1); + EXPECT_EQ(proc_executor_->procedures_[1]->proc_, move_proc_->unload_proc_); + + unload_proc->done_ = true; + move_proc_->UnloadTabletPhaseHandler(MoveTabletPhase::kUnLoadTablet); + // as the procedure_executor is not really running, tablet_ status will not + // change + EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletReady); + EXPECT_EQ(move_proc_->phases_.back(), MoveTabletPhase::kEofPhase); + + tablet_->SetStatus(TabletMeta::kTabletOffline); + move_proc_->UnloadTabletPhaseHandler(MoveTabletPhase::kUnLoadTablet); + EXPECT_EQ(move_proc_->phases_.back(), MoveTabletPhase::kLoadTablet); } TEST_F(MoveTabletProcedureTest, LoadTabletPhaseHandler) { - tablet_->SetStatus(TabletMeta::kTabletOffline); - move_proc_ = std::shared_ptr(new MoveTabletProcedure(tablet_, nullptr, MasterEnv().GetThreadPool().get())); - EXPECT_FALSE(move_proc_->load_proc_); - move_proc_->LoadTabletPhaseHandler(MoveTabletPhase::kLoadTablet); - EXPECT_TRUE(move_proc_->load_proc_); - std::shared_ptr load_proc = - std::dynamic_pointer_cast(move_proc_->load_proc_); - EXPECT_FALSE(move_proc_->load_proc_->Done()); - EXPECT_EQ(proc_executor_->procedures_.size(), 1); - EXPECT_EQ(proc_executor_->procedures_[1]->proc_, move_proc_->load_proc_); - - move_proc_->LoadTabletPhaseHandler(MoveTabletPhase::kLoadTablet); - EXPECT_FALSE(move_proc_->load_proc_->Done()); - EXPECT_EQ(proc_executor_->procedures_.size(), 1); - EXPECT_EQ(proc_executor_->procedures_[1]->proc_, move_proc_->load_proc_); - - load_proc->done_ = true; - move_proc_->LoadTabletPhaseHandler(MoveTabletPhase::kLoadTablet); - EXPECT_TRUE(move_proc_->load_proc_->Done()); - EXPECT_EQ(move_proc_->phases_.back(), MoveTabletPhase::kEofPhase); + tablet_->SetStatus(TabletMeta::kTabletOffline); + move_proc_ = std::shared_ptr( + new MoveTabletProcedure(tablet_, nullptr, MasterEnv().GetThreadPool().get())); + EXPECT_FALSE(move_proc_->load_proc_); + move_proc_->LoadTabletPhaseHandler(MoveTabletPhase::kLoadTablet); + EXPECT_TRUE(move_proc_->load_proc_); + std::shared_ptr load_proc = + std::dynamic_pointer_cast(move_proc_->load_proc_); + EXPECT_FALSE(move_proc_->load_proc_->Done()); + EXPECT_EQ(proc_executor_->procedures_.size(), 1); + EXPECT_EQ(proc_executor_->procedures_[1]->proc_, move_proc_->load_proc_); + + move_proc_->LoadTabletPhaseHandler(MoveTabletPhase::kLoadTablet); + EXPECT_FALSE(move_proc_->load_proc_->Done()); + EXPECT_EQ(proc_executor_->procedures_.size(), 1); + EXPECT_EQ(proc_executor_->procedures_[1]->proc_, move_proc_->load_proc_); + + load_proc->done_ = true; + move_proc_->LoadTabletPhaseHandler(MoveTabletPhase::kLoadTablet); + EXPECT_TRUE(move_proc_->load_proc_->Done()); + EXPECT_EQ(move_proc_->phases_.back(), MoveTabletPhase::kEofPhase); } TEST_F(MoveTabletProcedureTest, EofPhaseHandler) { - EXPECT_EQ(dest_node_->plan_move_in_count_, 0); - move_proc_ = std::shared_ptr(new MoveTabletProcedure(tablet_, dest_node_, MasterEnv().GetThreadPool().get())); - EXPECT_EQ(dest_node_->plan_move_in_count_, 1); - move_proc_->EOFPhaseHandler(MoveTabletPhase::kEofPhase); - EXPECT_EQ(dest_node_->plan_move_in_count_, 0); + EXPECT_EQ(dest_node_->plan_move_in_count_, 0); + move_proc_ = std::shared_ptr( + new MoveTabletProcedure(tablet_, dest_node_, MasterEnv().GetThreadPool().get())); + EXPECT_EQ(dest_node_->plan_move_in_count_, 1); + move_proc_->EOFPhaseHandler(MoveTabletPhase::kEofPhase); + EXPECT_EQ(dest_node_->plan_move_in_count_, 0); } - } } } diff --git a/src/master/test/procedure_executor_test.cc b/src/master/test/procedure_executor_test.cc index 74dae15e0..0ed6eab79 100644 --- a/src/master/test/procedure_executor_test.cc +++ b/src/master/test/procedure_executor_test.cc @@ -7,76 +7,76 @@ namespace master { namespace test { class ProcedureExecutorTest : public ::testing::Test { -public: - ProcedureExecutorTest() : executor_(new ProcedureExecutor){} - virtual ~ProcedureExecutorTest() {} + public: + ProcedureExecutorTest() : executor_(new ProcedureExecutor) {} + virtual ~ProcedureExecutorTest() {} - virtual void SetUp() {} - virtual void TearDown() {} + virtual void SetUp() {} + virtual void TearDown() {} - static void SetUpTestCase() {} - static void TearDownTestCase() {} + static void SetUpTestCase() {} + static void TearDownTestCase() {} -private: - std::shared_ptr executor_; + private: + std::shared_ptr executor_; }; class TestProcedure : public Procedure { -public: - TestProcedure(std::string id) : id_(id), done_(false) {} - virtual ~TestProcedure() {} - std::string ProcId() const {return id_;} - void RunNextStage() { - std::cout << "id: " << id_ << std::endl; - usleep(1000); - done_ = true; - } - bool Done() {return done_;} - - std::string id_; - bool done_; + public: + TestProcedure(std::string id) : id_(id), done_(false) {} + virtual ~TestProcedure() {} + std::string ProcId() const { return id_; } + void RunNextStage() { + std::cout << "id: " << id_ << std::endl; + usleep(1000); + done_ = true; + } + bool Done() { return done_; } + + std::string id_; + bool done_; }; TEST_F(ProcedureExecutorTest, StartStopProcedureExecutor) { - EXPECT_FALSE(executor_->running_); - EXPECT_TRUE(executor_->Start()); - EXPECT_TRUE(executor_->running_); - EXPECT_FALSE(executor_->Start()); - EXPECT_TRUE(executor_->running_); - executor_->Stop(); - EXPECT_FALSE(executor_->running_); - executor_->Stop(); + EXPECT_FALSE(executor_->running_); + EXPECT_TRUE(executor_->Start()); + EXPECT_TRUE(executor_->running_); + EXPECT_FALSE(executor_->Start()); + EXPECT_TRUE(executor_->running_); + executor_->Stop(); + EXPECT_FALSE(executor_->running_); + executor_->Stop(); } TEST_F(ProcedureExecutorTest, AddRemoveProcedures) { - std::shared_ptr proc(new TestProcedure("TestProcedure1")); - EXPECT_EQ(executor_->AddProcedure(proc), 0); - // pretend Procedureexecutor_->is running by setting memeber field running_ to true - executor_->running_ = true; - EXPECT_GE(executor_->AddProcedure(proc), 0); - EXPECT_EQ(executor_->procedures_.size(), 1); - // add again, wil return 0 - EXPECT_EQ(executor_->AddProcedure(proc), 0); - EXPECT_EQ(executor_->procedures_.size(), 1); - EXPECT_TRUE(executor_->RemoveProcedure(proc->ProcId())); - EXPECT_EQ(executor_->procedures_.size(), 0); - EXPECT_FALSE(executor_->RemoveProcedure(proc->ProcId())); - EXPECT_EQ(executor_->procedures_.size(), 0); - executor_->running_ = false; + std::shared_ptr proc(new TestProcedure("TestProcedure1")); + EXPECT_EQ(executor_->AddProcedure(proc), 0); + // pretend Procedureexecutor_->is running by setting memeber field running_ to + // true + executor_->running_ = true; + EXPECT_GE(executor_->AddProcedure(proc), 0); + EXPECT_EQ(executor_->procedures_.size(), 1); + // add again, wil return 0 + EXPECT_EQ(executor_->AddProcedure(proc), 0); + EXPECT_EQ(executor_->procedures_.size(), 1); + EXPECT_TRUE(executor_->RemoveProcedure(proc->ProcId())); + EXPECT_EQ(executor_->procedures_.size(), 0); + EXPECT_FALSE(executor_->RemoveProcedure(proc->ProcId())); + EXPECT_EQ(executor_->procedures_.size(), 0); + executor_->running_ = false; } TEST_F(ProcedureExecutorTest, ScheduleProcedures) { - std::shared_ptr proc1(new TestProcedure("TestProcedure1")); - executor_->Start(); - EXPECT_TRUE(executor_->running_); - EXPECT_FALSE(proc1->Done()); - executor_->AddProcedure(proc1); - EXPECT_EQ(executor_->procedures_.size(), 1); - usleep(50 * 1000); - EXPECT_TRUE(proc1->Done()); - EXPECT_TRUE(executor_->procedures_.empty()); + std::shared_ptr proc1(new TestProcedure("TestProcedure1")); + executor_->Start(); + EXPECT_TRUE(executor_->running_); + EXPECT_FALSE(proc1->Done()); + executor_->AddProcedure(proc1); + EXPECT_EQ(executor_->procedures_.size(), 1); + usleep(50 * 1000); + EXPECT_TRUE(proc1->Done()); + EXPECT_TRUE(executor_->procedures_.empty()); } - } } } diff --git a/src/master/test/procedure_limiter_test.cc b/src/master/test/procedure_limiter_test.cc new file mode 100644 index 000000000..db01c58b4 --- /dev/null +++ b/src/master/test/procedure_limiter_test.cc @@ -0,0 +1,212 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include +#include +#include +#include + +#include "gflags/gflags.h" +#include "gtest/gtest.h" + +#include "master/procedure_limiter.h" + +namespace tera { +namespace master { +namespace test { + +class ProcedureLimiterTest : public ::testing::Test { + public: + ProcedureLimiterTest() {} + virtual ~ProcedureLimiterTest() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + static void SetUpTestCase() {} + static void TearDownTestCase() {} +}; + +class Worker { + public: + Worker(const ProcedureLimiter::LockType& type, size_t i) + : type_(type), index_(i), finished_(false) {} + + Worker(const Worker& w) : type_(w.type_), index_(w.index_), finished_(w.finished_) {} + + Worker(Worker&& w) : type_(w.type_), index_(w.index_), finished_(w.finished_) {} + + void Work() { + { + std::unique_lock lk(mutex_); + cv_.wait(lk, [this] { return this->finished_ == true; }); + } + ProcedureLimiter::Instance().ReleaseLock(type_); + } + + void Start() { ProcedureLimiter::Instance().GetLock(type_); } + + void Finish() { + { + std::lock_guard guard(mutex_); + finished_ = true; + } + cv_.notify_one(); + } + + private: + ProcedureLimiter::LockType type_; + std::condition_variable cv_; + std::mutex mutex_; + bool finished_; + size_t index_; +}; + +TEST_F(ProcedureLimiterTest, NoLimitTypeTest) { + for (size_t i = 0; i < 100; ++i) { + ASSERT_TRUE(ProcedureLimiter::Instance().GetLock(ProcedureLimiter::LockType::kNoLimit)); + } +} + +TEST_F(ProcedureLimiterTest, LargeConcurrencyTest) { + const ProcedureLimiter::LockType type = ProcedureLimiter::LockType::kMerge; + const uint32_t lock_num = 20; + ProcedureLimiter::Instance().SetLockLimit(type, lock_num); + ASSERT_EQ(lock_num, ProcedureLimiter::Instance().GetLockLimit(type)); + + // The reserve() is very important, vector may call class constructor when copacity grow, it may + // have side effect!!! + std::vector workers; + workers.reserve(lock_num); + std::vector threads; + threads.reserve(lock_num); + + for (size_t i = 0; i < lock_num; ++i) { + workers.emplace_back(type, i); + workers[i].Start(); + threads.emplace_back(&Worker::Work, &workers[i]); + } + + // wait for the cv_ to enter waiting state + std::this_thread::sleep_for(std::chrono::seconds(1)); + + ASSERT_EQ(lock_num, ProcedureLimiter::Instance().GetLockInUse(type)); + ASSERT_FALSE(ProcedureLimiter::Instance().GetLock(type)); + + for (size_t i = 0; i < lock_num; ++i) { + workers[i].Finish(); + threads[i].join(); + } + + ASSERT_EQ(0, ProcedureLimiter::Instance().GetLockInUse(type)); +} + +TEST_F(ProcedureLimiterTest, MultiTypeTest) { + ProcedureLimiter::Instance().SetLockLimit(ProcedureLimiter::LockType::kMerge, 10); + ProcedureLimiter::Instance().SetLockLimit(ProcedureLimiter::LockType::kSplit, 10); + + Worker worker1(ProcedureLimiter::LockType::kMerge, 1); + worker1.Start(); + std::thread work_thread_1(&Worker::Work, &worker1); + + Worker worker2(ProcedureLimiter::LockType::kMerge, 2); + worker2.Start(); + std::thread work_thread_2(&Worker::Work, &worker2); + + Worker worker3(ProcedureLimiter::LockType::kSplit, 3); + worker3.Start(); + std::thread work_thread_3(&Worker::Work, &worker3); + + // wait for the cv_ to enter waiting state + std::this_thread::sleep_for(std::chrono::seconds(1)); + + ASSERT_EQ(2, ProcedureLimiter::Instance().GetLockInUse(ProcedureLimiter::LockType::kMerge)); + ASSERT_EQ(1, ProcedureLimiter::Instance().GetLockInUse(ProcedureLimiter::LockType::kSplit)); + + worker1.Finish(); + work_thread_1.join(); + ASSERT_EQ(1, ProcedureLimiter::Instance().GetLockInUse(ProcedureLimiter::LockType::kMerge)); + + worker2.Finish(); + work_thread_2.join(); + ASSERT_EQ(0, ProcedureLimiter::Instance().GetLockInUse(ProcedureLimiter::LockType::kMerge)); + + worker3.Finish(); + work_thread_3.join(); + ASSERT_EQ(0, ProcedureLimiter::Instance().GetLockInUse(ProcedureLimiter::LockType::kSplit)); +} + +TEST_F(ProcedureLimiterTest, IncLimitTest) { + ProcedureLimiter::LockType type = ProcedureLimiter::LockType::kMerge; + ProcedureLimiter::Instance().SetLockLimit(type, 1); + + Worker worker1(type, 1); + worker1.Start(); + std::thread work_thread_1(&Worker::Work, &worker1); + + ASSERT_EQ(1, ProcedureLimiter::Instance().GetLockInUse(type)); + ASSERT_FALSE(ProcedureLimiter::Instance().GetLock(type)); + + ProcedureLimiter::Instance().SetLockLimit(type, 2); + + Worker worker2(type, 2); + worker2.Start(); + std::thread work_thread_2(&Worker::Work, &worker2); + + ASSERT_EQ(2, ProcedureLimiter::Instance().GetLockInUse(type)); + ASSERT_FALSE(ProcedureLimiter::Instance().GetLock(type)); + + // wait for the cv_ to enter waiting state + std::this_thread::sleep_for(std::chrono::seconds(1)); + + worker1.Finish(); + work_thread_1.join(); + ASSERT_EQ(1, ProcedureLimiter::Instance().GetLockInUse(type)); + + worker2.Finish(); + work_thread_2.join(); + ASSERT_EQ(0, ProcedureLimiter::Instance().GetLockInUse(type)); +} + +TEST_F(ProcedureLimiterTest, DecLimitTest) { + ProcedureLimiter::LockType type = ProcedureLimiter::LockType::kMerge; + ProcedureLimiter::Instance().SetLockLimit(type, 2); + + Worker worker1(type, 1); + worker1.Start(); + std::thread work_thread_1(&Worker::Work, &worker1); + ASSERT_EQ(1, ProcedureLimiter::Instance().GetLockInUse(type)); + + Worker worker2(type, 2); + worker2.Start(); + std::thread work_thread_2(&Worker::Work, &worker2); + ASSERT_EQ(2, ProcedureLimiter::Instance().GetLockInUse(type)); + + ASSERT_FALSE(ProcedureLimiter::Instance().GetLock(type)); + + ProcedureLimiter::Instance().SetLockLimit(type, 1); + + // wait for the cv_ to enter waiting state + std::this_thread::sleep_for(std::chrono::seconds(1)); + + worker1.Finish(); + work_thread_1.join(); + ASSERT_EQ(1, ProcedureLimiter::Instance().GetLockInUse(type)); + + ASSERT_FALSE(ProcedureLimiter::Instance().GetLock(type)); + + worker2.Finish(); + work_thread_2.join(); + ASSERT_EQ(0, ProcedureLimiter::Instance().GetLockInUse(type)); + + ASSERT_TRUE(ProcedureLimiter::Instance().GetLock(type)); + ASSERT_EQ(1, ProcedureLimiter::Instance().GetLockInUse(type)); + ProcedureLimiter::Instance().ReleaseLock(type); + ASSERT_EQ(0, ProcedureLimiter::Instance().GetLockInUse(type)); +} + +} // namespace test +} // namespace master +} // namespace tera diff --git a/src/master/test/set_quota_procedure_test.cc b/src/master/test/set_quota_procedure_test.cc new file mode 100644 index 000000000..63b850d1f --- /dev/null +++ b/src/master/test/set_quota_procedure_test.cc @@ -0,0 +1,89 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "proto/quota.pb.h" +#include "quota/master_quota_entry.h" +#include "master/set_quota_procedure.h" +#include "master/master_env.h" +#include +#include +#include "quota/helpers/quota_utils.h" + +namespace tera { +namespace master { +namespace test { + +static const std::string table_name{"test"}; + +class MockClosure : public google::protobuf::Closure { + public: + virtual void Run() { return; } +}; + +class SetQuotaProcedureTest : public ::testing::Test { + public: + SetQuotaProcedureTest() + : quota_entry_(new quota::MasterQuotaEntry()), + request_(new SetQuotaRequest), + response_(new SetQuotaResponse), + proc_executor_(new ProcedureExecutor) {} + virtual ~SetQuotaProcedureTest() {} + + virtual void SetUp() { + InitMasterEnv(); + + TableQuota* table_quota = request_->mutable_table_quota(); + table_quota->set_table_name(table_name); + QuotaInfo* quota_info = table_quota->add_quota_infos(); + quota_info->set_type(kQuotaWriteReqs); + quota_info->set_limit(100); + quota_info->set_period(2); + + std::unique_ptr meta_write_record( + quota::MasterQuotaHelper::NewMetaRecordFromQuota(*table_quota)); + set_proc_.reset(new SetQuotaProcedure(request_.get(), response_.get(), closure_.get(), + MasterEnv().GetThreadPool().get(), quota_entry_, + meta_write_record)); + } + + virtual void TearDown() { proc_executor_->running_ = false; } + + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + private: + void InitMasterEnv() { + MasterEnv().Init(nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + std::shared_ptr(new ThreadPool), proc_executor_, nullptr, nullptr); + } + + private: + std::shared_ptr quota_entry_; + std::unique_ptr request_; + std::unique_ptr response_; + std::unique_ptr closure_; + std::shared_ptr set_proc_; + std::shared_ptr proc_executor_; +}; + +TEST_F(SetQuotaProcedureTest, CheckSetQuotaProcedure) { + EXPECT_EQ(set_proc_->ProcId(), "SetQuota:" + table_name); + std::string m_table_name = + quota::MasterQuotaHelper::GetTableNameFromMetaKey(set_proc_->meta_write_record_->key); + EXPECT_TRUE(!m_table_name.empty()); + EXPECT_EQ(m_table_name, table_name); +} + +TEST_F(SetQuotaProcedureTest, SetQuotaProcedureUpdateMetaFlase) { + set_proc_->SetMetaDone(false); + EXPECT_EQ(response_->status(), kMetaTabletError); +} + +TEST_F(SetQuotaProcedureTest, SetQuotaProcedureUpdateMetaTrue) { + set_proc_->SetMetaDone(true); + EXPECT_EQ(response_->status(), kMasterOk); +} +} +} +} diff --git a/src/master/test/split_tablet_procedure_test.cc b/src/master/test/split_tablet_procedure_test.cc index 3f020eaf4..b430fe326 100644 --- a/src/master/test/split_tablet_procedure_test.cc +++ b/src/master/test/split_tablet_procedure_test.cc @@ -17,207 +17,203 @@ DECLARE_string(tera_leveldb_env_type); DECLARE_string(tera_tabletnode_path_prefix); DECLARE_int32(tera_master_max_split_concurrency); - namespace tera { namespace master { namespace test { - + using leveldb::EnvOptions; class SplitTabletProcedureTest : public ::testing::Test { -public: - SplitTabletProcedureTest() : tablet_manager_(new TabletManager(nullptr, nullptr, nullptr)), - ts_manager_(new TabletNodeManager(nullptr)), - proc_executor_(new ProcedureExecutor) {} - - virtual ~SplitTabletProcedureTest() {} - - virtual void SetUp() { - FLAGS_tera_leveldb_env_type.assign("local"); - FLAGS_tera_tabletnode_path_prefix.assign("./split_tablet_procedure/"); - InitMasterEnv(); - TableSchema schema; - StatusCode ret_code; - table_ = TabletManager::CreateTable("test", schema, kTableEnable); - EXPECT_TRUE(table_); - EXPECT_TRUE(tablet_manager_->AddTable(table_, &ret_code)); - TabletMeta tablet_meta; - TabletManager::PackTabletMeta(&tablet_meta, "test", "b", "d", "test/tablet00000001", "", TabletMeta::kTabletOffline, 10); - tablet_ = TabletManager::CreateTablet(table_, tablet_meta); - EXPECT_TRUE(tablet_); - EXPECT_TRUE(table_->AddTablet(tablet_, &ret_code)); -// EXPECT_TRUE(tablet_manager_->AddTablet("test", -// "b", "d", "test/tablet00000001", "", schema, 10, &tablet_, &ret_code)); - node_ = ts_manager_->AddTabletNode("127.0.0.1:2000", "1234567"); - tablet_->SetStatus(TabletMeta::kTabletReady); - split_proc_.reset(new SplitTabletProcedure(tablet_, MasterEnv().GetThreadPool().get())); - proc_executor_->running_ = true; - } - virtual void TearDown() { - proc_executor_->running_ = false; - } - - static void SetUpTestCase() {} - static void TearDownTestCase() {} - -private: - void InitMasterEnv() { - MasterEnv().Init(nullptr, ts_manager_, tablet_manager_, - nullptr, nullptr, std::shared_ptr(new ThreadPool), proc_executor_, - std::shared_ptr(new TabletAvailability(tablet_manager_)), - nullptr); - // push one element to the queue, avoiding call TryMoveTablet while call SuspendMetaOperation - MasterEnv().meta_task_queue_.push(nullptr); - } - - void InitFileSystemForSplit() { - fs_env_ = io::LeveldbBaseEnv(); - std::string table_path = FLAGS_tera_tabletnode_path_prefix + tablet_->GetTableName(); - EXPECT_TRUE(fs_env_->CreateDir(table_path).ok()); - std::string tablet_path = FLAGS_tera_tabletnode_path_prefix + tablet_->GetPath(); - EXPECT_TRUE(fs_env_->CreateDir(tablet_path).ok()); - } - -private: - TablePtr table_; - TabletPtr tablet_; - TabletNodePtr node_; - std::shared_ptr split_proc_; - std::shared_ptr tablet_manager_; - std::shared_ptr ts_manager_; - std::shared_ptr proc_executor_; - leveldb::Env* fs_env_; + public: + SplitTabletProcedureTest() + : tablet_manager_(new TabletManager(nullptr, nullptr, nullptr)), + ts_manager_(new TabletNodeManager(nullptr)), + proc_executor_(new ProcedureExecutor) {} + + virtual ~SplitTabletProcedureTest() {} + + virtual void SetUp() { + FLAGS_tera_leveldb_env_type.assign("local"); + FLAGS_tera_tabletnode_path_prefix.assign("./split_tablet_procedure/"); + InitMasterEnv(); + TableSchema schema; + StatusCode ret_code; + table_ = TabletManager::CreateTable("test", schema, kTableEnable); + EXPECT_TRUE(table_); + EXPECT_TRUE(tablet_manager_->AddTable(table_, &ret_code)); + TabletMeta tablet_meta; + TabletManager::PackTabletMeta(&tablet_meta, "test", "b", "d", "test/tablet00000001", "", + TabletMeta::kTabletOffline, 10); + tablet_ = table_->AddTablet(tablet_meta, &ret_code); + EXPECT_TRUE(tablet_); + node_ = ts_manager_->AddTabletNode("127.0.0.1:2000", "1234567"); + tablet_->SetStatus(TabletMeta::kTabletReady); + split_proc_.reset(new SplitTabletProcedure(tablet_, MasterEnv().GetThreadPool().get())); + proc_executor_->running_ = true; + } + virtual void TearDown() { proc_executor_->running_ = false; } + + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + private: + void InitMasterEnv() { + MasterEnv().Init(nullptr, ts_manager_, tablet_manager_, access_builder_, nullptr, nullptr, + nullptr, std::shared_ptr(new ThreadPool), proc_executor_, + std::shared_ptr(new TabletAvailability(tablet_manager_)), + nullptr); + // push one element to the queue, avoiding call TryMoveTablet while call + // SuspendMetaOperation + MasterEnv().meta_task_queue_.push(nullptr); + } + + void InitFileSystemForSplit() { + fs_env_ = io::LeveldbBaseEnv(); + std::string table_path = FLAGS_tera_tabletnode_path_prefix + tablet_->GetTableName(); + EXPECT_TRUE(fs_env_->CreateDir(table_path).ok()); + std::string tablet_path = FLAGS_tera_tabletnode_path_prefix + tablet_->GetPath(); + EXPECT_TRUE(fs_env_->CreateDir(tablet_path).ok()); + } + + private: + TablePtr table_; + TabletPtr tablet_; + TabletNodePtr node_; + std::shared_ptr split_proc_; + std::shared_ptr tablet_manager_; + std::shared_ptr ts_manager_; + std::shared_ptr access_builder_; + std::shared_ptr proc_executor_; + leveldb::Env* fs_env_; }; -TEST_F(SplitTabletProcedureTest, SplitTabletProcedureInit){ - EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kPreSplitTablet); - std::cout << split_proc_->ProcId() << ", " << split_proc_->phases_.back() << std::endl; +TEST_F(SplitTabletProcedureTest, SplitTabletProcedureInit) { + EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kPreSplitTablet); + std::cout << split_proc_->ProcId() << ", " << split_proc_->phases_.back() << std::endl; } TEST_F(SplitTabletProcedureTest, UnloadTabletPhaseHandler) { - EXPECT_FALSE(split_proc_->unload_proc_); - split_proc_->UnloadTabletPhaseHandler(SplitTabletPhase::kUnLoadTablet); - EXPECT_TRUE(split_proc_->unload_proc_); - EXPECT_FALSE(split_proc_->unload_proc_->Done()); - std::shared_ptr unload_proc = - std::dynamic_pointer_cast(split_proc_->unload_proc_); - EXPECT_TRUE(unload_proc->is_sub_proc_); - EXPECT_EQ(proc_executor_->procedures_.size(), 1); - EXPECT_EQ(proc_executor_->procedures_[1]->proc_, split_proc_->unload_proc_); - - split_proc_->UnloadTabletPhaseHandler(SplitTabletPhase::kUnLoadTablet); - EXPECT_FALSE(split_proc_->unload_proc_->Done()); - - unload_proc->done_ = true; - tablet_->AssignTabletNode(node_); - split_proc_->UnloadTabletPhaseHandler(SplitTabletPhase::kUnLoadTablet); - EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kEofPhase); - split_proc_->UnloadTabletPhaseHandler(SplitTabletPhase::kUnLoadTablet); - tablet_->SetStatus(TabletMeta::kTabletOffline); - split_proc_->UnloadTabletPhaseHandler(SplitTabletPhase::kUnLoadTablet); - EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kPostUnLoadTablet); + EXPECT_FALSE(split_proc_->unload_proc_); + split_proc_->UnloadTabletPhaseHandler(SplitTabletPhase::kUnLoadTablet); + EXPECT_TRUE(split_proc_->unload_proc_); + EXPECT_FALSE(split_proc_->unload_proc_->Done()); + std::shared_ptr unload_proc = + std::dynamic_pointer_cast(split_proc_->unload_proc_); + EXPECT_TRUE(unload_proc->is_sub_proc_); + EXPECT_EQ(proc_executor_->procedures_.size(), 1); + EXPECT_EQ(proc_executor_->procedures_[1]->proc_, split_proc_->unload_proc_); + + split_proc_->UnloadTabletPhaseHandler(SplitTabletPhase::kUnLoadTablet); + EXPECT_FALSE(split_proc_->unload_proc_->Done()); + + unload_proc->done_ = true; + tablet_->AssignTabletNode(node_); + split_proc_->UnloadTabletPhaseHandler(SplitTabletPhase::kUnLoadTablet); + EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kEofPhase); + split_proc_->UnloadTabletPhaseHandler(SplitTabletPhase::kUnLoadTablet); + tablet_->SetStatus(TabletMeta::kTabletOffline); + split_proc_->UnloadTabletPhaseHandler(SplitTabletPhase::kUnLoadTablet); + EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kPostUnLoadTablet); } TEST_F(SplitTabletProcedureTest, PostUnLoadTabletPhaseHandler) { - split_proc_->PostUnloadTabletPhaseHandler(SplitTabletPhase::kPostUnLoadTablet); - EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kFaultRecover); - InitFileSystemForSplit(); - split_proc_->PostUnloadTabletPhaseHandler(SplitTabletPhase::kPostUnLoadTablet); - EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kUpdateMeta); - - std::string log_path = FLAGS_tera_tabletnode_path_prefix + - leveldb::LogHexFileName(tablet_->GetPath(), 123); - leveldb::WritableFile *log_file; - EXPECT_TRUE(fs_env_->NewWritableFile(log_path, &log_file, EnvOptions()).ok()); - delete log_file; - split_proc_->PostUnloadTabletPhaseHandler(SplitTabletPhase::kPostUnLoadTablet); - EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kFaultRecover); + split_proc_->PostUnloadTabletPhaseHandler(SplitTabletPhase::kPostUnLoadTablet); + EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kFaultRecover); + InitFileSystemForSplit(); + split_proc_->PostUnloadTabletPhaseHandler(SplitTabletPhase::kPostUnLoadTablet); + EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kUpdateMeta); + + std::string log_path = + FLAGS_tera_tabletnode_path_prefix + leveldb::LogHexFileName(tablet_->GetPath(), 123); + leveldb::WritableFile* log_file; + EXPECT_TRUE(fs_env_->NewWritableFile(log_path, &log_file, EnvOptions()).ok()); + delete log_file; + split_proc_->PostUnloadTabletPhaseHandler(SplitTabletPhase::kPostUnLoadTablet); + EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kFaultRecover); } TEST_F(SplitTabletProcedureTest, UpdateMetaPhaseHandler) { - EXPECT_FALSE(split_proc_->child_tablets_[0]); - EXPECT_FALSE(split_proc_->child_tablets_[1]); - split_proc_->split_key_ = "c"; - split_proc_->UpdateMetaPhaseHandler(SplitTabletPhase::kUpdateMeta); - EXPECT_TRUE(split_proc_->child_tablets_[0]); - EXPECT_TRUE(split_proc_->child_tablets_[1]); - EXPECT_EQ(split_proc_->tablet_->GetKeyStart(), split_proc_->child_tablets_[0]->GetKeyStart()); - EXPECT_EQ(split_proc_->child_tablets_[0]->GetKeyEnd(), + EXPECT_FALSE(split_proc_->child_tablets_[0]); + EXPECT_FALSE(split_proc_->child_tablets_[1]); + split_proc_->split_key_ = "c"; + split_proc_->UpdateMetaPhaseHandler(SplitTabletPhase::kUpdateMeta); + EXPECT_TRUE(split_proc_->child_tablets_[0]); + EXPECT_TRUE(split_proc_->child_tablets_[1]); + EXPECT_EQ(split_proc_->tablet_->GetKeyStart(), split_proc_->child_tablets_[0]->GetKeyStart()); + EXPECT_EQ(split_proc_->child_tablets_[0]->GetKeyEnd(), split_proc_->child_tablets_[1]->GetKeyStart()); - EXPECT_EQ(split_proc_->tablet_->GetKeyEnd(), split_proc_->child_tablets_[1]->GetKeyEnd()); - EXPECT_EQ(split_proc_->child_tablets_[0]->GetPath(), "test/tablet00000002"); - EXPECT_EQ(split_proc_->child_tablets_[1]->GetPath(), "test/tablet00000003"); - EXPECT_EQ(split_proc_->child_tablets_[0]->GetStatus(), TabletMeta::kTabletOffline); - EXPECT_EQ(split_proc_->child_tablets_[1]->GetStatus(), TabletMeta::kTabletOffline); + EXPECT_EQ(split_proc_->tablet_->GetKeyEnd(), split_proc_->child_tablets_[1]->GetKeyEnd()); + EXPECT_EQ(split_proc_->child_tablets_[0]->GetPath(), "test/tablet00000002"); + EXPECT_EQ(split_proc_->child_tablets_[1]->GetPath(), "test/tablet00000003"); + EXPECT_EQ(split_proc_->child_tablets_[0]->GetStatus(), TabletMeta::kTabletOffline); + EXPECT_EQ(split_proc_->child_tablets_[1]->GetStatus(), TabletMeta::kTabletOffline); } TEST_F(SplitTabletProcedureTest, LoadTabletsPhaseHandler) { - split_proc_->split_key_ = "c"; - split_proc_->UpdateMetaPhaseHandler(SplitTabletPhase::kUpdateMeta); - EXPECT_FALSE(split_proc_->load_procs_[0]); - EXPECT_FALSE(split_proc_->load_procs_[1]); - split_proc_->LoadTabletsPhaseHandler(SplitTabletPhase::kLoadTablets); - EXPECT_TRUE(split_proc_->load_procs_[0]); - EXPECT_TRUE(split_proc_->load_procs_[1]); - std::shared_ptr load_proc1 = - std::dynamic_pointer_cast(split_proc_->load_procs_[0]); - std::shared_ptr load_proc2 = - std::dynamic_pointer_cast(split_proc_->load_procs_[1]); - EXPECT_FALSE(split_proc_->load_procs_[0]->Done()); - EXPECT_FALSE(split_proc_->load_procs_[1]->Done()); - EXPECT_EQ(proc_executor_->procedures_.size(), 2); - EXPECT_EQ(proc_executor_->procedures_[1]->proc_, split_proc_->load_procs_[0]); - EXPECT_EQ(proc_executor_->procedures_[2]->proc_, split_proc_->load_procs_[1]); - load_proc1->done_ = true; - split_proc_->LoadTabletsPhaseHandler(SplitTabletPhase::kLoadTablets); - load_proc2->done_ = true; - split_proc_->child_tablets_[0]->SetStatus(TabletMeta::kTabletReady); - split_proc_->child_tablets_[1]->SetStatus(TabletMeta::kTabletReady); - split_proc_->LoadTabletsPhaseHandler(SplitTabletPhase::kLoadTablets); - EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kEofPhase); + split_proc_->split_key_ = "c"; + split_proc_->UpdateMetaPhaseHandler(SplitTabletPhase::kUpdateMeta); + EXPECT_FALSE(split_proc_->load_procs_[0]); + EXPECT_FALSE(split_proc_->load_procs_[1]); + split_proc_->LoadTabletsPhaseHandler(SplitTabletPhase::kLoadTablets); + EXPECT_TRUE(split_proc_->load_procs_[0]); + EXPECT_TRUE(split_proc_->load_procs_[1]); + std::shared_ptr load_proc1 = + std::dynamic_pointer_cast(split_proc_->load_procs_[0]); + std::shared_ptr load_proc2 = + std::dynamic_pointer_cast(split_proc_->load_procs_[1]); + EXPECT_FALSE(split_proc_->load_procs_[0]->Done()); + EXPECT_FALSE(split_proc_->load_procs_[1]->Done()); + EXPECT_EQ(proc_executor_->procedures_.size(), 2); + EXPECT_EQ(proc_executor_->procedures_[1]->proc_, split_proc_->load_procs_[0]); + EXPECT_EQ(proc_executor_->procedures_[2]->proc_, split_proc_->load_procs_[1]); + load_proc1->done_ = true; + split_proc_->LoadTabletsPhaseHandler(SplitTabletPhase::kLoadTablets); + load_proc2->done_ = true; + split_proc_->child_tablets_[0]->SetStatus(TabletMeta::kTabletReady); + split_proc_->child_tablets_[1]->SetStatus(TabletMeta::kTabletReady); + split_proc_->LoadTabletsPhaseHandler(SplitTabletPhase::kLoadTablets); + EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kEofPhase); } TEST_F(SplitTabletProcedureTest, FaultRecoverPhaseHandler) { - split_proc_->phases_.push_back(SplitTabletPhase::kPostUnLoadTablet); - split_proc_->phases_.push_back(SplitTabletPhase::kFaultRecover); - tablet_->SetStatus(TabletMeta::kTabletOffline); - EXPECT_FALSE(split_proc_->recover_proc_); - split_proc_->FaultRecoverPhaseHandler(SplitTabletPhase::kFaultRecover); - EXPECT_TRUE(split_proc_->recover_proc_); - EXPECT_EQ(proc_executor_->procedures_.size(), 1); - EXPECT_FALSE(split_proc_->recover_proc_->Done()); - std::shared_ptr recover_proc = - std::dynamic_pointer_cast(split_proc_->recover_proc_); - recover_proc->done_ = true; - split_proc_->FaultRecoverPhaseHandler(SplitTabletPhase::kFaultRecover); - EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kEofPhase); + split_proc_->phases_.push_back(SplitTabletPhase::kPostUnLoadTablet); + split_proc_->phases_.push_back(SplitTabletPhase::kFaultRecover); + tablet_->SetStatus(TabletMeta::kTabletOffline); + EXPECT_FALSE(split_proc_->recover_proc_); + split_proc_->FaultRecoverPhaseHandler(SplitTabletPhase::kFaultRecover); + EXPECT_TRUE(split_proc_->recover_proc_); + EXPECT_EQ(proc_executor_->procedures_.size(), 1); + EXPECT_FALSE(split_proc_->recover_proc_->Done()); + std::shared_ptr recover_proc = + std::dynamic_pointer_cast(split_proc_->recover_proc_); + recover_proc->done_ = true; + split_proc_->FaultRecoverPhaseHandler(SplitTabletPhase::kFaultRecover); + EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kEofPhase); } TEST_F(SplitTabletProcedureTest, PreSplitTabletPhaseHandler) { - FLAGS_tera_master_max_split_concurrency = 1; - tablet_->AssignTabletNode(node_); - split_proc_->split_key_ = "c"; - split_proc_->PreSplitTabletPhaseHandler(SplitTabletPhase::kPreSplitTablet); - EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kUnLoadTablet); - split_proc_->split_key_ = "b"; - split_proc_->PreSplitTabletPhaseHandler(SplitTabletPhase::kPreSplitTablet); - EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kEofPhase); - split_proc_->phases_.clear(); - split_proc_->split_key_ = "a"; - split_proc_->PreSplitTabletPhaseHandler(SplitTabletPhase::kPreSplitTablet); - EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kEofPhase); - - split_proc_->phases_.clear(); - split_proc_->split_key_ = "d"; - split_proc_->PreSplitTabletPhaseHandler(SplitTabletPhase::kPreSplitTablet); - EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kEofPhase); - split_proc_->phases_.clear(); - split_proc_->split_key_ = "z"; - split_proc_->PreSplitTabletPhaseHandler(SplitTabletPhase::kPreSplitTablet); - EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kEofPhase); - + FLAGS_tera_master_max_split_concurrency = 1; + tablet_->AssignTabletNode(node_); + split_proc_->split_key_ = "c"; + split_proc_->PreSplitTabletPhaseHandler(SplitTabletPhase::kPreSplitTablet); + EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kUnLoadTablet); + split_proc_->split_key_ = "b"; + split_proc_->PreSplitTabletPhaseHandler(SplitTabletPhase::kPreSplitTablet); + EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kEofPhase); + split_proc_->phases_.clear(); + split_proc_->split_key_ = "a"; + split_proc_->PreSplitTabletPhaseHandler(SplitTabletPhase::kPreSplitTablet); + EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kEofPhase); + + split_proc_->phases_.clear(); + split_proc_->split_key_ = "d"; + split_proc_->PreSplitTabletPhaseHandler(SplitTabletPhase::kPreSplitTablet); + EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kEofPhase); + split_proc_->phases_.clear(); + split_proc_->split_key_ = "z"; + split_proc_->PreSplitTabletPhaseHandler(SplitTabletPhase::kPreSplitTablet); + EXPECT_EQ(split_proc_->phases_.back(), SplitTabletPhase::kEofPhase); } - } } } diff --git a/src/master/test/table_state_machine_test.cc b/src/master/test/table_state_machine_test.cc index 61de4934d..242b2bdac 100644 --- a/src/master/test/table_state_machine_test.cc +++ b/src/master/test/table_state_machine_test.cc @@ -8,41 +8,40 @@ namespace master { namespace test { class TableStateMachineTest : public ::testing::Test { -public: - TableStateMachineTest() : state_mchine_(kTableEnable) {} - virtual ~TableStateMachineTest() {} - virtual void SetUp() {} - virtual void TearDown() {} - static void SetUpTestCase() {} - static void TearDownTestCase() {} + public: + TableStateMachineTest() : state_mchine_(kTableEnable) {} + virtual ~TableStateMachineTest() {} + virtual void SetUp() {} + virtual void TearDown() {} + static void SetUpTestCase() {} + static void TearDownTestCase() {} -private: - bool TransitFromState(const TableStatus& status, const TableEvent& event) { - state_mchine_.SetStatus(status); - return state_mchine_.DoStateTransition(event); - } - TableStateMachine state_mchine_; + private: + bool TransitFromState(const TableStatus& status, const TableEvent& event) { + state_mchine_.SetStatus(status); + return state_mchine_.DoStateTransition(event); + } + TableStateMachine state_mchine_; }; TEST_F(TableStateMachineTest, LegalTransition) { - EXPECT_TRUE(TransitFromState(kTableEnable, TableEvent::kDisableTable)); - EXPECT_EQ(state_mchine_.GetStatus(), kTableDisable); - EXPECT_TRUE(TransitFromState(kTableDisable, TableEvent::kEnableTable)); - EXPECT_EQ(state_mchine_.GetStatus(), kTableEnable); - EXPECT_TRUE(TransitFromState(kTableDisable, TableEvent::kDeleteTable)); - EXPECT_EQ(state_mchine_.GetStatus(), kTableDeleting); - EXPECT_TRUE(TransitFromState(kTableDeleting, TableEvent::kDisableTable)); - EXPECT_EQ(state_mchine_.GetStatus(), kTableDisable); + EXPECT_TRUE(TransitFromState(kTableEnable, TableEvent::kDisableTable)); + EXPECT_EQ(state_mchine_.GetStatus(), kTableDisable); + EXPECT_TRUE(TransitFromState(kTableDisable, TableEvent::kEnableTable)); + EXPECT_EQ(state_mchine_.GetStatus(), kTableEnable); + EXPECT_TRUE(TransitFromState(kTableDisable, TableEvent::kDeleteTable)); + EXPECT_EQ(state_mchine_.GetStatus(), kTableDeleting); + EXPECT_TRUE(TransitFromState(kTableDeleting, TableEvent::kDisableTable)); + EXPECT_EQ(state_mchine_.GetStatus(), kTableDisable); } TEST_F(TableStateMachineTest, IllegalTransition) { - EXPECT_FALSE(TransitFromState(kTableEnable, TableEvent::kDeleteTable)); - EXPECT_EQ(state_mchine_.GetStatus(), kTableEnable); - EXPECT_FALSE(TransitFromState(kTableDeleting, TableEvent::kEnableTable)); - EXPECT_EQ(state_mchine_.GetStatus(), kTableDeleting); - std::cout << TableEvent::kEnableTable << std::endl; + EXPECT_FALSE(TransitFromState(kTableEnable, TableEvent::kDeleteTable)); + EXPECT_EQ(state_mchine_.GetStatus(), kTableEnable); + EXPECT_FALSE(TransitFromState(kTableDeleting, TableEvent::kEnableTable)); + EXPECT_EQ(state_mchine_.GetStatus(), kTableDeleting); + std::cout << TableEvent::kEnableTable << std::endl; } - } } } diff --git a/src/master/test/tablet_state_machine_test.cc b/src/master/test/tablet_state_machine_test.cc index 2d2a89c0b..fed451416 100644 --- a/src/master/test/tablet_state_machine_test.cc +++ b/src/master/test/tablet_state_machine_test.cc @@ -8,92 +8,94 @@ namespace master { namespace test { class TabletStateMachineTest : public ::testing::Test { -public: - TabletStateMachineTest() : state_machine_(TabletMeta::kTabletOffline) {} - virtual ~TabletStateMachineTest() {} - virtual void SetUp() {} - virtual void TearDown() {} - - static void SetUpTestCase() {} - static void TearDownTestCase() {} + public: + TabletStateMachineTest() : state_machine_(TabletMeta::kTabletOffline) {} + virtual ~TabletStateMachineTest() {} + virtual void SetUp() {} + virtual void TearDown() {} -private: - bool TransitFromState(const TabletMeta::TabletStatus& status, const TabletEvent& event) { - state_machine_.SetStatus(status); - return state_machine_.DoStateTransition(event); - } - TabletStateMachine state_machine_; + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + private: + bool TransitFromState(const TabletMeta::TabletStatus& status, const TabletEvent& event) { + state_machine_.SetStatus(status); + return state_machine_.DoStateTransition(event); + } + TabletStateMachine state_machine_; }; TEST_F(TabletStateMachineTest, LegalTransition) { - // state transitioin from kTableOffLine - state_machine_.SetStatus(TabletMeta::kTabletOffline); - EXPECT_EQ(state_machine_.ReadyTime(), std::numeric_limits::max()); - EXPECT_TRUE(TransitFromState(TabletMeta::kTabletOffline, TabletEvent::kLoadTablet)); - EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletLoading); - EXPECT_TRUE(TransitFromState(TabletMeta::kTabletOffline, TabletEvent::kTsDelayOffline)); - EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletDelayOffline); - EXPECT_TRUE(TransitFromState(TabletMeta::kTabletOffline, TabletEvent::kTableDisable)); - EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletDisable); - // state transition from kTabletPending - EXPECT_TRUE(TransitFromState(TabletMeta::kTabletDelayOffline, TabletEvent::kTsOffline)); - EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletOffline); - EXPECT_TRUE(TransitFromState(TabletMeta::kTabletDelayOffline, TabletEvent::kTsRestart)); - EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletOffline); - // state transtion from kTableOnLoad - EXPECT_TRUE(TransitFromState(TabletMeta::kTabletLoading, TabletEvent::kTsLoadSucc)); - EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletReady); - EXPECT_NE(state_machine_.ReadyTime(), std::numeric_limits::max()); - EXPECT_TRUE(TransitFromState(TabletMeta::kTabletLoading, TabletEvent::kTsLoadFail)); - EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletLoadFail); - EXPECT_EQ(state_machine_.ReadyTime(), std::numeric_limits::max()); - EXPECT_TRUE(TransitFromState(TabletMeta::kTabletOffline, TabletEvent::kTabletLoadFail)); - EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletLoadFail); - EXPECT_TRUE(TransitFromState(TabletMeta::kTabletLoading, TabletEvent::kTsDelayOffline)); - EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletDelayOffline); - EXPECT_TRUE(TransitFromState(TabletMeta::kTabletLoading, TabletEvent::kTsOffline)); - EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletOffline); - EXPECT_TRUE(TransitFromState(TabletMeta::kTabletLoading, TabletEvent::kTsRestart)); - EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletOffline); - // state transition from kTableUnLoading - EXPECT_TRUE(TransitFromState(TabletMeta::kTabletUnloading, TabletEvent::kTsUnLoadSucc)); - EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletOffline); - EXPECT_TRUE(TransitFromState(TabletMeta::kTabletUnloading, TabletEvent::kTsUnLoadFail)); - EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletUnloadFail); - EXPECT_TRUE(TransitFromState(TabletMeta::kTabletUnloading, TabletEvent::kTsOffline)); - EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletOffline); - // state transtion from kTableUnLoadFail - EXPECT_TRUE(TransitFromState(TabletMeta::kTabletUnloadFail, TabletEvent::kTsOffline)); - EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletOffline); - // state transtion from kTableReady - EXPECT_TRUE(TransitFromState(TabletMeta::kTabletReady, TabletEvent::kUnLoadTablet)); - EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletUnloading); - EXPECT_TRUE(TransitFromState(TabletMeta::kTabletReady, TabletEvent::kTsOffline)); - EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletOffline); - // state transition from kTabletDisable - EXPECT_TRUE(TransitFromState(TabletMeta::kTabletDisable, TabletEvent::kTableEnable)); - EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletOffline); + // state transitioin from kTableOffLine + state_machine_.SetStatus(TabletMeta::kTabletOffline); + EXPECT_EQ(state_machine_.ReadyTime(), std::numeric_limits::max()); + EXPECT_TRUE(TransitFromState(TabletMeta::kTabletOffline, TabletEvent::kLoadTablet)); + EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletLoading); + EXPECT_TRUE(TransitFromState(TabletMeta::kTabletOffline, TabletEvent::kTsDelayOffline)); + EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletDelayOffline); + EXPECT_TRUE(TransitFromState(TabletMeta::kTabletOffline, TabletEvent::kTableDisable)); + EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletDisable); + EXPECT_TRUE(TransitFromState(TabletMeta::kTabletOffline, TabletEvent::kTabletLoadFail)); + EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletLoadFail); + EXPECT_TRUE(TransitFromState(TabletMeta::kTabletOffline, TabletEvent::kFinishSplitTablet)); + EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletSplitted); + EXPECT_TRUE(TransitFromState(TabletMeta::kTabletOffline, TabletEvent::kFinishMergeTablet)); + EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletMerged); + // state transition from kTabletPending + EXPECT_TRUE(TransitFromState(TabletMeta::kTabletDelayOffline, TabletEvent::kTsOffline)); + EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletOffline); + EXPECT_TRUE(TransitFromState(TabletMeta::kTabletDelayOffline, TabletEvent::kTsRestart)); + EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletOffline); + // state transtion from kTableOnLoad + EXPECT_TRUE(TransitFromState(TabletMeta::kTabletLoading, TabletEvent::kTsLoadSucc)); + EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletReady); + EXPECT_NE(state_machine_.ReadyTime(), std::numeric_limits::max()); + EXPECT_TRUE(TransitFromState(TabletMeta::kTabletLoading, TabletEvent::kTsLoadFail)); + EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletLoadFail); + EXPECT_EQ(state_machine_.ReadyTime(), std::numeric_limits::max()); + EXPECT_TRUE(TransitFromState(TabletMeta::kTabletLoading, TabletEvent::kTsDelayOffline)); + EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletDelayOffline); + EXPECT_TRUE(TransitFromState(TabletMeta::kTabletLoading, TabletEvent::kTsOffline)); + EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletOffline); + EXPECT_TRUE(TransitFromState(TabletMeta::kTabletLoading, TabletEvent::kTsRestart)); + EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletOffline); + // state transition from kTableUnloading + EXPECT_TRUE(TransitFromState(TabletMeta::kTabletUnloading, TabletEvent::kTsUnLoadSucc)); + EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletOffline); + EXPECT_TRUE(TransitFromState(TabletMeta::kTabletUnloading, TabletEvent::kTsUnLoadFail)); + EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletUnloadFail); + EXPECT_TRUE(TransitFromState(TabletMeta::kTabletUnloading, TabletEvent::kTsOffline)); + EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletOffline); + // state transtion from kTableUnLoadFail + EXPECT_TRUE(TransitFromState(TabletMeta::kTabletUnloadFail, TabletEvent::kTsOffline)); + EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletOffline); + // state transtion from kTableReady + EXPECT_TRUE(TransitFromState(TabletMeta::kTabletReady, TabletEvent::kUnLoadTablet)); + EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletUnloading); + EXPECT_TRUE(TransitFromState(TabletMeta::kTabletReady, TabletEvent::kTsOffline)); + EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletOffline); + // state transition from kTabletDisable + EXPECT_TRUE(TransitFromState(TabletMeta::kTabletDisable, TabletEvent::kTableEnable)); + EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletOffline); } TEST_F(TabletStateMachineTest, IllegalTransition) { - state_machine_.SetStatus(TabletMeta::kTabletOffline); - EXPECT_FALSE(state_machine_.DoStateTransition(TabletEvent::kTsLoadSucc)); - EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletOffline); - - state_machine_.SetStatus(TabletMeta::kTabletReady); - int64_t ready_time = state_machine_.ReadyTime(); - EXPECT_LE(get_micros() - ready_time, 1); - EXPECT_FALSE(state_machine_.DoStateTransition(TabletEvent::kTableDisable)); - EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletReady); - EXPECT_EQ(ready_time, state_machine_.ReadyTime()); - - EXPECT_FALSE(TransitFromState(TabletMeta::kTabletLoading, TabletEvent::kTsUnLoadSucc)); - EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletLoading); + state_machine_.SetStatus(TabletMeta::kTabletOffline); + EXPECT_FALSE(state_machine_.DoStateTransition(TabletEvent::kTsLoadSucc)); + EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletOffline); - std::cout << TabletEvent::kLoadTablet << std::endl; -} + state_machine_.SetStatus(TabletMeta::kTabletReady); + int64_t ready_time = state_machine_.ReadyTime(); + EXPECT_LE(get_micros() - ready_time, 1); + EXPECT_FALSE(state_machine_.DoStateTransition(TabletEvent::kTableDisable)); + EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletReady); + EXPECT_EQ(ready_time, state_machine_.ReadyTime()); + EXPECT_FALSE(TransitFromState(TabletMeta::kTabletLoading, TabletEvent::kTsUnLoadSucc)); + EXPECT_EQ(state_machine_.GetStatus(), TabletMeta::kTabletLoading); + std::cout << TabletEvent::kLoadTablet << std::endl; +} } } } diff --git a/src/master/test/tabletnode_state_transition.cc b/src/master/test/tabletnode_state_transition.cc new file mode 100644 index 000000000..e6514411f --- /dev/null +++ b/src/master/test/tabletnode_state_transition.cc @@ -0,0 +1,58 @@ +#include +#include "gflags/gflags.h" +#include "gtest/gtest.h" +#include "master/tabletnode_manager.h" + +namespace tera { +namespace master { +namespace test { + +class TSStateTransitionTest : public ::testing::Test { + public: + TSStateTransitionTest() : node_(new TabletNode("127.0.0.1:2000", "1234567890")) {} + virtual ~TSStateTransitionTest() {} + + virtual void SetUp() {} + virtual void TearDown() {} + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + private: + bool TransitFromState(const NodeState& state, const NodeEvent& event) { + node_->state_ = state; + return node_->DoStateTransition(event); + } + + TabletNodePtr node_; +}; + +TEST_F(TSStateTransitionTest, LegalTransition) { + EXPECT_TRUE(TransitFromState(kOffline, NodeEvent::kZkNodeCreated)); + EXPECT_EQ(node_->state_, kReady); + EXPECT_TRUE(TransitFromState(kReady, NodeEvent::kZkSessionTimeout)); + EXPECT_EQ(node_->state_, kOffline); + EXPECT_TRUE(TransitFromState(kReady, NodeEvent::kPrepareKickTs)); + EXPECT_EQ(node_->state_, kWaitKick); + EXPECT_TRUE(TransitFromState(kWaitKick, NodeEvent::kCancelKickTs)); + EXPECT_EQ(node_->state_, kReady); + EXPECT_TRUE(TransitFromState(kWaitKick, NodeEvent::kZkKickNodeCreated)); + EXPECT_EQ(node_->state_, kKicked); + EXPECT_TRUE(TransitFromState(kWaitKick, NodeEvent::kZkSessionTimeout)); + EXPECT_EQ(node_->state_, kOffline); + EXPECT_TRUE(TransitFromState(kKicked, NodeEvent::kZkSessionTimeout)); + EXPECT_EQ(node_->state_, kOffline); +} + +TEST_F(TSStateTransitionTest, IllegalTransition) { + EXPECT_FALSE(TransitFromState(kOffline, NodeEvent::kZkSessionTimeout)); + EXPECT_EQ(node_->state_, kOffline); + EXPECT_FALSE(TransitFromState(kReady, NodeEvent::kZkNodeCreated)); + EXPECT_EQ(node_->state_, kReady); + EXPECT_FALSE(TransitFromState(kReady, NodeEvent::kCancelKickTs)); + EXPECT_EQ(node_->state_, kReady); + EXPECT_FALSE(TransitFromState(kWaitKick, NodeEvent::kPrepareKickTs)); + EXPECT_EQ(node_->state_, kWaitKick); +} +} +} +} diff --git a/src/master/test/trackable_gc_test.cc b/src/master/test/trackable_gc_test.cc index 222e29bea..b56acf150 100644 --- a/src/master/test/trackable_gc_test.cc +++ b/src/master/test/trackable_gc_test.cc @@ -15,7 +15,6 @@ DECLARE_string(tera_coord_type); DECLARE_string(tera_leveldb_env_type); -DECLARE_string(tera_master_gc_strategy); DECLARE_string(tera_tabletnode_path_prefix); namespace tera { @@ -24,550 +23,542 @@ namespace master { using leveldb::EnvOptions; class TrackableGcTest : public ::testing::Test { -public: - TrackableGcTest() : mgr_(nullptr, nullptr, nullptr) { - std::cout << "TrackableGcTest()" << std::endl; + public: + TrackableGcTest() : mgr_(nullptr, nullptr, nullptr) { + std::cout << "TrackableGcTest()" << std::endl; + } + virtual ~TrackableGcTest() { std::cout << "~TrackableGcTest()" << std::endl; } + + TablePtr CreateTable(const std::string& name) { + TableSchema schema; + TablePtr table(new Table(name, schema, kTableEnable)); + TableMeta meta; + EXPECT_TRUE(mgr_.AddTable(table, nullptr)); + std::cout << "create table " << name << " success" << std::endl; + + return table; + } + + TabletMeta CreateTabletMeta(const std::string& table_name, const std::string& start, + const std::string& end) { + TabletMeta meta; + meta.set_table_name(table_name); + meta.mutable_key_range()->set_key_start(start); + meta.mutable_key_range()->set_key_end(end); + meta.set_status(TabletMeta::kTabletOffline); + return meta; + } + + TabletPtr CreateTablet(const std::string& start, const std::string& end, TablePtr table) { + TabletMeta meta = CreateTabletMeta(table->GetTableName(), start, end); + StatusCode status; + TabletPtr tablet = table->AddTablet(meta, &status); + // TableSchema schema; + EXPECT_TRUE(tablet); + std::cout << "create tablet [" << start << ", " << end << "]" + << " success" << std::endl; + + return tablet; + } + + TabletFile CreateTabletFile(uint64_t tablet_id, uint32_t lg_id, uint64_t file_id, + bool create_local_file = false) { + TabletFile tablet_file; + tablet_file.tablet_id = tablet_id; + tablet_file.lg_id = lg_id; + tablet_file.file_id = file_id; + + if (create_local_file) { + leveldb::Env* env = io::LeveldbBaseEnv(); + std::string table_path = FLAGS_tera_tabletnode_path_prefix + kTableName_; + std::string path; + + if (lg_id == 0 && file_id == 0) { + path = leveldb::BuildTabletPath(table_path, tablet_id); + EXPECT_TRUE(env->CreateDir(path).ok()); + } else { + path = leveldb::BuildTableFilePath(table_path, tablet_id, lg_id, file_id); + size_t dir_pos = path.rfind("/"); + EXPECT_TRUE(dir_pos != std::string::npos); + EXPECT_TRUE(env->CreateDir(path.substr(0, dir_pos)).ok()); + leveldb::WritableFile* writable_file; + EXPECT_TRUE(env->NewWritableFile(path, &writable_file, EnvOptions()).ok()); + delete writable_file; + } } - virtual ~TrackableGcTest() { - std::cout << "~TrackableGcTest()" << std::endl; - } - - TablePtr CreateTable(const std::string& name) { - TableSchema schema; - TablePtr table(new Table(name, schema, kTableEnable)); - TableMeta meta; - EXPECT_TRUE(mgr_.AddTable(table, nullptr)); - std::cout << "create table " << name << " success" << std::endl; - return table; + return tablet_file; + } + + TabletInheritedFileInfo CreateTabletInheritedFileInfo(const TabletPtr& tablet, + const TabletFile& tablet_file) { + TabletInheritedFileInfo inh_file_info; + inh_file_info.set_table_name(tablet->GetTableName()); + inh_file_info.set_key_start(tablet->GetKeyStart()); + inh_file_info.set_key_end(tablet->GetKeyEnd()); + + LgInheritedLiveFiles* lg_files = inh_file_info.add_lg_inh_files(); + lg_files->set_lg_no(tablet_file.lg_id); + lg_files->add_file_number(1UL << 63 | tablet_file.tablet_id << 32 | tablet_file.file_id); + + return inh_file_info; + } + + TabletInheritedFileInfo CreateEmptyTabletInheritedFileInfo(const TabletPtr& tablet) { + TabletInheritedFileInfo inh_file_info; + inh_file_info.set_table_name(tablet->GetTableName()); + inh_file_info.set_key_start(tablet->GetKeyStart()); + inh_file_info.set_key_end(tablet->GetKeyEnd()); + + return inh_file_info; + } + + void TestAddInheritedFile() { + TablePtr table = CreateTable(kTableName_); + ASSERT_EQ(0, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(0, table->useful_inh_files_.size()); + + TabletFile file1 = CreateTabletFile(1, 0, 1); + MutexLock l(&table->mutex_); + + // first add, ref inc to 1 + // this step simulates collecting inherited file from filesystem + table->AddInheritedFile(file1, false); + ASSERT_EQ(1, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(1, table->useful_inh_files_.size()); + ASSERT_EQ(1, table->useful_inh_files_[1].size()); + ASSERT_EQ(1, table->useful_inh_files_[1][file1].ref); + ASSERT_EQ(0, table->obsolete_inh_files_.size()); + + // add the same file again, ref will not inc + // this step simulates collecting inherited file from filesystem again + table->AddInheritedFile(file1, false); + ASSERT_EQ(1, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(1, table->useful_inh_files_.size()); + ASSERT_EQ(1, table->useful_inh_files_[1].size()); + ASSERT_EQ(1, table->useful_inh_files_[1][file1].ref); + ASSERT_EQ(0, table->obsolete_inh_files_.size()); + + // add the same file again with ref, ref will inc to 2 + // this step simulates ts reporting using the TabletFile + table->AddInheritedFile(file1, true); + ASSERT_EQ(1, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(1, table->useful_inh_files_.size()); + ASSERT_EQ(1, table->useful_inh_files_[1].size()); + ASSERT_EQ(2, table->useful_inh_files_[1][file1].ref); + ASSERT_EQ(0, table->obsolete_inh_files_.size()); + + // add a new file from the same tablet, ref will inc to 1 + TabletFile file2 = CreateTabletFile(1, 0, 2); + table->AddInheritedFile(file2, false); + ASSERT_EQ(1, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(1, table->useful_inh_files_.size()); + ASSERT_EQ(2, table->useful_inh_files_[1].size()); + ASSERT_EQ(1, table->useful_inh_files_[1][file2].ref); + ASSERT_EQ(0, table->obsolete_inh_files_.size()); + + // add a new file from a different tablet, ref will inc to 1 + TabletFile file3 = CreateTabletFile(2, 0, 1); + table->AddInheritedFile(file3, false); + ASSERT_EQ(2, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(2, table->useful_inh_files_.size()); + ASSERT_EQ(1, table->useful_inh_files_[2].size()); + ASSERT_EQ(1, table->useful_inh_files_[2][file3].ref); + ASSERT_EQ(0, table->obsolete_inh_files_.size()); + } + + void TestReleaseInheritedFile() { + TablePtr table = CreateTable(kTableName_); + ASSERT_EQ(0, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(0, table->useful_inh_files_.size()); + + TabletFile file1 = CreateTabletFile(1, 0, 1); + MutexLock l(&table->mutex_); + + // first add, ref inc to 1 + // this step simulates collecting inherited file from filesystem + table->AddInheritedFile(file1, false); + ASSERT_EQ(1, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(1, table->useful_inh_files_.size()); + ASSERT_EQ(1, table->useful_inh_files_[1].size()); + ASSERT_EQ(1, table->useful_inh_files_[1][file1].ref); + ASSERT_EQ(0, table->obsolete_inh_files_.size()); + + // add the same file again with ref, ref will inc to 2 + // this step simulates ts reporting using the TabletFile + table->AddInheritedFile(file1, true); + ASSERT_EQ(1, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(1, table->useful_inh_files_.size()); + ASSERT_EQ(1, table->useful_inh_files_[1].size()); + ASSERT_EQ(2, table->useful_inh_files_[1][file1].ref); + ASSERT_EQ(0, table->obsolete_inh_files_.size()); + + // reease the file, ref will dec to 1 + // this step simulates ts reporting unusing the TabletFile + table->ReleaseInheritedFile(file1); + ASSERT_EQ(1, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(1, table->useful_inh_files_.size()); + ASSERT_EQ(1, table->useful_inh_files_[1].size()); + ASSERT_EQ(1, table->useful_inh_files_[1][file1].ref); + ASSERT_EQ(0, table->obsolete_inh_files_.size()); + + // this step simulates all live tablets fo the table have reported + table->EnableDeadTabletGarbageCollect(1); + ASSERT_EQ(0, table->useful_inh_files_.size()); + ASSERT_EQ(2, table->obsolete_inh_files_.size()); // tablet dir and file 1-0-1 + } + + void TestSplit() { + TablePtr table = CreateTable(kTableName_); + TabletPtr tablet_1 = CreateTablet("a", "z", table); + + TabletFile file1 = CreateTabletFile(1, 0, 1); + { + MutexLock l(&table->mutex_); + table->AddInheritedFile(file1, false); } - - TabletMeta CreateTabletMeta(const std::string& table_name, const std::string& start, const std::string& end) { - TabletMeta meta; - meta.set_table_name(table_name); - meta.mutable_key_range()->set_key_start(start); - meta.mutable_key_range()->set_key_end(end); - meta.set_status(TabletMeta::kTabletOffline); - return meta; + table->reported_live_tablets_num_ = 1; + tablet_1->inh_files_.insert(file1); + tablet_1->gc_reported_ = true; + + ASSERT_EQ(1, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(1, table->useful_inh_files_.size()); + ASSERT_EQ(1, table->useful_inh_files_[1].size()); + ASSERT_EQ(1, table->useful_inh_files_[1][file1].ref); + ASSERT_EQ(0, table->obsolete_inh_files_.size()); + + TabletPtr tablet_2, tablet_3; + TabletMeta meta_2 = CreateTabletMeta(table->GetTableName(), "a", "k"); + TabletMeta meta_3 = CreateTabletMeta(table->GetTableName(), "k", "z"); + tablet_2.reset(new Tablet(meta_2, tablet_1->GetTable())); + tablet_3.reset(new Tablet(meta_3, tablet_1->GetTable())); + + table->SplitTablet(tablet_1, meta_2, meta_3, &tablet_2, &tablet_3); + + // afer split: + // 1. each sub tablet shoud ref the inh file from the parent tablet + // 2. ref to the file shoud inc to 2 + // 3. reported_live_tablets_num_ shoud dec 1 if the parent tablet has + // reported + + // 1 + ASSERT_EQ(1, tablet_1->inh_files_.size()); + ASSERT_EQ(1, tablet_2->inh_files_.size()); + ASSERT_EQ(1, tablet_3->inh_files_.size()); + + // 2 + ASSERT_EQ(1, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(1, table->useful_inh_files_.size()); + ASSERT_EQ(1, table->useful_inh_files_[1].size()); + ASSERT_EQ(2, table->useful_inh_files_[1][file1].ref); + ASSERT_EQ(0, table->obsolete_inh_files_.size()); + + // 3 + ASSERT_EQ(0, table->reported_live_tablets_num_); + } + + void TestMerge() { + TablePtr table = CreateTable(kTableName_); + TabletPtr tablet_1 = CreateTablet("a", "k", table); + TabletPtr tablet_2 = CreateTablet("k", "z", table); + + TabletFile file1 = CreateTabletFile(1, 0, 1); + { + MutexLock l(&table->mutex_); + table->AddInheritedFile(file1, false); } - - TabletPtr CreateTablet(const std::string& start, const std::string& end, TablePtr table) { - TabletMeta meta = CreateTabletMeta(table->GetTableName(), start, end); - TabletPtr tablet = TabletManager::CreateTablet(table, meta); - //TableSchema schema; - StatusCode status; - EXPECT_TRUE(table->AddTablet(tablet, &status)); - std::cout << "create tablet [" << start << ", " << end << "]" << " success" << std::endl; - - return tablet; + TabletFile file2 = CreateTabletFile(2, 0, 1); + { + MutexLock l(&table->mutex_); + table->AddInheritedFile(file2, false); } - TabletFile CreateTabletFile(uint64_t tablet_id, uint32_t lg_id, uint64_t file_id, bool create_local_file = false) { - TabletFile tablet_file; - tablet_file.tablet_id = tablet_id; - tablet_file.lg_id = lg_id; - tablet_file.file_id = file_id; - - if (create_local_file) { - leveldb::Env* env = io::LeveldbBaseEnv(); - std::string table_path = FLAGS_tera_tabletnode_path_prefix + kTableName_; - std::string path; - - if (lg_id == 0 && file_id == 0) { - path = leveldb::BuildTabletPath(table_path, tablet_id); - EXPECT_TRUE(env->CreateDir(path).ok()); - } else { - path = leveldb::BuildTableFilePath(table_path, tablet_id, lg_id, file_id); - size_t dir_pos = path.rfind("/"); - EXPECT_TRUE(dir_pos != std::string::npos); - EXPECT_TRUE(env->CreateDir(path.substr(0, dir_pos)).ok()); - leveldb::WritableFile* writable_file; - EXPECT_TRUE(env->NewWritableFile(path, &writable_file, EnvOptions()).ok()); - delete writable_file; - } - } - - return tablet_file; + table->reported_live_tablets_num_ = 2; + tablet_1->inh_files_.insert(file1); + tablet_1->gc_reported_ = true; + tablet_2->inh_files_.insert(file2); + tablet_2->gc_reported_ = true; + + ASSERT_EQ(2, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(2, table->useful_inh_files_.size()); + ASSERT_EQ(1, table->useful_inh_files_[1].size()); + ASSERT_EQ(1, table->useful_inh_files_[1][file1].ref); + ASSERT_EQ(1, table->useful_inh_files_[2].size()); + ASSERT_EQ(1, table->useful_inh_files_[2][file2].ref); + ASSERT_EQ(0, table->obsolete_inh_files_.size()); + + TabletMeta meta_3 = CreateTabletMeta(table->GetTableName(), "a", "z"); + TabletPtr tablet_3(new Tablet(meta_3, tablet_1->GetTable())); + table->MergeTablets(tablet_1, tablet_2, meta_3, &tablet_3); + + // afer merge: + // 1. the merged tablet shoud ref all the inh file from the two parent + // tablet + // 2. ref to the file shoud not change + // 3. reported_live_tablets_num_ shoud dec by the reported num of parent + // tablets + + // 1 + ASSERT_EQ(1, tablet_1->inh_files_.size()); + ASSERT_EQ(1, tablet_2->inh_files_.size()); + ASSERT_EQ(2, tablet_3->inh_files_.size()); + + // 2 + ASSERT_EQ(2, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(2, table->useful_inh_files_.size()); + ASSERT_EQ(1, table->useful_inh_files_[1].size()); + ASSERT_EQ(1, table->useful_inh_files_[1][file1].ref); + ASSERT_EQ(1, table->useful_inh_files_[2].size()); + ASSERT_EQ(1, table->useful_inh_files_[2][file2].ref); + + // 3 + ASSERT_EQ(0, table->reported_live_tablets_num_); + } + + // Case1: ts report using file1 when master restart + void TestGarbageCollect1() { + TablePtr table = CreateTable(kTableName_); + table->reported_live_tablets_num_ = 0; + TabletPtr tablet_1 = CreateTablet("a", "z", table); + TabletFile file1 = CreateTabletFile(2, 0, 1); + + ASSERT_EQ(0, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(0, table->useful_inh_files_.size()); + ASSERT_EQ(0, tablet_1->inh_files_.size()); + ASSERT_EQ(0, table->obsolete_inh_files_.size()); + // no tablets have reported till now + ASSERT_EQ(0, table->reported_live_tablets_num_); + + // this step simulates tablet_1 reporting using file1 + TabletInheritedFileInfo inh_file_1 = CreateTabletInheritedFileInfo(tablet_1, file1); + table->GarbageCollect(inh_file_1); + ASSERT_EQ(1, tablet_1->inh_files_.size()); + + ASSERT_EQ(0, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(1, table->useful_inh_files_.size()); + ASSERT_EQ(1, table->useful_inh_files_[2].size()); + // ref inc to 1: + // first, ref inc to 2 since tablet_1 report using file1 with + // need_ref=TRUE + // then, ref des to 1 since EnableDeadTabletGarbageCollect() + ASSERT_EQ(1, table->useful_inh_files_[2][file1].ref); + + // all live tablets have reported, but file1 hasn't been released, gc won't + // work + ASSERT_EQ(1, table->reported_live_tablets_num_); + ASSERT_EQ(0, table->obsolete_inh_files_.size()); + } + + // Case2: ts report using file1, then report releasing file1 + void TestGarbageCollect2() { + TablePtr table = CreateTable(kTableName_); + table->reported_live_tablets_num_ = 0; + TabletPtr tablet_1 = CreateTablet("a", "z", table); + TabletFile file1 = CreateTabletFile(2, 0, 1); + + ASSERT_EQ(0, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(0, table->useful_inh_files_.size()); + ASSERT_EQ(0, tablet_1->inh_files_.size()); + ASSERT_EQ(0, table->obsolete_inh_files_.size()); + // no tablets have reported till now + ASSERT_EQ(0, table->reported_live_tablets_num_); + + // this step simulates tablet_1 reporting using file1 + TabletInheritedFileInfo inh_file_1 = CreateTabletInheritedFileInfo(tablet_1, file1); + table->GarbageCollect(inh_file_1); + ASSERT_EQ(1, tablet_1->inh_files_.size()); + + ASSERT_EQ(0, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(1, table->useful_inh_files_.size()); + ASSERT_EQ(1, table->useful_inh_files_[2].size()); + // ref inc to 1: + // first, ref inc to 2 since tablet_1 report using file1 with + // need_ref=TRUE + // then, ref des to 1 since EnableDeadTabletGarbageCollect() + ASSERT_EQ(1, table->useful_inh_files_[2][file1].ref); + + // all live tablets have reported, but file1 hasn't been released, gc won't + // work + ASSERT_EQ(1, table->reported_live_tablets_num_); + ASSERT_EQ(0, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(0, table->obsolete_inh_files_.size()); + + // this step simulates tablet_1 reporting releasing file1 + TabletInheritedFileInfo inh_file_2 = CreateEmptyTabletInheritedFileInfo(tablet_1); + table->GarbageCollect(inh_file_2); + ASSERT_EQ(0, tablet_1->inh_files_.size()); + + // all live tablets have reported, and ref files have been released, gc + // worked + ASSERT_EQ(0, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(0, table->useful_inh_files_.size()); + ASSERT_EQ(2, table->obsolete_inh_files_.size()); // tablet dir and file 2-0-1 + } + + // Case3: tablet_1 split into tablet_2 and tablet_3, + // then tablet_2 and tablet_3 will report + void TestGarbageCollect3() { + TablePtr table = CreateTable(kTableName_); + table->reported_live_tablets_num_ = 0; + TabletPtr tablet_1 = CreateTablet("a", "z", table); + TabletFile file1 = CreateTabletFile(1, 0, 1); + { + MutexLock l(&table->mutex_); + table->AddInheritedFile(file1, false); } - TabletInheritedFileInfo CreateTabletInheritedFileInfo(const TabletPtr& tablet, const TabletFile& tablet_file) { - TabletInheritedFileInfo inh_file_info; - inh_file_info.set_table_name(tablet->GetTableName()); - inh_file_info.set_key_start(tablet->GetKeyStart()); - inh_file_info.set_key_end(tablet->GetKeyEnd()); - - LgInheritedLiveFiles* lg_files = inh_file_info.add_lg_inh_files(); - lg_files->set_lg_no(tablet_file.lg_id); - lg_files->add_file_number(1UL << 63 | tablet_file.tablet_id << 32 | tablet_file.file_id); - - return inh_file_info; - } - - TabletInheritedFileInfo CreateEmptyTabletInheritedFileInfo(const TabletPtr& tablet) { - TabletInheritedFileInfo inh_file_info; - inh_file_info.set_table_name(tablet->GetTableName()); - inh_file_info.set_key_start(tablet->GetKeyStart()); - inh_file_info.set_key_end(tablet->GetKeyEnd()); - - return inh_file_info; - } - - void TestAddInheritedFile() { - TablePtr table = CreateTable(kTableName_); - ASSERT_EQ(0, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(0, table->useful_inh_files_.size()); - - TabletFile file1 = CreateTabletFile(1, 0, 1); - MutexLock l(&table->mutex_); - - // first add, ref inc to 1 - // this step simulates collecting inherited file from filesystem - table->AddInheritedFile(file1, false); - ASSERT_EQ(1, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(1, table->useful_inh_files_.size()); - ASSERT_EQ(1, table->useful_inh_files_[1].size()); - ASSERT_EQ(1, table->useful_inh_files_[1][file1].ref); - ASSERT_EQ(0, table->obsolete_inh_files_.size()); - - // add the same file again, ref will not inc - // this step simulates collecting inherited file from filesystem again - table->AddInheritedFile(file1, false); - ASSERT_EQ(1, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(1, table->useful_inh_files_.size()); - ASSERT_EQ(1, table->useful_inh_files_[1].size()); - ASSERT_EQ(1, table->useful_inh_files_[1][file1].ref); - ASSERT_EQ(0, table->obsolete_inh_files_.size()); - - // add the same file again with ref, ref will inc to 2 - // this step simulates ts reporting using the TabletFile - table->AddInheritedFile(file1, true); - ASSERT_EQ(1, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(1, table->useful_inh_files_.size()); - ASSERT_EQ(1, table->useful_inh_files_[1].size()); - ASSERT_EQ(2, table->useful_inh_files_[1][file1].ref); - ASSERT_EQ(0, table->obsolete_inh_files_.size()); - - // add a new file from the same tablet, ref will inc to 1 - TabletFile file2 = CreateTabletFile(1, 0, 2); - table->AddInheritedFile(file2, false); - ASSERT_EQ(1, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(1, table->useful_inh_files_.size()); - ASSERT_EQ(2, table->useful_inh_files_[1].size()); - ASSERT_EQ(1, table->useful_inh_files_[1][file2].ref); - ASSERT_EQ(0, table->obsolete_inh_files_.size()); - - // add a new file from a different tablet, ref will inc to 1 - TabletFile file3 = CreateTabletFile(2, 0, 1); - table->AddInheritedFile(file3, false); - ASSERT_EQ(2, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(2, table->useful_inh_files_.size()); - ASSERT_EQ(1, table->useful_inh_files_[2].size()); - ASSERT_EQ(1, table->useful_inh_files_[2][file3].ref); - ASSERT_EQ(0, table->obsolete_inh_files_.size()); - } - - void TestReleaseInheritedFile() { - TablePtr table = CreateTable(kTableName_); - ASSERT_EQ(0, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(0, table->useful_inh_files_.size()); - - TabletFile file1 = CreateTabletFile(1, 0, 1); - MutexLock l(&table->mutex_); - - // first add, ref inc to 1 - // this step simulates collecting inherited file from filesystem - table->AddInheritedFile(file1, false); - ASSERT_EQ(1, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(1, table->useful_inh_files_.size()); - ASSERT_EQ(1, table->useful_inh_files_[1].size()); - ASSERT_EQ(1, table->useful_inh_files_[1][file1].ref); - ASSERT_EQ(0, table->obsolete_inh_files_.size()); - - // add the same file again with ref, ref will inc to 2 - // this step simulates ts reporting using the TabletFile - table->AddInheritedFile(file1, true); - ASSERT_EQ(1, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(1, table->useful_inh_files_.size()); - ASSERT_EQ(1, table->useful_inh_files_[1].size()); - ASSERT_EQ(2, table->useful_inh_files_[1][file1].ref); - ASSERT_EQ(0, table->obsolete_inh_files_.size()); - - // reease the file, ref will dec to 1 - // this step simulates ts reporting unusing the TabletFile - table->ReleaseInheritedFile(file1); - ASSERT_EQ(1, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(1, table->useful_inh_files_.size()); - ASSERT_EQ(1, table->useful_inh_files_[1].size()); - ASSERT_EQ(1, table->useful_inh_files_[1][file1].ref); - ASSERT_EQ(0, table->obsolete_inh_files_.size()); - - // this step simulates all live tablets fo the table have reported - table->EnableDeadTabletGarbageCollect(1); - ASSERT_EQ(0, table->useful_inh_files_.size()); - ASSERT_EQ(2, table->obsolete_inh_files_.size()); // tablet dir and file 1-0-1 - } - - void TestSplit() { - TablePtr table = CreateTable(kTableName_); - TabletPtr tablet_1 = CreateTablet("a", "z", table); - - TabletFile file1 = CreateTabletFile(1, 0, 1); - { - MutexLock l(&table->mutex_); - table->AddInheritedFile(file1, false); - } - table->reported_live_tablets_num_ = 1; - tablet_1->inh_files_.insert(file1); - tablet_1->gc_reported_ = true; - - ASSERT_EQ(1, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(1, table->useful_inh_files_.size()); - ASSERT_EQ(1, table->useful_inh_files_[1].size()); - ASSERT_EQ(1, table->useful_inh_files_[1][file1].ref); - ASSERT_EQ(0, table->obsolete_inh_files_.size()); - - TabletPtr tablet_2, tablet_3; - TabletMeta meta_2 = CreateTabletMeta(table->GetTableName(), "a", "k"); - TabletMeta meta_3 = CreateTabletMeta(table->GetTableName(), "k", "z"); - tablet_2.reset(new Tablet(meta_2, tablet_1->GetTable())); - tablet_3.reset(new Tablet(meta_3, tablet_1->GetTable())); - - table->SplitTablet(tablet_1, meta_2, meta_3, &tablet_2, &tablet_3); - - // afer split: - // 1. each sub tablet shoud ref the inh file from the parent tablet - // 2. ref to the file shoud inc to 2 - // 3. reported_live_tablets_num_ shoud dec 1 if the parent tablet has reported - - // 1 - ASSERT_EQ(1, tablet_1->inh_files_.size()); - ASSERT_EQ(1, tablet_2->inh_files_.size()); - ASSERT_EQ(1, tablet_3->inh_files_.size()); - - // 2 - ASSERT_EQ(1, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(1, table->useful_inh_files_.size()); - ASSERT_EQ(1, table->useful_inh_files_[1].size()); - ASSERT_EQ(2, table->useful_inh_files_[1][file1].ref); - ASSERT_EQ(0, table->obsolete_inh_files_.size()); - - // 3 - ASSERT_EQ(0, table->reported_live_tablets_num_); - } - - void TestMerge() { - TablePtr table = CreateTable(kTableName_); - TabletPtr tablet_1 = CreateTablet("a", "k", table); - TabletPtr tablet_2 = CreateTablet("k", "z", table); - - TabletFile file1 = CreateTabletFile(1, 0, 1); - { - MutexLock l(&table->mutex_); - table->AddInheritedFile(file1, false); - } - TabletFile file2 = CreateTabletFile(2, 0, 1); - { - MutexLock l(&table->mutex_); - table->AddInheritedFile(file2, false); - } - - table->reported_live_tablets_num_ = 2; - tablet_1->inh_files_.insert(file1); - tablet_1->gc_reported_ = true; - tablet_2->inh_files_.insert(file2); - tablet_2->gc_reported_ = true; - - ASSERT_EQ(2, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(2, table->useful_inh_files_.size()); - ASSERT_EQ(1, table->useful_inh_files_[1].size()); - ASSERT_EQ(1, table->useful_inh_files_[1][file1].ref); - ASSERT_EQ(1, table->useful_inh_files_[2].size()); - ASSERT_EQ(1, table->useful_inh_files_[2][file2].ref); - ASSERT_EQ(0, table->obsolete_inh_files_.size()); - - TabletMeta meta_3 = CreateTabletMeta(table->GetTableName(), "a", "z"); - TabletPtr tablet_3(new Tablet(meta_3, tablet_1->GetTable())); - table->MergeTablets(tablet_1, tablet_2, meta_3, &tablet_3); - - // afer merge: - // 1. the merged tablet shoud ref all the inh file from the two parent tablet - // 2. ref to the file shoud not change - // 3. reported_live_tablets_num_ shoud dec by the reported num of parent tablets - - // 1 - ASSERT_EQ(1, tablet_1->inh_files_.size()); - ASSERT_EQ(1, tablet_2->inh_files_.size()); - ASSERT_EQ(2, tablet_3->inh_files_.size()); - - // 2 - ASSERT_EQ(2, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(2, table->useful_inh_files_.size()); - ASSERT_EQ(1, table->useful_inh_files_[1].size()); - ASSERT_EQ(1, table->useful_inh_files_[1][file1].ref); - ASSERT_EQ(1, table->useful_inh_files_[2].size()); - ASSERT_EQ(1, table->useful_inh_files_[2][file2].ref); - - // 3 - ASSERT_EQ(0, table->reported_live_tablets_num_); - } - - // Case1: ts report using file1 when master restart - void TestGarbageCollect1() { - TablePtr table = CreateTable(kTableName_); - table->reported_live_tablets_num_ = 0; - TabletPtr tablet_1 = CreateTablet("a", "z", table); - TabletFile file1 = CreateTabletFile(2, 0, 1); - - ASSERT_EQ(0, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(0, table->useful_inh_files_.size()); - ASSERT_EQ(0, tablet_1->inh_files_.size()); - ASSERT_EQ(0, table->obsolete_inh_files_.size()); - // no tablets have reported till now - ASSERT_EQ(0, table->reported_live_tablets_num_); - - // this step simulates tablet_1 reporting using file1 - TabletInheritedFileInfo inh_file_1 = CreateTabletInheritedFileInfo(tablet_1, file1); - table->GarbageCollect(inh_file_1); - ASSERT_EQ(1, tablet_1->inh_files_.size()); - - ASSERT_EQ(0, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(1, table->useful_inh_files_.size()); - ASSERT_EQ(1, table->useful_inh_files_[2].size()); - // ref inc to 1: - // first, ref inc to 2 since tablet_1 report using file1 with need_ref=TRUE - // then, ref des to 1 since EnableDeadTabletGarbageCollect() - ASSERT_EQ(1, table->useful_inh_files_[2][file1].ref); - - // all live tablets have reported, but file1 hasn't been released, gc won't work - ASSERT_EQ(1, table->reported_live_tablets_num_); - ASSERT_EQ(0, table->obsolete_inh_files_.size()); - } - - // Case2: ts report using file1, then report releasing file1 - void TestGarbageCollect2() { - TablePtr table = CreateTable(kTableName_); - table->reported_live_tablets_num_ = 0; - TabletPtr tablet_1 = CreateTablet("a", "z", table); - TabletFile file1 = CreateTabletFile(2, 0, 1); - - ASSERT_EQ(0, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(0, table->useful_inh_files_.size()); - ASSERT_EQ(0, tablet_1->inh_files_.size()); - ASSERT_EQ(0, table->obsolete_inh_files_.size()); - // no tablets have reported till now - ASSERT_EQ(0, table->reported_live_tablets_num_); - - // this step simulates tablet_1 reporting using file1 - TabletInheritedFileInfo inh_file_1 = CreateTabletInheritedFileInfo(tablet_1, file1); - table->GarbageCollect(inh_file_1); - ASSERT_EQ(1, tablet_1->inh_files_.size()); - - ASSERT_EQ(0, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(1, table->useful_inh_files_.size()); - ASSERT_EQ(1, table->useful_inh_files_[2].size()); - // ref inc to 1: - // first, ref inc to 2 since tablet_1 report using file1 with need_ref=TRUE - // then, ref des to 1 since EnableDeadTabletGarbageCollect() - ASSERT_EQ(1, table->useful_inh_files_[2][file1].ref); - - // all live tablets have reported, but file1 hasn't been released, gc won't work - ASSERT_EQ(1, table->reported_live_tablets_num_); - ASSERT_EQ(0, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(0, table->obsolete_inh_files_.size()); - - // this step simulates tablet_1 reporting releasing file1 - TabletInheritedFileInfo inh_file_2 = CreateEmptyTabletInheritedFileInfo(tablet_1); - table->GarbageCollect(inh_file_2); - ASSERT_EQ(0, tablet_1->inh_files_.size()); - - // all live tablets have reported, and ref files have been released, gc worked - ASSERT_EQ(0, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(0, table->useful_inh_files_.size()); - ASSERT_EQ(2, table->obsolete_inh_files_.size()); // tablet dir and file 2-0-1 - } - - // Case3: tablet_1 split into tablet_2 and tablet_3, - // then tablet_2 and tablet_3 will report - void TestGarbageCollect3() { - TablePtr table = CreateTable(kTableName_); - table->reported_live_tablets_num_ = 0; - TabletPtr tablet_1 = CreateTablet("a", "z", table); - TabletFile file1 = CreateTabletFile(1, 0, 1); - { - MutexLock l(&table->mutex_); - table->AddInheritedFile(file1, false); - } - - ASSERT_EQ(1, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(1, table->useful_inh_files_.size()); - ASSERT_EQ(1, table->useful_inh_files_[1].size()); - ASSERT_EQ(1, table->useful_inh_files_[1][file1].ref); - ASSERT_EQ(0, table->obsolete_inh_files_.size()); - - // no tablets have reported till now - ASSERT_EQ(0, table->reported_live_tablets_num_); - - TabletPtr tablet_2, tablet_3; - TabletMeta meta_2 = CreateTabletMeta(table->GetTableName(), "a", "k"); - TabletMeta meta_3 = CreateTabletMeta(table->GetTableName(), "k", "z"); - tablet_2.reset(new Tablet(meta_2, tablet_1->GetTable())); - tablet_3.reset(new Tablet(meta_3, tablet_1->GetTable())); - table->SplitTablet(tablet_1, meta_2, meta_3, &tablet_2, &tablet_3); - - // suppose after split, tablet_2 will ref file1 and talbet_3 has no ref - - // this step simulates tablet_2 reporting using file1 - TabletInheritedFileInfo inh_file_1 = CreateTabletInheritedFileInfo(tablet_2, file1); - table->GarbageCollect(inh_file_1); - ASSERT_EQ(1, tablet_2->inh_files_.size()); - - ASSERT_EQ(1, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(1, table->useful_inh_files_.size()); - ASSERT_EQ(1, table->useful_inh_files_[1].size()); - // ref inc to 2 since tablet_2 report using file1 - ASSERT_EQ(2, table->useful_inh_files_[1][file1].ref); - - // only tablet_2 has reported, gc won't work - ASSERT_EQ(1, table->reported_live_tablets_num_); - ASSERT_EQ(0, table->obsolete_inh_files_.size()); - - // this step simulates tablet_3 reporting using no file - TabletInheritedFileInfo inh_file_2 = CreateEmptyTabletInheritedFileInfo(tablet_3); - table->GarbageCollect(inh_file_2); - ASSERT_EQ(0, tablet_3->inh_files_.size()); - - // all live tablets have reported, but file1 hasn't been released, gc won't work - ASSERT_EQ(2, table->reported_live_tablets_num_); - ASSERT_EQ(0, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(0, table->obsolete_inh_files_.size()); - - // this step simulates tablet_2 reporting releasing file1 - TabletInheritedFileInfo inh_file_3 = CreateEmptyTabletInheritedFileInfo(tablet_2); - table->GarbageCollect(inh_file_3); - ASSERT_EQ(0, tablet_2->inh_files_.size()); - - // all live tablets have reported, and ref files have been released, gc worked - ASSERT_EQ(0, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(0, table->useful_inh_files_.size()); - ASSERT_EQ(2, table->obsolete_inh_files_.size()); // tablet dir and file 1-0-1 - } - - void TestCleanObsoleteFile() { - TablePtr table = CreateTable(kTableName_); - - // empty - ASSERT_EQ(0, table->CleanObsoleteFile()); - - // add a TabletFile and tablet dir - TabletFile file1 = CreateTabletFile(1, 0, 1, true); - TabletFile tablet_dir = CreateTabletFile(1, 0, 0, true); - table->obsolete_inh_files_.push(file1); - table->obsolete_inh_files_.push(tablet_dir); - - // obsolete_inh_files_ is has tablet_dir and file1 - ASSERT_EQ(2, table->obsolete_inh_files_.size()); - - // successfully delete 2 files: tablet_dir and file1 - ASSERT_EQ(2, table->CleanObsoleteFile()); - - // obsolete_inh_files_ is empty now - ASSERT_EQ(0, table->obsolete_inh_files_.size()); - } - - void TestTryCollectInheritedFile() { - TablePtr table = CreateTable(kTableName_); - - // no dead tablet - ASSERT_FALSE(table->TryCollectInheritedFile()); - - // dead tablet1 with file1 - TabletFile file1 = CreateTabletFile(1, 0, 1, true); - - ASSERT_EQ(0, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(0, table->useful_inh_files_.size()); - ASSERT_EQ(0, table->obsolete_inh_files_.size()); - - // collect dead tablet from filesystem - ASSERT_TRUE(table->TryCollectInheritedFile()); - - ASSERT_EQ(1, table->gc_disabled_dead_tablets_.size()); - ASSERT_EQ(1, table->useful_inh_files_.size()); - ASSERT_EQ(1, table->useful_inh_files_[1].size()); - ASSERT_EQ(1, table->useful_inh_files_[1][file1].ref); - ASSERT_EQ(0, table->obsolete_inh_files_.size()); - } - -protected: - virtual void SetUp() { - std::shared_ptr tablet_availability(new TabletAvailability(nullptr)); - MasterEnv().Init(nullptr, nullptr, nullptr, nullptr, nullptr, - std::shared_ptr(new ThreadPool), nullptr, - tablet_availability, nullptr); - std::cout << "SetUp" << std::endl; - } - - virtual void TearDown() { - mgr_.ClearTableList(); - std::cout << "TearDown" << std::endl; - } - - static void SetUpTestCase() { - std::cout << "SetUpTestCase" << std::endl; - FLAGS_tera_coord_type = "fake_zk"; - FLAGS_tera_leveldb_env_type = "local"; - FLAGS_tera_master_gc_strategy = "trackable"; - FLAGS_tera_tabletnode_path_prefix = "./"; - } - - static void TearDownTestCase() { - std::cout << "TearDownTestCase" << std::endl; - std::string table_path = FLAGS_tera_tabletnode_path_prefix + kTableName_; - EXPECT_TRUE(io::DeleteEnvDir(table_path).ok()); - } - -private: - TabletManager mgr_; - const static std::string kTableName_; + ASSERT_EQ(1, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(1, table->useful_inh_files_.size()); + ASSERT_EQ(1, table->useful_inh_files_[1].size()); + ASSERT_EQ(1, table->useful_inh_files_[1][file1].ref); + ASSERT_EQ(0, table->obsolete_inh_files_.size()); + + // no tablets have reported till now + ASSERT_EQ(0, table->reported_live_tablets_num_); + + TabletPtr tablet_2, tablet_3; + TabletMeta meta_2 = CreateTabletMeta(table->GetTableName(), "a", "k"); + TabletMeta meta_3 = CreateTabletMeta(table->GetTableName(), "k", "z"); + tablet_2.reset(new Tablet(meta_2, tablet_1->GetTable())); + tablet_3.reset(new Tablet(meta_3, tablet_1->GetTable())); + table->SplitTablet(tablet_1, meta_2, meta_3, &tablet_2, &tablet_3); + + // suppose after split, tablet_2 will ref file1 and talbet_3 has no ref + + // this step simulates tablet_2 reporting using file1 + TabletInheritedFileInfo inh_file_1 = CreateTabletInheritedFileInfo(tablet_2, file1); + table->GarbageCollect(inh_file_1); + ASSERT_EQ(1, tablet_2->inh_files_.size()); + + ASSERT_EQ(1, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(1, table->useful_inh_files_.size()); + ASSERT_EQ(1, table->useful_inh_files_[1].size()); + // ref inc to 2 since tablet_2 report using file1 + ASSERT_EQ(2, table->useful_inh_files_[1][file1].ref); + + // only tablet_2 has reported, gc won't work + ASSERT_EQ(1, table->reported_live_tablets_num_); + ASSERT_EQ(0, table->obsolete_inh_files_.size()); + + // this step simulates tablet_3 reporting using no file + TabletInheritedFileInfo inh_file_2 = CreateEmptyTabletInheritedFileInfo(tablet_3); + table->GarbageCollect(inh_file_2); + ASSERT_EQ(0, tablet_3->inh_files_.size()); + + // all live tablets have reported, but file1 hasn't been released, gc won't + // work + ASSERT_EQ(2, table->reported_live_tablets_num_); + ASSERT_EQ(0, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(0, table->obsolete_inh_files_.size()); + + // this step simulates tablet_2 reporting releasing file1 + TabletInheritedFileInfo inh_file_3 = CreateEmptyTabletInheritedFileInfo(tablet_2); + table->GarbageCollect(inh_file_3); + ASSERT_EQ(0, tablet_2->inh_files_.size()); + + // all live tablets have reported, and ref files have been released, gc + // worked + ASSERT_EQ(0, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(0, table->useful_inh_files_.size()); + ASSERT_EQ(2, table->obsolete_inh_files_.size()); // tablet dir and file 1-0-1 + } + + void TestCleanObsoleteFile() { + TablePtr table = CreateTable(kTableName_); + + // empty + ASSERT_EQ(0, table->CleanObsoleteFile()); + + // add a TabletFile and tablet dir + TabletFile file1 = CreateTabletFile(1, 0, 1, true); + TabletFile tablet_dir = CreateTabletFile(1, 0, 0, true); + table->obsolete_inh_files_.push(file1); + table->obsolete_inh_files_.push(tablet_dir); + + // obsolete_inh_files_ is has tablet_dir and file1 + ASSERT_EQ(2, table->obsolete_inh_files_.size()); + + // successfully delete 2 files: tablet_dir and file1 + ASSERT_EQ(2, table->CleanObsoleteFile()); + + // obsolete_inh_files_ is empty now + ASSERT_EQ(0, table->obsolete_inh_files_.size()); + } + + void TestTryCollectInheritedFile() { + TablePtr table = CreateTable(kTableName_); + + // no dead tablet + ASSERT_FALSE(table->TryCollectInheritedFile()); + + // dead tablet1 with file1 + TabletFile file1 = CreateTabletFile(1, 0, 1, true); + + ASSERT_EQ(0, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(0, table->useful_inh_files_.size()); + ASSERT_EQ(0, table->obsolete_inh_files_.size()); + + // collect dead tablet from filesystem + ASSERT_TRUE(table->TryCollectInheritedFile()); + + ASSERT_EQ(1, table->gc_disabled_dead_tablets_.size()); + ASSERT_EQ(1, table->useful_inh_files_.size()); + ASSERT_EQ(1, table->useful_inh_files_[1].size()); + ASSERT_EQ(1, table->useful_inh_files_[1][file1].ref); + ASSERT_EQ(0, table->obsolete_inh_files_.size()); + } + + protected: + virtual void SetUp() { + std::shared_ptr tablet_availability(new TabletAvailability(nullptr)); + MasterEnv().Init(nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + std::shared_ptr(new ThreadPool), nullptr, tablet_availability, + nullptr); + std::cout << "SetUp" << std::endl; + } + + virtual void TearDown() { + mgr_.ClearTableList(); + std::cout << "TearDown" << std::endl; + } + + static void SetUpTestCase() { + std::cout << "SetUpTestCase" << std::endl; + FLAGS_tera_coord_type = "fake_zk"; + FLAGS_tera_leveldb_env_type = "local"; + FLAGS_tera_tabletnode_path_prefix = "./"; + } + + static void TearDownTestCase() { + std::cout << "TearDownTestCase" << std::endl; + std::string table_path = FLAGS_tera_tabletnode_path_prefix + kTableName_; + EXPECT_TRUE(io::DeleteEnvDir(table_path).ok()); + } + + private: + TabletManager mgr_; + const static std::string kTableName_; }; const std::string TrackableGcTest::kTableName_ = "MasterTestTable"; -TEST_F(TrackableGcTest, AddInheritedFile) { - TestAddInheritedFile(); -} - -TEST_F(TrackableGcTest, ReleaseInheritedFile) { - TestReleaseInheritedFile(); -} +TEST_F(TrackableGcTest, AddInheritedFile) { TestAddInheritedFile(); } -TEST_F(TrackableGcTest, Split) { - TestSplit(); -} +TEST_F(TrackableGcTest, ReleaseInheritedFile) { TestReleaseInheritedFile(); } -TEST_F(TrackableGcTest, Merge) { - TestMerge(); -} +TEST_F(TrackableGcTest, Split) { TestSplit(); } -TEST_F(TrackableGcTest, GarbageCollect1) { - TestGarbageCollect1(); -} +TEST_F(TrackableGcTest, Merge) { TestMerge(); } -TEST_F(TrackableGcTest, GarbageCollect2) { - TestGarbageCollect2(); -} +TEST_F(TrackableGcTest, GarbageCollect1) { TestGarbageCollect1(); } -TEST_F(TrackableGcTest, GarbageCollect3) { - TestGarbageCollect3(); -} +TEST_F(TrackableGcTest, GarbageCollect2) { TestGarbageCollect2(); } -TEST_F(TrackableGcTest, CleanObsoleteFile) { - TestCleanObsoleteFile(); -} +TEST_F(TrackableGcTest, GarbageCollect3) { TestGarbageCollect3(); } -TEST_F(TrackableGcTest, TryCollectInheritedFile) { - TestTryCollectInheritedFile(); -} +TEST_F(TrackableGcTest, CleanObsoleteFile) { TestCleanObsoleteFile(); } -} // master -} // tera +TEST_F(TrackableGcTest, TryCollectInheritedFile) { TestTryCollectInheritedFile(); } +} // master +} // tera diff --git a/src/master/test/unload_tablet_procedure_test.cc b/src/master/test/unload_tablet_procedure_test.cc index 46e625bd7..e551f0162 100644 --- a/src/master/test/unload_tablet_procedure_test.cc +++ b/src/master/test/unload_tablet_procedure_test.cc @@ -10,164 +10,181 @@ namespace tera { namespace master { class UnloadTabletProcedureTest : public ::testing::Test { -public: - UnloadTabletProcedureTest() : tablet_manager_(new TabletManager(nullptr, nullptr, nullptr)), - ts_manager_(new TabletNodeManager(new MasterImpl)), - tablet_availability_(new TabletAvailability(tablet_manager_)) {} - - virtual ~UnloadTabletProcedureTest() {} - - virtual void SetUp() { - InitMasterEnv(); - TableSchema schema; - StatusCode ret_code; - table_ = TabletManager::CreateTable("test", schema, kTableEnable); - EXPECT_TRUE(table_); - EXPECT_TRUE(tablet_manager_->AddTable(table_, &ret_code)); - - TabletMeta tablet_meta; - TabletManager::PackTabletMeta(&tablet_meta, - "test", "", "", "test/tablet00000001", "", TabletMeta::kTabletOffline, 0); - tablet_ = TabletManager::CreateTablet(table_, tablet_meta); - EXPECT_TRUE(table_->AddTablet(tablet_, &ret_code)); - tablet_->SetStatus(TabletMeta::kTabletReady); - node_ = ts_manager_->AddTabletNode("127.0.0.1:2000", "1234567"); - tablet_->AssignTabletNode(node_); - unload_proc_ = std::shared_ptr( - new UnloadTabletProcedure(tablet_, MasterEnv().GetThreadPool().get())); - EXPECT_TRUE(unload_proc_); - } - - virtual void TearDown() {} - - static void SetUpTestCase() {}; - static void TearDownTestCase() {}; - -private: - void PrepareUnloadTabletCallbackParameter(StatusCode status) { - request_ = new UnloadTabletRequest; - response_ = new UnloadTabletResponse; - response_->set_status(status); - unload_proc_->unload_request_dispatching_ = true; - unload_proc_->unload_retrys_ = 0; - } - - void InitMasterEnv(); - TablePtr table_; - TabletPtr tablet_; - TabletNodePtr node_; - std::shared_ptr unload_proc_; - std::shared_ptr tablet_manager_; - std::shared_ptr ts_manager_; - std::shared_ptr tablet_availability_; - UnloadTabletRequest* request_; - UnloadTabletResponse* response_; + public: + UnloadTabletProcedureTest() + : tablet_manager_(new TabletManager(nullptr, nullptr, nullptr)), + ts_manager_(new TabletNodeManager(new MasterImpl(nullptr, nullptr))), + tablet_availability_(new TabletAvailability(tablet_manager_)) {} + + virtual ~UnloadTabletProcedureTest() {} + + virtual void SetUp() { + InitMasterEnv(); + TableSchema schema; + StatusCode ret_code; + table_ = TabletManager::CreateTable("test", schema, kTableEnable); + EXPECT_TRUE(table_); + EXPECT_TRUE(tablet_manager_->AddTable(table_, &ret_code)); + + TabletMeta tablet_meta; + TabletManager::PackTabletMeta(&tablet_meta, "test", "", "", "test/tablet00000001", "", + TabletMeta::kTabletOffline, 0); + tablet_ = table_->AddTablet(tablet_meta, &ret_code); + EXPECT_TRUE(tablet_); + tablet_->SetStatus(TabletMeta::kTabletReady); + node_ = ts_manager_->AddTabletNode("127.0.0.1:2000", "1234567"); + tablet_->AssignTabletNode(node_); + unload_proc_ = std::shared_ptr( + new UnloadTabletProcedure(tablet_, MasterEnv().GetThreadPool().get())); + EXPECT_TRUE(unload_proc_); + } + + virtual void TearDown() {} + + static void SetUpTestCase(){}; + static void TearDownTestCase(){}; + + private: + void PrepareUnloadTabletCallbackParameter(StatusCode status) { + request_ = new UnloadTabletRequest; + response_ = new UnloadTabletResponse; + response_->set_status(status); + unload_proc_->unload_request_dispatching_ = true; + unload_proc_->unload_retrys_ = 0; + } + + void InitMasterEnv(); + TablePtr table_; + TabletPtr tablet_; + TabletNodePtr node_; + std::shared_ptr unload_proc_; + std::shared_ptr tablet_manager_; + std::shared_ptr access_builder_; + std::shared_ptr ts_manager_; + std::shared_ptr tablet_availability_; + UnloadTabletRequest* request_; + UnloadTabletResponse* response_; }; void UnloadTabletProcedureTest::InitMasterEnv() { - MasterEnv().Init(nullptr, ts_manager_, tablet_manager_, - std::shared_ptr(new SizeScheduler), nullptr, - std::shared_ptr(new ThreadPool), nullptr, tablet_availability_, nullptr); + MasterEnv().Init(new MasterImpl(nullptr, nullptr), ts_manager_, tablet_manager_, access_builder_, + nullptr, std::shared_ptr(new SizeScheduler), nullptr, + std::shared_ptr(new ThreadPool), nullptr, tablet_availability_, + nullptr); } TEST_F(UnloadTabletProcedureTest, GenerateEvent) { - EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletReady); - EXPECT_EQ(node_->GetState(), kReady); - EXPECT_EQ(unload_proc_->GenerateEvent(), TabletEvent::kUnLoadTablet); - - node_->SetState(kOffLine, NULL); - EXPECT_EQ(unload_proc_->GenerateEvent(), TabletEvent::kTsOffline); - - node_->SetState(kReady, NULL); - tablet_->SetStatus(TabletMeta::kTabletUnloading); - EXPECT_EQ(unload_proc_->GenerateEvent(), TabletEvent::kTsUnLoadSucc); - - unload_proc_->unload_request_dispatching_ = true; - EXPECT_EQ(unload_proc_->GenerateEvent(), TabletEvent::kWaitRpcResponse); - unload_proc_->unload_request_dispatching_ = false; - FLAGS_tera_master_kick_tabletnode_enabled = true; - unload_proc_->unload_retrys_ = FLAGS_tera_master_impl_retry_times; - EXPECT_EQ(unload_proc_->GenerateEvent(), TabletEvent::kTsUnLoadSucc); - unload_proc_->unload_retrys_ = FLAGS_tera_master_impl_retry_times + 1; - EXPECT_EQ(unload_proc_->GenerateEvent(), TabletEvent::kWaitRpcResponse); - FLAGS_tera_master_kick_tabletnode_enabled = false; - EXPECT_EQ(unload_proc_->GenerateEvent(), TabletEvent::kTsUnLoadFail); + EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletReady); + EXPECT_EQ(node_->GetState(), kReady); + EXPECT_EQ(unload_proc_->GenerateEvent(), TabletEvent::kUnLoadTablet); + + node_->state_ = kOffline; + EXPECT_EQ(unload_proc_->GenerateEvent(), TabletEvent::kTsOffline); + + node_->state_ = kReady; + tablet_->SetStatus(TabletMeta::kTabletUnloading); + EXPECT_EQ(unload_proc_->GenerateEvent(), TabletEvent::kTsUnLoadSucc); + + unload_proc_->unload_request_dispatching_ = true; + EXPECT_EQ(unload_proc_->GenerateEvent(), TabletEvent::kWaitRpcResponse); + unload_proc_->unload_request_dispatching_ = false; + unload_proc_->unload_retrys_ = FLAGS_tera_master_impl_retry_times; + EXPECT_EQ(unload_proc_->GenerateEvent(), TabletEvent::kTsUnLoadSucc); + + unload_proc_->kick_ts_succ_ = true; + unload_proc_->unload_retrys_ = FLAGS_tera_master_impl_retry_times + 1; + EXPECT_EQ(unload_proc_->GenerateEvent(), TabletEvent::kWaitRpcResponse); + unload_proc_->kick_ts_succ_ = false; + EXPECT_EQ(unload_proc_->GenerateEvent(), TabletEvent::kTsUnLoadFail); } TEST_F(UnloadTabletProcedureTest, UnloadTabletCallback) { - PrepareUnloadTabletCallbackParameter(kTabletNodeOk); - unload_proc_->UnloadTabletCallback(request_, response_, false, 0); - EXPECT_FALSE(unload_proc_->unload_request_dispatching_); - EXPECT_EQ(unload_proc_->unload_retrys_, 0); - - PrepareUnloadTabletCallbackParameter(kKeyNotInRange); - unload_proc_->UnloadTabletCallback(request_, response_, false, 0); - EXPECT_FALSE(unload_proc_->unload_request_dispatching_); - EXPECT_EQ(unload_proc_->unload_retrys_, 0); - - PrepareUnloadTabletCallbackParameter(kTabletUnLoading); - unload_proc_->UnloadTabletCallback(request_, response_, false, 0); - EXPECT_TRUE(unload_proc_->unload_request_dispatching_); - EXPECT_EQ(unload_proc_->unload_retrys_, 1); - - PrepareUnloadTabletCallbackParameter(kTabletUnLoading); - unload_proc_->unload_retrys_ = FLAGS_tera_master_impl_retry_times; - FLAGS_tera_master_kick_tabletnode_enabled = false; - unload_proc_->UnloadTabletCallback(request_, response_, false, 0); - EXPECT_FALSE(unload_proc_->unload_request_dispatching_); - EXPECT_EQ(unload_proc_->unload_retrys_, FLAGS_tera_master_impl_retry_times + 1); - - PrepareUnloadTabletCallbackParameter(kTabletNodeOk); - unload_proc_->UnloadTabletCallback(request_, response_, true, 1); - EXPECT_TRUE(unload_proc_->unload_request_dispatching_); - EXPECT_EQ(unload_proc_->unload_retrys_, 1); - - PrepareUnloadTabletCallbackParameter(kTabletNodeOk); - node_->SetState(kOffLine, NULL); - unload_proc_->UnloadTabletCallback(request_, response_, false, 0); - EXPECT_FALSE(unload_proc_->unload_request_dispatching_); - EXPECT_EQ(unload_proc_->unload_retrys_, 0); + PrepareUnloadTabletCallbackParameter(kTabletNodeOk); + unload_proc_->UnloadTabletCallback(request_, response_, false, 0); + EXPECT_FALSE(unload_proc_->unload_request_dispatching_); + EXPECT_EQ(unload_proc_->unload_retrys_, 0); + + PrepareUnloadTabletCallbackParameter(kKeyNotInRange); + unload_proc_->UnloadTabletCallback(request_, response_, false, 0); + EXPECT_FALSE(unload_proc_->unload_request_dispatching_); + EXPECT_EQ(unload_proc_->unload_retrys_, 0); + + PrepareUnloadTabletCallbackParameter(kTabletNodeIsBusy); + unload_proc_->UnloadTabletCallback(request_, response_, false, 0); + PrepareUnloadTabletCallbackParameter(kTabletNodeIsBusy); + unload_proc_->UnloadTabletCallback(request_, response_, false, 0); + EXPECT_EQ(unload_proc_->unload_retrys_, 0); + EXPECT_TRUE(unload_proc_->unload_request_dispatching_); + + PrepareUnloadTabletCallbackParameter(kTabletUnloading); + unload_proc_->UnloadTabletCallback(request_, response_, false, 0); + EXPECT_TRUE(unload_proc_->unload_request_dispatching_); + EXPECT_EQ(unload_proc_->unload_retrys_, 1); + + PrepareUnloadTabletCallbackParameter(kTabletUnloading); + unload_proc_->unload_retrys_ = FLAGS_tera_master_impl_retry_times; + node_->state_ = kKicked; + unload_proc_->UnloadTabletCallback(request_, response_, false, 0); + EXPECT_EQ(unload_proc_->unload_retrys_, FLAGS_tera_master_impl_retry_times + 1); + EXPECT_TRUE(unload_proc_->kick_ts_succ_); + + PrepareUnloadTabletCallbackParameter(kTabletUnloading); + FLAGS_tera_master_kick_tabletnode_enabled = false; + unload_proc_->unload_retrys_ = FLAGS_tera_master_impl_retry_times; + unload_proc_->UnloadTabletCallback(request_, response_, false, 0); + EXPECT_FALSE(unload_proc_->unload_request_dispatching_); + EXPECT_EQ(unload_proc_->unload_retrys_, FLAGS_tera_master_impl_retry_times + 1); + EXPECT_FALSE(!unload_proc_->kick_ts_succ_); + + PrepareUnloadTabletCallbackParameter(kTabletNodeOk); + unload_proc_->UnloadTabletCallback(request_, response_, true, 1); + EXPECT_TRUE(unload_proc_->unload_request_dispatching_); + EXPECT_EQ(unload_proc_->unload_retrys_, 1); + + PrepareUnloadTabletCallbackParameter(kTabletNodeOk); + node_->state_ = kOffline; + unload_proc_->UnloadTabletCallback(request_, response_, false, 0); + EXPECT_FALSE(unload_proc_->unload_request_dispatching_); + EXPECT_EQ(unload_proc_->unload_retrys_, 0); } TEST_F(UnloadTabletProcedureTest, TestEventHandlers) { - EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletReady); - unload_proc_->UnloadTabletHandler(TabletEvent::kUnLoadTablet); - EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletUnloading); - unload_proc_->WaitRpcResponseHandler(TabletEvent::kWaitRpcResponse); - EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletUnloading); - - unload_proc_->UnloadTabletSuccHandler(TabletEvent::kTsUnLoadSucc); - EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletOffline); - tablet_->SetStatus(TabletMeta::kTabletUnloading); - unload_proc_->UnloadTabletFailHandler(TabletEvent::kTsUnLoadFail); - EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletUnloadFail); - unload_proc_->EOFHandler(TabletEvent::kEofEvent); + EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletReady); + unload_proc_->UnloadTabletHandler(TabletEvent::kUnLoadTablet); + EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletUnloading); + unload_proc_->WaitRpcResponseHandler(TabletEvent::kWaitRpcResponse); + EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletUnloading); + + unload_proc_->UnloadTabletSuccHandler(TabletEvent::kTsUnLoadSucc); + EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletOffline); + tablet_->SetStatus(TabletMeta::kTabletUnloading); + unload_proc_->UnloadTabletFailHandler(TabletEvent::kTsUnLoadFail); + EXPECT_EQ(tablet_->GetStatus(), TabletMeta::kTabletUnloadFail); + unload_proc_->EOFHandler(TabletEvent::kEofEvent); } -// those AsycLifeCycle* test cases should cause core dump. Since unload_proc_ is dectructed, we cannot +// those AsycLifeCycle* test cases should cause core dump. Since unload_proc_ is +// dectructed, we cannot // check its member fields TEST_F(UnloadTabletProcedureTest, AsynLifeCycle_CallbackExceedProcedureCycle) { - tablet_->SetStatus(TabletMeta::kTabletUnloading); - tablet_->node_->SetState(kReady, NULL); - unload_proc_->UnloadTabletAsync(); - unload_proc_.reset(); + tablet_->SetStatus(TabletMeta::kTabletUnloading); + tablet_->node_->state_ = kReady; + unload_proc_->UnloadTabletAsync(); + unload_proc_.reset(); } TEST_F(UnloadTabletProcedureTest, AsyncLifeCycleTest_DelayedUnloadTaskExeedProcedureCycle) { - tablet_->SetStatus(TabletMeta::kTabletUnloading); - tablet_->node_->SetState(kOffLine, NULL); - ThreadPool::Task task = std::bind(&UnloadTabletProcedure::UnloadTabletAsyncWrapper, - std::weak_ptr(unload_proc_)); - MasterEnv().GetThreadPool()->DelayTask(100, task); - MasterEnv().GetThreadPool()->Start(); - EXPECT_EQ(MasterEnv().GetThreadPool()->latest_.size(), 1); - unload_proc_.reset(); - usleep(200 * 1000); - EXPECT_EQ(MasterEnv().GetThreadPool()->latest_.size(), 0); - MasterEnv().GetThreadPool()->Stop(false); - + tablet_->SetStatus(TabletMeta::kTabletUnloading); + tablet_->node_->state_ = kOffline; + ThreadPool::Task task = std::bind(&UnloadTabletProcedure::UnloadTabletAsyncWrapper, + std::weak_ptr(unload_proc_)); + MasterEnv().GetThreadPool()->DelayTask(100, task); + MasterEnv().GetThreadPool()->Start(); + EXPECT_EQ(MasterEnv().GetThreadPool()->latest_.size(), 1); + unload_proc_.reset(); + usleep(200 * 1000); + EXPECT_EQ(MasterEnv().GetThreadPool()->latest_.size(), 0); + MasterEnv().GetThreadPool()->Stop(false); } - - } } diff --git a/src/master/test/update_auth_procedure_test.cc b/src/master/test/update_auth_procedure_test.cc new file mode 100644 index 000000000..e391ba140 --- /dev/null +++ b/src/master/test/update_auth_procedure_test.cc @@ -0,0 +1,92 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "proto/access_control.pb.h" +#include "master/update_auth_procedure.h" +#include "master/master_env.h" +#include +#include +#include "access/helpers/access_utils.h" + +namespace tera { +namespace master { +namespace test { + +class MockClosure : public google::protobuf::Closure { + public: + virtual void Run() { return; } +}; + +static const std::string user_name("tera_dev"); +static const std::string passwd("qwer1234"); + +class UpdateUgiProcedureTest : public ::testing::Test { + public: + UpdateUgiProcedureTest() + : request_(new UpdateUgiRequest), + response_(new UpdateUgiResponse), + proc_executor_(new ProcedureExecutor) { + std::string ugi_auth_policy; + auth::AccessUtils::GetAuthPolicy(AuthPolicyType::kUgiAuthPolicy, &ugi_auth_policy); + access_entry_.reset(new auth::AccessEntry(ugi_auth_policy)); + } + virtual ~UpdateUgiProcedureTest() {} + + virtual void SetUp() { + InitMasterEnv(); + + UpdateAuthInfo* update_auth_info = request_->mutable_update_info(); + UgiInfo* ugi_info = update_auth_info->mutable_ugi_info(); + ugi_info->set_user_name(user_name); + ugi_info->set_passwd(passwd); + update_auth_info->set_update_type(UpdateAuthType::kUpdateUgi); + + std::unique_ptr meta_write_record( + auth::AccessUtils::NewMetaRecord(access_entry_, *update_auth_info)); + update_proc_.reset(new UpdateAuthProcedure( + request_.get(), response_.get(), closure_.get(), MasterEnv().GetThreadPool().get(), + access_entry_, meta_write_record, auth::AccessUpdateType::UpdateUgi)); + } + + virtual void TearDown() { proc_executor_->running_ = false; } + + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + private: + void InitMasterEnv() { + MasterEnv().Init(nullptr, nullptr, nullptr, access_builder_, nullptr, nullptr, nullptr, + std::shared_ptr(new ThreadPool), proc_executor_, nullptr, nullptr); + } + + private: + std::shared_ptr access_builder_; + std::shared_ptr access_entry_; + std::unique_ptr request_; + std::unique_ptr response_; + std::unique_ptr closure_; + std::shared_ptr> update_proc_; + std::shared_ptr proc_executor_; +}; + +TEST_F(UpdateUgiProcedureTest, CheckUpdateUgiProcedure) { + EXPECT_EQ(update_proc_->ProcId(), "UpdateUgi:" + user_name); + std::string m_user_name = + auth::AccessUtils::GetNameFromMetaKey(update_proc_->meta_write_record_->key); + EXPECT_TRUE(!m_user_name.empty()); + EXPECT_EQ(m_user_name, user_name); +} + +TEST_F(UpdateUgiProcedureTest, UpdateUgiProcedureUpdateMetaFlase) { + update_proc_->UpdateMetaDone(false); + EXPECT_EQ(response_->status(), kMetaTabletError); +} + +TEST_F(UpdateUgiProcedureTest, UpdateUgiProcedureUpdateMetaTrue) { + update_proc_->UpdateMetaDone(true); + EXPECT_EQ(response_->status(), kMasterOk); +} +} +} +} diff --git a/src/master/test/update_table_procedure_test.cc b/src/master/test/update_table_procedure_test.cc index 12606f29d..df92ba400 100644 --- a/src/master/test/update_table_procedure_test.cc +++ b/src/master/test/update_table_procedure_test.cc @@ -14,60 +14,57 @@ namespace master { namespace test { class MockClosure : public google::protobuf::Closure { -public: - virtual void Run() { - return; - } + public: + virtual void Run() { return; } }; class UpdateTableProcedureTest : public ::testing::Test { -public: - UpdateTableProcedureTest() : tablet_manager_(new TabletManager(nullptr, nullptr, nullptr)), - proc_executor_(new ProcedureExecutor) {} - virtual ~UpdateTableProcedureTest() {} + public: + UpdateTableProcedureTest() + : tablet_manager_(new TabletManager(nullptr, nullptr, nullptr)), + proc_executor_(new ProcedureExecutor) {} + virtual ~UpdateTableProcedureTest() {} - virtual void SetUp() { - TableSchema schema; - table_ = TabletManager::CreateTable("test", schema, kTableEnable); - EXPECT_TRUE(table_); - - StatusCode code; - EXPECT_TRUE(tablet_manager_->AddTable(table_, &code)); - InitMasterEnv(); - request_.reset(new UpdateTableRequest); - response_.reset(new UpdateTableResponse); - closure_.reset(new MockClosure); - table_->LockTransition(); - update_proc_.reset(new UpdateTableProcedure( - table_, request_.get(), response_.get(), closure_.get(), nullptr)); - } - - -private: - void InitMasterEnv() { - MasterEnv().Init(nullptr, nullptr, - tablet_manager_, nullptr, nullptr, - nullptr, proc_executor_, nullptr ,nullptr); - } + virtual void SetUp() { + TableSchema schema; + table_ = TabletManager::CreateTable("test", schema, kTableEnable); + EXPECT_TRUE(table_); + StatusCode code; + EXPECT_TRUE(tablet_manager_->AddTable(table_, &code)); + InitMasterEnv(); + request_.reset(new UpdateTableRequest); + response_.reset(new UpdateTableResponse); + closure_.reset(new MockClosure); + table_->LockTransition(); + update_proc_.reset( + new UpdateTableProcedure(table_, request_.get(), response_.get(), closure_.get(), nullptr)); + } -private: - TablePtr table_; - std::shared_ptr tablet_manager_; - std::unique_ptr request_; - std::unique_ptr response_; - std::unique_ptr closure_; - std::shared_ptr update_proc_; - std::shared_ptr proc_executor_; + private: + void InitMasterEnv() { + MasterEnv().Init(nullptr, nullptr, tablet_manager_, access_builder_, nullptr, nullptr, nullptr, + nullptr, proc_executor_, nullptr, nullptr); + } + + private: + TablePtr table_; + std::shared_ptr tablet_manager_; + std::shared_ptr access_builder_; + std::unique_ptr request_; + std::unique_ptr response_; + std::unique_ptr closure_; + std::shared_ptr update_proc_; + std::shared_ptr proc_executor_; }; TEST_F(UpdateTableProcedureTest, NoConcurrentUpdate) { - EXPECT_FALSE(table_->LockTransition()); - update_proc_->EofPhaseHandler(UpdateTablePhase::kEofPhase); - EXPECT_FALSE(table_->InTransition()); - EXPECT_TRUE(table_->LockTransition()); + EXPECT_TRUE(table_->InTransition()); + EXPECT_FALSE(table_->LockTransition()); + update_proc_->EofPhaseHandler(UpdateTablePhase::kEofPhase); + EXPECT_FALSE(table_->InTransition()); + EXPECT_TRUE(table_->LockTransition()); } - } } } diff --git a/src/master/unload_tablet_procedure.cc b/src/master/unload_tablet_procedure.cc index 2c82b0f65..92f2b54a5 100644 --- a/src/master/unload_tablet_procedure.cc +++ b/src/master/unload_tablet_procedure.cc @@ -13,209 +13,216 @@ DECLARE_string(tera_master_meta_table_path); DECLARE_int32(tera_master_unload_rpc_timeout); DECLARE_int32(tera_master_impl_retry_times); DECLARE_int32(tera_master_control_tabletnode_retry_period); -DECLARE_bool(tera_master_kick_tabletnode_enabled); +// DECLARE_bool(tera_master_kick_tabletnode_enabled); namespace tera { namespace master { -std::map - UnloadTabletProcedure::event_handlers_ { - {TabletEvent::kTsUnloadBusy, std::bind(&UnloadTabletProcedure::TsUnloadBusyHandler, _1, _2)}, - {TabletEvent::kUnLoadTablet, std::bind(&UnloadTabletProcedure::UnloadTabletHandler, _1, _2)}, - {TabletEvent::kWaitRpcResponse, std::bind(&UnloadTabletProcedure::WaitRpcResponseHandler, _1, _2)}, - {TabletEvent::kTsUnLoadSucc, std::bind(&UnloadTabletProcedure::UnloadTabletSuccHandler, _1, _2)}, - {TabletEvent::kTsOffline, std::bind(&UnloadTabletProcedure::UnloadTabletSuccHandler, _1, _2)}, - {TabletEvent::kTsUnLoadFail, std::bind(&UnloadTabletProcedure::UnloadTabletFailHandler, _1, _2)}, - {TabletEvent::kEofEvent, std::bind(&UnloadTabletProcedure::EOFHandler, _1, _2)} - }; - -UnloadTabletProcedure::UnloadTabletProcedure(TabletPtr tablet, ThreadPool* thread_pool, bool is_sub_proc) : - id_(std::string("UnloadTablet:") + tablet->GetPath() + ":" + TimeStamp()), - tablet_(tablet), - unload_retrys_(0), - unload_request_dispatching_(false), - done_(false), - ts_unload_busying_(false), - is_sub_proc_(is_sub_proc), - thread_pool_(thread_pool) { - PROC_LOG(INFO) << "unload tablet begin, tablet: " << tablet_->GetPath(); - // I played a trick here by setting tablet status to kTableReady when user want to unload a - // tablet currently in status kTableUnLoading - if (tablet_->GetStatus() == TabletMeta::kTabletUnloading) { - tablet_->SetStatus(TabletMeta::kTabletReady); - } - PROC_CHECK(tablet_->GetStatus() == TabletMeta::kTabletReady); -} - -std::string UnloadTabletProcedure::ProcId() const { - return id_; -} +std::map UnloadTabletProcedure::event_handlers_{ + {TabletEvent::kTsUnloadBusy, std::bind(&UnloadTabletProcedure::TsUnloadBusyHandler, _1, _2)}, + {TabletEvent::kUnLoadTablet, std::bind(&UnloadTabletProcedure::UnloadTabletHandler, _1, _2)}, + {TabletEvent::kWaitRpcResponse, + std::bind(&UnloadTabletProcedure::WaitRpcResponseHandler, _1, _2)}, + {TabletEvent::kTsUnLoadSucc, + std::bind(&UnloadTabletProcedure::UnloadTabletSuccHandler, _1, _2)}, + {TabletEvent::kTsOffline, std::bind(&UnloadTabletProcedure::UnloadTabletSuccHandler, _1, _2)}, + {TabletEvent::kTsUnLoadFail, + std::bind(&UnloadTabletProcedure::UnloadTabletFailHandler, _1, _2)}, + {TabletEvent::kEofEvent, std::bind(&UnloadTabletProcedure::EOFHandler, _1, _2)}}; + +UnloadTabletProcedure::UnloadTabletProcedure(TabletPtr tablet, ThreadPool* thread_pool, + bool is_sub_proc) + : id_(std::string("UnloadTablet:") + tablet->GetPath() + ":" + TimeStamp()), + tablet_(tablet), + unload_retrys_(0), + unload_request_dispatching_(false), + kick_ts_succ_(true), + done_(false), + ts_unload_busying_(false), + is_sub_proc_(is_sub_proc), + thread_pool_(thread_pool) { + PROC_LOG(INFO) << "unload tablet begin, tablet: " << tablet_; + // I played a trick here by setting tablet status to kTableReady when user + // want to unload a + // tablet currently in status kTableUnloading + if (tablet_->GetStatus() == TabletMeta::kTabletUnloading || + tablet_->GetStatus() == TabletMeta::kTabletUnloadFail) { + tablet_->SetStatus(TabletMeta::kTabletReady); + } + if (tablet_->GetStatus() == TabletMeta::kTabletDelayOffline) { + tablet_->DoStateTransition(TabletEvent::kTsOffline); + } +} + +std::string UnloadTabletProcedure::ProcId() const { return id_; } TabletEvent UnloadTabletProcedure::GenerateEvent() { - if (tablet_->GetStatus() == TabletMeta::kTabletOffline || - tablet_->GetStatus() == TabletMeta::kTabletUnloadFail) { - return TabletEvent::kEofEvent; + if (tablet_->GetStatus() == TabletMeta::kTabletOffline || + tablet_->GetStatus() == TabletMeta::kTabletUnloadFail) { + return TabletEvent::kEofEvent; + } + if (tablet_->GetTabletNode()->NodeDown()) { + return TabletEvent::kTsOffline; + } + if (tablet_->GetStatus() == TabletMeta::kTabletReady) { + if (!tablet_->GetTabletNode()->CanUnload()) { + return TabletEvent::kTsUnloadBusy; } - if (tablet_->GetTabletNode()->NodeDown()) { - return TabletEvent::kTsOffline; + return TabletEvent::kUnLoadTablet; + } + + if (tablet_->GetStatus() == TabletMeta::kTabletUnloading) { + if (unload_request_dispatching_) { + return TabletEvent::kWaitRpcResponse; } - if (tablet_->GetStatus() == TabletMeta::kTabletReady) { - if (!tablet_->GetTabletNode()->CanUnload()) { - return TabletEvent::kTsUnloadBusy; - } - return TabletEvent::kUnLoadTablet; + if (unload_retrys_ <= FLAGS_tera_master_impl_retry_times) { + return TabletEvent::kTsUnLoadSucc; } - - if (tablet_->GetStatus() == TabletMeta::kTabletUnloading) { - // if kick_tablenode_enabled, wait until the TS is kicked and TS->NodeDown() is true - if (unload_request_dispatching_) { - return TabletEvent::kWaitRpcResponse; - } - if (unload_retrys_ <= FLAGS_tera_master_impl_retry_times) { - return TabletEvent::kTsUnLoadSucc; - } - else if (FLAGS_tera_master_kick_tabletnode_enabled) { - return TabletEvent::kWaitRpcResponse; - } - return TabletEvent::kTsUnLoadFail; + // if exhausted all unload retries, wait kick result + if (kick_ts_succ_) { + return TabletEvent::kWaitRpcResponse; } - return TabletEvent::kEofEvent; + return TabletEvent::kTsUnLoadFail; + } + return TabletEvent::kEofEvent; } void UnloadTabletProcedure::RunNextStage() { - TabletEvent event = GenerateEvent(); - auto it = event_handlers_.find(event); - PROC_CHECK(it != event_handlers_.end()) << "illegal event: " << event << ", tablet: " << tablet_; - UnloadTabletEventHandler handler = it->second; - handler(this, event); + TabletEvent event = GenerateEvent(); + auto it = event_handlers_.find(event); + PROC_CHECK(it != event_handlers_.end()) << "illegal event: " << event << ", tablet: " << tablet_; + UnloadTabletEventHandler handler = it->second; + handler(this, event); } void UnloadTabletProcedure::TsUnloadBusyHandler(const TabletEvent& event) { - // only log once the first time we enter this handler, avoiding redundant logging - PROC_VLOG_IF(23, !ts_unload_busying_) << "ts unload busy, tablet: " << tablet_; - ts_unload_busying_ = true; + // only log once the first time we enter this handler, avoiding redundant + // logging + PROC_VLOG_IF(23, !ts_unload_busying_) << "ts unload busy, tablet: " << tablet_; + ts_unload_busying_ = true; } void UnloadTabletProcedure::UnloadTabletHandler(const TabletEvent& event) { - tablet_->DoStateTransition(event); - unload_request_dispatching_.store(true); - PROC_LOG(INFO) << "dispatch UNLOAD tablet request to ts, tablet: " - << tablet_ << ", server: " << tablet_->GetServerAddr(); - UnloadTabletAsync(); + tablet_->DoStateTransition(event); + unload_request_dispatching_.store(true); + PROC_LOG(INFO) << "dispatch UNLOAD tablet request to ts, tablet: " << tablet_ + << ", server: " << tablet_->GetServerAddr(); + UnloadTabletAsync(); } -void UnloadTabletProcedure::WaitRpcResponseHandler(const TabletEvent&) { - -} +void UnloadTabletProcedure::WaitRpcResponseHandler(const TabletEvent&) {} void UnloadTabletProcedure::UnloadTabletSuccHandler(const TabletEvent& event) { - tablet_->DoStateTransition(event); - tablet_->GetTabletNode()->FinishUnload(); - PROC_LOG(INFO) << "tablet unload success, tablet: " << tablet_; + tablet_->DoStateTransition(event); + tablet_->GetTabletNode()->FinishUnload(); + PROC_LOG(INFO) << "tablet unload success, tablet: " << tablet_; } -// currently we will never enter this handler as we always kick tabletnode once all unload tries failed +// currently we will never enter this handler as we always kick tabletnode once +// all unload tries failed void UnloadTabletProcedure::UnloadTabletFailHandler(const TabletEvent& event) { - tablet_->GetTabletNode()->FinishUnload(); - tablet_->DoStateTransition(event); - PROC_LOG(ERROR) << "tablet unload fail finally, tablet: " << tablet_; + tablet_->GetTabletNode()->FinishUnload(); + tablet_->DoStateTransition(event); + PROC_LOG(ERROR) << "tablet unload fail finally, tablet: " << tablet_; } void UnloadTabletProcedure::EOFHandler(const TabletEvent&) { - PROC_LOG(INFO) << "UnloadTabletProcedure Done " << tablet_; - if (!is_sub_proc_) { - tablet_->UnlockTransition(); - } - done_.store(true); + PROC_LOG(INFO) << "UnloadTabletProcedure Done " << tablet_; + if (!is_sub_proc_) { + tablet_->UnlockTransition(); + } + done_.store(true); } -void UnloadTabletProcedure::UnloadTabletAsyncWrapper(std::weak_ptr weak_proc) { - auto proc = weak_proc.lock(); - if (!proc) { - LOG(WARNING) <<"weak_ptr expired, giveup the unloadtabletasync"; - return; - } - return proc->UnloadTabletAsync(); +void UnloadTabletProcedure::UnloadTabletAsyncWrapper( + std::weak_ptr weak_proc) { + auto proc = weak_proc.lock(); + if (!proc) { + LOG(WARNING) << "weak_ptr expired, giveup the unloadtabletasync"; + return; + } + return proc->UnloadTabletAsync(); } void UnloadTabletProcedure::UnloadTabletAsync() { - TabletNodePtr node = tablet_->GetTabletNode(); - if (node->NodeDown()) { - PROC_LOG(WARNING) << "tabletnode down ,giveup this unload rpc try, tablet: " << tablet_; - return; - } - tabletnode::TabletNodeClient node_client(thread_pool_, tablet_->GetServerAddr(), - FLAGS_tera_master_unload_rpc_timeout); - UnloadTabletRequest* request = new UnloadTabletRequest; - UnloadTabletResponse* response = new UnloadTabletResponse; - request->set_sequence_id(MasterEnv().SequenceId().Inc()); - request->set_tablet_name(tablet_->GetTableName()); - request->mutable_key_range()->set_key_start(tablet_->GetKeyStart()); - request->mutable_key_range()->set_key_end(tablet_->GetKeyEnd()); - request->set_session_id(node->uuid_); - - PROC_LOG(INFO) << "UnloadTabletAsync id: " << request->sequence_id() << ", " << tablet_; - // the explicit cast from shared_ptr to weak_ptr is necessary! - UnloadClosure done = - std::bind(&UnloadTabletProcedure::UnloadTabletCallbackWrapper, + TabletNodePtr node = tablet_->GetTabletNode(); + if (node->NodeDown()) { + PROC_LOG(WARNING) << "tabletnode down ,giveup this unload rpc try, tablet: " << tablet_; + return; + } + tabletnode::TabletNodeClient node_client(thread_pool_, tablet_->GetServerAddr(), + FLAGS_tera_master_unload_rpc_timeout); + UnloadTabletRequest* request = new UnloadTabletRequest; + UnloadTabletResponse* response = new UnloadTabletResponse; + request->set_sequence_id(MasterEnv().SequenceId().Inc()); + request->set_tablet_name(tablet_->GetTableName()); + request->mutable_key_range()->set_key_start(tablet_->GetKeyStart()); + request->mutable_key_range()->set_key_end(tablet_->GetKeyEnd()); + request->set_session_id(node->uuid_); + + PROC_LOG(INFO) << "UnloadTabletAsync id: " << request->sequence_id() << ", " << tablet_; + // the explicit cast from shared_ptr to weak_ptr is necessary! + UnloadClosure done = + std::bind(&UnloadTabletProcedure::UnloadTabletCallbackWrapper, std::weak_ptr(shared_from_this()), _1, _2, _3, _4); - node_client.UnloadTablet(request, response, done); + node_client.UnloadTablet(request, response, done); } void UnloadTabletProcedure::UnloadTabletCallbackWrapper( - std::weak_ptr weak_proc, - UnloadTabletRequest* request, - UnloadTabletResponse* response, - bool failed, int error_code) { - auto proc = weak_proc.lock(); - if (!proc) { - LOG(WARNING) << "weak_ptr expired, giveup the unloadtabletcallback"; - return; - } - return proc->UnloadTabletCallback(request, response, failed, error_code); -} - -void UnloadTabletProcedure::UnloadTabletCallback(UnloadTabletRequest* request, - UnloadTabletResponse* response, - bool failed, int error_code) { - StatusCode status = response->status(); - std::unique_ptr request_holder(request); - std::unique_ptr response_holder(response); - - TabletNodePtr node = tablet_->GetTabletNode(); - // we regard TS offline/restart as TabletUnload succ - if (node->NodeDown() || - (!failed && (status == kTabletNodeOk || status == kKeyNotInRange))){ - unload_request_dispatching_.store(false); - return; - } - - std::string errmsg = (failed ? - sofa::pbrpc::RpcErrorCodeToString(error_code) : StatusCodeToString(status)); - PROC_LOG(WARNING) << "unload tablet failed, " << tablet_ - << ", " << unload_retrys_ << "th attemp, error: " << errmsg; - - // failed and has no more retry times, return immediately - unload_retrys_++; - if (unload_retrys_ > FLAGS_tera_master_impl_retry_times) { - unload_request_dispatching_.store(false); - if (FLAGS_tera_master_kick_tabletnode_enabled) { - PROC_LOG(ERROR) << kSms << "unload tablet failed finally, " << tablet_ - << "now try kick ts: " << tablet_->GetTabletNode()->GetAddr(); - MasterEnv().GetMaster()->TryKickTabletNode(tablet_->GetTabletNode()->GetAddr()); - } - else { - PROC_LOG(ERROR) << kSms << "Unload tablet failed finally, " << tablet_; - } - return; - } - - // the explicit cast from shared_ptr to weak_ptr is necessary! - ThreadPool::Task task = - std::bind(&UnloadTabletProcedure::UnloadTabletAsyncWrapper, - std::weak_ptr(shared_from_this())); - MasterEnv().GetThreadPool()->DelayTask(FLAGS_tera_master_control_tabletnode_retry_period, task); + std::weak_ptr weak_proc, UnloadTabletRequest* request, + UnloadTabletResponse* response, bool failed, int error_code) { + auto proc = weak_proc.lock(); + if (!proc) { + LOG(WARNING) << "weak_ptr expired, giveup the unloadtabletcallback"; + return; + } + return proc->UnloadTabletCallback(request, response, failed, error_code); +} + +void UnloadTabletProcedure::UnloadTabletCallback(UnloadTabletRequest* request, + UnloadTabletResponse* response, bool failed, + int error_code) { + StatusCode status = response->status(); + std::unique_ptr request_holder(request); + std::unique_ptr response_holder(response); + uint64_t sequence_id = request->sequence_id(); + TabletNodePtr node = tablet_->GetTabletNode(); + // we regard TS offline/restart as TabletUnload succ + if (node->NodeDown() || (!failed && (status == kTabletNodeOk || status == kKeyNotInRange))) { + unload_request_dispatching_.store(false); + PROC_LOG(INFO) << "id: " << sequence_id << ", unload tablet succ, tablet: " << tablet_; + return; + } + + bool ts_ctrl_busy = (!failed && status == kTabletNodeIsBusy); + unload_retrys_ = (ts_ctrl_busy ? unload_retrys_ : unload_retrys_ + 1); + std::string errmsg = + (failed ? sofa::pbrpc::RpcErrorCodeToString(error_code) : StatusCodeToString(status)); + PROC_LOG_IF(WARNING, ts_ctrl_busy) + << "id: " << sequence_id + << ", ts is too busy, unload request is rejected and will retry later, " + "tablet: " << tablet_; + PROC_LOG_IF(WARNING, !ts_ctrl_busy) << "id: " << sequence_id << ", unload tablet failed, " + << tablet_ << ", " << unload_retrys_ + << "th attemp, error: " << errmsg; + + // failed and has no more retry times, return immediately + if (unload_retrys_ > FLAGS_tera_master_impl_retry_times) { + unload_request_dispatching_.store(false); + kick_ts_succ_ = MasterEnv().GetMaster()->TryKickTabletNode(tablet_->GetTabletNode()); + + PROC_LOG_IF(ERROR, kick_ts_succ_) << kSms << "unload tablet failed finally, " << tablet_ + << " kick ts succ: " << tablet_->GetTabletNode()->GetAddr(); + PROC_LOG_IF(ERROR, !kick_ts_succ_) + << kSms << "Unload tablet failed finally, kick ts failed too, " << tablet_; + return; + } + // the explicit cast from shared_ptr to weak_ptr is necessary! + ThreadPool::Task retry_task = std::bind(&UnloadTabletProcedure::UnloadTabletAsyncWrapper, + std::weak_ptr(shared_from_this())); + MasterEnv().GetThreadPool()->DelayTask(FLAGS_tera_master_control_tabletnode_retry_period, + retry_task); } - } } diff --git a/src/master/unload_tablet_procedure.h b/src/master/unload_tablet_procedure.h index 8ed197290..ff3dab109 100644 --- a/src/master/unload_tablet_procedure.h +++ b/src/master/unload_tablet_procedure.h @@ -15,58 +15,65 @@ namespace tera { namespace master { -class UnloadTabletProcedure : public Procedure, public std::enable_shared_from_this { -public: - UnloadTabletProcedure(TabletPtr tablet, ThreadPool* thread_pool, bool is_sub_proc = false); - - virtual ~UnloadTabletProcedure() {}; - - virtual std::string ProcId() const; - - virtual void RunNextStage(); - - virtual bool Done() {return done_.load();} - -private: - typedef std::function UnloadTabletEventHandler; - - typedef std::function UnloadClosure; - - TabletEvent GenerateEvent(); - - static void UnloadTabletAsyncWrapper(std::weak_ptr weak_proc); - - static void UnloadTabletCallbackWrapper(std::weak_ptr weak_proc, - UnloadTabletRequest* request, - UnloadTabletResponse* response, - bool failed, int error_code); +class UnloadTabletProcedure : public Procedure, + public std::enable_shared_from_this { + public: + UnloadTabletProcedure(TabletPtr tablet, ThreadPool* thread_pool, bool is_sub_proc = false); + + virtual ~UnloadTabletProcedure(){}; + + virtual std::string ProcId() const; + + virtual void RunNextStage(); + + virtual bool Done() { return done_.load(); } + + virtual ProcedureLimiter::LockType GetLockType() override { + if (is_sub_proc_) { + return ProcedureLimiter::LockType::kNoLimit; + } else { + return ProcedureLimiter::LockType::kUnload; + } + } + + private: + typedef std::function UnloadTabletEventHandler; + + typedef std::function UnloadClosure; - void UnloadTabletAsync(); + TabletEvent GenerateEvent(); - void UnloadTabletCallback(UnloadTabletRequest* request, - UnloadTabletResponse* response, + static void UnloadTabletAsyncWrapper(std::weak_ptr weak_proc); + + static void UnloadTabletCallbackWrapper(std::weak_ptr weak_proc, + UnloadTabletRequest* request, + UnloadTabletResponse* response, bool failed, + int error_code); + + void UnloadTabletAsync(); + + void UnloadTabletCallback(UnloadTabletRequest* request, UnloadTabletResponse* response, bool failed, int error_code); - - void TsUnloadBusyHandler(const TabletEvent& event); - void UnloadTabletHandler(const TabletEvent& event); - void WaitRpcResponseHandler(const TabletEvent&); - void UnloadTabletSuccHandler(const TabletEvent& event); - void UnloadTabletFailHandler(const TabletEvent& event); - void EOFHandler(const TabletEvent&); - -private: - const std::string id_; - std::mutex mutex_; - TabletPtr tablet_; - int32_t unload_retrys_; - std::atomic unload_request_dispatching_; - std::atomic done_; - bool ts_unload_busying_; - bool is_sub_proc_; - static std::map event_handlers_; - ThreadPool* thread_pool_; -}; + void TsUnloadBusyHandler(const TabletEvent& event); + void UnloadTabletHandler(const TabletEvent& event); + void WaitRpcResponseHandler(const TabletEvent&); + void UnloadTabletSuccHandler(const TabletEvent& event); + void UnloadTabletFailHandler(const TabletEvent& event); + void EOFHandler(const TabletEvent&); + + private: + const std::string id_; + std::mutex mutex_; + TabletPtr tablet_; + int32_t unload_retrys_; + std::atomic unload_request_dispatching_; + std::atomic kick_ts_succ_; + std::atomic done_; + bool ts_unload_busying_; + bool is_sub_proc_; + static std::map event_handlers_; + ThreadPool* thread_pool_; +}; } } - diff --git a/src/master/update_auth_procedure.cc b/src/master/update_auth_procedure.cc new file mode 100644 index 000000000..4848cdb92 --- /dev/null +++ b/src/master/update_auth_procedure.cc @@ -0,0 +1,22 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "master/update_auth_procedure.h" + +namespace tera { +namespace master { + +std::ostream& operator<<(std::ostream& o, const UpdateAuthPhase& phase) { + static const char* msg[] = {"UpdateAuthPhase::kUpdateMeta", "UpdateAuthPhase::kEofPhase", + "UpdateAuthPhase::kUnknown"}; + static uint32_t msg_size = sizeof(msg) / sizeof(const char*); + using UnderType = std::underlying_type::type; + uint32_t index = + static_cast(phase) - static_cast(UpdateAuthPhase::kUpdateMeta); + index = index < msg_size ? index : msg_size - 1; + o << msg[index]; + return o; +} +} +} diff --git a/src/master/update_auth_procedure.h b/src/master/update_auth_procedure.h new file mode 100644 index 000000000..d9d6be1ed --- /dev/null +++ b/src/master/update_auth_procedure.h @@ -0,0 +1,199 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include +#include +#include +#include +#include +#include "common/mutex.h" +#include "access/access_entry.h" +#include "master/master_env.h" +#include "master/procedure.h" +#include "master/tablet_manager.h" +#include "master/tabletnode_manager.h" +#include "proto/access_control.pb.h" +#include "proto/master_rpc.pb.h" +#include "proto/status_code.pb.h" +#include "access/helpers/access_utils.h" + +DECLARE_int32(tera_master_meta_retry_times); + +namespace tera { +namespace master { + +enum class UpdateAuthPhase { kUpdatePrepare, kUpdateMeta, kEofPhase }; + +struct UpdateUgiPair { + using Req = UpdateUgiRequest; + using Res = UpdateUgiResponse; +}; + +struct UpdateAuthPair { + using Req = UpdateAuthRequest; + using Res = UpdateAuthResponse; +}; + +template +class UpdateAuthProcedure : public Procedure { + public: + explicit UpdateAuthProcedure(const typename UpdatePair::Req* request, + typename UpdatePair::Res* response, + google::protobuf::Closure* closure, ThreadPool* thread_pool, + const std::shared_ptr& access_entry, + std::unique_ptr& meta_write_record, + auth::AccessUpdateType access_update_type); + + virtual ~UpdateAuthProcedure() {} + + virtual std::string ProcId() const; + + virtual void RunNextStage(); + + virtual bool Done() { return done_.load(); } + + private: + using UpdateAuthPhaseHandler = std::function; + + void SetNextPhase(const UpdateAuthPhase& phase) { + MutexLock l(&phase_mutex_); + phases_.emplace_back(phase); + } + + UpdateAuthPhase GetCurrentPhase() { + MutexLock l(&phase_mutex_); + return phases_.back(); + } + + void EnterPhaseWithResponseStatus(StatusCode code, UpdateAuthPhase phase) { + response_->set_status(code); + SetNextPhase(phase); + } + + void PrepareHandler(const UpdateAuthPhase& phase); + + void UpdateMetaHandler(const UpdateAuthPhase& phase); + + void UpdateMetaDone(bool succ); + + void EofPhaseHandler(const UpdateAuthPhase&); + + private: + const typename UpdatePair::Req* request_; + typename UpdatePair::Res* response_; + google::protobuf::Closure* rpc_closure_; + ThreadPool* thread_pool_; + std::atomic done_; + std::atomic update_meta_; + std::shared_ptr access_entry_; + std::unique_ptr meta_write_record_; + auth::AccessUpdateType access_update_type_; + + std::vector phases_; + mutable Mutex phase_mutex_; + static std::map phase_handlers_; +}; + +std::ostream& operator<<(std::ostream& o, const UpdateAuthPhase& phase); + +template +std::map::UpdateAuthPhaseHandler> + UpdateAuthProcedure::phase_handlers_{ + {UpdateAuthPhase::kUpdatePrepare, + std::bind(&UpdateAuthProcedure::PrepareHandler, _1, _2)}, + {UpdateAuthPhase::kUpdateMeta, + std::bind(&UpdateAuthProcedure::UpdateMetaHandler, _1, _2)}, + {UpdateAuthPhase::kEofPhase, + std::bind(&UpdateAuthProcedure::EofPhaseHandler, _1, _2)}, + }; + +template +UpdateAuthProcedure::UpdateAuthProcedure( + const typename UpdatePair::Req* request, typename UpdatePair::Res* response, + google::protobuf::Closure* closure, ThreadPool* thread_pool, + const std::shared_ptr& access_entry, + std::unique_ptr& meta_write_record, + auth::AccessUpdateType access_update_type) + : request_(request), + response_(response), + rpc_closure_(closure), + thread_pool_(thread_pool), + done_(false), + update_meta_(false), + access_entry_(access_entry), + meta_write_record_(std::move(meta_write_record)), + access_update_type_(access_update_type) { + PROC_LOG(INFO) << "begin auth update procedure"; + SetNextPhase(UpdateAuthPhase::kUpdatePrepare); +} + +template +std::string UpdateAuthProcedure::ProcId() const { + std::string prefix = + (access_update_type_ == auth::AccessUpdateType::UpdateUgi) ? "UpdateUgi:" : "UpdateAuth:"; + return prefix + auth::AccessUtils::GetNameFromMetaKey(meta_write_record_->key); +} + +template +void UpdateAuthProcedure::RunNextStage() { + UpdateAuthPhase phase = GetCurrentPhase(); + auto it = phase_handlers_.find(phase); + PROC_CHECK(it != phase_handlers_.end()) << "illegal phase: " << phase + << ", key name: " << meta_write_record_->key; + UpdateAuthPhaseHandler handler = it->second; + handler(this, phase); +} + +template +void UpdateAuthProcedure::PrepareHandler(const UpdateAuthPhase& phase) { + SetNextPhase(UpdateAuthPhase::kUpdateMeta); +} + +template +void UpdateAuthProcedure::UpdateMetaHandler(const UpdateAuthPhase& phase) { + if (update_meta_) { + // procedure will invoke in cycle + return; + } + update_meta_.store(true); + std::string type = meta_write_record_->is_delete ? std::string("delete") : std::string("update"); + PROC_LOG(INFO) << "update auth meta begin [key name : " << meta_write_record_->key + << ", value = " << meta_write_record_->value << ", type = " << type << "]"; + UpdateMetaClosure closure = std::bind(&UpdateAuthProcedure::UpdateMetaDone, this, _1); + MasterEnv().BatchWriteMetaTableAsync(*meta_write_record_, closure, + FLAGS_tera_master_meta_retry_times); +} + +template +void UpdateAuthProcedure::UpdateMetaDone(bool succ) { + if (!succ) { + PROC_LOG(ERROR) << "update meta failed"; + EnterPhaseWithResponseStatus(kMetaTabletError, UpdateAuthPhase::kEofPhase); + return; + } + // update master mem auth info at last + PROC_LOG(INFO) << "update auth info to meta succ"; + if (!meta_write_record_->is_delete) { + if (!access_entry_->GetAccessUpdater().AddRecord(meta_write_record_->key, + meta_write_record_->value)) { + PROC_LOG(INFO) << "Mismatch update auth type, not ugi && none"; + } + } else { + if (!access_entry_->GetAccessUpdater().DelRecord(meta_write_record_->key)) { + PROC_LOG(INFO) << "Mismatch update auth type, not ugi && none"; + } + } + EnterPhaseWithResponseStatus(kMasterOk, UpdateAuthPhase::kEofPhase); +} + +template +void UpdateAuthProcedure::EofPhaseHandler(const UpdateAuthPhase&) { + done_.store(true); + PROC_LOG(INFO) << "update auth finish"; + rpc_closure_->Run(); +} +} +} diff --git a/src/master/update_table_procedure.cc b/src/master/update_table_procedure.cc index c1729b90c..26e74666d 100644 --- a/src/master/update_table_procedure.cc +++ b/src/master/update_table_procedure.cc @@ -14,283 +14,277 @@ DECLARE_int32(tera_master_schema_update_retry_period); namespace tera { namespace master { -std::map - UpdateTableProcedure::phase_handlers_ { - {UpdateTablePhase::kPrepare, - std::bind(&UpdateTableProcedure::PrepareHandler, _1, _2)}, - {UpdateTablePhase::kUpdateMeta, - std::bind(&UpdateTableProcedure::UpdateMetaHandler, _1, _2)}, - {UpdateTablePhase::kTabletsSchemaSyncing, - std::bind(&UpdateTableProcedure::SyncTabletsSchemaHandler, _1, _2)}, - {UpdateTablePhase::kEofPhase, - std::bind(&UpdateTableProcedure::EofPhaseHandler, _1, _2)}, +std::map UpdateTableProcedure::phase_handlers_{ + {UpdateTablePhase::kPrepare, std::bind(&UpdateTableProcedure::PrepareHandler, _1, _2)}, + {UpdateTablePhase::kUpdateMeta, std::bind(&UpdateTableProcedure::UpdateMetaHandler, _1, _2)}, + {UpdateTablePhase::kTabletsSchemaSyncing, + std::bind(&UpdateTableProcedure::SyncTabletsSchemaHandler, _1, _2)}, + {UpdateTablePhase::kEofPhase, std::bind(&UpdateTableProcedure::EofPhaseHandler, _1, _2)}, }; -UpdateTableProcedure::UpdateTableProcedure(TablePtr table, - const UpdateTableRequest* request, - UpdateTableResponse* response, - google::protobuf::Closure* closure, - ThreadPool* thread_pool) : - table_(table), - request_(request), - response_(response), - rpc_closure_(closure), - update_meta_(false), - sync_tablets_schema_(false), - done_(false), - tablet_sync_cnt_(0), - thread_pool_(thread_pool) { - PROC_LOG(INFO) << "update schema begin, table: " << table_->GetTableName(); - SetNextPhase(UpdateTablePhase::kPrepare); +static bool IsUpdateCf(TablePtr table) { + TableSchema schema; + if (table->GetOldSchema(&schema)) { + return IsSchemaCfDiff(table->GetSchema(), schema); + } + return true; +} + +UpdateTableProcedure::UpdateTableProcedure(TablePtr table, const UpdateTableRequest* request, + UpdateTableResponse* response, + google::protobuf::Closure* closure, + ThreadPool* thread_pool) + : table_(table), + request_(request), + response_(response), + rpc_closure_(closure), + update_meta_(false), + sync_tablets_schema_(false), + done_(false), + tablet_sync_cnt_(0), + thread_pool_(thread_pool) { + PROC_LOG(INFO) << "update schema begin, table: " << table_->GetTableName(); + SetNextPhase(UpdateTablePhase::kPrepare); } std::string UpdateTableProcedure::ProcId() const { - std::string prefix("UpdateTable:"); - return prefix + table_->GetTableName(); + std::string prefix("UpdateTable:"); + return prefix + table_->GetTableName(); } void UpdateTableProcedure::RunNextStage() { - UpdateTablePhase phase = GetCurrentPhase(); - auto it = phase_handlers_.find(phase); - PROC_CHECK(it != phase_handlers_.end()) << "illegal phase: " << phase << ", table: " << table_; - UpdateTablePhaseHandler handler = it->second; - handler(this, phase); + UpdateTablePhase phase = GetCurrentPhase(); + auto it = phase_handlers_.find(phase); + PROC_CHECK(it != phase_handlers_.end()) << "illegal phase: " << phase << ", table: " << table_; + UpdateTablePhaseHandler handler = it->second; + handler(this, phase); } void UpdateTableProcedure::PrepareHandler(const UpdateTablePhase&) { - if (!MasterEnv().GetMaster()->HasPermission(request_, table_, "update table")) { - EnterPhaseWithResponseStatus(kNotPermission, UpdateTablePhase::kEofPhase); - return; - } + if (!MasterEnv().GetMaster()->HasPermission(request_, table_, "update table")) { + EnterPhaseWithResponseStatus(kNotPermission, UpdateTablePhase::kEofPhase); + return; + } - if (request_->schema().locality_groups_size() < 1) { - PROC_LOG(WARNING) << "No LocalityGroupSchema for " << request_->table_name(); - EnterPhaseWithResponseStatus(kInvalidArgument, UpdateTablePhase::kEofPhase); - return; - } - if (!table_->PrepareUpdate(request_->schema())) { - // another schema-update is doing... - PROC_LOG(INFO) << "[update] no concurrent schema-update, table:" << table_; + if (request_->schema().locality_groups_size() < 1) { + PROC_LOG(WARNING) << "No LocalityGroupSchema for " << request_->table_name(); + EnterPhaseWithResponseStatus(kInvalidArgument, UpdateTablePhase::kEofPhase); + return; + } + if (!table_->PrepareUpdate(request_->schema())) { + // another schema-update is doing... + PROC_LOG(INFO) << "[update] no concurrent schema-update, table:" << table_; + EnterPhaseWithResponseStatus(kTableNotSupport, UpdateTablePhase::kEofPhase); + return; + } + if (FLAGS_tera_online_schema_update_enabled && table_->GetStatus() == kTableEnable && + IsUpdateCf(table_)) { + table_->GetTablet(&tablet_list_); + for (std::size_t i = 0; i < tablet_list_.size(); ++i) { + TabletPtr tablet = tablet_list_[i]; + // no other tablet transition procedure is allowed while tablet is + // updating schema + // Should be very carefully with tablet's transition lock + if (!tablet->LockTransition()) { + PROC_LOG(WARNING) << "abort update online table schema, tablet: " << tablet->GetPath() + << " in transition"; + for (std::size_t j = 0; j < i; ++j) { + tablet = tablet_list_[j]; + tablet->UnlockTransition(); + } + table_->AbortUpdate(); EnterPhaseWithResponseStatus(kTableNotSupport, UpdateTablePhase::kEofPhase); return; - } - if (FLAGS_tera_online_schema_update_enabled && table_->GetStatus() == kTableEnable) { - table_->GetTablet(&tablet_list_); - for (std::size_t i = 0; i < tablet_list_.size(); ++i) { - TabletPtr tablet = tablet_list_[i]; - // no other tablet transition procedure is allowed while tablet is updating schema - // Should be very carefully with tablet's transition lock - if (!tablet->LockTransition()) { - PROC_LOG(WARNING) << "abort update online table schema, tablet: " - << tablet->GetPath() << " in transition"; - for (std::size_t j = 0; j < i; ++j) { - tablet = tablet_list_[j]; - tablet->UnlockTransition(); - } - table_->AbortUpdate(); - EnterPhaseWithResponseStatus(kTableNotSupport, UpdateTablePhase::kEofPhase); - return; - } - TabletMeta::TabletStatus status = tablet->GetStatus(); - if (status != TabletMeta::kTabletReady && status != TabletMeta::kTabletOffline) { - PROC_LOG(WARNING) << "abort update online table schema, tablet: " - << tablet->GetPath() << " not in ready status, status: " << StatusCodeToString(status); - for (std::size_t j = 0; j <= i; ++j) { - tablet = tablet_list_[j]; - tablet->UnlockTransition(); - } - table_->AbortUpdate(); - EnterPhaseWithResponseStatus(kTableNotSupport, UpdateTablePhase::kEofPhase); - return; - } - + } + TabletMeta::TabletStatus status = tablet->GetStatus(); + if (status != TabletMeta::kTabletReady && status != TabletMeta::kTabletOffline) { + PROC_LOG(WARNING) << "abort update online table schema, tablet: " << tablet->GetPath() + << " not in ready status, status: " << StatusCodeToString(status); + for (std::size_t j = 0; j <= i; ++j) { + tablet = tablet_list_[j]; + tablet->UnlockTransition(); } - } - SetNextPhase(UpdateTablePhase::kUpdateMeta); -} - -void UpdateTableProcedure::UpdateMetaHandler(const UpdateTablePhase&) { - if (update_meta_) { + table_->AbortUpdate(); + EnterPhaseWithResponseStatus(kTableNotSupport, UpdateTablePhase::kEofPhase); return; + } } - update_meta_.store(true); - PROC_LOG(INFO) << "table: " << table_->GetTableName() << " update meta begin"; - MetaWriteRecord record = PackMetaWriteRecord(table_, false); - UpdateMetaClosure closure = std::bind(&UpdateTableProcedure::UpdateMetaDone, this, _1); - MasterEnv().BatchWriteMetaTableAsync(record, closure, FLAGS_tera_master_meta_retry_times); + } + SetNextPhase(UpdateTablePhase::kUpdateMeta); } -static bool IsUpdateCf(TablePtr table) { - TableSchema schema; - if (table->GetOldSchema(&schema)) { - return IsSchemaCfDiff(table->GetSchema(), schema); - } - return true; +void UpdateTableProcedure::UpdateMetaHandler(const UpdateTablePhase&) { + if (update_meta_) { + return; + } + update_meta_.store(true); + PROC_LOG(INFO) << "table: " << table_->GetTableName() << " update meta begin"; + MetaWriteRecord record = PackMetaWriteRecord(table_, false); + UpdateMetaClosure closure = std::bind(&UpdateTableProcedure::UpdateMetaDone, this, _1); + MasterEnv().BatchWriteMetaTableAsync(record, closure, FLAGS_tera_master_meta_retry_times); } - void UpdateTableProcedure::UpdateMetaDone(bool succ) { - if (!succ) { - PROC_LOG(WARNING) << "fail to update meta"; - table_->AbortUpdate(); - EnterPhaseWithResponseStatus(kMetaTabletError, UpdateTablePhase::kEofPhase); - return; - } - PROC_LOG(INFO) << "update table info to meta succ"; - if (FLAGS_tera_online_schema_update_enabled && - table_->GetStatus() == kTableEnable && - IsUpdateCf(table_)) { - SetNextPhase(UpdateTablePhase::kTabletsSchemaSyncing); - return; - } - else { - table_->CommitUpdate(); - EnterPhaseWithResponseStatus(kMasterOk, UpdateTablePhase::kEofPhase); - return; - } + if (!succ) { + PROC_LOG(WARNING) << "fail to update meta"; + table_->AbortUpdate(); + EnterPhaseWithResponseStatus(kMetaTabletError, UpdateTablePhase::kEofPhase); + return; + } + PROC_LOG(INFO) << "update table info to meta succ"; + if (FLAGS_tera_online_schema_update_enabled && table_->GetStatus() == kTableEnable && + IsUpdateCf(table_)) { + SetNextPhase(UpdateTablePhase::kTabletsSchemaSyncing); + return; + } else { + table_->CommitUpdate(); + EnterPhaseWithResponseStatus(kMasterOk, UpdateTablePhase::kEofPhase); + return; + } } void UpdateTableProcedure::SyncTabletsSchemaHandler(const UpdateTablePhase&) { - if (sync_tablets_schema_) { - return; + if (sync_tablets_schema_) { + return; + } + sync_tablets_schema_.store(true); + PROC_LOG(INFO) << "begin sync tablets schema"; + tablet_sync_cnt_++; + // No LoadTabletProcedure will be issued once the tablet falls into + // kTabletOffline status while + // UpdateTableProcedure is running, because UpdateTableProcedure has got the + // tablet's TransitionLocks. + // So UpdateTableProcedure should takes of those offline tablets by issue + // LoadTabletProcedure for + // those tablets at the right point of time. + for (std::size_t i = 0; i < tablet_list_.size(); ++i) { + TabletPtr tablet = tablet_list_[i]; + if (tablet->GetStatus() == TabletMeta::kTabletOffline) { + offline_tablets_.emplace_back(tablet); + continue; } - sync_tablets_schema_.store(true); - PROC_LOG(INFO) << "begin sync tablets schema"; + UpdateClosure done = std::bind(&UpdateTableProcedure::UpdateTabletSchemaCallback, this, tablet, + 0, _1, _2, _3, _4); + NoticeTabletSchemaUpdate(tablet_list_[i], done); tablet_sync_cnt_++; - // No LoadTabletProcedure will be issued once the tablet falls into kTabletOffline status while - // UpdateTableProcedure is running, because UpdateTableProcedure has got the tablet's TransitionLocks. - // So UpdateTableProcedure should takes of those offline tablets by issue LoadTabletProcedure for - // those tablets at the right point of time. - for (std::size_t i = 0; i < tablet_list_.size(); ++i) { - TabletPtr tablet = tablet_list_[i]; - if (tablet->GetStatus() == TabletMeta::kTabletOffline) { - offline_tablets_.emplace_back(tablet); - continue; - } - UpdateClosure done = std::bind( - &UpdateTableProcedure::UpdateTabletSchemaCallback, this, tablet, 0, _1, _2, _3, _4); - NoticeTabletSchemaUpdate(tablet_list_[i], done); - tablet_sync_cnt_++; - } - if (--tablet_sync_cnt_ == 0) { - RecoverOfflineTablets(); - table_->CommitUpdate(); - table_->ClearSchemaSyncLock(); - PROC_VLOG(23) << "sync tablets schema finished"; - EnterPhaseWithResponseStatus(kMasterOk, UpdateTablePhase::kEofPhase); - } - + } + if (--tablet_sync_cnt_ == 0) { + RecoverOfflineTablets(); + table_->CommitUpdate(); + PROC_VLOG(23) << "sync tablets schema finished"; + EnterPhaseWithResponseStatus(kMasterOk, UpdateTablePhase::kEofPhase); + } } void UpdateTableProcedure::NoticeTabletSchemaUpdate(TabletPtr tablet, UpdateClosure done) { - - tabletnode::TabletNodeClient node_client(thread_pool_, tablet->GetServerAddr()); - UpdateRequest* request = new UpdateRequest; - UpdateResponse* response = new UpdateResponse; - request->set_sequence_id(MasterEnv().SequenceId().Inc()); - request->mutable_schema()->CopyFrom(tablet->GetSchema()); - request->set_tablet_name(tablet->GetTableName()); - request->mutable_key_range()->set_key_start(tablet->GetKeyStart()); - request->mutable_key_range()->set_key_end(tablet->GetKeyEnd()); - node_client.Update(request, response, done); + tabletnode::TabletNodeClient node_client(thread_pool_, tablet->GetServerAddr()); + UpdateRequest* request = new UpdateRequest; + UpdateResponse* response = new UpdateResponse; + request->set_sequence_id(MasterEnv().SequenceId().Inc()); + request->mutable_schema()->CopyFrom(tablet->GetSchema()); + request->set_tablet_name(tablet->GetTableName()); + request->mutable_key_range()->set_key_start(tablet->GetKeyStart()); + request->mutable_key_range()->set_key_end(tablet->GetKeyEnd()); + node_client.Update(request, response, done); } -void UpdateTableProcedure::UpdateTabletSchemaCallback(TabletPtr tablet, - int32_t retry_times, - UpdateRequest* request, - UpdateResponse* response, - bool rpc_failed, - int status_code) { - std::unique_ptr request_holder(request); - std::unique_ptr response_holder(response); - StatusCode status = response_holder->status(); - TabletNodePtr node = tablet->GetTabletNode(); - PROC_VLOG(23) << "tablet: " << tablet->GetPath() - << ", update schema callback returned, remain cnt: " << tablet_sync_cnt_; - if (tablet->GetStatus() == TabletMeta::kTabletOffline || - (!rpc_failed && status == kTabletNodeOk)) { - if (tablet->GetStatus() == TabletMeta::kTabletOffline) { - offline_tablets_.emplace_back(tablet); - } - // do not unlock offline tablets' TransitionLock. After all online tablets UpdateTabletSchema RPC - // has been collected, UpdateTableProcedure will issue LoadTabletProcedure for those offline tablets - // and those offline tablets will be locked until their LoadTabletProcedure finished. - else { - tablet->UnlockTransition(); - } - if (--tablet_sync_cnt_ == 0) { - PROC_VLOG(23) << "sync tablets schema finished"; - RecoverOfflineTablets(); - table_->CommitUpdate(); - EnterPhaseWithResponseStatus(kMasterOk, UpdateTablePhase::kEofPhase); - } - return; +void UpdateTableProcedure::UpdateTabletSchemaCallback(TabletPtr tablet, int32_t retry_times, + UpdateRequest* request, + UpdateResponse* response, bool rpc_failed, + int status_code) { + std::unique_ptr request_holder(request); + std::unique_ptr response_holder(response); + StatusCode status = response_holder->status(); + TabletNodePtr node = tablet->GetTabletNode(); + PROC_VLOG(23) << "tablet: " << tablet->GetPath() + << ", update schema callback returned, remain cnt: " << tablet_sync_cnt_; + if (tablet->GetStatus() == TabletMeta::kTabletOffline || + (!rpc_failed && status == kTabletNodeOk)) { + if (tablet->GetStatus() == TabletMeta::kTabletOffline) { + offline_tablets_.emplace_back(tablet); } + // do not unlock offline tablets' TransitionLock. After all online tablets + // UpdateTabletSchema RPC + // has been collected, UpdateTableProcedure will issue LoadTabletProcedure + // for those offline tablets + // and those offline tablets will be locked until their LoadTabletProcedure + // finished. + else { + tablet->UnlockTransition(); + } + if (--tablet_sync_cnt_ == 0) { + PROC_VLOG(23) << "sync tablets schema finished"; + RecoverOfflineTablets(); + table_->CommitUpdate(); + EnterPhaseWithResponseStatus(kMasterOk, UpdateTablePhase::kEofPhase); + } + return; + } - if (rpc_failed || status != kTabletNodeOk) { - if (rpc_failed) { - PROC_LOG(WARNING) << "[update] fail to update schema: " - << sofa::pbrpc::RpcErrorCodeToString(status_code) - << ": " << tablet; - } else { - PROC_LOG(WARNING) << "[update] fail to update schema: " << StatusCodeToString(status) - << ": " << tablet; - } - if (retry_times > FLAGS_tera_master_schema_update_retry_times) { - PROC_LOG(ERROR) << "[update] retry " << retry_times << " times, kick " - << tablet->GetServerAddr(); - // we ensure tablet's schema been updated by kickoff the hosting tabletnode if all - // UpdateTabletSchema RPC tries failed - tablet->UnlockTransition(); - MasterEnv().GetMaster()->TryKickTabletNode(tablet->GetServerAddr()); - if (--tablet_sync_cnt_ == 0) { - RecoverOfflineTablets(); - table_->CommitUpdate(); - EnterPhaseWithResponseStatus(kMasterOk, UpdateTablePhase::kEofPhase); - } - } else { - UpdateClosure done = - std::bind(&UpdateTableProcedure::UpdateTabletSchemaCallback, this, tablet, - retry_times + 1, _1, _2, _3, _4); - ThreadPool::Task task = - std::bind(&UpdateTableProcedure::NoticeTabletSchemaUpdate, this, tablet, done); - MasterEnv().GetThreadPool()->DelayTask( - FLAGS_tera_master_schema_update_retry_period * 1000, task); - } - return; + if (rpc_failed || status != kTabletNodeOk) { + if (rpc_failed) { + PROC_LOG(WARNING) << "[update] fail to update schema: " + << sofa::pbrpc::RpcErrorCodeToString(status_code) << ": " << tablet; + } else { + PROC_LOG(WARNING) << "[update] fail to update schema: " << StatusCodeToString(status) << ": " + << tablet; } + if (retry_times > FLAGS_tera_master_schema_update_retry_times) { + PROC_LOG(ERROR) << "[update] retry " << retry_times << " times, kick " + << tablet->GetServerAddr(); + // we ensure tablet's schema been updated by kickoff the hosting + // tabletnode if all + // UpdateTabletSchema RPC tries failed + tablet->UnlockTransition(); + MasterEnv().GetMaster()->TryKickTabletNode(tablet->GetServerAddr()); + if (--tablet_sync_cnt_ == 0) { + RecoverOfflineTablets(); + table_->CommitUpdate(); + EnterPhaseWithResponseStatus(kMasterOk, UpdateTablePhase::kEofPhase); + } + } else { + UpdateClosure done = std::bind(&UpdateTableProcedure::UpdateTabletSchemaCallback, this, + tablet, retry_times + 1, _1, _2, _3, _4); + ThreadPool::Task task = + std::bind(&UpdateTableProcedure::NoticeTabletSchemaUpdate, this, tablet, done); + MasterEnv().GetThreadPool()->DelayTask(FLAGS_tera_master_schema_update_retry_period * 1000, + task); + } + return; + } } void UpdateTableProcedure::RecoverOfflineTablets() { - for (auto tablet : offline_tablets_) { - std::shared_ptr proc(new LoadTabletProcedure(tablet, tablet->GetTabletNode(), thread_pool_)); - MasterEnv().GetExecutor()->AddProcedure(proc); - } + for (auto tablet : offline_tablets_) { + std::shared_ptr proc( + new LoadTabletProcedure(tablet, tablet->GetTabletNode(), thread_pool_)); + MasterEnv().GetExecutor()->AddProcedure(proc); + } } void UpdateTableProcedure::EofPhaseHandler(const UpdateTablePhase&) { - done_.store(true); - PROC_LOG(INFO) << "update table finish"; - if (table_ && table_->InTransition()) { - table_->UnlockTransition(); - } + done_.store(true); + PROC_LOG(INFO) << "update table finish"; + if (table_ && table_->InTransition()) { + table_->UnlockTransition(); + } + if (rpc_closure_) { rpc_closure_->Run(); + } } -std::ostream& operator<< (std::ostream& o, const UpdateTablePhase& phase) { - static const char* msg[] = {"UpdateTablePhase::kPrepare", - "UpdateTablePhase::kUpdateMeta", - "UpdateTablePhase::kTabletsSchemaSyncing", - "UpdateTablePhase::kEofPhase", - "UpdateTablePhase::kUnknown"}; - static uint32_t msg_size = sizeof(msg) / sizeof(const char*); - typedef std::underlying_type::type UnderType; - uint32_t index = static_cast(phase) - static_cast(UpdateTablePhase::kPrepare); - index = index < msg_size ? index : msg_size - 1; - o << msg[index]; - return o; - +std::ostream& operator<<(std::ostream& o, const UpdateTablePhase& phase) { + static const char* msg[] = {"UpdateTablePhase::kPrepare", "UpdateTablePhase::kUpdateMeta", + "UpdateTablePhase::kTabletsSchemaSyncing", + "UpdateTablePhase::kEofPhase", "UpdateTablePhase::kUnknown"}; + static uint32_t msg_size = sizeof(msg) / sizeof(const char*); + typedef std::underlying_type::type UnderType; + uint32_t index = + static_cast(phase) - static_cast(UpdateTablePhase::kPrepare); + index = index < msg_size ? index : msg_size - 1; + o << msg[index]; + return o; } - } } diff --git a/src/master/update_table_procedure.h b/src/master/update_table_procedure.h index 2630fd47a..89b1db03a 100644 --- a/src/master/update_table_procedure.h +++ b/src/master/update_table_procedure.h @@ -11,84 +11,104 @@ #include "master/procedure.h" #include "proto/master_rpc.pb.h" +#include + namespace tera { namespace master { -enum class UpdateTablePhase{ - kPrepare, - kUpdateMeta, - kTabletsSchemaSyncing, - kEofPhase, +enum class UpdateTablePhase { + kPrepare, + kUpdateMeta, + kTabletsSchemaSyncing, + kEofPhase, }; -std::ostream& operator<< (std::ostream& o, const UpdateTablePhase& phase); +std::ostream& operator<<(std::ostream& o, const UpdateTablePhase& phase); class UpdateTableProcedure : public Procedure { -public: - UpdateTableProcedure(TablePtr table, - const UpdateTableRequest* request, - UpdateTableResponse* response, - google::protobuf::Closure* closure, - ThreadPool* thread_pool); - - virtual std::string ProcId() const; - - virtual void RunNextStage(); - - virtual bool Done() {return done_.load();} - - virtual ~UpdateTableProcedure() {} -private: -typedef std::function UpdateClosure; - - typedef std::function UpdateTablePhaseHandler; - - void SetNextPhase(const UpdateTablePhase& phase) {phases_.emplace_back(phase);} - - void EnterPhaseWithResponseStatus(StatusCode code, UpdateTablePhase phase) { - response_->set_status(code); - SetNextPhase(phase); - } - - UpdateTablePhase GetCurrentPhase() {return phases_.back();} - - void PrepareHandler(const UpdateTablePhase& phase); - - void UpdateMetaHandler(const UpdateTablePhase& phase); - - void UpdateMetaDone(bool succ); - - void SyncTabletsSchemaHandler(const UpdateTablePhase& phase); - - void EofPhaseHandler(const UpdateTablePhase&); - - void NoticeTabletSchemaUpdate(TabletPtr tablet, UpdateClosure done); - - void UpdateTabletSchemaCallback(TabletPtr tablet, - int32_t retry_times, - UpdateRequest* request, - UpdateResponse* response, - bool fail, - int status_code); - - void RecoverOfflineTablets(); - -private: - TablePtr table_; - const UpdateTableRequest* request_; - UpdateTableResponse* response_; - google::protobuf::Closure* rpc_closure_; - std::atomic update_meta_; - std::atomic sync_tablets_schema_; - std::atomic done_; - std::vector phases_; - std::vector tablet_list_; - std::vector offline_tablets_; - std::atomic tablet_sync_cnt_; - static std::map phase_handlers_; - ThreadPool* thread_pool_; + public: + UpdateTableProcedure(TablePtr table, const UpdateTableRequest* request, + UpdateTableResponse* response, google::protobuf::Closure* closure, + ThreadPool* thread_pool); + + virtual std::string ProcId() const; + + virtual void RunNextStage(); + + virtual bool Done() { return done_.load(); } + + virtual ~UpdateTableProcedure() {} + + private: + typedef std::function UpdateClosure; + + typedef std::function + UpdateTablePhaseHandler; + + void SetNextPhase(const UpdateTablePhase& phase) { phases_.emplace_back(phase); } + + void EnterPhaseWithResponseStatus(StatusCode code, UpdateTablePhase phase) { + response_->set_status(code); + SetNextPhase(phase); + } + + UpdateTablePhase GetCurrentPhase() { return phases_.back(); } + + void PrepareHandler(const UpdateTablePhase& phase); + + void UpdateMetaHandler(const UpdateTablePhase& phase); + + void UpdateMetaDone(bool succ); + + void SyncTabletsSchemaHandler(const UpdateTablePhase& phase); + + void EofPhaseHandler(const UpdateTablePhase&); + + void NoticeTabletSchemaUpdate(TabletPtr tablet, UpdateClosure done); + + void UpdateTabletSchemaCallback(TabletPtr tablet, int32_t retry_times, UpdateRequest* request, + UpdateResponse* response, bool fail, int status_code); + + void RecoverOfflineTablets(); + + private: + TablePtr table_; + const UpdateTableRequest* request_; + UpdateTableResponse* response_; + google::protobuf::Closure* rpc_closure_; + std::atomic update_meta_; + std::atomic sync_tablets_schema_; + std::atomic done_; + std::vector phases_; + std::vector tablet_list_; + std::vector offline_tablets_; + std::atomic tablet_sync_cnt_; + static std::map phase_handlers_; + ThreadPool* thread_pool_; }; +class UpdateDoneClosure : public google::protobuf::Closure { + public: + static google::protobuf::Closure* NewInstance(UpdateTableRequest* request, + UpdateTableResponse* response) { + return new UpdateDoneClosure(request, response); + } + + virtual void Run() override { + delete request_; + delete response_; + delete this; + } + + virtual ~UpdateDoneClosure() {} + + protected: + UpdateDoneClosure(UpdateTableRequest* request, UpdateTableResponse* response) + : request_(request), response_(response) {} + + private: + UpdateTableRequest* request_; + UpdateTableResponse* response_; +}; } } - diff --git a/src/master/user_manager.cc b/src/master/user_manager.cc index cc290dc14..5145c8fcf 100644 --- a/src/master/user_manager.cc +++ b/src/master/user_manager.cc @@ -14,222 +14,208 @@ User::User(const std::string& name, const UserInfo& user_info) : name_(name), user_info_(user_info) {} std::string User::DebugString() { - MutexLock locker(&mutex_); - std::stringstream ss; - ss << "user:" << name_ - << ", token:" << user_info_.token() - << ", group(" << user_info_.group_name_size() << "):"; - for (int i = 0; i < user_info_.group_name_size(); ++i) { - ss << user_info_.group_name(i) << " "; - } - return ss.str(); + MutexLock locker(&mutex_); + std::stringstream ss; + ss << "user:" << name_ << ", token:" << user_info_.token() << ", group(" + << user_info_.group_name_size() << "):"; + for (int i = 0; i < user_info_.group_name_size(); ++i) { + ss << user_info_.group_name(i) << " "; + } + return ss.str(); } std::string User::GetUserName() { - MutexLock locker(&mutex_); - return name_; + MutexLock locker(&mutex_); + return name_; } UserInfo User::GetUserInfo() { - MutexLock locker(&mutex_); - return user_info_; + MutexLock locker(&mutex_); + return user_info_; } void User::SetUserInfo(const UserInfo& user_info) { - MutexLock locker(&mutex_); - user_info_.CopyFrom(user_info); + MutexLock locker(&mutex_); + user_info_.CopyFrom(user_info); } std::string User::GetToken() { - MutexLock locker(&mutex_); - return user_info_.token(); + MutexLock locker(&mutex_); + return user_info_.token(); } -void User::ToMetaTableKeyValue(std::string* packed_key, - std::string* packed_value) { - MutexLock locker(&mutex_); - *packed_key = '~' + name_; - user_info_.SerializeToString(packed_value); +void User::ToMetaTableKeyValue(std::string* packed_key, std::string* packed_value) { + MutexLock locker(&mutex_); + *packed_key = '~' + name_; + user_info_.SerializeToString(packed_value); } bool UserManager::AddUser(const std::string& user_name, const UserInfo& user_info) { - MutexLock locker(&mutex_); - std::shared_ptr user(new User(user_name, user_info)); - user->SetUserInfo(user_info); - - std::pair ret = - all_users_.insert(std::pair(user_name, user)); - if (ret.second) { - LOG(INFO) << "[user-manager] add user: " << user_name << " success"; - } else { - LOG(INFO) << "[user-manager] add user: " << user_name << " failed: user exists"; - } - return ret.second; + MutexLock locker(&mutex_); + std::shared_ptr user(new User(user_name, user_info)); + user->SetUserInfo(user_info); + + std::pair ret = + all_users_.insert(std::pair(user_name, user)); + if (ret.second) { + LOG(INFO) << "[user-manager] add user: " << user_name << " success"; + } else { + LOG(INFO) << "[user-manager] add user: " << user_name << " failed: user exists"; + } + return ret.second; } bool UserManager::DeleteUser(const std::string& user_name) { - MutexLock locker(&mutex_); - UserList::iterator it = all_users_.find(user_name); - if (it == all_users_.end()) { - LOG(INFO) << "[user-manager] delete user: " << user_name << " failed: user not found"; - } else { - all_users_.erase(user_name); - LOG(INFO) << "[user-manager] delete user: " << user_name << " success"; - } - return it == all_users_.end(); + MutexLock locker(&mutex_); + UserList::iterator it = all_users_.find(user_name); + if (it == all_users_.end()) { + LOG(INFO) << "[user-manager] delete user: " << user_name << " failed: user not found"; + } else { + all_users_.erase(user_name); + LOG(INFO) << "[user-manager] delete user: " << user_name << " success"; + } + return it == all_users_.end(); } bool UserManager::IsUserInGroup(const std::string& user_name, const std::string& group_name) { - MutexLock locker(&mutex_); - UserList::iterator it = all_users_.find(user_name); - if (it == all_users_.end()) { - return false; - } - UserInfo user_info = it->second->GetUserInfo(); - for (int i = 0; i < user_info.group_name_size(); ++i) { - if (user_info.group_name(i) == group_name) { - LOG(INFO) << "[user-manager] found group:" << group_name << " for:" << user_name; - return true; - } - } - LOG(INFO) << "[user-manager] not found group:" << group_name << " for:" << user_name; + MutexLock locker(&mutex_); + UserList::iterator it = all_users_.find(user_name); + if (it == all_users_.end()) { return false; + } + UserInfo user_info = it->second->GetUserInfo(); + for (int i = 0; i < user_info.group_name_size(); ++i) { + if (user_info.group_name(i) == group_name) { + LOG(INFO) << "[user-manager] found group:" << group_name << " for:" << user_name; + return true; + } + } + LOG(INFO) << "[user-manager] not found group:" << group_name << " for:" << user_name; + return false; } bool UserManager::IsUserExist(const std::string& user_name) { - MutexLock locker(&mutex_); - UserList::iterator it = all_users_.find(user_name); - return it != all_users_.end(); + MutexLock locker(&mutex_); + UserList::iterator it = all_users_.find(user_name); + return it != all_users_.end(); } -void UserManager::LoadUserMeta(const std::string& key, - const std::string& value) { - if (key.length() <= 1 || key[0] != '~') { - LOG(ERROR) << "[user-manager] invalid argument"; - return; - } - std::string user_name = key.substr(1); +void UserManager::LoadUserMeta(const std::string& key, const std::string& value) { + if (key.length() <= 1 || key[0] != '~') { + LOG(ERROR) << "[user-manager] invalid argument"; + return; + } + std::string user_name = key.substr(1); - UserInfo user_info; - user_info.ParseFromString(value); - AddUser(user_name, user_info); + UserInfo user_info; + user_info.ParseFromString(value); + AddUser(user_name, user_info); } void UserManager::SetupRootUser() { - // there is no races, so there is no lock - UserList::iterator it = all_users_.find("root"); - if (it == all_users_.end()) { - UserInfo user_info; - user_info.set_user_name("root"); - user_info.set_token("af6a89c2"); - AddUser("root", user_info); - LOG(INFO) << "[user-manager] root not found in meta table, add a root with default password"; - } - LOG(INFO) << "[user-manager] root restored"; + // there is no races, so there is no lock + UserList::iterator it = all_users_.find("root"); + if (it == all_users_.end()) { + UserInfo user_info; + user_info.set_user_name("root"); + user_info.set_token("af6a89c2"); + AddUser("root", user_info); + LOG(INFO) << "[user-manager] root not found in meta table, add a root with " + "default password"; + } + LOG(INFO) << "[user-manager] root restored"; } -bool UserManager::IsValidForCreate(const std::string& token, - const std::string& user_name) { - LOG(INFO) << "[user-manager] " << user_name << ", " << token; - return IsValidUserName(user_name) - && !IsUserExist(user_name) - && TokenToUserName(token) == "root"; +bool UserManager::IsValidForCreate(const std::string& token, const std::string& user_name) { + LOG(INFO) << "[user-manager] " << user_name << ", " << token; + return IsValidUserName(user_name) && !IsUserExist(user_name) && TokenToUserName(token) == "root"; } -bool UserManager::IsValidForDelete(const std::string& token, - const std::string& user_name) { - LOG(INFO) << "[user-manager] " << user_name << ", " << token; - return IsUserExist(user_name) - && user_name != "root" - && TokenToUserName(token) == "root"; +bool UserManager::IsValidForDelete(const std::string& token, const std::string& user_name) { + LOG(INFO) << "[user-manager] " << user_name << ", " << token; + return IsUserExist(user_name) && user_name != "root" && TokenToUserName(token) == "root"; } -bool UserManager::IsValidForChangepwd(const std::string& token, - const std::string& user_name) { - LOG(INFO) << "[user-manager] " << user_name << ", " << token << ", who call:" << TokenToUserName(token); - return IsUserExist(user_name) - && (TokenToUserName(token) == "root" || TokenToUserName(token) == user_name); +bool UserManager::IsValidForChangepwd(const std::string& token, const std::string& user_name) { + LOG(INFO) << "[user-manager] " << user_name << ", " << token + << ", who call:" << TokenToUserName(token); + return IsUserExist(user_name) && + (TokenToUserName(token) == "root" || TokenToUserName(token) == user_name); } -bool UserManager::IsValidForAddToGroup(const std::string& token, - const std::string& user_name, +bool UserManager::IsValidForAddToGroup(const std::string& token, const std::string& user_name, const std::string& group_name) { - return IsUserExist(user_name) - && !IsUserInGroup(user_name, group_name) - && TokenToUserName(token) == "root"; + return IsUserExist(user_name) && !IsUserInGroup(user_name, group_name) && + TokenToUserName(token) == "root"; } -bool UserManager::IsValidForDeleteFromGroup(const std::string& token, - const std::string& user_name, +bool UserManager::IsValidForDeleteFromGroup(const std::string& token, const std::string& user_name, const std::string& group_name) { - return IsUserExist(user_name) - && IsUserInGroup(user_name, group_name) - && (TokenToUserName(token) == "root" || TokenToUserName(token) == user_name); + return IsUserExist(user_name) && IsUserInGroup(user_name, group_name) && + (TokenToUserName(token) == "root" || TokenToUserName(token) == user_name); } std::string UserManager::UserNameToToken(const std::string& user_name) { - MutexLock locker(&mutex_); - UserList::iterator it = all_users_.find(user_name); - if (it == all_users_.end()) { - return "#UnknownUser"; - } - return it->second->GetToken(); + MutexLock locker(&mutex_); + UserList::iterator it = all_users_.find(user_name); + if (it == all_users_.end()) { + return "#UnknownUser"; + } + return it->second->GetToken(); } std::string UserManager::TokenToUserName(const std::string& token) { - MutexLock locker(&mutex_); - for (UserList::const_iterator it = all_users_.begin(); - it != all_users_.end(); ++it) { - if (token == it->second->GetToken()) { - return it->second->GetUserName(); - } + MutexLock locker(&mutex_); + for (UserList::const_iterator it = all_users_.begin(); it != all_users_.end(); ++it) { + if (token == it->second->GetToken()) { + return it->second->GetUserName(); } - return "#UnknownUser"; + } + return "#UnknownUser"; } bool UserManager::SetUserInfo(const std::string& user_name, const UserInfo& user_info) { - MutexLock locker(&mutex_); - UserList::iterator it = all_users_.find(user_name); - if (it == all_users_.end()) { - LOG(INFO) << "[user-manager] user:" << user_name << " not found"; - return false; - } - it->second->SetUserInfo(user_info); - return true; + MutexLock locker(&mutex_); + UserList::iterator it = all_users_.find(user_name); + if (it == all_users_.end()) { + LOG(INFO) << "[user-manager] user:" << user_name << " not found"; + return false; + } + it->second->SetUserInfo(user_info); + return true; } UserInfo UserManager::GetUserInfo(const std::string& user_name) { - MutexLock locker(&mutex_); - UserList::iterator it = all_users_.find(user_name); - if (it == all_users_.end()) { - LOG(INFO) << "[user-manager] user:" << user_name << " not found"; - UserInfo ui; - ui.set_user_name("(user_not_found)"); - return ui; - } - return it->second->GetUserInfo(); -} - -bool UserManager::DeleteGroupFromUserInfo(UserInfo& user_info, - const std::string& group) { - MutexLock locker(&mutex_); - std::string user_name = user_info.user_name(); - UserList::iterator it = all_users_.find(user_name); - if (it == all_users_.end()) { - LOG(INFO) << "[user-manager] user:" << user_name << " not found"; - return false; - } - user_info.clear_group_name(); - UserInfo orig_user = it->second->GetUserInfo(); - for (int i = 0; i < orig_user.group_name_size(); ++i) { - if (orig_user.group_name(i) == group) { - LOG(INFO) << "[user-manager] found delete group:" << group; - continue; - } - user_info.add_group_name(orig_user.group_name(i)); + MutexLock locker(&mutex_); + UserList::iterator it = all_users_.find(user_name); + if (it == all_users_.end()) { + LOG(INFO) << "[user-manager] user:" << user_name << " not found"; + UserInfo ui; + ui.set_user_name("(user_not_found)"); + return ui; + } + return it->second->GetUserInfo(); +} + +bool UserManager::DeleteGroupFromUserInfo(UserInfo& user_info, const std::string& group) { + MutexLock locker(&mutex_); + std::string user_name = user_info.user_name(); + UserList::iterator it = all_users_.find(user_name); + if (it == all_users_.end()) { + LOG(INFO) << "[user-manager] user:" << user_name << " not found"; + return false; + } + user_info.clear_group_name(); + UserInfo orig_user = it->second->GetUserInfo(); + for (int i = 0; i < orig_user.group_name_size(); ++i) { + if (orig_user.group_name(i) == group) { + LOG(INFO) << "[user-manager] found delete group:" << group; + continue; } - return true; + user_info.add_group_name(orig_user.group_name(i)); + } + return true; } -} // namespace master -} // namespace tera +} // namespace master +} // namespace tera diff --git a/src/master/user_manager.h b/src/master/user_manager.h index 08899996b..e34096165 100644 --- a/src/master/user_manager.h +++ b/src/master/user_manager.h @@ -16,72 +16,69 @@ namespace tera { namespace master { class User { -public: - User(const std::string& name, const UserInfo& user_info); - std::string GetUserName(); - void SetUserInfo(const UserInfo& user_info); - UserInfo GetUserInfo(); - std::string GetToken(); - void ToMetaTableKeyValue(std::string* packed_key, - std::string* packed_value); - std::string DebugString(); - -private: - User(const User&) {} - User& operator=(const User&) {return *this;} - - mutable Mutex mutex_; - std::string name_; - UserInfo user_info_; + public: + User(const std::string& name, const UserInfo& user_info); + std::string GetUserName(); + void SetUserInfo(const UserInfo& user_info); + UserInfo GetUserInfo(); + std::string GetToken(); + void ToMetaTableKeyValue(std::string* packed_key, std::string* packed_value); + std::string DebugString(); + + private: + User(const User&) {} + User& operator=(const User&) { return *this; } + + mutable Mutex mutex_; + std::string name_; + UserInfo user_info_; }; typedef std::shared_ptr UserPtr; class UserManager { -public: - // load a user meta entry(memtable) into user_manager(memory) - void LoadUserMeta(const std::string& key, - const std::string& value); - - // setups root user if root not found in metatable after master init - // e.g. the tera cluster first starts. - void SetupRootUser(); - - // valid user name: - // 1. kLenMin <= user_name.length() <= kLenMax - // 2. first char of user_name is alphabet - // 3. contains only alphabet or digit - bool IsUserNameValid(const std::string& user_name); - - bool AddUser(const std::string& user_name, const UserInfo& user_info); - bool DeleteUser(const std::string& user_name); - bool IsUserExist(const std::string& user_name); - - std::string UserNameToToken(const std::string& user_name); - std::string TokenToUserName(const std::string& token); - - // who call this function should use IsUserExist() first, - // otherwise, gets a defalut UserInfo. - UserInfo GetUserInfo(const std::string& user_name); - bool SetUserInfo(const std::string& user_name, const UserInfo& user_info); - - bool DeleteGroupFromUserInfo(UserInfo& user_info, const std::string& group); - bool IsUserInGroup(const std::string& user_name, const std::string& group_name); - - bool IsValidForCreate(const std::string& token, const std::string& user_name); - bool IsValidForDelete(const std::string& token, const std::string& user_name); - bool IsValidForChangepwd(const std::string& token, const std::string& user_name); - bool IsValidForAddToGroup(const std::string& token, - const std::string& user_name, - const std::string& group_name); - bool IsValidForDeleteFromGroup(const std::string& token, - const std::string& user_name, - const std::string& group_name); -private: - mutable Mutex mutex_; - typedef std::map UserList; - UserList all_users_; + public: + // load a user meta entry(memtable) into user_manager(memory) + void LoadUserMeta(const std::string& key, const std::string& value); + + // setups root user if root not found in metatable after master init + // e.g. the tera cluster first starts. + void SetupRootUser(); + + // valid user name: + // 1. kLenMin <= user_name.length() <= kLenMax + // 2. first char of user_name is alphabet + // 3. contains only alphabet or digit + bool IsUserNameValid(const std::string& user_name); + + bool AddUser(const std::string& user_name, const UserInfo& user_info); + bool DeleteUser(const std::string& user_name); + bool IsUserExist(const std::string& user_name); + + std::string UserNameToToken(const std::string& user_name); + std::string TokenToUserName(const std::string& token); + + // who call this function should use IsUserExist() first, + // otherwise, gets a defalut UserInfo. + UserInfo GetUserInfo(const std::string& user_name); + bool SetUserInfo(const std::string& user_name, const UserInfo& user_info); + + bool DeleteGroupFromUserInfo(UserInfo& user_info, const std::string& group); + bool IsUserInGroup(const std::string& user_name, const std::string& group_name); + + bool IsValidForCreate(const std::string& token, const std::string& user_name); + bool IsValidForDelete(const std::string& token, const std::string& user_name); + bool IsValidForChangepwd(const std::string& token, const std::string& user_name); + bool IsValidForAddToGroup(const std::string& token, const std::string& user_name, + const std::string& group_name); + bool IsValidForDeleteFromGroup(const std::string& token, const std::string& user_name, + const std::string& group_name); + + private: + mutable Mutex mutex_; + typedef std::map UserList; + UserList all_users_; }; -} // namespace master -} // namespace tera -#endif // TERA_MASTER_USER_MANAGER_H_ +} // namespace master +} // namespace tera +#endif // TERA_MASTER_USER_MANAGER_H_ diff --git a/src/master/workload_scheduler.cc b/src/master/workload_scheduler.cc index 5933827cb..cf58dc7f0 100644 --- a/src/master/workload_scheduler.cc +++ b/src/master/workload_scheduler.cc @@ -18,39 +18,39 @@ namespace tera { namespace master { class Comparator { -public: - // Three-way comparison. Returns value: - // < 0 iff "a" < "b", - // == 0 iff "a" == "b", - // > 0 iff "a" > "b" - virtual int Compare(const TabletNodePtr& a, const TabletNodePtr& b, - const std::string& table_name) = 0; - + public: + // Three-way comparison. Returns value: + // < 0 iff "a" < "b", + // == 0 iff "a" == "b", + // > 0 iff "a" > "b" + virtual int Compare(const TabletNodePtr& a, const TabletNodePtr& b, + const std::string& table_name) = 0; }; class WorkloadLess { -public: - bool operator() (const TabletNodePtr& a, const TabletNodePtr& b) { - return comparator_->Compare(a, b, table_name_) < 0; - } - WorkloadLess(Comparator* comparator, const std::string& table_name = "") - : comparator_(comparator), table_name_(table_name) {} -private: - Comparator* comparator_; - std::string table_name_; + public: + bool operator()(const TabletNodePtr& a, const TabletNodePtr& b) { + return comparator_->Compare(a, b, table_name_) < 0; + } + WorkloadLess(Comparator* comparator, const std::string& table_name = "") + : comparator_(comparator), table_name_(table_name) {} + + private: + Comparator* comparator_; + std::string table_name_; }; class WorkloadGreater { -public: - bool operator() (const TabletNodePtr& a, const TabletNodePtr& b) { - return comparator_->Compare(a, b, table_name_) > 0; - } - WorkloadGreater(Comparator* comparator, const std::string& table_name = "") - : comparator_(comparator), table_name_(table_name) {} - -private: - Comparator* comparator_; - std::string table_name_; + public: + bool operator()(const TabletNodePtr& a, const TabletNodePtr& b) { + return comparator_->Compare(a, b, table_name_) > 0; + } + WorkloadGreater(Comparator* comparator, const std::string& table_name = "") + : comparator_(comparator), table_name_(table_name) {} + + private: + Comparator* comparator_; + std::string table_name_; }; ///////////////////////////////////////////////// @@ -58,123 +58,116 @@ class WorkloadGreater { ///////////////////////////////////////////////// class SizeComparator : public Comparator { -public: - int Compare(const TabletNodePtr& a, const TabletNodePtr& b, - const std::string& table_name) { - uint64_t a_size = a->GetSize(table_name); - uint64_t b_size = b->GetSize(table_name); - if (a_size < b_size) { - return -1; - } else if (a_size > b_size) { - return 1; - } else { - return 0; - } + public: + int Compare(const TabletNodePtr& a, const TabletNodePtr& b, const std::string& table_name) { + uint64_t a_size = a->GetSize(table_name); + uint64_t b_size = b->GetSize(table_name); + if (a_size < b_size) { + return -1; + } else if (a_size > b_size) { + return 1; + } else { + return 0; } + } }; -bool SizeScheduler::MayMoveOut(const TabletNodePtr& node, - const std::string& table_name) { - VLOG(16) << "[size-sched] MayMoveOut()"; - int64_t node_size = node->GetSize(table_name); - if (node_size <= FLAGS_tera_master_load_balance_ts_size_threshold) { - VLOG(16) << "[size-sched] node do not need loadbalance"; - return false; - } - return true; +bool SizeScheduler::MayMoveOut(const TabletNodePtr& node, const std::string& table_name) { + VLOG(16) << "[size-sched] MayMoveOut()"; + int64_t node_size = node->GetSize(table_name); + if (node_size <= FLAGS_tera_master_load_balance_ts_size_threshold) { + VLOG(16) << "[size-sched] node do not need loadbalance"; + return false; + } + return true; } bool SizeScheduler::FindBestNode(const std::vector& node_list, - const std::string& table_name, - size_t* best_index) { - VLOG(16) << "[size-sched] FindBestNode()"; - if (node_list.size() == 0) { - return false; - } - - SizeComparator comparator; - *best_index = 0; - for (size_t i = 1; i < node_list.size(); ++i) { - int r = comparator.Compare(node_list[*best_index], node_list[i], table_name); - if (r > 0) { - *best_index = i; - } else if (r < 0) { - // do nothing - } else if (node_list[*best_index]->GetAddr() <= last_choose_node_ - && node_list[i]->GetAddr() > last_choose_node_) { - // round-robin - *best_index = i; - } + const std::string& table_name, size_t* best_index) { + VLOG(16) << "[size-sched] FindBestNode()"; + if (node_list.size() == 0) { + return false; + } + + SizeComparator comparator; + *best_index = 0; + for (size_t i = 1; i < node_list.size(); ++i) { + int r = comparator.Compare(node_list[*best_index], node_list[i], table_name); + if (r > 0) { + *best_index = i; + } else if (r < 0) { + // do nothing + } else if (node_list[*best_index]->GetAddr() <= last_choose_node_ && + node_list[i]->GetAddr() > last_choose_node_) { + // round-robin + *best_index = i; } - last_choose_node_ = node_list[*best_index]->GetAddr(); - VLOG(16) << "[size-sched] best node = " << last_choose_node_; - return true; + } + last_choose_node_ = node_list[*best_index]->GetAddr(); + VLOG(16) << "[size-sched] best node = " << last_choose_node_; + return true; } -bool SizeScheduler::FindBestTablet(const TabletNodePtr& src_node, - const TabletNodePtr& dst_node, +bool SizeScheduler::FindBestTablet(const TabletNodePtr& src_node, const TabletNodePtr& dst_node, const std::vector& tablet_list, - const std::string& table_name, - size_t* best_index) { - VLOG(16) << "[size-sched] FindBestTablet() " << src_node->GetAddr() - << " -> " << dst_node->GetAddr(); - - int64_t src_node_size = src_node->GetSize(table_name); - int64_t dst_node_size = dst_node->GetSize(table_name); - - const double& size_ratio = FLAGS_tera_master_load_balance_size_ratio_trigger; - if ((double)src_node_size < (double)dst_node_size * size_ratio) { - VLOG(16) << "[size-sched] size ratio not reach threshold: " << src_node_size - << " : " << dst_node_size; - return false; - } - - int64_t ideal_move_size = (src_node_size - dst_node_size) / 2; - VLOG(16) << "[size-sched] size = " << src_node_size << " : " << dst_node_size - << " ideal_move_size = " << ideal_move_size; - - int64_t best_tablet_index = -1; - int64_t best_tablet_size = 0; - int64_t best_tablet_qps = 0; - for (size_t i = 0; i < tablet_list.size(); ++i) { - TabletPtr tablet = tablet_list[i]; - int64_t size = tablet->GetDataSize(); - int64_t qps = tablet->GetQps(); - if (size <= ideal_move_size - && (best_tablet_index == -1 || size > best_tablet_size)) { - best_tablet_index = i; - best_tablet_size = size; - best_tablet_qps = qps; - } + const std::string& table_name, size_t* best_index) { + VLOG(16) << "[size-sched] FindBestTablet() " << src_node->GetAddr() << " -> " + << dst_node->GetAddr(); + + int64_t src_node_size = src_node->GetSize(table_name); + int64_t dst_node_size = dst_node->GetSize(table_name); + + const double& size_ratio = FLAGS_tera_master_load_balance_size_ratio_trigger; + if ((double)src_node_size < (double)dst_node_size * size_ratio) { + VLOG(16) << "[size-sched] size ratio not reach threshold: " << src_node_size << " : " + << dst_node_size; + return false; + } + + int64_t ideal_move_size = (src_node_size - dst_node_size) / 2; + VLOG(16) << "[size-sched] size = " << src_node_size << " : " << dst_node_size + << " ideal_move_size = " << ideal_move_size; + + int64_t best_tablet_index = -1; + int64_t best_tablet_size = 0; + int64_t best_tablet_qps = 0; + for (size_t i = 0; i < tablet_list.size(); ++i) { + TabletPtr tablet = tablet_list[i]; + int64_t size = tablet->GetDataSize(); + int64_t qps = tablet->GetQps(); + if (size <= ideal_move_size && (best_tablet_index == -1 || size > best_tablet_size)) { + best_tablet_index = i; + best_tablet_size = size; + best_tablet_qps = qps; } - if (best_tablet_index == -1) { - return false; - } - *best_index = best_tablet_index; - TabletPtr best_tablet = tablet_list[best_tablet_index]; - VLOG(16) << "[size-sched] best tablet = " << best_tablet->GetPath() - << " size = " << best_tablet_size - << " qps = " << best_tablet_qps; - return true; + } + if (best_tablet_index == -1) { + return false; + } + *best_index = best_tablet_index; + TabletPtr best_tablet = tablet_list[best_tablet_index]; + VLOG(16) << "[size-sched] best tablet = " << best_tablet->GetPath() + << " size = " << best_tablet_size << " qps = " << best_tablet_qps; + return true; } bool SizeScheduler::NeedSchedule(std::vector& node_list, const std::string& table_name) { - return true; + return true; } void SizeScheduler::AscendingSort(std::vector& node_list, const std::string& table_name) { - SizeComparator comparator; - WorkloadLess less(&comparator, table_name); - std::sort(node_list.begin(), node_list.end(), less); + SizeComparator comparator; + WorkloadLess less(&comparator, table_name); + std::sort(node_list.begin(), node_list.end(), less); } void SizeScheduler::DescendingSort(std::vector& node_list, const std::string& table_name) { - SizeComparator comparator; - WorkloadGreater greater(&comparator, table_name); - std::sort(node_list.begin(), node_list.end(), greater); + SizeComparator comparator; + WorkloadGreater greater(&comparator, table_name); + std::sort(node_list.begin(), node_list.end(), greater); } ///////////////////////////////////////////////// @@ -182,161 +175,156 @@ void SizeScheduler::DescendingSort(std::vector& node_list, ///////////////////////////////////////////////// static uint64_t GetPending(const TabletNodePtr& ts) { - return ts->GetReadPending() + ts->GetWritePending() - + ts->GetScanPending() * FLAGS_tera_master_load_balance_scan_weight; + return ts->GetReadPending() + ts->GetWritePending() + + ts->GetScanPending() * FLAGS_tera_master_load_balance_scan_weight; } class LoadComparator : public Comparator { -public: - int Compare(const TabletNodePtr& a, const TabletNodePtr& b, - const std::string& table_name) { - uint64_t a_read_pending = GetPending(a); - uint64_t b_read_pending = GetPending(b); - if (a_read_pending < b_read_pending) { - return -1; - } else if (a_read_pending > b_read_pending) { - return 1; - } - - uint64_t a_row_read_delay = a->GetRowReadDelay(); - uint64_t b_row_read_delay = b->GetRowReadDelay(); - if (a_row_read_delay < b_row_read_delay) { - return -1; - } else if (a_row_read_delay > b_row_read_delay) { - return 1; - } - - uint64_t a_qps = a->GetQps(table_name); - uint64_t b_qps = b->GetQps(table_name); - if (a_qps < b_qps) { - return -1; - } else if (a_qps > b_qps) { - return 1; - } else { - return 0; - } + public: + int Compare(const TabletNodePtr& a, const TabletNodePtr& b, const std::string& table_name) { + uint64_t a_read_pending = GetPending(a); + uint64_t b_read_pending = GetPending(b); + if (a_read_pending < b_read_pending) { + return -1; + } else if (a_read_pending > b_read_pending) { + return 1; + } + + uint64_t a_row_read_delay = a->GetRowReadDelay(); + uint64_t b_row_read_delay = b->GetRowReadDelay(); + if (a_row_read_delay < b_row_read_delay) { + return -1; + } else if (a_row_read_delay > b_row_read_delay) { + return 1; } - virtual ~LoadComparator() {} + uint64_t a_qps = a->GetQps(table_name); + uint64_t b_qps = b->GetQps(table_name); + if (a_qps < b_qps) { + return -1; + } else if (a_qps > b_qps) { + return 1; + } else { + return 0; + } + } + + virtual ~LoadComparator() {} }; bool LoadScheduler::MayMoveOut(const TabletNodePtr& node, const std::string& table_name) { - VLOG(16) << "[load-sched] MayMoveOut()"; - int64_t node_read_pending = GetPending(node); - if (node_read_pending <= FLAGS_tera_master_load_balance_ts_load_threshold) { - VLOG(16) << "[load-sched] node do not need loadbalance: " << node_read_pending; - return false; - } - int64_t node_qps = node->GetQps(table_name); - if (node_qps <= 0) { - VLOG(16) << "[load-sched] node has 0 qps."; - return false; - } - return true; + VLOG(16) << "[load-sched] MayMoveOut()"; + int64_t node_read_pending = GetPending(node); + if (node_read_pending <= FLAGS_tera_master_load_balance_ts_load_threshold) { + VLOG(16) << "[load-sched] node do not need loadbalance: " << node_read_pending; + return false; + } + int64_t node_qps = node->GetQps(table_name); + if (node_qps <= 0) { + VLOG(16) << "[load-sched] node has 0 qps."; + return false; + } + return true; } bool LoadScheduler::FindBestNode(const std::vector& node_list, - const std::string& table_name, - size_t* best_index) { - VLOG(16) << "[load-sched] FindBestNode()"; - if (node_list.size() == 0) { - return false; - } - - LoadComparator comparator; - *best_index = 0; - for (size_t i = 1; i < node_list.size(); ++i) { - int r = comparator.Compare(node_list[*best_index], node_list[i], table_name); - if (r > 0) { - *best_index = i; - } else if (r < 0) { - // do nothing - } else if (node_list[*best_index]->GetAddr() <= last_choose_node_ - && node_list[i]->GetAddr() > last_choose_node_) { - // round-robin - *best_index = i; - } + const std::string& table_name, size_t* best_index) { + VLOG(16) << "[load-sched] FindBestNode()"; + if (node_list.size() == 0) { + return false; + } + + LoadComparator comparator; + *best_index = 0; + for (size_t i = 1; i < node_list.size(); ++i) { + int r = comparator.Compare(node_list[*best_index], node_list[i], table_name); + if (r > 0) { + *best_index = i; + } else if (r < 0) { + // do nothing + } else if (node_list[*best_index]->GetAddr() <= last_choose_node_ && + node_list[i]->GetAddr() > last_choose_node_) { + // round-robin + *best_index = i; } - last_choose_node_ = node_list[*best_index]->GetAddr(); - VLOG(16) << "[load-sched] best node : " << last_choose_node_; - return true; + } + last_choose_node_ = node_list[*best_index]->GetAddr(); + VLOG(16) << "[load-sched] best node : " << last_choose_node_; + return true; } -bool LoadScheduler::FindBestTablet(const TabletNodePtr& src_node, - const TabletNodePtr& dst_node, +bool LoadScheduler::FindBestTablet(const TabletNodePtr& src_node, const TabletNodePtr& dst_node, const std::vector& tablet_list, - const std::string& table_name, - size_t* best_index) { - VLOG(16) << "[load-sched] FindBestTablet() " << src_node->GetAddr() - << " -> " << dst_node->GetAddr(); - - int64_t src_node_read_pending = GetPending(src_node); - int64_t dst_node_read_pending = GetPending(dst_node); - if (src_node_read_pending <= 0 || dst_node_read_pending > 0) { - VLOG(16) << "[load-sched] read pending not reach threshold: " << src_node_read_pending - << " : " << dst_node_read_pending; - return false; - } - - VLOG(16) << "[load-sched]" - << " rpending = " << src_node_read_pending << " : " << dst_node_read_pending - << " delay = " << src_node->GetRowReadDelay() << " : " << dst_node->GetRowReadDelay() - << " qps = " << src_node->GetQps(table_name) << " : " << dst_node->GetQps(table_name); - - // Donot move out the most busy tablet, move the second one - std::map tablet_sort; - for (size_t i = 0; i < tablet_list.size(); ++i) { - TabletPtr tablet = tablet_list[i]; - int64_t qps = tablet->GetQps(); - tablet_sort[qps] = i; - } - std::map::reverse_iterator it = tablet_sort.rbegin(); - it++; - int64_t best_tablet_qps = it->first; - int64_t best_tablet_index = it->second; - if (best_tablet_qps == 0) { - VLOG(16) << "[load-sched] no need to move 0 QPS tablet"; - return false; - } - *best_index = best_tablet_index; - TabletPtr best_tablet = tablet_list[best_tablet_index]; - VLOG(16) << "[load-sched] best tablet = " << best_tablet->GetPath() - << " size = " << best_tablet->GetDataSize() - << " qps = " << best_tablet_qps; - return true; + const std::string& table_name, size_t* best_index) { + VLOG(16) << "[load-sched] FindBestTablet() " << src_node->GetAddr() << " -> " + << dst_node->GetAddr(); + + int64_t src_node_read_pending = GetPending(src_node); + int64_t dst_node_read_pending = GetPending(dst_node); + if (src_node_read_pending <= 0 || dst_node_read_pending > 0) { + VLOG(16) << "[load-sched] read pending not reach threshold: " << src_node_read_pending << " : " + << dst_node_read_pending; + return false; + } + + VLOG(16) << "[load-sched]" + << " rpending = " << src_node_read_pending << " : " << dst_node_read_pending + << " delay = " << src_node->GetRowReadDelay() << " : " << dst_node->GetRowReadDelay() + << " qps = " << src_node->GetQps(table_name) << " : " << dst_node->GetQps(table_name); + + // Donot move out the most busy tablet, move the second one + std::map tablet_sort; + for (size_t i = 0; i < tablet_list.size(); ++i) { + TabletPtr tablet = tablet_list[i]; + int64_t qps = tablet->GetQps(); + tablet_sort[qps] = i; + } + std::map::reverse_iterator it = tablet_sort.rbegin(); + ++it; + int64_t best_tablet_qps = it->first; + int64_t best_tablet_index = it->second; + if (best_tablet_qps == 0) { + VLOG(16) << "[load-sched] no need to move 0 QPS tablet"; + return false; + } + *best_index = best_tablet_index; + TabletPtr best_tablet = tablet_list[best_tablet_index]; + VLOG(16) << "[load-sched] best tablet = " << best_tablet->GetPath() + << " size = " << best_tablet->GetDataSize() << " qps = " << best_tablet_qps; + return true; } bool LoadScheduler::NeedSchedule(std::vector& node_list, const std::string& table_name) { - size_t pending_node_num = 0; - for (size_t i = 0; i < node_list.size(); ++i) { - int64_t node_read_pending = GetPending(node_list[i]); - if (node_read_pending > FLAGS_tera_master_load_balance_ts_load_threshold) { - pending_node_num++; - } - } - - // If pending_node_num large than 10%, we think read bottleneck is dfs io, - // do not need load balance by read. - if (pending_node_num * 10 > node_list.size()) { - return false; + size_t pending_node_num = 0; + for (size_t i = 0; i < node_list.size(); ++i) { + int64_t node_read_pending = GetPending(node_list[i]); + if (node_read_pending > FLAGS_tera_master_load_balance_ts_load_threshold) { + pending_node_num++; } - return true; + } + + // If pending_node_num large than 10%, we think read bottleneck is dfs io, + // do not need load balance by read. + if (pending_node_num * 10 > node_list.size()) { + return false; + } + return true; } void LoadScheduler::AscendingSort(std::vector& node_list, const std::string& table_name) { - LoadComparator comparator; - WorkloadLess less(&comparator, table_name); - std::sort(node_list.begin(), node_list.end(), less); + LoadComparator comparator; + WorkloadLess less(&comparator, table_name); + std::sort(node_list.begin(), node_list.end(), less); } void LoadScheduler::DescendingSort(std::vector& node_list, const std::string& table_name) { - LoadComparator comparator; - WorkloadGreater greater(&comparator, table_name); - std::sort(node_list.begin(), node_list.end(), greater); + LoadComparator comparator; + WorkloadGreater greater(&comparator, table_name); + std::sort(node_list.begin(), node_list.end(), greater); } -} // namespace master -} // namespace tera +} // namespace master +} // namespace tera diff --git a/src/master/workload_scheduler.h b/src/master/workload_scheduler.h index 02fed0140..e75bac3f0 100644 --- a/src/master/workload_scheduler.h +++ b/src/master/workload_scheduler.h @@ -11,74 +11,58 @@ namespace tera { namespace master { class SizeScheduler : public Scheduler { -public: - SizeScheduler() {} - virtual ~SizeScheduler() {} + public: + SizeScheduler() {} + virtual ~SizeScheduler() {} - virtual bool MayMoveOut(const TabletNodePtr& node, const std::string& table_name); - virtual bool FindBestNode(const std::vector& node_list, - const std::string& table_name, - size_t* best_index); + virtual bool MayMoveOut(const TabletNodePtr& node, const std::string& table_name); + virtual bool FindBestNode(const std::vector& node_list, + const std::string& table_name, size_t* best_index); - virtual bool FindBestTablet(const TabletNodePtr& src_node, - const TabletNodePtr& dst_node, - const std::vector& tablet_list, - const std::string& table_name, - size_t* best_index); + virtual bool FindBestTablet(const TabletNodePtr& src_node, const TabletNodePtr& dst_node, + const std::vector& tablet_list, + const std::string& table_name, size_t* best_index); - virtual bool NeedSchedule(std::vector& node_list, - const std::string& table_name); + virtual bool NeedSchedule(std::vector& node_list, const std::string& table_name); - virtual void AscendingSort(std::vector& node_list, - const std::string& table_name); + virtual void AscendingSort(std::vector& node_list, const std::string& table_name); - virtual void DescendingSort(std::vector& node_list, - const std::string& table_name); + virtual void DescendingSort(std::vector& node_list, const std::string& table_name); - virtual const char* Name() { - return "size"; - } + virtual const char* Name() { return "size"; } -private: - std::string last_choose_node_; - std::string last_choose_tablet_; + private: + std::string last_choose_node_; + std::string last_choose_tablet_; }; class LoadScheduler : public Scheduler { -public: - LoadScheduler() {} - virtual ~LoadScheduler() {} + public: + LoadScheduler() {} + virtual ~LoadScheduler() {} - virtual bool MayMoveOut(const TabletNodePtr& node, const std::string& table_name); - virtual bool FindBestNode(const std::vector& node_list, - const std::string& table_name, - size_t* best_index); + virtual bool MayMoveOut(const TabletNodePtr& node, const std::string& table_name); + virtual bool FindBestNode(const std::vector& node_list, + const std::string& table_name, size_t* best_index); - virtual bool FindBestTablet(const TabletNodePtr& src_node, - const TabletNodePtr& dst_node, - const std::vector& tablet_list, - const std::string& table_name, - size_t* best_index); + virtual bool FindBestTablet(const TabletNodePtr& src_node, const TabletNodePtr& dst_node, + const std::vector& tablet_list, + const std::string& table_name, size_t* best_index); - virtual bool NeedSchedule(std::vector& node_list, - const std::string& table_name); + virtual bool NeedSchedule(std::vector& node_list, const std::string& table_name); - virtual void AscendingSort(std::vector& node_list, - const std::string& table_name); + virtual void AscendingSort(std::vector& node_list, const std::string& table_name); - virtual void DescendingSort(std::vector& node_list, - const std::string& table_name); + virtual void DescendingSort(std::vector& node_list, const std::string& table_name); - virtual const char* Name() { - return "load"; - } + virtual const char* Name() { return "load"; } -private: - std::string last_choose_node_; - std::string last_choose_tablet_; + private: + std::string last_choose_node_; + std::string last_choose_tablet_; }; -} // namespace master -} // namespace tera +} // namespace master +} // namespace tera -#endif // TERA_MASTER_WORKLOAD_SCHEDULER_H_ +#endif // TERA_MASTER_WORKLOAD_SCHEDULER_H_ diff --git a/src/metacli.cc b/src/metacli.cc deleted file mode 100644 index c4d64bec3..000000000 --- a/src/metacli.cc +++ /dev/null @@ -1,447 +0,0 @@ -// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. -// - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "ins_sdk.h" - -#include "common/base/string_ext.h" -#include "common/base/string_number.h" -#include "common/console/progress_bar.h" -#include "common/file/file_path.h" -#include "io/coding.h" -#include "proto/kv_helper.h" -#include "proto/proto_helper.h" -#include "proto/tabletnode.pb.h" -#include "proto/tabletnode_client.h" -#include "sdk/client_impl.h" -#include "sdk/cookie.h" -#include "sdk/sdk_utils.h" -#include "sdk/sdk_zk.h" -#include "sdk/table_impl.h" -#include "tera.h" -#include "types.h" -#include "utils/config_utils.h" -#include "utils/crypt.h" -#include "utils/schema_utils.h" -#include "utils/string_util.h" -#include "utils/tprinter.h" -#include "utils/utils_cmd.h" -#include "version.h" - -DECLARE_string(flagfile); -DECLARE_string(log_dir); -DECLARE_string(tera_master_meta_table_name); - -DEFINE_string(meta_cli_token, "", "Only be executed for the guys who has the token. \ - Please figure out what metacli is before use it."); - -using namespace tera; -namespace { - static uint64_t seq_id = 0; -} - -const char* metacli_builtin_cmds[] = { - "show", - "show \n\ - show all meta info in meta table", - - "get", - "get \n\ - get the value for table_name+row_key in meta_table \n\ - e.g. get \"test_table1\" \"abc\" ", - - "modify", - "modify \n\ - modify the value of key_end \n\ - e.g. modify \"test_table1\" \"abc\" ", - - "delete", - "delete \n\ - delete the table_name+row_key in meta_table \n\ - e.g. delete \"test_table1\" \"abc\" ", - - "help", - "help [cmd] \n\ - show manual for a or all cmd(s)", - - "version", - "version \n\ - show version info", -}; - -static void ShowCmdHelpInfo(const char* msg) { - if (msg == NULL) { - return; - } - int count = sizeof(metacli_builtin_cmds)/sizeof(char*); - for (int i = 0; i < count; i+=2) { - if(strncmp(msg, metacli_builtin_cmds[i], 32) == 0) { - std::cout << metacli_builtin_cmds[i + 1] << std::endl; - return; - } - } -} - -static void ShowAllCmd() { - std::cout << "there is cmd list:" << std::endl; - int count = sizeof(metacli_builtin_cmds)/sizeof(char*); - bool newline = false; - for (int i = 0; i < count; i+=2) { - std::cout << std::setiosflags(std::ios::left) << std::setw(20) << metacli_builtin_cmds[i]; - if (newline) { - std::cout << std::endl; - newline = false; - } else { - newline = true; - } - } - std::cout << std::endl << "help [cmd] for details." << std::endl; -} - -int32_t HelpOp(int32_t argc, char** argv) { - if (argc == 2) { - ShowAllCmd(); - } else if (argc == 3) { - ShowCmdHelpInfo(argv[2]); - } else { - ShowCmdHelpInfo("help"); - } - return 0; -} - -static void PrintMetaInfo(const TabletMeta* meta) { - std::cout << "tablet: " << meta->table_name() << " [" - << meta->key_range().key_start() << "," - << meta->key_range().key_end() << "], " - << meta->path() << ", " << meta->server_addr() << ", " - << meta->size() << ", " - << StatusCodeToString(meta->status()) << ", " - << StatusCodeToString(meta->compact_status()) << std::endl; -} - -static int GetMetaValue(const std::string& meta_server, - common::ThreadPool* thread_pool, - const std::string& tablet_name, - const std::string& start_key, - TableMeta* table_meta, - TabletMeta* tablet_meta) { - tabletnode::TabletNodeClient read_meta_client(thread_pool, meta_server); - ReadTabletRequest read_request; - ReadTabletResponse read_response; - read_request.set_sequence_id(seq_id++); - read_request.set_tablet_name(FLAGS_tera_master_meta_table_name); - RowReaderInfo* row_info = read_request.add_row_info_list(); - MakeMetaTableKey(tablet_name, start_key, row_info->mutable_key()); - if (!read_meta_client.ReadTablet(&read_request, &read_response)) { - std::cout << "read tablet failed" << std::endl; - return -1; - } - StatusCode err = read_response.status(); - if (err != tera::kTabletNodeOk) { - std::cerr << "Read meta table response not kTabletNodeOk!"; - return -1; - } - const KeyValuePair& record = read_response.detail().row_result(0).key_values(0); - char first_key_char = record.key()[0]; - if (first_key_char == '~') { - std::cout << "(user: " << record.key().substr(1) << ")" << std::endl; - } else if (first_key_char == '@') { - ParseMetaTableKeyValue(record.key(), record.value(), table_meta); - std::cout << "ok, you find a table meta info" << std::endl; - } else if (first_key_char > '@') { - ParseMetaTableKeyValue(record.key(), record.value(), tablet_meta); - } else { - std::cerr << "invalid record: " << record.key(); - } - - if (first_key_char <= '@') { - std::cout << "couldn't find tablet meta" << std::endl; - return -1; - } - return 0; -} - -static bool Confirm() { - std::cout << "[Y/N] "; - std::string ensure; - if (!std::getline(std::cin, ensure)) { - std::cout << "Get input error" << std::endl; - return false; - } - if (ensure != "Y") { - return false; - } - return true; -} - -int GetMeta(const std::string& meta_server, - common::ThreadPool* thread_pool, - const std::string& tablet_name, - const std::string& start_key) { - TabletMeta tablet_meta; - TableMeta table_meta; - if (-1 == GetMetaValue(meta_server, thread_pool, tablet_name, start_key, &table_meta, &tablet_meta)) { - std::cout << "wrong tablet input" << std::endl; - return -1; - } - PrintMetaInfo(&tablet_meta); - return 0; -} - -int DeleteMetaTablet(const std::string& meta_server, - common::ThreadPool* thread_pool, - const std::string& tablet_name, - const std::string& start_key) { - TabletMeta tablet_meta; - TableMeta table_meta; - if (-1 == GetMetaValue(meta_server, thread_pool, tablet_name, start_key, &table_meta, &tablet_meta)) { - std::cout << "wrong tablet input" << std::endl; - return -1; - } - tabletnode::TabletNodeClient write_meta_client(thread_pool, meta_server); - WriteTabletRequest write_request; - WriteTabletResponse write_response; - write_request.set_sequence_id(seq_id++); - write_request.set_tablet_name(FLAGS_tera_master_meta_table_name); - RowMutationSequence* mu_seq = write_request.add_row_list(); - - std::cout << "Are you sure delete the tablet meta info?" << std::endl; - PrintMetaInfo(&tablet_meta); - if (!Confirm()) { - return -1; - } - - std::string row_key; - MakeMetaTableKey(tablet_name, start_key, &row_key); - mu_seq->set_row_key(row_key); - tera::Mutation* mutation = mu_seq->add_mutation_sequence(); - mutation->set_type(tera::kDeleteRow); - mutation->set_timestamp(kLatestTimestamp); - if (!write_meta_client.WriteTablet(&write_request, &write_response)) { - std::cout << "write tablet failed" << std::endl; - return -1; - } - StatusCode err = write_response.status(); - if (err != tera::kTabletNodeOk) { - std::cerr << "Write meta table response not kTabletNodeOk!"; - return -1; - } - return 0; -} - -int ModifyMetaValue(const std::string& meta_server, - common::ThreadPool* thread_pool, - const std::string& tablet_name, - const std::string& start_key) { - TabletMeta tablet_meta; - TableMeta table_meta; - if (-1 == GetMetaValue(meta_server, thread_pool, tablet_name, start_key, &table_meta, &tablet_meta)) { - std::cout << "wrong tablet input" << std::endl; - return -1; - } - - tabletnode::TabletNodeClient write_meta_client(thread_pool, meta_server); - WriteTabletRequest write_request; - WriteTabletResponse write_response; - write_request.set_sequence_id(seq_id++); - write_request.set_tablet_name(FLAGS_tera_master_meta_table_name); - RowMutationSequence* mu_seq = write_request.add_row_list(); - - std::string end_key; - std::cout << "Modify key_end as : "; - if (!std::getline(std::cin, end_key)) { - std::cout << "Get input error" << std::endl; - return -1; - } - - std::cout << "Are you sure modify key_end?" << std::endl; - std::cout << "[" << tablet_meta.key_range().key_start() << ", " - << tablet_meta.key_range().key_end() << "] => "; - tera::KeyRange* key_range = new tera::KeyRange(); - key_range->set_key_start(tablet_meta.key_range().key_start()); - key_range->set_key_end(end_key); - - tablet_meta.clear_key_range(); - tablet_meta.set_allocated_key_range(key_range); - std::cout << "[" << tablet_meta.key_range().key_start() << ", " - << tablet_meta.key_range().key_end() << "]" << std::endl; - if (!Confirm()) { - return -1; - } - - std::string row_key; - MakeMetaTableKey(tablet_name, start_key, &row_key); - mu_seq->set_row_key(row_key); - tera::Mutation* mutation = mu_seq->add_mutation_sequence(); - mutation->set_type(tera::kPut); - - std::string modify_value; - MakeMetaTableValue(tablet_meta, &modify_value); - mutation->set_value(modify_value); - mutation->set_timestamp(kLatestTimestamp); - - if (!write_meta_client.WriteTablet(&write_request, &write_response)) { - std::cout << "write tablet failed" << std::endl; - return -1; - } - StatusCode err = write_response.status(); - if (err != tera::kTabletNodeOk) { - std::cerr << "Write meta table response not kTabletNodeOk!"; - return -1; - } - return 0; -} - -int ShowMeta(const std::string& meta_server, common::ThreadPool* thread_pool) { - tabletnode::TabletNodeClient meta_client(thread_pool, meta_server); - TableMeta table_meta; - TableSchema table_schema; - TableMetaList table_list; - TabletMetaList tablet_list; - ScanTabletRequest request; - ScanTabletResponse response; - request.set_sequence_id(seq_id++); - request.set_table_name(FLAGS_tera_master_meta_table_name); - request.set_start(""); - request.set_end(""); - while (meta_client.ScanTablet(&request, &response)) { - StatusCode err = response.status(); - if (err != tera::kTabletNodeOk) { - std::cerr << "Read meta table response not kTabletNodeOk!"; - return -1; - } - - int32_t record_size = response.results().key_values_size(); - std::cout << "recode size = " << record_size << std::endl; - if (record_size <= 0) { - std::cout << "scan meta table success" << std::endl; - break; - } - std::string last_record_key; - for (int i = 0; i < record_size; ++i) { - const tera::KeyValuePair& record = response.results().key_values(i); - last_record_key = record.key(); - char first_key_char = record.key()[0]; - if (first_key_char == '~') { - std::cout << "(user: " << record.key().substr(1) << ")" << std::endl; - } else if (first_key_char == '@') { - ParseMetaTableKeyValue(record.key(), record.value(), table_list.add_meta()); - } else if (first_key_char > '@') { - ParseMetaTableKeyValue(record.key(), record.value(), tablet_list.add_meta()); - } else { - std::cerr << "invalid record: " << record.key(); - } - } - std::string next_record_key = tera::NextKey(last_record_key); - request.set_start(next_record_key); - request.set_end(""); - request.set_sequence_id(seq_id++); - response.Clear(); - } - - int32_t table_num = table_list.meta_size(); - for (int32_t i = 0; i < table_num; ++i) { - const tera::TableMeta& meta = table_list.meta(i); - std::cout << "table: " << meta.table_name() << std::endl; - int32_t lg_size = meta.schema().locality_groups_size(); - for (int32_t lg_id = 0; lg_id < lg_size; lg_id++) { - const tera::LocalityGroupSchema& lg = - meta.schema().locality_groups(lg_id); - std::cout << " lg" << lg_id << ": " << lg.name() << " (" - << lg.store_type() << ", " - << lg.compress_type() << ", " - << lg.block_size() << ")" << std::endl; - } - int32_t cf_size = meta.schema().column_families_size(); - for (int32_t cf_id = 0; cf_id < cf_size; cf_id++) { - const tera::ColumnFamilySchema& cf = - meta.schema().column_families(cf_id); - std::cout << " cf" << cf_id << ": " << cf.name() << " (" - << cf.locality_group() << ", " - << cf.type() << ", " - << cf.max_versions() << ", " - << cf.time_to_live() << ")" << std::endl; - } - } - - int32_t tablet_num = tablet_list.meta_size(); - for (int32_t i = 0; i < tablet_num; ++i) { - const tera::TabletMeta& meta = tablet_list.meta(i); - std::cout << "tablet: " << meta.table_name() << " [" - << meta.key_range().key_start() << "," - << meta.key_range().key_end() << "], " - << meta.path() << ", " << meta.server_addr() << ", " - << meta.size() << ", " - << StatusCodeToString(meta.status()) << ", " - << StatusCodeToString(meta.compact_status()) << std::endl; - } - return 0; -} - -int main(int argc, char* argv[]) { - ::google::ParseCommandLineFlags(&argc, &argv, true); - if (FLAGS_flagfile == "") { - FLAGS_flagfile = "../conf/tera.flag"; - if (access(FLAGS_flagfile.c_str(), R_OK) != 0) { - FLAGS_flagfile = "./tera.flag"; - } - utils::LoadFlagFile(FLAGS_flagfile); - } - if (FLAGS_meta_cli_token != "2862933555777941757") { - std::cout << "Please figure out what metacli is before use it." << std::endl; - return -1; - } - scoped_ptr finder(tera::sdk::NewClusterFinder()); - const std::string meta_server = finder->RootTableAddr(); - if (meta_server.empty()) { - std::cerr << "read root addr from zk fail"; - return -1; - } - if (argc <= 1) { - HelpOp(argc, argv); - return -1; - } - common::ThreadPool thread_pool(1); - std::string op(argv[1]); - if (argc == 2) { - if (op == "show") { - return ShowMeta(meta_server, &thread_pool); - } else if (op == "version") { - PrintSystemVersion(); - } else { - HelpOp(argc, argv); - } - } else if (argc == 4) { - const std::string tablet_name(argv[2]); - const std::string start_key(argv[3]); - if (op == "get") { - return GetMeta(meta_server, &thread_pool, tablet_name, start_key); - } else if (op == "modify") { - return ModifyMetaValue(meta_server, &thread_pool, tablet_name, start_key); - } else if (op == "delete") { - return DeleteMetaTablet(meta_server, &thread_pool, tablet_name, start_key); - } else { - HelpOp(argc, argv); - } - } else { - HelpOp(argc, argv); - } - - return 0; -} \ No newline at end of file diff --git a/src/monitor/teramo_main.cc b/src/monitor/teramo_main.cc index ffb4fa48b..6e91ecc36 100644 --- a/src/monitor/teramo_main.cc +++ b/src/monitor/teramo_main.cc @@ -3,7 +3,6 @@ // found in the LICENSE file. // - #include #include @@ -41,472 +40,458 @@ using namespace tera; using std::string; void FillTabletNodeStat(const TabletNodeInfo& info, TabletNodeStat* stat) { - stat->set_timestamp(info.timestamp()); - stat->set_load(info.load()); - stat->set_tablet_total(info.tablet_total()); - stat->set_tablet_onbusy(info.tablet_onbusy()); - - stat->set_low_read_cell(info.low_read_cell()); - stat->set_scan_rows(info.scan_rows()); - stat->set_scan_size(info.scan_size()); - stat->set_read_rows(info.read_rows()); - stat->set_read_size(info.read_size()); - stat->set_write_rows(info.write_rows()); - stat->set_write_size(info.write_size()); - - stat->set_mem_used(info.mem_used()); - stat->set_net_tx(info.net_tx()); - stat->set_net_rx(info.net_rx()); - stat->set_dfs_io_r(info.dfs_io_r()); - stat->set_dfs_io_w(info.dfs_io_w()); - stat->set_local_io_r(info.local_io_r()); - stat->set_local_io_w(info.local_io_w()); - - stat->set_cpu_usage(info.cpu_usage()); - - stat->set_status_m(info.status_m()); - stat->set_tablet_onload(info.tablet_onload()); - stat->set_tablet_onsplit(info.tablet_onsplit()); - stat->set_tablet_unloading(info.tablet_unloading()); - - stat->set_read_pending(info.read_pending()); - stat->set_write_pending(info.write_pending()); - stat->set_scan_pending(info.scan_pending()); - - for (int i = 0; i < info.extra_info_size(); ++i) { - ExtraStat* estat = stat->add_extra_stat(); - estat->set_name(info.extra_info(i).name()); - estat->set_value(info.extra_info(i).value()); - } + stat->set_timestamp(info.timestamp()); + stat->set_load(info.load()); + stat->set_tablet_total(info.tablet_total()); + stat->set_tablet_onbusy(info.tablet_onbusy()); + + stat->set_low_read_cell(info.low_read_cell()); + stat->set_scan_rows(info.scan_rows()); + stat->set_scan_size(info.scan_size()); + stat->set_read_rows(info.read_rows()); + stat->set_read_size(info.read_size()); + stat->set_write_rows(info.write_rows()); + stat->set_write_size(info.write_size()); + + stat->set_mem_used(info.mem_used()); + stat->set_net_tx(info.net_tx()); + stat->set_net_rx(info.net_rx()); + stat->set_dfs_io_r(info.dfs_io_r()); + stat->set_dfs_io_w(info.dfs_io_w()); + stat->set_local_io_r(info.local_io_r()); + stat->set_local_io_w(info.local_io_w()); + + stat->set_cpu_usage(info.cpu_usage()); + + stat->set_status_m(info.status_m()); + stat->set_tablet_onload(info.tablet_onload()); + stat->set_tablet_onsplit(info.tablet_onsplit()); + stat->set_tablet_unloading(info.tablet_unloading()); + + stat->set_read_pending(info.read_pending()); + stat->set_write_pending(info.write_pending()); + stat->set_scan_pending(info.scan_pending()); + + for (int i = 0; i < info.extra_info_size(); ++i) { + ExtraStat* estat = stat->add_extra_stat(); + estat->set_name(info.extra_info(i).name()); + estat->set_value(info.extra_info(i).value()); + } } void FillTabletNodeStats(std::list& raw_stats, TabletNodeStats* stat_list) { - int64_t last_timestamp = 0; - int64_t interval = FLAGS_tera_master_stat_table_interval * 1000000; - std::list::iterator it = raw_stats.begin(); - for (; it != raw_stats.end(); ++it) { - TabletNodeStat* stat = stat_list->add_stat(); - TabletNodeInfo info; - info.ParseFromString(*it); - if (last_timestamp != 0) { - while ((int64_t)info.timestamp() - last_timestamp > interval * 3 / 2) { - last_timestamp += interval; - FillTabletNodeStat(TabletNodeInfo(), stat); - stat->set_timestamp(last_timestamp); - stat = stat_list->add_stat(); - } - } - last_timestamp = info.timestamp(); - FillTabletNodeStat(info, stat); - } - if (stat_list->stat_size() > 0) { - stat_list->set_av_ratio(raw_stats.size() * 1000000 / stat_list->stat_size()); + int64_t last_timestamp = 0; + int64_t interval = FLAGS_tera_master_stat_table_interval * 1000000; + std::list::iterator it = raw_stats.begin(); + for (; it != raw_stats.end(); ++it) { + TabletNodeStat* stat = stat_list->add_stat(); + TabletNodeInfo info; + info.ParseFromString(*it); + if (last_timestamp != 0) { + while ((int64_t)info.timestamp() - last_timestamp > interval * 3 / 2) { + last_timestamp += interval; + FillTabletNodeStat(TabletNodeInfo(), stat); + stat->set_timestamp(last_timestamp); + stat = stat_list->add_stat(); + } } + last_timestamp = info.timestamp(); + FillTabletNodeStat(info, stat); + } + if (stat_list->stat_size() > 0) { + stat_list->set_av_ratio(raw_stats.size() * 1000000 / stat_list->stat_size()); + } } -void ParseStartEndTime(const MonitorRequest& request, - int64_t* min_time, - int64_t* max_time) { - *min_time = request.min_timestamp(); - if (request.max_timestamp() > 0) { - *max_time = request.max_timestamp(); - } else { - *max_time = std::numeric_limits::max(); - } +void ParseStartEndTime(const MonitorRequest& request, int64_t* min_time, int64_t* max_time) { + *min_time = request.min_timestamp(); + if (request.max_timestamp() > 0) { + *max_time = request.max_timestamp(); + } else { + *max_time = std::numeric_limits::max(); + } } -int ListTabletNodes(Table* table, - const MonitorRequest& request, - MonitorResponse* response) { - ScanDescriptor desc("#"); - desc.SetEnd("$"); +int ListTabletNodes(Table* table, const MonitorRequest& request, MonitorResponse* response) { + ScanDescriptor desc("#"); + desc.SetEnd("$"); + desc.SetBufferSize((1024 << 10)); + desc.SetAsync(false); + + ErrorCode err; + ResultStream* stream = table->Scan(desc, &err); + while (!stream->Done()) { + string addr = stream->RowName().substr(1, stream->RowName().size() - 1); + TabletNodeStats* stat_list = response->add_stat_list(); + stat_list->set_addr(addr); + stream->Next(); + } + delete stream; + return 0; +} + +int GetPartTabletNodes(Table* table, const MonitorRequest& request, MonitorResponse* response) { + int64_t min_time, max_time; + ParseStartEndTime(request, &min_time, &max_time); + ErrorCode err; + int ts_num = request.tabletnodes_size(); + if (ts_num == 0) { + response->set_errmsg("none tabletnodes"); + return -1; + } + + for (int i = 0; i < ts_num; ++i) { + const string& cur_ts = request.tabletnodes(i); + ScanDescriptor desc(cur_ts); + desc.SetEnd(cur_ts + "a"); desc.SetBufferSize((1024 << 10)); desc.SetAsync(false); - ErrorCode err; ResultStream* stream = table->Scan(desc, &err); + std::list stats; while (!stream->Done()) { - string addr = stream->RowName().substr(1, stream->RowName().size() - 1); - TabletNodeStats* stat_list = response->add_stat_list(); - stat_list->set_addr(addr); - stream->Next(); + if (stream->Timestamp() > max_time) { + // skip out-time-range records + } else if (stream->Timestamp() < min_time || stream->RowName().find(cur_ts) == string::npos) { + // skip rest records + break; + } else if (stream->Family() == "tsinfo") { + stats.push_front(stream->Value()); + } + stream->Next(); } delete stream; - return 0; -} -int GetPartTabletNodes(Table* table, - const MonitorRequest& request, - MonitorResponse* response) { - int64_t min_time, max_time; - ParseStartEndTime(request, &min_time, &max_time); - ErrorCode err; - int ts_num = request.tabletnodes_size(); - if (ts_num == 0) { - response->set_errmsg("none tabletnodes"); - return -1; - } - - for (int i = 0; i < ts_num; ++i) { - const string& cur_ts = request.tabletnodes(i); - ScanDescriptor desc(cur_ts); - desc.SetEnd(cur_ts + "a"); - desc.SetBufferSize((1024 << 10)); - desc.SetAsync(false); - - ResultStream* stream = table->Scan(desc, &err); - std::list stats; - while (!stream->Done()) { - if (stream->Timestamp() > max_time) { - // skip out-time-range records - } else if (stream->Timestamp() < min_time || - stream->RowName().find(cur_ts) == string::npos) { - // skip rest records - break; - } else if (stream->Family() == "tsinfo") { - stats.push_front(stream->Value()); - } - stream->Next(); - } - delete stream; - - TabletNodeStats* stat_list = response->add_stat_list(); - stat_list->set_addr(cur_ts); - FillTabletNodeStats(stats, stat_list); - // fill response - } - return 0; + TabletNodeStats* stat_list = response->add_stat_list(); + stat_list->set_addr(cur_ts); + FillTabletNodeStats(stats, stat_list); + // fill response + } + return 0; } -int GetAllTabletNodes(Table* table, - const MonitorRequest& request, - MonitorResponse* response) { - int64_t min_time, max_time; - ParseStartEndTime(request, &min_time, &max_time); - - ScanDescriptor desc("A"); - desc.SetEnd(""); - desc.SetBufferSize((1024 << 10)); - desc.SetAsync(false); - - ErrorCode err; - ResultStream* stream; - stream = table->Scan(desc, &err); - std::cout << err.GetReason() << std::endl; - int ts_count = 0; +int GetAllTabletNodes(Table* table, const MonitorRequest& request, MonitorResponse* response) { + int64_t min_time, max_time; + ParseStartEndTime(request, &min_time, &max_time); + + ScanDescriptor desc("A"); + desc.SetEnd(""); + desc.SetBufferSize((1024 << 10)); + desc.SetAsync(false); + + ErrorCode err; + ResultStream* stream; + stream = table->Scan(desc, &err); + std::cout << err.GetReason() << std::endl; + int ts_count = 0; + while (!stream->Done()) { + int slen = stream->RowName().size() - 16; + string cur_ts = stream->RowName().substr(0, slen); + std::list stats; while (!stream->Done()) { - int slen = stream->RowName().size() - 16; - string cur_ts = stream->RowName().substr(0, slen); - std::list stats; - while (!stream->Done()) { - if (string::npos == stream->RowName().find(cur_ts)) { - break; - } - if (stream->Timestamp() >= max_time || - stream->Timestamp() < min_time) { // [min_time, max_time) - // skip out-time-range records - } else if (stream->Family() == "tsinfo") { - stats.push_front(stream->Value()); - } - stream->Next(); - } - - // fill response - TabletNodeStats* stat_list = response->add_stat_list(); - stat_list->set_addr(cur_ts); - FillTabletNodeStats(stats, stat_list); - LOG(INFO) << "get stat finish: " << cur_ts << ", " << ts_count++; + if (string::npos == stream->RowName().find(cur_ts)) { + break; + } + if (stream->Timestamp() >= max_time || + stream->Timestamp() < min_time) { // [min_time, max_time) + // skip out-time-range records + } else if (stream->Family() == "tsinfo") { + stats.push_front(stream->Value()); + } + stream->Next(); } - delete stream; - return 0; + + // fill response + TabletNodeStats* stat_list = response->add_stat_list(); + stat_list->set_addr(cur_ts); + FillTabletNodeStats(stats, stat_list); + LOG(INFO) << "get stat finish: " << cur_ts << ", " << ts_count++; + } + delete stream; + return 0; } int FillResponse(const MonitorRequest& request, MonitorResponse* response) { - ErrorCode err_code; - string tablename = FLAGS_tera_master_stat_table_name; - - Client* client = Client::NewClient(FLAGS_flagfile); - if (client == NULL) { - LOG(ERROR) << "client instance not exist"; - response->set_errmsg("system error"); - return -3; - } - Table* table = client->OpenTable(tablename, &err_code); - if (table == NULL) { - LOG(ERROR) << "fail to open stat table: " << tablename; - response->set_errmsg("system error"); - return -4; - } - - switch (request.cmd()) { + ErrorCode err_code; + string tablename = FLAGS_tera_master_stat_table_name; + + Client* client = Client::NewClient(FLAGS_flagfile); + if (client == NULL) { + LOG(ERROR) << "client instance not exist"; + response->set_errmsg("system error"); + return -3; + } + Table* table = client->OpenTable(tablename, &err_code); + if (table == NULL) { + LOG(ERROR) << "fail to open stat table: " << tablename; + response->set_errmsg("system error"); + return -4; + } + + switch (request.cmd()) { case kList: - ListTabletNodes(table, request, response); - break; + ListTabletNodes(table, request, response); + break; case kGetAll: - GetAllTabletNodes(table, request, response); - break; + GetAllTabletNodes(table, request, response); + break; case kGetPart: - GetPartTabletNodes(table, request, response); - break; + GetPartTabletNodes(table, request, response); + break; default: - LOG(ERROR) << "request cmd error."; - response->set_errmsg("cmd error"); - return -1; - } - delete table; - delete client; - return 0; + LOG(ERROR) << "request cmd error."; + response->set_errmsg("cmd error"); + return -1; + } + delete table; + delete client; + return 0; } void InitFlags(int32_t argc, char** argv, const MonitorRequest& request) { - ::google::ParseCommandLineFlags(&argc, &argv, true); - if (FLAGS_flagfile.empty()) { - string found_path; - if (!FLAGS_tera_sdk_conf_file.empty()) { - found_path = FLAGS_tera_sdk_conf_file; - } else { - found_path = utils::GetValueFromEnv("tera_CONF"); - if (!found_path.empty() || found_path == "") { - found_path = "tera.flag"; - } - } - - if (!found_path.empty() && IsExist(found_path)) { - VLOG(5) << "config file is not defined, use default one: " - << found_path; - FLAGS_flagfile = found_path; - } else if (IsExist("./tera.flag")) { - VLOG(5) << "config file is not defined, use default one: ./tera.flag"; - FLAGS_flagfile = "./tera.flag"; - } + ::google::ParseCommandLineFlags(&argc, &argv, true); + if (FLAGS_flagfile.empty()) { + string found_path; + if (!FLAGS_tera_sdk_conf_file.empty()) { + found_path = FLAGS_tera_sdk_conf_file; + } else { + found_path = utils::GetValueFromEnv("tera_CONF"); + if (!found_path.empty() || found_path == "") { + found_path = "tera.flag"; + } } - // init log dir - /* - if (FLAGS_log_dir.empty()) { - FLAGS_log_dir = "./"; + if (!found_path.empty() && IsExist(found_path)) { + VLOG(5) << "config file is not defined, use default one: " << found_path; + FLAGS_flagfile = found_path; + } else if (IsExist("./tera.flag")) { + VLOG(5) << "config file is not defined, use default one: ./tera.flag"; + FLAGS_flagfile = "./tera.flag"; } - - ::google::ParseCommandLineFlags(&argc, &argv, true); - ::google::InitGoogleLogging(argv[0]); - utils::SetupLog(argv[0]); - */ - if (request.use_nexus()) { - if (request.has_tera_zk_addr()) { - FLAGS_tera_ins_addr_list = request.tera_zk_addr(); - } - if (request.has_tera_zk_root()) { - FLAGS_tera_ins_root_path = request.tera_zk_root(); - } - FLAGS_tera_coord_type = "ins"; - FLAGS_tera_ins_enabled = true; - FLAGS_tera_zk_enabled = false; - } else { - FLAGS_tera_coord_type = "zk"; - if (request.has_tera_zk_addr()) { - FLAGS_tera_zk_addr_list = request.tera_zk_addr(); - } - if (request.has_tera_zk_root()) { - FLAGS_tera_zk_root_path = request.tera_zk_root(); - } + } + + // init log dir + /* + if (FLAGS_log_dir.empty()) { + FLAGS_log_dir = "./"; + } + + ::google::ParseCommandLineFlags(&argc, &argv, true); + ::google::InitGoogleLogging(argv[0]); + utils::SetupLog(argv[0]); + */ + if (request.use_nexus()) { + if (request.has_tera_zk_addr()) { + FLAGS_tera_ins_addr_list = request.tera_zk_addr(); + } + if (request.has_tera_zk_root()) { + FLAGS_tera_ins_root_path = request.tera_zk_root(); } + FLAGS_tera_coord_type = "ins"; + FLAGS_tera_ins_enabled = true; + FLAGS_tera_zk_enabled = false; + } else { + FLAGS_tera_coord_type = "zk"; + if (request.has_tera_zk_addr()) { + FLAGS_tera_zk_addr_list = request.tera_zk_addr(); + } + if (request.has_tera_zk_root()) { + FLAGS_tera_zk_root_path = request.tera_zk_root(); + } + } } int DumpResponse(const string& resfile, const MonitorResponse& response) { - string res; - if (!response.SerializeToString(&res)) { - LOG(ERROR) << "fail to serialize response to string."; - return -1; - } - - FILE* fp; - if ((fp = fopen(resfile.data(), "wb")) == NULL) { - LOG(ERROR) << "fail to open " << resfile; - return -1; - } - fwrite(res.data(), 1, res.size(), fp); - fclose(fp); - return 0; + string res; + if (!response.SerializeToString(&res)) { + LOG(ERROR) << "fail to serialize response to string."; + return -1; + } + + FILE* fp; + if ((fp = fopen(resfile.data(), "wb")) == NULL) { + LOG(ERROR) << "fail to open " << resfile; + return -1; + } + fwrite(res.data(), 1, res.size(), fp); + fclose(fp); + return 0; } int ParseRequest(const string& reqfile, MonitorRequest* request) { - FILE* fp; - const int kLenMax = 1024000; - char buf[kLenMax]; - int len; - if ((fp = fopen(reqfile.data(), "rb")) == NULL) { - LOG(ERROR) << "fail to open " << reqfile; - return -1; - } - len = fread(buf, 1, kLenMax, fp); - fclose(fp); - - if (!request->ParseFromString(string(buf, len))) { - LOG(ERROR) << "fail to parse monitor request, file: " << reqfile - << ", len: " << len; - return -2; - } - return 0; + FILE* fp; + const int kLenMax = 1024000; + char buf[kLenMax]; + int len; + if ((fp = fopen(reqfile.data(), "rb")) == NULL) { + LOG(ERROR) << "fail to open " << reqfile; + return -1; + } + len = fread(buf, 1, kLenMax, fp); + fclose(fp); + + if (!request->ParseFromString(string(buf, len))) { + LOG(ERROR) << "fail to parse monitor request, file: " << reqfile << ", len: " << len; + return -2; + } + return 0; } void PrintResponse(const MonitorResponse& response) { - /* - for (int i = 0; i < response.stat_list(0).stat_size(); ++i) { - int64_t total = 0; - uint64_t t_time = 0; - int j; - int ts_count = 0; - for (j = 0; j < response.stat_list_size(); ++j) { - const TabletNodeStats& stat_list = response.stat_list(j); - if (stat_list.stat_size() <= i) { - continue; - } - ts_count++; - total += stat_list.stat(i).write_rows(); - t_time += stat_list.stat(i).timestamp(); - } - printf("%20lu%10lu%14ld%6d\n", - t_time / ts_count, total / ts_count, total, ts_count); - } - */ - for (int i = 0; i < response.stat_list_size(); ++i) { - const TabletNodeStats& stat_list = response.stat_list(i); - for (int j = 0; j < stat_list.stat_size(); ++j) { - const TabletNodeStat& stat = stat_list.stat(j); - std::cout << stat.ShortDebugString() << " "; - for (int k = 0; k < stat.extra_stat_size(); ++k) { - ExtraStat extra_stat = stat.extra_stat(k); - if (extra_stat.name() == "rand_read_delay") { - std::cout << extra_stat.name() << ": " << extra_stat.value() << " "; - } - } - std::cout << std::endl; + /* + for (int i = 0; i < response.stat_list(0).stat_size(); ++i) { + int64_t total = 0; + uint64_t t_time = 0; + int j; + int ts_count = 0; + for (j = 0; j < response.stat_list_size(); ++j) { + const TabletNodeStats& stat_list = response.stat_list(j); + if (stat_list.stat_size() <= i) { + continue; + } + ts_count++; + total += stat_list.stat(i).write_rows(); + t_time += stat_list.stat(i).timestamp(); + } + printf("%20lu%10lu%14ld%6d\n", + t_time / ts_count, total / ts_count, total, ts_count); + } + */ + for (int i = 0; i < response.stat_list_size(); ++i) { + const TabletNodeStats& stat_list = response.stat_list(i); + for (int j = 0; j < stat_list.stat_size(); ++j) { + const TabletNodeStat& stat = stat_list.stat(j); + std::cout << stat.ShortDebugString() << " "; + for (int k = 0; k < stat.extra_stat_size(); ++k) { + ExtraStat extra_stat = stat.extra_stat(k); + if (extra_stat.name() == "rand_read_delay") { + std::cout << extra_stat.name() << ": " << extra_stat.value() << " "; } + } + std::cout << std::endl; } + } } void PrintResponseFile(const string resfile) { - FILE* fp; - const int kLenMax = 1024000; - char buf[kLenMax]; - string res; - MonitorResponse response; - int len = kLenMax;; - if ((fp = fopen(resfile.data(), "rb")) == NULL) { - LOG(ERROR) << "fail to open " << resfile; - return; - } - while (len == kLenMax) { - len = fread(buf, 1, kLenMax, fp); - res.append(string(buf, len)); - } - fclose(fp); - - if (!response.ParseFromString(res)) { - LOG(ERROR) << "fail to parse monitor response, file: " << resfile - << ", len: " << len; - return; - } - PrintResponse(response); + FILE* fp; + const int kLenMax = 1024000; + char buf[kLenMax]; + string res; + MonitorResponse response; + int len = kLenMax; + ; + if ((fp = fopen(resfile.data(), "rb")) == NULL) { + LOG(ERROR) << "fail to open " << resfile; + return; + } + while (len == kLenMax) { + len = fread(buf, 1, kLenMax, fp); + res.append(string(buf, len)); + } + fclose(fp); + + if (!response.ParseFromString(res)) { + LOG(ERROR) << "fail to parse monitor response, file: " << resfile << ", len: " << len; + return; + } + PrintResponse(response); } -void TEST_FillListRequest(MonitorRequest* request) { - request->set_cmd(tera::kList); -} +void TEST_FillListRequest(MonitorRequest* request) { request->set_cmd(tera::kList); } void TEST_FillGetPartRequest(MonitorRequest* request) { - request->set_cmd(tera::kGetPart); - request->add_tabletnodes("nj02-stest-tera1.nj02.baidu.com:7702"); - uint64_t cur_time = get_micros() - 100 * 1000000; - request->set_max_timestamp(cur_time); - request->set_min_timestamp(cur_time - 30 * 60 * 1000000); + request->set_cmd(tera::kGetPart); + request->add_tabletnodes("nj02-stest-tera1.nj02.baidu.com:7702"); + uint64_t cur_time = get_micros() - 100 * 1000000; + request->set_max_timestamp(cur_time); + request->set_min_timestamp(cur_time - 30 * 60 * 1000000); } void TEST_FillGetAllRequest(MonitorRequest* request) { - request->set_cmd(tera::kGetAll); - request->set_min_timestamp(0); - request->set_max_timestamp(std::numeric_limits::max()); + request->set_cmd(tera::kGetAll); + request->set_min_timestamp(0); + request->set_max_timestamp(std::numeric_limits::max()); } -void Eva_FillGetInfoRequest(MonitorRequest* request, const std::string& ts_start, const std::string& ts_end, const std::string& ts) { - std::stringstream ss; - int64_t start, end; - ss << ts_start; - ss >> start; - std::stringstream se; - se << ts_end; - se >> end; - request->set_min_timestamp(start); - request->set_max_timestamp(end); - if (ts != "") { - std::ifstream in; - in.open(ts.c_str()); - if (!in) { - LOG(ERROR) << "fail to open file: " << ts; - return; - } - while (!in.eof()) { - std::string addr; - in >> addr; - request->add_tabletnodes(addr); - } - request->set_cmd(tera::kGetPart); - } else { - request->set_cmd(tera::kGetAll); +void Eva_FillGetInfoRequest(MonitorRequest* request, const std::string& ts_start, + const std::string& ts_end, const std::string& ts) { + std::stringstream ss; + int64_t start, end; + ss << ts_start; + ss >> start; + std::stringstream se; + se << ts_end; + se >> end; + request->set_min_timestamp(start); + request->set_max_timestamp(end); + if (ts != "") { + std::ifstream in; + in.open(ts.c_str()); + if (!in) { + LOG(ERROR) << "fail to open file: " << ts; + return; + } + while (!in.eof()) { + std::string addr; + in >> addr; + request->add_tabletnodes(addr); } + request->set_cmd(tera::kGetPart); + } else { + request->set_cmd(tera::kGetAll); + } } - int main(int argc, char* argv[]) { - int ret = 0; - string reqfile = FLAGS_tera_monitor_default_request_filename; - string resfile = FLAGS_tera_monitor_default_response_filename; - MonitorRequest request; - MonitorResponse response; - if (argc < 2) { - // scan all - } else if (string(argv[1]) == "print") { - // print response file - string resfile = argv[2]; - PrintResponseFile(resfile); - return 0; - } else if (string(argv[1]) == "testlist") { - TEST_FillListRequest(&request); - } else if (string(argv[1]) == "testgetpart") { - TEST_FillGetPartRequest(&request); - } else if (string(argv[1]) == "testgetall") { - TEST_FillGetAllRequest(&request); - } else if (string(argv[1]) == "eva") { // ./teramo eva timestamp_strat timestamp_end - Eva_FillGetInfoRequest(&request, argv[2], argv[3], ""); - } else if (string(argv[1]) == "trace") { // ./teramo eva timestamp_strat timestamp_end - Eva_FillGetInfoRequest(&request, argv[2], argv[3], argv[4]); - resfile = string(argv[4]) + ".response"; - } else { - reqfile = argv[1]; - if (argc >= 3) { - resfile = argv[2]; - } - ret = ParseRequest(reqfile, &request); - if (ret < 0) { - std::cout << ret << std::endl; - return ret; - } + int ret = 0; + string reqfile = FLAGS_tera_monitor_default_request_filename; + string resfile = FLAGS_tera_monitor_default_response_filename; + MonitorRequest request; + MonitorResponse response; + if (argc < 2) { + // scan all + } else if (string(argv[1]) == "print") { + // print response file + string resfile = argv[2]; + PrintResponseFile(resfile); + return 0; + } else if (string(argv[1]) == "testlist") { + TEST_FillListRequest(&request); + } else if (string(argv[1]) == "testgetpart") { + TEST_FillGetPartRequest(&request); + } else if (string(argv[1]) == "testgetall") { + TEST_FillGetAllRequest(&request); + } else if (string(argv[1]) == "eva") { // ./teramo eva timestamp_strat timestamp_end + Eva_FillGetInfoRequest(&request, argv[2], argv[3], ""); + } else if (string(argv[1]) == "trace") { // ./teramo eva timestamp_strat timestamp_end + Eva_FillGetInfoRequest(&request, argv[2], argv[3], argv[4]); + resfile = string(argv[4]) + ".response"; + } else { + reqfile = argv[1]; + if (argc >= 3) { + resfile = argv[2]; } - InitFlags(argc, argv, request); - - ret = FillResponse(request, &response); + ret = ParseRequest(reqfile, &request); if (ret < 0) { - std::cout << ret << std::endl; - return ret; - } - if (string(argv[1]) == "testlist" || - string(argv[1]) == "testgetpart" || - string(argv[1]) == "testgetall") { - PrintResponse(response); + std::cout << ret << std::endl; + return ret; } + } + InitFlags(argc, argv, request); - ret = DumpResponse(resfile, response); + ret = FillResponse(request, &response); + if (ret < 0) { std::cout << ret << std::endl; - return ret; + } + if (string(argv[1]) == "testlist" || string(argv[1]) == "testgetpart" || + string(argv[1]) == "testgetall") { + PrintResponse(response); + } + + ret = DumpResponse(resfile, response); + std::cout << ret << std::endl; + + return ret; } diff --git a/src/observer/executor/key_selector.h b/src/observer/executor/key_selector.h index b6746b612..04f9b73a1 100644 --- a/src/observer/executor/key_selector.h +++ b/src/observer/executor/key_selector.h @@ -14,16 +14,18 @@ namespace tera { namespace observer { class KeySelector { -public: - virtual ~KeySelector() {} + public: + virtual ~KeySelector() {} - // output: selected table name, selected start key - virtual bool SelectStart(std::string* table_name, - std::string* start_key) = 0; - virtual ErrorCode Observe(const std::string& table_name) = 0; + // output: selected table name, selected start key, end_key + // the range of [start_key, end_key) + virtual bool SelectRange(std::string* table_name, std::string* start_key, + std::string* end_key) = 0; + + virtual ErrorCode Observe(const std::string& table_name) = 0; }; -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera #endif // TERA_OBSERVER_EXECUTOR_KEY_SELECTOR_H_ diff --git a/src/observer/executor/notification.h b/src/observer/executor/notification.h deleted file mode 100644 index 7f6993602..000000000 --- a/src/observer/executor/notification.h +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef TERA_OBSERVER_EXECUTOR_NOTIFICATION_H_ -#define TERA_OBSERVER_EXECUTOR_NOTIFICATION_H_ - -#include - -#include "tera.h" - -#pragma GCC visibility push(default) - -namespace tera { -namespace observer { - -class Notification { -public: - virtual ~Notification() {} - - virtual void Ack(Table* t, - const std::string& row_key, - const std::string& column_family, - const std::string& qualifier) = 0; - - virtual void Notify(Table* t, - const std::string& row_key, - const std::string& column_family, - const std::string& qualifier) = 0; - - // relases resource after OnNotify finished - // and delete this - virtual void Done() = 0; -}; - -} // namespace observer -} // namespace tera - -#pragma GCC visibility pop - -#endif // TERA_OBSERVER_EXECUTOR_NOTIFICATION_H_ diff --git a/src/observer/executor/notification_impl.cc b/src/observer/executor/notification_impl.cc index a93762a96..b614865f8 100644 --- a/src/observer/executor/notification_impl.cc +++ b/src/observer/executor/notification_impl.cc @@ -15,57 +15,104 @@ namespace tera { namespace observer { Notification* GetNotification(const std::shared_ptr& notify_cell) { - return new NotificationImpl(notify_cell); + return new NotificationImpl(notify_cell); } NotificationImpl::NotificationImpl(const std::shared_ptr& notify_cell) : notify_cell_(notify_cell), - start_timestamp_(get_micros()), - notify_timestamp_(0) {} + start_timestamp_(get_micros()), + notify_timestamp_(0), + ack_callback_(nullptr), + notify_callback_(nullptr), + ack_context_(nullptr), + notify_context_(nullptr) {} -void NotificationImpl::Ack(Table* t, - const std::string& row_key, - const std::string& column_family, +void NotificationImpl::SetAckCallBack(Notification::Callback callback) { + if (notify_cell_->notify_transaction == NULL) { + ack_callback_ = callback; + } else { + LOG(ERROR) << "Support ack callback only when TransactionType = kNoneTransaction"; + abort(); + } +} + +void NotificationImpl::SetAckContext(void* context) { ack_context_ = context; } + +void* NotificationImpl::GetAckContext() { return ack_context_; } + +void NotificationImpl::Ack(Table* t, const std::string& row_key, const std::string& column_family, const std::string& qualifier) { - if (notify_cell_->notify_transaction != NULL) { - notify_cell_->notify_transaction->Ack(t, row_key, column_family, qualifier); - return; - } - - // kNoneTransaction - tera::RowMutation* mutation = t->NewRowMutation(row_key); - std::string notify_qulifier = PackNotifyName(column_family, qualifier); - mutation->DeleteColumns(kNotifyColumnFamily, notify_qulifier, start_timestamp_); - t->ApplyMutation(mutation); + if (notify_cell_->notify_transaction != NULL) { + notify_cell_->notify_transaction->Ack(t, row_key, column_family, qualifier); + return; + } + + // kNoneTransaction + tera::RowMutation* mutation = t->NewRowMutation(row_key); + std::string notify_qulifier = PackNotifyName(column_family, qualifier); + mutation->DeleteColumns(kNotifyColumnFamily, notify_qulifier, start_timestamp_); + if (ack_callback_ != nullptr) { + mutation->SetContext(this); + mutation->SetCallBack([](RowMutation* mu) { + NotificationImpl* notification_impl = (NotificationImpl*)mu->GetContext(); + ErrorCode err = mu->GetError(); + notification_impl->ack_callback_(notification_impl, err); + delete mu; + }); + } + t->ApplyMutation(mutation); + if (ack_callback_ == nullptr) { delete mutation; + } } -void NotificationImpl::Notify(Table* t, - const std::string& row_key, - const std::string& column_family, - const std::string& qualifier) { - if (notify_cell_->notify_transaction != NULL) { - notify_cell_->notify_transaction->Notify(t, row_key, column_family, qualifier); - return; - } - - // kNoneTransaction - if (notify_timestamp_ == 0) { - notify_timestamp_ = get_micros(); - } - - tera::ErrorCode err; - std::string notify_qulifier = PackNotifyName(column_family, qualifier); - t->Put(row_key, kNotifyColumnFamily, notify_qulifier, NumberToString(notify_timestamp_), notify_timestamp_, &err); - if (err.GetType() != tera::ErrorCode::kOK) { - LOG(ERROR) << "Notify error. table: " << t->GetName() << " row " - << row_key << " pos: " << column_family << ":" << qualifier; - } +void NotificationImpl::SetNotifyCallBack(Notification::Callback callback) { + if (notify_cell_->notify_transaction == NULL) { + notify_callback_ = callback; + } else { + LOG(ERROR) << "Support notify callback only when TransactionType = kNoneTransaction"; + abort(); + } } -void NotificationImpl::Done() { - delete this; +void NotificationImpl::SetNotifyContext(void* context) { notify_context_ = context; } + +void* NotificationImpl::GetNotifyContext() { return notify_context_; } + +void NotificationImpl::Notify(Table* t, const std::string& row_key, + const std::string& column_family, const std::string& qualifier) { + if (notify_cell_->notify_transaction != NULL) { + notify_cell_->notify_transaction->Notify(t, row_key, column_family, qualifier); + return; + } + + // kNoneTransaction + if (notify_timestamp_ == 0) { + notify_timestamp_ = get_micros(); + } + + std::string notify_qulifier = PackNotifyName(column_family, qualifier); + tera::RowMutation* mutation = t->NewRowMutation(row_key); + + mutation->Put(kNotifyColumnFamily, notify_qulifier, NumberToString(notify_timestamp_), + notify_timestamp_); + + if (notify_callback_ != nullptr) { + mutation->SetContext(this); + mutation->SetCallBack([](RowMutation* mu) { + NotificationImpl* notification_impl = (NotificationImpl*)mu->GetContext(); + ErrorCode err = mu->GetError(); + notification_impl->notify_callback_(notification_impl, err); + delete mu; + }); + } + t->ApplyMutation(mutation); + if (notify_callback_ == nullptr) { + delete mutation; + } } -} // namespace observer -} // namespace tera +void NotificationImpl::Done() { delete this; } + +} // namespace observer +} // namespace tera diff --git a/src/observer/executor/notification_impl.h b/src/observer/executor/notification_impl.h index e5b9bda90..dbed164df 100644 --- a/src/observer/executor/notification_impl.h +++ b/src/observer/executor/notification_impl.h @@ -7,10 +7,9 @@ #include #include -#include #include "observer/executor/notify_cell.h" -#include "observer/executor/notification.h" +#include "observer/notification.h" #include "tera.h" namespace tera { @@ -19,29 +18,37 @@ namespace observer { Notification* GetNotification(const std::shared_ptr& notify_cell); class NotificationImpl : public Notification { -public: - explicit NotificationImpl(const std::shared_ptr& notify_cell); - virtual ~NotificationImpl() {} - - virtual void Ack(Table* t, - const std::string& row_key, - const std::string& column_family, - const std::string& qualifier); - - virtual void Notify(Table* t, - const std::string& row_key, - const std::string& column_family, - const std::string& qualifier); - - virtual void Done(); - -private: - std::shared_ptr notify_cell_; - int64_t start_timestamp_; - int64_t notify_timestamp_; + public: + explicit NotificationImpl(const std::shared_ptr& notify_cell); + virtual ~NotificationImpl() {} + + virtual void SetAckCallBack(Notification::Callback callback); + virtual void SetAckContext(void* context); + virtual void* GetAckContext(); + + virtual void Ack(Table* t, const std::string& row_key, const std::string& column_family, + const std::string& qualifier); + + virtual void SetNotifyCallBack(Notification::Callback callback); + virtual void SetNotifyContext(void* context); + virtual void* GetNotifyContext(); + + virtual void Notify(Table* t, const std::string& row_key, const std::string& column_family, + const std::string& qualifier); + + virtual void Done(); + + private: + std::shared_ptr notify_cell_; + int64_t start_timestamp_; + int64_t notify_timestamp_; + Notification::Callback ack_callback_; + Notification::Callback notify_callback_; + void* ack_context_; + void* notify_context_; }; -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera #endif // TERA_OBSERVER_EXECUTOR_NOTIFICATION_IMPL_H_ diff --git a/src/observer/executor/notify_cell.h b/src/observer/executor/notify_cell.h index 9a0954a64..2fb3c0a33 100644 --- a/src/observer/executor/notify_cell.h +++ b/src/observer/executor/notify_cell.h @@ -5,110 +5,100 @@ #ifndef TERA_OBSERVER_EXECUTOR_NOTIFY_CELL_H_ #define TERA_OBSERVER_EXECUTOR_NOTIFY_CELL_H_ -#include #include -#include #include +#include +#include #include "gflags/gflags.h" #include "glog/logging.h" #include "common/semaphore.h" #include "common/timer.h" -#include "observer/executor/observer.h" -#include "observer/rowlocknode/fake_rowlock_client.h" +#include "observer/observer.h" #include "sdk/rowlock_client.h" -#include "tera.h" DECLARE_string(rowlock_server_port); DECLARE_string(rowlock_server_ip); DECLARE_bool(mock_rowlock_enable); - namespace tera { namespace observer { struct Column { - std::string table_name; - std::string family; - std::string qualifier; - - bool operator<(const Column& other) const { - int32_t result = 0; - result = table_name.compare(other.table_name); - if (result != 0) { - return result < 0; - } - result = family.compare(other.family); - if (result != 0) { - return result < 0; - } - result = qualifier.compare(other.qualifier); - - return result < 0; + std::string table_name; + std::string family; + std::string qualifier; + + bool operator<(const Column& other) const { + int32_t result = 0; + result = table_name.compare(other.table_name); + if (result != 0) { + return result < 0; } - - bool operator==(const Column& other) const { - return table_name == other.table_name && family == other.family - && qualifier == other.qualifier; + result = family.compare(other.family); + if (result != 0) { + return result < 0; } + result = qualifier.compare(other.qualifier); + + return result < 0; + } + + bool operator==(const Column& other) const { + return table_name == other.table_name && family == other.family && qualifier == other.qualifier; + } }; struct AutoRowUnlocker { - AutoRowUnlocker(const std::string& table, - const std::string& unlock_row) - : table_name(table), - row(unlock_row) {} - AutoRowUnlocker() {} + AutoRowUnlocker(const std::string& table, const std::string& unlock_row) + : table_name(table), row(unlock_row) {} + AutoRowUnlocker() {} - ~AutoRowUnlocker() { - // UnLockRow + ~AutoRowUnlocker() { + // UnLockRow - if (FLAGS_mock_rowlock_enable == true) { - client.reset(new FakeRowlockClient()); - } else { - client.reset(new RowlockClient()); - } + if (FLAGS_mock_rowlock_enable == true) { + client.reset(new FakeRowlockClient()); + } else { + client.reset(new RowlockClient()); + } - RowlockRequest request; - RowlockResponse response; + RowlockRequest request; + RowlockResponse response; - request.set_row(row); - request.set_table_name(table_name); + request.set_row(row); + request.set_table_name(table_name); - client->UnLock(&request, &response); - VLOG(12) <<"[time] UnLock finish. [row] " << row; - } + client->UnLock(&request, &response); + VLOG(12) << "[time] UnLock finish. [row] " << row; + } - std::unique_ptr client; - std::string table_name; - std::string row; + std::unique_ptr client; + std::string table_name; + std::string row; }; // info inside scanner struct NotifyCell { - NotifyCell(common::Semaphore& semaphore) - : semaphore_ref(semaphore), - table(NULL), - observer(NULL) { } + NotifyCell(common::Semaphore& semaphore) + : semaphore_ref(semaphore), table(NULL), observer(NULL) {} - ~NotifyCell() { - semaphore_ref.Release(); - } + ~NotifyCell() { semaphore_ref.Release(); } + + common::Semaphore& semaphore_ref; + std::string row; + std::string value; + int64_t timestamp; + + Column observed_column; + std::unique_ptr notify_transaction; + tera::Table* table; + Observer* observer; - common::Semaphore& semaphore_ref; - std::string row; - std::string value; - int64_t timestamp; - - Column observed_column; - std::unique_ptr notify_transaction; - tera::Table* table; - Observer* observer; - - std::shared_ptr unlocker; + std::shared_ptr unlocker; }; -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera #endif // TERA_OBSERVER_EXECUTOR_NOTIFY_CELL_H_ diff --git a/src/observer/executor/random_key_selector.cc b/src/observer/executor/random_key_selector.cc index 788ce167a..36aa70ca7 100644 --- a/src/observer/executor/random_key_selector.cc +++ b/src/observer/executor/random_key_selector.cc @@ -10,6 +10,7 @@ #include "types.h" DECLARE_string(flagfile); +DECLARE_int64(observer_update_table_info_period_s); namespace tera { namespace observer { @@ -18,117 +19,118 @@ RandomKeySelector::RandomKeySelector() : tables_(new std::map>), quit_(false), cond_(&quit_mutex_) { - tera::ErrorCode err; - client_ = tera::Client::NewClient(FLAGS_flagfile, &err); - update_thread_.Start(std::bind(&RandomKeySelector::Update, this)); + tera::ErrorCode err; + client_ = tera::Client::NewClient(FLAGS_flagfile, &err); + update_thread_ = std::thread{&RandomKeySelector::Update, this}; } RandomKeySelector::~RandomKeySelector() { - { - MutexLock locker(&quit_mutex_); - quit_ = true; - cond_.Broadcast(); - } - - update_thread_.Join(); - if (client_ != NULL) { - delete client_; - } + { + MutexLock locker(&quit_mutex_); + quit_ = true; + cond_.Broadcast(); + } + + update_thread_.join(); + if (client_ != NULL) { + delete client_; + } } -bool RandomKeySelector::SelectStart(std::string* table_name, - std::string* start_key) { - srand((unsigned)time(NULL)); +bool RandomKeySelector::SelectRange(std::string* table_name, std::string* start_key, + std::string* end_key) { + srand((unsigned)time(NULL)); - std::shared_ptr>> table_read_copy; - { - MutexLock locker(&table_mutex_); - // copy for copy-on-write, ref +1 - table_read_copy = tables_; - } + std::shared_ptr>> table_read_copy; + { + MutexLock locker(&table_mutex_); + // copy for copy-on-write, ref +1 + table_read_copy = tables_; + } + + if (table_read_copy->size() == 0) { + return false; + } + + // random table + uint32_t table_no = rand() % observe_tables_.size(); + *table_name = observe_tables_[table_no]; + + // random key + size_t tablet_num = (*table_read_copy)[*table_name].size(); + if (0 == tablet_num) { + LOG(ERROR) << "No tablet"; + return false; + } + + uint32_t tablet_no = rand() % tablet_num; + *start_key = (*table_read_copy)[*table_name][tablet_no].start_key; + *end_key = ""; + + VLOG(25) << "Random StartKey=" << *start_key << " TabletNo=" << tablet_no; + return true; +} - if (table_read_copy->size() == 0) { - return false; +ErrorCode RandomKeySelector::Observe(const std::string& table_name) { + tera::ErrorCode err; + + MutexLock locker(&table_mutex_); + + if (!tables_.unique()) { + // In this case threads may reading this copy. + // Shared_ptr construct a new copy from the original one. + // Later requests will operate on the new copy. + tables_.reset(new std::map>(*tables_)); + } + if (tables_->find(table_name) == tables_->end()) { + std::vector tablets; + client_->GetTabletLocation(table_name, &tablets, &err); + if (tera::ErrorCode::kOK != err.GetType()) { + LOG(ERROR) << "Observe table failed, " << err.ToString(); + return err; } + observe_tables_.push_back(table_name); + (*tables_)[table_name] = tablets; + } + return err; +} - // random table - uint32_t table_no = rand() % observe_tables_.size(); - *table_name = observe_tables_[table_no]; - - - // random key - size_t tablet_num = (*table_read_copy)[*table_name].size(); - if (0 == tablet_num) { - LOG(ERROR) << "No tablet"; - return false; +void RandomKeySelector::Update() { + tera::ErrorCode err; + while (true) { + { + MutexLock locker(&quit_mutex_); + if (quit_) { + return; + } + cond_.TimeWaitInUs(FLAGS_observer_update_table_info_period_s * 1000000); } - uint32_t tablet_no = rand() % tablet_num; - *start_key = (*table_read_copy)[*table_name][tablet_no].start_key; - - VLOG(25) << "Random StartKey=" << *start_key << " TabletNo=" << tablet_no; - return true; -} + // update data first + std::shared_ptr>> table_update_copy( + new std::map>); -ErrorCode RandomKeySelector::Observe(const std::string& table_name) { - tera::ErrorCode err; + // updated table + for (uint32_t i = 0; i < observe_tables_.size(); ++i) { + std::string table_name = observe_tables_[i]; - MutexLock locker(&table_mutex_); + std::vector tablets; + client_->GetTabletLocation(table_name, &tablets, &err); + if (tera::ErrorCode::kOK != err.GetType()) { + LOG(ERROR) << "Update table info failed, tablename:" << table_name + << " err:" << err.ToString(); + continue; + } - if (!tables_.unique()) { - // In this case threads may reading this copy. - // Shared_ptr construct a new copy from the original one. - // Later requests will operate on the new copy. - tables_.reset(new std::map>(*tables_)); + table_update_copy->insert( + std::pair>(table_name, tablets)); } - if (tables_->find(table_name) == tables_->end()) { - - std::vector tablets; - client_->GetTabletLocation(table_name, &tablets, &err); - if (tera::ErrorCode::kOK != err.GetType()) { - LOG(ERROR) << "Observe table failed, " << err.ToString(); - return err; - } - observe_tables_.push_back(table_name); - (*tables_)[table_name] = tablets; - } - return err; -} -void RandomKeySelector::Update() { - tera::ErrorCode err; - while (true) { - { - MutexLock locker(&quit_mutex_); - if (quit_) { - return; - } - cond_.TimeWaitInUs(kObserverWaitTimeMs * 1000); - } - - // update data first - std::shared_ptr>> table_update_copy( - new std::map>); - - // updated table - for (uint32_t i = 0; i < observe_tables_.size(); ++i) { - std::string table_name = observe_tables_[i]; - - std::vector tablets; - client_->GetTabletLocation(table_name, &tablets, &err); - if (tera::ErrorCode::kOK != err.GetType()) { - LOG(ERROR) << "Observe table failed, " << err.ToString(); - continue; - } - - table_update_copy->insert(std::pair>(table_name, tablets)); - } - - // update pointer - MutexLock locker(&table_mutex_); - tables_.swap(table_update_copy); - } + // update pointer + MutexLock locker(&table_mutex_); + tables_.swap(table_update_copy); + } } -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera diff --git a/src/observer/executor/random_key_selector.h b/src/observer/executor/random_key_selector.h index 5a20fb4f3..65adcaa80 100644 --- a/src/observer/executor/random_key_selector.h +++ b/src/observer/executor/random_key_selector.h @@ -8,10 +8,10 @@ #include #include #include +#include #include #include "common/mutex.h" -#include "common/thread.h" #include "observer/executor/key_selector.h" #include "tera.h" @@ -19,29 +19,30 @@ namespace tera { namespace observer { class RandomKeySelector : public KeySelector { -public: - RandomKeySelector(); - virtual ~RandomKeySelector(); - - virtual bool SelectStart(std::string* table_name, - std::string* start_key); - virtual ErrorCode Observe(const std::string& table_name); -private: - void Update(); - -private: - tera::Client* client_; - mutable Mutex table_mutex_; - std::vector observe_tables_; - std::shared_ptr>> tables_; - common::Thread update_thread_; - - mutable Mutex quit_mutex_; - bool quit_; - common::CondVar cond_; + public: + RandomKeySelector(); + virtual ~RandomKeySelector(); + + virtual bool SelectRange(std::string* table_name, std::string* start_key, std::string* end_key); + + virtual ErrorCode Observe(const std::string& table_name); + + private: + void Update(); + + private: + tera::Client* client_; + mutable Mutex table_mutex_; + std::vector observe_tables_; + std::shared_ptr>> tables_; + std::thread update_thread_; + + mutable Mutex quit_mutex_; + bool quit_; + common::CondVar cond_; }; -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera -#endif // TERA_OBSERVER_EXECUTOR_RANDOM_KEY_SELECTOR_H_ \ No newline at end of file +#endif // TERA_OBSERVER_EXECUTOR_RANDOM_KEY_SELECTOR_H_ diff --git a/src/observer/executor/scanner.h b/src/observer/executor/scanner.h deleted file mode 100644 index a11a8646d..000000000 --- a/src/observer/executor/scanner.h +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef TERA_SCANNER_H_ -#define TERA_SCANNER_H_ - -#include - -#include "observer/executor/observer.h" -#include "tera/error_code.h" - -#pragma GCC visibility push(default) -namespace tera { -namespace observer { - -class Scanner { -public: - static Scanner* GetScanner(); - - virtual ~Scanner() {} - - // register user define observers - // user should not destruct observers, which will be handled by scanner - virtual ErrorCode Observe(const std::string& table_name, - const std::string& column_family, - const std::string& qualifier, - Observer* observer) = 0; - - virtual bool Init() = 0; - - virtual bool Start() = 0; - - virtual void Exit() = 0; -}; - -} // namespace observer -} // namespace tera -#pragma GCC visibility pop - -#endif // TERA_SCANNER_H_ diff --git a/src/observer/executor/scanner_entry.cc b/src/observer/executor/scanner_entry.cc index 5b012b339..a3c583b90 100644 --- a/src/observer/executor/scanner_entry.cc +++ b/src/observer/executor/scanner_entry.cc @@ -2,12 +2,11 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "observer/executor/scanner_entry.h" - -#include "gflags/gflags.h" -#include "glog/logging.h" +#include +#include "common/this_thread.h" #include "observer/executor/scanner_impl.h" +#include "observer/scanner_entry.h" namespace tera { namespace observer { @@ -17,47 +16,49 @@ ScannerEntry::ScannerEntry() {} ScannerEntry::~ScannerEntry() {} bool ScannerEntry::StartServer() { - scanner_.reset(tera::observer::Scanner::GetScanner()); - - if(!scanner_->Init()) { - LOG(ERROR) << "fail to init scanner_impl"; - return false; - } - - // observe observers to scanner - ErrorCode err = Observe(); - if (tera::ErrorCode::kOK != err.GetType()) { - LOG(ERROR) << "Observe failed, reason: " << err.ToString(); - return false; - } - - if(!scanner_->Start()) { - LOG(ERROR) << "fail to start scanner_impl"; - return false; - } - return true; + Scanner* scanner = tera::observer::Scanner::GetScanner(); + + scanner->SetOptions(options_); + + if (!scanner->Init()) { + LOG(ERROR) << "fail to init scanner_impl"; + return false; + } + + // observe observers to scanner + ErrorCode err = Observe(); + if (tera::ErrorCode::kOK != err.GetType()) { + LOG(ERROR) << "Observe failed, reason: " << err.ToString(); + return false; + } + + if (!scanner->Start()) { + LOG(ERROR) << "fail to start scanner_impl"; + return false; + } + return true; } void ScannerEntry::ShutdownServer() { - LOG(INFO) << "shut down scanner"; - scanner_->Exit(); - scanner_.reset(); - LOG(INFO) << "scanner stop done!"; + LOG(INFO) << "shut down scanner"; + Scanner* scanner = tera::observer::Scanner::GetScanner(); + scanner->Exit(); + LOG(INFO) << "scanner stop done!"; } bool ScannerEntry::Run() { - ThisThread::Sleep(1000); - return true; + ThisThread::Sleep(1000); + return true; } ErrorCode ScannerEntry::Observe() { - ErrorCode err; - return err; + ErrorCode err; + return err; } -Scanner* ScannerEntry::GetScanner() const { - return scanner_.get(); -} +void ScannerEntry::SetOptions(const ScannerOptions& options) { options_ = options; } + +Scanner* ScannerEntry::GetScanner() const { return tera::observer::Scanner::GetScanner(); } -} // namespace observer -} // namespace tera \ No newline at end of file +} // namespace observer +} // namespace tera diff --git a/src/observer/executor/scanner_impl.cc b/src/observer/executor/scanner_impl.cc index 968fc393e..bcecc6eae 100644 --- a/src/observer/executor/scanner_impl.cc +++ b/src/observer/executor/scanner_impl.cc @@ -4,23 +4,22 @@ #include "observer/executor/scanner_impl.h" +#include #include #include #include -#include - #include "gflags/gflags.h" #include "common/base/string_number.h" #include "common/this_thread.h" #include "observer/executor/random_key_selector.h" -#include "observer/executor/notification.h" +#include "observer/executor/tablet_bucket_key_selector.h" #include "observer/executor/notification_impl.h" -#include "observer/rowlocknode/fake_rowlock_client.h" -#include "sdk/table_impl.h" +#include "observer/notification.h" +#include "sdk/rowlock_client.h" #include "sdk/sdk_utils.h" -#include "tera.h" +#include "sdk/table_impl.h" #include "types.h" DECLARE_int32(observer_proc_thread_num); @@ -40,736 +39,738 @@ using namespace std::placeholders; namespace tera { namespace observer { -ScannerImpl* ScannerImpl::scanner_instance_ = new ScannerImpl(); -Scanner* Scanner::GetScanner() { - return ScannerImpl::GetInstance(); -} +Scanner* Scanner::GetScanner() { return ScannerImpl::GetInstance(); } ScannerImpl* ScannerImpl::GetInstance() { - return scanner_instance_; + static ScannerImpl instance; + return &instance; } ScannerImpl::ScannerImpl() : table_observe_info_(new std::map), scan_table_threads_(new common::ThreadPool(FLAGS_observer_scanner_thread_num)), observer_threads_(new common::ThreadPool(FLAGS_observer_proc_thread_num)), - transaction_callback_threads_(new common::ThreadPool(FLAGS_observer_random_access_thread_num)), + transaction_callback_threads_( + new common::ThreadPool(FLAGS_observer_random_access_thread_num)), quit_(false), semaphore_(FLAGS_observer_max_pending_limit) { - profiling_thread_.Start(std::bind(&ScannerImpl::Profiling, this)); + VLOG(13) << "FLAGS_observer_proc_thread_num = " << FLAGS_observer_proc_thread_num; + VLOG(13) << "FLAGS_observer_scanner_thread_num = " << FLAGS_observer_scanner_thread_num; + VLOG(13) << "FLAGS_observer_max_pending_limit = " << FLAGS_observer_max_pending_limit; + VLOG(13) << "FLAGS_observer_random_access_thread_num = " + << FLAGS_observer_random_access_thread_num; + profiling_thread_ = std::thread{&ScannerImpl::Profiling, this}; } +void ScannerImpl::SetOptions(const ScannerOptions& options) { options_ = options; } + +void ScannerImpl::SetScanHook(const std::shared_ptr& hook) { scan_hook_ = hook; } + ScannerImpl::~ScannerImpl() { - Exit(); + Exit(); - scan_table_threads_->Stop(true); - transaction_callback_threads_->Stop(false); - observer_threads_->Stop(true); + scan_table_threads_->Stop(true); + transaction_callback_threads_->Stop(false); + observer_threads_->Stop(true); - profiling_thread_.Join(); + profiling_thread_.join(); - MutexLock locker(&table_mutex_); - // close table - for (auto it = table_observe_info_->begin(); it != table_observe_info_->end(); ++it) { - if (it->second.table) { - delete it->second.table; - } + MutexLock locker(&table_mutex_); + // close table + for (auto it = table_observe_info_->begin(); it != table_observe_info_->end(); ++it) { + if (it->second.table) { + delete it->second.table; } + } - for (auto it = observers_.begin(); it != observers_.end(); ++it) { - delete *it; - } + for (auto it = observers_.begin(); it != observers_.end(); ++it) { + delete *it; + } } -ErrorCode ScannerImpl::Observe(const std::string& table_name, - const std::string& column_family, - const std::string& qualifier, - Observer* observer) { - // Observe before init - tera::ErrorCode err; - if (!tera_client_) { - LOG(ERROR) << "Init scanner first!"; - err.SetFailed(ErrorCode::kSystem, "observe before scanner init"); - return err; - } +ErrorCode ScannerImpl::Observe(const std::string& table_name, const std::string& column_family, + const std::string& qualifier, Observer* observer) { + // Observe before init + tera::ErrorCode err; + if (!tera_client_) { + LOG(ERROR) << "Init scanner first!"; + err.SetFailed(ErrorCode::kSystem, "observe before scanner init"); + return err; + } - Column column = {table_name, column_family, qualifier}; + Column column = {table_name, column_family, qualifier}; - { - - MutexLock locker(&table_mutex_); - if (!table_observe_info_.unique()) { - // Shared_ptr construct a new copy from the original one. - // Former requests still reading the original shared_ptr - // Write operation executed on the new copy, so as the later requests - table_observe_info_.reset(new std::map(*table_observe_info_)); - } - - if (!(*table_observe_info_)[table_name].table) { - // init table - tera::Table* table = tera_client_->OpenTable(table_name, &err); - if (tera::ErrorCode::kOK != err.GetType()) { - LOG(ERROR) << "open tera table [" << table_name - << "] failed, " << err.ToString(); - return err; - } - LOG(INFO) << "open tera table [" << table_name << "] succ"; - - // build map - (*table_observe_info_)[table_name].table = table; - (*table_observe_info_)[table_name].type = GetTableTransactionType(table); - } + { + MutexLock locker(&table_mutex_); + if (!table_observe_info_.unique()) { + // Shared_ptr construct a new copy from the original one. + // Former requests still reading the original shared_ptr + // Write operation executed on the new copy, so as the later requests + table_observe_info_.reset(new std::map(*table_observe_info_)); + } + + if (!(*table_observe_info_)[table_name].table) { + // init table + tera::Table* table = tera_client_->OpenTable(table_name, &err); + if (tera::ErrorCode::kOK != err.GetType()) { + LOG(ERROR) << "open tera table [" << table_name << "] failed, " << err.ToString(); + return err; + } + LOG(INFO) << "open tera table [" << table_name << "] succ"; - if (!CheckTransactionTypeLegalForTable(observer->GetTransactionType(), - (*table_observe_info_)[table_name].type)) { + // build map + (*table_observe_info_)[table_name].table = table; + (*table_observe_info_)[table_name].type = GetTableTransactionType(table); + } - LOG(ERROR) << "Transaction type does not match table. table_name: " - << table_name << " type: " - << (*table_observe_info_)[table_name].type - << " , observer name: " - << observer->GetObserverName() << " type: " - << observer->GetTransactionType(); + if (!CheckTransactionTypeLegalForTable(observer->GetTransactionType(), + (*table_observe_info_)[table_name].type)) { + LOG(ERROR) << "Transaction type does not match table. table_name: " << table_name + << " type: " << (*table_observe_info_)[table_name].type + << " , observer name: " << observer->GetObserverName() + << " type: " << observer->GetTransactionType(); - err.SetFailed(ErrorCode::kSystem, "Transaction type does not match table"); - return err; - } + err.SetFailed(ErrorCode::kSystem, "Transaction type does not match table"); + return err; + } - auto it = (*table_observe_info_)[table_name].observe_columns[column].insert(observer); - if (!it.second) { - LOG(ERROR) << "Observer " << observer->GetObserverName() - << " observe " << table_name << ":" - << column_family << ":" << qualifier - << " more than once!"; - err.SetFailed(ErrorCode::kSystem, - "the same observer observe the same column more than once"); - return err; - } - observers_.insert(observer); + auto it = (*table_observe_info_)[table_name].observe_columns[column].insert(observer); + if (!it.second) { + LOG(ERROR) << "Observer " << observer->GetObserverName() << " observe " << table_name << ":" + << column_family << ":" << qualifier << " more than once!"; + err.SetFailed(ErrorCode::kSystem, "the same observer observe the same column more than once"); + return err; } + observers_.insert(observer); + } - err = key_selector_->Observe(table_name); - LOG(INFO) << "Observer start. table: " << table_name - << " cf:qu " << column_family << ":" - << qualifier << " observer: " - << observer->GetObserverName(); + err = key_selector_->Observe(table_name); + LOG(INFO) << "Observer start. table: " << table_name << " cf:qu " << column_family << ":" + << qualifier << " observer: " << observer->GetObserverName(); - return err; + return err; } bool ScannerImpl::Init() { - tera::ErrorCode err; - if (!tera_client_) { - tera_client_.reset(tera::Client::NewClient(FLAGS_flagfile, &err)); - - if (tera::ErrorCode::kOK != err.GetType()) { - LOG(ERROR) << "init tera client [" << FLAGS_flagfile - << "] failed, " << err.ToString(); - return false; - } + tera::ErrorCode err; + if (!tera_client_) { + tera_client_.reset(tera::Client::NewClient(FLAGS_flagfile, &err)); + + if (tera::ErrorCode::kOK != err.GetType()) { + LOG(ERROR) << "init tera client [" << FLAGS_flagfile << "] failed, " << err.ToString(); + return false; } + } - // init key_selector_ - // different selector started by different flags + // init key_selector_ + // different selector started by different flags + if (options_.strategy == ScanStrategy::kRandom) { + LOG(INFO) << "random key"; key_selector_.reset(new RandomKeySelector()); + } else if (options_.strategy == ScanStrategy::kTabletBucket) { + LOG(INFO) << "tablet bucket key"; + key_selector_.reset(new TabletBucketKeySelector(options_.bucket_id, options_.bucket_cnt)); + } - return true; + return true; } bool ScannerImpl::Start() { - for (int32_t idx = 0; idx < FLAGS_observer_scanner_thread_num; ++idx) { - scan_table_threads_->AddTask(std::bind(&ScannerImpl::ScanTable, this)); - } - return true; + for (int32_t idx = 0; idx < FLAGS_observer_scanner_thread_num; ++idx) { + scan_table_threads_->AddTask(std::bind(&ScannerImpl::ScanTable, this)); + } + return true; } -void ScannerImpl::Exit() { - quit_ = true; -} +void ScannerImpl::Exit() { quit_ = true; } -tera::Client* ScannerImpl::GetTeraClient() const { - return tera_client_.get(); -} +tera::Client* ScannerImpl::GetTeraClient() const { return tera_client_.get(); } void ScannerImpl::ScanTable() { - std::string start_key; - std::string table_name; - std::set observe_columns; - tera::Table* table = NULL; - - // table and start key will be refreshed. - while (!quit_) { - if (key_selector_->SelectStart(&table_name, &start_key)) { - GetObserveColumns(table_name, &observe_columns); - table = GetTable(table_name); - if (DoScanTable(table, observe_columns, start_key, "")) { - DoScanTable(table, observe_columns, "", start_key); - } + std::string start_key; + std::string end_key; + std::string table_name; + std::set observe_columns; + tera::Table* table = nullptr; + ScanHook::Columns filter_columns; + + // table and start key will be refreshed. + while (!quit_) { + // when random select strategy this scanner will scan all range of + // table, but tablet bucket strategy will scan range [start_key, end_key) + // again and again + + if (key_selector_->SelectRange(&table_name, &start_key, &end_key)) { + LOG(INFO) << "table_name=" << table_name << " start_key=[" << start_key << "] end_key=[" + << end_key << "]"; + GetObserveColumns(table_name, &observe_columns); + for (const auto& col : observe_columns) { + filter_columns.insert({col.family, col.qualifier}); + } + table = GetTable(table_name); + BeforeScanTable(table_name, filter_columns); + bool scan_ret = DoScanTable(table, observe_columns, start_key, end_key); + AfterScanTable(table_name, filter_columns, scan_ret); + if (scan_ret) { + if (options_.strategy == ScanStrategy::kRandom) { + BeforeScanTable(table_name, filter_columns); + scan_ret = DoScanTable(table, observe_columns, end_key, start_key); + AfterScanTable(table_name, filter_columns, scan_ret); + } else if (options_.strategy == ScanStrategy::kTabletBucket) { + BeforeScanTable(table_name, filter_columns); + scan_ret = DoScanTable(table, observe_columns, start_key, end_key); + AfterScanTable(table_name, filter_columns, scan_ret); + } else { + abort(); } + } } + } } -bool ScannerImpl::DoScanTable(tera::Table* table, - const std::set& observe_columns, - const std::string& start_key, - const std::string& end_key) { - if (table == NULL) { - return false; - } - LOG(INFO) << "Start scan table. Table name: [" << table->GetName() - << "]. Start key: [" << start_key << "]"; +void ScannerImpl::BeforeScanTable(const std::string& table_name, const ScanHook::Columns& columns) { + if (scan_hook_) { + scan_hook_->Before(table_name, columns); + } +} - tera::ScanDescriptor desc(start_key); - desc.SetEnd(end_key); +void ScannerImpl::AfterScanTable(const std::string& table_name, const ScanHook::Columns& columns, + bool scan_ret) { + if (scan_hook_) { + scan_hook_->After(table_name, columns, scan_ret); + } +} - // Notify stores in single lg - desc.AddColumnFamily(kNotifyColumnFamily); - tera::ErrorCode err; - std::unique_ptr result_stream(table->Scan(desc, &err)); - if (tera::ErrorCode::kOK != err.GetType()) { - LOG(ERROR) << "table scan failed, " << err.ToString(); - return false; - } +bool ScannerImpl::DoScanTable(tera::Table* table, const std::set& observe_columns, + const std::string& start_key, const std::string& end_key) { + if (table == nullptr) { + LOG(ERROR) << "table not opened or closed"; + return false; + } + LOG(INFO) << "Start scan table. Table name: [" << table->GetName() << "]. Start key: [" + << start_key << "]"; + + tera::ScanDescriptor desc(start_key); + desc.SetEnd(end_key); + + // Notify stores in single lg + desc.AddColumnFamily(kNotifyColumnFamily); + tera::ErrorCode err; + std::unique_ptr result_stream(table->Scan(desc, &err)); + if (tera::ErrorCode::kOK != err.GetType()) { + LOG(ERROR) << "table scan failed, " << err.ToString(); + return false; + } - if (result_stream->Done(&err)) { - return !quit_; - } + if (result_stream->Done(&err)) { + return !quit_; + } - bool finished = false; - while (true) { - std::string rowkey; - std::vector notify_columns; - if (!NextRow(result_stream.get(), table->GetName(), &finished, &rowkey, ¬ify_columns)) { - return finished; - } + bool finished = false; + while (true) { + std::string rowkey; + std::vector notify_columns; + if (!NextRow(result_stream.get(), table->GetName(), &finished, &rowkey, ¬ify_columns)) { + return finished; + } - if (!TryLockRow(table->GetName(), rowkey)) { - // collision - LOG(INFO) <<"[rowlock failed] table=" << table->GetName() << " row=" << rowkey; - return false; - } - VLOG(12) <<"[time] read value start. [row] " << rowkey; + if (!TryLockRow(table->GetName(), rowkey)) { + // collision + LOG(INFO) << "[rowlock failed] table=" << table->GetName() << " row=" << rowkey; + return false; + } + VLOG(12) << "[time] read value start. [row] " << rowkey; - std::shared_ptr unlocker(new AutoRowUnlocker(table->GetName(), rowkey)); - std::vector> notify_cells; - PrepareNotifyCell(table, rowkey, observe_columns, notify_columns, unlocker, ¬ify_cells); + std::shared_ptr unlocker(new AutoRowUnlocker(table->GetName(), rowkey)); + std::vector> notify_cells; + PrepareNotifyCell(table, rowkey, observe_columns, notify_columns, unlocker, ¬ify_cells); - for (uint32_t i = 0; i < notify_cells.size(); ++i) { - AsyncReadCell(notify_cells[i]); - } + for (uint32_t i = 0; i < notify_cells.size(); ++i) { + AsyncReadCell(notify_cells[i]); } - return true; + } + return true; } -void ScannerImpl::PrepareNotifyCell(tera::Table* table, - const std::string& rowkey, +void ScannerImpl::PrepareNotifyCell(tera::Table* table, const std::string& rowkey, const std::set& observe_columns, - const std::vector& notify_columns, + const std::vector& notify_columns, std::shared_ptr unlocker, std::vector>* notify_cells) { - std::shared_ptr> table_observe_info_read_copy; - { - MutexLock locker(&table_mutex_); - // shared_ptr ref +1 - table_observe_info_read_copy = table_observe_info_; - } - - for (auto notify_column = notify_columns.begin(); notify_column != notify_columns.end(); ++notify_column) { - if (observe_columns.find(*notify_column) == observe_columns.end()) { - LOG(WARNING) << "miss observed column, table_name" << table->GetName() - << " cf=" << notify_column->family << " qu=" << notify_column->qualifier; + std::shared_ptr> table_observe_info_read_copy; + { + MutexLock locker(&table_mutex_); + // shared_ptr ref +1 + table_observe_info_read_copy = table_observe_info_; + } + + for (auto notify_column = notify_columns.begin(); notify_column != notify_columns.end(); + ++notify_column) { + if (observe_columns.find(*notify_column) == observe_columns.end()) { + LOG(WARNING) << "miss observed column, table_name" << table->GetName() + << " cf=" << notify_column->family << " qu=" << notify_column->qualifier; + continue; + } + std::map>& observe_columns = + (*table_observe_info_read_copy)[table->GetName()].observe_columns; + + TransactionType type = (*table_observe_info_read_copy)[table->GetName()].type; + + for (auto observer = observe_columns[*notify_column].begin(); + observer != observe_columns[*notify_column].end(); ++observer) { + semaphore_.Acquire(); + std::shared_ptr notify_cell(new NotifyCell(semaphore_)); + switch (type) { + case kGlobalTransaction: + notify_cell->notify_transaction.reset(tera_client_->NewGlobalTransaction()); + if (!notify_cell->notify_transaction) { + LOG(ERROR) << "NewGlobalTransaction failed. Notify cell ignored. table: " + << table->GetName() << " row: " << rowkey + << " family: " << notify_column->family + << " qualifier: " << notify_column->qualifier; continue; - } - std::map>& observe_columns = - (*table_observe_info_read_copy)[table->GetName()].observe_columns; - - TransactionType type = (*table_observe_info_read_copy)[table->GetName()].type; - - for (auto observer = observe_columns[*notify_column].begin(); - observer != observe_columns[*notify_column].end(); ++observer) { - semaphore_.Acquire(); - std::shared_ptr notify_cell(new NotifyCell(semaphore_)); - switch (type) { - case kGlobalTransaction: - notify_cell->notify_transaction.reset(tera_client_->NewGlobalTransaction()); - if (!notify_cell->notify_transaction) { - LOG(ERROR) << "NewGlobalTransaction failed. Notify cell ignored. table: " - << table->GetName() << " row: " << rowkey - << " family: " << notify_column->family - << " qualifier: " << notify_column->qualifier; - continue; - } - break; - case kSingleRowTransaction: - notify_cell->notify_transaction.reset(table->StartRowTransaction(rowkey)); - if (!notify_cell->notify_transaction) { - LOG(ERROR) << "StartRowTransaction failed. Notify cell ignored. table: " - << table->GetName() << " row: " - << rowkey << " family: " << notify_column->family - << " qualifier: " << notify_column->qualifier; - continue; - } - break; - default: - break; - } - - notify_cell->table = table; - notify_cell->row = rowkey; - notify_cell->observed_column = *notify_column; - notify_cell->unlocker = unlocker; - notify_cell->observer = *observer; - notify_cells->push_back(notify_cell); - } - } + } + break; + case kSingleRowTransaction: + notify_cell->notify_transaction.reset(table->StartRowTransaction(rowkey)); + if (!notify_cell->notify_transaction) { + LOG(ERROR) << "StartRowTransaction failed. Notify cell ignored. table: " + << table->GetName() << " row: " << rowkey + << " family: " << notify_column->family + << " qualifier: " << notify_column->qualifier; + continue; + } + break; + default: + break; + } + + notify_cell->table = table; + notify_cell->row = rowkey; + notify_cell->observed_column = *notify_column; + notify_cell->unlocker = unlocker; + notify_cell->observer = *observer; + notify_cells->push_back(notify_cell); + } + } } -bool ScannerImpl::NextRow(tera::ResultStream* result_stream, - const std::string& table_name, bool* finished, - std::string* row, std::vector* notify_columns) { - tera::ErrorCode err; +bool ScannerImpl::NextRow(tera::ResultStream* result_stream, const std::string& table_name, + bool* finished, std::string* row, std::vector* notify_columns) { + tera::ErrorCode err; - // check finish - if (result_stream->Done(&err)) { - *finished = true; - return false; - } + // check finish + if (result_stream->Done(&err)) { + *finished = true; + return false; + } - if (tera::ErrorCode::kOK != err.GetType()) { - LOG(ERROR) << "scanning failed" << err.ToString(); - return false; - } + if (tera::ErrorCode::kOK != err.GetType()) { + LOG(ERROR) << "scanning failed" << err.ToString(); + return false; + } - notify_columns->clear(); - *row = result_stream->RowName(); + notify_columns->clear(); + *row = result_stream->RowName(); - // scan cell - while (!result_stream->Done(&err) && result_stream->RowName() == *row) { - std::string observe_cf; - std::string observe_qu; - if (quit_) { - return false; - } + // scan cell + while (!result_stream->Done(&err) && result_stream->RowName() == *row) { + std::string observe_cf; + std::string observe_qu; + if (quit_) { + return false; + } - if (!ParseNotifyQualifier(result_stream->Qualifier(), &observe_cf, &observe_qu)) { - LOG(WARNING) << "parse notify qualifier failed: " << result_stream->Qualifier(); - result_stream->Next(); - continue; - } + if (!ParseNotifyQualifier(result_stream->Qualifier(), &observe_cf, &observe_qu)) { + LOG(WARNING) << "parse notify qualifier failed: " << result_stream->Qualifier(); + result_stream->Next(); + continue; + } - Column notify_column = {table_name, observe_cf, observe_qu}; + Column notify_column = {table_name, observe_cf, observe_qu}; - notify_columns->push_back(notify_column); - result_stream->Next(); - } - return true; + notify_columns->push_back(notify_column); + result_stream->Next(); + } + return true; } // example qualifier: C:url // C: cf; column: url; bool ScannerImpl::ParseNotifyQualifier(const std::string& notify_qualifier, - std::string* data_family, - std::string* data_qualifier) { - - std::vector frags; - std::size_t pos = std::string::npos; - std::size_t start_pos = 0; - std::string frag; - - // parse cf - pos = notify_qualifier.find_first_of(':', start_pos); - if (pos == std::string::npos) { - LOG(ERROR) << "Parse notify qualifier error: " << notify_qualifier; - return false; - } - frag = notify_qualifier.substr(start_pos, pos - start_pos); - frags.push_back(frag); - start_pos = pos + 1; - - pos = notify_qualifier.size(); - frag = notify_qualifier.substr(start_pos, pos - start_pos); - frags.push_back(frag); - if (2 != frags.size()) { - return false; - } - if (frags[0] == "" || frags[1] == "") { - return false; - } - *data_family = frags[0]; - *data_qualifier = frags[1]; + std::string* data_family, std::string* data_qualifier) { + std::vector frags; + std::size_t pos = std::string::npos; + std::size_t start_pos = 0; + std::string frag; + + // parse cf + pos = notify_qualifier.find_first_of(':', start_pos); + if (pos == std::string::npos) { + LOG(ERROR) << "Parse notify qualifier error: " << notify_qualifier; + return false; + } + frag = notify_qualifier.substr(start_pos, pos - start_pos); + frags.push_back(frag); + start_pos = pos + 1; + + pos = notify_qualifier.size(); + frag = notify_qualifier.substr(start_pos, pos - start_pos); + frags.push_back(frag); + if (2 != frags.size()) { + return false; + } + if (frags[0] == "" || frags[1] == "") { + return false; + } + *data_family = frags[0]; + *data_qualifier = frags[1]; - return true; + return true; } void ScannerImpl::AsyncReadCell(std::shared_ptr notify_cell) { - VLOG(12) <<"[time] do read value start. [row] " - << notify_cell->row << " cf:qu " << notify_cell->observed_column.family - << ":" << notify_cell->observed_column.qualifier; - tera::RowReader* value_reader = - notify_cell->table->NewRowReader(notify_cell->row); - assert(value_reader != NULL); - value_reader->AddColumn(notify_cell->observed_column.family, + VLOG(12) << "[time] do read value start. [row] " << notify_cell->row << " cf:qu " + << notify_cell->observed_column.family << ":" << notify_cell->observed_column.qualifier; + tera::RowReader* value_reader = notify_cell->table->NewRowReader(notify_cell->row); + assert(value_reader != NULL); + value_reader->AddColumn(notify_cell->observed_column.family, notify_cell->observed_column.qualifier); - // transaction read - NotificationContext* context = new NotificationContext(); - context->notify_cell = notify_cell; - context->scanner_impl = this; - - value_reader->SetContext(context); - value_reader->SetCallBack([] (RowReader* value_reader) { - NotificationContext* context = (NotificationContext*)(value_reader->GetContext()); - if (!context->scanner_impl->quit_) { - context->scanner_impl->transaction_callback_threads_-> - AddTask(std::bind(&ScannerImpl::ValidateCellValue, - context->scanner_impl, - value_reader)); - } else { - // call auto unlocker - delete context; - context = NULL; - delete value_reader; - } - }); - if (notify_cell->notify_transaction.get()) { - notify_cell->notify_transaction->Get(value_reader); + // transaction read + NotificationContext* context = new NotificationContext(); + context->notify_cell = notify_cell; + context->scanner_impl = this; + + value_reader->SetContext(context); + value_reader->SetCallBack([](RowReader* value_reader) { + NotificationContext* context = (NotificationContext*)(value_reader->GetContext()); + if (!context->scanner_impl->quit_) { + context->scanner_impl->transaction_callback_threads_->AddTask( + std::bind(&ScannerImpl::ValidateCellValue, context->scanner_impl, value_reader)); } else { - notify_cell->table->Get(value_reader); - } + // call auto unlocker + delete context; + context = NULL; + delete value_reader; + } + }); + if (notify_cell->notify_transaction.get()) { + notify_cell->notify_transaction->Get(value_reader); + } else { + notify_cell->table->Get(value_reader); + } } void ScannerImpl::GetObserveColumns(const std::string& table_name, std::set* observe_columns) { - observe_columns->clear(); + observe_columns->clear(); - std::shared_ptr> table_observe_info_read_copy; - { - - MutexLock locker(&table_mutex_); - // shared_ptr ref +1 - table_observe_info_read_copy = table_observe_info_; - } + std::shared_ptr> table_observe_info_read_copy; + { + MutexLock locker(&table_mutex_); + // shared_ptr ref +1 + table_observe_info_read_copy = table_observe_info_; + } - for (auto it : (*table_observe_info_read_copy)[table_name].observe_columns) { - observe_columns->insert(it.first); - } + for (auto it : (*table_observe_info_read_copy)[table_name].observe_columns) { + observe_columns->insert(it.first); + } } tera::Table* ScannerImpl::GetTable(const std::string table_name) { - std::shared_ptr> table_observe_info_read_copy; - { - - MutexLock locker(&table_mutex_); - table_observe_info_read_copy = table_observe_info_; - } - return (*table_observe_info_read_copy)[table_name].table; + std::shared_ptr> table_observe_info_read_copy; + { + MutexLock locker(&table_mutex_); + table_observe_info_read_copy = table_observe_info_; + } + return (*table_observe_info_read_copy)[table_name].table; } void ScannerImpl::Profiling() { - while (!quit_) { - LOG(INFO) << "[Observer Profiling Info] total: " - << total_counter_.Get() << " failed: " - << fail_counter_.Get() - << " transaction pending: " - << observer_threads_->PendingNum(); - ThisThread::Sleep(1000); - total_counter_.Clear(); - fail_counter_.Clear(); - } + while (!quit_) { + LOG(INFO) << "[Observer Profiling Info] total: " << total_counter_.Get() + << " failed: " << fail_counter_.Get() + << " transaction pending: " << observer_threads_->PendingNum(); + ThisThread::Sleep(1000); + total_counter_.Clear(); + fail_counter_.Clear(); + } } void ScannerImpl::AsyncReadAck(std::shared_ptr notify_cell) { - VLOG(12) <<"[time] Check ACK start. [cf:qu] " - << notify_cell->observed_column.family - << notify_cell->observed_column.qualifier; - - const std::string& ack_qualifier_prefix = - GetAckQualifierPrefix(notify_cell->observed_column.family, - notify_cell->observed_column.qualifier); - - // use transaction to read column Ack - std::shared_ptr row_transaction( - notify_cell->table->StartRowTransaction(notify_cell->row)); - NotificationContext* context = new NotificationContext(); - - // read Acks - tera::RowReader* row_reader = notify_cell->table->NewRowReader(notify_cell->row); - - const std::string& ack_qualifier = GetAckQualifier(ack_qualifier_prefix, - notify_cell->observer->GetObserverName()); - context->ack_qualifier = ack_qualifier; - - row_reader->AddColumn(notify_cell->observed_column.family, ack_qualifier); - - context->notify_cell = notify_cell; - context->scanner_impl = this; - context->ack_transaction = row_transaction; - row_reader->SetContext(context); - row_reader->SetCallBack([] (RowReader* ack_reader) { - NotificationContext* context = (NotificationContext*)(ack_reader->GetContext()); - if (!context->scanner_impl->quit_) { - context->scanner_impl->transaction_callback_threads_->AddTask( - std::bind(&ScannerImpl::ValidateAckConfict, - context->scanner_impl, - ack_reader)); - } else { - // call auto unlocker - delete context; - context = NULL; - delete ack_reader; - } - }); + VLOG(12) << "[time] Check ACK start. [cf:qu] " << notify_cell->observed_column.family + << notify_cell->observed_column.qualifier; + + const std::string& ack_qualifier_prefix = GetAckQualifierPrefix( + notify_cell->observed_column.family, notify_cell->observed_column.qualifier); + + // use transaction to read column Ack + std::shared_ptr row_transaction( + notify_cell->table->StartRowTransaction(notify_cell->row)); + NotificationContext* context = new NotificationContext(); + + // read Acks + tera::RowReader* row_reader = notify_cell->table->NewRowReader(notify_cell->row); - row_transaction->Get(row_reader); + const std::string& ack_qualifier = + GetAckQualifier(ack_qualifier_prefix, notify_cell->observer->GetObserverName()); + context->ack_qualifier = ack_qualifier; + + row_reader->AddColumn(notify_cell->observed_column.family, ack_qualifier); + + context->notify_cell = notify_cell; + context->scanner_impl = this; + context->ack_transaction = row_transaction; + row_reader->SetContext(context); + row_reader->SetCallBack([](RowReader* ack_reader) { + NotificationContext* context = (NotificationContext*)(ack_reader->GetContext()); + if (!context->scanner_impl->quit_) { + context->scanner_impl->transaction_callback_threads_->AddTask( + std::bind(&ScannerImpl::ValidateAckConfict, context->scanner_impl, ack_reader)); + } else { + // call auto unlocker + delete context; + context = NULL; + delete ack_reader; + } + }); + + row_transaction->Get(row_reader); } -std::string ScannerImpl::GetAckQualifierPrefix( - const std::string& family, - const std::string& qualifier) const { - return family + ":" + qualifier; +std::string ScannerImpl::GetAckQualifierPrefix(const std::string& family, + const std::string& qualifier) const { + return family + ":" + qualifier; } std::string ScannerImpl::GetAckQualifier(const std::string& prefix, const std::string& observer_name) const { - return prefix + "+ack_" + observer_name; + return prefix + "+ack_" + observer_name; } -bool ScannerImpl::TryLockRow(const std::string& table_name, - const std::string& row) const { - VLOG(12) << "[time] trylock wait " << table_name << " " << row; - - RowlockRequest request; - RowlockResponse response; +bool ScannerImpl::TryLockRow(const std::string& table_name, const std::string& row) const { + VLOG(12) << "[time] trylock wait " << table_name << " " << row; - std::shared_ptr rowlock_client; + RowlockRequest request; + RowlockResponse response; - if (FLAGS_mock_rowlock_enable == true) { - rowlock_client.reset(new FakeRowlockClient()); - } else { - rowlock_client.reset(new RowlockClient()); - } + std::shared_ptr rowlock_client; - request.set_table_name(table_name); - request.set_row(row); + if (FLAGS_mock_rowlock_enable == true) { + rowlock_client.reset(new FakeRowlockClient()); + } else { + rowlock_client.reset(new RowlockClient()); + } - VLOG(12) << "[time] trylock " << table_name << " " << row; - if (!rowlock_client->TryLock(&request, &response)) { - LOG(ERROR) << "TryLock rpc fail, row: " << row; - return false; - } + request.set_table_name(table_name); + request.set_row(row); - if (response.lock_status() != kLockSucc) { - LOG(INFO) << "Lock row fail, row: " << request.row(); - return false; - } - VLOG(12) << "[time] lock success " << request.table_name() - << " " << request.row(); + VLOG(12) << "[time] trylock " << table_name << " " << row; + if (!rowlock_client->TryLock(&request, &response)) { + LOG(ERROR) << "TryLock rpc fail, row: " << row; + return false; + } - return true; + if (response.lock_status() != kLockSucc) { + LOG(INFO) << "Lock row fail, row: " << request.row(); + return false; + } + VLOG(12) << "[time] lock success " << request.table_name() << " " << request.row(); + + return true; } bool ScannerImpl::CheckTransactionTypeLegalForTable(TransactionType transaction_type, TransactionType table_type) { - if (transaction_type == table_type) { - return true; - } + if (transaction_type == table_type) { + return true; + } - if (transaction_type == kNoneTransaction && table_type == kSingleRowTransaction) { - return true; - } + if (transaction_type == kNoneTransaction && table_type == kSingleRowTransaction) { + return true; + } - return false; + return false; } TransactionType ScannerImpl::GetTableTransactionType(tera::Table* table) { - tera::ErrorCode err; - std::shared_ptr
table_ptr; - table_ptr.reset(tera_client_->OpenTable(table->GetName(), &err)); - std::shared_ptr table_impl(static_cast(table_ptr.get())->GetTableImpl()); - TableSchema schema = table_impl->GetTableSchema(); - - if (IsTransactionTable(schema)) { - std::set gtxn_cfs; - FindGlobalTransactionCfs(schema, >xn_cfs); - if (gtxn_cfs.size() > 0) { - return kGlobalTransaction; - } - return kSingleRowTransaction; - } - return kNoneTransaction; + tera::ErrorCode err; + std::shared_ptr
table_ptr; + table_ptr.reset(tera_client_->OpenTable(table->GetName(), &err)); + std::shared_ptr table_impl( + static_cast(table_ptr.get())->GetTableImpl()); + TableSchema schema = table_impl->GetTableSchema(); + + if (IsTransactionTable(schema)) { + std::set gtxn_cfs; + FindGlobalTransactionCfs(schema, >xn_cfs); + if (gtxn_cfs.size() > 0) { + return kGlobalTransaction; + } + return kSingleRowTransaction; + } + return kNoneTransaction; } void ScannerImpl::ValidateCellValue(RowReader* value_reader) { - std::unique_ptr context((NotificationContext*)(value_reader->GetContext())); - std::shared_ptr notify_cell = context->notify_cell; - VLOG(12) <<"[time] do read value finish. [row] " << notify_cell->row; + std::unique_ptr context((NotificationContext*)(value_reader->GetContext())); + std::shared_ptr notify_cell = context->notify_cell; + VLOG(12) << "[time] do read value finish. [row] " << notify_cell->row; - std::unique_ptr cell_reader(value_reader); + std::unique_ptr cell_reader(value_reader); - if (cell_reader->Done()) { - LOG(WARNING) << "No read value, row: " << notify_cell->row; - return; - } + if (cell_reader->Done()) { + LOG(WARNING) << "No read value, row: " << notify_cell->row; + return; + } - if (tera::ErrorCode::kOK == cell_reader->GetError().GetType()) { - notify_cell->value = cell_reader->Value(); - notify_cell->timestamp = cell_reader->Timestamp(); + if (tera::ErrorCode::kOK == cell_reader->GetError().GetType()) { + notify_cell->value = cell_reader->Value(); + notify_cell->timestamp = cell_reader->Timestamp(); - std::shared_ptr> table_observe_info_read_copy; - { - MutexLock locker(&table_mutex_); - table_observe_info_read_copy = table_observe_info_; - } + std::shared_ptr> table_observe_info_read_copy; + { + MutexLock locker(&table_mutex_); + table_observe_info_read_copy = table_observe_info_; + } - auto it = table_observe_info_read_copy->find(notify_cell->observed_column.table_name); - if (it == table_observe_info_read_copy->end()) { - LOG(WARNING) << "table not found: " << notify_cell->observed_column.table_name; - return; - } + auto it = table_observe_info_read_copy->find(notify_cell->observed_column.table_name); + if (it == table_observe_info_read_copy->end()) { + LOG(WARNING) << "table not found: " << notify_cell->observed_column.table_name; + return; + } - if (it->second.observe_columns.find(notify_cell->observed_column) - == it->second.observe_columns.end()) { - LOG(WARNING) << "column not found. cf: " - << notify_cell->observed_column.family - << " qu: " << notify_cell->observed_column.qualifier; - return; - } + if (it->second.observe_columns.find(notify_cell->observed_column) == + it->second.observe_columns.end()) { + LOG(WARNING) << "column not found. cf: " << notify_cell->observed_column.family + << " qu: " << notify_cell->observed_column.qualifier; + return; + } - if (it->second.observe_columns[notify_cell->observed_column].size() == 0) { - LOG(WARNING) << "no match observers, table=" - << notify_cell->observed_column.table_name - <<" cf=" << notify_cell->observed_column.family - << " qu=" << notify_cell->observed_column.qualifier; - return; - } + if (it->second.observe_columns[notify_cell->observed_column].size() == 0) { + LOG(WARNING) << "no match observers, table=" << notify_cell->observed_column.table_name + << " cf=" << notify_cell->observed_column.family + << " qu=" << notify_cell->observed_column.qualifier; + return; + } - if (notify_cell->observer->GetTransactionType() != kGlobalTransaction) { - ObserveCell(notify_cell); - } else { - AsyncReadAck(notify_cell); - } + if (notify_cell->observer->GetTransactionType() != kGlobalTransaction) { + ObserveCell(notify_cell); } else { - LOG(WARNING) << "[read failed] table=" << notify_cell->table->GetName() - << " cf=" << notify_cell->observed_column.family - << " qu=" << notify_cell->observed_column.qualifier - << " row=" << notify_cell->row << " err=" - << cell_reader->GetError().GetType() - << cell_reader->GetError().GetReason(); - return; - } + AsyncReadAck(notify_cell); + } + } else { + LOG(WARNING) << "[read failed] table=" << notify_cell->table->GetName() + << " cf=" << notify_cell->observed_column.family + << " qu=" << notify_cell->observed_column.qualifier << " row=" << notify_cell->row + << " err=" << cell_reader->GetError().GetType() + << cell_reader->GetError().GetReason(); + return; + } } void ScannerImpl::ObserveCell(std::shared_ptr notify_cell) { - observer_threads_->AddTask( [=] (int64_t) { - Notification* notification = GetNotification(notify_cell); - notify_cell->observer->OnNotify(notify_cell->notify_transaction.get(), tera_client_.get(), - notify_cell->observed_column.table_name, - notify_cell->observed_column.family, - notify_cell->observed_column.qualifier, - notify_cell->row, notify_cell->value, - notify_cell->timestamp, notification); - total_counter_.Inc(); - }); + observer_threads_->AddTask([=](int64_t) { + Notification* notification = GetNotification(notify_cell); + notify_cell->observer->OnNotify(notify_cell->notify_transaction.get(), tera_client_.get(), + notify_cell->observed_column.table_name, + notify_cell->observed_column.family, + notify_cell->observed_column.qualifier, notify_cell->row, + notify_cell->value, notify_cell->timestamp, notification); + total_counter_.Inc(); + }); } - void ScannerImpl::ValidateAckConfict(RowReader* ack_reader) { - NotificationContext* context = (NotificationContext*)(ack_reader->GetContext()); - std::shared_ptr notify_cell = context->notify_cell; - std::unique_ptr ack_row_reader(ack_reader); - - bool is_collision = false; - - if (tera::ErrorCode::kOK == ack_row_reader->GetError().GetType()) { - while (!ack_reader->Done()) { - int64_t latest_observer_start_ts = 0; - if (!StringToNumber(ack_row_reader->Value(), &latest_observer_start_ts)) { - LOG(INFO) << "Convert string to timestamp failed! String: " - << ack_row_reader->Value() << " row=" - << notify_cell->row << " cf=" - << notify_cell->observed_column.family << " qu=" - << notify_cell->observed_column.qualifier; - is_collision = true; - break; - } - - // collision check ack ts later than notify ts - if (latest_observer_start_ts >= notify_cell->timestamp && - notify_cell->notify_transaction->GetStartTimestamp() - latest_observer_start_ts - < FLAGS_observer_ack_conflict_timeout) { - // time too short, collisision, ignore - is_collision = true; - LOG(INFO) << "own collision. row=" << notify_cell->row - << " cf=" << notify_cell->observed_column.family - << " qu=" << notify_cell->observed_column.qualifier - << ", latest observer start_ts=" - << latest_observer_start_ts - << ", observer start_ts=" - << notify_cell->notify_transaction->GetStartTimestamp() - << ", data commit_ts=" << notify_cell->timestamp; - break; - - } - ack_row_reader->Next(); - } - } else { - LOG(INFO) << "read Acks failed, err=" - << ack_row_reader->GetError().GetReason() << " row=" - << notify_cell->row << " cf=" - << notify_cell->observed_column.family << " qu=" - << notify_cell->observed_column.qualifier; - } - - if (!is_collision) { - context->scanner_impl->SetAckVersion(context); - } else { - delete context; - context = NULL; - } + NotificationContext* context = (NotificationContext*)(ack_reader->GetContext()); + std::shared_ptr notify_cell = context->notify_cell; + std::unique_ptr ack_row_reader(ack_reader); + + bool is_collision = false; + + if (tera::ErrorCode::kOK == ack_row_reader->GetError().GetType()) { + while (!ack_reader->Done()) { + int64_t latest_observer_start_ts = 0; + if (!StringToNumber(ack_row_reader->Value(), &latest_observer_start_ts)) { + LOG(INFO) << "Convert string to timestamp failed! String: " << ack_row_reader->Value() + << " row=" << notify_cell->row << " cf=" << notify_cell->observed_column.family + << " qu=" << notify_cell->observed_column.qualifier; + is_collision = true; + break; + } + + // collision check ack ts later than notify ts + if (latest_observer_start_ts >= notify_cell->timestamp && + notify_cell->notify_transaction->GetStartTimestamp() - latest_observer_start_ts < + FLAGS_observer_ack_conflict_timeout) { + // time too short, collisision, ignore + is_collision = true; + LOG(INFO) << "own collision. row=" << notify_cell->row + << " cf=" << notify_cell->observed_column.family + << " qu=" << notify_cell->observed_column.qualifier + << ", latest observer start_ts=" << latest_observer_start_ts + << ", observer start_ts=" << notify_cell->notify_transaction->GetStartTimestamp() + << ", data commit_ts=" << notify_cell->timestamp; + break; + } + ack_row_reader->Next(); + } + } else { + LOG(INFO) << "read Acks failed, err=" << ack_row_reader->GetError().GetReason() + << " row=" << notify_cell->row << " cf=" << notify_cell->observed_column.family + << " qu=" << notify_cell->observed_column.qualifier; + } + + if (!is_collision) { + context->scanner_impl->SetAckVersion(context); + } else { + delete context; + context = NULL; + } } void ScannerImpl::SetAckVersion(NotificationContext* ack_context) { - std::shared_ptr row_transaction = ack_context->ack_transaction; - std::shared_ptr notify_cell = ack_context->notify_cell; - // set Acks - std::unique_ptr set_ack_version(notify_cell->table->NewRowMutation(notify_cell->row)); - - set_ack_version->Put(notify_cell->observed_column.family, ack_context->ack_qualifier, - std::to_string(notify_cell->notify_transaction->GetStartTimestamp()), - FLAGS_observer_ack_conflict_timeout); - - row_transaction->SetContext(ack_context); - row_transaction->SetCommitCallback([] (Transaction* ack_transaction) { - NotificationContext* ack_context = (NotificationContext*)(ack_transaction->GetContext()); - if (!ack_context->scanner_impl->quit_) { - ack_context->scanner_impl->transaction_callback_threads_->AddTask( - std::bind(&ScannerImpl::SetAckVersionCallBack, - ack_context->scanner_impl, - ack_transaction)); - } else { - delete ack_context; - ack_context = NULL; - } - }); - row_transaction->ApplyMutation(set_ack_version.get()); - row_transaction->Commit(); + std::shared_ptr row_transaction = ack_context->ack_transaction; + std::shared_ptr notify_cell = ack_context->notify_cell; + // set Acks + std::unique_ptr set_ack_version( + notify_cell->table->NewRowMutation(notify_cell->row)); + + set_ack_version->Put(notify_cell->observed_column.family, ack_context->ack_qualifier, + std::to_string(notify_cell->notify_transaction->GetStartTimestamp()), + FLAGS_observer_ack_conflict_timeout); + + row_transaction->SetContext(ack_context); + row_transaction->SetCommitCallback([](Transaction* ack_transaction) { + NotificationContext* ack_context = (NotificationContext*)(ack_transaction->GetContext()); + if (!ack_context->scanner_impl->quit_) { + ack_context->scanner_impl->transaction_callback_threads_->AddTask(std::bind( + &ScannerImpl::SetAckVersionCallBack, ack_context->scanner_impl, ack_transaction)); + } else { + delete ack_context; + ack_context = NULL; + } + }); + row_transaction->ApplyMutation(set_ack_version.get()); + row_transaction->Commit(); } void ScannerImpl::SetAckVersionCallBack(Transaction* ack_transaction) { - std::unique_ptr ack_context((NotificationContext*)(ack_transaction->GetContext())); - std::shared_ptr notify_cell = ack_context->notify_cell; - - if (ack_transaction->GetError().GetType() != tera::ErrorCode::kOK) { - LOG(INFO) << "write Ack failed, row=" << notify_cell->row - << " err=" << ack_transaction->GetError().GetReason() - << " cf=" << notify_cell->observed_column.family - << " qu=" << notify_cell->observed_column.qualifier; - return; - } - VLOG(12) <<"[time] ACK mutation finish. [cf:qu] " - << notify_cell->observed_column.family - << notify_cell->observed_column.qualifier; - - ObserveCell(notify_cell); + std::unique_ptr ack_context( + (NotificationContext*)(ack_transaction->GetContext())); + std::shared_ptr notify_cell = ack_context->notify_cell; + + if (ack_transaction->GetError().GetType() != tera::ErrorCode::kOK) { + LOG(INFO) << "write Ack failed, row=" << notify_cell->row + << " err=" << ack_transaction->GetError().GetReason() + << " cf=" << notify_cell->observed_column.family + << " qu=" << notify_cell->observed_column.qualifier; + return; + } + VLOG(12) << "[time] ACK mutation finish. [cf:qu] " << notify_cell->observed_column.family + << notify_cell->observed_column.qualifier; + + ObserveCell(notify_cell); } -} // namespace observer -} // namespace tera - +} // namespace observer +} // namespace tera diff --git a/src/observer/executor/scanner_impl.h b/src/observer/executor/scanner_impl.h index d221626b1..cf18f9e5d 100644 --- a/src/observer/executor/scanner_impl.h +++ b/src/observer/executor/scanner_impl.h @@ -1,23 +1,20 @@ // Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. - -#ifndef TERA_OBSERVER_EXECUTOR_SCANNER_IMPL_H_ -#define TERA_OBSERVER_EXECUTOR_SCANNER_IMPL_H_ +#pragma once #include -#include +#include #include "common/counter.h" #include "common/mutex.h" #include "common/semaphore.h" #include "common/thread_pool.h" -#include "common/thread.h" #include "common/this_thread.h" #include "common/timer.h" #include "observer/executor/notify_cell.h" -#include "observer/executor/observer.h" -#include "observer/executor/scanner.h" +#include "observer/observer.h" +#include "observer/scanner.h" #include "tera.h" namespace tera { @@ -27,123 +24,122 @@ class Observer; class KeySelector; class ScannerImpl : public Scanner { -private: - struct TableObserveInfo { - std::map> observe_columns; - tera::Table* table; - TransactionType type; - }; - - struct NotificationContext { - std::shared_ptr notify_cell; - ScannerImpl* scanner_impl; - std::string ack_qualifier; - std::shared_ptr ack_transaction; // ValidateAckConfict transaction - int64_t ts; - NotificationContext() { - ts = get_micros(); - VLOG(12) << "NotificationContext create " << ts; - } - ~NotificationContext() { - VLOG(12) << "NotificationContext destory " << ts; - } - }; - -public: - ScannerImpl(); - virtual ~ScannerImpl(); - - virtual ErrorCode Observe(const std::string& table_name, - const std::string& column_family, - const std::string& qualifier, - Observer* observer); - - virtual bool Init(); - - virtual bool Start(); - - virtual void Exit(); - - tera::Client* GetTeraClient() const; - - static ScannerImpl* GetInstance(); - - void ValidateAckConfict(RowReader* ack_reader); - void SetAckVersionCallBack(Transaction* ack_transaction); - -private: - void ScanTable(); - - bool DoScanTable(tera::Table* table, - const std::set& column_set, - const std::string& start_key, - const std::string& end_key); - - void AsyncReadCell(std::shared_ptr notify_cell); - void ValidateCellValue(RowReader* value_reader); - - bool ParseNotifyQualifier(const std::string& notify_qualifier, - std::string* data_family, - std::string* data_qualfier); - - void GetObserveColumns(const std::string& table_name, - std::set* columns); - - tera::Table* GetTable(const std::string table_name); - - bool NextRow(tera::ResultStream* result_stream, - const std::string& table_name, bool* finished, - std::string* row, std::vector* notify_columns); - - void Profiling(); - - void AsyncReadAck(std::shared_ptr notify_cell); - std::string GetAckQualifierPrefix(const std::string& family, const std::string& qualifier) const; - std::string GetAckQualifier(const std::string& prefix, const std::string& observer_name) const; - bool TryLockRow(const std::string& table_name, - const std::string& row) const; - - bool CheckTransactionTypeLegalForTable(TransactionType transaction_type, TransactionType table_type); - TransactionType GetTableTransactionType(tera::Table* table); - - void ObserveCell(std::shared_ptr notify_cell); - - void PrepareNotifyCell(tera::Table* table, - const std::string& rowkey, - const std::set& observe_columns, - const std::vector& notify_columns, - std::shared_ptr unlocker, - std::vector>* notify_cells); - - void SetAckVersion(NotificationContext* ack_context); - -private: - mutable Mutex table_mutex_; - std::unique_ptr tera_client_; - std::unique_ptr key_selector_; - - // map
> - std::shared_ptr> table_observe_info_; - // This set stores unique user-define observer addresses. - // Release user-define observers when scanner destruct - std::set observers_; - - std::unique_ptr scan_table_threads_; - std::unique_ptr observer_threads_; - std::unique_ptr transaction_callback_threads_; - - // for quit - std::atomic quit_; + private: + struct TableObserveInfo { + std::map> observe_columns; + tera::Table* table; + TransactionType type; + }; - common::Thread profiling_thread_; - Counter total_counter_; - Counter fail_counter_; - common::Semaphore semaphore_; + struct NotificationContext { + std::shared_ptr notify_cell; + ScannerImpl* scanner_impl; + std::string ack_qualifier; + std::shared_ptr ack_transaction; // ValidateAckConfict transaction + int64_t ts; + NotificationContext() { + ts = get_micros(); + VLOG(12) << "NotificationContext create " << ts; + } + ~NotificationContext() { VLOG(12) << "NotificationContext destory " << ts; } + }; - static ScannerImpl* scanner_instance_; -}; + public: + virtual ~ScannerImpl(); + + virtual ErrorCode Observe(const std::string& table_name, const std::string& column_family, + const std::string& qualifier, Observer* observer); + + virtual bool Init(); + + virtual bool Start(); + + virtual void Exit(); + + virtual void SetOptions(const ScannerOptions& options); + + virtual void SetScanHook(const std::shared_ptr& hook); + + tera::Client* GetTeraClient() const; + + static ScannerImpl* GetInstance(); + + void ValidateAckConfict(RowReader* ack_reader); + void SetAckVersionCallBack(Transaction* ack_transaction); + + private: + ScannerImpl(); + + void ScanTable(); + + bool DoScanTable(tera::Table* table, const std::set& column_set, + const std::string& start_key, const std::string& end_key); + + void BeforeScanTable(const std::string& table_name, const ScanHook::Columns& columns); + + void AfterScanTable(const std::string& table_name, const ScanHook::Columns& columns, + bool scan_ret); + + void AsyncReadCell(std::shared_ptr notify_cell); -} // namespace observer -} // namespace tera + void ValidateCellValue(RowReader* value_reader); + + bool ParseNotifyQualifier(const std::string& notify_qualifier, std::string* data_family, + std::string* data_qualfier); + + void GetObserveColumns(const std::string& table_name, std::set* columns); + + tera::Table* GetTable(const std::string table_name); + + bool NextRow(tera::ResultStream* result_stream, const std::string& table_name, bool* finished, + std::string* row, std::vector* notify_columns); + + void Profiling(); + + void AsyncReadAck(std::shared_ptr notify_cell); + std::string GetAckQualifierPrefix(const std::string& family, const std::string& qualifier) const; + std::string GetAckQualifier(const std::string& prefix, const std::string& observer_name) const; + bool TryLockRow(const std::string& table_name, const std::string& row) const; + + bool CheckTransactionTypeLegalForTable(TransactionType transaction_type, + TransactionType table_type); + TransactionType GetTableTransactionType(tera::Table* table); + + void ObserveCell(std::shared_ptr notify_cell); + + void PrepareNotifyCell(tera::Table* table, const std::string& rowkey, + const std::set& observe_columns, + const std::vector& notify_columns, + std::shared_ptr unlocker, + std::vector>* notify_cells); + + void SetAckVersion(NotificationContext* ack_context); + + private: + mutable Mutex table_mutex_; + std::unique_ptr tera_client_; + std::unique_ptr key_selector_; + + // map
> + std::shared_ptr> table_observe_info_; + // This set stores unique user-define observer addresses. + // Release user-define observers when scanner destruct + std::set observers_; + + std::unique_ptr scan_table_threads_; + std::unique_ptr observer_threads_; + std::unique_ptr transaction_callback_threads_; + + // for quit + std::atomic quit_; + + std::thread profiling_thread_; + Counter total_counter_; + Counter fail_counter_; + common::Semaphore semaphore_; + ScannerOptions options_; + std::shared_ptr scan_hook_; +}; -#endif // TERA_OBSERVER_EXECUTOR_SCANNER_IMPL_H_ +} // namespace observer +} // namespace tera diff --git a/src/observer/executor/tablet_bucket_key_selector.cc b/src/observer/executor/tablet_bucket_key_selector.cc new file mode 100644 index 000000000..467b1c4b7 --- /dev/null +++ b/src/observer/executor/tablet_bucket_key_selector.cc @@ -0,0 +1,96 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "observer/executor/tablet_bucket_key_selector.h" + +#include + +#include +#include + +#include "types.h" + +DECLARE_string(flagfile); + +namespace tera { +namespace observer { + +TabletBucketKeySelector::TabletBucketKeySelector(int32_t bucket_id, int32_t bucket_cnt) + : tables_(new std::map>), + bucket_id_(bucket_id), + bucket_cnt_(bucket_cnt) { + tera::ErrorCode err; + client_ = tera::Client::NewClient(FLAGS_flagfile, &err); +} + +TabletBucketKeySelector::~TabletBucketKeySelector() { + if (client_ != NULL) { + delete client_; + } +} + +bool TabletBucketKeySelector::SelectRange(std::string* table_name, std::string* start_key, + std::string* end_key) { + LOG(INFO) << "select range"; + srand((unsigned)time(NULL)); + + if (tables_->size() == 0) { + return false; + } + + // random table + uint32_t table_no = rand() % observe_tables_.size(); + *table_name = observe_tables_[table_no]; + + // random key + size_t tablet_num = (*tables_)[*table_name].size(); + if (0 == tablet_num) { + LOG(ERROR) << "No tablet"; + return false; + } + + uint32_t start_tablet_no, end_tablet_no; + int bucket_size = tablet_num / bucket_cnt_; + int remainder = tablet_num % bucket_cnt_; + bucket_size = bucket_id_ < remainder ? bucket_size + 1 : bucket_size; + start_tablet_no = bucket_size * bucket_id_ + (bucket_id_ < remainder ? 0 : remainder); + if (start_tablet_no > tablet_num - 1) { + VLOG(13) << "this bucket_id[ " << bucket_id_ << " ] not cover this bucket." + << " bucket_cnt[ " << bucket_cnt_ << " ]"; + return false; + } + *start_key = (*tables_)[*table_name][start_tablet_no].start_key; + end_tablet_no = start_tablet_no + bucket_size; // can't reach this tablet + if (end_tablet_no >= tablet_num - 1) { + *end_key = ""; + end_tablet_no = tablet_num; + } else { + *end_key = (*tables_)[*table_name][end_tablet_no].start_key; + } + + VLOG(13) << "Select Range=[" << *start_key << " .. " << *end_key << ") TabletRange=[" + << start_tablet_no << ", " << end_tablet_no << "]"; + return true; +} + +ErrorCode TabletBucketKeySelector::Observe(const std::string& table_name) { + LOG(INFO) << "tablet bucket key selector observe"; + tera::ErrorCode err; + + if (tables_->find(table_name) == tables_->end()) { + std::vector tablets; + client_->GetTabletLocation(table_name, &tablets, &err); + LOG(ERROR) << "find tablet count = " << tablets.size(); + if (tera::ErrorCode::kOK != err.GetType()) { + LOG(ERROR) << "Observe table failed, " << err.ToString(); + return err; + } + observe_tables_.push_back(table_name); + (*tables_)[table_name] = tablets; + } + return err; +} + +} // namespace observer +} // namespace tera diff --git a/src/observer/executor/tablet_bucket_key_selector.h b/src/observer/executor/tablet_bucket_key_selector.h new file mode 100644 index 000000000..82bcb000f --- /dev/null +++ b/src/observer/executor/tablet_bucket_key_selector.h @@ -0,0 +1,37 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include +#include +#include +#include + +#include "common/mutex.h" +#include "observer/executor/key_selector.h" +#include "tera.h" + +namespace tera { +namespace observer { + +class TabletBucketKeySelector : public KeySelector { + public: + TabletBucketKeySelector(int32_t bucket_id, int32_t bucket_cnt); + virtual ~TabletBucketKeySelector(); + + virtual bool SelectRange(std::string* table_name, std::string* start_key, std::string* end_key); + + virtual ErrorCode Observe(const std::string& table_name); + + private: + tera::Client* client_; + std::vector observe_tables_; + std::shared_ptr>> tables_; + int32_t bucket_id_; + int32_t bucket_cnt_; +}; + +} // namespace observer +} // namespace tera diff --git a/src/observer/observer_demo/demo_entry.cc b/src/observer/observer_demo/demo_entry.cc index dff1cc2a1..454c7a773 100644 --- a/src/observer/observer_demo/demo_entry.cc +++ b/src/observer/observer_demo/demo_entry.cc @@ -2,60 +2,56 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "observer/observer_demo/demo_entry.h" +#include -#include "observer/executor/observer.h" -#include "observer/executor/scanner.h" +#include "observer/observer_demo/demo_entry.h" #include "observer/observer_demo/demo_observer.h" #include "tera.h" -#include "gflags/gflags.h" - DEFINE_bool(observer_demo_for_integration, false, "use this demo for test integration"); -std::string GetTeraEntryName() { - return "DemoEntry"; -} +std::string GetTeraEntryName() { return "DemoEntry"; } -tera::TeraEntry* GetTeraEntry() { - return new tera::observer::DemoEntry(); -} +tera::TeraEntry* GetTeraEntry() { return new tera::observer::DemoEntry(); } namespace tera { namespace observer { -DemoEntry::DemoEntry() {} +DemoEntry::DemoEntry() { hook_.reset(new DemoScanHook()); } + +DemoEntry::~DemoEntry() { hook_.reset(); } ErrorCode DemoEntry::Observe() { - ErrorCode err; - Scanner* scanner = GetScanner(); - // new an observer ptr and do not delete it - if (!FLAGS_observer_demo_for_integration) { - Observer* demo = new DemoObserver(); - Observer* parser = new ParseObserver(); - err = scanner->Observe("observer_test_table", "Data", "Page", demo); - if (tera::ErrorCode::kOK != err.GetType()) { - return err; - } - - err = scanner->Observe("observer_test_table", "Data", "Link", demo); - if (tera::ErrorCode::kOK != err.GetType()) { - return err; - } - - err = scanner->Observe("observer_test_table", "Data", "Url", parser); - if (tera::ErrorCode::kOK != err.GetType()) { - return err; - } - } else { - Observer* integration = new IntegrationObserver(); - err = scanner->Observe("observer_test_table", "Data", "qu2", integration); - if (tera::ErrorCode::kOK != err.GetType()) { - return err; - } + ErrorCode err; + Scanner* scanner = GetScanner(); + scanner->SetScanHook(hook_); + // new an observer ptr and do not delete it + if (!FLAGS_observer_demo_for_integration) { + Observer* demo = new DemoObserver(); + Observer* parser = new ParseObserver(); + err = scanner->Observe("observer_test_table", "Data", "Page", demo); + if (tera::ErrorCode::kOK != err.GetType()) { + return err; + } + + err = scanner->Observe("observer_test_table", "Data", "Link", demo); + if (tera::ErrorCode::kOK != err.GetType()) { + return err; + } + + err = scanner->Observe("observer_test_table", "Data", "Url", parser); + if (tera::ErrorCode::kOK != err.GetType()) { + return err; + } + } else { + Observer* integration = new IntegrationObserver(); + err = scanner->Observe("observer_test_table", "Data", "qu2", integration); + if (tera::ErrorCode::kOK != err.GetType()) { + return err; } - return err; + } + return err; } -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera diff --git a/src/observer/observer_demo/demo_entry.h b/src/observer/observer_demo/demo_entry.h index 5f01ec840..4d75111ad 100644 --- a/src/observer/observer_demo/demo_entry.h +++ b/src/observer/observer_demo/demo_entry.h @@ -5,26 +5,45 @@ #ifndef TERA_OBSERVER_OBSERVER_DEMO_DEMO_ENTRY_H_ #define TERA_OBSERVER_OBSERVER_DEMO_DEMO_ENTRY_H_ +#include #include #include -#include "observer/executor/scanner_entry.h" #include "tera.h" namespace tera { namespace observer { +class DemoScanHook : public ScanHook { + virtual void Before(const std::string& table_name, const ScanHook::Columns& columns) { + std::cout << "demo scan filter before scan : " << table_name << std::endl; + for (const auto& col : columns) { + std::cout << col.first << "\t" << col.second << std::endl; + } + } + + virtual void After(const std::string& table_name, const ScanHook::Columns& columns, + bool scan_ret) { + std::cout << "demo scan filter before scan : " << table_name << " scan_ret :" << scan_ret + << std::endl; + for (const auto& col : columns) { + std::cout << col.first << "\t" << col.second << std::endl; + } + } +}; + class DemoEntry : public ScannerEntry { -public: - DemoEntry(); - virtual ~DemoEntry() {} + public: + DemoEntry(); + virtual ~DemoEntry(); - virtual ErrorCode Observe(); -}; + virtual ErrorCode Observe(); + private: + std::shared_ptr hook_; +}; -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera #endif // TERA_OBSERVER_OBSERVER_DEMO_DEMO_ENTRY_H_ - diff --git a/src/observer/observer_demo/demo_observer.cc b/src/observer/observer_demo/demo_observer.cc index 9212b849b..e5055957b 100644 --- a/src/observer/observer_demo/demo_observer.cc +++ b/src/observer/observer_demo/demo_observer.cc @@ -12,249 +12,215 @@ namespace tera { namespace observer { void CommitCallBack(tera::Transaction* txn) { - IntegrationObserver::TxnContext* ctx - = (IntegrationObserver::TxnContext*)(txn->GetContext()); - if (txn->GetError().GetType() != tera::ErrorCode::kOK) { - LOG(ERROR) << txn->GetError().ToString() << " fail_cnt:" << ++(((IntegrationObserver*)(ctx->observer))->fail_cnt_); - } - LOG(INFO) <<"[time] OnNotify finish. [row] " << ctx->row << " time:" << get_micros() - ctx->begin_time << " done_cnt:" - << ++(((IntegrationObserver*)(ctx->observer))->done_cnt_); - ctx->notification->Done(); - delete ctx; + IntegrationObserver::TxnContext* ctx = (IntegrationObserver::TxnContext*)(txn->GetContext()); + if (txn->GetError().GetType() != tera::ErrorCode::kOK) { + LOG(ERROR) << txn->GetError().ToString() + << " fail_cnt:" << ++(((IntegrationObserver*)(ctx->observer))->fail_cnt_); + } + LOG(INFO) << "[time] OnNotify finish. [row] " << ctx->row + << " time:" << get_micros() - ctx->begin_time + << " done_cnt:" << ++(((IntegrationObserver*)(ctx->observer))->done_cnt_); + ctx->notification->Done(); + delete ctx; } void ReadRowCallBack(tera::RowReader* reader) { - std::unique_ptr reader_ptr(reader); - IntegrationObserver::TxnContext* ctx - = (IntegrationObserver::TxnContext*)(reader_ptr->GetContext()); - if (reader_ptr->GetError().GetType() != tera::ErrorCode::kOK) { - LOG(ERROR) << "row:" << ctx->row << " " << reader_ptr->GetError().ToString() - << " fail_cnt:" << ++(((IntegrationObserver*)(ctx->observer))->fail_cnt_); - ctx->notification->Done(); - delete ctx; - return; - } - std::string s1; - std::string s2; - while (!reader_ptr->Done()) { - if (reader_ptr->ColumnName() == "Data:qu0") { - s1 = reader_ptr->Value(); - } - else if (reader_ptr->ColumnName() == "Data:qu1") { - s2 = reader_ptr->Value(); - } - reader_ptr->Next(); + std::unique_ptr reader_ptr(reader); + IntegrationObserver::TxnContext* ctx = + (IntegrationObserver::TxnContext*)(reader_ptr->GetContext()); + if (reader_ptr->GetError().GetType() != tera::ErrorCode::kOK) { + LOG(ERROR) << "row:" << ctx->row << " " << reader_ptr->GetError().ToString() + << " fail_cnt:" << ++(((IntegrationObserver*)(ctx->observer))->fail_cnt_); + ctx->notification->Done(); + delete ctx; + return; + } + std::string s1; + std::string s2; + while (!reader_ptr->Done()) { + if (reader_ptr->ColumnName() == "Data:qu0") { + s1 = reader_ptr->Value(); + } else if (reader_ptr->ColumnName() == "Data:qu1") { + s2 = reader_ptr->Value(); } - std::unique_ptr mutation(ctx->output_table->NewRowMutation(ctx->row)); - mutation->Put(ctx->family, "qu3", s1 + s2); - ctx->txn->ApplyMutation(mutation.get()); - - ctx->notification->Ack(ctx->input_table, ctx->row, ctx->family, ctx->qualifier); - ctx->txn->SetContext(ctx); - ctx->txn->SetCommitCallback(CommitCallBack); - ctx->txn->Commit(); - // CommitCallBack(ctx->txn); -} - -void IntegrationObserver::OnNotify(tera::Transaction* t, - tera::Client* client, - const std::string& table_name, - const std::string& family, - const std::string& qualifier, - const std::string& row, - const std::string& value, - int64_t timestamp, + reader_ptr->Next(); + } + std::unique_ptr mutation(ctx->output_table->NewRowMutation(ctx->row)); + mutation->Put(ctx->family, "qu3", s1 + s2); + ctx->txn->ApplyMutation(mutation.get()); + + ctx->notification->Ack(ctx->input_table, ctx->row, ctx->family, ctx->qualifier); + ctx->txn->SetContext(ctx); + ctx->txn->SetCommitCallback(CommitCallBack); + ctx->txn->Commit(); + // CommitCallBack(ctx->txn); +} + +void IntegrationObserver::OnNotify(tera::Transaction* t, tera::Client* client, + const std::string& table_name, const std::string& family, + const std::string& qualifier, const std::string& row, + const std::string& value, int64_t timestamp, Notification* notification) { - LOG(INFO) << "[OnNotify start] table:row:cf:qu=" - << table_name << ":" << row << ":" << family << ":" - << qualifier << ":" << timestamp - << " begin count:" << ++notify_cnt_; - TxnContext* ctx = new TxnContext(); - ctx->observer = this; - ctx->txn = t; - ctx->notification = notification; - ctx->row = row; - ctx->family = family; - ctx->qualifier = qualifier; - ctx->begin_time = get_micros(); - - tera::ErrorCode err; - ctx->input_table = client->OpenTable(table_name, &err); - ctx->output_table = client->OpenTable("second_table", &err); - - tera::RowReader* reader = ctx->input_table->NewRowReader(row); - reader->AddColumn(family, "qu0"); - reader->AddColumn(family, "qu1"); - reader->SetContext(ctx); - reader->SetCallBack(ReadRowCallBack); - t->Get(reader); -} + LOG(INFO) << "[OnNotify start] table:row:cf:qu=" << table_name << ":" << row << ":" << family + << ":" << qualifier << ":" << timestamp << " begin count:" << ++notify_cnt_; + TxnContext* ctx = new TxnContext(); + ctx->observer = this; + ctx->txn = t; + ctx->notification = notification; + ctx->row = row; + ctx->family = family; + ctx->qualifier = qualifier; + ctx->begin_time = get_micros(); + + tera::ErrorCode err; + ctx->input_table = client->OpenTable(table_name, &err); + ctx->output_table = client->OpenTable("second_table", &err); + + tera::RowReader* reader = ctx->input_table->NewRowReader(row); + reader->AddColumn(family, "qu0"); + reader->AddColumn(family, "qu1"); + reader->SetContext(ctx); + reader->SetCallBack(ReadRowCallBack); + t->Get(reader); +} + +std::string IntegrationObserver::GetObserverName() const { return "IntegrationObserver"; } + +TransactionType IntegrationObserver::GetTransactionType() const { return kGlobalTransaction; } + +void DemoObserver::OnNotify(tera::Transaction* t, tera::Client* client, + const std::string& table_name, const std::string& family, + const std::string& qualifier, const std::string& row, + const std::string& value, int64_t timestamp, + Notification* notification) { + VLOG(12) << "[time] OnNotify start. [row] " << row; + LOG(INFO) << "[Notify DemoObserver] table:family:qualifer=" << table_name << ":" << family << ":" + << qualifier << " row=" << row << " value=" << value << " timestamps=" << timestamp; -std::string IntegrationObserver::GetObserverName() const { - return "IntegrationObserver"; -} + tera::ErrorCode err; + tera::Table* table = client->OpenTable(table_name, &err); -TransactionType IntegrationObserver::GetTransactionType() const { - return kGlobalTransaction; -} + // write ForwordIndex column + tera::RowMutation* mutation = table->NewRowMutation(row); + mutation->Put("Data", "ForwordIndex", "FIValue_" + row); + t->ApplyMutation(mutation); -void DemoObserver::OnNotify(tera::Transaction* t, - tera::Client* client, - const std::string& table_name, - const std::string& family, - const std::string& qualifier, - const std::string& row, - const std::string& value, - int64_t timestamp, - Notification* notification) { - VLOG(12) <<"[time] OnNotify start. [row] " << row; - LOG(INFO) << "[Notify DemoObserver] table:family:qualifer=" << - table_name << ":" << family << ":" << - qualifier << " row=" << row << - " value=" << value << " timestamps=" << timestamp; - - tera::ErrorCode err; - tera::Table* table = client->OpenTable(table_name, &err); - - // write ForwordIndex column - tera::RowMutation* mutation = table->NewRowMutation(row); - mutation->Put("Data", "ForwordIndex", "FIValue_" + row); - t->ApplyMutation(mutation); - - tera::ErrorCode error; - notification->Ack(table, row, family, qualifier); - error = t->Commit(); - delete mutation; - notification->Done(); - VLOG(12) <<"[time] OnNotify finish. [row] " << row; + tera::ErrorCode error; + notification->Ack(table, row, family, qualifier); + error = t->Commit(); + delete mutation; + notification->Done(); + VLOG(12) << "[time] OnNotify finish. [row] " << row; } -std::string DemoObserver::GetObserverName() const { - return "DemoObserver"; -} +std::string DemoObserver::GetObserverName() const { return "DemoObserver"; } -TransactionType DemoObserver::GetTransactionType() const { - return kGlobalTransaction; -} +TransactionType DemoObserver::GetTransactionType() const { return kGlobalTransaction; } void TranscationCallback(Transaction* transaction) { - ParseObserver::TransactionContext* context = (ParseObserver::TransactionContext*)(transaction->GetContext()); - LOG(INFO) << "table: " << context->table_name << " row: " << context->row; + ParseObserver::TransactionContext* context = + (ParseObserver::TransactionContext*)(transaction->GetContext()); + LOG(INFO) << "table: " << context->table_name << " row: " << context->row; } -void ParseObserver::OnNotify(tera::Transaction* t, - tera::Client* client, - const std::string& table_name, - const std::string& family, - const std::string& qualifier, - const std::string& row, - const std::string& value, - int64_t timestamp, +void ParseObserver::OnNotify(tera::Transaction* t, tera::Client* client, + const std::string& table_name, const std::string& family, + const std::string& qualifier, const std::string& row, + const std::string& value, int64_t timestamp, Notification* notification) { - LOG(INFO) << "[Notify ParseObserver] table:family:qualifer=" << - table_name << ":" << family << ":" << - qualifier << " row=" << row << - " value=" << value << " timestamps=" << timestamp; - - tera::ErrorCode err; - TransactionContext* context = new TransactionContext(); - context->table_name = table_name; - context->row = row; - t->SetContext(context); - t->SetCommitCallback(TranscationCallback); - // do nothing - tera::Table* table = client->OpenTable(table_name, &err); - - tera::RowMutation* mutation = table->NewRowMutation(row); - mutation->Put(family, qualifier, "value"); - t->ApplyMutation(mutation); - notification->Ack(table, row, family, qualifier); - err = t->Commit(); - notification->Done(); -} - -std::string ParseObserver::GetObserverName() const { - return "ParseObserver"; -} - -TransactionType ParseObserver::GetTransactionType() const { - return kGlobalTransaction; -} - -void SingleRowObserver::OnNotify(tera::Transaction* t, - tera::Client* client, - const std::string& table_name, - const std::string& family, - const std::string& qualifier, - const std::string& row, - const std::string& value, - int64_t timestamp, + LOG(INFO) << "[Notify ParseObserver] table:family:qualifer=" << table_name << ":" << family << ":" + << qualifier << " row=" << row << " value=" << value << " timestamps=" << timestamp; + + tera::ErrorCode err; + TransactionContext* context = new TransactionContext(); + context->table_name = table_name; + context->row = row; + t->SetContext(context); + t->SetCommitCallback(TranscationCallback); + // do nothing + tera::Table* table = client->OpenTable(table_name, &err); + + tera::RowMutation* mutation = table->NewRowMutation(row); + mutation->Put(family, qualifier, "value"); + t->ApplyMutation(mutation); + notification->Ack(table, row, family, qualifier); + err = t->Commit(); + notification->Done(); +} + +std::string ParseObserver::GetObserverName() const { return "ParseObserver"; } + +TransactionType ParseObserver::GetTransactionType() const { return kGlobalTransaction; } + +void SingleRowObserver::OnNotify(tera::Transaction* t, tera::Client* client, + const std::string& table_name, const std::string& family, + const std::string& qualifier, const std::string& row, + const std::string& value, int64_t timestamp, Notification* notification) { - LOG(INFO) << "[Notify SingleRowObserver] table:family:qualifer=" << - table_name << ":" << family << ":" << - qualifier << " row=" << row << - " value=" << value << " timestamps=" << timestamp; - - tera::ErrorCode err; - tera::Table* table = client->OpenTable(table_name, &err); - - // single row txn - tera::RowMutation* mutation = table->NewRowMutation(row); - mutation->Put(family, "another_qu", "value"); - t->ApplyMutation(mutation); - - tera::ErrorCode error; - notification->Ack(table, row, family, qualifier); - tera::Table* another_table = client->OpenTable("another_table", &err); - notification->Ack(another_table, "somerow", "family", "qualifier"); - error = t->Commit(); - delete mutation; - notification->Done(); -} + LOG(INFO) << "[Notify SingleRowObserver] table:family:qualifer=" << table_name << ":" << family + << ":" << qualifier << " row=" << row << " value=" << value + << " timestamps=" << timestamp; -std::string SingleRowObserver::GetObserverName() const { - return "SingleRowObserver"; -} + tera::ErrorCode err; + tera::Table* table = client->OpenTable(table_name, &err); -TransactionType SingleRowObserver::GetTransactionType() const { - return kSingleRowTransaction; -} + // single row txn + tera::RowMutation* mutation = table->NewRowMutation(row); + mutation->Put(family, "another_qu", "value"); + t->ApplyMutation(mutation); -void NoneTransactionObserver::OnNotify(tera::Transaction* t, - tera::Client* client, - const std::string& table_name, - const std::string& family, - const std::string& qualifier, - const std::string& row, - const std::string& value, - int64_t timestamp, - Notification* notification) { - LOG(INFO) << "[Notify NoneTransactionObserver] table:family:qualifer=" << - table_name << ":" << family << ":" << - qualifier << " row=" << row << - " value=" << value << " timestamps=" << timestamp; - - tera::ErrorCode err; - tera::Table* table = client->OpenTable(table_name, &err); - - // do something - // kNoneTransaction notify - notification->Ack(table, row, family, qualifier); - - // kNoneTransaction ack - tera::Table* notify_table = client->OpenTable("notify_table", &err); - notification->Notify(notify_table, "notify_row", "family", "qualifier"); - notification->Done(); + tera::ErrorCode error; + notification->Ack(table, row, family, qualifier); + tera::Table* another_table = client->OpenTable("another_table", &err); + notification->Ack(another_table, "somerow", "family", "qualifier"); + error = t->Commit(); + delete mutation; + notification->Done(); } -std::string NoneTransactionObserver::GetObserverName() const { - return "NoneTransactionObserver"; -} +std::string SingleRowObserver::GetObserverName() const { return "SingleRowObserver"; } -TransactionType NoneTransactionObserver::GetTransactionType() const { - return kNoneTransaction; -} +TransactionType SingleRowObserver::GetTransactionType() const { return kSingleRowTransaction; } -} // namespace observer -} // namespace tera +struct AckContext { + tera::Client* client; + std::string row; +}; +void NoneTransactionObserver::OnNotify(tera::Transaction* t, tera::Client* client, + const std::string& table_name, const std::string& family, + const std::string& qualifier, const std::string& row, + const std::string& value, int64_t timestamp, + Notification* notification) { + LOG(INFO) << "[Notify NoneTransactionObserver] table:family:qualifer=" << table_name << ":" + << family << ":" << qualifier << " row=" << row << " value=" << value + << " timestamps=" << timestamp; + + tera::ErrorCode err; + tera::Table* table = client->OpenTable(table_name, &err); + + // do something + AckContext* ctx = new AckContext(); + ctx->client = client; + ctx->row = row; + notification->SetAckContext(ctx); + notification->SetAckCallBack([](Notification* n, const tera::ErrorCode& err) { + std::unique_ptr ctx((AckContext*)(n->GetAckContext())); + LOG(INFO) << "ack status:" << err.ToString() << "@" << ctx->row; + tera::ErrorCode open_table_err; + tera::Table* notify_table = ctx->client->OpenTable("notify_table", &open_table_err); + n->SetNotifyCallBack([](Notification* n1, const tera::ErrorCode& err1) { + LOG(INFO) << "ack status:" << err1.ToString(); + n1->Done(); + }); + n->Notify(notify_table, "notify_row", "family", "qualifier"); + }); + notification->Ack(table, row, family, qualifier); +} + +std::string NoneTransactionObserver::GetObserverName() const { return "NoneTransactionObserver"; } + +TransactionType NoneTransactionObserver::GetTransactionType() const { return kNoneTransaction; } + +} // namespace observer +} // namespace tera diff --git a/src/observer/observer_demo/demo_observer.h b/src/observer/observer_demo/demo_observer.h index 8c9e5c1bf..2a4e37c60 100644 --- a/src/observer/observer_demo/demo_observer.h +++ b/src/observer/observer_demo/demo_observer.h @@ -5,7 +5,6 @@ #ifndef TERA_OBSERVER_OBSERVER_DEMO_DEMO_OBSERVER_H_ #define TERA_OBSERVER_OBSERVER_DEMO_DEMO_OBSERVER_H_ -#include "observer/executor/observer.h" #include "tera.h" #include @@ -14,119 +13,93 @@ namespace tera { namespace observer { class DemoObserver : public tera::observer::Observer { -public: - DemoObserver() {} - virtual ~DemoObserver() {} - virtual void OnNotify(tera::Transaction* t, - tera::Client* client, - const std::string& table_name, - const std::string& family, - const std::string& qualifier, - const std::string& row, - const std::string& value, - int64_t timestamp, - Notification* notification); - virtual std::string GetObserverName() const; - virtual TransactionType GetTransactionType() const; + public: + DemoObserver() {} + virtual ~DemoObserver() {} + virtual void OnNotify(tera::Transaction* t, tera::Client* client, const std::string& table_name, + const std::string& family, const std::string& qualifier, + const std::string& row, const std::string& value, int64_t timestamp, + Notification* notification); + virtual std::string GetObserverName() const; + virtual TransactionType GetTransactionType() const; }; class ParseObserver : public tera::observer::Observer { -public: - ParseObserver() {} - virtual ~ParseObserver() {} - virtual void OnNotify(tera::Transaction* t, - tera::Client* client, - const std::string& table_name, - const std::string& family, - const std::string& qualifier, - const std::string& row, - const std::string& value, - int64_t timestamp, - Notification* notification); - virtual std::string GetObserverName() const; - virtual TransactionType GetTransactionType() const; - -public: - struct TransactionContext { - std::string table_name; - std::string row; - }; + public: + ParseObserver() {} + virtual ~ParseObserver() {} + virtual void OnNotify(tera::Transaction* t, tera::Client* client, const std::string& table_name, + const std::string& family, const std::string& qualifier, + const std::string& row, const std::string& value, int64_t timestamp, + Notification* notification); + virtual std::string GetObserverName() const; + virtual TransactionType GetTransactionType() const; + + public: + struct TransactionContext { + std::string table_name; + std::string row; + }; }; class IntegrationObserver : public tera::observer::Observer { -public: - IntegrationObserver() : notify_cnt_(0), done_cnt_(0), fail_cnt_(0) {} - virtual ~IntegrationObserver() {} - virtual void OnNotify(tera::Transaction* t, - tera::Client* client, - const std::string& table_name, - const std::string& family, - const std::string& qualifier, - const std::string& row, - const std::string& value, - int64_t timestamp, - Notification* notification); - virtual std::string GetObserverName() const; - virtual TransactionType GetTransactionType() const; - -public: - struct TxnContext { - TxnContext() {} - ~TxnContext() { - delete input_table; - delete output_table; - } - tera::observer::Observer* observer; - tera::Transaction* txn; - Notification* notification; - tera::Table* input_table; - tera::Table* output_table; - std::string row; - std::string family; - std::string qualifier; - int64_t begin_time; - }; - std::atomic notify_cnt_; - std::atomic done_cnt_; - std::atomic fail_cnt_; + public: + IntegrationObserver() : notify_cnt_(0), done_cnt_(0), fail_cnt_(0) {} + virtual ~IntegrationObserver() {} + virtual void OnNotify(tera::Transaction* t, tera::Client* client, const std::string& table_name, + const std::string& family, const std::string& qualifier, + const std::string& row, const std::string& value, int64_t timestamp, + Notification* notification); + virtual std::string GetObserverName() const; + virtual TransactionType GetTransactionType() const; + + public: + struct TxnContext { + TxnContext() {} + ~TxnContext() { + delete input_table; + delete output_table; + } + tera::observer::Observer* observer; + tera::Transaction* txn; + Notification* notification; + tera::Table* input_table; + tera::Table* output_table; + std::string row; + std::string family; + std::string qualifier; + int64_t begin_time; + }; + std::atomic notify_cnt_; + std::atomic done_cnt_; + std::atomic fail_cnt_; }; class SingleRowObserver : public tera::observer::Observer { -public: - SingleRowObserver() {} - virtual ~SingleRowObserver() {} - virtual void OnNotify(tera::Transaction* t, - tera::Client* client, - const std::string& table_name, - const std::string& family, - const std::string& qualifier, - const std::string& row, - const std::string& value, - int64_t timestamp, - Notification* notification); - virtual std::string GetObserverName() const; - virtual TransactionType GetTransactionType() const; + public: + SingleRowObserver() {} + virtual ~SingleRowObserver() {} + virtual void OnNotify(tera::Transaction* t, tera::Client* client, const std::string& table_name, + const std::string& family, const std::string& qualifier, + const std::string& row, const std::string& value, int64_t timestamp, + Notification* notification); + virtual std::string GetObserverName() const; + virtual TransactionType GetTransactionType() const; }; class NoneTransactionObserver : public tera::observer::Observer { -public: - NoneTransactionObserver() {} - virtual ~NoneTransactionObserver() {} - virtual void OnNotify(tera::Transaction* t, - tera::Client* client, - const std::string& table_name, - const std::string& family, - const std::string& qualifier, - const std::string& row, - const std::string& value, - int64_t timestamp, - Notification* notification); - virtual std::string GetObserverName() const; - virtual TransactionType GetTransactionType() const; + public: + NoneTransactionObserver() {} + virtual ~NoneTransactionObserver() {} + virtual void OnNotify(tera::Transaction* t, tera::Client* client, const std::string& table_name, + const std::string& family, const std::string& qualifier, + const std::string& row, const std::string& value, int64_t timestamp, + Notification* notification); + virtual std::string GetObserverName() const; + virtual TransactionType GetTransactionType() const; }; -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera #endif // TERA_OBSERVER_OBSERVER_DEMO_DEMO_OBSERVER_H_ - diff --git a/src/observer/observer_demo/observe_demo_main.cc b/src/observer/observer_demo/observe_demo_main.cc index 112031059..8578b28fb 100644 --- a/src/observer/observer_demo/observe_demo_main.cc +++ b/src/observer/observer_demo/observe_demo_main.cc @@ -10,7 +10,7 @@ #include "common/base/scoped_ptr.h" #include "common/log/log_cleaner.h" #include "common/heap_profiler.h" -#include "tera_entry.h" +#include "tera/tera_entry.h" #include "utils/utils_cmd.h" #include "version.h" @@ -26,66 +26,64 @@ extern tera::TeraEntry* GetTeraEntry(); volatile sig_atomic_t g_quit = 0; -static void SignalIntHandler(int sig) { - g_quit = 1; -} +static void SignalIntHandler(int sig) { g_quit = 1; } int main(int argc, char** argv) { - ::google::ParseCommandLineFlags(&argc, &argv, true); + ::google::ParseCommandLineFlags(&argc, &argv, true); + if (FLAGS_tera_log_prefix.empty()) { + FLAGS_tera_log_prefix = GetTeraEntryName(); if (FLAGS_tera_log_prefix.empty()) { - FLAGS_tera_log_prefix = GetTeraEntryName(); - if (FLAGS_tera_log_prefix.empty()) { - FLAGS_tera_log_prefix = "tera"; - } + FLAGS_tera_log_prefix = "tera"; } - tera::utils::SetupLog(FLAGS_tera_log_prefix); - - if (argc > 1) { - std::string ext_cmd = argv[1]; - if (ext_cmd == "version") { - PrintSystemVersion(); - return 0; - } + } + tera::utils::SetupLog(FLAGS_tera_log_prefix); + + if (argc > 1) { + std::string ext_cmd = argv[1]; + if (ext_cmd == "version") { + PrintSystemVersion(); + return 0; } - tera::HeapProfiler heap_profiler; - heap_profiler.SetEnable(FLAGS_heap_profiler_enabled) - .SetInterval(FLAGS_heap_profiler_dump_interval); - - signal(SIGINT, SignalIntHandler); - signal(SIGTERM, SignalIntHandler); - - scoped_ptr entry(GetTeraEntry()); - if (entry.get() == NULL) { - return -1; + } + tera::HeapProfiler heap_profiler; + heap_profiler.SetEnable(FLAGS_heap_profiler_enabled) + .SetInterval(FLAGS_heap_profiler_dump_interval); + + signal(SIGINT, SignalIntHandler); + signal(SIGTERM, SignalIntHandler); + + scoped_ptr entry(GetTeraEntry()); + if (entry.get() == NULL) { + return -1; + } + + if (!entry->Start()) { + return -1; + } + + // start log cleaner + if (FLAGS_tera_info_log_clean_enable) { + common::LogCleaner::StartCleaner(); + LOG(INFO) << "start log cleaner"; + } else { + LOG(INFO) << "log cleaner is disable"; + } + + while (!g_quit) { + if (!entry->Run()) { + LOG(ERROR) << "Server run error ,and then exit now "; + break; } + } + if (g_quit) { + LOG(INFO) << "received interrupt signal from user, will stop"; + } - if (!entry->Start()) { - return -1; - } + common::LogCleaner::StopCleaner(); - // start log cleaner - if (FLAGS_tera_info_log_clean_enable) { - common::LogCleaner::StartCleaner(); - LOG(INFO) << "start log cleaner"; - } else { - LOG(INFO) << "log cleaner is disable"; - } - - while (!g_quit) { - if (!entry->Run()) { - LOG(ERROR) << "Server run error ,and then exit now "; - break; - } - } - if (g_quit) { - LOG(INFO) << "received interrupt signal from user, will stop"; - } - - common::LogCleaner::StopCleaner(); - - if (!entry->Shutdown()) { - return -1; - } + if (!entry->Shutdown()) { + return -1; + } - return 0; + return 0; } diff --git a/src/observer/rowlocknode/fake_rowlock_client.h b/src/observer/rowlocknode/fake_rowlock_client.h deleted file mode 100644 index 9d9bb0076..000000000 --- a/src/observer/rowlocknode/fake_rowlock_client.h +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef TERA_OBSERVER_EXECUTOR_FAKE_ROWLOCK_CLIENT_H_ -#define TERA_OBSERVER_EXECUTOR_FAKE_ROWLOCK_CLIENT_H_ - -#include -#include - -#include "proto/rpc_client.h" -#include "sdk/rowlock_client.h" - -namespace tera { -namespace observer { - -class FakeRowlockClient : public RowlockClient { -public: - FakeRowlockClient() : RowlockClient("127.0.0.1:22222") {}; - ~FakeRowlockClient() {} - - virtual bool TryLock(const RowlockRequest* request, - RowlockResponse* response, - std::function done = NULL) { - response->set_lock_status(kLockSucc); - if (done != NULL) { - bool failed = true; - int error_code = 0; - done(request, response, failed, error_code); - } - return true; - } - - virtual bool UnLock(const RowlockRequest* request, - RowlockResponse* response, - std::function done = NULL) { - response->set_lock_status(kLockSucc); - - return true; - } -}; - -} // namespace observer -} // namespace tera -#endif // TERA_OBSERVER_EXECUTOR_FAKE_ROWLOCK_CLIENT_H_ - - diff --git a/src/observer/rowlocknode/fake_rowlocknode_zk_adapter.cc b/src/observer/rowlocknode/fake_rowlocknode_zk_adapter.cc index 2cf0d8974..e5687bba9 100644 --- a/src/observer/rowlocknode/fake_rowlocknode_zk_adapter.cc +++ b/src/observer/rowlocknode/fake_rowlocknode_zk_adapter.cc @@ -21,46 +21,43 @@ namespace tera { namespace observer { FakeRowlockNodeZkAdapter::FakeRowlockNodeZkAdapter(RowlockNodeImpl* rowlocknode_impl, - const std::string& server_addr) : - rowlocknode_impl_(rowlocknode_impl), server_addr_(server_addr) { -} + const std::string& server_addr) + : rowlocknode_impl_(rowlocknode_impl), server_addr_(server_addr) {} -FakeRowlockNodeZkAdapter::~FakeRowlockNodeZkAdapter() { -} +FakeRowlockNodeZkAdapter::~FakeRowlockNodeZkAdapter() {} void FakeRowlockNodeZkAdapter::Init() { - std::string root_path = FLAGS_rowlock_fake_root_path; - - std::string node_num_key = root_path + kRowlockNodeNumPath; - zk::FakeZkUtil::WriteNode(node_num_key, std::to_string(FLAGS_rowlock_server_node_num)); - - // create node - int id = 0; - std::string id_lock_key; - std::string host_lock_key; - while (true) { - id_lock_key = root_path + kRowlockNodeIdListPath + "/" + std::to_string(id); - std::string file_path = "mkdir -p " + root_path + kRowlockNodeIdListPath; - system(file_path.c_str()); - if (zk::FakeZkUtil::WriteNode(id_lock_key, std::to_string(id))) { - break; - } else { - LOG(ERROR) << "[Fake rowlock zk]: write node " << id_lock_key << " failed"; - } - if (++id >= FLAGS_rowlock_server_node_num) { - id = 0; - } - ThisThread::Sleep(1); + std::string root_path = FLAGS_rowlock_fake_root_path; + + std::string node_num_key = root_path + kRowlockNodeNumPath; + zk::FakeZkUtil::WriteNode(node_num_key, std::to_string(FLAGS_rowlock_server_node_num)); + + // create node + int id = 0; + std::string id_lock_key; + std::string host_lock_key; + while (true) { + id_lock_key = root_path + kRowlockNodeIdListPath + "/" + std::to_string(id); + std::string file_path = "mkdir -p " + root_path + kRowlockNodeIdListPath; + system(file_path.c_str()); + if (zk::FakeZkUtil::WriteNode(id_lock_key, std::to_string(id))) { + break; + } else { + LOG(ERROR) << "[Fake rowlock zk]: write node " << id_lock_key << " failed"; + } + if (++id >= FLAGS_rowlock_server_node_num) { + id = 0; } + ThisThread::Sleep(1); + } - LOG(INFO) << "RowlockNode Id=" << id << " host=" << server_addr_ - << " nodenum=" << FLAGS_rowlock_server_node_num; + LOG(INFO) << "RowlockNode Id=" << id << " host=" << server_addr_ + << " nodenum=" << FLAGS_rowlock_server_node_num; } void FakeRowlockNodeZkAdapter::OnLockChange(std::string session_id, bool deleted) { - _Exit(EXIT_FAILURE); + _Exit(EXIT_FAILURE); } -} // namespace observer -} // namespace tera - +} // namespace observer +} // namespace tera diff --git a/src/observer/rowlocknode/fake_rowlocknode_zk_adapter.h b/src/observer/rowlocknode/fake_rowlocknode_zk_adapter.h index 686b2cdef..9c1755e3f 100644 --- a/src/observer/rowlocknode/fake_rowlocknode_zk_adapter.h +++ b/src/observer/rowlocknode/fake_rowlocknode_zk_adapter.h @@ -15,10 +15,10 @@ namespace galaxy { namespace ins { namespace sdk { - class InsSDK; -} // namespace sdk -} // namespace ins -} // namespace galaxy +class InsSDK; +} // namespace sdk +} // namespace ins +} // namespace galaxy namespace tera { namespace observer { @@ -26,30 +26,26 @@ namespace observer { class RowlockNodeImpl; class FakeRowlockNodeZkAdapter : public RowlockNodeZkAdapterBase { -public: - FakeRowlockNodeZkAdapter(RowlockNodeImpl* rowlocknode_impl, const std::string& server_addr); - virtual ~FakeRowlockNodeZkAdapter(); - virtual void Init(); - void OnLockChange(std::string session_id, bool deleted); - -private: - virtual void OnChildrenChanged(const std::string& path, - const std::vector& name_list, - const std::vector& data_list) {} - virtual void OnNodeValueChanged(const std::string& path, - const std::string& value) {} - virtual void OnNodeCreated(const std::string& path) {} - virtual void OnNodeDeleted(const std::string& path) {} - virtual void OnWatchFailed(const std::string& path, int watch_type, - int err) {} - virtual void OnSessionTimeout() {} - -private: - RowlockNodeImpl* rowlocknode_impl_; - std::string server_addr_; + public: + FakeRowlockNodeZkAdapter(RowlockNodeImpl* rowlocknode_impl, const std::string& server_addr); + virtual ~FakeRowlockNodeZkAdapter(); + virtual void Init(); + void OnLockChange(std::string session_id, bool deleted); + + private: + virtual void OnChildrenChanged(const std::string& path, const std::vector& name_list, + const std::vector& data_list) {} + virtual void OnNodeValueChanged(const std::string& path, const std::string& value) {} + virtual void OnNodeCreated(const std::string& path) {} + virtual void OnNodeDeleted(const std::string& path) {} + virtual void OnWatchFailed(const std::string& path, int watch_type, int err) {} + virtual void OnSessionTimeout() {} + + private: + RowlockNodeImpl* rowlocknode_impl_; + std::string server_addr_; }; -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera #endif // TERA_OBSERVER_ROWLOCKNODE_FAKE_ROWLOCKNODE_ZK_ADAPTER_H_ - diff --git a/src/observer/rowlocknode/ins_rowlock_client_zk_adapter.cc b/src/observer/rowlocknode/ins_rowlock_client_zk_adapter.cc deleted file mode 100644 index 01c9e8970..000000000 --- a/src/observer/rowlocknode/ins_rowlock_client_zk_adapter.cc +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "observer/rowlocknode/ins_rowlock_client_zk_adapter.h" - -#include -#include - -#include "ins_sdk.h" - -#include "sdk/rowlock_client.h" -#include "types.h" - -DECLARE_string(rowlock_ins_root_path); -DECLARE_string(tera_ins_addr_list); -DECLARE_int32(rowlock_server_node_num); -DECLARE_int64(tera_zk_retry_period); -DECLARE_int32(tera_zk_timeout); -DECLARE_int32(tera_zk_retry_max_times); - -namespace tera { -namespace observer { - -InsRowlockClientZkAdapter::InsRowlockClientZkAdapter(RowlockClient* server_client, - const std::string& server_addr) - : ZkRowlockClientZkAdapter(server_client, server_addr), - client_(server_client), - server_addr_(server_addr) {} - -bool InsRowlockClientZkAdapter::Init() { - std::string root_path = FLAGS_rowlock_ins_root_path; - std::vector value; - // create session - ins_sdk_ = new galaxy::ins::sdk::InsSDK(FLAGS_tera_ins_addr_list); - - // put server_node_num - std::string rowlock_proxy_path = root_path + kRowlockProxyPath; - - galaxy::ins::sdk::ScanResult* result = ins_sdk_->Scan(rowlock_proxy_path + "/!", - rowlock_proxy_path + "/~"); - while (!result->Done()) { - CHECK_EQ(result->Error(), galaxy::ins::sdk::kOK); - value.push_back(result->Value()); - result->Next(); - } - delete result; - - client_->Update(value); - return true; -} - -} // namespace observer -} // namespace tera - diff --git a/src/observer/rowlocknode/ins_rowlock_client_zk_adapter.h b/src/observer/rowlocknode/ins_rowlock_client_zk_adapter.h deleted file mode 100644 index 7f56389ce..000000000 --- a/src/observer/rowlocknode/ins_rowlock_client_zk_adapter.h +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef TERA_OBSERVER_ROWLOCKNODE_INS_ROWLOCK_CLIENT_ZK_ADAPTER_H_ -#define TERA_OBSERVER_ROWLOCKNODE_INS_ROWLOCK_CLIENT_ZK_ADAPTER_H_ - -#include "observer/rowlocknode/zk_rowlock_client_zk_adapter.h" -#include "zk/zk_adapter.h" - -namespace galaxy { -namespace ins { -namespace sdk { - class InsSDK; -} // namespace sdk -} // namespace ins -} // namespace galaxy - -namespace tera { -namespace observer { - -class RowlockClient; - -class InsRowlockClientZkAdapter : public ZkRowlockClientZkAdapter { -public: - InsRowlockClientZkAdapter(RowlockClient* server_client, const std::string& server_addr); - virtual ~InsRowlockClientZkAdapter() {}; - virtual bool Init(); -protected: - virtual void OnNodeValueChanged(const std::string& path, - const std::string& value) {} - virtual void OnWatchFailed(const std::string& path, int watch_type, - int err) {} - virtual void OnNodeDeleted(const std::string& path) {} - virtual void OnSessionTimeout() {} - virtual void OnNodeCreated(const std::string& path) {} - virtual void OnChildrenChanged(const std::string& path, - const std::vector& name_list, - const std::vector& data_list) {} - -private: - RowlockClient* client_; - std::string server_addr_; - galaxy::ins::sdk::InsSDK* ins_sdk_; -}; - -} // namespace observer -} // namespace tera - -#endif // TERA_OBSERVER_ROWLOCKNODE_INS_ROWLOCK_CLIENT_ZK_ADAPTER_H_ diff --git a/src/observer/rowlocknode/ins_rowlocknode_zk_adapter.cc b/src/observer/rowlocknode/ins_rowlocknode_zk_adapter.cc index c0ec709d5..a48ebcdae 100644 --- a/src/observer/rowlocknode/ins_rowlocknode_zk_adapter.cc +++ b/src/observer/rowlocknode/ins_rowlocknode_zk_adapter.cc @@ -9,7 +9,7 @@ #include "observer/rowlocknode/ins_rowlocknode_zk_adapter.h" #include "types.h" -DECLARE_int64(tera_zk_retry_period); +DECLARE_int64(tera_zk_retry_period); DECLARE_string(rowlock_ins_root_path); DECLARE_string(tera_ins_addr_list); DECLARE_int32(rowlock_server_node_num); @@ -19,62 +19,59 @@ namespace tera { namespace observer { InsRowlockNodeZkAdapter::InsRowlockNodeZkAdapter(RowlockNodeImpl* rowlocknode_impl, - const std::string& server_addr) : - rowlocknode_impl_(rowlocknode_impl), server_addr_(server_addr) { -} + const std::string& server_addr) + : rowlocknode_impl_(rowlocknode_impl), server_addr_(server_addr) {} -InsRowlockNodeZkAdapter::~InsRowlockNodeZkAdapter() { -} +InsRowlockNodeZkAdapter::~InsRowlockNodeZkAdapter() {} static void InsOnLockChange(const galaxy::ins::sdk::WatchParam& param, galaxy::ins::sdk::SDKError error) { - LOG(ERROR) << "recv lock change event" ; - InsRowlockNodeZkAdapter* ins_adp = static_cast(param.context); - ins_adp->OnLockChange(param.value, param.deleted); + LOG(ERROR) << "recv lock change event"; + InsRowlockNodeZkAdapter* ins_adp = static_cast(param.context); + ins_adp->OnLockChange(param.value, param.deleted); } void InsRowlockNodeZkAdapter::Init() { - std::string root_path = FLAGS_rowlock_ins_root_path; - galaxy::ins::sdk::SDKError err; - // create session - ins_sdk_ = new galaxy::ins::sdk::InsSDK(FLAGS_tera_ins_addr_list); - // get session id - std::string session_id = ins_sdk_->GetSessionID(); + std::string root_path = FLAGS_rowlock_ins_root_path; + galaxy::ins::sdk::SDKError err; + // create session + ins_sdk_ = new galaxy::ins::sdk::InsSDK(FLAGS_tera_ins_addr_list); + // get session id + std::string session_id = ins_sdk_->GetSessionID(); - // put server_node_num - std::string node_num_key = root_path + kRowlockNodeNumPath; - if (!ins_sdk_->Put(node_num_key, std::to_string(FLAGS_rowlock_server_node_num), &err)) { - LOG(WARNING) << "put NodeNum fail"; - } + // put server_node_num + std::string node_num_key = root_path + kRowlockNodeNumPath; + if (!ins_sdk_->Put(node_num_key, std::to_string(FLAGS_rowlock_server_node_num), &err)) { + LOG(WARNING) << "put NodeNum fail"; + } - // create node - int id = 0; - std::string id_lock_key; - std::string host_lock_key; - while (true) { - id_lock_key = root_path + kRowlockNodeIdListPath + "/" + std::to_string(id); - if (ins_sdk_->Put(id_lock_key, server_addr_, &err) && galaxy::ins::sdk::kOK == err) { - host_lock_key = root_path + kRowlockNodeHostListPath + "/" + server_addr_; - CHECK(ins_sdk_->Lock(host_lock_key, &err)) << "register fail"; - break; - } - if (++id >= FLAGS_rowlock_server_node_num) { - id = 0; - } - ThisThread::Sleep(FLAGS_tera_zk_retry_period); + // create node + int id = 0; + std::string id_lock_key; + std::string host_lock_key; + while (true) { + id_lock_key = root_path + kRowlockNodeIdListPath + "/" + std::to_string(id); + if (ins_sdk_->Put(id_lock_key, server_addr_, &err) && galaxy::ins::sdk::kOK == err) { + host_lock_key = root_path + kRowlockNodeHostListPath + "/" + server_addr_; + CHECK(ins_sdk_->Lock(host_lock_key, &err)) << "register fail"; + break; } + if (++id >= FLAGS_rowlock_server_node_num) { + id = 0; + } + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } - // create watch node - CHECK(ins_sdk_->Watch(host_lock_key, &InsOnLockChange, this, &err)) << "watch lock fail"; + // create watch node + CHECK(ins_sdk_->Watch(host_lock_key, &InsOnLockChange, this, &err)) << "watch lock fail"; - LOG(ERROR) << "RowlockNode Id=" << id << " host=" << server_addr_ - << " nodenum=" << FLAGS_rowlock_server_node_num; + LOG(ERROR) << "RowlockNode Id=" << id << " host=" << server_addr_ + << " nodenum=" << FLAGS_rowlock_server_node_num; } void InsRowlockNodeZkAdapter::OnLockChange(std::string session_id, bool deleted) { - _Exit(EXIT_FAILURE); + _Exit(EXIT_FAILURE); } -} // namespace observer -} // namespace tera - +} // namespace observer +} // namespace tera diff --git a/src/observer/rowlocknode/ins_rowlocknode_zk_adapter.h b/src/observer/rowlocknode/ins_rowlocknode_zk_adapter.h index b335115fa..cc1aa2820 100644 --- a/src/observer/rowlocknode/ins_rowlocknode_zk_adapter.h +++ b/src/observer/rowlocknode/ins_rowlocknode_zk_adapter.h @@ -15,10 +15,10 @@ namespace galaxy { namespace ins { namespace sdk { - class InsSDK; -} // namespace sdk -} // namespace ins -} // namespace galaxy +class InsSDK; +} // namespace sdk +} // namespace ins +} // namespace galaxy namespace tera { namespace observer { @@ -26,31 +26,27 @@ namespace observer { class RowlockNodeImpl; class InsRowlockNodeZkAdapter : public RowlockNodeZkAdapterBase { -public: - InsRowlockNodeZkAdapter(RowlockNodeImpl* rowlocknode_impl, const std::string& server_addr); - virtual ~InsRowlockNodeZkAdapter(); - virtual void Init(); - void OnLockChange(std::string session_id, bool deleted); - -private: - virtual void OnChildrenChanged(const std::string& path, - const std::vector& name_list, - const std::vector& data_list) {} - virtual void OnNodeValueChanged(const std::string& path, - const std::string& value) {} - virtual void OnNodeCreated(const std::string& path) {} - virtual void OnNodeDeleted(const std::string& path) {} - virtual void OnWatchFailed(const std::string& path, int watch_type, - int err) {} - virtual void OnSessionTimeout() {} - -private: - RowlockNodeImpl* rowlocknode_impl_; - std::string server_addr_; - galaxy::ins::sdk::InsSDK* ins_sdk_; + public: + InsRowlockNodeZkAdapter(RowlockNodeImpl* rowlocknode_impl, const std::string& server_addr); + virtual ~InsRowlockNodeZkAdapter(); + virtual void Init(); + void OnLockChange(std::string session_id, bool deleted); + + private: + virtual void OnChildrenChanged(const std::string& path, const std::vector& name_list, + const std::vector& data_list) {} + virtual void OnNodeValueChanged(const std::string& path, const std::string& value) {} + virtual void OnNodeCreated(const std::string& path) {} + virtual void OnNodeDeleted(const std::string& path) {} + virtual void OnWatchFailed(const std::string& path, int watch_type, int err) {} + virtual void OnSessionTimeout() {} + + private: + RowlockNodeImpl* rowlocknode_impl_; + std::string server_addr_; + galaxy::ins::sdk::InsSDK* ins_sdk_; }; -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera #endif // TERA_OBSERVER_ROWLOCKNODE_INS_ROWLOCKNODE_ZK_ADAPTER_H_ - diff --git a/src/observer/rowlocknode/remote_rowlocknode.cc b/src/observer/rowlocknode/remote_rowlocknode.cc index 533672607..42c3ff7a9 100644 --- a/src/observer/rowlocknode/remote_rowlocknode.cc +++ b/src/observer/rowlocknode/remote_rowlocknode.cc @@ -11,26 +11,22 @@ DECLARE_int32(rowlock_thread_max_num); namespace tera { namespace observer { -RemoteRowlockNode::RemoteRowlockNode(RowlockNodeImpl* rowlocknode_impl) : - rowlocknode_impl_(rowlocknode_impl) { -} +RemoteRowlockNode::RemoteRowlockNode(RowlockNodeImpl* rowlocknode_impl) + : rowlocknode_impl_(rowlocknode_impl) {} -RemoteRowlockNode::~RemoteRowlockNode() { -} +RemoteRowlockNode::~RemoteRowlockNode() {} void RemoteRowlockNode::Lock(google::protobuf::RpcController* controller, - const RowlockRequest* request, - RowlockResponse* response, - google::protobuf::Closure* done) { - rowlocknode_impl_->TryLock(request, response, done); + const RowlockRequest* request, RowlockResponse* response, + google::protobuf::Closure* done) { + rowlocknode_impl_->TryLock(request, response, done); } void RemoteRowlockNode::UnLock(google::protobuf::RpcController* controller, - const RowlockRequest* request, - RowlockResponse* response, + const RowlockRequest* request, RowlockResponse* response, google::protobuf::Closure* done) { - rowlocknode_impl_->UnLock(request, response, done); + rowlocknode_impl_->UnLock(request, response, done); } -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera diff --git a/src/observer/rowlocknode/remote_rowlocknode.h b/src/observer/rowlocknode/remote_rowlocknode.h index 6c65d79d2..da1f7e2ae 100644 --- a/src/observer/rowlocknode/remote_rowlocknode.h +++ b/src/observer/rowlocknode/remote_rowlocknode.h @@ -13,25 +13,20 @@ namespace tera { namespace observer { class RemoteRowlockNode : public RowlockService { -public: - explicit RemoteRowlockNode(RowlockNodeImpl* rowlocknode_impl); - ~RemoteRowlockNode(); - - void Lock(google::protobuf::RpcController* controller, - const RowlockRequest* request, - RowlockResponse* response, - google::protobuf::Closure* done); - - void UnLock(google::protobuf::RpcController* controller, - const RowlockRequest* request, - RowlockResponse* response, - google::protobuf::Closure* done); - -private: - RowlockNodeImpl* rowlocknode_impl_; + public: + explicit RemoteRowlockNode(RowlockNodeImpl* rowlocknode_impl); + ~RemoteRowlockNode(); + + void Lock(google::protobuf::RpcController* controller, const RowlockRequest* request, + RowlockResponse* response, google::protobuf::Closure* done); + + void UnLock(google::protobuf::RpcController* controller, const RowlockRequest* request, + RowlockResponse* response, google::protobuf::Closure* done); + + private: + RowlockNodeImpl* rowlocknode_impl_; }; -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera #endif // TERA_OBSERVER_ROWLOCKNODE_REMOTE_ROWLOCKNODE_H_ - diff --git a/src/observer/rowlocknode/rowlock_db.h b/src/observer/rowlocknode/rowlock_db.h index 94c98889c..56e939a7b 100644 --- a/src/observer/rowlocknode/rowlock_db.h +++ b/src/observer/rowlocknode/rowlock_db.h @@ -31,131 +31,132 @@ namespace tera { namespace observer { class RowlockDB { -public: - RowlockDB() - : timing_wheel_pos_(0), - timing_wheel_patch_num_(FLAGS_rowlock_timing_wheel_patch_num) { - timing_wheel_.resize(timing_wheel_patch_num_); + public: + RowlockDB() + : timing_wheel_pos_(0), timing_wheel_patch_num_(FLAGS_rowlock_timing_wheel_patch_num) { + timing_wheel_.resize(timing_wheel_patch_num_); + } + + ~RowlockDB() {} + + bool TryLock(uint64_t row) { + MutexLock locker(&mutex_); + if (locks_.find(row) == locks_.end()) { + locks_[row].reset(new uint64_t(row)); + std::weak_ptr ptr = locks_[row]; + timing_wheel_[timing_wheel_pos_].push_back(ptr); + return true; + } else { + return false; } - - ~RowlockDB() {} - - bool TryLock(uint64_t row) { - MutexLock locker(&mutex_); - if (locks_.find(row) == locks_.end()) { - locks_[row].reset(new uint64_t(row)); - std::weak_ptr ptr = locks_[row]; - timing_wheel_[timing_wheel_pos_].push_back(ptr); - return true; - } else { - return false; - } - } - - void UnLock(uint64_t row) { - MutexLock locker(&mutex_); - locks_.erase(row); - } - - // call this function ever timeout period - // 1. pointer of timing wheel move forward by one step - // 2. clear all the rowlock keys and remove them from locks_ - // 3. the next 60 seconds all new rowlock keys will be put into this wheel patch - void ClearTimeout() { - // pointer forward + } + + void UnLock(uint64_t row) { + MutexLock locker(&mutex_); + locks_.erase(row); + } + + // call this function ever timeout period + // 1. pointer of timing wheel move forward by one step + // 2. clear all the rowlock keys and remove them from locks_ + // 3. the next 60 seconds all new rowlock keys will be put into this wheel + // patch + void ClearTimeout() { + // pointer forward + mutex_.Lock(); + timing_wheel_pos_ = (timing_wheel_pos_ + 1) % timing_wheel_patch_num_; + std::vector> buffer; + + // release memory + buffer.swap(timing_wheel_[timing_wheel_pos_]); + mutex_.Unlock(); + + // remove key from locks_ + for (uint32_t i = 0; i < buffer.size(); ++i) { + if (!buffer[i].expired()) { mutex_.Lock(); - timing_wheel_pos_ = (timing_wheel_pos_ + 1) % timing_wheel_patch_num_; - std::vector> buffer; - - // release memory - buffer.swap(timing_wheel_[timing_wheel_pos_]); + auto it = buffer[i].lock(); + locks_.erase(*it); mutex_.Unlock(); - - // remove key from locks_ - for (uint32_t i = 0; i < buffer.size(); ++i) { - if (!buffer[i].expired()) { - mutex_.Lock(); - auto it = buffer[i].lock(); - locks_.erase(*it); - mutex_.Unlock(); - } - } + } } + } - size_t Size() const { - MutexLock locker(&mutex_); - return locks_.size(); - } + size_t Size() const { + MutexLock locker(&mutex_); + return locks_.size(); + } -private: - mutable Mutex mutex_; + private: + mutable Mutex mutex_; - std::unordered_map> locks_; + std::unordered_map> locks_; - // timing wheel - uint32_t timing_wheel_pos_; - uint32_t timing_wheel_patch_num_; - std::vector>> timing_wheel_; + // timing wheel + uint32_t timing_wheel_pos_; + uint32_t timing_wheel_patch_num_; + std::vector>> timing_wheel_; }; class ShardedRowlockDB { -public: - ShardedRowlockDB() : thread_pool_(new ThreadPool(1)) { - lock_map_.resize(FLAGS_rowlock_db_sharding_number); - - for (int32_t i = 0; i < FLAGS_rowlock_db_sharding_number; ++i) { - std::unique_ptr db(new RowlockDB()); - lock_map_[i].reset(db.release()); - } - ScheduleClearTimeout(); - } - - ~ShardedRowlockDB() {} - - bool TryLock(uint64_t row) { - std::unique_ptr& db_node = lock_map_[row % FLAGS_rowlock_db_sharding_number]; + public: + ShardedRowlockDB() : thread_pool_(new ThreadPool(1)) { + lock_map_.resize(FLAGS_rowlock_db_sharding_number); - if (db_node->TryLock(row) == true) { - return true; - } else { - return false; - } + for (int32_t i = 0; i < FLAGS_rowlock_db_sharding_number; ++i) { + std::unique_ptr db(new RowlockDB()); + lock_map_[i].reset(db.release()); } + ScheduleClearTimeout(); + } - void UnLock(uint64_t row) { - std::unique_ptr& db_node = lock_map_[row % FLAGS_rowlock_db_sharding_number]; - db_node->UnLock(row); - } + ~ShardedRowlockDB() {} + + bool TryLock(uint64_t row) { + std::unique_ptr& db_node = lock_map_[row % FLAGS_rowlock_db_sharding_number]; - size_t Size() const { - size_t size = 0; - for (uint32_t i = 0; i < lock_map_.size(); ++i) { - size += lock_map_[i]->Size(); - } - return size; + if (db_node->TryLock(row) == true) { + return true; + } else { + return false; } + } -private: - void ScheduleClearTimeout() { - ClearTimeout(); + void UnLock(uint64_t row) { + std::unique_ptr& db_node = lock_map_[row % FLAGS_rowlock_db_sharding_number]; + db_node->UnLock(row); + } - ThreadPool::Task task = std::bind(&ShardedRowlockDB::ScheduleClearTimeout, this); - // everytime timing wheel move forward one step, every patch_num steps data will be cleared - thread_pool_->DelayTask(FLAGS_rowlock_db_ttl / FLAGS_rowlock_timing_wheel_patch_num, task); + size_t Size() const { + size_t size = 0; + for (uint32_t i = 0; i < lock_map_.size(); ++i) { + size += lock_map_[i]->Size(); } - - void ClearTimeout() { - for (int32_t i = 0; i < FLAGS_rowlock_db_sharding_number; ++i) { - lock_map_[i]->ClearTimeout(); - } + return size; + } + + private: + void ScheduleClearTimeout() { + ClearTimeout(); + + ThreadPool::Task task = std::bind(&ShardedRowlockDB::ScheduleClearTimeout, this); + // everytime timing wheel move forward one step, every patch_num steps data + // will be cleared + thread_pool_->DelayTask(FLAGS_rowlock_db_ttl / FLAGS_rowlock_timing_wheel_patch_num, task); + } + + void ClearTimeout() { + for (int32_t i = 0; i < FLAGS_rowlock_db_sharding_number; ++i) { + lock_map_[i]->ClearTimeout(); } + } -private: - std::vector> lock_map_; - scoped_ptr thread_pool_; + private: + std::vector> lock_map_; + scoped_ptr thread_pool_; }; -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera #endif // TERA_OBSERVER_ROWLOCKNODE_ROWLOCK_DB_H_ diff --git a/src/observer/rowlocknode/rowlocknode_entry.cc b/src/observer/rowlocknode/rowlocknode_entry.cc index eb2eb4e17..b96af2169 100644 --- a/src/observer/rowlocknode/rowlocknode_entry.cc +++ b/src/observer/rowlocknode/rowlocknode_entry.cc @@ -22,66 +22,62 @@ DECLARE_string(rowlock_server_port); DECLARE_int32(rowlock_io_service_pool_size); DECLARE_int32(rowlock_rpc_work_thread_num); -std::string GetTeraEntryName() { - return "rowlock"; -} +std::string GetTeraEntryName() { return "rowlock"; } -tera::TeraEntry* GetTeraEntry() { - return new tera::observer::RowlockNodeEntry(); -} +tera::TeraEntry* GetTeraEntry() { return new tera::observer::RowlockNodeEntry(); } namespace tera { namespace observer { RowlockNodeEntry::RowlockNodeEntry() : rowlocknode_impl_(NULL), remote_rowlocknode_(NULL) { - sofa::pbrpc::RpcServerOptions rpc_options; - rpc_options.max_throughput_in = -1; - rpc_options.max_throughput_out = -1; - rpc_options.work_thread_num = FLAGS_rowlock_rpc_work_thread_num; - rpc_options.io_service_pool_size = FLAGS_rowlock_io_service_pool_size; - rpc_options.no_delay = false; //use Nagle's Algorithm - rpc_options.write_buffer_base_block_factor = 0; //64Bytes per malloc - rpc_options.read_buffer_base_block_factor = 7; //8kBytes per malloc - rpc_server_.reset(new sofa::pbrpc::RpcServer(rpc_options)); + sofa::pbrpc::RpcServerOptions rpc_options; + rpc_options.max_throughput_in = -1; + rpc_options.max_throughput_out = -1; + rpc_options.work_thread_num = FLAGS_rowlock_rpc_work_thread_num; + rpc_options.io_service_pool_size = FLAGS_rowlock_io_service_pool_size; + rpc_options.no_delay = false; // use Nagle's Algorithm + rpc_options.write_buffer_base_block_factor = 0; // 64Bytes per malloc + rpc_options.read_buffer_base_block_factor = 7; // 8kBytes per malloc + rpc_server_.reset(new sofa::pbrpc::RpcServer(rpc_options)); } RowlockNodeEntry::~RowlockNodeEntry() {} bool RowlockNodeEntry::StartServer() { - SetProcessorAffinity(); - IpAddress rowlocknode_addr("0.0.0.0", FLAGS_rowlock_server_port); - LOG(INFO) << "Start RPC server at: " << rowlocknode_addr.ToString(); - rowlocknode_impl_.reset(new RowlockNodeImpl()); - remote_rowlocknode_ = new RemoteRowlockNode(rowlocknode_impl_.get()); - rpc_server_->RegisterService(remote_rowlocknode_); - if (!rpc_server_->Start(rowlocknode_addr.ToString())) { - LOG(ERROR) << "start RPC server error"; - return false; - } - if (!rowlocknode_impl_->Init()) { - LOG(ERROR) << "fail to init rowlocknode_impl"; - return false; - } - LOG(INFO) << "finish starting RPC server"; + SetProcessorAffinity(); + IpAddress rowlocknode_addr("0.0.0.0", FLAGS_rowlock_server_port); + LOG(INFO) << "Start RPC server at: " << rowlocknode_addr.ToString(); + rowlocknode_impl_.reset(new RowlockNodeImpl()); + remote_rowlocknode_ = new RemoteRowlockNode(rowlocknode_impl_.get()); + rpc_server_->RegisterService(remote_rowlocknode_); + if (!rpc_server_->Start(rowlocknode_addr.ToString())) { + LOG(ERROR) << "start RPC server error"; + return false; + } + if (!rowlocknode_impl_->Init()) { + LOG(ERROR) << "fail to init rowlocknode_impl"; + return false; + } + LOG(INFO) << "finish starting RPC server"; - return true; + return true; } void RowlockNodeEntry::ShutdownServer() { - LOG(INFO) << "shut down server"; - rpc_server_->Stop(); - rowlocknode_impl_->Exit(); - rowlocknode_impl_.reset(); - LOG(INFO) << "RowlockNodeEntry stop done!"; + LOG(INFO) << "shut down server"; + rpc_server_->Stop(); + rowlocknode_impl_->Exit(); + rowlocknode_impl_.reset(); + LOG(INFO) << "RowlockNodeEntry stop done!"; } bool RowlockNodeEntry::Run() { - ThisThread::Sleep(3000); - rowlocknode_impl_->PrintQPS(); - return true; + ThisThread::Sleep(3000); + rowlocknode_impl_->PrintQPS(); + return true; } void RowlockNodeEntry::SetProcessorAffinity() {} -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera diff --git a/src/observer/rowlocknode/rowlocknode_entry.h b/src/observer/rowlocknode/rowlocknode_entry.h index b968e8d4e..63080b56b 100644 --- a/src/observer/rowlocknode/rowlocknode_entry.h +++ b/src/observer/rowlocknode/rowlocknode_entry.h @@ -10,30 +10,30 @@ #include "common/base/scoped_ptr.h" #include "observer/rowlocknode/remote_rowlocknode.h" #include "observer/rowlocknode/rowlocknode_impl.h" -#include "tera_entry.h" +#include "tera/tera_entry.h" namespace tera { namespace observer { class RowlockNodeEntry : public tera::TeraEntry { -public: - RowlockNodeEntry(); - virtual ~RowlockNodeEntry(); + public: + RowlockNodeEntry(); + virtual ~RowlockNodeEntry(); - virtual bool StartServer(); - virtual bool Run(); - virtual void ShutdownServer(); - void SetProcessorAffinity(); + virtual bool StartServer(); + virtual bool Run(); + virtual void ShutdownServer(); + void SetProcessorAffinity(); -private: - common::Mutex mutex_; + private: + common::Mutex mutex_; - scoped_ptr rowlocknode_impl_; - RemoteRowlockNode* remote_rowlocknode_; - scoped_ptr rpc_server_; + scoped_ptr rowlocknode_impl_; + RemoteRowlockNode* remote_rowlocknode_; + scoped_ptr rpc_server_; }; -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera #endif // TERA_OBSERVER_ROWLOCKNODE_ROWLOCKNODE_ENTRY_H_ diff --git a/src/observer/rowlocknode/rowlocknode_impl.cc b/src/observer/rowlocknode/rowlocknode_impl.cc index 0bf30708a..fb70a5522 100644 --- a/src/observer/rowlocknode/rowlocknode_impl.cc +++ b/src/observer/rowlocknode/rowlocknode_impl.cc @@ -21,64 +21,54 @@ RowlockNodeImpl::RowlockNodeImpl() {} RowlockNodeImpl::~RowlockNodeImpl() {} bool RowlockNodeImpl::Init() { - std::string local_addr = tera::utils::GetLocalHostName() + ":" + FLAGS_rowlock_server_port; - if (FLAGS_tera_coord_type == "zk") { - zk_adapter_.reset(new RowlockNodeZkAdapter(this, local_addr)); - } else if (FLAGS_tera_coord_type == "ins") { - zk_adapter_.reset(new InsRowlockNodeZkAdapter(this, local_addr)); - } else { - zk_adapter_.reset(new FakeRowlockNodeZkAdapter(this, local_addr)); - } - - zk_adapter_->Init(); - - LOG(INFO) << "Rowlock node init finish"; - return true; + std::string local_addr = tera::utils::GetLocalHostName() + ":" + FLAGS_rowlock_server_port; + if (FLAGS_tera_coord_type == "zk") { + zk_adapter_.reset(new RowlockNodeZkAdapter(this, local_addr)); + } else if (FLAGS_tera_coord_type == "ins") { + zk_adapter_.reset(new InsRowlockNodeZkAdapter(this, local_addr)); + } else { + zk_adapter_.reset(new FakeRowlockNodeZkAdapter(this, local_addr)); + } + + zk_adapter_->Init(); + + LOG(INFO) << "Rowlock node init finish"; + return true; } -bool RowlockNodeImpl::Exit() { - return true; -} - -void RowlockNodeImpl::TryLock(const RowlockRequest* request, - RowlockResponse* response, - google::protobuf::Closure* done) { - uint64_t rowlock_key = GetRowlockKey(request->table_name(), request->row()); - if (rowlock_db_.TryLock(rowlock_key)) { - response->set_lock_status(kLockSucc); - VLOG(12) << "Lock success: " << request->row(); - } else { - response->set_lock_status(kLockFail); - LOG(WARNING) << " table name: " << request->table_name() - << " row :" << request->row(); - } - - done->Run(); -} +bool RowlockNodeImpl::Exit() { return true; } -void RowlockNodeImpl::UnLock(const RowlockRequest* request, - RowlockResponse* response, - google::protobuf::Closure* done) { - uint64_t rowlock_key = GetRowlockKey(request->table_name(), request->row()); - rowlock_db_.UnLock(rowlock_key); +void RowlockNodeImpl::TryLock(const RowlockRequest* request, RowlockResponse* response, + google::protobuf::Closure* done) { + uint64_t rowlock_key = GetRowlockKey(request->table_name(), request->row()); + if (rowlock_db_.TryLock(rowlock_key)) { response->set_lock_status(kLockSucc); - VLOG(12) << "Unlock success: " << request->row(); - done->Run(); -} + VLOG(12) << "Lock success: " << request->row(); + } else { + response->set_lock_status(kLockFail); + LOG(WARNING) << " table name: " << request->table_name() << " row :" << request->row(); + } -void RowlockNodeImpl::PrintQPS() { - return; + done->Run(); } -uint64_t RowlockNodeImpl::GetRowlockKey(const std::string& table_name, - const std::string& row) const { - // RowlockKey : TableName + Row - std::string rowlock_key_str = table_name + row; - return std::hash()(rowlock_key_str); - +void RowlockNodeImpl::UnLock(const RowlockRequest* request, RowlockResponse* response, + google::protobuf::Closure* done) { + uint64_t rowlock_key = GetRowlockKey(request->table_name(), request->row()); + rowlock_db_.UnLock(rowlock_key); + response->set_lock_status(kLockSucc); + VLOG(12) << "Unlock success: " << request->row(); + done->Run(); } +void RowlockNodeImpl::PrintQPS() { return; } -} // namespace observer -} // namespace tera +uint64_t RowlockNodeImpl::GetRowlockKey(const std::string& table_name, + const std::string& row) const { + // RowlockKey : TableName + Row + std::string rowlock_key_str = table_name + row; + return std::hash()(rowlock_key_str); +} +} // namespace observer +} // namespace tera diff --git a/src/observer/rowlocknode/rowlocknode_impl.h b/src/observer/rowlocknode/rowlocknode_impl.h index a60b89dde..34a95c637 100644 --- a/src/observer/rowlocknode/rowlocknode_impl.h +++ b/src/observer/rowlocknode/rowlocknode_impl.h @@ -22,31 +22,31 @@ namespace tera { namespace observer { class RowlockNodeImpl { -public: - RowlockNodeImpl(); - ~RowlockNodeImpl(); + public: + RowlockNodeImpl(); + ~RowlockNodeImpl(); - bool Init(); + bool Init(); - bool Exit(); + bool Exit(); - void TryLock(const RowlockRequest* request, - RowlockResponse* response, - google::protobuf::Closure* done); + void TryLock(const RowlockRequest* request, RowlockResponse* response, + google::protobuf::Closure* done); - void UnLock(const RowlockRequest* request, - RowlockResponse* response, - google::protobuf::Closure* done); + void UnLock(const RowlockRequest* request, RowlockResponse* response, + google::protobuf::Closure* done); - void PrintQPS(); -private: - uint64_t GetRowlockKey(const std::string& table_name, const std::string& row) const; -private: - ShardedRowlockDB rowlock_db_; - std::unique_ptr zk_adapter_; + void PrintQPS(); + + private: + uint64_t GetRowlockKey(const std::string& table_name, const std::string& row) const; + + private: + ShardedRowlockDB rowlock_db_; + std::unique_ptr zk_adapter_; }; -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera #endif // TERA_OBSERVER_ROWLOCKNODE_ROWLOCKNODE_IMPL_H_ diff --git a/src/observer/rowlocknode/rowlocknode_zk_adapter.cc b/src/observer/rowlocknode/rowlocknode_zk_adapter.cc index 9d079a502..48e47ed86 100644 --- a/src/observer/rowlocknode/rowlocknode_zk_adapter.cc +++ b/src/observer/rowlocknode/rowlocknode_zk_adapter.cc @@ -13,7 +13,7 @@ DECLARE_string(rowlock_zk_root_path); DECLARE_string(tera_zk_addr_list); DECLARE_int32(rowlock_server_node_num); -DECLARE_int64(tera_zk_retry_period); +DECLARE_int64(tera_zk_retry_period); DECLARE_int32(tera_zk_timeout); DECLARE_int32(tera_zk_retry_max_times); @@ -21,99 +21,94 @@ namespace tera { namespace observer { RowlockNodeZkAdapter::RowlockNodeZkAdapter(RowlockNodeImpl* rowlocknode_impl, - const std::string& server_addr) : - rowlocknode_impl_(rowlocknode_impl), server_addr_(server_addr) { -} + const std::string& server_addr) + : rowlocknode_impl_(rowlocknode_impl), server_addr_(server_addr) {} -RowlockNodeZkAdapter::~RowlockNodeZkAdapter() { -} +RowlockNodeZkAdapter::~RowlockNodeZkAdapter() {} void RowlockNodeZkAdapter::Init() { - std::string root_path = FLAGS_rowlock_zk_root_path; - std::string node_num_key = root_path + kRowlockNodeNumPath; - - int zk_errno = zk::ZE_OK;; - // init zk client - while (!ZooKeeperAdapter::Init(FLAGS_tera_zk_addr_list, - FLAGS_rowlock_zk_root_path, FLAGS_tera_zk_timeout, - server_addr_, &zk_errno)) { - LOG(ERROR) << "fail to init zk : " << zk::ZkErrnoToString(zk_errno); - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - } - LOG(INFO) << "init zk success"; - - // get session id - int64_t session_id_int = 0; - if (!GetSessionId(&session_id_int, &zk_errno)) { - LOG(ERROR) << "get session id fail : " << zk::ZkErrnoToString(zk_errno); + std::string root_path = FLAGS_rowlock_zk_root_path; + std::string node_num_key = root_path + kRowlockNodeNumPath; + + int zk_errno = zk::ZE_OK; + ; + // init zk client + while (!ZooKeeperAdapter::Init(FLAGS_tera_zk_addr_list, FLAGS_rowlock_zk_root_path, + FLAGS_tera_zk_timeout, server_addr_, &zk_errno)) { + LOG(ERROR) << "fail to init zk : " << zk::ZkErrnoToString(zk_errno); + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + LOG(INFO) << "init zk success"; + + // get session id + int64_t session_id_int = 0; + if (!GetSessionId(&session_id_int, &zk_errno)) { + LOG(ERROR) << "get session id fail : " << zk::ZkErrnoToString(zk_errno); + return; + } + + // put server_node_num + zk_errno = zk::ZE_OK; + bool is_exist = true; + int32_t retry_count = 0; + std::string value = std::to_string(FLAGS_rowlock_server_node_num); + CheckExist(node_num_key, &is_exist, &zk_errno); + if (!is_exist) { + while (!CreateEphemeralNode(node_num_key, value, &zk_errno)) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to create master node"; return; + } + LOG(ERROR) << "retry create rowlock number node in " << FLAGS_tera_zk_retry_period + << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; } - - // put server_node_num + } else { + WriteNode(node_num_key, value, &zk_errno); zk_errno = zk::ZE_OK; - bool is_exist = true; - int32_t retry_count = 0; - std::string value = std::to_string(FLAGS_rowlock_server_node_num); - CheckExist(node_num_key, &is_exist, &zk_errno); - if (!is_exist) { - while (!CreateEphemeralNode(node_num_key, value, &zk_errno)) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to create master node"; - return; - } - LOG(ERROR) << "retry create rowlock number node in " - << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; - } - } else { - WriteNode(node_num_key, value, &zk_errno); - zk_errno = zk::ZE_OK; - } - - value = server_addr_; + } - // create node - int id = 0; - std::string id_lock_key; - std::string host_lock_key; + value = server_addr_; - while (true) { - id_lock_key = root_path + kRowlockNodeIdListPath + "/" + std::to_string(id); - zk_errno = zk::ZE_OK; + // create node + int id = 0; + std::string id_lock_key; + std::string host_lock_key; - if (!CreateEphemeralNode(id_lock_key, server_addr_, &zk_errno)) { - LOG(ERROR) << "create rowlock node fail: " << id_lock_key; - } else { - break; - } - LOG(ERROR) << "fail to create serve-node : " << zk::ZkErrnoToString(zk_errno); - - if (++id >= FLAGS_rowlock_server_node_num) { - id = 0; - } - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - } - LOG(INFO) << "create serve-node success"; - - is_exist = false; + while (true) { + id_lock_key = root_path + kRowlockNodeIdListPath + "/" + std::to_string(id); + zk_errno = zk::ZE_OK; - // watch my node - while (!CheckAndWatchExist(id_lock_key, &is_exist, &zk_errno)) { - LOG(ERROR) << "fail to watch serve-node : " << zk::ZkErrnoToString(zk_errno); - ThisThread::Sleep(FLAGS_tera_zk_retry_period); + if (!CreateEphemeralNode(id_lock_key, server_addr_, &zk_errno)) { + LOG(ERROR) << "create rowlock node fail: " << id_lock_key; + } else { + break; } - LOG(INFO) << "watch rowlock-node success"; + LOG(ERROR) << "fail to create serve-node : " << zk::ZkErrnoToString(zk_errno); - if (!is_exist) { - OnLockChange(); + if (++id >= FLAGS_rowlock_server_node_num) { + id = 0; } + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + LOG(INFO) << "create serve-node success"; + + is_exist = false; + + // watch my node + while (!CheckAndWatchExist(id_lock_key, &is_exist, &zk_errno)) { + LOG(ERROR) << "fail to watch serve-node : " << zk::ZkErrnoToString(zk_errno); + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + LOG(INFO) << "watch rowlock-node success"; + + if (!is_exist) { + OnLockChange(); + } } -void RowlockNodeZkAdapter::OnLockChange() { - _Exit(EXIT_FAILURE); -} - -} // namespace observer -} // namespace tera +void RowlockNodeZkAdapter::OnLockChange() { _Exit(EXIT_FAILURE); } +} // namespace observer +} // namespace tera diff --git a/src/observer/rowlocknode/rowlocknode_zk_adapter.h b/src/observer/rowlocknode/rowlocknode_zk_adapter.h index 67324f85f..0b48187b4 100644 --- a/src/observer/rowlocknode/rowlocknode_zk_adapter.h +++ b/src/observer/rowlocknode/rowlocknode_zk_adapter.h @@ -15,10 +15,10 @@ namespace galaxy { namespace ins { namespace sdk { - class InsSDK; -} // namespace sdk -} // namespace ins -} // namespace galaxy +class InsSDK; +} // namespace sdk +} // namespace ins +} // namespace galaxy namespace tera { namespace observer { @@ -26,30 +26,26 @@ namespace observer { class RowlockNodeImpl; class RowlockNodeZkAdapter : public RowlockNodeZkAdapterBase { -public: - RowlockNodeZkAdapter(RowlockNodeImpl* rowlocknode_impl, const std::string& server_addr); - virtual ~RowlockNodeZkAdapter(); - virtual void Init(); - void OnLockChange(); - -private: - virtual void OnChildrenChanged(const std::string& path, - const std::vector& name_list, - const std::vector& data_list) {} - virtual void OnNodeValueChanged(const std::string& path, - const std::string& value) {} - virtual void OnNodeCreated(const std::string& path) {} - virtual void OnNodeDeleted(const std::string& path) {} - virtual void OnWatchFailed(const std::string& path, int watch_type, - int err) {} - virtual void OnSessionTimeout() {} - -private: - RowlockNodeImpl* rowlocknode_impl_; - std::string server_addr_; + public: + RowlockNodeZkAdapter(RowlockNodeImpl* rowlocknode_impl, const std::string& server_addr); + virtual ~RowlockNodeZkAdapter(); + virtual void Init(); + void OnLockChange(); + + private: + virtual void OnChildrenChanged(const std::string& path, const std::vector& name_list, + const std::vector& data_list) {} + virtual void OnNodeValueChanged(const std::string& path, const std::string& value) {} + virtual void OnNodeCreated(const std::string& path) {} + virtual void OnNodeDeleted(const std::string& path) {} + virtual void OnWatchFailed(const std::string& path, int watch_type, int err) {} + virtual void OnSessionTimeout() {} + + private: + RowlockNodeImpl* rowlocknode_impl_; + std::string server_addr_; }; -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera #endif // TERA_OBSERVER_ROWLOCKNODE_ROWLOCKNODE_ZK_ADAPTER_H_ - diff --git a/src/observer/rowlocknode/rowlocknode_zk_adapter_base.h b/src/observer/rowlocknode/rowlocknode_zk_adapter_base.h index 1ef93ccfb..aa5a9789e 100644 --- a/src/observer/rowlocknode/rowlocknode_zk_adapter_base.h +++ b/src/observer/rowlocknode/rowlocknode_zk_adapter_base.h @@ -11,11 +11,11 @@ namespace tera { namespace observer { class RowlockNodeZkAdapterBase : public tera::zk::ZooKeeperAdapter { -public: - virtual ~RowlockNodeZkAdapterBase() {} - virtual void Init() = 0; + public: + virtual ~RowlockNodeZkAdapterBase() {} + virtual void Init() = 0; }; -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera #endif // TERA_OBSERVER_ROWLOCKNODE_ROWLOCKNODE_ZK_ADAPTER_BASE_H_ diff --git a/src/observer/rowlocknode/zk_rowlock_client_zk_adapter.cc b/src/observer/rowlocknode/zk_rowlock_client_zk_adapter.cc deleted file mode 100644 index cacd993fc..000000000 --- a/src/observer/rowlocknode/zk_rowlock_client_zk_adapter.cc +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "observer/rowlocknode/zk_rowlock_client_zk_adapter.h" - -#include -#include - -#include "sdk/rowlock_client.h" -#include "types.h" - -DECLARE_string(rowlock_zk_root_path); -DECLARE_string(tera_zk_addr_list); -DECLARE_int32(rowlock_server_node_num); -DECLARE_int64(tera_zk_retry_period); -DECLARE_int32(tera_zk_timeout); -DECLARE_int32(tera_zk_retry_max_times); - -namespace tera { -namespace observer { - -ZkRowlockClientZkAdapter::ZkRowlockClientZkAdapter(RowlockClient* server_client, - const std::string& server_addr) - : client_(server_client), - server_addr_(server_addr) {} - -ZkRowlockClientZkAdapter::~ZkRowlockClientZkAdapter() { - ZooKeeperAdapter::Finalize(); -} - -bool ZkRowlockClientZkAdapter::Init() { - std::string root_path = FLAGS_rowlock_zk_root_path; - std::string proxy_path = root_path + kRowlockProxyPath; - - int zk_errno = zk::ZE_OK;; - // init zk client - while (!ZooKeeperAdapter::Init(FLAGS_tera_zk_addr_list, - FLAGS_rowlock_zk_root_path, FLAGS_tera_zk_timeout, - server_addr_, &zk_errno)) { - LOG(ERROR) << "fail to init zk : " << zk::ZkErrnoToString(zk_errno); - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - } - LOG(INFO) << "init zk success"; - - std::vector child; - std::vector value; - - while (!ListChildren(proxy_path, &child, &value, &zk_errno)) { - LOG(ERROR) << "fail to get proxy addr : " << zk::ZkErrnoToString(zk_errno); - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - } - client_->Update(value); - return true; -} - -} // namespace observer -} // namespace tera \ No newline at end of file diff --git a/src/observer/rowlocknode/zk_rowlock_client_zk_adapter.h b/src/observer/rowlocknode/zk_rowlock_client_zk_adapter.h deleted file mode 100644 index 76a388895..000000000 --- a/src/observer/rowlocknode/zk_rowlock_client_zk_adapter.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef TERA_OBSERVER_ROWLOCKNODE_ZK_ROWLOCK_CLIENT_ZK_ADAPTER_H_ -#define TERA_OBSERVER_ROWLOCKNODE_ZK_ROWLOCK_CLIENT_ZK_ADAPTER_H_ - -#include "zk/zk_adapter.h" - -namespace tera { -namespace observer { - -class RowlockClient; - -class ZkRowlockClientZkAdapter : public zk::ZooKeeperLightAdapter { -public: - ZkRowlockClientZkAdapter(RowlockClient* server_client, const std::string& server_addr); - virtual ~ZkRowlockClientZkAdapter(); - virtual bool Init(); - -private: - RowlockClient* client_; - std::string server_addr_; -}; - -} // namespace observer -} // namespace tera - -#endif // TERA_OBSERVER_ROWLOCKNODE_ZK_ROWLOCK_CLIENT_ZK_ADAPTER_H_ diff --git a/src/observer/rowlockproxy/remote_rowlock_proxy.cc b/src/observer/rowlockproxy/remote_rowlock_proxy.cc index 845d30fbe..7a37d2734 100644 --- a/src/observer/rowlockproxy/remote_rowlock_proxy.cc +++ b/src/observer/rowlockproxy/remote_rowlock_proxy.cc @@ -1,7 +1,7 @@ // Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. - + #include "observer/rowlockproxy/remote_rowlock_proxy.h" #include "gflags/gflags.h" @@ -11,26 +11,22 @@ DECLARE_int32(rowlock_thread_max_num); namespace tera { namespace observer { -RemoteRowlockProxy::RemoteRowlockProxy(RowlockProxyImpl* rowlock_proxy_impl) : - rowlock_proxy_impl_(rowlock_proxy_impl) { -} +RemoteRowlockProxy::RemoteRowlockProxy(RowlockProxyImpl* rowlock_proxy_impl) + : rowlock_proxy_impl_(rowlock_proxy_impl) {} -RemoteRowlockProxy::~RemoteRowlockProxy() { -} +RemoteRowlockProxy::~RemoteRowlockProxy() {} void RemoteRowlockProxy::Lock(google::protobuf::RpcController* controller, - const RowlockRequest* request, - RowlockResponse* response, - google::protobuf::Closure* done) { - rowlock_proxy_impl_->TryLock(request, response, done); + const RowlockRequest* request, RowlockResponse* response, + google::protobuf::Closure* done) { + rowlock_proxy_impl_->TryLock(request, response, done); } void RemoteRowlockProxy::UnLock(google::protobuf::RpcController* controller, - const RowlockRequest* request, - RowlockResponse* response, - google::protobuf::Closure* done) { - rowlock_proxy_impl_->UnLock(request, response, done); + const RowlockRequest* request, RowlockResponse* response, + google::protobuf::Closure* done) { + rowlock_proxy_impl_->UnLock(request, response, done); } -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera diff --git a/src/observer/rowlockproxy/remote_rowlock_proxy.h b/src/observer/rowlockproxy/remote_rowlock_proxy.h index df8e2c2b8..7483b5d9b 100644 --- a/src/observer/rowlockproxy/remote_rowlock_proxy.h +++ b/src/observer/rowlockproxy/remote_rowlock_proxy.h @@ -15,24 +15,20 @@ namespace tera { namespace observer { class RemoteRowlockProxy : public RowlockService { -public: - explicit RemoteRowlockProxy(RowlockProxyImpl* rowlock_proxy_impl); - ~RemoteRowlockProxy(); - - void Lock(google::protobuf::RpcController* controller, - const RowlockRequest* request, - RowlockResponse* response, - google::protobuf::Closure* done); - - void UnLock(google::protobuf::RpcController* controller, - const RowlockRequest* request, - RowlockResponse* response, - google::protobuf::Closure* done); - -private: - RowlockProxyImpl* rowlock_proxy_impl_; + public: + explicit RemoteRowlockProxy(RowlockProxyImpl* rowlock_proxy_impl); + ~RemoteRowlockProxy(); + + void Lock(google::protobuf::RpcController* controller, const RowlockRequest* request, + RowlockResponse* response, google::protobuf::Closure* done); + + void UnLock(google::protobuf::RpcController* controller, const RowlockRequest* request, + RowlockResponse* response, google::protobuf::Closure* done); + + private: + RowlockProxyImpl* rowlock_proxy_impl_; }; -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera #endif // TERA_OBSERVER_ROWLOCKPROXY_REMOTE_ROWLOCK_PROXY_H_ diff --git a/src/observer/rowlockproxy/rowlock_proxy_entry.cc b/src/observer/rowlockproxy/rowlock_proxy_entry.cc index e9f19faa0..6bda88442 100644 --- a/src/observer/rowlockproxy/rowlock_proxy_entry.cc +++ b/src/observer/rowlockproxy/rowlock_proxy_entry.cc @@ -21,59 +21,55 @@ DECLARE_string(rowlock_proxy_port); DECLARE_int32(rowlock_io_service_pool_size); DECLARE_int32(rowlock_rpc_work_thread_num); -std::string GetTeraEntryName() { - return "rowlock_proxy"; -} +std::string GetTeraEntryName() { return "rowlock_proxy"; } -tera::TeraEntry* GetTeraEntry() { - return new tera::observer::RowlockProxyEntry(); -} +tera::TeraEntry* GetTeraEntry() { return new tera::observer::RowlockProxyEntry(); } namespace tera { namespace observer { RowlockProxyEntry::RowlockProxyEntry() { - sofa::pbrpc::RpcServerOptions rpc_options; - rpc_options.max_throughput_in = -1; - rpc_options.max_throughput_out = -1; - rpc_options.work_thread_num = FLAGS_rowlock_rpc_work_thread_num; - rpc_options.io_service_pool_size = FLAGS_rowlock_io_service_pool_size; - rpc_server_.reset(new sofa::pbrpc::RpcServer(rpc_options)); + sofa::pbrpc::RpcServerOptions rpc_options; + rpc_options.max_throughput_in = -1; + rpc_options.max_throughput_out = -1; + rpc_options.work_thread_num = FLAGS_rowlock_rpc_work_thread_num; + rpc_options.io_service_pool_size = FLAGS_rowlock_io_service_pool_size; + rpc_server_.reset(new sofa::pbrpc::RpcServer(rpc_options)); } RowlockProxyEntry::~RowlockProxyEntry() {} bool RowlockProxyEntry::StartServer() { - IpAddress rowlock_proxy_addr("0.0.0.0", FLAGS_rowlock_proxy_port); - LOG(INFO) << "Start RPC server at: " << rowlock_proxy_addr.ToString(); - rowlock_proxy_impl_.reset(new RowlockProxyImpl()); - remote_rowlock_proxy_ = new RemoteRowlockProxy(rowlock_proxy_impl_.get()); - rpc_server_->RegisterService(remote_rowlock_proxy_); - if (!rpc_server_->Start(rowlock_proxy_addr.ToString())) { - LOG(ERROR) << "start RPC server error"; - return false; - } - if (!rowlock_proxy_impl_->Init()) { - LOG(ERROR) << "fail to init rowlocknode_impl"; - return false; - } - LOG(INFO) << "finish starting RPC server"; + IpAddress rowlock_proxy_addr("0.0.0.0", FLAGS_rowlock_proxy_port); + LOG(INFO) << "Start RPC server at: " << rowlock_proxy_addr.ToString(); + rowlock_proxy_impl_.reset(new RowlockProxyImpl()); + remote_rowlock_proxy_ = new RemoteRowlockProxy(rowlock_proxy_impl_.get()); + rpc_server_->RegisterService(remote_rowlock_proxy_); + if (!rpc_server_->Start(rowlock_proxy_addr.ToString())) { + LOG(ERROR) << "start RPC server error"; + return false; + } + if (!rowlock_proxy_impl_->Init()) { + LOG(ERROR) << "fail to init rowlocknode_impl"; + return false; + } + LOG(INFO) << "finish starting RPC server"; - return true; + return true; } void RowlockProxyEntry::ShutdownServer() { - LOG(INFO) << "shut down server"; - rpc_server_->Stop(); + LOG(INFO) << "shut down server"; + rpc_server_->Stop(); - LOG(INFO) << "RowlockProxyEntry stop done!"; - _exit(0); + LOG(INFO) << "RowlockProxyEntry stop done!"; + _exit(0); } bool RowlockProxyEntry::Run() { - ThisThread::Sleep(1000); - return true; + ThisThread::Sleep(1000); + return true; } -} // namespace observer -} // namespace tera \ No newline at end of file +} // namespace observer +} // namespace tera \ No newline at end of file diff --git a/src/observer/rowlockproxy/rowlock_proxy_entry.h b/src/observer/rowlockproxy/rowlock_proxy_entry.h index 547cf8d04..76690d368 100644 --- a/src/observer/rowlockproxy/rowlock_proxy_entry.h +++ b/src/observer/rowlockproxy/rowlock_proxy_entry.h @@ -11,27 +11,27 @@ #include "observer/rowlockproxy/remote_rowlock_proxy.h" #include "observer/rowlockproxy/rowlock_proxy_impl.h" -#include "tera_entry.h" +#include "tera/tera_entry.h" namespace tera { namespace observer { class RowlockProxyEntry : public tera::TeraEntry { -public: - RowlockProxyEntry(); - virtual ~RowlockProxyEntry(); - - virtual bool StartServer(); - virtual bool Run(); - virtual void ShutdownServer(); - -private: - std::unique_ptr rowlock_proxy_impl_; - RemoteRowlockProxy* remote_rowlock_proxy_; - std::unique_ptr rpc_server_; + public: + RowlockProxyEntry(); + virtual ~RowlockProxyEntry(); + + virtual bool StartServer(); + virtual bool Run(); + virtual void ShutdownServer(); + + private: + std::unique_ptr rowlock_proxy_impl_; + RemoteRowlockProxy* remote_rowlock_proxy_; + std::unique_ptr rpc_server_; }; -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera -#endif // TERA_OBSERVER_ROWLOCKPROXY_ROWLOCK_PROXY_ENTRY_H_ \ No newline at end of file +#endif // TERA_OBSERVER_ROWLOCKPROXY_ROWLOCK_PROXY_ENTRY_H_ diff --git a/src/observer/rowlockproxy/rowlock_proxy_impl.cc b/src/observer/rowlockproxy/rowlock_proxy_impl.cc index f19a0d46b..5a9d9a9b6 100644 --- a/src/observer/rowlockproxy/rowlock_proxy_impl.cc +++ b/src/observer/rowlockproxy/rowlock_proxy_impl.cc @@ -18,97 +18,84 @@ namespace tera { namespace observer { RowlockProxyImpl::RowlockProxyImpl() - : server_addrs_(new std::vector), - server_number_(1) {} + : server_addrs_(new std::vector), server_number_(1) {} -RowlockProxyImpl::~RowlockProxyImpl() { -} +RowlockProxyImpl::~RowlockProxyImpl() {} bool RowlockProxyImpl::Init() { - if (FLAGS_tera_coord_type == "zk") { - zk_adapter_.reset(new RowlockProxyZkAdapter(this, - tera::utils::GetLocalHostName() + ":" + FLAGS_rowlock_proxy_port)); - } else { - zk_adapter_.reset(new InsRowlockProxyZkAdapter(this, - tera::utils::GetLocalHostName() + ":" + FLAGS_rowlock_proxy_port)); - } - - if (!zk_adapter_->Init()) { - LOG(ERROR) << "init zk adapter fail"; - return false; - } - - LOG(INFO) << "Rowlock node init finish"; - return true; + if (FLAGS_tera_coord_type == "zk") { + zk_adapter_.reset(new RowlockProxyZkAdapter( + this, tera::utils::GetLocalHostName() + ":" + FLAGS_rowlock_proxy_port)); + } else { + zk_adapter_.reset(new InsRowlockProxyZkAdapter( + this, tera::utils::GetLocalHostName() + ":" + FLAGS_rowlock_proxy_port)); + } + + if (!zk_adapter_->Init()) { + LOG(ERROR) << "init zk adapter fail"; + return false; + } + + LOG(INFO) << "Rowlock node init finish"; + return true; } -void RowlockProxyImpl::TryLock(const RowlockRequest* request, - RowlockResponse* response, +void RowlockProxyImpl::TryLock(const RowlockRequest* request, RowlockResponse* response, google::protobuf::Closure* done) { + uint64_t rowlock_key = GetRowKey(request->table_name(), request->row()); + std::string addr = ScheduleRowKey(rowlock_key); - uint64_t rowlock_key = GetRowKey(request->table_name(), request->row()); - std::string addr = ScheduleRowKey(rowlock_key); - - RowlockStub client(addr); - client.TryLock(request, response); - VLOG(12) << "lock row: " << rowlock_key; - done->Run(); + RowlockStub client(addr); + client.TryLock(request, response); + VLOG(12) << "lock row: " << rowlock_key; + done->Run(); } -void RowlockProxyImpl::UnLock(const RowlockRequest* request, - RowlockResponse* response, +void RowlockProxyImpl::UnLock(const RowlockRequest* request, RowlockResponse* response, google::protobuf::Closure* done) { + uint64_t rowlock_key = GetRowKey(request->table_name(), request->row()); + std::string addr = ScheduleRowKey(rowlock_key); - uint64_t rowlock_key = GetRowKey(request->table_name(), request->row()); - std::string addr = ScheduleRowKey(rowlock_key); - - RowlockStub client(addr); - client.UnLock(request, response); - VLOG(12) << "unlock row: " << rowlock_key; - done->Run(); - + RowlockStub client(addr); + client.UnLock(request, response); + VLOG(12) << "unlock row: " << rowlock_key; + done->Run(); } -uint64_t RowlockProxyImpl::GetRowKey(const std::string& table_name, - const std::string& row) const { - std::string rowkey_str = table_name + row; - return std::hash()(rowkey_str); +uint64_t RowlockProxyImpl::GetRowKey(const std::string& table_name, const std::string& row) const { + std::string rowkey_str = table_name + row; + return std::hash()(rowkey_str); } std::string RowlockProxyImpl::ScheduleRowKey(uint64_t row_key) { - std::shared_ptr> server_addrs_copy; + std::shared_ptr> server_addrs_copy; - MutexLock locker(&server_addrs_mutex_); - // copy for copy-on-write, ref +1 - server_addrs_copy = server_addrs_; + MutexLock locker(&server_addrs_mutex_); + // copy for copy-on-write, ref +1 + server_addrs_copy = server_addrs_; - return (*server_addrs_copy)[row_key % server_number_]; + return (*server_addrs_copy)[row_key % server_number_]; } void RowlockProxyImpl::SetServerNumber(uint32_t number) { - MutexLock locker(&server_addrs_mutex_); + MutexLock locker(&server_addrs_mutex_); - server_number_ = number; + server_number_ = number; - if (server_addrs_->size() < number) { - server_addrs_->resize(number); - } + if (server_addrs_->size() < number) { + server_addrs_->resize(number); + } } void RowlockProxyImpl::UpdateServers(uint32_t id, const std::string& addr) { - // update data first - { - MutexLock locker(&server_addrs_mutex_); - (*server_addrs_)[id] = addr; - } -} - -uint32_t RowlockProxyImpl::GetServerNumber() { - return server_number_; + // update data first + { + MutexLock locker(&server_addrs_mutex_); + (*server_addrs_)[id] = addr; + } } -} // namespace observer -} // namespace tera - - +uint32_t RowlockProxyImpl::GetServerNumber() { return server_number_; } +} // namespace observer +} // namespace tera diff --git a/src/observer/rowlockproxy/rowlock_proxy_impl.h b/src/observer/rowlockproxy/rowlock_proxy_impl.h index 687e0fc42..a63443f64 100644 --- a/src/observer/rowlockproxy/rowlock_proxy_impl.h +++ b/src/observer/rowlockproxy/rowlock_proxy_impl.h @@ -23,48 +23,43 @@ class RowlockProxyZkAdapterBase; class RowLockStub; class RowlockProxyImpl { -public: - RowlockProxyImpl(); - ~RowlockProxyImpl(); + public: + RowlockProxyImpl(); + ~RowlockProxyImpl(); - bool Init(); + bool Init(); - void TryLock(const RowlockRequest* request, - RowlockResponse* response, - google::protobuf::Closure* done); + void TryLock(const RowlockRequest* request, RowlockResponse* response, + google::protobuf::Closure* done); - void UnLock(const RowlockRequest* request, - RowlockResponse* response, - google::protobuf::Closure* done); + void UnLock(const RowlockRequest* request, RowlockResponse* response, + google::protobuf::Closure* done); - // for zk - void SetServerNumber(uint32_t number); - uint32_t GetServerNumber(); - void UpdateServers(uint32_t id, const std::string& addr); -private: - uint64_t GetRowKey(const std::string& table_name, - const std::string& row) const; - // rowkey -> server addr - std::string ScheduleRowKey(uint64_t row_key); - void ProxyCallBack(google::protobuf::Closure* done, - const RowlockRequest* request, - RowlockResponse* response, - bool failed, - int error_code); + // for zk + void SetServerNumber(uint32_t number); + uint32_t GetServerNumber(); + void UpdateServers(uint32_t id, const std::string& addr); -private: - common::Mutex server_addrs_mutex_; - // a map from virtual node to server addr - // key: vector index, virtual node number - // value: vector value, server address - // shared_ptr: used for copy-on-write - std::shared_ptr> server_addrs_; + private: + uint64_t GetRowKey(const std::string& table_name, const std::string& row) const; + // rowkey -> server addr + std::string ScheduleRowKey(uint64_t row_key); + void ProxyCallBack(google::protobuf::Closure* done, const RowlockRequest* request, + RowlockResponse* response, bool failed, int error_code); - uint32_t server_number_; - std::unique_ptr zk_adapter_; + private: + common::Mutex server_addrs_mutex_; + // a map from virtual node to server addr + // key: vector index, virtual node number + // value: vector value, server address + // shared_ptr: used for copy-on-write + std::shared_ptr> server_addrs_; + + uint32_t server_number_; + std::unique_ptr zk_adapter_; }; -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera #endif // TERA_OBSERVER_ROWLOCKPROXY_ROWLOCK_PROXY_IMPL_H_ diff --git a/src/observer/rowlockproxy/rowlock_proxy_zk_adapter.cc b/src/observer/rowlockproxy/rowlock_proxy_zk_adapter.cc index 290c6815c..011b27d7f 100644 --- a/src/observer/rowlockproxy/rowlock_proxy_zk_adapter.cc +++ b/src/observer/rowlockproxy/rowlock_proxy_zk_adapter.cc @@ -15,7 +15,7 @@ DECLARE_string(rowlock_zk_root_path); DECLARE_string(tera_zk_addr_list); DECLARE_int32(rowlock_server_node_num); -DECLARE_int64(tera_zk_retry_period); +DECLARE_int64(tera_zk_retry_period); DECLARE_int32(tera_zk_timeout); DECLARE_int32(tera_zk_retry_max_times); @@ -25,387 +25,375 @@ DECLARE_string(tera_ins_addr_list); namespace tera { namespace observer { -RowlockProxyZkAdapter::RowlockProxyZkAdapter(RowlockProxyImpl* rowlock_proxy_impl, - const std::string& server_addr) - : rowlock_proxy_impl_(rowlock_proxy_impl), - server_addr_(server_addr) {} +RowlockProxyZkAdapter::RowlockProxyZkAdapter(RowlockProxyImpl* rowlock_proxy_impl, + const std::string& server_addr) + : rowlock_proxy_impl_(rowlock_proxy_impl), server_addr_(server_addr) {} bool RowlockProxyZkAdapter::Init() { - std::string root_path = FLAGS_rowlock_zk_root_path; - std::string node_num_key = root_path + kRowlockNodeNumPath; - std::string id_lock_path; - std::string proxy_path = root_path + kRowlockProxyPath + "/" + server_addr_; - - int zk_errno = zk::ZE_OK; - int32_t retry_count = 0; - // init zk client - while (!ZooKeeperAdapter::Init(FLAGS_tera_zk_addr_list, - FLAGS_rowlock_zk_root_path, FLAGS_tera_zk_timeout, - server_addr_, &zk_errno)) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to init zk: " << zk::ZkErrnoToString(zk_errno); - return false; - } - - LOG(ERROR) << "init zk fail: " << zk::ZkErrnoToString(zk_errno) - << ". retry in " << FLAGS_tera_zk_retry_period << " ms, retry: " - << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; - } - LOG(INFO) << "init zk success"; - - // get session id - int64_t session_id_int = 0; - if (!GetSessionId(&session_id_int, &zk_errno)) { - LOG(ERROR) << "get session id fail : " << zk::ZkErrnoToString(zk_errno); - return false; - } - - bool is_exist = false; - uint32_t node_num; - while(!is_exist) { - CheckExist(node_num_key, &is_exist, &zk_errno); - if (!is_exist) { - LOG(ERROR) << "rowlock service number node not found: " << node_num_key - << " make sure rowlock zk available"; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - } - } - std::string value; - ReadAndWatchNode(node_num_key, &value, &zk_errno); - - if (!StringToNumber(value, &node_num)) { - LOG(ERROR) << "read number node fail"; - return false; + std::string root_path = FLAGS_rowlock_zk_root_path; + std::string node_num_key = root_path + kRowlockNodeNumPath; + std::string id_lock_path; + std::string proxy_path = root_path + kRowlockProxyPath + "/" + server_addr_; + + int zk_errno = zk::ZE_OK; + int32_t retry_count = 0; + // init zk client + while (!ZooKeeperAdapter::Init(FLAGS_tera_zk_addr_list, FLAGS_rowlock_zk_root_path, + FLAGS_tera_zk_timeout, server_addr_, &zk_errno)) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to init zk: " << zk::ZkErrnoToString(zk_errno); + return false; } - - rowlock_proxy_impl_->SetServerNumber(node_num); - retry_count = 0; - id_lock_path = root_path + kRowlockNodeIdListPath; - std::vector name_list; - std::vector data_list; - - while (!ListAndWatchChildren(id_lock_path, &name_list, &data_list, - &zk_errno) || name_list.size() != node_num) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to watch rowlock server list or lack rowlock server"; - return false; - } - LOG(ERROR) << "retry watch rowlock server list in " - << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count - << " node_num: " << node_num << " list size: " << name_list.size(); - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; + LOG(ERROR) << "init zk fail: " << zk::ZkErrnoToString(zk_errno) << ". retry in " + << FLAGS_tera_zk_retry_period << " ms, retry: " << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + LOG(INFO) << "init zk success"; + + // get session id + int64_t session_id_int = 0; + if (!GetSessionId(&session_id_int, &zk_errno)) { + LOG(ERROR) << "get session id fail : " << zk::ZkErrnoToString(zk_errno); + return false; + } + + bool is_exist = false; + uint32_t node_num; + while (!is_exist) { + CheckExist(node_num_key, &is_exist, &zk_errno); + if (!is_exist) { + LOG(ERROR) << "rowlock service number node not found: " << node_num_key + << " make sure rowlock zk available"; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); } - size_t list_count = name_list.size(); - for (size_t i = 0; i < list_count; i++) { - const std::string& name = name_list[i]; - const std::string& data = data_list[i]; - - uint32_t id; - StringToNumber(name, &id); - rowlock_proxy_impl_->UpdateServers(id, data); + } + std::string value; + ReadAndWatchNode(node_num_key, &value, &zk_errno); + + if (!StringToNumber(value, &node_num)) { + LOG(ERROR) << "read number node fail"; + return false; + } + + rowlock_proxy_impl_->SetServerNumber(node_num); + + retry_count = 0; + id_lock_path = root_path + kRowlockNodeIdListPath; + std::vector name_list; + std::vector data_list; + + while (!ListAndWatchChildren(id_lock_path, &name_list, &data_list, &zk_errno) || + name_list.size() != node_num) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to watch rowlock server list or lack rowlock server"; + return false; } - - // create proxy node - retry_count = 0; - while (!CreateEphemeralNode(proxy_path, server_addr_, &zk_errno)) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to create proxy node"; - return false; - } - LOG(ERROR) << "retry create rowlock number node in " - << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; + LOG(ERROR) << "retry watch rowlock server list in " << FLAGS_tera_zk_retry_period + << " ms, retry=" << retry_count << " node_num: " << node_num + << " list size: " << name_list.size(); + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + size_t list_count = name_list.size(); + for (size_t i = 0; i < list_count; i++) { + const std::string& name = name_list[i]; + const std::string& data = data_list[i]; + + uint32_t id; + StringToNumber(name, &id); + rowlock_proxy_impl_->UpdateServers(id, data); + } + + // create proxy node + retry_count = 0; + while (!CreateEphemeralNode(proxy_path, server_addr_, &zk_errno)) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to create proxy node"; + return false; } - return true; + LOG(ERROR) << "retry create rowlock number node in " << FLAGS_tera_zk_retry_period + << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + return true; } -void RowlockProxyZkAdapter::OnNodeValueChanged(const std::string& path, - const std::string& value) { - std::string value_str; - int zk_errno = zk::ZE_OK; - std::string node_num_key = FLAGS_rowlock_zk_root_path + kRowlockNodeNumPath; - - if (path == node_num_key) { - LOG(WARNING) << "rowlock service server number changed to " << value; - uint32_t node_num; - StringToNumber(value, &node_num); - rowlock_proxy_impl_->SetServerNumber(node_num); - ReadAndWatchNode(node_num_key, &value_str, &zk_errno); - } +void RowlockProxyZkAdapter::OnNodeValueChanged(const std::string& path, const std::string& value) { + std::string value_str; + int zk_errno = zk::ZE_OK; + std::string node_num_key = FLAGS_rowlock_zk_root_path + kRowlockNodeNumPath; + + if (path == node_num_key) { + LOG(WARNING) << "rowlock service server number changed to " << value; + uint32_t node_num; + StringToNumber(value, &node_num); + rowlock_proxy_impl_->SetServerNumber(node_num); + ReadAndWatchNode(node_num_key, &value_str, &zk_errno); + } } -void RowlockProxyZkAdapter::OnWatchFailed(const std::string& path, int watch_type, - int err) { - LOG(ERROR) << "watch failed ! " << path; - _Exit(EXIT_FAILURE); +void RowlockProxyZkAdapter::OnWatchFailed(const std::string& path, int watch_type, int err) { + LOG(ERROR) << "watch failed ! " << path; + _Exit(EXIT_FAILURE); } void RowlockProxyZkAdapter::OnSessionTimeout() { - LOG(ERROR) << "zk session timeout!"; - _Exit(EXIT_FAILURE); + LOG(ERROR) << "zk session timeout!"; + _Exit(EXIT_FAILURE); } void RowlockProxyZkAdapter::OnNodeCreated(const std::string& path) { - std::string value; - int zk_errno = zk::ZE_OK; - - if (path == FLAGS_rowlock_zk_root_path + kRowlockNodeNumPath) { - LOG(WARNING) << "rowlock service number node create"; - ReadAndWatchNode(path, &value, &zk_errno); - uint32_t node_num; - StringToNumber(value, &node_num); - rowlock_proxy_impl_->SetServerNumber(node_num); - } else { - std::string id_str = path.substr(path.find_last_of("/"), - path.size() - path.find_last_of("/")); - uint32_t id; - StringToNumber(id_str, &id); - ReadAndWatchNode(path, &value, &zk_errno); - rowlock_proxy_impl_->UpdateServers(id, value); - } + std::string value; + int zk_errno = zk::ZE_OK; + + if (path == FLAGS_rowlock_zk_root_path + kRowlockNodeNumPath) { + LOG(WARNING) << "rowlock service number node create"; + ReadAndWatchNode(path, &value, &zk_errno); + uint32_t node_num; + StringToNumber(value, &node_num); + rowlock_proxy_impl_->SetServerNumber(node_num); + } else { + std::string id_str = path.substr(path.find_last_of("/"), path.size() - path.find_last_of("/")); + uint32_t id; + StringToNumber(id_str, &id); + ReadAndWatchNode(path, &value, &zk_errno); + rowlock_proxy_impl_->UpdateServers(id, value); + } } void RowlockProxyZkAdapter::OnNodeDeleted(const std::string& path) { - LOG(ERROR) << "node deleted: " << path; - - int zk_errno = zk::ZE_OK; - bool is_exist = false; - if (path == FLAGS_rowlock_zk_root_path + kRowlockNodeNumPath) { - while(!is_exist) { - CheckExist(path, &is_exist, &zk_errno); - if (!is_exist) { - LOG(ERROR) << "rowlock service number node not found: " << path - << " make sure rowlock zk available"; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - } - - std::string value; - ReadAndWatchNode(path, &value, &zk_errno); - uint32_t node_num; - if (!StringToNumber(value, &node_num)) { - LOG(ERROR) << "read number node fail"; - return; - } - - rowlock_proxy_impl_->SetServerNumber(node_num); - } - return; - } - // server node - std::string id_str = path.substr(path.find_last_of("/"), - path.size() - path.find_last_of("/")); - uint32_t id; - StringToNumber(id_str, &id); - - if (id >= rowlock_proxy_impl_->GetServerNumber()) { - return; - } - - while(!is_exist) { - CheckExist(path, &is_exist, &zk_errno); - if (!is_exist) { - LOG(ERROR) << "rowlock server node not found: " << path; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - } - - std::string value; - ReadAndWatchNode(path, &value, &zk_errno); - uint32_t node_num; - if (!StringToNumber(value, &node_num)) { - LOG(ERROR) << "read number node fail"; - return; - } - - rowlock_proxy_impl_->UpdateServers(node_num, value); - } + LOG(ERROR) << "node deleted: " << path; + + int zk_errno = zk::ZE_OK; + bool is_exist = false; + if (path == FLAGS_rowlock_zk_root_path + kRowlockNodeNumPath) { + while (!is_exist) { + CheckExist(path, &is_exist, &zk_errno); + if (!is_exist) { + LOG(ERROR) << "rowlock service number node not found: " << path + << " make sure rowlock zk available"; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + + std::string value; + ReadAndWatchNode(path, &value, &zk_errno); + uint32_t node_num; + if (!StringToNumber(value, &node_num)) { + LOG(ERROR) << "read number node fail"; + return; + } + + rowlock_proxy_impl_->SetServerNumber(node_num); + } + return; + } + // server node + std::string id_str = path.substr(path.find_last_of("/"), path.size() - path.find_last_of("/")); + uint32_t id; + StringToNumber(id_str, &id); + + if (id >= rowlock_proxy_impl_->GetServerNumber()) { + return; + } + + while (!is_exist) { + CheckExist(path, &is_exist, &zk_errno); + if (!is_exist) { + LOG(ERROR) << "rowlock server node not found: " << path; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + + std::string value; + ReadAndWatchNode(path, &value, &zk_errno); + uint32_t node_num; + if (!StringToNumber(value, &node_num)) { + LOG(ERROR) << "read number node fail"; + return; + } + + rowlock_proxy_impl_->UpdateServers(node_num, value); + } } void RowlockProxyZkAdapter::OnChildrenChanged(const std::string& path, - const std::vector& name_list, - const std::vector& data_list) { - std::string root_path = FLAGS_rowlock_ins_root_path; - int32_t retry_count = 0; - int zk_errno = zk::ZE_OK; - std::string id_lock_path = root_path + kRowlockNodeIdListPath; - std::vector names; - std::vector datum; - - while (!ListAndWatchChildren(id_lock_path, &names, &datum, - &zk_errno)) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to watch rowlock server list or lack rowlock server"; - _Exit(EXIT_FAILURE); - } - LOG(ERROR) << "retry watch rowlock server list in " - << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - } - size_t list_count = name_list.size(); - for (size_t i = 0; i < list_count; i++) { - const std::string& name = names[i]; - const std::string& data = datum[i]; - - uint32_t id; - StringToNumber(name, &id); - rowlock_proxy_impl_->UpdateServers(id, data); + const std::vector& name_list, + const std::vector& data_list) { + std::string root_path = FLAGS_rowlock_ins_root_path; + int32_t retry_count = 0; + int zk_errno = zk::ZE_OK; + std::string id_lock_path = root_path + kRowlockNodeIdListPath; + std::vector names; + std::vector datum; + + while (!ListAndWatchChildren(id_lock_path, &names, &datum, &zk_errno)) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to watch rowlock server list or lack rowlock server"; + _Exit(EXIT_FAILURE); } + LOG(ERROR) << "retry watch rowlock server list in " << FLAGS_tera_zk_retry_period + << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + size_t list_count = name_list.size(); + for (size_t i = 0; i < list_count; i++) { + const std::string& name = names[i]; + const std::string& data = datum[i]; + + uint32_t id; + StringToNumber(name, &id); + rowlock_proxy_impl_->UpdateServers(id, data); + } } // ins -InsRowlockProxyZkAdapter::InsRowlockProxyZkAdapter(RowlockProxyImpl* rowlock_proxy_impl, - const std::string& server_addr) - : rowlock_proxy_impl_(rowlock_proxy_impl), - server_addr_(server_addr) {} +InsRowlockProxyZkAdapter::InsRowlockProxyZkAdapter(RowlockProxyImpl* rowlock_proxy_impl, + const std::string& server_addr) + : rowlock_proxy_impl_(rowlock_proxy_impl), server_addr_(server_addr) {} static void InsOnNumberChange(const galaxy::ins::sdk::WatchParam& param, - galaxy::ins::sdk::SDKError error) { - InsRowlockProxyZkAdapter* ins_adp = static_cast(param.context); - ins_adp->OnValueChange(param.key, param.value); + galaxy::ins::sdk::SDKError error) { + InsRowlockProxyZkAdapter* ins_adp = static_cast(param.context); + ins_adp->OnValueChange(param.key, param.value); } static void InsOnServerChange(const galaxy::ins::sdk::WatchParam& param, - galaxy::ins::sdk::SDKError error) { - InsRowlockProxyZkAdapter* ins_adp = static_cast(param.context); - ins_adp->OnServerChange(); + galaxy::ins::sdk::SDKError error) { + InsRowlockProxyZkAdapter* ins_adp = static_cast(param.context); + ins_adp->OnServerChange(); } bool InsRowlockProxyZkAdapter::Init() { - std::string root_path = FLAGS_rowlock_ins_root_path; - std::string node_num_key = root_path + kRowlockNodeNumPath; - std::string proxy_path = root_path + kRowlockProxyPath + "/" + server_addr_; - std::string value; - galaxy::ins::sdk::SDKError err; - - ins_sdk_ = new galaxy::ins::sdk::InsSDK(FLAGS_tera_ins_addr_list); - - LOG(INFO) << "init ins success"; - - if (!ins_sdk_->Get(node_num_key, &value, &err)) { - LOG(ERROR) << "ins rowlock service number node not found: " << node_num_key - << " make sure rowlock ins available"; - return false; + std::string root_path = FLAGS_rowlock_ins_root_path; + std::string node_num_key = root_path + kRowlockNodeNumPath; + std::string proxy_path = root_path + kRowlockProxyPath + "/" + server_addr_; + std::string value; + galaxy::ins::sdk::SDKError err; + + ins_sdk_ = new galaxy::ins::sdk::InsSDK(FLAGS_tera_ins_addr_list); + + LOG(INFO) << "init ins success"; + + if (!ins_sdk_->Get(node_num_key, &value, &err)) { + LOG(ERROR) << "ins rowlock service number node not found: " << node_num_key + << " make sure rowlock ins available"; + return false; + } + + uint32_t node_num; + if (!StringToNumber(value, &node_num)) { + LOG(ERROR) << "read number node fail"; + return false; + } + rowlock_proxy_impl_->SetServerNumber(node_num); + + if (!ins_sdk_->Watch(node_num_key, InsOnNumberChange, this, &err)) { + LOG(ERROR) << "try to watch number node ,path=" << node_num_key << " failed," + << ins_sdk_->ErrorToString(err); + return false; + } + + // read server addr + int32_t retry_count = 0; + std::string id_lock_path = root_path + kRowlockNodeIdListPath; + + while (!ins_sdk_->Watch(id_lock_path, InsOnServerChange, this, &err)) { + LOG(ERROR) << "try to watch server node ,path=" << id_lock_path << " failed," + << ins_sdk_->ErrorToString(err); + if (retry_count++ > FLAGS_tera_zk_retry_max_times) { + return false; } + } - uint32_t node_num; - if (!StringToNumber(value, &node_num)) { - LOG(ERROR) << "read number node fail"; - return false; - } - rowlock_proxy_impl_->SetServerNumber(node_num); + galaxy::ins::sdk::ScanResult* result = ins_sdk_->Scan(id_lock_path + "/!", id_lock_path + "/~"); + while (!result->Done()) { + CHECK_EQ(result->Error(), galaxy::ins::sdk::kOK); + std::string value = result->Value(); + std::string key = result->Key(); + VLOG(12) << "Key: " << key << " value: " << value; - if (!ins_sdk_->Watch(node_num_key, InsOnNumberChange, this, &err)) { - LOG(ERROR) << "try to watch number node ,path=" << node_num_key << " failed," - << ins_sdk_->ErrorToString(err); - return false; + uint32_t node_num; + uint32_t pos = key.find_last_of("/") + 1; + key = key.substr(pos, key.length() - pos); + VLOG(12) << "key: " << key; + if (!StringToNumber(key, &node_num)) { + LOG(ERROR) << "read number node fail"; + _Exit(EXIT_FAILURE); } - - // read server addr - int32_t retry_count = 0; - std::string id_lock_path = root_path + kRowlockNodeIdListPath; - - while (!ins_sdk_->Watch(id_lock_path, InsOnServerChange, this, &err)) { - LOG(ERROR) << "try to watch server node ,path=" << id_lock_path << " failed," - << ins_sdk_->ErrorToString(err); - if (retry_count++ > FLAGS_tera_zk_retry_max_times) { - return false; - } - } - - galaxy::ins::sdk::ScanResult* result = ins_sdk_->Scan(id_lock_path+"/!", - id_lock_path+"/~"); - while (!result->Done()) { - CHECK_EQ(result->Error(), galaxy::ins::sdk::kOK); - std::string value = result->Value(); - std::string key = result->Key(); - VLOG(12) << "Key: " << key << " value: " << value; - - uint32_t node_num; - uint32_t pos = key.find_last_of("/") + 1; - key = key.substr(pos, key.length() - pos); - VLOG(12) << "key: " << key; - if (!StringToNumber(key, &node_num)) { - LOG(ERROR) << "read number node fail"; - _Exit(EXIT_FAILURE); - } - - rowlock_proxy_impl_->UpdateServers(node_num, value); - result->Next(); - } - delete result; - - // create proxy node - retry_count = 0; - while (!ins_sdk_->Put(proxy_path, server_addr_, &err)) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to create proxy node"; - return false; - } - LOG(ERROR) << "retry create rowlock number node in " - << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); + rowlock_proxy_impl_->UpdateServers(node_num, value); + result->Next(); + } + delete result; + + // create proxy node + retry_count = 0; + while (!ins_sdk_->Put(proxy_path, server_addr_, &err)) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to create proxy node"; + return false; } - return true; + LOG(ERROR) << "retry create rowlock number node in " << FLAGS_tera_zk_retry_period + << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + return true; } void InsRowlockProxyZkAdapter::OnValueChange(const std::string& path, const std::string& value) { - uint32_t node_num; - galaxy::ins::sdk::SDKError err; + uint32_t node_num; + galaxy::ins::sdk::SDKError err; + + if (!StringToNumber(value, &node_num)) { + LOG(ERROR) << "read number node fail"; + return; + } + rowlock_proxy_impl_->SetServerNumber(node_num); + + if (!ins_sdk_->Watch(path, InsOnNumberChange, this, &err)) { + LOG(ERROR) << "try to watch number node ,path=" << path << " failed," + << ins_sdk_->ErrorToString(err); + return; + } +} - if (!StringToNumber(value, &node_num)) { - LOG(ERROR) << "read number node fail"; - return; - } - rowlock_proxy_impl_->SetServerNumber(node_num); +void InsRowlockProxyZkAdapter::OnServerChange() { + galaxy::ins::sdk::SDKError err; + std::string root_path = FLAGS_rowlock_ins_root_path; - if (!ins_sdk_->Watch(path, InsOnNumberChange, this, &err)) { - LOG(ERROR) << "try to watch number node ,path=" << path << " failed," - << ins_sdk_->ErrorToString(err); - return; + int32_t retry_count = 0; + std::string id_lock_path = root_path + kRowlockNodeIdListPath; + + while (!ins_sdk_->Watch(id_lock_path, InsOnServerChange, this, &err)) { + LOG(ERROR) << "try to watch server node ,path=" << id_lock_path << " failed," + << ins_sdk_->ErrorToString(err); + if (retry_count++ > FLAGS_tera_zk_retry_max_times) { + _Exit(EXIT_FAILURE); } -} + } -void InsRowlockProxyZkAdapter::OnServerChange() { - galaxy::ins::sdk::SDKError err; - std::string root_path = FLAGS_rowlock_ins_root_path; - - int32_t retry_count = 0; - std::string id_lock_path = root_path + kRowlockNodeIdListPath; - - while (!ins_sdk_->Watch(id_lock_path, InsOnServerChange, this, &err)) { - LOG(ERROR) << "try to watch server node ,path=" << id_lock_path << " failed," - << ins_sdk_->ErrorToString(err); - if (retry_count++ > FLAGS_tera_zk_retry_max_times) { - _Exit(EXIT_FAILURE); - } - } - - galaxy::ins::sdk::ScanResult* result = ins_sdk_->Scan(id_lock_path+"/!", - id_lock_path+"/~"); - while (!result->Done()) { - CHECK_EQ(result->Error(), galaxy::ins::sdk::kOK); - std::string value = result->Value(); - std::string key = result->Key(); - - uint32_t node_num; - uint32_t pos = key.find_last_of("/") + 1; - key = key.substr(pos, key.length() - pos); - VLOG(12) << "key: " << key; - if (!StringToNumber(key, &node_num)) { - LOG(ERROR) << "read number node fail"; - _Exit(EXIT_FAILURE); - } - - rowlock_proxy_impl_->UpdateServers(node_num, value); - result->Next(); + galaxy::ins::sdk::ScanResult* result = ins_sdk_->Scan(id_lock_path + "/!", id_lock_path + "/~"); + while (!result->Done()) { + CHECK_EQ(result->Error(), galaxy::ins::sdk::kOK); + std::string value = result->Value(); + std::string key = result->Key(); + + uint32_t node_num; + uint32_t pos = key.find_last_of("/") + 1; + key = key.substr(pos, key.length() - pos); + VLOG(12) << "key: " << key; + if (!StringToNumber(key, &node_num)) { + LOG(ERROR) << "read number node fail"; + _Exit(EXIT_FAILURE); } - delete result; + + rowlock_proxy_impl_->UpdateServers(node_num, value); + result->Next(); + } + delete result; } -} // namespace observer -} // namespace tera \ No newline at end of file +} // namespace observer +} // namespace tera diff --git a/src/observer/rowlockproxy/rowlock_proxy_zk_adapter.h b/src/observer/rowlockproxy/rowlock_proxy_zk_adapter.h index 02125135c..f8e18793d 100644 --- a/src/observer/rowlockproxy/rowlock_proxy_zk_adapter.h +++ b/src/observer/rowlockproxy/rowlock_proxy_zk_adapter.h @@ -10,10 +10,10 @@ namespace galaxy { namespace ins { namespace sdk { - class InsSDK; -} // namespace sdk -} // namespace ins -} // namespace galaxy +class InsSDK; +} // namespace sdk +} // namespace ins +} // namespace galaxy namespace tera { namespace observer { @@ -21,63 +21,56 @@ namespace observer { class RowlockProxyImpl; class RowlockProxyZkAdapterBase : public zk::ZooKeeperAdapter { -public: - virtual ~RowlockProxyZkAdapterBase() {} - virtual bool Init() = 0; + public: + virtual ~RowlockProxyZkAdapterBase() {} + virtual bool Init() = 0; }; class RowlockProxyZkAdapter : public RowlockProxyZkAdapterBase { -public: - RowlockProxyZkAdapter(RowlockProxyImpl* rowlock_proxy_impl, const std::string& server_addr); - virtual ~RowlockProxyZkAdapter() {} - virtual bool Init(); - -protected: - virtual void OnNodeValueChanged(const std::string& path, - const std::string& value); - virtual void OnWatchFailed(const std::string& path, int watch_type, - int err); - virtual void OnNodeDeleted(const std::string& path); - virtual void OnSessionTimeout(); - virtual void OnNodeCreated(const std::string& path); - virtual void OnChildrenChanged(const std::string& path, - const std::vector& name_list, - const std::vector& data_list); - -private: - RowlockProxyImpl* rowlock_proxy_impl_; - std::string server_addr_; - + public: + RowlockProxyZkAdapter(RowlockProxyImpl* rowlock_proxy_impl, const std::string& server_addr); + virtual ~RowlockProxyZkAdapter() {} + virtual bool Init(); + + protected: + virtual void OnNodeValueChanged(const std::string& path, const std::string& value); + virtual void OnWatchFailed(const std::string& path, int watch_type, int err); + virtual void OnNodeDeleted(const std::string& path); + virtual void OnSessionTimeout(); + virtual void OnNodeCreated(const std::string& path); + virtual void OnChildrenChanged(const std::string& path, const std::vector& name_list, + const std::vector& data_list); + + private: + RowlockProxyImpl* rowlock_proxy_impl_; + std::string server_addr_; }; class InsRowlockProxyZkAdapter : public RowlockProxyZkAdapterBase { -public: - InsRowlockProxyZkAdapter(RowlockProxyImpl* rowlock_proxy_impl, const std::string& server_addr); - virtual ~InsRowlockProxyZkAdapter() {} - virtual bool Init(); - - void OnValueChange(const std::string& path, const std::string& value); - void OnServerChange(); - -protected: - virtual void OnNodeValueChanged(const std::string& path, - const std::string& value) {} - virtual void OnWatchFailed(const std::string& path, int watch_type, - int err) {} - virtual void OnNodeDeleted(const std::string& path) {} - virtual void OnSessionTimeout() {} - virtual void OnNodeCreated(const std::string& path) {} - virtual void OnChildrenChanged(const std::string& path, - const std::vector& name_list, - const std::vector& data_list) {} - -private: - RowlockProxyImpl* rowlock_proxy_impl_; - std::string server_addr_; - galaxy::ins::sdk::InsSDK* ins_sdk_; + public: + InsRowlockProxyZkAdapter(RowlockProxyImpl* rowlock_proxy_impl, const std::string& server_addr); + virtual ~InsRowlockProxyZkAdapter() {} + virtual bool Init(); + + void OnValueChange(const std::string& path, const std::string& value); + void OnServerChange(); + + protected: + virtual void OnNodeValueChanged(const std::string& path, const std::string& value) {} + virtual void OnWatchFailed(const std::string& path, int watch_type, int err) {} + virtual void OnNodeDeleted(const std::string& path) {} + virtual void OnSessionTimeout() {} + virtual void OnNodeCreated(const std::string& path) {} + virtual void OnChildrenChanged(const std::string& path, const std::vector& name_list, + const std::vector& data_list) {} + + private: + RowlockProxyImpl* rowlock_proxy_impl_; + std::string server_addr_; + galaxy::ins::sdk::InsSDK* ins_sdk_; }; -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera -#endif // TERA_OBSERVER_ROWLOCKPROXY_ROWLOCK_PROXY_ZK_ADAPTER_H_ \ No newline at end of file +#endif // TERA_OBSERVER_ROWLOCKPROXY_ROWLOCK_PROXY_ZK_ADAPTER_H_ diff --git a/src/observer/test/notification_test.cc b/src/observer/test/notification_test.cc new file mode 100644 index 000000000..c82dae488 --- /dev/null +++ b/src/observer/test/notification_test.cc @@ -0,0 +1,64 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include +#include +#include + +#include "gflags/gflags.h" +#include +#include +#include + +#include "observer/executor/notification_impl.h" +#include "tera.h" + +namespace tera { +namespace observer { + +class NotificationImplTest : public ::testing::Test { + public: + NotificationImplTest() : semaphore_(10), notify_cell_(nullptr) { + semaphore_.Acquire(); + notify_cell_.reset(new NotifyCell(semaphore_)); + notify_cell_->notify_transaction = nullptr; + } + + ~NotificationImplTest() {} + + NotificationImpl* GetNotification() { return new NotificationImpl(notify_cell_); } + + common::Semaphore semaphore_; + std::shared_ptr notify_cell_; +}; + +TEST_F(NotificationImplTest, SetAckCallBack) { + NotificationImpl* n = GetNotification(); + n->SetAckContext(n); + n->SetAckCallBack([](Notification* n1, const ErrorCode& err) { + NotificationImpl* n2 = (NotificationImpl*)(n1->GetAckContext()); + EXPECT_EQ(n1, n2); + EXPECT_EQ(err.GetType(), ErrorCode::kOK); + delete n1; + }); + ErrorCode ec; + n->ack_callback_(n, ec); +} + +TEST_F(NotificationImplTest, SetNotifyCallBack) { + NotificationImpl* n = GetNotification(); + n->SetNotifyContext(n); + n->SetNotifyCallBack([](Notification* n1, const ErrorCode& err) { + NotificationImpl* n2 = (NotificationImpl*)(n1->GetNotifyContext()); + EXPECT_EQ(n1, n2); + EXPECT_EQ(err.GetType(), ErrorCode::kOK); + delete n1; + }); + ErrorCode ec; + n->notify_callback_(n, ec); +} + +} // namespace observer +} // namespace tera diff --git a/src/observer/test/observer_test.cc b/src/observer/test/observer_test.cc index dfde2dfbe..f38f6e7c1 100644 --- a/src/observer/test/observer_test.cc +++ b/src/observer/test/observer_test.cc @@ -11,9 +11,9 @@ #include "common/thread_pool.h" #include "common/semaphore.h" -#include "observer/executor/observer.h" +#include "observer/observer.h" #include "observer/executor/random_key_selector.h" -#include "observer/executor/scanner.h" +#include "observer/scanner.h" #include "observer/executor/scanner_impl.h" #include "observer/executor/notification_impl.h" #include "sdk/client_impl.h" @@ -40,721 +40,669 @@ namespace tera { namespace observer { class TestWorker : public Observer { -public: - TestWorker(): counter_(0), notified_(false) {} - virtual ~TestWorker() {} - virtual void OnNotify(tera::Transaction* t, - tera::Client* client, - const std::string& table_name, - const std::string& family, - const std::string& qualifier, - const std::string& row, - const std::string& value, - int64_t timestamp, - Notification* notification) { - LOG(INFO) << "[Notify DemoObserver] table:family:qualifer=" << - table_name << ":" << family << ":" << - qualifier << " row=" << row << - " value=" << value << " timestamps=" << timestamp; - - table_name_ = table_name; - family_ = family; - qualifier_ = qualifier; - row_ = row; - value_ = value; - - tera::ErrorCode err; - notified_ = true; - ++counter_; - - std::unique_ptr
table(client->OpenTable(table_name, &err)); - notification->Ack(table.get(), row, family, qualifier); - } + public: + TestWorker() : counter_(0), notified_(false) {} + virtual ~TestWorker() {} + virtual void OnNotify(tera::Transaction* t, tera::Client* client, const std::string& table_name, + const std::string& family, const std::string& qualifier, + const std::string& row, const std::string& value, int64_t timestamp, + Notification* notification) { + LOG(INFO) << "[Notify DemoObserver] table:family:qualifer=" << table_name << ":" << family + << ":" << qualifier << " row=" << row << " value=" << value + << " timestamps=" << timestamp; + + table_name_ = table_name; + family_ = family; + qualifier_ = qualifier; + row_ = row; + value_ = value; + + tera::ErrorCode err; + notified_ = true; + ++counter_; + + std::unique_ptr
table(client->OpenTable(table_name, &err)); + notification->Ack(table.get(), row, family, qualifier); + } + + virtual std::string GetObserverName() const { return "DemoObserver"; } + + virtual TransactionType GetTransactionType() const { return kGlobalTransaction; } + + private: + std::atomic counter_; + std::atomic notified_; + + std::string table_name_; + std::string family_; + std::string qualifier_; + std::string row_; + std::string value_; +}; + +class TestWorkerGTX : public Observer { + public: + TestWorkerGTX() : counter_(0), notified_(false) {} + virtual ~TestWorkerGTX() {} + virtual void OnNotify(tera::Transaction* t, tera::Client* client, const std::string& table_name, + const std::string& family, const std::string& qualifier, + const std::string& row, const std::string& value, int64_t timestamp, + Notification* notification) { + LOG(INFO) << "[Notify TestWorkerGTX] table:family:qualifer=" << table_name << ":" << family + << ":" << qualifier << " row=" << row << " value=" << value + << " timestamps=" << timestamp; + + table_name_ = table_name; + family_ = family; + qualifier_ = qualifier; + row_ = row; + value_ = value; + + tera::ErrorCode err; + notified_ = true; + ++counter_; + + std::unique_ptr
table(client->OpenTable(table_name, &err)); + + // write ForwordIndex column + tera::RowMutation* mutation = table->NewRowMutation(row); + mutation->Put(family, qualifier + "_test", row + "_"); + t->ApplyMutation(mutation); + + tera::ErrorCode error; + t->Ack(table.get(), row, family, qualifier); + t->Commit(); + delete mutation; + } + + virtual std::string GetObserverName() const { return "DemoObserver"; } + + virtual TransactionType GetTransactionType() const { return kSingleRowTransaction; } + + private: + std::atomic counter_; + std::atomic notified_; + + std::string table_name_; + std::string family_; + std::string qualifier_; + std::string row_; + std::string value_; +}; + +class DemoObserver : public tera::observer::Observer { + public: + DemoObserver() {} + virtual ~DemoObserver() {} + virtual void OnNotify(tera::Transaction* t, tera::Client* client, const std::string& table_name, + const std::string& family, const std::string& qualifier, + const std::string& row, const std::string& value, int64_t timestamp, + Notification* notification) { + LOG(INFO) << "[Notify ParseObserver] table:family:qualifer=" << table_name << ":" << family + << ":" << qualifier << " row=" << row << " value=" << value + << " timestamps=" << timestamp; + // do nothing + } + virtual std::string GetObserverName() const { return "DemoObserver"; } + virtual TransactionType GetTransactionType() const { return kGlobalTransaction; } +}; + +class TestWorkerNTX : public Observer { + public: + TestWorkerNTX() : counter_(0), notified_(false) {} + virtual ~TestWorkerNTX() {} + virtual void OnNotify(tera::Transaction* t, tera::Client* client, const std::string& table_name, + const std::string& family, const std::string& qualifier, + const std::string& row, const std::string& value, int64_t timestamp, + Notification* notification) { + LOG(INFO) << "[Notify TestWorkerNTX] table:family:qualifer=" << table_name << ":" << family + << ":" << qualifier << " row=" << row << " value=" << value + << " timestamps=" << timestamp; + + table_name_ = table_name; + family_ = family; + qualifier_ = qualifier; + row_ = row; + value_ = value; + + notified_ = true; + ++counter_; + + // do something without transaction + } + + virtual std::string GetObserverName() const { return "DemoObserver"; } + + virtual TransactionType GetTransactionType() const { return kNoneTransaction; } + + private: + std::atomic counter_; + std::atomic notified_; + + std::string table_name_; + std::string family_; + std::string qualifier_; + std::string row_; + std::string value_; +}; + +class TestTxn : public SingleRowTxn { + public: + TestTxn(Table* table, const std::string& row_key, common::ThreadPool* thread_pool, + int64_t start_ts = 0) + : SingleRowTxn(static_cast(table)->GetTableImpl(), row_key, thread_pool), + start_timestamp_(start_ts) {} + ~TestTxn() {} + + virtual int64_t GetStartTimestamp() { return 10; } + + private: + int64_t start_timestamp_; +}; - virtual std::string GetObserverName() const { - return "DemoObserver"; +class ObserverImplTest : public ::testing::Test { + public: + void OnNotifyTest() { + tera::ErrorCode err; + LOG(ERROR) << "FALG FILE: " << FLAGS_flagfile; + tera::Client* client = tera::Client::NewClient(FLAGS_flagfile, &err); + // for ut test + EXPECT_EQ(tera::ErrorCode::kOK, err.GetType()); + // for no core + if (tera::ErrorCode::kOK != err.GetType()) { + LOG(ERROR) << "new client failed"; + return; } - virtual TransactionType GetTransactionType() const { - return kGlobalTransaction; + // create table + tera::TableDescriptor table_desc("observer_test_table"); + table_desc.EnableTxn(); + + table_desc.AddLocalityGroup("lg1"); + tera::ColumnFamilyDescriptor* cf1 = table_desc.AddColumnFamily("cf", "lg1"); + cf1->EnableGlobalTransaction(); + cf1->EnableNotify(); + ExtendNotifyLgToDescriptor(&table_desc); + + client->CreateTable(table_desc, &err); + if (err.GetType() != tera::ErrorCode::kOK) { + LOG(ERROR) << "Create table fail"; + return; } -private: - std::atomic counter_; - std::atomic notified_; - - std::string table_name_; - std::string family_; - std::string qualifier_; - std::string row_; - std::string value_; -}; -class TestWorkerGTX : public Observer { -public: - TestWorkerGTX(): counter_(0), notified_(false) {} - virtual ~TestWorkerGTX() {} - virtual void OnNotify(tera::Transaction* t, - tera::Client* client, - const std::string& table_name, - const std::string& family, - const std::string& qualifier, - const std::string& row, - const std::string& value, - int64_t timestamp, - Notification* notification) { - LOG(INFO) << "[Notify TestWorkerGTX] table:family:qualifer=" << - table_name << ":" << family << ":" << - qualifier << " row=" << row << - " value=" << value << " timestamps=" << timestamp; - - table_name_ = table_name; - family_ = family; - qualifier_ = qualifier; - row_ = row; - value_ = value; - - tera::ErrorCode err; - notified_ = true; - ++counter_; - - std::unique_ptr
table(client->OpenTable(table_name, &err)); - - // write ForwordIndex column - tera::RowMutation* mutation = table->NewRowMutation(row); - mutation->Put(family, qualifier + "_test", row + "_"); - t->ApplyMutation(mutation); - - tera::ErrorCode error; - t->Ack(table.get(), row, family, qualifier); - t->Commit(); - delete mutation; + std::unique_ptr
table(client->OpenTable("observer_test_table", &err)); + EXPECT_EQ(tera::ErrorCode::kOK, err.GetType()); + if (tera::ErrorCode::kOK != err.GetType()) { + LOG(ERROR) << "open table failed"; + return; } - virtual std::string GetObserverName() const { - return "DemoObserver"; + sleep(1); + std::unique_ptr t(table->StartRowTransaction("www.baidu.com")); + + assert(t != NULL); + std::unique_ptr mu0(table->NewRowMutation("www.baidu.com")); + mu0->Put("_N_", "cf:Page", "I am not important"); + t->ApplyMutation(mu0.get()); + t->Commit(); + + std::unique_ptr g_txn(client->NewGlobalTransaction()); + assert(g_txn != NULL); + std::unique_ptr mu1(table->NewRowMutation("www.baidu.com")); + + mu1->Put("cf", "Page", "hello world", -1); + g_txn->ApplyMutation(mu1.get()); + g_txn->Commit(); + + if (g_txn->GetError().GetType() != tera::ErrorCode::kOK) { + std::cout << g_txn->GetError().ToString() << std::endl; + } else { + std::cout << "commit success" << std::endl; } - virtual TransactionType GetTransactionType() const { - return kSingleRowTransaction; + // varibles for fake timeoracle + FLAGS_start_ts = 10; + FLAGS_begin_commit_ts = 1; + FLAGS_begin_prewrite_ts = 1; + FLAGS_end_prewrite_ts = 1; + FLAGS_commit_ts = 13; + + Observer* observer = new TestWorker(); + Observer* demo = new DemoObserver(); + + Scanner* scanner = new ScannerImpl(); + bool ret = scanner->Init(); + EXPECT_EQ(true, ret); + if (!ret) { + LOG(ERROR) << "fail to init scanner_impl"; + return; } -private: - std::atomic counter_; - std::atomic notified_; - - std::string table_name_; - std::string family_; - std::string qualifier_; - std::string row_; - std::string value_; -}; -class DemoObserver : public tera::observer::Observer { -public: - DemoObserver() {} - virtual ~DemoObserver() {} - virtual void OnNotify(tera::Transaction* t, - tera::Client* client, - const std::string& table_name, - const std::string& family, - const std::string& qualifier, - const std::string& row, - const std::string& value, - int64_t timestamp, - Notification* notification) { - LOG(INFO) << "[Notify ParseObserver] table:family:qualifer=" << - table_name << ":" << family << ":" << - qualifier << " row=" << row << - " value=" << value << " timestamps=" << timestamp; - // do nothing + err = scanner->Observe("observer_test_table", "cf", "Page", observer); + EXPECT_EQ(err.GetType(), tera::ErrorCode::kOK); + + err = scanner->Observe("observer_test_table", "cf", "Page", demo); + EXPECT_EQ(err.GetType(), tera::ErrorCode::kOK); + + if (!scanner->Start()) { + LOG(ERROR) << "fail to start scanner_impl"; + return; } - virtual std::string GetObserverName() const { - return "DemoObserver"; + + while (!static_cast(observer)->notified_) { + sleep(1); } - virtual TransactionType GetTransactionType() const { - return kGlobalTransaction; + + EXPECT_EQ("www.baidu.com", static_cast(observer)->row_); + EXPECT_EQ("observer_test_table", static_cast(observer)->table_name_); + EXPECT_EQ("cf", static_cast(observer)->family_); + EXPECT_EQ("Page", static_cast(observer)->qualifier_); + EXPECT_EQ("hello world", static_cast(observer)->value_); + + scanner->Exit(); + delete scanner; + } + + void SingleRowTransactionTest() { + tera::ErrorCode err; + tera::Client* client = tera::Client::NewClient(FLAGS_flagfile, &err); + // for ut test + EXPECT_EQ(tera::ErrorCode::kOK, err.GetType()); + // for no core + if (tera::ErrorCode::kOK != err.GetType()) { + LOG(ERROR) << "new client failed"; + return; } -}; -class TestWorkerNTX : public Observer { -public: - TestWorkerNTX(): counter_(0), notified_(false) {} - virtual ~TestWorkerNTX() {} - virtual void OnNotify(tera::Transaction* t, - tera::Client* client, - const std::string& table_name, - const std::string& family, - const std::string& qualifier, - const std::string& row, - const std::string& value, - int64_t timestamp, - Notification* notification) { - LOG(INFO) << "[Notify TestWorkerNTX] table:family:qualifer=" << - table_name << ":" << family << ":" << - qualifier << " row=" << row << - " value=" << value << " timestamps=" << timestamp; - - table_name_ = table_name; - family_ = family; - qualifier_ = qualifier; - row_ = row; - value_ = value; - - notified_ = true; - ++counter_; - - // do something without transaction + // create table + tera::TableDescriptor table_desc("observer_table_gtx"); + table_desc.EnableTxn(); + + table_desc.AddLocalityGroup("lg1"); + tera::ColumnFamilyDescriptor* cf1 = table_desc.AddColumnFamily("cf", "lg1"); + cf1->EnableNotify(); + ExtendNotifyLgToDescriptor(&table_desc); + + client->CreateTable(table_desc, &err); + if (err.GetType() != tera::ErrorCode::kOK) { + LOG(ERROR) << "Create table fail"; + return; } - virtual std::string GetObserverName() const { - return "DemoObserver"; + std::unique_ptr
table(client->OpenTable("observer_table_gtx", &err)); + EXPECT_EQ(tera::ErrorCode::kOK, err.GetType()); + if (tera::ErrorCode::kOK != err.GetType()) { + LOG(ERROR) << "open table failed"; + return; } - virtual TransactionType GetTransactionType() const { - return kNoneTransaction; + std::unique_ptr t(table->StartRowTransaction("www.baidu.com")); + + assert(t != NULL); + std::unique_ptr mu0(table->NewRowMutation("www.baidu.com")); + mu0->Put("_N_", "cf:Page", "I am not important"); + mu0->Put("cf", "Page", "hello world", -1); + t->ApplyMutation(mu0.get()); + t->Commit(); + + if (t->GetError().GetType() != tera::ErrorCode::kOK) { + std::cout << t->GetError().ToString() << std::endl; + } else { + std::cout << "commit success" << std::endl; } -private: - std::atomic counter_; - std::atomic notified_; - - std::string table_name_; - std::string family_; - std::string qualifier_; - std::string row_; - std::string value_; -}; -class TestTxn : public SingleRowTxn { -public: - TestTxn(Table* table, const std::string& row_key, - common::ThreadPool* thread_pool, int64_t start_ts = 0) - : SingleRowTxn(static_cast(table)->GetTableImpl(), row_key, thread_pool), - start_timestamp_(start_ts) {} - ~TestTxn() {} - - virtual int64_t GetStartTimestamp() { return 10; } -private: - int64_t start_timestamp_; -}; + Observer* observer = new TestWorkerGTX(); -class ObserverImplTest : public ::testing::Test { -public: - void OnNotifyTest() { - tera::ErrorCode err; - LOG(ERROR) << "FALG FILE: " << FLAGS_flagfile; - tera::Client* client = tera::Client::NewClient(FLAGS_flagfile, &err); - // for ut test - EXPECT_EQ(tera::ErrorCode::kOK, err.GetType()); - // for no core - if (tera::ErrorCode::kOK != err.GetType()) { - LOG(ERROR) << "new client failed"; - return; - } - - // create table - tera::TableDescriptor table_desc("observer_test_table"); - table_desc.EnableTxn(); - - table_desc.AddLocalityGroup("lg1"); - tera::ColumnFamilyDescriptor* cf1 = table_desc.AddColumnFamily("cf", "lg1"); - cf1->EnableGlobalTransaction(); - cf1->EnableNotify(); - ExtendNotifyLgToDescriptor(&table_desc); - - client->CreateTable(table_desc, &err); - if (err.GetType() != tera::ErrorCode::kOK) { - LOG(ERROR) << "Create table fail"; - return; - } - - std::unique_ptr
table(client->OpenTable("observer_test_table", &err)); - EXPECT_EQ(tera::ErrorCode::kOK, err.GetType()); - if (tera::ErrorCode::kOK != err.GetType()) { - LOG(ERROR) << "open table failed"; - return; - } - - sleep(1); - std::unique_ptr t(table->StartRowTransaction("www.baidu.com")); - - assert(t != NULL); - std::unique_ptr mu0(table->NewRowMutation("www.baidu.com")); - mu0->Put("_N_", "cf:Page", "I am not important"); - t->ApplyMutation(mu0.get()); - t->Commit(); - - std::unique_ptr g_txn(client->NewGlobalTransaction()); - assert(g_txn != NULL); - std::unique_ptr mu1(table->NewRowMutation("www.baidu.com")); - - mu1->Put("cf", "Page", "hello world", -1); - g_txn->ApplyMutation(mu1.get()); - g_txn->Commit(); - - if (g_txn->GetError().GetType() != tera::ErrorCode::kOK) { - std::cout << g_txn->GetError().ToString() << std::endl; - } else { - std::cout << "commit success" << std::endl; - } - - // varibles for fake timeoracle - FLAGS_start_ts = 10; - FLAGS_begin_commit_ts = 1; - FLAGS_begin_prewrite_ts = 1; - FLAGS_end_prewrite_ts = 1; - FLAGS_commit_ts = 13; - - Observer* observer = new TestWorker(); - Observer* demo = new DemoObserver(); - - Scanner* scanner = new ScannerImpl(); - bool ret = scanner->Init(); - EXPECT_EQ(true, ret); - if(!ret) { - LOG(ERROR) << "fail to init scanner_impl"; - return; - } - - err = scanner->Observe("observer_test_table", "cf", "Page", observer); - EXPECT_EQ(err.GetType(), tera::ErrorCode::kOK); - - err = scanner->Observe("observer_test_table", "cf", "Page", demo); - EXPECT_EQ(err.GetType(), tera::ErrorCode::kOK); - - if(!scanner->Start()) { - LOG(ERROR) << "fail to start scanner_impl"; - return; - } - - while (!static_cast(observer)->notified_) { - sleep(1); - } - - - EXPECT_EQ("www.baidu.com", static_cast(observer)->row_); - EXPECT_EQ("observer_test_table", static_cast(observer)->table_name_); - EXPECT_EQ("cf", static_cast(observer)->family_); - EXPECT_EQ("Page", static_cast(observer)->qualifier_); - EXPECT_EQ("hello world", static_cast(observer)->value_); - - scanner->Exit(); - delete scanner; + Scanner* scanner = new ScannerImpl(); + bool ret = scanner->Init(); + + EXPECT_EQ(true, ret); + if (!ret) { + LOG(ERROR) << "fail to init scanner_impl"; + return; } - void SingleRowTransactionTest() { - tera::ErrorCode err; - tera::Client* client = tera::Client::NewClient(FLAGS_flagfile, &err); - // for ut test - EXPECT_EQ(tera::ErrorCode::kOK, err.GetType()); - // for no core - if (tera::ErrorCode::kOK != err.GetType()) { - LOG(ERROR) << "new client failed"; - return; - } - - // create table - tera::TableDescriptor table_desc("observer_table_gtx"); - table_desc.EnableTxn(); - - table_desc.AddLocalityGroup("lg1"); - tera::ColumnFamilyDescriptor* cf1 = table_desc.AddColumnFamily("cf", "lg1"); - cf1->EnableNotify(); - ExtendNotifyLgToDescriptor(&table_desc); - - client->CreateTable(table_desc, &err); - if (err.GetType() != tera::ErrorCode::kOK) { - LOG(ERROR) << "Create table fail"; - return; - } - - std::unique_ptr
table(client->OpenTable("observer_table_gtx", &err)); - EXPECT_EQ(tera::ErrorCode::kOK, err.GetType()); - if (tera::ErrorCode::kOK != err.GetType()) { - LOG(ERROR) << "open table failed"; - return; - } - - std::unique_ptr t(table->StartRowTransaction("www.baidu.com")); - - assert(t != NULL); - std::unique_ptr mu0(table->NewRowMutation("www.baidu.com")); - mu0->Put("_N_", "cf:Page", "I am not important"); - mu0->Put("cf", "Page", "hello world", -1); - t->ApplyMutation(mu0.get()); - t->Commit(); - - if (t->GetError().GetType() != tera::ErrorCode::kOK) { - std::cout << t->GetError().ToString() << std::endl; - } else { - std::cout << "commit success" << std::endl; - } - - Observer* observer = new TestWorkerGTX(); - - Scanner* scanner = new ScannerImpl(); - bool ret = scanner->Init(); - - EXPECT_EQ(true, ret); - if(!ret) { - LOG(ERROR) << "fail to init scanner_impl"; - return; - } - - err = scanner->Observe("observer_table_gtx", "cf", "Page", observer); - EXPECT_EQ(err.GetType(), tera::ErrorCode::kOK); - - if(!scanner->Start()) { - LOG(ERROR) << "fail to start scanner_impl"; - return; - } - - while (!static_cast(observer)->notified_) { - sleep(1); - } - - EXPECT_EQ("www.baidu.com", static_cast(observer)->row_); - EXPECT_EQ("observer_table_gtx", static_cast(observer)->table_name_); - EXPECT_EQ("cf", static_cast(observer)->family_); - EXPECT_EQ("Page", static_cast(observer)->qualifier_); - EXPECT_EQ("hello world", static_cast(observer)->value_); - LOG(ERROR) << "Finish"; - scanner->Exit(); - delete scanner; + err = scanner->Observe("observer_table_gtx", "cf", "Page", observer); + EXPECT_EQ(err.GetType(), tera::ErrorCode::kOK); + + if (!scanner->Start()) { + LOG(ERROR) << "fail to start scanner_impl"; + return; } - void NonTransactionTest() { - tera::ErrorCode err; - tera::Client* client = tera::Client::NewClient(FLAGS_flagfile, &err); - // for ut test - EXPECT_EQ(tera::ErrorCode::kOK, err.GetType()); - // for no core - if (tera::ErrorCode::kOK != err.GetType()) { - LOG(ERROR) << "new client failed"; - return; - } - - // create table - tera::TableDescriptor table_desc("observer_table_ntx"); - - table_desc.AddLocalityGroup("lg1"); - tera::ColumnFamilyDescriptor* cf1 = table_desc.AddColumnFamily("cf", "lg1"); - cf1->EnableNotify(); - ExtendNotifyLgToDescriptor(&table_desc); - - client->CreateTable(table_desc, &err); - if (err.GetType() != tera::ErrorCode::kOK) { - LOG(ERROR) << "Create table fail"; - return; - } - - std::unique_ptr
table(client->OpenTable("observer_table_ntx", &err)); - EXPECT_EQ(tera::ErrorCode::kOK, err.GetType()); - if (tera::ErrorCode::kOK != err.GetType()) { - LOG(ERROR) << "open table failed"; - return; - } - - table->Put("www.baidu.com", "_N_", "cf:Page", "I am not important", &err); - if (err.GetType() != tera::ErrorCode::kOK) { - LOG(ERROR) << "put _N_ error"; - return; - } - table->Put("www.baidu.com", "cf", "Page", "hello world", -1, &err); - if (err.GetType() != tera::ErrorCode::kOK) { - LOG(ERROR) << "put cf error"; - return; - } - - Observer* observer = new TestWorkerNTX(); - - Scanner* scanner = new ScannerImpl(); - bool ret = scanner->Init(); - - EXPECT_EQ(true, ret); - if(!ret) { - LOG(ERROR) << "fail to init scanner_impl"; - return; - } - - err = scanner->Observe("observer_table_ntx", "cf", "Page", observer); - EXPECT_EQ(err.GetType(), tera::ErrorCode::kOK); - - if(!scanner->Start()) { - LOG(ERROR) << "fail to start scanner_impl"; - return; - } - - while (!static_cast(observer)->notified_) { - sleep(1); - } - - EXPECT_EQ("www.baidu.com", static_cast(observer)->row_); - EXPECT_EQ("observer_table_ntx", static_cast(observer)->table_name_); - EXPECT_EQ("cf", static_cast(observer)->family_); - EXPECT_EQ("Page", static_cast(observer)->qualifier_); - EXPECT_EQ("hello world", static_cast(observer)->value_); - scanner->Exit(); - delete scanner; + while (!static_cast(observer)->notified_) { + sleep(1); + } + EXPECT_EQ("www.baidu.com", static_cast(observer)->row_); + EXPECT_EQ("observer_table_gtx", static_cast(observer)->table_name_); + EXPECT_EQ("cf", static_cast(observer)->family_); + EXPECT_EQ("Page", static_cast(observer)->qualifier_); + EXPECT_EQ("hello world", static_cast(observer)->value_); + LOG(ERROR) << "Finish"; + scanner->Exit(); + delete scanner; + } + + void NonTransactionTest() { + tera::ErrorCode err; + tera::Client* client = tera::Client::NewClient(FLAGS_flagfile, &err); + // for ut test + EXPECT_EQ(tera::ErrorCode::kOK, err.GetType()); + // for no core + if (tera::ErrorCode::kOK != err.GetType()) { + LOG(ERROR) << "new client failed"; + return; } - void ObserveTest() { - tera::ErrorCode err; - tera::Client* client = tera::Client::NewClient(FLAGS_flagfile, &err); - // for ut test - EXPECT_EQ(tera::ErrorCode::kOK, err.GetType()); - // for no core - if (tera::ErrorCode::kOK != err.GetType()) { - LOG(ERROR) << "new client failed"; - return; - } - - // create table - tera::TableDescriptor table_desc("observer_table"); - table_desc.EnableTxn(); - table_desc.AddLocalityGroup("notify"); - tera::ColumnFamilyDescriptor* cf_t = table_desc.AddColumnFamily(kNotifyColumnFamily, "notify"); - cf_t->EnableGlobalTransaction(); - - table_desc.AddLocalityGroup("lg1"); - tera::ColumnFamilyDescriptor* cf1 = table_desc.AddColumnFamily("cf", "lg1"); - cf1->EnableGlobalTransaction(); - cf1->EnableNotify(); - tera::ColumnFamilyDescriptor* cf2 = table_desc.AddColumnFamily("cf_1", "lg1"); - cf2->EnableGlobalTransaction(); - cf2->EnableNotify(); - - ExtendNotifyLgToDescriptor(&table_desc); - - client->CreateTable(table_desc, &err); - if (err.GetType() != tera::ErrorCode::kOK) { - LOG(ERROR) << "Create table fail"; - } - - FLAGS_tera_sdk_client_for_gtxn = true; - FLAGS_tera_coord_type = "ins"; - common::ThreadPool thread_pool(5); - ScannerImpl* scanner = new ScannerImpl(); - Observer* observer = new DemoObserver(); - scanner->key_selector_.reset(new RandomKeySelector()); - - // single thread - - err = scanner->Observe("observer_table", "cf", "qualifier", observer); - EXPECT_TRUE(err.GetType() != tera::ErrorCode::kOK); - - scanner->tera_client_.reset(tera::Client::NewClient(FLAGS_flagfile, &err)); - EXPECT_EQ(scanner->table_observe_info_->size(), 0); - - err = scanner->Observe("observer_table", "cf", "qualifier", observer); - EXPECT_TRUE(err.GetType() == tera::ErrorCode::kOK); - - err = scanner->Observe("observer_table", "cf", "qualifier", observer); - EXPECT_FALSE(err.GetType() == tera::ErrorCode::kOK); - - err = scanner->Observe("observer_table", "cf_1", "qualifier", observer); - EXPECT_TRUE(err.GetType() == tera::ErrorCode::kOK); - - // multi thread - std::string qualifier; - - for (uint32_t i = 0; i < 10; ++i) { - qualifier += 'a'; - thread_pool.AddTask(std::bind(&ScannerImpl::Observe, scanner, "observer_table", "cf", qualifier, observer)); - } - thread_pool.Stop(true); - EXPECT_EQ(1, scanner->observers_.size()); - EXPECT_EQ(10 + 2, (*(scanner->table_observe_info_))["observer_table"].observe_columns.size()); - scanner->Exit(); - delete scanner; + // create table + tera::TableDescriptor table_desc("observer_table_ntx"); + + table_desc.AddLocalityGroup("lg1"); + tera::ColumnFamilyDescriptor* cf1 = table_desc.AddColumnFamily("cf", "lg1"); + cf1->EnableNotify(); + ExtendNotifyLgToDescriptor(&table_desc); + + client->CreateTable(table_desc, &err); + if (err.GetType() != tera::ErrorCode::kOK) { + LOG(ERROR) << "Create table fail"; + return; } - void ValidateCellValueTest() { - tera::ErrorCode err; - std::unique_ptr client(tera::Client::NewClient(FLAGS_flagfile, &err)); - // for ut test - EXPECT_EQ(tera::ErrorCode::kOK, err.GetType()); - // for no core - if (tera::ErrorCode::kOK != err.GetType()) { - LOG(ERROR) << "new client failed"; - return; - } - - common::ThreadPool thread_pool(5); - ScannerImpl* scanner = new ScannerImpl(); - scanner->key_selector_.reset(new RandomKeySelector()); - std::unique_ptr
table(client->OpenTable("observer_test_table", &err)); - - Observer* observer = new TestWorker(); - bool ret = scanner->Init(); - EXPECT_EQ(true, ret); - if(!ret) { - LOG(ERROR) << "fail to init scanner_impl"; - return; - } - err = scanner->Observe("observer_test_table", "Data", "Page", observer); - EXPECT_EQ(err.GetType(), tera::ErrorCode::kOK); - - ScannerImpl::NotificationContext* context = new ScannerImpl::NotificationContext(); - common::Semaphore s(1); - s.Acquire(); - std::shared_ptr notify_cell(new NotifyCell(s)); - context->notify_cell = notify_cell; - Column column = {"observer_test_table", "Data", "qu"}; - notify_cell->row = "row"; - notify_cell->observed_column = column; - notify_cell->table = table.get(); - - // no value - RowReader* row_reader(table->NewRowReader("no_row")); - row_reader->SetContext(context); - row_reader->AddColumn("Data", "qu"); - table->Get(row_reader); - scanner->ValidateCellValue(row_reader); - sleep(1); - EXPECT_FALSE(static_cast(observer)->notified_); - - // no table - table->Put("row1", "Data", "qu", "value", &err); - if (err.GetType() != tera::ErrorCode::kOK) { - LOG(ERROR) << "put error: " << err.GetReason(); - return; - } - row_reader = table->NewRowReader("row1"); - context = new ScannerImpl::NotificationContext(); - context->notify_cell = notify_cell; - notify_cell->row = "row 1"; - - row_reader->SetContext(context); - table->Get(row_reader); - scanner->ValidateCellValue(row_reader); - sleep(1); - EXPECT_FALSE(static_cast(observer)->notified_); - - // no column - column = {"observer_test_table", "Data", "qu"}; - row_reader = table->NewRowReader("row1"); - context = new ScannerImpl::NotificationContext(); - context->notify_cell = notify_cell; - notify_cell->row = "row 1"; - row_reader->SetContext(context); - - table->Get(row_reader); - scanner->ValidateCellValue(row_reader); - sleep(1); - EXPECT_FALSE(static_cast(observer)->notified_); - - scanner->Exit(); - delete scanner; + std::unique_ptr
table(client->OpenTable("observer_table_ntx", &err)); + EXPECT_EQ(tera::ErrorCode::kOK, err.GetType()); + if (tera::ErrorCode::kOK != err.GetType()) { + LOG(ERROR) << "open table failed"; + return; } - void ValidateAckConfilictTest() { - tera::ErrorCode err; - std::unique_ptr client(tera::Client::NewClient(FLAGS_flagfile, &err)); - // for ut test - EXPECT_EQ(tera::ErrorCode::kOK, err.GetType()); - // for no core - if (tera::ErrorCode::kOK != err.GetType()) { - LOG(ERROR) << "new client failed"; - return; - } - - common::ThreadPool thread_pool(5); - ScannerImpl* scanner = new ScannerImpl(); - scanner->key_selector_.reset(new RandomKeySelector()); - std::unique_ptr
table(client->OpenTable("observer_test_table", &err)); - - Observer* observer = new TestWorker(); - bool ret = scanner->Init(); - EXPECT_EQ(true, ret); - if(!ret) { - LOG(ERROR) << "fail to init scanner_impl"; - return; - } - err = scanner->Observe("observer_test_table", "cf", "qu", observer); - EXPECT_EQ(err.GetType(), tera::ErrorCode::kOK); - - ScannerImpl::NotificationContext* context = new ScannerImpl::NotificationContext(); - common::Semaphore s(1); - s.Acquire(); - std::shared_ptr notify_cell(new NotifyCell(s)); - context->notify_cell = notify_cell; - Column column = {"observer_test_table", "cf", "qu"}; - notify_cell->row = "row1"; - notify_cell->observed_column = column; - notify_cell->timestamp = 1; - - // no value - RowReader* row_reader = table->NewRowReader("row1"); - row_reader->AddColumn("cf", "qu"); - row_reader->SetContext(context); - - // wrong value, context deleted - table->Put("row1", "cf", "qu", "!#%E^E%&$&$%&$^", &err); - table->Get(row_reader); - scanner->ValidateAckConfict(row_reader); - sleep(1); - EXPECT_FALSE(static_cast(observer)->notified_); - - // wrong ts - table->Put("row2", "cf", "qu", "10", &err); - context = new ScannerImpl::NotificationContext(); - row_reader = table->NewRowReader("row2"); - row_reader->AddColumn("cf", "qu"); - row_reader->SetContext(context); - notify_cell->row = "row2"; - context->notify_cell = notify_cell; - notify_cell->notify_transaction.reset(new TestTxn(table.get(), "row2", &thread_pool, 10)); - - table->Get(row_reader); - scanner->ValidateAckConfict(row_reader); - sleep(1); - EXPECT_FALSE(static_cast(observer)->notified_); - - scanner->Exit(); - delete scanner; + table->Put("www.baidu.com", "_N_", "cf:Page", "I am not important", &err); + if (err.GetType() != tera::ErrorCode::kOK) { + LOG(ERROR) << "put _N_ error"; + return; + } + table->Put("www.baidu.com", "cf", "Page", "hello world", -1, &err); + if (err.GetType() != tera::ErrorCode::kOK) { + LOG(ERROR) << "put cf error"; + return; } -}; -TEST_F(ObserverImplTest, OnNotifyTest) { - OnNotifyTest(); -} + Observer* observer = new TestWorkerNTX(); -TEST_F(ObserverImplTest, SingleRowTransactionTest) { - SingleRowTransactionTest(); -} + Scanner* scanner = new ScannerImpl(); + bool ret = scanner->Init(); -TEST_F(ObserverImplTest, NoneTransactionTest) { - NonTransactionTest(); -} + EXPECT_EQ(true, ret); + if (!ret) { + LOG(ERROR) << "fail to init scanner_impl"; + return; + } -TEST_F(ObserverImplTest, ObserveTest) { - ObserveTest(); -} + err = scanner->Observe("observer_table_ntx", "cf", "Page", observer); + EXPECT_EQ(err.GetType(), tera::ErrorCode::kOK); -TEST_F(ObserverImplTest, ValidateCellValue) { - ValidateCellValueTest(); -} + if (!scanner->Start()) { + LOG(ERROR) << "fail to start scanner_impl"; + return; + } -TEST_F(ObserverImplTest, ValidateAckConfilict) { - ValidateAckConfilictTest(); -} + while (!static_cast(observer)->notified_) { + sleep(1); + } -} // namespace observer -} // namespace tera + EXPECT_EQ("www.baidu.com", static_cast(observer)->row_); + EXPECT_EQ("observer_table_ntx", static_cast(observer)->table_name_); + EXPECT_EQ("cf", static_cast(observer)->family_); + EXPECT_EQ("Page", static_cast(observer)->qualifier_); + EXPECT_EQ("hello world", static_cast(observer)->value_); + scanner->Exit(); + delete scanner; + } + + void ObserveTest() { + tera::ErrorCode err; + tera::Client* client = tera::Client::NewClient(FLAGS_flagfile, &err); + // for ut test + EXPECT_EQ(tera::ErrorCode::kOK, err.GetType()); + // for no core + if (tera::ErrorCode::kOK != err.GetType()) { + LOG(ERROR) << "new client failed"; + return; + } -int main(int argc, char** argv) { - ::google::ParseCommandLineFlags(&argc, &argv, true); - ::testing::InitGoogleTest(&argc, argv); + // create table + tera::TableDescriptor table_desc("observer_table"); + table_desc.EnableTxn(); + table_desc.AddLocalityGroup("notify"); + tera::ColumnFamilyDescriptor* cf_t = table_desc.AddColumnFamily(kNotifyColumnFamily, "notify"); + cf_t->EnableGlobalTransaction(); + + table_desc.AddLocalityGroup("lg1"); + tera::ColumnFamilyDescriptor* cf1 = table_desc.AddColumnFamily("cf", "lg1"); + cf1->EnableGlobalTransaction(); + cf1->EnableNotify(); + tera::ColumnFamilyDescriptor* cf2 = table_desc.AddColumnFamily("cf_1", "lg1"); + cf2->EnableGlobalTransaction(); + cf2->EnableNotify(); + + ExtendNotifyLgToDescriptor(&table_desc); + + client->CreateTable(table_desc, &err); + if (err.GetType() != tera::ErrorCode::kOK) { + LOG(ERROR) << "Create table fail"; + } - FLAGS_mock_rowlock_enable = true; FLAGS_tera_sdk_client_for_gtxn = true; - FLAGS_tera_gtxn_test_opened = false; - FLAGS_tera_sdk_tso_client_enabled = false; - FLAGS_observer_scanner_thread_num = 1; - int ret = RUN_ALL_TESTS(); + FLAGS_tera_coord_type = "ins"; + common::ThreadPool thread_pool(5); + ScannerImpl* scanner = new ScannerImpl(); + Observer* observer = new DemoObserver(); + scanner->key_selector_.reset(new RandomKeySelector()); + + // single thread + + err = scanner->Observe("observer_table", "cf", "qualifier", observer); + EXPECT_TRUE(err.GetType() != tera::ErrorCode::kOK); + + scanner->tera_client_.reset(tera::Client::NewClient(FLAGS_flagfile, &err)); + EXPECT_EQ(scanner->table_observe_info_->size(), 0); + + err = scanner->Observe("observer_table", "cf", "qualifier", observer); + EXPECT_TRUE(err.GetType() == tera::ErrorCode::kOK); + + err = scanner->Observe("observer_table", "cf", "qualifier", observer); + EXPECT_FALSE(err.GetType() == tera::ErrorCode::kOK); + + err = scanner->Observe("observer_table", "cf_1", "qualifier", observer); + EXPECT_TRUE(err.GetType() == tera::ErrorCode::kOK); + + // multi thread + std::string qualifier; + + for (uint32_t i = 0; i < 10; ++i) { + qualifier += 'a'; + thread_pool.AddTask( + std::bind(&ScannerImpl::Observe, scanner, "observer_table", "cf", qualifier, observer)); + } + thread_pool.Stop(true); + EXPECT_EQ(1, scanner->observers_.size()); + EXPECT_EQ(10 + 2, (*(scanner->table_observe_info_))["observer_table"].observe_columns.size()); + scanner->Exit(); + delete scanner; + } + + void ValidateCellValueTest() { + tera::ErrorCode err; + std::unique_ptr client(tera::Client::NewClient(FLAGS_flagfile, &err)); + // for ut test + EXPECT_EQ(tera::ErrorCode::kOK, err.GetType()); + // for no core + if (tera::ErrorCode::kOK != err.GetType()) { + LOG(ERROR) << "new client failed"; + return; + } + + common::ThreadPool thread_pool(5); + ScannerImpl* scanner = new ScannerImpl(); + scanner->key_selector_.reset(new RandomKeySelector()); + std::unique_ptr
table(client->OpenTable("observer_test_table", &err)); + + Observer* observer = new TestWorker(); + bool ret = scanner->Init(); + EXPECT_EQ(true, ret); if (!ret) { - exit(EXIT_FAILURE); + LOG(ERROR) << "fail to init scanner_impl"; + return; + } + err = scanner->Observe("observer_test_table", "Data", "Page", observer); + EXPECT_EQ(err.GetType(), tera::ErrorCode::kOK); + + ScannerImpl::NotificationContext* context = new ScannerImpl::NotificationContext(); + common::Semaphore s(1); + s.Acquire(); + std::shared_ptr notify_cell(new NotifyCell(s)); + context->notify_cell = notify_cell; + Column column = {"observer_test_table", "Data", "qu"}; + notify_cell->row = "row"; + notify_cell->observed_column = column; + notify_cell->table = table.get(); + + // no value + RowReader* row_reader(table->NewRowReader("no_row")); + row_reader->SetContext(context); + row_reader->AddColumn("Data", "qu"); + table->Get(row_reader); + scanner->ValidateCellValue(row_reader); + sleep(1); + EXPECT_FALSE(static_cast(observer)->notified_); + + // no table + table->Put("row1", "Data", "qu", "value", &err); + if (err.GetType() != tera::ErrorCode::kOK) { + LOG(ERROR) << "put error: " << err.GetReason(); + return; + } + row_reader = table->NewRowReader("row1"); + context = new ScannerImpl::NotificationContext(); + context->notify_cell = notify_cell; + notify_cell->row = "row 1"; + + row_reader->SetContext(context); + table->Get(row_reader); + scanner->ValidateCellValue(row_reader); + sleep(1); + EXPECT_FALSE(static_cast(observer)->notified_); + + // no column + column = {"observer_test_table", "Data", "qu"}; + row_reader = table->NewRowReader("row1"); + context = new ScannerImpl::NotificationContext(); + context->notify_cell = notify_cell; + notify_cell->row = "row 1"; + row_reader->SetContext(context); + + table->Get(row_reader); + scanner->ValidateCellValue(row_reader); + sleep(1); + EXPECT_FALSE(static_cast(observer)->notified_); + + scanner->Exit(); + delete scanner; + } + + void ValidateAckConfilictTest() { + tera::ErrorCode err; + std::unique_ptr client(tera::Client::NewClient(FLAGS_flagfile, &err)); + // for ut test + EXPECT_EQ(tera::ErrorCode::kOK, err.GetType()); + // for no core + if (tera::ErrorCode::kOK != err.GetType()) { + LOG(ERROR) << "new client failed"; + return; } - return 0; -} + common::ThreadPool thread_pool(5); + ScannerImpl* scanner = new ScannerImpl(); + scanner->key_selector_.reset(new RandomKeySelector()); + std::unique_ptr
table(client->OpenTable("observer_test_table", &err)); + + Observer* observer = new TestWorker(); + bool ret = scanner->Init(); + EXPECT_EQ(true, ret); + if (!ret) { + LOG(ERROR) << "fail to init scanner_impl"; + return; + } + err = scanner->Observe("observer_test_table", "cf", "qu", observer); + EXPECT_EQ(err.GetType(), tera::ErrorCode::kOK); + + ScannerImpl::NotificationContext* context = new ScannerImpl::NotificationContext(); + common::Semaphore s(1); + s.Acquire(); + std::shared_ptr notify_cell(new NotifyCell(s)); + context->notify_cell = notify_cell; + Column column = {"observer_test_table", "cf", "qu"}; + notify_cell->row = "row1"; + notify_cell->observed_column = column; + notify_cell->timestamp = 1; + + // no value + RowReader* row_reader = table->NewRowReader("row1"); + row_reader->AddColumn("cf", "qu"); + row_reader->SetContext(context); + + // wrong value, context deleted + table->Put("row1", "cf", "qu", "!#%E^E%&$&$%&$^", &err); + table->Get(row_reader); + scanner->ValidateAckConfict(row_reader); + sleep(1); + EXPECT_FALSE(static_cast(observer)->notified_); + + // wrong ts + table->Put("row2", "cf", "qu", "10", &err); + context = new ScannerImpl::NotificationContext(); + row_reader = table->NewRowReader("row2"); + row_reader->AddColumn("cf", "qu"); + row_reader->SetContext(context); + notify_cell->row = "row2"; + context->notify_cell = notify_cell; + notify_cell->notify_transaction.reset(new TestTxn(table.get(), "row2", &thread_pool, 10)); + + table->Get(row_reader); + scanner->ValidateAckConfict(row_reader); + sleep(1); + EXPECT_FALSE(static_cast(observer)->notified_); + + scanner->Exit(); + delete scanner; + } +}; + +TEST_F(ObserverImplTest, OnNotifyTest) { OnNotifyTest(); } + +TEST_F(ObserverImplTest, SingleRowTransactionTest) { SingleRowTransactionTest(); } + +TEST_F(ObserverImplTest, NoneTransactionTest) { NonTransactionTest(); } + +TEST_F(ObserverImplTest, ObserveTest) { ObserveTest(); } + +TEST_F(ObserverImplTest, ValidateCellValue) { ValidateCellValueTest(); } + +TEST_F(ObserverImplTest, ValidateAckConfilict) { ValidateAckConfilictTest(); } + +} // namespace observer +} // namespace tera + +int main(int argc, char** argv) { + ::google::ParseCommandLineFlags(&argc, &argv, true); + ::testing::InitGoogleTest(&argc, argv); + + FLAGS_mock_rowlock_enable = true; + FLAGS_tera_sdk_client_for_gtxn = true; + FLAGS_tera_gtxn_test_opened = false; + FLAGS_tera_sdk_tso_client_enabled = false; + FLAGS_observer_scanner_thread_num = 1; + int ret = RUN_ALL_TESTS(); + exit(EXIT_FAILURE); + return 0; +} diff --git a/src/observer/test/rowlock_proxy_test.cc b/src/observer/test/rowlock_proxy_test.cc index fe818ec79..129880d7c 100644 --- a/src/observer/test/rowlock_proxy_test.cc +++ b/src/observer/test/rowlock_proxy_test.cc @@ -18,52 +18,50 @@ #include "utils/utils_cmd.h" class TestClosure : public google::protobuf::Closure { -public: - TestClosure() {} - virtual void Run() {} + public: + TestClosure() {} + virtual void Run() {} }; namespace tera { namespace observer { class TestClient : public RowlockStub { -public: - TestClient() : RowlockStub("127.0.0.1:22222") {}; - ~TestClient() {} + public: + TestClient() : RowlockStub("127.0.0.1:22222"){}; + ~TestClient() {} - virtual bool TryLock(const RowlockRequest* request, - RowlockResponse* response, - std::function done = NULL) { - response->set_lock_status(kLockSucc); - return true; - } + virtual bool TryLock( + const RowlockRequest* request, RowlockResponse* response, + std::function done = NULL) { + response->set_lock_status(kLockSucc); + return true; + } - virtual bool UnLock(const RowlockRequest* request, - RowlockResponse* response, - std::function done = NULL) { - response->set_lock_status(kLockSucc); - return true; - } + virtual bool UnLock( + const RowlockRequest* request, RowlockResponse* response, + std::function done = NULL) { + response->set_lock_status(kLockSucc); + return true; + } }; -TEST(RowlockProxyTest, ValueTest) { - RowlockProxyImpl rowlock_proxy_impl; +TEST(RowlockProxyTest, ValueTest) { + RowlockProxyImpl rowlock_proxy_impl; - rowlock_proxy_impl.SetServerNumber(100); - EXPECT_EQ(100, rowlock_proxy_impl.server_number_); - EXPECT_EQ(100, rowlock_proxy_impl.GetServerNumber()); - - rowlock_proxy_impl.SetServerNumber(1000); - EXPECT_EQ(1000, rowlock_proxy_impl.server_number_); - EXPECT_EQ(1000, rowlock_proxy_impl.GetServerNumber()); + rowlock_proxy_impl.SetServerNumber(100); + EXPECT_EQ(100, rowlock_proxy_impl.server_number_); + EXPECT_EQ(100, rowlock_proxy_impl.GetServerNumber()); - EXPECT_EQ(std::hash()("tablerow"), - rowlock_proxy_impl.GetRowKey("table", "row")); + rowlock_proxy_impl.SetServerNumber(1000); + EXPECT_EQ(1000, rowlock_proxy_impl.server_number_); + EXPECT_EQ(1000, rowlock_proxy_impl.GetServerNumber()); - EXPECT_EQ((*rowlock_proxy_impl.server_addrs_)[0], rowlock_proxy_impl.ScheduleRowKey(0)); - EXPECT_EQ((*rowlock_proxy_impl.server_addrs_)[1], rowlock_proxy_impl.ScheduleRowKey(1)); -} + EXPECT_EQ(std::hash()("tablerow"), rowlock_proxy_impl.GetRowKey("table", "row")); -} // namespace observer -} // namespace tera + EXPECT_EQ((*rowlock_proxy_impl.server_addrs_)[0], rowlock_proxy_impl.ScheduleRowKey(0)); + EXPECT_EQ((*rowlock_proxy_impl.server_addrs_)[1], rowlock_proxy_impl.ScheduleRowKey(1)); +} +} // namespace observer +} // namespace tera diff --git a/src/observer/test/rowlock_test.cc b/src/observer/test/rowlock_test.cc index 611cf195c..9ce0f5800 100644 --- a/src/observer/test/rowlock_test.cc +++ b/src/observer/test/rowlock_test.cc @@ -1,6 +1,6 @@ // Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved // Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. +// found in the LICENSE file. #include #include @@ -19,166 +19,166 @@ namespace tera { namespace observer { class LockTest { -public: - void Lock(tera::observer::ShardedRowlockDB* db, Counter* succeed) { - for (uint32_t i = 0; i < 10; ++i) { - uint64_t key = 1; - - if (db->TryLock(key) == true) { - succeed->Inc(); - } - } + public: + void Lock(tera::observer::ShardedRowlockDB* db, Counter* succeed) { + for (uint32_t i = 0; i < 10; ++i) { + uint64_t key = 1; + + if (db->TryLock(key) == true) { + succeed->Inc(); + } } + } }; TEST(ShardedRowlockDB, LockTest) { - ShardedRowlockDB db; - - // test for lock - EXPECT_EQ(0, db.Size()); - - // different keys - EXPECT_TRUE(db.TryLock(0)); - EXPECT_TRUE(db.TryLock(1)); - EXPECT_TRUE(db.TryLock(2)); - - // same key that has been locked - EXPECT_FALSE(db.TryLock(0)); - EXPECT_FALSE(db.TryLock(1)); - EXPECT_FALSE(db.TryLock(2)); - - // test for unlock - db.UnLock(0); - EXPECT_TRUE(db.TryLock(0)); - - // unlock for other locked keys - EXPECT_FALSE(db.TryLock(1)); - EXPECT_FALSE(db.TryLock(2)); - - // double unlock - db.UnLock(0); - db.UnLock(0); - EXPECT_TRUE(db.TryLock(0)); - - // unlock size - EXPECT_EQ(3, db.Size()); - db.UnLock(0); - EXPECT_EQ(2, db.Size()); - db.UnLock(0); - EXPECT_EQ(2, db.Size()); - db.UnLock(1); - EXPECT_EQ(1, db.Size()); - db.UnLock(2); - EXPECT_EQ(0, db.Size()); - - // test for ClearTimeout - for (int32_t i = 0; i < FLAGS_rowlock_timing_wheel_patch_num; ++i) { - // all keys will not be unlocked until timeing wheel works - EXPECT_TRUE(db.TryLock(i)); - EXPECT_EQ(i + 1, db.Size()); - db.ClearTimeout(); - } + ShardedRowlockDB db; + + // test for lock + EXPECT_EQ(0, db.Size()); + + // different keys + EXPECT_TRUE(db.TryLock(0)); + EXPECT_TRUE(db.TryLock(1)); + EXPECT_TRUE(db.TryLock(2)); + + // same key that has been locked + EXPECT_FALSE(db.TryLock(0)); + EXPECT_FALSE(db.TryLock(1)); + EXPECT_FALSE(db.TryLock(2)); + + // test for unlock + db.UnLock(0); + EXPECT_TRUE(db.TryLock(0)); + + // unlock for other locked keys + EXPECT_FALSE(db.TryLock(1)); + EXPECT_FALSE(db.TryLock(2)); + + // double unlock + db.UnLock(0); + db.UnLock(0); + EXPECT_TRUE(db.TryLock(0)); + + // unlock size + EXPECT_EQ(3, db.Size()); + db.UnLock(0); + EXPECT_EQ(2, db.Size()); + db.UnLock(0); + EXPECT_EQ(2, db.Size()); + db.UnLock(1); + EXPECT_EQ(1, db.Size()); + db.UnLock(2); + EXPECT_EQ(0, db.Size()); + + // test for ClearTimeout + for (int32_t i = 0; i < FLAGS_rowlock_timing_wheel_patch_num; ++i) { + // all keys will not be unlocked until timeing wheel works + EXPECT_TRUE(db.TryLock(i)); + EXPECT_EQ(i + 1, db.Size()); + db.ClearTimeout(); + } - // timing wheel has run a circle, oldest key will be unlocked - EXPECT_EQ(FLAGS_rowlock_timing_wheel_patch_num - 1, db.Size()); + // timing wheel has run a circle, oldest key will be unlocked + EXPECT_EQ(FLAGS_rowlock_timing_wheel_patch_num - 1, db.Size()); - // unlock the second oldest key + // unlock the second oldest key + db.ClearTimeout(); + EXPECT_EQ(FLAGS_rowlock_timing_wheel_patch_num - 2, db.Size()); + + // test for ClearTimeout multi keys + for (int32_t i = 0; i < FLAGS_rowlock_timing_wheel_patch_num; ++i) { + // all keys will not be unlocked until timeing wheel works + EXPECT_TRUE(db.TryLock(i * 10 + 1000000)); + EXPECT_TRUE(db.TryLock(i * 10 + 1000001)); + EXPECT_TRUE(db.TryLock(i * 10 + 1000002)); db.ClearTimeout(); - EXPECT_EQ(FLAGS_rowlock_timing_wheel_patch_num - 2, db.Size()); - - // test for ClearTimeout multi keys - for (int32_t i = 0; i < FLAGS_rowlock_timing_wheel_patch_num; ++i) { - // all keys will not be unlocked until timeing wheel works - EXPECT_TRUE(db.TryLock(i * 10 + 1000000)); - EXPECT_TRUE(db.TryLock(i * 10 + 1000001)); - EXPECT_TRUE(db.TryLock(i * 10 + 1000002)); - db.ClearTimeout(); - } + } - // timing wheel has run a circle, oldest 3 keys will be unlocked - EXPECT_EQ(FLAGS_rowlock_timing_wheel_patch_num * 3 - 3, db.Size()); + // timing wheel has run a circle, oldest 3 keys will be unlocked + EXPECT_EQ(FLAGS_rowlock_timing_wheel_patch_num * 3 - 3, db.Size()); - // unlock the oldest 3 keys - db.ClearTimeout(); - EXPECT_EQ(FLAGS_rowlock_timing_wheel_patch_num * 3 - 6, db.Size()); + // unlock the oldest 3 keys + db.ClearTimeout(); + EXPECT_EQ(FLAGS_rowlock_timing_wheel_patch_num * 3 - 6, db.Size()); } TEST(RowlockDB, LockTest) { - RowlockDB db; - - // test for lock - EXPECT_EQ(0, db.Size()); - - // different keys - EXPECT_TRUE(db.TryLock(0)); - EXPECT_TRUE(db.TryLock(1)); - EXPECT_TRUE(db.TryLock(2)); - - // same key that has been locked - EXPECT_FALSE(db.TryLock(0)); - EXPECT_FALSE(db.TryLock(1)); - EXPECT_FALSE(db.TryLock(2)); - - // test for unlock - db.UnLock(0); - EXPECT_TRUE(db.TryLock(0)); - - // unlock for other locked keys - EXPECT_FALSE(db.TryLock(1)); - EXPECT_FALSE(db.TryLock(2)); - - // double unlock - db.UnLock(0); - db.UnLock(0); - EXPECT_TRUE(db.TryLock(0)); - - // unlock size - EXPECT_EQ(3, db.Size()); - db.UnLock(0); - EXPECT_EQ(2, db.Size()); - db.UnLock(0); - EXPECT_EQ(2, db.Size()); - db.UnLock(1); - EXPECT_EQ(1, db.Size()); - db.UnLock(2); - EXPECT_EQ(0, db.Size()); - - // test for ClearTimeout - for (int32_t i = 0; i < FLAGS_rowlock_timing_wheel_patch_num; ++i) { - // all keys will not be unlocked until timeing wheel works - EXPECT_TRUE(db.TryLock(i)); - EXPECT_EQ(i + 1, db.Size()); - db.ClearTimeout(); - } + RowlockDB db; + + // test for lock + EXPECT_EQ(0, db.Size()); + + // different keys + EXPECT_TRUE(db.TryLock(0)); + EXPECT_TRUE(db.TryLock(1)); + EXPECT_TRUE(db.TryLock(2)); + + // same key that has been locked + EXPECT_FALSE(db.TryLock(0)); + EXPECT_FALSE(db.TryLock(1)); + EXPECT_FALSE(db.TryLock(2)); + + // test for unlock + db.UnLock(0); + EXPECT_TRUE(db.TryLock(0)); + + // unlock for other locked keys + EXPECT_FALSE(db.TryLock(1)); + EXPECT_FALSE(db.TryLock(2)); + + // double unlock + db.UnLock(0); + db.UnLock(0); + EXPECT_TRUE(db.TryLock(0)); + + // unlock size + EXPECT_EQ(3, db.Size()); + db.UnLock(0); + EXPECT_EQ(2, db.Size()); + db.UnLock(0); + EXPECT_EQ(2, db.Size()); + db.UnLock(1); + EXPECT_EQ(1, db.Size()); + db.UnLock(2); + EXPECT_EQ(0, db.Size()); + + // test for ClearTimeout + for (int32_t i = 0; i < FLAGS_rowlock_timing_wheel_patch_num; ++i) { + // all keys will not be unlocked until timeing wheel works + EXPECT_TRUE(db.TryLock(i)); + EXPECT_EQ(i + 1, db.Size()); + db.ClearTimeout(); + } - // timing wheel has run a circle, oldest key will be unlocked - EXPECT_EQ(FLAGS_rowlock_timing_wheel_patch_num - 1, db.Size()); + // timing wheel has run a circle, oldest key will be unlocked + EXPECT_EQ(FLAGS_rowlock_timing_wheel_patch_num - 1, db.Size()); - // unlock the second oldest key - db.ClearTimeout(); - EXPECT_EQ(FLAGS_rowlock_timing_wheel_patch_num - 2, db.Size()); + // unlock the second oldest key + db.ClearTimeout(); + EXPECT_EQ(FLAGS_rowlock_timing_wheel_patch_num - 2, db.Size()); } TEST(ShardedRowlockDB, ParaTest) { - Counter counter; - ShardedRowlockDB db; - LockTest test; - - // 10 threads to lock the same key - ThreadPool thread_pool(10); - for (uint32_t i = 0; i < 10; ++i) { - ThreadPool::Task task = std::bind(&LockTest::Lock, &test, &db, &counter); - thread_pool.AddTask(task); - } - sleep(1); - EXPECT_EQ(1, db.Size()); - EXPECT_EQ(1, counter.Get()); - - for (int32_t i = 0; i < FLAGS_rowlock_timing_wheel_patch_num; ++i) { - db.ClearTimeout(); - } - EXPECT_EQ(0, db.Size()); + Counter counter; + ShardedRowlockDB db; + LockTest test; + + // 10 threads to lock the same key + ThreadPool thread_pool(10); + for (uint32_t i = 0; i < 10; ++i) { + ThreadPool::Task task = std::bind(&LockTest::Lock, &test, &db, &counter); + thread_pool.AddTask(task); + } + sleep(1); + EXPECT_EQ(1, db.Size()); + EXPECT_EQ(1, counter.Get()); + + for (int32_t i = 0; i < FLAGS_rowlock_timing_wheel_patch_num; ++i) { + db.ClearTimeout(); + } + EXPECT_EQ(0, db.Size()); } -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera diff --git a/src/observer/test/scanner_test.cc b/src/observer/test/scanner_test.cc index 25e9e5fc3..aa6eaa8fe 100644 --- a/src/observer/test/scanner_test.cc +++ b/src/observer/test/scanner_test.cc @@ -33,397 +33,372 @@ DECLARE_bool(mock_rowlock_enable); namespace tera { namespace observer { - class TestRowReader : public RowReaderImpl { -public: - TestRowReader(TableImpl* table, const std::string& row_key) - : RowReaderImpl(table, row_key), seq_(0) { - if (row_key == "empty" || row_key == "empty_fail") { - // empty case - } else if (row_key == "900" || row_key == "900_fail") { - value_.push_back("900"); - value_.push_back("900"); - value_.push_back("901"); - value_.push_back("920"); - } else if (row_key == "1100") { - value_.push_back("1000"); - value_.push_back("1000"); - value_.push_back("1100"); - value_.push_back("1100"); - } else if (row_key == "1hour") { - value_.push_back("810"); - value_.push_back("820"); - value_.push_back("830"); - value_.push_back("840"); - } else if (row_key == "collision_mix") { - value_.push_back("100"); - value_.push_back("1000"); - value_.push_back("4700"); - value_.push_back("1100"); - } else if (row_key == "error_ts") { - value_.push_back("100:sffaeeew"); - } else if (row_key == "some_error_ts") { - value_.push_back("wrong_string"); - value_.push_back("900"); - value_.push_back("900"); - value_.push_back("900"); - } else if (row_key == "one_version") { - value_.push_back("901"); - } else { - value_.push_back("1010"); - value_.push_back("1012"); - value_.push_back("1013"); - value_.push_back("1014"); - value_.push_back("1015"); - value_.push_back("1016"); - value_.push_back("1017"); - } - } - virtual std::string Value() { - return value_[seq_]; - - } - virtual int64_t Timestamp() { - return 9999999; - } - virtual void AddColumn(const std::string& family, const std::string& qualifier) {} - virtual bool Done() { - return seq_ >= value_.size(); + public: + TestRowReader(TableImpl* table, const std::string& row_key) + : RowReaderImpl(table, row_key), seq_(0) { + if (row_key == "empty" || row_key == "empty_fail") { + // empty case + } else if (row_key == "900" || row_key == "900_fail") { + value_.push_back("900"); + value_.push_back("900"); + value_.push_back("901"); + value_.push_back("920"); + } else if (row_key == "1100") { + value_.push_back("1000"); + value_.push_back("1000"); + value_.push_back("1100"); + value_.push_back("1100"); + } else if (row_key == "1hour") { + value_.push_back("810"); + value_.push_back("820"); + value_.push_back("830"); + value_.push_back("840"); + } else if (row_key == "collision_mix") { + value_.push_back("100"); + value_.push_back("1000"); + value_.push_back("4700"); + value_.push_back("1100"); + } else if (row_key == "error_ts") { + value_.push_back("100:sffaeeew"); + } else if (row_key == "some_error_ts") { + value_.push_back("wrong_string"); + value_.push_back("900"); + value_.push_back("900"); + value_.push_back("900"); + } else if (row_key == "one_version") { + value_.push_back("901"); + } else { + value_.push_back("1010"); + value_.push_back("1012"); + value_.push_back("1013"); + value_.push_back("1014"); + value_.push_back("1015"); + value_.push_back("1016"); + value_.push_back("1017"); } - virtual void Next() { - if (seq_ < value_.size()) { - seq_++; - } + } + virtual std::string Value() { return value_[seq_]; } + virtual int64_t Timestamp() { return 9999999; } + virtual void AddColumn(const std::string& family, const std::string& qualifier) {} + virtual bool Done() { return seq_ >= value_.size(); } + virtual void Next() { + if (seq_ < value_.size()) { + seq_++; } + } -private: - std::vector value_; - uint32_t seq_; - //void* user_context_; + private: + std::vector value_; + uint32_t seq_; + // void* user_context_; }; class TestTransaction : public GlobalTxn { -public: - TestTransaction(int64_t start_ts, common::ThreadPool* thread_pool, bool error = false) - : GlobalTxn(NULL, thread_pool, NULL), - start_timestamp_(1000), error_(error) {} - - virtual ~TestTransaction() {} - virtual ErrorCode Get(RowReader* row_reader) { - ErrorCode err; - return err; - } - virtual int64_t GetStartTimestamp() { - return start_timestamp_; - } - virtual const ErrorCode& GetError() { - if (error_ == true) { - err_.SetFailed(ErrorCode::kSystem, ""); - } - return err_; + public: + TestTransaction(int64_t start_ts, common::ThreadPool* thread_pool, bool error = false) + : GlobalTxn(NULL, thread_pool, NULL), start_timestamp_(1000), error_(error) {} + + virtual ~TestTransaction() {} + virtual ErrorCode Get(RowReader* row_reader) { + ErrorCode err; + return err; + } + virtual int64_t GetStartTimestamp() { return start_timestamp_; } + virtual const ErrorCode& GetError() { + if (error_ == true) { + err_.SetFailed(ErrorCode::kSystem, ""); } -private: - int64_t start_timestamp_; - ErrorCode err_; - bool error_; + return err_; + } + + private: + int64_t start_timestamp_; + ErrorCode err_; + bool error_; }; class TestRowMutationImpl : public RowMutationImpl { -public: - TestRowMutationImpl(TableImpl* table, const std::string& row_key) - : RowMutationImpl(table, row_key) {} - virtual void Put(const std::vector& value, int32_t ttl = -1) {} - virtual void ApplyMutation(RowMutation* row_mu) {} + public: + TestRowMutationImpl(TableImpl* table, const std::string& row_key) + : RowMutationImpl(table, row_key) {} + virtual void Put(const std::vector& value, int32_t ttl = -1) {} + virtual void ApplyMutation(RowMutation* row_mu) {} }; class TestTable : public TableImpl { -public: - TestTable(const std::string& table_name, - ThreadPool* thread_pool) - : TableImpl(table_name, thread_pool, std::shared_ptr()), - global_txn_(true), - thread_pool_(thread_pool) {} - virtual RowReader* NewRowReader(const std::string& row_key) { - return new TestRowReader(this, row_key); + public: + TestTable(const std::string& table_name, ThreadPool* thread_pool) + : TableImpl(table_name, thread_pool, std::shared_ptr()), + global_txn_(true), + thread_pool_(thread_pool) {} + virtual RowReader* NewRowReader(const std::string& row_key) { + return new TestRowReader(this, row_key); + } + virtual Transaction* StartRowTransaction(const std::string& row_key) { + if (row_key == "empty_fail" || row_key == "900_fail") { + return new TestTransaction(1, thread_pool_, true); } - virtual Transaction* StartRowTransaction(const std::string& row_key) { - if (row_key == "empty_fail" || row_key == "900_fail") { - return new TestTransaction(1, thread_pool_, true); - } - return new TestTransaction(1, thread_pool_); + return new TestTransaction(1, thread_pool_); + } + virtual RowMutation* NewRowMutation(const std::string& row_key) { + return new TestRowMutationImpl(this, row_key); + } + virtual void CommitRowTransaction(Transaction* transaction) {} + virtual bool GetDescriptor(TableDescriptor* schema, ErrorCode* err) { + schema->AddLocalityGroup("lg0"); + tera::ColumnFamilyDescriptor* cfd1 = schema->AddColumnFamily("cf1"); + cfd1->EnableNotify(); + ExtendNotifyLgToDescriptor(schema); + if (!global_txn_) { + cfd1->DisableGlobalTransaction(); } - virtual RowMutation* NewRowMutation(const std::string& row_key) { - return new TestRowMutationImpl(this, row_key); - } - virtual void CommitRowTransaction(Transaction* transaction) {} - virtual bool GetDescriptor(TableDescriptor* schema, ErrorCode* err) { - schema->AddLocalityGroup("lg0"); - tera::ColumnFamilyDescriptor* cfd1 = schema->AddColumnFamily("cf1"); - cfd1->EnableNotify(); - ExtendNotifyLgToDescriptor(schema); - if (!global_txn_) { - cfd1->DisableGlobalTransaction(); - } - return true; - } - virtual void Get(RowReader* row_reader) {} -private: - bool global_txn_; - common::ThreadPool* thread_pool_; + return true; + } + virtual void Get(RowReader* row_reader) {} + + private: + bool global_txn_; + common::ThreadPool* thread_pool_; }; -class TestResultStream : public tera::ResultStream{ -public: - virtual bool Done(ErrorCode* err) { - if (next_number_ < row_name_.size()) { - return false; - } else { - return true; - } - } - virtual void Next() { - next_number_++; +class TestResultStream : public tera::ResultStream { + public: + virtual bool Done(ErrorCode* err) { + if (next_number_ < row_name_.size()) { + return false; + } else { + return true; } + } + virtual void Next() { next_number_++; } - virtual std::string RowName() const { - return row_name_[next_number_]; - } - virtual std::string Qualifier() const { - return qualifier_[next_number_]; - } + virtual std::string GetLastKey() const { return ""; } + virtual uint64_t GetDataSize() const { return 0; } - virtual std::string Family() const { - return ""; - } + virtual uint64_t GetRowCount() const { return 0; } - virtual int64_t Timestamp() const { - return 0; - } - virtual std::string Value() const { - return ""; - } + virtual void Cancel() { return; } - virtual int64_t ValueInt64() const { - return 0; - } + virtual std::string RowName() const { return row_name_[next_number_]; } + virtual std::string Qualifier() const { return qualifier_[next_number_]; } - virtual bool LookUp(const std::string& row_key) { - return true; - } + virtual std::string Family() const { return ""; } - virtual std::string ColumnName() const { - return ""; - } -private: - uint32_t next_number_; - std::vector row_name_; - std::vector qualifier_; - bool done_; + virtual int64_t Timestamp() const { return 0; } + virtual std::string Value() const { return ""; } + + virtual int64_t ValueInt64() const { return 0; } + + virtual bool LookUp(const std::string& row_key) { return true; } + + virtual std::string ColumnName() const { return ""; } + + private: + uint32_t next_number_; + std::vector row_name_; + std::vector qualifier_; + bool done_; }; class TestObserver : public tera::observer::Observer { -public: - TestObserver() : count_(0) {} - virtual ~TestObserver() {} - virtual void OnNotify(tera::Transaction* t, - tera::Client* client, - const std::string& table_name, - const std::string& family, - const std::string& qualifier, - const std::string& row, - const std::string& value, - int64_t timestamp, - Notification* notification) { - LOG(INFO) << "[Notify TestObserver] table:family:qualifer=" << - table_name << ":" << family << ":" << - qualifier << " row=" << row << - " value=" << value << " timestamps[0]=" << value; - - count_++; - // do nothing - } - virtual std::string GetObserverName() const { - return "TestObserver"; - } - - virtual TransactionType GetTransactionType() const { - return kGlobalTransaction; - } -private: - std::atomic count_; + public: + TestObserver() : count_(0) {} + virtual ~TestObserver() {} + virtual void OnNotify(tera::Transaction* t, tera::Client* client, const std::string& table_name, + const std::string& family, const std::string& qualifier, + const std::string& row, const std::string& value, int64_t timestamp, + Notification* notification) { + LOG(INFO) << "[Notify TestObserver] table:family:qualifer=" << table_name << ":" << family + << ":" << qualifier << " row=" << row << " value=" << value + << " timestamps[0]=" << value; + + count_++; + // do nothing + } + virtual std::string GetObserverName() const { return "TestObserver"; } + + virtual TransactionType GetTransactionType() const { return kGlobalTransaction; } + + private: + std::atomic count_; }; class TestClient : public ClientImpl { -public: - TestClient() : ClientImpl(ClientOptions(), NULL, NULL), - thread_pool_(5) {} - ~TestClient() {} - virtual Table* OpenTable(const std::string& table_name, ErrorCode* err) { - return static_cast(new TestTable(table_name, &thread_pool_)); - } - virtual Transaction* NewGlobalTransaction() { - return new TestTransaction(1, &thread_pool_); - } -private: - common::ThreadPool thread_pool_; + public: + TestClient() : ClientImpl(ClientOptions(), NULL, NULL), thread_pool_(5) {} + ~TestClient() {} + virtual Table* OpenTable(const std::string& table_name, ErrorCode* err) { + return static_cast(new TestTable(table_name, &thread_pool_)); + } + virtual Transaction* NewGlobalTransaction() { return new TestTransaction(1, &thread_pool_); } + + private: + common::ThreadPool thread_pool_; }; class TestKeySelector : public RandomKeySelector { -public: - TestKeySelector() {} - virtual ErrorCode Observe(const std::string& table_name) { - tera::ErrorCode err; - return err; - } + public: + TestKeySelector() {} + virtual ErrorCode Observe(const std::string& table_name) { + tera::ErrorCode err; + return err; + } }; TEST(ScannerImpl, ParseNotifyQualifier) { - FLAGS_tera_sdk_client_for_gtxn = true; - FLAGS_tera_coord_type = "mock_zk"; - ScannerImpl scanner; + FLAGS_tera_sdk_client_for_gtxn = true; + FLAGS_tera_coord_type = "mock_zk"; + ScannerImpl scanner; - std::string data_family; - std::string data_qualfier; + std::string data_family; + std::string data_qualfier; - EXPECT_TRUE(scanner.ParseNotifyQualifier("C:url", &data_family, &data_qualfier)); - EXPECT_EQ(data_family, "C"); - EXPECT_EQ(data_qualfier, "url"); + EXPECT_TRUE(scanner.ParseNotifyQualifier("C:url", &data_family, &data_qualfier)); + EXPECT_EQ(data_family, "C"); + EXPECT_EQ(data_qualfier, "url"); - EXPECT_TRUE(scanner.ParseNotifyQualifier("cf:page", &data_family, &data_qualfier)); - EXPECT_EQ(data_family, "cf"); - EXPECT_EQ(data_qualfier, "page"); + EXPECT_TRUE(scanner.ParseNotifyQualifier("cf:page", &data_family, &data_qualfier)); + EXPECT_EQ(data_family, "cf"); + EXPECT_EQ(data_qualfier, "page"); - EXPECT_TRUE(scanner.ParseNotifyQualifier("cf::::::", &data_family, &data_qualfier)); - EXPECT_EQ(data_family, "cf"); - EXPECT_EQ(data_qualfier, ":::::"); + EXPECT_TRUE(scanner.ParseNotifyQualifier("cf::::::", &data_family, &data_qualfier)); + EXPECT_EQ(data_family, "cf"); + EXPECT_EQ(data_qualfier, ":::::"); - EXPECT_TRUE(scanner.ParseNotifyQualifier("cf:___", &data_family, &data_qualfier)); - EXPECT_EQ(data_family, "cf"); - EXPECT_EQ(data_qualfier, "___"); - - EXPECT_FALSE(scanner.ParseNotifyQualifier("Curl", &data_family, &data_qualfier)); - EXPECT_FALSE(scanner.ParseNotifyQualifier("C_url", &data_family, &data_qualfier)); - EXPECT_FALSE(scanner.ParseNotifyQualifier("C.urlN_", &data_family, &data_qualfier)); - EXPECT_FALSE(scanner.ParseNotifyQualifier("++page", &data_family, &data_qualfier)); + EXPECT_TRUE(scanner.ParseNotifyQualifier("cf:___", &data_family, &data_qualfier)); + EXPECT_EQ(data_family, "cf"); + EXPECT_EQ(data_qualfier, "___"); + EXPECT_FALSE(scanner.ParseNotifyQualifier("Curl", &data_family, &data_qualfier)); + EXPECT_FALSE(scanner.ParseNotifyQualifier("C_url", &data_family, &data_qualfier)); + EXPECT_FALSE(scanner.ParseNotifyQualifier("C.urlN_", &data_family, &data_qualfier)); + EXPECT_FALSE(scanner.ParseNotifyQualifier("++page", &data_family, &data_qualfier)); } TEST(ScannerImpl, NextRow) { - FLAGS_tera_sdk_client_for_gtxn = true; - FLAGS_tera_coord_type = "mock_zk"; - std::unique_ptr result_stream(new TestResultStream()); - ScannerImpl scanner; - bool finished = false; - std::string vec_rowkey; - std::vector vec_col; - - // stream done - EXPECT_FALSE(scanner.NextRow(result_stream.get(), "table_name", &finished, &vec_rowkey, &vec_col)); - EXPECT_EQ(true, finished); - - finished = false; - static_cast(result_stream.get())->row_name_.push_back("row1"); - static_cast(result_stream.get())->qualifier_.push_back("cf:page1"); - static_cast(result_stream.get())->row_name_.push_back("row1"); - static_cast(result_stream.get())->qualifier_.push_back("cf:page2"); - static_cast(result_stream.get())->row_name_.push_back("row2"); - static_cast(result_stream.get())->qualifier_.push_back("cf:page3"); - static_cast(result_stream.get())->row_name_.push_back("row2"); - static_cast(result_stream.get())->qualifier_.push_back("cf:page4"); - - // row 1 - EXPECT_TRUE(scanner.NextRow(result_stream.get(), "table_name", &finished, &vec_rowkey, &vec_col)); - EXPECT_FALSE(finished); - - // row 1 data - EXPECT_EQ(vec_col.size(), 2); - EXPECT_EQ(vec_rowkey, "row1"); - EXPECT_EQ(vec_col[0].qualifier, "page1"); - EXPECT_EQ(vec_col[1].qualifier, "page2"); - - // row 2 - EXPECT_TRUE(scanner.NextRow(result_stream.get(), "table_name", &finished, &vec_rowkey, &vec_col)); - EXPECT_FALSE(finished); - - // row 2 data - EXPECT_EQ(vec_col.size(), 2); - EXPECT_EQ(vec_rowkey, "row2"); - EXPECT_EQ(vec_col[0].qualifier, "page3"); - EXPECT_EQ(vec_col[1].qualifier, "page4"); - - // scan finish - EXPECT_FALSE(scanner.NextRow(result_stream.get(), "table_name", &finished, &vec_rowkey, &vec_col)); - EXPECT_TRUE(finished); + FLAGS_tera_sdk_client_for_gtxn = true; + FLAGS_tera_coord_type = "mock_zk"; + std::unique_ptr result_stream(new TestResultStream()); + ScannerImpl scanner; + bool finished = false; + std::string vec_rowkey; + std::vector vec_col; + + // stream done + EXPECT_FALSE( + scanner.NextRow(result_stream.get(), "table_name", &finished, &vec_rowkey, &vec_col)); + EXPECT_EQ(true, finished); + + finished = false; + static_cast(result_stream.get())->row_name_.push_back("row1"); + static_cast(result_stream.get())->qualifier_.push_back("cf:page1"); + static_cast(result_stream.get())->row_name_.push_back("row1"); + static_cast(result_stream.get())->qualifier_.push_back("cf:page2"); + static_cast(result_stream.get())->row_name_.push_back("row2"); + static_cast(result_stream.get())->qualifier_.push_back("cf:page3"); + static_cast(result_stream.get())->row_name_.push_back("row2"); + static_cast(result_stream.get())->qualifier_.push_back("cf:page4"); + + // row 1 + EXPECT_TRUE(scanner.NextRow(result_stream.get(), "table_name", &finished, &vec_rowkey, &vec_col)); + EXPECT_FALSE(finished); + + // row 1 data + EXPECT_EQ(vec_col.size(), 2); + EXPECT_EQ(vec_rowkey, "row1"); + EXPECT_EQ(vec_col[0].qualifier, "page1"); + EXPECT_EQ(vec_col[1].qualifier, "page2"); + + // row 2 + EXPECT_TRUE(scanner.NextRow(result_stream.get(), "table_name", &finished, &vec_rowkey, &vec_col)); + EXPECT_FALSE(finished); + + // row 2 data + EXPECT_EQ(vec_col.size(), 2); + EXPECT_EQ(vec_rowkey, "row2"); + EXPECT_EQ(vec_col[0].qualifier, "page3"); + EXPECT_EQ(vec_col[1].qualifier, "page4"); + + // scan finish + EXPECT_FALSE( + scanner.NextRow(result_stream.get(), "table_name", &finished, &vec_rowkey, &vec_col)); + EXPECT_TRUE(finished); } TEST(ScannerImpl, CheckTransactionTypeLegalForTable) { - ScannerImpl scanner; - EXPECT_EQ(scanner.CheckTransactionTypeLegalForTable(kGlobalTransaction, kGlobalTransaction), true); - EXPECT_EQ(scanner.CheckTransactionTypeLegalForTable(kSingleRowTransaction, kSingleRowTransaction), true); - EXPECT_EQ(scanner.CheckTransactionTypeLegalForTable(kNoneTransaction, kNoneTransaction), true); - - EXPECT_EQ(scanner.CheckTransactionTypeLegalForTable(kNoneTransaction, kGlobalTransaction), false); - EXPECT_EQ(scanner.CheckTransactionTypeLegalForTable(kNoneTransaction, kSingleRowTransaction), true); - - EXPECT_EQ(scanner.CheckTransactionTypeLegalForTable(kSingleRowTransaction, kNoneTransaction), false); - EXPECT_EQ(scanner.CheckTransactionTypeLegalForTable(kSingleRowTransaction, kGlobalTransaction), false); - - EXPECT_EQ(scanner.CheckTransactionTypeLegalForTable(kGlobalTransaction, kNoneTransaction), false); - EXPECT_EQ(scanner.CheckTransactionTypeLegalForTable(kGlobalTransaction, kSingleRowTransaction), false); + ScannerImpl scanner; + EXPECT_EQ(scanner.CheckTransactionTypeLegalForTable(kGlobalTransaction, kGlobalTransaction), + true); + EXPECT_EQ(scanner.CheckTransactionTypeLegalForTable(kSingleRowTransaction, kSingleRowTransaction), + true); + EXPECT_EQ(scanner.CheckTransactionTypeLegalForTable(kNoneTransaction, kNoneTransaction), true); + + EXPECT_EQ(scanner.CheckTransactionTypeLegalForTable(kNoneTransaction, kGlobalTransaction), false); + EXPECT_EQ(scanner.CheckTransactionTypeLegalForTable(kNoneTransaction, kSingleRowTransaction), + true); + + EXPECT_EQ(scanner.CheckTransactionTypeLegalForTable(kSingleRowTransaction, kNoneTransaction), + false); + EXPECT_EQ(scanner.CheckTransactionTypeLegalForTable(kSingleRowTransaction, kGlobalTransaction), + false); + + EXPECT_EQ(scanner.CheckTransactionTypeLegalForTable(kGlobalTransaction, kNoneTransaction), false); + EXPECT_EQ(scanner.CheckTransactionTypeLegalForTable(kGlobalTransaction, kSingleRowTransaction), + false); } TEST(ScannerImpl, PrepareNotifyCell) { - FLAGS_tera_coord_type = "mock_zk"; - FLAGS_mock_rowlock_enable = true; - ScannerImpl scanner; - scanner.tera_client_.reset(new TestClient()); - ErrorCode err; - - std::vector notify_columns; - std::vector> notify_cells; - std::set observe_columns; - std::shared_ptr unlocker(new AutoRowUnlocker("test_table", "row")); - Observer* observer = new TestObserver(); - - Column column = {"test_table", "row", "qualifier"}; - observe_columns.insert(column); - tera::Table* table = scanner.tera_client_->OpenTable("test_table", &err); - (*(scanner.table_observe_info_))["test_table"].table = table; - (*(scanner.table_observe_info_))["test_table"].type = kGlobalTransaction; - (*(scanner.table_observe_info_))["test_table"].observe_columns[column].insert(observer); - - scanner.PrepareNotifyCell(table, "row", observe_columns, notify_columns, unlocker, ¬ify_cells); - EXPECT_EQ(notify_cells.size(), 0); - - notify_columns.push_back(column); - scanner.PrepareNotifyCell(table, "row", observe_columns, notify_columns, unlocker, ¬ify_cells); - EXPECT_EQ(notify_cells.size(), 1); - EXPECT_EQ(notify_cells[0]->table, table); - EXPECT_EQ(notify_cells[0]->row, "row"); - EXPECT_EQ(notify_cells[0]->observed_column, column); - EXPECT_EQ(notify_cells[0]->observer, observer); + FLAGS_tera_coord_type = "mock_zk"; + FLAGS_mock_rowlock_enable = true; + ScannerImpl scanner; + scanner.tera_client_.reset(new TestClient()); + ErrorCode err; + + std::vector notify_columns; + std::vector> notify_cells; + std::set observe_columns; + std::shared_ptr unlocker(new AutoRowUnlocker("test_table", "row")); + Observer* observer = new TestObserver(); + + Column column = {"test_table", "row", "qualifier"}; + observe_columns.insert(column); + tera::Table* table = scanner.tera_client_->OpenTable("test_table", &err); + (*(scanner.table_observe_info_))["test_table"].table = table; + (*(scanner.table_observe_info_))["test_table"].type = kGlobalTransaction; + (*(scanner.table_observe_info_))["test_table"].observe_columns[column].insert(observer); + + scanner.PrepareNotifyCell(table, "row", observe_columns, notify_columns, unlocker, ¬ify_cells); + EXPECT_EQ(notify_cells.size(), 0); + + notify_columns.push_back(column); + scanner.PrepareNotifyCell(table, "row", observe_columns, notify_columns, unlocker, ¬ify_cells); + EXPECT_EQ(notify_cells.size(), 1); + EXPECT_EQ(notify_cells[0]->table, table); + EXPECT_EQ(notify_cells[0]->row, "row"); + EXPECT_EQ(notify_cells[0]->observed_column, column); + EXPECT_EQ(notify_cells[0]->observer, observer); } TEST(ScannerImpl, GetAckQualifierPrefix) { - ScannerImpl scanner; + ScannerImpl scanner; - EXPECT_EQ(scanner.GetAckQualifierPrefix("family", "qualifier"), "family:qualifier"); - EXPECT_EQ(scanner.GetAckQualifierPrefix("a", "b"), "a:b"); - EXPECT_EQ(scanner.GetAckQualifierPrefix("a:", "b"), "a::b"); - EXPECT_EQ(scanner.GetAckQualifierPrefix("a", ":b"), "a::b"); + EXPECT_EQ(scanner.GetAckQualifierPrefix("family", "qualifier"), "family:qualifier"); + EXPECT_EQ(scanner.GetAckQualifierPrefix("a", "b"), "a:b"); + EXPECT_EQ(scanner.GetAckQualifierPrefix("a:", "b"), "a::b"); + EXPECT_EQ(scanner.GetAckQualifierPrefix("a", ":b"), "a::b"); } TEST(ScannerImpl, GetAckQualifier) { - ScannerImpl scanner; + ScannerImpl scanner; - EXPECT_EQ(scanner.GetAckQualifier("prefix", "observer_name"), "prefix+ack_observer_name"); - EXPECT_EQ(scanner.GetAckQualifier("a", "b"), "a+ack_b"); - EXPECT_EQ(scanner.GetAckQualifier("a+", "b"), "a++ack_b"); - EXPECT_EQ(scanner.GetAckQualifier("a", "_b"), "a+ack__b"); - EXPECT_EQ(scanner.GetAckQualifier("a+", "_b"), "a++ack__b"); + EXPECT_EQ(scanner.GetAckQualifier("prefix", "observer_name"), "prefix+ack_observer_name"); + EXPECT_EQ(scanner.GetAckQualifier("a", "b"), "a+ack_b"); + EXPECT_EQ(scanner.GetAckQualifier("a+", "b"), "a++ack_b"); + EXPECT_EQ(scanner.GetAckQualifier("a", "_b"), "a+ack__b"); + EXPECT_EQ(scanner.GetAckQualifier("a+", "_b"), "a++ack__b"); } -} // namespace observer -} // namespace tera - +} // namespace observer +} // namespace tera diff --git a/src/observer/test/tablet_bucket_key_selector_test.cc b/src/observer/test/tablet_bucket_key_selector_test.cc new file mode 100644 index 000000000..3575310c5 --- /dev/null +++ b/src/observer/test/tablet_bucket_key_selector_test.cc @@ -0,0 +1,138 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include +#include + +#include "glog/logging.h" +#include "gtest/gtest.h" + +#include "observer/executor/tablet_bucket_key_selector.h" + +namespace tera { + +class TabletBucketKeySelectorTest : public ::testing::Test { + public: + TabletBucketKeySelectorTest() : selector_(new observer::TabletBucketKeySelector(0, 0)) {} + ~TabletBucketKeySelectorTest() { delete selector_; } + + void ResetKeySelector(int32_t bucket_id, int32_t bucket_cnt) { + selector_->bucket_id_ = bucket_id; + selector_->bucket_cnt_ = bucket_cnt; + } + void AddTabletInfo(const std::string& table_name, const std::string& start_key, + const std::string& end_key) { + TabletInfo tablet; + tablet.start_key = start_key; + tablet.end_key = end_key; + std::map>* tables = selector_->tables_.get(); + (*tables)[table_name].push_back(tablet); + } + + void AddObseverTable(const std::string& table_name) { + selector_->observe_tables_.push_back(table_name); + } + + observer::TabletBucketKeySelector& GetSelector() { return *selector_; } + + private: + observer::TabletBucketKeySelector* selector_; +}; + +TEST_F(TabletBucketKeySelectorTest, SelectRange0) { + ResetKeySelector(0, 0); + AddObseverTable("t1"); + AddTabletInfo("t2", "a", "z"); + AddTabletInfo("t3", "a", "z"); + std::string table_name, start_key, end_key; + EXPECT_FALSE(GetSelector().SelectRange(&table_name, &start_key, &end_key)); +} + +TEST_F(TabletBucketKeySelectorTest, SelectRange1) { + ResetKeySelector(1, 1); + AddObseverTable("t1"); + AddTabletInfo("t1", "", "b"); + AddTabletInfo("t1", "b", ""); + EXPECT_TRUE(2 == (*(GetSelector().tables_.get()))["t1"].size()); + std::string table_name, start_key, end_key; + EXPECT_FALSE(GetSelector().SelectRange(&table_name, &start_key, &end_key)); + EXPECT_TRUE(table_name == "t1"); + EXPECT_TRUE(start_key == ""); + EXPECT_TRUE(end_key == ""); +} + +TEST_F(TabletBucketKeySelectorTest, SelectRange2) { + ResetKeySelector(0, 1); + AddObseverTable("t1"); + AddTabletInfo("t1", "", "b"); + AddTabletInfo("t1", "b", ""); + EXPECT_TRUE(2 == (*(GetSelector().tables_.get()))["t1"].size()); + std::string table_name, start_key, end_key; + EXPECT_TRUE(GetSelector().SelectRange(&table_name, &start_key, &end_key)); + EXPECT_TRUE(table_name == "t1"); + EXPECT_TRUE(start_key == ""); + EXPECT_TRUE(end_key == ""); +} + +TEST_F(TabletBucketKeySelectorTest, SelectRange3) { + ResetKeySelector(0, 1); + AddObseverTable("t1"); + AddTabletInfo("t1", "", "b"); + AddTabletInfo("t1", "b", "c"); + AddTabletInfo("t1", "c", "d"); + AddTabletInfo("t1", "d", "e"); + AddTabletInfo("t1", "e", "f"); + AddTabletInfo("t1", "f", "g"); + AddTabletInfo("t1", "g", ""); + EXPECT_TRUE(7 == (*(GetSelector().tables_.get()))["t1"].size()); + std::string table_name, start_key, end_key; + EXPECT_TRUE(GetSelector().SelectRange(&table_name, &start_key, &end_key)); + EXPECT_TRUE(table_name == "t1"); + EXPECT_TRUE(start_key == ""); + EXPECT_TRUE(end_key == ""); +} + +TEST_F(TabletBucketKeySelectorTest, SelectRange4) { + ResetKeySelector(3, 4); + AddObseverTable("t1"); + AddTabletInfo("t1", "", "b"); + AddTabletInfo("t1", "b", "c"); + AddTabletInfo("t1", "c", "d"); + AddTabletInfo("t1", "d", "e"); + AddTabletInfo("t1", "e", "f"); + AddTabletInfo("t1", "f", "g"); + AddTabletInfo("t1", "g", ""); + EXPECT_TRUE(7 == (*(GetSelector().tables_.get()))["t1"].size()); + std::string table_name, start_key, end_key; + EXPECT_TRUE(GetSelector().SelectRange(&table_name, &start_key, &end_key)); + EXPECT_TRUE(table_name == "t1"); + EXPECT_TRUE(start_key == "g"); + EXPECT_TRUE(end_key == ""); +} + +TEST_F(TabletBucketKeySelectorTest, SelectRange5) { + ResetKeySelector(3, 100); + AddObseverTable("t1"); + AddTabletInfo("t1", "", "b"); + AddTabletInfo("t1", "b", "c"); + AddTabletInfo("t1", "c", "d"); + AddTabletInfo("t1", "d", "e"); + AddTabletInfo("t1", "e", "f"); + AddTabletInfo("t1", "f", "g"); + AddTabletInfo("t1", "g", ""); + EXPECT_TRUE(7 == (*(GetSelector().tables_.get()))["t1"].size()); + std::string table_name, start_key, end_key; + EXPECT_TRUE(GetSelector().SelectRange(&table_name, &start_key, &end_key)); + EXPECT_TRUE(table_name == "t1"); + EXPECT_TRUE(start_key == "d"); + EXPECT_TRUE(end_key == "e"); +} + +} // tera + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/proto/access_control.proto b/src/proto/access_control.proto new file mode 100644 index 000000000..b39fd1cb8 --- /dev/null +++ b/src/proto/access_control.proto @@ -0,0 +1,99 @@ +import "sofa/pbrpc/rpc_option.proto"; +import "status_code.proto"; + +package tera; + +option cc_generic_services = true; + +enum AuthPolicyType { + kNoneAuthPolicy = 1; + kUgiAuthPolicy = 2; + kGianoAuthPolicy = 3; +} + +message TableAuthPolicyInfo { + required string table_name = 1; + required AuthPolicyType auth_policy_type = 2; +} + +// User----Group----Role----Permission,一个用户属于若干产品线,且可以拥有很多Role(Role是权限的集合) +message IdentityInfo { + required AuthPolicyType auth_policy_type = 1; // policy : Giano 或者 Ugi 或者 null + required string name = 2; // name是group_name或者user_name + required bytes token = 3; // token是cred或者passwd + required string ip_addr = 4; +} + +enum UpdateAuthType { + kUpdateUgi = 1; + kDelUgi = 2; + kAddRole = 3; + kDelRole = 4; + kGrantRole = 5; + kRevokeRole = 6; +} + +message UgiInfo { + required string user_name = 1; + required string passwd = 2; +} + +message UpdateAuthInfo { + required UpdateAuthType update_type = 1; + optional UgiInfo ugi_info = 2; + optional RoleInfo role_info = 3; + optional AuthorityInfo authority_info = 4; +} + +message RoleInfo { + required string role = 1; + repeated Permission permission = 2; +} + +message AuthorityInfo { + required string user_name = 1; + required string role = 2; +} + +// Only for master meta +// Key:user_name, Value:passwd,[role1, role2, ...] +message UgiMetaInfo { + required string passwd = 1; + repeated string roles = 2; + required string user_name = 3; +} + +message Permission { + enum Action { + kRead = 1; + kWrite = 2; + kAdmin = 3; + } + enum Type { + kGlobal = 1; + kNamespace = 2; + kTable = 3; + } + required Type type = 1; + optional GlobalPermission global_permission = 2; + optional NamespacePermission namespace_permission = 3; + optional TablePermission table_permission = 4; +} + +message TablePermission { + required string namespace_name = 1; + required string table_name = 2; + // 可扩展作用到cell级别 + optional string family = 3; + optional bytes qualifier = 4; + required Permission.Action action = 5; +} + +message NamespacePermission { + required string namespace_name = 1; + required Permission.Action action = 2; +} + +message GlobalPermission { + required Permission.Action action = 1; +} diff --git a/src/proto/filter.proto b/src/proto/filter.proto new file mode 100755 index 000000000..879a2899a --- /dev/null +++ b/src/proto/filter.proto @@ -0,0 +1,82 @@ +package tera.filter; + +enum FilterValueType { + kINT64 = 50; + kUINT64 = 51; + kINT32 = 52; + kUINT32 = 53; + kINT16 = 54; + kUINT16 = 55; + kINT8 = 56; + kUINT8 = 57; + kUnknownValueType = 69; +} + +enum CompareType { + kLess = 31; + kLessOrEqual = 32; + kEqual = 33; + kNotEqual = 34; + kGreaterOrEqual = 35; + kGreater = 36; + kNoOp = 37; + +} + +message ComparatorDesc { + required ComparatorType type = 1; + optional bytes serialized_comparator = 2; + + enum ComparatorType { + kIntegerComparator = 1; + kDecimalComparator = 2; + kBinaryComparator = 3; + kUnknownComparator = 10; + } +} + +message IntegerComparatorDesc { + required FilterValueType value_type = 1; + required uint64 integer_value = 2; +} + +message DecimalComparatorDesc { + required double decimal_value = 1; +} + +message BinaryComparatorDesc { + required bytes value = 1; +} + +message ValueFilterDesc { + optional string column_family = 1; + optional bytes column_qualifier = 2; + // eg. compare_op=GREATER_OR_EQUAL means output (not filtered) if value >= ref_value, or not output (filtered) + required CompareType compare_op = 3; + required ComparatorDesc comparator = 4; + // if family or qualifier has not been found, output (means not filtered) if false, + // do not output (means filtered) if true + optional bool filter_if_missing = 5; +} + +message FilterDesc { + required FilterType type = 1; + optional bytes serialized_filter = 2; + + enum FilterType { + kFilterList = 1; + kValueFilter = 2; + kUnknownType = 20; + } +} + +message FilterListDesc { + required Operator op = 1; + repeated FilterDesc filters = 2; + + enum Operator { + kAnd = 1; + kOr = 2; + kInvalidOp = 3; + } +} diff --git a/src/proto/kv_helper.cc b/src/proto/kv_helper.cc index 814dc7dbb..911fc5008 100644 --- a/src/proto/kv_helper.cc +++ b/src/proto/kv_helper.cc @@ -10,137 +10,130 @@ namespace tera { // TABLE record -void MakeMetaTableKeyValue(const TableMeta& meta, std::string* key, - std::string* value) { - const std::string& table_name = meta.table_name(); - MakeMetaTableKey(table_name, key); - MakeMetaTableValue(meta, value); +void MakeMetaTableKeyValue(const TableMeta& meta, std::string* key, std::string* value) { + const std::string& table_name = meta.table_name(); + MakeMetaTableKey(table_name, key); + MakeMetaTableValue(meta, value); } void MakeMetaTableKey(const std::string& table_name, std::string* key) { - if (NULL != key) { - *key = "@" + table_name; - } + if (NULL != key) { + *key = "@" + table_name; + } } void MakeMetaTableValue(const TableMeta& meta, std::string* value) { - if (NULL != value) { - meta.SerializeToString(value); - } + if (NULL != value) { + meta.SerializeToString(value); + } } -void ParseMetaTableKeyValue(const std::string& key, const std::string& value, - TableMeta* meta) { - ParseMetaTableValue(value, meta); +void ParseMetaTableKeyValue(const std::string& key, const std::string& value, TableMeta* meta) { + ParseMetaTableValue(value, meta); } void ParseMetaTableKey(const std::string& key, std::string* table_name) { - if (key.size() < 2 || key[0] != '@') { - return; - } - size_t pos = key.find('@', 1); - if (pos != std::string::npos) { - return; - } - if (NULL != table_name) { - table_name->assign(key, 1, std::string::npos); - } + if (key.size() < 2 || key[0] != '@') { + return; + } + size_t pos = key.find('@', 1); + if (pos != std::string::npos) { + return; + } + if (NULL != table_name) { + table_name->assign(key, 1, std::string::npos); + } } void ParseMetaTableValue(const std::string& value, TableMeta* meta) { - if (NULL != meta) { - meta->ParseFromString(value); - } + if (NULL != meta) { + meta->ParseFromString(value); + } } // TABLET record -void MakeMetaTableKeyValue(const TabletMeta& meta, std::string* key, - std::string* value) { - const std::string& table_name = meta.table_name(); - const std::string& key_start = meta.key_range().key_start(); - MakeMetaTableKey(table_name, key_start, key); - MakeMetaTableValue(meta, value); +void MakeMetaTableKeyValue(const TabletMeta& meta, std::string* key, std::string* value) { + const std::string& table_name = meta.table_name(); + const std::string& key_start = meta.key_range().key_start(); + MakeMetaTableKey(table_name, key_start, key); + MakeMetaTableValue(meta, value); } -void MakeMetaTableKey(const std::string& table_name, - const std::string& key_start, +void MakeMetaTableKey(const std::string& table_name, const std::string& key_start, std::string* key) { - if (NULL != key) { - *key = table_name + "#" + key_start; - } + if (NULL != key) { + *key = table_name + "#" + key_start; + } } void MakeMetaTableValue(const TabletMeta& meta, std::string* value) { - if (NULL != value) { - meta.SerializeToString(value); - } + if (NULL != value) { + meta.SerializeToString(value); + } } -void ParseMetaTableKeyValue(const std::string& key, const std::string& value, - TabletMeta* meta) { - ParseMetaTableValue(value, meta); +void ParseMetaTableKeyValue(const std::string& key, const std::string& value, TabletMeta* meta) { + ParseMetaTableValue(value, meta); } -void ParseMetaTableKey(const std::string& key, std::string* table_name, - std::string* key_start) { - size_t pos = key.find('#'); - if (NULL != table_name) { - table_name->assign(key, 0, pos); - } - if (NULL != key_start) { - if (pos != std::string::npos && pos + 1 < key.size()) { - key_start->assign(key, pos + 1, std::string::npos); - } else { - key_start->clear(); - } +void ParseMetaTableKey(const std::string& key, std::string* table_name, std::string* key_start) { + size_t pos = key.find('#'); + if (NULL != table_name) { + table_name->assign(key, 0, pos); + } + if (NULL != key_start) { + if (pos != std::string::npos && pos + 1 < key.size()) { + key_start->assign(key, pos + 1, std::string::npos); + } else { + key_start->clear(); } + } } void ParseMetaTableValue(const std::string& value, TabletMeta* meta) { - if (NULL != meta) { - meta->ParseFromString(value); - } + if (NULL != meta) { + meta->ParseFromString(value); + } } -void MetaTableScanRange(const std::string& table_name, - std::string* key_start, std::string* key_end) { - if (NULL != key_start) { - *key_start = table_name + "#"; - } - if (NULL != key_end) { - *key_end = table_name + "$"; - } +void MetaTableScanRange(const std::string& table_name, std::string* key_start, + std::string* key_end) { + if (NULL != key_start) { + *key_start = table_name + "#"; + } + if (NULL != key_end) { + *key_end = table_name + "$"; + } } std::string NextKey(const std::string& key) { - //return key + "\0"; - std::string next = key; - next.push_back('\0'); - return next; -} - -void MetaTableScanRange(const std::string& table_name, - const std::string& tablet_key_start, - const std::string& tablet_key_end, - std::string* key_start, std::string* key_end) { - if (NULL != key_start) { - *key_start = table_name + "#" + tablet_key_start; - } - if (NULL != key_end) { - if (tablet_key_end.empty()) { - *key_end = table_name + "$"; - } else { - *key_end = table_name + "#" + tablet_key_end; - } + // return key + "\0"; + std::string next = key; + next.push_back('\0'); + return next; +} + +void MetaTableScanRange(const std::string& table_name, const std::string& tablet_key_start, + const std::string& tablet_key_end, std::string* key_start, + std::string* key_end) { + if (NULL != key_start) { + *key_start = table_name + "#" + tablet_key_start; + } + if (NULL != key_end) { + if (tablet_key_end.empty()) { + *key_end = table_name + "$"; + } else { + *key_end = table_name + "#" + tablet_key_end; } + } } void MetaTableListScanRange(std::string* key_start, std::string* key_end) { - key_start->assign("@"); - key_end->assign("@~"); + key_start->assign("@"); + key_end->assign("@~"); } -} // namespace tera +} // namespace tera /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/proto/kv_helper.h b/src/proto/kv_helper.h index d9b029a03..e5be976ca 100644 --- a/src/proto/kv_helper.h +++ b/src/proto/kv_helper.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_PROTO_KV_HELPER_H_ -#define TERA_PROTO_KV_HELPER_H_ +#ifndef TERA_PROTO_KV_HELPER_H_ +#define TERA_PROTO_KV_HELPER_H_ #include @@ -12,38 +12,32 @@ namespace tera { // table meta record -void MakeMetaTableKeyValue(const TableMeta& meta, std::string* key, - std::string* value); +void MakeMetaTableKeyValue(const TableMeta& meta, std::string* key, std::string* value); void MakeMetaTableKey(const std::string& table_name, std::string* key); void MakeMetaTableValue(const TableMeta& meta, std::string* value); -void ParseMetaTableKeyValue(const std::string& key, const std::string& value, - TableMeta* meta); +void ParseMetaTableKeyValue(const std::string& key, const std::string& value, TableMeta* meta); void ParseMetaTableKey(const std::string& key, std::string* table_name); void ParseMetaTableValue(const std::string& value, TableMeta* meta); // tablet meta record -void MakeMetaTableKeyValue(const TabletMeta& meta, std::string* key, - std::string* value); -void MakeMetaTableKey(const std::string& table_name, - const std::string& key_start, std::string* key); +void MakeMetaTableKeyValue(const TabletMeta& meta, std::string* key, std::string* value); +void MakeMetaTableKey(const std::string& table_name, const std::string& key_start, + std::string* key); void MakeMetaTableValue(const TabletMeta& meta, std::string* value); -void ParseMetaTableKeyValue(const std::string& key, const std::string& value, - TabletMeta* meta); -void ParseMetaTableKey(const std::string& key, std::string* table_name, - std::string* key_start); +void ParseMetaTableKeyValue(const std::string& key, const std::string& value, TabletMeta* meta); +void ParseMetaTableKey(const std::string& key, std::string* table_name, std::string* key_start); void ParseMetaTableValue(const std::string& value, TabletMeta* meta); void MetaTableScanRange(const std::string& table_name, std::string* key_start, std::string* key_end); -void MetaTableScanRange(const std::string& table_name, - const std::string& tablet_key_start, - const std::string& tablet_key_end, - std::string* key_start, std::string* key_end); +void MetaTableScanRange(const std::string& table_name, const std::string& tablet_key_start, + const std::string& tablet_key_end, std::string* key_start, + std::string* key_end); void MetaTableListScanRange(std::string* key_start, std::string* key_end); std::string NextKey(const std::string& key); -} // namespace tera +} // namespace tera #endif // TERA_PROTO_KV_HELPER_H_ diff --git a/src/proto/lb_client.cc b/src/proto/lb_client.cc index 0b70af707..77aece5c4 100644 --- a/src/proto/lb_client.cc +++ b/src/proto/lb_client.cc @@ -15,23 +15,17 @@ DECLARE_int32(tera_master_connect_timeout_period); namespace tera { namespace load_balancer { -LBClient::LBClient(const std::string& server_addr, - int32_t rpc_timeout) - : RpcClient(server_addr), - rpc_timeout_(rpc_timeout) { -} +LBClient::LBClient(const std::string& server_addr, int32_t rpc_timeout) + : RpcClient(server_addr), rpc_timeout_(rpc_timeout) {} -LBClient::~LBClient() { -} +LBClient::~LBClient() {} -bool LBClient::CmdCtrl(const CmdCtrlRequest* request, - CmdCtrlResponse* response) { - return SendMessageWithRetry(&LoadBalancerService::Stub::CmdCtrl, - request, response, - (std::function)NULL, - "CmdCtrl", rpc_timeout_); +bool LBClient::CmdCtrl(const CmdCtrlRequest* request, CmdCtrlResponse* response) { + return SendMessageWithRetry( + &LoadBalancerService::Stub::CmdCtrl, request, response, + (std::function)NULL, "CmdCtrl", + rpc_timeout_); } -} // namespace load_balancer -} // namespace tera - +} // namespace load_balancer +} // namespace tera diff --git a/src/proto/lb_client.h b/src/proto/lb_client.h index faf47b59a..9c8d6ba64 100644 --- a/src/proto/lb_client.h +++ b/src/proto/lb_client.h @@ -16,20 +16,18 @@ namespace tera { namespace load_balancer { class LBClient : public RpcClient { -public: - LBClient(const std::string& server_addr = "", - int32_t rpc_timeout = FLAGS_tera_rpc_timeout_period); - virtual ~LBClient(); + public: + LBClient(const std::string& server_addr = "", + int32_t rpc_timeout = FLAGS_tera_rpc_timeout_period); + virtual ~LBClient(); - virtual bool CmdCtrl(const CmdCtrlRequest* request, - CmdCtrlResponse* response); + virtual bool CmdCtrl(const CmdCtrlRequest* request, CmdCtrlResponse* response); -private: - int32_t rpc_timeout_; + private: + int32_t rpc_timeout_; }; -} // namespace load_balancer -} // namespace tera - -#endif // TERA_LOAD_BALANCER_LB_CLIENT_H_ +} // namespace load_balancer +} // namespace tera +#endif // TERA_LOAD_BALANCER_LB_CLIENT_H_ diff --git a/src/proto/master_client.cc b/src/proto/master_client.cc index 20a997c9d..a3378875e 100644 --- a/src/proto/master_client.cc +++ b/src/proto/master_client.cc @@ -8,7 +8,6 @@ #include "proto/master_client.h" - DECLARE_int32(tera_master_connect_retry_times); DECLARE_int32(tera_master_connect_retry_period); DECLARE_int32(tera_master_connect_timeout_period); @@ -16,116 +15,102 @@ DECLARE_int32(tera_master_connect_timeout_period); namespace tera { namespace master { -MasterClient::MasterClient(const std::string& server_addr, - int32_t rpc_timeout) - : RpcClient(server_addr), - rpc_timeout_(rpc_timeout) {} +MasterClient::MasterClient(const std::string& server_addr, int32_t rpc_timeout) + : RpcClient(server_addr), rpc_timeout_(rpc_timeout) {} MasterClient::~MasterClient() {} -bool MasterClient::CreateTable(const CreateTableRequest* request, - CreateTableResponse* response) { - return SendMessageWithRetry(&MasterServer::Stub::CreateTable, - request, response, - (std::function)NULL, - "CreateTable", rpc_timeout_); +bool MasterClient::CreateTable(const CreateTableRequest* request, CreateTableResponse* response) { + return SendMessageWithRetry( + &MasterServer::Stub::CreateTable, request, response, + (std::function)NULL, + "CreateTable", rpc_timeout_); } -bool MasterClient::DeleteTable(const DeleteTableRequest* request, - DeleteTableResponse* response) { - return SendMessageWithRetry(&MasterServer::Stub::DeleteTable, - request, response, - (std::function)NULL, - "DeleteTable", rpc_timeout_); +bool MasterClient::DeleteTable(const DeleteTableRequest* request, DeleteTableResponse* response) { + return SendMessageWithRetry( + &MasterServer::Stub::DeleteTable, request, response, + (std::function)NULL, + "DeleteTable", rpc_timeout_); } bool MasterClient::DisableTable(const DisableTableRequest* request, DisableTableResponse* response) { - return SendMessageWithRetry(&MasterServer::Stub::DisableTable, - request, response, - (std::function)NULL, - "DisableTable", rpc_timeout_); + return SendMessageWithRetry( + &MasterServer::Stub::DisableTable, request, response, + (std::function)NULL, + "DisableTable", rpc_timeout_); } -bool MasterClient::EnableTable(const EnableTableRequest* request, - EnableTableResponse* response) { - return SendMessageWithRetry(&MasterServer::Stub::EnableTable, - request, response, - (std::function)NULL, - "EnableTable", rpc_timeout_); +bool MasterClient::EnableTable(const EnableTableRequest* request, EnableTableResponse* response) { + return SendMessageWithRetry( + &MasterServer::Stub::EnableTable, request, response, + (std::function)NULL, + "EnableTable", rpc_timeout_); } -bool MasterClient::UpdateTable(const UpdateTableRequest* request, - UpdateTableResponse* response) { - return SendMessageWithRetry(&MasterServer::Stub::UpdateTable, - request, response, - (std::function)NULL, - "UpdateTable", rpc_timeout_); +bool MasterClient::UpdateTable(const UpdateTableRequest* request, UpdateTableResponse* response) { + return SendMessageWithRetry( + &MasterServer::Stub::UpdateTable, request, response, + (std::function)NULL, + "UpdateTable", rpc_timeout_); } -bool MasterClient::UpdateCheck(const UpdateCheckRequest* request, - UpdateCheckResponse* response) { - return SendMessageWithRetry(&MasterServer::Stub::UpdateCheck, - request, response, - (std::function)NULL, - "UpdateCheck", rpc_timeout_); +bool MasterClient::UpdateCheck(const UpdateCheckRequest* request, UpdateCheckResponse* response) { + return SendMessageWithRetry( + &MasterServer::Stub::UpdateCheck, request, response, + (std::function)NULL, + "UpdateCheck", rpc_timeout_); } -bool MasterClient::SearchTable(const SearchTableRequest* request, - SearchTableResponse* response) { - return SendMessageWithRetry(&MasterServer::Stub::SearchTable, - request, response, - (std::function)NULL, - "SearchTable", rpc_timeout_); +bool MasterClient::SearchTable(const SearchTableRequest* request, SearchTableResponse* response) { + return SendMessageWithRetry( + &MasterServer::Stub::SearchTable, request, response, + (std::function)NULL, + "SearchTable", rpc_timeout_); } -bool MasterClient::ShowTables(const ShowTablesRequest* request, - ShowTablesResponse* response) { - return SendMessageWithRetry(&MasterServer::Stub::ShowTables, - request, response, - (std::function)NULL, - "ShowTables", rpc_timeout_); +bool MasterClient::ShowTables(const ShowTablesRequest* request, ShowTablesResponse* response) { + return SendMessageWithRetry( + &MasterServer::Stub::ShowTables, request, response, + (std::function)NULL, "ShowTables", + rpc_timeout_); } bool MasterClient::ShowTabletNodes(const ShowTabletNodesRequest* request, ShowTabletNodesResponse* response) { - return SendMessageWithRetry(&MasterServer::Stub::ShowTabletNodes, - request, response, - (std::function)NULL, - "ShowTabletNodes", rpc_timeout_); + return SendMessageWithRetry( + &MasterServer::Stub::ShowTabletNodes, request, response, + (std::function)NULL, + "ShowTabletNodes", rpc_timeout_); } -bool MasterClient::Register(const RegisterRequest* request, - RegisterResponse* response) { - return SendMessageWithRetry(&MasterServer::Stub::Register, - request, response, - (std::function)NULL, - "Register", rpc_timeout_); +bool MasterClient::Register(const RegisterRequest* request, RegisterResponse* response) { + return SendMessageWithRetry( + &MasterServer::Stub::Register, request, response, + (std::function)NULL, "Register", + rpc_timeout_); } -bool MasterClient::Report(const ReportRequest* request, - ReportResponse* response) { - return SendMessageWithRetry(&MasterServer::Stub::Report, - request, response, - (std::function)NULL, - "Report", rpc_timeout_); +bool MasterClient::Report(const ReportRequest* request, ReportResponse* response) { + return SendMessageWithRetry(&MasterServer::Stub::Report, request, response, + (std::function)NULL, + "Report", rpc_timeout_); } -bool MasterClient::CmdCtrl(const CmdCtrlRequest* request, - CmdCtrlResponse* response) { - return SendMessageWithRetry(&MasterServer::Stub::CmdCtrl, - request, response, - (std::function)NULL, - "CmdCtrl", rpc_timeout_); +bool MasterClient::CmdCtrl(const CmdCtrlRequest* request, CmdCtrlResponse* response) { + return SendMessageWithRetry( + &MasterServer::Stub::CmdCtrl, request, response, + (std::function)NULL, "CmdCtrl", + rpc_timeout_); } -bool MasterClient::OperateUser(const OperateUserRequest* request, - OperateUserResponse* response) { - return SendMessageWithRetry(&MasterServer::Stub::OperateUser, - request, response, - (std::function)NULL, - "OperateUser", rpc_timeout_); +bool MasterClient::OperateUser(const OperateUserRequest* request, OperateUserResponse* response) { + return SendMessageWithRetry( + &MasterServer::Stub::OperateUser, request, response, + (std::function)NULL, + "OperateUser", rpc_timeout_); } -} // namespace master -} // namespace tera +} // namespace master +} // namespace tera diff --git a/src/proto/master_client.h b/src/proto/master_client.h index 6147a15fb..9fdce211a 100644 --- a/src/proto/master_client.h +++ b/src/proto/master_client.h @@ -16,54 +16,43 @@ namespace tera { namespace master { class MasterClient : public RpcClient { -public: - MasterClient(const std::string& server_addr = "", - int32_t rpc_timeout = FLAGS_tera_rpc_timeout_period); - virtual ~MasterClient(); + public: + MasterClient(const std::string& server_addr = "", + int32_t rpc_timeout = FLAGS_tera_rpc_timeout_period); + virtual ~MasterClient(); - virtual bool CreateTable(const CreateTableRequest* request, - CreateTableResponse* response); + virtual bool CreateTable(const CreateTableRequest* request, CreateTableResponse* response); - virtual bool DeleteTable(const DeleteTableRequest* request, - DeleteTableResponse* response); + virtual bool DeleteTable(const DeleteTableRequest* request, DeleteTableResponse* response); - virtual bool DisableTable(const DisableTableRequest* request, - DisableTableResponse* response); + virtual bool DisableTable(const DisableTableRequest* request, DisableTableResponse* response); - virtual bool EnableTable(const EnableTableRequest* request, - EnableTableResponse* response); + virtual bool EnableTable(const EnableTableRequest* request, EnableTableResponse* response); - virtual bool UpdateTable(const UpdateTableRequest* request, - UpdateTableResponse* response); + virtual bool UpdateTable(const UpdateTableRequest* request, UpdateTableResponse* response); - virtual bool UpdateCheck(const UpdateCheckRequest* request, - UpdateCheckResponse* response); + virtual bool UpdateCheck(const UpdateCheckRequest* request, UpdateCheckResponse* response); - virtual bool SearchTable(const SearchTableRequest* request, - SearchTableResponse* response); + virtual bool SearchTable(const SearchTableRequest* request, SearchTableResponse* response); - virtual bool ShowTables(const ShowTablesRequest* request, - ShowTablesResponse* response); + virtual bool ShowTables(const ShowTablesRequest* request, ShowTablesResponse* response); - virtual bool ShowTabletNodes(const ShowTabletNodesRequest* request, - ShowTabletNodesResponse* response); + virtual bool ShowTabletNodes(const ShowTabletNodesRequest* request, + ShowTabletNodesResponse* response); - virtual bool Register(const RegisterRequest* request, - RegisterResponse* response); + virtual bool Register(const RegisterRequest* request, RegisterResponse* response); - virtual bool Report(const ReportRequest* request, - ReportResponse* response); + virtual bool Report(const ReportRequest* request, ReportResponse* response); - virtual bool CmdCtrl(const CmdCtrlRequest* request, - CmdCtrlResponse* response); + virtual bool CmdCtrl(const CmdCtrlRequest* request, CmdCtrlResponse* response); - virtual bool OperateUser(const OperateUserRequest* request, - OperateUserResponse* response); -private: - int32_t rpc_timeout_; + virtual bool OperateUser(const OperateUserRequest* request, OperateUserResponse* response); + + private: + int32_t rpc_timeout_; }; -} // namespace -} // namespace +} // namespace +} // namespace -#endif // TERA_MASTER_MASTER_CLIENT_H_ +#endif // TERA_MASTER_MASTER_CLIENT_H_ diff --git a/src/proto/master_mutli_tenancy_client.cc b/src/proto/master_mutli_tenancy_client.cc new file mode 100644 index 000000000..04cbe7750 --- /dev/null +++ b/src/proto/master_mutli_tenancy_client.cc @@ -0,0 +1,79 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "master_mutli_tenancy_client.h" + +namespace tera { +namespace master { + +MasterMultiTenancyClient::MasterMultiTenancyClient(const std::string& server_addr, + int32_t rpc_timeout) + : RpcClient(server_addr), rpc_timeout_(rpc_timeout) {} + +MasterMultiTenancyClient::~MasterMultiTenancyClient() {} + +bool MasterMultiTenancyClient::UpdateUgi(const UpdateUgiRequest* request, + UpdateUgiResponse* response) { + return SendMessageWithRetry( + &MasterMultiTenancyService::Stub::UpdateUgi, request, response, + (std::function)NULL, "UpdateUgi", + rpc_timeout_); +} + +bool MasterMultiTenancyClient::ShowUgi(const ShowUgiRequest* request, ShowUgiResponse* response) { + return SendMessageWithRetry( + &MasterMultiTenancyService::Stub::ShowUgi, request, response, + (std::function)NULL, "ShowUgi", + rpc_timeout_); +} + +bool MasterMultiTenancyClient::UpdateAuth(const UpdateAuthRequest* request, + UpdateAuthResponse* response) { + return SendMessageWithRetry( + &MasterMultiTenancyService::Stub::UpdateAuth, request, response, + (std::function)NULL, "UpdateAuth", + rpc_timeout_); +} + +bool MasterMultiTenancyClient::ShowAuth(const ShowAuthRequest* request, + ShowAuthResponse* response) { + return SendMessageWithRetry( + &MasterMultiTenancyService::Stub::ShowAuth, request, response, + (std::function)NULL, "ShowAuth", + rpc_timeout_); +} + +bool MasterMultiTenancyClient::SetAuthPolicy(const SetAuthPolicyRequest* request, + SetAuthPolicyResponse* response) { + return SendMessageWithRetry( + &MasterMultiTenancyService::Stub::SetAuthPolicy, request, response, + (std::function)NULL, + "SetAuthPolicy", rpc_timeout_); +} + +bool MasterMultiTenancyClient::ShowAuthPolicy(const ShowAuthPolicyRequest* request, + ShowAuthPolicyResponse* response) { + return SendMessageWithRetry( + &MasterMultiTenancyService::Stub::ShowAuthPolicy, request, response, + (std::function)NULL, + "ShowAuthPolicy", rpc_timeout_); +} + +bool MasterMultiTenancyClient::SetQuota(const SetQuotaRequest* request, + SetQuotaResponse* response) { + return SendMessageWithRetry( + &MasterMultiTenancyService::Stub::SetQuota, request, response, + (std::function)NULL, "SetQuota", + rpc_timeout_); +} + +bool MasterMultiTenancyClient::ShowQuota(const ShowQuotaRequest* request, + ShowQuotaResponse* response) { + return SendMessageWithRetry( + &MasterMultiTenancyService::Stub::ShowQuota, request, response, + (std::function)NULL, "ShowQuota", + rpc_timeout_); +} +} +} diff --git a/src/proto/master_mutli_tenancy_client.h b/src/proto/master_mutli_tenancy_client.h new file mode 100644 index 000000000..2da684e50 --- /dev/null +++ b/src/proto/master_mutli_tenancy_client.h @@ -0,0 +1,38 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include +#include +#include "proto/master_rpc.pb.h" +#include "proto/rpc_client.h" + +DECLARE_int32(tera_rpc_timeout_period); + +namespace tera { +namespace master { + +class MasterMultiTenancyClient : public RpcClient { + public: + MasterMultiTenancyClient(const std::string& server_addr = "", + int32_t rpc_timeout = FLAGS_tera_rpc_timeout_period); + virtual ~MasterMultiTenancyClient(); + virtual bool UpdateUgi(const UpdateUgiRequest* request, UpdateUgiResponse* response); + virtual bool ShowUgi(const ShowUgiRequest* request, ShowUgiResponse* response); + virtual bool UpdateAuth(const UpdateAuthRequest* request, UpdateAuthResponse* response); + virtual bool ShowAuth(const ShowAuthRequest* request, ShowAuthResponse* response); + + virtual bool SetAuthPolicy(const SetAuthPolicyRequest* request, SetAuthPolicyResponse* response); + virtual bool ShowAuthPolicy(const ShowAuthPolicyRequest* request, + ShowAuthPolicyResponse* response); + + virtual bool SetQuota(const SetQuotaRequest* request, SetQuotaResponse* response); + virtual bool ShowQuota(const ShowQuotaRequest* request, ShowQuotaResponse* response); + + private: + int32_t rpc_timeout_; +}; +} +} diff --git a/src/proto/master_rpc.proto b/src/proto/master_rpc.proto index 88db126e9..423d6a63e 100644 --- a/src/proto/master_rpc.proto +++ b/src/proto/master_rpc.proto @@ -3,6 +3,8 @@ import "status_code.proto"; import "tabletnode.proto"; import "table_schema.proto"; import "table_meta.proto"; +import "access_control.proto"; +import "quota.proto"; package tera; @@ -14,6 +16,7 @@ message CreateTableRequest { optional TableSchema schema = 3; repeated bytes delimiters = 6; optional bytes user_token = 7; + optional IdentityInfo identity_info = 8; } message CreateTableResponse { @@ -26,6 +29,7 @@ message DeleteTableRequest { required uint64 sequence_id = 1; required string table_name = 2; optional bytes user_token = 3; + optional IdentityInfo identity_info = 4; } message DeleteTableResponse { @@ -37,6 +41,7 @@ message DisableTableRequest { required uint64 sequence_id = 1; required string table_name = 2; optional bytes user_token = 3; + optional IdentityInfo identity_info = 4; } message DisableTableResponse { @@ -48,6 +53,7 @@ message EnableTableRequest { required uint64 sequence_id = 1; required string table_name = 2; optional bytes user_token = 3; + optional IdentityInfo identity_info = 4; } message EnableTableResponse { @@ -60,6 +66,7 @@ message UpdateTableRequest { required string table_name = 2; optional TableSchema schema = 3; optional bytes user_token = 4; + optional IdentityInfo identity_info = 5; } message UpdateTableResponse { @@ -214,4 +221,104 @@ service MasterServer { rpc CmdCtrl(CmdCtrlRequest) returns(CmdCtrlResponse); rpc OperateUser(OperateUserRequest) returns(OperateUserResponse); } + +// Multi-Tenancy +message UpdateUgiRequest { + required uint64 sequence_id = 1; + required UpdateAuthInfo update_info = 2; + optional IdentityInfo identity_info = 3; +} + +message UpdateUgiResponse { + required uint64 sequence_id = 1; + required StatusCode status = 2; +} + +message ShowUgiRequest { + required uint64 sequence_id = 1; + optional IdentityInfo identity_info = 2; +} + +message ShowUgiResponse { + required uint64 sequence_id = 1; + required StatusCode status = 2; + repeated UgiMetaInfo ugi_meta_infos = 3; +} + +message UpdateAuthRequest { + required uint64 sequence_id = 1; + required UpdateAuthInfo update_info = 2; + optional IdentityInfo identity_info = 3; +} + +message UpdateAuthResponse { + required uint64 sequence_id = 1; + required StatusCode status = 2; +} + +message ShowAuthRequest { + required uint64 sequence_id = 1; + optional IdentityInfo identity_info = 2; +} + +message ShowAuthResponse { + required uint64 sequence_id = 1; + required StatusCode status = 2; + repeated RoleInfo role_infos = 3; +} + +message SetAuthPolicyRequest { + required TableAuthPolicyInfo table_auth_policy_info = 1; + optional IdentityInfo identity_info = 2; +} + +message SetAuthPolicyResponse { + required StatusCode status = 1; +} + +message ShowAuthPolicyRequest { + optional IdentityInfo identity_info = 1; +} + +message ShowAuthPolicyResponse { + required StatusCode status = 1; + repeated TableAuthPolicyInfo table_auth_policy_infos = 2; +} + +message SetQuotaRequest { + required uint64 sequence_id = 1; + required TableQuota table_quota = 2; +} + +message SetQuotaResponse { + required uint64 sequence_id = 1; + required StatusCode status = 2; +} + +message ShowQuotaRequest { + required uint64 sequence_id = 1; + required bool brief_show = 10 [default = true]; +} + +message ShowQuotaResponse { + required uint64 sequence_id = 1; + required StatusCode status = 2; + repeated TableQuota table_quota_list = 3; + repeated TsQuota ts_quota_list = 4; +} + +service MasterMultiTenancyService { + rpc UpdateUgi(UpdateUgiRequest) returns (UpdateUgiResponse); + rpc ShowUgi(ShowUgiRequest) returns (ShowUgiResponse); + + rpc UpdateAuth(UpdateAuthRequest) returns (UpdateAuthResponse); + rpc ShowAuth(ShowAuthRequest) returns (ShowAuthResponse); + + rpc SetAuthPolicy(SetAuthPolicyRequest) returns(SetAuthPolicyResponse); + rpc ShowAuthPolicy(ShowAuthPolicyRequest) returns (ShowAuthPolicyResponse); + + rpc SetQuota(SetQuotaRequest) returns(SetQuotaResponse); + rpc ShowQuota(ShowQuotaRequest) returns(ShowQuotaResponse); +} + option cc_generic_services = true; diff --git a/src/proto/mock_master_client.h b/src/proto/mock_master_client.h index 8c666ffb7..147e25d87 100644 --- a/src/proto/mock_master_client.h +++ b/src/proto/mock_master_client.h @@ -16,48 +16,25 @@ namespace tera { namespace master { class MockMasterClient : public MasterClient { -public: - MOCK_METHOD1(ResetMasterClient, - void(const std::string& server_addr)); - MOCK_METHOD2(CreateTable, - bool(const CreateTableRequest* request, - CreateTableResponse* response)); - MOCK_METHOD2(DeleteTable, - bool(const DeleteTableRequest* request, - DeleteTableResponse* response)); - MOCK_METHOD2(DisableTable, - bool(const DisableTableRequest* request, - DisableTableResponse* response)); - MOCK_METHOD2(EnableTable, - bool(const EnableTableRequest* request, - EnableTableResponse* response)); - MOCK_METHOD2(UpdateTable, - bool(const UpdateTableRequest* request, - UpdateTableResponse* response)); - MOCK_METHOD2(SearchTable, - bool(const SearchTableRequest* request, - SearchTableResponse* response)); - MOCK_METHOD2(CompactTable, - bool(const CompactTableRequest* request, - CompactTableResponse* response)); - MOCK_METHOD2(ShowTables, - bool(const ShowTablesRequest* request, - ShowTablesResponse* response)); - MOCK_METHOD2(MergeTable, - bool(const MergeTableRequest* request, - MergeTableResponse* response)); - MOCK_METHOD2(Register, - bool(const RegisterRequest* request, - RegisterResponse* response)); - MOCK_METHOD2(Report, - bool(const ReportRequest* request, - ReportResponse* response)); - MOCK_METHOD2(CmdCtrl, - bool(const CmdCtrlRequest* request, - CmdCtrlResponse* response)); - }; + public: + MOCK_METHOD1(ResetMasterClient, void(const std::string& server_addr)); + MOCK_METHOD2(CreateTable, bool(const CreateTableRequest* request, CreateTableResponse* response)); + MOCK_METHOD2(DeleteTable, bool(const DeleteTableRequest* request, DeleteTableResponse* response)); + MOCK_METHOD2(DisableTable, + bool(const DisableTableRequest* request, DisableTableResponse* response)); + MOCK_METHOD2(EnableTable, bool(const EnableTableRequest* request, EnableTableResponse* response)); + MOCK_METHOD2(UpdateTable, bool(const UpdateTableRequest* request, UpdateTableResponse* response)); + MOCK_METHOD2(SearchTable, bool(const SearchTableRequest* request, SearchTableResponse* response)); + MOCK_METHOD2(CompactTable, + bool(const CompactTableRequest* request, CompactTableResponse* response)); + MOCK_METHOD2(ShowTables, bool(const ShowTablesRequest* request, ShowTablesResponse* response)); + MOCK_METHOD2(MergeTable, bool(const MergeTableRequest* request, MergeTableResponse* response)); + MOCK_METHOD2(Register, bool(const RegisterRequest* request, RegisterResponse* response)); + MOCK_METHOD2(Report, bool(const ReportRequest* request, ReportResponse* response)); + MOCK_METHOD2(CmdCtrl, bool(const CmdCtrlRequest* request, CmdCtrlResponse* response)); +}; } // namespace master } // namespace tera -#endif // TERA_MASTER_MOCK_MASTER_CLIENT_H_ +#endif // TERA_MASTER_MOCK_MASTER_CLIENT_H_ diff --git a/src/proto/proto_helper.cc b/src/proto/proto_helper.cc index 76db1ad9a..0584d3c8b 100644 --- a/src/proto/proto_helper.cc +++ b/src/proto/proto_helper.cc @@ -8,44 +8,38 @@ namespace tera { -std::string StatusCodeToString(StatusCode status) { - return StatusCode_Name(status); -} +std::string StatusCodeToString(StatusCode status) { return StatusCode_Name(status); } std::string StatusCodeToString(TabletMeta::TabletStatus status) { - return TabletMeta::TabletStatus_Name(status); + return TabletMeta::TabletStatus_Name(status); } -std::string StatusCodeToString(TableStatus status) { - return TableStatus_Name(status); -} +std::string StatusCodeToString(TableStatus status) { return TableStatus_Name(status); } -std::string StatusCodeToString(CompactStatus status) { - return CompactStatus_Name(status); -} +std::string StatusCodeToString(CompactStatus status) { return CompactStatus_Name(status); } void SetStatusCode(const StatusCode& code, StatusCode* tera_status) { - if (tera_status) { - *tera_status = code; - } + if (tera_status) { + *tera_status = code; + } } void SetStatusCode(const TabletMeta::TabletStatus& tablet_status, StatusCode* tera_status) { - if (tera_status) { - *tera_status = static_cast(tablet_status); - } + if (tera_status) { + *tera_status = static_cast(tablet_status); + } } void SetStatusCode(const TableStatus& table_status, StatusCode* tera_status) { - if (tera_status) { - *tera_status = static_cast(table_status); - } + if (tera_status) { + *tera_status = static_cast(table_status); + } } void SetStatusCode(const CompactStatus& compact_status, StatusCode* tera_status) { - if (tera_status) { - *tera_status = static_cast(compact_status); - } + if (tera_status) { + *tera_status = static_cast(compact_status); + } } -} // namespace tera +} // namespace tera diff --git a/src/proto/proto_helper.h b/src/proto/proto_helper.h index 8fb15b38c..9eb0fc99b 100644 --- a/src/proto/proto_helper.h +++ b/src/proto/proto_helper.h @@ -14,8 +14,8 @@ namespace tera { -typedef ::google::protobuf::RepeatedPtrField< RowMutationSequence> RowMutationList; -typedef ::google::protobuf::RepeatedPtrField< KeyValuePair> KeyValueList; +typedef ::google::protobuf::RepeatedPtrField RowMutationList; +typedef ::google::protobuf::RepeatedPtrField KeyValueList; typedef ::google::protobuf::RepeatedPtrField< ::std::string> KeyList; typedef ::google::protobuf::RepeatedPtrField< ::tera::RowResult> RowResultList; typedef ::google::protobuf::RepeatedPtrField< ::tera::RowReaderInfo> RowReaderList; @@ -30,5 +30,5 @@ void SetStatusCode(const TabletMeta::TabletStatus& tablet_status, StatusCode* te void SetStatusCode(const TableStatus& table_status, StatusCode* tera_status); void SetStatusCode(const CompactStatus& code, StatusCode* tera_status); -} // namespace tera -#endif // TERA_PROTO_PROTO_HELPER_H_ +} // namespace tera +#endif // TERA_PROTO_PROTO_HELPER_H_ diff --git a/src/proto/quota.proto b/src/proto/quota.proto new file mode 100644 index 000000000..e22f9152a --- /dev/null +++ b/src/proto/quota.proto @@ -0,0 +1,34 @@ +package tera; + +import "table_meta.proto"; + +enum QuotaOperationType { + kQuotaWriteReqs = 1; + kQuotaWriteBytes = 2; + kQuotaReadReqs = 3; + kQuotaReadBytes = 4; + kQuotaScanReqs = 5; + kQuotaScanBytes = 6; +} + +message QuotaInfo { + required QuotaOperationType type = 1; + required int64 limit = 2; + // default 1s + required int64 period = 3 [default = 1]; +} + +message TableQuota { + enum TableQuotaType { + kSetQuota = 1; + kDelQuota = 2; // when type == kDelQuota, doesn't need quota_infos. + } + required string table_name = 1; + repeated QuotaInfo quota_infos = 2; + required TableQuotaType type = 3 [default = kSetQuota]; +} + +message TsQuota { + required string ts_addr = 1; + repeated TableQuota table_quotas = 2; +} diff --git a/src/proto/rpc_client.cc b/src/proto/rpc_client.cc index 37dfce90a..f74132296 100644 --- a/src/proto/rpc_client.cc +++ b/src/proto/rpc_client.cc @@ -12,4 +12,4 @@ sofa::pbrpc::RpcClientOptions RpcClientBase::rpc_client_options_; sofa::pbrpc::RpcClient RpcClientBase::rpc_client_; Mutex RpcClientBase::mutex_; -} // namespace tera +} // namespace tera diff --git a/src/proto/rpc_client.h b/src/proto/rpc_client.h index 9067cc96d..32defaae6 100644 --- a/src/proto/rpc_client.h +++ b/src/proto/rpc_client.h @@ -23,214 +23,203 @@ namespace tera { template struct RpcCallbackParam { - sofa::pbrpc::RpcController* rpc_controller; - const Request* request; - Response* response; - Callback closure; - std::string tips; - ThreadPool* thread_pool; - - RpcCallbackParam(sofa::pbrpc::RpcController* ctrler, const Request* req, - Response* resp, Callback cb, const std::string& str, - ThreadPool* tpool) - : rpc_controller(ctrler), request(req), response(resp), - closure(cb), tips(str), thread_pool(tpool) {} + sofa::pbrpc::RpcController* rpc_controller; + const Request* request; + Response* response; + Callback closure; + std::string tips; + ThreadPool* thread_pool; + + RpcCallbackParam(sofa::pbrpc::RpcController* ctrler, const Request* req, Response* resp, + Callback cb, const std::string& str, ThreadPool* tpool) + : rpc_controller(ctrler), + request(req), + response(resp), + closure(cb), + tips(str), + thread_pool(tpool) {} }; class RpcClientBase { -public: - static void SetOption(int32_t max_inflow, int32_t max_outflow, - int32_t pending_buffer_size, int32_t thread_num) { - channel_options_.create_with_init = false; - if (-1 != max_inflow) { - rpc_client_options_.max_throughput_in = max_inflow; - } - if (-1 != max_outflow) { - rpc_client_options_.max_throughput_out = max_outflow; - } - if (-1 != pending_buffer_size) { - rpc_client_options_.max_pending_buffer_size = pending_buffer_size; - } - if (-1 != thread_num) { - rpc_client_options_.work_thread_num = thread_num; - } - rpc_client_.ResetOptions(rpc_client_options_); - - sofa::pbrpc::RpcClientOptions new_options = rpc_client_.GetOptions(); - LOG(INFO) << "set rpc option: (" - << "max_inflow: " << new_options.max_throughput_in - << " MB/s, max_outflow: " << new_options.max_throughput_out - << " MB/s, max_pending_buffer_size: " << new_options.max_pending_buffer_size - << " MB, work_thread_num: " << new_options.work_thread_num - << ")"; + public: + static void SetOption(int32_t max_inflow, int32_t max_outflow, int32_t pending_buffer_size, + int32_t thread_num) { + channel_options_.create_with_init = false; + if (-1 != max_inflow) { + rpc_client_options_.max_throughput_in = max_inflow; } - - RpcClientBase() : rpc_channel_(NULL) {} - virtual ~RpcClientBase() {} - -protected: - virtual void ResetClient(const std::string& server_addr) { - std::map::iterator it; - mutex_.Lock(); - it = rpc_channel_list_.find(server_addr); - if (it != rpc_channel_list_.end()) { - rpc_channel_ = it->second; - } else { - sofa::pbrpc::RpcChannel* c = new sofa::pbrpc::RpcChannel(&rpc_client_, - server_addr, - channel_options_); - if (c->Init()) { - rpc_channel_ = rpc_channel_list_[server_addr] = c; - } else { - delete c; - rpc_channel_ = NULL; - } - } - mutex_.Unlock(); + if (-1 != max_outflow) { + rpc_client_options_.max_throughput_out = max_outflow; + } + if (-1 != pending_buffer_size) { + rpc_client_options_.max_pending_buffer_size = pending_buffer_size; + } + if (-1 != thread_num) { + rpc_client_options_.work_thread_num = thread_num; } + rpc_client_.ResetOptions(rpc_client_options_); + + sofa::pbrpc::RpcClientOptions new_options = rpc_client_.GetOptions(); + LOG(INFO) << "set rpc option: (" + << "max_inflow: " << new_options.max_throughput_in + << " MB/s, max_outflow: " << new_options.max_throughput_out + << " MB/s, max_pending_buffer_size: " << new_options.max_pending_buffer_size + << " MB, work_thread_num: " << new_options.work_thread_num << ")"; + } + + RpcClientBase() : rpc_channel_(NULL) {} + virtual ~RpcClientBase() {} + + protected: + virtual void ResetClient(const std::string& server_addr) { + std::map::iterator it; + mutex_.Lock(); + it = rpc_channel_list_.find(server_addr); + if (it != rpc_channel_list_.end()) { + rpc_channel_ = it->second; + } else { + sofa::pbrpc::RpcChannel* c = + new sofa::pbrpc::RpcChannel(&rpc_client_, server_addr, channel_options_); + if (c->Init()) { + rpc_channel_ = rpc_channel_list_[server_addr] = c; + } else { + delete c; + rpc_channel_ = NULL; + } + } + mutex_.Unlock(); + } -protected: - sofa::pbrpc::RpcChannel* rpc_channel_; + protected: + sofa::pbrpc::RpcChannel* rpc_channel_; - static sofa::pbrpc::RpcChannelOptions channel_options_; - static std::map rpc_channel_list_; - static sofa::pbrpc::RpcClientOptions rpc_client_options_; - static sofa::pbrpc::RpcClient rpc_client_; - static Mutex mutex_; + static sofa::pbrpc::RpcChannelOptions channel_options_; + static std::map rpc_channel_list_; + static sofa::pbrpc::RpcClientOptions rpc_client_options_; + static sofa::pbrpc::RpcClient rpc_client_; + static Mutex mutex_; }; -template +template class RpcClient : public RpcClientBase { -public: - RpcClient(const std::string& addr) - : sync_call_failed(false) { - ResetClient(addr); - } - virtual ~RpcClient() {} + public: + RpcClient(const std::string& addr) : sync_call_failed(false) { ResetClient(addr); } + virtual ~RpcClient() {} - std::string GetConnectAddr() const { - return server_addr_; - } + std::string GetConnectAddr() const { return server_addr_; } -protected: - virtual void ResetClient(const std::string& server_addr) { - if (server_addr_ == server_addr) { - // VLOG(5) << "address [" << server_addr << "] not be applied"; - return; - } - /* - IpAddress ip_address(server_addr); - if (!ip_address.IsValid()) { - LOG(ERROR) << "invalid address: " << server_addr; - return; - } - */ - RpcClientBase::ResetClient(server_addr); - if (rpc_channel_ == NULL) { - server_client_.reset(NULL); - } else { - server_client_.reset(new ServerType(rpc_channel_)); - } - server_addr_ = server_addr; - // VLOG(5) << "reset connected address to: " << server_addr; + protected: + virtual void ResetClient(const std::string& server_addr) { + if (server_addr_ == server_addr) { + // VLOG(5) << "address [" << server_addr << "] not be applied"; + return; } - - template - bool SendMessageWithRetry(void(ServerType::*func)( - google::protobuf::RpcController*, const Request*, - Response*, google::protobuf::Closure*), - const Request* request, Response* response, - Callback closure, const std::string& tips, - int32_t rpc_timeout, ThreadPool* thread_pool = 0) { - if (NULL == server_client_.get()) { - // sync call - if (!closure) { - return false; - } - - // async call - ThreadPool::Task callback = - std::bind(&RpcClient::template UserCallback, - request, response, closure, true, - (int)sofa::pbrpc::RPC_ERROR_RESOLVE_ADDRESS); - thread_pool->AddTask(callback); - return true; - } - sofa::pbrpc::RpcController* rpc_controller = - new sofa::pbrpc::RpcController; - rpc_controller->SetTimeout(rpc_timeout); - RpcCallbackParam* param = - new RpcCallbackParam(rpc_controller, - request, response, closure, tips, thread_pool); - google::protobuf::Closure* done = google::protobuf::NewCallback( - &RpcClient::template RpcCallback, - this, param); - (server_client_.get()->*func)(rpc_controller, request, response, done); - - // sync call - if (!closure) { - sync_call_event.Wait(); - return (!sync_call_failed); - } - - // async call - return true; + /* + IpAddress ip_address(server_addr); + if (!ip_address.IsValid()) { + LOG(ERROR) << "invalid address: " << server_addr; + return; } - - template - static void RpcCallback(RpcClient* client, - RpcCallbackParam* param) { - sofa::pbrpc::RpcController* rpc_controller = param->rpc_controller; - const Request* request = param->request; - Response* response = param->response; - Callback closure = param->closure; - ThreadPool* thread_pool = param->thread_pool; - - bool failed = rpc_controller->Failed(); - int error = rpc_controller->ErrorCode(); - if (failed) { - // LOG(ERROR) << "RpcRequest failed: " << param->tip - // << ". Reason: " << rpc_controller->ErrorText(); - } - delete rpc_controller; - delete param; - - // sync call - if (!closure) { - client->sync_call_failed = failed; - client->sync_call_event.Set(); - return; - } - - // async call - ThreadPool::Task done = - std::bind(&RpcClient::template UserCallback, - request, response, closure, failed, error); - thread_pool->AddTask(done); + */ + RpcClientBase::ResetClient(server_addr); + if (rpc_channel_ == NULL) { + server_client_.reset(NULL); + } else { + server_client_.reset(new ServerType(rpc_channel_)); } - - template - static void UserCallback(const Request* request, Response* response, - Callback closure, bool failed, int error) { - closure((Request*)request, response, failed, error); + server_addr_ = server_addr; + // VLOG(5) << "reset connected address to: " << server_addr; + } + + template + bool SendMessageWithRetry(void (ServerType::*func)(google::protobuf::RpcController*, + const Request*, Response*, + google::protobuf::Closure*), + const Request* request, Response* response, Callback closure, + const std::string& tips, int32_t rpc_timeout, + ThreadPool* thread_pool = 0) { + if (NULL == server_client_.get()) { + // sync call + if (!closure) { + return false; + } + + // async call + ThreadPool::Task callback = + std::bind(&RpcClient::template UserCallback, request, + response, closure, true, (int)sofa::pbrpc::RPC_ERROR_RESOLVE_ADDRESS); + thread_pool->AddTask(callback); + return true; } - - virtual bool PollAndResetServerAddr() { - return true; + sofa::pbrpc::RpcController* rpc_controller = new sofa::pbrpc::RpcController; + rpc_controller->SetTimeout(rpc_timeout); + RpcCallbackParam* param = + new RpcCallbackParam(rpc_controller, request, response, + closure, tips, thread_pool); + google::protobuf::Closure* done = google::protobuf::NewCallback( + &RpcClient::template RpcCallback, this, param); + (server_client_.get()->*func)(rpc_controller, request, response, done); + + // sync call + if (!closure) { + sync_call_event.Wait(); + return (!sync_call_failed); } - virtual bool IsRetryStatus(const StatusCode& status) { - return false; + // async call + return true; + } + + template + static void RpcCallback(RpcClient* client, + RpcCallbackParam* param) { + sofa::pbrpc::RpcController* rpc_controller = param->rpc_controller; + const Request* request = param->request; + Response* response = param->response; + Callback closure = param->closure; + ThreadPool* thread_pool = param->thread_pool; + + bool failed = rpc_controller->Failed(); + int error = rpc_controller->ErrorCode(); + if (failed) { + // LOG(ERROR) << "RpcRequest failed: " << param->tips + // << ". Reason: " << rpc_controller->ErrorText(); + } + delete rpc_controller; + delete param; + + // sync call + if (!closure) { + client->sync_call_failed = failed; + client->sync_call_event.Set(); + return; } -private: - scoped_ptr server_client_; - std::string server_addr_; + // async call + ThreadPool::Task done = + std::bind(&RpcClient::template UserCallback, request, response, + closure, failed, error); + thread_pool->AddTask(done); + } + + template + static void UserCallback(const Request* request, Response* response, Callback closure, + bool failed, int error) { + closure((Request*)request, response, failed, error); + } + + virtual bool PollAndResetServerAddr() { return true; } + + virtual bool IsRetryStatus(const StatusCode& status) { return false; } + + private: + scoped_ptr server_client_; + std::string server_addr_; - bool sync_call_failed; - AutoResetEvent sync_call_event; + bool sync_call_failed; + AutoResetEvent sync_call_event; }; -} // namespace tera +} // namespace tera -#endif // TERA_RPC_CLIENT_ASYNC_H_ +#endif // TERA_RPC_CLIENT_ASYNC_H_ diff --git a/src/proto/status_code.proto b/src/proto/status_code.proto index 8a9c48737..6c13469a5 100644 --- a/src/proto/status_code.proto +++ b/src/proto/status_code.proto @@ -19,20 +19,26 @@ enum StatusCode { // tablet node manage kTabletNodeReady = 30; - kTabletNodeOffLine = 31; + kTabletNodeOffline = 31; kTabletNodeOnKick = 32; kTabletNodeWaitKick = 33; - kTabletNodePendingOffLine = 34; + kTabletNodePendingOffline = 34; + kTabletNodeKicked = 35; + // response + kTabletNodeOk = 40; // table status kTabletNotInit = 41; kTabletReady = 42; kTabletOnLoad = 43; kTabletOnSplit = 44; - kTabletSplited = 51; - kTabletOnMerge = 50; - kTabletUnLoading = 52; - kTabletUnLoading2 = 68; + kTabletMerged = 50; + kTabletSplitted = 51; + kTabletUnloading = 52; + kTabletUnloading2 = 68; + kTabletWaitLoad = 69; + kTabletWaitUnload = 70; + kTableNotFound = 45; kTableCorrupt = 46; @@ -43,9 +49,15 @@ enum StatusCode { kIllegalAccess = 71; kNotPermission = 72; kIOError = 73; + kMismatchAuthType = 74; kTxnFail = 80; + // quota + kQuotaLimited = 90; + kQuotaInvalidArg = 91; + kFlowControlLimited = 92; + //// master rpc //// // create&update table @@ -70,9 +82,6 @@ enum StatusCode { //// tablet node rpc //// - // response - kTabletNodeOk = 40; - // key kKeyNotExist = 402; kKeyNotInRange = 403; diff --git a/src/proto/table_meta.proto b/src/proto/table_meta.proto index 5bec599e4..3fa2fdb37 100644 --- a/src/proto/table_meta.proto +++ b/src/proto/table_meta.proto @@ -1,155 +1,163 @@ import "status_code.proto"; import "table_schema.proto"; +import "access_control.proto"; package tera; message KeyRange { - optional bytes key_start = 1; - optional bytes key_end = 2; + optional bytes key_start = 1; + optional bytes key_end = 2; } message RowResult { - repeated KeyValuePair key_values = 3; + repeated KeyValuePair key_values = 3; } message BytesList { - repeated bytes content = 1; - repeated StatusCode status = 2; - repeated RowResult row_result = 3; + repeated bytes content = 1; + repeated StatusCode status = 2; + repeated RowResult row_result = 3; } message KeyValuePair { - optional bytes key = 1; - optional bytes value = 2; - optional bool del = 3; - optional bytes column_family = 4; - optional bytes qualifier = 5; - optional int64 timestamp = 6; + optional bytes key = 1; + optional bytes value = 2; + optional bool del = 3; + optional bytes column_family = 4; + optional bytes qualifier = 5; + optional int64 timestamp = 6; } message TabletLocation { - required KeyRange key_range = 1; - required string server_addr = 2; + required KeyRange key_range = 1; + required string server_addr = 2; } message ScanOption { - optional KeyRange key_range = 1; - optional int32 max_version = 2; - optional int64 size_limit = 3; - optional bytes start = 4; - optional bytes end = 5; - optional string start_family = 6; - optional bytes start_qualifier = 7; - optional uint64 snapshot_id = 8; - optional bool round_down = 9; + optional KeyRange key_range = 1; + optional int32 max_version = 2; + optional int64 size_limit = 3; + optional bytes start = 4; + optional bytes end = 5; + optional string start_family = 6; + optional bytes start_qualifier = 7; + optional uint64 snapshot_id = 8; + optional bool round_down = 9; } message TabletCounter { - optional uint32 low_read_cell = 1; - optional uint32 scan_rows = 2; - optional uint32 scan_kvs = 3; - optional uint32 scan_size = 4; - optional uint32 read_rows = 5; - optional uint32 read_kvs = 6; - optional uint32 read_size = 7; - optional uint32 write_rows = 8; - optional uint32 write_kvs = 9; - optional uint32 write_size = 10; - optional double write_workload = 11 [default = 0.0]; - - optional bool is_on_busy = 15 [default = false]; - optional TabletMeta.TabletStatus db_status = 16; + optional uint32 low_read_cell = 1; + optional uint32 scan_rows = 2; + optional uint32 scan_kvs = 3; + optional uint32 scan_size = 4; + optional uint32 read_rows = 5; + optional uint32 read_kvs = 6; + optional uint32 read_size = 7; + optional uint32 write_rows = 8; + optional uint32 write_kvs = 9; + optional uint32 write_size = 10; + optional double write_workload = 11 [default = 0.0]; + + optional bool is_on_busy = 15 [default = false]; + optional TabletMeta.TabletStatus db_status = 16; } message TableCounter { - optional int64 lread = 1; - optional int64 scan_rows = 2; - optional int64 scan_max = 3; - optional int64 scan_size = 4; - optional int64 read_rows = 5; - optional int64 read_max = 6; - optional int64 read_size = 7; - optional int64 write_rows = 8; - optional int64 write_max = 9; - optional int64 write_size = 10; - - optional int64 tablet_num = 20; - optional int64 notready_num = 21; - optional int64 size = 22; - repeated int64 lg_size = 23; + optional int64 lread = 1; + optional int64 scan_rows = 2; + optional int64 scan_max = 3; + optional int64 scan_size = 4; + optional int64 read_rows = 5; + optional int64 read_max = 6; + optional int64 read_size = 7; + optional int64 write_rows = 8; + optional int64 write_max = 9; + optional int64 write_size = 10; + + optional int64 tablet_num = 20; + optional int64 notready_num = 21; + optional int64 size = 22; + repeated int64 lg_size = 23; } message TableMeta { - optional string table_name = 1; - optional TableStatus status = 2; - optional TableSchema schema = 3; - optional uint64 create_time = 5; + optional string table_name = 1; + optional TableStatus status = 2; + optional TableSchema schema = 3; + optional uint64 create_time = 5; + optional AuthPolicyType auth_policy_type = 8; } message TabletMeta { - enum TabletStatus { - kTabletNotInit = 41; - kTabletOffline = 57; - kTabletReady = 42; - //kTableUnLoad = 49; - //kTableWaitLoad = 58; - kTabletLoading = 43; - kTabletLoadFail = 60; - //kTableWaitSplit = 59; - kTabletOnSplit = 44; - kTabletSplited = 51; - //kTableSplitFail = 61; - kTabletUnloading = 52; - kTabletUnloading2 = 68; - kTabletUnloadFail = 62; - //kTableOnMerge = 50; - kTabletDisable = 64; - kTabletDelayOffline = 65; - //kTabletOnSnapshot = 66; - //kTabletDelSnapshot = 67; - kTabletCorruption = 90; - }; - required string table_name = 1; - required string path = 2; - required KeyRange key_range = 3; - optional string server_addr = 4; - optional TabletStatus status = 5; - optional int64 size = 6; - optional bool compress = 7; // for Compatible - optional CompactStatus compact_status = 8 [default = kTableNotCompact]; - optional StoreMedium store_medium = 9 [default = DiskStore]; // for Compatible - repeated uint64 parent_tablets = 12; - repeated int64 lg_size = 13; - optional int64 last_move_time_us = 15; + enum TabletStatus { + kTabletNotInit = 41; + kTabletReady = 42; + kTabletLoading = 43; + kTabletOnSplit = 44; + // kTableUnLoad = 49; + kTabletMerged = 50; + kTabletSplitted = 51; + kTabletUnloading = 52; + kTabletOffline = 57; + // kTableWaitLoad = 58; + // kTableWaitSplit = 59; + kTabletLoadFail = 60; + // kTableSplitFail = 61; + kTabletUnloadFail = 62; + // kTableOnMerge = 50; + kTabletDisable = 64; + kTabletDelayOffline = 65; + // kTabletOnSnapshot = 66; + kTabletUnloading2 = 68; + // kTabletDelSnapshot= 67; + kTabletCorruption = 90; + kTabletManifestError = 91; + }; + required string table_name = 1; + required string path = 2; + required KeyRange key_range = 3; + optional string server_addr = 4; + optional TabletStatus status = 5; + optional int64 size = 6; + optional bool compress = 7; // for Compatible + optional CompactStatus compact_status = 8 [default = kTableNotCompact]; + optional StoreMedium store_medium = 9 + [default = DiskStore]; // for Compatible + repeated uint64 parent_tablets = 12; + repeated int64 lg_size = 13; + optional int64 last_move_time_us = 15; + optional int64 data_size_on_flash = 16; + optional int64 create_time = 17; + optional uint64 version = 18; } message TableMetaList { - repeated TableMeta meta = 1; - repeated TableCounter counter = 2; + repeated TableMeta meta = 1; + repeated TableCounter counter = 2; } message TabletMetaList { - repeated TabletMeta meta = 1; - repeated TabletCounter counter = 2; - repeated int64 timestamp = 3; // meta update timestamp + repeated TabletMeta meta = 1; + repeated TabletCounter counter = 2; + repeated int64 timestamp = 3; // meta update timestamp } message SdkTabletCookie { - optional TabletMeta meta = 1; - optional int64 update_time = 2; - optional int32 status = 3; + optional TabletMeta meta = 1; + optional int64 update_time = 2; + optional int32 status = 3; } message SdkCookie { - required string table_name = 1; - repeated SdkTabletCookie tablets = 2; + required string table_name = 1; + repeated SdkTabletCookie tablets = 2; } message PrimaryInfo { - optional string table_name = 1; - optional bytes row_key = 2; - optional bytes column_family = 3; - optional bytes qualifier = 4; - optional int64 gtxn_start_ts = 5; - optional string client_session = 6; + optional string table_name = 1; + optional bytes row_key = 2; + optional bytes column_family = 3; + optional bytes qualifier = 4; + optional int64 gtxn_start_ts = 5; + optional string client_session = 6; } diff --git a/src/proto/table_schema.proto b/src/proto/table_schema.proto index 62c716c53..fbb7137f8 100644 --- a/src/proto/table_schema.proto +++ b/src/proto/table_schema.proto @@ -58,6 +58,8 @@ message TableSchema { optional string alias = 13; // table alias optional string admin = 14; optional bool enable_txn = 15 [default = false]; + optional bool enable_hash = 16 [default = false]; + optional uint32 bloom_filter_bits_per_key = 17 [default = 10]; // deprecated, instead by raw_key GeneralKv optional bool kv_only = 9 [default = false]; diff --git a/src/proto/tabletnode.proto b/src/proto/tabletnode.proto old mode 100644 new mode 100755 index a0441d067..63a1830d3 --- a/src/proto/tabletnode.proto +++ b/src/proto/tabletnode.proto @@ -3,71 +3,74 @@ import "status_code.proto"; package tera; message ExtraTsInfo { - optional string name = 1; - optional uint64 value = 2; + optional string name = 1; + optional uint64 value = 2; } message TabletNodeInfo { - required string addr = 1; - optional StatusCode status_t = 2; - optional uint64 load = 3; - optional uint64 timestamp = 4; - optional uint32 tablet_total = 5; - optional uint32 tablet_onbusy = 6; - optional uint32 tablet_corruption = 7; + required string addr = 1; + optional StatusCode status_t = 2; + optional uint64 load = 3; + optional uint64 timestamp = 4; + optional uint32 tablet_total = 5; + optional uint32 tablet_onbusy = 6; + optional uint32 tablet_corruption = 7; - optional uint32 low_read_cell = 11; - optional uint32 scan_rows = 12; - optional uint32 scan_kvs = 13; - optional uint32 scan_size = 14; - optional uint32 read_rows = 15; - optional uint32 read_kvs = 16; - optional uint32 read_size = 17; - optional uint32 write_rows = 18; - optional uint32 write_kvs = 19; - optional uint32 write_size = 20; + optional uint32 low_read_cell = 11; + optional uint32 scan_rows = 12; + optional uint32 scan_kvs = 13; + optional uint32 scan_size = 14; + optional uint32 read_rows = 15; + optional uint32 read_kvs = 16; + optional uint32 read_size = 17; + optional uint32 write_rows = 18; + optional uint32 write_kvs = 19; + optional uint32 write_size = 20; - optional uint64 mem_used = 21; - optional uint32 net_tx = 22; - optional uint32 net_rx = 23; - optional uint32 dfs_io_r = 24; - optional uint32 dfs_io_w = 25; - optional uint32 local_io_r = 26; - optional uint32 local_io_w = 27; + optional uint64 mem_used = 21; + optional uint32 net_tx = 22; + optional uint32 net_rx = 23; + optional uint32 dfs_io_r = 24 [default = 0]; + optional uint32 dfs_io_w = 25 [default = 0]; + optional uint32 local_io_r = 26; + optional uint32 local_io_w = 27; + optional uint32 dfs_master_qps = 28 [default = 0]; // Operations through dfs's master, including close, open, delete - optional string status_m = 31; - optional uint32 tablet_onload = 32; - optional uint32 tablet_onsplit = 33; - optional uint32 tablet_unloading = 34; + optional string status_m = 31; + optional uint32 tablet_onload = 32; + optional uint32 tablet_onsplit = 33; + optional uint32 tablet_unloading = 34; - repeated ExtraTsInfo extra_info = 40; + repeated ExtraTsInfo extra_info = 40; - optional uint32 read_pending = 41; - optional uint32 write_pending = 42; - optional uint32 scan_pending = 43; + optional uint32 read_pending = 41; + optional uint32 write_pending = 42; + optional uint32 scan_pending = 43; - optional float cpu_usage = 44; - optional int64 process_start_time = 45; // Unix time in us + optional float cpu_usage = 44; + optional int64 process_start_time = 45; // Unix time in us + + optional uint64 persistent_cache_size = 46; } message LgInheritedLiveFiles { - required uint32 lg_no = 1; - repeated uint64 file_number = 2; // full file number, include tablet number + required uint32 lg_no = 1; + repeated uint64 file_number = 2; // full file number, include tablet number } message InheritedLiveFiles { - required string table_name = 1; - repeated LgInheritedLiveFiles lg_live_files = 2; + required string table_name = 1; + repeated LgInheritedLiveFiles lg_live_files = 2; } message TabletInheritedFileInfo { - optional string table_name = 1; - optional bytes key_start = 2; - optional bytes key_end = 3; - repeated LgInheritedLiveFiles lg_inh_files = 4; + optional string table_name = 1; + optional bytes key_start = 2; + optional bytes key_end = 3; + repeated LgInheritedLiveFiles lg_inh_files = 4; } message TabletBackgroundErrorInfo { - required string tablet_name = 1; - required bytes detail_info = 2; + required string tablet_name = 1; + required bytes detail_info = 2; } diff --git a/src/proto/tabletnode_client.cc b/src/proto/tabletnode_client.cc index 7345ae50c..777dfa04d 100644 --- a/src/proto/tabletnode_client.cc +++ b/src/proto/tabletnode_client.cc @@ -8,13 +8,11 @@ namespace tera { namespace tabletnode { void TabletNodeClient::SetRpcOption(int32_t max_inflow, int32_t max_outflow, - int32_t pending_buffer_size, int32_t thread_num) { - RpcClientBase::SetOption(max_inflow, max_outflow, - pending_buffer_size, thread_num); + int32_t pending_buffer_size, int32_t thread_num) { + RpcClientBase::SetOption(max_inflow, max_outflow, pending_buffer_size, thread_num); } -TabletNodeClient::TabletNodeClient(ThreadPool* thread_pool, - const std::string& server_addr, +TabletNodeClient::TabletNodeClient(ThreadPool* thread_pool, const std::string& server_addr, int32_t rpc_timeout) : RpcClient(server_addr), rpc_timeout_(rpc_timeout), @@ -22,94 +20,78 @@ TabletNodeClient::TabletNodeClient(ThreadPool* thread_pool, TabletNodeClient::~TabletNodeClient() {} -bool TabletNodeClient::LoadTablet(const LoadTabletRequest* request, - LoadTabletResponse* response, - std::function done) { - return SendMessageWithRetry(&TabletNodeServer::Stub::LoadTablet, - request, response, done, "LoadTablet", - rpc_timeout_, thread_pool_); +bool TabletNodeClient::LoadTablet( + const LoadTabletRequest* request, LoadTabletResponse* response, + std::function done) { + return SendMessageWithRetry(&TabletNodeServer::Stub::LoadTablet, request, response, done, + "LoadTablet", rpc_timeout_, thread_pool_); } -bool TabletNodeClient::UnloadTablet(const UnloadTabletRequest* request, - UnloadTabletResponse* response, - std::function done) { - return SendMessageWithRetry(&TabletNodeServer::Stub::UnloadTablet, - request, response, done, "UnloadTablet", - rpc_timeout_, thread_pool_); +bool TabletNodeClient::UnloadTablet( + const UnloadTabletRequest* request, UnloadTabletResponse* response, + std::function done) { + return SendMessageWithRetry(&TabletNodeServer::Stub::UnloadTablet, request, response, done, + "UnloadTablet", rpc_timeout_, thread_pool_); } -bool TabletNodeClient::ReadTablet(const ReadTabletRequest* request, - ReadTabletResponse* response, - std::function done) { - return SendMessageWithRetry(&TabletNodeServer::Stub::ReadTablet, - request, response, done, "ReadTablet", - rpc_timeout_, thread_pool_); +bool TabletNodeClient::ReadTablet( + const ReadTabletRequest* request, ReadTabletResponse* response, + std::function done) { + return SendMessageWithRetry( + &TabletNodeServer::Stub::ReadTablet, request, response, done, "ReadTablet", + request->has_client_timeout_ms() ? request->client_timeout_ms() : rpc_timeout_, thread_pool_); } -bool TabletNodeClient::WriteTablet(const WriteTabletRequest* request, - WriteTabletResponse* response, - std::function done) { - return SendMessageWithRetry(&TabletNodeServer::Stub::WriteTablet, - request, response, done, "WriteTablet", - rpc_timeout_, thread_pool_); +bool TabletNodeClient::WriteTablet( + const WriteTabletRequest* request, WriteTabletResponse* response, + std::function done) { + return SendMessageWithRetry( + &TabletNodeServer::Stub::WriteTablet, request, response, done, "WriteTablet", + request->has_client_timeout_ms() ? request->client_timeout_ms() : rpc_timeout_, thread_pool_); } -bool TabletNodeClient::ScanTablet(const ScanTabletRequest* request, - ScanTabletResponse* response, - std::function done) { - return SendMessageWithRetry(&TabletNodeServer::Stub::ScanTablet, - request, response, done, "ScanTablet", - rpc_timeout_, thread_pool_); +bool TabletNodeClient::ScanTablet( + const ScanTabletRequest* request, ScanTabletResponse* response, + std::function done) { + return SendMessageWithRetry( + &TabletNodeServer::Stub::ScanTablet, request, response, done, "ScanTablet", + request->has_timeout() ? request->timeout() : rpc_timeout_, thread_pool_); } -bool TabletNodeClient::Query(ThreadPool* thread_pool, - const QueryRequest* request, +bool TabletNodeClient::Query(ThreadPool* thread_pool, const QueryRequest* request, QueryResponse* response, - std::function done) { - return SendMessageWithRetry(&TabletNodeServer::Stub::Query, - request, response, done, "Query", - rpc_timeout_, thread_pool); + std::function done) { + return SendMessageWithRetry(&TabletNodeServer::Stub::Query, request, response, done, "Query", + rpc_timeout_, thread_pool); } -bool TabletNodeClient::SplitTablet(const SplitTabletRequest* request, - SplitTabletResponse* response, - std::function done) { - return SendMessageWithRetry(&TabletNodeServer::Stub::SplitTablet, - request, response, done, "SplitTablet", - rpc_timeout_, thread_pool_); +bool TabletNodeClient::ComputeSplitKey( + const SplitTabletRequest* request, SplitTabletResponse* response, + std::function done) { + return SendMessageWithRetry(&TabletNodeServer::Stub::ComputeSplitKey, request, response, done, + "ComputeSplitKey", rpc_timeout_, thread_pool_); } -bool TabletNodeClient::ComputeSplitKey(const SplitTabletRequest* request, - SplitTabletResponse* response, - std::function done) { - return SendMessageWithRetry(&TabletNodeServer::Stub::ComputeSplitKey, - request, response, done, "ComputeSplitKey", - rpc_timeout_, thread_pool_); +bool TabletNodeClient::CompactTablet( + const CompactTabletRequest* request, CompactTabletResponse* response, + std::function done) { + return SendMessageWithRetry(&TabletNodeServer::Stub::CompactTablet, request, response, done, + "CompactTablet", rpc_timeout_, thread_pool_); } -bool TabletNodeClient::CompactTablet(const CompactTabletRequest* request, - CompactTabletResponse* response, - std::function done) { - return SendMessageWithRetry(&TabletNodeServer::Stub::CompactTablet, - request, response, done, "CompactTablet", - rpc_timeout_, thread_pool_); +bool TabletNodeClient::Update( + const UpdateRequest* request, UpdateResponse* response, + std::function done) { + return SendMessageWithRetry(&TabletNodeServer::Stub::Update, request, response, done, "Update", + rpc_timeout_, thread_pool_); } -bool TabletNodeClient::Update(const UpdateRequest* request, - UpdateResponse* response, - std::function done) { - return SendMessageWithRetry(&TabletNodeServer::Stub::Update, - request, response, done, "Update", - rpc_timeout_, thread_pool_); +bool TabletNodeClient::CmdCtrl( + const TsCmdCtrlRequest* request, TsCmdCtrlResponse* response, + std::function done) { + return SendMessageWithRetry(&TabletNodeServer::Stub::CmdCtrl, request, response, done, + "TsCmdCtrl", rpc_timeout_, thread_pool_); } -bool TabletNodeClient::CmdCtrl(const TsCmdCtrlRequest* request, - TsCmdCtrlResponse* response, - std::function done) { - return SendMessageWithRetry(&TabletNodeServer::Stub::CmdCtrl, - request, response, done, "TsCmdCtrl", - rpc_timeout_, thread_pool_); -} - -} // namespace tabletnode -} // namespace tera +} // namespace tabletnode +} // namespace tera diff --git a/src/proto/tabletnode_client.h b/src/proto/tabletnode_client.h index 010aba2ff..2b1ef9879 100644 --- a/src/proto/tabletnode_client.h +++ b/src/proto/tabletnode_client.h @@ -19,64 +19,57 @@ namespace tera { namespace tabletnode { class TabletNodeClient : public RpcClient { -public: - static void SetRpcOption(int32_t max_inflow = -1, int32_t max_outflow = -1, - int32_t pending_buffer_size = -1, - int32_t thread_num = -1); + public: + static void SetRpcOption(int32_t max_inflow = -1, int32_t max_outflow = -1, + int32_t pending_buffer_size = -1, int32_t thread_num = -1); - TabletNodeClient(ThreadPool* thread_pool, const std::string& addr = "", - int32_t rpc_timeout = FLAGS_tera_rpc_timeout_period); + TabletNodeClient(ThreadPool* thread_pool, const std::string& addr = "", + int32_t rpc_timeout = FLAGS_tera_rpc_timeout_period); - ~TabletNodeClient(); + ~TabletNodeClient(); - bool LoadTablet(const LoadTabletRequest* request, - LoadTabletResponse* response, - std::function done = NULL); + bool LoadTablet( + const LoadTabletRequest* request, LoadTabletResponse* response, + std::function done = NULL); - bool UnloadTablet(const UnloadTabletRequest* request, - UnloadTabletResponse* response, - std::function done = NULL); + bool UnloadTablet( + const UnloadTabletRequest* request, UnloadTabletResponse* response, + std::function done = NULL); - bool ReadTablet(const ReadTabletRequest* request, - ReadTabletResponse* response, - std::function done = NULL); + bool ReadTablet( + const ReadTabletRequest* request, ReadTabletResponse* response, + std::function done = NULL); - bool WriteTablet(const WriteTabletRequest* request, - WriteTabletResponse* response, - std::function done = NULL); + bool WriteTablet( + const WriteTabletRequest* request, WriteTabletResponse* response, + std::function done = NULL); - bool ScanTablet(const ScanTabletRequest* request, - ScanTabletResponse* response, - std::function done = NULL); + bool ScanTablet( + const ScanTabletRequest* request, ScanTabletResponse* response, + std::function done = NULL); - bool Query(ThreadPool* thread_pool, const QueryRequest* request, - QueryResponse* response, - std::function done = NULL); + bool Query(ThreadPool* thread_pool, const QueryRequest* request, QueryResponse* response, + std::function done = NULL); - bool SplitTablet(const SplitTabletRequest* request, - SplitTabletResponse* response, - std::function done = NULL); + bool ComputeSplitKey( + const SplitTabletRequest* request, SplitTabletResponse* response, + std::function done = NULL); - bool ComputeSplitKey(const SplitTabletRequest* request, SplitTabletResponse* response, - std::function done = NULL); + bool CompactTablet( + const CompactTabletRequest* request, CompactTabletResponse* response, + std::function done = NULL); + bool CmdCtrl(const TsCmdCtrlRequest* request, TsCmdCtrlResponse* response, + std::function done = NULL); - bool CompactTablet(const CompactTabletRequest* request, - CompactTabletResponse* response, - std::function done = NULL); - bool CmdCtrl(const TsCmdCtrlRequest* request, - TsCmdCtrlResponse* response, - std::function done = NULL); + bool Update(const UpdateRequest* request, UpdateResponse* response, + std::function done = NULL); - bool Update(const UpdateRequest* request, - UpdateResponse* response, - std::function done = NULL); - -private: - int32_t rpc_timeout_; - ThreadPool* thread_pool_; + private: + int32_t rpc_timeout_; + ThreadPool* thread_pool_; }; -} // namespace sdk -} // namespace tera +} // namespace sdk +} // namespace tera -#endif // TERA_TABLETNODE_TABLETNODE_CLIENT_ASYNC_H_ +#endif // TERA_TABLETNODE_TABLETNODE_CLIENT_ASYNC_H_ diff --git a/src/proto/tabletnode_rpc.proto b/src/proto/tabletnode_rpc.proto index 3a2ccb2d1..b4a8ff0e8 100644 --- a/src/proto/tabletnode_rpc.proto +++ b/src/proto/tabletnode_rpc.proto @@ -3,6 +3,9 @@ import "status_code.proto"; import "table_schema.proto"; import "table_meta.proto"; import "tabletnode.proto"; +import "access_control.proto"; +import "quota.proto"; +import "filter.proto"; package tera; @@ -11,6 +14,16 @@ package tera; message QueryRequest { required uint64 sequence_id = 1; optional bool is_gc_query = 2; + + repeated UgiMetaInfo ugi_meta_infos = 3; + optional uint64 version = 4; + repeated RoleInfo role_infos = 5; + + repeated TableQuota table_quotas = 7; + optional uint64 quota_version = 8; + optional double slowdown_write_ratio = 9; + optional uint64 dfs_write_throughput_hard_limit = 10; + optional uint64 dfs_read_throughput_hard_limit = 11; } message QueryResponse { @@ -21,6 +34,8 @@ message QueryResponse { repeated InheritedLiveFiles inh_live_files = 5; repeated TabletInheritedFileInfo tablet_inh_file_infos = 6; repeated TabletBackgroundErrorInfo tablet_background_errors = 7; + optional uint64 version = 8; + optional uint64 quota_version = 10; } enum UpdateType { @@ -51,7 +66,9 @@ message LoadTabletRequest { repeated uint64 snapshots_id = 9; repeated uint64 snapshots_sequence = 10; repeated uint64 parent_tablets = 11; + optional int64 create_time = 12; repeated string ignore_err_lgs = 13; + optional uint64 version = 14; } message LoadTabletResponse { @@ -65,6 +82,7 @@ message UnloadTabletRequest { required string tablet_name = 2; required KeyRange key_range = 3; optional string session_id = 4; + optional string path = 5; } message UnloadTabletResponse { @@ -138,6 +156,8 @@ message WriteTabletRequest { repeated RowMutationSequence row_list = 6; //optional uint64 session_id = 7 [default = 0]; optional int64 timestamp = 8 [default = 0]; + optional IdentityInfo identity_info = 9; + optional int64 client_timeout_ms = 10 [default = 0]; } message WriteTabletResponse { @@ -228,6 +248,8 @@ message ScanTabletRequest { optional int64 timeout = 19; optional int64 number_limit = 21; optional uint64 max_qualifiers = 22; + optional IdentityInfo identity_info = 23; + optional filter.FilterDesc filter = 24; } message ScanTabletResponse { @@ -239,6 +261,9 @@ message ScanTabletResponse { optional bytes end = 7; optional uint64 results_id = 8; optional KeyValuePair next_start_point = 9; + optional uint32 data_size = 10; + optional uint32 row_count = 11; + optional uint32 cell_count = 12; } message RowReaderInfo { @@ -248,6 +273,7 @@ message RowReaderInfo { optional FilterList filter_list = 4; optional uint32 max_version = 5; optional uint64 max_qualifiers = 6; + optional filter.FilterDesc filter = 7; } message ReadTabletRequest { @@ -259,6 +285,7 @@ message ReadTabletRequest { optional uint64 snapshot_id = 6; optional int64 timestamp = 7 [default = 0]; optional int64 client_timeout_ms = 8 [default = 0]; + optional IdentityInfo identity_info = 9; } message ReadTabletResponse { @@ -331,7 +358,6 @@ service TabletNodeServer { //option (sofa.pbrpc.response_compress_type) = CompressTypeGzip; } - rpc SplitTablet(SplitTabletRequest) returns(SplitTabletResponse); rpc ComputeSplitKey(SplitTabletRequest) returns (SplitTabletResponse); rpc CmdCtrl(TsCmdCtrlRequest) returns(TsCmdCtrlResponse); diff --git a/src/proto/test_helper.cc b/src/proto/test_helper.cc index 19e53dcf9..b652cac68 100644 --- a/src/proto/test_helper.cc +++ b/src/proto/test_helper.cc @@ -11,64 +11,62 @@ namespace tera { TableSchema DefaultTableSchema() { - TableSchema schema; - schema.set_id(0); - schema.set_name("lg0"); - schema.set_owner(0); - schema.add_acl(0777); + TableSchema schema; + schema.set_id(0); + schema.set_name("lg0"); + schema.set_owner(0); + schema.add_acl(0777); - ColumnFamilySchema* cf_schema = schema.add_column_families(); - cf_schema->set_id(0); - cf_schema->set_name("lg0_cf0"); - cf_schema->set_locality_group("lg0"); - cf_schema->set_owner(0); - cf_schema->add_acl(0777); + ColumnFamilySchema* cf_schema = schema.add_column_families(); + cf_schema->set_id(0); + cf_schema->set_name("lg0_cf0"); + cf_schema->set_locality_group("lg0"); + cf_schema->set_owner(0); + cf_schema->add_acl(0777); - LocalityGroupSchema* lg_schema = schema.add_locality_groups(); - lg_schema->set_id(0); - lg_schema->set_name("lg0_name"); + LocalityGroupSchema* lg_schema = schema.add_locality_groups(); + lg_schema->set_id(0); + lg_schema->set_name("lg0_name"); - return schema; + return schema; } -ColumnFamilySchema DefaultCFSchema(const std::string& lg_name, - uint32_t id) { - ColumnFamilySchema cf_schema; - std::string cf_name("cf"); - cf_name += NumberToString(id); - cf_schema.set_id(id); - cf_schema.set_name(lg_name + "_" + cf_name); - cf_schema.set_locality_group(lg_name); - cf_schema.set_owner(0); - cf_schema.add_acl(0777); +ColumnFamilySchema DefaultCFSchema(const std::string& lg_name, uint32_t id) { + ColumnFamilySchema cf_schema; + std::string cf_name("cf"); + cf_name += NumberToString(id); + cf_schema.set_id(id); + cf_schema.set_name(lg_name + "_" + cf_name); + cf_schema.set_locality_group(lg_name); + cf_schema.set_owner(0); + cf_schema.add_acl(0777); - return cf_schema; + return cf_schema; } LocalityGroupSchema DefaultLGSchema(uint32_t id) { - LocalityGroupSchema lg_schema; - std::string lg_name("lg"); - lg_name += NumberToString(id); - lg_schema.set_id(id); - lg_schema.set_name(lg_name); + LocalityGroupSchema lg_schema; + std::string lg_name("lg"); + lg_name += NumberToString(id); + lg_schema.set_id(id); + lg_schema.set_name(lg_name); - return lg_schema; + return lg_schema; } -TableSchema DefaultTableSchema(uint32_t id, uint32_t lg_num, - uint32_t cf_num) { - TableSchema schema; - std::string name("table"); - name += NumberToString(id); - schema.set_id(id); - schema.set_name(name); - schema.set_owner(0); - schema.set_acl(0, 0777); +TableSchema DefaultTableSchema(uint32_t id, uint32_t lg_num, uint32_t cf_num) { + TableSchema schema; + std::string name("table"); + name += NumberToString(id); + schema.set_id(id); + schema.set_name(name); + schema.set_owner(0); + schema.set_acl(0, 0777); - for (uint32_t lg_id = 0; lg_id < lg_num; ++lg_id) { - LocalityGroupSchema lg_schema = DefaultLGSchema(lg_id); - } + for (uint32_t lg_id = 0; lg_id < lg_num; ++lg_id) { + LocalityGroupSchema lg_schema = DefaultLGSchema(lg_id); + } - return schema; + return schema; } -} // namespace tera +} // namespace tera diff --git a/src/proto/test_helper.h b/src/proto/test_helper.h index 1f6eae876..9dd4e2912 100644 --- a/src/proto/test_helper.h +++ b/src/proto/test_helper.h @@ -11,11 +11,10 @@ namespace tera { TableSchema DefaultTableSchema(); -ColumnFamilySchema DefaultCFSchema(const std::string& lg_name, - uint32_t id); +ColumnFamilySchema DefaultCFSchema(const std::string& lg_name, uint32_t id); LocalityGroupSchema DefaultLGSchema(uint32_t id); -} // namespace tera +} // namespace tera -#endif // TERA_PROTO_TEST_HELPER_H_ +#endif // TERA_PROTO_TEST_HELPER_H_ diff --git a/src/quota/flow_controller.h b/src/quota/flow_controller.h new file mode 100644 index 000000000..a32d7e397 --- /dev/null +++ b/src/quota/flow_controller.h @@ -0,0 +1,119 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include "common/event.h" +#include "common/metric/metric_counter.h" + +namespace tera { + +class FlowLimiter { + public: + explicit FlowLimiter(uint64_t limit) : availiable_quota_{limit}, limit_{limit} {} + + void ResetQuota() { + std::lock_guard _(mu_); + availiable_quota_ = (limit_); + cv_.notify_all(); + } + + void SetLimit(uint64_t limit) { limit_ = limit; } + + bool TryConsume(uint64_t size) { + std::lock_guard _(mu_); + if (availiable_quota_ >= size) { + availiable_quota_ -= size; + return true; + } + return false; + } + + void BlockingConsume(uint64_t size) { + std::unique_lock _(mu_); + if (availiable_quota_ < size) { + cv_.wait(_, [this, size] { return availiable_quota_ >= size; }); + assert(availiable_quota_ >= size); + } + availiable_quota_ -= size; + } + + private: + uint64_t availiable_quota_; + std::atomic limit_; + std::mutex mu_; + std::condition_variable cv_; +}; + +class FlowController { + private: + enum class FlowControlStatus { kFlowControlMode, kNormal }; + + public: + FlowController(const FlowController&) = delete; + void operator=(const FlowController&) = delete; + + explicit FlowController(uint64_t limit, uint64_t reset_interval_ms) : limiter_(limit) { + t_ = std::thread{[this, reset_interval_ms]() { + while (!stop_event_.TimeWait(reset_interval_ms)) { + limiter_.ResetQuota(); + } + }}; + }; + + virtual ~FlowController() { + stop_event_.Set(); + t_.join(); + } + + void EnterFlowControlMode(uint64_t value) { + std::lock_guard _{mu_}; + status_.store(FlowControlStatus::kFlowControlMode); + limiter_.SetLimit(value); + } + + void LeaveFlowControlMode() { + std::lock_guard _{mu_}; + status_.store(FlowControlStatus::kNormal); + limiter_.SetLimit(std::numeric_limits::max()); + } + + bool InFlowControlMode() { return status_.load() == FlowControlStatus::kFlowControlMode; } + + void BlockingConsume(uint64_t bytes) { + if (InFlowControlMode()) { + limiter_.BlockingConsume(bytes); + } + } + + bool TryConsume(uint64_t bytes) { + if (InFlowControlMode()) { + return limiter_.TryConsume(bytes); + } + return true; + } + + private: + std::thread t_; + common::AutoResetEvent stop_event_; + std::mutex mu_; + FlowLimiter limiter_; + std::atomic status_{FlowControlStatus::kNormal}; +}; + +inline FlowController& DfsWriteThroughputHardLimiter() { + static FlowController dfs_flow_controller{0, 1000}; + return dfs_flow_controller; +} + +inline FlowController& DfsReadThroughputHardLimiter() { + static FlowController dfs_flow_controller{0, 1000}; + return dfs_flow_controller; +} +} diff --git a/src/quota/helpers/master_quota_helper.cc b/src/quota/helpers/master_quota_helper.cc new file mode 100644 index 000000000..097201fd3 --- /dev/null +++ b/src/quota/helpers/master_quota_helper.cc @@ -0,0 +1,85 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "quota/helpers/master_quota_helper.h" +#include "master/master_env.h" +#include + +namespace tera { +namespace quota { + +master::MetaWriteRecord* MasterQuotaHelper::NewMetaRecordFromQuota(const TableQuota& table_quota) { + if (table_quota.quota_infos_size() <= 0) { + return nullptr; + } + std::unique_ptr meta_write_record(new master::MetaWriteRecord); + meta_write_record->is_delete = false; + meta_write_record->key = std::string("|10") + table_quota.table_name(); + if (!table_quota.SerializeToString(&meta_write_record->value)) { + return nullptr; + } + return meta_write_record.release(); +} + +void MasterQuotaHelper::PackDeleteQuotaRecords(const std::string& table_name, + std::vector& records) { + std::string key = std::string("|10") + table_name; + records.emplace_back(master::MetaWriteRecord(key, "", true)); +} + +std::string MasterQuotaHelper::GetTableNameFromMetaKey(const std::string& key) { + if (key.length() <= 3 || key[1] != '1' || key[2] != '0') { + return ""; + } + return key.substr(3); +} + +TableQuota* MasterQuotaHelper::NewTableQuotaFromMetaValue(const std::string& value) { + if (value.size() <= 0) { + return nullptr; + } + std::unique_ptr table_quota(new TableQuota); + if (!table_quota->ParseFromString(value)) { + return nullptr; + } + return table_quota.release(); +} + +static bool CompareAndSetQuotaInfo(const QuotaInfo& quota_info, TableQuota* target_table_quota) { + bool found_delta = false; + int target_quota_infos_size = target_table_quota->quota_infos_size(); + for (int i = 0; i < target_quota_infos_size; ++i) { + QuotaInfo* target_quota_info = target_table_quota->mutable_quota_infos(i); + if (quota_info.type() == target_quota_info->type()) { + if (quota_info.limit() != target_quota_info->limit()) { + target_quota_info->set_limit(quota_info.limit()); + found_delta = true; + } + if (quota_info.period() != target_quota_info->period()) { + target_quota_info->set_period(quota_info.period()); + found_delta = true; + } + } + } + return found_delta; +} + +bool MasterQuotaHelper::MergeTableQuota(const TableQuota& table_quota, + TableQuota* target_table_quota) { + bool found_delta = false; + int quota_infos_size = table_quota.quota_infos_size(); + for (int quota_info_index = 0; quota_info_index < quota_infos_size; ++quota_info_index) { + const QuotaInfo& quota_info = table_quota.quota_infos(quota_info_index); + found_delta |= CompareAndSetQuotaInfo(quota_info, target_table_quota); + } + return found_delta; +} + +void MasterQuotaHelper::SetDefaultQuotaInfo(QuotaInfo* quota_info, QuotaOperationType type) { + quota_info->set_type(type); + quota_info->set_limit(-1); + quota_info->set_period(1); +} +} +} diff --git a/src/quota/helpers/master_quota_helper.h b/src/quota/helpers/master_quota_helper.h new file mode 100644 index 000000000..8c8642e7e --- /dev/null +++ b/src/quota/helpers/master_quota_helper.h @@ -0,0 +1,37 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include +#include "proto/quota.pb.h" + +namespace tera { + +namespace master { +struct MetaWriteRecord; +} + +namespace quota { + +class MasterQuotaHelper { + public: + // New**() func will new a instance in heap, it's memory should delete by user. + static master::MetaWriteRecord* NewMetaRecordFromQuota(const TableQuota& table_quota); + static TableQuota* NewTableQuotaFromMetaValue(const std::string& value); + + static void PackDeleteQuotaRecords(const std::string& table_name, + std::vector& records); + + static std::string GetTableNameFromMetaKey(const std::string& key); + + // Merge table_quota to target_table_quota, + // It'll be incremental merge, that means quota update incremental + static bool MergeTableQuota(const TableQuota& table_quota, TableQuota* target_table_quota); + + // default value for quota info, period = 1, limit = -1 + static void SetDefaultQuotaInfo(QuotaInfo* quota_info, QuotaOperationType type); +}; +} +} diff --git a/src/quota/helpers/quota_utils.cc b/src/quota/helpers/quota_utils.cc new file mode 100644 index 000000000..d61a14499 --- /dev/null +++ b/src/quota/helpers/quota_utils.cc @@ -0,0 +1,35 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include +#include "quota/helpers/quota_utils.h" + +namespace tera { +namespace quota { + +std::string QuotaUtils::GetQuotaOperation(QuotaOperationType type) { + static const char* msg[] = {"QuotaWriteReqs", "QuotaWriteBytes", "QuotaReadReqs", + "QuotaReadBytes", "QuotaScanReqs", "QuotaScanBytes", + "QuotaUnknown"}; + static uint32_t msg_size = sizeof(msg) / sizeof(const char*); + using QuotaOpType = std::underlying_type::type; + uint32_t index = static_cast(type) - static_cast(kQuotaWriteReqs); + index = index < msg_size ? index : (msg_size - 1); + return msg[index]; +} + +std::string QuotaUtils::DebugPrintTableQuota(const TableQuota& table_quota) { + std::ostringstream table_quota_info; + table_quota_info << "table[" << table_quota.table_name() << "] :\n"; + int quota_infos_size = table_quota.quota_infos_size(); + for (int quota_infos_index = 0; quota_infos_index < quota_infos_size; ++quota_infos_index) { + const QuotaInfo& quota_info = table_quota.quota_infos(quota_infos_index); + table_quota_info << "QuotaOperationType[" << GetQuotaOperation(quota_info.type()) << "], limit[" + << quota_info.limit() << "], period[" << quota_info.period() << "]\n"; + } + return table_quota_info.str(); +} +} +} diff --git a/src/quota/helpers/quota_utils.h b/src/quota/helpers/quota_utils.h new file mode 100644 index 000000000..42dfd13f5 --- /dev/null +++ b/src/quota/helpers/quota_utils.h @@ -0,0 +1,19 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include +#include "proto/quota.pb.h" + +namespace tera { +namespace quota { + +class QuotaUtils { + public: + static std::string DebugPrintTableQuota(const TableQuota& table_quota); + static std::string GetQuotaOperation(QuotaOperationType type); +}; +} +} diff --git a/src/quota/limiter/general_quota_limiter.cc b/src/quota/limiter/general_quota_limiter.cc new file mode 100644 index 000000000..9441c522e --- /dev/null +++ b/src/quota/limiter/general_quota_limiter.cc @@ -0,0 +1,58 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "quota/limiter/general_quota_limiter.h" +#include +#include "quota/limiter/general_rate_limiter.h" + +namespace tera { +namespace quota { + +namespace { +static const int64_t unlimited_quota = -1; +static const int64_t period_one_sec = 1; +} + +GeneralQuotaLimiter::GeneralQuotaLimiter(const std::string& table_name) : table_name_(table_name) { + op_rate_limiters_[kQuotaWriteReqs].reset(new GeneralRateLimiter(table_name_, kQuotaWriteReqs)); + op_rate_limiters_[kQuotaWriteBytes].reset(new GeneralRateLimiter(table_name_, kQuotaWriteBytes)); + op_rate_limiters_[kQuotaReadReqs].reset(new GeneralRateLimiter(table_name_, kQuotaReadReqs)); + op_rate_limiters_[kQuotaReadBytes].reset(new GeneralRateLimiter(table_name_, kQuotaReadBytes)); + op_rate_limiters_[kQuotaScanReqs].reset(new GeneralRateLimiter(table_name_, kQuotaScanReqs)); + op_rate_limiters_[kQuotaScanBytes].reset(new GeneralRateLimiter(table_name_, kQuotaScanBytes)); +} + +void GeneralQuotaLimiter::Reset(const TableQuota& table_quota) { + WriteLock l(&rw_mutex_); + for (auto& op_rate_limiter : op_rate_limiters_) { + op_rate_limiter.second->Reset(unlimited_quota, period_one_sec); + } + for (int i = 0; i < table_quota.quota_infos_size(); ++i) { + int64_t limit = table_quota.quota_infos(i).limit(); + int64_t period = table_quota.quota_infos(i).period(); + QuotaOperationType type = table_quota.quota_infos(i).type(); + op_rate_limiters_[type]->Reset(limit, period); + } +} + +bool GeneralQuotaLimiter::CheckAndConsume(const Throttle& throttle) { + ReadLock l(&rw_mutex_); + if (!op_rate_limiters_[kQuotaWriteReqs]->RefillAndCheck(throttle.write_reqs) || + !op_rate_limiters_[kQuotaWriteBytes]->RefillAndCheck(throttle.write_bytes) || + !op_rate_limiters_[kQuotaReadReqs]->RefillAndCheck(throttle.read_reqs) || + !op_rate_limiters_[kQuotaReadBytes]->RefillAndCheck(throttle.read_bytes) || + !op_rate_limiters_[kQuotaScanReqs]->RefillAndCheck(throttle.scan_reqs) || + !op_rate_limiters_[kQuotaScanBytes]->RefillAndCheck(throttle.scan_bytes)) { + return false; + } + op_rate_limiters_[kQuotaWriteReqs]->Consume(throttle.write_reqs); + op_rate_limiters_[kQuotaWriteBytes]->Consume(throttle.write_bytes); + op_rate_limiters_[kQuotaReadReqs]->Consume(throttle.read_reqs); + op_rate_limiters_[kQuotaReadBytes]->Consume(throttle.read_bytes); + op_rate_limiters_[kQuotaScanReqs]->Consume(throttle.scan_reqs); + op_rate_limiters_[kQuotaScanBytes]->Consume(throttle.scan_bytes); + return true; +} +} +} diff --git a/src/quota/limiter/general_quota_limiter.h b/src/quota/limiter/general_quota_limiter.h new file mode 100644 index 000000000..ff92a63b9 --- /dev/null +++ b/src/quota/limiter/general_quota_limiter.h @@ -0,0 +1,39 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include +#include +#include "quota/limiter/quota_limiter.h" +#include "quota/limiter/rate_limiter.h" +#include "common/rwmutex.h" + +namespace tera { +namespace quota { + +using RateLimiterPtr = std::unique_ptr; + +// Cause of copy shared_ptr for read, +// and new a Quotalimiter for write to swap, +// so CheckAndConsume will happened after Reset and +// won't have any conflict, DOESN'T NEED ANY synchronization primitive. +class GeneralQuotaLimiter : public QuotaLimiter { + public: + explicit GeneralQuotaLimiter(const std::string& table_name); + virtual ~GeneralQuotaLimiter() {} + + void Reset(const TableQuota& table_quota) override; + + // if quota limited, return false + // otherwise, consume the quota and return true + bool CheckAndConsume(const Throttle& throttle) override; + + private: + std::string table_name_; + std::map op_rate_limiters_; + RWMutex rw_mutex_; +}; +} +} diff --git a/src/quota/limiter/general_rate_limiter.cc b/src/quota/limiter/general_rate_limiter.cc new file mode 100644 index 000000000..c1038425a --- /dev/null +++ b/src/quota/limiter/general_rate_limiter.cc @@ -0,0 +1,73 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "quota/limiter/general_rate_limiter.h" +#include "quota/helpers/quota_utils.h" +#include +#include +#include +#include "common/timer.h" + +namespace tera { +namespace quota { + +GeneralRateLimiter::GeneralRateLimiter(const std::string& table_name, QuotaOperationType type) + : quota_type_(QuotaUtils::GetQuotaOperation(type)), + limit_per_sec_(quota_type_, LabelStringBuilder().Append("table", table_name).ToString(), + {SubscriberType::LATEST}, false), + period_sec_(0), + next_refill_ms_(0), + table_name_(table_name) {} + +void GeneralRateLimiter::Reset(int64_t limit, int64_t period_sec) { + limit_.Set(limit); + avail_.Set(limit); + limit_per_sec_.Set(limit / period_sec); + period_sec_ = period_sec; + next_refill_ms_ = 0; + + VLOG(7) << "reset quota " << table_name_ << " " << quota_type_ << " " << limit_.Get() << "/" + << period_sec_; +} + +bool GeneralRateLimiter::RefillAndCheck(int64_t amount) { + if (limit_.Get() < 0 || amount < 0) { + VLOG(25) << "[" << quota_type_ << "] quota limit_[" << limit_.Get() << "] amount[" << amount + << "] but let it pass"; + return true; + } + RefillAvail(); + if (amount > 0) { + VLOG(7) << "[" << table_name_ << " " << quota_type_ << "] quota Avail:" << avail_.Get() + << " RequestAmount:" << amount; + } + if (avail_.Get() < amount) { + VLOG(25) << "[" << quota_type_ << "] quota reach limit"; + return false; + } + return true; +} + +void GeneralRateLimiter::Consume(int64_t amount) { + if (limit_.Get() < 0 || amount <= 0) { + return; + } + if (amount >= avail_.Get()) { + avail_.Clear(); + } else { + avail_.Sub(amount); + } +} + +void GeneralRateLimiter::RefillAvail() { + // refill limit after fixed interval (seconds) + int64_t cur_ms = get_micros() / 1000; // ms + if (cur_ms < next_refill_ms_) { // 1ms precision + return; + } + next_refill_ms_ = cur_ms + period_sec_ * 1000; + avail_.Set(limit_.Get()); +} +} +} diff --git a/src/quota/limiter/general_rate_limiter.h b/src/quota/limiter/general_rate_limiter.h new file mode 100644 index 000000000..a8cf3a7fc --- /dev/null +++ b/src/quota/limiter/general_rate_limiter.h @@ -0,0 +1,44 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include +#include "quota/limiter/rate_limiter.h" +#include "common/counter.h" +#include "common/metric/metric_counter.h" +#include "proto/quota.pb.h" + +namespace tera { +namespace quota { + +// thread safe, only int64_t type value change. Doesn't need lock +class GeneralRateLimiter : public RateLimiter { + public: + explicit GeneralRateLimiter(const std::string& table_name, QuotaOperationType type); + virtual ~GeneralRateLimiter() {} + + void Reset(int64_t limit, int64_t period_sec) override; + + // If reach the period of quota, will reset the avail_ to limit_ + // then check user request amount greater then + // avail_(means out of quota, return false) or not(retrun true). + bool RefillAndCheck(int64_t amount) override; + + void Consume(int64_t amount) override; + + private: + void RefillAvail(); + + private: + std::string quota_type_; + Counter limit_; + Counter avail_; + tera::MetricCounter limit_per_sec_; + int64_t period_sec_; + int64_t next_refill_ms_; + std::string table_name_; +}; +} +} diff --git a/src/quota/limiter/limiter_factory.h b/src/quota/limiter/limiter_factory.h new file mode 100644 index 000000000..4c9297af6 --- /dev/null +++ b/src/quota/limiter/limiter_factory.h @@ -0,0 +1,30 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include +#include "quota/limiter/general_quota_limiter.h" + +namespace tera { +namespace quota { + +static const std::string general_quota_limiter_type = "general_quota_limiter"; + +class LimiterFactory { + public: + LimiterFactory() {} + ~LimiterFactory() {} + static QuotaLimiter* CreateQuotaLimiter(const std::string& limiter_type, + const std::string& table_name) { + if (general_quota_limiter_type == limiter_type) { + return new GeneralQuotaLimiter(table_name); + } else { + LOG(ERROR) << "Not surport limit_type = " << limiter_type; + return nullptr; + } + } +}; +} +} diff --git a/src/quota/limiter/quota_limiter.h b/src/quota/limiter/quota_limiter.h new file mode 100644 index 000000000..96dca9c58 --- /dev/null +++ b/src/quota/limiter/quota_limiter.h @@ -0,0 +1,32 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include "proto/quota.pb.h" + +namespace tera { +namespace quota { + +struct Throttle { + Throttle() + : write_reqs(0), write_bytes(0), read_reqs(0), read_bytes(0), scan_reqs(0), scan_bytes(0) {} + int64_t write_reqs; + int64_t write_bytes; + int64_t read_reqs; + int64_t read_bytes; + int64_t scan_reqs; + int64_t scan_bytes; +}; + +class QuotaLimiter { + public: + virtual void Reset(const TableQuota& table_quota) = 0; + + // if quota limited, return false + // otherwise, consume the quota and return true + virtual bool CheckAndConsume(const Throttle& throttle) = 0; +}; +} +} diff --git a/src/quota/limiter/rate_limiter.h b/src/quota/limiter/rate_limiter.h new file mode 100644 index 000000000..14f0cec0e --- /dev/null +++ b/src/quota/limiter/rate_limiter.h @@ -0,0 +1,24 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +namespace tera { +namespace quota { + +class RateLimiter { + public: + // Reset the limit and period_sec + virtual void Reset(int64_t limit, int64_t period_sec) = 0; + + // If reach the period of quota, will reset the avail_ to limit_ + // then check user request amount greater then + // avail_(means out of quota, return false) or not(retrun true). + virtual bool RefillAndCheck(int64_t amount) = 0; + + // if io pass quota limiter, consume io amount + virtual void Consume(int64_t amount) = 0; +}; +} +} diff --git a/src/quota/master_quota_entry.cc b/src/quota/master_quota_entry.cc new file mode 100644 index 000000000..171652fbc --- /dev/null +++ b/src/quota/master_quota_entry.cc @@ -0,0 +1,476 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include +#include +#include +#include + +#include "common/metric/metric_counter.h" +#include "master/tablet_manager.h" +#include "quota/master_quota_entry.h" + +namespace tera { +namespace quota { + +tera::MetricCounter flow_control_mode{ + "tera_master_enter_flow_control_mode", {tera::SubscriberType::LATEST}, false}; + +void MasterQuotaEntry::SwitchWaitingUpdateStatus() { + quota_update_status_ = QuotaUpdateStatus::WaitUpdate; + PrepareUpdate(); +} + +void MasterQuotaEntry::PrepareUpdate() { + version_recorder_.IncVersion(); + version_recorder_.SetNeedUpdate(true); +} + +bool MasterQuotaEntry::AddRecord(const std::string& key, const std::string& value) { + std::string table_name = MasterQuotaHelper::GetTableNameFromMetaKey(key); + if ("" == table_name) { + VLOG(25) << "wrong quota key[" << key << "] format, value[" << value << "]"; + return false; + } + + std::unique_ptr table_quota(MasterQuotaHelper::NewTableQuotaFromMetaValue(value)); + if (!table_quota) { + VLOG(25) << "failed to get table_quota in meta value"; + return false; + } + { + MutexLock l(&mutex_); + if (!tablet_manager_) { + return false; + } + bool found_delta = false; + auto it = table_quotas_list_.find(table_name); + if (it == table_quotas_list_.end()) { + table_quotas_list_.emplace(table_name, std::move(table_quota)); + VLOG(25) << "quota insert table_quotas_list_ table[" << table_name << "]"; + found_delta = true; + } else { + // set update if different, and inc vesion + if (MasterQuotaHelper::MergeTableQuota(*table_quota, table_quotas_list_[table_name].get())) { + VLOG(25) << "quota merge table_quotas_list_ table[" << table_name << "]"; + found_delta = true; + } + } + if (found_delta) { + AddDeltaQuota(table_name, &delta_ts_table_quotas_list_); + SwitchWaitingUpdateStatus(); + VLOG(25) << "AddRecord cause quota update, version = " << version_recorder_.GetVersion(); + } + VLOG(25) << DebugPrintTableQuotaList(); + VLOG(25) << DebugPrintDeltaTableList(); + } + return true; +} + +bool MasterQuotaEntry::DelRecord(const std::string& table_name) { + // Delete table quota while table drop + MutexLock l(&mutex_); + auto it = table_quotas_list_.find(table_name); + if (it != table_quotas_list_.end()) { + table_quotas_list_.erase(it); + AddDeltaDropQuota(table_name, &delta_ts_table_quotas_list_); + SwitchWaitingUpdateStatus(); + VLOG(25) << "DelRecord cause quota update, version = " << version_recorder_.GetVersion(); + return true; + } + return false; +} + +void MasterQuotaEntry::BuildReq(QueryRequest* request, const std::string& ts_addr) { + MutexLock l(&mutex_); + if (version_recorder_.NeedUpdate()) { + auto ts_addr_it = delta_ts_table_quotas_list_.find(ts_addr); + if (ts_addr_it != delta_ts_table_quotas_list_.end()) { + for (auto table_quota_it = ts_addr_it->second.begin(); + table_quota_it != ts_addr_it->second.end(); ++table_quota_it) { + TableQuota* table_quota = request->add_table_quotas(); + table_quota->CopyFrom(*(table_quota_it->second)); + } + } + if (SlowdownModeTriggered()) { + request->set_slowdown_write_ratio(flow_control_slowdown_ratio_.load()); + } + + auto write_hard_limit_iter = dfs_write_throughput_hard_limit_list_.find(ts_addr); + if (write_hard_limit_iter != dfs_write_throughput_hard_limit_list_.end()) { + request->set_dfs_write_throughput_hard_limit(write_hard_limit_iter->second); + } + + auto read_hard_limit_iter = dfs_read_throughput_hard_limit_list_.find(ts_addr); + if (read_hard_limit_iter != dfs_read_throughput_hard_limit_list_.end()) { + request->set_dfs_read_throughput_hard_limit(read_hard_limit_iter->second); + } + + request->set_quota_version(version_recorder_.GetVersion()); + VLOG(25) << "BuildReq for Quota, version = " << version_recorder_.GetVersion(); + quota_update_status_ = QuotaUpdateStatus::Updating; + } +} + +void MasterQuotaEntry::ShowQuotaInfo(ShowQuotaResponse* response, bool brief_show) { + MutexLock l(&mutex_); + if (brief_show) { + // show TableQuota + for (auto it = table_quotas_list_.begin(); it != table_quotas_list_.end(); ++it) { + TableQuota* table_quota = response->add_table_quota_list(); + table_quota->CopyFrom(*(it->second)); + } + } else { + // show TsQuota + TsTableQuotaList delta_ts_table_quotas_list; + for (auto it = table_quotas_list_.begin(); it != table_quotas_list_.end(); ++it) { + AddDeltaQuota(it->first, &delta_ts_table_quotas_list); + } + for (auto ts_addr_it = delta_ts_table_quotas_list.begin(); + ts_addr_it != delta_ts_table_quotas_list.end(); ++ts_addr_it) { + TsQuota* ts_quota = response->add_ts_quota_list(); + ts_quota->set_ts_addr(ts_addr_it->first); + for (auto table_quota_it = ts_addr_it->second.begin(); + table_quota_it != ts_addr_it->second.end(); ++table_quota_it) { + TableQuota* table_quota = ts_quota->add_table_quotas(); + table_quota->CopyFrom(*table_quota_it->second); + } + } + } +} + +void MasterQuotaEntry::SetTabletManager( + const std::shared_ptr& tablet_manager) { + MutexLock l(&mutex_); + tablet_manager_ = tablet_manager; +} + +void MasterQuotaEntry::CaculateDeltaQuota(const std::string& table_name) { + MutexLock l(&mutex_); + auto it = table_quotas_list_.find(table_name); + if (it == table_quotas_list_.end()) { + return; + } + AddDeltaQuota(table_name, &delta_ts_table_quotas_list_); + SwitchWaitingUpdateStatus(); + VLOG(25) << "CaculateDeltaQuota cause quota update, version = " << version_recorder_.GetVersion(); +} + +bool MasterQuotaEntry::ClearDeltaQuota() { + MutexLock l(&mutex_); + if (quota_update_status_ == QuotaUpdateStatus::Updating) { + quota_update_status_ = QuotaUpdateStatus::FinishUpdated; + delta_ts_table_quotas_list_.clear(); + return true; + } + return false; +} + +bool MasterQuotaEntry::GetTableQuota(const std::string& table_name, TableQuota* table_quota) { + MutexLock l(&mutex_); + auto iter = table_quotas_list_.find(table_name); + if (iter == table_quotas_list_.end()) { + return false; + } + table_quota->CopyFrom(*iter->second); + return true; +} + +void MasterQuotaEntry::AddDeltaDropQuota(const std::string& table_name, + TsTableQuotaList* delta_ts_table_quotas_list) { + mutex_.AssertHeld(); + master::TablePtr table_ptr; + if (!tablet_manager_ || !tablet_manager_->FindTable(table_name, &table_ptr) || !table_ptr) { + VLOG(25) << "quota AddDeltaQuota couldn't find table[" << table_name << "] in tablet_manager_"; + return; + } + std::map ts_addr_tablets_count; + table_ptr->GetTsAddrTabletsCount(&ts_addr_tablets_count); + for (auto it = ts_addr_tablets_count.begin(); it != ts_addr_tablets_count.end(); ++it) { + std::unique_ptr new_tablet_quota(new TableQuota); + new_tablet_quota->set_table_name(table_name); + new_tablet_quota->set_type(TableQuota::kDelQuota); + TableQuotaList& table_quota_list = (*delta_ts_table_quotas_list)[it->first]; + table_quota_list[table_name].reset(new_tablet_quota.release()); + VLOG(7) << "del quota " << it->first << " for table " << table_name; + } +} + +void MasterQuotaEntry::AddDeltaQuota(const std::string& table_name, + TsTableQuotaList* delta_ts_table_quotas_list) { + mutex_.AssertHeld(); + int64_t tablets_count = 1; + master::TablePtr table_ptr; + if (!tablet_manager_ || !tablet_manager_->FindTable(table_name, &table_ptr) || !table_ptr) { + VLOG(25) << "quota AddDeltaQuota couldn't find table[" << table_name << "] in tablet_manager_"; + return; + } + tablets_count = table_ptr->GetTabletsCount(); + VLOG(25) << "quota AddDeltaQuota table[" << table_name << "] tablets_count[" << tablets_count + << "]"; + + std::unique_ptr tablet_quota(new TableQuota); + tablet_quota->CopyFrom(*table_quotas_list_[table_name]); + int quota_infos_size = tablet_quota->quota_infos_size(); + for (int quota_info_index = 0; quota_info_index < quota_infos_size; ++quota_info_index) { + QuotaInfo* quota_info = tablet_quota->mutable_quota_infos(quota_info_index); + if (quota_info->limit() > 0) { + quota_info->set_limit(std::ceil(quota_info->limit() / static_cast(tablets_count))); + } + } + + // Caclulate the sum quota in ts addr + // 1. get tables count in ts addr + // 2. multiply quota for each table's tablets_count in addr + // 3. set result to delta_list which prepare for request build + + // First step : ts_addr => tablets_count + std::map ts_addr_tablets_count; + table_ptr->GetTsAddrTabletsCount(&ts_addr_tablets_count); + + for (auto it = ts_addr_tablets_count.begin(); it != ts_addr_tablets_count.end(); ++it) { + std::unique_ptr new_tablet_quota(new TableQuota); + new_tablet_quota->CopyFrom(*tablet_quota); + for (int quota_info_index = 0; quota_info_index < quota_infos_size; ++quota_info_index) { + QuotaInfo* quota_info = new_tablet_quota->mutable_quota_infos(quota_info_index); + if (quota_info->limit() > 0) { + // Second step : get limit for each Ts + quota_info->set_limit(quota_info->limit() * it->second); + } + } + + VLOG(25) << "quota AddDeltaQuota ts addr = " << it->first << ", table = " << table_name + << ", tablets count = " << it->second; + + // Third step : set delta_list + TableQuotaList& table_quota_list = (*delta_ts_table_quotas_list)[it->first]; + table_quota_list[table_name].reset(new_tablet_quota.release()); + } + + std::vector all_ts_addr; + tabletnode_manager_->GetAllTabletNodeAddr(&all_ts_addr); + for (auto it = all_ts_addr.cbegin(); it != all_ts_addr.cend(); ++it) { + auto delta_it = delta_ts_table_quotas_list->find(*it); + if (delta_it == delta_ts_table_quotas_list->end()) { + std::unique_ptr new_tablet_quota(new TableQuota); + new_tablet_quota->CopyFrom(*tablet_quota); + for (int quota_info_index = 0; quota_info_index < quota_infos_size; ++quota_info_index) { + QuotaInfo* quota_info = new_tablet_quota->mutable_quota_infos(quota_info_index); + quota_info->set_limit(-1); + quota_info->set_period(-1); + } + TableQuotaList& table_quota_list = (*delta_ts_table_quotas_list)[*it]; + table_quota_list[table_name].reset(new_tablet_quota.release()); + VLOG(7) << "clear quota " << *it << " no table " << table_name; + } else { + VLOG(7) << "reset quota " << *it << " for table " << table_name; + } + } +} + +std::string MasterQuotaEntry::DebugPrintTableQuotaList() { + std::ostringstream table_quota_list_info; + table_quota_list_info + << "-------------------------Globla TableQuota Infos start------------------------\n"; + for (auto it = table_quotas_list_.begin(); it != table_quotas_list_.end(); ++it) { + table_quota_list_info << "############ table[" << it->first << "] ############\n"; + table_quota_list_info << QuotaUtils::DebugPrintTableQuota(*(it->second)); + table_quota_list_info << "################################################\n"; + } + table_quota_list_info + << "-------------------------Globla TableQuota Infos end--------------------------\n"; + return table_quota_list_info.str(); +} + +std::string MasterQuotaEntry::DebugPrintDeltaTableList() { + std::ostringstream delta_table_list_info; + delta_table_list_info + << "-----------------------DeltaTableQuota per Ts Infos start---------------------------\n"; + for (auto ts_addr_it = delta_ts_table_quotas_list_.begin(); + ts_addr_it != delta_ts_table_quotas_list_.end(); ++ts_addr_it) { + for (auto table_quota_it = ts_addr_it->second.begin(); + table_quota_it != ts_addr_it->second.end(); ++table_quota_it) { + delta_table_list_info << "########## ts[" << ts_addr_it->first << "] table[" + << table_quota_it->first << "] ##########\n"; + delta_table_list_info << QuotaUtils::DebugPrintTableQuota(*(table_quota_it->second)); + delta_table_list_info << "#########################################################\n"; + } + } + delta_table_list_info + << "-----------------------DeltaTableQuota per Ts Infos end-----------------------------\n"; + return delta_table_list_info.str(); +} + +void MasterQuotaEntry::SetTabletNodeManager( + const std::shared_ptr& tabletnode_manager) { + MutexLock l(&mutex_); + tabletnode_manager_ = tabletnode_manager; +} + +void MasterQuotaEntry::RefreshClusterFlowControlStatus() { + MutexLock l(&mutex_); + if (!tabletnode_manager_) { + return; + } + auto cluster_dfs_write_bytes_quota = cluster_dfs_write_bytes_quota_.Get(); + auto cluster_dfs_qps_quota = cluster_dfs_qps_quota_.Get(); + double slowdown_write_ratio{std::numeric_limits::max()}; + // Check dfs write through-put triggered flow control + if (cluster_dfs_write_bytes_quota >= 0) { + UpdateDfsWriteBytesQueue(); + auto average_dfs_write_bytes = cluster_dfs_write_bytes_queue_.Average(); + slowdown_write_ratio = (double)cluster_dfs_write_bytes_quota / (average_dfs_write_bytes + 1); + if (average_dfs_write_bytes >= cluster_dfs_write_bytes_quota) { + LOG(WARNING) << "Dfs throughput trigger flow control mode, current: " + << average_dfs_write_bytes << " quota: " << cluster_dfs_write_bytes_quota + << " ratio: " << slowdown_write_ratio; + } + } + + // Check dfs master's qps triggered flow control + if (cluster_dfs_qps_quota >= 0) { + UpdateDfsQpsQueue(); + auto average_dfs_qps = cluster_dfs_qps_queue_.Average(); + slowdown_write_ratio = + std::min((double)cluster_dfs_qps_quota / (average_dfs_qps + 1), slowdown_write_ratio); + if (average_dfs_qps >= cluster_dfs_qps_quota) { + LOG(WARNING) << "Dfs qps trigger flow control mode, current: " << average_dfs_qps + << " quota: " << cluster_dfs_qps_quota << " ratio: " << slowdown_write_ratio; + } + } + + if (slowdown_write_ratio < 1) { + if (!SlowdownModeTriggered()) { + flow_control_mode.Set(1); + LOG(WARNING) << "Enter flow control mode, slow-down cluster write."; + } + LOG(WARNING) << "Set flow control slow down ratio to: " << slowdown_write_ratio; + SetSlowdownWriteRatio(slowdown_write_ratio); + PrepareUpdate(); + VLOG(25) << "RefreshClusterFlowControlStatus cause quota update, version = " + << version_recorder_.GetVersion(); + } else { + if (SlowdownModeTriggered()) { + LOG(WARNING) << "Leave flow control mode."; + flow_control_mode.Set(0); + ResetSlowdownWriteRatio(); + } + } +} + +void MasterQuotaEntry::UpdateDfsWriteBytesQueue() { + mutex_.AssertHeld(); + std::vector tabletnodes; + tabletnode_manager_->GetAllTabletNodeInfo(&tabletnodes); + auto current_dfs_write_size = GetClusterDfsWriteSize(tabletnodes); + cluster_dfs_write_bytes_queue_.Push(current_dfs_write_size); +} + +void MasterQuotaEntry::UpdateDfsQpsQueue() { + mutex_.AssertHeld(); + std::vector tabletnodes; + tabletnode_manager_->GetAllTabletNodeInfo(&tabletnodes); + auto current_dfs_qps = std::accumulate(std::begin(tabletnodes), std::end(tabletnodes), (int64_t)0, + [](int64_t val, const master::TabletNodePtr& ptr) { + if (ptr->info_.has_dfs_master_qps()) { + val += ptr->info_.dfs_master_qps(); + } + return val; + }); + cluster_dfs_qps_queue_.Push(current_dfs_qps); +} + +void MasterQuotaEntry::RefreshDfsHardLimit() { + MutexLock _(&mutex_); + if (!tabletnode_manager_) { + return; + } + std::vector tabletnodes; + tabletnode_manager_->GetAllTabletNodeInfo(&tabletnodes); + if (tabletnodes.empty()) { + return; + } + RefreshDfsWriteThroughputHardLimit(tabletnodes); + RefreshDfsReadThroughputHardLimit(tabletnodes); +} + +void MasterQuotaEntry::RefreshDfsWriteThroughputHardLimit( + const std::vector& nodes) { + mutex_.AssertHeld(); + auto write_hard_limit = GetDfsWriteThroughputHardLimit(); + if (write_hard_limit >= 0) { + // Limit strategy: + // We share half of the hard limit value equally on each ts to guarantee their basic needs. + // And the remaining half of the limit will be set to each ts based on their dfs_write history. + // The reason of this stragety is to keep each ts has some dfs write quota, and maximize the + // cluster's dfs write size meanwhile. + write_hard_limit /= 2; + auto ts_num = nodes.size(); + auto total_write_size = GetClusterDfsWriteSize(nodes); + auto base_limit = write_hard_limit / ts_num; + TsDfsQuotaList new_list; + for (const auto& node : nodes) { + new_list[node->addr_] = base_limit; + if (node->info_.has_dfs_io_w()) { + new_list[node->addr_] += + (double)node->info_.dfs_io_w() / (total_write_size + 1) * write_hard_limit; + } + } + std::swap(dfs_write_throughput_hard_limit_list_, new_list); + PrepareUpdate(); + VLOG(25) << "RefreshDfsWriteThroughputHardLimit cause quota update, version = " + << version_recorder_.GetVersion(); + } else { + dfs_write_throughput_hard_limit_list_.clear(); + } +} + +void MasterQuotaEntry::RefreshDfsReadThroughputHardLimit( + const std::vector& nodes) { + auto read_hard_limit = GetDfsReadThroughputHardLimit(); + if (read_hard_limit >= 0) { + // Same stragety as RefreshDfsWriteThroughputHardLimit does. + read_hard_limit /= 2; + auto ts_num = nodes.size(); + auto total_read_size = GetClusterDfsReadSize(nodes); + auto base_limit = read_hard_limit / ts_num; + TsDfsQuotaList new_list; + for (const auto& node : nodes) { + new_list[node->addr_] = base_limit; + if (node->info_.has_dfs_io_r()) { + new_list[node->addr_] += + (double)node->info_.dfs_io_r() / (total_read_size + 1) * read_hard_limit; + } + } + std::swap(dfs_read_throughput_hard_limit_list_, new_list); + PrepareUpdate(); + VLOG(25) << "RefreshDfsReadThroughputHardLimit cause quota update, version = " + << version_recorder_.GetVersion(); + } else { + dfs_read_throughput_hard_limit_list_.clear(); + } +} + +int64_t MasterQuotaEntry::GetClusterDfsWriteSize(const std::vector& nodes) { + return accumulate(std::begin(nodes), std::end(nodes), (int64_t)0, + [](int64_t val, const master::TabletNodePtr& ptr) { + if (ptr->info_.has_dfs_io_w()) { + val += ptr->info_.dfs_io_w(); + } + return val; + }); +} + +int64_t MasterQuotaEntry::GetClusterDfsReadSize(const std::vector& nodes) { + return accumulate(std::begin(nodes), std::end(nodes), (int64_t)0, + [](int64_t val, const master::TabletNodePtr& ptr) { + if (ptr->info_.has_dfs_io_r()) { + val += ptr->info_.dfs_io_r(); + } + return val; + }); +} +} // namespace quota +} // namespace tera diff --git a/src/quota/master_quota_entry.h b/src/quota/master_quota_entry.h new file mode 100644 index 000000000..bd74b4e37 --- /dev/null +++ b/src/quota/master_quota_entry.h @@ -0,0 +1,159 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include +#include +#include +#include + +#include "access/helpers/version_recorder.h" +#include "common/base/bounded_queue.h" +#include "common/metric/metric_counter.h" +#include "common/mutex.h" +#include "proto/master_rpc.pb.h" +#include "proto/quota.pb.h" +#include "proto/tabletnode_rpc.pb.h" +#include "quota/helpers/master_quota_helper.h" +#include "quota/helpers/quota_utils.h" +#include "master/tabletnode_manager.h" + +namespace tera { + +namespace master { +class TabletManager; +} + +namespace quota { + +enum class QuotaUpdateStatus { WaitUpdate, Updating, FinishUpdated }; + +// map +using TableQuotaList = std::unordered_map>; + +// map> +using TsTableQuotaList = std::unordered_map; + +// map> +using TsDfsQuotaList = std::unordered_map; + +class MasterQuotaEntry { + public: + MasterQuotaEntry() + : quota_update_status_(QuotaUpdateStatus::FinishUpdated), + flow_control_slowdown_ratio_(-1), + cluster_dfs_write_bytes_quota_{ + "dfs_write_bytes_threshold", {tera::Subscriber::SubscriberType::LATEST}, false}, + cluster_dfs_qps_quota_{ + "dfs_qps_threshold", {tera::Subscriber::SubscriberType::LATEST}, false}, + cluster_dfs_qps_queue_(kDfsQueueBoundSize), + cluster_dfs_write_bytes_queue_(kDfsQueueBoundSize), + dfs_write_bytes_hard_limit_(-1), + dfs_read_bytes_hard_limit_(-1) { + version_recorder_.IncVersion(); + } + virtual ~MasterQuotaEntry() {} + + MasterQuotaEntry(MasterQuotaEntry&) = delete; + MasterQuotaEntry& operator=(const MasterQuotaEntry&) = delete; + + // master + void SetTabletManager(const std::shared_ptr& tablet_manager); + void SetTabletNodeManager(const std::shared_ptr& tabletnode_manager); + + // used by quota adjust by manual + bool AddRecord(const std::string& key, const std::string& value); + bool DelRecord(const std::string& key); + + void BuildReq(QueryRequest* request, const std::string& ts_addr); + bool IsSameVersion(uint64_t version) { return version_recorder_.IsSameVersion(version); } + + // Aim to make sure NEED to sync version from master to ts or NOT + // If pass true then means NEED to sync, false then NOT NEED to sync. + void SyncVersion(bool updated) { version_recorder_.SetNeedUpdate(updated); } + + // Caculate delta table_quotas, used by split/merge/move + void CaculateDeltaQuota(const std::string& table_name); + + // Only clear delta list after query sync success all ts, + // and quota_update_status_ still keep Updating. + // If not QuotaUpdateStatus::Updating, it means delta list modified in query dispatch, + // so need to re-dispatch again. + bool ClearDeltaQuota(); + + void RefreshClusterFlowControlStatus(); + + bool GetTableQuota(const std::string& table_name, TableQuota* table_quota); + + void ShowQuotaInfo(ShowQuotaResponse* response, bool brief_show); + + void SetDfsWriteSizeQuota(int64_t quota) { cluster_dfs_write_bytes_quota_.Set(quota); } + void SetDfsQpsQuota(int64_t quota) { cluster_dfs_qps_quota_.Set(quota); } + + void SetDfsWriteThroughputHardLimit(int64_t quota) { dfs_write_bytes_hard_limit_.store(quota); } + void SetDfsReadThroughputHardLimit(int64_t quota) { dfs_read_bytes_hard_limit_.store(quota); } + int64_t GetDfsWriteThroughputHardLimit() { return dfs_write_bytes_hard_limit_.load(); } + int64_t GetDfsReadThroughputHardLimit() { return dfs_read_bytes_hard_limit_.load(); } + void RefreshDfsHardLimit(); + + private: + void AddDeltaQuota(const std::string& table_name, TsTableQuotaList* delta_ts_table_quotas_list); + // Clear ts table quota setting while table drop + void AddDeltaDropQuota(const std::string& table_name, + TsTableQuotaList* delta_ts_table_quotas_list); + void SwitchWaitingUpdateStatus(); + void PrepareUpdate(); + std::string DebugPrintDeltaTableList(); + std::string DebugPrintTableQuotaList(); + + void SetSlowdownWriteRatio(double slowdown_write_ratio) { + flow_control_slowdown_ratio_.store(slowdown_write_ratio); + } + + void ResetSlowdownWriteRatio() { flow_control_slowdown_ratio_.store(-1); } + + bool SlowdownModeTriggered() const { return flow_control_slowdown_ratio_ > 0; } + + void UpdateDfsWriteBytesQueue(); + void UpdateDfsQpsQueue(); + void RefreshDfsWriteThroughputHardLimit(const std::vector&); + void RefreshDfsReadThroughputHardLimit(const std::vector&); + + int64_t GetClusterDfsWriteSize(const std::vector&); + int64_t GetClusterDfsReadSize(const std::vector&); + + private: + TableQuotaList table_quotas_list_; + + // Only clear in all update success + TsTableQuotaList delta_ts_table_quotas_list_; + QuotaUpdateStatus quota_update_status_; + + mutable Mutex mutex_; + std::shared_ptr tablet_manager_; + std::shared_ptr tabletnode_manager_; + + auth::VersionRecorder version_recorder_; + + std::atomic flow_control_slowdown_ratio_; + // Dfs quota, which will trigger user write slowdown when exceed. + MetricCounter cluster_dfs_write_bytes_quota_; + MetricCounter cluster_dfs_qps_quota_; + + common::BoundedQueue cluster_dfs_qps_queue_; + common::BoundedQueue cluster_dfs_write_bytes_queue_; + static constexpr size_t kDfsQueueBoundSize = 10; + + TsDfsQuotaList dfs_write_throughput_hard_limit_list_; + TsDfsQuotaList dfs_read_throughput_hard_limit_list_; + + // Dfs read/write hard limit. It's set by tera client manually and just keep in memory. + // For solving dfs write/read throughput snowslide online. + // -1 means not enabled. + std::atomic dfs_write_bytes_hard_limit_; + std::atomic dfs_read_bytes_hard_limit_; +}; +} // namespace quota +} // namespace tera \ No newline at end of file diff --git a/src/quota/quota_entry.cc b/src/quota/quota_entry.cc new file mode 100644 index 000000000..1ba8daa22 --- /dev/null +++ b/src/quota/quota_entry.cc @@ -0,0 +1,188 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "quota/quota_entry.h" +#include "quota/helpers/quota_utils.h" +#include "ts_write_flow_controller.h" +#include + +DECLARE_bool(tera_quota_enabled); +DECLARE_int64(tera_quota_normal_estimate_value); +DECLARE_double(tera_quota_adjust_estimate_ratio); + +namespace tera { +namespace quota { + +bool QuotaEntry::CheckAndConsume(const std::string& table_name, + const OpTypeAmountList& op_type_amount_list) { + if (!FLAGS_tera_quota_enabled) { + VLOG(25) << "tera quota disabled"; + return true; + } + QuotaLimiterPtr limiter; + if (!quota_limiter_container_.GetTableLimiter(table_name, &limiter)) { + VLOG(25) << "quota couldn't find specified table[" << table_name << "] setting"; + return true; + } + return CheckAndConsumeInternal(table_name, op_type_amount_list, limiter); +} + +bool QuotaEntry::Reset(const TableQuota& table_quota) { + std::vector key_list(3); + GetQuotaOperationKey(table_quota.table_name(), kQuotaReadBytes, &key_list[0]); + GetQuotaOperationKey(table_quota.table_name(), kQuotaScanReqs, &key_list[1]); + GetQuotaOperationKey(table_quota.table_name(), kQuotaScanBytes, &key_list[2]); + { + WriteLock l(&rw_mutex_); + for (auto& key : key_list) { + auto iter = estimate_rows_bytes_opkey_.find(key); + if (iter == estimate_rows_bytes_opkey_.end()) { + estimate_rows_bytes_opkey_.emplace(key, FLAGS_tera_quota_normal_estimate_value); + } + } + } + + VLOG(25) << "Reset table[" << table_quota.table_name() + << "] quota :" << QuotaUtils::DebugPrintTableQuota(table_quota); + return quota_limiter_container_.ResetQuota(table_quota); +} + +bool QuotaEntry::CheckAndConsumeInternal(const std::string& table_name, + const OpTypeAmountList& op_type_amount_list, + const QuotaLimiterPtr& limiter) { + Throttle throttle; + for (auto& op_type_amount : op_type_amount_list) { + switch (op_type_amount.first) { + case kQuotaWriteReqs: + throttle.write_reqs = op_type_amount.second; + break; + case kQuotaWriteBytes: + throttle.write_bytes = op_type_amount.second; + break; + case kQuotaReadReqs: + throttle.read_reqs = op_type_amount.second; + throttle.read_bytes = Estimate(table_name, kQuotaReadBytes, op_type_amount.second); + break; + case kQuotaScanReqs: + throttle.scan_reqs = Estimate(table_name, kQuotaScanReqs, op_type_amount.second); + throttle.scan_bytes = Estimate(table_name, kQuotaScanBytes, op_type_amount.second); + break; + case kQuotaReadBytes: + case kQuotaScanBytes: + break; + default: + // error type, no limit + VLOG(25) << "Set wrong quota_op_type[" << op_type_amount.first + << "], return no limit for table[" << table_name << "]"; + return true; + } + } + + VLOG(25) << "QuotaCheckAndConsume details WriteReqs : " << throttle.write_reqs + << ", WriteBytes : " << throttle.write_bytes << ", ReadReqs : " << throttle.read_reqs + << ", ReadBytes : " << throttle.read_bytes << ", ScanReqs : " << throttle.scan_reqs + << ", ScanBytes : " << throttle.scan_bytes; + + if (!limiter->CheckAndConsume(throttle)) { + VLOG(7) << "Quota reach limit for table[" << table_name << "]"; + return false; + } + return true; +} + +void QuotaEntry::GetQuotaOperationKey(const std::string& table_name, QuotaOperationType type, + std::string* key) { + key->clear(); + *key = table_name; + key->push_back('|'); + key->append(QuotaUtils::GetQuotaOperation(type)); +} + +int64_t QuotaEntry::Estimate(const std::string& table_name, QuotaOperationType type, int64_t reqs) { + std::string key; + GetQuotaOperationKey(table_name, type, &key); + int64_t estimate_value = 0; + { + ReadLock l(&rw_mutex_); + auto iter = estimate_rows_bytes_opkey_.find(key); + if (iter != estimate_rows_bytes_opkey_.end()) { + estimate_value = iter->second; + } + } + if (estimate_value <= 0) { + estimate_value = FLAGS_tera_quota_normal_estimate_value; + } + VLOG(25) << "table_name[" << table_name << "] Estimate : " << key << " estimate_value[" + << estimate_value << "]"; + return estimate_value * reqs; +} + +std::string QuotaEntry::DebugEstimateBytes() { + std::ostringstream output; + for (auto it = estimate_rows_bytes_opkey_.begin(); it != estimate_rows_bytes_opkey_.end(); ++it) { + output << it->first << " : " << it->second << "\n"; + } + return output.str(); +} + +void QuotaEntry::Adjust(const std::string& table_name, QuotaOperationType type, + int64_t estimate_value) { + std::string key; + GetQuotaOperationKey(table_name, type, &key); + ReadLock l(&rw_mutex_); + auto iter = estimate_rows_bytes_opkey_.find(key); + if (iter != estimate_rows_bytes_opkey_.end()) { + if (iter->second <= 0) { + iter->second = FLAGS_tera_quota_normal_estimate_value; + } + iter->second = iter->second * FLAGS_tera_quota_adjust_estimate_ratio + + estimate_value * (1 - FLAGS_tera_quota_adjust_estimate_ratio); + VLOG(7) << "Adjust : " << DebugEstimateBytes(); + } +} + +void QuotaEntry::Update(const QueryRequest* request, QueryResponse* response) { + if (request->has_slowdown_write_ratio()) { + TsWriteFlowController::Instance().SetSlowdownMode(request->slowdown_write_ratio()); + } else { + TsWriteFlowController::Instance().ResetSlowdownMode(); + } + + if (request->has_dfs_write_throughput_hard_limit()) { + LOG(WARNING) << "Set dfs write hard limit to " << request->dfs_write_throughput_hard_limit() + << "bytes/s."; + DfsWriteThroughputHardLimiter().EnterFlowControlMode( + request->dfs_write_throughput_hard_limit()); + } else { + DfsWriteThroughputHardLimiter().LeaveFlowControlMode(); + } + + if (request->has_dfs_read_throughput_hard_limit()) { + LOG(WARNING) << "Set dfs read hard limit to " << request->dfs_read_throughput_hard_limit() + << "bytes/s."; + DfsReadThroughputHardLimiter().EnterFlowControlMode(request->dfs_read_throughput_hard_limit()); + } else { + DfsReadThroughputHardLimiter().LeaveFlowControlMode(); + } + + if (request->has_quota_version()) { + VLOG(25) << "ts quota version : " << version_recorder_.GetVersion() + << ", QueryRequest version : " << request->quota_version(); + int32_t table_quotas_size = request->table_quotas_size(); + bool reset_success = true; + for (int32_t table_quotas_index = 0; table_quotas_index < table_quotas_size; + ++table_quotas_index) { + if (!Reset(request->table_quotas(table_quotas_index))) { + reset_success = false; + break; + } + } + if (reset_success) { + version_recorder_.SetVersion(request->quota_version()); + } + } + response->set_quota_version(version_recorder_.GetVersion()); +} +} // namespace quota +} // namespace tera diff --git a/src/quota/quota_entry.h b/src/quota/quota_entry.h new file mode 100644 index 000000000..91643957f --- /dev/null +++ b/src/quota/quota_entry.h @@ -0,0 +1,74 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include +#include +#include +#include "access/helpers/version_recorder.h" +#include "common/rwmutex.h" +#include "proto/quota.pb.h" +#include "proto/tabletnode_rpc.pb.h" +#include "quota/quota_limiter_container.h" +#include "quota/flow_controller.h" + +namespace tera { + +namespace quota { + +using OpTypeAmountPair = std::pair; +using OpTypeAmountList = std::vector; + +class QuotaEntry { + public: + QuotaEntry() {} + virtual ~QuotaEntry() {} + + QuotaEntry(QuotaEntry&) = delete; + QuotaEntry& operator=(const QuotaEntry&) = delete; + + bool CheckAndConsume(const std::string& table_name, const OpTypeAmountList& op_type_amount_list); + + void Update(const QueryRequest* request, QueryResponse* response); + + // Adjust read and scan bytes&reqs per request every time + void Adjust(const std::string& table_name, QuotaOperationType type, int64_t estimate_value); + + private: + // clear quota and set a new one + bool Reset(const TableQuota& table_quota); + + // if out of quota , return false + // otherwise, consume quota and return true + bool CheckAndConsumeInternal(const std::string& table_name, + const OpTypeAmountList& op_type_amount_list, + const QuotaLimiterPtr& limiter); + + void GetQuotaOperationKey(const std::string& table_name, QuotaOperationType type, + std::string* key); + + // estimate read/scan throughput + int64_t Estimate(const std::string& table_name, QuotaOperationType type, int64_t reqs); + + std::string DebugEstimateBytes(); + + private: + QuotaLimiterContainer quota_limiter_container_; + + // + // QuotaOperationKey : table_name|type, only for read/scan operation + // key | value + // table_name|kQuotaReadBytes | estimate bytes for each read rpc + // table_name|kQuotaScanReqs | estimate row num for each scan rpc + // table_name|kQuotaScanBytes | estimate bytes for each scan rpc + // Notice: scan reqs has error about 15% cause by int64_t + // If need more precise, should use double + std::unordered_map estimate_rows_bytes_opkey_; + mutable RWMutex rw_mutex_; + + auth::VersionRecorder version_recorder_; +}; +} // namespace quota +} // namespace tera diff --git a/src/quota/quota_limiter_container.cc b/src/quota/quota_limiter_container.cc new file mode 100644 index 000000000..006eb2a53 --- /dev/null +++ b/src/quota/quota_limiter_container.cc @@ -0,0 +1,51 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "quota/quota_limiter_container.h" +#include "quota/limiter/limiter_factory.h" +#include + +DECLARE_string(tera_quota_limiter_type); + +namespace tera { +namespace quota { + +bool QuotaLimiterContainer::GetTableLimiter(const std::string& table_name, + QuotaLimiterPtr* limiter) const { + ReadLock l(&rw_mutex_); + auto it = table_quotas_.find(table_name); + if (it != table_quotas_.end()) { + *limiter = it->second; + return true; + } + return false; +} + +bool QuotaLimiterContainer::ResetQuota(const TableQuota& table_quota) { + WriteLock l(&rw_mutex_); + const std::string& table_name = table_quota.table_name(); + auto it = table_quotas_.find(table_name); + if (TableQuota::kDelQuota == table_quota.type()) { + if (it != table_quotas_.end()) { + table_quotas_.erase(it); + VLOG(7) << "del quota " << table_name; + } + return true; + } + if (it == table_quotas_.end()) { + QuotaLimiterPtr new_limiter( + LimiterFactory::CreateQuotaLimiter(FLAGS_tera_quota_limiter_type, table_name)); + if (!new_limiter) { + VLOG(30) << "quota table[" << table_name + << "] QuotaLimiterContainer CreateQuotaLimiter failed!"; + return false; + } + table_quotas_.emplace(std::make_pair(table_name, new_limiter)); + VLOG(30) << "quota setting table[" << table_name << "] first time"; + } + table_quotas_[table_name]->Reset(table_quota); + return true; +} +} // namespace quota +} // namespace tera diff --git a/src/quota/quota_limiter_container.h b/src/quota/quota_limiter_container.h new file mode 100644 index 000000000..4c53932bd --- /dev/null +++ b/src/quota/quota_limiter_container.h @@ -0,0 +1,32 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include +#include +#include "common/rwmutex.h" +#include "proto/quota.pb.h" +#include "quota/limiter/quota_limiter.h" + +namespace tera { +namespace quota { + +using QuotaLimiterPtr = std::shared_ptr; + +class QuotaLimiterContainer { + public: + explicit QuotaLimiterContainer() {} + virtual ~QuotaLimiterContainer() {} + + bool GetTableLimiter(const std::string& table_name, QuotaLimiterPtr* limiter) const; + + bool ResetQuota(const TableQuota& table_quota); + + private: + std::unordered_map table_quotas_; + mutable RWMutex rw_mutex_; +}; +} +} diff --git a/src/quota/test/master_quota_entry_test.cc b/src/quota/test/master_quota_entry_test.cc new file mode 100644 index 000000000..d66a0b80c --- /dev/null +++ b/src/quota/test/master_quota_entry_test.cc @@ -0,0 +1,213 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include +#include +#include +#include +#include "quota/master_quota_entry.h" +#include "master/master_env.h" + +DECLARE_string(tera_quota_limiter_type); + +namespace tera { +namespace quota { +namespace test { + +static const std::string test_table("test"); +static const int64_t quota_limit = 3000; +static const int64_t consume_amount = 2000; +static const int64_t quota_period = 2; +static const int64_t default_quota_period = 1; +static const std::string server_addr("abc.baidu.com:9001"); + +class QuotaBuilder { + public: + QuotaBuilder() {} + + virtual ~QuotaBuilder() {} + + static TableQuota BuildTableQuota() { + // build table_quota + TableQuota table_quota; + table_quota.set_table_name(test_table); + table_quota.set_type(TableQuota::kSetQuota); + + // add write req limit 1s + QuotaInfo* quota_info = table_quota.add_quota_infos(); + quota_info->set_type(kQuotaWriteReqs); + quota_info->set_limit(quota_limit); + quota_info->set_period(default_quota_period); + + // add write bytes 2s + quota_info = table_quota.add_quota_infos(); + quota_info->set_type(kQuotaWriteBytes); + quota_info->set_limit(quota_limit); + quota_info->set_period(quota_period); + + // add read req limit 2s + quota_info = table_quota.add_quota_infos(); + quota_info->set_type(kQuotaReadReqs); + quota_info->set_limit(quota_limit); + quota_info->set_period(quota_period); + + // add read req limit 1s + quota_info = table_quota.add_quota_infos(); + quota_info->set_type(kQuotaReadBytes); + quota_info->set_limit(quota_limit); + quota_info->set_period(default_quota_period); + + // add scan req limit 1s + quota_info = table_quota.add_quota_infos(); + quota_info->set_type(kQuotaScanReqs); + quota_info->set_limit(quota_limit); + quota_info->set_period(default_quota_period); + + // add scan bytes limit 2s + quota_info = table_quota.add_quota_infos(); + quota_info->set_type(kQuotaScanBytes); + quota_info->set_limit(quota_limit); + quota_info->set_period(quota_period); + + return std::move(table_quota); + } +}; + +class MasterQuotaEntryTest : public ::testing::Test { + public: + MasterQuotaEntryTest() + : tablet_manager_(new master::TabletManager(nullptr, nullptr, nullptr)), + tabletnode_manager_(new master::TabletNodeManager(nullptr)) { + InitMasterEnv(); + + TableSchema schema; + master::TablePtr table_ptr( + master::TabletManager::CreateTable(test_table, schema, kTableEnable)); + + TabletMeta tablet_meta; + KeyRange* key_range = tablet_meta.mutable_key_range(); + key_range->set_key_start("aaa"); + key_range->set_key_end("zzz"); + tablet_meta.set_path("tablet0001"); + tablet_meta.set_server_addr(server_addr); + tablet_meta.set_status(TabletMeta::kTabletOffline); + master::TabletPtr tablet_ptr(new master::Tablet(tablet_meta, table_ptr)); + table_ptr->AddTablet(tablet_meta, nullptr); + tablet_manager_->AddTable(table_ptr, nullptr); + tabletnode_manager_->AddTabletNode(server_addr, "123"); + } + + virtual ~MasterQuotaEntryTest() {} + + MasterQuotaEntry* NewMasterQuotaEntry() { + FLAGS_tera_quota_limiter_type = "general_quota_limiter"; + quota_entry_.reset(new MasterQuotaEntry); + return quota_entry_.get(); + } + + MasterQuotaEntry* NewNotGeneralMasterQuotaEntry() { + FLAGS_tera_quota_limiter_type = "not_general_quota_limiter"; + quota_entry_.reset(new MasterQuotaEntry); + return quota_entry_.get(); + } + + std::shared_ptr GetTabletManager() { return tablet_manager_; } + std::shared_ptr GetTabletNodeManager() { return tabletnode_manager_; } + + private: + void InitMasterEnv() { + master::MasterEnv().Init(nullptr, tabletnode_manager_, tablet_manager_, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, + std::shared_ptr( + new master::TabletAvailability(tablet_manager_)), + nullptr); + // push one element to the queue, avoiding call TryMoveTablet while call + // SuspendMetaOperation + master::MasterEnv().meta_task_queue_.push(nullptr); + } + + private: + std::unique_ptr quota_entry_; + std::shared_ptr tablet_manager_; + std::shared_ptr tabletnode_manager_; +}; + +TEST_F(MasterQuotaEntryTest, AddRecord) { + MasterQuotaEntry* quota_entry = NewMasterQuotaEntry(); + EXPECT_FALSE(quota_entry->AddRecord("", "")); + EXPECT_FALSE(quota_entry->AddRecord(test_table, "")); + + TableQuota nullptr_table_quota; + EXPECT_FALSE(MasterQuotaHelper::NewMetaRecordFromQuota(nullptr_table_quota)); + + TableQuota table_quota = QuotaBuilder::BuildTableQuota(); + EXPECT_TRUE(table_quota.type() == TableQuota::kSetQuota); + std::unique_ptr meta_write_record( + MasterQuotaHelper::NewMetaRecordFromQuota(table_quota)); + EXPECT_TRUE(nullptr != meta_write_record.get()); + EXPECT_TRUE("" == MasterQuotaHelper::GetTableNameFromMetaKey("")); + EXPECT_TRUE(test_table == MasterQuotaHelper::GetTableNameFromMetaKey(meta_write_record->key)); + + EXPECT_TRUE(nullptr == MasterQuotaHelper::NewTableQuotaFromMetaValue("")); + EXPECT_TRUE(nullptr != MasterQuotaHelper::NewTableQuotaFromMetaValue(meta_write_record->value)); + + EXPECT_FALSE(quota_entry->AddRecord(meta_write_record->key, meta_write_record->value)); + + quota_entry->SetTabletManager(GetTabletManager()); + quota_entry->SetTabletNodeManager(GetTabletNodeManager()); + EXPECT_TRUE(quota_entry->AddRecord(meta_write_record->key, meta_write_record->value)); + + EXPECT_TRUE(quota_entry->quota_update_status_ == QuotaUpdateStatus::WaitUpdate); + EXPECT_TRUE(quota_entry->version_recorder_.NeedUpdate()); + + // build request + QueryRequest req; + quota_entry->BuildReq(&req, server_addr); + EXPECT_TRUE(quota_entry->quota_update_status_ == QuotaUpdateStatus::Updating); + EXPECT_TRUE(req.table_quotas_size() == 1); + EXPECT_TRUE(quota_entry->delta_ts_table_quotas_list_.size() == 1); + quota_entry->ClearDeltaQuota(); + EXPECT_TRUE(quota_entry->delta_ts_table_quotas_list_.size() == 0); + EXPECT_TRUE(quota_entry->quota_update_status_ == QuotaUpdateStatus::FinishUpdated); +} + +TEST_F(MasterQuotaEntryTest, CaculateDeltaQuota) { + MasterQuotaEntry* quota_entry = NewMasterQuotaEntry(); + quota_entry->SetTabletManager(GetTabletManager()); + quota_entry->SetTabletNodeManager(GetTabletNodeManager()); + TableQuota table_quota = QuotaBuilder::BuildTableQuota(); + std::unique_ptr meta_write_record( + MasterQuotaHelper::NewMetaRecordFromQuota(table_quota)); + EXPECT_TRUE(quota_entry->AddRecord(meta_write_record->key, meta_write_record->value)); + quota_entry->ClearDeltaQuota(); + + quota_entry->CaculateDeltaQuota(test_table); + EXPECT_TRUE(quota_entry->quota_update_status_ == QuotaUpdateStatus::WaitUpdate); + EXPECT_TRUE(quota_entry->version_recorder_.NeedUpdate()); + + // build request + QueryRequest req; + quota_entry->BuildReq(&req, server_addr); + EXPECT_TRUE(quota_entry->quota_update_status_ == QuotaUpdateStatus::Updating); + EXPECT_TRUE(req.table_quotas_size() == 1); + EXPECT_TRUE(quota_entry->delta_ts_table_quotas_list_.size() == 1); + quota_entry->ClearDeltaQuota(); + EXPECT_TRUE(quota_entry->delta_ts_table_quotas_list_.size() == 0); + EXPECT_TRUE(quota_entry->quota_update_status_ == QuotaUpdateStatus::FinishUpdated); +} + +TEST_F(MasterQuotaEntryTest, DelRecord) { + MasterQuotaEntry* quota_entry = NewMasterQuotaEntry(); + quota_entry->SetTabletManager(GetTabletManager()); + quota_entry->SetTabletNodeManager(GetTabletNodeManager()); + TableQuota table_quota = QuotaBuilder::BuildTableQuota(); + std::unique_ptr meta_write_record( + MasterQuotaHelper::NewMetaRecordFromQuota(table_quota)); + EXPECT_TRUE(quota_entry->AddRecord(meta_write_record->key, meta_write_record->value)); + EXPECT_TRUE(quota_entry->DelRecord(test_table)); +} +} +} +} diff --git a/src/quota/test/master_quota_helper_test.cc b/src/quota/test/master_quota_helper_test.cc new file mode 100644 index 000000000..c5b18b2f4 --- /dev/null +++ b/src/quota/test/master_quota_helper_test.cc @@ -0,0 +1,131 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include +#include +#include +#include +#include "quota/master_quota_entry.h" +#include "master/master_env.h" +#include "quota/helpers/master_quota_helper.h" + +namespace tera { +namespace quota { +namespace test { + +static const std::string test_table("test"); +static const int64_t quota_limit = 3000; +static const int64_t consume_amount = 2000; +static const int64_t quota_period = 2; +static const int64_t default_quota_period = 1; + +class QuotaBuilder { + public: + QuotaBuilder() {} + + virtual ~QuotaBuilder() {} + + static TableQuota* BuildTableQuota2() { + // build table_quota + std::unique_ptr table_quota(new TableQuota); + table_quota->set_table_name(test_table); + table_quota->set_type(TableQuota::kDelQuota); + + // add write req limit 1s + QuotaInfo* quota_info = table_quota->add_quota_infos(); + quota_info->set_type(kQuotaWriteReqs); + quota_info->set_limit(quota_limit); + quota_info->set_period(default_quota_period); + + // add write bytes 2s + quota_info = table_quota->add_quota_infos(); + quota_info->set_type(kQuotaWriteBytes); + quota_info->set_limit(quota_limit); + quota_info->set_period(quota_period); + + // add read req limit 2s + quota_info = table_quota->add_quota_infos(); + quota_info->set_type(kQuotaReadReqs); + quota_info->set_limit(quota_limit); + quota_info->set_period(quota_period); + + // add read req limit 1s + quota_info = table_quota->add_quota_infos(); + quota_info->set_type(kQuotaReadBytes); + quota_info->set_limit(quota_limit); + quota_info->set_period(default_quota_period); + + // add scan req limit 1s + quota_info = table_quota->add_quota_infos(); + quota_info->set_type(kQuotaScanReqs); + quota_info->set_limit(quota_limit); + quota_info->set_period(default_quota_period); + + // add scan bytes limit 2s + quota_info = table_quota->add_quota_infos(); + quota_info->set_type(kQuotaScanBytes); + quota_info->set_limit(quota_limit); + quota_info->set_period(quota_period); + + EXPECT_TRUE(table_quota->type() == TableQuota::kDelQuota); + return table_quota.release(); + } +}; + +class MaterQuotaHelperTest : public ::testing::Test { + public: + MaterQuotaHelperTest() {} + virtual ~MaterQuotaHelperTest() {} +}; + +TEST_F(MaterQuotaHelperTest, SetDefaultQuotaInfo) { + std::unique_ptr quota_info(new QuotaInfo); + MasterQuotaHelper::SetDefaultQuotaInfo(quota_info.get(), kQuotaWriteReqs); + EXPECT_TRUE(quota_info->type() == kQuotaWriteReqs); + EXPECT_TRUE(quota_info->limit() == -1); + EXPECT_TRUE(quota_info->period() == 1); +} + +TEST_F(MaterQuotaHelperTest, MetaWriteRecord) { + std::unique_ptr table_quota(QuotaBuilder::BuildTableQuota2()); + EXPECT_TRUE(table_quota->type() == TableQuota::kDelQuota); + std::unique_ptr meta_write_record( + MasterQuotaHelper::NewMetaRecordFromQuota(*table_quota)); + EXPECT_TRUE(!!meta_write_record); + EXPECT_TRUE(MasterQuotaHelper::GetTableNameFromMetaKey(meta_write_record->key) == test_table); + EXPECT_TRUE((meta_write_record->value).size() > 0); + std::unique_ptr table_quota2( + MasterQuotaHelper::NewTableQuotaFromMetaValue(meta_write_record->value)); + EXPECT_TRUE(!!table_quota2); + EXPECT_TRUE(table_quota2->type() == TableQuota::kDelQuota); + EXPECT_TRUE(table_quota2->table_name() == table_quota->table_name()); + EXPECT_TRUE(table_quota2->quota_infos_size() == table_quota->quota_infos_size()); +} + +TEST_F(MaterQuotaHelperTest, MergeTableQuota) { + std::unique_ptr table_quota(QuotaBuilder::BuildTableQuota2()); + std::unique_ptr target_table_quota(new TableQuota); + + target_table_quota->set_table_name(test_table); + // peroid = 1, limit = -1 + quota::MasterQuotaHelper::SetDefaultQuotaInfo(target_table_quota->add_quota_infos(), + kQuotaWriteReqs); + quota::MasterQuotaHelper::SetDefaultQuotaInfo(target_table_quota->add_quota_infos(), + kQuotaWriteBytes); + quota::MasterQuotaHelper::SetDefaultQuotaInfo(target_table_quota->add_quota_infos(), + kQuotaReadReqs); + quota::MasterQuotaHelper::SetDefaultQuotaInfo(target_table_quota->add_quota_infos(), + kQuotaReadBytes); + quota::MasterQuotaHelper::SetDefaultQuotaInfo(target_table_quota->add_quota_infos(), + kQuotaScanReqs); + quota::MasterQuotaHelper::SetDefaultQuotaInfo(target_table_quota->add_quota_infos(), + kQuotaScanBytes); + + EXPECT_TRUE(MasterQuotaHelper::MergeTableQuota(*table_quota, target_table_quota.get())); + EXPECT_FALSE(MasterQuotaHelper::MergeTableQuota(*table_quota, target_table_quota.get())); +} +} +} +} diff --git a/src/quota/test/quota_entry_test.cc b/src/quota/test/quota_entry_test.cc new file mode 100644 index 000000000..2459d58e5 --- /dev/null +++ b/src/quota/test/quota_entry_test.cc @@ -0,0 +1,250 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include +#include +#include +#include +#include "quota/quota_entry.h" + +DECLARE_int64(tera_quota_normal_estimate_value); +DECLARE_string(tera_quota_limiter_type); + +namespace tera { +namespace quota { +namespace test { + +static const std::string test_table("test"); +static const int64_t quota_limit = 3000; +static const int64_t consume_amount = 2000; +static const int64_t quota_period = 2; +static const int64_t default_quota_period = 1; + +class QuotaBuilder { + public: + QuotaBuilder() {} + + virtual ~QuotaBuilder() {} + + static TableQuota BuildTableQuota() { + // build table_quota + TableQuota table_quota; + table_quota.set_table_name(test_table); + + // add write req limit 1s + QuotaInfo* quota_info = table_quota.add_quota_infos(); + quota_info->set_type(kQuotaWriteReqs); + quota_info->set_limit(quota_limit); + quota_info->set_period(default_quota_period); + + // add write bytes 2s + quota_info = table_quota.add_quota_infos(); + quota_info->set_type(kQuotaWriteBytes); + quota_info->set_limit(quota_limit); + quota_info->set_period(quota_period); + + // add read req limit 2s + quota_info = table_quota.add_quota_infos(); + quota_info->set_type(kQuotaReadReqs); + quota_info->set_limit(quota_limit); + quota_info->set_period(quota_period); + + // add read req limit 1s + quota_info = table_quota.add_quota_infos(); + quota_info->set_type(kQuotaReadBytes); + quota_info->set_limit(quota_limit); + quota_info->set_period(default_quota_period); + + // add scan req limit 1s + quota_info = table_quota.add_quota_infos(); + quota_info->set_type(kQuotaScanReqs); + quota_info->set_limit(quota_limit); + quota_info->set_period(default_quota_period); + + // add scan bytes limit 2s + quota_info = table_quota.add_quota_infos(); + quota_info->set_type(kQuotaScanBytes); + quota_info->set_limit(quota_limit); + quota_info->set_period(quota_period); + + return std::move(table_quota); + } +}; + +class QuotaEntryTest : public ::testing::Test { + public: + QuotaEntryTest() {} + + virtual ~QuotaEntryTest() {} + + QuotaEntry* NewQuotaEntry() { + FLAGS_tera_quota_limiter_type = "general_quota_limiter"; + quota_entry_.reset(new QuotaEntry); + return quota_entry_.get(); + } + + QuotaEntry* NewNotGeneralQuotaEntry() { + FLAGS_tera_quota_limiter_type = "not_general_quota_limiter"; + quota_entry_.reset(new QuotaEntry); + return quota_entry_.get(); + } + + void ReadEstimateBytesPerReqMap(const TableQuota& table_quota) { + int64_t bytes = consume_amount * FLAGS_tera_quota_normal_estimate_value; + EXPECT_TRUE(bytes == + quota_entry_->Estimate(table_quota.table_name(), kQuotaReadBytes, consume_amount)); + } + + void WriteEstimateBytesPerReqMap(const TableQuota& table_quota) { + EXPECT_TRUE(quota_entry_->Reset(table_quota)); + } + + void EstimateBytesPerReqMapTest(const TableQuota& table_quota, int job_num) { + std::vector threads; + for (int i = 0; i < job_num; ++i) { + threads.emplace_back( + std::bind(&QuotaEntryTest::ReadEstimateBytesPerReqMap, this, table_quota)); + threads.emplace_back( + std::bind(&QuotaEntryTest::WriteEstimateBytesPerReqMap, this, table_quota)); + } + for (auto& t : threads) { + t.join(); + } + } + + void MultiResetQuota(const TableQuota& table_quota) { + EXPECT_TRUE(quota_entry_->Reset(table_quota)); + } + + void MultiCheckAndConsumeQuota(const TableQuota& table_quota) { + quota_entry_->CheckAndConsume(table_quota.table_name(), OpTypeAmountList{std::make_pair( + kQuotaWriteBytes, consume_amount)}); + } + + void MultiResetCheckAndConsumeQuota(const TableQuota& table_quota, int job_num) { + std::vector threads; + for (int i = 0; i < job_num; ++i) { + threads.emplace_back(std::bind(&QuotaEntryTest::MultiResetQuota, this, table_quota)); + threads.emplace_back( + std::bind(&QuotaEntryTest::MultiCheckAndConsumeQuota, this, table_quota)); + } + for (auto& t : threads) { + t.join(); + } + } + + private: + std::unique_ptr quota_entry_; +}; + +TEST_F(QuotaEntryTest, NotGeneralQuotaLimitType) { + QuotaEntry* quota_entry = NewNotGeneralQuotaEntry(); + TableQuota table_quota = QuotaBuilder::BuildTableQuota(); + EXPECT_FALSE(quota_entry->Reset(table_quota)); +} + +TEST_F(QuotaEntryTest, ResetCheckAndConsumeTableWriteReqQuota) { + QuotaEntry* quota_entry = NewQuotaEntry(); + TableQuota table_quota = QuotaBuilder::BuildTableQuota(); + EXPECT_TRUE(quota_entry->Reset(table_quota)); + EXPECT_TRUE(quota_entry->CheckAndConsume( + test_table, OpTypeAmountList{std::make_pair(kQuotaWriteReqs, consume_amount)})); + EXPECT_FALSE(quota_entry->CheckAndConsume( + test_table, OpTypeAmountList{std::make_pair(kQuotaWriteReqs, consume_amount)})); +} + +TEST_F(QuotaEntryTest, ResetCheckAndConsumeTableWriteBytePeriodQuota) { + QuotaEntry* quota_entry = NewQuotaEntry(); + TableQuota table_quota = QuotaBuilder::BuildTableQuota(); + EXPECT_TRUE(quota_entry->Reset(table_quota)); + EXPECT_TRUE(quota_entry->CheckAndConsume( + test_table, OpTypeAmountList{std::make_pair(kQuotaWriteBytes, consume_amount)})); + EXPECT_FALSE(quota_entry->CheckAndConsume( + test_table, OpTypeAmountList{std::make_pair(kQuotaWriteBytes, consume_amount)})); + + std::chrono::seconds wait_sec(quota_period); + std::this_thread::sleep_for(wait_sec); + EXPECT_TRUE(quota_entry->CheckAndConsume( + test_table, OpTypeAmountList{std::make_pair(kQuotaWriteBytes, consume_amount)})); + std::this_thread::sleep_for(wait_sec); + EXPECT_TRUE(quota_entry->CheckAndConsume( + test_table, OpTypeAmountList{std::make_pair(kQuotaWriteBytes, consume_amount)})); +} + +TEST_F(QuotaEntryTest, ResetCheckAndConsumeTableReadReqPeriodQuota) { + QuotaEntry* quota_entry = NewQuotaEntry(); + TableQuota table_quota = QuotaBuilder::BuildTableQuota(); + EXPECT_TRUE(quota_entry->Reset(table_quota)); + EXPECT_TRUE(quota_entry->CheckAndConsume( + test_table, OpTypeAmountList{std::make_pair(kQuotaReadReqs, consume_amount)})); + EXPECT_FALSE(quota_entry->CheckAndConsume( + test_table, OpTypeAmountList{std::make_pair(kQuotaReadReqs, consume_amount)})); + + std::chrono::seconds wait_sec(quota_period); + std::this_thread::sleep_for(wait_sec); + EXPECT_TRUE(quota_entry->CheckAndConsume( + test_table, OpTypeAmountList{std::make_pair(kQuotaReadReqs, consume_amount)})); + std::this_thread::sleep_for(wait_sec); + EXPECT_TRUE(quota_entry->CheckAndConsume( + test_table, OpTypeAmountList{std::make_pair(kQuotaReadReqs, consume_amount)})); +} + +TEST_F(QuotaEntryTest, ResetCheckAndConsumeTableReadByteQuota) { + QuotaEntry* quota_entry = NewQuotaEntry(); + TableQuota table_quota = QuotaBuilder::BuildTableQuota(); + EXPECT_TRUE(quota_entry->Reset(table_quota)); + EXPECT_TRUE(quota_entry->CheckAndConsume( + test_table, OpTypeAmountList{std::make_pair(kQuotaReadBytes, consume_amount)})); +} + +TEST_F(QuotaEntryTest, ResetCheckAndConsumeTableScanReqQuota) { + QuotaEntry* quota_entry = NewQuotaEntry(); + TableQuota table_quota = QuotaBuilder::BuildTableQuota(); + EXPECT_TRUE(quota_entry->Reset(table_quota)); + EXPECT_TRUE(quota_entry->CheckAndConsume( + test_table, OpTypeAmountList{std::make_pair(kQuotaScanReqs, consume_amount)})); + EXPECT_FALSE(quota_entry->CheckAndConsume( + test_table, OpTypeAmountList{std::make_pair(kQuotaScanReqs, consume_amount)})); +} + +TEST_F(QuotaEntryTest, ResetCheckAndConsumeTableScanBytePeriodQuota) { + QuotaEntry* quota_entry = NewQuotaEntry(); + TableQuota table_quota = QuotaBuilder::BuildTableQuota(); + EXPECT_TRUE(quota_entry->Reset(table_quota)); + EXPECT_TRUE(quota_entry->CheckAndConsume( + test_table, OpTypeAmountList{std::make_pair(kQuotaScanBytes, consume_amount)})); + + std::chrono::seconds wait_sec(quota_period); + std::this_thread::sleep_for(wait_sec); + EXPECT_TRUE(quota_entry->CheckAndConsume( + test_table, OpTypeAmountList{std::make_pair(kQuotaScanBytes, consume_amount)})); + std::this_thread::sleep_for(wait_sec); + EXPECT_TRUE(quota_entry->CheckAndConsume( + test_table, OpTypeAmountList{std::make_pair(kQuotaScanBytes, consume_amount)})); +} + +TEST_F(QuotaEntryTest, EstimateBytesPerReqMap) { + QuotaEntry* quota_entry = NewQuotaEntry(); + TableQuota table_quota = QuotaBuilder::BuildTableQuota(); + EstimateBytesPerReqMapTest(table_quota, 10); +} + +TEST_F(QuotaEntryTest, MultiResetCheckAndConsumeQuota) { + QuotaEntry* quota_entry = NewQuotaEntry(); + TableQuota table_quota = QuotaBuilder::BuildTableQuota(); + MultiResetCheckAndConsumeQuota(table_quota, 10); +} + +TEST_F(QuotaEntryTest, Update) { + QuotaEntry* quota_entry = NewQuotaEntry(); + std::unique_ptr request(new QueryRequest); + std::unique_ptr response(new QueryResponse); + request->set_quota_version(1); + quota_entry->Update(request.get(), response.get()); + EXPECT_TRUE(1 == response->quota_version()); +} +} +} +} diff --git a/src/quota/test/ts_write_flow_controller_test.cc b/src/quota/test/ts_write_flow_controller_test.cc new file mode 100644 index 000000000..89ac0d126 --- /dev/null +++ b/src/quota/test/ts_write_flow_controller_test.cc @@ -0,0 +1,50 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include +#include "common/this_thread.h" +#include "quota/ts_write_flow_controller.h" + +namespace tera { +namespace test { + +TEST(TsWriteFlowControllerTest, BaseTest) { + auto& wfc = TsWriteFlowController::Instance(); + wfc.Append(0, 4); + wfc.Append(0, 4); + wfc.Append(0, 4); + wfc.Append(0, 4); + EXPECT_EQ(wfc.write_throughput_queue_.size(), 4); + EXPECT_FALSE(wfc.InSlowdownMode()); + wfc.SetSlowdownMode(0.5); + EXPECT_TRUE(wfc.InSlowdownMode()); + EXPECT_EQ(wfc.flow_controller_.limiter_.limit_, 2); + wfc.Append(700000, 3); + wfc.Append(700000, 3); + EXPECT_EQ(wfc.write_throughput_queue_.size(), 2); + wfc.SetSlowdownMode(0.5); + EXPECT_TRUE(wfc.InSlowdownMode()); + EXPECT_EQ(wfc.flow_controller_.limiter_.limit_, 1); + wfc.Append(1900000, 8); + wfc.Append(1900000, 8); + wfc.Append(1900000, 10); + wfc.Append(1900000, 10); + EXPECT_EQ(wfc.write_throughput_queue_.size(), 4); + wfc.SetSlowdownMode(0.8); + EXPECT_TRUE(wfc.InSlowdownMode()); + EXPECT_EQ(wfc.flow_controller_.limiter_.limit_, 7); + wfc.flow_controller_.stop_event_.Set(); + wfc.flow_controller_.limiter_.ResetQuota(); + EXPECT_TRUE(wfc.TryWrite(3)); + EXPECT_TRUE(wfc.TryWrite(4)); + EXPECT_FALSE(wfc.TryWrite(2)); + EXPECT_TRUE(wfc.InSlowdownMode()); + wfc.ResetSlowdownMode(); + EXPECT_FALSE(wfc.InSlowdownMode()); + EXPECT_EQ(wfc.flow_controller_.status_, FlowController::FlowControlStatus::kNormal); + EXPECT_TRUE(wfc.TryWrite(10000)); +} +} +} diff --git a/src/quota/ts_write_flow_controller.cc b/src/quota/ts_write_flow_controller.cc new file mode 100644 index 000000000..2da0cd70e --- /dev/null +++ b/src/quota/ts_write_flow_controller.cc @@ -0,0 +1,41 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "ts_write_flow_controller.h" +#include + +namespace tera { +// Ts write flow controller in singleton + +TsWriteFlowController &TsWriteFlowController::Instance() { + static TsWriteFlowController write_flow_controller; + return write_flow_controller; +} + +void TsWriteFlowController::SetSlowdownMode(double ratio) { + std::lock_guard _(mu_); + uint64_t total_write_bytes = std::accumulate( + std::begin(write_throughput_queue_), std::end(write_throughput_queue_), (uint64_t)0, + [](uint64_t val, const TimeValuePair &pr) { return val + pr.second; }); + auto write_quota = + static_cast(total_write_bytes * ratio / write_throughput_queue_.size()); + if (!flow_controller_.InFlowControlMode()) { + LOG(WARNING) << "Enter slow-down write mode."; + } + + LOG(WARNING) << "Set write flow limit to " << write_quota + << " bytes, total_write_bytes: " << total_write_bytes << " ratio: " << ratio; + flow_controller_.EnterFlowControlMode(write_quota); + current_write_flow_limit_.Set(write_quota); +} + +void TsWriteFlowController::ResetSlowdownMode() { + std::lock_guard _(mu_); + if (flow_controller_.InFlowControlMode()) { + LOG(WARNING) << "Exit slow-down write mode."; + current_write_flow_limit_.Set(-1); + } + flow_controller_.LeaveFlowControlMode(); +} +} diff --git a/src/quota/ts_write_flow_controller.h b/src/quota/ts_write_flow_controller.h new file mode 100644 index 000000000..5a0de40e0 --- /dev/null +++ b/src/quota/ts_write_flow_controller.h @@ -0,0 +1,58 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#pragma once +#include "flow_controller.h" + +namespace tera { +class TsWriteFlowController { + private: + // 10 min + static constexpr uint64_t kHoldWriteThroughputSeconds = 600; + + public: + using TimeValuePair = std::pair; + static TsWriteFlowController& Instance(); + + void Append(uint64_t timestamp_ms, uint64_t write_throughput) { + std::lock_guard _(mu_); + write_throughput_queue_.emplace_back(timestamp_ms, write_throughput); + DropExpiredValue(); + } + + void SetSlowdownMode(double ratio); + + void ResetSlowdownMode(); + + bool TryWrite(uint64_t size) { return flow_controller_.TryConsume(size); } + + bool InSlowdownMode() { return flow_controller_.InFlowControlMode(); } + + private: + TsWriteFlowController() + : flow_controller_{0, 1000}, + current_write_flow_limit_{ + "ts_write_flow_limit", {tera::Subscriber::SubscriberType::LATEST}, false} { + current_write_flow_limit_.Set(-1); + } + + // Protected by mu_ + void DropExpiredValue() { + if (write_throughput_queue_.empty()) { + return; + } + + auto last_enqueue_ts = write_throughput_queue_.back().first; + while (last_enqueue_ts - write_throughput_queue_.front().first >= + kHoldWriteThroughputSeconds * 1000) { + write_throughput_queue_.pop_front(); + } + } + + private: + std::mutex mu_; + std::deque write_throughput_queue_; + FlowController flow_controller_; + MetricCounter current_write_flow_limit_; +}; +} \ No newline at end of file diff --git a/src/sample/atomic_sample.cc b/src/sample/atomic_sample.cc index 3053ec8b4..8b011d2a0 100644 --- a/src/sample/atomic_sample.cc +++ b/src/sample/atomic_sample.cc @@ -3,61 +3,61 @@ #include "tera.h" int main() { - tera::ErrorCode error_code; - - // Get a client instance - tera::Client* client = tera::Client::NewClient("./tera.flag", "atomic_sample", &error_code); - assert(client); - - // Create table - tera::TableDescriptor schema("atomic_sample"); - schema.AddLocalityGroup("lg0"); - schema.AddColumnFamily("cnt"); - schema.AddColumnFamily("sum"); - client->CreateTable(schema, &error_code); - // CreateTable status check is intentionally ignored to support repeated call. - - // Open table - tera::Table* table = client->OpenTable("atomic_sample", &error_code); - assert(table); - - // init a row - tera::RowMutation* init = table->NewRowMutation("key1"); - init->Put("cnt", "", tera::CounterCoding::EncodeCounter(10)); - init->Put("sum", "", tera::CounterCoding::EncodeCounter(100)); - table->ApplyMutation(init); - assert(init->GetError().GetType() == tera::ErrorCode::kOK); - delete init; - - // add into the row - tera::RowMutation* mutation = table->NewRowMutation("key1"); - mutation->Add("cnt", "", 1); - mutation->Add("sum", "", 20); - table->ApplyMutation(mutation); - assert(mutation->GetError().GetType() == tera::ErrorCode::kOK); - delete mutation; - - // read the row - tera::RowReader* reader = table->NewRowReader("key1"); - table->Get(reader); - assert(reader->GetError().GetType() == tera::ErrorCode::kOK); - - int64_t counter = 0; - while (!reader->Done()) { - if (reader->Family() == "cnt") { - tera::CounterCoding::DecodeCounter(reader->Value(), &counter); - assert(counter == 11); - } else if (reader->Family() == "sum") { - tera::CounterCoding::DecodeCounter(reader->Value(), &counter); - assert(counter == 120); - } - std::cout << reader->Family() << ": " << counter << std::endl; - reader->Next(); + tera::ErrorCode error_code; + + // Get a client instance + tera::Client* client = tera::Client::NewClient("./tera.flag", "atomic_sample", &error_code); + assert(client); + + // Create table + tera::TableDescriptor schema("atomic_sample"); + schema.AddLocalityGroup("lg0"); + schema.AddColumnFamily("cnt"); + schema.AddColumnFamily("sum"); + client->CreateTable(schema, &error_code); + // CreateTable status check is intentionally ignored to support repeated call. + + // Open table + tera::Table* table = client->OpenTable("atomic_sample", &error_code); + assert(table); + + // init a row + tera::RowMutation* init = table->NewRowMutation("key1"); + init->Put("cnt", "", tera::CounterCoding::EncodeCounter(10)); + init->Put("sum", "", tera::CounterCoding::EncodeCounter(100)); + table->ApplyMutation(init); + assert(init->GetError().GetType() == tera::ErrorCode::kOK); + delete init; + + // add into the row + tera::RowMutation* mutation = table->NewRowMutation("key1"); + mutation->Add("cnt", "", 1); + mutation->Add("sum", "", 20); + table->ApplyMutation(mutation); + assert(mutation->GetError().GetType() == tera::ErrorCode::kOK); + delete mutation; + + // read the row + tera::RowReader* reader = table->NewRowReader("key1"); + table->Get(reader); + assert(reader->GetError().GetType() == tera::ErrorCode::kOK); + + int64_t counter = 0; + while (!reader->Done()) { + if (reader->Family() == "cnt") { + tera::CounterCoding::DecodeCounter(reader->Value(), &counter); + assert(counter == 11); + } else if (reader->Family() == "sum") { + tera::CounterCoding::DecodeCounter(reader->Value(), &counter); + assert(counter == 120); } - delete reader; - - // Close - delete table; - delete client; - return 0; + std::cout << reader->Family() << ": " << counter << std::endl; + reader->Next(); + } + delete reader; + + // Close + delete table; + delete client; + return 0; } diff --git a/src/sample/global_txn_async_sample.cc b/src/sample/global_txn_async_sample.cc index a2f77896e..0f0dfccf5 100644 --- a/src/sample/global_txn_async_sample.cc +++ b/src/sample/global_txn_async_sample.cc @@ -11,133 +11,131 @@ std::string read_result = ""; std::atomic all_gtxn_thread_done(false); std::atomic finish_cnt(0); - + struct RowReaderContext { - tera::Transaction* gtxn; - tera::Table* t1; - tera::Table* t2; + tera::Transaction* gtxn; + tera::Table* t1; + tera::Table* t2; }; tera::Table* InitTable(tera::Client* client, const std::string& tablename) { - tera::ErrorCode error_code; - if (!client->IsTableExist(tablename, &error_code)) { - tera::TableDescriptor schema(tablename); - schema.EnableTxn(); // 参与全局事务的表schema 都需要设置 txn=true - schema.AddLocalityGroup("lg0"); - tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1"); - cfd1->EnableGlobalTransaction(); - tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); - cfd2->DisableGlobalTransaction(); - client->CreateTable(schema, &error_code); - assert(error_code.GetType() == tera::ErrorCode::kOK); - } - - tera::Table* table = client->OpenTable(tablename, &error_code); - assert(table && error_code.GetType() == tera::ErrorCode::kOK); - return table; + tera::ErrorCode error_code; + if (!client->IsTableExist(tablename, &error_code)) { + tera::TableDescriptor schema(tablename); + schema.EnableTxn(); // 参与全局事务的表schema 都需要设置 txn=true + schema.AddLocalityGroup("lg0"); + tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); + cfd2->DisableGlobalTransaction(); + client->CreateTable(schema, &error_code); + assert(error_code.GetType() == tera::ErrorCode::kOK); + } + + tera::Table* table = client->OpenTable(tablename, &error_code); + assert(table && error_code.GetType() == tera::ErrorCode::kOK); + return table; } void TxnCallBack(tera::Transaction* txn) { - if (txn->GetError().GetType() != tera::ErrorCode::kOK) { - std::cout << "txn failed, start_ts= " << txn->GetStartTimestamp() - << ", reason= " << txn->GetError().ToString() - << std::endl; - } else { - std::cout << "gtxn success" << std::endl; - } - delete txn; - all_gtxn_thread_done.store(true); + if (txn->GetError().GetType() != tera::ErrorCode::kOK) { + std::cout << "txn failed, start_ts= " << txn->GetStartTimestamp() + << ", reason= " << txn->GetError().ToString() << std::endl; + } else { + std::cout << "gtxn success" << std::endl; + } + delete txn; + all_gtxn_thread_done.store(true); } void ReadRowCallBack(tera::RowReader* row_reader) { - RowReaderContext* ctx = (RowReaderContext*)row_reader->GetContext(); - while (!row_reader->Done()) { - printf("Row: %s\%s\%ld\%s\n", - row_reader->RowName().c_str(), row_reader->ColumnName().c_str(), - row_reader->Timestamp(), row_reader->Value().c_str()); - row_reader->Next(); - read_result += row_reader->Value(); - } - delete row_reader; - ++finish_cnt; - // mutations begin at all reader callback done - if (finish_cnt.load() == 2) { - // write to other columns - tera::Transaction* g_txn = ctx->gtxn; - tera::RowMutation* m1 = ctx->t1->NewRowMutation("r1"); - tera::RowMutation* m2 = ctx->t2->NewRowMutation("r1"); - m1->Put( "cf1", "q1", read_result); - m2->Put( "cf1", "q1", read_result); - - // ApplyMutation only modifying local memory and do not need asynchronous - // we also support asynchronous interface for RowMutation,as you like - g_txn->ApplyMutation(m1); - g_txn->ApplyMutation(m2); - g_txn->SetCommitCallback(TxnCallBack); - delete m1; - delete m2; - // need not check ApplyMutation, Transaction will be check before commit. - g_txn->Commit(); - } + RowReaderContext* ctx = (RowReaderContext*)row_reader->GetContext(); + while (!row_reader->Done()) { + printf("Row: %s\%s\%ld\%s\n", row_reader->RowName().c_str(), row_reader->ColumnName().c_str(), + row_reader->Timestamp(), row_reader->Value().c_str()); + row_reader->Next(); + read_result += row_reader->Value(); + } + delete row_reader; + ++finish_cnt; + // mutations begin at all reader callback done + if (finish_cnt.load() == 2) { + // write to other columns + tera::Transaction* g_txn = ctx->gtxn; + tera::RowMutation* m1 = ctx->t1->NewRowMutation("r1"); + tera::RowMutation* m2 = ctx->t2->NewRowMutation("r1"); + m1->Put("cf1", "q1", read_result); + m2->Put("cf1", "q1", read_result); + + // ApplyMutation only modifying local memory and do not need asynchronous + // we also support asynchronous interface for RowMutation,as you like + g_txn->ApplyMutation(m1); + g_txn->ApplyMutation(m2); + g_txn->SetCommitCallback(TxnCallBack); + delete m1; + delete m2; + // need not check ApplyMutation, Transaction will be check before commit. + g_txn->Commit(); + } } void DoTxn(tera::Client* client, tera::Table* t1, tera::Table* t2) { - - // begin global transaction - tera::Transaction* g_txn = client->NewGlobalTransaction(); - if (g_txn == NULL) { - return; - } - - // read from different tables - tera::RowReader* r1 = t1->NewRowReader("r1"); - tera::RowReader* r2 = t2->NewRowReader("r1"); - r1->AddColumn("cf1", "q2"); - r2->AddColumn("cf1", "q2"); - r1->SetCallBack(ReadRowCallBack); - r2->SetCallBack(ReadRowCallBack); - RowReaderContext ctx; - ctx.gtxn = g_txn; - ctx.t1 = t1; - ctx.t2 = t2; - r1->SetContext(&ctx); - r2->SetContext(&ctx); - // read from t1:r1:cf1:q2 and check - g_txn->Get(r1); - // read from t2:r1:cf1:q2 and check - g_txn->Get(r2); + // begin global transaction + tera::Transaction* g_txn = client->NewGlobalTransaction(); + if (g_txn == NULL) { + return; + } + + // read from different tables + tera::RowReader* r1 = t1->NewRowReader("r1"); + tera::RowReader* r2 = t2->NewRowReader("r1"); + r1->AddColumn("cf1", "q2"); + r2->AddColumn("cf1", "q2"); + r1->SetCallBack(ReadRowCallBack); + r2->SetCallBack(ReadRowCallBack); + RowReaderContext ctx; + ctx.gtxn = g_txn; + ctx.t1 = t1; + ctx.t2 = t2; + r1->SetContext(&ctx); + r2->SetContext(&ctx); + // read from t1:r1:cf1:q2 and check + g_txn->Get(r1); + // read from t2:r1:cf1:q2 and check + g_txn->Get(r2); } -int main(int argc, char *argv[]) { - - tera::ErrorCode error_code; - - tera::Client* client = tera::Client::NewClient("../conf/tera.flag", "global_txn_sample_async", &error_code); - if (client == NULL) { - return -1; - } - - // create or open tables - // before global transaction should be - // (1) OpenTable which you will r/w - // (2) check OpenTable success - tera::Table* t1 = InitTable(client, "t1"); - tera::Table* t2 = InitTable(client, "t2"); - - // the global transaction may add to threadpool, which implements by yourself. - // - // In this example, - // - // first, read two cell values from different tables, - // next, get all values concat at reader callback, - // last, put concat result into different tables. - DoTxn(client, t1, t2); - - // global transaction thead always finished before callback - // wait for callback thread done at main thread - // if your know the program can't exit before callback done, it's not necessary. - while (!all_gtxn_thread_done.load()) { - usleep(100); - } - return 0; +int main(int argc, char* argv[]) { + tera::ErrorCode error_code; + + tera::Client* client = + tera::Client::NewClient("../conf/tera.flag", "global_txn_sample_async", &error_code); + if (client == NULL) { + return -1; + } + + // create or open tables + // before global transaction should be + // (1) OpenTable which you will r/w + // (2) check OpenTable success + tera::Table* t1 = InitTable(client, "t1"); + tera::Table* t2 = InitTable(client, "t2"); + + // the global transaction may add to threadpool, which implements by yourself. + // + // In this example, + // + // first, read two cell values from different tables, + // next, get all values concat at reader callback, + // last, put concat result into different tables. + DoTxn(client, t1, t2); + + // global transaction thead always finished before callback + // wait for callback thread done at main thread + // if your know the program can't exit before callback done, it's not + // necessary. + while (!all_gtxn_thread_done.load()) { + usleep(100); + } + return 0; } diff --git a/src/sample/global_txn_sync_sample.cc b/src/sample/global_txn_sync_sample.cc index 66bb94b7d..b3d219dbb 100644 --- a/src/sample/global_txn_sync_sample.cc +++ b/src/sample/global_txn_sync_sample.cc @@ -4,104 +4,104 @@ #include #include "tera.h" -int main(int argc, char *argv[]) { +int main(int argc, char* argv[]) { + tera::ErrorCode error_code; - tera::ErrorCode error_code; - - tera::Client* client = tera::Client::NewClient("../conf/tera.flag", "global_txn_sample", &error_code); - assert(client); - // create or open tables - tera::Table* t1 = nullptr; - tera::Table* t2 = nullptr; - if (!client->IsTableExist("t1", &error_code)) { - tera::TableDescriptor schema("t1"); - schema.EnableTxn(); // 参与全局事务的表schema 都需要设置 txn=true - schema.AddLocalityGroup("lg0"); - tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1"); - cfd1->EnableGlobalTransaction(); - tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); - cfd2->EnableGlobalTransaction(); - client->CreateTable(schema, &error_code); - assert(error_code.GetType() == tera::ErrorCode::kOK); - } + tera::Client* client = + tera::Client::NewClient("../conf/tera.flag", "global_txn_sample", &error_code); + assert(client); + // create or open tables + tera::Table* t1 = nullptr; + tera::Table* t2 = nullptr; + if (!client->IsTableExist("t1", &error_code)) { + tera::TableDescriptor schema("t1"); + schema.EnableTxn(); // 参与全局事务的表schema 都需要设置 txn=true + schema.AddLocalityGroup("lg0"); + tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); + cfd2->EnableGlobalTransaction(); + client->CreateTable(schema, &error_code); + assert(error_code.GetType() == tera::ErrorCode::kOK); + } - if (!client->IsTableExist("t2", &error_code)) { - tera::TableDescriptor schema("t2"); - schema.EnableTxn(); // 参与全局事务的表schema 都需要设置 txn=true - schema.AddLocalityGroup("lg0"); - tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1"); - cfd1->EnableGlobalTransaction(); - tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); - cfd2->EnableGlobalTransaction(); - client->CreateTable(schema, &error_code); - assert(error_code.GetType() == tera::ErrorCode::kOK); - } - // before global transaction should be - // (1) OpenTable which you will r/w - // (2) check OpenTable success - t1 = client->OpenTable("t1", &error_code); - assert(t1 && error_code.GetType() == tera::ErrorCode::kOK); - - t2 = client->OpenTable("t2", &error_code); - assert(t2 && error_code.GetType() == tera::ErrorCode::kOK); + if (!client->IsTableExist("t2", &error_code)) { + tera::TableDescriptor schema("t2"); + schema.EnableTxn(); // 参与全局事务的表schema 都需要设置 txn=true + schema.AddLocalityGroup("lg0"); + tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); + cfd2->EnableGlobalTransaction(); + client->CreateTable(schema, &error_code); + assert(error_code.GetType() == tera::ErrorCode::kOK); + } + // before global transaction should be + // (1) OpenTable which you will r/w + // (2) check OpenTable success + t1 = client->OpenTable("t1", &error_code); + assert(t1 && error_code.GetType() == tera::ErrorCode::kOK); - // begin global transaction - tera::Transaction* g_txn = client->NewGlobalTransaction(); - if (g_txn == NULL) { - return -1; - } - if (error_code.GetType()!=tera::ErrorCode::kOK) { - std::cout << error_code.ToString() << std::endl; - return -1; - } - // read from different tables - std::unique_ptr r1(t1->NewRowReader("r1")); - std::unique_ptr r2(t2->NewRowReader("r1")); - r1->AddColumn("cf1", "q2"); - r2->AddColumn("cf1", "q2"); - // read from t1:r1:cf1:q2 and check - g_txn->Get(r1.get()); - if (g_txn->GetError().GetType() != tera::ErrorCode::kOK) { - std::cout << g_txn->GetError().ToString() << std::endl; - return -1; - } - std::string r1_v = ""; - while(!r1->Done()) { - std::cout << r1->Value() << std::endl; - r1_v = r1->Value(); - r1->Next(); - } + t2 = client->OpenTable("t2", &error_code); + assert(t2 && error_code.GetType() == tera::ErrorCode::kOK); - // read from t2:r1:cf1:q2 and check - g_txn->Get(r2.get()); - if (g_txn->GetError().GetType() != tera::ErrorCode::kOK) { - std::cout << g_txn->GetError().ToString() << std::endl; - return -1; - } - std::string r2_v = ""; - while(!r2->Done()) { - std::cout << r2->Value() << std::endl; - r2_v = r2->Value(); - r2->Next(); - } + // begin global transaction + tera::Transaction* g_txn = client->NewGlobalTransaction(); + if (g_txn == NULL) { + return -1; + } + if (error_code.GetType() != tera::ErrorCode::kOK) { + std::cout << error_code.ToString() << std::endl; + return -1; + } + // read from different tables + std::unique_ptr r1(t1->NewRowReader("r1")); + std::unique_ptr r2(t2->NewRowReader("r1")); + r1->AddColumn("cf1", "q2"); + r2->AddColumn("cf1", "q2"); + // read from t1:r1:cf1:q2 and check + g_txn->Get(r1.get()); + if (g_txn->GetError().GetType() != tera::ErrorCode::kOK) { + std::cout << g_txn->GetError().ToString() << std::endl; + return -1; + } + std::string r1_v = ""; + while (!r1->Done()) { + std::cout << r1->Value() << std::endl; + r1_v = r1->Value(); + r1->Next(); + } - // write to other columns - std::unique_ptr m1(t1->NewRowMutation("r1")); - std::unique_ptr m2(t2->NewRowMutation("r1")); - m1->Put( "cf1", "q1", r2_v); - m2->Put( "cf1", "q1", r1_v); + // read from t2:r1:cf1:q2 and check + g_txn->Get(r2.get()); + if (g_txn->GetError().GetType() != tera::ErrorCode::kOK) { + std::cout << g_txn->GetError().ToString() << std::endl; + return -1; + } + std::string r2_v = ""; + while (!r2->Done()) { + std::cout << r2->Value() << std::endl; + r2_v = r2->Value(); + r2->Next(); + } - g_txn->ApplyMutation(m1.get()); - g_txn->ApplyMutation(m2.get()); - // need not check ApplyMutation, Transaction will be check before commit. - g_txn->Commit(); - if (g_txn->GetError().GetType() != tera::ErrorCode::kOK) { - std::cout << g_txn->GetError().ToString() << std::endl; - } else { - std::cout << "commit success" << std::endl; - } + // write to other columns + std::unique_ptr m1(t1->NewRowMutation("r1")); + std::unique_ptr m2(t2->NewRowMutation("r1")); + m1->Put("cf1", "q1", r2_v); + m2->Put("cf1", "q1", r1_v); - delete g_txn; - // end global transaction - return 0; + g_txn->ApplyMutation(m1.get()); + g_txn->ApplyMutation(m2.get()); + // need not check ApplyMutation, Transaction will be check before commit. + g_txn->Commit(); + if (g_txn->GetError().GetType() != tera::ErrorCode::kOK) { + std::cout << g_txn->GetError().ToString() << std::endl; + } else { + std::cout << "commit success" << std::endl; + } + + delete g_txn; + // end global transaction + return 0; } diff --git a/src/sample/tera_batch_mutation_sample.cc b/src/sample/tera_batch_mutation_sample.cc new file mode 100644 index 000000000..d37951c46 --- /dev/null +++ b/src/sample/tera_batch_mutation_sample.cc @@ -0,0 +1,49 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include +#include + +#include + +#include "tera.h" + +bool g_finished = false; + +void MyCallback(tera::BatchMutation* batch_mu) { + if (batch_mu->GetError().GetType() == tera::ErrorCode::kOK) { + std::cout << "done" << std::endl; + } else { + std::cout << batch_mu->GetError().GetReason() << std::endl; + } + g_finished = true; +} + +int main(int argc, char* argv[]) { + tera::ErrorCode error_code; + // 根据配置创建一个client + tera::Client* client = + tera::Client::NewClient("./tera.flag", "tera_batch_mutation_sample", &error_code); + if (client == NULL) { + printf("Create tera client fail: %s\n", tera::strerr(error_code)); + return 1; + } + tera::Table* table = client->OpenTable("t5", &error_code); + tera::BatchMutation* bmu = table->NewBatchMutation(); + int i = 0; + while (++i < 100) { + bmu->Put("key" + std::to_string(i), "v" + std::to_string(i)); + } + bmu->SetCallBack(MyCallback); + table->ApplyMutation(bmu); + + // simulate your task + while (!g_finished) { + sleep(1); + } + return 0; +} + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/sample/tera_row_txn_sample.cc b/src/sample/tera_row_txn_sample.cc index 879652dfc..be2bb823d 100644 --- a/src/sample/tera_row_txn_sample.cc +++ b/src/sample/tera_row_txn_sample.cc @@ -4,71 +4,71 @@ #include "tera.h" int main() { - tera::ErrorCode error_code; + tera::ErrorCode error_code; - // Get a client instance - tera::Client* client = tera::Client::NewClient("./tera.flag", "txn_sample", &error_code); - assert(client); + // Get a client instance + tera::Client* client = tera::Client::NewClient("./tera.flag", "txn_sample", &error_code); + assert(client); - // Create table - tera::TableDescriptor schema("employee"); - schema.EnableTxn(); - schema.AddLocalityGroup("lg0"); - schema.AddColumnFamily("title"); - schema.AddColumnFamily("salary"); - client->CreateTable(schema, &error_code); - assert(error_code.GetType() == tera::ErrorCode::kOK); + // Create table + tera::TableDescriptor schema("employee"); + schema.EnableTxn(); + schema.AddLocalityGroup("lg0"); + schema.AddColumnFamily("title"); + schema.AddColumnFamily("salary"); + client->CreateTable(schema, &error_code); + assert(error_code.GetType() == tera::ErrorCode::kOK); - // Open table - tera::Table* table = client->OpenTable("employee", &error_code); - assert(table); + // Open table + tera::Table* table = client->OpenTable("employee", &error_code); + assert(table); - // init a row - tera::RowMutation* init = table->NewRowMutation("Amy"); - init->Put("title", "", "junior"); - init->Put("salary", "", "100"); - table->ApplyMutation(init); - assert(init->GetError().GetType() == tera::ErrorCode::kOK); - delete init; + // init a row + tera::RowMutation* init = table->NewRowMutation("Amy"); + init->Put("title", "", "junior"); + init->Put("salary", "", "100"); + table->ApplyMutation(init); + assert(init->GetError().GetType() == tera::ErrorCode::kOK); + delete init; - // txn read the row - tera::Transaction* txn = table->StartRowTransaction("Amy"); - tera::RowReader* reader = table->NewRowReader("Amy"); - reader->AddColumnFamily("title"); - txn->Get(reader); - assert(reader->GetError().GetType() == tera::ErrorCode::kOK); + // txn read the row + tera::Transaction* txn = table->StartRowTransaction("Amy"); + tera::RowReader* reader = table->NewRowReader("Amy"); + reader->AddColumnFamily("title"); + txn->Get(reader); + assert(reader->GetError().GetType() == tera::ErrorCode::kOK); - // get title - std::string title; - while (!reader->Done()) { - if (reader->Family() == "title") { - title = reader->Value(); - break; - } - reader->Next(); + // get title + std::string title; + while (!reader->Done()) { + if (reader->Family() == "title") { + title = reader->Value(); + break; } - delete reader; + reader->Next(); + } + delete reader; - // txn write the row - tera::RowMutation* mutation = table->NewRowMutation("Amy"); - if (title == "junior") { - mutation->Put("title", "", "senior"); - mutation->Put("salary", "", "200"); - } else if (title == "senior") { - mutation->Put("title", "", "director"); - mutation->Put("salary", "", "300"); - } - txn->ApplyMutation(mutation); - assert(mutation->GetError().GetType() == tera::ErrorCode::kOK); - delete mutation; + // txn write the row + tera::RowMutation* mutation = table->NewRowMutation("Amy"); + if (title == "junior") { + mutation->Put("title", "", "senior"); + mutation->Put("salary", "", "200"); + } else if (title == "senior") { + mutation->Put("title", "", "director"); + mutation->Put("salary", "", "300"); + } + txn->ApplyMutation(mutation); + assert(mutation->GetError().GetType() == tera::ErrorCode::kOK); + delete mutation; - // txn commit - table->CommitRowTransaction(txn); - printf("Transaction commit result %s\n", txn->GetError().ToString().c_str()); - delete txn; + // txn commit + table->CommitRowTransaction(txn); + printf("Transaction commit result %s\n", txn->GetError().ToString().c_str()); + delete txn; - // Close - delete table; - delete client; - return 0; + // Close + delete table; + delete client; + return 0; } diff --git a/src/sample/tera_sample.cc b/src/sample/tera_sample.cc index b31db5669..38ce6a537 100644 --- a/src/sample/tera_sample.cc +++ b/src/sample/tera_sample.cc @@ -2,18 +2,19 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. - /** - * @file tera_sample.cc - * @author yanshiguang02@baidu.com - * @date 2014/02/05 19:55:54 - * @brief Sample of Tera API - * 每个表都有个默认的LocalityGroup "default" 要么被用户显示创建, 要么被系统创建 - * 每个表都有个某人的ColumnFamily "" - * 要么被用户显示创建, 要么被系统创建, 默认属于lg default - * 不包含列和版本 - * 这么创建表: - * create table {{localitygrop:{{"lg1":{"block_size":5}},{"lg2":{"store_type":"disk"}}},{"columnfamily":{"cf1":{}}}}} - **/ +/** +* @file tera_sample.cc +* @author yanshiguang02@baidu.com +* @date 2014/02/05 19:55:54 +* @brief Sample of Tera API +* 每个表都有个默认的LocalityGroup "default" 要么被用户显示创建, 要么被系统创建 +* 每个表都有个某人的ColumnFamily "" +* 要么被用户显示创建, 要么被系统创建, 默认属于lg default +* 不包含列和版本 +* 这么创建表: +* create table +*{{localitygrop:{{"lg1":{"block_size":5}},{"lg2":{"store_type":"disk"}}},{"columnfamily":{"cf1":{}}}}} +**/ #include #include @@ -23,245 +24,240 @@ /// 创建一个表格 int CreateTable(tera::Client* client) { - // 创建一个表格的描述 - tera::TableDescriptor table_desc("webdb"); - - // 创建LocalityGroup - tera::LocalityGroupDescriptor* lgd0 = table_desc.AddLocalityGroup("lg0"); - lgd0->SetBlockSize(128*1024); - lgd0->SetCompress(tera::kSnappyCompress); - - tera::LocalityGroupDescriptor* lgd1 = table_desc.AddLocalityGroup("lg1"); - lgd1->SetBlockSize(32*1024); - lgd1->SetCompress(tera::kSnappyCompress); - - // 创建ColumnFamily - tera::ColumnFamilyDescriptor* cfd1 = table_desc.AddColumnFamily("html", "lg0"); - cfd1->SetMaxVersions(5); - cfd1->SetMinVersions(3); - cfd1->SetTimeToLive(86400*100); - tera::ColumnFamilyDescriptor* cfd2 = table_desc.AddColumnFamily("links", "lg1"); - cfd2->SetMaxVersions(5); - cfd2->SetMinVersions(0); - cfd2->SetTimeToLive(86400*100); - tera::ColumnFamilyDescriptor* cfd3 = table_desc.AddColumnFamily("title", "lg1"); - cfd3->SetMaxVersions(5); - table_desc.AddColumnFamily("anchor", "lg1"); - - tera::ErrorCode error_code; - if (!client->CreateTable(table_desc, &error_code)) { - printf("Create Table fail: %s\n", tera::strerr(error_code)); - } - return 0; + // 创建一个表格的描述 + tera::TableDescriptor table_desc("webdb"); + + // 创建LocalityGroup + tera::LocalityGroupDescriptor* lgd0 = table_desc.AddLocalityGroup("lg0"); + lgd0->SetBlockSize(128 * 1024); + lgd0->SetCompress(tera::kSnappyCompress); + + tera::LocalityGroupDescriptor* lgd1 = table_desc.AddLocalityGroup("lg1"); + lgd1->SetBlockSize(32 * 1024); + lgd1->SetCompress(tera::kSnappyCompress); + + // 创建ColumnFamily + tera::ColumnFamilyDescriptor* cfd1 = table_desc.AddColumnFamily("html", "lg0"); + cfd1->SetMaxVersions(5); + cfd1->SetMinVersions(3); + cfd1->SetTimeToLive(86400 * 100); + tera::ColumnFamilyDescriptor* cfd2 = table_desc.AddColumnFamily("links", "lg1"); + cfd2->SetMaxVersions(5); + cfd2->SetMinVersions(0); + cfd2->SetTimeToLive(86400 * 100); + tera::ColumnFamilyDescriptor* cfd3 = table_desc.AddColumnFamily("title", "lg1"); + cfd3->SetMaxVersions(5); + table_desc.AddColumnFamily("anchor", "lg1"); + + tera::ErrorCode error_code; + if (!client->CreateTable(table_desc, &error_code)) { + printf("Create Table fail: %s\n", tera::strerr(error_code)); + } + return 0; } /// 修改一个表的内容 int ModifyTable(tera::Table* table) { - tera::ErrorCode error_code; - - // 修改需要先创建一个 RowMutation - - tera::RowMutation* row = table->NewRowMutation("com.baidu.www/"); - // 写一个column - row->Put("title", "abc", "Baidu.com"); - row->Put("title", "abd", "Baidu.com"); - row->Put("title", "abe", "Baidu.com"); - row->Put("title", "abf", "Baidu.com"); - row->Put("anchor", "www.hao123.com/", "百度"); - row->Put("html", "", time(NULL), "Test content"); - // 删除一个column过去24小时内的所有版本 - // row->DeleteColumns("title", "abc", time(NULL), time(NULL) - 86400); - // 删除一个column24小时之前的所有版本 - row->DeleteColumns("title", "abd", time(NULL) - 86400); - // 删除一个column的所有版本 - row->DeleteColumns("title", "abe"); - // 删除一个columnfamily的所有列 - row->DeleteFamily("links"); - - // 提交修改 - table->ApplyMutation(row); - printf("Write to table : %s\n", tera::strerr(row->GetError())); - delete row; - - // 批量提交修改 - tera::RowMutation* row2 = table->NewRowMutation("com.baidu.tieba/"); - // 删除一行的所有column family - row2->DeleteRow(); - std::vector mutation_list; - mutation_list.push_back(row2); - table->ApplyMutation(mutation_list); - printf("Write to table : %s\n", tera::strerr(mutation_list[0]->GetError())); - delete row2; - - return 0; + tera::ErrorCode error_code; + + // 修改需要先创建一个 RowMutation + + tera::RowMutation* row = table->NewRowMutation("com.baidu.www/"); + // 写一个column + row->Put("title", "abc", "Baidu.com"); + row->Put("title", "abd", "Baidu.com"); + row->Put("title", "abe", "Baidu.com"); + row->Put("title", "abf", "Baidu.com"); + row->Put("anchor", "www.hao123.com/", "百度"); + row->Put("html", "", time(NULL), "Test content"); + // 删除一个column过去24小时内的所有版本 + // row->DeleteColumns("title", "abc", time(NULL), time(NULL) - 86400); + // 删除一个column24小时之前的所有版本 + row->DeleteColumns("title", "abd", time(NULL) - 86400); + // 删除一个column的所有版本 + row->DeleteColumns("title", "abe"); + // 删除一个columnfamily的所有列 + row->DeleteFamily("links"); + + // 提交修改 + table->ApplyMutation(row); + printf("Write to table : %s\n", tera::strerr(row->GetError())); + delete row; + + // 批量提交修改 + tera::RowMutation* row2 = table->NewRowMutation("com.baidu.tieba/"); + // 删除一行的所有column family + row2->DeleteRow(); + std::vector mutation_list; + mutation_list.push_back(row2); + table->ApplyMutation(mutation_list); + printf("Write to table : %s\n", tera::strerr(mutation_list[0]->GetError())); + delete row2; + + return 0; } /// 扫描一个表 int ScanTable(tera::Table* table) { - tera::ErrorCode error_code; - - // 创建一个scan表述 - tera::ScanDescriptor scan_desc("com.baidu."); - // 只扫描百度主域 - scan_desc.SetEnd("com.baidu.~"); - // 设置扫描的column family - scan_desc.AddColumnFamily("anchor"); - // 设置最多返回的版本 - scan_desc.SetMaxVersions(3); - // 设置扫描的时间范围 - scan_desc.SetTimeRange(time(NULL), time(NULL) - 3600); - - tera::ResultStream* scanner = table->Scan(scan_desc, &error_code); - for (scanner->LookUp("com.baidu."); !scanner->Done(); scanner->Next()) { - printf("Row: %s\%s\%ld\%s\n", - scanner->RowName().c_str(), scanner->ColumnName().c_str(), - scanner->Timestamp(), scanner->Value().c_str()); - } - delete scanner; - return 0; + tera::ErrorCode error_code; + + // 创建一个scan表述 + tera::ScanDescriptor scan_desc("com.baidu."); + // 只扫描百度主域 + scan_desc.SetEnd("com.baidu.~"); + // 设置扫描的column family + scan_desc.AddColumnFamily("anchor"); + // 设置最多返回的版本 + scan_desc.SetMaxVersions(3); + // 设置扫描的时间范围 + scan_desc.SetTimeRange(time(NULL), time(NULL) - 3600); + + tera::ResultStream* scanner = table->Scan(scan_desc, &error_code); + for (scanner->LookUp("com.baidu."); !scanner->Done(); scanner->Next()) { + printf("Row: %s\%s\%ld\%s\n", scanner->RowName().c_str(), scanner->ColumnName().c_str(), + scanner->Timestamp(), scanner->Value().c_str()); + } + delete scanner; + return 0; } bool finish = false; void ReadRowCallBack(tera::RowReader* row_reader) { - while (!row_reader->Done()) { - printf("Row: %s\%s\%ld\%s\n", - row_reader->RowName().c_str(), row_reader->ColumnName().c_str(), - row_reader->Timestamp(), row_reader->Value().c_str()); - row_reader->Next(); - } - delete row_reader; - finish = true; + while (!row_reader->Done()) { + printf("Row: %s\%s\%ld\%s\n", row_reader->RowName().c_str(), row_reader->ColumnName().c_str(), + row_reader->Timestamp(), row_reader->Value().c_str()); + row_reader->Next(); + } + delete row_reader; + finish = true; } int ReadRowFromTable(tera::Table* table) { - tera::ErrorCode error_code; - tera::RowReader* row_reader = table->NewRowReader("com.baidu.www/"); - row_reader->AddColumnFamily("html"); - row_reader->AddColumn("anchor", "www.hao123.com/"); - row_reader->SetMaxVersions(3); - row_reader->SetAsync(); - row_reader->SetCallBack(ReadRowCallBack); - // Async Read one row - table->Get(row_reader); - - while (!finish) { - sleep(1); - } - - // Sync Read Batch Rows - - std::vector rows_reader; - tera::RowReader* row_reader1 = table->NewRowReader("com.baidu.www/"); - row_reader1->AddColumnFamily("html"); - row_reader1->SetMaxVersions(3); - row_reader1->SetTimeOut(5000); - rows_reader.push_back(row_reader1); - tera::RowReader* row_reader2 = table->NewRowReader("com.baidu.www/"); - row_reader2->AddColumnFamily("anchor"); - row_reader2->SetMaxVersions(3); - row_reader2->SetTimeOut(5000); - rows_reader.push_back(row_reader2); - table->Get(rows_reader); - - while (!row_reader1->Done()) { - printf("Row: %s\%s\%ld\%s\n", - row_reader1->RowName().c_str(), row_reader1->ColumnName().c_str(), - row_reader1->Timestamp(), row_reader1->Value().c_str()); - row_reader1->Next(); - } - delete row_reader1; - while (!row_reader2->Done()) { - printf("Row: %s\%s\%ld\%s\n", - row_reader2->RowName().c_str(), row_reader2->ColumnName().c_str(), - row_reader2->Timestamp(), row_reader2->Value().c_str()); - row_reader2->Next(); - } - delete row_reader2; - return 0; + tera::ErrorCode error_code; + tera::RowReader* row_reader = table->NewRowReader("com.baidu.www/"); + row_reader->AddColumnFamily("html"); + row_reader->AddColumn("anchor", "www.hao123.com/"); + row_reader->SetMaxVersions(3); + row_reader->SetAsync(); + row_reader->SetCallBack(ReadRowCallBack); + // Async Read one row + table->Get(row_reader); + + while (!finish) { + sleep(1); + } + + // Sync Read Batch Rows + + std::vector rows_reader; + tera::RowReader* row_reader1 = table->NewRowReader("com.baidu.www/"); + row_reader1->AddColumnFamily("html"); + row_reader1->SetMaxVersions(3); + row_reader1->SetTimeOut(5000); + rows_reader.push_back(row_reader1); + tera::RowReader* row_reader2 = table->NewRowReader("com.baidu.www/"); + row_reader2->AddColumnFamily("anchor"); + row_reader2->SetMaxVersions(3); + row_reader2->SetTimeOut(5000); + rows_reader.push_back(row_reader2); + table->Get(rows_reader); + + while (!row_reader1->Done()) { + printf("Row: %s\%s\%ld\%s\n", row_reader1->RowName().c_str(), row_reader1->ColumnName().c_str(), + row_reader1->Timestamp(), row_reader1->Value().c_str()); + row_reader1->Next(); + } + delete row_reader1; + while (!row_reader2->Done()) { + printf("Row: %s\%s\%ld\%s\n", row_reader2->RowName().c_str(), row_reader2->ColumnName().c_str(), + row_reader2->Timestamp(), row_reader2->Value().c_str()); + row_reader2->Next(); + } + delete row_reader2; + return 0; } /// 三维表格 int ShowBigTable(tera::Client* client) { - tera::ErrorCode error_code; - // Create - CreateTable(client); - // Open - tera::Table* table = client->OpenTable("webdb", &error_code); - if (table == NULL) { - printf("Open table fail: %s\n", tera::strerr(error_code)); - return 1; - } - // Write - ModifyTable(table); - // Scan - //ScanTable(table); - // Read - ReadRowFromTable(table); - delete table; - return 0; + tera::ErrorCode error_code; + // Create + CreateTable(client); + // Open + tera::Table* table = client->OpenTable("webdb", &error_code); + if (table == NULL) { + printf("Open table fail: %s\n", tera::strerr(error_code)); + return 1; + } + // Write + ModifyTable(table); + // Scan + // ScanTable(table); + // Read + ReadRowFromTable(table); + delete table; + return 0; } /// 二维表格 int ShowSampleTable(tera::Client* client) { - tera::ErrorCode error_code; - // 创建表格,并关闭多版本 - tera::TableDescriptor desc("sample_table"); - tera::ColumnFamilyDescriptor* cfd = desc.AddColumnFamily("weight"); - cfd->SetMaxVersions(0); - client->CreateTable(desc, &error_code); - - // Open - tera::Table* table = client->OpenTable("sample_table", &error_code); - // Write - table->Put("com.baidu.www/", "weight", "", "serialized_weights", &error_code); - // Read - std::string value; - if (table->Get("com.baidu.www/", "weight", "", &value, &error_code)) { - printf("Read return %s\n", value.c_str()); - } - // Close - delete table; - return 0; + tera::ErrorCode error_code; + // 创建表格,并关闭多版本 + tera::TableDescriptor desc("sample_table"); + tera::ColumnFamilyDescriptor* cfd = desc.AddColumnFamily("weight"); + cfd->SetMaxVersions(0); + client->CreateTable(desc, &error_code); + + // Open + tera::Table* table = client->OpenTable("sample_table", &error_code); + // Write + table->Put("com.baidu.www/", "weight", "", "serialized_weights", &error_code); + // Read + std::string value; + if (table->Get("com.baidu.www/", "weight", "", &value, &error_code)) { + printf("Read return %s\n", value.c_str()); + } + // Close + delete table; + return 0; } - /// 把表格作为一个kv使用 int ShowKv(tera::Client* client) { - tera::ErrorCode error_code; - // Create - tera::TableDescriptor schema("kvstore"); - client->CreateTable(schema, &error_code); - // Open - tera::Table* table = client->OpenTable("kvstore", &error_code); - // Write - table->Put("test_key", "", "", "test_value", &error_code); - // Read - std::string value; - if (table->Get("test_key", "", "", &value, &error_code)) { - printf("Read return %s\n", value.c_str()); - } - // Close - delete table; - return 0; + tera::ErrorCode error_code; + // Create + tera::TableDescriptor schema("kvstore"); + client->CreateTable(schema, &error_code); + // Open + tera::Table* table = client->OpenTable("kvstore", &error_code); + // Write + table->Put("test_key", "", "", "test_value", &error_code); + // Read + std::string value; + if (table->Get("test_key", "", "", &value, &error_code)) { + printf("Read return %s\n", value.c_str()); + } + // Close + delete table; + return 0; }; /// 演示程序 int main(int argc, char* argv[]) { - tera::ErrorCode error_code; - // 根据配置创建一个client - tera::Client* client = tera::Client::NewClient("./tera.flag", "tera_sample", &error_code); - if (client == NULL) { - printf("Create tera client fail: %s\n", tera::strerr(error_code)); - return 1; - } - - //CreateTable(client); - // 演示三种使用方式 - ShowBigTable(client); - //ShowSampleTable(client); - //ShowKv(client); - return 0; + tera::ErrorCode error_code; + // 根据配置创建一个client + tera::Client* client = tera::Client::NewClient("./tera.flag", "tera_sample", &error_code); + if (client == NULL) { + printf("Create tera client fail: %s\n", tera::strerr(error_code)); + return 1; + } + + // CreateTable(client); + // 演示三种使用方式 + ShowBigTable(client); + // ShowSampleTable(client); + // ShowKv(client); + return 0; } /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/sdk/batch_mutation_impl.cc b/src/sdk/batch_mutation_impl.cc new file mode 100644 index 000000000..af0b1481e --- /dev/null +++ b/src/sdk/batch_mutation_impl.cc @@ -0,0 +1,268 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Author: baorenyi@baidu.com + +#include "common/base/string_format.h" +#include "io/coding.h" +#include "sdk/batch_mutation_impl.h" +#include "sdk/sdk_utils.h" +#include "common/timer.h" + +namespace tera { + +BatchMutationImpl::BatchMutationImpl(Table* table) + : SdkTask(SdkTask::BATCH_MUTATION), + table_(table), + update_meta_key_(""), + callback_(NULL), + user_context_(NULL), + timeout_ms_(0), + finish_(false), + finish_cond_(&finish_mutex_), + commit_times_(0), + on_finish_callback_(NULL), + start_ts_(get_micros()) {} + +BatchMutationImpl::~BatchMutationImpl() {} + +void BatchMutationImpl::Put(const std::string& row_key, const std::string& value, int32_t ttl) { + SetMutationErrorIfInvalid(row_key, FieldType::kRowkey, &error_code_); + SetMutationErrorIfInvalid(value, FieldType::kValue, &error_code_); + RowMutation::Mutation& mutation = AddMutation(row_key); + mutation.type = RowMutation::kPut; + mutation.family = ""; + mutation.qualifier = ""; + mutation.timestamp = kLatestTimestamp; + mutation.value = value; + mutation.ttl = ttl; +} + +void BatchMutationImpl::Put(const std::string& row_key, const std::string& family, + const std::string& qualifier, const std::string& value, + int64_t timestamp) { + SetMutationErrorIfInvalid(row_key, FieldType::kRowkey, &error_code_); + SetMutationErrorIfInvalid(qualifier, FieldType::kQualifier, &error_code_); + SetMutationErrorIfInvalid(value, FieldType::kValue, &error_code_); + RowMutation::Mutation& mutation = AddMutation(row_key); + mutation.type = RowMutation::kPut; + mutation.family = family; + mutation.qualifier = qualifier; + if (timestamp == -1) { + mutation.timestamp = kLatestTimestamp; + } else { + mutation.timestamp = timestamp; + } + mutation.value = value; + mutation.ttl = -1; +} + +void BatchMutationImpl::Put(const std::string& row_key, const std::string& family, + const std::string& qualifier, const int64_t value, int64_t timestamp) { + std::string value_str((char*)&value, sizeof(int64_t)); + Put(row_key, family, qualifier, value_str, timestamp); +} + +void BatchMutationImpl::Add(const std::string& row_key, const std::string& family, + const std::string& qualifier, const int64_t delta) { + SetMutationErrorIfInvalid(row_key, FieldType::kRowkey, &error_code_); + SetMutationErrorIfInvalid(qualifier, FieldType::kQualifier, &error_code_); + char delta_buf[sizeof(int64_t)]; + RowMutation::Mutation& mutation = AddMutation(row_key); + mutation.type = RowMutation::kAdd; + mutation.family = family; + mutation.qualifier = qualifier; + mutation.timestamp = get_micros(); // 为了避免retry引起的重复加,所以自带时间戳 + io::EncodeBigEndian(delta_buf, delta); + mutation.value.assign(delta_buf, sizeof(delta_buf)); +} + +void BatchMutationImpl::PutIfAbsent(const std::string& row_key, const std::string& family, + const std::string& qualifier, const std::string& value) { + SetMutationErrorIfInvalid(row_key, FieldType::kRowkey, &error_code_); + SetMutationErrorIfInvalid(qualifier, FieldType::kQualifier, &error_code_); + SetMutationErrorIfInvalid(value, FieldType::kValue, &error_code_); + RowMutation::Mutation& mutation = AddMutation(row_key); + mutation.type = RowMutation::kPutIfAbsent; + mutation.family = family; + mutation.qualifier = qualifier; + mutation.timestamp = kLatestTimestamp; + mutation.value = value; +} + +void BatchMutationImpl::Append(const std::string& row_key, const std::string& family, + const std::string& qualifier, const std::string& value) { + SetMutationErrorIfInvalid(row_key, FieldType::kRowkey, &error_code_); + SetMutationErrorIfInvalid(qualifier, FieldType::kQualifier, &error_code_); + SetMutationErrorIfInvalid(value, FieldType::kValue, &error_code_); + RowMutation::Mutation& mutation = AddMutation(row_key); + mutation.type = RowMutation::kAppend; + mutation.family = family; + mutation.qualifier = qualifier; + mutation.timestamp = get_micros(); + mutation.value = value; +} + +void BatchMutationImpl::DeleteRow(const std::string& row_key, int64_t timestamp) { + SetMutationErrorIfInvalid(row_key, FieldType::kRowkey, &error_code_); + RowMutation::Mutation& mutation = AddMutation(row_key); + mutation.type = RowMutation::kDeleteRow; + mutation.timestamp = (timestamp == -1 ? kLatestTimestamp : timestamp); +} + +void BatchMutationImpl::DeleteFamily(const std::string& row_key, const std::string& family, + int64_t timestamp) { + SetMutationErrorIfInvalid(row_key, FieldType::kRowkey, &error_code_); + RowMutation::Mutation& mutation = AddMutation(row_key); + mutation.type = RowMutation::kDeleteFamily; + mutation.family = family; + mutation.timestamp = (timestamp == -1 ? kLatestTimestamp : timestamp); +} + +void BatchMutationImpl::DeleteColumns(const std::string& row_key, const std::string& family, + const std::string& qualifier, int64_t timestamp) { + SetMutationErrorIfInvalid(row_key, FieldType::kRowkey, &error_code_); + SetMutationErrorIfInvalid(qualifier, FieldType::kQualifier, &error_code_); + RowMutation::Mutation& mutation = AddMutation(row_key); + mutation.type = RowMutation::kDeleteColumns; + mutation.family = family; + mutation.qualifier = qualifier; + mutation.timestamp = (timestamp == -1 ? kLatestTimestamp : timestamp); +} + +/// 删除一个列的指定版本 +void BatchMutationImpl::DeleteColumn(const std::string& row_key, const std::string& family, + const std::string& qualifier, int64_t timestamp) { + SetMutationErrorIfInvalid(row_key, FieldType::kRowkey, &error_code_); + SetMutationErrorIfInvalid(qualifier, FieldType::kQualifier, &error_code_); + RowMutation::Mutation& mutation = AddMutation(row_key); + mutation.type = RowMutation::kDeleteColumn; + mutation.family = family; + mutation.qualifier = qualifier; + mutation.timestamp = (timestamp == -1 ? kLatestTimestamp : timestamp); +} + +/// 获得结果错误码 +const ErrorCode& BatchMutationImpl::GetError() { return error_code_; } + +void BatchMutationImpl::Prepare(StatCallback cb) { + on_finish_callback_ = cb; + start_ts_ = get_micros(); +} + +void BatchMutationImpl::Reset() { + mu_map_.clear(); + update_meta_key_ = ""; + callback_ = NULL; + timeout_ms_ = 0; + SdkTask::ResetRetryTimes(); + finish_ = false; + error_code_.SetFailed(ErrorCode::kOK); + commit_times_ = 0; +} + +/// 设置异步回调, 操作会异步返回 +void BatchMutationImpl::SetCallBack(BatchMutation::Callback callback) { callback_ = callback; } + +/// 获得回调函数 +BatchMutation::Callback BatchMutationImpl::GetCallBack() { return callback_; } + +/// 设置用户上下文,可在回调函数中获取 +void BatchMutationImpl::SetContext(void* context) { user_context_ = context; } + +/// 获得用户上下文 +void* BatchMutationImpl::GetContext() { return user_context_; } + +/// 设置超时时间(只影响当前操作,不影响Table::SetWriteTimeout设置的默认写超时) +void BatchMutationImpl::SetTimeOut(int64_t timeout_ms) { timeout_ms_ = timeout_ms; } + +int64_t BatchMutationImpl::TimeOut() { return timeout_ms_; } + +/// 是否异步操作 +bool BatchMutationImpl::IsAsync() { return (callback_ != NULL); } + +/// 异步操作是否完成 +bool BatchMutationImpl::IsFinished() const { + MutexLock lock(&finish_mutex_); + return finish_; +} + +std::string BatchMutationImpl::InternalRowKey() { + if (table_ && table_->IsHashTable()) { + return table_->GetHashMethod()(update_meta_key_); + } + return update_meta_key_; +} + +/// mutation数量 +uint32_t BatchMutationImpl::MutationNum(const std::string& row_key) { + return (mu_map_[row_key]).size(); +} + +/// mutation总大小 +uint32_t BatchMutationImpl::Size() { + uint32_t total_size = 0; + for (const auto& mu_seq : mu_map_) { + total_size += mu_seq.first.size(); + for (const auto& mu : mu_seq.second) { + total_size += mu.family.size() + mu.qualifier.size() + mu.value.size() + sizeof(mu.timestamp); + } + } + return total_size; +} + +/// 返回mutation +const RowMutation::Mutation& BatchMutationImpl::GetMutation(const std::string& rowkey, + uint32_t index) { + if (mu_map_.find(rowkey) == mu_map_.end() || index >= mu_map_[rowkey].size()) { + abort(); + } + return mu_map_[rowkey][index]; +} + +std::vector BatchMutationImpl::GetRows() { + std::vector rows; + for (const auto& mu_seq : mu_map_) { + rows.emplace_back(mu_seq.first); + } + return rows; +} + +/// 设置错误码 +void BatchMutationImpl::SetError(ErrorCode::ErrorCodeType err, const std::string& reason) { + error_code_.SetFailed(err, reason); +} + +/// 等待结束 +void BatchMutationImpl::Wait() { + MutexLock lock(&finish_mutex_); + while (!finish_) { + finish_cond_.Wait(); + } +} + +void BatchMutationImpl::RunCallback() { + // staticstic + if (on_finish_callback_) { + on_finish_callback_(table_, this); + } + if (callback_) { + callback_(this); + } else { + MutexLock lock(&finish_mutex_); + finish_ = true; + finish_cond_.Signal(); + } +} + +RowMutation::Mutation& BatchMutationImpl::AddMutation(const std::string& row_key) { + update_meta_key_ = (update_meta_key_ == "" ? row_key : update_meta_key_); + std::vector& mu_seq = mu_map_[row_key]; + mu_seq.resize(mu_seq.size() + 1); + return mu_seq.back(); +} + +} // namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/sdk/batch_mutation_impl.h b/src/sdk/batch_mutation_impl.h new file mode 100644 index 000000000..a0d23cbaf --- /dev/null +++ b/src/sdk/batch_mutation_impl.h @@ -0,0 +1,132 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Author: baorenyi@baidu.com + +#pragma once + +#include +#include +#include + +#include "common/mutex.h" +#include "proto/tabletnode_rpc.pb.h" +#include "sdk/sdk_task.h" +#include "tera.h" +#include "types.h" +#include "common/timer.h" + +namespace tera { + +class TableImpl; + +class BatchMutationImpl : public BatchMutation, public SdkTask { + public: + BatchMutationImpl(Table* table); + ~BatchMutationImpl(); + + public: // from BatchMutation + virtual void Put(const std::string& row_key, const std::string& value, int32_t ttl = -1); + + virtual void Put(const std::string& row_key, const std::string& family, + const std::string& qualifier, const std::string& value, int64_t timestamp = -1); + + virtual void Put(const std::string& row_key, const std::string& family, + const std::string& qualifier, const int64_t value, int64_t timestamp = -1); + + virtual void Add(const std::string& row_key, const std::string& family, + const std::string& qualifier, const int64_t delta); + + virtual void PutIfAbsent(const std::string& row_key, const std::string& family, + const std::string& qualifier, const std::string& value); + + virtual void Append(const std::string& row_key, const std::string& family, + const std::string& qualifier, const std::string& value); + + virtual void DeleteRow(const std::string& row_key, int64_t timestamp = -1); + + virtual void DeleteFamily(const std::string& row_key, const std::string& family, + int64_t timestamp = -1); + + virtual void DeleteColumns(const std::string& row_key, const std::string& family, + const std::string& qualifier, int64_t timestamp = -1); + + virtual void DeleteColumn(const std::string& row_key, const std::string& family, + const std::string& qualifier, int64_t timestamp); + + virtual const ErrorCode& GetError(); + + virtual void SetCallBack(Callback callback); + virtual Callback GetCallBack(); + virtual void SetContext(void* context); + virtual void* GetContext(); + + virtual void SetTimeOut(int64_t timeout_ms); + virtual int64_t TimeOut(); + + virtual uint32_t MutationNum(const std::string& row_key); + + virtual uint32_t Size(); + + virtual bool IsAsync(); + + virtual void Reset(); + + public: // from SdkTask + virtual void RunCallback(); + + public: + /// 异步操作是否完成 + bool IsFinished() const; + + /// 返回mutation + const RowMutation::Mutation& GetMutation(const std::string& row_key, uint32_t index); + + std::vector GetRows(); + + public: + /// 以下接口仅内部使用,不开放给用户 + + void Prepare(StatCallback cb); + + int64_t GetStartTime() { return start_ts_; } + + /// 设置错误码 + void SetError(ErrorCode::ErrorCodeType err, const std::string& reason = ""); + + /// 等待结束 + void Wait(); + + void AddCommitTimes() { commit_times_++; } + int64_t GetCommitTimes() { return commit_times_; } + + std::string InternalRowKey(); + + protected: + /// 增加一个操作 + RowMutation::Mutation& AddMutation(const std::string& rowkey); + + private: + Table* table_; + // the first row key add in this BatchMutation + std::string update_meta_key_; + std::map> mu_map_; + + BatchMutation::Callback callback_; + void* user_context_; + int64_t timeout_ms_; + + bool finish_; + ErrorCode error_code_; + mutable Mutex finish_mutex_; + common::CondVar finish_cond_; + + /// 记录此mutation被提交到ts的次数 + int64_t commit_times_; + + StatCallback on_finish_callback_; + int64_t start_ts_; +}; + +} // namespace tera diff --git a/src/sdk/client_impl.cc b/src/sdk/client_impl.cc index bc9c64724..a3c0b3554 100644 --- a/src/sdk/client_impl.cc +++ b/src/sdk/client_impl.cc @@ -9,6 +9,7 @@ #include "gflags/gflags.h" +#include "access/helpers/access_utils.h" #include "common/file/file_path.h" #include "common/log/log_cleaner.h" #include "common/mutex.h" @@ -17,6 +18,8 @@ #include "proto/proto_helper.h" #include "proto/table_meta.pb.h" #include "proto/tabletnode_client.h" +#include "proto/access_control.pb.h" +#include "proto/master_mutli_tenancy_client.h" #include "sdk/table_impl.h" #include "sdk/global_txn.h" #include "sdk/sdk_perf.h" @@ -33,7 +36,6 @@ DECLARE_string(tera_sdk_conf_file); DECLARE_string(tera_user_identity); DECLARE_string(tera_user_passcode); -DECLARE_int32(tera_sdk_retry_times); DECLARE_int32(tera_sdk_update_meta_internal); DECLARE_int32(tera_sdk_retry_period); DECLARE_int32(tera_sdk_thread_min_num); @@ -54,35 +56,36 @@ DECLARE_bool(tera_sdk_client_for_gtxn); DECLARE_bool(tera_sdk_tso_client_enabled); DECLARE_bool(tera_sdk_mock_enable); +DECLARE_string(tera_auth_policy); +DECLARE_string(tera_auth_name); +DECLARE_string(tera_auth_token); + namespace tera { namespace { - struct ClientResource { - std::weak_ptr wp_client_impl; - ThreadPool* client_thread_pool; - ThreadPool* client_gtxn_thread_pool; - ClientResource() : client_thread_pool(NULL), - client_gtxn_thread_pool(NULL) {} - }; - typedef std::map> ClusterClientMap; - std::unique_ptr cluster_client_map; - - Mutex g_mutex; - bool g_is_glog_init = false; - - std::once_flag sdk_client_once_control; - std::once_flag init_cluster_client_map_once; - - void InitClusterClientMap() { - cluster_client_map.reset(new ClusterClientMap()); - } - void LogSdkVersionInfo() { - LOG(INFO) << "\n" << SystemVersionInfo(); - } +struct ClientResource { + std::weak_ptr wp_client_impl; + ThreadPool* client_thread_pool; + ThreadPool* client_gtxn_thread_pool; + ClientResource() : client_thread_pool(NULL), client_gtxn_thread_pool(NULL) {} +}; +typedef std::map> ClusterClientMap; +std::unique_ptr cluster_client_map; + +volatile uint64_t g_sequence_id = 0; + +Mutex g_mutex; +bool g_is_glog_init = false; + +std::once_flag sdk_client_once_control; +std::once_flag init_cluster_client_map_once; + +void InitClusterClientMap() { cluster_client_map.reset(new ClusterClientMap()); } + +void LogSdkVersionInfo() { LOG(INFO) << "\n" << SystemVersionInfo(); } } -ClientImpl::ClientImpl(const ClientOptions& client_options, - ThreadPool* client_thread_pool, +ClientImpl::ClientImpl(const ClientOptions& client_options, ThreadPool* client_thread_pool, ThreadPool* client_gtxn_thread_pool) : thread_pool_(client_thread_pool), gtxn_thread_pool_(client_gtxn_thread_pool), @@ -90,1066 +93,1434 @@ ClientImpl::ClientImpl(const ClientOptions& client_options, client_zk_adapter_(NULL), tso_cluster_(NULL), collecter_(NULL), - session_str_("") { - tabletnode::TabletNodeClient::SetRpcOption( - FLAGS_tera_sdk_rpc_limit_enabled ? FLAGS_tera_sdk_rpc_limit_max_inflow : -1, - FLAGS_tera_sdk_rpc_limit_enabled ? FLAGS_tera_sdk_rpc_limit_max_outflow : -1, - FLAGS_tera_sdk_rpc_max_pending_buffer_size, FLAGS_tera_sdk_rpc_work_thread_num); - - if (FLAGS_tera_sdk_client_for_gtxn) { - client_zk_adapter_ = sdk::NewClientZkAdapter(); - client_zk_adapter_->Init(); - cluster_ = sdk::NewClusterFinder(client_zk_adapter_); - if (FLAGS_tera_sdk_tso_client_enabled) { - tso_cluster_ = sdk::NewTimeoracleClusterFinder(); - } - RegisterSelf(); - } else { - cluster_ = sdk::NewClusterFinder(); - } - - if (FLAGS_tera_sdk_perf_collect_enabled) { - collecter_ = new sdk::PerfCollecter(); - collecter_->Run(); - LOG(INFO) << "start perf collect"; - } else { - LOG(INFO) << "perf collect disable"; + session_str_(""), + access_builder_(new auth::AccessBuilder(client_options_.tera_auth_policy)) { + tabletnode::TabletNodeClient::SetRpcOption( + FLAGS_tera_sdk_rpc_limit_enabled ? FLAGS_tera_sdk_rpc_limit_max_inflow : -1, + FLAGS_tera_sdk_rpc_limit_enabled ? FLAGS_tera_sdk_rpc_limit_max_outflow : -1, + FLAGS_tera_sdk_rpc_max_pending_buffer_size, FLAGS_tera_sdk_rpc_work_thread_num); + + if (FLAGS_tera_sdk_client_for_gtxn) { + client_zk_adapter_ = sdk::NewClientZkAdapter(); + client_zk_adapter_->Init(); + cluster_ = sdk::NewClusterFinder(client_zk_adapter_); + if (FLAGS_tera_sdk_tso_client_enabled) { + tso_cluster_ = sdk::NewTimeoracleClusterFinder(); } - - std::call_once(sdk_client_once_control, LogSdkVersionInfo); + RegisterSelf(); + } else { + cluster_ = sdk::NewClusterFinder(); + } + + if (FLAGS_tera_sdk_perf_collect_enabled) { + collecter_ = new sdk::PerfCollecter(); + collecter_->Run(); + LOG(INFO) << "start perf collect"; + } else { + LOG(INFO) << "perf collect disable"; + } + + std::call_once(sdk_client_once_control, LogSdkVersionInfo); } ClientImpl::~ClientImpl() { - { - MutexLock l(&open_table_mutex_); - auto it = open_table_map_.begin(); - for (; it != open_table_map_.end(); ) { - open_table_map_.erase(it++); - } + { + MutexLock l(&open_table_mutex_); + auto it = open_table_map_.begin(); + for (; it != open_table_map_.end();) { + open_table_map_.erase(it++); } - delete cluster_; - if (FLAGS_tera_sdk_perf_collect_enabled) { - collecter_->Stop(); - delete collecter_; - } - if (FLAGS_tera_sdk_client_for_gtxn) { - if (FLAGS_tera_sdk_tso_client_enabled) { - delete tso_cluster_; - } - delete client_zk_adapter_; + } + delete cluster_; + if (FLAGS_tera_sdk_perf_collect_enabled) { + collecter_->Stop(); + delete collecter_; + } + if (FLAGS_tera_sdk_client_for_gtxn) { + if (FLAGS_tera_sdk_tso_client_enabled) { + delete tso_cluster_; } + delete client_zk_adapter_; + } +} + +bool ClientImpl::Login(ErrorCode* err) { + if (!access_builder_->Login(client_options_.tera_auth_name, client_options_.tera_auth_token, + err)) { + LOG(ERROR) << "access_builder login failed!"; + return false; + } + return true; } bool ClientImpl::CreateTable(const TableDescriptor& desc, ErrorCode* err) { - std::vector empty_delimiter; - return CreateTable(desc, empty_delimiter, err); + std::vector empty_delimiter; + return CreateTable(desc, empty_delimiter, err); +} + +bool ClientImpl::CreateTable(const TableDescriptor& desc, int64_t hash_num, ErrorCode* err) { + std::vector delimiters; + if (!desc.IsHashEnabled()) { + err->SetFailed(ErrorCode::kBadParam, "Create non-hash table with hash_num is invalid"); + } + GenerateHashDelimiters(hash_num, &delimiters); + return CreateTable(desc, delimiters, err); } std::string ClientImpl::GetUserToken(const std::string& user, const std::string& password) { - std::string token_str = user + ":" + password; - std::string token; - GetHashString(token_str, 0, &token); - return token; + std::string token_str = user + ":" + password; + std::string token; + GetHashString(token_str, 0, &token); + return token; } bool ClientImpl::CheckReturnValue(StatusCode status, std::string& reason, ErrorCode* err) { - switch (status) { - case kMasterOk: - err->SetFailed(ErrorCode::kOK, "success"); - LOG(INFO) << "master status is OK."; - return true; - case kTableExist: - reason = "table already exist."; - err->SetFailed(ErrorCode::kBadParam, reason); - break; - case kTableNotExist: - reason = "table not exist."; - err->SetFailed(ErrorCode::kBadParam, reason); - break; - case kTableNotFound: - reason = "table not found."; - err->SetFailed(ErrorCode::kBadParam, reason); - break; - case kTableStatusDisable: - reason = "table status: disable."; - err->SetFailed(ErrorCode::kBadParam, reason); - break; - case kTableStatusEnable: - reason = "table status: enable."; - err->SetFailed(ErrorCode::kSystem, reason); - break; - case kInvalidArgument: - reason = "invalid arguments."; - err->SetFailed(ErrorCode::kBadParam, reason); - break; - case kNotPermission: - reason = "permission denied."; - err->SetFailed(ErrorCode::kNoAuth, reason); - break; - case kTabletReady: - reason = "tablet is ready."; - err->SetFailed(ErrorCode::kOK, reason); - break; - default: - reason = "unknown system error, contact to cluster admin..."; - err->SetFailed(ErrorCode::kSystem, reason); - break; - } - return false; + switch (status) { + case kMasterOk: + err->SetFailed(ErrorCode::kOK, "success"); + LOG(INFO) << "master status is OK."; + return true; + case kTableExist: + reason = "table already exist."; + err->SetFailed(ErrorCode::kBadParam, reason); + break; + case kTableNotExist: + reason = "table not exist."; + err->SetFailed(ErrorCode::kBadParam, reason); + break; + case kTableNotFound: + reason = "table not found."; + err->SetFailed(ErrorCode::kBadParam, reason); + break; + case kTableStatusDisable: + reason = "table status: disable."; + err->SetFailed(ErrorCode::kBadParam, reason); + break; + case kTableStatusEnable: + reason = "table status: enable."; + err->SetFailed(ErrorCode::kSystem, reason); + break; + case kInvalidArgument: + reason = "invalid arguments."; + err->SetFailed(ErrorCode::kBadParam, reason); + break; + case kNotPermission: + reason = "permission denied."; + err->SetFailed(ErrorCode::kNoAuth, reason); + break; + case kTabletReady: + reason = "tablet is ready."; + err->SetFailed(ErrorCode::kOK, reason); + break; + case kMismatchAuthType: + reason = "Mismatch auth type between sdk and master"; + err->SetFailed(ErrorCode::kAuthBadParam, reason); + break; + case kQuotaLimited: + reason = "Reach Quota limit"; + err->SetFailed(ErrorCode::kNoQuota, reason); + case kQuotaInvalidArg: + reason = "Operate Quota failed for invalid arg"; + err->SetFailed(ErrorCode::kBadParam, reason); + case kFlowControlLimited: + reason = "Reach flow control limit"; + err->SetFailed(ErrorCode::kBusy, reason); + default: + reason = "unknown system error, contact to cluster admin..."; + err->SetFailed(ErrorCode::kSystem, reason); + break; + } + return false; } -bool ClientImpl::CreateTable(const TableDescriptor& desc, - const std::vector& tablet_delim, +bool ClientImpl::CreateTable(const TableDescriptor& desc, const std::vector& tablet_delim, ErrorCode* err) { - if (!CheckTableDescrptor(desc, err)) { - return false; - } - master::MasterClient master_client(cluster_->MasterAddr()); - - CreateTableRequest request; - CreateTableResponse response; - request.set_sequence_id(0); - std::string timestamp = tera::get_curtime_str_plain(); - request.set_table_name(desc.TableName()); - request.set_user_token(GetUserToken(client_options_.user_identity, - client_options_.user_passcode)); - - TableSchema* schema = request.mutable_schema(); - - TableDescToSchema(desc, schema); - schema->set_alias(desc.TableName()); - schema->set_name(desc.TableName()); - // add delimiter - size_t delim_num = tablet_delim.size(); - for (size_t i = 0; i < delim_num; ++i) { - const string& delim = tablet_delim[i]; - request.add_delimiters(delim); - } - string reason; - if (master_client.CreateTable(&request, &response)) { - if (CheckReturnValue(response.status(), reason, err)) { - return true; - } - LOG(ERROR) << reason << "| status: " << StatusCodeToString(response.status()); - } else { - reason = "rpc fail to create table:" + desc.TableName(); - LOG(ERROR) << reason; - err->SetFailed(ErrorCode::kSystem, reason); - } + if (!CheckTableDescrptor(desc, err)) { return false; + } + master::MasterClient master_client(cluster_->MasterAddr()); + + CreateTableRequest request; + CreateTableResponse response; + request.set_sequence_id(g_sequence_id++); + std::string timestamp = tera::get_curtime_str_plain(); + request.set_table_name(desc.TableName()); + request.set_user_token( + GetUserToken(client_options_.user_identity, client_options_.user_passcode)); + + access_builder_->BuildRequest(&request); + + TableSchema* schema = request.mutable_schema(); + + TableDescToSchema(desc, schema); + schema->set_alias(desc.TableName()); + schema->set_name(desc.TableName()); + // add delimiter + size_t delim_num = tablet_delim.size(); + for (size_t i = 0; i < delim_num; ++i) { + const string& delim = tablet_delim[i]; + request.add_delimiters(delim); + } + string reason; + if (master_client.CreateTable(&request, &response)) { + if (CheckReturnValue(response.status(), reason, err)) { + return true; + } + LOG(ERROR) << reason << "| status: " << StatusCodeToString(response.status()); + } else { + reason = "rpc fail to create table:" + desc.TableName(); + LOG(ERROR) << reason; + err->SetFailed(ErrorCode::kSystem, reason); + } + return false; } bool ClientImpl::UpdateTableSchema(const TableDescriptor& desc, ErrorCode* err) { - return UpdateTable(desc, err); + return UpdateTable(desc, err); } bool ClientImpl::UpdateTable(const TableDescriptor& desc, ErrorCode* err) { - if (!IsTableExist(desc.TableName(), err)) { - LOG(ERROR) << "table not exist: " << desc.TableName(); - return false; - } + if (!IsTableExist(desc.TableName(), err)) { + LOG(ERROR) << "table not exist: " << desc.TableName(); + return false; + } - master::MasterClient master_client(cluster_->MasterAddr()); + master::MasterClient master_client(cluster_->MasterAddr()); - UpdateTableRequest request; - UpdateTableResponse response; - request.set_sequence_id(0); - request.set_table_name(desc.TableName()); - request.set_user_token(GetUserToken(client_options_.user_identity, - client_options_.user_passcode)); + UpdateTableRequest request; + UpdateTableResponse response; + request.set_sequence_id(g_sequence_id++); + request.set_table_name(desc.TableName()); + request.set_user_token( + GetUserToken(client_options_.user_identity, client_options_.user_passcode)); - TableSchema* schema = request.mutable_schema(); - TableDescToSchema(desc, schema); + access_builder_->BuildRequest(&request); - ErrorCode err2; - TableDescriptor* old_desc = GetTableDescriptor(desc.TableName(), &err2); - if (old_desc == NULL) { - return false; - } - TableSchema old_schema; - TableDescToSchema(*old_desc, &old_schema); - delete old_desc; - - // if try to update lg, need to disable table - bool is_update_lg = IsSchemaLgDiff(*schema, old_schema); - bool is_update_cf = IsSchemaCfDiff(*schema, old_schema); - - // compatible for old-master which no support for online-schema-update - if (!FLAGS_tera_online_schema_update_enabled - && IsTableEnabled(desc.TableName(), err) - && (is_update_lg || is_update_cf)) { - err->SetFailed(ErrorCode::kBadParam, "disable this table if you want to update (Lg | Cf) property(ies)"); - return false; - } + TableSchema* schema = request.mutable_schema(); + TableDescToSchema(desc, schema); - string reason; - if (master_client.UpdateTable(&request, &response)) { - if (CheckReturnValue(response.status(), reason, err)) { - return true; - } - LOG(ERROR) << reason << "| status: " << StatusCodeToString(response.status()); - } else { - reason = "rpc fail to update table:" + desc.TableName(); - LOG(ERROR) << reason; - err->SetFailed(ErrorCode::kSystem, reason); - } + ErrorCode err2; + TableDescriptor* old_desc = GetTableDescriptor(desc.TableName(), &err2); + if (old_desc == NULL) { return false; + } + TableSchema old_schema; + TableDescToSchema(*old_desc, &old_schema); + delete old_desc; + + // if try to update lg, need to disable table + bool is_update_lg = IsSchemaLgDiff(*schema, old_schema); + bool is_update_cf = IsSchemaCfDiff(*schema, old_schema); + + // compatible for old-master which no support for online-schema-update + if (!FLAGS_tera_online_schema_update_enabled && IsTableEnabled(desc.TableName(), err) && + (is_update_lg || is_update_cf)) { + err->SetFailed(ErrorCode::kBadParam, + "disable this table if you want to update (Lg | Cf) property(ies)"); + return false; + } + + string reason; + if (master_client.UpdateTable(&request, &response)) { + if (CheckReturnValue(response.status(), reason, err)) { + return true; + } + LOG(ERROR) << reason << "| status: " << StatusCodeToString(response.status()); + } else { + reason = "rpc fail to update table:" + desc.TableName(); + LOG(ERROR) << reason; + err->SetFailed(ErrorCode::kSystem, reason); + } + return false; } bool ClientImpl::UpdateCheck(const std::string& table_name, bool* done, ErrorCode* err) { - master::MasterClient master_client(cluster_->MasterAddr()); - UpdateCheckRequest request; - UpdateCheckResponse response; - request.set_sequence_id(0); - request.set_table_name(table_name); - request.set_user_token(GetUserToken(client_options_.user_identity, - client_options_.user_passcode)); - - string reason; - if (master_client.UpdateCheck(&request, &response)) { - if (CheckReturnValue(response.status(), reason, err)) { - *done = response.done(); - return true; - } - err->SetFailed(ErrorCode::kSystem, reason); - } else { - reason = "rpc fail to update-check table:" + table_name; - err->SetFailed(ErrorCode::kSystem, reason); + master::MasterClient master_client(cluster_->MasterAddr()); + UpdateCheckRequest request; + UpdateCheckResponse response; + request.set_sequence_id(g_sequence_id++); + request.set_table_name(table_name); + request.set_user_token( + GetUserToken(client_options_.user_identity, client_options_.user_passcode)); + + string reason; + if (master_client.UpdateCheck(&request, &response)) { + if (CheckReturnValue(response.status(), reason, err)) { + *done = response.done(); + return true; } - return false; + err->SetFailed(ErrorCode::kSystem, reason); + } else { + reason = "rpc fail to update-check table:" + table_name; + err->SetFailed(ErrorCode::kSystem, reason); + } + return false; } bool ClientImpl::DropTable(const std::string& name, ErrorCode* err) { - return DeleteTable(name, err); + return DeleteTable(name, err); } bool ClientImpl::DeleteTable(const std::string& name, ErrorCode* err) { - master::MasterClient master_client(cluster_->MasterAddr()); - - DeleteTableRequest request; - DeleteTableResponse response; - request.set_sequence_id(0); - request.set_table_name(name); - request.set_user_token(GetUserToken(client_options_.user_identity, - client_options_.user_passcode)); - - string reason; - if (master_client.DeleteTable(&request, &response)) { - if (CheckReturnValue(response.status(), reason, err)) { - return true; - } - } else { - reason = "rpc fail to delete table: " + name; - LOG(ERROR) << reason; - err->SetFailed(ErrorCode::kSystem, reason); + master::MasterClient master_client(cluster_->MasterAddr()); + + DeleteTableRequest request; + DeleteTableResponse response; + request.set_sequence_id(g_sequence_id++); + request.set_table_name(name); + request.set_user_token( + GetUserToken(client_options_.user_identity, client_options_.user_passcode)); + + access_builder_->BuildRequest(&request); + + string reason; + if (master_client.DeleteTable(&request, &response)) { + if (CheckReturnValue(response.status(), reason, err)) { + return true; } - return false; + } else { + reason = "rpc fail to delete table: " + name; + LOG(ERROR) << reason; + err->SetFailed(ErrorCode::kSystem, reason); + } + return false; } bool ClientImpl::DisableTable(const std::string& name, ErrorCode* err) { - master::MasterClient master_client(cluster_->MasterAddr()); - - DisableTableRequest request; - DisableTableResponse response; - request.set_sequence_id(0); - request.set_table_name(name); - request.set_user_token(GetUserToken(client_options_.user_identity, - client_options_.user_passcode)); - - string reason; - if (master_client.DisableTable(&request, &response)) { - if (CheckReturnValue(response.status(), reason, err)) { - return true; - } - LOG(ERROR) << reason << "| status: " << StatusCodeToString(response.status()); - } else { - reason = "rpc fail to disable table: " + name; - LOG(ERROR) << reason; - err->SetFailed(ErrorCode::kSystem, reason); + master::MasterClient master_client(cluster_->MasterAddr()); + + DisableTableRequest request; + DisableTableResponse response; + request.set_sequence_id(g_sequence_id++); + request.set_table_name(name); + request.set_user_token( + GetUserToken(client_options_.user_identity, client_options_.user_passcode)); + + access_builder_->BuildRequest(&request); + + string reason; + if (master_client.DisableTable(&request, &response)) { + if (CheckReturnValue(response.status(), reason, err)) { + return true; } - return false; + LOG(ERROR) << reason << "| status: " << StatusCodeToString(response.status()); + } else { + reason = "rpc fail to disable table: " + name; + LOG(ERROR) << reason; + err->SetFailed(ErrorCode::kSystem, reason); + } + return false; } bool ClientImpl::EnableTable(const std::string& name, ErrorCode* err) { - master::MasterClient master_client(cluster_->MasterAddr()); - EnableTableRequest request; - EnableTableResponse response; - request.set_sequence_id(0); - request.set_table_name(name); - request.set_user_token(GetUserToken(client_options_.user_identity, - client_options_.user_passcode)); - - std::string reason; - if (master_client.EnableTable(&request, &response)) { - if (CheckReturnValue(response.status(), reason, err)) { - return true; - } - LOG(ERROR) << reason << "| status: " << StatusCodeToString(response.status()); - } else { - reason = "rpc fail to enable table: " + name; - LOG(ERROR) << reason; - err->SetFailed(ErrorCode::kSystem, reason); + master::MasterClient master_client(cluster_->MasterAddr()); + EnableTableRequest request; + EnableTableResponse response; + request.set_sequence_id(g_sequence_id++); + request.set_table_name(name); + request.set_user_token( + GetUserToken(client_options_.user_identity, client_options_.user_passcode)); + + access_builder_->BuildRequest(&request); + + std::string reason; + if (master_client.EnableTable(&request, &response)) { + if (CheckReturnValue(response.status(), reason, err)) { + return true; } - return false; + LOG(ERROR) << reason << "| status: " << StatusCodeToString(response.status()); + } else { + reason = "rpc fail to enable table: " + name; + LOG(ERROR) << reason; + err->SetFailed(ErrorCode::kSystem, reason); + } + return false; } -void ClientImpl::DoShowUser(OperateUserResponse& response, - std::vector& user_groups) { - if (!response.has_user_info()) { - return; - } - UserInfo user_info = response.user_info(); - user_groups.push_back(user_info.user_name()); - for (int i = 0; i < user_info.group_name_size(); ++i) { - user_groups.push_back(user_info.group_name(i)); - } +void ClientImpl::DoShowUser(OperateUserResponse& response, std::vector& user_groups) { + if (!response.has_user_info()) { + return; + } + UserInfo user_info = response.user_info(); + user_groups.push_back(user_info.user_name()); + for (int i = 0; i < user_info.group_name_size(); ++i) { + user_groups.push_back(user_info.group_name(i)); + } } bool ClientImpl::OperateUser(UserInfo& operated_user, UserOperateType type, std::vector& user_groups, ErrorCode* err) { - master::MasterClient master_client(cluster_->MasterAddr()); - OperateUserRequest request; - OperateUserResponse response; - request.set_sequence_id(0); - request.set_user_token(GetUserToken(client_options_.user_identity, - client_options_.user_passcode)); - request.set_op_type(type); - UserInfo* user_info = request.mutable_user_info(); - user_info->CopyFrom(operated_user); - std::string reason; - if (master_client.OperateUser(&request, &response)) { - if (CheckReturnValue(response.status(), reason, err)) { - if (type == kShowUser) { - DoShowUser(response, user_groups); - } - return true; - } - LOG(ERROR) << reason << "| status: " << StatusCodeToString(response.status()); - } else { - reason = "rpc fail to operate user: " + operated_user.user_name(); - LOG(ERROR) << reason; - err->SetFailed(ErrorCode::kSystem, reason); + master::MasterClient master_client(cluster_->MasterAddr()); + OperateUserRequest request; + OperateUserResponse response; + request.set_sequence_id(g_sequence_id++); + request.set_user_token( + GetUserToken(client_options_.user_identity, client_options_.user_passcode)); + request.set_op_type(type); + UserInfo* user_info = request.mutable_user_info(); + user_info->CopyFrom(operated_user); + std::string reason; + if (master_client.OperateUser(&request, &response)) { + if (CheckReturnValue(response.status(), reason, err)) { + if (type == kShowUser) { + DoShowUser(response, user_groups); + } + return true; } - return false; + LOG(ERROR) << reason << "| status: " << StatusCodeToString(response.status()); + } else { + reason = "rpc fail to operate user: " + operated_user.user_name(); + LOG(ERROR) << reason; + err->SetFailed(ErrorCode::kSystem, reason); + } + return false; } -bool ClientImpl::CreateUser(const std::string& user, - const std::string& password, ErrorCode* err) { - UserInfo created_user; - created_user.set_user_name(user); - created_user.set_token(GetUserToken(user, password)); - std::vector null; - return OperateUser(created_user, kCreateUser, null, err); +bool ClientImpl::CreateUser(const std::string& user, const std::string& password, ErrorCode* err) { + UserInfo created_user; + created_user.set_user_name(user); + created_user.set_token(GetUserToken(user, password)); + std::vector null; + return OperateUser(created_user, kCreateUser, null, err); } bool ClientImpl::DeleteUser(const std::string& user, ErrorCode* err) { - UserInfo deleted_user; - deleted_user.set_user_name(user); - std::vector null; - return OperateUser(deleted_user, kDeleteUser, null, err); + UserInfo deleted_user; + deleted_user.set_user_name(user); + std::vector null; + return OperateUser(deleted_user, kDeleteUser, null, err); } -bool ClientImpl::ChangePwd(const std::string& user, - const std::string& password, ErrorCode* err) { - UserInfo updated_user; - updated_user.set_user_name(user); - updated_user.set_token(GetUserToken(user, password)); - std::vector null; - return OperateUser(updated_user, kChangePwd, null, err); +bool ClientImpl::ChangePwd(const std::string& user, const std::string& password, ErrorCode* err) { + UserInfo updated_user; + updated_user.set_user_name(user); + updated_user.set_token(GetUserToken(user, password)); + std::vector null; + return OperateUser(updated_user, kChangePwd, null, err); } bool ClientImpl::ShowUser(const std::string& user, std::vector& user_groups, ErrorCode* err) { - UserInfo user_info; - user_info.set_user_name(user); - user_info.set_token(GetUserToken(client_options_.user_identity, - client_options_.user_passcode)); - return OperateUser(user_info, kShowUser, user_groups, err); + UserInfo user_info; + user_info.set_user_name(user); + user_info.set_token(GetUserToken(client_options_.user_identity, client_options_.user_passcode)); + return OperateUser(user_info, kShowUser, user_groups, err); } -bool ClientImpl::AddUserToGroup(const std::string& user_name, - const std::string& group_name, ErrorCode* err) { - UserInfo user; - user.set_user_name(user_name); - user.add_group_name(group_name); - std::vector null; - return OperateUser(user, kAddToGroup, null, err); +bool ClientImpl::AddUserToGroup(const std::string& user_name, const std::string& group_name, + ErrorCode* err) { + UserInfo user; + user.set_user_name(user_name); + user.add_group_name(group_name); + std::vector null; + return OperateUser(user, kAddToGroup, null, err); } -bool ClientImpl::DeleteUserFromGroup(const std::string& user_name, - const std::string& group_name, ErrorCode* err) { - UserInfo user; - user.set_user_name(user_name); - user.add_group_name(group_name); - std::vector null; - return OperateUser(user, kDeleteFromGroup, null, err); +bool ClientImpl::DeleteUserFromGroup(const std::string& user_name, const std::string& group_name, + ErrorCode* err) { + UserInfo user; + user.set_user_name(user_name); + user.add_group_name(group_name); + std::vector null; + return OperateUser(user, kDeleteFromGroup, null, err); } -Table* ClientImpl::OpenTable(const std::string& table_name, - ErrorCode* err) { - open_table_mutex_.Lock(); - TableHandle& th = open_table_map_[table_name]; - std::shared_ptr table_impl; - - bool is_existed = false; - { - MutexLock l(&th.mu); - open_table_mutex_.Unlock(); - table_impl = th.handle.lock(); - if (table_impl) { - is_existed = true; - } - - if (!is_existed) { - // open a new table - VLOG(10) << "open a new table: " << table_name; - table_impl = OpenTableInternal(table_name, &th.err); - th.handle = table_impl; - } +bool ClientImpl::UpdateUgi(const std::string& user_name, const std::string& passwd, + ErrorCode* err) { + UpdateUgiRequest request; + UpdateUgiResponse response; + + request.set_sequence_id(g_sequence_id++); + UpdateAuthInfo* update_auth_info = request.mutable_update_info(); + UgiInfo* ugi_info = update_auth_info->mutable_ugi_info(); + ugi_info->set_user_name(user_name); + ugi_info->set_passwd(passwd); + update_auth_info->set_update_type(UpdateAuthType::kUpdateUgi); + + // TODO: doesn't need access verify at first + // access_builder_->BuildRequest(&request); + + master::MasterMultiTenancyClient master_client(cluster_->MasterAddr()); + if (!master_client.UpdateUgi(&request, &response)) { + if (NULL != err) { + std::string reason; + CheckReturnValue(response.status(), reason, err); + LOG(ERROR) << reason << "| status: " << StatusCodeToString(response.status()); } + return false; + } + return true; +} - if (err) { - *err = th.err; +bool ClientImpl::DelUgi(const std::string& user_name, ErrorCode* err) { + UpdateUgiRequest request; + UpdateUgiResponse response; + + request.set_sequence_id(g_sequence_id++); + UpdateAuthInfo* update_auth_info = request.mutable_update_info(); + UgiInfo* ugi_info = update_auth_info->mutable_ugi_info(); + ugi_info->set_user_name(user_name); + update_auth_info->set_update_type(UpdateAuthType::kDelUgi); + + // TODO: doesn't need access verify at first + // access_builder_->BuildRequest(&request); + + master::MasterMultiTenancyClient master_client(cluster_->MasterAddr()); + if (!master_client.UpdateUgi(&request, &response)) { + if (NULL != err) { + std::string reason; + CheckReturnValue(response.status(), reason, err); + LOG(ERROR) << reason << "| status: " << StatusCodeToString(response.status()); } - if (!table_impl) { - VLOG(10) << "open null table: " << table_name; - MutexLock l(&open_table_mutex_); - open_table_map_.erase(table_name); - return NULL; - } - - return new TableWrapper(table_impl); + return false; + } + return true; } -std::shared_ptr ClientImpl::OpenTableInternal(const std::string& table_name, - ErrorCode* err) { - std::shared_ptr table(new TableImpl(table_name, thread_pool_, shared_from_this())); - if (!table) { - std::string reason = "fail to new TableImpl"; - if (err != NULL) { - err->SetFailed(ErrorCode::kBadParam, reason); - } - LOG(ERROR) << reason; - return std::shared_ptr(); - } else if (FLAGS_tera_sdk_mock_enable) { - return table; +bool ClientImpl::ShowUgi(UserVerificationInfoList* user_verification_info_list, ErrorCode* err) { + ShowUgiRequest request; + ShowUgiResponse response; + + request.set_sequence_id(g_sequence_id++); + + // TODO: doesn't need access verify at first + // access_builder_->BuildRequest(&request); + + master::MasterMultiTenancyClient master_client(cluster_->MasterAddr()); + if (!master_client.ShowUgi(&request, &response)) { + if (NULL != err) { + std::string reason; + CheckReturnValue(response.status(), reason, err); + LOG(ERROR) << reason << "| status: " << StatusCodeToString(response.status()); } - if (!table->OpenInternal(err)) { - return std::shared_ptr(); + return false; + } + user_verification_info_list->clear(); + ssize_t ugi_meta_infos_size = response.ugi_meta_infos_size(); + for (ssize_t i = 0; i < ugi_meta_infos_size; ++i) { + const UgiMetaInfo& ugi_meta_info = response.ugi_meta_infos(i); + const std::string& user_name = ugi_meta_info.user_name(); + const std::string& passwd = ugi_meta_info.passwd(); + VerificationInfo& verification_info = (*user_verification_info_list)[user_name]; + verification_info.first = passwd; + ssize_t roles_size = ugi_meta_info.roles_size(); + for (ssize_t roles_index = 0; roles_index < roles_size; ++roles_index) { + verification_info.second.emplace(ugi_meta_info.roles(roles_index)); } - return table; + } + return true; } -bool ClientImpl::GetTabletLocation(const string& table_name, - std::vector* tablets, - ErrorCode* err) { - TableMeta table_meta; - TabletMetaList tablet_list; +bool ClientImpl::AddRole(const std::string& role_name, ErrorCode* err) { + UpdateAuthRequest request; + UpdateAuthResponse response; + + request.set_sequence_id(g_sequence_id++); + UpdateAuthInfo* update_auth_info = request.mutable_update_info(); + RoleInfo* role_info = update_auth_info->mutable_role_info(); + role_info->set_role(role_name); + update_auth_info->set_update_type(UpdateAuthType::kAddRole); + + // TODO: doesn't need access verify at first + // access_builder_->BuildRequest(&request); + + master::MasterMultiTenancyClient master_client(cluster_->MasterAddr()); + if (!master_client.UpdateAuth(&request, &response)) { + if (NULL != err) { + std::string reason; + CheckReturnValue(response.status(), reason, err); + LOG(ERROR) << reason << "| status: " << StatusCodeToString(response.status()); + } + return false; + } + + return true; +} - if (!ShowTablesInfo(table_name, &table_meta, &tablet_list, err)) { - LOG(ERROR) << "table not exist: " << table_name; - return false; +bool ClientImpl::DelRole(const std::string& role_name, ErrorCode* err) { + UpdateAuthRequest request; + UpdateAuthResponse response; + + request.set_sequence_id(g_sequence_id++); + UpdateAuthInfo* update_auth_info = request.mutable_update_info(); + RoleInfo* role_info = update_auth_info->mutable_role_info(); + role_info->set_role(role_name); + update_auth_info->set_update_type(UpdateAuthType::kDelRole); + + // TODO: doesn't need access verify at first + // access_builder_->BuildRequest(&request); + + master::MasterMultiTenancyClient master_client(cluster_->MasterAddr()); + if (!master_client.UpdateAuth(&request, &response)) { + if (NULL != err) { + std::string reason; + CheckReturnValue(response.status(), reason, err); + LOG(ERROR) << reason << "| status: " << StatusCodeToString(response.status()); } + return false; + } + return true; +} - for (int i = 0; i < tablet_list.meta_size(); ++i) { - ParseTabletEntry(tablet_list.meta(i), tablets); +bool ClientImpl::GrantRole(const std::string& role_name, const std::string& user_name, + ErrorCode* err) { + UpdateAuthRequest request; + UpdateAuthResponse response; + + request.set_sequence_id(g_sequence_id++); + UpdateAuthInfo* update_auth_info = request.mutable_update_info(); + AuthorityInfo* authority_info = update_auth_info->mutable_authority_info(); + authority_info->set_user_name(user_name); + authority_info->set_role(role_name); + update_auth_info->set_update_type(UpdateAuthType::kGrantRole); + + // TODO: doesn't need access verify at first + // access_builder_->BuildRequest(&request); + + master::MasterMultiTenancyClient master_client(cluster_->MasterAddr()); + if (!master_client.UpdateAuth(&request, &response)) { + if (NULL != err) { + std::string reason; + CheckReturnValue(response.status(), reason, err); + LOG(ERROR) << reason << "| status: " << StatusCodeToString(response.status()); } - return true; + return false; + } + return true; } -TableDescriptor* ClientImpl::GetTableDescriptor(const string& table_name, - ErrorCode* err) { - std::vector table_list; - ListInternal(&table_list, NULL, table_name, "", 1, 0, err); - if (table_list.size() > 0 - && table_list[0].table_desc->TableName() == table_name) { - return table_list[0].table_desc; +bool ClientImpl::RevokeRole(const std::string& role_name, const std::string& user_name, + ErrorCode* err) { + UpdateAuthRequest request; + UpdateAuthResponse response; + + request.set_sequence_id(g_sequence_id++); + UpdateAuthInfo* update_auth_info = request.mutable_update_info(); + AuthorityInfo* authority_info = update_auth_info->mutable_authority_info(); + authority_info->set_user_name(user_name); + authority_info->set_role(role_name); + update_auth_info->set_update_type(UpdateAuthType::kRevokeRole); + + // TODO: doesn't need access verify at first + // access_builder_->BuildRequest(&request); + + master::MasterMultiTenancyClient master_client(cluster_->MasterAddr()); + if (!master_client.UpdateAuth(&request, &response)) { + if (NULL != err) { + std::string reason; + CheckReturnValue(response.status(), reason, err); + LOG(ERROR) << reason << "| status: " << StatusCodeToString(response.status()); } - return NULL; + return false; + } + return true; } -bool ClientImpl::List(std::vector* table_list, ErrorCode* err) { - std::vector tablet_list; - return ListInternal(table_list, &tablet_list, "", "", - FLAGS_tera_sdk_show_max_num, - 0, err); -} - -bool ClientImpl::ShowTableSchema(const string& name, TableSchema* schema, - ErrorCode* err) { - tabletnode::TabletNodeClient meta_client(thread_pool_, cluster_->RootTableAddr(true)); - ScanTabletRequest request; - ScanTabletResponse response; - request.set_sequence_id(0); - request.set_table_name(FLAGS_tera_master_meta_table_name); - request.set_start(""); - request.set_end("@~"); - if (!meta_client.ScanTablet(&request, &response) - || response.status() != kTabletNodeOk) { - LOG(ERROR) << "fail to scan meta: " << StatusCodeToString(response.status()); - err->SetFailed(ErrorCode::kSystem, "system error"); - return false; +bool ClientImpl::ShowRole(std::vector* roles_list, ErrorCode* err) { + ShowAuthRequest request; + ShowAuthResponse response; + + request.set_sequence_id(g_sequence_id++); + // TODO: doesn't need access verify at first + // access_builder_->BuildRequest(&request); + + master::MasterMultiTenancyClient master_client(cluster_->MasterAddr()); + if (!master_client.ShowAuth(&request, &response)) { + if (NULL != err) { + std::string reason; + CheckReturnValue(response.status(), reason, err); + LOG(ERROR) << reason << "| status: " << StatusCodeToString(response.status()); } - int32_t table_size = response.results().key_values_size(); - for (int32_t i = 0; i < table_size; i++) { - const KeyValuePair& record = response.results().key_values(i); - const string& key = record.key(); - const string& value = record.value(); - if (key[0] == '@') { - TableMeta meta; - ParseMetaTableKeyValue(key, value, &meta); - if (meta.schema().name() == name - || meta.schema().alias() == name) { - *schema = meta.schema(); - return true; - } - } else if (key[0] > '@') { - break; - } else { - continue; - } + return false; + } + + int role_infos_size = response.role_infos_size(); + roles_list->clear(); + roles_list->reserve(role_infos_size); + for (int role_infos_index = 0; role_infos_index < role_infos_size; ++role_infos_index) { + roles_list->emplace_back(response.role_infos(role_infos_index).role()); + } + + return true; +} + +bool ClientImpl::SetAuthPolicy(const std::string& table_name, const std::string& auth_policy, + ErrorCode* err) { + master::MasterMultiTenancyClient master_client(cluster_->MasterAddr()); + SetAuthPolicyRequest request; + SetAuthPolicyResponse response; + + // TODO: doesn't need access verify at first + // access_builder_->BuildRequest(&request); + + TableAuthPolicyInfo* table_auth_policy_info = request.mutable_table_auth_policy_info(); + table_auth_policy_info->set_table_name(table_name); + AuthPolicyType auth_policy_type; + + std::string reason; + if (!auth::AccessUtils::GetAuthPolicyType(auth_policy, &auth_policy_type)) { + LOG(ERROR) << "Set wrong auth policy[" << auth_policy << "]"; + reason = "wrong input auth policy[" + auth_policy + "]"; + if (NULL != err) { + err->SetFailed(ErrorCode::kBadParam, reason); } return false; + } + table_auth_policy_info->set_auth_policy_type(auth_policy_type); + if (master_client.SetAuthPolicy(&request, &response)) { + if (CheckReturnValue(response.status(), reason, err)) { + return true; + } + LOG(ERROR) << reason << "| status: " << StatusCodeToString(response.status()); + } else { + reason = "rpc fail to set auth policy"; + LOG(ERROR) << reason; + if (NULL != err) { + err->SetFailed(ErrorCode::kSystem, reason); + } + } + return false; } -// show exactly one table -bool ClientImpl::ShowTablesInfo(const string& name, - TableMeta* meta, - TabletMetaList* tablet_list, +bool ClientImpl::ShowAuthPolicy(std::map* table_auth_policy_list, ErrorCode* err) { - TableMetaList table_list; - bool result = DoShowTablesInfo(&table_list, tablet_list, name, - false, err); - if ((table_list.meta_size() == 0) - || (table_list.meta(0).table_name() != name)) { - return false; + ShowAuthPolicyRequest request; + ShowAuthPolicyResponse response; + + // TODO: doesn't need access verify at first + // access_builder_->BuildRequest(&request); + + master::MasterMultiTenancyClient master_client(cluster_->MasterAddr()); + if (!master_client.ShowAuthPolicy(&request, &response)) { + if (NULL != err) { + std::string reason; + CheckReturnValue(response.status(), reason, err); + LOG(ERROR) << reason << "| status: " << StatusCodeToString(response.status()); } - if (result) { - meta->CopyFrom(table_list.meta(0)); + return false; + } + + ssize_t table_auth_policy_infos_size = response.table_auth_policy_infos_size(); + table_auth_policy_list->clear(); + for (ssize_t i = 0; i < table_auth_policy_infos_size; ++i) { + const TableAuthPolicyInfo& table_auth_policy_info = response.table_auth_policy_infos(i); + std::string auth_policy; + if (!auth::AccessUtils::GetAuthPolicy(table_auth_policy_info.auth_policy_type(), + &auth_policy)) { + LOG(ERROR) << "Wrong auth policy[" << auth_policy << "]"; + continue; } - return result; + table_auth_policy_list->emplace(table_auth_policy_info.table_name(), std::move(auth_policy)); + } + return true; } -bool ClientImpl::ShowTablesInfo(TableMetaList* table_list, - TabletMetaList* tablet_list, - bool is_brief, - ErrorCode* err) { - return DoShowTablesInfo(table_list, tablet_list, "", is_brief, err); +bool ClientImpl::SetQuota(const TableQuota& table_quota, ErrorCode* err) { + master::MasterMultiTenancyClient master_client(cluster_->MasterAddr()); + SetQuotaRequest request; + SetQuotaResponse response; + request.set_sequence_id(g_sequence_id++); + request.mutable_table_quota()->CopyFrom(table_quota); + std::string reason; + if (master_client.SetQuota(&request, &response)) { + if (CheckReturnValue(response.status(), reason, err)) { + return true; + } + LOG(ERROR) << reason << "| status: " << StatusCodeToString(response.status()); + } else { + reason = "rpc fail to update quota"; + LOG(ERROR) << reason; + if (NULL != err) { + err->SetFailed(ErrorCode::kSystem, reason); + } + } + return false; } -bool ClientImpl::DoShowTablesInfo(TableMetaList* table_list, - TabletMetaList* tablet_list, - const string& table_name, - bool is_brief, - ErrorCode* err) { - if (table_list == NULL || tablet_list == NULL) { - return false; +bool ClientImpl::ShowQuotaBrief(std::vector* table_quota_list, ErrorCode* err) { + master::MasterMultiTenancyClient master_client(cluster_->MasterAddr()); + ShowQuotaRequest request; + ShowQuotaResponse response; + request.set_sequence_id(g_sequence_id++); + request.set_brief_show(true); + std::string reason; + if (master_client.ShowQuota(&request, &response)) { + if (CheckReturnValue(response.status(), reason, err)) { + int table_quota_list_size = response.table_quota_list_size(); + for (int table_quota_list_index = 0; table_quota_list_index < table_quota_list_size; + ++table_quota_list_index) { + table_quota_list->emplace_back(response.table_quota_list(table_quota_list_index)); + } + return true; } - table_list->Clear(); - tablet_list->Clear(); - - master::MasterClient master_client(cluster_->MasterAddr()); - std::string start_tablet_key; - std::string start_table_name = table_name; // maybe a empty string - bool has_more = true; - bool has_error = false; - bool table_meta_copied = false; - std::string err_msg; - while(has_more && !has_error) { - VLOG(7) << "round more " << has_more << ", " << DebugString(start_tablet_key); - ShowTablesRequest request; - ShowTablesResponse response; - if (!table_name.empty()) { - request.set_max_table_num(1); - } - request.set_start_table_name(start_table_name); - request.set_start_tablet_key(start_tablet_key); - request.set_max_tablet_num(FLAGS_tera_sdk_show_max_num); //tablets be fetched at most in one RPC - request.set_sequence_id(0); - request.set_user_token(GetUserToken(client_options_.user_identity, - client_options_.user_passcode)); - request.set_all_brief(is_brief); - - if (master_client.ShowTables(&request, &response) && - response.status() == kMasterOk) { - if (response.all_brief()) { - // show all table brief - table_list->CopyFrom(response.table_meta_list()); - return true; - } - - if (response.table_meta_list().meta_size() == 0) { - has_error = true; - err_msg = StatusCodeToString(response.status()); - break; - } - if (!table_meta_copied) { - table_list->CopyFrom(response.table_meta_list()); - table_meta_copied = true; - } - if (response.tablet_meta_list().meta_size() == 0) { - has_more = false; - } - for(int i = 0; i < response.tablet_meta_list().meta_size(); i++){ - const std::string& table_name = response.tablet_meta_list().meta(i).table_name(); - const std::string& tablet_key = response.tablet_meta_list().meta(i).key_range().key_start(); - // compatible to old master - if (table_name > start_table_name - || (table_name == start_table_name && tablet_key >= start_tablet_key)) { - tablet_list->add_meta()->CopyFrom(response.tablet_meta_list().meta(i)); - tablet_list->add_counter()->CopyFrom(response.tablet_meta_list().counter(i)); - // old tera master will not return timestamp #963 - if (response.tablet_meta_list().timestamp_size() > 0) { - tablet_list->add_timestamp(response.tablet_meta_list().timestamp(i)); - } - } - if (i == response.tablet_meta_list().meta_size() - 1 ) { - std::string prev_table_name = start_table_name; - start_table_name = response.tablet_meta_list().meta(i).table_name(); - std::string last_key = response.tablet_meta_list().meta(i).key_range().key_start(); - if (prev_table_name > start_table_name - || (prev_table_name == start_table_name && last_key <= start_tablet_key)) { - LOG(WARNING) << "the master has older version, pre_table " << prev_table_name - << ", start_table " << start_table_name << ", last_key " << DebugString(last_key) - << ", start_key " << DebugString(start_tablet_key); - has_more = false; - break; - } - start_tablet_key = last_key; - } - } - start_tablet_key.append(1,'\0'); // fetch next tablet - } else { - if (response.status() != kMasterOk && - response.status() != kTableNotFound) { - has_error = true; - err_msg = StatusCodeToString(response.status()); - } - has_more = false; - } - VLOG(16) << "fetch meta table name: " << start_table_name - << " tablet size: " << response.tablet_meta_list().meta_size() - << " next start: " << DebugString(start_tablet_key); - }; - - if (has_error) { - LOG(ERROR) << "fail to show table info."; - if (err != NULL) { - err->SetFailed(ErrorCode::kSystem, err_msg); - } - return false; + LOG(ERROR) << reason << "| status: " << StatusCodeToString(response.status()); + } else { + reason = "rpc fail to show quota"; + LOG(ERROR) << reason; + if (NULL != err) { + err->SetFailed(ErrorCode::kSystem, reason); } - return true; + } + return false; } -bool ClientImpl::ShowTabletNodesInfo(const string& addr, - TabletNodeInfo* info, - TabletMetaList* tablet_list, - ErrorCode* err) { - if (info == NULL || tablet_list == NULL) { - return false; +bool ClientImpl::ShowQuotaDetail(std::vector* ts_quota_list, ErrorCode* err) { + master::MasterMultiTenancyClient master_client(cluster_->MasterAddr()); + ShowQuotaRequest request; + ShowQuotaResponse response; + request.set_sequence_id(g_sequence_id++); + request.set_brief_show(false); + std::string reason; + if (master_client.ShowQuota(&request, &response)) { + if (CheckReturnValue(response.status(), reason, err)) { + int ts_quota_list_size = response.ts_quota_list_size(); + for (int ts_quota_list_index = 0; ts_quota_list_index < ts_quota_list_size; + ++ts_quota_list_index) { + ts_quota_list->emplace_back(response.ts_quota_list(ts_quota_list_index)); + } + return true; } - info->Clear(); - tablet_list->Clear(); - - master::MasterClient master_client(cluster_->MasterAddr()); - - ShowTabletNodesRequest request; - ShowTabletNodesResponse response; - request.set_sequence_id(0); - request.set_addr(addr); - request.set_is_showall(false); - request.set_user_token(GetUserToken(client_options_.user_identity, - client_options_.user_passcode)); - - if (master_client.ShowTabletNodes(&request, &response) && - response.status() == kMasterOk) { - if (response.tabletnode_info_size() == 0) { - return false; - } - info->CopyFrom(response.tabletnode_info(0)); - tablet_list->CopyFrom(response.tabletmeta_list()); - return true; + LOG(ERROR) << reason << "| status: " << StatusCodeToString(response.status()); + } else { + reason = "rpc fail to show quota"; + LOG(ERROR) << reason; + if (NULL != err) { + err->SetFailed(ErrorCode::kSystem, reason); } - LOG(ERROR) << "fail to show tabletnode info: " << addr; - err->SetFailed(ErrorCode::kSystem, StatusCodeToString(response.status())); - return false; + } + return false; } -bool ClientImpl::ShowTabletNodesInfo(std::vector* infos, - ErrorCode* err) { - if (infos == NULL) { - return false; +Table* ClientImpl::OpenTable(const std::string& table_name, ErrorCode* err) { + return OpenTable(table_name, std::function{}, err); +} + +Table* ClientImpl::OpenTable(const std::string& table_name, + std::function hash_method, + ErrorCode* err) { + open_table_mutex_.Lock(); + TableHandle& th = open_table_map_[table_name]; + std::shared_ptr table_impl; + + bool is_existed = false; + { + MutexLock l(&th.mu); + open_table_mutex_.Unlock(); + table_impl = th.handle.lock(); + if (table_impl) { + is_existed = true; } - infos->clear(); - master::MasterClient master_client(cluster_->MasterAddr()); + if (!is_existed) { + // open a new table + VLOG(10) << "open a new table: " << table_name; + table_impl = OpenTableInternal(table_name, hash_method, &th.err); + th.handle = table_impl; + } + } + + if (err) { + *err = th.err; + } + if (!table_impl) { + VLOG(10) << "open null table: " << table_name; + MutexLock l(&open_table_mutex_); + open_table_map_.erase(table_name); + return NULL; + } - ShowTabletNodesRequest request; - ShowTabletNodesResponse response; - request.set_sequence_id(0); - request.set_is_showall(true); + return new TableWrapper(table_impl); +} - if (master_client.ShowTabletNodes(&request, &response) && - response.status() == kMasterOk) { - if (response.tabletnode_info_size() == 0) { - return true; - } - for (int i = 0; i < response.tabletnode_info_size(); ++i) { - infos->push_back(response.tabletnode_info(i)); - } - return true; +std::shared_ptr ClientImpl::OpenTableInternal( + const std::string& table_name, std::function hash_method, + ErrorCode* err) { + std::shared_ptr table(new TableImpl(table_name, thread_pool_, shared_from_this())); + if (!table) { + std::string reason = "fail to new TableImpl"; + if (err != NULL) { + err->SetFailed(ErrorCode::kBadParam, reason); } - LOG(ERROR) << "fail to show tabletnode info"; - err->SetFailed(ErrorCode::kSystem, StatusCodeToString(response.status())); + LOG(ERROR) << reason; + return std::shared_ptr(); + } else if (FLAGS_tera_sdk_mock_enable) { + return table; + } + if (!table->OpenInternal(hash_method, err)) { + return std::shared_ptr(); + } + return table; +} + +bool ClientImpl::GetTabletLocation(const string& table_name, std::vector* tablets, + ErrorCode* err) { + TableMeta table_meta; + TabletMetaList tablet_list; + + if (!ShowTablesInfo(table_name, &table_meta, &tablet_list, err)) { + LOG(ERROR) << "table not exist: " << table_name; return false; + } + + for (int i = 0; i < tablet_list.meta_size(); ++i) { + ParseTabletEntry(tablet_list.meta(i), tablets); + } + return true; } -bool ClientImpl::List(const string& table_name, TableInfo* table_info, - std::vector* tablet_list, ErrorCode* err) { - std::vector table_list; - bool ret = ListInternal(&table_list, tablet_list, table_name, "", 1, - FLAGS_tera_sdk_show_max_num, err); - if (table_list.size() > 0 - && table_list[0].table_desc->TableName() == table_name) { - *table_info = table_list[0]; - } - return ret; +TableDescriptor* ClientImpl::GetTableDescriptor(const string& table_name, ErrorCode* err) { + std::vector table_list; + ListInternal(&table_list, NULL, table_name, "", 1, 0, err); + if (table_list.size() > 0 && table_list[0].table_desc->TableName() == table_name) { + return table_list[0].table_desc; + } + return NULL; } -bool ClientImpl::IsTableExist(const string& table_name, ErrorCode* err) { - std::vector table_list; - ListInternal(&table_list, NULL, table_name, "", 1, 0, err); - if (table_list.size() > 0 - && table_list[0].table_desc->TableName() == table_name) { +bool ClientImpl::List(std::vector* table_list, ErrorCode* err) { + std::vector tablet_list; + return ListInternal(table_list, &tablet_list, "", "", FLAGS_tera_sdk_show_max_num, 0, err); +} + +bool ClientImpl::ShowTableSchema(const string& name, TableSchema* schema, ErrorCode* err) { + tabletnode::TabletNodeClient meta_client(thread_pool_, cluster_->RootTableAddr(true)); + ScanTabletRequest request; + ScanTabletResponse response; + request.set_sequence_id(g_sequence_id++); + request.set_table_name(FLAGS_tera_master_meta_table_name); + request.set_start(""); + request.set_end("@~"); + access_builder_->BuildRequest(&request); + if (!meta_client.ScanTablet(&request, &response) || response.status() != kTabletNodeOk) { + LOG(ERROR) << "fail to scan meta: " << StatusCodeToString(response.status()); + err->SetFailed(ErrorCode::kSystem, "system error"); + return false; + } + int32_t table_size = response.results().key_values_size(); + for (int32_t i = 0; i < table_size; i++) { + const KeyValuePair& record = response.results().key_values(i); + const string& key = record.key(); + const string& value = record.value(); + if (key[0] == '@') { + TableMeta meta; + ParseMetaTableKeyValue(key, value, &meta); + if (meta.schema().name() == name || meta.schema().alias() == name) { + *schema = meta.schema(); return true; + } + } else if (key[0] > '@') { + break; + } else { + continue; } + } + return false; +} + +// show exactly one table +bool ClientImpl::ShowTablesInfo(const string& name, TableMeta* meta, TabletMetaList* tablet_list, + ErrorCode* err) { + TableMetaList table_list; + bool result = DoShowTablesInfo(&table_list, tablet_list, name, false, err); + if ((table_list.meta_size() == 0) || (table_list.meta(0).table_name() != name)) { return false; + } + if (result) { + meta->CopyFrom(table_list.meta(0)); + } + return result; } -bool ClientImpl::IsTableEnabled(const string& table_name, ErrorCode* err) { - std::vector table_list; - ListInternal(&table_list, NULL, table_name, "", 1, 0, err); - if (table_list.size() > 0 - && table_list[0].table_desc->TableName() == table_name) { - if (table_list[0].status == "kTableEnable") { - return true; - } else { - return false; +bool ClientImpl::ShowTablesInfo(TableMetaList* table_list, TabletMetaList* tablet_list, + bool is_brief, ErrorCode* err) { + return DoShowTablesInfo(table_list, tablet_list, "", is_brief, err); +} + +bool ClientImpl::DoShowTablesInfo(TableMetaList* table_list, TabletMetaList* tablet_list, + const string& table_name, bool is_brief, ErrorCode* err) { + if (table_list == NULL || tablet_list == NULL) { + return false; + } + table_list->Clear(); + tablet_list->Clear(); + + master::MasterClient master_client(cluster_->MasterAddr()); + std::string start_tablet_key; + std::string start_table_name = table_name; // maybe a empty string + bool has_more = true; + bool has_error = false; + bool table_meta_copied = false; + std::string err_msg; + while (has_more && !has_error) { + VLOG(7) << "round more " << has_more << ", " << DebugString(start_tablet_key); + ShowTablesRequest request; + ShowTablesResponse response; + if (!table_name.empty()) { + request.set_max_table_num(1); + } + request.set_start_table_name(start_table_name); + request.set_start_tablet_key(start_tablet_key); + request.set_max_tablet_num( + FLAGS_tera_sdk_show_max_num); // tablets be fetched at most in one RPC + request.set_sequence_id(g_sequence_id++); + request.set_user_token( + GetUserToken(client_options_.user_identity, client_options_.user_passcode)); + request.set_all_brief(is_brief); + + if (master_client.ShowTables(&request, &response) && response.status() == kMasterOk) { + if (response.all_brief()) { + // show all table brief + table_list->CopyFrom(response.table_meta_list()); + return true; + } + + if (response.table_meta_list().meta_size() == 0) { + has_error = true; + err_msg = StatusCodeToString(response.status()); + break; + } + if (!table_meta_copied) { + table_list->CopyFrom(response.table_meta_list()); + table_meta_copied = true; + } + if (response.tablet_meta_list().meta_size() == 0) { + has_more = false; + } + for (int i = 0; i < response.tablet_meta_list().meta_size(); i++) { + const std::string& table_name = response.tablet_meta_list().meta(i).table_name(); + const std::string& tablet_key = response.tablet_meta_list().meta(i).key_range().key_start(); + // compatible to old master + if (table_name > start_table_name || + (table_name == start_table_name && tablet_key >= start_tablet_key)) { + tablet_list->add_meta()->CopyFrom(response.tablet_meta_list().meta(i)); + tablet_list->add_counter()->CopyFrom(response.tablet_meta_list().counter(i)); + // old tera master will not return timestamp #963 + if (response.tablet_meta_list().timestamp_size() > 0) { + tablet_list->add_timestamp(response.tablet_meta_list().timestamp(i)); + } } + if (i == response.tablet_meta_list().meta_size() - 1) { + std::string prev_table_name = start_table_name; + start_table_name = response.tablet_meta_list().meta(i).table_name(); + std::string last_key = response.tablet_meta_list().meta(i).key_range().key_start(); + if (prev_table_name > start_table_name || + (prev_table_name == start_table_name && last_key <= start_tablet_key)) { + LOG(WARNING) << "the master has older version, pre_table " << prev_table_name + << ", start_table " << start_table_name << ", last_key " + << DebugString(last_key) << ", start_key " + << DebugString(start_tablet_key); + has_more = false; + break; + } + start_tablet_key = last_key; + } + } + start_tablet_key.append(1, '\0'); // fetch next tablet } else { - LOG(ERROR) << "table not exist: " << table_name; + if (response.status() != kMasterOk && response.status() != kTableNotFound) { + has_error = true; + err_msg = StatusCodeToString(response.status()); + } + has_more = false; + } + VLOG(16) << "fetch meta table name: " << start_table_name + << " tablet size: " << response.tablet_meta_list().meta_size() + << " next start: " << DebugString(start_tablet_key); + }; + + if (has_error) { + LOG(ERROR) << "fail to show table info."; + if (err != NULL) { + err->SetFailed(ErrorCode::kSystem, err_msg); } return false; + } + return true; } -bool ClientImpl::IsTableEmpty(const string& table_name, ErrorCode* err) { - std::vector table_list; - std::vector tablet_list; - ListInternal(&table_list, &tablet_list, table_name, "", 1, - FLAGS_tera_sdk_show_max_num, err); - if (table_list.size() > 0 - && table_list[0].table_desc->TableName() == table_name) { - if (tablet_list.size() == 0 - || (tablet_list.size() == 1 && tablet_list[0].data_size <= 0)) { - return true; - } - return false; +bool ClientImpl::ShowTabletNodesInfo(const string& addr, TabletNodeInfo* info, + TabletMetaList* tablet_list, ErrorCode* err) { + if (info == NULL || tablet_list == NULL) { + return false; + } + info->Clear(); + tablet_list->Clear(); + + master::MasterClient master_client(cluster_->MasterAddr()); + + ShowTabletNodesRequest request; + ShowTabletNodesResponse response; + request.set_sequence_id(g_sequence_id++); + request.set_addr(addr); + request.set_is_showall(false); + request.set_user_token( + GetUserToken(client_options_.user_identity, client_options_.user_passcode)); + + if (master_client.ShowTabletNodes(&request, &response) && response.status() == kMasterOk) { + if (response.tabletnode_info_size() == 0) { + return false; } - LOG(ERROR) << "table not exist: " << table_name; + info->CopyFrom(response.tabletnode_info(0)); + tablet_list->CopyFrom(response.tabletmeta_list()); return true; + } + LOG(ERROR) << "fail to show tabletnode info: " << addr; + err->SetFailed(ErrorCode::kSystem, StatusCodeToString(response.status())); + return false; } -bool ClientImpl::CmdCtrl(const string& command, - const std::vector& arg_list, - bool* bool_result, - string* str_result, - ErrorCode* err) { - master::MasterClient master_client(cluster_->MasterAddr()); - - CmdCtrlRequest request; - CmdCtrlResponse response; - request.set_sequence_id(0); - request.set_command(command); - std::vector::const_iterator it = arg_list.begin(); - for (; it != arg_list.end(); ++it) { - request.add_arg_list(*it); - } +bool ClientImpl::ShowTabletNodesInfo(std::vector* infos, ErrorCode* err) { + if (infos == NULL) { + return false; + } + infos->clear(); - if (!master_client.CmdCtrl(&request, &response) - || response.status() != kMasterOk) { - LOG(ERROR) << "fail to run cmd: " << command; - err->SetFailed(ErrorCode::kBadParam); - return false; - } - if (bool_result != NULL && response.has_bool_result()) { - *bool_result = response.bool_result(); + master::MasterClient master_client(cluster_->MasterAddr()); + + ShowTabletNodesRequest request; + ShowTabletNodesResponse response; + request.set_sequence_id(g_sequence_id++); + request.set_is_showall(true); + + if (master_client.ShowTabletNodes(&request, &response) && response.status() == kMasterOk) { + if (response.tabletnode_info_size() == 0) { + return true; } - if (str_result != NULL && response.has_str_result()) { - *str_result = response.str_result(); + for (int i = 0; i < response.tabletnode_info_size(); ++i) { + infos->push_back(response.tabletnode_info(i)); } return true; + } + LOG(ERROR) << "fail to show tabletnode info"; + err->SetFailed(ErrorCode::kSystem, StatusCodeToString(response.status())); + return false; } -bool ClientImpl::ListInternal(std::vector* table_list, - std::vector* tablet_list, - const string& start_table_name, - const string& start_tablet_key, - uint32_t max_table_found, - uint32_t max_tablet_found, - ErrorCode* err) { - master::MasterClient master_client(cluster_->MasterAddr()); - - uint64_t sequence_id = 0; - ShowTablesRequest request; - ShowTablesResponse response; - request.set_sequence_id(sequence_id); - request.set_max_table_num(max_table_found); - request.set_max_tablet_num(max_tablet_found); - request.set_start_table_name(start_table_name); - request.set_start_tablet_key(start_tablet_key); - request.set_user_token(GetUserToken(client_options_.user_identity, - client_options_.user_passcode)); - - bool is_more = true; - while (is_more) { - if (!master_client.ShowTables(&request, &response) - || response.status() != kMasterOk) { - LOG(ERROR) << "fail to show tables from table: " - << request.start_table_name() << ", key: " - << request.start_tablet_key() << ", status: " - << StatusCodeToString(response.status()); - err->SetFailed(ErrorCode::kSystem); - return false; - } +bool ClientImpl::List(const string& table_name, TableInfo* table_info, + std::vector* tablet_list, ErrorCode* err) { + std::vector table_list; + bool ret = + ListInternal(&table_list, tablet_list, table_name, "", 1, FLAGS_tera_sdk_show_max_num, err); + if (table_list.size() > 0 && table_list[0].table_desc->TableName() == table_name) { + *table_info = table_list[0]; + } + return ret; +} - const tera::TableMetaList& table_meta_list = response.table_meta_list(); - const tera::TabletMetaList& tablet_meta_list = response.tablet_meta_list(); - for (int32_t i = 0; i < table_meta_list.meta_size(); ++i) { - const TableMeta& meta = table_meta_list.meta(i); - ParseTableEntry(meta, table_list); - } - for (int32_t i = 0; i < tablet_meta_list.meta_size(); ++i) { - const TabletMeta& meta = tablet_meta_list.meta(i); - ParseTabletEntry(meta, tablet_list); - } - if (!response.has_is_more() || !response.is_more()) { - is_more = false; - } else { - if (tablet_meta_list.meta_size() == 0) { //argument @max_tablet_found maybe zero - break; - } - const tera::TabletMeta& meta = tablet_meta_list.meta(tablet_meta_list.meta_size()-1); - const string& last_key = meta.key_range().key_start(); - request.set_start_table_name(meta.table_name()); - request.set_start_tablet_key(tera::NextKey(last_key)); - request.set_sequence_id(sequence_id++); - } +bool ClientImpl::IsTableExist(const string& table_name, ErrorCode* err) { + std::vector table_list; + ListInternal(&table_list, NULL, table_name, "", 1, 0, err); + if (table_list.size() > 0 && table_list[0].table_desc->TableName() == table_name) { + return true; + } + return false; +} + +bool ClientImpl::IsTableEnabled(const string& table_name, ErrorCode* err) { + std::vector table_list; + ListInternal(&table_list, NULL, table_name, "", 1, 0, err); + if (table_list.size() > 0 && table_list[0].table_desc->TableName() == table_name) { + if (table_list[0].status == "kTableEnable") { + return true; + } else { + return false; } + } else { + LOG(ERROR) << "table not exist: " << table_name; + } + return false; +} - return true; +bool ClientImpl::IsTableEmpty(const string& table_name, ErrorCode* err) { + std::vector table_list; + std::vector tablet_list; + ListInternal(&table_list, &tablet_list, table_name, "", 1, FLAGS_tera_sdk_show_max_num, err); + if (table_list.size() > 0 && table_list[0].table_desc->TableName() == table_name) { + if (tablet_list.size() == 0 || (tablet_list.size() == 1 && tablet_list[0].data_size <= 0)) { + return true; + } + return false; + } + LOG(ERROR) << "table not exist: " << table_name; + return true; } -bool ClientImpl::ParseTableEntry(const TableMeta meta, std::vector* table_list) { - if (table_list == NULL) { - return true; +bool ClientImpl::CmdCtrl(const string& command, const std::vector& arg_list, + bool* bool_result, string* str_result, ErrorCode* err) { + master::MasterClient master_client(cluster_->MasterAddr()); + + CmdCtrlRequest request; + CmdCtrlResponse response; + request.set_sequence_id(g_sequence_id++); + request.set_command(command); + std::vector::const_iterator it = arg_list.begin(); + for (; it != arg_list.end(); ++it) { + request.add_arg_list(*it); + } + + if (!master_client.CmdCtrl(&request, &response) || response.status() != kMasterOk) { + LOG(ERROR) << "fail to run cmd: " << command; + err->SetFailed(ErrorCode::kBadParam); + return false; + } + if (bool_result != NULL && response.has_bool_result()) { + *bool_result = response.bool_result(); + } + if (str_result != NULL && response.has_str_result()) { + *str_result = response.str_result(); + } + return true; +} + +bool ClientImpl::ListInternal(std::vector* table_list, + std::vector* tablet_list, const string& start_table_name, + const string& start_tablet_key, uint32_t max_table_found, + uint32_t max_tablet_found, ErrorCode* err) { + master::MasterClient master_client(cluster_->MasterAddr()); + + ShowTablesRequest request; + ShowTablesResponse response; + request.set_sequence_id(g_sequence_id++); + request.set_max_table_num(max_table_found); + request.set_max_tablet_num(max_tablet_found); + request.set_start_table_name(start_table_name); + request.set_start_tablet_key(start_tablet_key); + request.set_user_token( + GetUserToken(client_options_.user_identity, client_options_.user_passcode)); + + bool is_more = true; + while (is_more) { + if (!master_client.ShowTables(&request, &response) || response.status() != kMasterOk) { + LOG(ERROR) << "fail to show tables from table: " << request.start_table_name() + << ", key: " << request.start_tablet_key() + << ", status: " << StatusCodeToString(response.status()); + err->SetFailed(ErrorCode::kSystem); + return false; + } + + const tera::TableMetaList& table_meta_list = response.table_meta_list(); + const tera::TabletMetaList& tablet_meta_list = response.tablet_meta_list(); + for (int32_t i = 0; i < table_meta_list.meta_size(); ++i) { + const TableMeta& meta = table_meta_list.meta(i); + ParseTableEntry(meta, table_list); + } + for (int32_t i = 0; i < tablet_meta_list.meta_size(); ++i) { + const TabletMeta& meta = tablet_meta_list.meta(i); + ParseTabletEntry(meta, tablet_list); + } + if (!response.has_is_more() || !response.is_more()) { + is_more = false; + } else { + if (tablet_meta_list.meta_size() == 0) { // argument @max_tablet_found maybe zero + break; + } + const tera::TabletMeta& meta = tablet_meta_list.meta(tablet_meta_list.meta_size() - 1); + const string& last_key = meta.key_range().key_start(); + request.set_start_table_name(meta.table_name()); + request.set_start_tablet_key(tera::NextKey(last_key)); + request.set_sequence_id(g_sequence_id++); } - TableInfo table_info; - const TableSchema& schema = meta.schema(); - table_info.table_desc = new TableDescriptor(schema.name()); + } - TableSchemaToDesc(schema, table_info.table_desc); + return true; +} - table_info.status = StatusCodeToString(meta.status()); - table_list->push_back(table_info); +bool ClientImpl::ParseTableEntry(const TableMeta meta, std::vector* table_list) { + if (table_list == NULL) { return true; + } + TableInfo table_info; + const TableSchema& schema = meta.schema(); + table_info.table_desc = new TableDescriptor(schema.name()); + + TableSchemaToDesc(schema, table_info.table_desc); + + table_info.status = StatusCodeToString(meta.status()); + table_list->push_back(table_info); + return true; } bool ClientImpl::ParseTabletEntry(const TabletMeta& meta, std::vector* tablet_list) { - if (tablet_list == NULL) { - return true; - } - TabletInfo tablet; - tablet.table_name = meta.table_name(); - tablet.path = meta.path(); - tablet.start_key = meta.key_range().key_start(); - tablet.end_key = meta.key_range().key_end(); - tablet.server_addr = meta.server_addr(); - tablet.data_size = meta.size(); - tablet.status = StatusCodeToString(meta.status()); - - tablet_list->push_back(tablet); + if (tablet_list == NULL) { return true; + } + TabletInfo tablet; + tablet.table_name = meta.table_name(); + tablet.path = meta.path(); + tablet.start_key = meta.key_range().key_start(); + tablet.end_key = meta.key_range().key_end(); + tablet.server_addr = meta.server_addr(); + tablet.data_size = meta.size(); + tablet.status = StatusCodeToString(meta.status()); + + tablet_list->push_back(tablet); + return true; } Transaction* ClientImpl::NewGlobalTransaction() { - return GlobalTxn::NewGlobalTxn(shared_from_this(), gtxn_thread_pool_, tso_cluster_); + return GlobalTxn::NewGlobalTxn(shared_from_this(), gtxn_thread_pool_, tso_cluster_); } bool ClientImpl::IsClientAlive(const std::string& path) { - if (client_zk_adapter_ != NULL) { - return client_zk_adapter_->IsClientAlive(path); - } - return true; + if (client_zk_adapter_ != NULL) { + return client_zk_adapter_->IsClientAlive(path); + } + return true; } -std::string ClientImpl::ClientSession() { - return session_str_; -} +std::string ClientImpl::ClientSession() { return session_str_; } -sdk::ClusterFinder* ClientImpl::GetClusterFinder() { - return cluster_; -} +sdk::ClusterFinder* ClientImpl::GetClusterFinder() { return cluster_; } + +std::shared_ptr ClientImpl::GetAccessBuilder() { return access_builder_; } bool ClientImpl::RegisterSelf() { - if (client_zk_adapter_ != NULL) { - return client_zk_adapter_->RegisterClient(&session_str_); - } else { - return false; - } + if (client_zk_adapter_ != NULL) { + return client_zk_adapter_->RegisterClient(&session_str_); + } else { + return false; + } } static int SpecifiedFlagfileCount(const std::string& confpath) { - int count = 0; - if (!confpath.empty()) { - count++; - } - if (!FLAGS_tera_sdk_conf_file.empty()) { - count++; - } - return count; + int count = 0; + if (!confpath.empty()) { + count++; + } + if (!FLAGS_tera_sdk_conf_file.empty()) { + count++; + } + return count; } -static int InitFlags(ClientOptions* client_options, - const std::string& confpath, const std::string& log_prefix) { - // search conf file, priority: - // user-specified > ./tera.flag > ../conf/tera.flag - std::string flagfile; - if (SpecifiedFlagfileCount(confpath) > 1) { - LOG(ERROR) << "should specify no more than one config file"; - return -1; - } - - if (!confpath.empty() && IsExist(confpath)){ - flagfile = confpath; - } else if(!confpath.empty() && !IsExist(confpath)){ - LOG(ERROR) << "specified config file(function argument) not found: " - << confpath; - return -1; - } else if (!FLAGS_tera_sdk_conf_file.empty() && IsExist(confpath)) { - flagfile = FLAGS_tera_sdk_conf_file; - } else if (!FLAGS_tera_sdk_conf_file.empty() && !IsExist(confpath)) { - LOG(ERROR) << "specified config file(FLAGS_tera_sdk_conf_file) not found"; - return -1; - } else if (IsExist("./tera.flag")) { - flagfile = "./tera.flag"; - } else if (IsExist("../conf/tera.flag")) { - flagfile = "../conf/tera.flag"; - } else if (IsExist(utils::GetValueFromEnv("TERA_CONF"))) { - flagfile = utils::GetValueFromEnv("TERA_CONF"); +static int InitFlags(ClientOptions* client_options, const std::string& confpath, + const std::string& log_prefix) { + // search conf file, priority: + // user-specified > ./tera.flag > ../conf/tera.flag + std::string flagfile; + if (SpecifiedFlagfileCount(confpath) > 1) { + LOG(ERROR) << "should specify no more than one config file"; + return -1; + } + + if (!confpath.empty() && IsExist(confpath)) { + flagfile = confpath; + } else if (!confpath.empty() && !IsExist(confpath)) { + LOG(ERROR) << "specified config file(function argument) not found: " << confpath; + return -1; + } else if (!FLAGS_tera_sdk_conf_file.empty() && IsExist(confpath)) { + flagfile = FLAGS_tera_sdk_conf_file; + } else if (!FLAGS_tera_sdk_conf_file.empty() && !IsExist(confpath)) { + LOG(ERROR) << "specified config file(FLAGS_tera_sdk_conf_file) not found"; + return -1; + } else if (IsExist("./tera.flag")) { + flagfile = "./tera.flag"; + } else if (IsExist("../conf/tera.flag")) { + flagfile = "../conf/tera.flag"; + } else if (IsExist(utils::GetValueFromEnv("TERA_CONF"))) { + flagfile = utils::GetValueFromEnv("TERA_CONF"); + } else { + LOG(ERROR) << "hasn't specify the flagfile, but default config file not found"; + return -1; + } + + utils::LoadFlagFile(flagfile); + client_options->flagfile = flagfile; + + if (!g_is_glog_init) { + ::google::InitGoogleLogging(log_prefix.c_str()); + utils::SetupLog(log_prefix); + FLAGS_tera_log_prefix = log_prefix; + // start log cleaner + if (FLAGS_tera_info_log_clean_enable) { + common::LogCleaner::StartCleaner(); + LOG(INFO) << "start log cleaner"; } else { - LOG(ERROR) << "hasn't specify the flagfile, but default config file not found"; - return -1; - } - - utils::LoadFlagFile(flagfile); - client_options->flagfile = flagfile; - - if (!g_is_glog_init) { - ::google::InitGoogleLogging(log_prefix.c_str()); - utils::SetupLog(log_prefix); - FLAGS_tera_log_prefix = log_prefix; - // start log cleaner - if (FLAGS_tera_info_log_clean_enable) { - common::LogCleaner::StartCleaner(); - LOG(INFO) << "start log cleaner"; - } else { - LOG(INFO) << "log cleaner is disable"; - } - g_is_glog_init = true; + LOG(INFO) << "log cleaner is disable"; } + g_is_glog_init = true; + } - LOG(INFO) << "USER = " << FLAGS_tera_user_identity; - LOG(INFO) << "Load config file: " << flagfile; - return 0; + LOG(INFO) << "USER = " << FLAGS_tera_user_identity; + LOG(INFO) << "Load config file: " << flagfile; + return 0; } -Client* Client::NewClient(const string& confpath, - const string& log_prefix, - ErrorCode* err) { - // Protect the section from [load flagfile] to [new a client instance], - // because the client constructor will use flagfile options to initial its private options +Client* Client::NewClient(const string& confpath, const string& log_prefix, ErrorCode* err) { + // Protect the section from [load flagfile] to [new a client instance], + // because the client constructor will use flagfile options to initial its + // private options - std::call_once(init_cluster_client_map_once, InitClusterClientMap); + std::call_once(init_cluster_client_map_once, InitClusterClientMap); - bool client_existed = false; - ClusterClientMap::iterator it; + bool client_existed = false; + ClusterClientMap::iterator it; - MutexLock locker(&g_mutex); - it = cluster_client_map->find(confpath); - if (it != cluster_client_map->end()) { - client_existed = true; - if (auto tmp_client = (it->second->wp_client_impl).lock()) { - return new ClientWrapper(tmp_client); - } + MutexLock locker(&g_mutex); + it = cluster_client_map->find(confpath); + if (it != cluster_client_map->end()) { + client_existed = true; + if (auto tmp_client = (it->second->wp_client_impl).lock()) { + return new ClientWrapper(tmp_client); } - - ClientOptions client_options; - client_options.confpath = confpath; - if (!FLAGS_tera_sdk_mock_enable && - InitFlags(&client_options, confpath, log_prefix) != 0) { - if (err != NULL) { - std::string reason = "init tera flag failed"; - err->SetFailed(ErrorCode::kBadParam, reason); - } - return NULL; + } + + ClientOptions client_options; + client_options.confpath = confpath; + if (!FLAGS_tera_sdk_mock_enable && InitFlags(&client_options, confpath, log_prefix) != 0) { + if (err != NULL) { + std::string reason = "init tera flag failed"; + err->SetFailed(ErrorCode::kBadParam, reason); } - client_options.user_identity = FLAGS_tera_user_identity; - client_options.user_passcode = FLAGS_tera_user_passcode; + return NULL; + } - std::unique_ptr client_resource; - // brand new client - if (!client_existed) { - client_resource.reset(new ClientResource); - client_resource->client_thread_pool = new ThreadPool(FLAGS_tera_sdk_thread_max_num); - if (FLAGS_tera_sdk_client_for_gtxn) { - client_resource->client_gtxn_thread_pool = new ThreadPool(FLAGS_tera_gtxn_thread_max_num); - } - } else { - client_resource = std::move(it->second); - } + client_options.tera_auth_policy = FLAGS_tera_auth_policy; - std::shared_ptr client(new ClientImpl(client_options, - client_resource->client_thread_pool, client_resource->client_gtxn_thread_pool)); - if (client) { - client_resource->wp_client_impl = client; - if (!client_existed) { - cluster_client_map->insert(std::pair>( - confpath, std::move(client_resource))); - } else { - it->second = std::move(client_resource); - } + client_options.tera_auth_name = FLAGS_tera_auth_name; + client_options.tera_auth_token = FLAGS_tera_auth_token; + + client_options.user_identity = FLAGS_tera_user_identity; + client_options.user_passcode = FLAGS_tera_user_passcode; + + std::unique_ptr client_resource; + // brand new client + if (!client_existed) { + client_resource.reset(new ClientResource); + client_resource->client_thread_pool = new ThreadPool(FLAGS_tera_sdk_thread_max_num); + if (FLAGS_tera_sdk_client_for_gtxn) { + client_resource->client_gtxn_thread_pool = new ThreadPool(FLAGS_tera_gtxn_thread_max_num); + } + } else { + client_resource = std::move(it->second); + } + + std::shared_ptr client(new ClientImpl(client_options, + client_resource->client_thread_pool, + client_resource->client_gtxn_thread_pool)); + if (client && client->Login(err)) { + client_resource->wp_client_impl = client; + if (!client_existed) { + cluster_client_map->insert(std::pair>( + confpath, std::move(client_resource))); } else { - if (err != NULL) { - std::string reason = "new ClientImpl faield"; - err->SetFailed(ErrorCode::kSystem, reason); - } - return NULL; + it->second = std::move(client_resource); } - return new ClientWrapper(client); + } else { + return NULL; + } + return new ClientWrapper(client); } Client* Client::NewClient(const string& confpath, ErrorCode* err) { - return NewClient(confpath, "teracli", err); + return NewClient(confpath, "teracli", err); } -Client* Client::NewClient() { - return NewClient("", "teracli", NULL); -} +Client* Client::NewClient() { return NewClient("", "teracli", NULL); } void Client::SetGlogIsInitialized() { - MutexLock locker(&g_mutex); - g_is_glog_init = true; + MutexLock locker(&g_mutex); + g_is_glog_init = true; } -} // namespace tera +} // namespace tera diff --git a/src/sdk/client_impl.h b/src/sdk/client_impl.h index 429ae85b7..1850a51fe 100644 --- a/src/sdk/client_impl.h +++ b/src/sdk/client_impl.h @@ -14,280 +14,306 @@ #include "tera.h" #include "common/timer.h" #include +#include "access/access_builder.h" using std::string; namespace tera { +using RoleList = std::set; +using VerificationInfo = std::pair; +using UserVerificationInfoList = std::map; + struct TSInfo { - std::string addr; - std::string query_status; - std::string status; - uint64_t data_size; - uint64_t update_time; - uint32_t onload_count; - uint32_t onsplit_count; + std::string addr; + std::string query_status; + std::string status; + uint64_t data_size; + uint64_t update_time; + uint32_t onload_count; + uint32_t onsplit_count; }; struct ClientOptions { - std::string confpath; - std::string flagfile; + std::string confpath; + std::string flagfile; + + std::string tera_auth_name; + std::string tera_auth_token; + + std::string tera_auth_policy; - std::string user_identity; - std::string user_passcode; + std::string user_identity; + std::string user_passcode; - ClientOptions() {} - ~ClientOptions() {} + ClientOptions() { + auth::AccessUtils::GetAuthPolicy(AuthPolicyType::kNoneAuthPolicy, &tera_auth_policy); + } + ~ClientOptions() {} }; class TableImpl; class ClientImpl : public Client, public std::enable_shared_from_this { -public: - explicit ClientImpl(const ClientOptions& client_option, - ThreadPool* sdk_thread_pool, - ThreadPool* sdk_gtxn_thread_pool); + public: + explicit ClientImpl(const ClientOptions& client_option, ThreadPool* sdk_thread_pool, + ThreadPool* sdk_gtxn_thread_pool); - virtual ~ClientImpl(); + virtual ~ClientImpl(); - virtual bool CreateTable(const TableDescriptor& desc, ErrorCode* err); + virtual bool CreateTable(const TableDescriptor& desc, ErrorCode* err); - virtual bool CreateTable(const TableDescriptor& desc, - const std::vector& tablet_delim, - ErrorCode* err); + virtual bool CreateTable(const TableDescriptor& desc, const std::vector& tablet_delim, + ErrorCode* err); - virtual bool UpdateTable(const TableDescriptor& desc, ErrorCode* err); - virtual bool UpdateTableSchema(const TableDescriptor& desc, ErrorCode* err); - virtual bool UpdateCheck(const std::string& table_name, bool* done, ErrorCode* err); + virtual bool CreateTable(const TableDescriptor& desc, int64_t hash_num, ErrorCode* err); - virtual bool DeleteTable(const std::string& name, ErrorCode* err); - virtual bool DropTable(const std::string& name, ErrorCode* err); + virtual bool UpdateTable(const TableDescriptor& desc, ErrorCode* err); + virtual bool UpdateTableSchema(const TableDescriptor& desc, ErrorCode* err); + virtual bool UpdateCheck(const std::string& table_name, bool* done, ErrorCode* err); - virtual bool DisableTable(const std::string& name, ErrorCode* err); + virtual bool DeleteTable(const std::string& name, ErrorCode* err); + virtual bool DropTable(const std::string& name, ErrorCode* err); - virtual bool EnableTable(const std::string& name, ErrorCode* err); + virtual bool DisableTable(const std::string& name, ErrorCode* err); - virtual bool CreateUser(const std::string& user, - const std::string& password, ErrorCode* err); - virtual bool DeleteUser(const std::string& user, ErrorCode* err); - virtual bool ChangePwd(const std::string& user, - const std::string& password, ErrorCode* err); - virtual bool ShowUser(const std::string& user, std::vector& user_groups, - ErrorCode* err); - virtual bool AddUserToGroup(const std::string& user, - const std::string& group, ErrorCode* err); - virtual bool DeleteUserFromGroup(const std::string& user, - const std::string& group, ErrorCode* err); - bool OperateUser(UserInfo& operated_user, UserOperateType type, - std::vector& user_groups, ErrorCode* err); + virtual bool EnableTable(const std::string& name, ErrorCode* err); - virtual Table* OpenTable(const string& table_name, ErrorCode* err); - - virtual bool GetTabletLocation(const string& table_name, - std::vector* tablets, + virtual bool CreateUser(const std::string& user, const std::string& password, ErrorCode* err); + virtual bool DeleteUser(const std::string& user, ErrorCode* err); + virtual bool ChangePwd(const std::string& user, const std::string& password, ErrorCode* err); + virtual bool ShowUser(const std::string& user, std::vector& user_groups, + ErrorCode* err); + virtual bool AddUserToGroup(const std::string& user, const std::string& group, ErrorCode* err); + virtual bool DeleteUserFromGroup(const std::string& user, const std::string& group, ErrorCode* err); + bool OperateUser(UserInfo& operated_user, UserOperateType type, + std::vector& user_groups, ErrorCode* err); + + virtual Table* OpenTable(const string& table_name, ErrorCode* err); + virtual Table* OpenTable(const string& table_name, + std::function hash_method, + ErrorCode* err); + + virtual bool GetTabletLocation(const string& table_name, std::vector* tablets, + ErrorCode* err); - virtual TableDescriptor* GetTableDescriptor(const string& table_name, ErrorCode* err); + virtual TableDescriptor* GetTableDescriptor(const string& table_name, ErrorCode* err); - virtual bool List(std::vector* table_list, ErrorCode* err); + virtual bool List(std::vector* table_list, ErrorCode* err); - virtual bool List(const string& table_name, TableInfo* table_info, - std::vector* tablet_list, ErrorCode* err); + virtual bool List(const string& table_name, TableInfo* table_info, + std::vector* tablet_list, ErrorCode* err); - virtual bool IsTableExist(const string& table_name, ErrorCode* err); + virtual bool IsTableExist(const string& table_name, ErrorCode* err); - virtual bool IsTableEnabled(const string& table_name, ErrorCode* err); + virtual bool IsTableEnabled(const string& table_name, ErrorCode* err); - virtual bool IsTableEmpty(const string& table_name, ErrorCode* err); + virtual bool IsTableEmpty(const string& table_name, ErrorCode* err); - virtual bool CmdCtrl(const string& command, - const std::vector& arg_list, - bool* bool_result, - string* str_result, - ErrorCode* err); + virtual bool CmdCtrl(const string& command, const std::vector& arg_list, + bool* bool_result, string* str_result, ErrorCode* err); - virtual Transaction* NewGlobalTransaction(); + virtual Transaction* NewGlobalTransaction(); - bool ShowTableSchema(const string& name, TableSchema* meta, ErrorCode* err); + bool ShowTableSchema(const string& name, TableSchema* meta, ErrorCode* err); - bool ShowTablesInfo(const string& name, TableMeta* meta, - TabletMetaList* tablet_list, ErrorCode* err); + bool ShowTablesInfo(const string& name, TableMeta* meta, TabletMetaList* tablet_list, + ErrorCode* err); - bool ShowTablesInfo(TableMetaList* table_list, TabletMetaList* tablet_list, - bool is_brief, ErrorCode* err); + bool ShowTablesInfo(TableMetaList* table_list, TabletMetaList* tablet_list, bool is_brief, + ErrorCode* err); - bool ShowTabletNodesInfo(const string& addr, TabletNodeInfo* info, - TabletMetaList* tablet_list, ErrorCode* err); + bool ShowTabletNodesInfo(const string& addr, TabletNodeInfo* info, TabletMetaList* tablet_list, + ErrorCode* err); - bool ShowTabletNodesInfo(std::vector* infos, ErrorCode* err); + bool ShowTabletNodesInfo(std::vector* infos, ErrorCode* err); - void CloseTable(const string& table_name) {} + void CloseTable(const string& table_name) {} - bool IsClientAlive(const string& path); + bool IsClientAlive(const string& path); - string ClientSession(); + string ClientSession(); - sdk::ClusterFinder* GetClusterFinder(); + sdk::ClusterFinder* GetClusterFinder(); -private: - std::shared_ptr OpenTableInternal(const string& table_name, ErrorCode* err); + std::shared_ptr GetAccessBuilder(); - bool ListInternal(std::vector* table_list, - std::vector* tablet_list, - const string& start_table_name, - const string& start_tablet_key, - uint32_t max_table_found, - uint32_t max_tablet_found, - ErrorCode* err); + bool Login(ErrorCode* err); + + // Auth + bool UpdateUgi(const std::string& user_name, const std::string& passwd, ErrorCode* err); + bool DelUgi(const std::string& user_name, ErrorCode* err); + bool ShowUgi(UserVerificationInfoList* user_verification_info_list, ErrorCode* err); + + bool AddRole(const std::string& role_name, ErrorCode* err); + bool DelRole(const std::string& role_name, ErrorCode* err); + bool GrantRole(const std::string& role_name, const std::string& user_name, ErrorCode* err); + bool RevokeRole(const std::string& role_name, const std::string& user_name, ErrorCode* err); + bool ShowRole(std::vector* roles_list, ErrorCode* err); + + bool SetAuthPolicy(const std::string& table_name, const std::string& auth_policy, ErrorCode* err); + bool ShowAuthPolicy(std::map* table_auth_policy_list, ErrorCode* err); - bool ParseTableEntry(const TableMeta meta, - std::vector* table_list); - - bool ParseTabletEntry(const TabletMeta& meta, - std::vector* tablet_list); - - std::string GetUserToken(const std::string& user, const std::string& password); - void DoShowUser(OperateUserResponse& response, - std::vector& user_groups); - bool CheckReturnValue(StatusCode status, std::string& reason, ErrorCode* err); - - /// show all tables info: `table_name' should be an empty string - /// show a single table info: `table_name' should be the table name - bool DoShowTablesInfo(TableMetaList* table_list, - TabletMetaList* tablet_list, - const string& table_name, - bool is_brief, - ErrorCode* err); - - bool RegisterSelf(); - -private: - ClientImpl(const ClientImpl&); - void operator=(const ClientImpl&); - ThreadPool* thread_pool_; - ThreadPool* gtxn_thread_pool_; - - ClientOptions client_options_; - - /// cluster_ could cache the master_addr & root_table_addr. - /// if there is no cluster_, - /// we have to access zookeeper whenever we need master_addr or root_table_addr. - /// if there is cluster_, - /// we save master_addr & root_table_addr in cluster_, access zookeeper only once. - sdk::ClientZkAdapterBase* client_zk_adapter_; - sdk::ClusterFinder* cluster_; - sdk::ClusterFinder* tso_cluster_; - sdk::PerfCollecter* collecter_; - std::string session_str_; - - Mutex open_table_mutex_; - struct TableHandle { - std::weak_ptr handle; - Mutex mu; - ErrorCode err; - TableHandle() {} - }; - std::map open_table_map_; + // Quota + bool SetQuota(const TableQuota& table_quota, ErrorCode* err); + bool ShowQuotaBrief(std::vector* table_quota_list, ErrorCode* err); + bool ShowQuotaDetail(std::vector* ts_quota_list, ErrorCode* err); + + private: + std::shared_ptr OpenTableInternal( + const string& table_name, std::function hash_method, + ErrorCode* err); + + bool ListInternal(std::vector* table_list, std::vector* tablet_list, + const string& start_table_name, const string& start_tablet_key, + uint32_t max_table_found, uint32_t max_tablet_found, ErrorCode* err); + + bool ParseTableEntry(const TableMeta meta, std::vector* table_list); + + bool ParseTabletEntry(const TabletMeta& meta, std::vector* tablet_list); + + std::string GetUserToken(const std::string& user, const std::string& password); + void DoShowUser(OperateUserResponse& response, std::vector& user_groups); + bool CheckReturnValue(StatusCode status, std::string& reason, ErrorCode* err); + + /// show all tables info: `table_name' should be an empty string + /// show a single table info: `table_name' should be the table name + bool DoShowTablesInfo(TableMetaList* table_list, TabletMetaList* tablet_list, + const string& table_name, bool is_brief, ErrorCode* err); + + bool RegisterSelf(); + + private: + ClientImpl(const ClientImpl&); + void operator=(const ClientImpl&); + ThreadPool* thread_pool_; + ThreadPool* gtxn_thread_pool_; + + ClientOptions client_options_; + + /// cluster_ could cache the master_addr & root_table_addr. + /// if there is no cluster_, + /// we have to access zookeeper whenever we need master_addr or + /// root_table_addr. + /// if there is cluster_, + /// we save master_addr & root_table_addr in cluster_, access zookeeper + /// only once. + sdk::ClientZkAdapterBase* client_zk_adapter_; + sdk::ClusterFinder* cluster_; + sdk::ClusterFinder* tso_cluster_; + sdk::PerfCollecter* collecter_; + std::string session_str_; + + Mutex open_table_mutex_; + struct TableHandle { + std::weak_ptr handle; + Mutex mu; + ErrorCode err; + TableHandle() {} + }; + std::map open_table_map_; + std::shared_ptr access_builder_; }; // Compatibility with old interface (delete *client) class ClientWrapper : public Client { -public: - explicit ClientWrapper(std::shared_ptr client_impl) : client_impl_(client_impl) {} - virtual ~ClientWrapper() {} - - Table* OpenTable(const std::string& table_name, ErrorCode* err) { - return client_impl_->OpenTable(table_name, err); - } - bool CreateTable(const TableDescriptor& desc, ErrorCode* err) { - return client_impl_->CreateTable(desc, err); - } - bool CreateTable(const TableDescriptor& desc, - const std::vector& tablet_delim, - ErrorCode* err) { - return client_impl_->CreateTable(desc, tablet_delim, err); - } - bool UpdateTableSchema(const TableDescriptor& desc, ErrorCode* err) { - return client_impl_->UpdateTableSchema(desc, err); - } - bool UpdateCheck(const std::string& table_name, bool* done, ErrorCode* err) { - return client_impl_->UpdateCheck(table_name, done, err); - } - bool DisableTable(const std::string& name, ErrorCode* err) { - return client_impl_->DisableTable(name, err); - } - bool DropTable(const std::string& name, ErrorCode* err) { - return client_impl_->DropTable(name, err); - } - bool EnableTable(const std::string& name, ErrorCode* err) { - return client_impl_->EnableTable(name, err); - } - TableDescriptor* GetTableDescriptor(const std::string& table_name, ErrorCode* err) { - return client_impl_->GetTableDescriptor(table_name, err); - } - bool List(std::vector* table_list, ErrorCode* err) { - return client_impl_->List(table_list, err); - } - bool List(const std::string& table_name, TableInfo* table_info, - std::vector* tablet_list, ErrorCode* err) { - return client_impl_->List(table_name, table_info, tablet_list, err); - } - bool IsTableExist(const std::string& table_name, ErrorCode* err) { - return client_impl_->IsTableExist(table_name, err); - } - bool IsTableEnabled(const std::string& table_name, ErrorCode* err) { - return client_impl_->IsTableEnabled(table_name, err); - } - bool IsTableEmpty(const std::string& table_name, ErrorCode* err) { - return client_impl_->IsTableEmpty(table_name, err); - } - bool CmdCtrl(const std::string& command, const std::vector& arg_list, - bool* bool_result, std::string* str_result, ErrorCode* err) { - return client_impl_->CmdCtrl(command, arg_list, bool_result, str_result, err); - } - bool CreateUser(const std::string& user, const std::string& password, ErrorCode* err) { - return client_impl_->CreateUser(user, password, err); - } - bool DeleteUser(const std::string& user, ErrorCode* err) { - return client_impl_->DeleteUser(user, err); - } - bool ChangePwd(const std::string& user, const std::string& password, ErrorCode* err) { - return client_impl_->ChangePwd(user, password, err); - } - bool ShowUser(const std::string& user, std::vector& user_groups, - ErrorCode* err) { - return client_impl_->ShowUser(user, user_groups, err); - } - bool AddUserToGroup(const std::string& user, const std::string& group, ErrorCode* err) { - return client_impl_->AddUserToGroup(user, group, err); - } - bool DeleteUserFromGroup(const std::string& user, - const std::string& group, ErrorCode* err) { - return client_impl_->DeleteUserFromGroup(user, group, err); - } - Transaction* NewGlobalTransaction() { - return client_impl_->NewGlobalTransaction(); - } - - /* DEPRECATED functions */ - bool DeleteTable(const std::string& name, ErrorCode* err) { - return client_impl_->DeleteTable(name, err); - } - bool UpdateTable(const TableDescriptor& desc, ErrorCode* err) { - return client_impl_->UpdateTable(desc, err); - } - bool GetTabletLocation(const std::string& table_name, std::vector* tablets, - ErrorCode* err) { - return client_impl_->GetTabletLocation(table_name, tablets, err); - } - - std::shared_ptr GetClientImpl() { - return client_impl_; - } - -private: - std::shared_ptr client_impl_; + public: + explicit ClientWrapper(const std::shared_ptr& client_impl) + : client_impl_(client_impl) {} + virtual ~ClientWrapper() {} + + Table* OpenTable(const std::string& table_name, ErrorCode* err) { + return client_impl_->OpenTable(table_name, err); + } + Table* OpenTable(const string& table_name, + std::function hash_method, ErrorCode* err) { + return client_impl_->OpenTable(table_name, hash_method, err); + } + bool CreateTable(const TableDescriptor& desc, ErrorCode* err) { + return client_impl_->CreateTable(desc, err); + } + bool CreateTable(const TableDescriptor& desc, const std::vector& tablet_delim, + ErrorCode* err) { + return client_impl_->CreateTable(desc, tablet_delim, err); + } + bool CreateTable(const TableDescriptor& desc, int64_t hash_num, ErrorCode* err) { + return client_impl_->CreateTable(desc, hash_num, err); + } + bool UpdateTableSchema(const TableDescriptor& desc, ErrorCode* err) { + return client_impl_->UpdateTableSchema(desc, err); + } + bool UpdateCheck(const std::string& table_name, bool* done, ErrorCode* err) { + return client_impl_->UpdateCheck(table_name, done, err); + } + bool DisableTable(const std::string& name, ErrorCode* err) { + return client_impl_->DisableTable(name, err); + } + bool DropTable(const std::string& name, ErrorCode* err) { + return client_impl_->DropTable(name, err); + } + bool EnableTable(const std::string& name, ErrorCode* err) { + return client_impl_->EnableTable(name, err); + } + TableDescriptor* GetTableDescriptor(const std::string& table_name, ErrorCode* err) { + return client_impl_->GetTableDescriptor(table_name, err); + } + bool List(std::vector* table_list, ErrorCode* err) { + return client_impl_->List(table_list, err); + } + bool List(const std::string& table_name, TableInfo* table_info, + std::vector* tablet_list, ErrorCode* err) { + return client_impl_->List(table_name, table_info, tablet_list, err); + } + bool IsTableExist(const std::string& table_name, ErrorCode* err) { + return client_impl_->IsTableExist(table_name, err); + } + bool IsTableEnabled(const std::string& table_name, ErrorCode* err) { + return client_impl_->IsTableEnabled(table_name, err); + } + bool IsTableEmpty(const std::string& table_name, ErrorCode* err) { + return client_impl_->IsTableEmpty(table_name, err); + } + bool CmdCtrl(const std::string& command, const std::vector& arg_list, + bool* bool_result, std::string* str_result, ErrorCode* err) { + return client_impl_->CmdCtrl(command, arg_list, bool_result, str_result, err); + } + bool CreateUser(const std::string& user, const std::string& password, ErrorCode* err) { + return client_impl_->CreateUser(user, password, err); + } + bool DeleteUser(const std::string& user, ErrorCode* err) { + return client_impl_->DeleteUser(user, err); + } + bool ChangePwd(const std::string& user, const std::string& password, ErrorCode* err) { + return client_impl_->ChangePwd(user, password, err); + } + bool ShowUser(const std::string& user, std::vector& user_groups, ErrorCode* err) { + return client_impl_->ShowUser(user, user_groups, err); + } + bool AddUserToGroup(const std::string& user, const std::string& group, ErrorCode* err) { + return client_impl_->AddUserToGroup(user, group, err); + } + bool DeleteUserFromGroup(const std::string& user, const std::string& group, ErrorCode* err) { + return client_impl_->DeleteUserFromGroup(user, group, err); + } + Transaction* NewGlobalTransaction() { return client_impl_->NewGlobalTransaction(); } + + /* DEPRECATED functions */ + bool DeleteTable(const std::string& name, ErrorCode* err) { + return client_impl_->DeleteTable(name, err); + } + bool UpdateTable(const TableDescriptor& desc, ErrorCode* err) { + return client_impl_->UpdateTable(desc, err); + } + bool GetTabletLocation(const std::string& table_name, std::vector* tablets, + ErrorCode* err) { + return client_impl_->GetTabletLocation(table_name, tablets, err); + } + + std::shared_ptr GetClientImpl() { return client_impl_; } + + private: + std::shared_ptr client_impl_; }; - -} // namespace tera -#endif // TERA_SDK_CLIENT_IMPL_ +} // namespace tera +#endif // TERA_SDK_CLIENT_IMPL_ diff --git a/src/sdk/cookie.cc b/src/sdk/cookie.cc index 52588619f..1a1b50c75 100644 --- a/src/sdk/cookie.cc +++ b/src/sdk/cookie.cc @@ -26,7 +26,8 @@ namespace sdk { /* * If there is overtime/legacy cookie-lock-file, then delete it. - * Normally, process which created cookie-lock-file would delete it after dumped. + * Normally, process which created cookie-lock-file would delete it after + *dumped. * But if this process crashed before delete cookie-lock-file. * Another process could call this function to delete legacy cookie-lock-file. * @@ -36,264 +37,261 @@ namespace sdk { * create_time + timeout > current_time :=> legacy cookie-lock-file */ static void DeleteLegacyCookieLockFile(const std::string& lock_file, int timeout_seconds) { - struct stat lock_stat; - int ret = stat(lock_file.c_str(), &lock_stat); - if (ret == -1) { - return; - } - time_t curr_time = time(NULL); - if (((unsigned int)curr_time - lock_stat.st_atime) > timeout_seconds) { - // It's a long time since creation of cookie-lock-file, dumping must has done. - // So, deletes the cookie-lock-file is safe. - int errno_saved = -1; - if (unlink(lock_file.c_str()) == -1) { - errno_saved = errno; - LOG(INFO) << "[UTILS COOKIE] fail to delete cookie-lock-file: " << lock_file - << ". reason: " << strerror(errno_saved); - } + struct stat lock_stat; + int ret = stat(lock_file.c_str(), &lock_stat); + if (ret == -1) { + return; + } + time_t curr_time = time(NULL); + if (((unsigned int)curr_time - lock_stat.st_atime) > timeout_seconds) { + // It's a long time since creation of cookie-lock-file, dumping must has + // done. + // So, deletes the cookie-lock-file is safe. + int errno_saved = -1; + if (unlink(lock_file.c_str()) == -1) { + errno_saved = errno; + LOG(INFO) << "[UTILS COOKIE] fail to delete cookie-lock-file: " << lock_file + << ". reason: " << strerror(errno_saved); } + } } static void CloseAndRemoveCookieLockFile(int lock_fd, const std::string& cookie_lock_file) { - if (lock_fd < 0) { - return; - } - close(lock_fd); - if (unlink(cookie_lock_file.c_str()) == -1) { - int errno_saved = errno; - LOG(INFO) << "[UTILS COOKIE] fail to delete cookie-lock-file: " << cookie_lock_file - << ". reason: " << strerror(errno_saved); - } + if (lock_fd < 0) { + return; + } + close(lock_fd); + if (unlink(cookie_lock_file.c_str()) == -1) { + int errno_saved = errno; + LOG(INFO) << "[UTILS COOKIE] fail to delete cookie-lock-file: " << cookie_lock_file + << ". reason: " << strerror(errno_saved); + } } static bool CalculateChecksumOfData(std::fstream& outfile, long size, std::string* hash_str) { - // 100 MB, (100 * 1024 * 1024) / 250 = 419,430 - // cookie文件中,每个tablet的缓存大小约为100~200 Bytes,不妨计为250 Bytes, - // 那么,100MB可以支持约40万个tablets - const long MAX_SIZE = 100 * 1024 * 1024; + // 100 MB, (100 * 1024 * 1024) / 250 = 419,430 + // cookie文件中,每个tablet的缓存大小约为100~200 Bytes,不妨计为250 Bytes, + // 那么,100MB可以支持约40万个tablets + const long MAX_SIZE = 100 * 1024 * 1024; - if(size > MAX_SIZE || size <= 0) { - LOG(INFO) << "[UTILS COOKIE] invalid size : " << size; - return false; - } - if(hash_str == NULL) { - LOG(INFO) << "[UTILS COOKIE] input argument `hash_str' is NULL"; - return false; - } - std::string data(size, '\0'); - outfile.read(const_cast(data.data()), size); - if(outfile.fail()) { - LOG(INFO) << "[UTILS COOKIE] fail to read cookie file"; - return false; - } - if (GetHashString(data, 0, hash_str) != 0) { - return false; - } - return true; + if (size > MAX_SIZE || size <= 0) { + LOG(INFO) << "[UTILS COOKIE] invalid size : " << size; + return false; + } + if (hash_str == NULL) { + LOG(INFO) << "[UTILS COOKIE] input argument `hash_str' is NULL"; + return false; + } + std::string data(size, '\0'); + outfile.read(const_cast(data.data()), size); + if (outfile.fail()) { + LOG(INFO) << "[UTILS COOKIE] fail to read cookie file"; + return false; + } + if (GetHashString(data, 0, hash_str) != 0) { + return false; + } + return true; } static bool AppendChecksumToCookie(const std::string& cookie_file) { - std::fstream outfile(cookie_file.c_str(), std::ios_base::in | std::ios_base::out); - int errno_saved = errno; - if(outfile.fail()) { - LOG(INFO) << "[UTILS COOKIE] fail to open " << cookie_file.c_str() - << " " << strerror(errno_saved); - return false; - } + std::fstream outfile(cookie_file.c_str(), std::ios_base::in | std::ios_base::out); + int errno_saved = errno; + if (outfile.fail()) { + LOG(INFO) << "[UTILS COOKIE] fail to open " << cookie_file.c_str() << " " + << strerror(errno_saved); + return false; + } - // get file size, in bytes - outfile.seekp(0, std::ios_base::end); - long file_size = outfile.tellp(); - if(file_size < HASH_STRING_LEN) { - LOG(INFO) << "[UTILS COOKIE] invalid file size: " << file_size; - return false; - } + // get file size, in bytes + outfile.seekp(0, std::ios_base::end); + long file_size = outfile.tellp(); + if (file_size < HASH_STRING_LEN) { + LOG(INFO) << "[UTILS COOKIE] invalid file size: " << file_size; + return false; + } - // calculates checksum according to cookie file content - outfile.seekp(0, std::ios_base::beg); - std::string hash_str; - if(!CalculateChecksumOfData(outfile, file_size, &hash_str)) { - return false; - } - LOG(INFO) << "[UTILS COOKIE] file checksum: " << hash_str; + // calculates checksum according to cookie file content + outfile.seekp(0, std::ios_base::beg); + std::string hash_str; + if (!CalculateChecksumOfData(outfile, file_size, &hash_str)) { + return false; + } + LOG(INFO) << "[UTILS COOKIE] file checksum: " << hash_str; - // append checksum to the end of cookie file - outfile.seekp(0, std::ios_base::end); - outfile.write(hash_str.c_str(), hash_str.length()); - if(outfile.fail()) { - LOG(INFO) << "[UTILS COOKIE] fail to append checksum"; - return false; - } - outfile.close(); - return true; + // append checksum to the end of cookie file + outfile.seekp(0, std::ios_base::end); + outfile.write(hash_str.c_str(), hash_str.length()); + if (outfile.fail()) { + LOG(INFO) << "[UTILS COOKIE] fail to append checksum"; + return false; + } + outfile.close(); + return true; } static bool AddOtherUserWritePermission(const std::string& cookie_file) { - struct stat st; - int ret = stat(cookie_file.c_str(), &st); - if(ret != 0) { - return false; - } - if((st.st_mode & S_IWOTH) == S_IWOTH) { - // other user has write permission already - return true; - } - return chmod(cookie_file.c_str(), - S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH) == 0; + struct stat st; + int ret = stat(cookie_file.c_str(), &st); + if (ret != 0) { + return false; + } + if ((st.st_mode & S_IWOTH) == S_IWOTH) { + // other user has write permission already + return true; + } + return chmod(cookie_file.c_str(), S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH) == 0; } static bool IsCookieChecksumRight(const std::string& cookie_file) { - std::fstream outfile(cookie_file.c_str(), std::ios_base::in | std::ios_base::out); - int errno_saved = errno; - if(outfile.fail()) { - LOG(INFO) << "[UTILS COOKIE] fail to open " << cookie_file.c_str() - << " " << strerror(errno_saved); - return false; - } + std::fstream outfile(cookie_file.c_str(), std::ios_base::in | std::ios_base::out); + int errno_saved = errno; + if (outfile.fail()) { + LOG(INFO) << "[UTILS COOKIE] fail to open " << cookie_file.c_str() << " " + << strerror(errno_saved); + return false; + } - // gets file size, in bytes - outfile.seekp(0, std::ios_base::end); - long file_size = outfile.tellp(); - if(file_size < HASH_STRING_LEN) { - LOG(INFO) << "[UTILS COOKIE] invalid file size: " << file_size; - return false; - } + // gets file size, in bytes + outfile.seekp(0, std::ios_base::end); + long file_size = outfile.tellp(); + if (file_size < HASH_STRING_LEN) { + LOG(INFO) << "[UTILS COOKIE] invalid file size: " << file_size; + return false; + } - // calculates checksum according to cookie file content - std::string hash_str; - outfile.seekp(0, std::ios_base::beg); - if(!CalculateChecksumOfData(outfile, file_size - HASH_STRING_LEN, &hash_str)) { - return false; - } + // calculates checksum according to cookie file content + std::string hash_str; + outfile.seekp(0, std::ios_base::beg); + if (!CalculateChecksumOfData(outfile, file_size - HASH_STRING_LEN, &hash_str)) { + return false; + } - // gets checksum in cookie file - char hash_str_saved[HASH_STRING_LEN + 1] = {'\0'}; - outfile.read(hash_str_saved, HASH_STRING_LEN); - if(outfile.fail()) { - int errno_saved = errno; - LOG(INFO) << "[UTILS COOKIE] fail to get checksum: " << strerror(errno_saved); - return false; - } + // gets checksum in cookie file + char hash_str_saved[HASH_STRING_LEN + 1] = {'\0'}; + outfile.read(hash_str_saved, HASH_STRING_LEN); + if (outfile.fail()) { + int errno_saved = errno; + LOG(INFO) << "[UTILS COOKIE] fail to get checksum: " << strerror(errno_saved); + return false; + } - outfile.close(); - return strncmp(hash_str.c_str(), hash_str_saved, HASH_STRING_LEN) == 0; + outfile.close(); + return strncmp(hash_str.c_str(), hash_str_saved, HASH_STRING_LEN) == 0; } -bool RestoreCookie(const std::string cookie_file, bool delete_broken_cookie, SdkCookie *cookie) { - if (!IsExist(cookie_file)) { - // cookie file is not exist - LOG(INFO) << "[UTILS COOKIE] cookie file not found"; - return false; - } - if(!IsCookieChecksumRight(cookie_file)) { - if (!delete_broken_cookie) { - // do nothing - } else if (unlink(cookie_file.c_str()) == -1) { - int errno_saved = errno; - LOG(INFO) << "[UTILS COOKIE] fail to delete broken cookie file: " << cookie_file +bool RestoreCookie(const std::string& cookie_file, bool delete_broken_cookie, SdkCookie* cookie) { + if (!IsExist(cookie_file)) { + // cookie file is not exist + LOG(INFO) << "[UTILS COOKIE] cookie file not found"; + return false; + } + if (!IsCookieChecksumRight(cookie_file)) { + if (!delete_broken_cookie) { + // do nothing + } else if (unlink(cookie_file.c_str()) == -1) { + int errno_saved = errno; + LOG(INFO) << "[UTILS COOKIE] fail to delete broken cookie file: " << cookie_file << ". reason: " << strerror(errno_saved); - } else { - LOG(INFO) << "[UTILS COOKIE] delete broken cookie file: " << cookie_file; - } - return false; + } else { + LOG(INFO) << "[UTILS COOKIE] delete broken cookie file: " << cookie_file; } + return false; + } - FileStream fs; - if (!fs.Open(cookie_file, FILE_READ)) { - LOG(INFO) << "[UTILS COOKIE] fail to open " << cookie_file; - return false; - } - RecordReader record_reader; - record_reader.Reset(&fs); - if (!record_reader.ReadNextMessage(cookie)) { - LOG(INFO) << "[UTILS COOKIE] fail to parse sdk cookie, file: " << cookie_file; - return false; - } - fs.Close(); - return true; + FileStream fs; + if (!fs.Open(cookie_file, FILE_READ)) { + LOG(INFO) << "[UTILS COOKIE] fail to open " << cookie_file; + return false; + } + RecordReader record_reader; + record_reader.Reset(&fs); + if (!record_reader.ReadNextMessage(cookie)) { + LOG(INFO) << "[UTILS COOKIE] fail to parse sdk cookie, file: " << cookie_file; + return false; + } + fs.Close(); + return true; } -void DumpCookie(const std::string& cookie_file, - const std::string& cookie_lock_file, - const SdkCookie& cookie) { - int cookie_lock_file_timeout = 10; // in seconds - DeleteLegacyCookieLockFile(cookie_lock_file, cookie_lock_file_timeout); - int lock_fd = open(cookie_lock_file.c_str(), O_WRONLY | O_CREAT | O_EXCL, 0644); - if (lock_fd == -1) { - int errno_saved = errno; - if (errno != EEXIST) { - LOG(INFO) << "[UTILS COOKIE] failed to create cookie-lock-file: " << cookie_lock_file - << ". reason: " << strerror(errno_saved); - } - return; +void DumpCookie(const std::string& cookie_file, const std::string& cookie_lock_file, + const SdkCookie& cookie) { + int cookie_lock_file_timeout = 10; // in seconds + DeleteLegacyCookieLockFile(cookie_lock_file, cookie_lock_file_timeout); + int lock_fd = open(cookie_lock_file.c_str(), O_WRONLY | O_CREAT | O_EXCL, 0644); + if (lock_fd == -1) { + int errno_saved = errno; + if (errno != EEXIST) { + LOG(INFO) << "[UTILS COOKIE] failed to create cookie-lock-file: " << cookie_lock_file + << ". reason: " << strerror(errno_saved); } + return; + } - FileStream fs; - if (!fs.Open(cookie_file, FILE_WRITE)) { - LOG(INFO) << "[UTILS COOKIE] fail to open " << cookie_file; - CloseAndRemoveCookieLockFile(lock_fd, cookie_lock_file); - return; - } - RecordWriter record_writer; - record_writer.Reset(&fs); - if (!record_writer.WriteMessage(cookie)) { - LOG(INFO) << "[UTILS COOKIE] fail to write cookie file " << cookie_file; - fs.Close(); - CloseAndRemoveCookieLockFile(lock_fd, cookie_lock_file); - return; - } + FileStream fs; + if (!fs.Open(cookie_file, FILE_WRITE)) { + LOG(INFO) << "[UTILS COOKIE] fail to open " << cookie_file; + CloseAndRemoveCookieLockFile(lock_fd, cookie_lock_file); + return; + } + RecordWriter record_writer; + record_writer.Reset(&fs); + if (!record_writer.WriteMessage(cookie)) { + LOG(INFO) << "[UTILS COOKIE] fail to write cookie file " << cookie_file; fs.Close(); + CloseAndRemoveCookieLockFile(lock_fd, cookie_lock_file); + return; + } + fs.Close(); - if(!AppendChecksumToCookie(cookie_file)) { - LOG(INFO) << "[UTILS COOKIE] fail to append checksum to cookie file " << cookie_file; - CloseAndRemoveCookieLockFile(lock_fd, cookie_lock_file); - return; - } - if(!AddOtherUserWritePermission(cookie_file)) { - LOG(INFO) << "[UTILS COOKIE] fail to chmod cookie file " << cookie_file; - CloseAndRemoveCookieLockFile(lock_fd, cookie_lock_file); - return; - } - + if (!AppendChecksumToCookie(cookie_file)) { + LOG(INFO) << "[UTILS COOKIE] fail to append checksum to cookie file " << cookie_file; + CloseAndRemoveCookieLockFile(lock_fd, cookie_lock_file); + return; + } + if (!AddOtherUserWritePermission(cookie_file)) { + LOG(INFO) << "[UTILS COOKIE] fail to chmod cookie file " << cookie_file; CloseAndRemoveCookieLockFile(lock_fd, cookie_lock_file); - LOG(INFO) << "[UTILS COOKIE] update local cookie success: " << cookie_file; + return; + } + + CloseAndRemoveCookieLockFile(lock_fd, cookie_lock_file); + LOG(INFO) << "[UTILS COOKIE] update local cookie success: " << cookie_file; } bool DumpCookieFile(const std::string& cookie_file) { - SdkCookie cookie; - if (!RestoreCookie(cookie_file, false, &cookie)) { - std::cerr << "invalid cookie file" << std::endl; - return false; - } + SdkCookie cookie; + if (!RestoreCookie(cookie_file, false, &cookie)) { + std::cerr << "invalid cookie file" << std::endl; + return false; + } - for (int i = 0; i < cookie.tablets_size(); ++i) { - const TabletMeta& meta = cookie.tablets(i).meta(); - const std::string& start_key = meta.key_range().key_start(); - std::cout << meta.path() - << " [" << DebugString(start_key) - << " : " << DebugString(meta.key_range().key_end()) << "]" << std::endl; - } - return true; + for (int i = 0; i < cookie.tablets_size(); ++i) { + const TabletMeta& meta = cookie.tablets(i).meta(); + const std::string& start_key = meta.key_range().key_start(); + std::cout << meta.path() << " [" << DebugString(start_key) << " : " + << DebugString(meta.key_range().key_end()) << "]" << std::endl; + } + return true; } bool FindKeyInCookieFile(const std::string& cookie_file, const std::string& key) { - SdkCookie cookie; - if (!RestoreCookie(cookie_file, false, &cookie)) { - std::cerr << "invalid cookie file:" << std::endl; - return false; - } - for (int i = 0; i < cookie.tablets_size(); ++i) { - const TabletMeta& meta = cookie.tablets(i).meta(); - const std::string& start_key = meta.key_range().key_start(); - const std::string& end_key = meta.key_range().key_end(); - if ((key.compare(start_key) >= 0) - && (key.compare(end_key) < 0 || end_key == "")) { - std::cout << meta.path() << " @ " << meta.server_addr() << std::endl; - return true; - } - } + SdkCookie cookie; + if (!RestoreCookie(cookie_file, false, &cookie)) { + std::cerr << "invalid cookie file:" << std::endl; return false; + } + for (int i = 0; i < cookie.tablets_size(); ++i) { + const TabletMeta& meta = cookie.tablets(i).meta(); + const std::string& start_key = meta.key_range().key_start(); + const std::string& end_key = meta.key_range().key_end(); + if ((key.compare(start_key) >= 0) && (key.compare(end_key) < 0 || end_key == "")) { + std::cout << meta.path() << " @ " << meta.server_addr() << std::endl; + return true; + } + } + return false; } -} // namespace sdk -} // namespace tera +} // namespace sdk +} // namespace tera diff --git a/src/sdk/cookie.h b/src/sdk/cookie.h index 61975036e..8838cc42c 100644 --- a/src/sdk/cookie.h +++ b/src/sdk/cookie.h @@ -12,18 +12,16 @@ namespace tera { namespace sdk { -void DumpCookie(const std::string& cookie_file, - const std::string& cookie_lock_file, +void DumpCookie(const std::string& cookie_file, const std::string& cookie_lock_file, const SdkCookie& cookie); -bool RestoreCookie(const std::string cookie_file, - bool delete_broken_cookie_file, - SdkCookie *cookie); +bool RestoreCookie(const std::string& cookie_file, bool delete_broken_cookie_file, + SdkCookie* cookie); // helper bool DumpCookieFile(const std::string& cookie_file); bool FindKeyInCookieFile(const std::string& cookie_file, const std::string& key); -} // namespace sdk -} // namespace tera +} // namespace sdk +} // namespace tera -#endif // TERA_SDK_COOKIE_H_ +#endif // TERA_SDK_COOKIE_H_ diff --git a/src/sdk/filter.cc b/src/sdk/filter.cc new file mode 100644 index 000000000..e6da0f346 --- /dev/null +++ b/src/sdk/filter.cc @@ -0,0 +1,42 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// The Designs of Filter and related codes are inspired by hbase which is licensed under +// Apache 2.0 License (found in the LICENSE.Apache file in the root directory). Please refer to +// https://hbase.apache.org/2.0/apidocs/org/apache/hadoop/hbase/filter/Filter.html +// to see more detailed design of hbase filter. + +#include +#include +#include "tera.h" +#include "proto/filter.pb.h" +#include "filter_utils.h" + +namespace tera { +namespace filter { + +FilterBase::FilterBase() {} + +FilterBase::~FilterBase() {} + +FilterType FilterBase::Type() { return kUnDefinedFilter; } + +void FilterBase::Reset() {} + +Filter::ReturnCode FilterBase::FilterCell(const std::string& column_family, + const std::string& column_qualifier, + const std::string& value) { + return kIncludeCurCell; +} + +bool FilterBase::FilterRow() { return false; } + +bool FilterBase::SerializeTo(std::string* serialized_filter) { return false; } + +bool FilterBase::ParseFrom(const std::string& serialized_filter) { return false; } + +void FilterBase::GetAllColumn(ColumnSet* filter_column_set) {} + +} // namesapce filter +} // namesapce tera diff --git a/src/sdk/filter_comparator.cc b/src/sdk/filter_comparator.cc new file mode 100755 index 000000000..dcde5a1b9 --- /dev/null +++ b/src/sdk/filter_comparator.cc @@ -0,0 +1,233 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// The Designs of Filter and related codes are inspired by hbase which is licensed under +// Apache 2.0 License (found in the LICENSE.Apache file in the root directory). Please refer to +// https://hbase.apache.org/2.0/apidocs/org/apache/hadoop/hbase/filter/Filter.html +// to see more detailed design of hbase filter. + +#include +#include +#include "tera.h" +#include "proto/filter.pb.h" +#include "filter_utils.h" +#include "gflags/gflags.h" +#include "glog/logging.h" +#include "io/coding.h" + +namespace tera { +namespace filter { + +bool IntegerComparator::EncodeInteger(IntegerValueType value_type, uint64_t value, + std::string* encoded_value) { + char* buf; + switch (value_type) { + case IntegerValueType::kInt64: + case IntegerValueType::kUint64: + encoded_value->resize(sizeof(int64_t) + 1); + buf = const_cast(encoded_value->c_str()); + memcpy(buf, &value, sizeof(value)); + return true; + case IntegerValueType::kInt32: + case IntegerValueType::kUint32: + encoded_value->resize(sizeof(int32_t) + 1); + buf = const_cast(encoded_value->c_str()); + memcpy(buf, &value, sizeof(value)); + return true; + case IntegerValueType::kInt16: + case IntegerValueType::kUint16: + encoded_value->resize(sizeof(int16_t) + 1); + buf = const_cast(encoded_value->c_str()); + memcpy(buf, &value, sizeof(value)); + return true; + case IntegerValueType::kInt8: + case IntegerValueType::kUint8: + encoded_value->resize(sizeof(int8_t) + 1); + buf = const_cast(encoded_value->c_str()); + memcpy(buf, &value, sizeof(value)); + return true; + default: + LOG(ERROR) << "not support IntegerValueType"; + return false; + } +} + +bool IntegerComparator::DecodeInteger(IntegerValueType value_type, const std::string& value, + uint64_t* decoded_value) { + switch (value_type) { + case IntegerValueType::kInt64: + case IntegerValueType::kUint64: + memcpy(decoded_value, const_cast(value.c_str()), sizeof(uint64_t)); + return true; + case IntegerValueType::kInt32: + case IntegerValueType::kUint32: + memcpy(decoded_value, const_cast(value.c_str()), sizeof(uint32_t)); + return true; + case IntegerValueType::kInt16: + case IntegerValueType::kUint16: + memcpy(decoded_value, const_cast(value.c_str()), sizeof(uint16_t)); + return true; + case IntegerValueType::kInt8: + case IntegerValueType::kUint8: + memcpy(decoded_value, const_cast(value.c_str()), sizeof(uint8_t)); + return true; + default: + LOG(ERROR) << "not support IntegerValueType"; + return false; + } +} + +IntegerComparator::IntegerComparator() + : value_type_(IntegerValueType::kUnknown), integer_value_(0) {} + +IntegerComparator::IntegerComparator(IntegerValueType value_type, uint64_t value) + : value_type_(value_type), integer_value_(value) {} + +IntegerComparator::~IntegerComparator() {} + +ComparatorType IntegerComparator::Type() { return ComparatorType::kIntegerComparator; } + +int IntegerComparator::CompareWith(const std::string& value) { + uint64_t decoded_value = 0; + if (!DecodeInteger(value_type_, value, &decoded_value)) { + return 0; + } + switch (value_type_) { + case IntegerValueType::kInt64: + return Compare(static_cast(decoded_value), static_cast(integer_value_)); + case IntegerValueType::kUint64: + return Compare(static_cast(decoded_value), static_cast(integer_value_)); + case IntegerValueType::kInt32: + return Compare(static_cast(decoded_value), static_cast(integer_value_)); + case IntegerValueType::kUint32: + return Compare(static_cast(decoded_value), static_cast(integer_value_)); + case IntegerValueType::kInt16: + return Compare(static_cast(decoded_value), static_cast(integer_value_)); + case IntegerValueType::kUint16: + return Compare(static_cast(decoded_value), static_cast(integer_value_)); + case IntegerValueType::kInt8: + return Compare(static_cast(decoded_value), static_cast(integer_value_)); + case IntegerValueType::kUint8: + return Compare(static_cast(decoded_value), static_cast(integer_value_)); + default: + LOG(ERROR) << "not support IntegerValueType"; + return 0; + } +} + +bool IntegerComparator::SerializeTo(std::string* serialized_comparator) { + IntegerComparatorDesc comparator_desc; + FilterValueType filter_value_type = TransIntegerValueType(value_type_); + if (filter_value_type == FilterValueType::kUnknownValueType) { + return false; + } + comparator_desc.set_value_type(filter_value_type); + comparator_desc.set_integer_value(integer_value_); + return comparator_desc.SerializeToString(serialized_comparator); +} + +bool IntegerComparator::ParseFrom(const std::string& serialized_comparator) { + IntegerComparatorDesc comparator_desc; + bool ret = comparator_desc.ParseFromString(serialized_comparator); + if (!ret) { + LOG(ERROR) << "parse pb string failed"; + return false; + } + integer_value_ = comparator_desc.integer_value(); + value_type_ = TransFilterValueType(comparator_desc.value_type()); + if (value_type_ == IntegerValueType::kUnknown) { + return false; + } + return true; +} + +std::string DecimalComparator::EncodeDecimal(double value) { + std::string encoded_value; + encoded_value.resize(sizeof(double) + 1); + char* buf = const_cast(encoded_value.c_str()); + memcpy(buf, &value, sizeof(value)); + return encoded_value; +} + +double DecimalComparator::DecodeDecimal(const std::string& value) { + double decoded_value; + memcpy(&decoded_value, const_cast(value.c_str()), sizeof(decoded_value)); + return decoded_value; +} + +DecimalComparator::DecimalComparator() : decimal_value_(0.0) {} + +DecimalComparator::DecimalComparator(double value) : decimal_value_(value) {} + +DecimalComparator::~DecimalComparator() {} + +ComparatorType DecimalComparator::Type() { return ComparatorType::kDecimalComparator; } + +int DecimalComparator::CompareWith(const std::string& value) { + double db_value = DecodeDecimal(value); + double diff = db_value - decimal_value_; + const double EPSINON = 1e-10; + if (diff > -1 * EPSINON && diff < EPSINON) { + return 0; + } else if (diff > EPSINON) { + return 1; + } else { + return -1; + } +} + +bool DecimalComparator::SerializeTo(std::string* serialized_comparator) { + DecimalComparatorDesc comparator_desc; + comparator_desc.set_decimal_value(decimal_value_); + return comparator_desc.SerializeToString(serialized_comparator); +} + +bool DecimalComparator::ParseFrom(const std::string& serialized_comparator) { + DecimalComparatorDesc comparator_desc; + bool ret = comparator_desc.ParseFromString(serialized_comparator); + if (!ret) { + LOG(ERROR) << "parse pb string failed"; + return false; + } + decimal_value_ = comparator_desc.decimal_value(); + return true; +} + +BinaryComparator::BinaryComparator() : value_("") {} + +BinaryComparator::BinaryComparator(const std::string& value) : value_(value) {} + +BinaryComparator::~BinaryComparator() {} + +ComparatorType BinaryComparator::Type() { return ComparatorType::kBinaryComparator; } + +int BinaryComparator::CompareWith(const std::string& value) { + if (value > value_) { + return 1; + } else if (value < value_) { + return -1; + } else { + return 0; + } +} + +bool BinaryComparator::SerializeTo(std::string* serialized_comparator) { + BinaryComparatorDesc comparator_desc; + comparator_desc.set_value(value_); + return comparator_desc.SerializeToString(serialized_comparator); +} + +bool BinaryComparator::ParseFrom(const std::string& serialized_comparator) { + BinaryComparatorDesc comparator_desc; + bool ret = comparator_desc.ParseFromString(serialized_comparator); + if (!ret) { + LOG(ERROR) << "parse pb string failed"; + return false; + } + value_ = comparator_desc.value(); + return true; +} + +} // namesapce filter +} // namesapce tera diff --git a/src/sdk/filter_list.cc b/src/sdk/filter_list.cc new file mode 100644 index 000000000..e65147a61 --- /dev/null +++ b/src/sdk/filter_list.cc @@ -0,0 +1,154 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// The Designs of Filter and related codes are inspired by hbase which is licensed under +// Apache 2.0 License (found in the LICENSE.Apache file in the root directory). Please refer to +// https://hbase.apache.org/2.0/apidocs/org/apache/hadoop/hbase/filter/Filter.html +// to see more detailed design of hbase filter. + +#include +#include +#include "tera.h" +#include "proto/filter.pb.h" +#include "sdk/filter_utils.h" +#include "sdk/filter_list_with_and.h" +#include "sdk/filter_list_with_or.h" +#include "glog/logging.h" + +namespace tera { +namespace filter { + +FilterList::FilterList(Operator op) : op_(kInvalidOp), filter_list_base_(NULL) { + if (op == kAnd) { + filter_list_base_ = new FilterListWithAND(); + op_ = op; + } else if (op == kOr) { + filter_list_base_ = new FilterListWithOR(); + op_ = op; + } +} + +FilterList::FilterList() {} + +FilterList::~FilterList() { + if (filter_list_base_) { + delete filter_list_base_; + } +} + +bool FilterList::AddFilter(const FilterPtr& filter) { + if (filter_list_base_) { + filter_list_base_->AddFilter(filter); + return true; + } else { + return false; + } +} + +FilterType FilterList::Type() { return kFilterList; } + +void FilterList::Reset() { + if (filter_list_base_) { + filter_list_base_->Reset(); + } +} + +Filter::ReturnCode FilterList::FilterCell(const std::string& column_family, + const std::string& column_qualifier, + const std::string& value) { + if (!filter_list_base_) { + return kUndefinedRC; + } + return filter_list_base_->FilterCell(column_family, column_qualifier, value); +} + +bool FilterList::FilterRow() { + if (!filter_list_base_) { + return false; + } + return filter_list_base_->FilterRow(); +} + +bool FilterList::SerializeTo(std::string* serialized_filter) { + FilterListDesc filter_list_desc; + FilterListDesc::Operator op = TransFilterListOp(op_); + if (op == FilterListDesc::kInvalidOp) { + return false; + } + filter_list_desc.set_op(op); + if (!filter_list_base_) { + return false; + } + const std::vector& filters = filter_list_base_->GetFilters(); + for (auto it = filters.begin(); it != filters.end(); ++it) { + FilterPtr filter = *it; + FilterDesc* filter_desc = filter_list_desc.add_filters(); + FilterDesc::FilterType filter_type = TransFilterType(filter->Type()); + if (filter_type == FilterDesc::kUnknownType) { + return false; + } + filter_desc->set_type(filter_type); + int ret = filter->SerializeTo(filter_desc->mutable_serialized_filter()); + if (!ret) { + return false; + } + } + return filter_list_desc.SerializeToString(serialized_filter); +} + +bool FilterList::ParseFrom(const std::string& serialized_filter) { + FilterListDesc filter_list_desc; + int ret = filter_list_desc.ParseFromString(serialized_filter); + if (!ret) { + LOG(ERROR) << "filter_list ParseFromString failed"; + return false; + } + op_ = TransFilterListDescOp(filter_list_desc.op()); + if (op_ == kAnd) { + filter_list_base_ = new FilterListWithAND(); + } else if (op_ == kOr) { + filter_list_base_ = new FilterListWithOR(); + } else { + LOG(ERROR) << "not support Operator"; + return false; + } + for (int i = 0; i < filter_list_desc.filters_size(); ++i) { + const FilterDesc& filter_desc = filter_list_desc.filters(i); + std::shared_ptr filter; + switch (filter_desc.type()) { + case FilterDesc::kFilterList: + filter = std::make_shared(); + break; + case FilterDesc::kValueFilter: + filter = std::make_shared(); + break; + default: + filter.reset(); + break; + } + if (filter) { + ret = filter->ParseFrom(filter_desc.serialized_filter()); + if (ret) { + filter_list_base_->AddFilter(filter); + } else { + LOG(ERROR) << "filter_list ParseFrom sub_filter " << i << " failed"; + return false; + } + } + } + return true; +} + +void FilterList::GetAllColumn(ColumnSet* filter_column_set) { + if (filter_list_base_) { + const std::vector& filters = filter_list_base_->GetFilters(); + for (auto it = filters.begin(); it != filters.end(); ++it) { + FilterPtr filter = *it; + filter->GetAllColumn(filter_column_set); + } + } +} + +} // namesapce filter +} // namesapce tera diff --git a/src/sdk/filter_list_base.cc b/src/sdk/filter_list_base.cc new file mode 100644 index 000000000..499e28100 --- /dev/null +++ b/src/sdk/filter_list_base.cc @@ -0,0 +1,28 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// The Designs of Filter and related codes are inspired by hbase which is licensed under +// Apache 2.0 License (found in the LICENSE.Apache file in the root directory). Please refer to +// https://hbase.apache.org/2.0/apidocs/org/apache/hadoop/hbase/filter/Filter.html +// to see more detailed design of hbase filter. + +#include +#include +#include "tera.h" +#include "proto/filter.pb.h" +#include "sdk/filter_list_base.h" +#include "gflags/gflags.h" +#include "glog/logging.h" + +namespace tera { +namespace filter { + +FilterListBase::~FilterListBase() {} + +bool FilterListBase::IsEmpty() { return filters_.empty(); } + +const std::vector& FilterListBase::GetFilters() { return filters_; } + +} // namesapce filter +} // namesapce tera diff --git a/src/sdk/filter_list_base.h b/src/sdk/filter_list_base.h new file mode 100644 index 000000000..8816a07b2 --- /dev/null +++ b/src/sdk/filter_list_base.h @@ -0,0 +1,34 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// The Designs of Filter and related codes are inspired by hbase which is licensed under +// Apache 2.0 License (found in the LICENSE.Apache file in the root directory). Please refer to +// https://hbase.apache.org/2.0/apidocs/org/apache/hadoop/hbase/filter/Filter.html +// to see more detailed design of hbase filter. + +#pragma once + +#include +#include +#include +#include "tera/filter.h" + +namespace tera { +namespace filter { + +class FilterListBase : public FilterBase { + public: + virtual ~FilterListBase(); + virtual void AddFilter(const FilterPtr& filter) = 0; + const std::vector& GetFilters(); + + protected: + bool IsEmpty(); + + protected: + std::vector filters_; +}; + +} // namesapce filter +} // namesapce tera diff --git a/src/sdk/filter_list_with_and.cc b/src/sdk/filter_list_with_and.cc new file mode 100644 index 000000000..13b319034 --- /dev/null +++ b/src/sdk/filter_list_with_and.cc @@ -0,0 +1,74 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// The Designs of Filter and related codes are inspired by hbase which is licensed under +// Apache 2.0 License (found in the LICENSE.Apache file in the root directory). Please refer to +// https://hbase.apache.org/2.0/apidocs/org/apache/hadoop/hbase/filter/Filter.html +// to see more detailed design of hbase filter. + +#include +#include +#include "sdk/filter_list_with_and.h" +#include "gflags/gflags.h" +#include "glog/logging.h" + +namespace tera { +namespace filter { + +FilterListWithAND::FilterListWithAND() {} + +FilterListWithAND::~FilterListWithAND() {} + +void FilterListWithAND::AddFilter(const FilterPtr& filter) { filters_.push_back(filter); } + +void FilterListWithAND::Reset() { + for (auto it = filters_.begin(); it != filters_.end(); ++it) { + FilterPtr filter = *it; + filter->Reset(); + } +} + +Filter::ReturnCode FilterListWithAND::FilterCell(const std::string& column_family, + const std::string& column_qualifier, + const std::string& value) { + if (IsEmpty()) { + return kIncludeCurCell; + } + ReturnCode rc = kIncludeCurCell; + for (auto it = filters_.begin(); it != filters_.end(); ++it) { + FilterPtr filter = *it; + ReturnCode cur_rc; + cur_rc = filter->FilterCell(column_family, column_qualifier, value); + rc = MergeReturnCode(rc, cur_rc); + } + return rc; +} + +bool FilterListWithAND::FilterRow() { + if (IsEmpty()) { + return false; + } + for (auto it = filters_.begin(); it != filters_.end(); ++it) { + FilterPtr filter = *it; + if (filter->FilterRow()) { + return true; + } + } + return false; +} + +Filter::ReturnCode FilterListWithAND::MergeReturnCode(ReturnCode rc, ReturnCode cur_rc) { + switch (cur_rc) { + case kIncludeCurCell: + return rc; + case kNotIncludeCurAndLeftCellOfRow: + return kNotIncludeCurAndLeftCellOfRow; + default: + LOG(ERROR) << "not support ReturnCode of curRC"; + return kNotIncludeCurAndLeftCellOfRow; + } +} + +} // namesapce filter +} // namesapce tera diff --git a/src/sdk/filter_list_with_and.h b/src/sdk/filter_list_with_and.h new file mode 100644 index 000000000..2e82f07bf --- /dev/null +++ b/src/sdk/filter_list_with_and.h @@ -0,0 +1,34 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// The Designs of Filter and related codes are inspired by hbase which is licensed under +// Apache 2.0 License (found in the LICENSE.Apache file in the root directory). Please refer to +// https://hbase.apache.org/2.0/apidocs/org/apache/hadoop/hbase/filter/Filter.html +// to see more detailed design of hbase filter. + +#pragma once + +#include +#include +#include "sdk/filter_list_base.h" + +namespace tera { +namespace filter { + +class FilterListWithAND : public FilterListBase { + public: + FilterListWithAND(); + virtual ~FilterListWithAND(); + void Reset(); + virtual ReturnCode FilterCell(const std::string& column_family, + const std::string& column_qualifier, const std::string& value); + virtual bool FilterRow(); + virtual void AddFilter(const FilterPtr& filter); + + private: + ReturnCode MergeReturnCode(ReturnCode rc, ReturnCode localRC); +}; + +} // namesapce filter +} // namesapce tera diff --git a/src/sdk/filter_list_with_or.cc b/src/sdk/filter_list_with_or.cc new file mode 100644 index 000000000..ff95a5574 --- /dev/null +++ b/src/sdk/filter_list_with_or.cc @@ -0,0 +1,108 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// The Designs of Filter and related codes are inspired by hbase which is licensed under +// Apache 2.0 License (found in the LICENSE.Apache file in the root directory). Please refer to +// https://hbase.apache.org/2.0/apidocs/org/apache/hadoop/hbase/filter/Filter.html +// to see more detailed design of hbase filter. + +#include +#include +#include "sdk/filter_list_with_or.h" +#include "gflags/gflags.h" +#include "glog/logging.h" + +namespace tera { +namespace filter { + +FilterListWithOR::FilterListWithOR() {} + +FilterListWithOR::~FilterListWithOR() {} + +void FilterListWithOR::AddFilter(const FilterPtr& filter) { + filters_.push_back(filter); + prev_filter_rc_list_.resize(filters_.size()); +} + +void FilterListWithOR::Reset() { + for (auto it = filters_.begin(); it != filters_.end(); ++it) { + FilterPtr filter = *it; + filter->Reset(); + } + for (auto it = prev_filter_rc_list_.begin(); it != prev_filter_rc_list_.end(); ++it) { + *it = kUndefinedRC; + } +} + +bool FilterListWithOR::NeedFilter(ReturnCode prevRC) { + if (prevRC == kUndefinedRC) { + return true; + } + switch (prevRC) { + case kIncludeCurCell: + return true; + case kNotIncludeCurAndLeftCellOfRow: + return false; + default: + LOG(ERROR) << "illegal ReturnCode"; + return false; + } +} + +Filter::ReturnCode FilterListWithOR::FilterCell(const std::string& column_family, + const std::string& column_qualifier, + const std::string& value) { + if (IsEmpty()) { + return kIncludeCurCell; + } + ReturnCode rc = kIncludeCurCell; + std::vector::iterator prev_filter_rc_list_it = prev_filter_rc_list_.begin(); + for (auto it = filters_.begin(); it != filters_.end(); ++it) { + ReturnCode prevRC = *prev_filter_rc_list_it; + if (!NeedFilter(prevRC)) { + ++prev_filter_rc_list_it; + continue; + } + FilterPtr filter = *it; + ReturnCode cur_rc; + cur_rc = filter->FilterCell(column_family, column_qualifier, value); + *prev_filter_rc_list_it = cur_rc; + ++prev_filter_rc_list_it; + rc = MergeReturnCode(rc, cur_rc); + } + return rc; +} + +bool FilterListWithOR::FilterRow() { + if (IsEmpty()) { + return false; + } + for (auto it = filters_.begin(); it != filters_.end(); ++it) { + FilterPtr filter = *it; + if (!filter->FilterRow()) { + return false; + } + } + return true; +} + +Filter::ReturnCode FilterListWithOR::MergeReturnCode(ReturnCode rc, ReturnCode cur_rc) { + switch (cur_rc) { + case kIncludeCurCell: + return kIncludeCurCell; + case kNotIncludeCurAndLeftCellOfRow: + if (rc == kIncludeCurCell || rc == kNotIncludeCurAndLeftCellOfRow) { + return rc; + } else { + LOG(ERROR) << "not support ReturnCode of rc"; + return kNotIncludeCurAndLeftCellOfRow; + } + default: + LOG(ERROR) << "not support ReturnCode of curRC"; + return kNotIncludeCurAndLeftCellOfRow; + } +} + +} // namesapce filter +} // namesapce tera diff --git a/src/sdk/filter_list_with_or.h b/src/sdk/filter_list_with_or.h new file mode 100644 index 000000000..9fee383aa --- /dev/null +++ b/src/sdk/filter_list_with_or.h @@ -0,0 +1,38 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// The Designs of Filter and related codes are inspired by hbase which is licensed under +// Apache 2.0 License (found in the LICENSE.Apache file in the root directory). Please refer to +// https://hbase.apache.org/2.0/apidocs/org/apache/hadoop/hbase/filter/Filter.html +// to see more detailed design of hbase filter. + +#pragma once + +#include +#include +#include "sdk/filter_list_base.h" + +namespace tera { +namespace filter { + +class FilterListWithOR : public FilterListBase { + public: + FilterListWithOR(); + virtual ~FilterListWithOR(); + void Reset(); + virtual ReturnCode FilterCell(const std::string& column_family, + const std::string& column_qualifier, const std::string& value); + virtual bool FilterRow(); + virtual void AddFilter(const FilterPtr& filter); + + private: + ReturnCode MergeReturnCode(ReturnCode rc, ReturnCode localRC); + bool NeedFilter(ReturnCode prevRC); + + private: + std::vector prev_filter_rc_list_; +}; + +} // namesapce filter +} // namesapce tera diff --git a/src/sdk/filter_utils.cc b/src/sdk/filter_utils.cc index 2b6cfd6fd..26767008f 100644 --- a/src/sdk/filter_utils.cc +++ b/src/sdk/filter_utils.cc @@ -18,36 +18,184 @@ namespace tera { bool CheckFilterString(const string& filter_str) { - // check filter string syntax - return true; + // check filter string syntax + return true; } string RemoveInvisibleChar(const string& schema) { - string ret; - for (size_t i = 0; i < schema.size(); ++i) { - if (schema[i] != '\n' && schema[i] != '\t' && schema[i] != ' ') { - ret.append(1, schema[i]); - } + string ret; + for (size_t i = 0; i < schema.size(); ++i) { + if (schema[i] != '\n' && schema[i] != '\t' && schema[i] != ' ') { + ret.append(1, schema[i]); } - return ret; + } + return ret; } -bool DefaultValueConverter(const string& in, const string& type, string* out) { - if (out == NULL) { - LOG(ERROR) << "null ptr: out"; - return false; - } - if (type == "int64") { - int64_t value_int64; - if (!StringToNumber(in.c_str(), &value_int64)) { - LOG(ERROR) << "invalid Integer number Got: " << in; - return false; - } - out->assign((char*)&value_int64, sizeof(int64_t)); - } else { - LOG(ERROR) << "not supported type: " << type; - return false; - } - return true; +bool TransBinCompOp(BinCompOp bin_comp_op, filter::CompareOperator* op) { + switch (bin_comp_op) { + case EQ: + *op = filter::CompareOperator::kEqual; + return true; + case NE: + *op = filter::CompareOperator::kNotEqual; + return true; + case LT: + *op = filter::CompareOperator::kLess; + return true; + case LE: + *op = filter::CompareOperator::kLessOrEqual; + return true; + case GT: + *op = filter::CompareOperator::kGreater; + return true; + case GE: + *op = filter::CompareOperator::kGreaterOrEqual; + return true; + default: + LOG(ERROR) << "not support BinCompOp"; + return false; + } +} + +namespace filter { +CompareType TransCompareOperator(CompareOperator op) { + switch (op) { + case CompareOperator::kLess: + return kLess; + case CompareOperator::kLessOrEqual: + return kLessOrEqual; + case CompareOperator::kEqual: + return kEqual; + case CompareOperator::kNotEqual: + return kNotEqual; + case CompareOperator::kGreaterOrEqual: + return kGreaterOrEqual; + case CompareOperator::kGreater: + return kGreater; + default: + LOG(ERROR) << "not support CompareOperator"; + return kNoOp; + } +} + +CompareOperator TransCompareType(CompareType op) { + switch (op) { + case kLess: + return CompareOperator::kLess; + case kLessOrEqual: + return CompareOperator::kLessOrEqual; + case kEqual: + return CompareOperator::kEqual; + case kNotEqual: + return CompareOperator::kNotEqual; + case kGreaterOrEqual: + return CompareOperator::kGreaterOrEqual; + case kGreater: + return CompareOperator::kGreater; + default: + LOG(ERROR) << "not support CompareType"; + return CompareOperator::kNoOp; + } +} + +FilterDesc::FilterType TransFilterType(FilterType type) { + switch (type) { + case kFilterList: + return FilterDesc::kFilterList; + case kValueFilter: + return FilterDesc::kValueFilter; + default: + LOG(ERROR) << "not support FilterType"; + return FilterDesc::kUnknownType; + } +} + +ComparatorDesc::ComparatorType TransComparatorType(ComparatorType type) { + switch (type) { + case ComparatorType::kIntegerComparator: + return ComparatorDesc::kIntegerComparator; + case ComparatorType::kDecimalComparator: + return ComparatorDesc::kDecimalComparator; + case ComparatorType::kBinaryComparator: + return ComparatorDesc::kBinaryComparator; + default: + LOG(ERROR) << "not support ComparatorType"; + return ComparatorDesc::kUnknownComparator; + } } -} // namespace tera + +FilterValueType TransIntegerValueType(IntegerValueType type) { + switch (type) { + case IntegerValueType::kInt64: + return kINT64; + case IntegerValueType::kUint64: + return kUINT64; + case IntegerValueType::kInt32: + return kINT32; + case IntegerValueType::kUint32: + return kUINT32; + case IntegerValueType::kInt16: + return kINT16; + case IntegerValueType::kUint16: + return kUINT16; + case IntegerValueType::kInt8: + return kINT8; + case IntegerValueType::kUint8: + return kUINT8; + default: + LOG(ERROR) << "not support IntegerValueType"; + return kUnknownValueType; + } +} + +IntegerValueType TransFilterValueType(FilterValueType type) { + switch (type) { + case kINT64: + return IntegerValueType::kInt64; + case kUINT64: + return IntegerValueType::kUint64; + case kINT32: + return IntegerValueType::kInt32; + case kUINT32: + return IntegerValueType::kUint32; + case kINT16: + return IntegerValueType::kInt16; + case kUINT16: + return IntegerValueType::kUint16; + case kINT8: + return IntegerValueType::kInt8; + case kUINT8: + return IntegerValueType::kUint8; + default: + LOG(ERROR) << "not support FilterValueType"; + return IntegerValueType::kUnknown; + } +} + +FilterListDesc::Operator TransFilterListOp(FilterList::Operator op) { + switch (op) { + case FilterList::kAnd: + return FilterListDesc::kAnd; + case FilterList::kOr: + return FilterListDesc::kOr; + default: + LOG(ERROR) << "not support Operator in FilterList"; + return FilterListDesc::kInvalidOp; + } +} + +FilterList::Operator TransFilterListDescOp(FilterListDesc::Operator op) { + switch (op) { + case FilterListDesc::kAnd: + return FilterList::kAnd; + case FilterListDesc::kOr: + return FilterList::kOr; + default: + LOG(ERROR) << "not support Operator in FilterListDesc"; + return FilterList::kInvalidOp; + } +} + +} // namespace filter +} // namespace tera diff --git a/src/sdk/filter_utils.h b/src/sdk/filter_utils.h index 284e1e56d..cd8c9b925 100644 --- a/src/sdk/filter_utils.h +++ b/src/sdk/filter_utils.h @@ -2,19 +2,35 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_SDK_FILTER_UTILS_H_ -#define TERA_SDK_FILTER_UTILS_H_ +#ifndef TERA_SDK_FILTER_UTILS_H_ +#define TERA_SDK_FILTER_UTILS_H_ #include "proto/tabletnode_rpc.pb.h" +#include "proto/filter.pb.h" +#include "tera.h" using std::string; namespace tera { bool CheckFilterString(const string& filter_str); - string RemoveInvisibleChar(const string& schema); -bool DefaultValueConverter(const string& in, const string& type, string* out); -} // namespace tera -#endif // TERA_SDK_FILTER_UTILS_H_ +bool TransBinCompOp(BinCompOp bin_comp_op, filter::CompareOperator* op); + +namespace filter { +CompareType TransCompareOperator(CompareOperator op); +CompareOperator TransCompareType(CompareType op); + +FilterDesc::FilterType TransFilterType(FilterType type); +ComparatorDesc::ComparatorType TransComparatorType(ComparatorType type); + +FilterValueType TransIntegerValueType(IntegerValueType type); +IntegerValueType TransFilterValueType(FilterValueType type); + +FilterListDesc::Operator TransFilterListOp(FilterList::Operator op); +FilterList::Operator TransFilterListDescOp(FilterListDesc::Operator op); + +} // namespace filter +} // namespace tera +#endif // TERA_SDK_FILTER_UTILS_H_ diff --git a/src/sdk/global_txn.cc b/src/sdk/global_txn.cc index 80461035e..239c223a9 100644 --- a/src/sdk/global_txn.cc +++ b/src/sdk/global_txn.cc @@ -5,11 +5,10 @@ // Author: baorenyi@baidu.com #include -#include +#include #include "common/metric/metric_counter.h" #include "common/this_thread.h" -#include "common/thread.h" #include "proto/table_meta.pb.h" #include "proto/tabletnode_rpc.pb.h" #include "sdk/global_txn.h" @@ -44,675 +43,664 @@ extern tera::MetricCounter gtxn_acks_fail_cnt; extern tera::MetricCounter gtxn_notifies_cnt; extern tera::MetricCounter gtxn_notifies_fail_cnt; -Transaction* GlobalTxn::NewGlobalTxn(std::shared_ptr client_impl, +Transaction* GlobalTxn::NewGlobalTxn(std::shared_ptr client_impl, common::ThreadPool* thread_pool, sdk::ClusterFinder* tso_cluster) { - if (client_impl && thread_pool != NULL) { - std::shared_ptr global_txn_shared_ptr( - new GlobalTxn(client_impl, thread_pool, tso_cluster)); - return new tera::TransactionWrapper(global_txn_shared_ptr); - } - LOG(ERROR) << "client_impl or tso_cluster is NULL"; - return NULL; + if (client_impl && thread_pool != NULL) { + std::shared_ptr global_txn_shared_ptr( + new GlobalTxn(client_impl, thread_pool, tso_cluster)); + return new tera::TransactionWrapper(global_txn_shared_ptr); + } + LOG(ERROR) << "client_impl or tso_cluster is NULL"; + return NULL; } -GlobalTxn::GlobalTxn(std::shared_ptr client_impl, - common::ThreadPool* thread_pool, - sdk::ClusterFinder* tso_cluster) : - gtxn_internal_(new GlobalTxnInternal(client_impl)), - status_returned_(false), - primary_write_(NULL), - writes_size_(0), - commit_ts_(0), - isolation_level_(IsolationLevel::kSnapshot), - serialized_primary_(""), - finish_(false), - finish_cond_(&finish_mutex_), - has_commited_(false), - user_commit_callback_(NULL), - user_commit_context_(NULL), - thread_pool_(thread_pool), - tso_cluster_(tso_cluster), - commit_timeout_ms_(FLAGS_tera_gtxn_commit_timeout_ms), - ttl_timestamp_ms_(FLAGS_tera_gtxn_timeout_ms + get_millis()), - all_task_pushed_(false) { - if (FLAGS_tera_gtxn_test_opened) { - VLOG(12) << "conf_file = " << FLAGS_tera_gtxn_test_flagfile; - start_ts_ = gtxn_internal_->TEST_Init(FLAGS_tera_gtxn_test_flagfile); - } else if (!FLAGS_tera_sdk_tso_client_enabled) { - start_ts_ = get_micros(); - } else { - timeoracle::TimeoracleClientImpl tsoc(thread_pool_, tso_cluster_); - start_ts_ = tsoc.GetTimestamp(1); - if (start_ts_ == 0) { - status_.SetFailed(ErrorCode::kGTxnTimestampLost); - status_returned_ = true; - } +GlobalTxn::GlobalTxn(std::shared_ptr client_impl, common::ThreadPool* thread_pool, + sdk::ClusterFinder* tso_cluster) + : gtxn_internal_(new GlobalTxnInternal(client_impl)), + status_returned_(false), + primary_write_(NULL), + writes_size_(0), + commit_ts_(0), + isolation_level_(IsolationLevel::kSnapshot), + serialized_primary_(""), + finish_(false), + finish_cond_(&finish_mutex_), + has_commited_(false), + user_commit_callback_(NULL), + user_commit_context_(NULL), + thread_pool_(thread_pool), + tso_cluster_(tso_cluster), + commit_timeout_ms_(FLAGS_tera_gtxn_commit_timeout_ms), + ttl_timestamp_ms_(FLAGS_tera_gtxn_timeout_ms + get_millis()), + all_task_pushed_(false) { + if (FLAGS_tera_gtxn_test_opened) { + VLOG(12) << "conf_file = " << FLAGS_tera_gtxn_test_flagfile; + start_ts_ = gtxn_internal_->TEST_Init(FLAGS_tera_gtxn_test_flagfile); + } else if (!FLAGS_tera_sdk_tso_client_enabled) { + start_ts_ = get_micros(); + } else { + timeoracle::TimeoracleClientImpl tsoc(thread_pool_, tso_cluster_); + start_ts_ = tsoc.GetTimestamp(1); + if (start_ts_ == 0) { + status_.SetFailed(ErrorCode::kGTxnTimestampLost); + status_returned_ = true; } - prewrite_start_ts_ = start_ts_; - gtxn_internal_->SetStartTimestamp(start_ts_); + } + prewrite_start_ts_ = start_ts_; + gtxn_internal_->SetStartTimestamp(start_ts_); } -GlobalTxn::~GlobalTxn() { -} +GlobalTxn::~GlobalTxn() {} void GlobalTxn::SetIsolation(const IsolationLevel& isolation_level) { - assert(has_commited_ == false); - isolation_level_ = isolation_level; + assert(has_commited_ == false); + isolation_level_ = isolation_level; } -void GlobalTxn::SetTimeout(int64_t timeout_ms) { - commit_timeout_ms_ = timeout_ms; -} +void GlobalTxn::SetTimeout(int64_t timeout_ms) { commit_timeout_ms_ = timeout_ms; } -void GlobalTxn::SetReaderStatusAndRunCallback(RowReaderImpl* reader_impl, - ErrorCode* status) { - gtxn_read_cnt.Inc(); - gtxn_internal_->PerfReadDelay(0, get_micros()); // finish_time - VLOG(12) << "[gtxn][get][" << start_ts_ << "][status] :" << status->ToString(); - reader_impl->SetError(status->GetType(), status->GetReason()); - thread_pool_->AddTask(std::bind(&RowReaderImpl::RunCallback, reader_impl)); +void GlobalTxn::SetReaderStatusAndRunCallback(RowReaderImpl* reader_impl, ErrorCode* status) { + gtxn_read_cnt.Inc(); + gtxn_internal_->PerfReadDelay(0, get_micros()); // finish_time + VLOG(12) << "[gtxn][get][" << start_ts_ << "][status] :" << status->ToString(); + reader_impl->SetError(status->GetType(), status->GetReason()); + thread_pool_->AddTask(std::bind(&RowReaderImpl::RunCallback, reader_impl)); } ErrorCode GlobalTxn::Get(RowReader* row_reader) { - assert(row_reader != NULL); - gtxn_internal_->PerfReadDelay(get_micros(), 0); // begin_time - gtxn_internal_->TEST_GetSleep(); - - RowReaderImpl* reader_impl = static_cast(row_reader); - int64_t odd_time_ms = ttl_timestamp_ms_ - get_millis(); - if (odd_time_ms < reader_impl->TimeOut()) { - reader_impl->SetTimeOut(odd_time_ms > 0 ? odd_time_ms : 1); - } - reader_impl->SetTransaction(this); - - // Pre Check can read - ErrorCode status; - status.SetFailed(ErrorCode::kOK); - if (has_commited_.load()) { - std::string reason = "get failed, txn has commited @ [" + - std::to_string(start_ts_) + "," + std::to_string(commit_ts_); - LOG(ERROR) << "[gtxn][get][" << start_ts_ <<"] " << reason; - status.SetFailed(ErrorCode::kGTxnOpAfterCommit, reason); - SetReaderStatusAndRunCallback(reader_impl, &status); - return status; - } - - Table* table = reader_impl->GetTable(); - const std::string& row_key = row_reader->RowKey(); - // Check UserReader and Build cells - if (!gtxn_internal_->VerifyUserRowReader(row_reader)) { - status = reader_impl->GetError(); - SetReaderStatusAndRunCallback(reader_impl, &status); - return status; - } - - std::vector cells; - for (auto it : row_reader->GetReadColumnList()) { - const std::string& column_family = it.first; - const std::set& qualifier_set = it.second; + assert(row_reader != NULL); + gtxn_internal_->PerfReadDelay(get_micros(), 0); // begin_time + gtxn_internal_->TEST_GetSleep(); + + RowReaderImpl* reader_impl = static_cast(row_reader); + int64_t odd_time_ms = ttl_timestamp_ms_ - get_millis(); + if (odd_time_ms < reader_impl->TimeOut()) { + reader_impl->SetTimeOut(odd_time_ms > 0 ? odd_time_ms : 1); + } + reader_impl->SetTransaction(this); + + // Pre Check can read + ErrorCode status; + status.SetFailed(ErrorCode::kOK); + if (has_commited_.load()) { + std::string reason = "get failed, txn has commited @ [" + std::to_string(start_ts_) + "," + + std::to_string(commit_ts_); + LOG(ERROR) << "[gtxn][get][" << start_ts_ << "] " << reason; + status.SetFailed(ErrorCode::kGTxnOpAfterCommit, reason); + SetReaderStatusAndRunCallback(reader_impl, &status); + return status; + } + + Table* table = reader_impl->GetTable(); + const std::string& row_key = row_reader->RowKey(); + // Check UserReader and Build cells + if (!gtxn_internal_->VerifyUserRowReader(row_reader)) { + status = reader_impl->GetError(); + SetReaderStatusAndRunCallback(reader_impl, &status); + return status; + } - for (auto q_it = qualifier_set.begin(); q_it != qualifier_set.end(); ++q_it) { - const std::string& qualifier = *q_it; - cells.push_back(new Cell(table, row_key, column_family, qualifier)); - } - } - int expected_cells_cnt = cells.size(); + std::vector cells; + for (auto it : row_reader->GetReadColumnList()) { + const std::string& column_family = it.first; + const std::set& qualifier_set = it.second; - InternalReaderContext* ctx = new InternalReaderContext(expected_cells_cnt, reader_impl, this); - bool is_async = reader_impl->IsAsync(); - for(auto& cell : cells) { - ctx->cell_map[cell] = 0; // cell* -> try_time, default = 0 - AsyncGetCell(cell, reader_impl, ctx); - } - - // sync wait and set status - if(!is_async) { - reader_impl->Wait(); - status = reader_impl->GetError(); - return status; + for (auto q_it = qualifier_set.begin(); q_it != qualifier_set.end(); ++q_it) { + const std::string& qualifier = *q_it; + cells.push_back(new Cell(table, row_key, column_family, qualifier)); } + } + int expected_cells_cnt = cells.size(); + + InternalReaderContext* ctx = new InternalReaderContext(expected_cells_cnt, reader_impl, this); + bool is_async = reader_impl->IsAsync(); + for (auto& cell : cells) { + ctx->cell_map[cell] = 0; // cell* -> try_time, default = 0 + AsyncGetCell(cell, reader_impl, ctx); + } + + // sync wait and set status + if (!is_async) { + reader_impl->Wait(); + status = reader_impl->GetError(); return status; + } + return status; } -void GlobalTxn::AsyncGetCell(Cell* cell, - RowReaderImpl* user_reader_impl, +void GlobalTxn::AsyncGetCell(Cell* cell, RowReaderImpl* user_reader_impl, InternalReaderContext* ctx) { - VLOG(12) << "[gtxn][get][" << start_ts_ << "] " - << gtxn_internal_->DebugString(*cell, "TryGet times(" + std::to_string(ctx->cell_map[cell]) + ")"); - - Table* table = cell->Table(); - RowReader* reader = table->NewRowReader(cell->RowKey()); - reader->AddColumn(cell->ColFamily(), cell->LockName()); - reader->AddColumn(cell->ColFamily(), cell->WriteName()); - reader->AddColumn(cell->ColFamily(), cell->DataName()); - reader->SetTimeRange(0, kMaxTimeStamp); - reader->SetMaxVersions(UINT32_MAX); - reader->SetCallBack([] (RowReader* r) { - CellReaderContext* ctx = (CellReaderContext*)r->GetContext(); - GlobalTxn* gtxn = static_cast(ctx->internal_reader_ctx->gtxn); - gtxn->thread_pool_->AddTask(std::bind(&GlobalTxn::DoGetCellReaderCallback, - gtxn, static_cast(r))); - }); - reader->SetContext(new CellReaderContext(cell, ctx)); - table->Get(reader); + VLOG(12) << "[gtxn][get][" << start_ts_ << "] " + << gtxn_internal_->DebugString( + *cell, "TryGet times(" + std::to_string(ctx->cell_map[cell]) + ")"); + + Table* table = cell->Table(); + RowReader* reader = table->NewRowReader(cell->RowKey()); + reader->AddColumn(cell->ColFamily(), cell->LockName()); + reader->AddColumn(cell->ColFamily(), cell->WriteName()); + reader->AddColumn(cell->ColFamily(), cell->DataName()); + reader->SetTimeRange(0, kMaxTimeStamp); + reader->SetMaxVersions(UINT32_MAX); + reader->SetCallBack([](RowReader* r) { + CellReaderContext* ctx = (CellReaderContext*)r->GetContext(); + GlobalTxn* gtxn = static_cast(ctx->internal_reader_ctx->gtxn); + gtxn->thread_pool_->AddTask( + std::bind(&GlobalTxn::DoGetCellReaderCallback, gtxn, static_cast(r))); + }); + reader->SetContext(new CellReaderContext(cell, ctx)); + table->Get(reader); } void GlobalTxn::DoGetCellReaderCallback(RowReader* reader) { - ErrorCode status = reader->GetError(); - if (status.GetType() != ErrorCode::kOK) { - MergeCellToRow(reader, status); - return; - } - - RowReader::TRow row; - reader->ToMap(&row); - CellReaderContext* ctx = (CellReaderContext*)reader->GetContext(); - Cell* cell = ctx->cell; - if (row.find(cell->ColFamily()) == row.end()) { - status.SetFailed(ErrorCode::kNotFound, "columnfamily not found"); - MergeCellToRow(reader, status); - return; + ErrorCode status = reader->GetError(); + if (status.GetType() != ErrorCode::kOK) { + MergeCellToRow(reader, status); + return; + } + + RowReader::TRow row; + reader->ToMap(&row); + CellReaderContext* ctx = (CellReaderContext*)reader->GetContext(); + Cell* cell = ctx->cell; + if (row.find(cell->ColFamily()) == row.end()) { + status.SetFailed(ErrorCode::kNotFound, "columnfamily not found"); + MergeCellToRow(reader, status); + return; + } + // local check lock + if (gtxn_internal_->IsLockedByOthers(row, *cell)) { + // sync operate + status.SetFailed(ErrorCode::kOK); + InternalReaderContext* internal_reader_ctx = ctx->internal_reader_ctx; + bool do_clean = false; + // check clean lock before read cell next time, + // when read times >= limit - 1 do clean lock opreations + if (internal_reader_ctx->cell_map[cell] >= FLAGS_tera_gtxn_get_waited_times_limit - 1) { + do_clean = true; } - // local check lock - if (gtxn_internal_->IsLockedByOthers(row, *cell)) { - // sync operate - status.SetFailed(ErrorCode::kOK); - InternalReaderContext* internal_reader_ctx = ctx->internal_reader_ctx; - bool do_clean = false; - // check clean lock before read cell next time, - // when read times >= limit - 1 do clean lock opreations - if (internal_reader_ctx->cell_map[cell] >= FLAGS_tera_gtxn_get_waited_times_limit - 1) { - do_clean = true; - } - BackoffAndMaybeCleanupLock(row, *cell, do_clean, &status); - if (status.GetType() == ErrorCode::kOK) { - // call Next time to async GetCell - // don't merge until next time ok or failed - ++ internal_reader_ctx->cell_map[cell]; - gtxn_read_retry_cnt.Inc(); - AsyncGetCell(cell, - static_cast(internal_reader_ctx->user_reader), - internal_reader_ctx); - delete reader; - delete ctx; - return; - } - } else if (!FindValueFromResultRow(row, cell)) { - status.SetFailed(ErrorCode::kNotFound, "build data col from write col failed"); + BackoffAndMaybeCleanupLock(row, *cell, do_clean, &status); + if (status.GetType() == ErrorCode::kOK) { + // call Next time to async GetCell + // don't merge until next time ok or failed + ++internal_reader_ctx->cell_map[cell]; + gtxn_read_retry_cnt.Inc(); + AsyncGetCell(cell, static_cast(internal_reader_ctx->user_reader), + internal_reader_ctx); + delete reader; + delete ctx; + return; } - MergeCellToRow(reader, status); + } else if (!FindValueFromResultRow(row, cell)) { + status.SetFailed(ErrorCode::kNotFound, "build data col from write col failed"); + } + MergeCellToRow(reader, status); } -void GlobalTxn::MergeCellToRow(RowReader* internal_reader, - const ErrorCode& status) { - CellReaderContext* ctx = (CellReaderContext*)internal_reader->GetContext(); - ctx->status = status; - VLOG(12) << "[gtxn][get][" << start_ts_ << "] " - << gtxn_internal_->DebugString(*(ctx->cell), status.ToString()); - GetCellCallback(ctx); - // next time internal read will new next RowReader - delete internal_reader; - delete ctx; +void GlobalTxn::MergeCellToRow(RowReader* internal_reader, const ErrorCode& status) { + CellReaderContext* ctx = (CellReaderContext*)internal_reader->GetContext(); + ctx->status = status; + VLOG(12) << "[gtxn][get][" << start_ts_ << "] " + << gtxn_internal_->DebugString(*(ctx->cell), status.ToString()); + GetCellCallback(ctx); + // next time internal read will new next RowReader + delete internal_reader; + delete ctx; } void GlobalTxn::GetCellCallback(CellReaderContext* ctx) { - InternalReaderContext* internal_reader_ctx = ctx->internal_reader_ctx; - Cell* cell = ctx->cell; - bool last_cell = false; - { - MutexLock lock(&mu_); - ++internal_reader_ctx->active_cell_cnt; - if (internal_reader_ctx->fail_cell_cnt == 0 && ctx->status.GetType() == ErrorCode::kOK) { - KeyValuePair* kv = internal_reader_ctx->results.add_key_values(); - kv->set_key(cell->RowKey()); - kv->set_column_family(cell->ColFamily()); - // return to user qualifier not qualifier.data - kv->set_qualifier(cell->Qualifier()); - kv->set_timestamp(cell->Timestamp()); - kv->set_value(cell->Value()); - } else if (ctx->status.GetType() != ErrorCode::kNotFound) { - ++internal_reader_ctx->fail_cell_cnt; - internal_reader_ctx->results.clear_key_values(); - internal_reader_ctx->last_err = ctx->status; - } else { - ++internal_reader_ctx->not_found_cnt; - } - last_cell = (internal_reader_ctx->active_cell_cnt == internal_reader_ctx->expected_cell_cnt); + InternalReaderContext* internal_reader_ctx = ctx->internal_reader_ctx; + Cell* cell = ctx->cell; + bool last_cell = false; + { + MutexLock lock(&mu_); + ++internal_reader_ctx->active_cell_cnt; + if (internal_reader_ctx->fail_cell_cnt == 0 && ctx->status.GetType() == ErrorCode::kOK) { + KeyValuePair* kv = internal_reader_ctx->results.add_key_values(); + kv->set_key(cell->RowKey()); + kv->set_column_family(cell->ColFamily()); + // return to user qualifier not qualifier.data + kv->set_qualifier(cell->Qualifier()); + kv->set_timestamp(cell->Timestamp()); + kv->set_value(cell->Value()); + } else if (ctx->status.GetType() != ErrorCode::kNotFound) { + ++internal_reader_ctx->fail_cell_cnt; + internal_reader_ctx->results.clear_key_values(); + internal_reader_ctx->last_err = ctx->status; + } else { + ++internal_reader_ctx->not_found_cnt; } - if (last_cell) { - ErrorCode last_err = internal_reader_ctx->last_err; - RowReaderImpl* reader_impl = static_cast(internal_reader_ctx->user_reader); - if (internal_reader_ctx->fail_cell_cnt > 0) { - gtxn_read_fail_cnt.Inc(); - } else if (internal_reader_ctx->not_found_cnt == internal_reader_ctx->expected_cell_cnt) { - // all cell not found - last_err.SetFailed(ErrorCode::kNotFound); - } else { - reader_impl->SetResult(internal_reader_ctx->results); - last_err.SetFailed(ErrorCode::kOK); - } - delete internal_reader_ctx; - SetReaderStatusAndRunCallback(reader_impl, &last_err); + last_cell = (internal_reader_ctx->active_cell_cnt == internal_reader_ctx->expected_cell_cnt); + } + if (last_cell) { + ErrorCode last_err = internal_reader_ctx->last_err; + RowReaderImpl* reader_impl = static_cast(internal_reader_ctx->user_reader); + if (internal_reader_ctx->fail_cell_cnt > 0) { + gtxn_read_fail_cnt.Inc(); + } else if (internal_reader_ctx->not_found_cnt == internal_reader_ctx->expected_cell_cnt) { + // all cell not found + last_err.SetFailed(ErrorCode::kNotFound); + } else { + reader_impl->SetResult(internal_reader_ctx->results); + last_err.SetFailed(ErrorCode::kOK); } + delete internal_reader_ctx; + SetReaderStatusAndRunCallback(reader_impl, &last_err); + } } bool GlobalTxn::FindValueFromResultRow(RowReader::TRow& result_row, Cell* target_cell) { - - auto write_col_it = result_row[target_cell->ColFamily()].find(target_cell->WriteName()); - auto data_col_it = result_row[target_cell->ColFamily()].find(target_cell->DataName()); - - // check write col and data col exsit - if (write_col_it == result_row[target_cell->ColFamily()].end() - || data_col_it == result_row[target_cell->ColFamily()].end()) { - return false; + auto write_col_it = result_row[target_cell->ColFamily()].find(target_cell->WriteName()); + auto data_col_it = result_row[target_cell->ColFamily()].find(target_cell->DataName()); + + // check write col and data col exsit + if (write_col_it == result_row[target_cell->ColFamily()].end() || + data_col_it == result_row[target_cell->ColFamily()].end()) { + return false; + } + auto write_col = result_row[target_cell->ColFamily()][target_cell->WriteName()]; + auto data_col = result_row[target_cell->ColFamily()][target_cell->DataName()]; + + for (auto k1 = write_col.rbegin(); k1 != write_col.rend(); ++k1) { + int64_t write_ts = k1->first; + std::string write_value = k1->second; + VLOG(12) << "[gtxn][get][" << start_ts_ << "] found write col, ts=" << write_ts + << ", internal val = " << write_value; + int write_type; + int64_t data_ts; + // skip new version value or skip error write format version + if (write_ts > start_ts_ || !DecodeWriteValue(write_value, &write_type, &data_ts)) { + continue; } - auto write_col = result_row[target_cell->ColFamily()][target_cell->WriteName()]; - auto data_col = result_row[target_cell->ColFamily()][target_cell->DataName()]; - - for (auto k1 = write_col.rbegin(); k1 != write_col.rend(); ++k1) { - int64_t write_ts = k1->first; - std::string write_value = k1->second; - VLOG(12) << "[gtxn][get][" << start_ts_ << "] found write col, ts=" - << write_ts << ", internal val = " << write_value; - int write_type; - int64_t data_ts; - // skip new version value or skip error write format version - if (write_ts > start_ts_ || !DecodeWriteValue(write_value, &write_type, &data_ts)) { - continue; - } - VLOG(12) << "[gtxn][get][" << start_ts_ << "] decode write col, ts=" - << write_ts << ", type=" << write_type << ", value=" << data_ts; - // get data col , ts == data_ts - for (auto k2 = data_col.rbegin(); k2 != data_col.rend(); ++k2) { - VLOG(12) << "[gtxn][get][" << start_ts_ << "] found data col, ts=" - << k2->first << ", internal val = " << k2->second; - if (k2->first == data_ts && write_type == RowMutation::kPut) { - target_cell->SetTimestamp(data_ts); - target_cell->SetValue(k2->second); - return true; - } else if (k2->first < data_ts) { - VLOG(12) << "[gtxn][get][" << start_ts_ - << "] data cell version not found, v=" << k2->first; - break; - } - } - VLOG(12) << "[gtxn][get][" << start_ts_ << "] check data col failed, no data"; + VLOG(12) << "[gtxn][get][" << start_ts_ << "] decode write col, ts=" << write_ts + << ", type=" << write_type << ", value=" << data_ts; + // get data col , ts == data_ts + for (auto k2 = data_col.rbegin(); k2 != data_col.rend(); ++k2) { + VLOG(12) << "[gtxn][get][" << start_ts_ << "] found data col, ts=" << k2->first + << ", internal val = " << k2->second; + if (k2->first == data_ts && write_type == RowMutation::kPut) { + target_cell->SetTimestamp(data_ts); + target_cell->SetValue(k2->second); + return true; + } else if (k2->first < data_ts) { + VLOG(12) << "[gtxn][get][" << start_ts_ << "] data cell version not found, v=" << k2->first; break; + } } - VLOG(12) << "[gtxn][get][" << start_ts_ - << "] write col versions count" << write_col.size(); - return false; + VLOG(12) << "[gtxn][get][" << start_ts_ << "] check data col failed, no data"; + break; + } + VLOG(12) << "[gtxn][get][" << start_ts_ << "] write col versions count" << write_col.size(); + return false; } void GlobalTxn::BackoffAndMaybeCleanupLock(RowReader::TRow& row, const Cell& cell, const bool try_clean, ErrorCode* status) { - VLOG(12) << gtxn_internal_->DebugString(cell, "[gtxn][get][" + - std::to_string(start_ts_) + " backoff or cleanup lock"); - // get lock ts - int64_t lock_ts = -1; - int lock_type = -1; - tera::PrimaryInfo primary_info; - for (auto k = row[cell.ColFamily()][cell.LockName()].rbegin(); - k != row[cell.ColFamily()][cell.LockName()].rend(); ++k) { - if (k->first < start_ts_) { - lock_ts = k->first; - VLOG(12) << "lock_ts=" << lock_ts << ", primary_str=" << k->second; - if (!DecodeLockValue(k->second, &lock_type, &primary_info)) { - status->SetFailed(ErrorCode::kGTxnPrimaryLost, "can't found primary"); - return; - } - break; - } - } - // get primary lock - const std::string& process = "[gtxn][get][" + std::to_string(start_ts_) - + "][check locked and writed]"; - bool ret = gtxn_internal_->PrimaryIsLocked(primary_info, lock_ts, status); - if (status->GetType() != ErrorCode::kOK && status->GetType() != ErrorCode::kNotFound) { - LOG(ERROR) << gtxn_internal_->DebugString(cell, process + " failed," + status->ToString()); + VLOG(12) << gtxn_internal_->DebugString( + cell, "[gtxn][get][" + std::to_string(start_ts_) + " backoff or cleanup lock"); + // get lock ts + int64_t lock_ts = -1; + int lock_type = -1; + tera::PrimaryInfo primary_info; + for (auto k = row[cell.ColFamily()][cell.LockName()].rbegin(); + k != row[cell.ColFamily()][cell.LockName()].rend(); ++k) { + if (k->first < start_ts_) { + lock_ts = k->first; + VLOG(12) << "lock_ts=" << lock_ts << ", primary_str=" << k->second; + if (!DecodeLockValue(k->second, &lock_type, &primary_info)) { + status->SetFailed(ErrorCode::kGTxnPrimaryLost, "can't found primary"); return; - } else if (ret) { - // NotFound means : other txn on prewrite process - // and this cell locked but primary unlocked(failed) - VLOG(12) << gtxn_internal_->DebugString(cell, process + " succeed"); - // primary at prewrite do (1) clean or (2) wait + } + break; + } + } + // get primary lock + const std::string& process = + "[gtxn][get][" + std::to_string(start_ts_) + "][check locked and writed]"; + bool ret = gtxn_internal_->PrimaryIsLocked(primary_info, lock_ts, status); + lock_ts = lock_ts == -1 ? start_ts_ : lock_ts; + if (status->GetType() != ErrorCode::kOK && status->GetType() != ErrorCode::kNotFound) { + LOG(ERROR) << gtxn_internal_->DebugString(cell, process + " failed," + status->ToString()); + return; + } else if (ret) { + // NotFound means : other txn on prewrite process + // and this cell locked but primary unlocked(failed) + VLOG(12) << gtxn_internal_->DebugString(cell, process + " succeed"); + // primary at prewrite do (1) clean or (2) wait + if (try_clean) { + CleanLock(cell, primary_info, status, lock_ts); + } else if (gtxn_internal_->SuspectLive(primary_info)) { + // TODO add a better sleep strategy + ThisThread::Sleep(100); + } else { + CleanLock(cell, primary_info, status, lock_ts); + } + } else { + if (!gtxn_internal_->IsPrimary(cell, primary_info)) { + VLOG(12) << gtxn_internal_->DebugString(cell, process + ", will do rollforward"); + // primary maybe at commited do roll_forward + RollForward(cell, primary_info, lock_type, status); + if (status->GetType() == ErrorCode::kGTxnPrimaryLost) { + VLOG(12) << gtxn_internal_->DebugString(cell, + process + ", rollforward failed, try clean lock"); + // primary prewrite failed + status->SetFailed(ErrorCode::kOK); if (try_clean) { - CleanLock(cell, primary_info, status); - } else if (gtxn_internal_->SuspectLive(primary_info)) { - // TODO add a better sleep strategy - ThisThread::Sleep(100); + CleanLock(cell, primary_info, status, lock_ts); + } else if (gtxn_internal_->SuspectLive(primary_info)) { + ThisThread::Sleep(100); } else { - CleanLock(cell, primary_info, status); + CleanLock(cell, primary_info, status, lock_ts); } + } } else { - if (!gtxn_internal_->IsPrimary(cell, primary_info)) { - VLOG(12) << gtxn_internal_->DebugString(cell, process + ", will do rollforward"); - // primary maybe at commited do roll_forward - RollForward(cell, primary_info, lock_type, status); - if (status->GetType() == ErrorCode::kGTxnPrimaryLost) { - VLOG(12) << gtxn_internal_->DebugString(cell, process + ", rollforward failed, try clean lock"); - // primary prewrite failed - status->SetFailed(ErrorCode::kOK); - if (try_clean) { - CleanLock(cell, primary_info, status); - } else if (gtxn_internal_->SuspectLive(primary_info)) { - ThisThread::Sleep(100); - } else { - CleanLock(cell, primary_info, status); - } - } - } else { - VLOG(12) << gtxn_internal_->DebugString(cell, process + ", ignore(primary)"); - } - } + VLOG(12) << gtxn_internal_->DebugString(cell, process + ", ignore(primary)"); + } + } } -void GlobalTxn::CleanLock(const Cell& cell, const tera::PrimaryInfo& primary, ErrorCode* status) { - gtxn_read_rollback_cnt.Inc(); - Table* primary_table = gtxn_internal_->FindTable(primary.table_name()); - assert(primary_table != NULL); - const Cell& primary_cell = Cell(primary_table, primary.row_key(), - primary.column_family(), primary.qualifier()); - // if now cell is primary - bool is_same = cell.Table()->GetName() == primary_table->GetName() - && cell.RowKey() == primary_cell.RowKey() - && cell.ColFamily() == primary_cell.ColFamily() - && cell.LockName() == primary_cell.LockName(); - if (!is_same) { - VLOG(12) << "[gtxn][get][" << start_ts_ << "] " - << gtxn_internal_->DebugString(primary_cell, "clean lock primary"); - RowMutation* pri_mu = primary_table->NewRowMutation(primary_cell.RowKey()); - // delete all info between [0, start_ts_] at lock col - pri_mu->DeleteColumns(primary_cell.ColFamily(), primary_cell.LockName(), start_ts_); - primary_table->ApplyMutation(pri_mu); - if (pri_mu->GetError().GetType() != tera::ErrorCode::kOK) { - LOG(WARNING) << pri_mu->GetError().ToString(); - *status = pri_mu->GetError(); - } - delete pri_mu; - } - VLOG(12) << "[gtxn][get][" << start_ts_ << "] " - << gtxn_internal_->DebugString(cell, "clean lock this cell"); - RowMutation* this_mu = (cell.Table())->NewRowMutation(cell.RowKey()); +void GlobalTxn::CleanLock(const Cell& cell, const tera::PrimaryInfo& primary, ErrorCode* status, + int64_t lock_ts) { + gtxn_read_rollback_cnt.Inc(); + Table* primary_table = gtxn_internal_->FindTable(primary.table_name()); + assert(primary_table != NULL); + const Cell& primary_cell = + Cell(primary_table, primary.row_key(), primary.column_family(), primary.qualifier()); + // if now cell is primary + bool is_same = cell.Table()->GetName() == primary_table->GetName() && + cell.RowKey() == primary_cell.RowKey() && + cell.ColFamily() == primary_cell.ColFamily() && + cell.LockName() == primary_cell.LockName(); + if (!is_same) { + VLOG(12) << "[gtxn][get][" << start_ts_ << "] " + << gtxn_internal_->DebugString(primary_cell, "clean lock primary"); + RowMutation* pri_mu = primary_table->NewRowMutation(primary_cell.RowKey()); // delete all info between [0, start_ts_] at lock col - this_mu->DeleteColumns(cell.ColFamily(), cell.LockName(), start_ts_); - (cell.Table())->ApplyMutation(this_mu); - if (this_mu->GetError().GetType() != tera::ErrorCode::kOK) { - LOG(WARNING) << "[gtxn][get][" << start_ts_ << "] clean lock failed :" - << this_mu->GetError().ToString(); - *status = this_mu->GetError(); + pri_mu->DeleteColumns(primary_cell.ColFamily(), primary_cell.LockName(), lock_ts); + primary_table->ApplyMutation(pri_mu); + if (pri_mu->GetError().GetType() != tera::ErrorCode::kOK) { + LOG(WARNING) << pri_mu->GetError().ToString(); + *status = pri_mu->GetError(); } - delete this_mu; + delete pri_mu; + } + VLOG(12) << "[gtxn][get][" << start_ts_ << "] " + << gtxn_internal_->DebugString(cell, "clean lock this cell"); + RowMutation* this_mu = (cell.Table())->NewRowMutation(cell.RowKey()); + // delete all info between [0, start_ts_] at lock col + this_mu->DeleteColumns(cell.ColFamily(), cell.LockName(), lock_ts); + (cell.Table())->ApplyMutation(this_mu); + if (this_mu->GetError().GetType() != tera::ErrorCode::kOK) { + LOG(WARNING) << "[gtxn][get][" << start_ts_ + << "] clean lock failed :" << this_mu->GetError().ToString(); + *status = this_mu->GetError(); + } + delete this_mu; } -void GlobalTxn::RollForward(const Cell& cell, const tera::PrimaryInfo& primary, - int lock_type, ErrorCode* status) { - gtxn_read_rollforward_cnt.Inc(); - // find primary write col start_ts - Table* pri_table = gtxn_internal_->FindTable(primary.table_name()); - assert(pri_table != NULL); - std::unique_ptr primary_cell(new Cell(pri_table, primary.row_key(), - primary.column_family(), - primary.qualifier())); - RowReader* reader = pri_table->NewRowReader(primary_cell->RowKey()); - reader->AddColumn(primary_cell->ColFamily(), primary_cell->WriteName()); - reader->SetTimeRange(0, kMaxTimeStamp); - reader->SetMaxVersions(UINT32_MAX); - pri_table->Get(reader); - if (reader->GetError().GetType() != ErrorCode::kOK) { - if (reader->GetError().GetType() == ErrorCode::kNotFound) { - status->SetFailed(ErrorCode::kGTxnPrimaryLost, "primary lost, not 'lock' and 'write'"); - } else { - LOG(WARNING) << status->GetReason(); - *status = reader->GetError(); - } - delete reader; - return; - } - int64_t commit_ts = -1; - int write_type; - int64_t data_ts = -1; - while (!reader->Done()) { - // decode primary cell write col value - std::string reader_value = reader->Value(); - DecodeWriteValue(reader_value, &write_type, &data_ts); - VLOG(12) << "[gtxn][get][ " << start_ts_ << "] decode primary 'write', ts=" << reader->Timestamp() - << ", type=" << write_type << ", value=" << data_ts; - VLOG(12) << "[gtxn][get][ " << start_ts_ << "] primary start_ts=" << primary.gtxn_start_ts(); - if (data_ts > 0 && data_ts < primary.gtxn_start_ts()) { - status->SetFailed(ErrorCode::kGTxnPrimaryLost, "primary lost, not 'lock' and 'write'"); - delete reader; - return; - } else if (data_ts == primary.gtxn_start_ts()) { - commit_ts = reader->Timestamp(); - break; - } - reader->Next(); +void GlobalTxn::RollForward(const Cell& cell, const tera::PrimaryInfo& primary, int lock_type, + ErrorCode* status) { + gtxn_read_rollforward_cnt.Inc(); + // find primary write col start_ts + Table* pri_table = gtxn_internal_->FindTable(primary.table_name()); + assert(pri_table != NULL); + std::unique_ptr primary_cell( + new Cell(pri_table, primary.row_key(), primary.column_family(), primary.qualifier())); + RowReader* reader = pri_table->NewRowReader(primary_cell->RowKey()); + reader->AddColumn(primary_cell->ColFamily(), primary_cell->WriteName()); + reader->SetTimeRange(0, kMaxTimeStamp); + reader->SetMaxVersions(UINT32_MAX); + pri_table->Get(reader); + if (reader->GetError().GetType() != ErrorCode::kOK) { + if (reader->GetError().GetType() == ErrorCode::kNotFound) { + status->SetFailed(ErrorCode::kGTxnPrimaryLost, "primary lost, not 'lock' and 'write'"); + } else { + LOG(WARNING) << status->GetReason(); + *status = reader->GetError(); } delete reader; - - if (commit_ts > 0) { - RowMutation* this_mu = cell.Table()->NewRowMutation(cell.RowKey()); - this_mu->Put(cell.ColFamily(), - cell.WriteName(), - EncodeWriteValue(lock_type, data_ts), - commit_ts); - this_mu->DeleteColumns(cell.ColFamily(), cell.LockName(), commit_ts); - cell.Table()->ApplyMutation(this_mu); - if (this_mu->GetError().GetType() != tera::ErrorCode::kOK) { - LOG(WARNING) << this_mu->GetError().GetReason(); - *status = this_mu->GetError(); - } - delete this_mu; - } else { - status->SetFailed(ErrorCode::kGTxnPrimaryLost, "not found primary cell"); + return; + } + int64_t commit_ts = -1; + int write_type; + int64_t data_ts = -1; + while (!reader->Done()) { + // decode primary cell write col value + std::string reader_value = reader->Value(); + DecodeWriteValue(reader_value, &write_type, &data_ts); + VLOG(12) << "[gtxn][get][ " << start_ts_ + << "] decode primary 'write', ts=" << reader->Timestamp() << ", type=" << write_type + << ", value=" << data_ts; + VLOG(12) << "[gtxn][get][ " << start_ts_ << "] primary start_ts=" << primary.gtxn_start_ts(); + if (data_ts > 0 && data_ts < primary.gtxn_start_ts()) { + status->SetFailed(ErrorCode::kGTxnPrimaryLost, "primary lost, not 'lock' and 'write'"); + delete reader; + return; + } else if (data_ts == primary.gtxn_start_ts()) { + commit_ts = reader->Timestamp(); + break; + } + reader->Next(); + } + delete reader; + + if (commit_ts > 0) { + RowMutation* this_mu = cell.Table()->NewRowMutation(cell.RowKey()); + this_mu->Put(cell.ColFamily(), cell.WriteName(), EncodeWriteValue(lock_type, data_ts), + commit_ts); + this_mu->DeleteColumns(cell.ColFamily(), cell.LockName(), commit_ts); + cell.Table()->ApplyMutation(this_mu); + if (this_mu->GetError().GetType() != tera::ErrorCode::kOK) { + LOG(WARNING) << this_mu->GetError().GetReason(); + *status = this_mu->GetError(); } + delete this_mu; + } else { + status->SetFailed(ErrorCode::kGTxnPrimaryLost, "not found primary cell"); + } } -void GlobalTxn::SaveWrite(const std::string& tablename, const std::string& row_key, - tera::Write& w) { - MutexLock lock(&mu_); - TableWithRowkey twr(tablename, row_key); - auto it = writes_.find(twr); - if (it != writes_.end()) { - std::vector* ws_ptr = &(writes_[twr]); - ws_ptr->push_back(w); - } else { - std::vector ws; - ws.push_back(w); - writes_[twr] = ws; - writes_cnt_.Inc(); - } +void GlobalTxn::SaveWrite(const std::string& tablename, const std::string& row_key, + tera::Write& w) { + MutexLock lock(&mu_); + TableWithRowkey twr(tablename, row_key); + auto it = writes_.find(twr); + if (it != writes_.end()) { + std::vector* ws_ptr = &(writes_[twr]); + ws_ptr->push_back(w); + } else { + std::vector ws; + ws.push_back(w); + writes_[twr] = ws; + writes_cnt_.Inc(); + } } void GlobalTxn::SetLastStatus(ErrorCode* status) { - MutexLock lock(&mu_); - if (!status_returned_) { - VLOG(12) << "[gtxn][commit][status][" << start_ts_ << "]" << status->ToString(); - status_.SetFailed(status->GetType(), status->GetReason()); - status_returned_ = true; - } + MutexLock lock(&mu_); + if (!status_returned_) { + VLOG(12) << "[gtxn][commit][status][" << start_ts_ << "]" << status->ToString(); + status_.SetFailed(status->GetType(), status->GetReason()); + status_returned_ = true; + } } void GlobalTxn::RunUserCallback() { - if (status_.GetType() == ErrorCode::kOK) { - gtxn_commit_cnt.Inc(); - } else { - gtxn_commit_fail_cnt.Inc(); - } - gtxn_internal_->PerfCommitDelay(0, get_micros()); // finish_time - if (user_commit_callback_ != NULL) { - VLOG(12) << "[gtxn][commit][callback][" << start_ts_ << "]" << status_.ToString(); - user_commit_callback_(this); - } else { - MutexLock lock(&finish_mutex_); - VLOG(12) << "[gtxn][commit][finish][" << start_ts_ << "]" << status_.ToString(); - finish_ = true; - finish_cond_.Signal(); - } + if (status_.GetType() == ErrorCode::kOK) { + gtxn_commit_cnt.Inc(); + } else { + gtxn_commit_fail_cnt.Inc(); + } + gtxn_internal_->PerfCommitDelay(0, get_micros()); // finish_time + if (user_commit_callback_ != NULL) { + VLOG(12) << "[gtxn][commit][callback][" << start_ts_ << "]" << status_.ToString(); + user_commit_callback_(this); + } else { + MutexLock lock(&finish_mutex_); + VLOG(12) << "[gtxn][commit][finish][" << start_ts_ << "]" << status_.ToString(); + finish_ = true; + finish_cond_.Signal(); + } } -ErrorCode GlobalTxn::Commit() { - /// begin commit - gtxn_internal_->TEST_Sleep(); - gtxn_internal_->PerfCommitDelay(get_micros(), 0); // begin_time - ErrorCode status; - if (put_fail_cnt_.Get() > 0 || has_commited_) { - std::string reason("commit failed, has_commited[" + - std::to_string(has_commited_.load()) + - "], put_fail_cnt[" + std::to_string(put_fail_cnt_.Get()) + "]"); - VLOG(12) << reason; - status.SetFailed(ErrorCode::kGTxnOpAfterCommit, reason); - SetLastStatus(&status); - // Callback Point : put applyMutation failed or has commited - RunUserCallback(); - return status; - } - has_commited_ = true; - // don't have any writes - if (writes_cnt_.Get() == 0) { - status.SetFailed(ErrorCode::kOK, "No modification exists"); - SetLastStatus(&status); - // Callback Point - RunUserCallback(); - return status; - } - thread_pool_->AddTask(std::bind(&GlobalTxn::InternalCommit, this)); +ErrorCode GlobalTxn::Commit() { + /// begin commit + gtxn_internal_->TEST_Sleep(); + gtxn_internal_->PerfCommitDelay(get_micros(), 0); // begin_time + ErrorCode status; + if (put_fail_cnt_.Get() > 0 || has_commited_) { + std::string reason("commit failed, has_commited[" + std::to_string(has_commited_.load()) + + "], put_fail_cnt[" + std::to_string(put_fail_cnt_.Get()) + "]"); + VLOG(12) << reason; + status.SetFailed(ErrorCode::kGTxnOpAfterCommit, reason); + SetLastStatus(&status); + // Callback Point : put applyMutation failed or has commited + RunUserCallback(); + return status; + } + has_commited_ = true; + // don't have any writes + if (writes_cnt_.Get() == 0) { + status.SetFailed(ErrorCode::kOK, "No modification exists"); + SetLastStatus(&status); + // Callback Point + RunUserCallback(); + return status; + } + thread_pool_->AddTask(std::bind(&GlobalTxn::InternalCommit, this)); - if (user_commit_callback_ == NULL) { - WaitForComplete(); - } - return status_; + if (user_commit_callback_ == NULL) { + WaitForComplete(); + } + return status_; } void GlobalTxn::InternalCommit() { - int64_t odd_time_ms = ttl_timestamp_ms_ - get_millis(); - if (odd_time_ms < commit_timeout_ms_) { - commit_timeout_ms_ = odd_time_ms > 0 ? odd_time_ms : 1; - } - gtxn_internal_->SetCommitDuration(commit_timeout_ms_); + int64_t odd_time_ms = ttl_timestamp_ms_ - get_millis(); + if (odd_time_ms < commit_timeout_ms_) { + commit_timeout_ms_ = odd_time_ms > 0 ? odd_time_ms : 1; + } + gtxn_internal_->SetCommitDuration(commit_timeout_ms_); - /// begin prewrite - gtxn_internal_->TEST_Sleep(); + /// begin prewrite + gtxn_internal_->TEST_Sleep(); - // on ReadCommitedSnapshot level will get new timestamp before prewrite - if (isolation_level_ == IsolationLevel::kReadCommitedSnapshot) { - if (FLAGS_tera_gtxn_test_opened) { - prewrite_start_ts_ = gtxn_internal_->TEST_GetPrewriteStartTimestamp(); - } else if (!FLAGS_tera_sdk_tso_client_enabled) { - start_ts_ = get_micros(); - } else { - timeoracle::TimeoracleClientImpl tsoc(thread_pool_, tso_cluster_); - prewrite_start_ts_ = tsoc.GetTimestamp(1); - } - if (prewrite_start_ts_ < start_ts_) { - ErrorCode status; - LOG(ERROR) << "[gtxn][prewrite][" << start_ts_ <<"] get prewrite new ts failed"; - status.SetFailed(ErrorCode::kGTxnTimestampLost, "get prewrite new ts failed"); - SetLastStatus(&status); - RunUserCallback(); - return; - } - gtxn_internal_->SetPrewriteStartTimestamp(prewrite_start_ts_); + // on ReadCommitedSnapshot level will get new timestamp before prewrite + if (isolation_level_ == IsolationLevel::kReadCommitedSnapshot) { + if (FLAGS_tera_gtxn_test_opened) { + prewrite_start_ts_ = gtxn_internal_->TEST_GetPrewriteStartTimestamp(); + } else if (!FLAGS_tera_sdk_tso_client_enabled) { + start_ts_ = get_micros(); + } else { + timeoracle::TimeoracleClientImpl tsoc(thread_pool_, tso_cluster_); + prewrite_start_ts_ = tsoc.GetTimestamp(1); } - VLOG(12) << "[gtxn][prewrite][" << start_ts_ << "]"; - gtxn_internal_->PerfPrewriteDelay(get_micros(), 0); // begin_time - gtxn_prewrite_cnt.Inc(); - - prewrite_iterator_ = writes_.begin(); - primary_write_ = &(prewrite_iterator_->second[0]); - primary_write_->Serialize(prewrite_start_ts_, - gtxn_internal_->GetClientSession(), - &serialized_primary_); - AsyncPrewrite(&prewrite_iterator_->second); + if (prewrite_start_ts_ < start_ts_) { + ErrorCode status; + LOG(ERROR) << "[gtxn][prewrite][" << start_ts_ << "] get prewrite new ts failed"; + status.SetFailed(ErrorCode::kGTxnTimestampLost, "get prewrite new ts failed"); + SetLastStatus(&status); + RunUserCallback(); + return; + } + gtxn_internal_->SetPrewriteStartTimestamp(prewrite_start_ts_); + } + VLOG(12) << "[gtxn][prewrite][" << start_ts_ << "]"; + gtxn_internal_->PerfPrewriteDelay(get_micros(), 0); // begin_time + gtxn_prewrite_cnt.Inc(); + + prewrite_iterator_ = writes_.begin(); + primary_write_ = &(prewrite_iterator_->second[0]); + primary_write_->Serialize(prewrite_start_ts_, gtxn_internal_->GetClientSession(), + &serialized_primary_); + AsyncPrewrite(&prewrite_iterator_->second); } -// [prewrite] Step(1): +// [prewrite] Step(1): // read "lock", "write" column from tera // // aysnc prewrite one row use single_row_txn // void GlobalTxn::AsyncPrewrite(std::vector* ws) { - assert(ws->size() > 0); - // find table and rowkey to new reader and single row txn - Write w = *(ws->begin()); - Table* table = w.Table(); - Transaction* single_row_txn = table->StartRowTransaction(w.RowKey()); - RowReader* reader = table->NewRowReader(w.RowKey()); - // set internal reader timeout - gtxn_internal_->SetInternalSdkTaskTimeout(reader); - // set cf qu and timerange for reader - gtxn_internal_->BuildRowReaderForPrewrite(*ws, reader); - // set callback, context, single row txn for reader - reader->SetCallBack([](RowReader* r){ - GlobalTxn* gtxn = static_cast(((PrewriteContext*)r->GetContext())->gtxn); - gtxn->thread_pool_->AddTask(std::bind(&GlobalTxn::DoPrewriteReaderCallback, gtxn, r)); - }); - PrewriteContext* ctx = new PrewriteContext(ws, this, single_row_txn, w.TableName(), w.RowKey()); - if (gtxn_internal_->IsTimeOut()) { - ctx->status.SetFailed(ErrorCode::kGTxnPrewriteTimeout, "global transaction prewrite timeout"); - VLOG(12) << "[gtxn][prewrite][stxn_read] ignored : " << ctx->DebugString(); - delete single_row_txn; - RunAfterPrewriteFailed(ctx); - } else { - reader->SetContext(ctx); - // get async - VLOG(12) << "[gtxn][prewrite][stxn_read] invoked : " << ctx->DebugString(); - single_row_txn->Get(reader); - } + assert(ws->size() > 0); + // find table and rowkey to new reader and single row txn + Write w = *(ws->begin()); + Table* table = w.Table(); + Transaction* single_row_txn = table->StartRowTransaction(w.RowKey()); + RowReader* reader = table->NewRowReader(w.RowKey()); + // set internal reader timeout + gtxn_internal_->SetInternalSdkTaskTimeout(reader); + // set cf qu and timerange for reader + gtxn_internal_->BuildRowReaderForPrewrite(*ws, reader); + // set callback, context, single row txn for reader + reader->SetCallBack([](RowReader* r) { + GlobalTxn* gtxn = static_cast(((PrewriteContext*)r->GetContext())->gtxn); + gtxn->thread_pool_->AddTask(std::bind(&GlobalTxn::DoPrewriteReaderCallback, gtxn, r)); + }); + PrewriteContext* ctx = new PrewriteContext(ws, this, single_row_txn, w.TableName(), w.RowKey()); + if (gtxn_internal_->IsTimeOut()) { + ctx->status.SetFailed(ErrorCode::kGTxnPrewriteTimeout, "global transaction prewrite timeout"); + VLOG(12) << "[gtxn][prewrite][stxn_read] ignored : " << ctx->DebugString(); + delete single_row_txn; + RunAfterPrewriteFailed(ctx); + } else { + reader->SetContext(ctx); + // get async + VLOG(12) << "[gtxn][prewrite][stxn_read] invoked : " << ctx->DebugString(); + single_row_txn->Get(reader); + } } -// [prewrite] Step(2): -// a) verify [prewrite] step(1) read result status and no conflict -// b) write "lock" and "data" column to tera, through same single_row_txn in step(1) +// [prewrite] Step(2): +// a) verify [prewrite] step(1) read result status and no conflict +// b) write "lock" and "data" column to tera, through same single_row_txn +// in step(1) // // call by [prewrite] step(1),through reader callback -// +// void GlobalTxn::DoPrewriteReaderCallback(RowReader* r) { - std::unique_ptr reader(static_cast(r)); - PrewriteContext* ctx = (PrewriteContext*)reader->GetContext(); - Transaction* single_row_txn = ctx->stxn; - if (reader->GetError().GetType() != ErrorCode::kNotFound - && reader->GetError().GetType() != ErrorCode::kOK) { - ctx->status = reader->GetError(); - VLOG(12) << "[gtxn][prewrite][stxn_read] failed : " << ctx->status.ToString(); - if (gtxn_internal_->IsTimeOut() || reader->GetError().GetType() == ErrorCode::kTimeout) { - ctx->status.SetFailed(ErrorCode::kGTxnPrewriteTimeout, ctx->status.ToString()); - } - delete single_row_txn; - RunAfterPrewriteFailed(ctx); - } else if (gtxn_internal_->ConflictWithOtherWrite(ctx->ws, reader, &(ctx->status))) { - VLOG(12) << "[gtxn][prewrite][stxn_read] failed : " << ctx->status.ToString(); - delete single_row_txn; - RunAfterPrewriteFailed(ctx); + std::unique_ptr reader(static_cast(r)); + PrewriteContext* ctx = (PrewriteContext*)reader->GetContext(); + Transaction* single_row_txn = ctx->stxn; + ctx->status = reader->GetError(); + if (reader->GetError().GetType() != ErrorCode::kNotFound && + reader->GetError().GetType() != ErrorCode::kOK) { + VLOG(12) << "[gtxn][prewrite][stxn_read] failed : " << ctx->status.ToString(); + if (gtxn_internal_->IsTimeOut() || reader->GetError().GetType() == ErrorCode::kTimeout) { + ctx->status.SetFailed(ErrorCode::kGTxnPrewriteTimeout, ctx->status.ToString()); + } + delete single_row_txn; + RunAfterPrewriteFailed(ctx); + } else if (gtxn_internal_->ConflictWithOtherWrite(ctx->ws, reader, &(ctx->status))) { + VLOG(12) << "[gtxn][prewrite][stxn_read] failed : " << ctx->status.ToString(); + delete single_row_txn; + RunAfterPrewriteFailed(ctx); + } else { + VLOG(12) << "[gtxn][prewrite][stxn_read] succeed, table: " << ctx->DebugString(); + Table* t = reader->GetTable(); + RowMutation* prewrite_mu = t->NewRowMutation(reader->RowKey()); + // set internal task timeout + gtxn_internal_->SetInternalSdkTaskTimeout(prewrite_mu); + gtxn_internal_->BuildRowMutationForPrewrite(ctx->ws, prewrite_mu, serialized_primary_); + + // commit single_row_txn + single_row_txn->SetContext(ctx); + single_row_txn->SetCommitCallback([](Transaction* single_txn) { + GlobalTxn* gtxn = static_cast(((PrewriteContext*)single_txn->GetContext())->gtxn); + gtxn->thread_pool_->AddTask(std::bind(&GlobalTxn::DoPrewriteCallback, gtxn, single_txn)); + }); + if (gtxn_internal_->IsTimeOut()) { + ctx->status.SetFailed(ErrorCode::kGTxnPrewriteTimeout, "global transaction prewrite timeout"); + VLOG(12) << "[gtxn][prewrite][stxn_commit] ignored : " << ctx->DebugString(); + delete single_row_txn; + RunAfterPrewriteFailed(ctx); } else { - VLOG(12) << "[gtxn][prewrite][stxn_read] succeed, table=" << ctx->DebugString(); - Table* t = reader->GetTable(); - RowMutation* prewrite_mu = t->NewRowMutation(reader->RowKey()); - // set internal task timeout - gtxn_internal_->SetInternalSdkTaskTimeout(prewrite_mu); - gtxn_internal_->BuildRowMutationForPrewrite(ctx->ws, prewrite_mu, - serialized_primary_); - - // commit single_row_txn - single_row_txn->SetContext(ctx); - single_row_txn->SetCommitCallback([](Transaction* single_txn) { - GlobalTxn* gtxn = static_cast(((PrewriteContext*)single_txn->GetContext())->gtxn); - gtxn->thread_pool_->AddTask(std::bind(&GlobalTxn::DoPrewriteCallback, gtxn, single_txn)); - }); - if (gtxn_internal_->IsTimeOut()) { - ctx->status.SetFailed(ErrorCode::kGTxnPrewriteTimeout, "global transaction prewrite timeout"); - VLOG(12) << "[gtxn][prewrite][stxn_commit] ignored : " << ctx->DebugString(); - delete single_row_txn; - RunAfterPrewriteFailed(ctx); - } else { - single_row_txn->ApplyMutation(prewrite_mu); - VLOG(12) << "[gtxn][prewrite][stxn_commit] invoked : " << ctx->DebugString(); - single_row_txn->Commit(); - } - delete prewrite_mu; + single_row_txn->ApplyMutation(prewrite_mu); + VLOG(12) << "[gtxn][prewrite][stxn_commit] invoked : " << ctx->DebugString(); + single_row_txn->Commit(); } + delete prewrite_mu; + } } // prewrite Step(3): @@ -720,40 +708,41 @@ void GlobalTxn::DoPrewriteReaderCallback(RowReader* r) { // if the last prewrite callback and status ok, will call [commit] // // call by [prewrite] step(2), through single_row_txn commit callback -// +// void GlobalTxn::DoPrewriteCallback(Transaction* single_row_txn) { - ErrorCode status = single_row_txn->GetError(); - PrewriteContext* ctx = (PrewriteContext*)single_row_txn->GetContext(); - delete ctx->stxn; - if (gtxn_internal_->IsTimeOut() || status.GetType() != ErrorCode::kOK) { - // wapper timeout status for global transaction - if (gtxn_internal_->IsTimeOut() || status.GetType() == ErrorCode::kTimeout) { - ctx->status.SetFailed(ErrorCode::kGTxnPrewriteTimeout, status.ToString()); - } else { - ctx->status.SetFailed(status.GetType(), status.ToString()); - } - VLOG(12) << "[gtxn][prewrite][stxn_commit] failed : " << ctx->DebugString(); - RunAfterPrewriteFailed(ctx); - } else if (++prewrite_iterator_ != writes_.end()) { - thread_pool_->AddTask(std::bind(&GlobalTxn::AsyncPrewrite, this, &(prewrite_iterator_->second))); - delete ctx; + ErrorCode status = single_row_txn->GetError(); + PrewriteContext* ctx = (PrewriteContext*)single_row_txn->GetContext(); + delete ctx->stxn; + if (gtxn_internal_->IsTimeOut() || status.GetType() != ErrorCode::kOK) { + // wapper timeout status for global transaction + if (gtxn_internal_->IsTimeOut() || status.GetType() == ErrorCode::kTimeout) { + ctx->status.SetFailed(ErrorCode::kGTxnPrewriteTimeout, status.ToString()); } else { - gtxn_internal_->PerfPrewriteDelay(0, get_micros()); // finish_time - VLOG(12) << "prewrite done, next step"; - InternalCommitPhase2(); - delete ctx; + ctx->status.SetFailed(status.GetType(), status.ToString()); } + VLOG(12) << "[gtxn][prewrite][stxn_commit] failed : " << ctx->DebugString(); + RunAfterPrewriteFailed(ctx); + } else if (++prewrite_iterator_ != writes_.end()) { + thread_pool_->AddTask( + std::bind(&GlobalTxn::AsyncPrewrite, this, &(prewrite_iterator_->second))); + delete ctx; + } else { + gtxn_internal_->PerfPrewriteDelay(0, get_micros()); // finish_time + VLOG(12) << "prewrite done, next step"; + InternalCommitPhase2(); + delete ctx; + } } void GlobalTxn::RunAfterPrewriteFailed(PrewriteContext* ctx) { - gtxn_internal_->PerfPrewriteDelay(0, get_micros()); // finish_time - gtxn_prewrite_fail_cnt.Inc(); - if (gtxn_internal_->IsTimeOut() || ctx->status.GetType() == ErrorCode::kTimeout) { - ctx->status.SetFailed(ErrorCode::kGTxnPrewriteTimeout, ctx->status.ToString()); - } - SetLastStatus(&ctx->status); - delete ctx; - RunUserCallback(); + gtxn_internal_->PerfPrewriteDelay(0, get_micros()); // finish_time + gtxn_prewrite_fail_cnt.Inc(); + if (gtxn_internal_->IsTimeOut() || ctx->status.GetType() == ErrorCode::kTimeout) { + ctx->status.SetFailed(ErrorCode::kGTxnPrewriteTimeout, ctx->status.ToString()); + } + SetLastStatus(&ctx->status); + delete ctx; + RunUserCallback(); } // commit phase2 Step(1): @@ -764,401 +753,392 @@ void GlobalTxn::RunAfterPrewriteFailed(PrewriteContext* ctx) { // // call by [prewrite] step(3) void GlobalTxn::InternalCommitPhase2() { - gtxn_internal_->PerfPrimaryCommitDelay(get_micros(), 0); // begin_time - gtxn_primary_cnt.Inc(); - gtxn_internal_->TEST_Sleep(); // end prewrite - ErrorCode status; - status.SetFailed(ErrorCode::kOK); - gtxn_internal_->TEST_Sleep(); // wait to begin commit + gtxn_internal_->PerfPrimaryCommitDelay(get_micros(), 0); // begin_time + gtxn_primary_cnt.Inc(); + gtxn_internal_->TEST_Sleep(); // end prewrite + ErrorCode status; + status.SetFailed(ErrorCode::kOK); + gtxn_internal_->TEST_Sleep(); // wait to begin commit + + if (FLAGS_tera_gtxn_test_opened) { + commit_ts_ = gtxn_internal_->TEST_GetCommitTimestamp(); + } else if (!FLAGS_tera_sdk_tso_client_enabled) { + commit_ts_ = get_micros(); + } else { + timeoracle::TimeoracleClientImpl tsoc(thread_pool_, tso_cluster_); + commit_ts_ = tsoc.GetTimestamp(1); + } + if (commit_ts_ < prewrite_start_ts_) { + LOG(ERROR) << "[gtxn][commit] get commit ts failed"; + status.SetFailed(ErrorCode::kGTxnTimestampLost, "get commit ts failed"); + SetLastStatus(&status); + gtxn_internal_->PerfPrimaryCommitDelay(0, get_micros()); + gtxn_primary_fail_cnt.Inc(); + RunUserCallback(); + return; + } - if (FLAGS_tera_gtxn_test_opened) { - commit_ts_ = gtxn_internal_->TEST_GetCommitTimestamp(); - } else if (!FLAGS_tera_sdk_tso_client_enabled) { - commit_ts_ = get_micros(); - } else { - timeoracle::TimeoracleClientImpl tsoc(thread_pool_, tso_cluster_); - commit_ts_ = tsoc.GetTimestamp(1); - } - if (commit_ts_ < prewrite_start_ts_) { - LOG(ERROR) << "[gtxn][commit] get commit ts failed"; - status.SetFailed(ErrorCode::kGTxnTimestampLost, "get commit ts failed"); - SetLastStatus(&status); - gtxn_internal_->PerfPrimaryCommitDelay(0, get_micros()); - gtxn_primary_fail_cnt.Inc(); - RunUserCallback(); - return; - } + VLOG(12) << "[gtxn][commit] commit_ts:" << commit_ts_; + gtxn_internal_->TEST_Sleep(); // wait to begin primary commit - VLOG(12) << "[gtxn][commit] commit_ts:" << commit_ts_; - gtxn_internal_->TEST_Sleep(); // wait to begin primary commit - - /// begin to commit primary - VerifyPrimaryLocked(); + /// begin to commit primary + VerifyPrimaryLocked(); } void GlobalTxn::VerifyPrimaryLocked() { - Table* pri_t = primary_write_->Table(); - Transaction* pri_txn = pri_t->StartRowTransaction(primary_write_->RowKey()); - RowReader* reader = pri_t->NewRowReader(primary_write_->RowKey()); - // set internal task timeout - gtxn_internal_->SetInternalSdkTaskTimeout(reader); - reader->AddColumn(primary_write_->ColFamily(), primary_write_->LockName()); - reader->SetTimeRange(prewrite_start_ts_, prewrite_start_ts_); - PrimaryTxnContext* ctx = new PrimaryTxnContext(this, pri_txn); - reader->SetContext(ctx); - reader->SetCallBack([](RowReader* r) { - GlobalTxn* gtxn = static_cast(((PrimaryTxnContext*)r->GetContext())->gtxn); - gtxn->DoVerifyPrimaryLockedCallback(r); - }); - pri_txn->Get(reader); + Table* pri_t = primary_write_->Table(); + Transaction* pri_txn = pri_t->StartRowTransaction(primary_write_->RowKey()); + RowReader* reader = pri_t->NewRowReader(primary_write_->RowKey()); + // set internal task timeout + gtxn_internal_->SetInternalSdkTaskTimeout(reader); + reader->AddColumn(primary_write_->ColFamily(), primary_write_->LockName()); + VLOG(12) << "[gtxn][vertify][get] " << primary_write_->DebugString() + << " LockName:Type:prewrite_start_ts " << primary_write_->LockName() << ":" + << primary_write_->WriteType() << ":" << prewrite_start_ts_; + reader->SetTimeRange(prewrite_start_ts_, prewrite_start_ts_); + PrimaryTxnContext* ctx = new PrimaryTxnContext(this, pri_txn); + reader->SetContext(ctx); + reader->SetCallBack([](RowReader* r) { + GlobalTxn* gtxn = static_cast(((PrimaryTxnContext*)r->GetContext())->gtxn); + gtxn->DoVerifyPrimaryLockedCallback(r); + }); + pri_txn->Get(reader); } void GlobalTxn::DoVerifyPrimaryLockedCallback(RowReader* reader) { - ErrorCode status = reader->GetError(); - PrimaryTxnContext* ctx = (PrimaryTxnContext*)reader->GetContext(); - Transaction* pri_txn = ctx->stxn; - delete reader; - - if (status.GetType() == ErrorCode::kOK) { - pri_txn->SetContext(ctx); - CommitPrimary(pri_txn); - } else { - delete pri_txn; - delete ctx; - if (status.GetType() == ErrorCode::kNotFound) { - status.SetFailed(ErrorCode::kGTxnPrimaryLost, "primary 'lock' lost before commit"); - } else if (status.GetType() == ErrorCode::kTimeout) { - status.SetFailed(ErrorCode::kGTxnPrimaryCommitTimeout, status.ToString()); - } - SetLastStatus(&status); - gtxn_primary_fail_cnt.Inc(); - gtxn_internal_->PerfPrimaryCommitDelay(0, get_micros()); // finish_time - RunUserCallback(); + ErrorCode status = reader->GetError(); + PrimaryTxnContext* ctx = (PrimaryTxnContext*)reader->GetContext(); + Transaction* pri_txn = ctx->stxn; + delete reader; + + if (status.GetType() == ErrorCode::kOK) { + pri_txn->SetContext(ctx); + CommitPrimary(pri_txn); + } else { + delete pri_txn; + delete ctx; + if (status.GetType() == ErrorCode::kNotFound) { + status.SetFailed(ErrorCode::kGTxnPrimaryLost, "primary 'lock' lost before commit"); + } else if (status.GetType() == ErrorCode::kTimeout) { + status.SetFailed(ErrorCode::kGTxnPrimaryCommitTimeout, status.ToString()); } + SetLastStatus(&status); + gtxn_primary_fail_cnt.Inc(); + gtxn_internal_->PerfPrimaryCommitDelay(0, get_micros()); // finish_time + RunUserCallback(); + } } void GlobalTxn::CommitPrimary(Transaction* pri_txn) { - Table* pri_t = primary_write_->Table(); - RowMutation* primary_mu = pri_t->NewRowMutation(primary_write_->RowKey()); - // set internal task timeout - gtxn_internal_->SetInternalSdkTaskTimeout(primary_mu); - primary_mu->Put(primary_write_->ColFamily(), primary_write_->WriteName(), - EncodeWriteValue(primary_write_->WriteType(), prewrite_start_ts_), commit_ts_); - primary_mu->DeleteColumns(primary_write_->ColFamily(), primary_write_->LockName(), commit_ts_); - pri_txn->ApplyMutation(primary_mu); - pri_txn->SetCommitCallback([] (Transaction* txn) { - GlobalTxn* gtxn = static_cast(((PrimaryTxnContext*)txn->GetContext())->gtxn); - gtxn->CheckPrimaryStatusAndCommmitSecondaries(txn); - }); - pri_txn->Commit(); - delete primary_mu; + Table* pri_t = primary_write_->Table(); + RowMutation* primary_mu = pri_t->NewRowMutation(primary_write_->RowKey()); + // set internal task timeout + gtxn_internal_->SetInternalSdkTaskTimeout(primary_mu); + primary_mu->Put(primary_write_->ColFamily(), primary_write_->WriteName(), + EncodeWriteValue(primary_write_->WriteType(), prewrite_start_ts_), commit_ts_); + primary_mu->DeleteColumns(primary_write_->ColFamily(), primary_write_->LockName(), commit_ts_); + pri_txn->ApplyMutation(primary_mu); + pri_txn->SetCommitCallback([](Transaction* txn) { + GlobalTxn* gtxn = static_cast(((PrimaryTxnContext*)txn->GetContext())->gtxn); + gtxn->CheckPrimaryStatusAndCommmitSecondaries(txn); + }); + pri_txn->Commit(); + delete primary_mu; } void GlobalTxn::CheckPrimaryStatusAndCommmitSecondaries(Transaction* pri_txn) { - std::unique_ptr ctx((PrimaryTxnContext*)pri_txn->GetContext()); - ErrorCode status = pri_txn->GetError(); - delete ctx->stxn; - gtxn_internal_->TEST_Sleep(); - // primary commit failed callback and return - if (status.GetType() != tera::ErrorCode::kOK) { - VLOG(12) << "[gtxn][commit] primary failed :[" << status.ToString() << "]"; - // Callback Point : primary commit failed - if (status.GetType() == ErrorCode::kTimeout) { - status.SetFailed(ErrorCode::kGTxnPrimaryCommitTimeout, status.ToString()); - } - SetLastStatus(&status); - gtxn_primary_fail_cnt.Inc(); - gtxn_internal_->PerfPrimaryCommitDelay(0, get_micros()); // finish_time - RunUserCallback(); - return; - } - gtxn_internal_->PerfPrimaryCommitDelay(0, get_micros()); // finish_time - if (acks_cnt_.Get() == 0 && notifies_cnt_.Get() == 0) { - SetLastStatus(&status); - } - // wait primary commit done - VLOG(12) << "[gtxn][commit] succeed :[" << start_ts_ - << "," << prewrite_start_ts_ << "," << commit_ts_ << "]"; - - std::vector* ws = &(writes_.begin()->second); - if (ws->size() == 1) { - writes_.erase(writes_.begin()); - writes_cnt_.Dec(); - } else { - ws->erase(ws->begin()); - } - - all_task_pushed_ = false; - /// begin commit secondaries - for (auto &same_row_writes : writes_) { - thread_pool_->AddTask(std::bind(&GlobalTxn::AsyncCommitSecondaries, - this, &(same_row_writes.second))); - } - - /// begin ack - for (auto &same_row_acks : acks_) { - thread_pool_->AddTask(std::bind(&GlobalTxn::AsyncAck, - this, &(same_row_acks.second))); + std::unique_ptr ctx((PrimaryTxnContext*)pri_txn->GetContext()); + ErrorCode status = pri_txn->GetError(); + delete ctx->stxn; + gtxn_internal_->TEST_Sleep(); + // primary commit failed callback and return + if (status.GetType() != tera::ErrorCode::kOK) { + VLOG(12) << "[gtxn][commit] primary failed :[" << status.ToString() << "]"; + // Callback Point : primary commit failed + if (status.GetType() == ErrorCode::kTimeout) { + status.SetFailed(ErrorCode::kGTxnPrimaryCommitTimeout, status.ToString()); } - /// begin notify - for (auto &same_row_notifies : notifies_) { - thread_pool_->AddTask(std::bind(&GlobalTxn::AsyncNotify, - this, &(same_row_notifies.second))); - } - bool should_callback = false; - { - MutexLock lock(&mu_); - all_task_pushed_ = true; - should_callback = commit_secondaries_done_cnt_.Get() == writes_cnt_.Get() - && acks_cnt_.Get() == ack_done_cnt_.Get() - && notifies_cnt_.Get() == notify_done_cnt_.Get() - && all_task_pushed_ == true; - } - if (should_callback) { - RunUserCallback(); - } - + SetLastStatus(&status); + gtxn_primary_fail_cnt.Inc(); + gtxn_internal_->PerfPrimaryCommitDelay(0, get_micros()); // finish_time + RunUserCallback(); + return; + } + gtxn_internal_->PerfPrimaryCommitDelay(0, get_micros()); // finish_time + if (acks_cnt_.Get() == 0 && notifies_cnt_.Get() == 0) { + SetLastStatus(&status); + } + // wait primary commit done + VLOG(12) << "[gtxn][commit] succeed :[" << start_ts_ << "," << prewrite_start_ts_ << "," + << commit_ts_ << "]"; + + std::vector* ws = &(writes_.begin()->second); + if (ws->size() == 1) { + writes_.erase(writes_.begin()); + writes_cnt_.Dec(); + } else { + ws->erase(ws->begin()); + } + + all_task_pushed_ = false; + /// begin commit secondaries + for (auto& same_row_writes : writes_) { + thread_pool_->AddTask( + std::bind(&GlobalTxn::AsyncCommitSecondaries, this, &(same_row_writes.second))); + } + + /// begin ack + for (auto& same_row_acks : acks_) { + thread_pool_->AddTask(std::bind(&GlobalTxn::AsyncAck, this, &(same_row_acks.second))); + } + /// begin notify + for (auto& same_row_notifies : notifies_) { + thread_pool_->AddTask(std::bind(&GlobalTxn::AsyncNotify, this, &(same_row_notifies.second))); + } + bool should_callback = false; + { + MutexLock lock(&mu_); + all_task_pushed_ = true; + should_callback = commit_secondaries_done_cnt_.Get() == writes_cnt_.Get() && + acks_cnt_.Get() == ack_done_cnt_.Get() && + notifies_cnt_.Get() == notify_done_cnt_.Get() && all_task_pushed_ == true; + } + if (should_callback) { + RunUserCallback(); + } } void GlobalTxn::AsyncAck(std::vector* ws) { - gtxn_internal_->PerfAckDelay(get_micros(), 0); - gtxn_acks_cnt.Inc(); - assert(ws->size() > 0); - Write w = *(ws->begin()); - Table* table = w.Table(); - RowMutation* mu = table->NewRowMutation(w.RowKey()); - gtxn_internal_->SetInternalSdkTaskTimeout(mu); - gtxn_internal_->BuildRowMutationForAck(ws, mu); - mu->SetCallBack([](RowMutation* row_mu) { - ((GlobalTxn*)row_mu->GetContext())->DoAckCallback(row_mu);}); - mu->SetContext(this); - table->ApplyMutation(mu); + gtxn_internal_->PerfAckDelay(get_micros(), 0); + gtxn_acks_cnt.Inc(); + assert(ws->size() > 0); + Write w = *(ws->begin()); + Table* table = w.Table(); + RowMutation* mu = table->NewRowMutation(w.RowKey()); + gtxn_internal_->SetInternalSdkTaskTimeout(mu); + gtxn_internal_->BuildRowMutationForAck(ws, mu); + mu->SetCallBack( + [](RowMutation* row_mu) { ((GlobalTxn*)row_mu->GetContext())->DoAckCallback(row_mu); }); + mu->SetContext(this); + table->ApplyMutation(mu); } void GlobalTxn::DoAckCallback(RowMutation* mutation) { - if (mutation->GetError().GetType() != tera::ErrorCode::kOK) { - LOG(WARNING) << "[gtxn][commit][ack], failed" - << mutation->GetError().GetReason(); - ErrorCode status; - status.SetFailed(ErrorCode::kGTxnOKButAckFailed, mutation->GetError().ToString()); - SetLastStatus(&status); - gtxn_acks_fail_cnt.Inc(); - } - delete mutation; - bool should_callback = false; - { - MutexLock lock(&mu_); - ack_done_cnt_.Inc(); - gtxn_internal_->PerfAckDelay(0, get_micros()); - should_callback = commit_secondaries_done_cnt_.Get() == writes_cnt_.Get() - && acks_cnt_.Get() == ack_done_cnt_.Get() - && notifies_cnt_.Get() == notify_done_cnt_.Get(); - } - - if (should_callback) { - RunUserCallback(); - } + if (mutation->GetError().GetType() != tera::ErrorCode::kOK) { + LOG(WARNING) << "[gtxn][commit][ack], failed" << mutation->GetError().GetReason(); + ErrorCode status; + status.SetFailed(ErrorCode::kGTxnOKButAckFailed, mutation->GetError().ToString()); + SetLastStatus(&status); + gtxn_acks_fail_cnt.Inc(); + } + delete mutation; + bool should_callback = false; + { + MutexLock lock(&mu_); + ack_done_cnt_.Inc(); + gtxn_internal_->PerfAckDelay(0, get_micros()); + should_callback = commit_secondaries_done_cnt_.Get() == writes_cnt_.Get() && + acks_cnt_.Get() == ack_done_cnt_.Get() && + notifies_cnt_.Get() == notify_done_cnt_.Get(); + } + + if (should_callback) { + RunUserCallback(); + } } void GlobalTxn::AsyncNotify(std::vector* ws) { - gtxn_internal_->PerfNotifyDelay(get_micros(), 0); - gtxn_notifies_cnt.Inc(); - assert(ws->size() > 0); - Write w = *(ws->begin()); - Table* table = w.Table(); - RowMutation* mu = table->NewRowMutation(w.RowKey()); - gtxn_internal_->SetInternalSdkTaskTimeout(mu); - gtxn_internal_->BuildRowMutationForNotify(ws, mu, commit_ts_); - mu->SetCallBack([](RowMutation* row_mu) { - ((GlobalTxn*)row_mu->GetContext())->DoNotifyCallback(row_mu);}); - mu->SetContext(this); - table->ApplyMutation(mu); + gtxn_internal_->PerfNotifyDelay(get_micros(), 0); + gtxn_notifies_cnt.Inc(); + assert(ws->size() > 0); + Write w = *(ws->begin()); + Table* table = w.Table(); + RowMutation* mu = table->NewRowMutation(w.RowKey()); + gtxn_internal_->SetInternalSdkTaskTimeout(mu); + gtxn_internal_->BuildRowMutationForNotify(ws, mu, commit_ts_); + mu->SetCallBack( + [](RowMutation* row_mu) { ((GlobalTxn*)row_mu->GetContext())->DoNotifyCallback(row_mu); }); + mu->SetContext(this); + table->ApplyMutation(mu); } void GlobalTxn::DoNotifyCallback(RowMutation* mutation) { - if (mutation->GetError().GetType() != tera::ErrorCode::kOK) { - LOG(WARNING) << "[gtxn][commit][notify], failed" - << mutation->GetError().GetReason(); - ErrorCode status; - status.SetFailed(ErrorCode::kGTxnOKButNotifyFailed, mutation->GetError().ToString()); - gtxn_notifies_fail_cnt.Inc(); - SetLastStatus(&status); - } - delete mutation; - - bool should_callback = false; - { - MutexLock lock(&mu_); - notify_done_cnt_.Inc(); - gtxn_internal_->PerfNotifyDelay(0, get_micros()); - should_callback = commit_secondaries_done_cnt_.Get() == writes_cnt_.Get() - && acks_cnt_.Get() == ack_done_cnt_.Get() - && notifies_cnt_.Get() == notify_done_cnt_.Get() - && all_task_pushed_ == true; - } - - if (should_callback) { - RunUserCallback(); - } + if (mutation->GetError().GetType() != tera::ErrorCode::kOK) { + LOG(WARNING) << "[gtxn][commit][notify], failed" << mutation->GetError().GetReason(); + ErrorCode status; + status.SetFailed(ErrorCode::kGTxnOKButNotifyFailed, mutation->GetError().ToString()); + gtxn_notifies_fail_cnt.Inc(); + SetLastStatus(&status); + } + delete mutation; + + bool should_callback = false; + { + MutexLock lock(&mu_); + notify_done_cnt_.Inc(); + gtxn_internal_->PerfNotifyDelay(0, get_micros()); + should_callback = commit_secondaries_done_cnt_.Get() == writes_cnt_.Get() && + acks_cnt_.Get() == ack_done_cnt_.Get() && + notifies_cnt_.Get() == notify_done_cnt_.Get() && all_task_pushed_ == true; + } + + if (should_callback) { + RunUserCallback(); + } } void GlobalTxn::AsyncCommitSecondaries(std::vector* ws) { - gtxn_internal_->PerfSecondariesCommitDelay(get_micros(), 0); // begin time - gtxn_secondaries_cnt.Inc(); - assert(ws->size() > 0); - Write w = *(ws->begin()); - Table* table = w.Table(); - RowMutation* mu = table->NewRowMutation(w.RowKey()); - gtxn_internal_->SetInternalSdkTaskTimeout(mu); - gtxn_internal_->BuildRowMutationForCommit(ws, mu, commit_ts_); - mu->SetCallBack([](RowMutation* row_mu) { - ((GlobalTxn*)row_mu->GetContext())->DoCommitSecondariesCallback(row_mu);}); - mu->SetContext(this); - table->ApplyMutation(mu); + gtxn_internal_->PerfSecondariesCommitDelay(get_micros(), 0); // begin time + gtxn_secondaries_cnt.Inc(); + assert(ws->size() > 0); + Write w = *(ws->begin()); + Table* table = w.Table(); + RowMutation* mu = table->NewRowMutation(w.RowKey()); + gtxn_internal_->SetInternalSdkTaskTimeout(mu); + gtxn_internal_->BuildRowMutationForCommit(ws, mu, commit_ts_); + mu->SetCallBack([](RowMutation* row_mu) { + ((GlobalTxn*)row_mu->GetContext())->DoCommitSecondariesCallback(row_mu); + }); + mu->SetContext(this); + table->ApplyMutation(mu); } void GlobalTxn::DoCommitSecondariesCallback(RowMutation* mutation) { - if (mutation->GetError().GetType() != tera::ErrorCode::kOK) { - LOG(WARNING) << "[gtxn][commit][secondaries], failed" - << mutation->GetError().GetReason(); - gtxn_secondaries_fail_cnt.Inc(); - } - delete mutation; - - bool should_callback = false; - { - MutexLock lock(&mu_); - commit_secondaries_done_cnt_.Inc(); - gtxn_internal_->PerfSecondariesCommitDelay(0, get_micros()); // finish time - should_callback = commit_secondaries_done_cnt_.Get() == writes_cnt_.Get() - && acks_cnt_.Get() == ack_done_cnt_.Get() - && notifies_cnt_.Get() == notify_done_cnt_.Get() - && all_task_pushed_ == true; - } - - if (should_callback) { - RunUserCallback(); - } + if (mutation->GetError().GetType() != tera::ErrorCode::kOK) { + LOG(WARNING) << "[gtxn][commit][secondaries], failed" << mutation->GetError().GetReason(); + gtxn_secondaries_fail_cnt.Inc(); + } + delete mutation; + + bool should_callback = false; + { + MutexLock lock(&mu_); + commit_secondaries_done_cnt_.Inc(); + gtxn_internal_->PerfSecondariesCommitDelay(0, get_micros()); // finish time + should_callback = commit_secondaries_done_cnt_.Get() == writes_cnt_.Get() && + acks_cnt_.Get() == ack_done_cnt_.Get() && + notifies_cnt_.Get() == notify_done_cnt_.Get() && all_task_pushed_ == true; + } + + if (should_callback) { + RunUserCallback(); + } } void GlobalTxn::ApplyMutation(RowMutation* row_mu) { - assert(row_mu != NULL); - - RowMutationImpl* row_mu_impl = static_cast(row_mu); - row_mu_impl->SetTransaction(this); - row_mu_impl->SetError(ErrorCode::kOK); - - bool can_apply = false; - if (!has_commited_.load()) { - assert(put_fail_cnt_.Get() > -1); - put_fail_cnt_.Inc(); - // check writes_size_ over limit - MutexLock lock(&mu_); - can_apply = gtxn_internal_->VerifyWritesSize(row_mu, &writes_size_); - } else { - std::string reason = "ApplyMutation failed, txn has committed at [" - + std::to_string(commit_ts_) + "]"; - LOG(ERROR) << "[gtxn][apply_mutation][" << start_ts_ << "]" << reason; - row_mu_impl->SetError(ErrorCode::kGTxnOpAfterCommit, reason); - } - - size_t writes_cnt = 0; - - if (can_apply && gtxn_internal_->VerifyUserRowMutation(row_mu)) { - Table* table = row_mu_impl->GetTable(); - const std::string& tablename = table->GetName(); - const std::string& row_key = row_mu->RowKey(); - for (size_t i = 0; i < row_mu->MutationNum(); ++i) { - const RowMutation::Mutation& mu = row_mu->GetMutation(i); - Cell cell(table, row_key, mu.family, mu.qualifier, start_ts_, mu.value); - Write w(cell, mu.type); - ++writes_cnt; - SaveWrite(tablename, row_key, w); - } + assert(row_mu != NULL); + + RowMutationImpl* row_mu_impl = static_cast(row_mu); + row_mu_impl->SetTransaction(this); + row_mu_impl->SetError(ErrorCode::kOK); + + bool can_apply = false; + if (!has_commited_.load()) { + assert(put_fail_cnt_.Get() > -1); + put_fail_cnt_.Inc(); + // check writes_size_ over limit + MutexLock lock(&mu_); + can_apply = gtxn_internal_->VerifyWritesSize(row_mu, &writes_size_); + } else { + std::string reason = + "ApplyMutation failed, txn has committed at [" + std::to_string(commit_ts_) + "]"; + LOG(ERROR) << "[gtxn][apply_mutation][" << start_ts_ << "]" << reason; + row_mu_impl->SetError(ErrorCode::kGTxnOpAfterCommit, reason); + } + + size_t writes_cnt = 0; + + if (can_apply && gtxn_internal_->VerifyUserRowMutation(row_mu)) { + Table* table = row_mu_impl->GetTable(); + const std::string& tablename = table->GetName(); + const std::string& row_key = row_mu->RowKey(); + for (size_t i = 0; i < row_mu->MutationNum(); ++i) { + const RowMutation::Mutation& mu = row_mu->GetMutation(i); + Cell cell(table, row_key, mu.family, mu.qualifier, start_ts_, mu.value); + Write w(cell, mu.type); + ++writes_cnt; + SaveWrite(tablename, row_key, w); } + } - bool is_async = row_mu_impl->IsAsync(); - ErrorCode mu_err = row_mu_impl->GetError(); - - if (mu_err.GetType() != ErrorCode::kOK || writes_cnt == 0) { - if (!status_returned_) { - status_.SetFailed(mu_err.GetType(), mu_err.GetReason()); - status_returned_ = true; - } - if (is_async) { - thread_pool_->AddTask(std::bind(&RowMutationImpl::RunCallback, row_mu_impl)); - } else { - // nothing to do - // sync mu_err != ok will return before put_fail_cnt -1 - } - return; + bool is_async = row_mu_impl->IsAsync(); + ErrorCode mu_err = row_mu_impl->GetError(); + + if (mu_err.GetType() != ErrorCode::kOK || writes_cnt == 0) { + if (!status_returned_) { + status_.SetFailed(mu_err.GetType(), mu_err.GetReason()); + status_returned_ = true; } if (is_async) { - thread_pool_->AddTask(std::bind(&RowMutationImpl::RunCallback, row_mu_impl)); + thread_pool_->AddTask(std::bind(&RowMutationImpl::RunCallback, row_mu_impl)); + } else { + // nothing to do + // sync mu_err != ok will return before put_fail_cnt -1 } - // only succes put will -1 - assert(put_fail_cnt_.Get() > 0); - put_fail_cnt_.Dec(); + return; + } + if (is_async) { + thread_pool_->AddTask(std::bind(&RowMutationImpl::RunCallback, row_mu_impl)); + } + // only succes put will -1 + assert(put_fail_cnt_.Get() > 0); + put_fail_cnt_.Dec(); } -// for wait commit +// for wait commit void GlobalTxn::WaitForComplete() { - MutexLock lock(&finish_mutex_); - while(!finish_) { - finish_cond_.Wait(); - } + MutexLock lock(&finish_mutex_); + while (!finish_) { + finish_cond_.Wait(); + } } -void GlobalTxn::Ack(Table* t, - const std::string& row_key, - const std::string& column_family, +void GlobalTxn::Ack(Table* t, const std::string& row_key, const std::string& column_family, const std::string& qualifier) { - if (t == NULL) { - LOG(ERROR) << "set ack cell failed"; - return; - } - const std::string& tablename = t->GetName(); - Cell cell(t, row_key, column_family, qualifier); - Write w(cell); - TableWithRowkey twr(tablename, row_key); - MutexLock lock(&mu_); - auto it = acks_.find(twr); - if (it != acks_.end()) { - std::vector* acks_ptr = &(acks_[twr]); - acks_ptr->push_back(w); - } else { - std::vector acks; - acks.push_back(w); - acks_[twr] = acks; - acks_cnt_.Inc(); - } + if (t == NULL) { + LOG(ERROR) << "set ack cell failed"; + return; + } + const std::string& tablename = t->GetName(); + Cell cell(t, row_key, column_family, qualifier); + Write w(cell); + TableWithRowkey twr(tablename, row_key); + MutexLock lock(&mu_); + auto it = acks_.find(twr); + if (it != acks_.end()) { + std::vector* acks_ptr = &(acks_[twr]); + acks_ptr->push_back(w); + } else { + std::vector acks; + acks.push_back(w); + acks_[twr] = acks; + acks_cnt_.Inc(); + } } -void GlobalTxn::Notify(Table* t, - const std::string& row_key, - const std::string& column_family, +void GlobalTxn::Notify(Table* t, const std::string& row_key, const std::string& column_family, const std::string& qualifier) { - if (t == NULL) { - LOG(ERROR) << "set ack cell failed"; - return; - } - const std::string& tablename = t->GetName(); - Cell cell(t, row_key, column_family, qualifier); - Write w(cell); - TableWithRowkey twr(tablename, row_key); - MutexLock lock(&mu_); - auto it = notifies_.find(twr); - if (it != notifies_.end()) { - std::vector* notifies_ptr = &(notifies_[twr]); - notifies_ptr->push_back(w); - } else { - std::vector notifies; - notifies.push_back(w); - notifies_[twr] = notifies; - notifies_cnt_.Inc(); - } + if (t == NULL) { + LOG(ERROR) << "set ack cell failed"; + return; + } + const std::string& tablename = t->GetName(); + Cell cell(t, row_key, column_family, qualifier); + Write w(cell); + TableWithRowkey twr(tablename, row_key); + MutexLock lock(&mu_); + auto it = notifies_.find(twr); + if (it != notifies_.end()) { + std::vector* notifies_ptr = &(notifies_[twr]); + notifies_ptr->push_back(w); + } else { + std::vector notifies; + notifies.push_back(w); + notifies_[twr] = notifies; + notifies_cnt_.Inc(); + } } -} // namespace tera +} // namespace tera /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/sdk/global_txn.h b/src/sdk/global_txn.h index cd4d348d7..126099563 100644 --- a/src/sdk/global_txn.h +++ b/src/sdk/global_txn.h @@ -4,13 +4,13 @@ // // Author: baorenyi@baidu.com -#ifndef TERA_SDK_GLOBAL_TXN_H_ -#define TERA_SDK_GLOBAL_TXN_H_ +#ifndef TERA_SDK_GLOBAL_TXN_H_ +#define TERA_SDK_GLOBAL_TXN_H_ #include #include #include -#include +#include #include "common/mutex.h" #include "io/coding.h" @@ -35,239 +35,220 @@ class PrewriteContext; class PrimaryTxnContext; class GlobalTxn : public Transaction { -public: - static Transaction* NewGlobalTxn(std::shared_ptr client_impl, - common::ThreadPool* thread_pool, - sdk::ClusterFinder* tso_cluster); + public: + static Transaction* NewGlobalTxn(std::shared_ptr client_impl, + common::ThreadPool* thread_pool, + sdk::ClusterFinder* tso_cluster); + + virtual ~GlobalTxn(); + + virtual void ApplyMutation(RowMutation* row_mu); + virtual ErrorCode Get(RowReader* row_reader); + virtual ErrorCode Commit(); + + virtual int64_t GetStartTimestamp() { return start_ts_; } + virtual int64_t GetCommitTimestamp() { return commit_ts_; } + + virtual const ErrorCode& GetError() { return status_; } + + typedef void (*Callback)(Transaction* transaction); + + virtual void SetCommitCallback(Callback callback) { user_commit_callback_ = callback; } + + virtual Callback GetCommitCallback() { return user_commit_callback_; } + + virtual void SetContext(void* context) { user_commit_context_ = context; } + + virtual void* GetContext() { return user_commit_context_; } + + virtual void Ack(Table* t, const std::string& row_key, const std::string& column_family, + const std::string& qualifier); + + virtual void Notify(Table* t, const std::string& row_key, const std::string& column_family, + const std::string& qualifier); + + virtual void SetIsolation(const IsolationLevel& isolation_level); + + virtual IsolationLevel Isolation() { return isolation_level_; } + + virtual void SetTimeout(int64_t timeout_ms); - virtual ~GlobalTxn(); - - virtual void ApplyMutation(RowMutation* row_mu); - virtual ErrorCode Get(RowReader* row_reader); - virtual ErrorCode Commit(); - - virtual int64_t GetStartTimestamp() { return start_ts_; } - virtual int64_t GetCommitTimestamp() { return commit_ts_; } - - virtual const ErrorCode& GetError() { return status_; } - - typedef void (*Callback)(Transaction* transaction); - - virtual void SetCommitCallback(Callback callback) { - user_commit_callback_ = callback; - } - - virtual Callback GetCommitCallback() { - return user_commit_callback_; - } - - virtual void SetContext(void* context) { - user_commit_context_ = context; - } - - virtual void* GetContext() { - return user_commit_context_; - } - - virtual void Ack(Table* t, - const std::string& row_key, - const std::string& column_family, - const std::string& qualifier); - - virtual void Notify(Table* t, - const std::string& row_key, - const std::string& column_family, - const std::string& qualifier); - - virtual void SetIsolation(const IsolationLevel& isolation_level); - - virtual IsolationLevel Isolation() { return isolation_level_; } - - virtual void SetTimeout(int64_t timeout_ms); - -private: - // ----------------------- begin get process --------------------------- // - // read one cell from db - // - // read "lock", "write", "data" columns result from db, - // use async interface of tera [RowReader] - void AsyncGetCell(Cell* cell, RowReaderImpl* user_reader_impl, InternalReaderContext* ctx); - - // check lock write and build cell result - // (1) check read result, if failed will call [MergeCellToRow] - // (2) maybe call [BackoffAndMaybeCleanupLock] and call [AsyncGetCell] retry - // (3) maybe call [FindValueFromResultRow] and call [MergeCellToRow] - void DoGetCellReaderCallback(RowReader* reader); - - // check "lock" and "write" columns, do like percolator - // maybe call CleanLock, RollForward or wait some times - // - // if try_clean == true will be CleanLock not wait - void BackoffAndMaybeCleanupLock(RowReader::TRow& row, - const Cell& cell, - const bool try_clean, - ErrorCode* status); - void CleanLock(const Cell& cell, const tera::PrimaryInfo& primary, + private: + // ----------------------- begin get process --------------------------- // + // read one cell from db + // + // read "lock", "write", "data" columns result from db, + // use async interface of tera [RowReader] + void AsyncGetCell(Cell* cell, RowReaderImpl* user_reader_impl, InternalReaderContext* ctx); + + // check lock write and build cell result + // (1) check read result, if failed will call [MergeCellToRow] + // (2) maybe call [BackoffAndMaybeCleanupLock] and call [AsyncGetCell] retry + // (3) maybe call [FindValueFromResultRow] and call [MergeCellToRow] + void DoGetCellReaderCallback(RowReader* reader); + + // check "lock" and "write" columns, do like percolator + // maybe call CleanLock, RollForward or wait some times + // + // if try_clean == true will be CleanLock not wait + void BackoffAndMaybeCleanupLock(RowReader::TRow& row, const Cell& cell, const bool try_clean, + ErrorCode* status); + void CleanLock(const Cell& cell, const tera::PrimaryInfo& primary, ErrorCode* status, + int64_t lock_ts); + + void RollForward(const Cell& cell, const tera::PrimaryInfo& primary, int lock_type, ErrorCode* status); - - void RollForward(const Cell& cell, - const tera::PrimaryInfo& primary, - int lock_type, - ErrorCode* status); - - // get result form "result_row" and set into "target_cell" - bool FindValueFromResultRow(RowReader::TRow& result_row, Cell* target_cell); - - // call GetCellCallback function @ other thread - void MergeCellToRow(RowReader* internal_reader, const ErrorCode& status); - - // set cell result, merge to value_list and call user_reader_callback - void GetCellCallback(CellReaderContext* ctx); - - void SetReaderStatusAndRunCallback(RowReaderImpl* reader_impl, ErrorCode* status); - - // ------------- begin commit prewrite (commit phase1) ----------------- // - void SaveWrite(const std::string& tablename, - const std::string& row_key, - tera::Write& w); - - // commit entry - // - // do [commit phase1], [commit phase2] will begin at callback - void InternalCommit(); - - // [prewrite] Step(1): - // read "data", "lock", "write" column from tera - // - // aysnc prewrite one row use single_row_txn - void AsyncPrewrite(std::vector* same_row_writes); - - // [prewrite] Step(2): - // a) verify [prewrite] step(1) read result status and no conflict - // b) write "lock" and "data" column to tera, - // through same single_row_txn in step(1) - // - // call by [prewrite] step(1),through reader callback - void DoPrewriteReaderCallback(RowReader* reader); - - // prewrite Step(3): - // verify [prewrite] step(2) single_row_txn commit status, - // if the last prewrite callback and status ok, will call [commit] - // - // call by [prewrite] step(2), through single_row_txn commit callback - void DoPrewriteCallback(Transaction* single_row_txn); - void RunAfterPrewriteFailed(PrewriteContext* ctx); - - // --------------------- begin commit phase2 ---------------------- // - - // commit phase2 Step(1): - // a) get timestamp from timeoracle for commit_ts - // b) sync commit primary write through single_row_txn - // (for this gtxn, on this step only one thread can work) - // c) call [commit phase2] step(2) in a loop - // - // call by [prewrite] step(3) - void InternalCommitPhase2(); - - void VerifyPrimaryLocked(); - - void DoVerifyPrimaryLockedCallback(RowReader* reader); - - void CommitPrimary(Transaction* primary_single_txn); - - void CheckPrimaryStatusAndCommmitSecondaries(Transaction* primary_single_txn); - - // commit phase2 Step(2): - // async commit secondaries writes through RowMutaion - // - // call by [commit phase2] step(1) - void AsyncCommitSecondaries(std::vector* same_row_writes); - - void DoCommitSecondariesCallback(RowMutation* mutation); - - // commit phase2 Step(3): - // async do ack through RowMutaion - // - // call by [commit phase2] step(1) - void AsyncAck(std::vector* same_row_acks); - - void DoAckCallback(RowMutation* mutation); - - // commit phase2 Step(4): - // async do notify through RowMutaion - // - // call by [commit phase2] step(1) - void AsyncNotify(std::vector* same_row_notifies); - - void DoNotifyCallback(RowMutation* mutation); - - /// if user want to delete this transaction, - /// before any async tasks of this transaction finished for failed - void WaitForComplete(); - - void SetLastStatus(ErrorCode* status); - - void RunUserCallback(); - - // -------------------- end commit phase1 and phase2 ------------------- // -private: - GlobalTxn(std::shared_ptr client_impl, - common::ThreadPool* thread_pool, - sdk::ClusterFinder* tso_cluster); - - GlobalTxn(const GlobalTxn&) = delete; - void operator=(const GlobalTxn&) = delete; - - // - typedef std::pair TableWithRowkey; - // tableWithRowkey -> set(write) - typedef std::map> WriterMap; - - std::unique_ptr gtxn_internal_; - ErrorCode status_; - bool status_returned_; // if true gtxn will not change "status_" - - Write* primary_write_; - WriterMap writes_; - WriterMap::iterator prewrite_iterator_; - int64_t writes_size_; - - int64_t start_ts_; - int64_t prewrite_start_ts_; - int64_t commit_ts_; - IsolationLevel isolation_level_; - std::string serialized_primary_; - - WriterMap acks_; - WriterMap notifies_; - - mutable Mutex mu_; - std::atomic finish_; - mutable Mutex finish_mutex_; - common::CondVar finish_cond_; - - std::atomic has_commited_; - - Callback user_commit_callback_; - void* user_commit_context_; - - common::ThreadPool* thread_pool_; - sdk::ClusterFinder* tso_cluster_; - - int64_t commit_timeout_ms_; - int64_t ttl_timestamp_ms_; - - Counter put_fail_cnt_; // put begin +1, done -1 - Counter commit_secondaries_done_cnt_; - Counter ack_done_cnt_; - Counter notify_done_cnt_; - - Counter writes_cnt_; - Counter acks_cnt_; - Counter notifies_cnt_; - std::atomic all_task_pushed_; + + // get result form "result_row" and set into "target_cell" + bool FindValueFromResultRow(RowReader::TRow& result_row, Cell* target_cell); + + // call GetCellCallback function @ other thread + void MergeCellToRow(RowReader* internal_reader, const ErrorCode& status); + + // set cell result, merge to value_list and call user_reader_callback + void GetCellCallback(CellReaderContext* ctx); + + void SetReaderStatusAndRunCallback(RowReaderImpl* reader_impl, ErrorCode* status); + + // ------------- begin commit prewrite (commit phase1) ----------------- // + void SaveWrite(const std::string& tablename, const std::string& row_key, tera::Write& w); + + // commit entry + // + // do [commit phase1], [commit phase2] will begin at callback + void InternalCommit(); + + // [prewrite] Step(1): + // read "data", "lock", "write" column from tera + // + // aysnc prewrite one row use single_row_txn + void AsyncPrewrite(std::vector* same_row_writes); + + // [prewrite] Step(2): + // a) verify [prewrite] step(1) read result status and no conflict + // b) write "lock" and "data" column to tera, + // through same single_row_txn in step(1) + // + // call by [prewrite] step(1),through reader callback + void DoPrewriteReaderCallback(RowReader* reader); + + // prewrite Step(3): + // verify [prewrite] step(2) single_row_txn commit status, + // if the last prewrite callback and status ok, will call [commit] + // + // call by [prewrite] step(2), through single_row_txn commit callback + void DoPrewriteCallback(Transaction* single_row_txn); + void RunAfterPrewriteFailed(PrewriteContext* ctx); + + // --------------------- begin commit phase2 ---------------------- // + + // commit phase2 Step(1): + // a) get timestamp from timeoracle for commit_ts + // b) sync commit primary write through single_row_txn + // (for this gtxn, on this step only one thread can work) + // c) call [commit phase2] step(2) in a loop + // + // call by [prewrite] step(3) + void InternalCommitPhase2(); + + void VerifyPrimaryLocked(); + + void DoVerifyPrimaryLockedCallback(RowReader* reader); + + void CommitPrimary(Transaction* primary_single_txn); + + void CheckPrimaryStatusAndCommmitSecondaries(Transaction* primary_single_txn); + + // commit phase2 Step(2): + // async commit secondaries writes through RowMutaion + // + // call by [commit phase2] step(1) + void AsyncCommitSecondaries(std::vector* same_row_writes); + + void DoCommitSecondariesCallback(RowMutation* mutation); + + // commit phase2 Step(3): + // async do ack through RowMutaion + // + // call by [commit phase2] step(1) + void AsyncAck(std::vector* same_row_acks); + + void DoAckCallback(RowMutation* mutation); + + // commit phase2 Step(4): + // async do notify through RowMutaion + // + // call by [commit phase2] step(1) + void AsyncNotify(std::vector* same_row_notifies); + + void DoNotifyCallback(RowMutation* mutation); + + /// if user want to delete this transaction, + /// before any async tasks of this transaction finished for failed + void WaitForComplete(); + + void SetLastStatus(ErrorCode* status); + + void RunUserCallback(); + + // -------------------- end commit phase1 and phase2 ------------------- // + private: + GlobalTxn(std::shared_ptr client_impl, common::ThreadPool* thread_pool, + sdk::ClusterFinder* tso_cluster); + + GlobalTxn(const GlobalTxn&) = delete; + void operator=(const GlobalTxn&) = delete; + + // + typedef std::pair TableWithRowkey; + // tableWithRowkey -> set(write) + typedef std::map> WriterMap; + + std::unique_ptr gtxn_internal_; + ErrorCode status_; + bool status_returned_; // if true gtxn will not change "status_" + + Write* primary_write_; + WriterMap writes_; + WriterMap::iterator prewrite_iterator_; + int64_t writes_size_; + + int64_t start_ts_; + int64_t prewrite_start_ts_; + int64_t commit_ts_; + IsolationLevel isolation_level_; + std::string serialized_primary_; + + WriterMap acks_; + WriterMap notifies_; + + mutable Mutex mu_; + std::atomic finish_; + mutable Mutex finish_mutex_; + common::CondVar finish_cond_; + + std::atomic has_commited_; + + Callback user_commit_callback_; + void* user_commit_context_; + + common::ThreadPool* thread_pool_; + sdk::ClusterFinder* tso_cluster_; + + int64_t commit_timeout_ms_; + int64_t ttl_timestamp_ms_; + + Counter put_fail_cnt_; // put begin +1, done -1 + Counter commit_secondaries_done_cnt_; + Counter ack_done_cnt_; + Counter notify_done_cnt_; + + Counter writes_cnt_; + Counter acks_cnt_; + Counter notifies_cnt_; + std::atomic all_task_pushed_; }; -} // namespace tera +} // namespace tera #endif // TERA_SDK_GLOBAL_TXN_H_ diff --git a/src/sdk/global_txn_internal.cc b/src/sdk/global_txn_internal.cc index ce13983c8..acaa69c41 100644 --- a/src/sdk/global_txn_internal.cc +++ b/src/sdk/global_txn_internal.cc @@ -17,7 +17,8 @@ DECLARE_bool(tera_gtxn_test_opened); DECLARE_string(tera_gtxn_test_flagfile); DECLARE_int32(tera_gtxn_all_puts_size_limit); -DECLARE_int32(tera_sdk_timeout); +DECLARE_int32(tera_sdk_read_timeout); +DECLARE_int32(tera_sdk_write_timeout); namespace tera { @@ -56,502 +57,471 @@ tera::MetricCounter gtxn_notifies_fail_cnt(kGTxnNotifiesFailCountMetric, kGTxnLa tera::MetricCounter gtxn_tso_delay_us(kGTxnTsoDelayMetric, kGTxnLabelTso); tera::MetricCounter gtxn_tso_req_cnt(kGTxnTsoRequestCountMetric, kGTxnLabelTso); -GlobalTxnInternal::GlobalTxnInternal(std::shared_ptr client_impl) +GlobalTxnInternal::GlobalTxnInternal(std::shared_ptr client_impl) : TEST_GtxnTestHelper_(NULL), - start_ts_(0), - prewrite_start_ts_(0), - terminal_time_(0), + start_ts_(0), + prewrite_start_ts_(0), + terminal_time_(0), is_timeout_(false), client_impl_(client_impl) {} -GlobalTxnInternal::~GlobalTxnInternal() { - PerfReport(); -} +GlobalTxnInternal::~GlobalTxnInternal() { PerfReport(); } void GlobalTxnInternal::SetStartTimestamp(int64_t ts) { - start_ts_ = ts; - prewrite_start_ts_ = ts; + start_ts_ = ts; + prewrite_start_ts_ = ts; } bool GlobalTxnInternal::CheckTable(Table* table, ErrorCode* status) { - assert(table != NULL); - MutexLock lock(&tables_mu_); - TableInfoMap::const_iterator tables_it = tables_.find(table->GetName()); - if (tables_it == tables_.end()) { - TableImpl* table_impl = static_cast(table); - TableSchema schema = table_impl->GetTableSchema(); - if (IsTransactionTable(schema)) { - std::set gtxn_cfs; - FindGlobalTransactionCfs(schema, >xn_cfs); - if (gtxn_cfs.size() > 0) { - tables_[table->GetName()] = std::pair >(table, gtxn_cfs); - return true; - } else { - status->SetFailed(ErrorCode::kBadParam, - "schema check fail: " + table->GetName() + " haven't gtxn cf"); - return false; - } - } else { - status->SetFailed(ErrorCode::kBadParam, - "schema check fail: " + table->GetName() + " not txn table"); - return false; - } + assert(table != NULL); + MutexLock lock(&tables_mu_); + TableInfoMap::const_iterator tables_it = tables_.find(table->GetName()); + if (tables_it == tables_.end()) { + TableImpl* table_impl = static_cast(table); + TableSchema schema = table_impl->GetTableSchema(); + if (IsTransactionTable(schema)) { + std::set gtxn_cfs; + FindGlobalTransactionCfs(schema, >xn_cfs); + if (gtxn_cfs.size() > 0) { + tables_[table->GetName()] = std::pair >(table, gtxn_cfs); + return true; + } else { + status->SetFailed(ErrorCode::kBadParam, + "schema check fail: " + table->GetName() + " haven't gtxn cf"); + return false; + } + } else { + status->SetFailed(ErrorCode::kBadParam, + "schema check fail: " + table->GetName() + " not txn table"); + return false; } - return true; + } + return true; } - - bool GlobalTxnInternal::IsLockedByOthers(RowReader::TRow& row, const Cell& cell) { - if (row[cell.ColFamily()].find(cell.LockName()) != row[cell.ColFamily()].end()) { - for (auto k = row[cell.ColFamily()][cell.LockName()].rbegin(); - k != row[cell.ColFamily()][cell.LockName()].rend(); ++k) { - if (k->first < start_ts_) { - return true; - } - } - } - return false; + if (row[cell.ColFamily()].find(cell.LockName()) != row[cell.ColFamily()].end()) { + for (auto k = row[cell.ColFamily()][cell.LockName()].rbegin(); + k != row[cell.ColFamily()][cell.LockName()].rend(); ++k) { + if (k->first < start_ts_) { + return true; + } + } + } + return false; } bool GlobalTxnInternal::SuspectLive(const tera::PrimaryInfo& primary_info) { - std::string session_str = primary_info.client_session(); - VLOG(12) << "suppect_live : " << session_str; - return client_impl_->IsClientAlive(session_str); + std::string session_str = primary_info.client_session(); + VLOG(12) << "suppect_live : " << session_str; + return client_impl_->IsClientAlive(session_str); } bool GlobalTxnInternal::VerifyUserRowReader(RowReader* user_reader) { - RowReaderImpl* reader_impl = static_cast(user_reader); - const RowReader::ReadColumnList& read_col_list = user_reader->GetReadColumnList(); - ErrorCode status; - bool schema_valid = true; - std::string reason(""); - - Table* table = reader_impl->GetTable(); - if (!CheckTable(table, &status)) { - // table schema error for gtxn - reader_impl->SetError(status.GetType(), status.GetReason()); - return false; - } else if (read_col_list.size() == 0) { - // TODO support read full - reason = "not support read full line in global transaction"; - LOG(ERROR) << "[gtxn][get] " << reason; - reader_impl->SetError(ErrorCode::kBadParam, reason); - return false; - } else if (reader_impl->GetSnapshot() != 0) { - reason = "not support read a snapshot in global transaction"; + RowReaderImpl* reader_impl = static_cast(user_reader); + const RowReader::ReadColumnList& read_col_list = user_reader->GetReadColumnList(); + ErrorCode status; + bool schema_valid = true; + std::string reason(""); + + Table* table = reader_impl->GetTable(); + if (!CheckTable(table, &status)) { + // table schema error for gtxn + reader_impl->SetError(status.GetType(), status.GetReason()); + return false; + } else if (read_col_list.size() == 0) { + // TODO support read full + reason = "not support read full line in global transaction"; + LOG(ERROR) << "[gtxn][get] " << reason; + reader_impl->SetError(ErrorCode::kBadParam, reason); + return false; + } else if (reader_impl->GetSnapshot() != 0) { + reason = "not support read a snapshot in global transaction"; + LOG(ERROR) << "[gtxn][get] " << reason; + reader_impl->SetError(ErrorCode::kBadParam, reason); + return false; + } + + // check schema valid + const std::string& tablename = table->GetName(); + + for (auto it = read_col_list.begin(); it != read_col_list.end(); ++it) { + const std::string& column_family = it->first; + const std::set& qualifier_set = it->second; + + if (qualifier_set.size() == 0) { + reason = "not set any qualifier"; + LOG(ERROR) << "[gtxn][get] " << reason; + reader_impl->SetError(ErrorCode::kBadParam, reason); + schema_valid = false; + break; + } + if (!IsGTxnColumnFamily(tablename, column_family)) { + reason = "table:" + tablename + ",cf:" + column_family + " not set gtxn=\"on\""; + LOG(ERROR) << "[gtxn][get] " << reason; + reader_impl->SetError(ErrorCode::kBadParam, reason); + schema_valid = false; + break; + } + for (auto q_it = qualifier_set.begin(); q_it != qualifier_set.end(); ++q_it) { + const std::string& qualifier = *q_it; + + if (BadQualifier(qualifier)) { + reason = "table:" + tablename + ",qu:" + qualifier + " can't end with \"_*_\""; LOG(ERROR) << "[gtxn][get] " << reason; reader_impl->SetError(ErrorCode::kBadParam, reason); - return false; - } - - // check schema valid - const std::string& tablename = table->GetName(); - - for (auto it = read_col_list.begin(); it != read_col_list.end(); ++it) { - const std::string& column_family = it->first; - const std::set& qualifier_set = it->second; - - if (qualifier_set.size() == 0) { - reason = "not set any qualifier"; - LOG(ERROR) << "[gtxn][get] " << reason; - reader_impl->SetError(ErrorCode::kBadParam, reason); - schema_valid = false; - break; - } - if (!IsGTxnColumnFamily(tablename, column_family)) { - reason = "table:" + tablename + ",cf:" + column_family + " not set gtxn=\"on\""; - LOG(ERROR) << "[gtxn][get] " << reason; - reader_impl->SetError(ErrorCode::kBadParam, reason); - schema_valid = false; - break; - } - for (auto q_it = qualifier_set.begin(); q_it != qualifier_set.end(); ++q_it) { - const std::string& qualifier = *q_it; - - if (BadQualifier(qualifier)) { - reason = "table:" + tablename + ",qu:" + qualifier + " can't end with \"_*_\""; - LOG(ERROR) << "[gtxn][get] " << reason; - reader_impl->SetError(ErrorCode::kBadParam, reason); - schema_valid = false; - break; - } - } + schema_valid = false; + break; + } } - return schema_valid; + } + return schema_valid; } bool GlobalTxnInternal::VerifyUserRowMutation(RowMutation* user_mu) { - RowMutationImpl* row_mu_impl = static_cast(user_mu); - Table* table = row_mu_impl->GetTable(); - - ErrorCode status; - if (!CheckTable(table, &status)) { - // table schema error for gtxn; - row_mu_impl->SetError(status.GetType(), status.GetReason()); - return false; - } else if (row_mu_impl->MutationNum() <= 0) { - // nothing to mutation - row_mu_impl->SetError(ErrorCode::kBadParam, "nothing to mutation"); - return false; - } - - std::string reason(""); - const std::string& tablename = table->GetName(); - - for (size_t i = 0; i < user_mu->MutationNum(); ++i) { - const RowMutation::Mutation& mu = user_mu->GetMutation(i); - // check this qualifier is right - if (BadQualifier(mu.qualifier)) { - reason = "@table" + tablename + ",qu:" + mu.qualifier + - " can't end with \"_*_\""; - LOG(ERROR) << "[gtxn][apply_mutation] " << reason; - row_mu_impl->SetError(ErrorCode::kBadParam, reason); - return false; - } else if (!IsGTxnColumnFamily(tablename, mu.family)) { - // check column has set gtxn="on" - reason = "@table" + tablename + ",cf:" + mu.family + - " not set gtxn=\"on\""; - LOG(ERROR) << "[gtxn][apply_mutation] " << reason; - row_mu_impl->SetError(ErrorCode::kBadParam, reason); - return false; - } else if (mu.type != RowMutation::kPut && mu.type != RowMutation::kDeleteColumn - && mu.type != RowMutation::kDeleteColumns) { - - reason = "@table " + tablename + ",row mutation type is " + - std::to_string(mu.type); - LOG(ERROR) << "[gtxn][apply_mutation] " << reason; - row_mu_impl->SetError(ErrorCode::kGTxnNotSupport, reason); - return false; - } + RowMutationImpl* row_mu_impl = static_cast(user_mu); + Table* table = row_mu_impl->GetTable(); + + ErrorCode status; + if (!CheckTable(table, &status)) { + // table schema error for gtxn; + row_mu_impl->SetError(status.GetType(), status.GetReason()); + return false; + } else if (row_mu_impl->MutationNum() <= 0) { + // nothing to mutation + row_mu_impl->SetError(ErrorCode::kBadParam, "nothing to mutation"); + return false; + } + + std::string reason(""); + const std::string& tablename = table->GetName(); + + for (size_t i = 0; i < user_mu->MutationNum(); ++i) { + const RowMutation::Mutation& mu = user_mu->GetMutation(i); + // check this qualifier is right + if (BadQualifier(mu.qualifier)) { + reason = "@table" + tablename + ",qu:" + mu.qualifier + " can't end with \"_*_\""; + LOG(ERROR) << "[gtxn][apply_mutation] " << reason; + row_mu_impl->SetError(ErrorCode::kBadParam, reason); + return false; + } else if (!IsGTxnColumnFamily(tablename, mu.family)) { + // check column has set gtxn="on" + reason = "@table" + tablename + ",cf:" + mu.family + " not set gtxn=\"on\""; + LOG(ERROR) << "[gtxn][apply_mutation] " << reason; + row_mu_impl->SetError(ErrorCode::kBadParam, reason); + return false; + } else if (mu.type != RowMutation::kPut && mu.type != RowMutation::kDeleteColumn && + mu.type != RowMutation::kDeleteColumns) { + reason = "@table " + tablename + ",row mutation type is " + std::to_string(mu.type); + LOG(ERROR) << "[gtxn][apply_mutation] " << reason; + row_mu_impl->SetError(ErrorCode::kGTxnNotSupport, reason); + return false; } - return true; + } + return true; } bool GlobalTxnInternal::VerifyWritesSize(RowMutation* user_mu, int64_t* size) { - RowMutationImpl* row_mu_impl = static_cast(user_mu); - *size += row_mu_impl->Size(); - if (*size > FLAGS_tera_gtxn_all_puts_size_limit) { - LOG(ERROR) << "[gtxn][apply_mutation][" << start_ts_ << "] failed, " - << "mutations size " << *size << " > limit (" - << FLAGS_tera_gtxn_all_puts_size_limit << ")"; - row_mu_impl->SetError(ErrorCode::kGTxnDataTooLarge); - return false; - } else if ( *size <= 0) { - LOG(ERROR) << "[gtxn][apply_mutation][" << start_ts_ << "] failed, " - << "mutaions size " << *size; - row_mu_impl->SetError(ErrorCode::kBadParam); - return false; - } - return true; + RowMutationImpl* row_mu_impl = static_cast(user_mu); + *size += row_mu_impl->Size(); + if (*size > FLAGS_tera_gtxn_all_puts_size_limit) { + LOG(ERROR) << "[gtxn][apply_mutation][" << start_ts_ << "] failed, " + << "mutations size " << *size << " > limit (" << FLAGS_tera_gtxn_all_puts_size_limit + << ")"; + row_mu_impl->SetError(ErrorCode::kGTxnDataTooLarge); + return false; + } else if (*size <= 0) { + LOG(ERROR) << "[gtxn][apply_mutation][" << start_ts_ << "] failed, " + << "mutaions size " << *size; + row_mu_impl->SetError(ErrorCode::kBadParam); + return false; + } + return true; } -bool GlobalTxnInternal::PrimaryIsLocked(const tera::PrimaryInfo& primary, - const int64_t lock_ts, +bool GlobalTxnInternal::PrimaryIsLocked(const tera::PrimaryInfo& primary, const int64_t lock_ts, ErrorCode* status) { - Table* table = FindTable(primary.table_name()); - if (table == NULL) { - status->SetFailed(ErrorCode::kGTxnPrimaryLost, - "not found primary table and open failed"); - return false; - } - if (!CheckTable(table, status)) { - status->SetFailed(ErrorCode::kGTxnPrimaryLost, - "primary table check failed" + status->ToString()); - return false; - } - const Cell& cell = Cell(table, primary.row_key(), - primary.column_family(), primary.qualifier()); - - std::unique_ptr reader(table->NewRowReader(cell.RowKey())); - reader->AddColumn(cell.ColFamily(), cell.LockName()); - reader->SetTimeRange(lock_ts, lock_ts); - table->Get(reader.get()); - - if (reader->GetError().GetType() != tera::ErrorCode::kOK && - reader->GetError().GetType() != tera::ErrorCode::kNotFound) { - *status = reader->GetError(); - return false; - } - while (!reader->Done()) { - if (reader->Timestamp() == lock_ts) { - VLOG(12) << DebugString(cell, "other transaction on prewrite @" + std::to_string(lock_ts)); - return true; - } - reader->Next(); - } + Table* table = FindTable(primary.table_name()); + if (table == NULL) { + status->SetFailed(ErrorCode::kGTxnPrimaryLost, "not found primary table and open failed"); return false; + } + if (!CheckTable(table, status)) { + status->SetFailed(ErrorCode::kGTxnPrimaryLost, + "primary table check failed" + status->ToString()); + return false; + } + const Cell& cell = Cell(table, primary.row_key(), primary.column_family(), primary.qualifier()); + + std::unique_ptr reader(table->NewRowReader(cell.RowKey())); + reader->AddColumn(cell.ColFamily(), cell.LockName()); + reader->SetTimeRange(lock_ts, lock_ts); + table->Get(reader.get()); + + if (reader->GetError().GetType() != tera::ErrorCode::kOK && + reader->GetError().GetType() != tera::ErrorCode::kNotFound) { + *status = reader->GetError(); + return false; + } + while (!reader->Done()) { + if (reader->Timestamp() == lock_ts) { + VLOG(12) << DebugString(cell, "other transaction on prewrite @" + std::to_string(lock_ts)); + return true; + } + reader->Next(); + } + return false; } void GlobalTxnInternal::BuildRowReaderForPrewrite(const std::vector& ws, RowReader* reader) { - for (auto& w : ws){ - reader->AddColumn(w.ColFamily(), w.DataName()); - reader->AddColumn(w.ColFamily(), w.LockName()); - reader->AddColumn(w.ColFamily(), w.WriteName()); - reader->SetTimeRange(0, kMaxTimeStamp); - reader->SetMaxVersions(UINT32_MAX); - } + for (auto& w : ws) { + reader->AddColumn(w.ColFamily(), w.DataName()); + reader->AddColumn(w.ColFamily(), w.LockName()); + reader->AddColumn(w.ColFamily(), w.WriteName()); + reader->SetTimeRange(0, kMaxTimeStamp); + reader->SetMaxVersions(UINT32_MAX); + } } -void GlobalTxnInternal::BuildRowMutationForPrewrite(std::vector* ws, +void GlobalTxnInternal::BuildRowMutationForPrewrite(std::vector* ws, RowMutation* prewrite_mu, const std::string& primary_info) { - for (auto it = ws->begin(); it != ws->end(); ++it) { - const Write& w = *it; // one cell - prewrite_mu->Put(w.ColFamily(), - w.LockName(), - EncodeLockValue(w.WriteType(), primary_info), - (int64_t)prewrite_start_ts_); - prewrite_mu->Put(w.ColFamily(), - w.DataName(), - w.Value(), - (int64_t)prewrite_start_ts_); - } -} - -void GlobalTxnInternal::BuildRowMutationForCommit(std::vector* ws, - RowMutation* commit_mu, + for (auto it = ws->begin(); it != ws->end(); ++it) { + const Write& w = *it; // one cell + prewrite_mu->Put(w.ColFamily(), w.LockName(), EncodeLockValue(w.WriteType(), primary_info), + (int64_t)prewrite_start_ts_); + prewrite_mu->Put(w.ColFamily(), w.DataName(), w.Value(), (int64_t)prewrite_start_ts_); + VLOG(12) << "[gtxn][prewrite][lock] " << w.DebugString() + << " LockName:Type:Info:prewrite_start_ts " << w.LockName() << ":" << w.WriteType() + << ":" << primary_info << ":" << prewrite_start_ts_ << " WriteName:Value " + << w.DataName() << ":" << w.Value(); + } +} + +void GlobalTxnInternal::BuildRowMutationForCommit(std::vector* ws, RowMutation* commit_mu, const int64_t commit_ts) { - for (auto it = ws->begin(); it != ws->end(); ++it) { - const Write& w = *it; // one cell - // value = type + start_ts - commit_mu->Put(w.ColFamily(), w.WriteName(), - EncodeWriteValue(w.WriteType(), prewrite_start_ts_), - commit_ts); - commit_mu->DeleteColumns(w.ColFamily(), w.LockName(), commit_ts); - } + for (auto it = ws->begin(); it != ws->end(); ++it) { + const Write& w = *it; // one cell + // value = type + start_ts + commit_mu->Put(w.ColFamily(), w.WriteName(), + EncodeWriteValue(w.WriteType(), prewrite_start_ts_), commit_ts); + commit_mu->DeleteColumns(w.ColFamily(), w.LockName(), commit_ts); + } } -void GlobalTxnInternal::BuildRowMutationForAck(std::vector* ws, - RowMutation* commit_mu) { - for (auto it = ws->begin(); it != ws->end(); ++it) { - const Write& w = *it; // one cell - commit_mu->DeleteColumns(kNotifyColumnFamily, w.NotifyName(), start_ts_); - } +void GlobalTxnInternal::BuildRowMutationForAck(std::vector* ws, RowMutation* commit_mu) { + for (auto it = ws->begin(); it != ws->end(); ++it) { + const Write& w = *it; // one cell + commit_mu->DeleteColumns(kNotifyColumnFamily, w.NotifyName(), start_ts_); + } } -void GlobalTxnInternal::BuildRowMutationForNotify(std::vector* ws, - RowMutation* commit_mu, +void GlobalTxnInternal::BuildRowMutationForNotify(std::vector* ws, RowMutation* commit_mu, const int64_t commit_ts) { - for (auto it = ws->begin(); it != ws->end(); ++it) { - const Write& w = *it; // one cell - commit_mu->Put(kNotifyColumnFamily, w.NotifyName(), - Int64ToEncodedString(commit_ts), commit_ts); - } + for (auto it = ws->begin(); it != ws->end(); ++it) { + const Write& w = *it; // one cell + commit_mu->Put(kNotifyColumnFamily, w.NotifyName(), Int64ToEncodedString(commit_ts), commit_ts); + } } void GlobalTxnInternal::SetCommitDuration(int64_t timeout_ms) { - terminal_time_ = timeout_ms + get_millis(); + terminal_time_ = timeout_ms + get_millis(); } void GlobalTxnInternal::SetInternalSdkTaskTimeout(RowReader* reader) { - int64_t duration = terminal_time_ - get_millis(); - if (duration < 0) { - is_timeout_ = true; - duration = 1; - } - // duration should not larger than FLAGS_tera_sdk_timeout - duration = duration > FLAGS_tera_sdk_timeout ? FLAGS_tera_sdk_timeout : duration; - reader->SetTimeOut(duration); + int64_t duration = terminal_time_ - get_millis(); + if (duration < 0) { + is_timeout_ = true; + duration = 1; + } + // duration should not larger than FLAGS_tera_sdk_read_timeout + duration = duration > FLAGS_tera_sdk_read_timeout ? FLAGS_tera_sdk_read_timeout : duration; + reader->SetTimeOut(duration); } void GlobalTxnInternal::SetInternalSdkTaskTimeout(RowMutation* mutation) { - int64_t duration = terminal_time_ - get_millis(); - if (duration < 0) { - is_timeout_ = true; - duration = 1; - } - // duration should not larger than FLAGS_tera_sdk_timeout - duration = duration > FLAGS_tera_sdk_timeout ? FLAGS_tera_sdk_timeout : duration; - mutation->SetTimeOut(duration); + int64_t duration = terminal_time_ - get_millis(); + if (duration < 0) { + is_timeout_ = true; + duration = 1; + } + // duration should not larger than FLAGS_tera_sdk_write_timeout + duration = duration > FLAGS_tera_sdk_write_timeout ? FLAGS_tera_sdk_write_timeout : duration; + mutation->SetTimeOut(duration); } -bool GlobalTxnInternal::IsTimeOut() { - return is_timeout_; -} +bool GlobalTxnInternal::IsTimeOut() { return is_timeout_; } -bool GlobalTxnInternal::IsPrimary(const tera::Cell& cell, - const tera::PrimaryInfo& primary_info) { - return primary_info.table_name() == cell.TableName() - && primary_info.row_key() == cell.RowKey() - && primary_info.column_family() == cell.ColFamily() - && primary_info.qualifier() == cell.Qualifier(); +bool GlobalTxnInternal::IsPrimary(const tera::Cell& cell, const tera::PrimaryInfo& primary_info) { + return primary_info.table_name() == cell.TableName() && primary_info.row_key() == cell.RowKey() && + primary_info.column_family() == cell.ColFamily() && + primary_info.qualifier() == cell.Qualifier(); } Table* GlobalTxnInternal::FindTable(const std::string& tablename) { - assert(!tablename.empty()); - MutexLock lock(&tables_mu_); - TableInfoMap::const_iterator it = tables_.find(tablename); - if (it == tables_.end()) { - ErrorCode status; - Table* t = client_impl_->OpenTable(tablename, &status); - if (t == NULL || status.GetType() != ErrorCode::kOK) { - LOG(ERROR) << "[gtxn] can't create table :" << tablename << "," << status.ToString(); - return NULL; - } - return t; + assert(!tablename.empty()); + MutexLock lock(&tables_mu_); + TableInfoMap::const_iterator it = tables_.find(tablename); + if (it == tables_.end()) { + ErrorCode status; + Table* t = client_impl_->OpenTable(tablename, &status); + if (t == NULL || status.GetType() != ErrorCode::kOK) { + LOG(ERROR) << "[gtxn] can't create table :" << tablename << "," << status.ToString(); + return NULL; } - return (it->second).first; + return t; + } + return (it->second).first; } -bool GlobalTxnInternal::ConflictWithOtherWrite(const std::vector* ws, - const std::unique_ptr& reader, +bool GlobalTxnInternal::ConflictWithOtherWrite(const std::vector* ws, + const std::unique_ptr& reader, ErrorCode* status) { - RowReader::TRow row; - reader->ToMap(&row); - - // check every cell - for (auto it = ws->begin(); it != ws->end(); ++it) { - const Write& w = *it; - const std::string& w_cf = w.ColFamily(); - if (row.find(w_cf) == row.end()) { - VLOG(12) << "[gtxn][prewrite][stxn_read]" << w.DebugString() - << "not found [" << w_cf << "]"; - continue; - } else { - // check Write column - const std::string& w_write = w.WriteName(); - if (row[w_cf].find(w_write) != row[w_cf].end()) { - for (auto k = row[w_cf][w_write].rbegin(); k != row[w_cf][w_write].rend(); ++k) { - std::string write_value = k->second; - int write_type; - int64_t data_start_ts; - DecodeWriteValue(write_value, &write_type, &data_start_ts); - VLOG(12) << "[gtxn][prewrite][stxn_read]" << w.DebugString() - << " prewrite_start_ts:" << prewrite_start_ts_ - << " found _W_ :" << k->first - << " type: " << write_type - << " data_ts: " << data_start_ts; - if (k->first >= prewrite_start_ts_) { - status->SetFailed(ErrorCode::kGTxnWriteConflict, - "writing by others ts:" + std::to_string(k->first)); - return true; - } - } - } else { - VLOG(12) << "[gtxn][prewrite][stxn_read]" << w.DebugString() - << "not found _W_ col"; - } - // check Lock column - const std::string& w_lock = w.LockName(); - if (row[w_cf].find(w_lock) != row[w_cf].end()) { - auto k = row[w_cf][w_lock].rbegin(); - if (k != row[w_cf][w_lock].rend()) { - VLOG(12) << "[gtxn][prewrite][stxn_read]" << w.DebugString() - << "locked@: " << k->first; - status->SetFailed(ErrorCode::kGTxnLockConflict, - w.DebugString() + "locked@:" + std::to_string(k->first)); - return true; - } - } + RowReader::TRow row; + reader->ToMap(&row); + + // check every cell + for (auto it = ws->begin(); it != ws->end(); ++it) { + const Write& w = *it; + const std::string& w_cf = w.ColFamily(); + if (row.find(w_cf) == row.end()) { + VLOG(12) << "[gtxn][prewrite][stxn_read]" << w.DebugString() << "not found [" << w_cf << "]"; + continue; + } else { + // check Write column + const std::string& w_write = w.WriteName(); + if (row[w_cf].find(w_write) != row[w_cf].end()) { + for (auto k = row[w_cf][w_write].rbegin(); k != row[w_cf][w_write].rend(); ++k) { + std::string write_value = k->second; + int write_type; + int64_t data_start_ts; + DecodeWriteValue(write_value, &write_type, &data_start_ts); + VLOG(12) << "[gtxn][prewrite][stxn_read]" << w.DebugString() + << " prewrite_start_ts:" << prewrite_start_ts_ << " found _W_ :" << k->first + << " type: " << write_type << " data_ts: " << data_start_ts; + if (k->first >= prewrite_start_ts_) { + status->SetFailed(ErrorCode::kGTxnWriteConflict, + "writing by others ts:" + std::to_string(k->first)); + return true; + } } + } else { + VLOG(12) << "[gtxn][prewrite][stxn_read]" << w.DebugString() << "not found _W_ col"; + } + // check Lock column + const std::string& w_lock = w.LockName(); + if (row[w_cf].find(w_lock) != row[w_cf].end()) { + auto k = row[w_cf][w_lock].rbegin(); + if (k != row[w_cf][w_lock].rend()) { + VLOG(12) << "[gtxn][prewrite][stxn_read]" << w.DebugString() << "locked@: " << k->first; + status->SetFailed(ErrorCode::kGTxnLockConflict, + w.DebugString() + "locked@:" + std::to_string(k->first)); + return true; + } + } } - return false; + } + return false; } void GlobalTxnInternal::SetPrewriteStartTimestamp(const int64_t prewrite_start_ts) { - prewrite_start_ts_ = prewrite_start_ts; + prewrite_start_ts_ = prewrite_start_ts; } -bool GlobalTxnInternal::IsGTxnColumnFamily(const std::string& tablename, +bool GlobalTxnInternal::IsGTxnColumnFamily(const std::string& tablename, const std::string& column_family) { - MutexLock lock(&tables_mu_); - auto it = tables_.find(tablename); - if (it != tables_.end()) { - std::set& gtxn_cfs = (it->second).second; - auto cfs_it = gtxn_cfs.find(column_family); - if (cfs_it != gtxn_cfs.end()) { - return true; - } + MutexLock lock(&tables_mu_); + auto it = tables_.find(tablename); + if (it != tables_.end()) { + std::set& gtxn_cfs = (it->second).second; + auto cfs_it = gtxn_cfs.find(column_family); + if (cfs_it != gtxn_cfs.end()) { + return true; } - return false; + } + return false; } -std::string GlobalTxnInternal::GetClientSession() { - return client_impl_->ClientSession(); -} +std::string GlobalTxnInternal::GetClientSession() { return client_impl_->ClientSession(); } std::string GlobalTxnInternal::DebugString(const Cell& cell, const std::string& msg) const { - std::stringstream ss; - ss << msg << " @ [" << cell.Table()->GetName() << ":" - << cell.RowKey() << ":" << cell.ColFamily() - << ":" << cell.Qualifier() << ":" << cell.Timestamp() << "]"; - return ss.str(); + std::stringstream ss; + ss << msg << " @ [" << cell.Table()->GetName() << ":" << cell.RowKey() << ":" << cell.ColFamily() + << ":" << cell.Qualifier() << ":" << cell.Timestamp() << "]"; + return ss.str(); } int64_t GlobalTxnInternal::TEST_Init(const std::string& conf_file) { - if (FLAGS_tera_gtxn_test_opened) { - TEST_GtxnTestHelper_ = new GlobalTxnTestHelper(conf_file); - TEST_GtxnTestHelper_->LoadTxnConf(); - start_ts_ = TEST_GtxnTestHelper_->GetStartTs(); - prewrite_start_ts_ = TEST_GtxnTestHelper_->GetPrewriteStartTs(); - } - return start_ts_; + if (FLAGS_tera_gtxn_test_opened) { + TEST_GtxnTestHelper_ = new GlobalTxnTestHelper(conf_file); + TEST_GtxnTestHelper_->LoadTxnConf(); + start_ts_ = TEST_GtxnTestHelper_->GetStartTs(); + prewrite_start_ts_ = TEST_GtxnTestHelper_->GetPrewriteStartTs(); + } + return start_ts_; } void GlobalTxnInternal::TEST_GetSleep() { - if (FLAGS_tera_gtxn_test_opened) { - TEST_GtxnTestHelper_->GetWait(start_ts_); - } + if (FLAGS_tera_gtxn_test_opened) { + TEST_GtxnTestHelper_->GetWait(start_ts_); + } } void GlobalTxnInternal::TEST_Sleep() { - if (FLAGS_tera_gtxn_test_opened) { - TEST_GtxnTestHelper_->Wait(start_ts_); - } + if (FLAGS_tera_gtxn_test_opened) { + TEST_GtxnTestHelper_->Wait(start_ts_); + } } void GlobalTxnInternal::TEST_Destory() { - if (FLAGS_tera_gtxn_test_opened) { - delete TEST_GtxnTestHelper_; - } + if (FLAGS_tera_gtxn_test_opened) { + delete TEST_GtxnTestHelper_; + } } -int64_t GlobalTxnInternal::TEST_GetCommitTimestamp() { - return TEST_GtxnTestHelper_->GetCommitTs(); -} +int64_t GlobalTxnInternal::TEST_GetCommitTimestamp() { return TEST_GtxnTestHelper_->GetCommitTs(); } int64_t GlobalTxnInternal::TEST_GetPrewriteStartTimestamp() { - return TEST_GtxnTestHelper_->GetPrewriteStartTs(); + return TEST_GtxnTestHelper_->GetPrewriteStartTs(); } void GlobalTxnInternal::PerfReadDelay(int64_t begin_time, int64_t finish_time) { - read_cost_time_.Add(finish_time - begin_time); + read_cost_time_.Add(finish_time - begin_time); } void GlobalTxnInternal::PerfCommitDelay(int64_t begin_time, int64_t finish_time) { - commit_cost_time_.Add(finish_time - begin_time); + commit_cost_time_.Add(finish_time - begin_time); } void GlobalTxnInternal::PerfPrewriteDelay(int64_t begin_time, int64_t finish_time) { - prewrite_cost_time_.Add(finish_time - begin_time); + prewrite_cost_time_.Add(finish_time - begin_time); } -void GlobalTxnInternal::PerfPrimaryCommitDelay(int64_t begin_time, int64_t finish_time) { - primary_cost_time_.Add(finish_time - begin_time); +void GlobalTxnInternal::PerfPrimaryCommitDelay(int64_t begin_time, int64_t finish_time) { + primary_cost_time_.Add(finish_time - begin_time); } void GlobalTxnInternal::PerfSecondariesCommitDelay(int64_t begin_time, int64_t finish_time) { - secondaries_cost_time_.Add(finish_time - begin_time); + secondaries_cost_time_.Add(finish_time - begin_time); } void GlobalTxnInternal::PerfAckDelay(int64_t begin_time, int64_t finish_time) { - acks_cost_time_.Add(finish_time - begin_time); + acks_cost_time_.Add(finish_time - begin_time); } void GlobalTxnInternal::PerfNotifyDelay(int64_t begin_time, int64_t finish_time) { - notifies_cost_time_.Add(finish_time - begin_time); + notifies_cost_time_.Add(finish_time - begin_time); } void GlobalTxnInternal::PerfReport() { - gtxn_read_delay_us.Add(read_cost_time_.Clear()); - gtxn_commit_delay_us.Add(commit_cost_time_.Clear()); - gtxn_prewrite_delay_us.Add(prewrite_cost_time_.Clear()); - gtxn_primary_delay_us.Add(primary_cost_time_.Clear()); - gtxn_secondaries_delay_us.Add(secondaries_cost_time_.Clear()); - gtxn_acks_delay_us.Add(acks_cost_time_.Clear()); - gtxn_notifies_delay_us.Add(notifies_cost_time_.Clear()); + gtxn_read_delay_us.Add(read_cost_time_.Clear()); + gtxn_commit_delay_us.Add(commit_cost_time_.Clear()); + gtxn_prewrite_delay_us.Add(prewrite_cost_time_.Clear()); + gtxn_primary_delay_us.Add(primary_cost_time_.Clear()); + gtxn_secondaries_delay_us.Add(secondaries_cost_time_.Clear()); + gtxn_acks_delay_us.Add(acks_cost_time_.Clear()); + gtxn_notifies_delay_us.Add(notifies_cost_time_.Clear()); } -} // namespace tera +} // namespace tera /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/sdk/global_txn_internal.h b/src/sdk/global_txn_internal.h index b85ad299b..54f634015 100644 --- a/src/sdk/global_txn_internal.h +++ b/src/sdk/global_txn_internal.h @@ -4,14 +4,14 @@ // // Author: baorenyi@baidu.com -#ifndef TERA_SDK_GLOBAL_TXN_INTERNAL_H_ -#define TERA_SDK_GLOBAL_TXN_INTERNAL_H_ +#ifndef TERA_SDK_GLOBAL_TXN_INTERNAL_H_ +#define TERA_SDK_GLOBAL_TXN_INTERNAL_H_ #include #include #include #include -#include +#include #include "common/mutex.h" #include "io/coding.h" @@ -32,362 +32,318 @@ class GlobalTxnTestHelper; class Write; inline void PrintCostTime(const std::string& msg, int64_t begin_time) { - VLOG(12) << msg <<" cost: " << get_micros() - begin_time; + VLOG(12) << msg << " cost: " << get_micros() - begin_time; } inline std::string Int64ToEncodedString(int64_t n) { - char buf[sizeof(int64_t)]; - io::EncodeBigEndian(buf, n); - std::string s (buf, sizeof(int64_t)); - return s; + char buf[sizeof(int64_t)]; + io::EncodeBigEndian(buf, n); + std::string s(buf, sizeof(int64_t)); + return s; } -inline int64_t EncodedStringToInt64(const std::string& s) { - return io::DecodeBigEndain(s.c_str()); -} +inline int64_t EncodedStringToInt64(const std::string& s) { return io::DecodeBigEndain(s.c_str()); } -inline std::string PackLockName(const std::string& qualifier) { - return "!L" + qualifier; -} +inline std::string PackLockName(const std::string& qualifier) { return "!L" + qualifier; } -inline std::string PackWriteName(const std::string& qualifier) { - return "!W" + qualifier; -} +inline std::string PackWriteName(const std::string& qualifier) { return "!W" + qualifier; } // make sure 'data' column sort after 'lock' and 'write' columns in same row -inline std::string PackDataName(const std::string& qualifier) { - return qualifier; -} +inline std::string PackDataName(const std::string& qualifier) { return qualifier; } inline std::string EncodeLockValue(int type, const std::string& primary_str) { - return (char)type + primary_str; + return (char)type + primary_str; } -inline bool DecodeLockValue(const std::string& value, - int* type, tera::PrimaryInfo* info) { - if (value.length() > 1) { - *type = (int)value[0]; - return info->ParseFromString(value.substr(1)); - } else { - *type = -1; - return false; - } +inline bool DecodeLockValue(const std::string& value, int* type, tera::PrimaryInfo* info) { + if (value.length() > 1) { + *type = (int)value[0]; + return info->ParseFromString(value.substr(1)); + } else { + *type = -1; + return false; + } } inline std::string EncodeWriteValue(int type, int64_t timestamp) { - return (char)type + Int64ToEncodedString(timestamp); + return (char)type + Int64ToEncodedString(timestamp); } inline bool DecodeWriteValue(const std::string& value, int* type, int64_t* timestamp) { - if (value.length() > 1) { - *type = (int)value[0]; - *timestamp = EncodedStringToInt64(value.substr(1)); - return true; - } else { - *type = -1; - *timestamp = -1; - return false; - } + if (value.length() > 1) { + *type = (int)value[0]; + *timestamp = EncodedStringToInt64(value.substr(1)); + return true; + } else { + *type = -1; + *timestamp = -1; + return false; + } } -inline std::string PackNotifyName(const std::string& column_family, - const std::string& qualifier) { - return column_family + ":" + qualifier; +inline std::string PackNotifyName(const std::string& column_family, const std::string& qualifier) { + return column_family + ":" + qualifier; } inline bool BadQualifier(const std::string& qualifier) { - size_t q_len = qualifier.length(); - return q_len > 0 && qualifier[0] == '!'; + size_t q_len = qualifier.length(); + return q_len > 0 && qualifier[0] == '!'; } class Cell { -public: - Cell(tera::Table* table, - const std::string& row_key, - const std::string& column_family, - const std::string& qualifier, - const int64_t timestamp = 0, - const std::string& value = "") : - table_(table), + public: + Cell(tera::Table* table, const std::string& row_key, const std::string& column_family, + const std::string& qualifier, const int64_t timestamp = 0, const std::string& value = "") + : table_(table), row_key_(row_key), column_family_(column_family), qualifier_(qualifier), timestamp_(timestamp), value_(value), tablename_("") { - - assert(table_ != NULL); - tablename_ = table_->GetName(); - } - - tera::Table* Table() const { return table_; } - - const std::string TableName() const { return tablename_; } - const std::string& RowKey() const { return row_key_; } - const std::string& ColFamily() const { return column_family_; } - const std::string& Qualifier() const { return qualifier_; } - const std::string LockName() const { return PackLockName(qualifier_); } - const std::string WriteName() const { return PackWriteName(qualifier_); } - const std::string DataName() const { return PackDataName(qualifier_); } - const std::string NotifyName() const { return PackNotifyName(column_family_, qualifier_); } - const int64_t Timestamp() const { return timestamp_; } - void SetTimestamp(const int64_t timestamp) { - timestamp_ = timestamp; - } - const std::string& Value() const { return value_; } - void SetValue(const std::string& value) { - value_ = value; - } -private: - tera::Table* table_; - std::string row_key_; - std::string column_family_; - std::string qualifier_; - int64_t timestamp_; - std::string value_; - std::string tablename_; + assert(table_ != NULL); + tablename_ = table_->GetName(); + } + + tera::Table* Table() const { return table_; } + + const std::string TableName() const { return tablename_; } + const std::string& RowKey() const { return row_key_; } + const std::string& ColFamily() const { return column_family_; } + const std::string& Qualifier() const { return qualifier_; } + const std::string LockName() const { return PackLockName(qualifier_); } + const std::string WriteName() const { return PackWriteName(qualifier_); } + const std::string DataName() const { return PackDataName(qualifier_); } + const std::string NotifyName() const { return PackNotifyName(column_family_, qualifier_); } + const int64_t Timestamp() const { return timestamp_; } + void SetTimestamp(const int64_t timestamp) { timestamp_ = timestamp; } + const std::string& Value() const { return value_; } + void SetValue(const std::string& value) { value_ = value; } + + private: + tera::Table* table_; + std::string row_key_; + std::string column_family_; + std::string qualifier_; + int64_t timestamp_; + std::string value_; + std::string tablename_; }; class Write { -public: - Write(const Cell& cell, const int& type = 0) - : cell_(cell), - type_(type), - is_primary_(false) {} - - int WriteType() const { return type_; } - bool IsPrimary() const { return is_primary_; } - tera::Table* Table() const { return cell_.Table(); } - const std::string TableName() const { return cell_.TableName(); } - const std::string& RowKey() const { return cell_.RowKey(); } - const std::string& ColFamily() const { return cell_.ColFamily(); } - const std::string& Qualifier() const { return cell_.Qualifier(); } - const std::string LockName() const { return cell_.LockName(); } - const std::string WriteName() const { return cell_.WriteName(); } - const std::string DataName() const { return cell_.DataName(); } - const std::string NotifyName() const { return cell_.NotifyName(); } - const int64_t Timestamp() const { return cell_.Timestamp(); } - const std::string& Value() const { return cell_.Value(); } - const int64_t GetSize() { - return cell_.RowKey().length() + cell_.ColFamily().length() + - cell_.Qualifier().length() + cell_.Value().length(); - } - bool IsSameRow(Write* w) { - return RowKey() == w->RowKey() - && Table() == w->Table(); - } - - void Serialize(const int64_t start_ts, - const std::string& session, - std::string* primary_info) { - tera::PrimaryInfo primary; - primary.set_table_name(TableName()); - primary.set_row_key(RowKey()); - primary.set_column_family(ColFamily()); - primary.set_qualifier(Qualifier()); - primary.set_gtxn_start_ts(start_ts); - primary.set_client_session(session), - primary.SerializeToString(primary_info); - } - - const std::string DebugString() const { - std::stringstream ss; - ss <<"[" << TableName() << ":" << RowKey() << ":" << ColFamily() - << ":" << Qualifier() << "]"; - return ss.str(); - } - -private: - tera::Cell cell_; - int type_; - bool is_primary_; -}; + public: + Write(const Cell& cell, const int& type = 0) : cell_(cell), type_(type), is_primary_(false) {} + + int WriteType() const { return type_; } + bool IsPrimary() const { return is_primary_; } + tera::Table* Table() const { return cell_.Table(); } + const std::string TableName() const { return cell_.TableName(); } + const std::string& RowKey() const { return cell_.RowKey(); } + const std::string& ColFamily() const { return cell_.ColFamily(); } + const std::string& Qualifier() const { return cell_.Qualifier(); } + const std::string LockName() const { return cell_.LockName(); } + const std::string WriteName() const { return cell_.WriteName(); } + const std::string DataName() const { return cell_.DataName(); } + const std::string NotifyName() const { return cell_.NotifyName(); } + const int64_t Timestamp() const { return cell_.Timestamp(); } + const std::string& Value() const { return cell_.Value(); } + const int64_t GetSize() { + return cell_.RowKey().length() + cell_.ColFamily().length() + cell_.Qualifier().length() + + cell_.Value().length(); + } + bool IsSameRow(Write* w) { return RowKey() == w->RowKey() && Table() == w->Table(); } + + void Serialize(const int64_t start_ts, const std::string& session, std::string* primary_info) { + tera::PrimaryInfo primary; + primary.set_table_name(TableName()); + primary.set_row_key(RowKey()); + primary.set_column_family(ColFamily()); + primary.set_qualifier(Qualifier()); + primary.set_gtxn_start_ts(start_ts); + primary.set_client_session(session), primary.SerializeToString(primary_info); + } + + const std::string DebugString() const { + std::stringstream ss; + ss << "[" << TableName() << ":" << RowKey() << ":" << ColFamily() << ":" << Qualifier() << "]"; + return ss.str(); + } + + private: + tera::Cell cell_; + int type_; + bool is_primary_; +}; struct PrewriteContext { - std::vector* ws; - Transaction* gtxn; - Transaction* stxn; - std::string table_name; - std::string row_key; - ErrorCode status; - PrewriteContext(std::vector* same_row_ws, - Transaction* g, - Transaction* s, - const std::string& tablename, - const std::string& rowkey) : - ws(same_row_ws), - gtxn(g), - stxn(s), - table_name(tablename), - row_key(rowkey) { - status.SetFailed(ErrorCode::kOK); - } - const std::string DebugString() const { - return "[tablename=" + table_name + ",rowkey=" + row_key + "]" + status.ToString(); - } + std::vector* ws; + Transaction* gtxn; + Transaction* stxn; + std::string table_name; + std::string row_key; + ErrorCode status; + PrewriteContext(std::vector* same_row_ws, Transaction* g, Transaction* s, + const std::string& tablename, const std::string& rowkey) + : ws(same_row_ws), gtxn(g), stxn(s), table_name(tablename), row_key(rowkey) { + status.SetFailed(ErrorCode::kOK); + } + const std::string DebugString() const { + return "[tablename=" + table_name + ",rowkey=" + row_key + "]" + status.ToString(); + } }; // one user reader will have one InternalReaderContext struct InternalReaderContext { - int expected_cell_cnt; - int active_cell_cnt; - int fail_cell_cnt; - int not_found_cnt; - RowReader* user_reader; - Transaction* gtxn; - std::map cell_map; - RowResult results; - ErrorCode last_err; - - InternalReaderContext(int expected_cnt, RowReader* reader, Transaction* txn) - : expected_cell_cnt(expected_cnt), - active_cell_cnt(0), - fail_cell_cnt(0), - not_found_cnt(0), - user_reader(reader), - gtxn(txn) {} - - ~InternalReaderContext() { - for (auto it = cell_map.begin(); it != cell_map.end();) { - Cell* cell = it->first; - cell_map.erase(it++); - delete cell; - cell = NULL; - } + int expected_cell_cnt; + int active_cell_cnt; + int fail_cell_cnt; + int not_found_cnt; + RowReader* user_reader; + Transaction* gtxn; + std::map cell_map; + RowResult results; + ErrorCode last_err; + + InternalReaderContext(int expected_cnt, RowReader* reader, Transaction* txn) + : expected_cell_cnt(expected_cnt), + active_cell_cnt(0), + fail_cell_cnt(0), + not_found_cnt(0), + user_reader(reader), + gtxn(txn) {} + + ~InternalReaderContext() { + for (auto it = cell_map.begin(); it != cell_map.end();) { + Cell* cell = it->first; + cell_map.erase(it++); + delete cell; + cell = NULL; } + } }; // one cell reader will have one CellReaderContext struct CellReaderContext { - Cell* cell; - InternalReaderContext* internal_reader_ctx; - ErrorCode status; - CellReaderContext(Cell* c, InternalReaderContext* ctx) - : cell(c), - internal_reader_ctx(ctx) {} + Cell* cell; + InternalReaderContext* internal_reader_ctx; + ErrorCode status; + CellReaderContext(Cell* c, InternalReaderContext* ctx) : cell(c), internal_reader_ctx(ctx) {} }; struct PrimaryTxnContext { - Transaction* gtxn; - Transaction* stxn; - PrimaryTxnContext(Transaction* g, Transaction* s) : gtxn(g) , stxn(s) {} + Transaction* gtxn; + Transaction* stxn; + PrimaryTxnContext(Transaction* g, Transaction* s) : gtxn(g), stxn(s) {} }; class GlobalTxnInternal { -public: - friend class GlobalTxn; - GlobalTxnInternal(std::shared_ptr client_impl); - - ~GlobalTxnInternal(); - // for common - void SetStartTimestamp(int64_t ts); - - bool CheckTable(Table* table, ErrorCode* status); - - Table* FindTable(const std::string& tablename); - - bool IsPrimary(const tera::Cell& cell, - const tera::PrimaryInfo& primary_info); - - bool IsGTxnColumnFamily(const std::string& tablename, - const std::string& column_family); - - // for get - bool VerifyUserRowReader(RowReader* user_reader); - - bool PrimaryIsLocked(const tera::PrimaryInfo& primary_info, - const int64_t lock_ts, - ErrorCode* status); - - bool IsLockedByOthers(RowReader::TRow& row, const tera::Cell& cell); - - bool SuspectLive(const tera::PrimaryInfo& primary_info); - - // for prewrite - void BuildRowReaderForPrewrite(const std::vector& ws, RowReader* reader); - - void BuildRowMutationForPrewrite(std::vector* ws, - RowMutation* txn_mu, - const std::string& primary_info); - - bool ConflictWithOtherWrite(const std::vector* ws, - const std::unique_ptr& reader, - ErrorCode* status); - - // for applyMutation - bool VerifyUserRowMutation(RowMutation* user_mu); - bool VerifyWritesSize(RowMutation* user_mu, int64_t* size); - - // for commit - void BuildRowMutationForCommit(std::vector* ws, - RowMutation* txn_mu, - const int64_t commit_ts); - - void BuildRowMutationForAck(std::vector* ws, RowMutation* txn_mu); - - void BuildRowMutationForNotify(std::vector* ws, - RowMutation* txn_mu, - const int64_t commit_ts); - - void SetPrewriteStartTimestamp(const int64_t prewrite_start_ts); - - // for timeout - void SetCommitDuration(int64_t timeout_ms); - void SetInternalSdkTaskTimeout(RowMutation* mutation); - void SetInternalSdkTaskTimeout(RowReader* reader); - bool IsTimeOut(); - - // for other transaction alive - std::string GetClientSession(); -private: - // for pref - void UpdateTimerCounter(Counter* c) { - c->Set(get_micros() - c->Get()); - } + public: + friend class GlobalTxn; + GlobalTxnInternal(std::shared_ptr client_impl); + + ~GlobalTxnInternal(); + // for common + void SetStartTimestamp(int64_t ts); + + bool CheckTable(Table* table, ErrorCode* status); + + Table* FindTable(const std::string& tablename); + + bool IsPrimary(const tera::Cell& cell, const tera::PrimaryInfo& primary_info); + + bool IsGTxnColumnFamily(const std::string& tablename, const std::string& column_family); + + // for get + bool VerifyUserRowReader(RowReader* user_reader); + + bool PrimaryIsLocked(const tera::PrimaryInfo& primary_info, const int64_t lock_ts, + ErrorCode* status); + + bool IsLockedByOthers(RowReader::TRow& row, const tera::Cell& cell); + + bool SuspectLive(const tera::PrimaryInfo& primary_info); + + // for prewrite + void BuildRowReaderForPrewrite(const std::vector& ws, RowReader* reader); + + void BuildRowMutationForPrewrite(std::vector* ws, RowMutation* txn_mu, + const std::string& primary_info); + + bool ConflictWithOtherWrite(const std::vector* ws, + const std::unique_ptr& reader, ErrorCode* status); + + // for applyMutation + bool VerifyUserRowMutation(RowMutation* user_mu); + bool VerifyWritesSize(RowMutation* user_mu, int64_t* size); + + // for commit + void BuildRowMutationForCommit(std::vector* ws, RowMutation* txn_mu, + const int64_t commit_ts); + + void BuildRowMutationForAck(std::vector* ws, RowMutation* txn_mu); + + void BuildRowMutationForNotify(std::vector* ws, RowMutation* txn_mu, + const int64_t commit_ts); + + void SetPrewriteStartTimestamp(const int64_t prewrite_start_ts); + + // for timeout + void SetCommitDuration(int64_t timeout_ms); + void SetInternalSdkTaskTimeout(RowMutation* mutation); + void SetInternalSdkTaskTimeout(RowReader* reader); + bool IsTimeOut(); + + // for other transaction alive + std::string GetClientSession(); + + private: + // for pref + void UpdateTimerCounter(Counter* c) { c->Set(get_micros() - c->Get()); } + + // for debug and test + std::string DebugString(const tera::Cell& cell, const std::string& msg) const; + int64_t TEST_Init(const std::string& conf_file); + void TEST_Sleep(); + void TEST_GetSleep(); + void TEST_Destory(); + int64_t TEST_GetCommitTimestamp(); + int64_t TEST_GetPrewriteStartTimestamp(); + + void PerfReadDelay(int64_t begin_time, int64_t finish_time); + void PerfCommitDelay(int64_t begin_time, int64_t finish_time); + void PerfPrewriteDelay(int64_t begin_time, int64_t finish_time); + void PerfPrimaryCommitDelay(int64_t begin_time, int64_t finish_time); + void PerfSecondariesCommitDelay(int64_t begin_time, int64_t finish_time); + void PerfAckDelay(int64_t begin_time, int64_t finish_time); + void PerfNotifyDelay(int64_t begin_time, int64_t finish_time); + + void PerfReport(); + + private: + GlobalTxnInternal(const GlobalTxnInternal&) = delete; + GlobalTxnInternal& operator=(const GlobalTxnInternal&) = delete; + // for test + GlobalTxnTestHelper* TEST_GtxnTestHelper_; + // tablename-> (Table*, set(gtxn_cf_name)) + typedef std::map > > TableInfoMap; + TableInfoMap tables_; + mutable Mutex tables_mu_; + int64_t start_ts_; + int64_t prewrite_start_ts_; + + // for record this transaction perf + Counter read_cost_time_; + Counter commit_cost_time_; + Counter prewrite_cost_time_; + Counter primary_cost_time_; + Counter secondaries_cost_time_; + Counter acks_cost_time_; + Counter notifies_cost_time_; - // for debug and test - std::string DebugString(const tera::Cell& cell, const std::string& msg) const ; - int64_t TEST_Init(const std::string& conf_file); - void TEST_Sleep(); - void TEST_GetSleep(); - void TEST_Destory(); - int64_t TEST_GetCommitTimestamp(); - int64_t TEST_GetPrewriteStartTimestamp(); - - void PerfReadDelay(int64_t begin_time, int64_t finish_time); - void PerfCommitDelay(int64_t begin_time, int64_t finish_time); - void PerfPrewriteDelay(int64_t begin_time, int64_t finish_time); - void PerfPrimaryCommitDelay(int64_t begin_time, int64_t finish_time); - void PerfSecondariesCommitDelay(int64_t begin_time, int64_t finish_time); - void PerfAckDelay(int64_t begin_time, int64_t finish_time); - void PerfNotifyDelay(int64_t begin_time, int64_t finish_time); - - void PerfReport(); -private: - GlobalTxnInternal(const GlobalTxnInternal&) = delete; - GlobalTxnInternal& operator=(const GlobalTxnInternal&) = delete; - // for test - GlobalTxnTestHelper* TEST_GtxnTestHelper_; - // tablename-> (Table*, set(gtxn_cf_name)) - typedef std::map > > TableInfoMap; - TableInfoMap tables_; - mutable Mutex tables_mu_; - int64_t start_ts_; - int64_t prewrite_start_ts_; - - // for record this transaction perf - Counter read_cost_time_; - Counter commit_cost_time_; - Counter prewrite_cost_time_; - Counter primary_cost_time_; - Counter secondaries_cost_time_; - Counter acks_cost_time_; - Counter notifies_cost_time_; - - int64_t terminal_time_; - std::atomic is_timeout_; - std::shared_ptr client_impl_; + int64_t terminal_time_; + std::atomic is_timeout_; + std::shared_ptr client_impl_; }; -} // namespace tera +} // namespace tera #endif // TERA_SDK_GLOBAL_TXN_INTERNAL_H_ diff --git a/src/sdk/http/http.cc b/src/sdk/http/http.cc index da7b571c0..d0f42dc07 100644 --- a/src/sdk/http/http.cc +++ b/src/sdk/http/http.cc @@ -31,361 +31,344 @@ tera::Counter write_request_counter; tera::Counter write_response_counter; class StatusCode { -public: - enum code { - kOk = 0, - kError = 1 - }; - StatusCode() { - code_ = kOk; - } - bool Ok() { - return code_ == kOk; - } - void SetError(const std::string& reason) { - code_ = kError; - reason_ = reason; - } - std::string GetReason() { - return reason_; - } -private: - code code_; - std::string reason_; + public: + enum code { kOk = 0, kError = 1 }; + StatusCode() { code_ = kOk; } + bool Ok() { return code_ == kOk; } + void SetError(const std::string& reason) { + code_ = kError; + reason_ = reason; + } + std::string GetReason() { return reason_; } + + private: + code code_; + std::string reason_; }; class HttpProxyImpl : public tera::http::HttpProxy { -public: - HttpProxyImpl(const std::string& confpath) { - client_ = Client::NewClient(confpath, "terahttp", NULL); - assert(client_ != NULL); + public: + HttpProxyImpl(const std::string& confpath) { + client_ = Client::NewClient(confpath, "terahttp", NULL); + assert(client_ != NULL); - request_pool_ = new common::ThreadPool(FLAGS_tera_http_request_thread_num); - assert(request_pool_ != NULL); + request_pool_ = new common::ThreadPool(FLAGS_tera_http_request_thread_num); + assert(request_pool_ != NULL); - ctrl_pool_ = new common::ThreadPool(FLAGS_tera_http_ctrl_thread_num); - assert(ctrl_pool_ != NULL); + ctrl_pool_ = new common::ThreadPool(FLAGS_tera_http_ctrl_thread_num); + assert(ctrl_pool_ != NULL); - LogCounter(); - } - virtual ~HttpProxyImpl() {} - -private: - void LogCounter(); - - virtual void Get(google::protobuf::RpcController* controller, - const tera::http::GetRequest* request, - tera::http::GetResponse* response, - google::protobuf::Closure* done) { - VLOG(25) << "accept RPC (Get)"; - read_request_counter.Add(1); - common::ThreadPool::Task callback = - std::bind(&HttpProxyImpl::DoGet, this, controller, request, response, done); - request_pool_->AddTask(callback); - } - virtual void DoGet(google::protobuf::RpcController* controller, - const tera::http::GetRequest* request, - tera::http::GetResponse* response, - google::protobuf::Closure* done); - - virtual void Put(google::protobuf::RpcController* controller, - const tera::http::PutRequest* request, - tera::http::PutResponse* response, - google::protobuf::Closure* done) { - VLOG(25) << "accept RPC (Put)"; - write_request_counter.Add(1); - common::ThreadPool::Task callback = - std::bind(&HttpProxyImpl::DoPut, this, controller, request, response, done); - request_pool_->AddTask(callback); - } - virtual void DoPut(google::protobuf::RpcController* controller, - const tera::http::PutRequest* request, - tera::http::PutResponse* response, - google::protobuf::Closure* done); - - Table* OpenTableWithCache(const std::string& tablename, ErrorCode* err) { - MutexLock l(&mutex_); - std::map::iterator it = tables_.find(tablename); - if (it == tables_.end()) { - mutex_.Unlock(); - Table* table = client_->OpenTable(tablename, err); - mutex_.Lock(); - if (table == NULL) { - VLOG(20) << "[OpenTableWithCache] open table failed:" << tablename - << " for " << err->GetReason(); - return NULL; - } - VLOG(25) << "[OpenTableWithCache] open table done:" << tablename; - tables_[tablename] = table; - return table; - } else { - VLOG(25) << "[OpenTableWithCache] open table(cached):" << tablename; - return it->second; - } + LogCounter(); + } + virtual ~HttpProxyImpl() {} + + private: + void LogCounter(); + + virtual void Get(google::protobuf::RpcController* controller, + const tera::http::GetRequest* request, tera::http::GetResponse* response, + google::protobuf::Closure* done) { + VLOG(25) << "accept RPC (Get)"; + read_request_counter.Add(1); + common::ThreadPool::Task callback = + std::bind(&HttpProxyImpl::DoGet, this, controller, request, response, done); + request_pool_->AddTask(callback); + } + virtual void DoGet(google::protobuf::RpcController* controller, + const tera::http::GetRequest* request, tera::http::GetResponse* response, + google::protobuf::Closure* done); + + virtual void Put(google::protobuf::RpcController* controller, + const tera::http::PutRequest* request, tera::http::PutResponse* response, + google::protobuf::Closure* done) { + VLOG(25) << "accept RPC (Put)"; + write_request_counter.Add(1); + common::ThreadPool::Task callback = + std::bind(&HttpProxyImpl::DoPut, this, controller, request, response, done); + request_pool_->AddTask(callback); + } + virtual void DoPut(google::protobuf::RpcController* controller, + const tera::http::PutRequest* request, tera::http::PutResponse* response, + google::protobuf::Closure* done); + + Table* OpenTableWithCache(const std::string& tablename, ErrorCode* err) { + MutexLock l(&mutex_); + std::map::iterator it = tables_.find(tablename); + if (it == tables_.end()) { + mutex_.Unlock(); + Table* table = client_->OpenTable(tablename, err); + mutex_.Lock(); + if (table == NULL) { + VLOG(20) << "[OpenTableWithCache] open table failed:" << tablename << " for " + << err->GetReason(); + return NULL; + } + VLOG(25) << "[OpenTableWithCache] open table done:" << tablename; + tables_[tablename] = table; + return table; + } else { + VLOG(25) << "[OpenTableWithCache] open table(cached):" << tablename; + return it->second; } + } -private: - mutable Mutex mutex_; + private: + mutable Mutex mutex_; - // tablename Table* - std::map tables_; + // tablename Table* + std::map tables_; - tera::Client* client_; - common::ThreadPool* request_pool_; - common::ThreadPool* ctrl_pool_; + tera::Client* client_; + common::ThreadPool* request_pool_; + common::ThreadPool* ctrl_pool_; }; struct PutContext { - const tera::http::PutRequest* request_; - tera::http::PutResponse* response_; - google::protobuf::Closure* done_; - Mutex mutex_; - int32_t finished_count_; + const tera::http::PutRequest* request_; + tera::http::PutResponse* response_; + google::protobuf::Closure* done_; + Mutex mutex_; + int32_t finished_count_; }; struct GetContext { - const tera::http::GetRequest* request_; - tera::http::GetResponse* response_; - google::protobuf::Closure* done_; - Mutex mutex_; - int32_t finished_count_; + const tera::http::GetRequest* request_; + tera::http::GetResponse* response_; + google::protobuf::Closure* done_; + Mutex mutex_; + int32_t finished_count_; }; struct GetContext* NewGetContext(const tera::http::GetRequest* request, tera::http::GetResponse* response, google::protobuf::Closure* done) { - struct GetContext* c = new GetContext; - c->request_ = request; - c->response_ = response; - c->done_ = done; - c->finished_count_ = 0; - return c; + struct GetContext* c = new GetContext; + c->request_ = request; + c->response_ = response; + c->done_ = done; + c->finished_count_ = 0; + return c; } struct PutContext* NewPutContext(const tera::http::PutRequest* request, tera::http::PutResponse* response, google::protobuf::Closure* done) { - struct PutContext* c = new PutContext; - c->request_ = request; - c->response_ = response; - c->done_ = done; - c->finished_count_ = 0; - return c; + struct PutContext* c = new PutContext; + c->request_ = request; + c->response_ = response; + c->done_ = done; + c->finished_count_ = 0; + return c; } void ReadCallback(tera::RowReader* reader) { - struct GetContext* context = (struct GetContext*)reader->GetContext(); - const GetRequest* request = context->request_; - GetResponse* response = context->response_; - google::protobuf::Closure* done = context->done_; - ErrorCode status = reader->GetError(); - { - MutexLock (&context->mutex_); - context->finished_count_++; - HttpColumnReader* res = response->add_results(); - if (status.GetType() != tera::ErrorCode::kOK) { - res->set_status(false); - res->set_reason("fail to get record from table: " + status.GetReason()); - } else { - res->set_rowkey(reader->RowName()); - res->set_columnfamily(reader->Family()); - res->set_qualifier(reader->Qualifier()); - res->set_status(true); - res->set_value(reader->Value()); - } - if (context->finished_count_ == request->reader_list_size()) { - write_response_counter.Add(1); - response->set_status(true); - delete context; - done->Run(); - } - delete reader; + struct GetContext* context = (struct GetContext*)reader->GetContext(); + const GetRequest* request = context->request_; + GetResponse* response = context->response_; + google::protobuf::Closure* done = context->done_; + ErrorCode status = reader->GetError(); + { + MutexLock(&context->mutex_); + context->finished_count_++; + HttpColumnReader* res = response->add_results(); + if (status.GetType() != tera::ErrorCode::kOK) { + res->set_status(false); + res->set_reason("fail to get record from table: " + status.GetReason()); + } else { + res->set_rowkey(reader->RowName()); + res->set_columnfamily(reader->Family()); + res->set_qualifier(reader->Qualifier()); + res->set_status(true); + res->set_value(reader->Value()); + } + if (context->finished_count_ == request->reader_list_size()) { + write_response_counter.Add(1); + response->set_status(true); + delete context; + done->Run(); } + delete reader; + } } void WriteCallback(tera::RowMutation* mutation) { - struct PutContext* context = (struct PutContext*)mutation->GetContext(); - const PutRequest* request = context->request_; - PutResponse* response = context->response_; - google::protobuf::Closure* done = context->done_; - ErrorCode status = mutation->GetError(); - { - MutexLock (&context->mutex_); - context->finished_count_++; - HttpRowMutationResult* res = response->add_results(); - res->set_rowkey(mutation->RowKey()); - if (status.GetType() != tera::ErrorCode::kOK) { - res->set_status(false); - res->set_reason("fail to put record to table: " + status.GetReason()); - } else { - res->set_status(true); - } - if (context->finished_count_ == request->mutation_list_size()) { - write_response_counter.Add(1); - response->set_status(true); - delete context; - done->Run(); - } - delete mutation; + struct PutContext* context = (struct PutContext*)mutation->GetContext(); + const PutRequest* request = context->request_; + PutResponse* response = context->response_; + google::protobuf::Closure* done = context->done_; + ErrorCode status = mutation->GetError(); + { + MutexLock(&context->mutex_); + context->finished_count_++; + HttpRowMutationResult* res = response->add_results(); + res->set_rowkey(mutation->RowKey()); + if (status.GetType() != tera::ErrorCode::kOK) { + res->set_status(false); + res->set_reason("fail to put record to table: " + status.GetReason()); + } else { + res->set_status(true); } + if (context->finished_count_ == request->mutation_list_size()) { + write_response_counter.Add(1); + response->set_status(true); + delete context; + done->Run(); + } + delete mutation; + } } void HttpProxyImpl::LogCounter() { - LOG(INFO) << "[write] request: " << write_request_counter.Clear() - << " response: " << write_response_counter.Clear(); - LOG(INFO) << "[read] request: " << read_request_counter.Clear() - << " response: " << read_response_counter.Clear(); - common::ThreadPool::Task callback = - std::bind(&HttpProxyImpl::LogCounter, this); - ctrl_pool_->DelayTask(1000, callback); + LOG(INFO) << "[write] request: " << write_request_counter.Clear() + << " response: " << write_response_counter.Clear(); + LOG(INFO) << "[read] request: " << read_request_counter.Clear() + << " response: " << read_response_counter.Clear(); + common::ThreadPool::Task callback = std::bind(&HttpProxyImpl::LogCounter, this); + ctrl_pool_->DelayTask(1000, callback); } void HttpProxyImpl::DoGet(google::protobuf::RpcController* controller, - const tera::http::GetRequest* request, - tera::http::GetResponse* response, + const tera::http::GetRequest* request, tera::http::GetResponse* response, google::protobuf::Closure* done) { - VLOG(30) << request->ShortDebugString(); - tera::ErrorCode err; - Table* table = NULL; - - // check arguments - StatusCode status; - if (!request->has_tablename()) { - status.SetError("invalid request, expect "); - } - for (int i = 0; i < request->reader_list_size(); i++) { - HttpColumnReader http_column_reader = request->reader_list(i); - if (!http_column_reader.has_rowkey() - || !http_column_reader.has_columnfamily() - || !http_column_reader.has_qualifier()) { - status.SetError("invalid request, expect & & "); - } - } - - // try open table - if ((table = OpenTableWithCache(request->tablename(), &err)) == NULL) { - std::string reason = "fail to open table " + request->tablename() + " : " + err.GetReason(); - status.SetError(reason); - } - - // 如果在检查参数的过程中遇到很多错误,这里选择返回最后一个错误原因给用户, - // 主要是为了保持错误检查代码尽量简洁易读。 - if (!status.Ok()) { - read_response_counter.Add(1); - response->set_reason(status.GetReason()); - response->set_status(false); - done->Run(); - return; - } - - struct GetContext* context = NewGetContext(request, response, done); - for (int i = 0; i < request->reader_list_size(); i++) { - HttpColumnReader http_column_reader = request->reader_list(i); - std::string rowkey = http_column_reader.rowkey(); - tera::RowReader* reader = table->NewRowReader(rowkey); - reader->AddColumn(http_column_reader.columnfamily(), http_column_reader.qualifier()); - reader->SetContext(context); - reader->SetCallBack(ReadCallback); - table->Get(reader); + VLOG(30) << request->ShortDebugString(); + tera::ErrorCode err; + Table* table = NULL; + + // check arguments + StatusCode status; + if (!request->has_tablename()) { + status.SetError("invalid request, expect "); + } + for (int i = 0; i < request->reader_list_size(); i++) { + HttpColumnReader http_column_reader = request->reader_list(i); + if (!http_column_reader.has_rowkey() || !http_column_reader.has_columnfamily() || + !http_column_reader.has_qualifier()) { + status.SetError("invalid request, expect & & "); } + } + + // try open table + if ((table = OpenTableWithCache(request->tablename(), &err)) == NULL) { + std::string reason = "fail to open table " + request->tablename() + " : " + err.GetReason(); + status.SetError(reason); + } + + // 如果在检查参数的过程中遇到很多错误,这里选择返回最后一个错误原因给用户, + // 主要是为了保持错误检查代码尽量简洁易读。 + if (!status.Ok()) { + read_response_counter.Add(1); + response->set_reason(status.GetReason()); + response->set_status(false); + done->Run(); + return; + } + + struct GetContext* context = NewGetContext(request, response, done); + for (int i = 0; i < request->reader_list_size(); i++) { + HttpColumnReader http_column_reader = request->reader_list(i); + std::string rowkey = http_column_reader.rowkey(); + tera::RowReader* reader = table->NewRowReader(rowkey); + reader->AddColumn(http_column_reader.columnfamily(), http_column_reader.qualifier()); + reader->SetContext(context); + reader->SetCallBack(ReadCallback); + table->Get(reader); + } } void HttpProxyImpl::DoPut(google::protobuf::RpcController* controller, - const tera::http::PutRequest* request, - tera::http::PutResponse* response, + const tera::http::PutRequest* request, tera::http::PutResponse* response, google::protobuf::Closure* done) { - VLOG(30) << request->ShortDebugString(); - tera::ErrorCode err; - Table* table = NULL; - - // check arguments - StatusCode status; - if (!request->has_tablename()) { - status.SetError("invalid request, expect "); - } - for (int i = 0; i < request->mutation_list_size(); i++) { - HttpRowMutation http_row_mutation = request->mutation_list(i); - if (!http_row_mutation.has_rowkey()) { - status.SetError("invalid request, expect "); - } - std::string type = http_row_mutation.type(); - if (type != "put" && type != "del-col" && type != "del-row") { - status.SetError("invalid request, operation:put/del-col/del-row"); - } + VLOG(30) << request->ShortDebugString(); + tera::ErrorCode err; + Table* table = NULL; + + // check arguments + StatusCode status; + if (!request->has_tablename()) { + status.SetError("invalid request, expect "); + } + for (int i = 0; i < request->mutation_list_size(); i++) { + HttpRowMutation http_row_mutation = request->mutation_list(i); + if (!http_row_mutation.has_rowkey()) { + status.SetError("invalid request, expect "); } - - // try open table - if ((table = OpenTableWithCache(request->tablename(), &err)) == NULL) { - std::string reason = "fail to open table " + request->tablename() + " : " + err.GetReason(); - status.SetError(reason); + std::string type = http_row_mutation.type(); + if (type != "put" && type != "del-col" && type != "del-row") { + status.SetError("invalid request, operation:put/del-col/del-row"); } - - // 如果在检查参数的过程中遇到很多错误,这里选择返回最后一个错误原因给用户, - // 主要是为了保持错误检查代码尽量简洁易读。 - if (!status.Ok()) { - write_response_counter.Add(1); - response->set_reason(status.GetReason()); - response->set_status(false); - done->Run(); - return; - } - - struct PutContext* context = NewPutContext(request, response, done); - for (int i = 0; i < request->mutation_list_size(); i++) { - HttpRowMutation http_row_mutation = request->mutation_list(i); - std::string rowkey = http_row_mutation.rowkey(); - tera::RowMutation* mutation = table->NewRowMutation(rowkey); - for (int k = 0; k < http_row_mutation.columns_size(); k++) { - MutationColumns col = http_row_mutation.columns(k); - if (http_row_mutation.type() == "put") { - mutation->Put(col.columnfamily(), col.qualifier(), col.value()); - } else if (http_row_mutation.type() == "del-col") { - mutation->DeleteColumns(col.columnfamily(), col.qualifier()); - } else if (http_row_mutation.type() == "del-row") { - mutation->DeleteRow(); - } else { - abort(); // should checked at the start - } - } - mutation->SetContext(context); - mutation->SetCallBack(WriteCallback); - table->ApplyMutation(mutation); + } + + // try open table + if ((table = OpenTableWithCache(request->tablename(), &err)) == NULL) { + std::string reason = "fail to open table " + request->tablename() + " : " + err.GetReason(); + status.SetError(reason); + } + + // 如果在检查参数的过程中遇到很多错误,这里选择返回最后一个错误原因给用户, + // 主要是为了保持错误检查代码尽量简洁易读。 + if (!status.Ok()) { + write_response_counter.Add(1); + response->set_reason(status.GetReason()); + response->set_status(false); + done->Run(); + return; + } + + struct PutContext* context = NewPutContext(request, response, done); + for (int i = 0; i < request->mutation_list_size(); i++) { + HttpRowMutation http_row_mutation = request->mutation_list(i); + std::string rowkey = http_row_mutation.rowkey(); + tera::RowMutation* mutation = table->NewRowMutation(rowkey); + for (int k = 0; k < http_row_mutation.columns_size(); k++) { + MutationColumns col = http_row_mutation.columns(k); + if (http_row_mutation.type() == "put") { + mutation->Put(col.columnfamily(), col.qualifier(), col.value()); + } else if (http_row_mutation.type() == "del-col") { + mutation->DeleteColumns(col.columnfamily(), col.qualifier()); + } else if (http_row_mutation.type() == "del-row") { + mutation->DeleteRow(); + } else { + abort(); // should checked at the start + } } + mutation->SetContext(context); + mutation->SetCallBack(WriteCallback); + table->ApplyMutation(mutation); + } } bool InitRPCService(const std::string& ip_port, const std::string& confpath) { - // 定义RpcServer - sofa::pbrpc::RpcServerOptions options; - options.work_thread_num = 8; - sofa::pbrpc::RpcServer rpc_server(options); - - // 启动RpcServer - if (!rpc_server.Start(ip_port)) { - LOG(ERROR) << "start server failed"; - return false; - } - - // 创建和注册服务 - tera::http::HttpProxy* http_service = new HttpProxyImpl(confpath); - if (!rpc_server.RegisterService(http_service)) { - LOG(ERROR) << "register service failed"; - return false; - } - - // 等待SIGINT/SIGTERM退出信号 - rpc_server.Run(); - - // 停止Server - rpc_server.Stop(); - return true; + // 定义RpcServer + sofa::pbrpc::RpcServerOptions options; + options.work_thread_num = 8; + sofa::pbrpc::RpcServer rpc_server(options); + + // 启动RpcServer + if (!rpc_server.Start(ip_port)) { + LOG(ERROR) << "start server failed"; + return false; + } + + // 创建和注册服务 + tera::http::HttpProxy* http_service = new HttpProxyImpl(confpath); + if (!rpc_server.RegisterService(http_service)) { + LOG(ERROR) << "register service failed"; + return false; + } + + // 等待SIGINT/SIGTERM退出信号 + rpc_server.Run(); + + // 停止Server + rpc_server.Stop(); + return true; } -} // namespace http -} // namespace tera +} // namespace http +} // namespace tera -int main(int argc, char** argv) -{ - ::google::ParseCommandLineFlags(&argc, &argv, true); - tera::http::InitRPCService("0.0.0.0:" + FLAGS_tera_http_port, FLAGS_flagfile); - return 0; +int main(int argc, char** argv) { + ::google::ParseCommandLineFlags(&argc, &argv, true); + tera::http::InitRPCService("0.0.0.0:" + FLAGS_tera_http_port, FLAGS_flagfile); + return 0; } diff --git a/src/sdk/java/native-src/jni.h b/src/sdk/java/native-src/jni.h index 1476cd465..6072f8d60 100644 --- a/src/sdk/java/native-src/jni.h +++ b/src/sdk/java/native-src/jni.h @@ -38,13 +38,13 @@ extern "C" { #ifndef JNI_TYPES_ALREADY_DEFINED_IN_JNI_MD_H -typedef unsigned char jboolean; -typedef unsigned short jchar; -typedef short jshort; -typedef float jfloat; -typedef double jdouble; +typedef unsigned char jboolean; +typedef unsigned short jchar; +typedef short jshort; +typedef float jfloat; +typedef double jdouble; -typedef jint jsize; +typedef jint jsize; #ifdef __cplusplus @@ -102,15 +102,15 @@ typedef jarray jobjectArray; typedef jobject jweak; typedef union jvalue { - jboolean z; - jbyte b; - jchar c; - jshort s; - jint i; - jlong j; - jfloat f; - jdouble d; - jobject l; + jboolean z; + jbyte b; + jchar c; + jshort s; + jint i; + jlong j; + jfloat f; + jdouble d; + jobject l; } jvalue; struct _jfieldID; @@ -121,13 +121,12 @@ typedef struct _jmethodID *jmethodID; /* Return values from jobjectRefType */ typedef enum _jobjectType { - JNIInvalidRefType = 0, - JNILocalRefType = 1, - JNIGlobalRefType = 2, - JNIWeakGlobalRefType = 3 + JNIInvalidRefType = 0, + JNILocalRefType = 1, + JNIGlobalRefType = 2, + JNIWeakGlobalRefType = 3 } jobjectRefType; - #endif /* JNI_TYPES_ALREADY_DEFINED_IN_JNI_MD_H */ /* @@ -141,13 +140,13 @@ typedef enum _jobjectType { * possible return values for JNI functions. */ -#define JNI_OK 0 /* success */ -#define JNI_ERR (-1) /* unknown error */ -#define JNI_EDETACHED (-2) /* thread detached from the VM */ -#define JNI_EVERSION (-3) /* JNI version error */ -#define JNI_ENOMEM (-4) /* not enough memory */ -#define JNI_EEXIST (-5) /* VM already created */ -#define JNI_EINVAL (-6) /* invalid arguments */ +#define JNI_OK 0 /* success */ +#define JNI_ERR (-1) /* unknown error */ +#define JNI_EDETACHED (-2) /* thread detached from the VM */ +#define JNI_EVERSION (-3) /* JNI version error */ +#define JNI_ENOMEM (-4) /* not enough memory */ +#define JNI_EEXIST (-5) /* VM already created */ +#define JNI_EINVAL (-6) /* invalid arguments */ /* * used in ReleaseScalarArrayElements @@ -162,9 +161,9 @@ typedef enum _jobjectType { */ typedef struct { - char *name; - char *signature; - void *fnPtr; + char *name; + char *signature; + void *fnPtr; } JNINativeMethod; /* @@ -196,559 +195,384 @@ typedef const struct JNIInvokeInterface_ *JavaVM; #endif struct JNINativeInterface_ { - void *reserved0; - void *reserved1; - void *reserved2; - - void *reserved3; - jint (JNICALL *GetVersion)(JNIEnv *env); - - jclass (JNICALL *DefineClass) - (JNIEnv *env, const char *name, jobject loader, const jbyte *buf, - jsize len); - jclass (JNICALL *FindClass) - (JNIEnv *env, const char *name); - - jmethodID (JNICALL *FromReflectedMethod) - (JNIEnv *env, jobject method); - jfieldID (JNICALL *FromReflectedField) - (JNIEnv *env, jobject field); - - jobject (JNICALL *ToReflectedMethod) - (JNIEnv *env, jclass cls, jmethodID methodID, jboolean isStatic); - - jclass (JNICALL *GetSuperclass) - (JNIEnv *env, jclass sub); - jboolean (JNICALL *IsAssignableFrom) - (JNIEnv *env, jclass sub, jclass sup); - - jobject (JNICALL *ToReflectedField) - (JNIEnv *env, jclass cls, jfieldID fieldID, jboolean isStatic); - - jint (JNICALL *Throw) - (JNIEnv *env, jthrowable obj); - jint (JNICALL *ThrowNew) - (JNIEnv *env, jclass clazz, const char *msg); - jthrowable (JNICALL *ExceptionOccurred) - (JNIEnv *env); - void (JNICALL *ExceptionDescribe) - (JNIEnv *env); - void (JNICALL *ExceptionClear) - (JNIEnv *env); - void (JNICALL *FatalError) - (JNIEnv *env, const char *msg); - - jint (JNICALL *PushLocalFrame) - (JNIEnv *env, jint capacity); - jobject (JNICALL *PopLocalFrame) - (JNIEnv *env, jobject result); - - jobject (JNICALL *NewGlobalRef) - (JNIEnv *env, jobject lobj); - void (JNICALL *DeleteGlobalRef) - (JNIEnv *env, jobject gref); - void (JNICALL *DeleteLocalRef) - (JNIEnv *env, jobject obj); - jboolean (JNICALL *IsSameObject) - (JNIEnv *env, jobject obj1, jobject obj2); - jobject (JNICALL *NewLocalRef) - (JNIEnv *env, jobject ref); - jint (JNICALL *EnsureLocalCapacity) - (JNIEnv *env, jint capacity); - - jobject (JNICALL *AllocObject) - (JNIEnv *env, jclass clazz); - jobject (JNICALL *NewObject) - (JNIEnv *env, jclass clazz, jmethodID methodID, ...); - jobject (JNICALL *NewObjectV) - (JNIEnv *env, jclass clazz, jmethodID methodID, va_list args); - jobject (JNICALL *NewObjectA) - (JNIEnv *env, jclass clazz, jmethodID methodID, const jvalue *args); - - jclass (JNICALL *GetObjectClass) - (JNIEnv *env, jobject obj); - jboolean (JNICALL *IsInstanceOf) - (JNIEnv *env, jobject obj, jclass clazz); - - jmethodID (JNICALL *GetMethodID) - (JNIEnv *env, jclass clazz, const char *name, const char *sig); - - jobject (JNICALL *CallObjectMethod) - (JNIEnv *env, jobject obj, jmethodID methodID, ...); - jobject (JNICALL *CallObjectMethodV) - (JNIEnv *env, jobject obj, jmethodID methodID, va_list args); - jobject (JNICALL *CallObjectMethodA) - (JNIEnv *env, jobject obj, jmethodID methodID, const jvalue * args); - - jboolean (JNICALL *CallBooleanMethod) - (JNIEnv *env, jobject obj, jmethodID methodID, ...); - jboolean (JNICALL *CallBooleanMethodV) - (JNIEnv *env, jobject obj, jmethodID methodID, va_list args); - jboolean (JNICALL *CallBooleanMethodA) - (JNIEnv *env, jobject obj, jmethodID methodID, const jvalue * args); - - jbyte (JNICALL *CallByteMethod) - (JNIEnv *env, jobject obj, jmethodID methodID, ...); - jbyte (JNICALL *CallByteMethodV) - (JNIEnv *env, jobject obj, jmethodID methodID, va_list args); - jbyte (JNICALL *CallByteMethodA) - (JNIEnv *env, jobject obj, jmethodID methodID, const jvalue *args); - - jchar (JNICALL *CallCharMethod) - (JNIEnv *env, jobject obj, jmethodID methodID, ...); - jchar (JNICALL *CallCharMethodV) - (JNIEnv *env, jobject obj, jmethodID methodID, va_list args); - jchar (JNICALL *CallCharMethodA) - (JNIEnv *env, jobject obj, jmethodID methodID, const jvalue *args); - - jshort (JNICALL *CallShortMethod) - (JNIEnv *env, jobject obj, jmethodID methodID, ...); - jshort (JNICALL *CallShortMethodV) - (JNIEnv *env, jobject obj, jmethodID methodID, va_list args); - jshort (JNICALL *CallShortMethodA) - (JNIEnv *env, jobject obj, jmethodID methodID, const jvalue *args); - - jint (JNICALL *CallIntMethod) - (JNIEnv *env, jobject obj, jmethodID methodID, ...); - jint (JNICALL *CallIntMethodV) - (JNIEnv *env, jobject obj, jmethodID methodID, va_list args); - jint (JNICALL *CallIntMethodA) - (JNIEnv *env, jobject obj, jmethodID methodID, const jvalue *args); - - jlong (JNICALL *CallLongMethod) - (JNIEnv *env, jobject obj, jmethodID methodID, ...); - jlong (JNICALL *CallLongMethodV) - (JNIEnv *env, jobject obj, jmethodID methodID, va_list args); - jlong (JNICALL *CallLongMethodA) - (JNIEnv *env, jobject obj, jmethodID methodID, const jvalue *args); - - jfloat (JNICALL *CallFloatMethod) - (JNIEnv *env, jobject obj, jmethodID methodID, ...); - jfloat (JNICALL *CallFloatMethodV) - (JNIEnv *env, jobject obj, jmethodID methodID, va_list args); - jfloat (JNICALL *CallFloatMethodA) - (JNIEnv *env, jobject obj, jmethodID methodID, const jvalue *args); - - jdouble (JNICALL *CallDoubleMethod) - (JNIEnv *env, jobject obj, jmethodID methodID, ...); - jdouble (JNICALL *CallDoubleMethodV) - (JNIEnv *env, jobject obj, jmethodID methodID, va_list args); - jdouble (JNICALL *CallDoubleMethodA) - (JNIEnv *env, jobject obj, jmethodID methodID, const jvalue *args); - - void (JNICALL *CallVoidMethod) - (JNIEnv *env, jobject obj, jmethodID methodID, ...); - void (JNICALL *CallVoidMethodV) - (JNIEnv *env, jobject obj, jmethodID methodID, va_list args); - void (JNICALL *CallVoidMethodA) - (JNIEnv *env, jobject obj, jmethodID methodID, const jvalue * args); - - jobject (JNICALL *CallNonvirtualObjectMethod) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, ...); - jobject (JNICALL *CallNonvirtualObjectMethodV) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, - va_list args); - jobject (JNICALL *CallNonvirtualObjectMethodA) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, - const jvalue * args); - - jboolean (JNICALL *CallNonvirtualBooleanMethod) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, ...); - jboolean (JNICALL *CallNonvirtualBooleanMethodV) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, - va_list args); - jboolean (JNICALL *CallNonvirtualBooleanMethodA) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, - const jvalue * args); - - jbyte (JNICALL *CallNonvirtualByteMethod) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, ...); - jbyte (JNICALL *CallNonvirtualByteMethodV) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, - va_list args); - jbyte (JNICALL *CallNonvirtualByteMethodA) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, - const jvalue *args); - - jchar (JNICALL *CallNonvirtualCharMethod) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, ...); - jchar (JNICALL *CallNonvirtualCharMethodV) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, - va_list args); - jchar (JNICALL *CallNonvirtualCharMethodA) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, - const jvalue *args); - - jshort (JNICALL *CallNonvirtualShortMethod) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, ...); - jshort (JNICALL *CallNonvirtualShortMethodV) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, - va_list args); - jshort (JNICALL *CallNonvirtualShortMethodA) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, - const jvalue *args); - - jint (JNICALL *CallNonvirtualIntMethod) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, ...); - jint (JNICALL *CallNonvirtualIntMethodV) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, - va_list args); - jint (JNICALL *CallNonvirtualIntMethodA) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, - const jvalue *args); - - jlong (JNICALL *CallNonvirtualLongMethod) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, ...); - jlong (JNICALL *CallNonvirtualLongMethodV) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, - va_list args); - jlong (JNICALL *CallNonvirtualLongMethodA) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, - const jvalue *args); - - jfloat (JNICALL *CallNonvirtualFloatMethod) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, ...); - jfloat (JNICALL *CallNonvirtualFloatMethodV) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, - va_list args); - jfloat (JNICALL *CallNonvirtualFloatMethodA) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, - const jvalue *args); - - jdouble (JNICALL *CallNonvirtualDoubleMethod) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, ...); - jdouble (JNICALL *CallNonvirtualDoubleMethodV) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, - va_list args); - jdouble (JNICALL *CallNonvirtualDoubleMethodA) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, - const jvalue *args); - - void (JNICALL *CallNonvirtualVoidMethod) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, ...); - void (JNICALL *CallNonvirtualVoidMethodV) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, - va_list args); - void (JNICALL *CallNonvirtualVoidMethodA) - (JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, - const jvalue * args); - - jfieldID (JNICALL *GetFieldID) - (JNIEnv *env, jclass clazz, const char *name, const char *sig); - - jobject (JNICALL *GetObjectField) - (JNIEnv *env, jobject obj, jfieldID fieldID); - jboolean (JNICALL *GetBooleanField) - (JNIEnv *env, jobject obj, jfieldID fieldID); - jbyte (JNICALL *GetByteField) - (JNIEnv *env, jobject obj, jfieldID fieldID); - jchar (JNICALL *GetCharField) - (JNIEnv *env, jobject obj, jfieldID fieldID); - jshort (JNICALL *GetShortField) - (JNIEnv *env, jobject obj, jfieldID fieldID); - jint (JNICALL *GetIntField) - (JNIEnv *env, jobject obj, jfieldID fieldID); - jlong (JNICALL *GetLongField) - (JNIEnv *env, jobject obj, jfieldID fieldID); - jfloat (JNICALL *GetFloatField) - (JNIEnv *env, jobject obj, jfieldID fieldID); - jdouble (JNICALL *GetDoubleField) - (JNIEnv *env, jobject obj, jfieldID fieldID); - - void (JNICALL *SetObjectField) - (JNIEnv *env, jobject obj, jfieldID fieldID, jobject val); - void (JNICALL *SetBooleanField) - (JNIEnv *env, jobject obj, jfieldID fieldID, jboolean val); - void (JNICALL *SetByteField) - (JNIEnv *env, jobject obj, jfieldID fieldID, jbyte val); - void (JNICALL *SetCharField) - (JNIEnv *env, jobject obj, jfieldID fieldID, jchar val); - void (JNICALL *SetShortField) - (JNIEnv *env, jobject obj, jfieldID fieldID, jshort val); - void (JNICALL *SetIntField) - (JNIEnv *env, jobject obj, jfieldID fieldID, jint val); - void (JNICALL *SetLongField) - (JNIEnv *env, jobject obj, jfieldID fieldID, jlong val); - void (JNICALL *SetFloatField) - (JNIEnv *env, jobject obj, jfieldID fieldID, jfloat val); - void (JNICALL *SetDoubleField) - (JNIEnv *env, jobject obj, jfieldID fieldID, jdouble val); - - jmethodID (JNICALL *GetStaticMethodID) - (JNIEnv *env, jclass clazz, const char *name, const char *sig); - - jobject (JNICALL *CallStaticObjectMethod) - (JNIEnv *env, jclass clazz, jmethodID methodID, ...); - jobject (JNICALL *CallStaticObjectMethodV) - (JNIEnv *env, jclass clazz, jmethodID methodID, va_list args); - jobject (JNICALL *CallStaticObjectMethodA) - (JNIEnv *env, jclass clazz, jmethodID methodID, const jvalue *args); - - jboolean (JNICALL *CallStaticBooleanMethod) - (JNIEnv *env, jclass clazz, jmethodID methodID, ...); - jboolean (JNICALL *CallStaticBooleanMethodV) - (JNIEnv *env, jclass clazz, jmethodID methodID, va_list args); - jboolean (JNICALL *CallStaticBooleanMethodA) - (JNIEnv *env, jclass clazz, jmethodID methodID, const jvalue *args); - - jbyte (JNICALL *CallStaticByteMethod) - (JNIEnv *env, jclass clazz, jmethodID methodID, ...); - jbyte (JNICALL *CallStaticByteMethodV) - (JNIEnv *env, jclass clazz, jmethodID methodID, va_list args); - jbyte (JNICALL *CallStaticByteMethodA) - (JNIEnv *env, jclass clazz, jmethodID methodID, const jvalue *args); - - jchar (JNICALL *CallStaticCharMethod) - (JNIEnv *env, jclass clazz, jmethodID methodID, ...); - jchar (JNICALL *CallStaticCharMethodV) - (JNIEnv *env, jclass clazz, jmethodID methodID, va_list args); - jchar (JNICALL *CallStaticCharMethodA) - (JNIEnv *env, jclass clazz, jmethodID methodID, const jvalue *args); - - jshort (JNICALL *CallStaticShortMethod) - (JNIEnv *env, jclass clazz, jmethodID methodID, ...); - jshort (JNICALL *CallStaticShortMethodV) - (JNIEnv *env, jclass clazz, jmethodID methodID, va_list args); - jshort (JNICALL *CallStaticShortMethodA) - (JNIEnv *env, jclass clazz, jmethodID methodID, const jvalue *args); - - jint (JNICALL *CallStaticIntMethod) - (JNIEnv *env, jclass clazz, jmethodID methodID, ...); - jint (JNICALL *CallStaticIntMethodV) - (JNIEnv *env, jclass clazz, jmethodID methodID, va_list args); - jint (JNICALL *CallStaticIntMethodA) - (JNIEnv *env, jclass clazz, jmethodID methodID, const jvalue *args); - - jlong (JNICALL *CallStaticLongMethod) - (JNIEnv *env, jclass clazz, jmethodID methodID, ...); - jlong (JNICALL *CallStaticLongMethodV) - (JNIEnv *env, jclass clazz, jmethodID methodID, va_list args); - jlong (JNICALL *CallStaticLongMethodA) - (JNIEnv *env, jclass clazz, jmethodID methodID, const jvalue *args); - - jfloat (JNICALL *CallStaticFloatMethod) - (JNIEnv *env, jclass clazz, jmethodID methodID, ...); - jfloat (JNICALL *CallStaticFloatMethodV) - (JNIEnv *env, jclass clazz, jmethodID methodID, va_list args); - jfloat (JNICALL *CallStaticFloatMethodA) - (JNIEnv *env, jclass clazz, jmethodID methodID, const jvalue *args); - - jdouble (JNICALL *CallStaticDoubleMethod) - (JNIEnv *env, jclass clazz, jmethodID methodID, ...); - jdouble (JNICALL *CallStaticDoubleMethodV) - (JNIEnv *env, jclass clazz, jmethodID methodID, va_list args); - jdouble (JNICALL *CallStaticDoubleMethodA) - (JNIEnv *env, jclass clazz, jmethodID methodID, const jvalue *args); - - void (JNICALL *CallStaticVoidMethod) - (JNIEnv *env, jclass cls, jmethodID methodID, ...); - void (JNICALL *CallStaticVoidMethodV) - (JNIEnv *env, jclass cls, jmethodID methodID, va_list args); - void (JNICALL *CallStaticVoidMethodA) - (JNIEnv *env, jclass cls, jmethodID methodID, const jvalue * args); - - jfieldID (JNICALL *GetStaticFieldID) - (JNIEnv *env, jclass clazz, const char *name, const char *sig); - jobject (JNICALL *GetStaticObjectField) - (JNIEnv *env, jclass clazz, jfieldID fieldID); - jboolean (JNICALL *GetStaticBooleanField) - (JNIEnv *env, jclass clazz, jfieldID fieldID); - jbyte (JNICALL *GetStaticByteField) - (JNIEnv *env, jclass clazz, jfieldID fieldID); - jchar (JNICALL *GetStaticCharField) - (JNIEnv *env, jclass clazz, jfieldID fieldID); - jshort (JNICALL *GetStaticShortField) - (JNIEnv *env, jclass clazz, jfieldID fieldID); - jint (JNICALL *GetStaticIntField) - (JNIEnv *env, jclass clazz, jfieldID fieldID); - jlong (JNICALL *GetStaticLongField) - (JNIEnv *env, jclass clazz, jfieldID fieldID); - jfloat (JNICALL *GetStaticFloatField) - (JNIEnv *env, jclass clazz, jfieldID fieldID); - jdouble (JNICALL *GetStaticDoubleField) - (JNIEnv *env, jclass clazz, jfieldID fieldID); - - void (JNICALL *SetStaticObjectField) - (JNIEnv *env, jclass clazz, jfieldID fieldID, jobject value); - void (JNICALL *SetStaticBooleanField) - (JNIEnv *env, jclass clazz, jfieldID fieldID, jboolean value); - void (JNICALL *SetStaticByteField) - (JNIEnv *env, jclass clazz, jfieldID fieldID, jbyte value); - void (JNICALL *SetStaticCharField) - (JNIEnv *env, jclass clazz, jfieldID fieldID, jchar value); - void (JNICALL *SetStaticShortField) - (JNIEnv *env, jclass clazz, jfieldID fieldID, jshort value); - void (JNICALL *SetStaticIntField) - (JNIEnv *env, jclass clazz, jfieldID fieldID, jint value); - void (JNICALL *SetStaticLongField) - (JNIEnv *env, jclass clazz, jfieldID fieldID, jlong value); - void (JNICALL *SetStaticFloatField) - (JNIEnv *env, jclass clazz, jfieldID fieldID, jfloat value); - void (JNICALL *SetStaticDoubleField) - (JNIEnv *env, jclass clazz, jfieldID fieldID, jdouble value); - - jstring (JNICALL *NewString) - (JNIEnv *env, const jchar *unicode, jsize len); - jsize (JNICALL *GetStringLength) - (JNIEnv *env, jstring str); - const jchar *(JNICALL *GetStringChars) - (JNIEnv *env, jstring str, jboolean *isCopy); - void (JNICALL *ReleaseStringChars) - (JNIEnv *env, jstring str, const jchar *chars); - - jstring (JNICALL *NewStringUTF) - (JNIEnv *env, const char *utf); - jsize (JNICALL *GetStringUTFLength) - (JNIEnv *env, jstring str); - const char* (JNICALL *GetStringUTFChars) - (JNIEnv *env, jstring str, jboolean *isCopy); - void (JNICALL *ReleaseStringUTFChars) - (JNIEnv *env, jstring str, const char* chars); - - - jsize (JNICALL *GetArrayLength) - (JNIEnv *env, jarray array); - - jobjectArray (JNICALL *NewObjectArray) - (JNIEnv *env, jsize len, jclass clazz, jobject init); - jobject (JNICALL *GetObjectArrayElement) - (JNIEnv *env, jobjectArray array, jsize index); - void (JNICALL *SetObjectArrayElement) - (JNIEnv *env, jobjectArray array, jsize index, jobject val); - - jbooleanArray (JNICALL *NewBooleanArray) - (JNIEnv *env, jsize len); - jbyteArray (JNICALL *NewByteArray) - (JNIEnv *env, jsize len); - jcharArray (JNICALL *NewCharArray) - (JNIEnv *env, jsize len); - jshortArray (JNICALL *NewShortArray) - (JNIEnv *env, jsize len); - jintArray (JNICALL *NewIntArray) - (JNIEnv *env, jsize len); - jlongArray (JNICALL *NewLongArray) - (JNIEnv *env, jsize len); - jfloatArray (JNICALL *NewFloatArray) - (JNIEnv *env, jsize len); - jdoubleArray (JNICALL *NewDoubleArray) - (JNIEnv *env, jsize len); - - jboolean * (JNICALL *GetBooleanArrayElements) - (JNIEnv *env, jbooleanArray array, jboolean *isCopy); - jbyte * (JNICALL *GetByteArrayElements) - (JNIEnv *env, jbyteArray array, jboolean *isCopy); - jchar * (JNICALL *GetCharArrayElements) - (JNIEnv *env, jcharArray array, jboolean *isCopy); - jshort * (JNICALL *GetShortArrayElements) - (JNIEnv *env, jshortArray array, jboolean *isCopy); - jint * (JNICALL *GetIntArrayElements) - (JNIEnv *env, jintArray array, jboolean *isCopy); - jlong * (JNICALL *GetLongArrayElements) - (JNIEnv *env, jlongArray array, jboolean *isCopy); - jfloat * (JNICALL *GetFloatArrayElements) - (JNIEnv *env, jfloatArray array, jboolean *isCopy); - jdouble * (JNICALL *GetDoubleArrayElements) - (JNIEnv *env, jdoubleArray array, jboolean *isCopy); - - void (JNICALL *ReleaseBooleanArrayElements) - (JNIEnv *env, jbooleanArray array, jboolean *elems, jint mode); - void (JNICALL *ReleaseByteArrayElements) - (JNIEnv *env, jbyteArray array, jbyte *elems, jint mode); - void (JNICALL *ReleaseCharArrayElements) - (JNIEnv *env, jcharArray array, jchar *elems, jint mode); - void (JNICALL *ReleaseShortArrayElements) - (JNIEnv *env, jshortArray array, jshort *elems, jint mode); - void (JNICALL *ReleaseIntArrayElements) - (JNIEnv *env, jintArray array, jint *elems, jint mode); - void (JNICALL *ReleaseLongArrayElements) - (JNIEnv *env, jlongArray array, jlong *elems, jint mode); - void (JNICALL *ReleaseFloatArrayElements) - (JNIEnv *env, jfloatArray array, jfloat *elems, jint mode); - void (JNICALL *ReleaseDoubleArrayElements) - (JNIEnv *env, jdoubleArray array, jdouble *elems, jint mode); - - void (JNICALL *GetBooleanArrayRegion) - (JNIEnv *env, jbooleanArray array, jsize start, jsize l, jboolean *buf); - void (JNICALL *GetByteArrayRegion) - (JNIEnv *env, jbyteArray array, jsize start, jsize len, jbyte *buf); - void (JNICALL *GetCharArrayRegion) - (JNIEnv *env, jcharArray array, jsize start, jsize len, jchar *buf); - void (JNICALL *GetShortArrayRegion) - (JNIEnv *env, jshortArray array, jsize start, jsize len, jshort *buf); - void (JNICALL *GetIntArrayRegion) - (JNIEnv *env, jintArray array, jsize start, jsize len, jint *buf); - void (JNICALL *GetLongArrayRegion) - (JNIEnv *env, jlongArray array, jsize start, jsize len, jlong *buf); - void (JNICALL *GetFloatArrayRegion) - (JNIEnv *env, jfloatArray array, jsize start, jsize len, jfloat *buf); - void (JNICALL *GetDoubleArrayRegion) - (JNIEnv *env, jdoubleArray array, jsize start, jsize len, jdouble *buf); - - void (JNICALL *SetBooleanArrayRegion) - (JNIEnv *env, jbooleanArray array, jsize start, jsize l, const jboolean *buf); - void (JNICALL *SetByteArrayRegion) - (JNIEnv *env, jbyteArray array, jsize start, jsize len, const jbyte *buf); - void (JNICALL *SetCharArrayRegion) - (JNIEnv *env, jcharArray array, jsize start, jsize len, const jchar *buf); - void (JNICALL *SetShortArrayRegion) - (JNIEnv *env, jshortArray array, jsize start, jsize len, const jshort *buf); - void (JNICALL *SetIntArrayRegion) - (JNIEnv *env, jintArray array, jsize start, jsize len, const jint *buf); - void (JNICALL *SetLongArrayRegion) - (JNIEnv *env, jlongArray array, jsize start, jsize len, const jlong *buf); - void (JNICALL *SetFloatArrayRegion) - (JNIEnv *env, jfloatArray array, jsize start, jsize len, const jfloat *buf); - void (JNICALL *SetDoubleArrayRegion) - (JNIEnv *env, jdoubleArray array, jsize start, jsize len, const jdouble *buf); - - jint (JNICALL *RegisterNatives) - (JNIEnv *env, jclass clazz, const JNINativeMethod *methods, - jint nMethods); - jint (JNICALL *UnregisterNatives) - (JNIEnv *env, jclass clazz); - - jint (JNICALL *MonitorEnter) - (JNIEnv *env, jobject obj); - jint (JNICALL *MonitorExit) - (JNIEnv *env, jobject obj); - - jint (JNICALL *GetJavaVM) - (JNIEnv *env, JavaVM **vm); - - void (JNICALL *GetStringRegion) - (JNIEnv *env, jstring str, jsize start, jsize len, jchar *buf); - void (JNICALL *GetStringUTFRegion) - (JNIEnv *env, jstring str, jsize start, jsize len, char *buf); - - void * (JNICALL *GetPrimitiveArrayCritical) - (JNIEnv *env, jarray array, jboolean *isCopy); - void (JNICALL *ReleasePrimitiveArrayCritical) - (JNIEnv *env, jarray array, void *carray, jint mode); - - const jchar * (JNICALL *GetStringCritical) - (JNIEnv *env, jstring string, jboolean *isCopy); - void (JNICALL *ReleaseStringCritical) - (JNIEnv *env, jstring string, const jchar *cstring); - - jweak (JNICALL *NewWeakGlobalRef) - (JNIEnv *env, jobject obj); - void (JNICALL *DeleteWeakGlobalRef) - (JNIEnv *env, jweak ref); - - jboolean (JNICALL *ExceptionCheck) - (JNIEnv *env); - - jobject (JNICALL *NewDirectByteBuffer) - (JNIEnv* env, void* address, jlong capacity); - void* (JNICALL *GetDirectBufferAddress) - (JNIEnv* env, jobject buf); - jlong (JNICALL *GetDirectBufferCapacity) - (JNIEnv* env, jobject buf); - - /* New JNI 1.6 Features */ - - jobjectRefType (JNICALL *GetObjectRefType) - (JNIEnv* env, jobject obj); + void *reserved0; + void *reserved1; + void *reserved2; + + void *reserved3; + jint(JNICALL *GetVersion)(JNIEnv *env); + + jclass(JNICALL *DefineClass)(JNIEnv *env, const char *name, jobject loader, const jbyte *buf, + jsize len); + jclass(JNICALL *FindClass)(JNIEnv *env, const char *name); + + jmethodID(JNICALL *FromReflectedMethod)(JNIEnv *env, jobject method); + jfieldID(JNICALL *FromReflectedField)(JNIEnv *env, jobject field); + + jobject(JNICALL *ToReflectedMethod)(JNIEnv *env, jclass cls, jmethodID methodID, + jboolean isStatic); + + jclass(JNICALL *GetSuperclass)(JNIEnv *env, jclass sub); + jboolean(JNICALL *IsAssignableFrom)(JNIEnv *env, jclass sub, jclass sup); + + jobject(JNICALL *ToReflectedField)(JNIEnv *env, jclass cls, jfieldID fieldID, jboolean isStatic); + + jint(JNICALL *Throw)(JNIEnv *env, jthrowable obj); + jint(JNICALL *ThrowNew)(JNIEnv *env, jclass clazz, const char *msg); + jthrowable(JNICALL *ExceptionOccurred)(JNIEnv *env); + void(JNICALL *ExceptionDescribe)(JNIEnv *env); + void(JNICALL *ExceptionClear)(JNIEnv *env); + void(JNICALL *FatalError)(JNIEnv *env, const char *msg); + + jint(JNICALL *PushLocalFrame)(JNIEnv *env, jint capacity); + jobject(JNICALL *PopLocalFrame)(JNIEnv *env, jobject result); + + jobject(JNICALL *NewGlobalRef)(JNIEnv *env, jobject lobj); + void(JNICALL *DeleteGlobalRef)(JNIEnv *env, jobject gref); + void(JNICALL *DeleteLocalRef)(JNIEnv *env, jobject obj); + jboolean(JNICALL *IsSameObject)(JNIEnv *env, jobject obj1, jobject obj2); + jobject(JNICALL *NewLocalRef)(JNIEnv *env, jobject ref); + jint(JNICALL *EnsureLocalCapacity)(JNIEnv *env, jint capacity); + + jobject(JNICALL *AllocObject)(JNIEnv *env, jclass clazz); + jobject(JNICALL *NewObject)(JNIEnv *env, jclass clazz, jmethodID methodID, ...); + jobject(JNICALL *NewObjectV)(JNIEnv *env, jclass clazz, jmethodID methodID, va_list args); + jobject(JNICALL *NewObjectA)(JNIEnv *env, jclass clazz, jmethodID methodID, const jvalue *args); + + jclass(JNICALL *GetObjectClass)(JNIEnv *env, jobject obj); + jboolean(JNICALL *IsInstanceOf)(JNIEnv *env, jobject obj, jclass clazz); + + jmethodID(JNICALL *GetMethodID)(JNIEnv *env, jclass clazz, const char *name, const char *sig); + + jobject(JNICALL *CallObjectMethod)(JNIEnv *env, jobject obj, jmethodID methodID, ...); + jobject(JNICALL *CallObjectMethodV)(JNIEnv *env, jobject obj, jmethodID methodID, va_list args); + jobject(JNICALL *CallObjectMethodA)(JNIEnv *env, jobject obj, jmethodID methodID, + const jvalue *args); + + jboolean(JNICALL *CallBooleanMethod)(JNIEnv *env, jobject obj, jmethodID methodID, ...); + jboolean(JNICALL *CallBooleanMethodV)(JNIEnv *env, jobject obj, jmethodID methodID, va_list args); + jboolean(JNICALL *CallBooleanMethodA)(JNIEnv *env, jobject obj, jmethodID methodID, + const jvalue *args); + + jbyte(JNICALL *CallByteMethod)(JNIEnv *env, jobject obj, jmethodID methodID, ...); + jbyte(JNICALL *CallByteMethodV)(JNIEnv *env, jobject obj, jmethodID methodID, va_list args); + jbyte(JNICALL *CallByteMethodA)(JNIEnv *env, jobject obj, jmethodID methodID, const jvalue *args); + + jchar(JNICALL *CallCharMethod)(JNIEnv *env, jobject obj, jmethodID methodID, ...); + jchar(JNICALL *CallCharMethodV)(JNIEnv *env, jobject obj, jmethodID methodID, va_list args); + jchar(JNICALL *CallCharMethodA)(JNIEnv *env, jobject obj, jmethodID methodID, const jvalue *args); + + jshort(JNICALL *CallShortMethod)(JNIEnv *env, jobject obj, jmethodID methodID, ...); + jshort(JNICALL *CallShortMethodV)(JNIEnv *env, jobject obj, jmethodID methodID, va_list args); + jshort(JNICALL *CallShortMethodA)(JNIEnv *env, jobject obj, jmethodID methodID, + const jvalue *args); + + jint(JNICALL *CallIntMethod)(JNIEnv *env, jobject obj, jmethodID methodID, ...); + jint(JNICALL *CallIntMethodV)(JNIEnv *env, jobject obj, jmethodID methodID, va_list args); + jint(JNICALL *CallIntMethodA)(JNIEnv *env, jobject obj, jmethodID methodID, const jvalue *args); + + jlong(JNICALL *CallLongMethod)(JNIEnv *env, jobject obj, jmethodID methodID, ...); + jlong(JNICALL *CallLongMethodV)(JNIEnv *env, jobject obj, jmethodID methodID, va_list args); + jlong(JNICALL *CallLongMethodA)(JNIEnv *env, jobject obj, jmethodID methodID, const jvalue *args); + + jfloat(JNICALL *CallFloatMethod)(JNIEnv *env, jobject obj, jmethodID methodID, ...); + jfloat(JNICALL *CallFloatMethodV)(JNIEnv *env, jobject obj, jmethodID methodID, va_list args); + jfloat(JNICALL *CallFloatMethodA)(JNIEnv *env, jobject obj, jmethodID methodID, + const jvalue *args); + + jdouble(JNICALL *CallDoubleMethod)(JNIEnv *env, jobject obj, jmethodID methodID, ...); + jdouble(JNICALL *CallDoubleMethodV)(JNIEnv *env, jobject obj, jmethodID methodID, va_list args); + jdouble(JNICALL *CallDoubleMethodA)(JNIEnv *env, jobject obj, jmethodID methodID, + const jvalue *args); + + void(JNICALL *CallVoidMethod)(JNIEnv *env, jobject obj, jmethodID methodID, ...); + void(JNICALL *CallVoidMethodV)(JNIEnv *env, jobject obj, jmethodID methodID, va_list args); + void(JNICALL *CallVoidMethodA)(JNIEnv *env, jobject obj, jmethodID methodID, const jvalue *args); + + jobject(JNICALL *CallNonvirtualObjectMethod)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, ...); + jobject(JNICALL *CallNonvirtualObjectMethodV)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, va_list args); + jobject(JNICALL *CallNonvirtualObjectMethodA)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, const jvalue *args); + + jboolean(JNICALL *CallNonvirtualBooleanMethod)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, ...); + jboolean(JNICALL *CallNonvirtualBooleanMethodV)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, va_list args); + jboolean(JNICALL *CallNonvirtualBooleanMethodA)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, const jvalue *args); + + jbyte(JNICALL *CallNonvirtualByteMethod)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, ...); + jbyte(JNICALL *CallNonvirtualByteMethodV)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, va_list args); + jbyte(JNICALL *CallNonvirtualByteMethodA)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, const jvalue *args); + + jchar(JNICALL *CallNonvirtualCharMethod)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, ...); + jchar(JNICALL *CallNonvirtualCharMethodV)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, va_list args); + jchar(JNICALL *CallNonvirtualCharMethodA)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, const jvalue *args); + + jshort(JNICALL *CallNonvirtualShortMethod)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, ...); + jshort(JNICALL *CallNonvirtualShortMethodV)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, va_list args); + jshort(JNICALL *CallNonvirtualShortMethodA)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, const jvalue *args); + + jint(JNICALL *CallNonvirtualIntMethod)(JNIEnv *env, jobject obj, jclass clazz, jmethodID methodID, + ...); + jint(JNICALL *CallNonvirtualIntMethodV)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, va_list args); + jint(JNICALL *CallNonvirtualIntMethodA)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, const jvalue *args); + + jlong(JNICALL *CallNonvirtualLongMethod)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, ...); + jlong(JNICALL *CallNonvirtualLongMethodV)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, va_list args); + jlong(JNICALL *CallNonvirtualLongMethodA)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, const jvalue *args); + + jfloat(JNICALL *CallNonvirtualFloatMethod)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, ...); + jfloat(JNICALL *CallNonvirtualFloatMethodV)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, va_list args); + jfloat(JNICALL *CallNonvirtualFloatMethodA)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, const jvalue *args); + + jdouble(JNICALL *CallNonvirtualDoubleMethod)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, ...); + jdouble(JNICALL *CallNonvirtualDoubleMethodV)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, va_list args); + jdouble(JNICALL *CallNonvirtualDoubleMethodA)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, const jvalue *args); + + void(JNICALL *CallNonvirtualVoidMethod)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, ...); + void(JNICALL *CallNonvirtualVoidMethodV)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, va_list args); + void(JNICALL *CallNonvirtualVoidMethodA)(JNIEnv *env, jobject obj, jclass clazz, + jmethodID methodID, const jvalue *args); + + jfieldID(JNICALL *GetFieldID)(JNIEnv *env, jclass clazz, const char *name, const char *sig); + + jobject(JNICALL *GetObjectField)(JNIEnv *env, jobject obj, jfieldID fieldID); + jboolean(JNICALL *GetBooleanField)(JNIEnv *env, jobject obj, jfieldID fieldID); + jbyte(JNICALL *GetByteField)(JNIEnv *env, jobject obj, jfieldID fieldID); + jchar(JNICALL *GetCharField)(JNIEnv *env, jobject obj, jfieldID fieldID); + jshort(JNICALL *GetShortField)(JNIEnv *env, jobject obj, jfieldID fieldID); + jint(JNICALL *GetIntField)(JNIEnv *env, jobject obj, jfieldID fieldID); + jlong(JNICALL *GetLongField)(JNIEnv *env, jobject obj, jfieldID fieldID); + jfloat(JNICALL *GetFloatField)(JNIEnv *env, jobject obj, jfieldID fieldID); + jdouble(JNICALL *GetDoubleField)(JNIEnv *env, jobject obj, jfieldID fieldID); + + void(JNICALL *SetObjectField)(JNIEnv *env, jobject obj, jfieldID fieldID, jobject val); + void(JNICALL *SetBooleanField)(JNIEnv *env, jobject obj, jfieldID fieldID, jboolean val); + void(JNICALL *SetByteField)(JNIEnv *env, jobject obj, jfieldID fieldID, jbyte val); + void(JNICALL *SetCharField)(JNIEnv *env, jobject obj, jfieldID fieldID, jchar val); + void(JNICALL *SetShortField)(JNIEnv *env, jobject obj, jfieldID fieldID, jshort val); + void(JNICALL *SetIntField)(JNIEnv *env, jobject obj, jfieldID fieldID, jint val); + void(JNICALL *SetLongField)(JNIEnv *env, jobject obj, jfieldID fieldID, jlong val); + void(JNICALL *SetFloatField)(JNIEnv *env, jobject obj, jfieldID fieldID, jfloat val); + void(JNICALL *SetDoubleField)(JNIEnv *env, jobject obj, jfieldID fieldID, jdouble val); + + jmethodID(JNICALL *GetStaticMethodID)(JNIEnv *env, jclass clazz, const char *name, + const char *sig); + + jobject(JNICALL *CallStaticObjectMethod)(JNIEnv *env, jclass clazz, jmethodID methodID, ...); + jobject(JNICALL *CallStaticObjectMethodV)(JNIEnv *env, jclass clazz, jmethodID methodID, + va_list args); + jobject(JNICALL *CallStaticObjectMethodA)(JNIEnv *env, jclass clazz, jmethodID methodID, + const jvalue *args); + + jboolean(JNICALL *CallStaticBooleanMethod)(JNIEnv *env, jclass clazz, jmethodID methodID, ...); + jboolean(JNICALL *CallStaticBooleanMethodV)(JNIEnv *env, jclass clazz, jmethodID methodID, + va_list args); + jboolean(JNICALL *CallStaticBooleanMethodA)(JNIEnv *env, jclass clazz, jmethodID methodID, + const jvalue *args); + + jbyte(JNICALL *CallStaticByteMethod)(JNIEnv *env, jclass clazz, jmethodID methodID, ...); + jbyte(JNICALL *CallStaticByteMethodV)(JNIEnv *env, jclass clazz, jmethodID methodID, + va_list args); + jbyte(JNICALL *CallStaticByteMethodA)(JNIEnv *env, jclass clazz, jmethodID methodID, + const jvalue *args); + + jchar(JNICALL *CallStaticCharMethod)(JNIEnv *env, jclass clazz, jmethodID methodID, ...); + jchar(JNICALL *CallStaticCharMethodV)(JNIEnv *env, jclass clazz, jmethodID methodID, + va_list args); + jchar(JNICALL *CallStaticCharMethodA)(JNIEnv *env, jclass clazz, jmethodID methodID, + const jvalue *args); + + jshort(JNICALL *CallStaticShortMethod)(JNIEnv *env, jclass clazz, jmethodID methodID, ...); + jshort(JNICALL *CallStaticShortMethodV)(JNIEnv *env, jclass clazz, jmethodID methodID, + va_list args); + jshort(JNICALL *CallStaticShortMethodA)(JNIEnv *env, jclass clazz, jmethodID methodID, + const jvalue *args); + + jint(JNICALL *CallStaticIntMethod)(JNIEnv *env, jclass clazz, jmethodID methodID, ...); + jint(JNICALL *CallStaticIntMethodV)(JNIEnv *env, jclass clazz, jmethodID methodID, va_list args); + jint(JNICALL *CallStaticIntMethodA)(JNIEnv *env, jclass clazz, jmethodID methodID, + const jvalue *args); + + jlong(JNICALL *CallStaticLongMethod)(JNIEnv *env, jclass clazz, jmethodID methodID, ...); + jlong(JNICALL *CallStaticLongMethodV)(JNIEnv *env, jclass clazz, jmethodID methodID, + va_list args); + jlong(JNICALL *CallStaticLongMethodA)(JNIEnv *env, jclass clazz, jmethodID methodID, + const jvalue *args); + + jfloat(JNICALL *CallStaticFloatMethod)(JNIEnv *env, jclass clazz, jmethodID methodID, ...); + jfloat(JNICALL *CallStaticFloatMethodV)(JNIEnv *env, jclass clazz, jmethodID methodID, + va_list args); + jfloat(JNICALL *CallStaticFloatMethodA)(JNIEnv *env, jclass clazz, jmethodID methodID, + const jvalue *args); + + jdouble(JNICALL *CallStaticDoubleMethod)(JNIEnv *env, jclass clazz, jmethodID methodID, ...); + jdouble(JNICALL *CallStaticDoubleMethodV)(JNIEnv *env, jclass clazz, jmethodID methodID, + va_list args); + jdouble(JNICALL *CallStaticDoubleMethodA)(JNIEnv *env, jclass clazz, jmethodID methodID, + const jvalue *args); + + void(JNICALL *CallStaticVoidMethod)(JNIEnv *env, jclass cls, jmethodID methodID, ...); + void(JNICALL *CallStaticVoidMethodV)(JNIEnv *env, jclass cls, jmethodID methodID, va_list args); + void(JNICALL *CallStaticVoidMethodA)(JNIEnv *env, jclass cls, jmethodID methodID, + const jvalue *args); + + jfieldID(JNICALL *GetStaticFieldID)(JNIEnv *env, jclass clazz, const char *name, const char *sig); + jobject(JNICALL *GetStaticObjectField)(JNIEnv *env, jclass clazz, jfieldID fieldID); + jboolean(JNICALL *GetStaticBooleanField)(JNIEnv *env, jclass clazz, jfieldID fieldID); + jbyte(JNICALL *GetStaticByteField)(JNIEnv *env, jclass clazz, jfieldID fieldID); + jchar(JNICALL *GetStaticCharField)(JNIEnv *env, jclass clazz, jfieldID fieldID); + jshort(JNICALL *GetStaticShortField)(JNIEnv *env, jclass clazz, jfieldID fieldID); + jint(JNICALL *GetStaticIntField)(JNIEnv *env, jclass clazz, jfieldID fieldID); + jlong(JNICALL *GetStaticLongField)(JNIEnv *env, jclass clazz, jfieldID fieldID); + jfloat(JNICALL *GetStaticFloatField)(JNIEnv *env, jclass clazz, jfieldID fieldID); + jdouble(JNICALL *GetStaticDoubleField)(JNIEnv *env, jclass clazz, jfieldID fieldID); + + void(JNICALL *SetStaticObjectField)(JNIEnv *env, jclass clazz, jfieldID fieldID, jobject value); + void(JNICALL *SetStaticBooleanField)(JNIEnv *env, jclass clazz, jfieldID fieldID, jboolean value); + void(JNICALL *SetStaticByteField)(JNIEnv *env, jclass clazz, jfieldID fieldID, jbyte value); + void(JNICALL *SetStaticCharField)(JNIEnv *env, jclass clazz, jfieldID fieldID, jchar value); + void(JNICALL *SetStaticShortField)(JNIEnv *env, jclass clazz, jfieldID fieldID, jshort value); + void(JNICALL *SetStaticIntField)(JNIEnv *env, jclass clazz, jfieldID fieldID, jint value); + void(JNICALL *SetStaticLongField)(JNIEnv *env, jclass clazz, jfieldID fieldID, jlong value); + void(JNICALL *SetStaticFloatField)(JNIEnv *env, jclass clazz, jfieldID fieldID, jfloat value); + void(JNICALL *SetStaticDoubleField)(JNIEnv *env, jclass clazz, jfieldID fieldID, jdouble value); + + jstring(JNICALL *NewString)(JNIEnv *env, const jchar *unicode, jsize len); + jsize(JNICALL *GetStringLength)(JNIEnv *env, jstring str); + const jchar *(JNICALL *GetStringChars)(JNIEnv *env, jstring str, jboolean *isCopy); + void(JNICALL *ReleaseStringChars)(JNIEnv *env, jstring str, const jchar *chars); + + jstring(JNICALL *NewStringUTF)(JNIEnv *env, const char *utf); + jsize(JNICALL *GetStringUTFLength)(JNIEnv *env, jstring str); + const char *(JNICALL *GetStringUTFChars)(JNIEnv *env, jstring str, jboolean *isCopy); + void(JNICALL *ReleaseStringUTFChars)(JNIEnv *env, jstring str, const char *chars); + + jsize(JNICALL *GetArrayLength)(JNIEnv *env, jarray array); + + jobjectArray(JNICALL *NewObjectArray)(JNIEnv *env, jsize len, jclass clazz, jobject init); + jobject(JNICALL *GetObjectArrayElement)(JNIEnv *env, jobjectArray array, jsize index); + void(JNICALL *SetObjectArrayElement)(JNIEnv *env, jobjectArray array, jsize index, jobject val); + + jbooleanArray(JNICALL *NewBooleanArray)(JNIEnv *env, jsize len); + jbyteArray(JNICALL *NewByteArray)(JNIEnv *env, jsize len); + jcharArray(JNICALL *NewCharArray)(JNIEnv *env, jsize len); + jshortArray(JNICALL *NewShortArray)(JNIEnv *env, jsize len); + jintArray(JNICALL *NewIntArray)(JNIEnv *env, jsize len); + jlongArray(JNICALL *NewLongArray)(JNIEnv *env, jsize len); + jfloatArray(JNICALL *NewFloatArray)(JNIEnv *env, jsize len); + jdoubleArray(JNICALL *NewDoubleArray)(JNIEnv *env, jsize len); + + jboolean *(JNICALL *GetBooleanArrayElements)(JNIEnv *env, jbooleanArray array, jboolean *isCopy); + jbyte *(JNICALL *GetByteArrayElements)(JNIEnv *env, jbyteArray array, jboolean *isCopy); + jchar *(JNICALL *GetCharArrayElements)(JNIEnv *env, jcharArray array, jboolean *isCopy); + jshort *(JNICALL *GetShortArrayElements)(JNIEnv *env, jshortArray array, jboolean *isCopy); + jint *(JNICALL *GetIntArrayElements)(JNIEnv *env, jintArray array, jboolean *isCopy); + jlong *(JNICALL *GetLongArrayElements)(JNIEnv *env, jlongArray array, jboolean *isCopy); + jfloat *(JNICALL *GetFloatArrayElements)(JNIEnv *env, jfloatArray array, jboolean *isCopy); + jdouble *(JNICALL *GetDoubleArrayElements)(JNIEnv *env, jdoubleArray array, jboolean *isCopy); + + void(JNICALL *ReleaseBooleanArrayElements)(JNIEnv *env, jbooleanArray array, jboolean *elems, + jint mode); + void(JNICALL *ReleaseByteArrayElements)(JNIEnv *env, jbyteArray array, jbyte *elems, jint mode); + void(JNICALL *ReleaseCharArrayElements)(JNIEnv *env, jcharArray array, jchar *elems, jint mode); + void(JNICALL *ReleaseShortArrayElements)(JNIEnv *env, jshortArray array, jshort *elems, + jint mode); + void(JNICALL *ReleaseIntArrayElements)(JNIEnv *env, jintArray array, jint *elems, jint mode); + void(JNICALL *ReleaseLongArrayElements)(JNIEnv *env, jlongArray array, jlong *elems, jint mode); + void(JNICALL *ReleaseFloatArrayElements)(JNIEnv *env, jfloatArray array, jfloat *elems, + jint mode); + void(JNICALL *ReleaseDoubleArrayElements)(JNIEnv *env, jdoubleArray array, jdouble *elems, + jint mode); + + void(JNICALL *GetBooleanArrayRegion)(JNIEnv *env, jbooleanArray array, jsize start, jsize l, + jboolean *buf); + void(JNICALL *GetByteArrayRegion)(JNIEnv *env, jbyteArray array, jsize start, jsize len, + jbyte *buf); + void(JNICALL *GetCharArrayRegion)(JNIEnv *env, jcharArray array, jsize start, jsize len, + jchar *buf); + void(JNICALL *GetShortArrayRegion)(JNIEnv *env, jshortArray array, jsize start, jsize len, + jshort *buf); + void(JNICALL *GetIntArrayRegion)(JNIEnv *env, jintArray array, jsize start, jsize len, jint *buf); + void(JNICALL *GetLongArrayRegion)(JNIEnv *env, jlongArray array, jsize start, jsize len, + jlong *buf); + void(JNICALL *GetFloatArrayRegion)(JNIEnv *env, jfloatArray array, jsize start, jsize len, + jfloat *buf); + void(JNICALL *GetDoubleArrayRegion)(JNIEnv *env, jdoubleArray array, jsize start, jsize len, + jdouble *buf); + + void(JNICALL *SetBooleanArrayRegion)(JNIEnv *env, jbooleanArray array, jsize start, jsize l, + const jboolean *buf); + void(JNICALL *SetByteArrayRegion)(JNIEnv *env, jbyteArray array, jsize start, jsize len, + const jbyte *buf); + void(JNICALL *SetCharArrayRegion)(JNIEnv *env, jcharArray array, jsize start, jsize len, + const jchar *buf); + void(JNICALL *SetShortArrayRegion)(JNIEnv *env, jshortArray array, jsize start, jsize len, + const jshort *buf); + void(JNICALL *SetIntArrayRegion)(JNIEnv *env, jintArray array, jsize start, jsize len, + const jint *buf); + void(JNICALL *SetLongArrayRegion)(JNIEnv *env, jlongArray array, jsize start, jsize len, + const jlong *buf); + void(JNICALL *SetFloatArrayRegion)(JNIEnv *env, jfloatArray array, jsize start, jsize len, + const jfloat *buf); + void(JNICALL *SetDoubleArrayRegion)(JNIEnv *env, jdoubleArray array, jsize start, jsize len, + const jdouble *buf); + + jint(JNICALL *RegisterNatives)(JNIEnv *env, jclass clazz, const JNINativeMethod *methods, + jint nMethods); + jint(JNICALL *UnregisterNatives)(JNIEnv *env, jclass clazz); + + jint(JNICALL *MonitorEnter)(JNIEnv *env, jobject obj); + jint(JNICALL *MonitorExit)(JNIEnv *env, jobject obj); + + jint(JNICALL *GetJavaVM)(JNIEnv *env, JavaVM **vm); + + void(JNICALL *GetStringRegion)(JNIEnv *env, jstring str, jsize start, jsize len, jchar *buf); + void(JNICALL *GetStringUTFRegion)(JNIEnv *env, jstring str, jsize start, jsize len, char *buf); + + void *(JNICALL *GetPrimitiveArrayCritical)(JNIEnv *env, jarray array, jboolean *isCopy); + void(JNICALL *ReleasePrimitiveArrayCritical)(JNIEnv *env, jarray array, void *carray, jint mode); + + const jchar *(JNICALL *GetStringCritical)(JNIEnv *env, jstring string, jboolean *isCopy); + void(JNICALL *ReleaseStringCritical)(JNIEnv *env, jstring string, const jchar *cstring); + + jweak(JNICALL *NewWeakGlobalRef)(JNIEnv *env, jobject obj); + void(JNICALL *DeleteWeakGlobalRef)(JNIEnv *env, jweak ref); + + jboolean(JNICALL *ExceptionCheck)(JNIEnv *env); + + jobject(JNICALL *NewDirectByteBuffer)(JNIEnv *env, void *address, jlong capacity); + void *(JNICALL *GetDirectBufferAddress)(JNIEnv *env, jobject buf); + jlong(JNICALL *GetDirectBufferCapacity)(JNIEnv *env, jobject buf); + + /* New JNI 1.6 Features */ + + jobjectRefType(JNICALL *GetObjectRefType)(JNIEnv *env, jobject obj); }; /* @@ -764,1104 +588,862 @@ struct JNINativeInterface_ { */ struct JNIEnv_ { - const struct JNINativeInterface_ *functions; + const struct JNINativeInterface_ *functions; #ifdef __cplusplus - jint GetVersion() { - return functions->GetVersion(this); - } - jclass DefineClass(const char *name, jobject loader, const jbyte *buf, - jsize len) { - return functions->DefineClass(this, name, loader, buf, len); - } - jclass FindClass(const char *name) { - return functions->FindClass(this, name); - } - jmethodID FromReflectedMethod(jobject method) { - return functions->FromReflectedMethod(this,method); - } - jfieldID FromReflectedField(jobject field) { - return functions->FromReflectedField(this,field); - } - - jobject ToReflectedMethod(jclass cls, jmethodID methodID, jboolean isStatic) { - return functions->ToReflectedMethod(this, cls, methodID, isStatic); - } - - jclass GetSuperclass(jclass sub) { - return functions->GetSuperclass(this, sub); - } - jboolean IsAssignableFrom(jclass sub, jclass sup) { - return functions->IsAssignableFrom(this, sub, sup); - } - - jobject ToReflectedField(jclass cls, jfieldID fieldID, jboolean isStatic) { - return functions->ToReflectedField(this,cls,fieldID,isStatic); - } - - jint Throw(jthrowable obj) { - return functions->Throw(this, obj); - } - jint ThrowNew(jclass clazz, const char *msg) { - return functions->ThrowNew(this, clazz, msg); - } - jthrowable ExceptionOccurred() { - return functions->ExceptionOccurred(this); - } - void ExceptionDescribe() { - functions->ExceptionDescribe(this); - } - void ExceptionClear() { - functions->ExceptionClear(this); - } - void FatalError(const char *msg) { - functions->FatalError(this, msg); - } - - jint PushLocalFrame(jint capacity) { - return functions->PushLocalFrame(this,capacity); - } - jobject PopLocalFrame(jobject result) { - return functions->PopLocalFrame(this,result); - } - - jobject NewGlobalRef(jobject lobj) { - return functions->NewGlobalRef(this,lobj); - } - void DeleteGlobalRef(jobject gref) { - functions->DeleteGlobalRef(this,gref); - } - void DeleteLocalRef(jobject obj) { - functions->DeleteLocalRef(this, obj); - } - - jboolean IsSameObject(jobject obj1, jobject obj2) { - return functions->IsSameObject(this,obj1,obj2); - } - - jobject NewLocalRef(jobject ref) { - return functions->NewLocalRef(this,ref); - } - jint EnsureLocalCapacity(jint capacity) { - return functions->EnsureLocalCapacity(this,capacity); - } - - jobject AllocObject(jclass clazz) { - return functions->AllocObject(this,clazz); - } - jobject NewObject(jclass clazz, jmethodID methodID, ...) { - va_list args; - jobject result; - va_start(args, methodID); - result = functions->NewObjectV(this,clazz,methodID,args); - va_end(args); - return result; - } - jobject NewObjectV(jclass clazz, jmethodID methodID, - va_list args) { - return functions->NewObjectV(this,clazz,methodID,args); - } - jobject NewObjectA(jclass clazz, jmethodID methodID, - const jvalue *args) { - return functions->NewObjectA(this,clazz,methodID,args); - } - - jclass GetObjectClass(jobject obj) { - return functions->GetObjectClass(this,obj); - } - jboolean IsInstanceOf(jobject obj, jclass clazz) { - return functions->IsInstanceOf(this,obj,clazz); - } - - jmethodID GetMethodID(jclass clazz, const char *name, - const char *sig) { - return functions->GetMethodID(this,clazz,name,sig); - } - - jobject CallObjectMethod(jobject obj, jmethodID methodID, ...) { - va_list args; - jobject result; - va_start(args,methodID); - result = functions->CallObjectMethodV(this,obj,methodID,args); - va_end(args); - return result; - } - jobject CallObjectMethodV(jobject obj, jmethodID methodID, - va_list args) { - return functions->CallObjectMethodV(this,obj,methodID,args); - } - jobject CallObjectMethodA(jobject obj, jmethodID methodID, - const jvalue * args) { - return functions->CallObjectMethodA(this,obj,methodID,args); - } - - jboolean CallBooleanMethod(jobject obj, - jmethodID methodID, ...) { - va_list args; - jboolean result; - va_start(args,methodID); - result = functions->CallBooleanMethodV(this,obj,methodID,args); - va_end(args); - return result; - } - jboolean CallBooleanMethodV(jobject obj, jmethodID methodID, - va_list args) { - return functions->CallBooleanMethodV(this,obj,methodID,args); - } - jboolean CallBooleanMethodA(jobject obj, jmethodID methodID, - const jvalue * args) { - return functions->CallBooleanMethodA(this,obj,methodID, args); - } - - jbyte CallByteMethod(jobject obj, jmethodID methodID, ...) { - va_list args; - jbyte result; - va_start(args,methodID); - result = functions->CallByteMethodV(this,obj,methodID,args); - va_end(args); - return result; - } - jbyte CallByteMethodV(jobject obj, jmethodID methodID, - va_list args) { - return functions->CallByteMethodV(this,obj,methodID,args); - } - jbyte CallByteMethodA(jobject obj, jmethodID methodID, - const jvalue * args) { - return functions->CallByteMethodA(this,obj,methodID,args); - } - - jchar CallCharMethod(jobject obj, jmethodID methodID, ...) { - va_list args; - jchar result; - va_start(args,methodID); - result = functions->CallCharMethodV(this,obj,methodID,args); - va_end(args); - return result; - } - jchar CallCharMethodV(jobject obj, jmethodID methodID, - va_list args) { - return functions->CallCharMethodV(this,obj,methodID,args); - } - jchar CallCharMethodA(jobject obj, jmethodID methodID, - const jvalue * args) { - return functions->CallCharMethodA(this,obj,methodID,args); - } - - jshort CallShortMethod(jobject obj, jmethodID methodID, ...) { - va_list args; - jshort result; - va_start(args,methodID); - result = functions->CallShortMethodV(this,obj,methodID,args); - va_end(args); - return result; - } - jshort CallShortMethodV(jobject obj, jmethodID methodID, - va_list args) { - return functions->CallShortMethodV(this,obj,methodID,args); - } - jshort CallShortMethodA(jobject obj, jmethodID methodID, - const jvalue * args) { - return functions->CallShortMethodA(this,obj,methodID,args); - } - - jint CallIntMethod(jobject obj, jmethodID methodID, ...) { - va_list args; - jint result; - va_start(args,methodID); - result = functions->CallIntMethodV(this,obj,methodID,args); - va_end(args); - return result; - } - jint CallIntMethodV(jobject obj, jmethodID methodID, - va_list args) { - return functions->CallIntMethodV(this,obj,methodID,args); - } - jint CallIntMethodA(jobject obj, jmethodID methodID, - const jvalue * args) { - return functions->CallIntMethodA(this,obj,methodID,args); - } - - jlong CallLongMethod(jobject obj, jmethodID methodID, ...) { - va_list args; - jlong result; - va_start(args,methodID); - result = functions->CallLongMethodV(this,obj,methodID,args); - va_end(args); - return result; - } - jlong CallLongMethodV(jobject obj, jmethodID methodID, - va_list args) { - return functions->CallLongMethodV(this,obj,methodID,args); - } - jlong CallLongMethodA(jobject obj, jmethodID methodID, - const jvalue * args) { - return functions->CallLongMethodA(this,obj,methodID,args); - } - - jfloat CallFloatMethod(jobject obj, jmethodID methodID, ...) { - va_list args; - jfloat result; - va_start(args,methodID); - result = functions->CallFloatMethodV(this,obj,methodID,args); - va_end(args); - return result; - } - jfloat CallFloatMethodV(jobject obj, jmethodID methodID, - va_list args) { - return functions->CallFloatMethodV(this,obj,methodID,args); - } - jfloat CallFloatMethodA(jobject obj, jmethodID methodID, - const jvalue * args) { - return functions->CallFloatMethodA(this,obj,methodID,args); - } - - jdouble CallDoubleMethod(jobject obj, jmethodID methodID, ...) { - va_list args; - jdouble result; - va_start(args,methodID); - result = functions->CallDoubleMethodV(this,obj,methodID,args); - va_end(args); - return result; - } - jdouble CallDoubleMethodV(jobject obj, jmethodID methodID, - va_list args) { - return functions->CallDoubleMethodV(this,obj,methodID,args); - } - jdouble CallDoubleMethodA(jobject obj, jmethodID methodID, - const jvalue * args) { - return functions->CallDoubleMethodA(this,obj,methodID,args); - } - - void CallVoidMethod(jobject obj, jmethodID methodID, ...) { - va_list args; - va_start(args,methodID); - functions->CallVoidMethodV(this,obj,methodID,args); - va_end(args); - } - void CallVoidMethodV(jobject obj, jmethodID methodID, - va_list args) { - functions->CallVoidMethodV(this,obj,methodID,args); - } - void CallVoidMethodA(jobject obj, jmethodID methodID, - const jvalue * args) { - functions->CallVoidMethodA(this,obj,methodID,args); - } - - jobject CallNonvirtualObjectMethod(jobject obj, jclass clazz, - jmethodID methodID, ...) { - va_list args; - jobject result; - va_start(args,methodID); - result = functions->CallNonvirtualObjectMethodV(this,obj,clazz, - methodID,args); - va_end(args); - return result; - } - jobject CallNonvirtualObjectMethodV(jobject obj, jclass clazz, - jmethodID methodID, va_list args) { - return functions->CallNonvirtualObjectMethodV(this,obj,clazz, - methodID,args); - } - jobject CallNonvirtualObjectMethodA(jobject obj, jclass clazz, - jmethodID methodID, const jvalue * args) { - return functions->CallNonvirtualObjectMethodA(this,obj,clazz, - methodID,args); - } - - jboolean CallNonvirtualBooleanMethod(jobject obj, jclass clazz, - jmethodID methodID, ...) { - va_list args; - jboolean result; - va_start(args,methodID); - result = functions->CallNonvirtualBooleanMethodV(this,obj,clazz, - methodID,args); - va_end(args); - return result; - } - jboolean CallNonvirtualBooleanMethodV(jobject obj, jclass clazz, - jmethodID methodID, va_list args) { - return functions->CallNonvirtualBooleanMethodV(this,obj,clazz, - methodID,args); - } - jboolean CallNonvirtualBooleanMethodA(jobject obj, jclass clazz, - jmethodID methodID, const jvalue * args) { - return functions->CallNonvirtualBooleanMethodA(this,obj,clazz, - methodID, args); - } - - jbyte CallNonvirtualByteMethod(jobject obj, jclass clazz, - jmethodID methodID, ...) { - va_list args; - jbyte result; - va_start(args,methodID); - result = functions->CallNonvirtualByteMethodV(this,obj,clazz, - methodID,args); - va_end(args); - return result; - } - jbyte CallNonvirtualByteMethodV(jobject obj, jclass clazz, - jmethodID methodID, va_list args) { - return functions->CallNonvirtualByteMethodV(this,obj,clazz, - methodID,args); - } - jbyte CallNonvirtualByteMethodA(jobject obj, jclass clazz, - jmethodID methodID, const jvalue * args) { - return functions->CallNonvirtualByteMethodA(this,obj,clazz, - methodID,args); - } - - jchar CallNonvirtualCharMethod(jobject obj, jclass clazz, - jmethodID methodID, ...) { - va_list args; - jchar result; - va_start(args,methodID); - result = functions->CallNonvirtualCharMethodV(this,obj,clazz, - methodID,args); - va_end(args); - return result; - } - jchar CallNonvirtualCharMethodV(jobject obj, jclass clazz, - jmethodID methodID, va_list args) { - return functions->CallNonvirtualCharMethodV(this,obj,clazz, - methodID,args); - } - jchar CallNonvirtualCharMethodA(jobject obj, jclass clazz, - jmethodID methodID, const jvalue * args) { - return functions->CallNonvirtualCharMethodA(this,obj,clazz, - methodID,args); - } - - jshort CallNonvirtualShortMethod(jobject obj, jclass clazz, - jmethodID methodID, ...) { - va_list args; - jshort result; - va_start(args,methodID); - result = functions->CallNonvirtualShortMethodV(this,obj,clazz, - methodID,args); - va_end(args); - return result; - } - jshort CallNonvirtualShortMethodV(jobject obj, jclass clazz, - jmethodID methodID, va_list args) { - return functions->CallNonvirtualShortMethodV(this,obj,clazz, - methodID,args); - } - jshort CallNonvirtualShortMethodA(jobject obj, jclass clazz, - jmethodID methodID, const jvalue * args) { - return functions->CallNonvirtualShortMethodA(this,obj,clazz, - methodID,args); - } - - jint CallNonvirtualIntMethod(jobject obj, jclass clazz, - jmethodID methodID, ...) { - va_list args; - jint result; - va_start(args,methodID); - result = functions->CallNonvirtualIntMethodV(this,obj,clazz, - methodID,args); - va_end(args); - return result; - } - jint CallNonvirtualIntMethodV(jobject obj, jclass clazz, - jmethodID methodID, va_list args) { - return functions->CallNonvirtualIntMethodV(this,obj,clazz, - methodID,args); - } - jint CallNonvirtualIntMethodA(jobject obj, jclass clazz, - jmethodID methodID, const jvalue * args) { - return functions->CallNonvirtualIntMethodA(this,obj,clazz, - methodID,args); - } - - jlong CallNonvirtualLongMethod(jobject obj, jclass clazz, - jmethodID methodID, ...) { - va_list args; - jlong result; - va_start(args,methodID); - result = functions->CallNonvirtualLongMethodV(this,obj,clazz, - methodID,args); - va_end(args); - return result; - } - jlong CallNonvirtualLongMethodV(jobject obj, jclass clazz, - jmethodID methodID, va_list args) { - return functions->CallNonvirtualLongMethodV(this,obj,clazz, - methodID,args); - } - jlong CallNonvirtualLongMethodA(jobject obj, jclass clazz, - jmethodID methodID, const jvalue * args) { - return functions->CallNonvirtualLongMethodA(this,obj,clazz, - methodID,args); - } - - jfloat CallNonvirtualFloatMethod(jobject obj, jclass clazz, - jmethodID methodID, ...) { - va_list args; - jfloat result; - va_start(args,methodID); - result = functions->CallNonvirtualFloatMethodV(this,obj,clazz, - methodID,args); - va_end(args); - return result; - } - jfloat CallNonvirtualFloatMethodV(jobject obj, jclass clazz, - jmethodID methodID, - va_list args) { - return functions->CallNonvirtualFloatMethodV(this,obj,clazz, - methodID,args); - } - jfloat CallNonvirtualFloatMethodA(jobject obj, jclass clazz, - jmethodID methodID, - const jvalue * args) { - return functions->CallNonvirtualFloatMethodA(this,obj,clazz, - methodID,args); - } - - jdouble CallNonvirtualDoubleMethod(jobject obj, jclass clazz, - jmethodID methodID, ...) { - va_list args; - jdouble result; - va_start(args,methodID); - result = functions->CallNonvirtualDoubleMethodV(this,obj,clazz, - methodID,args); - va_end(args); - return result; - } - jdouble CallNonvirtualDoubleMethodV(jobject obj, jclass clazz, - jmethodID methodID, - va_list args) { - return functions->CallNonvirtualDoubleMethodV(this,obj,clazz, - methodID,args); - } - jdouble CallNonvirtualDoubleMethodA(jobject obj, jclass clazz, - jmethodID methodID, - const jvalue * args) { - return functions->CallNonvirtualDoubleMethodA(this,obj,clazz, - methodID,args); - } - - void CallNonvirtualVoidMethod(jobject obj, jclass clazz, - jmethodID methodID, ...) { - va_list args; - va_start(args,methodID); - functions->CallNonvirtualVoidMethodV(this,obj,clazz,methodID,args); - va_end(args); - } - void CallNonvirtualVoidMethodV(jobject obj, jclass clazz, - jmethodID methodID, - va_list args) { - functions->CallNonvirtualVoidMethodV(this,obj,clazz,methodID,args); - } - void CallNonvirtualVoidMethodA(jobject obj, jclass clazz, - jmethodID methodID, - const jvalue * args) { - functions->CallNonvirtualVoidMethodA(this,obj,clazz,methodID,args); - } - - jfieldID GetFieldID(jclass clazz, const char *name, - const char *sig) { - return functions->GetFieldID(this,clazz,name,sig); - } - - jobject GetObjectField(jobject obj, jfieldID fieldID) { - return functions->GetObjectField(this,obj,fieldID); - } - jboolean GetBooleanField(jobject obj, jfieldID fieldID) { - return functions->GetBooleanField(this,obj,fieldID); - } - jbyte GetByteField(jobject obj, jfieldID fieldID) { - return functions->GetByteField(this,obj,fieldID); - } - jchar GetCharField(jobject obj, jfieldID fieldID) { - return functions->GetCharField(this,obj,fieldID); - } - jshort GetShortField(jobject obj, jfieldID fieldID) { - return functions->GetShortField(this,obj,fieldID); - } - jint GetIntField(jobject obj, jfieldID fieldID) { - return functions->GetIntField(this,obj,fieldID); - } - jlong GetLongField(jobject obj, jfieldID fieldID) { - return functions->GetLongField(this,obj,fieldID); - } - jfloat GetFloatField(jobject obj, jfieldID fieldID) { - return functions->GetFloatField(this,obj,fieldID); - } - jdouble GetDoubleField(jobject obj, jfieldID fieldID) { - return functions->GetDoubleField(this,obj,fieldID); - } - - void SetObjectField(jobject obj, jfieldID fieldID, jobject val) { - functions->SetObjectField(this,obj,fieldID,val); - } - void SetBooleanField(jobject obj, jfieldID fieldID, - jboolean val) { - functions->SetBooleanField(this,obj,fieldID,val); - } - void SetByteField(jobject obj, jfieldID fieldID, - jbyte val) { - functions->SetByteField(this,obj,fieldID,val); - } - void SetCharField(jobject obj, jfieldID fieldID, - jchar val) { - functions->SetCharField(this,obj,fieldID,val); - } - void SetShortField(jobject obj, jfieldID fieldID, - jshort val) { - functions->SetShortField(this,obj,fieldID,val); - } - void SetIntField(jobject obj, jfieldID fieldID, - jint val) { - functions->SetIntField(this,obj,fieldID,val); - } - void SetLongField(jobject obj, jfieldID fieldID, - jlong val) { - functions->SetLongField(this,obj,fieldID,val); - } - void SetFloatField(jobject obj, jfieldID fieldID, - jfloat val) { - functions->SetFloatField(this,obj,fieldID,val); - } - void SetDoubleField(jobject obj, jfieldID fieldID, - jdouble val) { - functions->SetDoubleField(this,obj,fieldID,val); - } - - jmethodID GetStaticMethodID(jclass clazz, const char *name, - const char *sig) { - return functions->GetStaticMethodID(this,clazz,name,sig); - } - - jobject CallStaticObjectMethod(jclass clazz, jmethodID methodID, - ...) { - va_list args; - jobject result; - va_start(args,methodID); - result = functions->CallStaticObjectMethodV(this,clazz,methodID,args); - va_end(args); - return result; - } - jobject CallStaticObjectMethodV(jclass clazz, jmethodID methodID, - va_list args) { - return functions->CallStaticObjectMethodV(this,clazz,methodID,args); - } - jobject CallStaticObjectMethodA(jclass clazz, jmethodID methodID, - const jvalue *args) { - return functions->CallStaticObjectMethodA(this,clazz,methodID,args); - } - - jboolean CallStaticBooleanMethod(jclass clazz, - jmethodID methodID, ...) { - va_list args; - jboolean result; - va_start(args,methodID); - result = functions->CallStaticBooleanMethodV(this,clazz,methodID,args); - va_end(args); - return result; - } - jboolean CallStaticBooleanMethodV(jclass clazz, - jmethodID methodID, va_list args) { - return functions->CallStaticBooleanMethodV(this,clazz,methodID,args); - } - jboolean CallStaticBooleanMethodA(jclass clazz, - jmethodID methodID, const jvalue *args) { - return functions->CallStaticBooleanMethodA(this,clazz,methodID,args); - } - - jbyte CallStaticByteMethod(jclass clazz, - jmethodID methodID, ...) { - va_list args; - jbyte result; - va_start(args,methodID); - result = functions->CallStaticByteMethodV(this,clazz,methodID,args); - va_end(args); - return result; - } - jbyte CallStaticByteMethodV(jclass clazz, - jmethodID methodID, va_list args) { - return functions->CallStaticByteMethodV(this,clazz,methodID,args); - } - jbyte CallStaticByteMethodA(jclass clazz, - jmethodID methodID, const jvalue *args) { - return functions->CallStaticByteMethodA(this,clazz,methodID,args); - } - - jchar CallStaticCharMethod(jclass clazz, - jmethodID methodID, ...) { - va_list args; - jchar result; - va_start(args,methodID); - result = functions->CallStaticCharMethodV(this,clazz,methodID,args); - va_end(args); - return result; - } - jchar CallStaticCharMethodV(jclass clazz, - jmethodID methodID, va_list args) { - return functions->CallStaticCharMethodV(this,clazz,methodID,args); - } - jchar CallStaticCharMethodA(jclass clazz, - jmethodID methodID, const jvalue *args) { - return functions->CallStaticCharMethodA(this,clazz,methodID,args); - } - - jshort CallStaticShortMethod(jclass clazz, - jmethodID methodID, ...) { - va_list args; - jshort result; - va_start(args,methodID); - result = functions->CallStaticShortMethodV(this,clazz,methodID,args); - va_end(args); - return result; - } - jshort CallStaticShortMethodV(jclass clazz, - jmethodID methodID, va_list args) { - return functions->CallStaticShortMethodV(this,clazz,methodID,args); - } - jshort CallStaticShortMethodA(jclass clazz, - jmethodID methodID, const jvalue *args) { - return functions->CallStaticShortMethodA(this,clazz,methodID,args); - } - - jint CallStaticIntMethod(jclass clazz, - jmethodID methodID, ...) { - va_list args; - jint result; - va_start(args,methodID); - result = functions->CallStaticIntMethodV(this,clazz,methodID,args); - va_end(args); - return result; - } - jint CallStaticIntMethodV(jclass clazz, - jmethodID methodID, va_list args) { - return functions->CallStaticIntMethodV(this,clazz,methodID,args); - } - jint CallStaticIntMethodA(jclass clazz, - jmethodID methodID, const jvalue *args) { - return functions->CallStaticIntMethodA(this,clazz,methodID,args); - } - - jlong CallStaticLongMethod(jclass clazz, - jmethodID methodID, ...) { - va_list args; - jlong result; - va_start(args,methodID); - result = functions->CallStaticLongMethodV(this,clazz,methodID,args); - va_end(args); - return result; - } - jlong CallStaticLongMethodV(jclass clazz, - jmethodID methodID, va_list args) { - return functions->CallStaticLongMethodV(this,clazz,methodID,args); - } - jlong CallStaticLongMethodA(jclass clazz, - jmethodID methodID, const jvalue *args) { - return functions->CallStaticLongMethodA(this,clazz,methodID,args); - } - - jfloat CallStaticFloatMethod(jclass clazz, - jmethodID methodID, ...) { - va_list args; - jfloat result; - va_start(args,methodID); - result = functions->CallStaticFloatMethodV(this,clazz,methodID,args); - va_end(args); - return result; - } - jfloat CallStaticFloatMethodV(jclass clazz, - jmethodID methodID, va_list args) { - return functions->CallStaticFloatMethodV(this,clazz,methodID,args); - } - jfloat CallStaticFloatMethodA(jclass clazz, - jmethodID methodID, const jvalue *args) { - return functions->CallStaticFloatMethodA(this,clazz,methodID,args); - } - - jdouble CallStaticDoubleMethod(jclass clazz, - jmethodID methodID, ...) { - va_list args; - jdouble result; - va_start(args,methodID); - result = functions->CallStaticDoubleMethodV(this,clazz,methodID,args); - va_end(args); - return result; - } - jdouble CallStaticDoubleMethodV(jclass clazz, - jmethodID methodID, va_list args) { - return functions->CallStaticDoubleMethodV(this,clazz,methodID,args); - } - jdouble CallStaticDoubleMethodA(jclass clazz, - jmethodID methodID, const jvalue *args) { - return functions->CallStaticDoubleMethodA(this,clazz,methodID,args); - } - - void CallStaticVoidMethod(jclass cls, jmethodID methodID, ...) { - va_list args; - va_start(args,methodID); - functions->CallStaticVoidMethodV(this,cls,methodID,args); - va_end(args); - } - void CallStaticVoidMethodV(jclass cls, jmethodID methodID, - va_list args) { - functions->CallStaticVoidMethodV(this,cls,methodID,args); - } - void CallStaticVoidMethodA(jclass cls, jmethodID methodID, - const jvalue * args) { - functions->CallStaticVoidMethodA(this,cls,methodID,args); - } - - jfieldID GetStaticFieldID(jclass clazz, const char *name, - const char *sig) { - return functions->GetStaticFieldID(this,clazz,name,sig); - } - jobject GetStaticObjectField(jclass clazz, jfieldID fieldID) { - return functions->GetStaticObjectField(this,clazz,fieldID); - } - jboolean GetStaticBooleanField(jclass clazz, jfieldID fieldID) { - return functions->GetStaticBooleanField(this,clazz,fieldID); - } - jbyte GetStaticByteField(jclass clazz, jfieldID fieldID) { - return functions->GetStaticByteField(this,clazz,fieldID); - } - jchar GetStaticCharField(jclass clazz, jfieldID fieldID) { - return functions->GetStaticCharField(this,clazz,fieldID); - } - jshort GetStaticShortField(jclass clazz, jfieldID fieldID) { - return functions->GetStaticShortField(this,clazz,fieldID); - } - jint GetStaticIntField(jclass clazz, jfieldID fieldID) { - return functions->GetStaticIntField(this,clazz,fieldID); - } - jlong GetStaticLongField(jclass clazz, jfieldID fieldID) { - return functions->GetStaticLongField(this,clazz,fieldID); - } - jfloat GetStaticFloatField(jclass clazz, jfieldID fieldID) { - return functions->GetStaticFloatField(this,clazz,fieldID); - } - jdouble GetStaticDoubleField(jclass clazz, jfieldID fieldID) { - return functions->GetStaticDoubleField(this,clazz,fieldID); - } - - void SetStaticObjectField(jclass clazz, jfieldID fieldID, - jobject value) { - functions->SetStaticObjectField(this,clazz,fieldID,value); - } - void SetStaticBooleanField(jclass clazz, jfieldID fieldID, - jboolean value) { - functions->SetStaticBooleanField(this,clazz,fieldID,value); - } - void SetStaticByteField(jclass clazz, jfieldID fieldID, - jbyte value) { - functions->SetStaticByteField(this,clazz,fieldID,value); - } - void SetStaticCharField(jclass clazz, jfieldID fieldID, - jchar value) { - functions->SetStaticCharField(this,clazz,fieldID,value); - } - void SetStaticShortField(jclass clazz, jfieldID fieldID, - jshort value) { - functions->SetStaticShortField(this,clazz,fieldID,value); - } - void SetStaticIntField(jclass clazz, jfieldID fieldID, - jint value) { - functions->SetStaticIntField(this,clazz,fieldID,value); - } - void SetStaticLongField(jclass clazz, jfieldID fieldID, - jlong value) { - functions->SetStaticLongField(this,clazz,fieldID,value); - } - void SetStaticFloatField(jclass clazz, jfieldID fieldID, - jfloat value) { - functions->SetStaticFloatField(this,clazz,fieldID,value); - } - void SetStaticDoubleField(jclass clazz, jfieldID fieldID, - jdouble value) { - functions->SetStaticDoubleField(this,clazz,fieldID,value); - } - - jstring NewString(const jchar *unicode, jsize len) { - return functions->NewString(this,unicode,len); - } - jsize GetStringLength(jstring str) { - return functions->GetStringLength(this,str); - } - const jchar *GetStringChars(jstring str, jboolean *isCopy) { - return functions->GetStringChars(this,str,isCopy); - } - void ReleaseStringChars(jstring str, const jchar *chars) { - functions->ReleaseStringChars(this,str,chars); - } - - jstring NewStringUTF(const char *utf) { - return functions->NewStringUTF(this,utf); - } - jsize GetStringUTFLength(jstring str) { - return functions->GetStringUTFLength(this,str); - } - const char* GetStringUTFChars(jstring str, jboolean *isCopy) { - return functions->GetStringUTFChars(this,str,isCopy); - } - void ReleaseStringUTFChars(jstring str, const char* chars) { - functions->ReleaseStringUTFChars(this,str,chars); - } - - jsize GetArrayLength(jarray array) { - return functions->GetArrayLength(this,array); - } - - jobjectArray NewObjectArray(jsize len, jclass clazz, - jobject init) { - return functions->NewObjectArray(this,len,clazz,init); - } - jobject GetObjectArrayElement(jobjectArray array, jsize index) { - return functions->GetObjectArrayElement(this,array,index); - } - void SetObjectArrayElement(jobjectArray array, jsize index, - jobject val) { - functions->SetObjectArrayElement(this,array,index,val); - } - - jbooleanArray NewBooleanArray(jsize len) { - return functions->NewBooleanArray(this,len); - } - jbyteArray NewByteArray(jsize len) { - return functions->NewByteArray(this,len); - } - jcharArray NewCharArray(jsize len) { - return functions->NewCharArray(this,len); - } - jshortArray NewShortArray(jsize len) { - return functions->NewShortArray(this,len); - } - jintArray NewIntArray(jsize len) { - return functions->NewIntArray(this,len); - } - jlongArray NewLongArray(jsize len) { - return functions->NewLongArray(this,len); - } - jfloatArray NewFloatArray(jsize len) { - return functions->NewFloatArray(this,len); - } - jdoubleArray NewDoubleArray(jsize len) { - return functions->NewDoubleArray(this,len); - } - - jboolean * GetBooleanArrayElements(jbooleanArray array, jboolean *isCopy) { - return functions->GetBooleanArrayElements(this,array,isCopy); - } - jbyte * GetByteArrayElements(jbyteArray array, jboolean *isCopy) { - return functions->GetByteArrayElements(this,array,isCopy); - } - jchar * GetCharArrayElements(jcharArray array, jboolean *isCopy) { - return functions->GetCharArrayElements(this,array,isCopy); - } - jshort * GetShortArrayElements(jshortArray array, jboolean *isCopy) { - return functions->GetShortArrayElements(this,array,isCopy); - } - jint * GetIntArrayElements(jintArray array, jboolean *isCopy) { - return functions->GetIntArrayElements(this,array,isCopy); - } - jlong * GetLongArrayElements(jlongArray array, jboolean *isCopy) { - return functions->GetLongArrayElements(this,array,isCopy); - } - jfloat * GetFloatArrayElements(jfloatArray array, jboolean *isCopy) { - return functions->GetFloatArrayElements(this,array,isCopy); - } - jdouble * GetDoubleArrayElements(jdoubleArray array, jboolean *isCopy) { - return functions->GetDoubleArrayElements(this,array,isCopy); - } - - void ReleaseBooleanArrayElements(jbooleanArray array, - jboolean *elems, - jint mode) { - functions->ReleaseBooleanArrayElements(this,array,elems,mode); - } - void ReleaseByteArrayElements(jbyteArray array, - jbyte *elems, - jint mode) { - functions->ReleaseByteArrayElements(this,array,elems,mode); - } - void ReleaseCharArrayElements(jcharArray array, - jchar *elems, - jint mode) { - functions->ReleaseCharArrayElements(this,array,elems,mode); - } - void ReleaseShortArrayElements(jshortArray array, - jshort *elems, - jint mode) { - functions->ReleaseShortArrayElements(this,array,elems,mode); - } - void ReleaseIntArrayElements(jintArray array, - jint *elems, - jint mode) { - functions->ReleaseIntArrayElements(this,array,elems,mode); - } - void ReleaseLongArrayElements(jlongArray array, - jlong *elems, - jint mode) { - functions->ReleaseLongArrayElements(this,array,elems,mode); - } - void ReleaseFloatArrayElements(jfloatArray array, - jfloat *elems, - jint mode) { - functions->ReleaseFloatArrayElements(this,array,elems,mode); - } - void ReleaseDoubleArrayElements(jdoubleArray array, - jdouble *elems, - jint mode) { - functions->ReleaseDoubleArrayElements(this,array,elems,mode); - } - - void GetBooleanArrayRegion(jbooleanArray array, - jsize start, jsize len, jboolean *buf) { - functions->GetBooleanArrayRegion(this,array,start,len,buf); - } - void GetByteArrayRegion(jbyteArray array, - jsize start, jsize len, jbyte *buf) { - functions->GetByteArrayRegion(this,array,start,len,buf); - } - void GetCharArrayRegion(jcharArray array, - jsize start, jsize len, jchar *buf) { - functions->GetCharArrayRegion(this,array,start,len,buf); - } - void GetShortArrayRegion(jshortArray array, - jsize start, jsize len, jshort *buf) { - functions->GetShortArrayRegion(this,array,start,len,buf); - } - void GetIntArrayRegion(jintArray array, - jsize start, jsize len, jint *buf) { - functions->GetIntArrayRegion(this,array,start,len,buf); - } - void GetLongArrayRegion(jlongArray array, - jsize start, jsize len, jlong *buf) { - functions->GetLongArrayRegion(this,array,start,len,buf); - } - void GetFloatArrayRegion(jfloatArray array, - jsize start, jsize len, jfloat *buf) { - functions->GetFloatArrayRegion(this,array,start,len,buf); - } - void GetDoubleArrayRegion(jdoubleArray array, - jsize start, jsize len, jdouble *buf) { - functions->GetDoubleArrayRegion(this,array,start,len,buf); - } - - void SetBooleanArrayRegion(jbooleanArray array, jsize start, jsize len, - const jboolean *buf) { - functions->SetBooleanArrayRegion(this,array,start,len,buf); - } - void SetByteArrayRegion(jbyteArray array, jsize start, jsize len, - const jbyte *buf) { - functions->SetByteArrayRegion(this,array,start,len,buf); - } - void SetCharArrayRegion(jcharArray array, jsize start, jsize len, - const jchar *buf) { - functions->SetCharArrayRegion(this,array,start,len,buf); - } - void SetShortArrayRegion(jshortArray array, jsize start, jsize len, - const jshort *buf) { - functions->SetShortArrayRegion(this,array,start,len,buf); - } - void SetIntArrayRegion(jintArray array, jsize start, jsize len, - const jint *buf) { - functions->SetIntArrayRegion(this,array,start,len,buf); - } - void SetLongArrayRegion(jlongArray array, jsize start, jsize len, - const jlong *buf) { - functions->SetLongArrayRegion(this,array,start,len,buf); - } - void SetFloatArrayRegion(jfloatArray array, jsize start, jsize len, - const jfloat *buf) { - functions->SetFloatArrayRegion(this,array,start,len,buf); - } - void SetDoubleArrayRegion(jdoubleArray array, jsize start, jsize len, - const jdouble *buf) { - functions->SetDoubleArrayRegion(this,array,start,len,buf); - } - - jint RegisterNatives(jclass clazz, const JNINativeMethod *methods, - jint nMethods) { - return functions->RegisterNatives(this,clazz,methods,nMethods); - } - jint UnregisterNatives(jclass clazz) { - return functions->UnregisterNatives(this,clazz); - } - - jint MonitorEnter(jobject obj) { - return functions->MonitorEnter(this,obj); - } - jint MonitorExit(jobject obj) { - return functions->MonitorExit(this,obj); - } - - jint GetJavaVM(JavaVM **vm) { - return functions->GetJavaVM(this,vm); - } - - void GetStringRegion(jstring str, jsize start, jsize len, jchar *buf) { - functions->GetStringRegion(this,str,start,len,buf); - } - void GetStringUTFRegion(jstring str, jsize start, jsize len, char *buf) { - functions->GetStringUTFRegion(this,str,start,len,buf); - } - - void * GetPrimitiveArrayCritical(jarray array, jboolean *isCopy) { - return functions->GetPrimitiveArrayCritical(this,array,isCopy); - } - void ReleasePrimitiveArrayCritical(jarray array, void *carray, jint mode) { - functions->ReleasePrimitiveArrayCritical(this,array,carray,mode); - } - - const jchar * GetStringCritical(jstring string, jboolean *isCopy) { - return functions->GetStringCritical(this,string,isCopy); - } - void ReleaseStringCritical(jstring string, const jchar *cstring) { - functions->ReleaseStringCritical(this,string,cstring); - } - - jweak NewWeakGlobalRef(jobject obj) { - return functions->NewWeakGlobalRef(this,obj); - } - void DeleteWeakGlobalRef(jweak ref) { - functions->DeleteWeakGlobalRef(this,ref); - } - - jboolean ExceptionCheck() { - return functions->ExceptionCheck(this); - } - - jobject NewDirectByteBuffer(void* address, jlong capacity) { - return functions->NewDirectByteBuffer(this, address, capacity); - } - void* GetDirectBufferAddress(jobject buf) { - return functions->GetDirectBufferAddress(this, buf); - } - jlong GetDirectBufferCapacity(jobject buf) { - return functions->GetDirectBufferCapacity(this, buf); - } - jobjectRefType GetObjectRefType(jobject obj) { - return functions->GetObjectRefType(this, obj); - } + jint GetVersion() { return functions->GetVersion(this); } + jclass DefineClass(const char *name, jobject loader, const jbyte *buf, jsize len) { + return functions->DefineClass(this, name, loader, buf, len); + } + jclass FindClass(const char *name) { return functions->FindClass(this, name); } + jmethodID FromReflectedMethod(jobject method) { + return functions->FromReflectedMethod(this, method); + } + jfieldID FromReflectedField(jobject field) { return functions->FromReflectedField(this, field); } + + jobject ToReflectedMethod(jclass cls, jmethodID methodID, jboolean isStatic) { + return functions->ToReflectedMethod(this, cls, methodID, isStatic); + } + + jclass GetSuperclass(jclass sub) { return functions->GetSuperclass(this, sub); } + jboolean IsAssignableFrom(jclass sub, jclass sup) { + return functions->IsAssignableFrom(this, sub, sup); + } + + jobject ToReflectedField(jclass cls, jfieldID fieldID, jboolean isStatic) { + return functions->ToReflectedField(this, cls, fieldID, isStatic); + } + + jint Throw(jthrowable obj) { return functions->Throw(this, obj); } + jint ThrowNew(jclass clazz, const char *msg) { return functions->ThrowNew(this, clazz, msg); } + jthrowable ExceptionOccurred() { return functions->ExceptionOccurred(this); } + void ExceptionDescribe() { functions->ExceptionDescribe(this); } + void ExceptionClear() { functions->ExceptionClear(this); } + void FatalError(const char *msg) { functions->FatalError(this, msg); } + + jint PushLocalFrame(jint capacity) { return functions->PushLocalFrame(this, capacity); } + jobject PopLocalFrame(jobject result) { return functions->PopLocalFrame(this, result); } + + jobject NewGlobalRef(jobject lobj) { return functions->NewGlobalRef(this, lobj); } + void DeleteGlobalRef(jobject gref) { functions->DeleteGlobalRef(this, gref); } + void DeleteLocalRef(jobject obj) { functions->DeleteLocalRef(this, obj); } + + jboolean IsSameObject(jobject obj1, jobject obj2) { + return functions->IsSameObject(this, obj1, obj2); + } + + jobject NewLocalRef(jobject ref) { return functions->NewLocalRef(this, ref); } + jint EnsureLocalCapacity(jint capacity) { return functions->EnsureLocalCapacity(this, capacity); } + + jobject AllocObject(jclass clazz) { return functions->AllocObject(this, clazz); } + jobject NewObject(jclass clazz, jmethodID methodID, ...) { + va_list args; + jobject result; + va_start(args, methodID); + result = functions->NewObjectV(this, clazz, methodID, args); + va_end(args); + return result; + } + jobject NewObjectV(jclass clazz, jmethodID methodID, va_list args) { + return functions->NewObjectV(this, clazz, methodID, args); + } + jobject NewObjectA(jclass clazz, jmethodID methodID, const jvalue *args) { + return functions->NewObjectA(this, clazz, methodID, args); + } + + jclass GetObjectClass(jobject obj) { return functions->GetObjectClass(this, obj); } + jboolean IsInstanceOf(jobject obj, jclass clazz) { + return functions->IsInstanceOf(this, obj, clazz); + } + + jmethodID GetMethodID(jclass clazz, const char *name, const char *sig) { + return functions->GetMethodID(this, clazz, name, sig); + } + + jobject CallObjectMethod(jobject obj, jmethodID methodID, ...) { + va_list args; + jobject result; + va_start(args, methodID); + result = functions->CallObjectMethodV(this, obj, methodID, args); + va_end(args); + return result; + } + jobject CallObjectMethodV(jobject obj, jmethodID methodID, va_list args) { + return functions->CallObjectMethodV(this, obj, methodID, args); + } + jobject CallObjectMethodA(jobject obj, jmethodID methodID, const jvalue *args) { + return functions->CallObjectMethodA(this, obj, methodID, args); + } + + jboolean CallBooleanMethod(jobject obj, jmethodID methodID, ...) { + va_list args; + jboolean result; + va_start(args, methodID); + result = functions->CallBooleanMethodV(this, obj, methodID, args); + va_end(args); + return result; + } + jboolean CallBooleanMethodV(jobject obj, jmethodID methodID, va_list args) { + return functions->CallBooleanMethodV(this, obj, methodID, args); + } + jboolean CallBooleanMethodA(jobject obj, jmethodID methodID, const jvalue *args) { + return functions->CallBooleanMethodA(this, obj, methodID, args); + } + + jbyte CallByteMethod(jobject obj, jmethodID methodID, ...) { + va_list args; + jbyte result; + va_start(args, methodID); + result = functions->CallByteMethodV(this, obj, methodID, args); + va_end(args); + return result; + } + jbyte CallByteMethodV(jobject obj, jmethodID methodID, va_list args) { + return functions->CallByteMethodV(this, obj, methodID, args); + } + jbyte CallByteMethodA(jobject obj, jmethodID methodID, const jvalue *args) { + return functions->CallByteMethodA(this, obj, methodID, args); + } + + jchar CallCharMethod(jobject obj, jmethodID methodID, ...) { + va_list args; + jchar result; + va_start(args, methodID); + result = functions->CallCharMethodV(this, obj, methodID, args); + va_end(args); + return result; + } + jchar CallCharMethodV(jobject obj, jmethodID methodID, va_list args) { + return functions->CallCharMethodV(this, obj, methodID, args); + } + jchar CallCharMethodA(jobject obj, jmethodID methodID, const jvalue *args) { + return functions->CallCharMethodA(this, obj, methodID, args); + } + + jshort CallShortMethod(jobject obj, jmethodID methodID, ...) { + va_list args; + jshort result; + va_start(args, methodID); + result = functions->CallShortMethodV(this, obj, methodID, args); + va_end(args); + return result; + } + jshort CallShortMethodV(jobject obj, jmethodID methodID, va_list args) { + return functions->CallShortMethodV(this, obj, methodID, args); + } + jshort CallShortMethodA(jobject obj, jmethodID methodID, const jvalue *args) { + return functions->CallShortMethodA(this, obj, methodID, args); + } + + jint CallIntMethod(jobject obj, jmethodID methodID, ...) { + va_list args; + jint result; + va_start(args, methodID); + result = functions->CallIntMethodV(this, obj, methodID, args); + va_end(args); + return result; + } + jint CallIntMethodV(jobject obj, jmethodID methodID, va_list args) { + return functions->CallIntMethodV(this, obj, methodID, args); + } + jint CallIntMethodA(jobject obj, jmethodID methodID, const jvalue *args) { + return functions->CallIntMethodA(this, obj, methodID, args); + } + + jlong CallLongMethod(jobject obj, jmethodID methodID, ...) { + va_list args; + jlong result; + va_start(args, methodID); + result = functions->CallLongMethodV(this, obj, methodID, args); + va_end(args); + return result; + } + jlong CallLongMethodV(jobject obj, jmethodID methodID, va_list args) { + return functions->CallLongMethodV(this, obj, methodID, args); + } + jlong CallLongMethodA(jobject obj, jmethodID methodID, const jvalue *args) { + return functions->CallLongMethodA(this, obj, methodID, args); + } + + jfloat CallFloatMethod(jobject obj, jmethodID methodID, ...) { + va_list args; + jfloat result; + va_start(args, methodID); + result = functions->CallFloatMethodV(this, obj, methodID, args); + va_end(args); + return result; + } + jfloat CallFloatMethodV(jobject obj, jmethodID methodID, va_list args) { + return functions->CallFloatMethodV(this, obj, methodID, args); + } + jfloat CallFloatMethodA(jobject obj, jmethodID methodID, const jvalue *args) { + return functions->CallFloatMethodA(this, obj, methodID, args); + } + + jdouble CallDoubleMethod(jobject obj, jmethodID methodID, ...) { + va_list args; + jdouble result; + va_start(args, methodID); + result = functions->CallDoubleMethodV(this, obj, methodID, args); + va_end(args); + return result; + } + jdouble CallDoubleMethodV(jobject obj, jmethodID methodID, va_list args) { + return functions->CallDoubleMethodV(this, obj, methodID, args); + } + jdouble CallDoubleMethodA(jobject obj, jmethodID methodID, const jvalue *args) { + return functions->CallDoubleMethodA(this, obj, methodID, args); + } + + void CallVoidMethod(jobject obj, jmethodID methodID, ...) { + va_list args; + va_start(args, methodID); + functions->CallVoidMethodV(this, obj, methodID, args); + va_end(args); + } + void CallVoidMethodV(jobject obj, jmethodID methodID, va_list args) { + functions->CallVoidMethodV(this, obj, methodID, args); + } + void CallVoidMethodA(jobject obj, jmethodID methodID, const jvalue *args) { + functions->CallVoidMethodA(this, obj, methodID, args); + } + + jobject CallNonvirtualObjectMethod(jobject obj, jclass clazz, jmethodID methodID, ...) { + va_list args; + jobject result; + va_start(args, methodID); + result = functions->CallNonvirtualObjectMethodV(this, obj, clazz, methodID, args); + va_end(args); + return result; + } + jobject CallNonvirtualObjectMethodV(jobject obj, jclass clazz, jmethodID methodID, va_list args) { + return functions->CallNonvirtualObjectMethodV(this, obj, clazz, methodID, args); + } + jobject CallNonvirtualObjectMethodA(jobject obj, jclass clazz, jmethodID methodID, + const jvalue *args) { + return functions->CallNonvirtualObjectMethodA(this, obj, clazz, methodID, args); + } + + jboolean CallNonvirtualBooleanMethod(jobject obj, jclass clazz, jmethodID methodID, ...) { + va_list args; + jboolean result; + va_start(args, methodID); + result = functions->CallNonvirtualBooleanMethodV(this, obj, clazz, methodID, args); + va_end(args); + return result; + } + jboolean CallNonvirtualBooleanMethodV(jobject obj, jclass clazz, jmethodID methodID, + va_list args) { + return functions->CallNonvirtualBooleanMethodV(this, obj, clazz, methodID, args); + } + jboolean CallNonvirtualBooleanMethodA(jobject obj, jclass clazz, jmethodID methodID, + const jvalue *args) { + return functions->CallNonvirtualBooleanMethodA(this, obj, clazz, methodID, args); + } + + jbyte CallNonvirtualByteMethod(jobject obj, jclass clazz, jmethodID methodID, ...) { + va_list args; + jbyte result; + va_start(args, methodID); + result = functions->CallNonvirtualByteMethodV(this, obj, clazz, methodID, args); + va_end(args); + return result; + } + jbyte CallNonvirtualByteMethodV(jobject obj, jclass clazz, jmethodID methodID, va_list args) { + return functions->CallNonvirtualByteMethodV(this, obj, clazz, methodID, args); + } + jbyte CallNonvirtualByteMethodA(jobject obj, jclass clazz, jmethodID methodID, + const jvalue *args) { + return functions->CallNonvirtualByteMethodA(this, obj, clazz, methodID, args); + } + + jchar CallNonvirtualCharMethod(jobject obj, jclass clazz, jmethodID methodID, ...) { + va_list args; + jchar result; + va_start(args, methodID); + result = functions->CallNonvirtualCharMethodV(this, obj, clazz, methodID, args); + va_end(args); + return result; + } + jchar CallNonvirtualCharMethodV(jobject obj, jclass clazz, jmethodID methodID, va_list args) { + return functions->CallNonvirtualCharMethodV(this, obj, clazz, methodID, args); + } + jchar CallNonvirtualCharMethodA(jobject obj, jclass clazz, jmethodID methodID, + const jvalue *args) { + return functions->CallNonvirtualCharMethodA(this, obj, clazz, methodID, args); + } + + jshort CallNonvirtualShortMethod(jobject obj, jclass clazz, jmethodID methodID, ...) { + va_list args; + jshort result; + va_start(args, methodID); + result = functions->CallNonvirtualShortMethodV(this, obj, clazz, methodID, args); + va_end(args); + return result; + } + jshort CallNonvirtualShortMethodV(jobject obj, jclass clazz, jmethodID methodID, va_list args) { + return functions->CallNonvirtualShortMethodV(this, obj, clazz, methodID, args); + } + jshort CallNonvirtualShortMethodA(jobject obj, jclass clazz, jmethodID methodID, + const jvalue *args) { + return functions->CallNonvirtualShortMethodA(this, obj, clazz, methodID, args); + } + + jint CallNonvirtualIntMethod(jobject obj, jclass clazz, jmethodID methodID, ...) { + va_list args; + jint result; + va_start(args, methodID); + result = functions->CallNonvirtualIntMethodV(this, obj, clazz, methodID, args); + va_end(args); + return result; + } + jint CallNonvirtualIntMethodV(jobject obj, jclass clazz, jmethodID methodID, va_list args) { + return functions->CallNonvirtualIntMethodV(this, obj, clazz, methodID, args); + } + jint CallNonvirtualIntMethodA(jobject obj, jclass clazz, jmethodID methodID, const jvalue *args) { + return functions->CallNonvirtualIntMethodA(this, obj, clazz, methodID, args); + } + + jlong CallNonvirtualLongMethod(jobject obj, jclass clazz, jmethodID methodID, ...) { + va_list args; + jlong result; + va_start(args, methodID); + result = functions->CallNonvirtualLongMethodV(this, obj, clazz, methodID, args); + va_end(args); + return result; + } + jlong CallNonvirtualLongMethodV(jobject obj, jclass clazz, jmethodID methodID, va_list args) { + return functions->CallNonvirtualLongMethodV(this, obj, clazz, methodID, args); + } + jlong CallNonvirtualLongMethodA(jobject obj, jclass clazz, jmethodID methodID, + const jvalue *args) { + return functions->CallNonvirtualLongMethodA(this, obj, clazz, methodID, args); + } + + jfloat CallNonvirtualFloatMethod(jobject obj, jclass clazz, jmethodID methodID, ...) { + va_list args; + jfloat result; + va_start(args, methodID); + result = functions->CallNonvirtualFloatMethodV(this, obj, clazz, methodID, args); + va_end(args); + return result; + } + jfloat CallNonvirtualFloatMethodV(jobject obj, jclass clazz, jmethodID methodID, va_list args) { + return functions->CallNonvirtualFloatMethodV(this, obj, clazz, methodID, args); + } + jfloat CallNonvirtualFloatMethodA(jobject obj, jclass clazz, jmethodID methodID, + const jvalue *args) { + return functions->CallNonvirtualFloatMethodA(this, obj, clazz, methodID, args); + } + + jdouble CallNonvirtualDoubleMethod(jobject obj, jclass clazz, jmethodID methodID, ...) { + va_list args; + jdouble result; + va_start(args, methodID); + result = functions->CallNonvirtualDoubleMethodV(this, obj, clazz, methodID, args); + va_end(args); + return result; + } + jdouble CallNonvirtualDoubleMethodV(jobject obj, jclass clazz, jmethodID methodID, va_list args) { + return functions->CallNonvirtualDoubleMethodV(this, obj, clazz, methodID, args); + } + jdouble CallNonvirtualDoubleMethodA(jobject obj, jclass clazz, jmethodID methodID, + const jvalue *args) { + return functions->CallNonvirtualDoubleMethodA(this, obj, clazz, methodID, args); + } + + void CallNonvirtualVoidMethod(jobject obj, jclass clazz, jmethodID methodID, ...) { + va_list args; + va_start(args, methodID); + functions->CallNonvirtualVoidMethodV(this, obj, clazz, methodID, args); + va_end(args); + } + void CallNonvirtualVoidMethodV(jobject obj, jclass clazz, jmethodID methodID, va_list args) { + functions->CallNonvirtualVoidMethodV(this, obj, clazz, methodID, args); + } + void CallNonvirtualVoidMethodA(jobject obj, jclass clazz, jmethodID methodID, + const jvalue *args) { + functions->CallNonvirtualVoidMethodA(this, obj, clazz, methodID, args); + } + + jfieldID GetFieldID(jclass clazz, const char *name, const char *sig) { + return functions->GetFieldID(this, clazz, name, sig); + } + + jobject GetObjectField(jobject obj, jfieldID fieldID) { + return functions->GetObjectField(this, obj, fieldID); + } + jboolean GetBooleanField(jobject obj, jfieldID fieldID) { + return functions->GetBooleanField(this, obj, fieldID); + } + jbyte GetByteField(jobject obj, jfieldID fieldID) { + return functions->GetByteField(this, obj, fieldID); + } + jchar GetCharField(jobject obj, jfieldID fieldID) { + return functions->GetCharField(this, obj, fieldID); + } + jshort GetShortField(jobject obj, jfieldID fieldID) { + return functions->GetShortField(this, obj, fieldID); + } + jint GetIntField(jobject obj, jfieldID fieldID) { + return functions->GetIntField(this, obj, fieldID); + } + jlong GetLongField(jobject obj, jfieldID fieldID) { + return functions->GetLongField(this, obj, fieldID); + } + jfloat GetFloatField(jobject obj, jfieldID fieldID) { + return functions->GetFloatField(this, obj, fieldID); + } + jdouble GetDoubleField(jobject obj, jfieldID fieldID) { + return functions->GetDoubleField(this, obj, fieldID); + } + + void SetObjectField(jobject obj, jfieldID fieldID, jobject val) { + functions->SetObjectField(this, obj, fieldID, val); + } + void SetBooleanField(jobject obj, jfieldID fieldID, jboolean val) { + functions->SetBooleanField(this, obj, fieldID, val); + } + void SetByteField(jobject obj, jfieldID fieldID, jbyte val) { + functions->SetByteField(this, obj, fieldID, val); + } + void SetCharField(jobject obj, jfieldID fieldID, jchar val) { + functions->SetCharField(this, obj, fieldID, val); + } + void SetShortField(jobject obj, jfieldID fieldID, jshort val) { + functions->SetShortField(this, obj, fieldID, val); + } + void SetIntField(jobject obj, jfieldID fieldID, jint val) { + functions->SetIntField(this, obj, fieldID, val); + } + void SetLongField(jobject obj, jfieldID fieldID, jlong val) { + functions->SetLongField(this, obj, fieldID, val); + } + void SetFloatField(jobject obj, jfieldID fieldID, jfloat val) { + functions->SetFloatField(this, obj, fieldID, val); + } + void SetDoubleField(jobject obj, jfieldID fieldID, jdouble val) { + functions->SetDoubleField(this, obj, fieldID, val); + } + + jmethodID GetStaticMethodID(jclass clazz, const char *name, const char *sig) { + return functions->GetStaticMethodID(this, clazz, name, sig); + } + + jobject CallStaticObjectMethod(jclass clazz, jmethodID methodID, ...) { + va_list args; + jobject result; + va_start(args, methodID); + result = functions->CallStaticObjectMethodV(this, clazz, methodID, args); + va_end(args); + return result; + } + jobject CallStaticObjectMethodV(jclass clazz, jmethodID methodID, va_list args) { + return functions->CallStaticObjectMethodV(this, clazz, methodID, args); + } + jobject CallStaticObjectMethodA(jclass clazz, jmethodID methodID, const jvalue *args) { + return functions->CallStaticObjectMethodA(this, clazz, methodID, args); + } + + jboolean CallStaticBooleanMethod(jclass clazz, jmethodID methodID, ...) { + va_list args; + jboolean result; + va_start(args, methodID); + result = functions->CallStaticBooleanMethodV(this, clazz, methodID, args); + va_end(args); + return result; + } + jboolean CallStaticBooleanMethodV(jclass clazz, jmethodID methodID, va_list args) { + return functions->CallStaticBooleanMethodV(this, clazz, methodID, args); + } + jboolean CallStaticBooleanMethodA(jclass clazz, jmethodID methodID, const jvalue *args) { + return functions->CallStaticBooleanMethodA(this, clazz, methodID, args); + } + + jbyte CallStaticByteMethod(jclass clazz, jmethodID methodID, ...) { + va_list args; + jbyte result; + va_start(args, methodID); + result = functions->CallStaticByteMethodV(this, clazz, methodID, args); + va_end(args); + return result; + } + jbyte CallStaticByteMethodV(jclass clazz, jmethodID methodID, va_list args) { + return functions->CallStaticByteMethodV(this, clazz, methodID, args); + } + jbyte CallStaticByteMethodA(jclass clazz, jmethodID methodID, const jvalue *args) { + return functions->CallStaticByteMethodA(this, clazz, methodID, args); + } + + jchar CallStaticCharMethod(jclass clazz, jmethodID methodID, ...) { + va_list args; + jchar result; + va_start(args, methodID); + result = functions->CallStaticCharMethodV(this, clazz, methodID, args); + va_end(args); + return result; + } + jchar CallStaticCharMethodV(jclass clazz, jmethodID methodID, va_list args) { + return functions->CallStaticCharMethodV(this, clazz, methodID, args); + } + jchar CallStaticCharMethodA(jclass clazz, jmethodID methodID, const jvalue *args) { + return functions->CallStaticCharMethodA(this, clazz, methodID, args); + } + + jshort CallStaticShortMethod(jclass clazz, jmethodID methodID, ...) { + va_list args; + jshort result; + va_start(args, methodID); + result = functions->CallStaticShortMethodV(this, clazz, methodID, args); + va_end(args); + return result; + } + jshort CallStaticShortMethodV(jclass clazz, jmethodID methodID, va_list args) { + return functions->CallStaticShortMethodV(this, clazz, methodID, args); + } + jshort CallStaticShortMethodA(jclass clazz, jmethodID methodID, const jvalue *args) { + return functions->CallStaticShortMethodA(this, clazz, methodID, args); + } + + jint CallStaticIntMethod(jclass clazz, jmethodID methodID, ...) { + va_list args; + jint result; + va_start(args, methodID); + result = functions->CallStaticIntMethodV(this, clazz, methodID, args); + va_end(args); + return result; + } + jint CallStaticIntMethodV(jclass clazz, jmethodID methodID, va_list args) { + return functions->CallStaticIntMethodV(this, clazz, methodID, args); + } + jint CallStaticIntMethodA(jclass clazz, jmethodID methodID, const jvalue *args) { + return functions->CallStaticIntMethodA(this, clazz, methodID, args); + } + + jlong CallStaticLongMethod(jclass clazz, jmethodID methodID, ...) { + va_list args; + jlong result; + va_start(args, methodID); + result = functions->CallStaticLongMethodV(this, clazz, methodID, args); + va_end(args); + return result; + } + jlong CallStaticLongMethodV(jclass clazz, jmethodID methodID, va_list args) { + return functions->CallStaticLongMethodV(this, clazz, methodID, args); + } + jlong CallStaticLongMethodA(jclass clazz, jmethodID methodID, const jvalue *args) { + return functions->CallStaticLongMethodA(this, clazz, methodID, args); + } + + jfloat CallStaticFloatMethod(jclass clazz, jmethodID methodID, ...) { + va_list args; + jfloat result; + va_start(args, methodID); + result = functions->CallStaticFloatMethodV(this, clazz, methodID, args); + va_end(args); + return result; + } + jfloat CallStaticFloatMethodV(jclass clazz, jmethodID methodID, va_list args) { + return functions->CallStaticFloatMethodV(this, clazz, methodID, args); + } + jfloat CallStaticFloatMethodA(jclass clazz, jmethodID methodID, const jvalue *args) { + return functions->CallStaticFloatMethodA(this, clazz, methodID, args); + } + + jdouble CallStaticDoubleMethod(jclass clazz, jmethodID methodID, ...) { + va_list args; + jdouble result; + va_start(args, methodID); + result = functions->CallStaticDoubleMethodV(this, clazz, methodID, args); + va_end(args); + return result; + } + jdouble CallStaticDoubleMethodV(jclass clazz, jmethodID methodID, va_list args) { + return functions->CallStaticDoubleMethodV(this, clazz, methodID, args); + } + jdouble CallStaticDoubleMethodA(jclass clazz, jmethodID methodID, const jvalue *args) { + return functions->CallStaticDoubleMethodA(this, clazz, methodID, args); + } + + void CallStaticVoidMethod(jclass cls, jmethodID methodID, ...) { + va_list args; + va_start(args, methodID); + functions->CallStaticVoidMethodV(this, cls, methodID, args); + va_end(args); + } + void CallStaticVoidMethodV(jclass cls, jmethodID methodID, va_list args) { + functions->CallStaticVoidMethodV(this, cls, methodID, args); + } + void CallStaticVoidMethodA(jclass cls, jmethodID methodID, const jvalue *args) { + functions->CallStaticVoidMethodA(this, cls, methodID, args); + } + + jfieldID GetStaticFieldID(jclass clazz, const char *name, const char *sig) { + return functions->GetStaticFieldID(this, clazz, name, sig); + } + jobject GetStaticObjectField(jclass clazz, jfieldID fieldID) { + return functions->GetStaticObjectField(this, clazz, fieldID); + } + jboolean GetStaticBooleanField(jclass clazz, jfieldID fieldID) { + return functions->GetStaticBooleanField(this, clazz, fieldID); + } + jbyte GetStaticByteField(jclass clazz, jfieldID fieldID) { + return functions->GetStaticByteField(this, clazz, fieldID); + } + jchar GetStaticCharField(jclass clazz, jfieldID fieldID) { + return functions->GetStaticCharField(this, clazz, fieldID); + } + jshort GetStaticShortField(jclass clazz, jfieldID fieldID) { + return functions->GetStaticShortField(this, clazz, fieldID); + } + jint GetStaticIntField(jclass clazz, jfieldID fieldID) { + return functions->GetStaticIntField(this, clazz, fieldID); + } + jlong GetStaticLongField(jclass clazz, jfieldID fieldID) { + return functions->GetStaticLongField(this, clazz, fieldID); + } + jfloat GetStaticFloatField(jclass clazz, jfieldID fieldID) { + return functions->GetStaticFloatField(this, clazz, fieldID); + } + jdouble GetStaticDoubleField(jclass clazz, jfieldID fieldID) { + return functions->GetStaticDoubleField(this, clazz, fieldID); + } + + void SetStaticObjectField(jclass clazz, jfieldID fieldID, jobject value) { + functions->SetStaticObjectField(this, clazz, fieldID, value); + } + void SetStaticBooleanField(jclass clazz, jfieldID fieldID, jboolean value) { + functions->SetStaticBooleanField(this, clazz, fieldID, value); + } + void SetStaticByteField(jclass clazz, jfieldID fieldID, jbyte value) { + functions->SetStaticByteField(this, clazz, fieldID, value); + } + void SetStaticCharField(jclass clazz, jfieldID fieldID, jchar value) { + functions->SetStaticCharField(this, clazz, fieldID, value); + } + void SetStaticShortField(jclass clazz, jfieldID fieldID, jshort value) { + functions->SetStaticShortField(this, clazz, fieldID, value); + } + void SetStaticIntField(jclass clazz, jfieldID fieldID, jint value) { + functions->SetStaticIntField(this, clazz, fieldID, value); + } + void SetStaticLongField(jclass clazz, jfieldID fieldID, jlong value) { + functions->SetStaticLongField(this, clazz, fieldID, value); + } + void SetStaticFloatField(jclass clazz, jfieldID fieldID, jfloat value) { + functions->SetStaticFloatField(this, clazz, fieldID, value); + } + void SetStaticDoubleField(jclass clazz, jfieldID fieldID, jdouble value) { + functions->SetStaticDoubleField(this, clazz, fieldID, value); + } + + jstring NewString(const jchar *unicode, jsize len) { + return functions->NewString(this, unicode, len); + } + jsize GetStringLength(jstring str) { return functions->GetStringLength(this, str); } + const jchar *GetStringChars(jstring str, jboolean *isCopy) { + return functions->GetStringChars(this, str, isCopy); + } + void ReleaseStringChars(jstring str, const jchar *chars) { + functions->ReleaseStringChars(this, str, chars); + } + + jstring NewStringUTF(const char *utf) { return functions->NewStringUTF(this, utf); } + jsize GetStringUTFLength(jstring str) { return functions->GetStringUTFLength(this, str); } + const char *GetStringUTFChars(jstring str, jboolean *isCopy) { + return functions->GetStringUTFChars(this, str, isCopy); + } + void ReleaseStringUTFChars(jstring str, const char *chars) { + functions->ReleaseStringUTFChars(this, str, chars); + } + + jsize GetArrayLength(jarray array) { return functions->GetArrayLength(this, array); } + + jobjectArray NewObjectArray(jsize len, jclass clazz, jobject init) { + return functions->NewObjectArray(this, len, clazz, init); + } + jobject GetObjectArrayElement(jobjectArray array, jsize index) { + return functions->GetObjectArrayElement(this, array, index); + } + void SetObjectArrayElement(jobjectArray array, jsize index, jobject val) { + functions->SetObjectArrayElement(this, array, index, val); + } + + jbooleanArray NewBooleanArray(jsize len) { return functions->NewBooleanArray(this, len); } + jbyteArray NewByteArray(jsize len) { return functions->NewByteArray(this, len); } + jcharArray NewCharArray(jsize len) { return functions->NewCharArray(this, len); } + jshortArray NewShortArray(jsize len) { return functions->NewShortArray(this, len); } + jintArray NewIntArray(jsize len) { return functions->NewIntArray(this, len); } + jlongArray NewLongArray(jsize len) { return functions->NewLongArray(this, len); } + jfloatArray NewFloatArray(jsize len) { return functions->NewFloatArray(this, len); } + jdoubleArray NewDoubleArray(jsize len) { return functions->NewDoubleArray(this, len); } + + jboolean *GetBooleanArrayElements(jbooleanArray array, jboolean *isCopy) { + return functions->GetBooleanArrayElements(this, array, isCopy); + } + jbyte *GetByteArrayElements(jbyteArray array, jboolean *isCopy) { + return functions->GetByteArrayElements(this, array, isCopy); + } + jchar *GetCharArrayElements(jcharArray array, jboolean *isCopy) { + return functions->GetCharArrayElements(this, array, isCopy); + } + jshort *GetShortArrayElements(jshortArray array, jboolean *isCopy) { + return functions->GetShortArrayElements(this, array, isCopy); + } + jint *GetIntArrayElements(jintArray array, jboolean *isCopy) { + return functions->GetIntArrayElements(this, array, isCopy); + } + jlong *GetLongArrayElements(jlongArray array, jboolean *isCopy) { + return functions->GetLongArrayElements(this, array, isCopy); + } + jfloat *GetFloatArrayElements(jfloatArray array, jboolean *isCopy) { + return functions->GetFloatArrayElements(this, array, isCopy); + } + jdouble *GetDoubleArrayElements(jdoubleArray array, jboolean *isCopy) { + return functions->GetDoubleArrayElements(this, array, isCopy); + } + + void ReleaseBooleanArrayElements(jbooleanArray array, jboolean *elems, jint mode) { + functions->ReleaseBooleanArrayElements(this, array, elems, mode); + } + void ReleaseByteArrayElements(jbyteArray array, jbyte *elems, jint mode) { + functions->ReleaseByteArrayElements(this, array, elems, mode); + } + void ReleaseCharArrayElements(jcharArray array, jchar *elems, jint mode) { + functions->ReleaseCharArrayElements(this, array, elems, mode); + } + void ReleaseShortArrayElements(jshortArray array, jshort *elems, jint mode) { + functions->ReleaseShortArrayElements(this, array, elems, mode); + } + void ReleaseIntArrayElements(jintArray array, jint *elems, jint mode) { + functions->ReleaseIntArrayElements(this, array, elems, mode); + } + void ReleaseLongArrayElements(jlongArray array, jlong *elems, jint mode) { + functions->ReleaseLongArrayElements(this, array, elems, mode); + } + void ReleaseFloatArrayElements(jfloatArray array, jfloat *elems, jint mode) { + functions->ReleaseFloatArrayElements(this, array, elems, mode); + } + void ReleaseDoubleArrayElements(jdoubleArray array, jdouble *elems, jint mode) { + functions->ReleaseDoubleArrayElements(this, array, elems, mode); + } + + void GetBooleanArrayRegion(jbooleanArray array, jsize start, jsize len, jboolean *buf) { + functions->GetBooleanArrayRegion(this, array, start, len, buf); + } + void GetByteArrayRegion(jbyteArray array, jsize start, jsize len, jbyte *buf) { + functions->GetByteArrayRegion(this, array, start, len, buf); + } + void GetCharArrayRegion(jcharArray array, jsize start, jsize len, jchar *buf) { + functions->GetCharArrayRegion(this, array, start, len, buf); + } + void GetShortArrayRegion(jshortArray array, jsize start, jsize len, jshort *buf) { + functions->GetShortArrayRegion(this, array, start, len, buf); + } + void GetIntArrayRegion(jintArray array, jsize start, jsize len, jint *buf) { + functions->GetIntArrayRegion(this, array, start, len, buf); + } + void GetLongArrayRegion(jlongArray array, jsize start, jsize len, jlong *buf) { + functions->GetLongArrayRegion(this, array, start, len, buf); + } + void GetFloatArrayRegion(jfloatArray array, jsize start, jsize len, jfloat *buf) { + functions->GetFloatArrayRegion(this, array, start, len, buf); + } + void GetDoubleArrayRegion(jdoubleArray array, jsize start, jsize len, jdouble *buf) { + functions->GetDoubleArrayRegion(this, array, start, len, buf); + } + + void SetBooleanArrayRegion(jbooleanArray array, jsize start, jsize len, const jboolean *buf) { + functions->SetBooleanArrayRegion(this, array, start, len, buf); + } + void SetByteArrayRegion(jbyteArray array, jsize start, jsize len, const jbyte *buf) { + functions->SetByteArrayRegion(this, array, start, len, buf); + } + void SetCharArrayRegion(jcharArray array, jsize start, jsize len, const jchar *buf) { + functions->SetCharArrayRegion(this, array, start, len, buf); + } + void SetShortArrayRegion(jshortArray array, jsize start, jsize len, const jshort *buf) { + functions->SetShortArrayRegion(this, array, start, len, buf); + } + void SetIntArrayRegion(jintArray array, jsize start, jsize len, const jint *buf) { + functions->SetIntArrayRegion(this, array, start, len, buf); + } + void SetLongArrayRegion(jlongArray array, jsize start, jsize len, const jlong *buf) { + functions->SetLongArrayRegion(this, array, start, len, buf); + } + void SetFloatArrayRegion(jfloatArray array, jsize start, jsize len, const jfloat *buf) { + functions->SetFloatArrayRegion(this, array, start, len, buf); + } + void SetDoubleArrayRegion(jdoubleArray array, jsize start, jsize len, const jdouble *buf) { + functions->SetDoubleArrayRegion(this, array, start, len, buf); + } + + jint RegisterNatives(jclass clazz, const JNINativeMethod *methods, jint nMethods) { + return functions->RegisterNatives(this, clazz, methods, nMethods); + } + jint UnregisterNatives(jclass clazz) { return functions->UnregisterNatives(this, clazz); } + + jint MonitorEnter(jobject obj) { return functions->MonitorEnter(this, obj); } + jint MonitorExit(jobject obj) { return functions->MonitorExit(this, obj); } + + jint GetJavaVM(JavaVM **vm) { return functions->GetJavaVM(this, vm); } + + void GetStringRegion(jstring str, jsize start, jsize len, jchar *buf) { + functions->GetStringRegion(this, str, start, len, buf); + } + void GetStringUTFRegion(jstring str, jsize start, jsize len, char *buf) { + functions->GetStringUTFRegion(this, str, start, len, buf); + } + + void *GetPrimitiveArrayCritical(jarray array, jboolean *isCopy) { + return functions->GetPrimitiveArrayCritical(this, array, isCopy); + } + void ReleasePrimitiveArrayCritical(jarray array, void *carray, jint mode) { + functions->ReleasePrimitiveArrayCritical(this, array, carray, mode); + } + + const jchar *GetStringCritical(jstring string, jboolean *isCopy) { + return functions->GetStringCritical(this, string, isCopy); + } + void ReleaseStringCritical(jstring string, const jchar *cstring) { + functions->ReleaseStringCritical(this, string, cstring); + } + + jweak NewWeakGlobalRef(jobject obj) { return functions->NewWeakGlobalRef(this, obj); } + void DeleteWeakGlobalRef(jweak ref) { functions->DeleteWeakGlobalRef(this, ref); } + + jboolean ExceptionCheck() { return functions->ExceptionCheck(this); } + + jobject NewDirectByteBuffer(void *address, jlong capacity) { + return functions->NewDirectByteBuffer(this, address, capacity); + } + void *GetDirectBufferAddress(jobject buf) { return functions->GetDirectBufferAddress(this, buf); } + jlong GetDirectBufferCapacity(jobject buf) { + return functions->GetDirectBufferCapacity(this, buf); + } + jobjectRefType GetObjectRefType(jobject obj) { return functions->GetObjectRefType(this, obj); } #endif /* __cplusplus */ }; typedef struct JavaVMOption { - char *optionString; - void *extraInfo; + char *optionString; + void *extraInfo; } JavaVMOption; typedef struct JavaVMInitArgs { - jint version; + jint version; - jint nOptions; - JavaVMOption *options; - jboolean ignoreUnrecognized; + jint nOptions; + JavaVMOption *options; + jboolean ignoreUnrecognized; } JavaVMInitArgs; typedef struct JavaVMAttachArgs { - jint version; + jint version; - char *name; - jobject group; + char *name; + jobject group; } JavaVMAttachArgs; /* These will be VM-specific. */ @@ -1872,41 +1454,35 @@ typedef struct JavaVMAttachArgs { /* End VM-specific. */ struct JNIInvokeInterface_ { - void *reserved0; - void *reserved1; - void *reserved2; + void *reserved0; + void *reserved1; + void *reserved2; - jint (JNICALL *DestroyJavaVM)(JavaVM *vm); + jint(JNICALL *DestroyJavaVM)(JavaVM *vm); - jint (JNICALL *AttachCurrentThread)(JavaVM *vm, void **penv, void *args); + jint(JNICALL *AttachCurrentThread)(JavaVM *vm, void **penv, void *args); - jint (JNICALL *DetachCurrentThread)(JavaVM *vm); + jint(JNICALL *DetachCurrentThread)(JavaVM *vm); - jint (JNICALL *GetEnv)(JavaVM *vm, void **penv, jint version); + jint(JNICALL *GetEnv)(JavaVM *vm, void **penv, jint version); - jint (JNICALL *AttachCurrentThreadAsDaemon)(JavaVM *vm, void **penv, void *args); + jint(JNICALL *AttachCurrentThreadAsDaemon)(JavaVM *vm, void **penv, void *args); }; struct JavaVM_ { - const struct JNIInvokeInterface_ *functions; + const struct JNIInvokeInterface_ *functions; #ifdef __cplusplus - jint DestroyJavaVM() { - return functions->DestroyJavaVM(this); - } - jint AttachCurrentThread(void **penv, void *args) { - return functions->AttachCurrentThread(this, penv, args); - } - jint DetachCurrentThread() { - return functions->DetachCurrentThread(this); - } - - jint GetEnv(void **penv, jint version) { - return functions->GetEnv(this, penv, version); - } - jint AttachCurrentThreadAsDaemon(void **penv, void *args) { - return functions->AttachCurrentThreadAsDaemon(this, penv, args); - } + jint DestroyJavaVM() { return functions->DestroyJavaVM(this); } + jint AttachCurrentThread(void **penv, void *args) { + return functions->AttachCurrentThread(this, penv, args); + } + jint DetachCurrentThread() { return functions->DetachCurrentThread(this); } + + jint GetEnv(void **penv, jint version) { return functions->GetEnv(this, penv, version); } + jint AttachCurrentThreadAsDaemon(void **penv, void *args) { + return functions->AttachCurrentThreadAsDaemon(this, penv, args); + } #endif }; @@ -1915,21 +1491,16 @@ struct JavaVM_ { #else #define _JNI_IMPORT_OR_EXPORT_ JNIIMPORT #endif -_JNI_IMPORT_OR_EXPORT_ jint JNICALL -JNI_GetDefaultJavaVMInitArgs(void *args); +_JNI_IMPORT_OR_EXPORT_ jint JNICALL JNI_GetDefaultJavaVMInitArgs(void *args); -_JNI_IMPORT_OR_EXPORT_ jint JNICALL -JNI_CreateJavaVM(JavaVM **pvm, void **penv, void *args); +_JNI_IMPORT_OR_EXPORT_ jint JNICALL JNI_CreateJavaVM(JavaVM **pvm, void **penv, void *args); -_JNI_IMPORT_OR_EXPORT_ jint JNICALL -JNI_GetCreatedJavaVMs(JavaVM **, jsize, jsize *); +_JNI_IMPORT_OR_EXPORT_ jint JNICALL JNI_GetCreatedJavaVMs(JavaVM **, jsize, jsize *); /* Defined by native libraries. */ -JNIEXPORT jint JNICALL -JNI_OnLoad(JavaVM *vm, void *reserved); +JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *vm, void *reserved); -JNIEXPORT void JNICALL -JNI_OnUnload(JavaVM *vm, void *reserved); +JNIEXPORT void JNICALL JNI_OnUnload(JavaVM *vm, void *reserved); #define JNI_VERSION_1_1 0x00010001 #define JNI_VERSION_1_2 0x00010002 @@ -1943,6 +1514,3 @@ JNI_OnUnload(JavaVM *vm, void *reserved); #pragma GCC visibility pop #endif /* !_JAVASOFT_JNI_H_ */ - - - diff --git a/src/sdk/java/native-src/jni_md.h b/src/sdk/java/native-src/jni_md.h index 721a25ea5..90c0e3efd 100644 --- a/src/sdk/java/native-src/jni_md.h +++ b/src/sdk/java/native-src/jni_md.h @@ -8,7 +8,7 @@ #ifndef _JAVASOFT_JNI_MD_H_ #define _JAVASOFT_JNI_MD_H_ -#define JNIEXPORT __attribute__ ((visibility ("default"))) +#define JNIEXPORT __attribute__((visibility("default"))) #define JNIIMPORT #define JNICALL diff --git a/src/sdk/java/native-src/jni_tera_base.cc b/src/sdk/java/native-src/jni_tera_base.cc index cef4607bb..d0bcf3708 100644 --- a/src/sdk/java/native-src/jni_tera_base.cc +++ b/src/sdk/java/native-src/jni_tera_base.cc @@ -12,43 +12,31 @@ #include "glog/logging.h" -#define NativeInitGlog \ - JNICALL Java_com_baidu_tera_client_TeraBase_nativeInitGlog -#define NativeGlog \ - JNICALL Java_com_baidu_tera_client_TeraBase_nativeGlog -#define NativeVlog \ - JNICALL Java_com_baidu_tera_client_TeraBase_nativeVlog - -JNIEXPORT void NativeInitGlog(JNIEnv *env, jobject jobj, - jstring jprefix) { - const char* prefix = - reinterpret_cast(env->GetStringUTFChars(jprefix, NULL)); - InitGlog(prefix); +#define NativeInitGlog JNICALL Java_com_baidu_tera_client_TeraBase_nativeInitGlog +#define NativeGlog JNICALL Java_com_baidu_tera_client_TeraBase_nativeGlog +#define NativeVlog JNICALL Java_com_baidu_tera_client_TeraBase_nativeVlog + +JNIEXPORT void NativeInitGlog(JNIEnv *env, jobject jobj, jstring jprefix) { + const char *prefix = reinterpret_cast(env->GetStringUTFChars(jprefix, NULL)); + InitGlog(prefix); } -JNIEXPORT void NativeGlog(JNIEnv *env, jobject jobj, - jstring jtype, - jstring jlog) { - std::string type = - reinterpret_cast(env->GetStringUTFChars(jtype, NULL)); - std::string log = - reinterpret_cast(env->GetStringUTFChars(jlog, NULL)); - - if (type == "INFO") { - LOG(INFO) << log; - } else if (type == "WARNING") { - LOG(WARNING) << log; - } else if (type == "ERROR") { - LOG(ERROR) << log; - } else { - LOG(FATAL) << log; - } +JNIEXPORT void NativeGlog(JNIEnv *env, jobject jobj, jstring jtype, jstring jlog) { + std::string type = reinterpret_cast(env->GetStringUTFChars(jtype, NULL)); + std::string log = reinterpret_cast(env->GetStringUTFChars(jlog, NULL)); + + if (type == "INFO") { + LOG(INFO) << log; + } else if (type == "WARNING") { + LOG(WARNING) << log; + } else if (type == "ERROR") { + LOG(ERROR) << log; + } else { + LOG(FATAL) << log; + } } -JNIEXPORT void NativeVlog(JNIEnv *env, jobject jobj, - jlong jlevel, - jstring jlog) { - std::string log = - reinterpret_cast(env->GetStringUTFChars(jlog, NULL)); - VLOG(jlevel) << log; +JNIEXPORT void NativeVlog(JNIEnv *env, jobject jobj, jlong jlevel, jstring jlog) { + std::string log = reinterpret_cast(env->GetStringUTFChars(jlog, NULL)); + VLOG(jlevel) << log; } diff --git a/src/sdk/java/native-src/jni_tera_base.h b/src/sdk/java/native-src/jni_tera_base.h index ba38e339c..94d52622b 100644 --- a/src/sdk/java/native-src/jni_tera_base.h +++ b/src/sdk/java/native-src/jni_tera_base.h @@ -12,24 +12,24 @@ extern "C" { * Method: nativeInitGlog * Signature: (Ljava/lang/String;)V */ -JNIEXPORT void JNICALL Java_com_baidu_tera_client_TeraBase_nativeInitGlog - (JNIEnv *, jobject, jstring); +JNIEXPORT void JNICALL +Java_com_baidu_tera_client_TeraBase_nativeInitGlog(JNIEnv *, jobject, jstring); /* * Class: com_baidu_tera_client_TeraBase * Method: nativeGlog * Signature: (Ljava/lang/String;Ljava/lang/String;)V */ -JNIEXPORT void JNICALL Java_com_baidu_tera_client_TeraBase_nativeGlog - (JNIEnv *, jobject, jstring, jstring); +JNIEXPORT void JNICALL +Java_com_baidu_tera_client_TeraBase_nativeGlog(JNIEnv *, jobject, jstring, jstring); /* * Class: com_baidu_tera_client_TeraBase * Method: nativeVlog * Signature: (JLjava/lang/String;)V */ -JNIEXPORT void JNICALL Java_com_baidu_tera_client_TeraBase_nativeVlog - (JNIEnv *, jobject, jlong, jstring); +JNIEXPORT void JNICALL +Java_com_baidu_tera_client_TeraBase_nativeVlog(JNIEnv *, jobject, jlong, jstring); #ifdef __cplusplus } diff --git a/src/sdk/java/native-src/jni_tera_client.cc b/src/sdk/java/native-src/jni_tera_client.cc index 4a6bd8212..9d9c083a3 100644 --- a/src/sdk/java/native-src/jni_tera_client.cc +++ b/src/sdk/java/native-src/jni_tera_client.cc @@ -12,313 +12,276 @@ #include "sdk/sdk_utils.h" #include "tera.h" -#define NativeNewClient \ - JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeNewClient -#define NativeDeleteClient \ - JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeDeleteClient -#define NativeCreateTable \ - JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeCreateTable -#define NativeDeleteTable \ - JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeDeleteTable -#define NativeEnableTable \ - JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeEnableTable -#define NativeDisableTable \ - JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeDisableTable -#define NativeIsTableExist \ - JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeIsTableExist -#define NativeIsTableEnabled \ - JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeIsTableEnabled -#define NativeIsTableEmpty \ - JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeIsTableEmpty -#define NativeOpenTable \ - JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeOpenTable +#define NativeNewClient JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeNewClient +#define NativeDeleteClient JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeDeleteClient +#define NativeCreateTable JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeCreateTable +#define NativeDeleteTable JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeDeleteTable +#define NativeEnableTable JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeEnableTable +#define NativeDisableTable JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeDisableTable +#define NativeIsTableExist JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeIsTableExist +#define NativeIsTableEnabled JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeIsTableEnabled +#define NativeIsTableEmpty JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeIsTableEmpty +#define NativeOpenTable JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeOpenTable #define NativeGetTableDescriptor \ - JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeGetTableDescriptor -#define NativeListTables \ - JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeListTables + JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeGetTableDescriptor +#define NativeListTables JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeListTables -JNIEXPORT jlong NativeNewClient(JNIEnv *env, jobject jobj, - jstring jconfpath) { - std::string confpath = - reinterpret_cast(env->GetStringUTFChars(jconfpath, NULL)); - tera::ErrorCode error_code; - std::string msg; - tera::Client* client = tera::Client::NewClient(confpath, "teracli_java", &error_code); - if (client == NULL) { - msg = "failed to create tera client, error_code: " + error_code.GetReason(); - SendErrorJ(env, jobj, msg); - return 0; - } - jlong jclient_ptr = reinterpret_cast(client); - return jclient_ptr; +JNIEXPORT jlong NativeNewClient(JNIEnv* env, jobject jobj, jstring jconfpath) { + std::string confpath = reinterpret_cast(env->GetStringUTFChars(jconfpath, NULL)); + tera::ErrorCode error_code; + std::string msg; + tera::Client* client = tera::Client::NewClient(confpath, "teracli_java", &error_code); + if (client == NULL) { + msg = "failed to create tera client, error_code: " + error_code.GetReason(); + SendErrorJ(env, jobj, msg); + return 0; + } + jlong jclient_ptr = reinterpret_cast(client); + return jclient_ptr; } -JNIEXPORT jboolean NativeDeleteClient(JNIEnv *env, jobject jobj, - jlong jclient_ptr) { - tera::Client* client = reinterpret_cast(jclient_ptr); - if (client == NULL) { - LOG(WARNING) << "tera client not initialized."; - return JNI_TRUE; - } - delete client; +JNIEXPORT jboolean NativeDeleteClient(JNIEnv* env, jobject jobj, jlong jclient_ptr) { + tera::Client* client = reinterpret_cast(jclient_ptr); + if (client == NULL) { + LOG(WARNING) << "tera client not initialized."; return JNI_TRUE; + } + delete client; + return JNI_TRUE; } -JNIEXPORT jboolean NativeCreateTable(JNIEnv *env, jobject jobj, - jlong jclient_ptr, - jstring jtablename, - jstring jtableschema) { - tera::Client* client = reinterpret_cast(jclient_ptr); - std::string tablename = - reinterpret_cast(env->GetStringUTFChars(jtablename, NULL)); - std::string tableschema = - reinterpret_cast(env->GetStringUTFChars(jtableschema, NULL)); - tera::ErrorCode error_code; - std::string msg; +JNIEXPORT jboolean NativeCreateTable(JNIEnv* env, jobject jobj, jlong jclient_ptr, + jstring jtablename, jstring jtableschema) { + tera::Client* client = reinterpret_cast(jclient_ptr); + std::string tablename = reinterpret_cast(env->GetStringUTFChars(jtablename, NULL)); + std::string tableschema = + reinterpret_cast(env->GetStringUTFChars(jtableschema, NULL)); + tera::ErrorCode error_code; + std::string msg; - if (client == NULL) { - msg = "tera client not initialized."; - SendErrorJ(env, jobj, msg); - return JNI_FALSE; - } - tera::TableDescriptor desc(tablename); - if (!ParseTableSchema(tableschema, &desc, &error_code)) { - msg = "failed to parse input table schema."; - SendErrorJ(env, jobj, msg); - return JNI_FALSE; - } - if (!client->CreateTable(desc, &error_code)) { - msg = "failed to create table, reason: " + error_code.GetReason(); - SendErrorJ(env, jobj, msg); - return JNI_FALSE; - } - return JNI_TRUE; + if (client == NULL) { + msg = "tera client not initialized."; + SendErrorJ(env, jobj, msg); + return JNI_FALSE; + } + tera::TableDescriptor desc(tablename); + if (!ParseTableSchema(tableschema, &desc, &error_code)) { + msg = "failed to parse input table schema."; + SendErrorJ(env, jobj, msg); + return JNI_FALSE; + } + if (!client->CreateTable(desc, &error_code)) { + msg = "failed to create table, reason: " + error_code.GetReason(); + SendErrorJ(env, jobj, msg); + return JNI_FALSE; + } + return JNI_TRUE; } -JNIEXPORT jboolean NativeDeleteTable(JNIEnv *env, jobject jobj, - jlong jclient_ptr, - jstring jtablename) { - tera::Client* client = reinterpret_cast(jclient_ptr); - std::string tablename = - reinterpret_cast(env->GetStringUTFChars(jtablename, NULL)); - tera::ErrorCode error_code; - std::string msg; +JNIEXPORT jboolean +NativeDeleteTable(JNIEnv* env, jobject jobj, jlong jclient_ptr, jstring jtablename) { + tera::Client* client = reinterpret_cast(jclient_ptr); + std::string tablename = reinterpret_cast(env->GetStringUTFChars(jtablename, NULL)); + tera::ErrorCode error_code; + std::string msg; - if (client == NULL) { - msg = "tera client not initialized."; - SendErrorJ(env, jobj, msg); - return JNI_FALSE; - } - if (!client->DeleteTable(tablename, &error_code)) { - msg = "failed to delete table, reason: " + error_code.GetReason(); - SendErrorJ(env, jobj, msg); - return JNI_FALSE; - } - return JNI_TRUE; + if (client == NULL) { + msg = "tera client not initialized."; + SendErrorJ(env, jobj, msg); + return JNI_FALSE; + } + if (!client->DeleteTable(tablename, &error_code)) { + msg = "failed to delete table, reason: " + error_code.GetReason(); + SendErrorJ(env, jobj, msg); + return JNI_FALSE; + } + return JNI_TRUE; } -JNIEXPORT jboolean NativeEnableTable(JNIEnv *env, jobject jobj, - jlong jclient_ptr, - jstring jtablename) { - tera::Client* client = reinterpret_cast(jclient_ptr); - std::string tablename = - reinterpret_cast(env->GetStringUTFChars(jtablename, NULL)); - tera::ErrorCode error_code; - std::string msg; +JNIEXPORT jboolean +NativeEnableTable(JNIEnv* env, jobject jobj, jlong jclient_ptr, jstring jtablename) { + tera::Client* client = reinterpret_cast(jclient_ptr); + std::string tablename = reinterpret_cast(env->GetStringUTFChars(jtablename, NULL)); + tera::ErrorCode error_code; + std::string msg; - if (client == NULL) { - msg = "tera client not initialized."; - SendErrorJ(env, jobj, msg); - return JNI_FALSE; - } - if (!client->EnableTable(tablename, &error_code)) { - msg = "failed to enable table, reason: " + error_code.GetReason(); - SendErrorJ(env, jobj, msg); - return JNI_FALSE; - } - return JNI_TRUE; + if (client == NULL) { + msg = "tera client not initialized."; + SendErrorJ(env, jobj, msg); + return JNI_FALSE; + } + if (!client->EnableTable(tablename, &error_code)) { + msg = "failed to enable table, reason: " + error_code.GetReason(); + SendErrorJ(env, jobj, msg); + return JNI_FALSE; + } + return JNI_TRUE; } -JNIEXPORT jboolean NativeDisableTable(JNIEnv *env, jobject jobj, - jlong jclient_ptr, - jstring jtablename) { - tera::Client* client = reinterpret_cast(jclient_ptr); - std::string tablename = - reinterpret_cast(env->GetStringUTFChars(jtablename, NULL)); - tera::ErrorCode error_code; - std::string msg; +JNIEXPORT jboolean +NativeDisableTable(JNIEnv* env, jobject jobj, jlong jclient_ptr, jstring jtablename) { + tera::Client* client = reinterpret_cast(jclient_ptr); + std::string tablename = reinterpret_cast(env->GetStringUTFChars(jtablename, NULL)); + tera::ErrorCode error_code; + std::string msg; - if (client == NULL) { - msg = "tera client not initialized."; - SendErrorJ(env, jobj, msg); - return JNI_FALSE; - } - if (!client->DisableTable(tablename, &error_code)) { - msg = "failed to disable table, reason: " + error_code.GetReason(); - SendErrorJ(env, jobj, msg); - return JNI_FALSE; - } - return JNI_TRUE; + if (client == NULL) { + msg = "tera client not initialized."; + SendErrorJ(env, jobj, msg); + return JNI_FALSE; + } + if (!client->DisableTable(tablename, &error_code)) { + msg = "failed to disable table, reason: " + error_code.GetReason(); + SendErrorJ(env, jobj, msg); + return JNI_FALSE; + } + return JNI_TRUE; } -JNIEXPORT jboolean NativeIsTableExist(JNIEnv *env, jobject jobj, - jlong jclient_ptr, - jstring jtablename) { - tera::Client* client = reinterpret_cast(jclient_ptr); - std::string tablename = - reinterpret_cast(env->GetStringUTFChars(jtablename, NULL)); - tera::ErrorCode error_code; - std::string msg; +JNIEXPORT jboolean +NativeIsTableExist(JNIEnv* env, jobject jobj, jlong jclient_ptr, jstring jtablename) { + tera::Client* client = reinterpret_cast(jclient_ptr); + std::string tablename = reinterpret_cast(env->GetStringUTFChars(jtablename, NULL)); + tera::ErrorCode error_code; + std::string msg; - if (client == NULL) { - msg = "tera client not initialized."; - SendErrorJ(env, jobj, msg); - return JNI_FALSE; - } - bool ret = client->IsTableExist(tablename, &error_code); - if (ret) { - return JNI_TRUE; - } else { - return JNI_FALSE; - } + if (client == NULL) { + msg = "tera client not initialized."; + SendErrorJ(env, jobj, msg); + return JNI_FALSE; + } + bool ret = client->IsTableExist(tablename, &error_code); + if (ret) { + return JNI_TRUE; + } else { + return JNI_FALSE; + } } -JNIEXPORT jboolean NativeIsTableEnabled(JNIEnv *env, jobject jobj, - jlong jclient_ptr, - jstring jtablename) { - tera::Client* client = reinterpret_cast(jclient_ptr); - std::string tablename = - reinterpret_cast(env->GetStringUTFChars(jtablename, NULL)); - tera::ErrorCode error_code; - std::string msg; +JNIEXPORT jboolean +NativeIsTableEnabled(JNIEnv* env, jobject jobj, jlong jclient_ptr, jstring jtablename) { + tera::Client* client = reinterpret_cast(jclient_ptr); + std::string tablename = reinterpret_cast(env->GetStringUTFChars(jtablename, NULL)); + tera::ErrorCode error_code; + std::string msg; - if (client == NULL) { - msg = "tera client not initialized."; - SendErrorJ(env, jobj, msg); - return JNI_FALSE; - } - bool ret = client->IsTableEnabled(tablename, &error_code); - if (ret) { - return JNI_TRUE; - } else { - return JNI_FALSE; - } + if (client == NULL) { + msg = "tera client not initialized."; + SendErrorJ(env, jobj, msg); + return JNI_FALSE; + } + bool ret = client->IsTableEnabled(tablename, &error_code); + if (ret) { + return JNI_TRUE; + } else { + return JNI_FALSE; + } } -JNIEXPORT jboolean NativeIsTableEmpty(JNIEnv *env, jobject jobj, - jlong jclient_ptr, - jstring jtablename) { - tera::Client* client = reinterpret_cast(jclient_ptr); - std::string tablename = - reinterpret_cast(env->GetStringUTFChars(jtablename, NULL)); - tera::ErrorCode error_code; - std::string msg; +JNIEXPORT jboolean +NativeIsTableEmpty(JNIEnv* env, jobject jobj, jlong jclient_ptr, jstring jtablename) { + tera::Client* client = reinterpret_cast(jclient_ptr); + std::string tablename = reinterpret_cast(env->GetStringUTFChars(jtablename, NULL)); + tera::ErrorCode error_code; + std::string msg; - if (client == NULL) { - msg = "tera client not initialized."; - SendErrorJ(env, jobj, msg); - return JNI_FALSE; - } - bool ret = client->IsTableEmpty(tablename, &error_code); - if (ret) { - return JNI_TRUE; - } else { - return JNI_FALSE; - } + if (client == NULL) { + msg = "tera client not initialized."; + SendErrorJ(env, jobj, msg); + return JNI_FALSE; + } + bool ret = client->IsTableEmpty(tablename, &error_code); + if (ret) { + return JNI_TRUE; + } else { + return JNI_FALSE; + } } -JNIEXPORT jlong NativeOpenTable(JNIEnv *env, jobject jobj, - jlong jclient_ptr, - jstring jtablename) { - tera::Client* client = reinterpret_cast(jclient_ptr); - tera::Table* table; - std::string tablename = - reinterpret_cast(env->GetStringUTFChars(jtablename, NULL)); - tera::ErrorCode error_code; - std::string msg; - if (client == NULL) { - msg = "tera client not initialized."; - SendErrorJ(env, jobj, msg); - return 0; - } - table = client->OpenTable(tablename, &error_code); - if (table == NULL) { - msg = "failed to open table, reason: " + error_code.GetReason(); - SendErrorJ(env, jobj, msg); - return 0; - } - jlong jtable = reinterpret_cast(table); - return jtable; +JNIEXPORT jlong NativeOpenTable(JNIEnv* env, jobject jobj, jlong jclient_ptr, jstring jtablename) { + tera::Client* client = reinterpret_cast(jclient_ptr); + tera::Table* table; + std::string tablename = reinterpret_cast(env->GetStringUTFChars(jtablename, NULL)); + tera::ErrorCode error_code; + std::string msg; + if (client == NULL) { + msg = "tera client not initialized."; + SendErrorJ(env, jobj, msg); + return 0; + } + table = client->OpenTable(tablename, &error_code); + if (table == NULL) { + msg = "failed to open table, reason: " + error_code.GetReason(); + SendErrorJ(env, jobj, msg); + return 0; + } + jlong jtable = reinterpret_cast(table); + return jtable; } -JNIEXPORT jstring NativeGetTableDescriptor (JNIEnv *env, jobject jobj, - jlong jclient_ptr, - jstring jtablename) { - tera::Client* client = reinterpret_cast(jclient_ptr); - std::string tablename = - reinterpret_cast(env->GetStringUTFChars(jtablename, NULL)); - tera::ErrorCode error_code; - std::string msg; - jstring jdesc; +JNIEXPORT jstring +NativeGetTableDescriptor(JNIEnv* env, jobject jobj, jlong jclient_ptr, jstring jtablename) { + tera::Client* client = reinterpret_cast(jclient_ptr); + std::string tablename = reinterpret_cast(env->GetStringUTFChars(jtablename, NULL)); + tera::ErrorCode error_code; + std::string msg; + jstring jdesc; - if (client == NULL) { - msg = "tera client not initialized."; - SendErrorJ(env, jobj, msg); - return NULL; - } + if (client == NULL) { + msg = "tera client not initialized."; + SendErrorJ(env, jobj, msg); + return NULL; + } - tera::TableInfo table_info = {NULL, ""}; - if (!client->List(tablename, &table_info, NULL, &error_code)) { - LOG(ERROR) << "fail to get meta data from tera."; - return NULL; - } - if (table_info.table_desc == NULL) { - return NULL; - } - std::string schema; - int cf_num = table_info.table_desc->ColumnFamilyNum(); - for (int cf_no = 0; cf_no < cf_num; ++cf_no) { - const tera::ColumnFamilyDescriptor* cf_desc = - table_info.table_desc->ColumnFamily(cf_no); - if (cf_no > 0) { - schema.append(","); - } - schema.append(cf_desc->Name()); + tera::TableInfo table_info = {NULL, ""}; + if (!client->List(tablename, &table_info, NULL, &error_code)) { + LOG(ERROR) << "fail to get meta data from tera."; + return NULL; + } + if (table_info.table_desc == NULL) { + return NULL; + } + std::string schema; + int cf_num = table_info.table_desc->ColumnFamilyNum(); + for (int cf_no = 0; cf_no < cf_num; ++cf_no) { + const tera::ColumnFamilyDescriptor* cf_desc = table_info.table_desc->ColumnFamily(cf_no); + if (cf_no > 0) { + schema.append(","); } + schema.append(cf_desc->Name()); + } - jdesc = env->NewStringUTF(schema.data()); - return jdesc; + jdesc = env->NewStringUTF(schema.data()); + return jdesc; } -JNIEXPORT jobjectArray NativeListTables(JNIEnv *env, jobject jobj, - jlong jclient_ptr) { - tera::Client* client = reinterpret_cast(jclient_ptr); - tera::ErrorCode error_code; - std::string msg; - if (client == NULL) { - msg = "tera client not initialized."; - SendErrorJ(env, jobj, msg); - return NULL; - } - std::vector table_list; - if (!client->List(&table_list, &error_code)) { - msg = "failed to list tables"; - SendErrorJ(env, jobj, msg); - } - jobjectArray jtable_list; - int32_t table_num = table_list.size(); - if (table_num <= 0) { - return NULL; - } - jtable_list = (jobjectArray)env->NewObjectArray(table_num, - env->FindClass("Ljava/lang/String;"), - NULL); - for (int32_t i = 0; i < table_num; ++i) { - tera::TableDescriptor* desc = table_list[i].table_desc; - std::string schema = ConvertDescToString(desc); - delete desc; - env->SetObjectArrayElement(jtable_list, i, env->NewStringUTF(schema.c_str())); - } +JNIEXPORT jobjectArray NativeListTables(JNIEnv* env, jobject jobj, jlong jclient_ptr) { + tera::Client* client = reinterpret_cast(jclient_ptr); + tera::ErrorCode error_code; + std::string msg; + if (client == NULL) { + msg = "tera client not initialized."; + SendErrorJ(env, jobj, msg); + return NULL; + } + std::vector table_list; + if (!client->List(&table_list, &error_code)) { + msg = "failed to list tables"; + SendErrorJ(env, jobj, msg); + } + jobjectArray jtable_list; + int32_t table_num = table_list.size(); + if (table_num <= 0) { + return NULL; + } + jtable_list = + (jobjectArray)env->NewObjectArray(table_num, env->FindClass("Ljava/lang/String;"), NULL); + for (int32_t i = 0; i < table_num; ++i) { + tera::TableDescriptor* desc = table_list[i].table_desc; + std::string schema = ConvertDescToString(desc); + delete desc; + env->SetObjectArrayElement(jtable_list, i, env->NewStringUTF(schema.c_str())); + } - return jtable_list; + return jtable_list; } diff --git a/src/sdk/java/native-src/jni_tera_client.h b/src/sdk/java/native-src/jni_tera_client.h index fe5e9aa61..59167feaf 100644 --- a/src/sdk/java/native-src/jni_tera_client.h +++ b/src/sdk/java/native-src/jni_tera_client.h @@ -12,96 +12,98 @@ extern "C" { * Method: nativeNewClient * Signature: (Ljava/lang/String;)J */ -JNIEXPORT jlong JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeNewClient - (JNIEnv *, jobject, jstring); +JNIEXPORT jlong JNICALL +Java_com_baidu_tera_client_TeraClientImpl_nativeNewClient(JNIEnv *, jobject, jstring); /* * Class: com_baidu_tera_client_TeraClientImpl * Method: nativeDeleteClient * Signature: (J)Z */ -JNIEXPORT jboolean JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeDeleteClient - (JNIEnv *, jobject, jlong); +JNIEXPORT jboolean JNICALL +Java_com_baidu_tera_client_TeraClientImpl_nativeDeleteClient(JNIEnv *, jobject, jlong); /* * Class: com_baidu_tera_client_TeraClientImpl * Method: nativeCreateTable * Signature: (JLjava/lang/String;Ljava/lang/String;)Z */ -JNIEXPORT jboolean JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeCreateTable - (JNIEnv *, jobject, jlong, jstring, jstring); +JNIEXPORT jboolean JNICALL +Java_com_baidu_tera_client_TeraClientImpl_nativeCreateTable(JNIEnv *, jobject, jlong, jstring, + jstring); /* * Class: com_baidu_tera_client_TeraClientImpl * Method: nativeDeleteTable * Signature: (JLjava/lang/String;)Z */ -JNIEXPORT jboolean JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeDeleteTable - (JNIEnv *, jobject, jlong, jstring); +JNIEXPORT jboolean JNICALL +Java_com_baidu_tera_client_TeraClientImpl_nativeDeleteTable(JNIEnv *, jobject, jlong, jstring); /* * Class: com_baidu_tera_client_TeraClientImpl * Method: nativeEnableTable * Signature: (JLjava/lang/String;)Z */ -JNIEXPORT jboolean JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeEnableTable - (JNIEnv *, jobject, jlong, jstring); +JNIEXPORT jboolean JNICALL +Java_com_baidu_tera_client_TeraClientImpl_nativeEnableTable(JNIEnv *, jobject, jlong, jstring); /* * Class: com_baidu_tera_client_TeraClientImpl * Method: nativeDisableTable * Signature: (JLjava/lang/String;)Z */ -JNIEXPORT jboolean JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeDisableTable - (JNIEnv *, jobject, jlong, jstring); +JNIEXPORT jboolean JNICALL +Java_com_baidu_tera_client_TeraClientImpl_nativeDisableTable(JNIEnv *, jobject, jlong, jstring); /* * Class: com_baidu_tera_client_TeraClientImpl * Method: nativeIsTableExist * Signature: (JLjava/lang/String;)Z */ -JNIEXPORT jboolean JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeIsTableExist - (JNIEnv *, jobject, jlong, jstring); +JNIEXPORT jboolean JNICALL +Java_com_baidu_tera_client_TeraClientImpl_nativeIsTableExist(JNIEnv *, jobject, jlong, jstring); /* * Class: com_baidu_tera_client_TeraClientImpl * Method: nativeIsTableEnabled * Signature: (JLjava/lang/String;)Z */ -JNIEXPORT jboolean JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeIsTableEnabled - (JNIEnv *, jobject, jlong, jstring); +JNIEXPORT jboolean JNICALL +Java_com_baidu_tera_client_TeraClientImpl_nativeIsTableEnabled(JNIEnv *, jobject, jlong, jstring); /* * Class: com_baidu_tera_client_TeraClientImpl * Method: nativeIsTableEmpty * Signature: (JLjava/lang/String;)Z */ -JNIEXPORT jboolean JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeIsTableEmpty - (JNIEnv *, jobject, jlong, jstring); +JNIEXPORT jboolean JNICALL +Java_com_baidu_tera_client_TeraClientImpl_nativeIsTableEmpty(JNIEnv *, jobject, jlong, jstring); /* * Class: com_baidu_tera_client_TeraClientImpl * Method: nativeOpenTable * Signature: (JLjava/lang/String;)J */ -JNIEXPORT jlong JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeOpenTable - (JNIEnv *, jobject, jlong, jstring); +JNIEXPORT jlong JNICALL +Java_com_baidu_tera_client_TeraClientImpl_nativeOpenTable(JNIEnv *, jobject, jlong, jstring); /* * Class: com_baidu_tera_client_TeraClientImpl * Method: nativeGetTableDescriptor * Signature: (JLjava/lang/String;)Ljava/lang/String; */ -JNIEXPORT jstring JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeGetTableDescriptor - (JNIEnv *, jobject, jlong, jstring); +JNIEXPORT jstring JNICALL +Java_com_baidu_tera_client_TeraClientImpl_nativeGetTableDescriptor(JNIEnv *, jobject, jlong, + jstring); /* * Class: com_baidu_tera_client_TeraClientImpl * Method: nativeListTables * Signature: (J)[Ljava/lang/String; */ -JNIEXPORT jobjectArray JNICALL Java_com_baidu_tera_client_TeraClientImpl_nativeListTables - (JNIEnv *, jobject, jlong); +JNIEXPORT jobjectArray JNICALL +Java_com_baidu_tera_client_TeraClientImpl_nativeListTables(JNIEnv *, jobject, jlong); #ifdef __cplusplus } diff --git a/src/sdk/java/native-src/jni_tera_common.cc b/src/sdk/java/native-src/jni_tera_common.cc index 0c43cc53b..e41eb058c 100644 --- a/src/sdk/java/native-src/jni_tera_common.cc +++ b/src/sdk/java/native-src/jni_tera_common.cc @@ -18,64 +18,55 @@ DECLARE_string(log_dir); DECLARE_string(tera_user_identity); DECLARE_string(tera_user_passcode); -void SendErrorJ(JNIEnv *env, jobject jobj, std::string msg) { - jclass jc = env->GetObjectClass(jobj); - jstring jmsg = env->NewStringUTF(msg.c_str()); - env->SetObjectField(jobj, - env->GetFieldID(jc, "nativeMsg", "Ljava/lang/String;"), - jmsg); - VLOG(10) << msg; +void SendErrorJ(JNIEnv* env, jobject jobj, std::string msg) { + jclass jc = env->GetObjectClass(jobj); + jstring jmsg = env->NewStringUTF(msg.c_str()); + env->SetObjectField(jobj, env->GetFieldID(jc, "nativeMsg", "Ljava/lang/String;"), jmsg); + VLOG(10) << msg; } void InitFlags(std::string confpath) { - // init FLAGS_flagfile - if (IsExist(confpath)) { - FLAGS_flagfile = confpath; - } else if (IsExist("./tera.flag")) { - FLAGS_flagfile = "./tera.flag"; - } else { - LOG(FATAL) << "tera.flag not exist, job failed."; - } + // init FLAGS_flagfile + if (IsExist(confpath)) { + FLAGS_flagfile = confpath; + } else if (IsExist("./tera.flag")) { + FLAGS_flagfile = "./tera.flag"; + } else { + LOG(FATAL) << "tera.flag not exist, job failed."; + } - // init user identity & role - std::string cur_identity = tera::utils::GetValueFromEnv("USER"); - if (cur_identity.empty()) { - cur_identity = "other"; - } - if (FLAGS_tera_user_identity.empty()) { - FLAGS_tera_user_identity = cur_identity; - } + // init user identity & role + std::string cur_identity = tera::utils::GetValueFromEnv("USER"); + if (cur_identity.empty()) { + cur_identity = "other"; + } + if (FLAGS_tera_user_identity.empty()) { + FLAGS_tera_user_identity = cur_identity; + } - // init log dir - if (FLAGS_log_dir.empty()) { - FLAGS_log_dir = "./"; - } - ::google::ReadFromFlagsFile(FLAGS_flagfile, NULL, true); - LOG(INFO) << "USER = " << FLAGS_tera_user_identity; - LOG(INFO) << "Load config file: " << FLAGS_flagfile; + // init log dir + if (FLAGS_log_dir.empty()) { + FLAGS_log_dir = "./"; + } + ::google::ReadFromFlagsFile(FLAGS_flagfile, NULL, true); + LOG(INFO) << "USER = " << FLAGS_tera_user_identity; + LOG(INFO) << "Load config file: " << FLAGS_flagfile; } -void InitGlog(std::string prefix) { - ::google::InitGoogleLogging(prefix.c_str()); -} - -std::string ConvertDescToString(tera::TableDescriptor* desc) { - return "desc"; +void InitGlog(std::string prefix) { ::google::InitGoogleLogging(prefix.c_str()); } -} +std::string ConvertDescToString(tera::TableDescriptor* desc) { return "desc"; } -void JByteArrayToString(JNIEnv *env, jbyteArray& jbarray, std::string* str) { - const char* str_ptr = - reinterpret_cast(env->GetByteArrayElements(jbarray, 0)); - size_t str_len = - static_cast(env->GetArrayLength(jbarray)); - str->assign(str_ptr, str_len); +void JByteArrayToString(JNIEnv* env, jbyteArray& jbarray, std::string* str) { + const char* str_ptr = reinterpret_cast(env->GetByteArrayElements(jbarray, 0)); + size_t str_len = static_cast(env->GetArrayLength(jbarray)); + str->assign(str_ptr, str_len); } -void StringToJByteArray(JNIEnv *env, const std::string& str, jbyteArray* jbarray) { - size_t str_len = str.size(); - *jbarray = env->NewByteArray(str_len); - jbyte* buffer = env->GetByteArrayElements(*jbarray, 0); - memcpy(buffer, str.data(), str_len); - env->SetByteArrayRegion(*jbarray, 0, str_len, buffer); +void StringToJByteArray(JNIEnv* env, const std::string& str, jbyteArray* jbarray) { + size_t str_len = str.size(); + *jbarray = env->NewByteArray(str_len); + jbyte* buffer = env->GetByteArrayElements(*jbarray, 0); + memcpy(buffer, str.data(), str_len); + env->SetByteArrayRegion(*jbarray, 0, str_len, buffer); } diff --git a/src/sdk/java/native-src/jni_tera_common.h b/src/sdk/java/native-src/jni_tera_common.h index deb209ceb..d0d9bc9eb 100644 --- a/src/sdk/java/native-src/jni_tera_common.h +++ b/src/sdk/java/native-src/jni_tera_common.h @@ -14,7 +14,7 @@ #include "tera.h" -void SendErrorJ(JNIEnv *env, jobject jobj, std::string msg); +void SendErrorJ(JNIEnv* env, jobject jobj, std::string msg); void InitFlags(std::string confpath); @@ -22,8 +22,8 @@ void InitGlog(std::string prefix); std::string ConvertDescToString(tera::TableDescriptor* desc); -void JByteArrayToString(JNIEnv *env, jbyteArray& jbarray, std::string* str); +void JByteArrayToString(JNIEnv* env, jbyteArray& jbarray, std::string* str); -void StringToJByteArray(JNIEnv *env, const std::string& str, jbyteArray* jbarray); +void StringToJByteArray(JNIEnv* env, const std::string& str, jbyteArray* jbarray); -#endif // _JAVATERA_NATIVE_SRC_JNI_TERA_COMMON_H_ +#endif // _JAVATERA_NATIVE_SRC_JNI_TERA_COMMON_H_ diff --git a/src/sdk/java/native-src/jni_tera_mutation.cc b/src/sdk/java/native-src/jni_tera_mutation.cc index c6b0267cb..c80c624b7 100644 --- a/src/sdk/java/native-src/jni_tera_mutation.cc +++ b/src/sdk/java/native-src/jni_tera_mutation.cc @@ -8,105 +8,90 @@ #include "tera.h" -#define NativeAdd \ - JNICALL Java_com_baidu_tera_client_TeraMutationImpl_nativeAdd -#define NativeDeleteRow \ - JNICALL Java_com_baidu_tera_client_TeraMutationImpl_nativeDeleteRow -#define NativeDeleteFamily \ - JNICALL Java_com_baidu_tera_client_TeraMutationImpl_nativeDeleteFamily -#define NativeDeleteColumn \ - JNICALL Java_com_baidu_tera_client_TeraMutationImpl_nativeDeleteColumn -#define NativeDeleteColumns \ - JNICALL Java_com_baidu_tera_client_TeraMutationImpl_nativeDeleteColumns +#define NativeAdd JNICALL Java_com_baidu_tera_client_TeraMutationImpl_nativeAdd +#define NativeDeleteRow JNICALL Java_com_baidu_tera_client_TeraMutationImpl_nativeDeleteRow +#define NativeDeleteFamily JNICALL Java_com_baidu_tera_client_TeraMutationImpl_nativeDeleteFamily +#define NativeDeleteColumn JNICALL Java_com_baidu_tera_client_TeraMutationImpl_nativeDeleteColumn +#define NativeDeleteColumns JNICALL Java_com_baidu_tera_client_TeraMutationImpl_nativeDeleteColumns #define NativeDeleteMutation \ - JNICALL Java_com_baidu_tera_client_TeraMutationImpl_nativeDeleteMutation + JNICALL Java_com_baidu_tera_client_TeraMutationImpl_nativeDeleteMutation -JNIEXPORT jboolean NativeAdd(JNIEnv *env, jobject jobj, - jlong jmutation, - jbyteArray jfamily, - jbyteArray jqualifier, - jbyteArray jvalue) { - tera::RowMutation* mutation = reinterpret_cast(jmutation); - if (mutation == NULL) { - std::string msg = "mutation not initialized."; - SendErrorJ(env, jobj, msg); - return JNI_FALSE; - } - std::string family, qualifier, value; - JByteArrayToString(env, jfamily, &family); - JByteArrayToString(env, jqualifier, &qualifier); - JByteArrayToString(env, jvalue, &value); - mutation->Put(family, qualifier, value); - return JNI_TRUE; +JNIEXPORT jboolean NativeAdd(JNIEnv* env, jobject jobj, jlong jmutation, jbyteArray jfamily, + jbyteArray jqualifier, jbyteArray jvalue) { + tera::RowMutation* mutation = reinterpret_cast(jmutation); + if (mutation == NULL) { + std::string msg = "mutation not initialized."; + SendErrorJ(env, jobj, msg); + return JNI_FALSE; + } + std::string family, qualifier, value; + JByteArrayToString(env, jfamily, &family); + JByteArrayToString(env, jqualifier, &qualifier); + JByteArrayToString(env, jvalue, &value); + mutation->Put(family, qualifier, value); + return JNI_TRUE; } -JNIEXPORT jboolean NativeDeleteRow(JNIEnv *env, jobject jobj, - jlong jmutation) { - tera::RowMutation* mutation = reinterpret_cast(jmutation); - if (mutation == NULL) { - std::string msg = "mutation not initialized."; - SendErrorJ(env, jobj, msg); - return JNI_FALSE; - } - mutation->DeleteRow(); - return JNI_TRUE; +JNIEXPORT jboolean NativeDeleteRow(JNIEnv* env, jobject jobj, jlong jmutation) { + tera::RowMutation* mutation = reinterpret_cast(jmutation); + if (mutation == NULL) { + std::string msg = "mutation not initialized."; + SendErrorJ(env, jobj, msg); + return JNI_FALSE; + } + mutation->DeleteRow(); + return JNI_TRUE; } -JNIEXPORT jboolean NativeDeleteFamily(JNIEnv *env, jobject jobj, - jlong jmutation, - jbyteArray jfamily) { - tera::RowMutation* mutation = reinterpret_cast(jmutation); - if (mutation == NULL) { - std::string msg = "mutation not initialized."; - SendErrorJ(env, jobj, msg); - return JNI_FALSE; - } - std::string family; - JByteArrayToString(env, jfamily, &family); - mutation->DeleteFamily(family); - return JNI_TRUE; +JNIEXPORT jboolean +NativeDeleteFamily(JNIEnv* env, jobject jobj, jlong jmutation, jbyteArray jfamily) { + tera::RowMutation* mutation = reinterpret_cast(jmutation); + if (mutation == NULL) { + std::string msg = "mutation not initialized."; + SendErrorJ(env, jobj, msg); + return JNI_FALSE; + } + std::string family; + JByteArrayToString(env, jfamily, &family); + mutation->DeleteFamily(family); + return JNI_TRUE; } -JNIEXPORT jboolean NativeDeleteColumn(JNIEnv *env, jobject jobj, - jlong jmutation, - jbyteArray jfamily, - jbyteArray jcolumn) { - tera::RowMutation* mutation = reinterpret_cast(jmutation); - if (mutation == NULL) { - std::string msg = "mutation not initialized."; - SendErrorJ(env, jobj, msg); - return JNI_FALSE; - } - std::string family, column; - JByteArrayToString(env, jfamily, &family); - JByteArrayToString(env, jcolumn, &column); - mutation->DeleteColumn(family, column); - return JNI_TRUE; +JNIEXPORT jboolean NativeDeleteColumn(JNIEnv* env, jobject jobj, jlong jmutation, + jbyteArray jfamily, jbyteArray jcolumn) { + tera::RowMutation* mutation = reinterpret_cast(jmutation); + if (mutation == NULL) { + std::string msg = "mutation not initialized."; + SendErrorJ(env, jobj, msg); + return JNI_FALSE; + } + std::string family, column; + JByteArrayToString(env, jfamily, &family); + JByteArrayToString(env, jcolumn, &column); + mutation->DeleteColumn(family, column); + return JNI_TRUE; } -JNIEXPORT jboolean NativeDeleteColumns(JNIEnv *env, jobject jobj, - jlong jmutation, - jbyteArray jfamily, - jbyteArray jcolumn) { - tera::RowMutation* mutation = reinterpret_cast(jmutation); - if (mutation == NULL) { - std::string msg = "mutation not initialized."; - SendErrorJ(env, jobj, msg); - return JNI_FALSE; - } - std::string family, column; - JByteArrayToString(env, jfamily, &family); - JByteArrayToString(env, jcolumn, &column); - mutation->DeleteColumns(family, column); - return JNI_TRUE; +JNIEXPORT jboolean NativeDeleteColumns(JNIEnv* env, jobject jobj, jlong jmutation, + jbyteArray jfamily, jbyteArray jcolumn) { + tera::RowMutation* mutation = reinterpret_cast(jmutation); + if (mutation == NULL) { + std::string msg = "mutation not initialized."; + SendErrorJ(env, jobj, msg); + return JNI_FALSE; + } + std::string family, column; + JByteArrayToString(env, jfamily, &family); + JByteArrayToString(env, jcolumn, &column); + mutation->DeleteColumns(family, column); + return JNI_TRUE; } -JNIEXPORT jboolean NativeDeleteMutation(JNIEnv *env, jobject jobj, - jlong jmutation) { - tera::RowMutation* mutation = reinterpret_cast(jmutation); +JNIEXPORT jboolean NativeDeleteMutation(JNIEnv* env, jobject jobj, jlong jmutation) { + tera::RowMutation* mutation = reinterpret_cast(jmutation); - if (mutation != NULL) { - delete mutation; - } - return JNI_TRUE; + if (mutation != NULL) { + delete mutation; + } + return JNI_TRUE; } diff --git a/src/sdk/java/native-src/jni_tera_mutation.h b/src/sdk/java/native-src/jni_tera_mutation.h index 5c4dceb23..1204c9b28 100644 --- a/src/sdk/java/native-src/jni_tera_mutation.h +++ b/src/sdk/java/native-src/jni_tera_mutation.h @@ -12,40 +12,44 @@ extern "C" { * Method: nativeAdd * Signature: (J[B[B[B)Z */ -JNIEXPORT jboolean JNICALL Java_com_baidu_tera_client_TeraMutationImpl_nativeAdd - (JNIEnv *, jobject, jlong, jbyteArray, jbyteArray, jbyteArray); +JNIEXPORT jboolean JNICALL +Java_com_baidu_tera_client_TeraMutationImpl_nativeAdd(JNIEnv *, jobject, jlong, jbyteArray, + jbyteArray, jbyteArray); /* * Class: com_baidu_tera_client_TeraMutationImpl * Method: nativeDeleteRow * Signature: (J)Z */ -JNIEXPORT jboolean JNICALL Java_com_baidu_tera_client_TeraMutationImpl_nativeDeleteRow - (JNIEnv *, jobject, jlong); +JNIEXPORT jboolean JNICALL +Java_com_baidu_tera_client_TeraMutationImpl_nativeDeleteRow(JNIEnv *, jobject, jlong); /* * Class: com_baidu_tera_client_TeraMutationImpl * Method: nativeDeleteFamily * Signature: (J[B)Z */ -JNIEXPORT jboolean JNICALL Java_com_baidu_tera_client_TeraMutationImpl_nativeDeleteFamily - (JNIEnv *, jobject, jlong, jbyteArray); +JNIEXPORT jboolean JNICALL +Java_com_baidu_tera_client_TeraMutationImpl_nativeDeleteFamily(JNIEnv *, jobject, jlong, + jbyteArray); /* * Class: com_baidu_tera_client_TeraMutationImpl * Method: nativeDeleteColumn * Signature: (J[B[B)Z */ -JNIEXPORT jboolean JNICALL Java_com_baidu_tera_client_TeraMutationImpl_nativeDeleteColumn - (JNIEnv *, jobject, jlong, jbyteArray, jbyteArray); +JNIEXPORT jboolean JNICALL +Java_com_baidu_tera_client_TeraMutationImpl_nativeDeleteColumn(JNIEnv *, jobject, jlong, jbyteArray, + jbyteArray); /* * Class: com_baidu_tera_client_TeraMutationImpl * Method: nativeDeleteColumns * Signature: (J[B[B)Z */ -JNIEXPORT jboolean JNICALL Java_com_baidu_tera_client_TeraMutationImpl_nativeDeleteColumns - (JNIEnv *, jobject, jlong, jbyteArray, jbyteArray); +JNIEXPORT jboolean JNICALL +Java_com_baidu_tera_client_TeraMutationImpl_nativeDeleteColumns(JNIEnv *, jobject, jlong, + jbyteArray, jbyteArray); #ifdef __cplusplus } diff --git a/src/sdk/java/native-src/jni_tera_reader.cc b/src/sdk/java/native-src/jni_tera_reader.cc index ef5a30360..79ebbbd60 100644 --- a/src/sdk/java/native-src/jni_tera_reader.cc +++ b/src/sdk/java/native-src/jni_tera_reader.cc @@ -8,52 +8,43 @@ #include "tera.h" -#define NativeAddColumn \ - JNICALL Java_com_baidu_tera_client_TeraReaderImpl_nativeAddColumn -#define NativeAddFamily \ - JNICALL Java_com_baidu_tera_client_TeraReaderImpl_nativeAddFamily -#define NativeDeleteReader \ - JNICALL Java_com_baidu_tera_client_TeraReaderImpl_nativeDeleteReader - -JNIEXPORT jboolean NativeAddColumn(JNIEnv *env, jobject jobj, - jlong jreader, - jbyteArray jfamily, - jbyteArray jcolumn) { - tera::RowReader* reader = reinterpret_cast(jreader); - if (reader == NULL) { - std::string msg = "reader not initialized."; - SendErrorJ(env, jobj, msg); - return JNI_FALSE; - } - std::string family, column; - JByteArrayToString(env, jfamily, &family); - JByteArrayToString(env, jcolumn, &column); - reader->AddColumn(family, column); - return JNI_TRUE; +#define NativeAddColumn JNICALL Java_com_baidu_tera_client_TeraReaderImpl_nativeAddColumn +#define NativeAddFamily JNICALL Java_com_baidu_tera_client_TeraReaderImpl_nativeAddFamily +#define NativeDeleteReader JNICALL Java_com_baidu_tera_client_TeraReaderImpl_nativeDeleteReader + +JNIEXPORT jboolean +NativeAddColumn(JNIEnv* env, jobject jobj, jlong jreader, jbyteArray jfamily, jbyteArray jcolumn) { + tera::RowReader* reader = reinterpret_cast(jreader); + if (reader == NULL) { + std::string msg = "reader not initialized."; + SendErrorJ(env, jobj, msg); + return JNI_FALSE; + } + std::string family, column; + JByteArrayToString(env, jfamily, &family); + JByteArrayToString(env, jcolumn, &column); + reader->AddColumn(family, column); + return JNI_TRUE; } -JNIEXPORT jboolean NativeAddFamily(JNIEnv *env, jobject jobj, - jlong jreader, - jbyteArray jfamily) { - tera::RowReader* reader = reinterpret_cast(jreader); - if (reader == NULL) { - std::string msg = "reader not initialized."; - SendErrorJ(env, jobj, msg); - return JNI_FALSE; - } - std::string family; - JByteArrayToString(env, jfamily, &family); - reader->AddColumnFamily(family); - return JNI_TRUE; +JNIEXPORT jboolean NativeAddFamily(JNIEnv* env, jobject jobj, jlong jreader, jbyteArray jfamily) { + tera::RowReader* reader = reinterpret_cast(jreader); + if (reader == NULL) { + std::string msg = "reader not initialized."; + SendErrorJ(env, jobj, msg); + return JNI_FALSE; + } + std::string family; + JByteArrayToString(env, jfamily, &family); + reader->AddColumnFamily(family); + return JNI_TRUE; } +JNIEXPORT jboolean NativeDeleteReader(JNIEnv* env, jobject jobj, jlong jreader) { + tera::RowReader* reader = reinterpret_cast(jreader); -JNIEXPORT jboolean NativeDeleteReader(JNIEnv *env, jobject jobj, - jlong jreader) { - tera::RowReader* reader = reinterpret_cast(jreader); - - if (reader != NULL) { - delete reader; - } - return JNI_TRUE; + if (reader != NULL) { + delete reader; + } + return JNI_TRUE; } diff --git a/src/sdk/java/native-src/jni_tera_reader.h b/src/sdk/java/native-src/jni_tera_reader.h index e10f114f6..ee439d3ee 100644 --- a/src/sdk/java/native-src/jni_tera_reader.h +++ b/src/sdk/java/native-src/jni_tera_reader.h @@ -12,24 +12,25 @@ extern "C" { * Method: nativeAddColumn * Signature: (J[B[B)Z */ -JNIEXPORT jboolean JNICALL Java_com_baidu_tera_client_TeraReaderImpl_nativeAddColumn - (JNIEnv *, jobject, jlong, jbyteArray, jbyteArray); +JNIEXPORT jboolean JNICALL +Java_com_baidu_tera_client_TeraReaderImpl_nativeAddColumn(JNIEnv *, jobject, jlong, jbyteArray, + jbyteArray); /* * Class: com_baidu_tera_client_TeraReaderImpl * Method: nativeAddFamily * Signature: (J[B)Z */ -JNIEXPORT jboolean JNICALL Java_com_baidu_tera_client_TeraReaderImpl_nativeAddFamily - (JNIEnv *, jobject, jlong, jbyteArray); +JNIEXPORT jboolean JNICALL +Java_com_baidu_tera_client_TeraReaderImpl_nativeAddFamily(JNIEnv *, jobject, jlong, jbyteArray); /* * Class: com_baidu_tera_client_TeraReaderImpl * Method: nativeDeleteReader * Signature: (J)Z */ -JNIEXPORT jboolean JNICALL Java_com_baidu_tera_client_TeraReaderImpl_nativeDeleteReader - (JNIEnv *, jobject, jlong); +JNIEXPORT jboolean JNICALL +Java_com_baidu_tera_client_TeraReaderImpl_nativeDeleteReader(JNIEnv *, jobject, jlong); #ifdef __cplusplus } diff --git a/src/sdk/java/native-src/jni_tera_result.cc b/src/sdk/java/native-src/jni_tera_result.cc index 6f71513a8..10ee8dc83 100644 --- a/src/sdk/java/native-src/jni_tera_result.cc +++ b/src/sdk/java/native-src/jni_tera_result.cc @@ -6,112 +6,98 @@ #include "jni_tera_result.h" #include "jni_tera_common.h" -#define NativeReaderDone \ - JNICALL Java_com_baidu_tera_client_TeraResultImpl_nativeReaderDone -#define NativeReaderNext \ - JNICALL Java_com_baidu_tera_client_TeraResultImpl_nativeReaderNext -#define NativeGetRow \ - JNICALL Java_com_baidu_tera_client_TeraResultImpl_nativeGetRow -#define NativeGetFamily \ - JNICALL Java_com_baidu_tera_client_TeraResultImpl_nativeGetFamily -#define NativeGetColumn \ - JNICALL Java_com_baidu_tera_client_TeraResultImpl_nativeGetColumn -#define NativeGetTimeStamp \ - JNICALL Java_com_baidu_tera_client_TeraResultImpl_nativeGetTimeStamp -#define NativeGetValue \ - JNICALL Java_com_baidu_tera_client_TeraResultImpl_nativeGetValue +#define NativeReaderDone JNICALL Java_com_baidu_tera_client_TeraResultImpl_nativeReaderDone +#define NativeReaderNext JNICALL Java_com_baidu_tera_client_TeraResultImpl_nativeReaderNext +#define NativeGetRow JNICALL Java_com_baidu_tera_client_TeraResultImpl_nativeGetRow +#define NativeGetFamily JNICALL Java_com_baidu_tera_client_TeraResultImpl_nativeGetFamily +#define NativeGetColumn JNICALL Java_com_baidu_tera_client_TeraResultImpl_nativeGetColumn +#define NativeGetTimeStamp JNICALL Java_com_baidu_tera_client_TeraResultImpl_nativeGetTimeStamp +#define NativeGetValue JNICALL Java_com_baidu_tera_client_TeraResultImpl_nativeGetValue -JNIEXPORT jboolean NativeReaderDone(JNIEnv *env, jobject jobj, - jlong jreader) { - tera::RowReader* reader = reinterpret_cast(jreader); - if (reader == NULL) { - std::string msg = "reader not initialized."; - SendErrorJ(env, jobj, msg); - return JNI_TRUE; - } - if (reader->Done()) { - return JNI_TRUE; - } else { - return JNI_FALSE; - } +JNIEXPORT jboolean NativeReaderDone(JNIEnv* env, jobject jobj, jlong jreader) { + tera::RowReader* reader = reinterpret_cast(jreader); + if (reader == NULL) { + std::string msg = "reader not initialized."; + SendErrorJ(env, jobj, msg); + return JNI_TRUE; + } + if (reader->Done()) { + return JNI_TRUE; + } else { + return JNI_FALSE; + } } -JNIEXPORT void NativeReaderNext(JNIEnv *env, jobject jobj, - jlong jreader) { - tera::RowReader* reader = reinterpret_cast(jreader); - if (reader == NULL) { - std::string msg = "reader not initialized."; - SendErrorJ(env, jobj, msg); - return; - } - reader->Next(); +JNIEXPORT void NativeReaderNext(JNIEnv* env, jobject jobj, jlong jreader) { + tera::RowReader* reader = reinterpret_cast(jreader); + if (reader == NULL) { + std::string msg = "reader not initialized."; + SendErrorJ(env, jobj, msg); + return; + } + reader->Next(); } -JNIEXPORT jbyteArray NativeGetRow(JNIEnv *env, jobject jobj, - jlong jreader) { - tera::RowReader* reader = reinterpret_cast(jreader); - jbyteArray jrow; - if (reader == NULL) { - std::string msg = "reader not initialized."; - SendErrorJ(env, jobj, msg); - return NULL; - } - std::string row = reader->RowName(); - StringToJByteArray(env, row, &jrow); - return jrow; +JNIEXPORT jbyteArray NativeGetRow(JNIEnv* env, jobject jobj, jlong jreader) { + tera::RowReader* reader = reinterpret_cast(jreader); + jbyteArray jrow; + if (reader == NULL) { + std::string msg = "reader not initialized."; + SendErrorJ(env, jobj, msg); + return NULL; + } + std::string row = reader->RowName(); + StringToJByteArray(env, row, &jrow); + return jrow; } -JNIEXPORT jbyteArray NativeGetFamily(JNIEnv *env, jobject jobj, - jlong jreader) { - tera::RowReader* reader = reinterpret_cast(jreader); - jbyteArray jfamily; - if (reader == NULL) { - std::string msg = "reader not initialized."; - SendErrorJ(env, jobj, msg); - return NULL; - } - std::string family = reader->Family(); - StringToJByteArray(env, family, &jfamily); - return jfamily; +JNIEXPORT jbyteArray NativeGetFamily(JNIEnv* env, jobject jobj, jlong jreader) { + tera::RowReader* reader = reinterpret_cast(jreader); + jbyteArray jfamily; + if (reader == NULL) { + std::string msg = "reader not initialized."; + SendErrorJ(env, jobj, msg); + return NULL; + } + std::string family = reader->Family(); + StringToJByteArray(env, family, &jfamily); + return jfamily; } -JNIEXPORT jbyteArray NativeGetColumn(JNIEnv *env, jobject jobj, - jlong jreader) { - tera::RowReader* reader = reinterpret_cast(jreader); - jbyteArray jcolumn; - if (reader == NULL) { - std::string msg = "reader not initialized."; - SendErrorJ(env, jobj, msg); - return NULL; - } - std::string column = reader->Qualifier(); - StringToJByteArray(env, column, &jcolumn); - return jcolumn; +JNIEXPORT jbyteArray NativeGetColumn(JNIEnv* env, jobject jobj, jlong jreader) { + tera::RowReader* reader = reinterpret_cast(jreader); + jbyteArray jcolumn; + if (reader == NULL) { + std::string msg = "reader not initialized."; + SendErrorJ(env, jobj, msg); + return NULL; + } + std::string column = reader->Qualifier(); + StringToJByteArray(env, column, &jcolumn); + return jcolumn; } -JNIEXPORT jlong NativeGetTimeStamp(JNIEnv *env, jobject jobj, - jlong jreader) { - tera::RowReader* reader = reinterpret_cast(jreader); - jlong ts = -1; - if (reader == NULL) { - std::string msg = "reader not initialized."; - SendErrorJ(env, jobj, msg); - return ts; - } - ts = reader->Timestamp(); +JNIEXPORT jlong NativeGetTimeStamp(JNIEnv* env, jobject jobj, jlong jreader) { + tera::RowReader* reader = reinterpret_cast(jreader); + jlong ts = -1; + if (reader == NULL) { + std::string msg = "reader not initialized."; + SendErrorJ(env, jobj, msg); return ts; + } + ts = reader->Timestamp(); + return ts; } -JNIEXPORT jbyteArray NativeGetValue(JNIEnv *env, jobject jobj, - jlong jreader) { - tera::RowReader* reader = reinterpret_cast(jreader); - jbyteArray jvalue; - if (reader == NULL) { - std::string msg = "reader not initialized."; - SendErrorJ(env, jobj, msg); - return NULL; - } - std::string value = reader->Value(); - StringToJByteArray(env, value, &jvalue); - return jvalue; +JNIEXPORT jbyteArray NativeGetValue(JNIEnv* env, jobject jobj, jlong jreader) { + tera::RowReader* reader = reinterpret_cast(jreader); + jbyteArray jvalue; + if (reader == NULL) { + std::string msg = "reader not initialized."; + SendErrorJ(env, jobj, msg); + return NULL; + } + std::string value = reader->Value(); + StringToJByteArray(env, value, &jvalue); + return jvalue; } diff --git a/src/sdk/java/native-src/jni_tera_result.h b/src/sdk/java/native-src/jni_tera_result.h index 70d037af8..87dedad04 100644 --- a/src/sdk/java/native-src/jni_tera_result.h +++ b/src/sdk/java/native-src/jni_tera_result.h @@ -12,56 +12,56 @@ extern "C" { * Method: nativeReaderDone * Signature: (J)Z */ -JNIEXPORT jboolean JNICALL Java_com_baidu_tera_client_TeraResultImpl_nativeReaderDone - (JNIEnv *, jobject, jlong); +JNIEXPORT jboolean JNICALL +Java_com_baidu_tera_client_TeraResultImpl_nativeReaderDone(JNIEnv *, jobject, jlong); /* * Class: com_baidu_tera_client_TeraResultImpl * Method: nativeReaderNext * Signature: (J)V */ -JNIEXPORT void JNICALL Java_com_baidu_tera_client_TeraResultImpl_nativeReaderNext - (JNIEnv *, jobject, jlong); +JNIEXPORT void JNICALL +Java_com_baidu_tera_client_TeraResultImpl_nativeReaderNext(JNIEnv *, jobject, jlong); /* * Class: com_baidu_tera_client_TeraResultImpl * Method: nativeGetRow * Signature: (J)[B */ -JNIEXPORT jbyteArray JNICALL Java_com_baidu_tera_client_TeraResultImpl_nativeGetRow - (JNIEnv *, jobject, jlong); +JNIEXPORT jbyteArray JNICALL +Java_com_baidu_tera_client_TeraResultImpl_nativeGetRow(JNIEnv *, jobject, jlong); /* * Class: com_baidu_tera_client_TeraResultImpl * Method: nativeGetFamily * Signature: (J)[B */ -JNIEXPORT jbyteArray JNICALL Java_com_baidu_tera_client_TeraResultImpl_nativeGetFamily - (JNIEnv *, jobject, jlong); +JNIEXPORT jbyteArray JNICALL +Java_com_baidu_tera_client_TeraResultImpl_nativeGetFamily(JNIEnv *, jobject, jlong); /* * Class: com_baidu_tera_client_TeraResultImpl * Method: nativeGetColumn * Signature: (J)[B */ -JNIEXPORT jbyteArray JNICALL Java_com_baidu_tera_client_TeraResultImpl_nativeGetColumn - (JNIEnv *, jobject, jlong); +JNIEXPORT jbyteArray JNICALL +Java_com_baidu_tera_client_TeraResultImpl_nativeGetColumn(JNIEnv *, jobject, jlong); /* * Class: com_baidu_tera_client_TeraResultImpl * Method: nativeGetTimeStamp * Signature: (J)J */ -JNIEXPORT jlong JNICALL Java_com_baidu_tera_client_TeraResultImpl_nativeGetTimeStamp - (JNIEnv *, jobject, jlong); +JNIEXPORT jlong JNICALL +Java_com_baidu_tera_client_TeraResultImpl_nativeGetTimeStamp(JNIEnv *, jobject, jlong); /* * Class: com_baidu_tera_client_TeraResultImpl * Method: nativeGetValue * Signature: (J)[B */ -JNIEXPORT jbyteArray JNICALL Java_com_baidu_tera_client_TeraResultImpl_nativeGetValue - (JNIEnv *, jobject, jlong); +JNIEXPORT jbyteArray JNICALL +Java_com_baidu_tera_client_TeraResultImpl_nativeGetValue(JNIEnv *, jobject, jlong); #ifdef __cplusplus } diff --git a/src/sdk/java/native-src/jni_tera_result_stream.cc b/src/sdk/java/native-src/jni_tera_result_stream.cc index accf87289..da81db47d 100644 --- a/src/sdk/java/native-src/jni_tera_result_stream.cc +++ b/src/sdk/java/native-src/jni_tera_result_stream.cc @@ -8,117 +8,112 @@ #include "tera.h" -#define NativeDone \ - JNICALL Java_com_baidu_tera_client_ScanResultStreamImpl_nativeDone -#define NativeNext \ - JNICALL Java_com_baidu_tera_client_ScanResultStreamImpl_nativeNext -#define NativeGetRow \ - JNICALL Java_com_baidu_tera_client_ScanResultStreamImpl_nativeGetRow -#define NativeGetFamily \ - JNICALL Java_com_baidu_tera_client_ScanResultStreamImpl_nativeGetFamily -#define NativeGetColumn \ - JNICALL Java_com_baidu_tera_client_ScanResultStreamImpl_nativeGetColumn +#define NativeDone JNICALL Java_com_baidu_tera_client_ScanResultStreamImpl_nativeDone +#define NativeNext JNICALL Java_com_baidu_tera_client_ScanResultStreamImpl_nativeNext +#define NativeGetRow JNICALL Java_com_baidu_tera_client_ScanResultStreamImpl_nativeGetRow +#define NativeGetFamily JNICALL Java_com_baidu_tera_client_ScanResultStreamImpl_nativeGetFamily +#define NativeGetColumn JNICALL Java_com_baidu_tera_client_ScanResultStreamImpl_nativeGetColumn #define NativeGetTimeStamp \ - JNICALL Java_com_baidu_tera_client_ScanResultStreamImpl_nativeGetTimeStamp -#define NativeGetValue \ - JNICALL Java_com_baidu_tera_client_ScanResultStreamImpl_nativeGetValue + JNICALL Java_com_baidu_tera_client_ScanResultStreamImpl_nativeGetTimeStamp +#define NativeGetValue JNICALL Java_com_baidu_tera_client_ScanResultStreamImpl_nativeGetValue #define NativeDeleteResultStream \ - JNICALL Java_com_baidu_tera_client_ScanResultStreamImpl_nativeDeleteResultStream + JNICALL \ + Java_com_baidu_tera_client_ScanResultStreamImpl_nativeDeleteResultStream -JNIEXPORT jboolean NativeDone(JNIEnv *env, jobject jobj, jlong jresult) { - tera::ResultStream* result = reinterpret_cast(jresult); - if (result == NULL) { - std::string msg = "result not initialized."; - SendErrorJ(env, jobj, msg); - return JNI_TRUE; - } - if (result->Done()) { - return JNI_TRUE; - } else { - return JNI_FALSE; - } +JNIEXPORT jboolean NativeDone(JNIEnv* env, jobject jobj, jlong jresult) { + tera::ResultStream* result = reinterpret_cast(jresult); + if (result == NULL) { + std::string msg = "result not initialized."; + SendErrorJ(env, jobj, msg); + return JNI_TRUE; + } + if (result->Done()) { + return JNI_TRUE; + } else { + return JNI_FALSE; + } } -JNIEXPORT void NativeNext(JNIEnv *env, jobject jobj, jlong jresult) { - tera::ResultStream* result = reinterpret_cast(jresult); - if (result == NULL) { - std::string msg = "result not initialized."; - SendErrorJ(env, jobj, msg); - return; - } - result->Next(); +JNIEXPORT void NativeNext(JNIEnv* env, jobject jobj, jlong jresult) { + tera::ResultStream* result = reinterpret_cast(jresult); + if (result == NULL) { + std::string msg = "result not initialized."; + SendErrorJ(env, jobj, msg); + return; + } + result->Next(); } -JNIEXPORT jbyteArray NativeGetRow(JNIEnv *env, jobject jobj, jlong jresult) { - tera::ResultStream* result = reinterpret_cast(jresult); - jbyteArray jrow; - if (result == NULL) { - std::string msg = "result not initialized."; - SendErrorJ(env, jobj, msg); - return NULL; - } - std::string row = result->RowName(); - StringToJByteArray(env, row, &jrow); - return jrow; +JNIEXPORT jbyteArray NativeGetRow(JNIEnv* env, jobject jobj, jlong jresult) { + tera::ResultStream* result = reinterpret_cast(jresult); + jbyteArray jrow; + if (result == NULL) { + std::string msg = "result not initialized."; + SendErrorJ(env, jobj, msg); + return NULL; + } + std::string row = result->RowName(); + StringToJByteArray(env, row, &jrow); + return jrow; } -JNIEXPORT jbyteArray NativeGetFamily(JNIEnv *env, jobject jobj, jlong jresult) { - tera::ResultStream* result = reinterpret_cast(jresult); - jbyteArray jfamily; - if (result == NULL) { - std::string msg = "result not initialized."; - SendErrorJ(env, jobj, msg); - return NULL; - } - std::string family = result->Family(); - StringToJByteArray(env, family, &jfamily); - return jfamily; +JNIEXPORT jbyteArray NativeGetFamily(JNIEnv* env, jobject jobj, jlong jresult) { + tera::ResultStream* result = reinterpret_cast(jresult); + jbyteArray jfamily; + if (result == NULL) { + std::string msg = "result not initialized."; + SendErrorJ(env, jobj, msg); + return NULL; + } + std::string family = result->Family(); + StringToJByteArray(env, family, &jfamily); + return jfamily; } -JNIEXPORT jbyteArray NativeGetColumn(JNIEnv *env, jobject jobj, jlong jresult) { - tera::ResultStream* result = reinterpret_cast(jresult); - jbyteArray jcolumn; - if (result == NULL) { - std::string msg = "result not initialized."; - SendErrorJ(env, jobj, msg); - return NULL; - } - std::string column = result->Qualifier(); - StringToJByteArray(env, column, &jcolumn); - return jcolumn; +JNIEXPORT jbyteArray NativeGetColumn(JNIEnv* env, jobject jobj, jlong jresult) { + tera::ResultStream* result = reinterpret_cast(jresult); + jbyteArray jcolumn; + if (result == NULL) { + std::string msg = "result not initialized."; + SendErrorJ(env, jobj, msg); + return NULL; + } + std::string column = result->Qualifier(); + StringToJByteArray(env, column, &jcolumn); + return jcolumn; } -JNIEXPORT jlong NativeGetTimeStamp(JNIEnv *env, jobject jobj, jlong jresult) { - tera::ResultStream* result = reinterpret_cast(jresult); - jlong ts = -1; - if (result == NULL) { - std::string msg = "result not initialized."; - SendErrorJ(env, jobj, msg); - return ts; - } - ts = result->Timestamp(); +JNIEXPORT jlong NativeGetTimeStamp(JNIEnv* env, jobject jobj, jlong jresult) { + tera::ResultStream* result = reinterpret_cast(jresult); + jlong ts = -1; + if (result == NULL) { + std::string msg = "result not initialized."; + SendErrorJ(env, jobj, msg); return ts; + } + ts = result->Timestamp(); + return ts; } -JNIEXPORT jbyteArray NativeGetValue(JNIEnv *env, jobject jobj, jlong jresult) { - tera::ResultStream* result = reinterpret_cast(jresult); - jbyteArray jvalue; - if (result == NULL) { - std::string msg = "result not initialized."; - SendErrorJ(env, jobj, msg); - return NULL; - } - std::string value = result->Value(); - StringToJByteArray(env, value, &jvalue); - return jvalue; +JNIEXPORT jbyteArray NativeGetValue(JNIEnv* env, jobject jobj, jlong jresult) { + tera::ResultStream* result = reinterpret_cast(jresult); + jbyteArray jvalue; + if (result == NULL) { + std::string msg = "result not initialized."; + SendErrorJ(env, jobj, msg); + return NULL; + } + std::string value = result->Value(); + StringToJByteArray(env, value, &jvalue); + return jvalue; } -JNIEXPORT void NativeDeleteResultStream(JNIEnv *env, jobject jobj, jlong jresult) { - tera::ResultStream* result = reinterpret_cast(jresult); - if (result == NULL) { - std::string msg = "result not initialized."; - SendErrorJ(env, jobj, msg); - return; - } - delete result; +JNIEXPORT void NativeDeleteResultStream(JNIEnv* env, jobject jobj, jlong jresult) { + tera::ResultStream* result = reinterpret_cast(jresult); + if (result == NULL) { + std::string msg = "result not initialized."; + SendErrorJ(env, jobj, msg); + return; + } + delete result; } diff --git a/src/sdk/java/native-src/jni_tera_result_stream.h b/src/sdk/java/native-src/jni_tera_result_stream.h index a86bb6ef5..99bf8b865 100644 --- a/src/sdk/java/native-src/jni_tera_result_stream.h +++ b/src/sdk/java/native-src/jni_tera_result_stream.h @@ -12,64 +12,64 @@ extern "C" { * Method: nativeDone * Signature: (J)Z */ -JNIEXPORT jboolean JNICALL Java_com_baidu_tera_client_ScanResultStreamImpl_nativeDone - (JNIEnv *, jobject, jlong); +JNIEXPORT jboolean JNICALL +Java_com_baidu_tera_client_ScanResultStreamImpl_nativeDone(JNIEnv *, jobject, jlong); /* * Class: com_baidu_tera_client_ScanResultStreamImpl * Method: nativeNext * Signature: (J)V */ -JNIEXPORT void JNICALL Java_com_baidu_tera_client_ScanResultStreamImpl_nativeNext - (JNIEnv *, jobject, jlong); +JNIEXPORT void JNICALL +Java_com_baidu_tera_client_ScanResultStreamImpl_nativeNext(JNIEnv *, jobject, jlong); /* * Class: com_baidu_tera_client_ScanResultStreamImpl * Method: nativeGetRow * Signature: (J)[B */ -JNIEXPORT jbyteArray JNICALL Java_com_baidu_tera_client_ScanResultStreamImpl_nativeGetRow - (JNIEnv *, jobject, jlong); +JNIEXPORT jbyteArray JNICALL +Java_com_baidu_tera_client_ScanResultStreamImpl_nativeGetRow(JNIEnv *, jobject, jlong); /* * Class: com_baidu_tera_client_ScanResultStreamImpl * Method: nativeGetFamily * Signature: (J)[B */ -JNIEXPORT jbyteArray JNICALL Java_com_baidu_tera_client_ScanResultStreamImpl_nativeGetFamily - (JNIEnv *, jobject, jlong); +JNIEXPORT jbyteArray JNICALL +Java_com_baidu_tera_client_ScanResultStreamImpl_nativeGetFamily(JNIEnv *, jobject, jlong); /* * Class: com_baidu_tera_client_ScanResultStreamImpl * Method: nativeGetColumn * Signature: (J)[B */ -JNIEXPORT jbyteArray JNICALL Java_com_baidu_tera_client_ScanResultStreamImpl_nativeGetColumn - (JNIEnv *, jobject, jlong); +JNIEXPORT jbyteArray JNICALL +Java_com_baidu_tera_client_ScanResultStreamImpl_nativeGetColumn(JNIEnv *, jobject, jlong); /* * Class: com_baidu_tera_client_ScanResultStreamImpl * Method: nativeGetTimeStamp * Signature: (J)J */ -JNIEXPORT jlong JNICALL Java_com_baidu_tera_client_ScanResultStreamImpl_nativeGetTimeStamp - (JNIEnv *, jobject, jlong); +JNIEXPORT jlong JNICALL +Java_com_baidu_tera_client_ScanResultStreamImpl_nativeGetTimeStamp(JNIEnv *, jobject, jlong); /* * Class: com_baidu_tera_client_ScanResultStreamImpl * Method: nativeGetValue * Signature: (J)[B */ -JNIEXPORT jbyteArray JNICALL Java_com_baidu_tera_client_ScanResultStreamImpl_nativeGetValue - (JNIEnv *, jobject, jlong); +JNIEXPORT jbyteArray JNICALL +Java_com_baidu_tera_client_ScanResultStreamImpl_nativeGetValue(JNIEnv *, jobject, jlong); /* * Class: com_baidu_tera_client_ScanResultStreamImpl * Method: nativeDeleteResultStream * Signature: (J)V */ -JNIEXPORT void JNICALL Java_com_baidu_tera_client_ScanResultStreamImpl_nativeDeleteResultStream - (JNIEnv *, jobject, jlong); +JNIEXPORT void JNICALL +Java_com_baidu_tera_client_ScanResultStreamImpl_nativeDeleteResultStream(JNIEnv *, jobject, jlong); #ifdef __cplusplus } diff --git a/src/sdk/java/native-src/jni_tera_scan.cc b/src/sdk/java/native-src/jni_tera_scan.cc index af6d29c28..7ddee822f 100644 --- a/src/sdk/java/native-src/jni_tera_scan.cc +++ b/src/sdk/java/native-src/jni_tera_scan.cc @@ -8,65 +8,55 @@ #include "tera.h" -#define NativeNewScanDesc \ - JNICALL Java_com_baidu_tera_client_TeraScanImpl_nativeNewScanDesc -#define NativeAddFamily \ - JNICALL Java_com_baidu_tera_client_TeraScanImpl_nativeAddFamily -#define NativeAddColumn \ - JNICALL Java_com_baidu_tera_client_TeraScanImpl_nativeAddColumn -#define NativeDeleteScanDesc \ - JNICALL Java_com_baidu_tera_client_TeraScanImpl_nativeDeleteScanDesc - -JNIEXPORT jlong NativeNewScanDesc(JNIEnv *env, jobject jobj, - jbyteArray jstartkey, - jbyteArray jendkey) { - std::string startkey, endkey; - JByteArrayToString(env, jstartkey, &startkey); - JByteArrayToString(env, jendkey, &endkey); - - tera::ScanDescriptor* desc = new tera::ScanDescriptor(startkey); - desc->SetEnd(endkey); - - jlong jdesc = reinterpret_cast(desc); - return jdesc; +#define NativeNewScanDesc JNICALL Java_com_baidu_tera_client_TeraScanImpl_nativeNewScanDesc +#define NativeAddFamily JNICALL Java_com_baidu_tera_client_TeraScanImpl_nativeAddFamily +#define NativeAddColumn JNICALL Java_com_baidu_tera_client_TeraScanImpl_nativeAddColumn +#define NativeDeleteScanDesc JNICALL Java_com_baidu_tera_client_TeraScanImpl_nativeDeleteScanDesc + +JNIEXPORT jlong +NativeNewScanDesc(JNIEnv* env, jobject jobj, jbyteArray jstartkey, jbyteArray jendkey) { + std::string startkey, endkey; + JByteArrayToString(env, jstartkey, &startkey); + JByteArrayToString(env, jendkey, &endkey); + + tera::ScanDescriptor* desc = new tera::ScanDescriptor(startkey); + desc->SetEnd(endkey); + + jlong jdesc = reinterpret_cast(desc); + return jdesc; } -JNIEXPORT jboolean NativeAddFamily(JNIEnv *env, jobject jobj, - jlong jdesc, - jbyteArray jfamily) { - tera::ScanDescriptor* desc = reinterpret_cast(jdesc); - if (desc == NULL) { - std::string msg = "descriptor not initialized."; - SendErrorJ(env, jobj, msg); - return JNI_FALSE; - } - std::string family; - JByteArrayToString(env, jfamily, &family); - desc->AddColumnFamily(family); - return JNI_TRUE; +JNIEXPORT jboolean NativeAddFamily(JNIEnv* env, jobject jobj, jlong jdesc, jbyteArray jfamily) { + tera::ScanDescriptor* desc = reinterpret_cast(jdesc); + if (desc == NULL) { + std::string msg = "descriptor not initialized."; + SendErrorJ(env, jobj, msg); + return JNI_FALSE; + } + std::string family; + JByteArrayToString(env, jfamily, &family); + desc->AddColumnFamily(family); + return JNI_TRUE; } -JNIEXPORT jboolean NativeAddColumn(JNIEnv *env, jobject jobj, - jlong jdesc, - jbyteArray jfamily, - jbyteArray jcolumn) { - tera::ScanDescriptor* desc = reinterpret_cast(jdesc); - if (desc == NULL) { - std::string msg = "descriptor not initialized."; - SendErrorJ(env, jobj, msg); - return false; - } - std::string family, column; - JByteArrayToString(env, jfamily, &family); - JByteArrayToString(env, jcolumn, &column); - desc->AddColumn(family, column); - return true; +JNIEXPORT jboolean +NativeAddColumn(JNIEnv* env, jobject jobj, jlong jdesc, jbyteArray jfamily, jbyteArray jcolumn) { + tera::ScanDescriptor* desc = reinterpret_cast(jdesc); + if (desc == NULL) { + std::string msg = "descriptor not initialized."; + SendErrorJ(env, jobj, msg); + return false; + } + std::string family, column; + JByteArrayToString(env, jfamily, &family); + JByteArrayToString(env, jcolumn, &column); + desc->AddColumn(family, column); + return true; } -JNIEXPORT void NativeDeleteScanDesc(JNIEnv *env, jobject jobj, - jlong jdesc) { - tera::ScanDescriptor* desc = reinterpret_cast(jdesc); - if (desc != NULL) { - delete desc; - } +JNIEXPORT void NativeDeleteScanDesc(JNIEnv* env, jobject jobj, jlong jdesc) { + tera::ScanDescriptor* desc = reinterpret_cast(jdesc); + if (desc != NULL) { + delete desc; + } } diff --git a/src/sdk/java/native-src/jni_tera_scan.h b/src/sdk/java/native-src/jni_tera_scan.h index 8cdb3e420..7fcc61e91 100644 --- a/src/sdk/java/native-src/jni_tera_scan.h +++ b/src/sdk/java/native-src/jni_tera_scan.h @@ -12,32 +12,34 @@ extern "C" { * Method: nativeNewScanDesc * Signature: ([B[B)J */ -JNIEXPORT jlong JNICALL Java_com_baidu_tera_client_TeraScanImpl_nativeNewScanDesc - (JNIEnv *, jobject, jbyteArray, jbyteArray); +JNIEXPORT jlong JNICALL Java_com_baidu_tera_client_TeraScanImpl_nativeNewScanDesc(JNIEnv *, jobject, + jbyteArray, + jbyteArray); /* * Class: com_baidu_tera_client_TeraScanImpl * Method: nativeAddFamily * Signature: (J[B)Z */ -JNIEXPORT jboolean JNICALL Java_com_baidu_tera_client_TeraScanImpl_nativeAddFamily - (JNIEnv *, jobject, jlong, jbyteArray); +JNIEXPORT jboolean JNICALL +Java_com_baidu_tera_client_TeraScanImpl_nativeAddFamily(JNIEnv *, jobject, jlong, jbyteArray); /* * Class: com_baidu_tera_client_TeraScanImpl * Method: nativeAddColumn * Signature: (J[B[B)Z */ -JNIEXPORT jboolean JNICALL Java_com_baidu_tera_client_TeraScanImpl_nativeAddColumn - (JNIEnv *, jobject, jlong, jbyteArray, jbyteArray); +JNIEXPORT jboolean JNICALL +Java_com_baidu_tera_client_TeraScanImpl_nativeAddColumn(JNIEnv *, jobject, jlong, jbyteArray, + jbyteArray); /* * Class: com_baidu_tera_client_TeraScanImpl * Method: nativeDeleteScanDesc * Signature: (J)V */ -JNIEXPORT void JNICALL Java_com_baidu_tera_client_TeraScanImpl_nativeDeleteScanDesc - (JNIEnv *, jobject, jlong); +JNIEXPORT void JNICALL +Java_com_baidu_tera_client_TeraScanImpl_nativeDeleteScanDesc(JNIEnv *, jobject, jlong); #ifdef __cplusplus } diff --git a/src/sdk/java/native-src/jni_tera_table.cc b/src/sdk/java/native-src/jni_tera_table.cc index 35ac8d312..5dda1afc9 100644 --- a/src/sdk/java/native-src/jni_tera_table.cc +++ b/src/sdk/java/native-src/jni_tera_table.cc @@ -12,216 +12,182 @@ #include "tera.h" -#define NativePut \ - JNICALL Java_com_baidu_tera_client_TeraTableImpl_nativePut -#define NativeGet \ - JNICALL Java_com_baidu_tera_client_TeraTableImpl_nativeGet -#define NativeNewMutation \ - JNICALL Java_com_baidu_tera_client_TeraTableImpl_nativeNewMutation -#define NativeApplyMutation \ - JNICALL Java_com_baidu_tera_client_TeraTableImpl_nativeApplyMutation -#define NativeFlushCommits \ - JNICALL Java_com_baidu_tera_client_TeraTableImpl_nativeFlushCommits -#define NativeNewReader \ - JNICALL Java_com_baidu_tera_client_TeraTableImpl_nativeNewReader -#define NativeApplyReader \ - JNICALL Java_com_baidu_tera_client_TeraTableImpl_nativeApplyReader -#define NativeScan \ - JNICALL Java_com_baidu_tera_client_TeraTableImpl_nativeScan - -JNIEXPORT jboolean NativePut(JNIEnv *env, jobject jobj, - jlong jtable_ptr, - jstring jrowkey, - jstring jfamily, - jstring jqualifier, - jstring jvalue, - jlong timestamp) { - tera::Table* table = reinterpret_cast(jtable_ptr); - std::string rowkey = - reinterpret_cast(env->GetStringUTFChars(jrowkey, NULL)); - std::string family = - reinterpret_cast(env->GetStringUTFChars(jfamily, NULL)); - std::string qualifier = - reinterpret_cast(env->GetStringUTFChars(jqualifier, NULL)); - std::string value = - reinterpret_cast(env->GetStringUTFChars(jvalue, NULL)); - tera::ErrorCode error_code; - std::string msg; - - if (table == NULL) { - msg = "table not initialized."; - SendErrorJ(env, jobj, msg); - return JNI_FALSE; - } - - if (timestamp == 0) { - if (!table->Put(rowkey, family, qualifier, value, &error_code)) { - msg = "failed to put record to table, reason: " + error_code.GetReason(); - SendErrorJ(env, jobj, msg); - return JNI_FALSE; - } - } else { - if (!table->Put(rowkey, family, qualifier, value, timestamp, &error_code)) { - msg = "failed to put record to table, reason: " + error_code.GetReason(); - SendErrorJ(env, jobj, msg); - return JNI_FALSE; - } - } - - return JNI_TRUE; +#define NativePut JNICALL Java_com_baidu_tera_client_TeraTableImpl_nativePut +#define NativeGet JNICALL Java_com_baidu_tera_client_TeraTableImpl_nativeGet +#define NativeNewMutation JNICALL Java_com_baidu_tera_client_TeraTableImpl_nativeNewMutation +#define NativeApplyMutation JNICALL Java_com_baidu_tera_client_TeraTableImpl_nativeApplyMutation +#define NativeFlushCommits JNICALL Java_com_baidu_tera_client_TeraTableImpl_nativeFlushCommits +#define NativeNewReader JNICALL Java_com_baidu_tera_client_TeraTableImpl_nativeNewReader +#define NativeApplyReader JNICALL Java_com_baidu_tera_client_TeraTableImpl_nativeApplyReader +#define NativeScan JNICALL Java_com_baidu_tera_client_TeraTableImpl_nativeScan + +JNIEXPORT jboolean NativePut(JNIEnv* env, jobject jobj, jlong jtable_ptr, jstring jrowkey, + jstring jfamily, jstring jqualifier, jstring jvalue, jlong timestamp) { + tera::Table* table = reinterpret_cast(jtable_ptr); + std::string rowkey = reinterpret_cast(env->GetStringUTFChars(jrowkey, NULL)); + std::string family = reinterpret_cast(env->GetStringUTFChars(jfamily, NULL)); + std::string qualifier = reinterpret_cast(env->GetStringUTFChars(jqualifier, NULL)); + std::string value = reinterpret_cast(env->GetStringUTFChars(jvalue, NULL)); + tera::ErrorCode error_code; + std::string msg; + + if (table == NULL) { + msg = "table not initialized."; + SendErrorJ(env, jobj, msg); + return JNI_FALSE; + } + + if (timestamp == 0) { + if (!table->Put(rowkey, family, qualifier, value, &error_code)) { + msg = "failed to put record to table, reason: " + error_code.GetReason(); + SendErrorJ(env, jobj, msg); + return JNI_FALSE; + } + } else { + if (!table->Put(rowkey, family, qualifier, value, timestamp, &error_code)) { + msg = "failed to put record to table, reason: " + error_code.GetReason(); + SendErrorJ(env, jobj, msg); + return JNI_FALSE; + } + } + + return JNI_TRUE; } -JNIEXPORT jstring NativeGet(JNIEnv *env, jobject jobj, - jlong jtable_ptr, - jstring jrowkey, - jstring jfamily, - jstring jqualifier, - jlong timestamp) { - tera::Table* table = reinterpret_cast(jtable_ptr); - std::string rowkey = - reinterpret_cast(env->GetStringUTFChars(jrowkey, NULL)); - std::string family = - reinterpret_cast(env->GetStringUTFChars(jfamily, NULL)); - std::string qualifier = - reinterpret_cast(env->GetStringUTFChars(jqualifier, NULL)); - std::string value; - tera::ErrorCode error_code; - jstring jvalue; - if (table == NULL) { - SendErrorJ(env, jobj, "table not initialized."); - return NULL; - } - - if (!table->Get(rowkey, family, qualifier, &value, &error_code)) { - std::string msg = "failed to get record from table, reason: " + error_code.GetReason(); - SendErrorJ(env, jobj, msg); - return NULL; - } - - jvalue = env->NewStringUTF(value.c_str()); - return jvalue; +JNIEXPORT jstring NativeGet(JNIEnv* env, jobject jobj, jlong jtable_ptr, jstring jrowkey, + jstring jfamily, jstring jqualifier, jlong timestamp) { + tera::Table* table = reinterpret_cast(jtable_ptr); + std::string rowkey = reinterpret_cast(env->GetStringUTFChars(jrowkey, NULL)); + std::string family = reinterpret_cast(env->GetStringUTFChars(jfamily, NULL)); + std::string qualifier = reinterpret_cast(env->GetStringUTFChars(jqualifier, NULL)); + std::string value; + tera::ErrorCode error_code; + jstring jvalue; + if (table == NULL) { + SendErrorJ(env, jobj, "table not initialized."); + return NULL; + } + + if (!table->Get(rowkey, family, qualifier, &value, &error_code)) { + std::string msg = "failed to get record from table, reason: " + error_code.GetReason(); + SendErrorJ(env, jobj, msg); + return NULL; + } + + jvalue = env->NewStringUTF(value.c_str()); + return jvalue; } -JNIEXPORT jlong NativeNewMutation(JNIEnv *env, jobject jobj, - jlong jtable_ptr, - jbyteArray jrowkey) { - tera::Table* table = reinterpret_cast(jtable_ptr); - tera::ErrorCode error_code; - std::string rowkey; - - if (table == NULL) { - SendErrorJ(env, jobj, "table not initialized."); - return 0; - } - JByteArrayToString(env, jrowkey, &rowkey); - tera::RowMutation* mutation = table->NewRowMutation(rowkey); - if (mutation == NULL) { - SendErrorJ(env, jobj, "failed to new mutation: "); - return 0; - } - jlong jmutation_ptr = reinterpret_cast(mutation); - return jmutation_ptr; +JNIEXPORT jlong NativeNewMutation(JNIEnv* env, jobject jobj, jlong jtable_ptr, jbyteArray jrowkey) { + tera::Table* table = reinterpret_cast(jtable_ptr); + tera::ErrorCode error_code; + std::string rowkey; + + if (table == NULL) { + SendErrorJ(env, jobj, "table not initialized."); + return 0; + } + JByteArrayToString(env, jrowkey, &rowkey); + tera::RowMutation* mutation = table->NewRowMutation(rowkey); + if (mutation == NULL) { + SendErrorJ(env, jobj, "failed to new mutation: "); + return 0; + } + jlong jmutation_ptr = reinterpret_cast(mutation); + return jmutation_ptr; } void MutationCallBack(tera::RowMutation* mutation) { - const tera::ErrorCode& error_code = mutation->GetError(); - if (error_code.GetType() != tera::ErrorCode::kOK) { - LOG(ERROR) << "exception occured, reason:" << error_code.GetReason(); - } + const tera::ErrorCode& error_code = mutation->GetError(); + if (error_code.GetType() != tera::ErrorCode::kOK) { + LOG(ERROR) << "exception occured, reason:" << error_code.GetReason(); + } - delete mutation; + delete mutation; } -JNIEXPORT jboolean NativeApplyMutation(JNIEnv *env, jobject jobj, - jlong jtable_ptr, - jlong jmutation_ptr) { - tera::Table* table = reinterpret_cast(jtable_ptr); - tera::RowMutation* mutation = reinterpret_cast(jmutation_ptr); - tera::ErrorCode error_code; - - if (table == NULL || mutation == NULL) { - SendErrorJ(env, jobj, "table or mutation not initialized."); - return JNI_FALSE; - } - if (mutation->MutationNum() <= 0) { - return JNI_TRUE; - } - -// mutation->SetCallBack(MutationCallBack); - table->ApplyMutation(mutation); +JNIEXPORT jboolean +NativeApplyMutation(JNIEnv* env, jobject jobj, jlong jtable_ptr, jlong jmutation_ptr) { + tera::Table* table = reinterpret_cast(jtable_ptr); + tera::RowMutation* mutation = reinterpret_cast(jmutation_ptr); + tera::ErrorCode error_code; + + if (table == NULL || mutation == NULL) { + SendErrorJ(env, jobj, "table or mutation not initialized."); + return JNI_FALSE; + } + if (mutation->MutationNum() <= 0) { return JNI_TRUE; + } + + // mutation->SetCallBack(MutationCallBack); + table->ApplyMutation(mutation); + return JNI_TRUE; } -JNIEXPORT void NativeFlushCommits (JNIEnv *env, jobject jobj, - jlong jtable_ptr) { - tera::Table* table = reinterpret_cast(jtable_ptr); - tera::ErrorCode error_code; +JNIEXPORT void NativeFlushCommits(JNIEnv* env, jobject jobj, jlong jtable_ptr) { + tera::Table* table = reinterpret_cast(jtable_ptr); + tera::ErrorCode error_code; - if (table == NULL) { - SendErrorJ(env, jobj, "table not initialized."); - return; - } + if (table == NULL) { + SendErrorJ(env, jobj, "table not initialized."); + return; + } - while (!table->IsPutFinished()) { - LOG(INFO) << "wainting for flush finished .."; - usleep(100000); - } + while (!table->IsPutFinished()) { + LOG(INFO) << "wainting for flush finished .."; + usleep(100000); + } } -JNIEXPORT jlong NativeNewReader(JNIEnv *env, jobject jobj, - jlong jtable_ptr, - jbyteArray jrowkey) { - tera::Table* table = reinterpret_cast(jtable_ptr); - tera::ErrorCode error_code; - std::string rowkey; - - if (table == NULL) { - SendErrorJ(env, jobj, "table not initialized."); - return 0; - } - JByteArrayToString(env, jrowkey, &rowkey); - tera::RowReader* reader = table->NewRowReader(rowkey); - if (reader == NULL) { - SendErrorJ(env, jobj, "failed to new reader: "); - return 0; - } - jlong jreader_ptr = reinterpret_cast(reader); - return jreader_ptr; +JNIEXPORT jlong NativeNewReader(JNIEnv* env, jobject jobj, jlong jtable_ptr, jbyteArray jrowkey) { + tera::Table* table = reinterpret_cast(jtable_ptr); + tera::ErrorCode error_code; + std::string rowkey; + + if (table == NULL) { + SendErrorJ(env, jobj, "table not initialized."); + return 0; + } + JByteArrayToString(env, jrowkey, &rowkey); + tera::RowReader* reader = table->NewRowReader(rowkey); + if (reader == NULL) { + SendErrorJ(env, jobj, "failed to new reader: "); + return 0; + } + jlong jreader_ptr = reinterpret_cast(reader); + return jreader_ptr; } -JNIEXPORT jlong NativeApplyReader(JNIEnv *env, jobject jobj, - jlong jtable_ptr, - jlong jreader_ptr) { - tera::Table* table = reinterpret_cast(jtable_ptr); - tera::RowReader* reader = reinterpret_cast(jreader_ptr); - tera::ErrorCode error_code; +JNIEXPORT jlong NativeApplyReader(JNIEnv* env, jobject jobj, jlong jtable_ptr, jlong jreader_ptr) { + tera::Table* table = reinterpret_cast(jtable_ptr); + tera::RowReader* reader = reinterpret_cast(jreader_ptr); + tera::ErrorCode error_code; - if (table == NULL || reader == NULL) { - SendErrorJ(env, jobj, "table or reader not initialized."); - return 0; - } - table->Get(reader); + if (table == NULL || reader == NULL) { + SendErrorJ(env, jobj, "table or reader not initialized."); + return 0; + } + table->Get(reader); - return reinterpret_cast(reader); + return reinterpret_cast(reader); } -JNIEXPORT jlong NativeScan(JNIEnv *env, jobject jobj, - jlong jtable, - jlong jdesc) { - tera::Table* table = reinterpret_cast(jtable); - tera::ScanDescriptor* desc = reinterpret_cast(jdesc); - tera::ErrorCode error_code; +JNIEXPORT jlong NativeScan(JNIEnv* env, jobject jobj, jlong jtable, jlong jdesc) { + tera::Table* table = reinterpret_cast(jtable); + tera::ScanDescriptor* desc = reinterpret_cast(jdesc); + tera::ErrorCode error_code; - if (table == NULL || desc == NULL) { - SendErrorJ(env, jobj, "table or scan descriptor not initialized."); - return 0; - } + if (table == NULL || desc == NULL) { + SendErrorJ(env, jobj, "table or scan descriptor not initialized."); + return 0; + } - tera::ResultStream* result_stream; - if ((result_stream = table->Scan(*desc, &error_code)) == NULL) { - LOG(ERROR) << "fail to scan records from table: " << table->GetName(); - return 0; - } + tera::ResultStream* result_stream; + if ((result_stream = table->Scan(*desc, &error_code)) == NULL) { + LOG(ERROR) << "fail to scan records from table: " << table->GetName(); + return 0; + } - return reinterpret_cast(result_stream); + return reinterpret_cast(result_stream); } diff --git a/src/sdk/java/native-src/jni_tera_table.h b/src/sdk/java/native-src/jni_tera_table.h index 4c9bcafa2..35b9b97a3 100644 --- a/src/sdk/java/native-src/jni_tera_table.h +++ b/src/sdk/java/native-src/jni_tera_table.h @@ -10,66 +10,70 @@ extern "C" { /* * Class: com_baidu_tera_client_TeraTableImpl * Method: nativePut - * Signature: (JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;J)Z + * Signature: + * (JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;J)Z */ -JNIEXPORT jboolean JNICALL Java_com_baidu_tera_client_TeraTableImpl_nativePut - (JNIEnv *, jobject, jlong, jstring, jstring, jstring, jstring, jlong); +JNIEXPORT jboolean JNICALL +Java_com_baidu_tera_client_TeraTableImpl_nativePut(JNIEnv *, jobject, jlong, jstring, jstring, + jstring, jstring, jlong); /* * Class: com_baidu_tera_client_TeraTableImpl * Method: nativeGet - * Signature: (JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;J)Ljava/lang/String; + * Signature: + * (JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;J)Ljava/lang/String; */ -JNIEXPORT jstring JNICALL Java_com_baidu_tera_client_TeraTableImpl_nativeGet - (JNIEnv *, jobject, jlong, jstring, jstring, jstring, jlong); +JNIEXPORT jstring JNICALL +Java_com_baidu_tera_client_TeraTableImpl_nativeGet(JNIEnv *, jobject, jlong, jstring, jstring, + jstring, jlong); /* * Class: com_baidu_tera_client_TeraTableImpl * Method: nativeNewMutation * Signature: (J[B)J */ -JNIEXPORT jlong JNICALL Java_com_baidu_tera_client_TeraTableImpl_nativeNewMutation - (JNIEnv *, jobject, jlong, jbyteArray); +JNIEXPORT jlong JNICALL +Java_com_baidu_tera_client_TeraTableImpl_nativeNewMutation(JNIEnv *, jobject, jlong, jbyteArray); /* * Class: com_baidu_tera_client_TeraTableImpl * Method: nativeApplyMutation * Signature: (JJ)Z */ -JNIEXPORT jboolean JNICALL Java_com_baidu_tera_client_TeraTableImpl_nativeApplyMutation - (JNIEnv *, jobject, jlong, jlong); +JNIEXPORT jboolean JNICALL +Java_com_baidu_tera_client_TeraTableImpl_nativeApplyMutation(JNIEnv *, jobject, jlong, jlong); /* * Class: com_baidu_tera_client_TeraTableImpl * Method: nativeFlushCommits * Signature: (J)V */ -JNIEXPORT void JNICALL Java_com_baidu_tera_client_TeraTableImpl_nativeFlushCommits - (JNIEnv *, jobject, jlong); +JNIEXPORT void JNICALL +Java_com_baidu_tera_client_TeraTableImpl_nativeFlushCommits(JNIEnv *, jobject, jlong); /* * Class: com_baidu_tera_client_TeraTableImpl * Method: nativeNewReader * Signature: (J[B)J */ -JNIEXPORT jlong JNICALL Java_com_baidu_tera_client_TeraTableImpl_nativeNewReader - (JNIEnv *, jobject, jlong, jbyteArray); +JNIEXPORT jlong JNICALL +Java_com_baidu_tera_client_TeraTableImpl_nativeNewReader(JNIEnv *, jobject, jlong, jbyteArray); /* * Class: com_baidu_tera_client_TeraTableImpl * Method: nativeApplyReader * Signature: (JJ)J */ -JNIEXPORT jlong JNICALL Java_com_baidu_tera_client_TeraTableImpl_nativeApplyReader - (JNIEnv *, jobject, jlong, jlong); +JNIEXPORT jlong JNICALL +Java_com_baidu_tera_client_TeraTableImpl_nativeApplyReader(JNIEnv *, jobject, jlong, jlong); /* * Class: com_baidu_tera_client_TeraTableImpl * Method: nativeScan * Signature: (JJ)J */ -JNIEXPORT jlong JNICALL Java_com_baidu_tera_client_TeraTableImpl_nativeScan - (JNIEnv *, jobject, jlong, jlong); +JNIEXPORT jlong JNICALL +Java_com_baidu_tera_client_TeraTableImpl_nativeScan(JNIEnv *, jobject, jlong, jlong); #ifdef __cplusplus } diff --git a/src/sdk/murmur_hash_impl.cc b/src/sdk/murmur_hash_impl.cc new file mode 100644 index 000000000..f23cb53f8 --- /dev/null +++ b/src/sdk/murmur_hash_impl.cc @@ -0,0 +1,68 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#include "tera/hash.h" +#include +#include + +namespace tera { +/* + Murmurhash from http://sites.google.com/site/murmurhash/ + All code is released to the public domain. For business purposes, Murmurhash + is under the MIT license. +*/ +static uint64_t MurmurHash64A(const void *key, int len, unsigned int seed) { + const uint64_t m = 0xc6a4a7935bd1e995; + const int r = 47; + + uint64_t h = seed ^ (len * m); + + const uint64_t *data = (const uint64_t *)key; + const uint64_t *end = data + (len / 8); + + while (data != end) { + uint64_t k = *data++; + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + } + + const unsigned char *data2 = (const unsigned char *)data; + + switch (len & 7) { + case 7: + h ^= ((uint64_t)data2[6]) << 48; // fallthrough + case 6: + h ^= ((uint64_t)data2[5]) << 40; // fallthrough + case 5: + h ^= ((uint64_t)data2[4]) << 32; // fallthrough + case 4: + h ^= ((uint64_t)data2[3]) << 24; // fallthrough + case 3: + h ^= ((uint64_t)data2[2]) << 16; // fallthrough + case 2: + h ^= ((uint64_t)data2[1]) << 8; // fallthrough + case 1: + h ^= ((uint64_t)data2[0]); + h *= m; + }; + + h ^= h >> r; + h *= m; + h ^= h >> r; + + return h; +} + +std::string MurmurHash(const std::string &user_key) { + const static unsigned int kSeed = 823; + std::stringstream ss; + ss << std::setw(16) << std::setfill('0') << std::hex + << MurmurHash64A(&user_key[0], user_key.size(), kSeed); + return ss.str(); +} +} diff --git a/src/sdk/mutate_impl.cc b/src/sdk/mutate_impl.cc index 634fb3817..504ecff02 100644 --- a/src/sdk/mutate_impl.cc +++ b/src/sdk/mutate_impl.cc @@ -5,6 +5,7 @@ #include "common/base/string_format.h" #include "io/coding.h" #include "sdk/mutate_impl.h" +#include "sdk/sdk_utils.h" #include "common/timer.h" namespace tera { @@ -16,456 +17,379 @@ RowMutationImpl::RowMutationImpl(Table* table, const std::string& row_key) callback_(NULL), user_context_(NULL), timeout_ms_(0), - retry_times_(0), finish_(false), finish_cond_(&finish_mutex_), commit_times_(0), on_finish_callback_(NULL), start_ts_(get_micros()), txn_(NULL) { - SetErrorIfInvalid(row_key, kRowkey); + SetMutationErrorIfInvalid(row_key, FieldType::kRowkey, &error_code_); } -RowMutationImpl::~RowMutationImpl() { -} +RowMutationImpl::~RowMutationImpl() {} void RowMutationImpl::Prepare(StatCallback cb) { - on_finish_callback_ = cb; - start_ts_ = get_micros(); + on_finish_callback_ = cb; + start_ts_ = get_micros(); } /// 重置,复用前必须调用 void RowMutationImpl::Reset(const std::string& row_key) { - row_key_ = row_key; - mu_seq_.clear(); - callback_ = NULL; - timeout_ms_ = 0; - retry_times_ = 0; - finish_ = false; - error_code_.SetFailed(ErrorCode::kOK); - commit_times_ = 0; -} - -void RowMutationImpl::SetErrorIfInvalid(const std::string& str, - const FieldLimit& field) { - std::string reason = error_code_.GetReason(); - switch (field) { - case kRowkey: { - if (str.size() >= kRowkeySize) { - reason.append(" Bad parameters: rowkey should < 64KB"); - error_code_.SetFailed(ErrorCode::kBadParam, reason); - } - } break; - case kQualifier: { - if (str.size() >= kQualifierSize) { - reason.append(" Bad parameters: qualifier should < 64KB"); - error_code_.SetFailed(ErrorCode::kBadParam, reason); - } - } break; - case kValue: { - if (str.size() >= kValueSize) { - reason.append(" Bad parameters: value should < 32MB"); - error_code_.SetFailed(ErrorCode::kBadParam, reason); - } - } break; - default: { - abort(); - } - } + row_key_ = row_key; + mu_seq_.clear(); + callback_ = NULL; + SdkTask::ResetRetryTimes(); + timeout_ms_ = 0; + finish_ = false; + error_code_.SetFailed(ErrorCode::kOK); + commit_times_ = 0; } // 原子加一个Cell void RowMutationImpl::AddInt64(const std::string& family, const std::string& qualifier, const int64_t delta) { - SetErrorIfInvalid(qualifier, kQualifier); - std::string delta_str((char*)&delta, sizeof(int64_t)); - RowMutation::Mutation& mutation = AddMutation(); - mutation.type = RowMutation::kAddInt64; - mutation.family = family; - mutation.qualifier = qualifier; - mutation.timestamp = get_micros(); // 为了避免retry引起的重复加,所以自带时间戳 - mutation.value.assign(delta_str); + SetMutationErrorIfInvalid(qualifier, FieldType::kQualifier, &error_code_); + std::string delta_str((char*)&delta, sizeof(int64_t)); + RowMutation::Mutation& mutation = AddMutation(); + mutation.type = RowMutation::kAddInt64; + mutation.family = family; + mutation.qualifier = qualifier; + mutation.timestamp = get_micros(); // 为了避免retry引起的重复加,所以自带时间戳 + mutation.value.assign(delta_str); } // 原子加一个Cell void RowMutationImpl::Add(const std::string& family, const std::string& qualifier, const int64_t delta) { - SetErrorIfInvalid(qualifier, kQualifier); - char delta_buf[sizeof(int64_t)]; - RowMutation::Mutation& mutation = AddMutation(); - mutation.type = RowMutation::kAdd; - mutation.family = family; - mutation.qualifier = qualifier; - mutation.timestamp = get_micros(); // 为了避免retry引起的重复加,所以自带时间戳 - io::EncodeBigEndian(delta_buf, delta); - mutation.value.assign(delta_buf, sizeof(delta_buf)); + SetMutationErrorIfInvalid(qualifier, FieldType::kQualifier, &error_code_); + char delta_buf[sizeof(int64_t)]; + RowMutation::Mutation& mutation = AddMutation(); + mutation.type = RowMutation::kAdd; + mutation.family = family; + mutation.qualifier = qualifier; + mutation.timestamp = get_micros(); // 为了避免retry引起的重复加,所以自带时间戳 + io::EncodeBigEndian(delta_buf, delta); + mutation.value.assign(delta_buf, sizeof(delta_buf)); } // 原子操作:如果不存在才能Put成功 void RowMutationImpl::PutIfAbsent(const std::string& family, const std::string& qualifier, const std::string& value) { - SetErrorIfInvalid(qualifier, kQualifier); - SetErrorIfInvalid(value, kValue); - RowMutation::Mutation& mutation = AddMutation(); - mutation.type = RowMutation::kPutIfAbsent; - mutation.family = family; - mutation.qualifier = qualifier; - mutation.timestamp = kLatestTimestamp; - mutation.value = value; + SetMutationErrorIfInvalid(qualifier, FieldType::kQualifier, &error_code_); + SetMutationErrorIfInvalid(value, FieldType::kValue, &error_code_); + RowMutation::Mutation& mutation = AddMutation(); + mutation.type = RowMutation::kPutIfAbsent; + mutation.family = family; + mutation.qualifier = qualifier; + mutation.timestamp = kLatestTimestamp; + mutation.value = value; } void RowMutationImpl::Append(const std::string& family, const std::string& qualifier, const std::string& value) { - SetErrorIfInvalid(qualifier, kQualifier); - SetErrorIfInvalid(value, kValue); - RowMutation::Mutation& mutation = AddMutation(); - mutation.type = RowMutation::kAppend; - mutation.family = family; - mutation.qualifier = qualifier; - mutation.timestamp = get_micros(); - mutation.value = value; + SetMutationErrorIfInvalid(qualifier, FieldType::kQualifier, &error_code_); + SetMutationErrorIfInvalid(value, FieldType::kValue, &error_code_); + RowMutation::Mutation& mutation = AddMutation(); + mutation.type = RowMutation::kAppend; + mutation.family = family; + mutation.qualifier = qualifier; + mutation.timestamp = get_micros(); + mutation.value = value; } /// 修改一个列 void RowMutationImpl::Put(const std::string& family, const std::string& qualifier, const int64_t value, int64_t timestamp) { - std::string value_str((char*)&value, sizeof(int64_t)); - Put(family, qualifier, value_str, timestamp); + std::string value_str((char*)&value, sizeof(int64_t)); + Put(family, qualifier, value_str, timestamp); } /// 带TTL修改一个列 void RowMutationImpl::Put(const std::string& family, const std::string& qualifier, const std::string& value, int32_t ttl) { - SetErrorIfInvalid(qualifier, kQualifier); - SetErrorIfInvalid(value, kValue); - RowMutation::Mutation& mutation = AddMutation(); - mutation.type = RowMutation::kPut; - mutation.family = family; - mutation.qualifier = qualifier; - mutation.timestamp = kLatestTimestamp; - mutation.value = value; - mutation.ttl = ttl; + SetMutationErrorIfInvalid(family, FieldType::kKVColumnFamily, &error_code_); + SetMutationErrorIfInvalid(qualifier, FieldType::kKVQualifier, &error_code_); + SetMutationErrorIfInvalid(value, FieldType::kValue, &error_code_); + RowMutation::Mutation& mutation = AddMutation(); + mutation.type = RowMutation::kPut; + mutation.family = family; + mutation.qualifier = qualifier; + mutation.timestamp = kLatestTimestamp; + mutation.value = value; + mutation.ttl = ttl; } /// 修改一个列的特定版本 void RowMutationImpl::Put(const std::string& family, const std::string& qualifier, const std::string& value, int64_t timestamp) { - SetErrorIfInvalid(qualifier, kQualifier); - SetErrorIfInvalid(value, kValue); - RowMutation::Mutation& mutation = AddMutation(); - mutation.type = RowMutation::kPut; - mutation.family = family; - mutation.qualifier = qualifier; - if (timestamp == -1) { - mutation.timestamp = kLatestTimestamp; - } else { - mutation.timestamp = timestamp; - } - mutation.value = value; - mutation.ttl = -1; + SetMutationErrorIfInvalid(qualifier, FieldType::kQualifier, &error_code_); + SetMutationErrorIfInvalid(value, FieldType::kValue, &error_code_); + RowMutation::Mutation& mutation = AddMutation(); + mutation.type = RowMutation::kPut; + mutation.family = family; + mutation.qualifier = qualifier; + if (timestamp == -1) { + mutation.timestamp = kLatestTimestamp; + } else { + mutation.timestamp = timestamp; + } + mutation.value = value; + mutation.ttl = -1; } void RowMutationImpl::Put(const std::string& family, const std::string& qualifier, int64_t timestamp, const std::string& value) { - SetErrorIfInvalid(qualifier, kQualifier); - SetErrorIfInvalid(value, kValue); - RowMutation::Mutation& mutation = AddMutation(); - mutation.type = RowMutation::kPut; - mutation.family = family; - mutation.qualifier = qualifier; - mutation.timestamp = timestamp; - mutation.value = value; - mutation.ttl = -1; + Put(family, qualifier, value, timestamp); } /// 带TTL的修改一个列的特定版本 void RowMutationImpl::Put(const std::string& family, const std::string& qualifier, int64_t timestamp, const std::string& value, int32_t ttl) { - SetErrorIfInvalid(qualifier, kQualifier); - SetErrorIfInvalid(value, kValue); - RowMutation::Mutation& mutation = AddMutation(); - mutation.type = RowMutation::kPut; - mutation.family = family; - mutation.qualifier = qualifier; - mutation.timestamp = timestamp; - mutation.value = value; - mutation.ttl = ttl; + SetMutationErrorIfInvalid(family, FieldType::kKVColumnFamily, &error_code_); + SetMutationErrorIfInvalid(qualifier, FieldType::kKVQualifier, &error_code_); + SetMutationErrorIfInvalid(value, FieldType::kValue, &error_code_); + RowMutation::Mutation& mutation = AddMutation(); + mutation.type = RowMutation::kPut; + mutation.family = family; + mutation.qualifier = qualifier; + mutation.timestamp = timestamp; + mutation.value = value; + mutation.ttl = ttl; } /// 修改默认列 void RowMutationImpl::Put(const int64_t value) { - std::string value_str((char*)&value, sizeof(int64_t)); - Put(value_str, -1); + std::string value_str((char*)&value, sizeof(int64_t)); + Put(value_str, -1); } /// 带TTL的修改默认列 void RowMutationImpl::Put(const std::string& value, int32_t ttl) { - Put("", "", kLatestTimestamp, value, ttl); + Put("", "", kLatestTimestamp, value, ttl); } /// 修改默认列的特定版本 void RowMutationImpl::Put(int64_t timestamp, const std::string& value) { - Put("", "", timestamp, value); + Put("", "", timestamp, value); } /// 删除一个列的最新版本 -void RowMutationImpl::DeleteColumn(const std::string& family, - const std::string& qualifier) { - DeleteColumn(family, qualifier, kLatestTimestamp); +void RowMutationImpl::DeleteColumn(const std::string& family, const std::string& qualifier) { + DeleteColumn(family, qualifier, kLatestTimestamp); } /// 删除一个列的指定版本 -void RowMutationImpl::DeleteColumn(const std::string& family, - const std::string& qualifier, +void RowMutationImpl::DeleteColumn(const std::string& family, const std::string& qualifier, int64_t timestamp) { - SetErrorIfInvalid(qualifier, kQualifier); - RowMutation::Mutation& mutation = AddMutation(); - mutation.type = RowMutation::kDeleteColumn; - mutation.family = family; - mutation.qualifier = qualifier; - mutation.timestamp = timestamp; - - /* - mutation.set_type(kDeleteColumn); - mutation.set_family(family); - mutation.set_qualifier(qualifier); - mutation.set_timestamp(timestamp); - */ + SetMutationErrorIfInvalid(qualifier, FieldType::kQualifier, &error_code_); + RowMutation::Mutation& mutation = AddMutation(); + mutation.type = RowMutation::kDeleteColumn; + mutation.family = family; + mutation.qualifier = qualifier; + mutation.timestamp = timestamp; } /// 删除一个列的指定范围版本 -void RowMutationImpl::DeleteColumns(const std::string& family, - const std::string& qualifier, +void RowMutationImpl::DeleteColumns(const std::string& family, const std::string& qualifier, int64_t timestamp) { - SetErrorIfInvalid(qualifier, kQualifier); - RowMutation::Mutation& mutation = AddMutation(); - mutation.type = RowMutation::kDeleteColumns; - mutation.family = family; - mutation.qualifier = qualifier; - if (timestamp == -1) { - mutation.timestamp = kLatestTimestamp; - } else { - mutation.timestamp = timestamp; - } + SetMutationErrorIfInvalid(qualifier, FieldType::kQualifier, &error_code_); + RowMutation::Mutation& mutation = AddMutation(); + mutation.type = RowMutation::kDeleteColumns; + mutation.family = family; + mutation.qualifier = qualifier; + if (timestamp == -1) { + mutation.timestamp = kLatestTimestamp; + } else { + mutation.timestamp = timestamp; + } } /// 删除一个列族的所有列的指定范围版本 -void RowMutationImpl::DeleteFamily(const std::string& family, - int64_t timestamp) { - RowMutation::Mutation& mutation = AddMutation(); - mutation.type = RowMutation::kDeleteFamily; - mutation.family = family; - if (timestamp == -1) { - mutation.timestamp = kLatestTimestamp; - } else { - mutation.timestamp = timestamp; - } +void RowMutationImpl::DeleteFamily(const std::string& family, int64_t timestamp) { + RowMutation::Mutation& mutation = AddMutation(); + mutation.type = RowMutation::kDeleteFamily; + mutation.family = family; + if (timestamp == -1) { + mutation.timestamp = kLatestTimestamp; + } else { + mutation.timestamp = timestamp; + } } /// 删除整行的指定范围版本 void RowMutationImpl::DeleteRow(int64_t timestamp) { - RowMutation::Mutation& mutation = AddMutation(); - mutation.type = RowMutation::kDeleteRow; - if (timestamp == -1) { - mutation.timestamp = kLatestTimestamp; - } else { - mutation.timestamp = timestamp; - } + RowMutation::Mutation& mutation = AddMutation(); + mutation.type = RowMutation::kDeleteRow; + if (timestamp == -1) { + mutation.timestamp = kLatestTimestamp; + } else { + mutation.timestamp = timestamp; + } } /// 修改锁住的行, 必须提供行锁 -void RowMutationImpl::SetLock(RowLock* rowlock) { -} +void RowMutationImpl::SetLock(RowLock* rowlock) {} /// 设置超时时间(只影响当前操作,不影响Table::SetWriteTimeout设置的默认写超时) -void RowMutationImpl::SetTimeOut(int64_t timeout_ms) { - timeout_ms_ = timeout_ms; -} +void RowMutationImpl::SetTimeOut(int64_t timeout_ms) { timeout_ms_ = timeout_ms; } -int64_t RowMutationImpl::TimeOut() { - return timeout_ms_; -} +int64_t RowMutationImpl::TimeOut() { return timeout_ms_; } /// 设置异步回调, 操作会异步返回 -void RowMutationImpl::SetCallBack(RowMutation::Callback callback) { - callback_ = callback; -} +void RowMutationImpl::SetCallBack(RowMutation::Callback callback) { callback_ = callback; } /// 获得回调函数 -RowMutation::Callback RowMutationImpl::GetCallBack() { - return callback_; -} +RowMutation::Callback RowMutationImpl::GetCallBack() { return callback_; } /// 设置用户上下文,可在回调函数中获取 -void RowMutationImpl::SetContext(void* context) { - user_context_ = context; -} +void RowMutationImpl::SetContext(void* context) { user_context_ = context; } /// 获得用户上下文 -void* RowMutationImpl::GetContext() { - return user_context_; -} +void* RowMutationImpl::GetContext() { return user_context_; } /// 获得结果错误码 -const ErrorCode& RowMutationImpl::GetError() { - return error_code_; -} +const ErrorCode& RowMutationImpl::GetError() { return error_code_; } /// 是否异步操作 -bool RowMutationImpl::IsAsync() { - return (callback_ != NULL); -} +bool RowMutationImpl::IsAsync() { return (callback_ != NULL); } /// 异步操作是否完成 bool RowMutationImpl::IsFinished() const { - MutexLock lock(&finish_mutex_); - return finish_; + MutexLock lock(&finish_mutex_); + return finish_; } /// 返回row_key -const std::string& RowMutationImpl::RowKey() { - return row_key_; +const std::string& RowMutationImpl::RowKey() { return row_key_; } + +std::string RowMutationImpl::InternalRowKey() { + if (table_ && table_->IsHashTable()) { + return table_->GetHashMethod()(RowKey()); + } + return RowKey(); } /// mutation数量 -uint32_t RowMutationImpl::MutationNum() { - return mu_seq_.size(); -} +uint32_t RowMutationImpl::MutationNum() { return mu_seq_.size(); } /// mutation总大小 uint32_t RowMutationImpl::Size() { - uint32_t total_size = 0; - for (size_t i = 0; i < mu_seq_.size(); ++i) { - total_size += - + row_key_.size() - + mu_seq_[i].family.size() - + mu_seq_[i].qualifier.size() - + mu_seq_[i].value.size() - + sizeof(mu_seq_[i].timestamp); - } - return total_size; + uint32_t total_size = 0; + for (size_t i = 0; i < mu_seq_.size(); ++i) { + total_size += +InternalRowKey().size() + mu_seq_[i].family.size() + + mu_seq_[i].qualifier.size() + mu_seq_[i].value.size() + + sizeof(mu_seq_[i].timestamp); + } + return total_size; } /// 返回mutation -const RowMutation::Mutation& RowMutationImpl::GetMutation(uint32_t index) { - return mu_seq_[index]; -} - -/// 重试次数 -uint32_t RowMutationImpl::RetryTimes() { - return retry_times_; -} - -/// 重试计数加一 -void RowMutationImpl::IncRetryTimes() { - retry_times_++; -} +const RowMutation::Mutation& RowMutationImpl::GetMutation(uint32_t index) { return mu_seq_[index]; } /// 设置错误码 -void RowMutationImpl::SetError(ErrorCode::ErrorCodeType err, - const std::string& reason) { - error_code_.SetFailed(err, reason); +void RowMutationImpl::SetError(ErrorCode::ErrorCodeType err, const std::string& reason) { + error_code_.SetFailed(err, reason); } /// 等待结束 void RowMutationImpl::Wait() { - MutexLock lock(&finish_mutex_); - while (!finish_) { - finish_cond_.Wait(); - } + MutexLock lock(&finish_mutex_); + while (!finish_) { + finish_cond_.Wait(); + } } void RowMutationImpl::RunCallback() { - // staticstic - if (on_finish_callback_) { - on_finish_callback_(table_, this); - } - if (callback_) { - callback_(this); - } else { - MutexLock lock(&finish_mutex_); - finish_ = true; - finish_cond_.Signal(); - } + // staticstic + if (on_finish_callback_) { + on_finish_callback_(table_, this); + } + if (callback_) { + callback_(this); + } else { + MutexLock lock(&finish_mutex_); + finish_ = true; + finish_cond_.Signal(); + } } void RowMutationImpl::Concatenate(RowMutationImpl& row_mu) { - uint32_t mutation_num = row_mu.MutationNum(); - for (size_t i = 0; i < mutation_num; i++) { - AddMutation() = row_mu.GetMutation(i); - } + uint32_t mutation_num = row_mu.MutationNum(); + for (size_t i = 0; i < mutation_num; i++) { + AddMutation() = row_mu.GetMutation(i); + } } RowMutation::Mutation& RowMutationImpl::AddMutation() { - mu_seq_.resize(mu_seq_.size() + 1); - return mu_seq_.back(); + mu_seq_.resize(mu_seq_.size() + 1); + return mu_seq_.back(); } void SerializeMutation(const RowMutation::Mutation& src, tera::Mutation* dst) { - switch (src.type) { - case RowMutation::kPut: - dst->set_type(tera::kPut); - dst->set_family(src.family); - dst->set_qualifier(src.qualifier); - dst->set_timestamp(src.timestamp); - dst->set_value(src.value); - dst->set_ttl(src.ttl); - break; - case RowMutation::kAdd: - dst->set_type(tera::kAdd); - dst->set_family(src.family); - dst->set_qualifier(src.qualifier); - dst->set_timestamp(src.timestamp); - dst->set_value(src.value); - break; - case RowMutation::kAddInt64: - dst->set_type(tera::kAddInt64); - dst->set_family(src.family); - dst->set_qualifier(src.qualifier); - dst->set_timestamp(src.timestamp); - dst->set_value(src.value); - break; - case RowMutation::kPutIfAbsent: - dst->set_type(tera::kPutIfAbsent); - dst->set_family(src.family); - dst->set_qualifier(src.qualifier); - dst->set_timestamp(src.timestamp); - dst->set_value(src.value); - break; - case RowMutation::kAppend: - dst->set_type(tera::kAppend); - dst->set_family(src.family); - dst->set_qualifier(src.qualifier); - dst->set_timestamp(src.timestamp); - dst->set_value(src.value); - break; - case RowMutation::kDeleteColumn: - dst->set_type(tera::kDeleteColumn); - dst->set_family(src.family); - dst->set_qualifier(src.qualifier); - dst->set_timestamp(src.timestamp); - break; - case RowMutation::kDeleteColumns: - dst->set_type(tera::kDeleteColumns); - dst->set_family(src.family); - dst->set_qualifier(src.qualifier); - dst->set_timestamp(src.timestamp); - break; - case RowMutation::kDeleteFamily: - dst->set_type(tera::kDeleteFamily); - dst->set_family(src.family); - dst->set_timestamp(src.timestamp); - break; - case RowMutation::kDeleteRow: - dst->set_type(tera::kDeleteRow); - dst->set_timestamp(src.timestamp); - break; - default: - assert(false); - break; - } -} - -} // namespace tera + switch (src.type) { + case RowMutation::kPut: + dst->set_type(tera::kPut); + dst->set_family(src.family); + dst->set_qualifier(src.qualifier); + dst->set_timestamp(src.timestamp); + dst->set_value(src.value); + dst->set_ttl(src.ttl); + break; + case RowMutation::kAdd: + dst->set_type(tera::kAdd); + dst->set_family(src.family); + dst->set_qualifier(src.qualifier); + dst->set_timestamp(src.timestamp); + dst->set_value(src.value); + break; + case RowMutation::kAddInt64: + dst->set_type(tera::kAddInt64); + dst->set_family(src.family); + dst->set_qualifier(src.qualifier); + dst->set_timestamp(src.timestamp); + dst->set_value(src.value); + break; + case RowMutation::kPutIfAbsent: + dst->set_type(tera::kPutIfAbsent); + dst->set_family(src.family); + dst->set_qualifier(src.qualifier); + dst->set_timestamp(src.timestamp); + dst->set_value(src.value); + break; + case RowMutation::kAppend: + dst->set_type(tera::kAppend); + dst->set_family(src.family); + dst->set_qualifier(src.qualifier); + dst->set_timestamp(src.timestamp); + dst->set_value(src.value); + break; + case RowMutation::kDeleteColumn: + dst->set_type(tera::kDeleteColumn); + dst->set_family(src.family); + dst->set_qualifier(src.qualifier); + dst->set_timestamp(src.timestamp); + break; + case RowMutation::kDeleteColumns: + dst->set_type(tera::kDeleteColumns); + dst->set_family(src.family); + dst->set_qualifier(src.qualifier); + dst->set_timestamp(src.timestamp); + break; + case RowMutation::kDeleteFamily: + dst->set_type(tera::kDeleteFamily); + dst->set_family(src.family); + dst->set_timestamp(src.timestamp); + break; + case RowMutation::kDeleteRow: + dst->set_type(tera::kDeleteRow); + dst->set_timestamp(src.timestamp); + break; + default: + assert(false); + break; + } +} + +} // namespace tera /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/sdk/mutate_impl.h b/src/sdk/mutate_impl.h index da20577b4..c37799cdf 100644 --- a/src/sdk/mutate_impl.h +++ b/src/sdk/mutate_impl.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_SDK_MUTATE_IMPL_H_ -#define TERA_SDK_MUTATE_IMPL_H_ +#ifndef TERA_SDK_MUTATE_IMPL_H_ +#define TERA_SDK_MUTATE_IMPL_H_ #include #include @@ -20,193 +20,180 @@ namespace tera { class TableImpl; class RowMutationImpl : public RowMutation, public SdkTask { -public: - enum FieldLimit { - kRowkey = 0, - kColumnFamily, - kQualifier, - kTimeStamp, - kValue - }; + public: + RowMutationImpl(Table* table, const std::string& row_key); - RowMutationImpl(Table* table, const std::string& row_key); - ~RowMutationImpl(); + ~RowMutationImpl(); - /// 重置 - void Reset(const std::string& row_key); + /// 重置 + void Reset(const std::string& row_key); - /// 修改一个列 - void Put(const std::string& family, const std::string& qualifier, - const std::string& value, int64_t timestamp); + /// 修改一个列 + void Put(const std::string& family, const std::string& qualifier, const std::string& value, + int64_t timestamp); - void Put(const std::string& family, const std::string& qualifier, - const int64_t value, int64_t timestamp); + void Put(const std::string& family, const std::string& qualifier, const int64_t value, + int64_t timestamp); - /// 带TTL的修改一个列 - void Put(const std::string& family, const std::string& qualifier, - const std::string& value, int32_t ttl); + /// 带TTL的修改一个列 + void Put(const std::string& family, const std::string& qualifier, const std::string& value, + int32_t ttl); - /// 修改一个列的特定版本 - void Put(const std::string& family, const std::string& qualifier, - int64_t timestamp, const std::string& value); + /// 修改一个列的特定版本 + void Put(const std::string& family, const std::string& qualifier, int64_t timestamp, + const std::string& value); - /// 带TTL的修改一个列的特定版本 - virtual void Put(const std::string& family, const std::string& qualifier, - int64_t timestamp, const std::string& value, int32_t ttl); + /// 带TTL的修改一个列的特定版本 + virtual void Put(const std::string& family, const std::string& qualifier, int64_t timestamp, + const std::string& value, int32_t ttl); - /// 修改默认列 - void Put(const int64_t value); + /// 修改默认列 + void Put(const int64_t value); - /// 带TTL的修改默认列 - virtual void Put(const std::string& value, int32_t ttl); + /// 带TTL的修改默认列 + virtual void Put(const std::string& value, int32_t ttl); - /// 修改默认列的特定版本 - void Put(int64_t timestamp, const std::string& value); + /// 修改默认列的特定版本 + void Put(int64_t timestamp, const std::string& value); - /// 原子加一个Cell - void Add(const std::string& family, const std::string& qualifier, const int64_t delta); - /// 原子加一个Cell - void AddInt64(const std::string& family, const std::string& qualifier, const int64_t delta); + /// 原子加一个Cell + void Add(const std::string& family, const std::string& qualifier, const int64_t delta); + /// 原子加一个Cell + void AddInt64(const std::string& family, const std::string& qualifier, const int64_t delta); - // 原子操作:如果不存在才能Put成功 - void PutIfAbsent(const std::string& family, const std::string& qualifier, - const std::string& value); + // 原子操作:如果不存在才能Put成功 + void PutIfAbsent(const std::string& family, const std::string& qualifier, + const std::string& value); - /// 原子操作:追加内容到一个Cell - void Append(const std::string& family, const std::string& qualifier, - const std::string& value); + /// 原子操作:追加内容到一个Cell + void Append(const std::string& family, const std::string& qualifier, const std::string& value); - /// 删除一个列的最新版本 - void DeleteColumn(const std::string& family, const std::string& qualifier); + /// 删除一个列的最新版本 + void DeleteColumn(const std::string& family, const std::string& qualifier); - /// 删除一个列的指定版本 - void DeleteColumn(const std::string& family, const std::string& qualifier, - int64_t timestamp); + /// 删除一个列的指定版本 + void DeleteColumn(const std::string& family, const std::string& qualifier, int64_t timestamp); - /// 删除一个列的指定范围版本 - void DeleteColumns(const std::string& family, const std::string& qualifier, - int64_t timestamp); + /// 删除一个列的指定范围版本 + void DeleteColumns(const std::string& family, const std::string& qualifier, int64_t timestamp); - /// 删除一个列族的所有列的指定范围版本 - void DeleteFamily(const std::string& family, int64_t timestamp); + /// 删除一个列族的所有列的指定范围版本 + void DeleteFamily(const std::string& family, int64_t timestamp); - /// 删除整行的指定范围版本 - void DeleteRow(int64_t timestamp); + /// 删除整行的指定范围版本 + void DeleteRow(int64_t timestamp); - /// 修改锁住的行, 必须提供行锁 - void SetLock(RowLock* rowlock); + /// 修改锁住的行, 必须提供行锁 + void SetLock(RowLock* rowlock); - /// 设置超时时间(只影响当前操作,不影响Table::SetWriteTimeout设置的默认写超时) - void SetTimeOut(int64_t timeout_ms); + /// 设置超时时间(只影响当前操作,不影响Table::SetWriteTimeout设置的默认写超时) + void SetTimeOut(int64_t timeout_ms); - int64_t TimeOut(); + int64_t TimeOut(); - /// 设置异步回调, 操作会异步返回 - void SetCallBack(RowMutation::Callback callback); + /// 设置异步回调, 操作会异步返回 + void SetCallBack(RowMutation::Callback callback); - RowMutation::Callback GetCallBack(); + RowMutation::Callback GetCallBack(); - /// 设置用户上下文,可在回调函数中获取 - void SetContext(void* context); + /// 设置用户上下文,可在回调函数中获取 + void SetContext(void* context); - void* GetContext(); + void* GetContext(); - /// 获得结果错误码 - const ErrorCode& GetError(); + /// 获得结果错误码 + const ErrorCode& GetError(); - /// 设置异步返回 - bool IsAsync(); + /// 设置异步返回 + bool IsAsync(); - /// 异步操作是否完成 - bool IsFinished() const; + /// 异步操作是否完成 + bool IsFinished() const; - /// 返回row_key - const std::string& RowKey(); + /// 返回row_key + const std::string& RowKey(); - /// mutation数量 - uint32_t MutationNum(); + /// mutation数量 + uint32_t MutationNum(); - /// mutation总大小 - uint32_t Size(); + /// mutation总大小 + uint32_t Size(); - /// 返回mutation - const RowMutation::Mutation& GetMutation(uint32_t index); + uint32_t RetryTimes() { return SdkTask::RetryTimes(); } - /// 重试次数 - uint32_t RetryTimes(); + /// 返回mutation + const RowMutation::Mutation& GetMutation(uint32_t index); - Table* GetTable() { return (Table*)table_; } + Table* GetTable() { return (Table*)table_; } -public: - /// 以下接口仅内部使用,不开放给用户 + public: + /// 以下接口仅内部使用,不开放给用户 - void Prepare(StatCallback cb); - int64_t GetStartTime() { return start_ts_;} + void Prepare(StatCallback cb); + int64_t GetStartTime() { return start_ts_; } - /// 重试计数加一 - void IncRetryTimes(); + /// 设置错误码 + void SetError(ErrorCode::ErrorCodeType err, const std::string& reason = ""); - /// 设置错误码 - void SetError(ErrorCode::ErrorCodeType err , const std::string& reason = ""); + /// 等待结束 + void Wait(); - /// 等待结束 - void Wait(); + /// 执行异步回调 + void RunCallback(); - /// 执行异步回调 - void RunCallback(); + /// 增加引用 + void Ref(); - /// 增加引用 - void Ref(); + /// 释放引用 + void Unref(); - /// 释放引用 - void Unref(); + void AddCommitTimes() { commit_times_++; } + int64_t GetCommitTimes() { return commit_times_; } - void SetErrorIfInvalid(const std::string& str, - const FieldLimit& field); + /// 返回所属事务 + Transaction* GetTransaction() { return txn_; } - void AddCommitTimes() { commit_times_++; } - int64_t GetCommitTimes() { return commit_times_; } + /// 设置所属事务 + void SetTransaction(Transaction* txn) { txn_ = txn; } - /// 返回所属事务 - Transaction* GetTransaction() { return txn_; } + /// 连接 + void Concatenate(RowMutationImpl& row_mu); - /// 设置所属事务 - void SetTransaction(Transaction* txn) { txn_ = txn; } + /// tera 实际读写的key, 若是hash_table, + /// 会返回签名后的 row_key + /// 否则返回 row_key, + std::string InternalRowKey(); - /// 连接 - void Concatenate(RowMutationImpl& row_mu); + protected: + /// 增加一个操作 + RowMutation::Mutation& AddMutation(); -protected: - /// 增加一个操作 - RowMutation::Mutation& AddMutation(); + private: + Table* table_; + std::string row_key_; + std::vector mu_seq_; -private: - Table* table_; - std::string row_key_; - std::vector mu_seq_; + RowMutation::Callback callback_; + void* user_context_; + int64_t timeout_ms_; - RowMutation::Callback callback_; - void* user_context_; - int64_t timeout_ms_; - uint32_t retry_times_; + bool finish_; + ErrorCode error_code_; + mutable Mutex finish_mutex_; + common::CondVar finish_cond_; - bool finish_; - ErrorCode error_code_; - mutable Mutex finish_mutex_; - common::CondVar finish_cond_; + /// 记录此mutation被提交到ts的次数 + int64_t commit_times_; - /// 记录此mutation被提交到ts的次数 - int64_t commit_times_; + StatCallback on_finish_callback_; + int64_t start_ts_; - StatCallback on_finish_callback_; - int64_t start_ts_; - - /// 所属事务 - Transaction* txn_; + /// 所属事务 + Transaction* txn_; }; void SerializeMutation(const RowMutation::Mutation& src, tera::Mutation* dst); -} // namespace tera +} // namespace tera #endif // TERA_SDK_MUTATE_IMPL_H_ diff --git a/src/sdk/python/TeraSdk.py b/src/sdk/python/TeraSdk.py index 090c5e152..e60b8455f 100644 --- a/src/sdk/python/TeraSdk.py +++ b/src/sdk/python/TeraSdk.py @@ -107,16 +107,6 @@ def SetBufferSize(self, buffer_size): """ lib.tera_scan_descriptor_set_buffer_size(self.desc, buffer_size) - def SetIsAsync(self, is_async): - """ - sdk内部启用并行scan操作,加快scan速率 - 开启或者不开启,给用户的逻辑完全一样,默认不开启 - - Args: - is_async(bool): 是否启用并行scan - """ - lib.tera_scan_descriptor_set_is_async(self.desc, is_async) - def SetPackInterval(self, interval): """ 设置scan操作的超时时长,单位ms @@ -147,13 +137,6 @@ def AddColumnFamily(self, cf): """ lib.tera_scan_descriptor_add_column_family(self.desc, cf) - def IsAsync(self): - """ - Returns: - (bool) 当前scan操作是否为async方式 - """ - return lib.tera_scan_descriptor_is_async(self.desc) - def SetTimeRange(self, start, end): """ 设置返回版本的时间范围 @@ -167,17 +150,6 @@ def SetTimeRange(self, start, end): """ lib.tera_scan_descriptor_set_time_range(self.desc, start, end) - def SetFilter(self, filter_str): - """ - 设置过滤器(当前只支持比较初级的功能) - - Args: - filter_str(string): 过滤字符串 - Returns: - (bool) 返回True表示filter_str解析成功,支持这种过滤方式,否则表示解析失败 - """ - return lib.tera_scan_descriptor_set_filter(self.desc, filter_str) - class ResultStream(object): """ scan操作返回的输出流 @@ -925,9 +897,6 @@ def init_function_prototype_for_scan(): lib.tera_scan_descriptor_add_column_family.argtypes = [c_void_p, c_char_p] lib.tera_scan_descriptor_add_column_family.restype = None - lib.tera_scan_descriptor_is_async.argtypes = [c_void_p] - lib.tera_scan_descriptor_is_async.restype = c_bool - lib.tera_scan_descriptor_set_buffer_size.argtypes = [c_void_p, c_int64] lib.tera_scan_descriptor_set_buffer_size.restype = None @@ -937,9 +906,6 @@ def init_function_prototype_for_scan(): lib.tera_scan_descriptor_set_pack_interval.argtypes = [c_char_p, c_int64] lib.tera_scan_descriptor_set_pack_interval.restype = None - lib.tera_scan_descriptor_set_is_async.argtypes = [c_void_p, c_bool] - lib.tera_scan_descriptor_set_is_async.restype = None - lib.tera_scan_descriptor_set_max_versions.argtypes = [c_void_p, c_int32] lib.tera_scan_descriptor_set_max_versions.restype = None @@ -950,9 +916,6 @@ def init_function_prototype_for_scan(): c_int64, c_int64] lib.tera_scan_descriptor_set_time_range.restype = None - lib.tera_scan_descriptor_set_filter.argtypes = [c_void_p, c_char_p] - lib.tera_scan_descriptor_set_filter.restype = c_bool - def init_function_prototype_for_client(): """ client """ diff --git a/src/sdk/read_impl.cc b/src/sdk/read_impl.cc index 35738cc53..5b11a212a 100644 --- a/src/sdk/read_impl.cc +++ b/src/sdk/read_impl.cc @@ -22,313 +22,263 @@ RowReaderImpl::RowReaderImpl(TableImpl* table, const std::string& row_key) max_qualifiers_(std::numeric_limits::max()), snapshot_id_(0), timeout_ms_(0), - retry_times_(0), result_pos_(0), commit_times_(0), on_finish_callback_(NULL), start_ts_(get_micros()), - txn_(NULL) { -} + txn_(NULL) {} -RowReaderImpl::~RowReaderImpl() { -} +RowReaderImpl::~RowReaderImpl() {} void RowReaderImpl::Prepare(StatCallback cb) { - on_finish_callback_ = cb; - start_ts_ = get_micros(); + on_finish_callback_ = cb; + start_ts_ = get_micros(); } /// 设置读取特定版本 -void RowReaderImpl::SetTimestamp(int64_t ts) { - SetTimeRange(ts, ts); -} +void RowReaderImpl::SetTimestamp(int64_t ts) { SetTimeRange(ts, ts); } -int64_t RowReaderImpl::GetTimestamp() { - return ts_start_; -} +int64_t RowReaderImpl::GetTimestamp() { return ts_start_; } -void RowReaderImpl::AddColumnFamily(const std::string& cf_name) { - family_map_[cf_name].clear(); -} +void RowReaderImpl::AddColumnFamily(const std::string& cf_name) { family_map_[cf_name].clear(); } -void RowReaderImpl::AddColumn(const std::string& cf_name, - const std::string& qualifier) { - QualifierSet& qualifier_set = family_map_[cf_name]; - qualifier_set.insert(qualifier); +void RowReaderImpl::AddColumn(const std::string& cf_name, const std::string& qualifier) { + QualifierSet& qualifier_set = family_map_[cf_name]; + qualifier_set.insert(qualifier); } void RowReaderImpl::SetTimeRange(int64_t ts_start, int64_t ts_end) { - ts_start_ = ts_start; - ts_end_ = ts_end; + ts_start_ = ts_start; + ts_end_ = ts_end; } void RowReaderImpl::GetTimeRange(int64_t* ts_start, int64_t* ts_end) { - if (NULL != ts_start) { - *ts_start = ts_start_; - } - if (NULL != ts_end) { - *ts_end = ts_end_; - } + if (NULL != ts_start) { + *ts_start = ts_start_; + } + if (NULL != ts_end) { + *ts_end = ts_end_; + } } -void RowReaderImpl::SetMaxVersions(uint32_t max_version) { - max_version_ = max_version; -} +void RowReaderImpl::SetMaxVersions(uint32_t max_version) { max_version_ = max_version; } -uint32_t RowReaderImpl::GetMaxVersions() { - return max_version_; -} +uint32_t RowReaderImpl::GetMaxVersions() { return max_version_; } -void RowReaderImpl::SetMaxQualifiers(uint64_t max_qualifiers) { - max_qualifiers_ = max_qualifiers; -} -uint64_t RowReaderImpl::GetMaxQualifiers() { - return max_qualifiers_; -} +void RowReaderImpl::SetMaxQualifiers(uint64_t max_qualifiers) { max_qualifiers_ = max_qualifiers; } +uint64_t RowReaderImpl::GetMaxQualifiers() { return max_qualifiers_; } /// 设置超时时间(只影响当前操作,不影响Table::SetReadTimeout设置的默认读超时) -void RowReaderImpl::SetTimeOut(int64_t timeout_ms) { - timeout_ms_ = timeout_ms; -} +void RowReaderImpl::SetTimeOut(int64_t timeout_ms) { timeout_ms_ = timeout_ms; } -void RowReaderImpl::SetCallBack(RowReader::Callback callback) { - callback_ = callback; -} +void RowReaderImpl::SetCallBack(RowReader::Callback callback) { callback_ = callback; } -RowReader::Callback RowReaderImpl::GetCallBack() { - return callback_; -} +RowReader::Callback RowReaderImpl::GetCallBack() { return callback_; } /// 设置用户上下文,可在回调函数中获取 -void RowReaderImpl::SetContext(void* context) { - user_context_ = context; -} +void RowReaderImpl::SetContext(void* context) { user_context_ = context; } -void* RowReaderImpl::GetContext() { - return user_context_; -} +void* RowReaderImpl::GetContext() { return user_context_; } /// 设置异步返回 -void RowReaderImpl::SetAsync() { -} +void RowReaderImpl::SetAsync() {} /// 异步操作是否完成 bool RowReaderImpl::IsFinished() const { - MutexLock lock(&finish_mutex_); - return finish_; + MutexLock lock(&finish_mutex_); + return finish_; } /// 获得结果错误码 -ErrorCode RowReaderImpl::GetError() { - return error_code_; -} +ErrorCode RowReaderImpl::GetError() { return error_code_; } /// 是否到达结束标记 bool RowReaderImpl::Done() { - if (result_pos_ < result_.key_values_size()) { - return false; - } - return true; + if (result_pos_ < result_.key_values_size()) { + return false; + } + return true; } /// 迭代下一个cell -void RowReaderImpl::Next() { - result_pos_++; -} +void RowReaderImpl::Next() { result_pos_++; } /// 读取的结果 std::string RowReaderImpl::Value() { - if (result_.key_values(result_pos_).has_value()) { - return result_.key_values(result_pos_).value(); - } else { - return ""; - } + if (result_.key_values(result_pos_).has_value()) { + return result_.key_values(result_pos_).value(); + } else { + return ""; + } } /// 读取的结果 int64_t RowReaderImpl::ValueInt64() { - std::string v = Value(); - return (v.size() == sizeof(int64_t)) ? *(int64_t*)v.c_str() : 0; + std::string v = Value(); + return (v.size() == sizeof(int64_t)) ? *(int64_t*)v.c_str() : 0; } /// Timestamp int64_t RowReaderImpl::Timestamp() { - if (result_.key_values(result_pos_).has_timestamp()) { - return result_.key_values(result_pos_).timestamp(); - } else { - return 0L; - } + if (result_.key_values(result_pos_).has_timestamp()) { + return result_.key_values(result_pos_).timestamp(); + } else { + return 0L; + } } -const std::string& RowReaderImpl::RowName() { - return row_key_; -} +const std::string& RowReaderImpl::RowName() { return row_key_; } -const std::string& RowReaderImpl::RowKey() { - return row_key_; +const std::string& RowReaderImpl::RowKey() { return row_key_; } + +std::string RowReaderImpl::InternalRowKey() { + if (table_ && table_->IsHashTable()) { + return table_->GetHashMethod()(RowKey()); + } + return RowKey(); } /// Column cf:qualifier std::string RowReaderImpl::ColumnName() { - std::string column; - if (result_.key_values(result_pos_).has_column_family()) { - column = result_.key_values(result_pos_).column_family(); - } else { - return column = ""; - } + std::string column; + if (result_.key_values(result_pos_).has_column_family()) { + column = result_.key_values(result_pos_).column_family(); + } else { + return column = ""; + } - if (result_.key_values(result_pos_).has_qualifier()) { - column += ":" + result_.key_values(result_pos_).qualifier(); - } - return column; + if (result_.key_values(result_pos_).has_qualifier()) { + column += ":" + result_.key_values(result_pos_).qualifier(); + } + return column; } /// Column family std::string RowReaderImpl::Family() { - if (result_.key_values(result_pos_).has_column_family()) { - return result_.key_values(result_pos_).column_family(); - } else { - return ""; - } + if (result_.key_values(result_pos_).has_column_family()) { + return result_.key_values(result_pos_).column_family(); + } else { + return ""; + } } /// Qualifier std::string RowReaderImpl::Qualifier() { - if (result_.key_values(result_pos_).has_qualifier()) { - return result_.key_values(result_pos_).qualifier(); - } else { - return ""; - } + if (result_.key_values(result_pos_).has_qualifier()) { + return result_.key_values(result_pos_).qualifier(); + } else { + return ""; + } } void RowReaderImpl::ToMap(Map* rowmap) { - for (int32_t i = 0; i < result_.key_values_size(); ++i) { - - std::string column; - if (result_.key_values(i).has_column_family()) { - column = result_.key_values(i).column_family(); - } else { - column = ""; - } - if (result_.key_values(i).has_qualifier()) { - column += ":" + result_.key_values(i).qualifier(); - } - std::map& value_map = (*rowmap)[column]; - int64_t timestamp = 0L; - if (result_.key_values(i).has_timestamp()) { - timestamp = result_.key_values(i).timestamp(); - } - value_map[timestamp] = result_.key_values(i).value(); + for (int32_t i = 0; i < result_.key_values_size(); ++i) { + std::string column; + if (result_.key_values(i).has_column_family()) { + column = result_.key_values(i).column_family(); + } else { + column = ""; } + if (result_.key_values(i).has_qualifier()) { + column += ":" + result_.key_values(i).qualifier(); + } + std::map& value_map = (*rowmap)[column]; + int64_t timestamp = 0L; + if (result_.key_values(i).has_timestamp()) { + timestamp = result_.key_values(i).timestamp(); + } + value_map[timestamp] = result_.key_values(i).value(); + } } void RowReaderImpl::ToMap(TRow* rowmap) { - for (int32_t i = 0; i < result_.key_values_size(); ++i) { - std::string cf, qu, value; - if (result_.key_values(i).has_column_family()) { - cf = result_.key_values(i).column_family(); - } else { - cf = ""; - } - TColumnFamily& tcf = (*rowmap)[cf]; - if (result_.key_values(i).has_qualifier()) { - qu = result_.key_values(i).qualifier(); - } - TColumn& tqu = tcf[qu]; - int64_t timestamp = 0L; - if (result_.key_values(i).has_timestamp()) { - timestamp = result_.key_values(i).timestamp(); - } - tqu[timestamp] = result_.key_values(i).value(); + for (int32_t i = 0; i < result_.key_values_size(); ++i) { + std::string cf, qu, value; + if (result_.key_values(i).has_column_family()) { + cf = result_.key_values(i).column_family(); + } else { + cf = ""; } -} - -void RowReaderImpl::SetResult(const RowResult& result) { - int32_t num = result.key_values_size(); - for (int32_t i = 0; i < num; ++i) { - const std::string& key = result.key_values(i).key(); - CHECK(row_key_ == key) << "FATAL: rowkey[" << row_key_ - << "] vs result[" << key << "]"; + TColumnFamily& tcf = (*rowmap)[cf]; + if (result_.key_values(i).has_qualifier()) { + qu = result_.key_values(i).qualifier(); + } + TColumn& tqu = tcf[qu]; + int64_t timestamp = 0L; + if (result_.key_values(i).has_timestamp()) { + timestamp = result_.key_values(i).timestamp(); } - return result_.CopyFrom(result); + tqu[timestamp] = result_.key_values(i).value(); + } } - -/// 重试计数加一 -void RowReaderImpl::IncRetryTimes() { - retry_times_++; +void RowReaderImpl::SetResult(const RowResult& result) { + int32_t num = result.key_values_size(); + for (int32_t i = 0; i < num; ++i) { + const std::string& key = result.key_values(i).key(); + CHECK(InternalRowKey() == key) << "FATAL: rowkey[" << InternalRowKey() << "] vs result[" << key + << "]"; + } + return result_.CopyFrom(result); } /// 设置错误码 -void RowReaderImpl::SetError(ErrorCode::ErrorCodeType err, - const std::string& reason) { - error_code_.SetFailed(err, reason); +void RowReaderImpl::SetError(ErrorCode::ErrorCodeType err, const std::string& reason) { + error_code_.SetFailed(err, reason); } -uint32_t RowReaderImpl::RetryTimes() { - return retry_times_; -} +bool RowReaderImpl::IsAsync() { return (callback_ != NULL); } -bool RowReaderImpl::IsAsync() { - return (callback_ != NULL); -} - -int64_t RowReaderImpl::TimeOut() { - return timeout_ms_; -} +int64_t RowReaderImpl::TimeOut() { return timeout_ms_; } void RowReaderImpl::RunCallback() { - if (on_finish_callback_) { - on_finish_callback_(table_, this); - } - if (callback_) { - callback_(this); - } else { - MutexLock lock(&finish_mutex_); - finish_ = true; - finish_cond_.Signal(); - } + if (on_finish_callback_) { + on_finish_callback_(table_, this); + } + if (callback_) { + callback_(this); + } else { + MutexLock lock(&finish_mutex_); + finish_ = true; + finish_cond_.Signal(); + } } void RowReaderImpl::Wait() { - MutexLock lock(&finish_mutex_); - while (!finish_) { - finish_cond_.Wait(); - } + MutexLock lock(&finish_mutex_); + while (!finish_) { + finish_cond_.Wait(); + } } /// Get数量 -uint32_t RowReaderImpl::GetReadColumnNum() { - return family_map_.size(); -} +uint32_t RowReaderImpl::GetReadColumnNum() { return family_map_.size(); } /// 返回Get -const RowReader::ReadColumnList& RowReaderImpl::GetReadColumnList() { - return family_map_; -} +const RowReader::ReadColumnList& RowReaderImpl::GetReadColumnList() { return family_map_; } /// 序列化 void RowReaderImpl::ToProtoBuf(RowReaderInfo* info) { - info->set_key(row_key_); - info->set_max_version(max_version_); - info->set_max_qualifiers(max_qualifiers_); - info->mutable_time_range()->set_ts_start(ts_start_); - info->mutable_time_range()->set_ts_end(ts_end_); - - FamilyMap::iterator f_it = family_map_.begin(); - for (; f_it != family_map_.end(); ++f_it) { - const std::string& family = f_it->first; - const QualifierSet& qualifier_set = f_it->second; - - ColumnFamily* family_info = info->add_cf_list(); - family_info->set_family_name(family); - - QualifierSet::iterator q_it = qualifier_set.begin(); - for (; q_it != qualifier_set.end(); ++q_it) { - family_info->add_qualifier_list(*q_it); - } + info->set_key(InternalRowKey()); + info->set_max_version(max_version_); + info->set_max_qualifiers(max_qualifiers_); + info->mutable_time_range()->set_ts_start(ts_start_); + info->mutable_time_range()->set_ts_end(ts_end_); + + FamilyMap::iterator f_it = family_map_.begin(); + for (; f_it != family_map_.end(); ++f_it) { + const std::string& family = f_it->first; + const QualifierSet& qualifier_set = f_it->second; + + ColumnFamily* family_info = info->add_cf_list(); + family_info->set_family_name(family); + + QualifierSet::iterator q_it = qualifier_set.begin(); + for (; q_it != qualifier_set.end(); ++q_it) { + family_info->add_qualifier_list(*q_it); } + } } -} // namespace tera +} // namespace tera /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/sdk/read_impl.h b/src/sdk/read_impl.h index 23dabcda1..20b3da355 100644 --- a/src/sdk/read_impl.h +++ b/src/sdk/read_impl.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_SDK_READ_IMPL_H_ -#define TERA_SDK_READ_IMPL_H_ +#ifndef TERA_SDK_READ_IMPL_H_ +#define TERA_SDK_READ_IMPL_H_ #include #include @@ -20,145 +20,146 @@ namespace tera { class TableImpl; class RowReaderImpl : public RowReader, public SdkTask { -public: - RowReaderImpl(TableImpl* table, const std::string& row_key); - ~RowReaderImpl(); - /// 设置读取特定版本 - void SetTimestamp(int64_t ts); - /// 返回读取时间戳 - int64_t GetTimestamp(); - - void SetSnapshot(uint64_t snapshot_id) { snapshot_id_ = snapshot_id; } - - uint64_t GetSnapshot() { return snapshot_id_; } - - /// 设置读取CF - void AddColumnFamily(const std::string& cf_name); - /// 设置读取Column(CF:Qualifier) - void AddColumn(const std::string& cf_name, const std::string& qualifier); - /// 设置读取time_range - void SetTimeRange(int64_t ts_start, int64_t ts_end); - /// 返回time_range - void GetTimeRange(int64_t* ts_start, int64_t* ts_end = NULL); - /// 设置读取max_version - void SetMaxVersions(uint32_t max_version); - /// 返回max_version - uint32_t GetMaxVersions(); - void SetMaxQualifiers(uint64_t max_qualifiers); - uint64_t GetMaxQualifiers(); - /// 设置超时时间(只影响当前操作,不影响Table::SetReadTimeout设置的默认读超时) - void SetTimeOut(int64_t timeout_ms); - /// 设置异步回调, 操作会异步返回 - void SetCallBack(RowReader::Callback callback); - RowReader::Callback GetCallBack(); - /// 设置用户上下文,可在回调函数中获取 - void SetContext(void* context); - void* GetContext(); - /// 设置异步返回 - void SetAsync(); - /// 异步操作是否完成 - bool IsFinished() const; - /// 获取读超时时间 - int64_t TimeOut(); - /// 设置错误吗 - void SetError(ErrorCode::ErrorCodeType err , const std::string& reason = ""); - /// 获得结果错误码 - ErrorCode GetError(); - /// 是否到达结束标记 - bool Done(); - /// 迭代下一个cell - void Next(); - /// Row - const std::string& RowName(); - const std::string& RowKey(); - /// 读取的结果 - std::string Value(); - /// 读取的结果 - int64_t ValueInt64(); - /// Timestamp - int64_t Timestamp(); - /// Column cf:qualifier - std::string ColumnName(); - /// Column family - std::string Family(); - /// Qualifier - std::string Qualifier(); - /// 将结果转存到一个std::map中, 格式为: map> - typedef std::map< std::string, std::map > Map; - void ToMap(Map* rowmap); - void ToMap(TRow* rowmap); - - void SetResult(const RowResult& result); - - void Prepare(StatCallback cb); - int64_t GetStartTime() { return start_ts_;} - - void IncRetryTimes(); - - uint32_t RetryTimes(); - - bool IsAsync(); - - void Wait(); - - /// 执行异步回调 - void RunCallback(); - /// Get数量 - uint32_t GetReadColumnNum(); - /// 返回Get引用 - const ReadColumnList& GetReadColumnList(); - /// 序列化 - void ToProtoBuf(RowReaderInfo* info); - - void AddCommitTimes() { commit_times_++; } - int64_t GetCommitTimes() { return commit_times_; } - - /// 重置result游标 - void ResetResultPos() { result_pos_ = 0; } - /// 返回所属事务 - Transaction* GetTransaction() { return txn_; } - /// 设置所属事务 - void SetTransaction(Transaction* txn) { txn_ = txn; } - - Table* GetTable() { return (Table*)table_; } - - uint32_t Size() { return 0; } - -private: - TableImpl* table_; - std::string row_key_; - RowReader::Callback callback_; - void* user_context_; - - bool finish_; - ErrorCode error_code_; - mutable Mutex finish_mutex_; - common::CondVar finish_cond_; - - typedef std::set QualifierSet; - typedef std::map FamilyMap; - FamilyMap family_map_; - int64_t ts_start_; - int64_t ts_end_; - uint32_t max_version_; - uint64_t max_qualifiers_; - uint64_t snapshot_id_; - - int64_t timeout_ms_; - uint32_t retry_times_; - int32_t result_pos_; - RowResult result_; - - /// 记录此reader被提交到ts的次数 - int64_t commit_times_; - - StatCallback on_finish_callback_; - int64_t start_ts_; - - /// 所属事务 - Transaction* txn_; + public: + RowReaderImpl(TableImpl* table, const std::string& row_key); + ~RowReaderImpl(); + /// 设置读取特定版本 + void SetTimestamp(int64_t ts); + /// 返回读取时间戳 + int64_t GetTimestamp(); + + void SetSnapshot(uint64_t snapshot_id) { snapshot_id_ = snapshot_id; } + + uint64_t GetSnapshot() { return snapshot_id_; } + + /// 设置读取CF + void AddColumnFamily(const std::string& cf_name); + /// 设置读取Column(CF:Qualifier) + void AddColumn(const std::string& cf_name, const std::string& qualifier); + /// 设置读取time_range + void SetTimeRange(int64_t ts_start, int64_t ts_end); + /// 返回time_range + void GetTimeRange(int64_t* ts_start, int64_t* ts_end = NULL); + /// 设置读取max_version + void SetMaxVersions(uint32_t max_version); + /// 返回max_version + uint32_t GetMaxVersions(); + void SetMaxQualifiers(uint64_t max_qualifiers); + uint64_t GetMaxQualifiers(); + /// 设置超时时间(只影响当前操作,不影响Table::SetReadTimeout设置的默认读超时) + void SetTimeOut(int64_t timeout_ms); + /// 设置异步回调, 操作会异步返回 + void SetCallBack(RowReader::Callback callback); + RowReader::Callback GetCallBack(); + /// 设置用户上下文,可在回调函数中获取 + void SetContext(void* context); + void* GetContext(); + /// 设置异步返回 + void SetAsync(); + /// 异步操作是否完成 + bool IsFinished() const; + /// 获取读超时时间 + int64_t TimeOut(); + /// 设置错误吗 + void SetError(ErrorCode::ErrorCodeType err, const std::string& reason = ""); + /// 获得结果错误码 + ErrorCode GetError(); + /// 是否到达结束标记 + bool Done(); + /// 迭代下一个cell + void Next(); + /// Row + const std::string& RowName(); + const std::string& RowKey(); + /// 返回internal_row_key, 若是hash_table, + /// 会返回签名后的row_key + std::string InternalRowKey(); + + /// 读取的结果 + std::string Value(); + /// 读取的结果 + int64_t ValueInt64(); + /// Timestamp + int64_t Timestamp(); + /// Column cf:qualifier + std::string ColumnName(); + /// Column family + std::string Family(); + /// Qualifier + std::string Qualifier(); + /// 将结果转存到一个std::map中, 格式为: map> + typedef std::map > Map; + void ToMap(Map* rowmap); + void ToMap(TRow* rowmap); + + void SetResult(const RowResult& result); + + void Prepare(StatCallback cb); + int64_t GetStartTime() { return start_ts_; } + + bool IsAsync(); + + void Wait(); + + /// 执行异步回调 + void RunCallback(); + /// Get数量 + uint32_t GetReadColumnNum(); + /// 返回Get引用 + const ReadColumnList& GetReadColumnList(); + /// 序列化 + void ToProtoBuf(RowReaderInfo* info); + + void AddCommitTimes() { commit_times_++; } + int64_t GetCommitTimes() { return commit_times_; } + + /// 重置result游标 + void ResetResultPos() { result_pos_ = 0; } + /// 返回所属事务 + Transaction* GetTransaction() { return txn_; } + /// 设置所属事务 + void SetTransaction(Transaction* txn) { txn_ = txn; } + + Table* GetTable() { return (Table*)table_; } + + uint32_t Size() { return 0; } + + const std::string& FirstRowKey(); + + private: + TableImpl* table_; + std::string row_key_; + RowReader::Callback callback_; + void* user_context_; + + bool finish_; + ErrorCode error_code_; + mutable Mutex finish_mutex_; + common::CondVar finish_cond_; + + typedef std::set QualifierSet; + typedef std::map FamilyMap; + FamilyMap family_map_; + int64_t ts_start_; + int64_t ts_end_; + uint32_t max_version_; + uint64_t max_qualifiers_; + uint64_t snapshot_id_; + + int64_t timeout_ms_; + int32_t result_pos_; + RowResult result_; + + /// 记录此reader被提交到ts的次数 + int64_t commit_times_; + + StatCallback on_finish_callback_; + int64_t start_ts_; + + /// 所属事务 + Transaction* txn_; }; -} // namespace tera +} // namespace tera #endif // TERA_SDK_READ_IMPL_H_ diff --git a/src/sdk/rowlock_client.cc b/src/sdk/rowlock_client.cc index 5167d72dc..7525f5c82 100644 --- a/src/sdk/rowlock_client.cc +++ b/src/sdk/rowlock_client.cc @@ -7,10 +7,7 @@ #include #include -#include "gflags/gflags.h" - -#include "observer/rowlocknode/ins_rowlock_client_zk_adapter.h" -#include "proto/rowlocknode_rpc.pb.h" +#include #include "types.h" #include "utils/utils_cmd.h" @@ -20,139 +17,127 @@ DECLARE_bool(rowlock_test); DECLARE_int32(rowlock_client_max_fail_times); DECLARE_bool(mock_rowlock_enable); -namespace tera{ +namespace tera { namespace observer { ThreadPool* RowlockStub::thread_pool_ = NULL; -void RowlockStub::SetThreadPool(ThreadPool* thread_pool) { - thread_pool_ = thread_pool; -} +void RowlockStub::SetThreadPool(ThreadPool* thread_pool) { thread_pool_ = thread_pool; } -void RowlockStub::SetRpcOption(int32_t max_inflow, int32_t max_outflow, - int32_t pending_buffer_size, int32_t thread_num) { - tera::RpcClientBase::SetOption(max_inflow, max_outflow, - pending_buffer_size, thread_num); +void RowlockStub::SetRpcOption(int32_t max_inflow, int32_t max_outflow, int32_t pending_buffer_size, + int32_t thread_num) { + tera::RpcClientBase::SetOption(max_inflow, max_outflow, pending_buffer_size, thread_num); } -RowlockStub::RowlockStub(const std::string& server_addr, - int32_t rpc_timeout) - : tera::RpcClient(server_addr), - rpc_timeout_(rpc_timeout) { -} +RowlockStub::RowlockStub(const std::string& server_addr, int32_t rpc_timeout) + : tera::RpcClient(server_addr), rpc_timeout_(rpc_timeout) {} RowlockStub::~RowlockStub() {} -bool RowlockStub::TryLock(const RowlockRequest* request, - RowlockResponse* response, - std::function done) { - return SendMessageWithRetry(&RowlockService::Stub::Lock, - request, response, done, "TryLock", - rpc_timeout_, thread_pool_); +bool RowlockStub::TryLock(const RowlockRequest* request, RowlockResponse* response, + std::function done) { + return SendMessageWithRetry(&RowlockService::Stub::Lock, request, response, done, "TryLock", + rpc_timeout_, thread_pool_); } -bool RowlockStub::UnLock(const RowlockRequest* request, - RowlockResponse* response, - std::function done) { - return SendMessageWithRetry(&RowlockService::Stub::UnLock, - request, response, done, "UnLock", - rpc_timeout_, thread_pool_); +bool RowlockStub::UnLock(const RowlockRequest* request, RowlockResponse* response, + std::function done) { + return SendMessageWithRetry(&RowlockService::Stub::UnLock, request, response, done, "UnLock", + rpc_timeout_, thread_pool_); } bool RowlockClient::init_ = false; std::string RowlockClient::server_addr_ = ""; void RowlockClient::SetThreadPool(ThreadPool* thread_pool) { - RowlockStub::SetThreadPool(thread_pool); + RowlockStub::SetThreadPool(thread_pool); } RowlockClient::RowlockClient(const std::string& addr, int32_t rpc_timeout) : local_addr_(tera::utils::GetLocalHostName() + ":" + FLAGS_rowlock_server_port) { - srand((unsigned int)(time(NULL))); + srand((unsigned int)(time(NULL))); - if (FLAGS_mock_rowlock_enable == true) { - return; - } - if (init_ == false) { - SetZkAdapter(); - init_ = true; - } else { - std::vector init_addrs; - init_addrs.push_back(server_addr_); - Update(init_addrs); - } - + if (FLAGS_mock_rowlock_enable == true) { + return; + } + if (init_ == false) { + SetZkAdapter(); + init_ = true; + } else { + std::vector init_addrs; + init_addrs.push_back(server_addr_); + Update(init_addrs); + } } void RowlockClient::Update(const std::vector& addrs) { - std::string addr = addrs[rand() % addrs.size()]; - std::shared_ptr client(new RowlockStub(addr)); + std::string addr = addrs[rand() % addrs.size()]; + std::shared_ptr client(new RowlockStub(addr)); - LOG(INFO) << "Update rowlock client ip: " << addr; + LOG(INFO) << "Update rowlock client ip: " << addr; - MutexLock locker(&client_mutex_); - server_addr_ = addr; - client_.swap(client); + MutexLock locker(&client_mutex_); + server_addr_ = addr; + client_.swap(client); } -bool RowlockClient::TryLock(const RowlockRequest* request, - RowlockResponse* response, - std::function done) { - std::shared_ptr client; - { - MutexLock locker(&client_mutex_); - // COW ref +1 - client = client_; - } - for (int32_t i = 0; i < FLAGS_rowlock_client_max_fail_times; ++i) { - bool ret = client->TryLock(request, response, done); - if (ret) { - return true; - } - LOG(WARNING) << "try lock fail: " << request->row(); +bool RowlockClient::TryLock( + const RowlockRequest* request, RowlockResponse* response, + std::function done) { + std::shared_ptr client; + { + MutexLock locker(&client_mutex_); + // COW ref +1 + client = client_; + } + for (int32_t i = 0; i < FLAGS_rowlock_client_max_fail_times; ++i) { + bool ret = client->TryLock(request, response, done); + if (ret) { + return true; } - // rpc fail - SetZkAdapter(); - return false; + LOG(WARNING) << "try lock fail: " << request->row(); + } + // rpc fail + SetZkAdapter(); + return false; } -bool RowlockClient::UnLock(const RowlockRequest* request, - RowlockResponse* response, - std::function done) { - std::shared_ptr client; - { - MutexLock locker(&client_mutex_); - // copy-on-write ref+1 - client = client_; - } - for (int32_t i = 0; i < FLAGS_rowlock_client_max_fail_times; ++i) { - bool ret = client->UnLock(request, response, done); - if (ret) { - return true; - } - // rpc fail - SetZkAdapter(); +bool RowlockClient::UnLock(const RowlockRequest* request, RowlockResponse* response, + std::function done) { + std::shared_ptr client; + { + MutexLock locker(&client_mutex_); + // copy-on-write ref+1 + client = client_; + } + for (int32_t i = 0; i < FLAGS_rowlock_client_max_fail_times; ++i) { + bool ret = client->UnLock(request, response, done); + if (ret) { + return true; } - return false; + // rpc fail + SetZkAdapter(); + } + return false; } void RowlockClient::SetZkAdapter() { - // mock rowlock, do not need a real zk adapter - if (FLAGS_mock_rowlock_enable == true) { - return; - } - - if (FLAGS_tera_coord_type == "zk") { - zk_adapter_.reset(new ZkRowlockClientZkAdapter(this, local_addr_)); - } else if (FLAGS_tera_coord_type == "ins") { - zk_adapter_.reset(new InsRowlockClientZkAdapter(this, local_addr_)); - } else { - LOG(ERROR) << "Unknow coord type for rowlock client"; - return; - } - - zk_adapter_->Init(); + // mock rowlock, do not need a real zk adapter + if (FLAGS_mock_rowlock_enable == true) { + return; + } + + if (FLAGS_tera_coord_type == "zk") { + zk_adapter_.reset(new ZkRowlockClientZkAdapter(this, local_addr_)); + } else if (FLAGS_tera_coord_type == "ins") { + zk_adapter_.reset(new InsRowlockClientZkAdapter(this, local_addr_)); + } else { + LOG(ERROR) << "Unknow coord type for rowlock client"; + return; + } + + zk_adapter_->Init(); } -} // namespace observer -} // namespace tera +} // namespace observer +} // namespace tera diff --git a/src/sdk/rowlock_client.h b/src/sdk/rowlock_client.h index 31d609a69..afc848c76 100644 --- a/src/sdk/rowlock_client.h +++ b/src/sdk/rowlock_client.h @@ -2,78 +2,99 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_SDK_ROWLOCK_CLIENT_H_ -#define TERA_SDK_ROWLOCK_CLIENT_H_ +#pragma once #include -#include #include #include "common/mutex.h" -#include "observer/rowlocknode/zk_rowlock_client_zk_adapter.h" #include "proto/rpc_client.h" #include "proto/rowlocknode_rpc.pb.h" +#include "sdk/rowlock_client_zk_adapter.h" namespace tera { namespace observer { -class RowlockClientZkAdapter; +class ZkRowlockClientZkAdapter; class RowlockStub : public tera::RpcClient { -public: - static void SetThreadPool(ThreadPool* thread_pool); + public: + static void SetThreadPool(ThreadPool* thread_pool); - static void SetRpcOption(int32_t max_inflow = -1, int32_t max_outflow = -1, - int32_t pending_buffer_size = -1, - int32_t thread_num = -1); + static void SetRpcOption(int32_t max_inflow = -1, int32_t max_outflow = -1, + int32_t pending_buffer_size = -1, int32_t thread_num = -1); - RowlockStub(const std::string& addr = "", int32_t rpc_timeout = 60000); - ~RowlockStub(); + RowlockStub(const std::string& addr = "", int32_t rpc_timeout = 60000); + ~RowlockStub(); - virtual bool TryLock(const RowlockRequest* request, - RowlockResponse* response, - std::function done = NULL); + virtual bool TryLock( + const RowlockRequest* request, RowlockResponse* response, + std::function done = NULL); - virtual bool UnLock(const RowlockRequest* request, - RowlockResponse* response, - std::function done = NULL); + virtual bool UnLock( + const RowlockRequest* request, RowlockResponse* response, + std::function done = NULL); - -private: - int32_t rpc_timeout_; - static ThreadPool* thread_pool_; + private: + int32_t rpc_timeout_; + static ThreadPool* thread_pool_; }; class RowlockClient { -public: - static void SetThreadPool(ThreadPool* thread_pool); - - RowlockClient(const std::string& addr = "", int32_t rpc_timeout = 60000); - ~RowlockClient() {} - - virtual bool TryLock(const RowlockRequest* request, - RowlockResponse* response, - std::function done = NULL); - - virtual bool UnLock(const RowlockRequest* request, - RowlockResponse* response, - std::function done = NULL); - - void Update(const std::vector& addrs); - -private: - void SetZkAdapter(); - -private: - mutable Mutex client_mutex_; - std::shared_ptr client_; - std::unique_ptr zk_adapter_; - std::string local_addr_; - static bool init_; - static std::string server_addr_; + public: + static void SetThreadPool(ThreadPool* thread_pool); + + RowlockClient(const std::string& addr = "", int32_t rpc_timeout = 60000); + ~RowlockClient() {} + + virtual bool TryLock( + const RowlockRequest* request, RowlockResponse* response, + std::function done = NULL); + + virtual bool UnLock( + const RowlockRequest* request, RowlockResponse* response, + std::function done = NULL); + + void Update(const std::vector& addrs); + + private: + void SetZkAdapter(); + + private: + mutable Mutex client_mutex_; + std::shared_ptr client_; + std::unique_ptr zk_adapter_; + std::string local_addr_; + static bool init_; + static std::string server_addr_; +}; + +class FakeRowlockClient : public RowlockClient { + public: + FakeRowlockClient() : RowlockClient("127.0.0.1:22222"){}; + ~FakeRowlockClient() {} + + virtual bool TryLock( + const RowlockRequest* request, RowlockResponse* response, + std::function done = NULL) { + response->set_lock_status(kLockSucc); + if (done != NULL) { + bool failed = true; + int error_code = 0; + done(request, response, failed, error_code); + } + return true; + } + + virtual bool UnLock( + const RowlockRequest* request, RowlockResponse* response, + std::function done = NULL) { + response->set_lock_status(kLockSucc); + + return true; + } }; -} // namespace observer -} // namespace tera -#endif // TERA_SDK_ROWLOCK_CLIENT_H +} // namespace observer +} // namespace tera diff --git a/src/sdk/rowlock_client_zk_adapter.cc b/src/sdk/rowlock_client_zk_adapter.cc new file mode 100644 index 000000000..4418ade0f --- /dev/null +++ b/src/sdk/rowlock_client_zk_adapter.cc @@ -0,0 +1,85 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sdk/rowlock_client_zk_adapter.h" + +#include +#include + +#include "common/this_thread.h" +#include "types.h" + +DECLARE_string(rowlock_ins_root_path); +DECLARE_string(tera_ins_addr_list); +DECLARE_string(rowlock_zk_root_path); +DECLARE_string(tera_zk_addr_list); +DECLARE_int32(rowlock_server_node_num); +DECLARE_int64(tera_zk_retry_period); +DECLARE_int32(tera_zk_timeout); +DECLARE_int32(tera_zk_retry_max_times); + +namespace tera { +namespace observer { + +ZkRowlockClientZkAdapter::ZkRowlockClientZkAdapter(RowlockClient* server_client, + const std::string& server_addr) + : client_(server_client), server_addr_(server_addr) {} + +ZkRowlockClientZkAdapter::~ZkRowlockClientZkAdapter() { ZooKeeperAdapter::Finalize(); } + +bool ZkRowlockClientZkAdapter::Init() { + std::string root_path = FLAGS_rowlock_zk_root_path; + std::string proxy_path = root_path + kRowlockProxyPath; + + int zk_errno = zk::ZE_OK; + ; + // init zk client + while (!ZooKeeperAdapter::Init(FLAGS_tera_zk_addr_list, FLAGS_rowlock_zk_root_path, + FLAGS_tera_zk_timeout, server_addr_, &zk_errno)) { + LOG(ERROR) << "fail to init zk : " << zk::ZkErrnoToString(zk_errno); + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + LOG(INFO) << "init zk success"; + + std::vector child; + std::vector value; + + while (!ListChildren(proxy_path, &child, &value, &zk_errno)) { + LOG(ERROR) << "fail to get proxy addr : " << zk::ZkErrnoToString(zk_errno); + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + client_->Update(value); + return true; +} + +InsRowlockClientZkAdapter::InsRowlockClientZkAdapter(RowlockClient* server_client, + const std::string& server_addr) + : ZkRowlockClientZkAdapter(server_client, server_addr), + client_(server_client), + server_addr_(server_addr) {} + +bool InsRowlockClientZkAdapter::Init() { + std::string root_path = FLAGS_rowlock_ins_root_path; + std::vector value; + // create session + ins_sdk_ = new galaxy::ins::sdk::InsSDK(FLAGS_tera_ins_addr_list); + + // put server_node_num + std::string rowlock_proxy_path = root_path + kRowlockProxyPath; + + galaxy::ins::sdk::ScanResult* result = + ins_sdk_->Scan(rowlock_proxy_path + "/!", rowlock_proxy_path + "/~"); + while (!result->Done()) { + CHECK_EQ(result->Error(), galaxy::ins::sdk::kOK); + value.push_back(result->Value()); + result->Next(); + } + delete result; + + client_->Update(value); + return true; +} + +} // namespace observer +} // namespace tera diff --git a/src/sdk/rowlock_client_zk_adapter.h b/src/sdk/rowlock_client_zk_adapter.h new file mode 100644 index 000000000..c8a6d381c --- /dev/null +++ b/src/sdk/rowlock_client_zk_adapter.h @@ -0,0 +1,57 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#pragma once + +#include "sdk/rowlock_client.h" +#include "zk/zk_adapter.h" +#include "ins_sdk.h" + +namespace galaxy { +namespace ins { +namespace sdk { +class InsSDK; +} // namespace sdk +} // namespace ins +} // namespace galaxy + +namespace tera { +namespace observer { + +class RowlockClient; + +class ZkRowlockClientZkAdapter : public zk::ZooKeeperLightAdapter { + public: + ZkRowlockClientZkAdapter(RowlockClient* server_client, const std::string& server_addr); + virtual ~ZkRowlockClientZkAdapter(); + virtual bool Init(); + + private: + RowlockClient* client_; + std::string server_addr_; +}; + +class InsRowlockClientZkAdapter : public ZkRowlockClientZkAdapter { + public: + InsRowlockClientZkAdapter(RowlockClient* server_client, const std::string& server_addr); + virtual ~InsRowlockClientZkAdapter(){}; + virtual bool Init(); + + protected: + virtual void OnNodeValueChanged(const std::string& path, const std::string& value) {} + virtual void OnWatchFailed(const std::string& path, int watch_type, int err) {} + virtual void OnNodeDeleted(const std::string& path) {} + virtual void OnSessionTimeout() {} + virtual void OnNodeCreated(const std::string& path) {} + virtual void OnChildrenChanged(const std::string& path, const std::vector& name_list, + const std::vector& data_list) {} + + private: + RowlockClient* client_; + std::string server_addr_; + galaxy::ins::sdk::InsSDK* ins_sdk_; +}; + +} // namespace observer +} // namespace tera diff --git a/src/sdk/scan.cc b/src/sdk/scan.cc index f4b630216..1ad5a4f4d 100644 --- a/src/sdk/scan.cc +++ b/src/sdk/scan.cc @@ -7,78 +7,42 @@ namespace tera { -ScanDescriptor::ScanDescriptor(const std::string& rowkey) { - impl_ = new ScanDescImpl(rowkey); -} +ScanDescriptor::ScanDescriptor(const std::string& rowkey) { impl_ = new ScanDescImpl(rowkey); } -ScanDescriptor::~ScanDescriptor() { - delete impl_; -} +ScanDescriptor::~ScanDescriptor() { delete impl_; } -void ScanDescriptor::SetEnd(const std::string& rowkey) { - impl_->SetEnd(rowkey); -} +void ScanDescriptor::SetEnd(const std::string& rowkey) { impl_->SetEnd(rowkey); } -void ScanDescriptor::AddColumnFamily(const std::string& cf) { - impl_->AddColumnFamily(cf); -} +void ScanDescriptor::AddColumnFamily(const std::string& cf) { impl_->AddColumnFamily(cf); } void ScanDescriptor::AddColumn(const std::string& cf, const std::string& qualifier) { - impl_->AddColumn(cf, qualifier); + impl_->AddColumn(cf, qualifier); } -void ScanDescriptor::SetMaxVersions(int32_t versions) { - impl_->SetMaxVersions(versions); -} +void ScanDescriptor::SetMaxVersions(int32_t versions) { impl_->SetMaxVersions(versions); } void ScanDescriptor::SetMaxQualifiers(uint64_t max_qualifiers) { - impl_->SetMaxQualifiers(max_qualifiers); + impl_->SetMaxQualifiers(max_qualifiers); } -void ScanDescriptor::SetPackInterval(int64_t interval) { - impl_->SetPackInterval(interval); -} +void ScanDescriptor::SetPackInterval(int64_t interval) { impl_->SetPackInterval(interval); } void ScanDescriptor::SetTimeRange(int64_t ts_end, int64_t ts_start) { - impl_->SetTimeRange(ts_end, ts_start); -} - -bool ScanDescriptor::SetFilter(const std::string& filter_string) { - return impl_->SetFilter(filter_string); -} - -void ScanDescriptor::SetValueConverter(ValueConverter converter) { - impl_->SetValueConverter(converter); + impl_->SetTimeRange(ts_end, ts_start); } -void ScanDescriptor::SetSnapshot(uint64_t snapshot_id) { - return impl_->SetSnapshot(snapshot_id); -} +bool ScanDescriptor::SetFilter(const filter::FilterPtr& filter) { return impl_->SetFilter(filter); } -void ScanDescriptor::SetBufferSize(int64_t buf_size) { - impl_->SetBufferSize(buf_size); -} +void ScanDescriptor::SetSnapshot(uint64_t snapshot_id) { return impl_->SetSnapshot(snapshot_id); } -void ScanDescriptor::SetNumberLimit(int64_t number_limit) { - impl_->SetNumberLimit(number_limit); -} +void ScanDescriptor::SetBufferSize(int64_t buf_size) { impl_->SetBufferSize(buf_size); } -int64_t ScanDescriptor::GetNumberLimit() { - return impl_->GetNumberLimit(); -} +void ScanDescriptor::SetNumberLimit(int64_t number_limit) { impl_->SetNumberLimit(number_limit); } -void ScanDescriptor::SetAsync(bool async) { - impl_->SetAsync(async); -} - -bool ScanDescriptor::IsAsync() const { - return impl_->IsAsync(); -} +int64_t ScanDescriptor::GetNumberLimit() { return impl_->GetNumberLimit(); } -ScanDescImpl* ScanDescriptor::GetImpl() const { - return impl_; -} +ScanDescImpl* ScanDescriptor::GetImpl() const { return impl_; } -} // namespace tera +} // namespace tera /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/sdk/scan_impl.cc b/src/sdk/scan_impl.cc index 0bc9733e3..d74437461 100644 --- a/src/sdk/scan_impl.cc +++ b/src/sdk/scan_impl.cc @@ -6,6 +6,7 @@ #include #include +#include #include "common/this_thread.h" #include "common/base/string_ext.h" @@ -18,35 +19,51 @@ #include "common/atomic.h" #include "common/timer.h" -DECLARE_bool(tera_sdk_batch_scan_enabled); DECLARE_int64(tera_sdk_scan_number_limit); DECLARE_int64(tera_sdk_scan_buffer_size); DECLARE_int32(tera_sdk_max_batch_scan_req); -DECLARE_int32(tera_sdk_batch_scan_max_retry); -DECLARE_int32(tera_sdk_sync_scan_max_retry); +DECLARE_int32(tera_sdk_scan_max_retry); DECLARE_int64(tera_sdk_scan_timeout); -DECLARE_int64(batch_scan_delay_retry_in_us); -DECLARE_int64(sync_scan_delay_retry_in_ms); +DECLARE_int64(tera_sdk_scan_delay_retry_in_us); DECLARE_bool(debug_tera_sdk_scan); namespace tera { -ResultStreamImpl::ResultStreamImpl(TableImpl* table, - ScanDescImpl* scan_desc_impl) - : scan_desc_impl_(new ScanDescImpl(*scan_desc_impl)), - table_ptr_(table) { +ResultStreamImpl::ResultStreamImpl(TableImpl* table, ScanDescImpl* scan_desc_impl) + : cv_(&mu_), + scan_desc_impl_(new ScanDescImpl(*scan_desc_impl)), + table_ptr_(table), + session_retry_(0), + ref_count_(1), + data_size_(0), + row_count_(0), + last_key_(""), + canceled_(false) { + // do something startup + sliding_window_.resize(FLAGS_tera_sdk_max_batch_scan_req); + session_end_key_ = scan_desc_impl_->GetStartRowKey(); + slot_last_key_.set_key(session_end_key_); + slot_last_key_.set_timestamp(INT64_MAX); + mu_.Lock(); + ScanSessionReset(true); + mu_.Unlock(); } ResultStreamImpl::~ResultStreamImpl() { - if (scan_desc_impl_ != NULL) { - delete scan_desc_impl_; - } + // do something cleanup + MutexLock mutex(&mu_); + if (scan_desc_impl_ != NULL) { + delete scan_desc_impl_; + } + ref_count_--; + SCAN_LOG << "before wait scan task finsh, ref_count " << ref_count_; + while (ref_count_ != 0) { + cv_.Wait(); + } } -ScanDescImpl* ResultStreamImpl::GetScanDesc() { - return scan_desc_impl_; -} +ScanDescImpl* ResultStreamImpl::GetScanDesc() { return scan_desc_impl_; } /* * scan的时候,tabletnode攒满一个buffer就会返回给sdk,sdk再接着scan, @@ -65,53 +82,35 @@ ScanDescImpl* ResultStreamImpl::GetScanDesc() { * 否则,加'\x0'(也就是'\0')。 */ std::string ResultStreamImpl::GetNextStartPoint(const std::string& str) { - const static std::string x0("\x0", 1); - const static std::string x1("\x1"); - RawKey rawkey_type = table_ptr_->GetTableSchema().raw_key(); - return rawkey_type == Readable ? str + x1 : str + x0; + const static std::string x0("\x0", 1); + const static std::string x1("\x1"); + RawKey rawkey_type = table_ptr_->GetTableSchema().raw_key(); + return rawkey_type == Readable ? str + x1 : str + x0; } -/////////////////////////////////////// -///// high performance scan ///// -/////////////////////////////////////// -ResultStreamBatchImpl::ResultStreamBatchImpl(TableImpl* table, ScanDescImpl* scan_desc) - : ResultStreamImpl(table, scan_desc), - cv_(&mu_), session_retry_(0), ref_count_(1) { - // do something startup - sliding_window_.resize(FLAGS_tera_sdk_max_batch_scan_req); - session_end_key_ = scan_desc_impl_->GetStartRowKey(); - slot_last_key_.set_key(session_end_key_); - slot_last_key_.set_timestamp(INT64_MAX); +void ResultStreamImpl::GetRpcHandle(ScanTabletRequest** request_ptr, + ScanTabletResponse** response_ptr) { + *request_ptr = new ScanTabletRequest; + *response_ptr = new ScanTabletResponse; - mu_.Lock(); - ScanSessionReset(true); - mu_.Unlock(); -} - -void ResultStreamBatchImpl::GetRpcHandle(ScanTabletRequest** request_ptr, - ScanTabletResponse** response_ptr) { - *request_ptr = new ScanTabletRequest; - *response_ptr = new ScanTabletResponse; - - MutexLock mutex(&mu_); - (*request_ptr)->set_part_of_session(part_of_session_); - (*request_ptr)->set_session_id((int64_t)session_id_); - SCAN_LOG << "get rpc handle, part_of_session_ " << part_of_session_ - << ", response " << (uint64_t)(*response_ptr); + MutexLock mutex(&mu_); + (*request_ptr)->set_part_of_session(part_of_session_); + (*request_ptr)->set_session_id((int64_t)session_id_); + SCAN_LOG << "get rpc handle, part_of_session_ " << part_of_session_ << ", response " + << (uint64_t)(*response_ptr); } // insure table_impl no more use scan_impl -void ResultStreamBatchImpl::ReleaseRpcHandle(ScanTabletRequest* request, - ScanTabletResponse* response) { - delete request; - uint64_t response_ptr = (uint64_t)(response); - delete response; - - MutexLock mutex(&mu_); - ref_count_--; - SCAN_LOG << "release rpc handle and wakeup, ref_count_ " << ref_count_ - << ", response " << response_ptr; - cv_.Signal(); +void ResultStreamImpl::ReleaseRpcHandle(ScanTabletRequest* request, ScanTabletResponse* response) { + delete request; + uint64_t response_ptr = (uint64_t)(response); + delete response; + + MutexLock mutex(&mu_); + ref_count_--; + SCAN_LOG << "release rpc handle and wakeup, ref_count_ " << ref_count_ << ", response " + << response_ptr; + cv_.Signal(); } // scan request callback trigger: @@ -121,477 +120,304 @@ void ResultStreamBatchImpl::ReleaseRpcHandle(ScanTabletRequest* request, // 2. handle session broken: // 2.1. stop scan, and report error to user // 3. scan success, notify user to consume result -void ResultStreamBatchImpl::OnFinish(ScanTabletRequest* request, - ScanTabletResponse* response) { - MutexLock mutex(&mu_); - // check session id - if (request->session_id() != (int64_t)session_id_) { - SCAN_LOG << "[OnFinish]session_id not match, request session id" << request->session_id(); - } else if (response->status() != kTabletNodeOk) { - // rpc or ts error, session broken and report error - session_error_ = response->status(); - SCAN_WLOG << "[OnFinish]broken error " << StatusCodeToString(session_error_); - session_done_ = true; - } else if ((response->results_id() == 0) && - (response->results().key_values_size() == 0) && - request->part_of_session()) { - // handle old ts, results_id not init - SCAN_WLOG << "[OnFinish]batch scan old ts"; - } else if ((response->results_id() < session_data_idx_) || - (response->results_id() >= session_data_idx_ + - FLAGS_tera_sdk_max_batch_scan_req)) { - if (response->results_id() != std::numeric_limits::max()) { - SCAN_WLOG << "[OnFinish]session_data_idx " << session_data_idx_ - << ", stale result_id " << response->results_id() - << ", response " << (uint64_t)response; - session_done_ = true; - // TODO: ts state no known - session_error_ = kRPCTimeout; - } - } else { // scan success, cache result - int32_t slot_idx = ((response->results_id() - session_data_idx_) - + sliding_window_idx_) % FLAGS_tera_sdk_max_batch_scan_req; - SCAN_LOG << "[OnFinish]scan suc, slot_idx " << slot_idx << ", result_id " << response->results_id() - << ", session_data_idx_ " << session_data_idx_ - << ", sliding_window_idx_ " << sliding_window_idx_ - << ", resp.kv.size() " << response->results().key_values_size(); - ScanSlot* slot = &(sliding_window_[slot_idx]); - if (slot->state_ == SCANSLOT_INVALID) { - slot->state_ = SCANSLOT_VALID; - slot->cell_.CopyFrom(response->results()); - SCAN_LOG << "[OnFinish]cache scan result, slot_idx " << slot_idx - << ", kv.size() " << slot->cell_.key_values_size() - << ", resp.kv.size() " << response->results().key_values_size(); - } - if (response->complete()) { - session_last_idx_ = (session_last_idx_ > response->results_id()) ? - response->results_id(): session_last_idx_; - session_end_key_ = response->end(); - SCAN_LOG << "[OnFinish]scan complete: session_end_key " << session_end_key_ - << ", session_last_idx " << session_last_idx_; - session_done_ = true; - } - } - return; -} - -ResultStreamBatchImpl::~ResultStreamBatchImpl() { - // do something cleanup - MutexLock mutex(&mu_); - ref_count_--; - SCAN_LOG << "before wait scan task finsh, ref_count " << ref_count_; - while (ref_count_ != 0) { cv_.Wait();} -} - -void ResultStreamBatchImpl::ComputeStartKey(const KeyValuePair& kv, KeyValuePair* start_key) { - if (scan_desc_impl_->IsKvOnlyTable()) { // kv, set next key - start_key->set_key(GetNextStartPoint(kv.key())); - start_key->set_column_family(kv.column_family()); - start_key->set_qualifier(kv.qualifier()); - start_key->set_timestamp(kv.timestamp()); - } else if (kv.timestamp() == 0) { // table timestamp == 0 - start_key->set_key(kv.key()); - start_key->set_column_family(kv.column_family()); - start_key->set_qualifier(GetNextStartPoint(kv.qualifier())); - start_key->set_timestamp(INT64_MAX); - } else { // table has timestamp > 0 - start_key->set_key(kv.key()); - start_key->set_column_family(kv.column_family()); - start_key->set_qualifier(kv.qualifier()); - start_key->set_timestamp(kv.timestamp() - 1); - } - return; -} - -void ResultStreamBatchImpl::ScanSessionReset(bool reset_retry) { - mu_.AssertHeld(); - // reset session parameter - uint64_t pre_session_id = session_id_; - StatusCode pre_session_error = session_error_; - bool pre_session_done = session_done_; - std::string pre_session_end_key = session_end_key_; - if (reset_retry) { - session_retry_ = 0; - } - uint64_t tid = (uint64_t)pthread_self(); - session_id_ = ((tid << 48) | ((uint64_t)get_micros())) & (0x7ffffffffffff); - session_done_ = false; - session_error_ = kTabletNodeOk; - part_of_session_ = false; - session_data_idx_ = 0; - session_last_idx_ = UINT32_MAX; - sliding_window_idx_= 0; - next_idx_ = 0; - - // set all slot invalid - std::vector::iterator it = sliding_window_.begin(); - for (; it != sliding_window_.end(); ++it) { - it->state_ = SCANSLOT_INVALID; - it->cell_.Clear(); - } - - ref_count_ += FLAGS_tera_sdk_max_batch_scan_req; - KeyValuePair start_key; - ComputeStartKey(slot_last_key_, &start_key); - scan_desc_impl_->SetStart(start_key.key(), start_key.column_family(), - start_key.qualifier(), start_key.timestamp()); - SCAN_LOG << "session reset [start key " << start_key.key() - << ", session_retry " << session_retry_ - << ", ref_count " << ref_count_ - << "], previous session info [session_id " << pre_session_id - << ", session_error " << StatusCodeToString(pre_session_error) - << ", session_done " << pre_session_done - << ", session_end_key " << pre_session_end_key << "]"; +void ResultStreamImpl::OnFinish(ScanTabletRequest* request, ScanTabletResponse* response) { + MutexLock mutex(&mu_); + // check session id + if (request->session_id() != (int64_t)session_id_) { + SCAN_LOG << "[OnFinish]session_id not match, request session id" << request->session_id(); + } else if (response->status() != kTabletNodeOk || + (response->status() == kTabletNodeOk && response->status() == kNotPermission)) { + // rpc or ts error, session broken and report error + session_error_ = response->status(); + SCAN_WLOG << "[OnFinish]broken error " << StatusCodeToString(session_error_); + session_done_ = true; + } else if (response->results_id() == 0 && response->results().key_values_size() == 0 && + request->part_of_session()) { + // handle old ts, results_id not init + SCAN_WLOG << "[OnFinish]batch scan old ts"; + } else if ((response->results_id() < session_data_idx_) || + (response->results_id() >= session_data_idx_ + FLAGS_tera_sdk_max_batch_scan_req)) { + if (response->results_id() != std::numeric_limits::max()) { + SCAN_WLOG << "[OnFinish]session_data_idx " << session_data_idx_ << ", stale result_id " + << response->results_id() << ", response " << (uint64_t)response; + session_done_ = true; + // TODO: ts state no known + session_error_ = kRPCTimeout; + } + } else { // scan success, cache result + int32_t slot_idx = ((response->results_id() - session_data_idx_) + sliding_window_idx_) % + FLAGS_tera_sdk_max_batch_scan_req; + SCAN_LOG << "[OnFinish]scan suc, slot_idx " << slot_idx << ", result_id " + << response->results_id() << ", session_data_idx_ " << session_data_idx_ + << ", sliding_window_idx_ " << sliding_window_idx_ << ", resp.kv.size() " + << response->results().key_values_size() << ", data_size " << response->data_size() + << ", row_count " << response->row_count(); + UpdateRowCount(response->row_count()); + UpdateDataSize(response->data_size()); + ScanSlot* slot = &(sliding_window_[slot_idx]); + if (slot->state_ == SCANSLOT_INVALID) { + slot->state_ = SCANSLOT_VALID; + slot->cell_.CopyFrom(response->results()); + if (slot->cell_.key_values_size() > 0) { + UpdateLastKey(slot->cell_.key_values(slot->cell_.key_values_size() - 1)); + } + SCAN_LOG << "[OnFinish]cache scan result, slot_idx " << slot_idx << ", kv.size() " + << slot->cell_.key_values_size() << ", resp.kv.size() " + << response->results().key_values_size(); + } + if (response->complete()) { + session_last_idx_ = + (session_last_idx_ > response->results_id()) ? response->results_id() : session_last_idx_; + session_end_key_ = response->end(); + SCAN_LOG << "[OnFinish]scan complete: session_end_key " << session_end_key_ + << ", session_last_idx " << session_last_idx_; + session_done_ = true; + } + } + return; +} + +void ResultStreamImpl::ComputeStartKey(const KeyValuePair& kv, KeyValuePair* start_key) { + if (scan_desc_impl_->IsKvOnlyTable()) { // kv, set next key + start_key->set_key(GetNextStartPoint(kv.key())); + start_key->set_column_family(kv.column_family()); + start_key->set_qualifier(kv.qualifier()); + start_key->set_timestamp(kv.timestamp()); + } else if (kv.timestamp() == 0) { // table timestamp == 0 + start_key->set_key(kv.key()); + start_key->set_column_family(kv.column_family()); + start_key->set_qualifier(GetNextStartPoint(kv.qualifier())); + start_key->set_timestamp(INT64_MAX); + } else { // table has timestamp > 0 + start_key->set_key(kv.key()); + start_key->set_column_family(kv.column_family()); + start_key->set_qualifier(kv.qualifier()); + start_key->set_timestamp(kv.timestamp() - 1); + } + return; +} + +void ResultStreamImpl::ScanSessionReset(bool reset_retry) { + mu_.AssertHeld(); + // reset session parameter + uint64_t pre_session_id = session_id_; + StatusCode pre_session_error = session_error_; + bool pre_session_done = session_done_; + std::string pre_session_end_key = session_end_key_; + if (reset_retry) { + session_retry_ = 0; + } + uint64_t tid = (uint64_t)pthread_self(); + session_id_ = ((tid << 48) | ((uint64_t)get_micros())) & (0x7ffffffffffff); + session_done_ = false; + session_error_ = kTabletNodeOk; + part_of_session_ = false; + session_data_idx_ = 0; + session_last_idx_ = UINT32_MAX; + sliding_window_idx_ = 0; + next_idx_ = 0; + + // set all slot invalid + std::vector::iterator it = sliding_window_.begin(); + for (; it != sliding_window_.end(); ++it) { + it->state_ = SCANSLOT_INVALID; + it->cell_.Clear(); + } + + ref_count_ += FLAGS_tera_sdk_max_batch_scan_req; + KeyValuePair start_key; + ComputeStartKey(slot_last_key_, &start_key); + scan_desc_impl_->SetStart(start_key.key(), start_key.column_family(), start_key.qualifier(), + start_key.timestamp()); + SCAN_LOG << "session reset [start key " << start_key.key() << ", session_retry " << session_retry_ + << ", ref_count " << ref_count_ << "], previous session info [session_id " + << pre_session_id << ", session_error " << StatusCodeToString(pre_session_error) + << ", session_done " << pre_session_done << ", session_end_key " << pre_session_end_key + << "]"; + mu_.Unlock(); + // do io, release lock + for (int32_t i = 0; i < FLAGS_tera_sdk_max_batch_scan_req; i++) { + table_ptr_->ScanTabletAsync(this); + part_of_session_ = true; + } + mu_.Lock(); +} + +void ResultStreamImpl::ClearAndScanNextSlot(bool scan_next) { + mu_.AssertHeld(); + ScanSlot* slot = &(sliding_window_[sliding_window_idx_]); + assert(next_idx_ == slot->cell_.key_values_size()); + if (next_idx_ > 0) { // update last slot kv_pair + slot_last_key_.CopyFrom(slot->cell_.key_values(next_idx_ - 1)); + } + slot->cell_.Clear(); + slot->state_ = SCANSLOT_INVALID; + next_idx_ = 0; + session_data_idx_++; + sliding_window_idx_ = (sliding_window_idx_ + 1) % FLAGS_tera_sdk_max_batch_scan_req; + SCAN_LOG << " session_data_idx_ " << session_data_idx_ << ", sliding_window_idx_ " + << sliding_window_idx_ << ", ref_count_ " << ref_count_; + if (scan_next) { + ref_count_++; mu_.Unlock(); - // do io, release lock - for (int32_t i = 0; i < FLAGS_tera_sdk_max_batch_scan_req; i++) { - table_ptr_->ScanTabletAsync(this); - part_of_session_ = true; - } + table_ptr_->ScanTabletAsync(this); mu_.Lock(); -} - -void ResultStreamBatchImpl::ClearAndScanNextSlot(bool scan_next) { - mu_.AssertHeld(); + } + return; +} + +bool ResultStreamImpl::Done(ErrorCode* error) { + if (error) { + error->SetFailed(ErrorCode::kOK); + } + MutexLock mutex(&mu_); + while (1) { + if (canceled_) { + LOG(INFO) << "This scan is cancelled.\n"; + return true; + } + // not wait condition: + // 1. current slot valid, or + // 2. ts not available, or + // 3. rpc not available, or ScanSlot* slot = &(sliding_window_[sliding_window_idx_]); - assert(next_idx_ == slot->cell_.key_values_size()); - if (next_idx_ > 0) { // update last slot kv_pair - slot_last_key_.CopyFrom(slot->cell_.key_values(next_idx_ - 1)); - } - slot->cell_.Clear(); - slot->state_ = SCANSLOT_INVALID; - next_idx_ = 0; - session_data_idx_++; - sliding_window_idx_ = (sliding_window_idx_ + 1) % FLAGS_tera_sdk_max_batch_scan_req; - SCAN_LOG << " session_data_idx_ " << session_data_idx_ - << ", sliding_window_idx_ " << sliding_window_idx_ - << ", ref_count_ " << ref_count_; - if (scan_next) { - ref_count_++; - mu_.Unlock(); - table_ptr_->ScanTabletAsync(this); - mu_.Lock(); - } - return; -} - -bool ResultStreamBatchImpl::Done(ErrorCode* error) { - if (error) { - error->SetFailed(ErrorCode::kOK); - } - MutexLock mutex(&mu_); - while (1) { - // not wait condition: - // 1. current slot valid, or - // 2. ts not available, or - // 3. rpc not available, or - ScanSlot* slot = &(sliding_window_[sliding_window_idx_]); - while (slot->state_ == SCANSLOT_INVALID) { - // stale results_id, re-enable another scan req - if (session_error_ != kTabletNodeOk) { - // TODO: kKeyNotInRange, do reset session - SCAN_LOG << "[RETRY " << ++session_retry_ << "] scan session error: " - << StatusCodeToString(session_error_) - << ", session_end_key " << session_end_key_ - << ", data_idx " << session_data_idx_ << ", slice_idx " << sliding_window_idx_; - assert(session_done_); - if (session_retry_ <= FLAGS_tera_sdk_batch_scan_max_retry) { - break; - } - - // give up scan, report session error - if (error) { - error->SetFailed(ErrorCode::kSystem, StatusCodeToString(session_error_)); - } - return true; - } - if (ref_count_ == 1) { - // check wether ts refuse scan - if (error) { - error->SetFailed(ErrorCode::kSystem, StatusCodeToString(session_error_)); - } - LOG(WARNING) << "[CHECK]: ts refuse scan, scan later.\n"; - return true; - } - cv_.Wait(); - } - if (slot->state_ == SCANSLOT_INVALID) { // TODO: error break, maybe delay retry - while (ref_count_ > 1) { cv_.Wait();} - cv_.TimeWaitInUs(FLAGS_batch_scan_delay_retry_in_us, "BatchScanRetryTimeWait"); - ScanSessionReset(false); - continue; - } - - // slot valid - if (next_idx_ < slot->cell_.key_values_size()) { break; } - - SCAN_LOG << "session_done_ " << session_done_ - << ", session_data_idx_ " << session_data_idx_ - << ", session_last_idx_ " << session_last_idx_; - // current slot finish and session not finish, scan next slot - if (!session_done_) { - ClearAndScanNextSlot(true); - continue; + while (slot->state_ == SCANSLOT_INVALID) { + // stale results_id, re-enable another scan req + if (session_error_ != kTabletNodeOk) { + // TODO: kKeyNotInRange, do reset session + SCAN_LOG << "[RETRY " << ++session_retry_ + << "] scan session error: " << StatusCodeToString(session_error_) + << ", session_end_key " << session_end_key_ << ", data_idx " << session_data_idx_ + << ", slice_idx " << sliding_window_idx_; + assert(session_done_); + if (session_retry_ <= FLAGS_tera_sdk_scan_max_retry) { + break; } - // session finish, read rest data - if (session_data_idx_ != session_last_idx_) { - ClearAndScanNextSlot(false); - continue; + // give up scan, report session error + if (error) { + error->SetFailed(ErrorCode::kSystem, StatusCodeToString(session_error_)); } - - // scan finish, exit - const string& scan_end_key = scan_desc_impl_->GetEndRowKey(); - if (session_end_key_ == "" || (scan_end_key != "" && session_end_key_ >= scan_end_key)) { - SCAN_LOG << "scan done, scan_end_key " << scan_end_key - << ", session_end_key " << session_end_key_; - return true; + return true; + } + if (ref_count_ == 1) { + // check wether ts refuse scan + if (error) { + error->SetFailed(ErrorCode::kSystem, StatusCodeToString(session_error_)); } - - // scan next tablet - slot_last_key_.set_key(session_end_key_); - slot_last_key_.set_timestamp(INT64_MAX); - ScanSessionReset(true); + LOG(WARNING) << "[CHECK]: ts refuse scan, scan later.\n"; + return true; + } + cv_.Wait(); } - return false; -} - -void ResultStreamBatchImpl::Next() { next_idx_++; } -bool ResultStreamBatchImpl::LookUp(const std::string& row_key) { return true;} -std::string ResultStreamBatchImpl::RowName() const { - const KeyValuePair& row = sliding_window_[sliding_window_idx_].cell_.key_values(next_idx_); - return row.has_key() ? row.key(): ""; -} -std::string ResultStreamBatchImpl::Family() const { - const KeyValuePair& row = sliding_window_[sliding_window_idx_].cell_.key_values(next_idx_); - return row.has_column_family() ? row.column_family(): ""; -} -std::string ResultStreamBatchImpl::Qualifier() const { - const KeyValuePair& row = sliding_window_[sliding_window_idx_].cell_.key_values(next_idx_); - return row.has_qualifier() ? row.qualifier(): ""; -} -std::string ResultStreamBatchImpl::ColumnName() const { - const std::string& cf = Family(); - const std::string& qu = Qualifier(); - return cf + ":" + qu; -} -int64_t ResultStreamBatchImpl::Timestamp() const { - const KeyValuePair& row = sliding_window_[sliding_window_idx_].cell_.key_values(next_idx_); - return row.has_timestamp() ? row.timestamp(): 0; -} -std::string ResultStreamBatchImpl::Value() const { - const KeyValuePair& row = sliding_window_[sliding_window_idx_].cell_.key_values(next_idx_); - return row.has_value() ? row.value(): ""; -} -int64_t ResultStreamBatchImpl::ValueInt64() const { - std::string v = Value(); - return (v.size() == sizeof(int64_t)) ? *(int64_t*)v.c_str() : 0; -} - -ResultStreamSyncImpl::ResultStreamSyncImpl(TableImpl* table, - ScanDescImpl* scan_desc_impl) - : ResultStreamImpl(table, scan_desc_impl), - response_(new tera::ScanTabletResponse), - result_pos_(0), - finish_cond_(&finish_mutex_), - retry_times_(0), - finish_(false) { - table_ptr_->ScanTabletSync(this); -} - -ResultStreamSyncImpl::~ResultStreamSyncImpl() { - if (response_ != NULL) { - delete response_; + if (slot->state_ == SCANSLOT_INVALID) { // TODO: error break, maybe delay retry + while (ref_count_ > 1) { + cv_.Wait(); + } + cv_.TimeWaitInUs(FLAGS_tera_sdk_scan_delay_retry_in_us, "ScanRetryTimeWait"); + ScanSessionReset(false); + continue; } -} - -bool ResultStreamSyncImpl::LookUp(const string& row_key) { - return true; -} -bool ResultStreamSyncImpl::Done(ErrorCode* err) { - while (1) { - const string& scan_end_key = scan_desc_impl_->GetEndRowKey(); - /// scan failed - while (response_->status() != kTabletNodeOk && - retry_times_ <= FLAGS_tera_sdk_sync_scan_max_retry) { - LOG(WARNING) << "[RETRY " << ++retry_times_ << "] scan error: " - << StatusCodeToString(response_->status()); - - int64_t wait_time; - if(response_->status() == kKeyNotInRange) { - wait_time = FLAGS_sync_scan_delay_retry_in_ms; - } else { - /// Wait less than 60 seconds - wait_time = std::min(static_cast(FLAGS_sync_scan_delay_retry_in_ms * (1 << (retry_times_ - 1))), - static_cast(60000)); - } - - delete response_; - response_ = new tera::ScanTabletResponse; - result_pos_ = 0; - Reset(); - - ThisThread::Sleep(wait_time); - table_ptr_->ScanTabletSync(this); - } - - if(response_->status() != kTabletNodeOk) { - if (err) { - err->SetFailed(ErrorCode::kSystem, - StatusCodeToString(response_->status())); - } - return true; - } - - if (result_pos_ < response_->results().key_values_size()) { - break; - } - const string& tablet_end_key = response_->end(); - if (response_->complete() && - (tablet_end_key == "" || (scan_end_key != "" && tablet_end_key >= scan_end_key))) { - if (err) { - err->SetFailed(ErrorCode::kOK); - } - return true; - } - - // Newer version of TS will return next_start_point when the opration is timeout - if (!response_->complete()) { - // Without next_start_point, kv is the last kv pair from last scan - if (response_->next_start_point().key() == "") { - const KeyValuePair& kv = response_->results().key_values(result_pos_ - 1); - if (scan_desc_impl_->IsKvOnlyTable()) { - scan_desc_impl_->SetStart(GetNextStartPoint(kv.key()), kv.column_family(), - kv.qualifier(), kv.timestamp()); - } else if (kv.timestamp() == 0) { - scan_desc_impl_->SetStart(kv.key(), kv.column_family(), - GetNextStartPoint(kv.qualifier()), INT64_MAX); - } else { - scan_desc_impl_->SetStart(kv.key(), kv.column_family(), - kv.qualifier(), kv.timestamp() - 1); - } - // next_start_point is where the next scan should start - } else { - const KeyValuePair& kv = response_->next_start_point(); - scan_desc_impl_->SetStart(kv.key(), kv.column_family(), - kv.qualifier(), kv.timestamp()); - } - } else { - scan_desc_impl_->SetStart(tablet_end_key); - } - result_pos_ = 0; - delete response_; - response_ = new tera::ScanTabletResponse; - Reset(); - table_ptr_->ScanTabletSync(this); + // slot valid, break here to read current slot continue + if (next_idx_ < slot->cell_.key_values_size()) { + break; } - return false; -} - -void ResultStreamSyncImpl::Next() { - ++result_pos_; -} - -string ResultStreamSyncImpl::RowName() const { - return response_->results().key_values(result_pos_).key(); -} -string ResultStreamSyncImpl::ColumnName() const { - if (response_->results().key_values(result_pos_).has_column_family()) { - const string& family = response_->results().key_values(result_pos_).column_family(); - if (response_->results().key_values(result_pos_).has_qualifier()) { - return (family + ":" + response_->results().key_values(result_pos_).qualifier()); - } - return family; + SCAN_LOG << "session_done_ " << session_done_ << ", session_data_idx_ " << session_data_idx_ + << ", session_last_idx_ " << session_last_idx_; + // current slot finish and session not finish, scan next slot + if (!session_done_) { + ClearAndScanNextSlot(true); + continue; } - return ""; -} -string ResultStreamSyncImpl::Family() const { - if (response_->results().key_values(result_pos_).has_column_family()) { - return response_->results().key_values(result_pos_).column_family(); + // session finish, read rest data + if (session_data_idx_ != session_last_idx_) { + ClearAndScanNextSlot(false); + continue; } - return ""; -} -string ResultStreamSyncImpl::Qualifier() const { - if (response_->results().key_values(result_pos_).has_qualifier()) { - return response_->results().key_values(result_pos_).qualifier(); + // scan finish, exit + const string& scan_end_key = scan_desc_impl_->GetEndRowKey(); + if (session_end_key_ == "" || (scan_end_key != "" && session_end_key_ >= scan_end_key)) { + SCAN_LOG << "scan done, scan_end_key " << scan_end_key << ", session_end_key " + << session_end_key_; + return true; } - return ""; -} -int64_t ResultStreamSyncImpl::Timestamp() const { - if (response_->results().key_values(result_pos_).has_timestamp()) { - return response_->results().key_values(result_pos_).timestamp(); - } - return 0; + // scan next tablet + slot_last_key_.set_key(session_end_key_); + slot_last_key_.set_timestamp(INT64_MAX); + ScanSessionReset(true); + } + return false; } -string ResultStreamSyncImpl::Value() const { - if (response_->results().key_values(result_pos_).has_value()) { - return response_->results().key_values(result_pos_).value(); - } +void ResultStreamImpl::Next() { + assert(!canceled_); + next_idx_++; +} +bool ResultStreamImpl::LookUp(const std::string& row_key) { return true; } +std::string ResultStreamImpl::RowName() const { + const KeyValuePair& row = sliding_window_[sliding_window_idx_].cell_.key_values(next_idx_); + if (!row.has_key()) { return ""; + } + return row.key(); } - -int64_t ResultStreamSyncImpl::ValueInt64() const { - std::string v; - if (response_->results().key_values(result_pos_).has_value()) { - v = response_->results().key_values(result_pos_).value(); - } - return (v.size() == sizeof(int64_t)) ? *(int64_t*)v.c_str() : 0; +std::string ResultStreamImpl::Family() const { + const KeyValuePair& row = sliding_window_[sliding_window_idx_].cell_.key_values(next_idx_); + return row.has_column_family() ? row.column_family() : ""; } - -void ResultStreamSyncImpl::GetRpcHandle(ScanTabletRequest** request, - ScanTabletResponse** response) { - *request = new ScanTabletRequest; - *response = response_; +std::string ResultStreamImpl::Qualifier() const { + const KeyValuePair& row = sliding_window_[sliding_window_idx_].cell_.key_values(next_idx_); + return row.has_qualifier() ? row.qualifier() : ""; } - -void ResultStreamSyncImpl::ReleaseRpcHandle(ScanTabletRequest* request, - ScanTabletResponse* response) { - delete request; - Signal(); +std::string ResultStreamImpl::ColumnName() const { + const KeyValuePair& row = sliding_window_[sliding_window_idx_].cell_.key_values(next_idx_); + if (!row.has_column_family() && !row.has_qualifier()) { + return ""; + } else { + const std::string& cf = Family(); + const std::string& qu = Qualifier(); + return cf + ":" + qu; + } } - -void ResultStreamSyncImpl::OnFinish(ScanTabletRequest* request, - ScanTabletResponse* response) { +int64_t ResultStreamImpl::Timestamp() const { + const KeyValuePair& row = sliding_window_[sliding_window_idx_].cell_.key_values(next_idx_); + return row.has_timestamp() ? row.timestamp() : 0; } - -void ResultStreamSyncImpl::Wait() { - MutexLock locker(&finish_mutex_); - while (!finish_) { - finish_cond_.Wait(); - } +std::string ResultStreamImpl::Value() const { + const KeyValuePair& row = sliding_window_[sliding_window_idx_].cell_.key_values(next_idx_); + return row.has_value() ? row.value() : ""; } - -void ResultStreamSyncImpl::Signal() { - MutexLock locker(&finish_mutex_); - finish_ = true; - finish_cond_.Signal(); +int64_t ResultStreamImpl::ValueInt64() const { + std::string v = Value(); + return (v.size() == sizeof(int64_t)) ? *(int64_t*)v.c_str() : 0; } +// scan query and cancel +uint64_t ResultStreamImpl::GetDataSize() const { return data_size_; } +uint64_t ResultStreamImpl::GetRowCount() const { return row_count_; } +std::string ResultStreamImpl::GetLastKey() const { return last_key_; } +void ResultStreamImpl::Cancel() { canceled_ = true; } -void ResultStreamSyncImpl::Reset() { - MutexLock locker(&finish_mutex_); - finish_ = false; -} +void ResultStreamImpl::UpdateRowCount(uint32_t row_count) { row_count_ += row_count; } +void ResultStreamImpl::UpdateDataSize(uint32_t data_size) { data_size_ += data_size; } +void ResultStreamImpl::UpdateLastKey(const KeyValuePair& kv) { last_key_ = kv.key(); } ///////////////////////// ScanDescImpl /////////////////////// - ScanDescImpl::ScanDescImpl(const string& rowkey) : start_timestamp_(0), timer_range_(NULL), buf_size_(FLAGS_tera_sdk_scan_buffer_size), number_limit_(FLAGS_tera_sdk_scan_number_limit), - is_async_(FLAGS_tera_sdk_batch_scan_enabled), max_version_(1), max_qualifiers_(std::numeric_limits::max()), - pack_interval_(FLAGS_tera_sdk_scan_timeout), + scan_slot_timeout_(FLAGS_tera_sdk_scan_timeout), snapshot_(0), - value_converter_(&DefaultValueConverter) { - SetStart(rowkey); + filter_desc_(NULL) { + SetStart(rowkey); } ScanDescImpl::ScanDescImpl(const ScanDescImpl& impl) @@ -602,343 +428,147 @@ ScanDescImpl::ScanDescImpl(const ScanDescImpl& impl) start_timestamp_(impl.start_timestamp_), buf_size_(impl.buf_size_), number_limit_(impl.number_limit_), - is_async_(impl.is_async_), max_version_(impl.max_version_), max_qualifiers_(impl.max_qualifiers_), - pack_interval_(impl.pack_interval_), + scan_slot_timeout_(impl.scan_slot_timeout_), snapshot_(impl.snapshot_), table_schema_(impl.table_schema_) { - value_converter_ = impl.GetValueConverter(); - filter_string_ = impl.GetFilterString(); - filter_list_ = impl.GetFilterList(); - if (impl.GetTimerRange() != NULL) { - timer_range_ = new tera::TimeRange; - timer_range_->CopyFrom(*(impl.GetTimerRange())); - } else { - timer_range_ = NULL; - } - for (int32_t i = 0; i < impl.GetSizeofColumnFamilyList(); ++i) { - cf_list_.push_back(new tera::ColumnFamily(*(impl.GetColumnFamily(i)))); - } + if (impl.GetFilterDesc()) { + filter_desc_ = new filter::FilterDesc(); + filter_desc_->CopyFrom(*(impl.GetFilterDesc())); + } else { + filter_desc_ = NULL; + } + if (impl.GetTimerRange() != NULL) { + timer_range_ = new tera::TimeRange; + timer_range_->CopyFrom(*(impl.GetTimerRange())); + } else { + timer_range_ = NULL; + } + for (int32_t i = 0; i < impl.GetSizeofColumnFamilyList(); ++i) { + cf_list_.push_back(new tera::ColumnFamily(*(impl.GetColumnFamily(i)))); + } } ScanDescImpl::~ScanDescImpl() { - if (timer_range_ != NULL) { - delete timer_range_; - } - for (uint32_t i = 0; i < cf_list_.size(); ++i) { - delete cf_list_[i]; - } + if (timer_range_ != NULL) { + delete timer_range_; + } + for (uint32_t i = 0; i < cf_list_.size(); ++i) { + delete cf_list_[i]; + } + if (filter_desc_) { + delete filter_desc_; + } } void ScanDescImpl::SetStart(const string& row_key, const string& column_family, - const string& qualifier, int64_t time_stamp) -{ - start_key_ = row_key; - start_column_family_ = column_family; - start_qualifier_ = qualifier; - start_timestamp_ = time_stamp; + const string& qualifier, int64_t time_stamp) { + start_key_ = row_key; + start_column_family_ = column_family; + start_qualifier_ = qualifier; + start_timestamp_ = time_stamp; } -void ScanDescImpl::SetEnd(const string& rowkey) { - end_key_ = rowkey; -} +void ScanDescImpl::SetEnd(const string& rowkey) { end_key_ = rowkey; } -void ScanDescImpl::AddColumnFamily(const string& cf) { - AddColumn(cf, ""); -} +void ScanDescImpl::AddColumnFamily(const string& cf) { AddColumn(cf, ""); } void ScanDescImpl::AddColumn(const string& cf, const string& qualifier) { - for (uint32_t i = 0; i < cf_list_.size(); ++i) { - if (cf_list_[i]->family_name() == cf) { - if (qualifier != "") { - cf_list_[i]->add_qualifier_list(qualifier); - } - return; - } - } - tera::ColumnFamily* column_family = new tera::ColumnFamily; - column_family->set_family_name(cf); - if (qualifier != "") { - column_family->add_qualifier_list(qualifier); + for (uint32_t i = 0; i < cf_list_.size(); ++i) { + if (cf_list_[i]->family_name() == cf) { + if (qualifier != "") { + cf_list_[i]->add_qualifier_list(qualifier); + } + return; } - cf_list_.push_back(column_family); + } + tera::ColumnFamily* column_family = new tera::ColumnFamily; + column_family->set_family_name(cf); + if (qualifier != "") { + column_family->add_qualifier_list(qualifier); + } + cf_list_.push_back(column_family); } -void ScanDescImpl::SetMaxVersions(int32_t versions) { - max_version_ = versions; -} +void ScanDescImpl::SetMaxVersions(int32_t versions) { max_version_ = versions; } -void ScanDescImpl::SetMaxQualifiers(int64_t max_qualifiers) { - max_qualifiers_ = max_qualifiers; -} +void ScanDescImpl::SetMaxQualifiers(int64_t max_qualifiers) { max_qualifiers_ = max_qualifiers; } -void ScanDescImpl::SetPackInterval(int64_t interval) { - pack_interval_ = interval; -} +void ScanDescImpl::SetPackInterval(int64_t timeout) { scan_slot_timeout_ = timeout; } void ScanDescImpl::SetTimeRange(int64_t ts_end, int64_t ts_start) { - if (timer_range_ == NULL) { - timer_range_ = new tera::TimeRange; - } - timer_range_->set_ts_start(ts_start); - timer_range_->set_ts_end(ts_end); + if (timer_range_ == NULL) { + timer_range_ = new tera::TimeRange; + } + timer_range_->set_ts_start(ts_start); + timer_range_->set_ts_end(ts_end); } -void ScanDescImpl::SetValueConverter(ValueConverter convertor) { - value_converter_ = convertor; -} +void ScanDescImpl::SetSnapshot(uint64_t snapshot_id) { snapshot_ = snapshot_id; } -void ScanDescImpl::SetSnapshot(uint64_t snapshot_id) { - snapshot_ = snapshot_id; -} +uint64_t ScanDescImpl::GetSnapshot() const { return snapshot_; } -uint64_t ScanDescImpl::GetSnapshot() const { - return snapshot_; -} +void ScanDescImpl::SetBufferSize(int64_t buf_size) { buf_size_ = buf_size; } -void ScanDescImpl::SetBufferSize(int64_t buf_size) { - buf_size_ = buf_size; -} +void ScanDescImpl::SetNumberLimit(int64_t number_limit) { number_limit_ = number_limit; } -void ScanDescImpl::SetNumberLimit(int64_t number_limit) { - number_limit_ = number_limit; -} +const string& ScanDescImpl::GetStartRowKey() const { return start_key_; } -void ScanDescImpl::SetAsync(bool async) { - is_async_ = async; -} +const string& ScanDescImpl::GetEndRowKey() const { return end_key_; } -const string& ScanDescImpl::GetStartRowKey() const { - return start_key_; -} +const string& ScanDescImpl::GetStartColumnFamily() const { return start_column_family_; } -const string& ScanDescImpl::GetEndRowKey() const { - return end_key_; -} +const string& ScanDescImpl::GetStartQualifier() const { return start_qualifier_; } -const string& ScanDescImpl::GetStartColumnFamily() const { - return start_column_family_; -} +int64_t ScanDescImpl::GetStartTimeStamp() const { return start_timestamp_; } -const string& ScanDescImpl::GetStartQualifier() const { - return start_qualifier_; -} - -int64_t ScanDescImpl::GetStartTimeStamp() const { - return start_timestamp_; -} - -int32_t ScanDescImpl::GetSizeofColumnFamilyList() const { - return cf_list_.size(); -} +int32_t ScanDescImpl::GetSizeofColumnFamilyList() const { return cf_list_.size(); } const tera::ColumnFamily* ScanDescImpl::GetColumnFamily(int32_t num) const { - if (static_cast(num) >= cf_list_.size()) { - return NULL; - } - return cf_list_[num]; -} - -int32_t ScanDescImpl::GetMaxVersion() const { - return max_version_; -} - -int64_t ScanDescImpl::GetMaxQualifiers() const { - return max_qualifiers_; + if (static_cast(num) >= cf_list_.size()) { + return NULL; + } + return cf_list_[num]; } -int64_t ScanDescImpl::GetPackInterval() const { - return pack_interval_; -} - -const tera::TimeRange* ScanDescImpl::GetTimerRange() const { - return timer_range_; -} +int32_t ScanDescImpl::GetMaxVersion() const { return max_version_; } -const string& ScanDescImpl::GetFilterString() const { - return filter_string_; -} +int64_t ScanDescImpl::GetMaxQualifiers() const { return max_qualifiers_; } -const FilterList& ScanDescImpl::GetFilterList() const { - return filter_list_; -} +int64_t ScanDescImpl::GetPackInterval() const { return scan_slot_timeout_; } -const ValueConverter ScanDescImpl::GetValueConverter() const { - return value_converter_; -} +const tera::TimeRange* ScanDescImpl::GetTimerRange() const { return timer_range_; } -int64_t ScanDescImpl::GetBufferSize() const { - return buf_size_; -} - -int64_t ScanDescImpl::GetNumberLimit() { - return number_limit_; -} - -bool ScanDescImpl::IsAsync() const { - return is_async_; -} +filter::FilterDesc* ScanDescImpl::GetFilterDesc() const { return filter_desc_; } -void ScanDescImpl::SetTableSchema(const TableSchema& schema) { - table_schema_ = schema; -} +int64_t ScanDescImpl::GetBufferSize() const { return buf_size_; } -bool ScanDescImpl::IsKvOnlyTable() { - return IsKvTable(table_schema_); -} +int64_t ScanDescImpl::GetNumberLimit() { return number_limit_; } -// SELECT * WHERE AND -bool ScanDescImpl::SetFilter(const std::string& schema) { - std::string select; - std::string where; - std::string::size_type pos; - if ((pos = schema.find("SELECT ")) != 0) { - LOG(ERROR) << "illegal scan expression: should be begin with \"SELECT\""; - return false; - } - if ((pos = schema.find(" WHERE ")) != string::npos) { - select = schema.substr(7, pos - 7); - where = schema.substr(pos + 7, schema.size() - pos - 7); - } else { - select = schema.substr(7); - } - // parse select - { - select = RemoveInvisibleChar(select); - if (select != "*") { - std::vector cfs; - SplitString(select, ",", &cfs); - for (size_t i = 0; i < cfs.size(); ++i) { - // add columnfamily - AddColumnFamily(cfs[i]); - VLOG(10) << "add cf: " << cfs[i] << " to scan descriptor"; - } - } - } - // parse where - if (where != "") { - filter_string_ = where; - if (!ParseFilterString()) { - return false; - } - } - return true; -} +void ScanDescImpl::SetTableSchema(const TableSchema& schema) { table_schema_ = schema; } -bool ScanDescImpl::ParseFilterString() { - const char* and_op = " AND "; - filter_list_.Clear(); - std::vector filter_v; - SplitString(filter_string_, and_op, &filter_v); - for (size_t i = 0; i < filter_v.size(); ++i) { - Filter filter; - if (ParseSubFilterString(filter_v[i], &filter)) { - Filter* pf = filter_list_.add_filter(); - pf->CopyFrom(filter); - } else { - LOG(ERROR) << "fail to parse expression: " << filter_v[i]; - return false; - } - } +bool ScanDescImpl::IsKvOnlyTable() { return IsKvTable(table_schema_); } - return true; -} - -bool ScanDescImpl::ParseSubFilterString(const string& filter_str, - Filter* filter) { - string filter_t = RemoveInvisibleChar(filter_str); - if (filter_t.size() < 3) { - LOG(ERROR) << "illegal filter expression: " << filter_t; - return false; - } - if (filter_t.find("@") == string::npos) { - // default filter, value compare filter - if (!ParseValueCompareFilter(filter_t, filter)) { - return false; - } - } else { - // TODO: other filter - LOG(ERROR) << "illegal filter expression: " << filter_t; - return false; - } - return true; -} - -bool ScanDescImpl::ParseValueCompareFilter(const string& filter_str, - Filter* filter) { - if (filter == NULL) { - LOG(ERROR) << "filter ptr is NULL."; - return false; - } - - if (max_version_ != 1) { - LOG(ERROR) << "only support 1 version scan if there is a value filter: " - << filter_str; - return false; - } - string::size_type type_pos; - string::size_type cf_pos; - if ((type_pos = filter_str.find("int64")) != string::npos) { - filter->set_value_type(kINT64); - cf_pos = type_pos + 5; - } else { - LOG(ERROR) << "only support int64 value filter, but got: " - << filter_str; - return false; - } - - string cf_name, value; - string::size_type op_pos; - BinCompOp comp_op = UNKNOWN; - if ((op_pos = filter_str.find(">=")) != string::npos) { - cf_name = filter_str.substr(cf_pos, op_pos - cf_pos); - value = filter_str.substr(op_pos + 2, filter_str.size() - op_pos - 2); - comp_op = GE; - } else if ((op_pos = filter_str.find(">")) != string::npos) { - cf_name = filter_str.substr(cf_pos, op_pos - cf_pos); - value = filter_str.substr(op_pos + 1, filter_str.size() - op_pos - 1); - comp_op = GT; - } else if ((op_pos = filter_str.find("<=")) != string::npos) { - cf_name = filter_str.substr(cf_pos, op_pos - cf_pos); - value = filter_str.substr(op_pos + 2, filter_str.size() - op_pos - 2); - comp_op = LE; - } else if ((op_pos = filter_str.find("<")) != string::npos) { - cf_name = filter_str.substr(cf_pos, op_pos - cf_pos); - value = filter_str.substr(op_pos + 1, filter_str.size() - op_pos - 1); - comp_op = LT; - } else if ((op_pos = filter_str.find("==")) != string::npos) { - cf_name = filter_str.substr(cf_pos, op_pos - cf_pos); - value = filter_str.substr(op_pos + 2, filter_str.size() - op_pos - 2); - comp_op = EQ; - } else if ((op_pos = filter_str.find("!=")) != string::npos) { - cf_name = filter_str.substr(cf_pos, op_pos - cf_pos); - value = filter_str.substr(op_pos + 2, filter_str.size() - op_pos - 2); - comp_op = NE; - } else { - LOG(ERROR) << "fail to parse expression: " << filter_str; - return false; - } - string type; - if (filter->value_type() == kINT64) { - type = "int64"; - } else { - assert(false); - } - - string value_internal; - if (!value_converter_(value, type, &value_internal)) { - LOG(ERROR) << "fail to convert value: \""<< value << "\"(" << type << ")"; - return false; - } - - filter->set_type(BinComp); - filter->set_bin_comp_op(comp_op); - filter->set_field(ValueFilter); - filter->set_content(cf_name); - filter->set_ref_value(value_internal); - return true; +bool ScanDescImpl::SetFilter(const filter::FilterPtr& filter) { + if (max_version_ != 1) { + LOG(ERROR) << "only support max_version of scanner == 1"; + return false; + } + if (!filter) { + LOG(ERROR) << "filter is NULL"; + return false; + } + filter_desc_ = new filter::FilterDesc; + filter_desc_->set_type(TransFilterType(filter->Type())); + int ret = filter->SerializeTo(filter_desc_->mutable_serialized_filter()); + if (!ret) { + delete filter_desc_; + filter_desc_ = nullptr; + LOG(ERROR) << "Filter Serialize Error"; + return false; + } + return true; } -} // namespace tera - +} // namespace tera diff --git a/src/sdk/scan_impl.h b/src/sdk/scan_impl.h index 21dbaa34d..588365194 100644 --- a/src/sdk/scan_impl.h +++ b/src/sdk/scan_impl.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_SDK_SCAN_IMPL_H_ -#define TERA_SDK_SCAN_IMPL_H_ +#ifndef TERA_SDK_SCAN_IMPL_H_ +#define TERA_SDK_SCAN_IMPL_H_ #include #include @@ -11,292 +11,205 @@ #include #include "common/event.h" -#include "common/thread.h" #include "proto/tabletnode_rpc.pb.h" #include "sdk/sdk_task.h" #include "tera.h" #include "types.h" #include "common/timer.h" - +#include "proto/filter.pb.h" namespace tera { class TableImpl; class ResultStreamImpl : public ResultStream { -public: - ResultStreamImpl(TableImpl* table, ScanDescImpl* scan_desc_impl); - virtual ~ResultStreamImpl(); - - bool LookUp(const std::string& row_key) = 0; - bool Done(ErrorCode* err) = 0; - void Next() = 0; - - std::string RowName() const = 0; - std::string Family() const = 0; - std::string ColumnName() const = 0; - std::string Qualifier() const = 0; - int64_t Timestamp() const = 0; - std::string Value() const = 0; - int64_t ValueInt64() const = 0; - -public: - ScanDescImpl* GetScanDesc(); - - virtual void GetRpcHandle(ScanTabletRequest** request, - ScanTabletResponse** response) = 0; - virtual void ReleaseRpcHandle(ScanTabletRequest* request, - ScanTabletResponse* response) = 0; - virtual void OnFinish(ScanTabletRequest* request, - ScanTabletResponse* response) = 0; - std::string GetNextStartPoint(const std::string& str); - -protected: - tera::ScanDescImpl* scan_desc_impl_; - TableImpl* table_ptr_; - -private: - ResultStreamImpl(const ResultStreamImpl&); - void operator=(const ResultStreamImpl&); -}; - -/////////////////////////////////////// -///// high performance scan ///// -/////////////////////////////////////// -class ResultStreamBatchImpl : public ResultStreamImpl { -public: - // user interface - ResultStreamBatchImpl(TableImpl* table, ScanDescImpl* scan_desc); - virtual ~ResultStreamBatchImpl(); - - bool LookUp(const std::string& row_key); // TODO: result maybe search like a map - bool Done(ErrorCode* err);// wait until slot become valid - void Next(); // get next kv in RowResult - - std::string RowName() const; // get row key - std::string Family() const; // get cf - std::string Qualifier() const;// get qu - std::string ColumnName() const; // get cf:qu - int64_t Timestamp() const; // get ts - std::string Value() const; // get value - int64_t ValueInt64() const; // get value as int64_t - -public: - // TableImpl interface - void GetRpcHandle(ScanTabletRequest** request, - ScanTabletResponse** response); // alloc resource for scan session - void ReleaseRpcHandle(ScanTabletRequest* request, - ScanTabletResponse* response); // free resource for scan session - void OnFinish(ScanTabletRequest* request, - ScanTabletResponse* response); // scan callback -private: - void ClearAndScanNextSlot(bool scan_next); - void ComputeStartKey(const KeyValuePair& kv, KeyValuePair* start_key); - void ScanSessionReset(bool reset_retry); - -private: - mutable Mutex mu_; - CondVar cv_; - - int32_t session_retry_; - int32_t ref_count_; // use for scan_imple destory - - // session control - uint64_t session_id_; // client and ts use session id to finish channel negotiation - bool session_done_; // session is finish - StatusCode session_error_; // if error occur during scan, set error code. - uint32_t session_data_idx_; // current result id wait - bool part_of_session_; // TODO, should be deleted - std::string session_end_key_; - KeyValuePair slot_last_key_; - uint32_t session_last_idx_; // if session done, point to the last data_idx - - // sliding window control - enum ScanSlotState { - SCANSLOT_INVALID = 0, // init state - SCANSLOT_VALID = 1, // slot can be read - }; - typedef struct ScanSlot { - uint64_t state_; // ScanSlotState - RowResult cell_; // kv result - } ScanSlot; - std::vector sliding_window_; // scan_slot buffer - int32_t sliding_window_idx_; // current slot index - int32_t next_idx_; // offset in sliding_window[cur_buffer_idx] -}; - -class ResultStreamSyncImpl : public ResultStreamImpl { -public: - ResultStreamSyncImpl(TableImpl* table, ScanDescImpl* scan_desc_impl); - virtual ~ResultStreamSyncImpl(); - - bool LookUp(const std::string& row_key); - bool Done(ErrorCode* err); - void Next(); - - std::string RowName() const; - std::string Family() const; - std::string ColumnName() const; - std::string Qualifier() const; - int64_t Timestamp() const; - std::string Value() const; - int64_t ValueInt64() const; - -public: - void GetRpcHandle(ScanTabletRequest** request, - ScanTabletResponse** response); - void ReleaseRpcHandle(ScanTabletRequest* request, - ScanTabletResponse* response); - void OnFinish(ScanTabletRequest* request, - ScanTabletResponse* response); - -public: - void Wait(); - -private: - void Signal(); - void Reset(); - -private: - tera::ScanTabletResponse* response_; - int32_t result_pos_; - mutable Mutex finish_mutex_; - common::CondVar finish_cond_; - int32_t retry_times_; - bool finish_; + public: + ResultStreamImpl(TableImpl* table, ScanDescImpl* scan_desc_impl); + virtual ~ResultStreamImpl(); + + bool LookUp(const std::string& row_key); // TODO: result maybe search like a map + bool Done(ErrorCode* err); // wait until slot become valid + void Next(); // get next kv in RowResult + + std::string RowName() const; // get row key + std::string Family() const; // get cf + std::string Qualifier() const; // get qu + std::string ColumnName() const; // get cf:qu + int64_t Timestamp() const; // get ts + std::string Value() const; // get value + int64_t ValueInt64() const; // get value as int64_t + uint64_t GetDataSize() const; // get total data size until last slot scan + uint64_t GetRowCount() const; // get total row count(kv count) until last slot scan + std::string GetLastKey() const; // get last key string until last slot scan + void Cancel(); // cancel the scan task + + public: + ScanDescImpl* GetScanDesc(); + void GetRpcHandle(ScanTabletRequest** request, + ScanTabletResponse** response); // alloc resource for scan session + void ReleaseRpcHandle(ScanTabletRequest* request, + ScanTabletResponse* response); // free resource for scan session + void OnFinish(ScanTabletRequest* request, ScanTabletResponse* response); // scan callback + std::string GetNextStartPoint(const std::string& str); // for session reset + + private: + void ClearAndScanNextSlot(bool scan_next); + void ComputeStartKey(const KeyValuePair& kv, KeyValuePair* start_key); + void ScanSessionReset(bool reset_retry); + void UpdateDataSize(uint32_t data_size); // get total data size until last slot scan + void UpdateRowCount(uint32_t row_count); // get total row count until last slot scan + void UpdateLastKey(const KeyValuePair& kv); // get last key until last slot scan + + private: + mutable Mutex mu_; + CondVar cv_; + tera::ScanDescImpl* scan_desc_impl_; + TableImpl* table_ptr_; + + int32_t session_retry_; + int32_t ref_count_; // use for scan_impl destory + + // session control + uint64_t session_id_; // client and ts use session id to finish channel negotiation + bool session_done_; // session is finish + StatusCode session_error_; // if error occur during scan, set error code. + uint32_t session_data_idx_; // current result id wait + bool part_of_session_; // TODO, should be deleted + std::string session_end_key_; + KeyValuePair slot_last_key_; + uint32_t session_last_idx_; // if session done, point to the last data_idx + + // sliding window control + enum ScanSlotState { + SCANSLOT_INVALID = 0, // init state + SCANSLOT_VALID = 1, // slot can be read + }; + typedef struct ScanSlot { + uint64_t state_; // ScanSlotState + RowResult cell_; // kv result + } ScanSlot; + std::vector sliding_window_; // scan_slot buffer + int32_t sliding_window_idx_; // current slot index + int32_t next_idx_; // offset in sliding_window[cur_buffer_idx] + uint64_t data_size_; + uint64_t row_count_; + std::string last_key_; + bool canceled_; + + private: + ResultStreamImpl(const ResultStreamImpl&); + void operator=(const ResultStreamImpl&); }; -typedef ScanDescriptor::ValueConverter ValueConverter; - class ScanDescImpl { -public: - ScanDescImpl(const std::string& rowkey); - - ScanDescImpl(const ScanDescImpl& impl); - - ~ScanDescImpl(); - - void SetEnd(const std::string& rowkey); - - void AddColumnFamily(const std::string& cf); - - void AddColumn(const std::string& cf, const std::string& qualifier); - - void SetMaxVersions(int32_t versions); - - void SetMaxQualifiers(int64_t max_qualifiers); - - void SetPackInterval(int64_t timeout); - - void SetTimeRange(int64_t ts_end, int64_t ts_start); + public: + ScanDescImpl(const std::string& rowkey); - bool SetFilter(const std::string& schema); + ScanDescImpl(const ScanDescImpl& impl); - void SetValueConverter(ValueConverter converter); + ~ScanDescImpl(); - void SetSnapshot(uint64_t snapshot_id); + void SetEnd(const std::string& rowkey); - void SetBufferSize(int64_t buf_size); + void AddColumnFamily(const std::string& cf); - void SetNumberLimit(int64_t number_limit); + void AddColumn(const std::string& cf, const std::string& qualifier); - void SetAsync(bool async); + void SetMaxVersions(int32_t versions); - void SetStart(const std::string& row_key, const std::string& column_family = "", - const std::string& qualifier = "", int64_t time_stamp = kLatestTs); + void SetMaxQualifiers(int64_t max_qualifiers); - const std::string& GetStartRowKey() const; + void SetPackInterval(int64_t timeout); - const std::string& GetEndRowKey() const; + void SetTimeRange(int64_t ts_end, int64_t ts_start); - const std::string& GetStartColumnFamily() const; + bool SetFilter(const filter::FilterPtr& filter); - const std::string& GetStartQualifier() const; + void SetSnapshot(uint64_t snapshot_id); - int64_t GetStartTimeStamp() const; + void SetBufferSize(int64_t buf_size); - int32_t GetSizeofColumnFamilyList() const; + void SetNumberLimit(int64_t number_limit); - const tera::ColumnFamily* GetColumnFamily(int32_t num) const; + void SetStart(const std::string& row_key, const std::string& column_family = "", + const std::string& qualifier = "", int64_t time_stamp = kLatestTs); - const tera::TimeRange* GetTimerRange() const; + const std::string& GetStartRowKey() const; - const std::string& GetFilterString() const; + const std::string& GetEndRowKey() const; - const FilterList& GetFilterList() const; + const std::string& GetStartColumnFamily() const; - const ValueConverter GetValueConverter() const; + const std::string& GetStartQualifier() const; - int32_t GetMaxVersion() const; + int64_t GetStartTimeStamp() const; - int64_t GetMaxQualifiers() const; + int32_t GetSizeofColumnFamilyList() const; - int64_t GetPackInterval() const; + const tera::ColumnFamily* GetColumnFamily(int32_t num) const; - uint64_t GetSnapshot() const; + const tera::TimeRange* GetTimerRange() const; - int64_t GetBufferSize() const; + filter::FilterDesc* GetFilterDesc() const; - int64_t GetNumberLimit(); + int32_t GetMaxVersion() const; - bool IsAsync() const; + int64_t GetMaxQualifiers() const; - void SetTableSchema(const TableSchema& schema); + int64_t GetPackInterval() const; + uint64_t GetSnapshot() const; - bool IsKvOnlyTable(); + int64_t GetBufferSize() const; -private: + int64_t GetNumberLimit(); - bool ParseFilterString(); - bool ParseSubFilterString(const std::string& filter_str, Filter* filter); + void SetTableSchema(const TableSchema& schema); - bool ParseValueCompareFilter(const std::string& filter_str, Filter* filter); + bool IsKvOnlyTable(); -private: - std::string start_key_; - std::string end_key_; - std::string start_column_family_; - std::string start_qualifier_; - int64_t start_timestamp_; - std::vector cf_list_; - tera::TimeRange* timer_range_; - int64_t buf_size_; - int64_t number_limit_; - bool is_async_; - int32_t max_version_; - int64_t max_qualifiers_; - int64_t pack_interval_; - uint64_t snapshot_; - std::string filter_string_; - FilterList filter_list_; - ValueConverter value_converter_; - TableSchema table_schema_; + private: + std::string start_key_; + std::string end_key_; + std::string start_column_family_; + std::string start_qualifier_; + int64_t start_timestamp_; + std::vector cf_list_; + tera::TimeRange* timer_range_; + int64_t buf_size_; + int64_t number_limit_; + int32_t max_version_; + int64_t max_qualifiers_; + int64_t scan_slot_timeout_; + uint64_t snapshot_; + TableSchema table_schema_; + filter::FilterDesc* filter_desc_; }; struct ScanTask : public SdkTask { - ResultStreamImpl* stream; - tera::ScanTabletRequest* request; - tera::ScanTabletResponse* response; - - uint32_t retry_times; - void IncRetryTimes() { retry_times++; } - uint32_t RetryTimes() { return retry_times; } - ScanTask() : SdkTask(SdkTask::SCAN), stream(NULL), request(NULL), - response(NULL), retry_times(0) {} - - virtual bool IsAsync() { return false; } - virtual uint32_t Size() { return 0; } - virtual int64_t TimeOut() { return 0; } - virtual void Wait() {} - virtual void SetError(ErrorCode::ErrorCodeType err, - const std::string& reason) {} - virtual const std::string& RowKey() { return stream->GetScanDesc()->GetStartRowKey(); } + ResultStreamImpl* stream; + tera::ScanTabletRequest* request; + tera::ScanTabletResponse* response; + + ScanTask() : SdkTask(SdkTask::SCAN), stream(NULL), request(NULL), response(NULL) {} + + virtual bool IsAsync() { return false; } + virtual uint32_t Size() { return 0; } + virtual void SetTimeOut(int64_t timeout) {} + virtual int64_t TimeOut() { return 0; } + virtual void Wait() {} + virtual void SetError(ErrorCode::ErrorCodeType err, const std::string& reason) {} + std::string InternalRowKey() { return stream->GetScanDesc()->GetStartRowKey(); } + + virtual void RunCallback() { abort(); } // Not implement this method + virtual int64_t GetCommitTimes() { return 0; } // Not implement this method }; -#define SCAN_LOG LOG_IF(INFO, FLAGS_debug_tera_sdk_scan) << "sdk-scan[" << session_id_ << "] " +#define SCAN_LOG \ + LOG_IF(INFO, FLAGS_debug_tera_sdk_scan) << "sdk-scan[" << session_id_ << "]" \ + " " #define SCAN_WLOG LOG(WARNING) << "sdk-scan[" << session_id_ << "] " -} // namespace tera +} // namespace tera #endif // TERA_SDK_SCAN_IMPL_H_ diff --git a/src/sdk/schema.cc b/src/sdk/schema.cc index d7d84da85..28dfd333b 100644 --- a/src/sdk/schema.cc +++ b/src/sdk/schema.cc @@ -7,12 +7,10 @@ namespace tera { -TableDescriptor::TableDescriptor(const std::string& tb_name) { - impl_ = new TableDescImpl(tb_name); -} +TableDescriptor::TableDescriptor(const std::string& tb_name) { impl_ = new TableDescImpl(tb_name); } TableDescriptor::~TableDescriptor() { - delete impl_; - impl_ = NULL; + delete impl_; + impl_ = NULL; } /* @@ -29,132 +27,98 @@ TableDescriptor& TableDescriptor::operator=(const TableDescriptor& desc) { } */ -void TableDescriptor::SetTableName(const std::string& name) { - return impl_->SetTableName(name); -} +void TableDescriptor::SetTableName(const std::string& name) { return impl_->SetTableName(name); } -std::string TableDescriptor::TableName() const { - return impl_->TableName(); -} +std::string TableDescriptor::TableName() const { return impl_->TableName(); } /// 增加一个localitygroup, 名字仅允许使用字母、数字和下划线构造,长度不超过256 LocalityGroupDescriptor* TableDescriptor::AddLocalityGroup(const std::string& lg_name) { - return impl_->AddLocalityGroup(lg_name); + return impl_->AddLocalityGroup(lg_name); } LocalityGroupDescriptor* TableDescriptor::DefaultLocalityGroup() { - return impl_->DefaultLocalityGroup(); + return impl_->DefaultLocalityGroup(); } /// 删除一个localitygroup, bool TableDescriptor::RemoveLocalityGroup(const std::string& lg_name) { - return impl_->RemoveLocalityGroup(lg_name); + return impl_->RemoveLocalityGroup(lg_name); } /// 获取localitygroup const LocalityGroupDescriptor* TableDescriptor::LocalityGroup(int32_t id) const { - return impl_->LocalityGroup(id); + return impl_->LocalityGroup(id); } const LocalityGroupDescriptor* TableDescriptor::LocalityGroup(const std::string& name) const { - return impl_->LocalityGroup(name); + return impl_->LocalityGroup(name); } /// LG数量 -int32_t TableDescriptor::LocalityGroupNum() const { - return impl_->LocalityGroupNum(); -} +int32_t TableDescriptor::LocalityGroupNum() const { return impl_->LocalityGroupNum(); } /// 增加一个columnfamily, 名字仅允许使用字母、数字和下划线构造,长度不超过256 ColumnFamilyDescriptor* TableDescriptor::AddColumnFamily(const std::string& cf_name, - const std::string& lg_name) { - return impl_->AddColumnFamily(cf_name, lg_name); + const std::string& lg_name) { + return impl_->AddColumnFamily(cf_name, lg_name); } ColumnFamilyDescriptor* TableDescriptor::DefaultColumnFamily() { - return impl_->DefaultColumnFamily(); + return impl_->DefaultColumnFamily(); } /// 删除一个columnfamily void TableDescriptor::RemoveColumnFamily(const std::string& cf_name) { - return impl_->RemoveColumnFamily(cf_name); + return impl_->RemoveColumnFamily(cf_name); } /// 获取所有的colmnfamily const ColumnFamilyDescriptor* TableDescriptor::ColumnFamily(int32_t id) const { - return impl_->ColumnFamily(id); + return impl_->ColumnFamily(id); } const ColumnFamilyDescriptor* TableDescriptor::ColumnFamily(const std::string& cf_name) const { - return impl_->ColumnFamily(cf_name); + return impl_->ColumnFamily(cf_name); } /// CF数量 -int32_t TableDescriptor::ColumnFamilyNum() const { - return impl_->ColumnFamilyNum(); -} +int32_t TableDescriptor::ColumnFamilyNum() const { return impl_->ColumnFamilyNum(); } -void TableDescriptor::SetRawKey(RawKeyType type) { - impl_->SetRawKey(type); -} +void TableDescriptor::SetRawKey(RawKeyType type) { impl_->SetRawKey(type); } -RawKeyType TableDescriptor::RawKey() const { - return impl_->RawKey(); -} +RawKeyType TableDescriptor::RawKey() const { return impl_->RawKey(); } -void TableDescriptor::SetSplitSize(int64_t size) { - impl_->SetSplitSize(size); -} +void TableDescriptor::SetSplitSize(int64_t size) { impl_->SetSplitSize(size); } -int64_t TableDescriptor::SplitSize() const { - return impl_->SplitSize(); -} +int64_t TableDescriptor::SplitSize() const { return impl_->SplitSize(); } -void TableDescriptor::SetMergeSize(int64_t size) { - impl_->SetMergeSize(size); -} +void TableDescriptor::SetMergeSize(int64_t size) { impl_->SetMergeSize(size); } -int64_t TableDescriptor::MergeSize() const { - return impl_->MergeSize(); -} +int64_t TableDescriptor::MergeSize() const { return impl_->MergeSize(); } -void TableDescriptor::DisableWal() { - impl_->DisableWal(); -} +void TableDescriptor::DisableWal() { impl_->DisableWal(); } -bool TableDescriptor::IsWalDisabled() const { - return impl_->IsWalDisabled(); -} +bool TableDescriptor::IsWalDisabled() const { return impl_->IsWalDisabled(); } -void TableDescriptor::EnableTxn() { - impl_->EnableTxn(); -} +void TableDescriptor::EnableTxn() { impl_->EnableTxn(); } -bool TableDescriptor::IsTxnEnabled() const { - return impl_->IsTxnEnabled(); -} +bool TableDescriptor::IsTxnEnabled() const { return impl_->IsTxnEnabled(); } -int32_t TableDescriptor::AddSnapshot(uint64_t snapshot) { - return impl_->AddSnapshot(snapshot); -} +int32_t TableDescriptor::AddSnapshot(uint64_t snapshot) { return impl_->AddSnapshot(snapshot); } -uint64_t TableDescriptor::Snapshot(int32_t id) const { - return impl_->Snapshot(id); -} +uint64_t TableDescriptor::Snapshot(int32_t id) const { return impl_->Snapshot(id); } /// Snapshot数量 -int32_t TableDescriptor::SnapshotNum() const { - return impl_->SnapshotNum(); -} +int32_t TableDescriptor::SnapshotNum() const { return impl_->SnapshotNum(); } -void TableDescriptor::SetAdminGroup(const std::string& name) { - return impl_->SetAdminGroup(name); -} +void TableDescriptor::SetAdminGroup(const std::string& name) { return impl_->SetAdminGroup(name); } -void TableDescriptor::SetAdmin(const std::string& name) { - return impl_->SetAdmin(name); -} +void TableDescriptor::SetAdmin(const std::string& name) { return impl_->SetAdmin(name); } -std::string TableDescriptor::AdminGroup() const { - return impl_->AdminGroup(); -} +std::string TableDescriptor::AdminGroup() const { return impl_->AdminGroup(); } + +std::string TableDescriptor::Admin() const { return impl_->Admin(); } + +void TableDescriptor::EnableHash() { impl_->EnableHash(); } +bool TableDescriptor::IsHashEnabled() const { return impl_->IsHashEnabled(); } -std::string TableDescriptor::Admin() const { - return impl_->Admin(); +uint32_t TableDescriptor::BloomFilterBitsPerKey() const { return impl_->BloomFilterBitsPerKey(); } +void TableDescriptor::SetBloomFilterBitsPerKey(uint32_t val) { + return impl_->SetBloomFilterBitsPerKey(val); } -} // namespace tera +} // namespace tera /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/sdk/schema_impl.cc b/src/sdk/schema_impl.cc index b84880b1b..4051af410 100644 --- a/src/sdk/schema_impl.cc +++ b/src/sdk/schema_impl.cc @@ -18,9 +18,7 @@ const std::string TableDescImpl::NOTIFY_LG_NAME = "notify"; const std::string TableDescImpl::DEFAULT_CF_NAME = ""; /// 列族名字仅允许使用字母、数字和下划线构造, 长度不超过256 -CFDescImpl::CFDescImpl(const std::string& cf_name, - int32_t id, - const std::string& lg_name) +CFDescImpl::CFDescImpl(const std::string& cf_name, int32_t id, const std::string& lg_name) : id_(id), name_(cf_name), lg_name_(lg_name), @@ -32,96 +30,54 @@ CFDescImpl::CFDescImpl(const std::string& cf_name, disk_quota_(-1), type_(""), is_global_transaction_(false), - is_notify_enabled_(false) { -} + is_notify_enabled_(false) {} -int32_t CFDescImpl::Id() const { - return id_; -} +int32_t CFDescImpl::Id() const { return id_; } -const std::string& CFDescImpl::Name() const { - return name_; -} +const std::string& CFDescImpl::Name() const { return name_; } -const std::string& CFDescImpl::LocalityGroup() const { - return lg_name_; -} +const std::string& CFDescImpl::LocalityGroup() const { return lg_name_; } /// 历史版本保留时间, 不设置时为-1, 表示无限大永久保存 -void CFDescImpl::SetTimeToLive(int32_t ttl) { - ttl_ = ttl; -} +void CFDescImpl::SetTimeToLive(int32_t ttl) { ttl_ = ttl; } -int32_t CFDescImpl::TimeToLive() const { - return ttl_; -} +int32_t CFDescImpl::TimeToLive() const { return ttl_; } /// 在TTL内,最多存储的版本数 -void CFDescImpl::SetMaxVersions(int32_t max_versions) { - max_versions_ = max_versions; -} +void CFDescImpl::SetMaxVersions(int32_t max_versions) { max_versions_ = max_versions; } -int32_t CFDescImpl::MaxVersions() const { - return max_versions_; -} +int32_t CFDescImpl::MaxVersions() const { return max_versions_; } /// 最少存储的版本数,即使超出TTL,也至少保留min_versions个版本 -void CFDescImpl::SetMinVersions(int32_t min_versions) { - min_versions_ = min_versions; -} +void CFDescImpl::SetMinVersions(int32_t min_versions) { min_versions_ = min_versions; } -int32_t CFDescImpl::MinVersions() const { - return min_versions_; -} +int32_t CFDescImpl::MinVersions() const { return min_versions_; } /// 存储限额, MBytes -void CFDescImpl::SetDiskQuota(int64_t quota) { - disk_quota_ = quota; -} +void CFDescImpl::SetDiskQuota(int64_t quota) { disk_quota_ = quota; } -int64_t CFDescImpl::DiskQuota() const { - return disk_quota_; -} +int64_t CFDescImpl::DiskQuota() const { return disk_quota_; } /// ACL -void CFDescImpl::SetAcl(ACL acl) { -} +void CFDescImpl::SetAcl(ACL acl) {} -ACL CFDescImpl::Acl() const { - return ACL(); -} +ACL CFDescImpl::Acl() const { return ACL(); } -void CFDescImpl::EnableGlobalTransaction() { - is_global_transaction_ = true; -} +void CFDescImpl::EnableGlobalTransaction() { is_global_transaction_ = true; } -void CFDescImpl::DisableGlobalTransaction() { - is_global_transaction_ = false; -} +void CFDescImpl::DisableGlobalTransaction() { is_global_transaction_ = false; } -bool CFDescImpl::GlobalTransaction() const { - return is_global_transaction_; -} +bool CFDescImpl::GlobalTransaction() const { return is_global_transaction_; } -void CFDescImpl::EnableNotify() { - is_notify_enabled_ = true; -} +void CFDescImpl::EnableNotify() { is_notify_enabled_ = true; } -void CFDescImpl::DisableNotify() { - is_notify_enabled_ = false; -} +void CFDescImpl::DisableNotify() { is_notify_enabled_ = false; } -bool CFDescImpl::IsNotifyEnabled() const { - return is_notify_enabled_; -} +bool CFDescImpl::IsNotifyEnabled() const { return is_notify_enabled_; } -void CFDescImpl::SetType(const std::string& type) { - type_ = type; -} +void CFDescImpl::SetType(const std::string& type) { type_ = type; } -const std::string& CFDescImpl::Type() const { - return type_; -} +const std::string& CFDescImpl::Type() const { return type_; } /// 局部性群组名字仅允许使用字母、数字和下划线构造,长度不超过256 LGDescImpl::LGDescImpl(const std::string& lg_name, int32_t id) @@ -134,85 +90,54 @@ LGDescImpl::LGDescImpl(const std::string& lg_name, int32_t id) use_memtable_on_leveldb_(false), memtable_ldb_write_buffer_size_(0), memtable_ldb_block_size_(0), - sst_size_(FLAGS_tera_tablet_ldb_sst_size << 20){ -} + sst_size_(FLAGS_tera_tablet_ldb_sst_size << 20) {} /// Id read only -int32_t LGDescImpl::Id() const { - return id_; -} +int32_t LGDescImpl::Id() const { return id_; } -const std::string& LGDescImpl::Name() const { - return name_; -} +const std::string& LGDescImpl::Name() const { return name_; } /// Compress type -void LGDescImpl::SetCompress(CompressType type) { - compress_type_ = type; -} +void LGDescImpl::SetCompress(CompressType type) { compress_type_ = type; } -CompressType LGDescImpl::Compress() const { - return compress_type_; -} +CompressType LGDescImpl::Compress() const { return compress_type_; } /// Block size -void LGDescImpl::SetBlockSize(int block_size) { - block_size_ = block_size; -} +void LGDescImpl::SetBlockSize(int block_size) { block_size_ = block_size; } -int LGDescImpl::BlockSize() const { - return block_size_; -} +int LGDescImpl::BlockSize() const { return block_size_; } /// Store type -void LGDescImpl::SetStore(StoreType type) { - store_type_ = type; -} +void LGDescImpl::SetStore(StoreType type) { store_type_ = type; } -StoreType LGDescImpl::Store() const { - return store_type_; -} +StoreType LGDescImpl::Store() const { return store_type_; } /// Bloomfilter -void LGDescImpl::SetUseBloomfilter(bool use_bloomfilter) { - use_bloomfilter_ = use_bloomfilter; -} +void LGDescImpl::SetUseBloomfilter(bool use_bloomfilter) { use_bloomfilter_ = use_bloomfilter; } -bool LGDescImpl::UseBloomfilter() const { - return use_bloomfilter_; -} +bool LGDescImpl::UseBloomfilter() const { return use_bloomfilter_; } -bool LGDescImpl::UseMemtableOnLeveldb() const { - return use_memtable_on_leveldb_; -} +bool LGDescImpl::UseMemtableOnLeveldb() const { return use_memtable_on_leveldb_; } void LGDescImpl::SetUseMemtableOnLeveldb(bool use_mem_ldb) { - use_memtable_on_leveldb_ = use_mem_ldb; + use_memtable_on_leveldb_ = use_mem_ldb; } -int32_t LGDescImpl::MemtableLdbWriteBufferSize() const { - return memtable_ldb_write_buffer_size_; -} +int32_t LGDescImpl::MemtableLdbWriteBufferSize() const { return memtable_ldb_write_buffer_size_; } void LGDescImpl::SetMemtableLdbWriteBufferSize(int32_t buffer_size) { - memtable_ldb_write_buffer_size_ = buffer_size; + memtable_ldb_write_buffer_size_ = buffer_size; } -int32_t LGDescImpl::MemtableLdbBlockSize() const { - return memtable_ldb_block_size_; -} +int32_t LGDescImpl::MemtableLdbBlockSize() const { return memtable_ldb_block_size_; } void LGDescImpl::SetMemtableLdbBlockSize(int32_t block_size) { - memtable_ldb_block_size_ = block_size; + memtable_ldb_block_size_ = block_size; } -int32_t LGDescImpl::SstSize() const { - return sst_size_; -} +int32_t LGDescImpl::SstSize() const { return sst_size_; } -void LGDescImpl::SetSstSize(int32_t sst_size) { - sst_size_ = sst_size; -} +void LGDescImpl::SetSstSize(int32_t sst_size) { sst_size_ = sst_size; } /// 表格名字仅允许使用字母、数字和下划线构造,长度不超过256 TableDescImpl::TableDescImpl(const std::string& tb_name) @@ -223,8 +148,9 @@ TableDescImpl::TableDescImpl(const std::string& tb_name) split_size_(FLAGS_tera_master_split_tablet_size), merge_size_(FLAGS_tera_master_merge_tablet_size), disable_wal_(false), - enable_txn_(false) { -} + enable_txn_(false), + enable_hash_(false), + bloom_filter_bits_per_key_(10) {} /* TableDescImpl::TableDescImpl(TableDescImpl& desc) { @@ -252,212 +178,176 @@ TableDescImpl& TableDescImpl::operator=(const TableDescImpl& desc) { */ TableDescImpl::~TableDescImpl() { - int32_t lg_num = lgs_.size(); - for (int32_t i = 0; i < lg_num; i++) { - delete lgs_[i]; - } - int32_t cf_num = cfs_.size(); - for (int32_t i = 0; i < cf_num; i++) { - delete cfs_[i]; - } + int32_t lg_num = lgs_.size(); + for (int32_t i = 0; i < lg_num; i++) { + delete lgs_[i]; + } + int32_t cf_num = cfs_.size(); + for (int32_t i = 0; i < cf_num; i++) { + delete cfs_[i]; + } } -void TableDescImpl::SetTableName(const std::string& name) { - name_ = name; -} +void TableDescImpl::SetTableName(const std::string& name) { name_ = name; } -std::string TableDescImpl::TableName() const{ - return name_; -} +std::string TableDescImpl::TableName() const { return name_; } -void TableDescImpl::SetAdminGroup(const std::string& name) { - admin_group_ = name; -} +void TableDescImpl::SetAdminGroup(const std::string& name) { admin_group_ = name; } -std::string TableDescImpl::AdminGroup() const { - return admin_group_; -} +std::string TableDescImpl::AdminGroup() const { return admin_group_; } -void TableDescImpl::SetAdmin(const std::string& name) { - admin_ = name; -} +void TableDescImpl::SetAdmin(const std::string& name) { admin_ = name; } -std::string TableDescImpl::Admin() const { - return admin_; -} +std::string TableDescImpl::Admin() const { return admin_; } /// 增加一个localitygroup LocalityGroupDescriptor* TableDescImpl::AddLocalityGroup(const std::string& lg_name) { - LGMap::iterator it = lg_map_.find(lg_name); - if (it != lg_map_.end()) { - return it->second; - } - int id = next_lg_id_ ++; - LGDescImpl* lg = new LGDescImpl(lg_name, id); - lg_map_[lg_name] = lg; - lgs_.push_back(lg); - return lg; + LGMap::iterator it = lg_map_.find(lg_name); + if (it != lg_map_.end()) { + return it->second; + } + int id = next_lg_id_++; + LGDescImpl* lg = new LGDescImpl(lg_name, id); + lg_map_[lg_name] = lg; + lgs_.push_back(lg); + return lg; } -LocalityGroupDescriptor* TableDescImpl::DefaultLocalityGroup() { - return lg_map_.begin()->second; -} +LocalityGroupDescriptor* TableDescImpl::DefaultLocalityGroup() { return lg_map_.begin()->second; } /// 删除一个localitygroup bool TableDescImpl::RemoveLocalityGroup(const std::string& lg_name) { - if (lg_map_.size() == 1 && lg_map_.begin()->first == lg_name) { - return false; - } - LGMap::iterator it = lg_map_.find(lg_name); - if (it == lg_map_.end()) { - return false; + if (lg_map_.size() == 1 && lg_map_.begin()->first == lg_name) { + return false; + } + LGMap::iterator it = lg_map_.find(lg_name); + if (it == lg_map_.end()) { + return false; + } + LGDescImpl* lg = it->second; + for (size_t i = 0; i < cfs_.size(); i++) { + if (cfs_[i]->LocalityGroup() == lg->Name()) { + return false; } - LGDescImpl* lg = it->second; - for (size_t i = 0; i < cfs_.size(); i++) { - if (cfs_[i]->LocalityGroup() == lg->Name()) { - return false; - } - } - lg_map_.erase(it); - lgs_[lg->Id()] = lgs_[lgs_.size() - 1]; - lgs_.resize(lgs_.size() - 1); - delete lg; - return true; + } + lg_map_.erase(it); + lgs_[lg->Id()] = lgs_[lgs_.size() - 1]; + lgs_.resize(lgs_.size() - 1); + delete lg; + return true; } /// 获取localitygroup const LocalityGroupDescriptor* TableDescImpl::LocalityGroup(int32_t id) const { - if (id < static_cast(lgs_.size())) { - return lgs_[id]; - } - return NULL; + if (id < static_cast(lgs_.size())) { + return lgs_[id]; + } + return NULL; } const LocalityGroupDescriptor* TableDescImpl::LocalityGroup(const std::string& lg_name) const { - LGMap::const_iterator it = lg_map_.find(lg_name); - if (it != lg_map_.end()) { - return it->second; - } else { - return NULL; - } + LGMap::const_iterator it = lg_map_.find(lg_name); + if (it != lg_map_.end()) { + return it->second; + } else { + return NULL; + } } /// 获取localitygroup数量 -int32_t TableDescImpl::LocalityGroupNum() const { - return lgs_.size(); -} +int32_t TableDescImpl::LocalityGroupNum() const { return lgs_.size(); } /// 增加一个columnfamily ColumnFamilyDescriptor* TableDescImpl::AddColumnFamily(const std::string& cf_name, - const std::string& lg_name) { - LGMap::iterator it = lg_map_.find(lg_name); - if (it == lg_map_.end()) { - LOG(ERROR) << "lg:" << lg_name << " not exist."; - return NULL; - } - CFMap::iterator cf_it = cf_map_.find(cf_name); - if (cf_it != cf_map_.end()) { - return cf_it->second; - } - int id = next_cf_id_ ++; - CFDescImpl* cf = new CFDescImpl(cf_name, id, lg_name); - cf_map_[cf_name] = cf; - cfs_.push_back(cf); - return cf; + const std::string& lg_name) { + LGMap::iterator it = lg_map_.find(lg_name); + if (it == lg_map_.end()) { + LOG(ERROR) << "lg:" << lg_name << " not exist."; + return NULL; + } + CFMap::iterator cf_it = cf_map_.find(cf_name); + if (cf_it != cf_map_.end()) { + return cf_it->second; + } + int id = next_cf_id_++; + CFDescImpl* cf = new CFDescImpl(cf_name, id, lg_name); + cf_map_[cf_name] = cf; + cfs_.push_back(cf); + return cf; } -ColumnFamilyDescriptor* TableDescImpl::DefaultColumnFamily() { - return cf_map_.begin()->second; -} +ColumnFamilyDescriptor* TableDescImpl::DefaultColumnFamily() { return cf_map_.begin()->second; } /// 删除一个columnfamily void TableDescImpl::RemoveColumnFamily(const std::string& cf_name) { - if (cf_map_.size() == 1 && cf_map_.begin()->first == cf_name) { - return; - } - CFMap::iterator it = cf_map_.find(cf_name); - if (it == cf_map_.end()) { - return; - } - CFDescImpl* cf = it->second; - cf_map_.erase(it); - cfs_[cf->Id()] = cfs_[cfs_.size() - 1]; - cfs_.resize(cfs_.size() - 1); - delete cf; + if (cf_map_.size() == 1 && cf_map_.begin()->first == cf_name) { + return; + } + CFMap::iterator it = cf_map_.find(cf_name); + if (it == cf_map_.end()) { + return; + } + CFDescImpl* cf = it->second; + cf_map_.erase(it); + cfs_[cf->Id()] = cfs_[cfs_.size() - 1]; + cfs_.resize(cfs_.size() - 1); + delete cf; } /// 获取所有的colmnfamily const ColumnFamilyDescriptor* TableDescImpl::ColumnFamily(int32_t id) const { - if (id < static_cast(cfs_.size())) { - return cfs_[id]; - } - return NULL; + if (id < static_cast(cfs_.size())) { + return cfs_[id]; + } + return NULL; } const ColumnFamilyDescriptor* TableDescImpl::ColumnFamily(const std::string& cf_name) const { - CFMap::const_iterator it = cf_map_.find(cf_name); - if (it != cf_map_.end()) { - return it->second; - } else { - return NULL; - } + CFMap::const_iterator it = cf_map_.find(cf_name); + if (it != cf_map_.end()) { + return it->second; + } else { + return NULL; + } } -int32_t TableDescImpl::ColumnFamilyNum() const { - return cfs_.size(); -} +int32_t TableDescImpl::ColumnFamilyNum() const { return cfs_.size(); } -void TableDescImpl::SetRawKey(RawKeyType type) { - raw_key_type_ = type; -} +void TableDescImpl::SetRawKey(RawKeyType type) { raw_key_type_ = type; } -RawKeyType TableDescImpl::RawKey() const { - return raw_key_type_; -} +RawKeyType TableDescImpl::RawKey() const { return raw_key_type_; } -void TableDescImpl::SetSplitSize(int64_t size) { - split_size_ = size; -} +void TableDescImpl::SetSplitSize(int64_t size) { split_size_ = size; } -int64_t TableDescImpl::SplitSize() const { - return split_size_; -} +int64_t TableDescImpl::SplitSize() const { return split_size_; } -void TableDescImpl::SetMergeSize(int64_t size) { - merge_size_ = size; -} +void TableDescImpl::SetMergeSize(int64_t size) { merge_size_ = size; } -int64_t TableDescImpl::MergeSize() const { - return merge_size_; -} +int64_t TableDescImpl::MergeSize() const { return merge_size_; } -void TableDescImpl::DisableWal() { - disable_wal_ = true; -} +void TableDescImpl::DisableWal() { disable_wal_ = true; } -bool TableDescImpl::IsWalDisabled() const { - return disable_wal_; -} +bool TableDescImpl::IsWalDisabled() const { return disable_wal_; } -void TableDescImpl::EnableTxn() { - enable_txn_ = true; -} +void TableDescImpl::EnableTxn() { enable_txn_ = true; } -bool TableDescImpl::IsTxnEnabled() const { - return enable_txn_; -} +bool TableDescImpl::IsTxnEnabled() const { return enable_txn_; } /// 插入snapshot int32_t TableDescImpl::AddSnapshot(uint64_t snapshot) { - snapshots_.push_back(snapshot); - return snapshots_.size(); + snapshots_.push_back(snapshot); + return snapshots_.size(); } /// 获取snapshot -uint64_t TableDescImpl::Snapshot(int32_t id) const { - return snapshots_[id]; -} +uint64_t TableDescImpl::Snapshot(int32_t id) const { return snapshots_[id]; } /// Snapshot数量 -int32_t TableDescImpl::SnapshotNum() const { - return snapshots_.size(); -} -} // namespace tera +int32_t TableDescImpl::SnapshotNum() const { return snapshots_.size(); } + +void TableDescImpl::EnableHash() { enable_hash_ = true; } + +bool TableDescImpl::IsHashEnabled() const { return enable_hash_; } + +uint32_t TableDescImpl::BloomFilterBitsPerKey() const { return bloom_filter_bits_per_key_; } + +void TableDescImpl::SetBloomFilterBitsPerKey(uint32_t val) { bloom_filter_bits_per_key_ = val; } +} // namespace tera diff --git a/src/sdk/schema_impl.h b/src/sdk/schema_impl.h index 5402f6d64..ba1f53a90 100644 --- a/src/sdk/schema_impl.h +++ b/src/sdk/schema_impl.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_SDK_SCHEMA_IMPL_H_ -#define TERA_SDK_SCHEMA_IMPL_H_ +#ifndef TERA_SDK_SCHEMA_IMPL_H_ +#define TERA_SDK_SCHEMA_IMPL_H_ #include @@ -14,218 +14,225 @@ namespace tera { /// 列族描述 class CFDescImpl : public ColumnFamilyDescriptor { -public: - /// 列族名字仅允许使用字母、数字和下划线构造, 长度不超过256 - CFDescImpl(const std::string& cf_name, int32_t id, const std::string& lg_name); - /// id - int32_t Id() const; - const std::string& Name() const; + public: + /// 列族名字仅允许使用字母、数字和下划线构造, 长度不超过256 + CFDescImpl(const std::string& cf_name, int32_t id, const std::string& lg_name); + /// id + int32_t Id() const; + const std::string& Name() const; - const std::string& LocalityGroup() const; + const std::string& LocalityGroup() const; - /// 历史版本保留时间, 不设置时为0, 表示无限大永久保存 - void SetTimeToLive(int32_t ttl); + /// 历史版本保留时间, 不设置时为0, 表示无限大永久保存 + void SetTimeToLive(int32_t ttl); - int32_t TimeToLive() const; + int32_t TimeToLive() const; - /// 在TTL内,最多存储的版本数 - void SetMaxVersions(int32_t max_versions); + /// 在TTL内,最多存储的版本数 + void SetMaxVersions(int32_t max_versions); - int32_t MaxVersions() const; + int32_t MaxVersions() const; - /// 最少存储的版本数,即使超出TTL,也至少保留min_versions个版本 - void SetMinVersions(int32_t min_versions); + /// 最少存储的版本数,即使超出TTL,也至少保留min_versions个版本 + void SetMinVersions(int32_t min_versions); - int32_t MinVersions() const; + int32_t MinVersions() const; - /// 存储限额, MBytes - void SetDiskQuota(int64_t quota); + /// 存储限额, MBytes + void SetDiskQuota(int64_t quota); - int64_t DiskQuota() const; + int64_t DiskQuota() const; - /// ACL - void SetAcl(ACL acl); + /// ACL + void SetAcl(ACL acl); - ACL Acl() const; + ACL Acl() const; - void EnableGlobalTransaction(); + void EnableGlobalTransaction(); - void DisableGlobalTransaction(); - - bool GlobalTransaction() const; + void DisableGlobalTransaction(); - void EnableNotify(); + bool GlobalTransaction() const; - void DisableNotify(); + void EnableNotify(); - bool IsNotifyEnabled() const; + void DisableNotify(); - void SetType(const std::string& type); + bool IsNotifyEnabled() const; - const std::string& Type() const; + void SetType(const std::string& type); -private: - int32_t id_; - std::string name_; - std::string lg_name_; - int32_t max_versions_; - int32_t min_versions_; - int32_t ttl_; - int64_t acl_; - int32_t owner_; - int32_t disk_quota_; - std::string type_; - bool is_global_transaction_; - bool is_notify_enabled_; + const std::string& Type() const; + + private: + int32_t id_; + std::string name_; + std::string lg_name_; + int32_t max_versions_; + int32_t min_versions_; + int32_t ttl_; + int64_t acl_; + int32_t owner_; + int32_t disk_quota_; + std::string type_; + bool is_global_transaction_; + bool is_notify_enabled_; }; /// 局部性群组描述 class LGDescImpl : public LocalityGroupDescriptor { -public: - /// 局部性群组名字仅允许使用字母、数字和下划线构造,长度不超过256 - LGDescImpl(const std::string& lg_name, int32_t id); + public: + /// 局部性群组名字仅允许使用字母、数字和下划线构造,长度不超过256 + LGDescImpl(const std::string& lg_name, int32_t id); - /// Id read only - int32_t Id() const; + /// Id read only + int32_t Id() const; - /// Name read only - const std::string& Name() const; + /// Name read only + const std::string& Name() const; - /// Compress type - void SetCompress(CompressType type); + /// Compress type + void SetCompress(CompressType type); - CompressType Compress() const; + CompressType Compress() const; - /// Block size - void SetBlockSize(int block_size); + /// Block size + void SetBlockSize(int block_size); - int BlockSize() const; + int BlockSize() const; - /// Store type - void SetStore(StoreType type); + /// Store type + void SetStore(StoreType type); - StoreType Store() const; + StoreType Store() const; - /// Bloomfilter - void SetUseBloomfilter(bool use_bloomfilter); + /// Bloomfilter + void SetUseBloomfilter(bool use_bloomfilter); - bool UseBloomfilter() const; + bool UseBloomfilter() const; - /// Memtable On Leveldb (disable/enable) - bool UseMemtableOnLeveldb() const; + /// Memtable On Leveldb (disable/enable) + bool UseMemtableOnLeveldb() const; - void SetUseMemtableOnLeveldb(bool use_mem_ldb); + void SetUseMemtableOnLeveldb(bool use_mem_ldb); - /// Memtable-LDB WriteBuffer Size - int32_t MemtableLdbWriteBufferSize() const; + /// Memtable-LDB WriteBuffer Size + int32_t MemtableLdbWriteBufferSize() const; - void SetMemtableLdbWriteBufferSize(int32_t buffer_size); + void SetMemtableLdbWriteBufferSize(int32_t buffer_size); - /// Memtable-LDB Block Size - int32_t MemtableLdbBlockSize() const; + /// Memtable-LDB Block Size + int32_t MemtableLdbBlockSize() const; - void SetMemtableLdbBlockSize(int32_t block_size); + void SetMemtableLdbBlockSize(int32_t block_size); - /// sst file size, in Bytes - int32_t SstSize() const; - void SetSstSize(int32_t sst_size); + /// sst file size, in Bytes + int32_t SstSize() const; + void SetSstSize(int32_t sst_size); -private: - int32_t id_; - std::string name_; - CompressType compress_type_; - StoreType store_type_; - int block_size_; - bool use_bloomfilter_; - bool use_memtable_on_leveldb_; - int32_t memtable_ldb_write_buffer_size_; - int32_t memtable_ldb_block_size_; - int32_t sst_size_; // in bytes + private: + int32_t id_; + std::string name_; + CompressType compress_type_; + StoreType store_type_; + int block_size_; + bool use_bloomfilter_; + bool use_memtable_on_leveldb_; + int32_t memtable_ldb_write_buffer_size_; + int32_t memtable_ldb_block_size_; + int32_t sst_size_; // in bytes }; /// 表描述符. class TableDescImpl { -public: - /// 表格名字仅允许使用字母、数字和下划线构造,长度不超过256 - TableDescImpl(const std::string& tb_name); - ~TableDescImpl(); - void SetTableName(const std::string& name); - std::string TableName() const; - /// 增加一个localitygroup - LocalityGroupDescriptor* AddLocalityGroup(const std::string& lg_name); - /// 获取默认localitygroup,仅用于kv表 - LocalityGroupDescriptor* DefaultLocalityGroup(); - /// 删除一个localitygroup - bool RemoveLocalityGroup(const std::string& lg_name); - /// 获取localitygroup - const LocalityGroupDescriptor* LocalityGroup(int32_t id) const; - const LocalityGroupDescriptor* LocalityGroup(const std::string& lg_name) const; - /// 获取localitygroup数量 - int32_t LocalityGroupNum() const; - /// 增加一个columnfamily - ColumnFamilyDescriptor* AddColumnFamily(const std::string& cf_name, - const std::string& lg_name); - ColumnFamilyDescriptor* DefaultColumnFamily(); - /// 删除一个columnfamily - void RemoveColumnFamily(const std::string& cf_name); - /// 获取所有的colmnfamily - const ColumnFamilyDescriptor* ColumnFamily(int32_t id) const; - const ColumnFamilyDescriptor* ColumnFamily(const std::string& cf_name) const; - int32_t ColumnFamilyNum() const; - - /// Raw Key Mode - void SetRawKey(RawKeyType type); - RawKeyType RawKey() const; - - void SetSplitSize(int64_t size); - int64_t SplitSize() const; - - void SetMergeSize(int64_t size); - int64_t MergeSize() const; - - void DisableWal(); - bool IsWalDisabled() const; - - void EnableTxn(); - bool IsTxnEnabled() const; - - /// 插入snapshot - int32_t AddSnapshot(uint64_t snapshot); - /// 获取snapshot - uint64_t Snapshot(int32_t id) const; - /// Snapshot数量 - int32_t SnapshotNum() const; - - void SetAdminGroup(const std::string& name); - std::string AdminGroup() const; - - void SetAdmin(const std::string& name); - std::string Admin() const; - - static const std::string DEFAULT_LG_NAME; - static const std::string NOTIFY_LG_NAME; - static const std::string DEFAULT_CF_NAME; - -private: - typedef std::map LGMap; - typedef std::map CFMap; - std::string name_; - LGMap lg_map_; - std::vector lgs_; - CFMap cf_map_; - std::vector cfs_; - int32_t next_lg_id_; - int32_t next_cf_id_; - std::vector snapshots_; - RawKeyType raw_key_type_; - int64_t split_size_; - int64_t merge_size_; - bool disable_wal_; - bool enable_txn_; - std::string admin_group_; - std::string admin_; + public: + /// 表格名字仅允许使用字母、数字和下划线构造,长度不超过256 + TableDescImpl(const std::string& tb_name); + ~TableDescImpl(); + void SetTableName(const std::string& name); + std::string TableName() const; + /// 增加一个localitygroup + LocalityGroupDescriptor* AddLocalityGroup(const std::string& lg_name); + /// 获取默认localitygroup,仅用于kv表 + LocalityGroupDescriptor* DefaultLocalityGroup(); + /// 删除一个localitygroup + bool RemoveLocalityGroup(const std::string& lg_name); + /// 获取localitygroup + const LocalityGroupDescriptor* LocalityGroup(int32_t id) const; + const LocalityGroupDescriptor* LocalityGroup(const std::string& lg_name) const; + /// 获取localitygroup数量 + int32_t LocalityGroupNum() const; + /// 增加一个columnfamily + ColumnFamilyDescriptor* AddColumnFamily(const std::string& cf_name, const std::string& lg_name); + ColumnFamilyDescriptor* DefaultColumnFamily(); + /// 删除一个columnfamily + void RemoveColumnFamily(const std::string& cf_name); + /// 获取所有的colmnfamily + const ColumnFamilyDescriptor* ColumnFamily(int32_t id) const; + const ColumnFamilyDescriptor* ColumnFamily(const std::string& cf_name) const; + int32_t ColumnFamilyNum() const; + + /// Raw Key Mode + void SetRawKey(RawKeyType type); + RawKeyType RawKey() const; + + void SetSplitSize(int64_t size); + int64_t SplitSize() const; + + void SetMergeSize(int64_t size); + int64_t MergeSize() const; + + void DisableWal(); + bool IsWalDisabled() const; + + void EnableTxn(); + bool IsTxnEnabled() const; + + void EnableHash(); + bool IsHashEnabled() const; + + /// 插入snapshot + int32_t AddSnapshot(uint64_t snapshot); + /// 获取snapshot + uint64_t Snapshot(int32_t id) const; + /// Snapshot数量 + int32_t SnapshotNum() const; + + void SetAdminGroup(const std::string& name); + std::string AdminGroup() const; + + void SetAdmin(const std::string& name); + std::string Admin() const; + + uint32_t BloomFilterBitsPerKey() const; + void SetBloomFilterBitsPerKey(uint32_t); + + static const std::string DEFAULT_LG_NAME; + static const std::string NOTIFY_LG_NAME; + static const std::string DEFAULT_CF_NAME; + + private: + typedef std::map LGMap; + typedef std::map CFMap; + std::string name_; + LGMap lg_map_; + std::vector lgs_; + CFMap cf_map_; + std::vector cfs_; + int32_t next_lg_id_; + int32_t next_cf_id_; + std::vector snapshots_; + RawKeyType raw_key_type_; + int64_t split_size_; + int64_t merge_size_; + bool disable_wal_; + bool enable_txn_; + bool enable_hash_; + std::string admin_group_; + std::string admin_; + uint32_t bloom_filter_bits_per_key_; }; -} // namespace tera +} // namespace tera #endif // TERA_SDK_SCHEMA_IMPL_H_ diff --git a/src/sdk/sdk_flags.cc b/src/sdk/sdk_flags.cc index 2365cf96d..9383ba174 100644 --- a/src/sdk/sdk_flags.cc +++ b/src/sdk/sdk_flags.cc @@ -7,49 +7,69 @@ ///////// global transaction //////// DEFINE_bool(tera_sdk_client_for_gtxn, false, "build thread_pool for global transaction"); -DEFINE_bool(tera_sdk_tso_client_enabled, false, "get timestamp from timeoracle, default from local timestamp"); -DEFINE_int32(tera_gtxn_thread_max_num, 20, "the max thread number for global transaction operations"); -DEFINE_int32(tera_gtxn_commit_timeout_ms, 600000, "global transaction timeout limit (ms) default 10 minutes"); +DEFINE_bool(tera_sdk_tso_client_enabled, false, + "get timestamp from timeoracle, default from local timestamp"); +DEFINE_int32(tera_gtxn_thread_max_num, 20, + "the max thread number for global transaction operations"); +DEFINE_int32(tera_gtxn_commit_timeout_ms, 600000, + "global transaction timeout limit (ms) default 10 minutes"); DEFINE_int32(tera_gtxn_get_waited_times_limit, 10, "global txn wait other locked times limit"); DEFINE_int32(tera_gtxn_all_puts_size_limit, 10000, "(B) global txn all puts data size limit"); -DEFINE_int32(tera_gtxn_timeout_ms, 86400000, "global transaction timeout limit (ms) default 24 hours"); +DEFINE_int32(tera_gtxn_timeout_ms, 86400000, + "global transaction timeout limit (ms) default 24 hours"); ///////// SDK ///////// DEFINE_string(tera_sdk_impl_type, "tera", "the activated type of SDK impl"); -DEFINE_int32(tera_sdk_retry_times, 10, "the max retry times during sdk operation fail"); +DEFINE_int32(tera_sdk_meta_read_retry_times, 10, "the max retry times during meta read"); DEFINE_int32(tera_sdk_retry_period, 500, "the retry period (in ms) between two operations"); DEFINE_string(tera_sdk_conf_file, "", "the path of default flag file"); DEFINE_int32(tera_sdk_show_max_num, 20000, "the max fetch meta number for each rpc connection"); DEFINE_int32(tera_sdk_async_pending_limit, 2000, "the max number for pending task in async writer"); -DEFINE_int32(tera_sdk_async_sync_task_threshold, 1000, "the sync task threshold to do sync operation"); +DEFINE_int32(tera_sdk_async_sync_task_threshold, 1000, + "the sync task threshold to do sync operation"); DEFINE_int32(tera_sdk_async_sync_record_threshold, 1000, "the sync kv record threshold"); DEFINE_int32(tera_sdk_async_sync_interval, 15, "the interval (in ms) to sync write buffer to disk"); -DEFINE_int32(tera_sdk_async_thread_min_num, 1, "the min thread number for tablet node impl operations"); -DEFINE_int32(tera_sdk_async_thread_max_num, 200, "the max thread number for tablet node impl operations"); +DEFINE_int32(tera_sdk_async_thread_min_num, 1, + "the min thread number for tablet node impl operations"); +DEFINE_int32(tera_sdk_async_thread_max_num, 200, + "the max thread number for tablet node impl operations"); DEFINE_int32(tera_sdk_rpc_request_max_size, 30, "the max size(MB) for the request message of RPC"); -DEFINE_string(tera_sdk_root_table_addr,"127.0.0.1:22000","the default table server has root_table"); +DEFINE_string(tera_sdk_root_table_addr, "127.0.0.1:22000", + "the default table server has root_table"); DEFINE_int32(tera_sdk_thread_min_num, 1, "the min thread number for tablet node impl operations"); DEFINE_int32(tera_sdk_thread_max_num, 20, "the max thread number for tablet node impl operations"); DEFINE_bool(tera_sdk_rpc_limit_enabled, false, "enable the rpc traffic limit in sdk"); -DEFINE_int32(tera_sdk_rpc_limit_max_inflow, 10, "the max bandwidth (in MB/s) for sdk rpc traffic limitation on input flow"); -DEFINE_int32(tera_sdk_rpc_limit_max_outflow, 10, "the max bandwidth (in MB/s) for sdk rpc traffic limitation on output flow"); -DEFINE_int32(tera_sdk_rpc_max_pending_buffer_size, 200, "max pending buffer size (in MB) for sdk rpc"); +DEFINE_int32(tera_sdk_rpc_limit_max_inflow, 10, + "the max bandwidth (in MB/s) for sdk rpc traffic limitation on input flow"); +DEFINE_int32(tera_sdk_rpc_limit_max_outflow, 10, + "the max bandwidth (in MB/s) for sdk rpc traffic limitation on " + "output flow"); +DEFINE_int32(tera_sdk_rpc_max_pending_buffer_size, 200, + "max pending buffer size (in MB) for sdk rpc"); DEFINE_int32(tera_sdk_rpc_work_thread_num, 8, "thread num of sdk rpc client"); DEFINE_int32(tera_sdk_update_meta_internal, 10000, "the sdk update meta table internal time(ms)"); -DEFINE_int32(tera_sdk_check_timer_internal, 100, "the sdk check the resend quest queue internal time"); -DEFINE_int32(tera_sdk_timeout, 60000, "timeout of sdk read/write request (in ms)"); -DEFINE_int32(tera_sdk_timeout_precision, 100, "precision of sdk read/write timeout detector (in ms)"); +DEFINE_int32(tera_sdk_check_timer_internal, 100, + "the sdk check the resend quest queue internal time"); +DEFINE_int32(tera_sdk_timeout_precision, 100, + "precision of sdk read/write timeout detector (in ms)"); DEFINE_int32(tera_sdk_delay_send_internal, 2, "the sdk resend the request internal time(s)"); DEFINE_int32(tera_sdk_scan_buffer_limit, 2048000, "(B) the pack size limit for scan operation"); DEFINE_bool(tera_sdk_write_sync, false, "sync flag for write"); DEFINE_int32(tera_sdk_batch_size, 250, "batch_size (task number in task_batch)"); +DEFINE_int32(tera_sdk_read_timeout, 10000, "timeout of sdk readrequest (in ms)"); +DEFINE_int32(tera_sdk_write_timeout, 10000, "timeout of sdk write request (in ms)"); DEFINE_int32(tera_sdk_write_send_interval, 10, "(ms) write batch send interval time"); DEFINE_int32(tera_sdk_read_send_interval, 5, "(ms) read batch send interval time"); -DEFINE_int64(tera_sdk_max_mutation_pending_num, INT64_MAX, "default number of pending mutations in async put op"); -DEFINE_int64(tera_sdk_max_reader_pending_num, INT64_MAX, "default number of pending readers in async get op"); -DEFINE_bool(tera_sdk_async_blocking_enabled, true, "enable blocking when async writing and reading"); + +DEFINE_int64(tera_sdk_max_mutation_pending_num, INT64_MAX, + "default number of pending mutations in async put op"); +DEFINE_int64(tera_sdk_max_reader_pending_num, INT64_MAX, + "default number of pending readers in async get op"); +DEFINE_bool(tera_sdk_async_blocking_enabled, true, + "enable blocking when async writing and reading"); DEFINE_int32(tera_sdk_update_meta_concurrency, 3, "the concurrency for updating meta"); -DEFINE_int32(tera_sdk_update_meta_buffer_limit, 102400, "(B) the pack size limit for updating meta"); +DEFINE_int32(tera_sdk_update_meta_buffer_limit, 102400, + "(B) the pack size limit for updating meta"); DEFINE_bool(tera_sdk_table_rename_enabled, false, "enable sdk table rename"); DEFINE_bool(tera_sdk_cookie_enabled, true, "enable sdk cookie"); @@ -57,32 +77,42 @@ DEFINE_string(tera_sdk_cookie_path, "/tmp/.tera_cookie", "the default path of sd DEFINE_int32(tera_sdk_cookie_update_interval, 600, "the interval of cookie updating(s)"); DEFINE_bool(tera_sdk_perf_counter_enabled, true, "enable performance counter log"); -DEFINE_int64(tera_sdk_perf_counter_log_interval, 60, "the interval period (in sec) of performance counter log dumping"); +DEFINE_int64(tera_sdk_perf_counter_log_interval, 60, + "the interval period (in sec) of performance counter log dumping"); DEFINE_bool(tera_sdk_perf_collect_enabled, false, "enable collect perf counter for metrics"); DEFINE_int32(tera_sdk_perf_collect_interval, 10000, "the interval of collect perf counter(ms)"); -DEFINE_bool(tera_sdk_batch_scan_enabled, true, "enable batch scan"); DEFINE_int64(tera_sdk_scan_buffer_size, 65536, "(B) default buffer limit for scan"); DEFINE_int64(tera_sdk_scan_number_limit, 1000000000, "default number limit for scan"); DEFINE_int32(tera_sdk_max_batch_scan_req, 30, "the max number of concurrent scan req"); -DEFINE_int64(tera_sdk_scan_timeout, 30000, "(ms) scan timeout"); -DEFINE_int32(tera_sdk_batch_scan_max_retry, 60, "the max retry times for session scan"); -DEFINE_int64(batch_scan_delay_retry_in_us, 1000000, "timewait in us before retry batch scan"); -DEFINE_int32(tera_sdk_sync_scan_max_retry, 10, "the max retry times for sync scan"); -DEFINE_int64(sync_scan_delay_retry_in_ms, 1000, "timewait in ms before retry sync scan"); +DEFINE_int64(tera_sdk_scan_timeout, 30000, "(ms) once scan timeout"); +DEFINE_int32(tera_sdk_scan_max_retry, 10, "the max retry times for session scan"); +DEFINE_int64(tera_sdk_scan_delay_retry_in_us, 1000000, "timewait in us before retry batch scan"); DEFINE_string(tera_ins_addr_list, "", "the ins cluster addr. e.g. abc.com:1234,abb.com:1234"); DEFINE_string(tera_ins_root_path, "", "root path on ins. e.g /ps/sandbox"); -DEFINE_bool(tera_ins_enabled, false, "[obsoleted replace by --tera_coord_type=ins] option to open ins naming"); -DEFINE_bool(tera_mock_ins_enabled, false, "[obsoleted replace by --tera_coord_type=mock_ins] option to open mock ins naming"); +DEFINE_bool(tera_ins_enabled, false, + "[obsoleted replace by --tera_coord_type=ins] option to open ins naming"); +DEFINE_bool(tera_mock_ins_enabled, false, + "[obsoleted replace by --tera_coord_type=mock_ins] option to open " + "mock ins naming"); DEFINE_int64(tera_ins_session_timeout, 600000000, "ins session timeout(us), default 10min"); DEFINE_int64(tera_sdk_ins_session_timeout, 10000000, "ins session timeout(us), default 10s"); DEFINE_int64(tera_sdk_status_timeout, 600, "(s) check tablet/tabletnode status timeout"); -DEFINE_uint64(tera_sdk_read_max_qualifiers, 18446744073709551615U, "read qu limit of each cf, default value is the max of uint64"); +DEFINE_uint64(tera_sdk_read_max_qualifiers, 18446744073709551615U, + "read qu limit of each cf, default value is the max of uint64"); DEFINE_bool(tera_sdk_mock_enable, false, "tera sdk mock enable"); // --- Only for online debug --- // Batch Scan DEFINE_bool(debug_tera_sdk_scan, false, "enable print detail info for debug online"); + +DEFINE_string(tera_auth_group_name, "tera_dev", "tera auth group_name stands for service line"); +DEFINE_int32(tera_sdk_get_tablet_retry_times, 10, + "retry times for update meta when get tablet fail"); + +DEFINE_int32(tera_sdk_update_meta_rpc_timeout_max_ms, 300000, + "requests on a server return rpc timeout max duration time(ms), over this threshold " + "sdk will retry update meta"); diff --git a/src/sdk/sdk_metric_name.h b/src/sdk/sdk_metric_name.h index 5b358e912..a156139eb 100644 --- a/src/sdk/sdk_metric_name.h +++ b/src/sdk/sdk_metric_name.h @@ -3,8 +3,8 @@ // found in the LICENSE file. #ifndef TERA_SDK_METRIC_NAME_H_ -#define TERA_SDK_METRIC_NAME_H_ - +#define TERA_SDK_METRIC_NAME_H_ + #include #include "common/metric/hardware_collectors.h" @@ -51,8 +51,8 @@ const char* const kGTxnNotifiesFailCountMetric = "tera_sdk_gtxn_notifies_fail_co const char* const kGTxnTsoDelayMetric = "tera_sdk_gtxn_tso_delay_us"; const char* const kGTxnTsoRequestCountMetric = "tera_sdk_gtxn_tso_request_count"; -} // end namespace tera - -#endif // TERA_SDK_METRIC_NAME_H_ - +} // end namespace tera + +#endif // TERA_SDK_METRIC_NAME_H_ + /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/sdk/sdk_perf.cc b/src/sdk/sdk_perf.cc index 7cc5704d8..ac538496f 100644 --- a/src/sdk/sdk_perf.cc +++ b/src/sdk/sdk_perf.cc @@ -13,73 +13,77 @@ namespace tera { namespace sdk { void PerfCollecter::DumpLog() { - std::shared_ptr latest_report = CollectorReportPublisher::GetInstance().GetCollectorReport(); - int64_t interval = latest_report->interval_ms; - if (interval <= 0) { - // maybe happen at first report, the metric values must be 0 - // set to any non-zero value to avoid div 0 - VLOG(16) << "Metric Report interval is 0"; - interval = 1000; - } - int64_t read_delay = latest_report->FindMetricValue(kGTxnReadDelayMetric, kGTxnLabelRead); - int64_t read_cnt = latest_report->FindMetricValue(kGTxnReadCountMetric, kGTxnLabelRead); - read_delay = read_cnt > 0 ? read_delay / read_cnt : 0; - - LOG(INFO) << "[perf][gtxn] " - << "read_delay " << read_delay << " read_cnt " << read_cnt << " read_fail " - << latest_report->FindMetricValue(kGTxnReadFailCountMetric, kGTxnLabelRead) - << " read_retry_cnt " - << latest_report->FindMetricValue(kGTxnReadRetryCountMetric, kGTxnLabelRead) - << " read_rollback_cnt " - << latest_report->FindMetricValue(kGTxnReadRollBackCountMetric, kGTxnLabelRead) - << " read_rollforward_cnt " - << latest_report->FindMetricValue(kGTxnReadRollForwardCountMetric, kGTxnLabelRead); - - int64_t commit_delay = latest_report->FindMetricValue(kGTxnCommitDelayMetric, kGTxnLabelCommit); - int64_t commit_cnt = latest_report->FindMetricValue(kGTxnCommitCountMetric, kGTxnLabelCommit); - commit_delay = commit_cnt > 0 ? commit_delay / commit_cnt : 0; - - int64_t prewrite_delay = latest_report->FindMetricValue(kGTxnPrewriteDelayMetric, kGTxnLabelCommit); - int64_t prewrite_cnt = latest_report->FindMetricValue(kGTxnPrewriteCountMetric, kGTxnLabelCommit); - prewrite_delay = prewrite_cnt > 0 ? prewrite_delay / prewrite_cnt : 0; - - int64_t primary_delay = latest_report->FindMetricValue(kGTxnPrimaryDelayMetric, kGTxnLabelCommit); - int64_t primary_cnt = latest_report->FindMetricValue(kGTxnPrimaryCountMetric, kGTxnLabelCommit); - primary_delay = primary_cnt > 0 ? primary_delay / primary_cnt : 0; - - int64_t secondaries_delay = latest_report->FindMetricValue(kGTxnSecondariesDelayMetric, kGTxnLabelCommit); - int64_t secondaries_cnt = latest_report->FindMetricValue(kGTxnSecondariesCountMetric, kGTxnLabelCommit); - secondaries_delay = secondaries_cnt > 0 ? secondaries_delay / secondaries_cnt : 0; - - LOG(INFO) << "[perf][gtxn] " - << "commit_delay " << commit_delay << " commit_cnt " << commit_cnt << " commit_fail " - << latest_report->FindMetricValue(kGTxnCommitFailCountMetric, kGTxnLabelCommit) - << " prew_delay " << prewrite_delay << " prew_cnt " << prewrite_cnt << " prew_fail " - << latest_report->FindMetricValue(kGTxnPrewriteFailCountMetric, kGTxnLabelCommit) - << " pri_delay " << primary_delay << " pri_cnt " << primary_cnt << " pri_fail " - << latest_report->FindMetricValue(kGTxnPrimaryFailCountMetric, kGTxnLabelCommit) - << " se_delay " << secondaries_delay << " se_cnt " << secondaries_cnt << " se_fail " - << latest_report->FindMetricValue(kGTxnSecondariesFailCountMetric, kGTxnLabelCommit); - - int64_t tso_delay = latest_report->FindMetricValue(kGTxnTsoDelayMetric, kGTxnLabelTso); - int64_t tso_cnt = latest_report->FindMetricValue(kGTxnTsoRequestCountMetric, kGTxnLabelTso); - tso_delay = tso_cnt > 0 ? tso_delay / tso_cnt : 0; - LOG(INFO) << "[perf][gtxn] tso_delay " << tso_delay << " tso_cnt " << tso_cnt; - - int64_t notify_delay = latest_report->FindMetricValue(kGTxnNotifiesDelayMetric, kGTxnLabelCommit); - int64_t notify_cnt = latest_report->FindMetricValue(kGTxnNotifiesCountMetric, kGTxnLabelCommit); - notify_delay = notify_cnt > 0 ? notify_delay / notify_cnt : 0; - - int64_t ack_delay = latest_report->FindMetricValue(kGTxnAcksDelayMetric, kGTxnLabelCommit); - int64_t ack_cnt = latest_report->FindMetricValue(kGTxnAcksCountMetric, kGTxnLabelCommit); - ack_delay = ack_cnt > 0 ? ack_delay / ack_cnt : 0; - - LOG(INFO) << "[perf][gtxn] " - << "notify_delay " << notify_delay << " notify_cnt " << notify_cnt << " notify_fail " - << latest_report->FindMetricValue(kGTxnNotifiesFailCountMetric, kGTxnLabelCommit) - << " ack_delay " << ack_delay << " ack_cnt " << ack_cnt << " ack_fail " - << latest_report->FindMetricValue(kGTxnAcksFailCountMetric, kGTxnLabelCommit); + std::shared_ptr latest_report = + CollectorReportPublisher::GetInstance().GetCollectorReport(); + int64_t interval = latest_report->interval_ms; + if (interval <= 0) { + // maybe happen at first report, the metric values must be 0 + // set to any non-zero value to avoid div 0 + VLOG(16) << "Metric Report interval is 0"; + interval = 1000; + } + int64_t read_delay = latest_report->FindMetricValue(kGTxnReadDelayMetric, kGTxnLabelRead); + int64_t read_cnt = latest_report->FindMetricValue(kGTxnReadCountMetric, kGTxnLabelRead); + read_delay = read_cnt > 0 ? read_delay / read_cnt : 0; + + LOG(INFO) << "[perf][gtxn] " + << "read_delay " << read_delay << " read_cnt " << read_cnt << " read_fail " + << latest_report->FindMetricValue(kGTxnReadFailCountMetric, kGTxnLabelRead) + << " read_retry_cnt " + << latest_report->FindMetricValue(kGTxnReadRetryCountMetric, kGTxnLabelRead) + << " read_rollback_cnt " + << latest_report->FindMetricValue(kGTxnReadRollBackCountMetric, kGTxnLabelRead) + << " read_rollforward_cnt " + << latest_report->FindMetricValue(kGTxnReadRollForwardCountMetric, kGTxnLabelRead); + + int64_t commit_delay = latest_report->FindMetricValue(kGTxnCommitDelayMetric, kGTxnLabelCommit); + int64_t commit_cnt = latest_report->FindMetricValue(kGTxnCommitCountMetric, kGTxnLabelCommit); + commit_delay = commit_cnt > 0 ? commit_delay / commit_cnt : 0; + + int64_t prewrite_delay = + latest_report->FindMetricValue(kGTxnPrewriteDelayMetric, kGTxnLabelCommit); + int64_t prewrite_cnt = latest_report->FindMetricValue(kGTxnPrewriteCountMetric, kGTxnLabelCommit); + prewrite_delay = prewrite_cnt > 0 ? prewrite_delay / prewrite_cnt : 0; + + int64_t primary_delay = latest_report->FindMetricValue(kGTxnPrimaryDelayMetric, kGTxnLabelCommit); + int64_t primary_cnt = latest_report->FindMetricValue(kGTxnPrimaryCountMetric, kGTxnLabelCommit); + primary_delay = primary_cnt > 0 ? primary_delay / primary_cnt : 0; + + int64_t secondaries_delay = + latest_report->FindMetricValue(kGTxnSecondariesDelayMetric, kGTxnLabelCommit); + int64_t secondaries_cnt = + latest_report->FindMetricValue(kGTxnSecondariesCountMetric, kGTxnLabelCommit); + secondaries_delay = secondaries_cnt > 0 ? secondaries_delay / secondaries_cnt : 0; + + LOG(INFO) << "[perf][gtxn] " + << "commit_delay " << commit_delay << " commit_cnt " << commit_cnt << " commit_fail " + << latest_report->FindMetricValue(kGTxnCommitFailCountMetric, kGTxnLabelCommit) + << " prew_delay " << prewrite_delay << " prew_cnt " << prewrite_cnt << " prew_fail " + << latest_report->FindMetricValue(kGTxnPrewriteFailCountMetric, kGTxnLabelCommit) + << " pri_delay " << primary_delay << " pri_cnt " << primary_cnt << " pri_fail " + << latest_report->FindMetricValue(kGTxnPrimaryFailCountMetric, kGTxnLabelCommit) + << " se_delay " << secondaries_delay << " se_cnt " << secondaries_cnt << " se_fail " + << latest_report->FindMetricValue(kGTxnSecondariesFailCountMetric, kGTxnLabelCommit); + + int64_t tso_delay = latest_report->FindMetricValue(kGTxnTsoDelayMetric, kGTxnLabelTso); + int64_t tso_cnt = latest_report->FindMetricValue(kGTxnTsoRequestCountMetric, kGTxnLabelTso); + tso_delay = tso_cnt > 0 ? tso_delay / tso_cnt : 0; + LOG(INFO) << "[perf][gtxn] tso_delay " << tso_delay << " tso_cnt " << tso_cnt; + + int64_t notify_delay = latest_report->FindMetricValue(kGTxnNotifiesDelayMetric, kGTxnLabelCommit); + int64_t notify_cnt = latest_report->FindMetricValue(kGTxnNotifiesCountMetric, kGTxnLabelCommit); + notify_delay = notify_cnt > 0 ? notify_delay / notify_cnt : 0; + + int64_t ack_delay = latest_report->FindMetricValue(kGTxnAcksDelayMetric, kGTxnLabelCommit); + int64_t ack_cnt = latest_report->FindMetricValue(kGTxnAcksCountMetric, kGTxnLabelCommit); + ack_delay = ack_cnt > 0 ? ack_delay / ack_cnt : 0; + + LOG(INFO) << "[perf][gtxn] " + << "notify_delay " << notify_delay << " notify_cnt " << notify_cnt << " notify_fail " + << latest_report->FindMetricValue(kGTxnNotifiesFailCountMetric, kGTxnLabelCommit) + << " ack_delay " << ack_delay << " ack_cnt " << ack_cnt << " ack_fail " + << latest_report->FindMetricValue(kGTxnAcksFailCountMetric, kGTxnLabelCommit); } - + } // namespace sdk } // namespace tera diff --git a/src/sdk/sdk_perf.h b/src/sdk/sdk_perf.h index d6b756a9e..42772b627 100644 --- a/src/sdk/sdk_perf.h +++ b/src/sdk/sdk_perf.h @@ -2,15 +2,16 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_SDK_SDK_PERF_H_ -#define TERA_SDK_SDK_PERF_H_ +#ifndef TERA_SDK_SDK_PERF_H_ +#define TERA_SDK_SDK_PERF_H_ + +#include #include "gflags/gflags.h" #include "glog/logging.h" #include "common/metric/metric_counter.h" #include "common/metric/collector_report.h" -#include "common/thread.h" #include "common/this_thread.h" #include "tera.h" @@ -20,32 +21,31 @@ namespace tera { namespace sdk { class PerfCollecter { -public: - PerfCollecter() : stopped_(false){} - ~PerfCollecter() {} - - void Run() { - thread_.Start(std::bind(&PerfCollecter::ScheduleCollect, this)); + public: + PerfCollecter() : stopped_(false) {} + ~PerfCollecter() {} + + void Run() { thread_ = std::thread{&PerfCollecter::ScheduleCollect, this}; } + + void Stop() { + stopped_ = true; + thread_.join(); + } + + private: + void ScheduleCollect() { + while (!stopped_) { + CollectorReportPublisher::GetInstance().Refresh(); + DumpLog(); + ThisThread::Sleep(FLAGS_tera_sdk_perf_collect_interval); } + } - void Stop() { - stopped_ = true; - thread_.Join(); - } - -private: - void ScheduleCollect() { - while (!stopped_) { - CollectorReportPublisher::GetInstance().Refresh(); - DumpLog(); - ThisThread::Sleep(FLAGS_tera_sdk_perf_collect_interval); - } - } + void DumpLog(); - void DumpLog(); -private: - common::Thread thread_; - bool stopped_; + private: + std::thread thread_; + bool stopped_; }; } // namespace sdk diff --git a/src/sdk/sdk_task.cc b/src/sdk/sdk_task.cc index 834bb4a97..4d8e0cb8a 100644 --- a/src/sdk/sdk_task.cc +++ b/src/sdk/sdk_task.cc @@ -13,29 +13,53 @@ DECLARE_int32(tera_sdk_timeout_precision); namespace tera { int64_t SdkTask::GetRef() { - MutexLock l(&mutex_); - return ref_; + MutexLock l(&mutex_); + return ref_; } void SdkTask::IncRef() { - MutexLock l(&mutex_); - ++ref_; + MutexLock l(&mutex_); + ++ref_; } void SdkTask::DecRef() { - MutexLock l(&mutex_); - CHECK_GT(ref_, 1); - if (--ref_ == 1) { - cond_.Signal(); - } + MutexLock l(&mutex_); + CHECK_GT(ref_, 1); + if (--ref_ == 1) { + cond_.Signal(); + } } void SdkTask::ExcludeOtherRef() { - MutexLock l(&mutex_); - while (ref_ > 1) { - cond_.Wait(); - } - CHECK_EQ(ref_, 1); + MutexLock l(&mutex_); + while (ref_ > 1) { + cond_.Wait(); + } + CHECK_EQ(ref_, 1); +} + +std::string SdkTask::GetTypeName(TYPE type) { + std::string type_name = "unknown"; + switch (type) { + case SdkTask::READ: + type_name = "read"; + break; + case SdkTask::MUTATION: + type_name = "mutation"; + break; + case SdkTask::BATCH_MUTATION: + type_name = "batch mutation"; + break; + case SdkTask::TASKBATCH: + type_name = "taskbatch"; + break; + case SdkTask::SCAN: + type_name = "scan"; + break; + default: + type_name = "unknown"; + } + return type_name; } SdkTimeoutManager::SdkTimeoutManager(ThreadPool* thread_pool) @@ -46,129 +70,127 @@ SdkTimeoutManager::SdkTimeoutManager(ThreadPool* thread_pool) bg_cond_(&bg_mutex_), bg_func_id_(0), bg_func_(std::bind(&SdkTimeoutManager::CheckTimeout, this)) { - if (timeout_precision_ <= 0) { - timeout_precision_ = 1; - } - if (timeout_precision_ > 1000) { - timeout_precision_ = 1000; - } - bg_func_id_ = thread_pool_->DelayTask(timeout_precision_, bg_func_); + if (timeout_precision_ <= 0) { + timeout_precision_ = 1; + } + if (timeout_precision_ > 1000) { + timeout_precision_ = 1000; + } + bg_func_id_ = thread_pool_->DelayTask(timeout_precision_, bg_func_); } SdkTimeoutManager::~SdkTimeoutManager() { - MutexLock l(&bg_mutex_); - stop_ = true; - if (bg_func_id_ > 0) { - bool non_block = true; - bool is_running = false; - if (thread_pool_->CancelTask(bg_func_id_, non_block, &is_running)) { - bg_exit_ = true; - } else { - CHECK(is_running); - } - } - while (!bg_exit_) { - bg_cond_.Wait(); + MutexLock l(&bg_mutex_); + stop_ = true; + if (bg_func_id_ > 0) { + bool non_block = true; + bool is_running = false; + if (thread_pool_->CancelTask(bg_func_id_, non_block, &is_running)) { + bg_exit_ = true; + } else { + CHECK(is_running); } + } + while (!bg_exit_) { + bg_cond_.Wait(); + } } -bool SdkTimeoutManager::PutTask(SdkTask* task, int64_t timeout, - SdkTask::TimeoutFunc timeout_func) { - int64_t task_id = task->GetId(); - CHECK_GE(task_id, 0); - if (timeout > 0) { - task->SetDueTime(get_millis() + timeout); - task->SetTimeoutFunc(timeout_func); - } - - uint32_t shard_id = Shard(task_id); - TaskMap& map = map_shard_[shard_id]; - Mutex& mutex = mutex_shard_[shard_id]; - - MutexLock l(&mutex); - std::pair insert_ret; - insert_ret = map.id_hash_map.insert(std::pair(task_id, task)); - bool insert_success = insert_ret.second; - if (insert_success) { - map.due_time_map.insert(task); - task->IncRef(); - } - return insert_success; +bool SdkTimeoutManager::PutTask(SdkTask* task, int64_t timeout, SdkTask::TimeoutFunc timeout_func) { + int64_t task_id = task->GetId(); + CHECK_GE(task_id, 0); + if (timeout > 0) { + task->SetDueTime(get_millis() + timeout); + task->SetTimeoutFunc(timeout_func); + task->SetTimeOut(timeout); + } + + uint32_t shard_id = Shard(task_id); + TaskMap& map = map_shard_[shard_id]; + Mutex& mutex = mutex_shard_[shard_id]; + + MutexLock l(&mutex); + std::pair insert_ret; + insert_ret = map.id_hash_map.insert(std::pair(task_id, task)); + bool insert_success = insert_ret.second; + if (insert_success) { + map.due_time_map.insert(task); + task->IncRef(); + } + return insert_success; } SdkTask* SdkTimeoutManager::GetTask(int64_t task_id) { - uint32_t shard_id = Shard(task_id); - TaskMap& map = map_shard_[shard_id]; - Mutex& mutex = mutex_shard_[shard_id]; - - MutexLock l(&mutex); - IdHashMap::iterator it = map.id_hash_map.find(task_id); - if (it != map.id_hash_map.end()) { - SdkTask* task = it->second; - CHECK_EQ(task->GetId(), task_id); - task->IncRef(); - return task; - } else { - return NULL; - } + uint32_t shard_id = Shard(task_id); + TaskMap& map = map_shard_[shard_id]; + Mutex& mutex = mutex_shard_[shard_id]; + + MutexLock l(&mutex); + IdHashMap::iterator it = map.id_hash_map.find(task_id); + if (it != map.id_hash_map.end()) { + SdkTask* task = it->second; + CHECK_EQ(task->GetId(), task_id); + task->IncRef(); + return task; + } else { + return NULL; + } } SdkTask* SdkTimeoutManager::PopTask(int64_t task_id) { - uint32_t shard_id = Shard(task_id); + uint32_t shard_id = Shard(task_id); + TaskMap& map = map_shard_[shard_id]; + Mutex& mutex = mutex_shard_[shard_id]; + + MutexLock l(&mutex); + IdHashMap::iterator it = map.id_hash_map.find(task_id); + if (it != map.id_hash_map.end()) { + SdkTask* task = it->second; + CHECK_EQ(task->GetId(), task_id); + map.id_hash_map.erase(it); + // make sure that we only erased the right one element + assert(map.due_time_map.erase(task) == 1); + return task; + } else { + return NULL; + } +} + +void SdkTimeoutManager::CheckTimeout() { + int64_t now_ms = get_millis(); + for (uint32_t shard_id = 0; shard_id < kShardNum; shard_id++) { TaskMap& map = map_shard_[shard_id]; Mutex& mutex = mutex_shard_[shard_id]; MutexLock l(&mutex); - IdHashMap::iterator it = map.id_hash_map.find(task_id); - if (it != map.id_hash_map.end()) { - SdkTask* task = it->second; - CHECK_EQ(task->GetId(), task_id); - map.id_hash_map.erase(it); - // make sure that we only erased the right one element - assert(map.due_time_map.erase(task) == 1); - return task; - } else { - return NULL; + while (!map.due_time_map.empty()) { + DueTimeMap::iterator it = map.due_time_map.begin(); + SdkTask* task = *it; + if (task->DueTime() > (uint64_t)now_ms) { + break; + } + map.due_time_map.erase(it); + map.id_hash_map.erase(task->GetId()); + mutex.Unlock(); + thread_pool_->AddTask(std::bind(&SdkTimeoutManager::RunTimeoutFunc, this, task)); + mutex.Lock(); } -} + } -void SdkTimeoutManager::CheckTimeout() { - int64_t now_ms = get_millis(); - for (uint32_t shard_id = 0; shard_id < kShardNum; shard_id++) { - TaskMap& map = map_shard_[shard_id]; - Mutex& mutex = mutex_shard_[shard_id]; - - MutexLock l(&mutex); - while (!map.due_time_map.empty()) { - DueTimeMap::iterator it = map.due_time_map.begin(); - SdkTask* task = *it; - if (task->DueTime() > (uint64_t)now_ms) { - break; - } - map.due_time_map.erase(it); - map.id_hash_map.erase(task->GetId()); - mutex.Unlock(); - thread_pool_->AddTask(std::bind(&SdkTimeoutManager::RunTimeoutFunc, this, task)); - mutex.Lock(); - } - } + MutexLock l(&bg_mutex_); + if (stop_) { + bg_exit_ = true; + bg_cond_.Signal(); + return; + } - MutexLock l(&bg_mutex_); - if (stop_) { - bg_exit_ = true; - bg_cond_.Signal(); - return; - } - - bg_func_id_ = thread_pool_->DelayTask(timeout_precision_, bg_func_); + bg_func_id_ = thread_pool_->DelayTask(timeout_precision_, bg_func_); } -void SdkTimeoutManager::RunTimeoutFunc(SdkTask* sdk_task) { - sdk_task->GetTimeoutFunc()(sdk_task); -} +void SdkTimeoutManager::RunTimeoutFunc(SdkTask* sdk_task) { sdk_task->GetTimeoutFunc()(sdk_task); } uint32_t SdkTimeoutManager::Shard(int64_t task_id) { - return (uint64_t)task_id & ((1ull << kShardBits) - 1); + return (uint64_t)task_id & ((1ull << kShardBits) - 1); } -} // namespace tera +} // namespace tera diff --git a/src/sdk/sdk_task.h b/src/sdk/sdk_task.h index 34ec25b1d..62d5e432c 100644 --- a/src/sdk/sdk_task.h +++ b/src/sdk/sdk_task.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_SDK_SDK_TASK_H_ -#define TERA_SDK_SDK_TASK_H_ +#ifndef TERA_SDK_SDK_TASK_H_ +#define TERA_SDK_SDK_TASK_H_ #include #include @@ -19,120 +19,138 @@ namespace tera { class SdkTask { -public: - typedef std::function TimeoutFunc; - enum TYPE { - READ, - MUTATION, - SCAN, - TASKBATCH, - }; - TYPE Type() { return type_; } - - void SetInternalError(StatusCode err) { internal_err_ = err; } - StatusCode GetInternalError() { return internal_err_; } - - void SetMetaTimeStamp(int64_t meta_ts) { meta_timestamp_ = meta_ts; } - int64_t GetMetaTimeStamp() { return meta_timestamp_; } - - void SetId(int64_t id) { id_ = id; } - int64_t GetId() { return id_; } - - void SetDueTime(uint64_t due_time) { due_time_ms_ = due_time; } - uint64_t DueTime() { return due_time_ms_; } - - void SetTimeoutFunc(TimeoutFunc timeout_func) { timeout_func_ = timeout_func; } - TimeoutFunc GetTimeoutFunc() { return timeout_func_; } - - int64_t GetRef(); - void IncRef(); - void DecRef(); - void ExcludeOtherRef(); - - virtual bool IsAsync() = 0; - virtual uint32_t Size() = 0; - virtual int64_t TimeOut() = 0; - virtual void Wait() = 0; - virtual void SetError(ErrorCode::ErrorCodeType err, - const std::string& reason) = 0; - virtual const std::string& RowKey() = 0; - -protected: - SdkTask(TYPE type) - : type_(type), - internal_err_(kTabletNodeOk), - meta_timestamp_(0), - id_(-1), - due_time_ms_(UINT64_MAX), - cond_(&mutex_), - ref_(1) {} - virtual ~SdkTask() {} - -private: - TYPE type_; - StatusCode internal_err_; - int64_t meta_timestamp_; - int64_t id_; - uint64_t due_time_ms_; // timestamp of timeout - TimeoutFunc timeout_func_; - - Mutex mutex_; - CondVar cond_; - int64_t ref_; + public: + typedef std::function TimeoutFunc; + enum TYPE { + READ, + MUTATION, + SCAN, + TASKBATCH, + BATCH_MUTATION, + }; + TYPE Type() { return type_; } + + static std::string GetTypeName(TYPE type); + + void SetInternalError(StatusCode err) { internal_err_ = err; } + StatusCode GetInternalError() { return internal_err_; } + + void SetMetaTimeStamp(int64_t meta_ts) { meta_timestamp_ = meta_ts; } + int64_t GetMetaTimeStamp() { return meta_timestamp_; } + + void SetId(int64_t id) { id_ = id; } + int64_t GetId() { return id_; } + + void SetDueTime(uint64_t due_time) { due_time_ms_ = due_time; } + uint64_t DueTime() { return due_time_ms_; } + + void SetTimeoutFunc(TimeoutFunc timeout_func) { timeout_func_ = timeout_func; } + TimeoutFunc GetTimeoutFunc() { return timeout_func_; } + + uint32_t RetryTimes() const { return retry_times_; } + void IncRetryTimes() { retry_times_++; } + void ResetRetryTimes() { retry_times_ = 0; } + void SetServerAddr(const std::string& server_addr) { server_addr_ = server_addr; } + std::string GetServerAddr() { return server_addr_; } + + int64_t GetRef(); + void IncRef(); + void DecRef(); + void ExcludeOtherRef(); + + virtual bool IsAsync() = 0; + virtual uint32_t Size() = 0; + virtual void SetTimeOut(int64_t timeout) = 0; + virtual int64_t TimeOut() = 0; + virtual void Wait() = 0; + virtual void SetError(ErrorCode::ErrorCodeType err, const std::string& reason) = 0; + + virtual std::string InternalRowKey() = 0; + + // only for user callback + virtual void RunCallback() = 0; + + virtual int64_t GetCommitTimes() = 0; + + protected: + SdkTask(TYPE type) + : type_(type), + internal_err_(kTabletNodeOk), + meta_timestamp_(0), + id_(-1), + due_time_ms_(UINT64_MAX), + retry_times_(0), + cond_(&mutex_), + ref_(1), + server_addr_("") {} + virtual ~SdkTask() {} + + private: + TYPE type_; + StatusCode internal_err_; + int64_t meta_timestamp_; + int64_t id_; + uint64_t due_time_ms_; // timestamp of timeout + TimeoutFunc timeout_func_; + uint32_t retry_times_; + + Mutex mutex_; + CondVar cond_; + int64_t ref_; + std::string server_addr_; }; typedef void (*StatCallback)(Table* table, SdkTask* task); struct SdkTaskDueTimeComp { - bool operator() (SdkTask* lhs, SdkTask* rhs) { - if (lhs->DueTime() != rhs->DueTime()) { - return lhs->DueTime() < rhs->DueTime(); - } - return lhs->GetId() < rhs->GetId(); + bool operator()(SdkTask* lhs, SdkTask* rhs) { + if (lhs->DueTime() != rhs->DueTime()) { + return lhs->DueTime() < rhs->DueTime(); } + return lhs->GetId() < rhs->GetId(); + } }; class SdkTimeoutManager { -public: - SdkTimeoutManager(ThreadPool* thread_pool); - ~SdkTimeoutManager(); - - // timeout <= 0 means NEVER timeout - bool PutTask(SdkTask* task, int64_t timeout = 0, - SdkTask::TimeoutFunc timeout_func = NULL); - SdkTask* GetTask(int64_t task_id); - SdkTask* PopTask(int64_t task_id); - - void CheckTimeout(); - void RunTimeoutFunc(SdkTask* sdk_task); - -private: - uint32_t Shard(int64_t task_id); - -private: - const static uint32_t kShardBits = 6; - const static uint32_t kShardNum = (1 << kShardBits); - - typedef std::multiset DueTimeMap; - typedef std::unordered_map IdHashMap; - struct TaskMap { - DueTimeMap due_time_map; - IdHashMap id_hash_map; - }; - - TaskMap map_shard_[kShardNum]; - mutable Mutex mutex_shard_[kShardNum]; - ThreadPool* thread_pool_; - int32_t timeout_precision_; - - mutable Mutex bg_mutex_; - bool stop_; - bool bg_exit_; - CondVar bg_cond_; - int64_t bg_func_id_; - const ThreadPool::Task bg_func_; + public: + SdkTimeoutManager(ThreadPool* thread_pool); + ~SdkTimeoutManager(); + + // timeout <= 0 means NEVER timeout + bool PutTask(SdkTask* task, int64_t timeout = 0, SdkTask::TimeoutFunc timeout_func = NULL); + SdkTask* GetTask(int64_t task_id); + SdkTask* PopTask(int64_t task_id); + + void CheckTimeout(); + void RunTimeoutFunc(SdkTask* sdk_task); + + private: + uint32_t Shard(int64_t task_id); + + private: + const static uint32_t kShardBits = 6; + const static uint32_t kShardNum = (1 << kShardBits); + + typedef std::multiset DueTimeMap; + typedef std::unordered_map IdHashMap; + struct TaskMap { + DueTimeMap due_time_map; + IdHashMap id_hash_map; + }; + + TaskMap map_shard_[kShardNum]; + mutable Mutex mutex_shard_[kShardNum]; + ThreadPool* thread_pool_; + int32_t timeout_precision_; + + mutable Mutex bg_mutex_; + bool stop_; + bool bg_exit_; + CondVar bg_cond_; + int64_t bg_func_id_; + const ThreadPool::Task bg_func_; }; -} // namespace tera +} // namespace tera #endif // TERA_SDK_SDK_TASK_H_ diff --git a/src/sdk/sdk_utils.cc b/src/sdk/sdk_utils.cc index 98d25fcf2..e7a93eee3 100644 --- a/src/sdk/sdk_utils.cc +++ b/src/sdk/sdk_utils.cc @@ -8,6 +8,7 @@ #include #include +#include #include "common/base/string_ext.h" #include "common/base/string_number.h" @@ -18,6 +19,7 @@ #include "sdk/schema_impl.h" #include "sdk/filter_utils.h" +#include "tera.h" #include "types.h" DECLARE_int64(tera_tablet_write_block_size); @@ -27,1042 +29,1114 @@ DECLARE_int64(tera_master_merge_tablet_size); namespace tera { string LgProp2Str(bool type) { - if (type) { - return "snappy"; - } else { - return "none"; - } + if (type) { + return "snappy"; + } else { + return "none"; + } } string LgProp2Str(StoreMedium type) { - if (type == DiskStore) { - return "disk"; - } else if (type == FlashStore) { - return "flash"; - } else if (type == MemoryStore) { - return "memory"; - } else { - return ""; - } + if (type == DiskStore) { + return "disk"; + } else if (type == FlashStore) { + return "flash"; + } else if (type == MemoryStore) { + return "memory"; + } else { + return ""; + } } string TableProp2Str(RawKey type) { - if (type == Readable) { - return "readable"; - } else if (type == Binary) { - return "binary"; - } else if (type == TTLKv) { - return "ttlkv"; - } else if (type == GeneralKv) { - return "kv"; - } else { - return ""; - } + if (type == Readable) { + return "readable"; + } else if (type == Binary) { + return "binary"; + } else if (type == TTLKv) { + return "ttlkv"; + } else if (type == GeneralKv) { + return "kv"; + } else { + return ""; + } } string Switch2Str(bool enabled) { - if (enabled) { - return "on"; - } else { - return "off"; - } + if (enabled) { + return "on"; + } else { + return "off"; + } } -void ReplaceStringInPlace(std::string& subject, - const std::string& search, +void ReplaceStringInPlace(std::string& subject, const std::string& search, const std::string& replace) { - size_t pos = 0; - while ((pos = subject.find(search, pos)) != std::string::npos) { - subject.replace(pos, search.length(), replace); - pos += replace.length(); - } + size_t pos = 0; + while ((pos = subject.find(search, pos)) != std::string::npos) { + subject.replace(pos, search.length(), replace); + pos += replace.length(); + } } void ShowTableSchema(const TableSchema& s, bool is_x) { - TableSchema schema = s; - std::stringstream ss; - std::string str; - std::string table_alias = schema.name(); - if (!schema.alias().empty()) { - table_alias = schema.alias(); - } - if (schema.has_kv_only() && schema.kv_only() && schema.raw_key() != GeneralKv) { - std::cerr << "caution: old style schema, do not update it if necessary." << std::endl; - schema.set_raw_key(GeneralKv); - } - - if (schema.raw_key() == TTLKv || schema.raw_key() == GeneralKv) { - const LocalityGroupSchema& lg_schema = schema.locality_groups(0); - ss << "\n " << table_alias << " <"; - if (is_x) { - ss << "rawkey=" << TableProp2Str(schema.raw_key()) << ","; - } - ss << "splitsize=" << schema.split_size() << ","; - ss << "mergesize=" << schema.merge_size() << ","; - if (is_x || schema.disable_wal()) { - ss << "wal=" << Switch2Str(!schema.disable_wal()) << ","; - } - if (is_x || lg_schema.store_type() != DiskStore) { - ss << "storage=" << LgProp2Str(lg_schema.store_type()) << ","; - } - if (is_x || lg_schema.block_size() != FLAGS_tera_tablet_write_block_size) { - ss << "blocksize=" << lg_schema.block_size() << ","; - } - if (is_x && schema.admin_group() != "") { - ss << "admin_group=" << schema.admin_group() << ","; - } - if (is_x && schema.admin() != "") { - ss << "admin=" << schema.admin() << ","; - } - ss << "\b>\n" << " (kv mode)\n"; - str = ss.str(); - ReplaceStringInPlace(str, ",\b", ""); - std::cout << str << std::endl; - return; - } + TableSchema schema = s; + std::stringstream ss; + std::string str; + std::string table_alias = schema.name(); + if (!schema.alias().empty()) { + table_alias = schema.alias(); + } + if (schema.has_kv_only() && schema.kv_only() && schema.raw_key() != GeneralKv) { + std::cerr << "caution: old style schema, do not update it if necessary." << std::endl; + schema.set_raw_key(GeneralKv); + } + if (schema.raw_key() == TTLKv || schema.raw_key() == GeneralKv) { + const LocalityGroupSchema& lg_schema = schema.locality_groups(0); ss << "\n " << table_alias << " <"; if (is_x) { - ss << "rawkey=" << TableProp2Str(schema.raw_key()) << ","; + ss << "rawkey=" << TableProp2Str(schema.raw_key()) << ","; } ss << "splitsize=" << schema.split_size() << ","; ss << "mergesize=" << schema.merge_size() << ","; + if (is_x || schema.disable_wal()) { + ss << "wal=" << Switch2Str(!schema.disable_wal()) << ","; + } + if (is_x || lg_schema.store_type() != DiskStore) { + ss << "storage=" << LgProp2Str(lg_schema.store_type()) << ","; + } + if (is_x || lg_schema.block_size() != FLAGS_tera_tablet_write_block_size) { + ss << "blocksize=" << lg_schema.block_size() << ","; + } if (is_x && schema.admin_group() != "") { - ss << "admin_group=" << schema.admin_group() << ","; + ss << "admin_group=" << schema.admin_group() << ","; } if (is_x && schema.admin() != "") { - ss << "admin=" << schema.admin() << ","; - } - if (is_x || schema.disable_wal()) { - ss << "wal=" << Switch2Str(!schema.disable_wal()) << ","; + ss << "admin=" << schema.admin() << ","; } - if (is_x || schema.enable_txn()) { - ss << "txn=" << Switch2Str(schema.enable_txn()) << ","; + if (is_x || schema.enable_hash()) { + ss << "hash=" << Switch2Str(schema.enable_hash()) << ","; } - ss << "\b> {" << std::endl; - - size_t lg_num = schema.locality_groups_size(); - size_t cf_num = schema.column_families_size(); - for (size_t lg_no = 0; lg_no < lg_num; ++lg_no) { - const LocalityGroupSchema& lg_schema = schema.locality_groups(lg_no); - ss << " " << lg_schema.name() << " <"; - ss << "storage=" << LgProp2Str(lg_schema.store_type()) << ","; - if (is_x || lg_schema.block_size() != FLAGS_tera_tablet_write_block_size) { - ss << "blocksize=" << lg_schema.block_size() << ","; - } - if (is_x) { - ss << "sst_size=" << (lg_schema.sst_size() >> 20) << ","; - } - if (lg_schema.use_memtable_on_leveldb()) { - ss << "use_memtable_on_leveldb=true" - << ",memtable_ldb_write_buffer_size=" - << lg_schema.memtable_ldb_write_buffer_size() - << ",memtable_ldb_block_size=" - << lg_schema.memtable_ldb_block_size() << ","; - } - ss << "\b> {" << std::endl; - for (size_t cf_no = 0; cf_no < cf_num; ++cf_no) { - const ColumnFamilySchema& cf_schema = schema.column_families(cf_no); - if (cf_schema.locality_group() != lg_schema.name()) { - continue; - } - ss << " " << cf_schema.name(); - std::stringstream cf_ss; - cf_ss << " <"; - if (is_x || cf_schema.max_versions() != 1) { - cf_ss << "maxversions=" << cf_schema.max_versions() << ","; - } - if (is_x || cf_schema.min_versions() != 1) { - cf_ss << "minversions=" << cf_schema.min_versions() << ","; - } - if (is_x || cf_schema.time_to_live() != 0) { - cf_ss << "ttl=" << cf_schema.time_to_live() << ","; - } - if (is_x || (cf_schema.type() != "bytes" && cf_schema.type() != "")) { - if (cf_schema.type() != "") { - cf_ss << "type=" << cf_schema.type() << ","; - } else { - cf_ss << "type=bytes" << ","; - } - } - if (is_x || (cf_schema.gtxn() != false)) { - cf_ss << "gtxn=" << Switch2Str(cf_schema.gtxn()) << ","; - } - if (is_x || (cf_schema.notify() != false)) { - cf_ss << "notify=" << Switch2Str(cf_schema.notify()) << ","; - } - cf_ss << "\b>"; - if (cf_ss.str().size() > 5) { - ss << cf_ss.str(); - } - ss << "," << std::endl; - } - ss << " }," << std::endl; + if (is_x || schema.bloom_filter_bits_per_key() != 10) { + ss << "bloom_filter_bits_per_key=" << schema.bloom_filter_bits_per_key() << ","; } - ss << " }" << std::endl; + ss << "\b>\n" + << " (kv mode)\n"; str = ss.str(); ReplaceStringInPlace(str, ",\b", ""); std::cout << str << std::endl; + return; + } + + ss << "\n " << table_alias << " <"; + if (is_x) { + ss << "rawkey=" << TableProp2Str(schema.raw_key()) << ","; + } + ss << "splitsize=" << schema.split_size() << ","; + ss << "mergesize=" << schema.merge_size() << ","; + if (is_x && schema.admin_group() != "") { + ss << "admin_group=" << schema.admin_group() << ","; + } + if (is_x && schema.admin() != "") { + ss << "admin=" << schema.admin() << ","; + } + if (is_x || schema.disable_wal()) { + ss << "wal=" << Switch2Str(!schema.disable_wal()) << ","; + } + if (is_x || schema.enable_txn()) { + ss << "txn=" << Switch2Str(schema.enable_txn()) << ","; + } + if (is_x || schema.enable_hash()) { + ss << "hash=" << Switch2Str(schema.enable_hash()) << ","; + } + if (is_x || schema.bloom_filter_bits_per_key() != 10) { + ss << "bloom_filter_bits_per_key=" << schema.bloom_filter_bits_per_key() << ","; + } + ss << "\b> {" << std::endl; + + size_t lg_num = schema.locality_groups_size(); + size_t cf_num = schema.column_families_size(); + for (size_t lg_no = 0; lg_no < lg_num; ++lg_no) { + const LocalityGroupSchema& lg_schema = schema.locality_groups(lg_no); + ss << " " << lg_schema.name() << " <"; + ss << "storage=" << LgProp2Str(lg_schema.store_type()) << ","; + if (is_x || lg_schema.block_size() != FLAGS_tera_tablet_write_block_size) { + ss << "blocksize=" << lg_schema.block_size() << ","; + } + if (is_x) { + ss << "sst_size=" << (lg_schema.sst_size() >> 20) << ","; + } + if (lg_schema.use_memtable_on_leveldb()) { + ss << "use_memtable_on_leveldb=true" + << ",memtable_ldb_write_buffer_size=" << lg_schema.memtable_ldb_write_buffer_size() + << ",memtable_ldb_block_size=" << lg_schema.memtable_ldb_block_size() << ","; + } + ss << "\b> {" << std::endl; + for (size_t cf_no = 0; cf_no < cf_num; ++cf_no) { + const ColumnFamilySchema& cf_schema = schema.column_families(cf_no); + if (cf_schema.locality_group() != lg_schema.name()) { + continue; + } + ss << " " << cf_schema.name(); + std::stringstream cf_ss; + cf_ss << " <"; + if (is_x || cf_schema.max_versions() != 1) { + cf_ss << "maxversions=" << cf_schema.max_versions() << ","; + } + if (is_x || cf_schema.min_versions() != 1) { + cf_ss << "minversions=" << cf_schema.min_versions() << ","; + } + if (is_x || cf_schema.time_to_live() != 0) { + cf_ss << "ttl=" << cf_schema.time_to_live() << ","; + } + if (is_x || (cf_schema.type() != "bytes" && cf_schema.type() != "")) { + if (cf_schema.type() != "") { + cf_ss << "type=" << cf_schema.type() << ","; + } else { + cf_ss << "type=bytes" + << ","; + } + } + if (is_x || (cf_schema.gtxn() != false)) { + cf_ss << "gtxn=" << Switch2Str(cf_schema.gtxn()) << ","; + } + if (is_x || (cf_schema.notify() != false)) { + cf_ss << "notify=" << Switch2Str(cf_schema.notify()) << ","; + } + cf_ss << "\b>"; + if (cf_ss.str().size() > 5) { + ss << cf_ss.str(); + } + ss << "," << std::endl; + } + ss << " }," << std::endl; + } + ss << " }" << std::endl; + str = ss.str(); + ReplaceStringInPlace(str, ",\b", ""); + std::cout << str << std::endl; } void ShowTableMeta(const TableMeta& meta) { - const TableSchema& schema = meta.schema(); - ShowTableSchema(schema); - std::cout << std::endl; + const TableSchema& schema = meta.schema(); + ShowTableSchema(schema); + std::cout << std::endl; } void ShowTableDescriptor(TableDescriptor& table_desc, bool is_x) { - TableSchema schema; - TableDescToSchema(table_desc, &schema); - ShowTableSchema(schema, is_x); + TableSchema schema; + TableDescToSchema(table_desc, &schema); + ShowTableSchema(schema, is_x); } void TableDescToSchema(const TableDescriptor& desc, TableSchema* schema) { - schema->set_name(desc.TableName()); - switch (desc.RawKey()) { - case kBinary: - schema->set_raw_key(Binary); - break; - case kTTLKv: - schema->set_raw_key(TTLKv); - break; - case kGeneralKv: - schema->set_raw_key(GeneralKv); - // compat old code - schema->set_kv_only(true); - break; - default: - schema->set_raw_key(Readable); - break; - } - schema->set_split_size(desc.SplitSize()); - schema->set_merge_size(desc.MergeSize()); - schema->set_admin_group(desc.AdminGroup()); - schema->set_admin(desc.Admin()); - schema->set_disable_wal(desc.IsWalDisabled()); - schema->set_enable_txn(desc.IsTxnEnabled()); - // add lg - int num = desc.LocalityGroupNum(); - for (int i = 0; i < num; ++i) { - LocalityGroupSchema* lg = schema->add_locality_groups(); - const LocalityGroupDescriptor* lgdesc = desc.LocalityGroup(i); - lg->set_block_size(lgdesc->BlockSize()); - lg->set_compress_type(lgdesc->Compress() != kNoneCompress); - lg->set_name(lgdesc->Name()); - // printf("add lg %s\n", lgdesc->Name().c_str()); - switch (lgdesc->Store()) { - case kInMemory: - lg->set_store_type(MemoryStore); - break; - case kInFlash: - lg->set_store_type(FlashStore); - break; - default: - lg->set_store_type(DiskStore); - break; - } - lg->set_use_memtable_on_leveldb(lgdesc->UseMemtableOnLeveldb()); - if (lgdesc->MemtableLdbBlockSize() > 0) { - lg->set_memtable_ldb_write_buffer_size(lgdesc->MemtableLdbWriteBufferSize()); - lg->set_memtable_ldb_block_size(lgdesc->MemtableLdbBlockSize()); - } - lg->set_sst_size(lgdesc->SstSize()); - lg->set_id(lgdesc->Id()); - } - // add cf - int cfnum = desc.ColumnFamilyNum(); - for (int i = 0; i < cfnum; ++i) { - ColumnFamilySchema* cf = schema->add_column_families(); - const ColumnFamilyDescriptor* cf_desc = desc.ColumnFamily(i); - const LocalityGroupDescriptor* lg_desc = - desc.LocalityGroup(cf_desc->LocalityGroup()); - assert(lg_desc); - cf->set_name(cf_desc->Name()); - cf->set_time_to_live(cf_desc->TimeToLive()); - cf->set_locality_group(cf_desc->LocalityGroup()); - cf->set_max_versions(cf_desc->MaxVersions()); - cf->set_min_versions(cf_desc->MinVersions()); - cf->set_type(cf_desc->Type()); - cf->set_gtxn(cf_desc->GlobalTransaction()); - cf->set_notify(cf_desc->IsNotifyEnabled()); - } + schema->set_name(desc.TableName()); + switch (desc.RawKey()) { + case kBinary: + schema->set_raw_key(Binary); + break; + case kTTLKv: + schema->set_raw_key(TTLKv); + break; + case kGeneralKv: + schema->set_raw_key(GeneralKv); + // compat old code + schema->set_kv_only(true); + break; + default: + schema->set_raw_key(Readable); + break; + } + schema->set_split_size(desc.SplitSize()); + schema->set_merge_size(desc.MergeSize()); + schema->set_admin_group(desc.AdminGroup()); + schema->set_admin(desc.Admin()); + schema->set_disable_wal(desc.IsWalDisabled()); + schema->set_enable_txn(desc.IsTxnEnabled()); + schema->set_enable_hash(desc.IsHashEnabled()); + schema->set_bloom_filter_bits_per_key(desc.BloomFilterBitsPerKey()); + // add lg + int num = desc.LocalityGroupNum(); + for (int i = 0; i < num; ++i) { + LocalityGroupSchema* lg = schema->add_locality_groups(); + const LocalityGroupDescriptor* lgdesc = desc.LocalityGroup(i); + lg->set_block_size(lgdesc->BlockSize()); + lg->set_compress_type(lgdesc->Compress() != kNoneCompress); + lg->set_name(lgdesc->Name()); + // printf("add lg %s\n", lgdesc->Name().c_str()); + switch (lgdesc->Store()) { + case kInMemory: + lg->set_store_type(MemoryStore); + break; + case kInFlash: + lg->set_store_type(FlashStore); + break; + default: + lg->set_store_type(DiskStore); + break; + } + lg->set_use_memtable_on_leveldb(lgdesc->UseMemtableOnLeveldb()); + if (lgdesc->MemtableLdbBlockSize() > 0) { + lg->set_memtable_ldb_write_buffer_size(lgdesc->MemtableLdbWriteBufferSize()); + lg->set_memtable_ldb_block_size(lgdesc->MemtableLdbBlockSize()); + } + lg->set_sst_size(lgdesc->SstSize()); + lg->set_id(lgdesc->Id()); + } + // add cf + int cfnum = desc.ColumnFamilyNum(); + for (int i = 0; i < cfnum; ++i) { + ColumnFamilySchema* cf = schema->add_column_families(); + const ColumnFamilyDescriptor* cf_desc = desc.ColumnFamily(i); + const LocalityGroupDescriptor* lg_desc = desc.LocalityGroup(cf_desc->LocalityGroup()); + assert(lg_desc); + cf->set_name(cf_desc->Name()); + cf->set_time_to_live(cf_desc->TimeToLive()); + cf->set_locality_group(cf_desc->LocalityGroup()); + cf->set_max_versions(cf_desc->MaxVersions()); + cf->set_min_versions(cf_desc->MinVersions()); + cf->set_type(cf_desc->Type()); + cf->set_gtxn(cf_desc->GlobalTransaction()); + cf->set_notify(cf_desc->IsNotifyEnabled()); + } } void TableSchemaToDesc(const TableSchema& schema, TableDescriptor* desc) { - switch (schema.raw_key()) { - case Binary: - desc->SetRawKey(kBinary); - break; - case TTLKv: - desc->SetRawKey(kTTLKv); - break; - case GeneralKv: - desc->SetRawKey(kGeneralKv); - break; - default: - desc->SetRawKey(kReadable); - } - // for compatibility - if (schema.has_kv_only() && schema.kv_only() && schema.raw_key() != GeneralKv) { - LOG(WARNING) << "table " << schema.name() - << ": old style schema, do not update it if necessary."; - desc->SetRawKey(kGeneralKv); - } + switch (schema.raw_key()) { + case Binary: + desc->SetRawKey(kBinary); + break; + case TTLKv: + desc->SetRawKey(kTTLKv); + break; + case GeneralKv: + desc->SetRawKey(kGeneralKv); + break; + default: + desc->SetRawKey(kReadable); + } + // for compatibility + if (schema.has_kv_only() && schema.kv_only() && schema.raw_key() != GeneralKv) { + LOG(WARNING) << "table " << schema.name() + << ": old style schema, do not update it if necessary."; + desc->SetRawKey(kGeneralKv); + } - if (schema.has_split_size()) { - desc->SetSplitSize(schema.split_size()); - } - if (schema.has_merge_size()) { - desc->SetMergeSize(schema.merge_size()); - } - if (schema.has_admin_group()) { - desc->SetAdminGroup(schema.admin_group()); - } - if (schema.has_admin()) { - desc->SetAdmin(schema.admin()); - } - if (schema.has_disable_wal() && schema.disable_wal()) { - desc->DisableWal(); - } - if (schema.has_enable_txn() && schema.enable_txn()) { - desc->EnableTxn(); + if (schema.has_split_size()) { + desc->SetSplitSize(schema.split_size()); + } + if (schema.has_merge_size()) { + desc->SetMergeSize(schema.merge_size()); + } + if (schema.has_admin_group()) { + desc->SetAdminGroup(schema.admin_group()); + } + if (schema.has_admin()) { + desc->SetAdmin(schema.admin()); + } + if (schema.has_disable_wal() && schema.disable_wal()) { + desc->DisableWal(); + } + if (schema.has_enable_txn() && schema.enable_txn()) { + desc->EnableTxn(); + } + if (schema.has_enable_hash() && schema.enable_hash()) { + desc->EnableHash(); + } + if (schema.has_bloom_filter_bits_per_key()) { + desc->SetBloomFilterBitsPerKey(schema.bloom_filter_bits_per_key()); + } + int32_t lg_num = schema.locality_groups_size(); + for (int32_t i = 0; i < lg_num; i++) { + const LocalityGroupSchema& lg = schema.locality_groups(i); + LocalityGroupDescriptor* lgd = desc->AddLocalityGroup(lg.name()); + if (lgd == NULL) { + continue; + } + lgd->SetBlockSize(lg.block_size()); + switch (lg.store_type()) { + case MemoryStore: + lgd->SetStore(kInMemory); + break; + case FlashStore: + lgd->SetStore(kInFlash); + break; + default: + lgd->SetStore(kInDisk); + break; + } + lgd->SetCompress(lg.compress_type() ? kSnappyCompress : kNoneCompress); + lgd->SetUseBloomfilter(lg.use_bloom_filter()); + lgd->SetUseMemtableOnLeveldb(lg.use_memtable_on_leveldb()); + lgd->SetMemtableLdbWriteBufferSize(lg.memtable_ldb_write_buffer_size()); + lgd->SetMemtableLdbBlockSize(lg.memtable_ldb_block_size()); + lgd->SetSstSize(lg.sst_size()); + } + int32_t cf_num = schema.column_families_size(); + for (int32_t i = 0; i < cf_num; i++) { + const ColumnFamilySchema& cf = schema.column_families(i); + ColumnFamilyDescriptor* cfd = desc->AddColumnFamily(cf.name(), cf.locality_group()); + if (cfd == NULL) { + continue; + } + cfd->SetDiskQuota(cf.disk_quota()); + cfd->SetMaxVersions(cf.max_versions()); + cfd->SetMinVersions(cf.min_versions()); + cfd->SetTimeToLive(cf.time_to_live()); + cfd->SetType(cf.type()); + if (cf.gtxn()) { + cfd->EnableGlobalTransaction(); + } else { + cfd->DisableGlobalTransaction(); } - int32_t lg_num = schema.locality_groups_size(); - for (int32_t i = 0; i < lg_num; i++) { - const LocalityGroupSchema& lg = schema.locality_groups(i); - LocalityGroupDescriptor* lgd = desc->AddLocalityGroup(lg.name()); - if (lgd == NULL) { - continue; - } - lgd->SetBlockSize(lg.block_size()); - switch (lg.store_type()) { - case MemoryStore: - lgd->SetStore(kInMemory); - break; - case FlashStore: - lgd->SetStore(kInFlash); - break; - default: - lgd->SetStore(kInDisk); - break; - } - lgd->SetCompress(lg.compress_type() ? kSnappyCompress : kNoneCompress); - lgd->SetUseBloomfilter(lg.use_bloom_filter()); - lgd->SetUseMemtableOnLeveldb(lg.use_memtable_on_leveldb()); - lgd->SetMemtableLdbWriteBufferSize(lg.memtable_ldb_write_buffer_size()); - lgd->SetMemtableLdbBlockSize(lg.memtable_ldb_block_size()); - lgd->SetSstSize(lg.sst_size()); - } - int32_t cf_num = schema.column_families_size(); - for (int32_t i = 0; i < cf_num; i++) { - const ColumnFamilySchema& cf = schema.column_families(i); - ColumnFamilyDescriptor* cfd = - desc->AddColumnFamily(cf.name(), cf.locality_group()); - if (cfd == NULL) { - continue; - } - cfd->SetDiskQuota(cf.disk_quota()); - cfd->SetMaxVersions(cf.max_versions()); - cfd->SetMinVersions(cf.min_versions()); - cfd->SetTimeToLive(cf.time_to_live()); - cfd->SetType(cf.type()); - if (cf.gtxn()) { - cfd->EnableGlobalTransaction(); - } else { - cfd->DisableGlobalTransaction(); - } - if (cf.notify()) { - cfd->EnableNotify(); - } else { - cfd->DisableNotify(); - } + if (cf.notify()) { + cfd->EnableNotify(); + } else { + cfd->DisableNotify(); } + } } -bool SetCfProperties(const string& name, const string& value, - ColumnFamilyDescriptor* desc) { - if (desc == NULL) { - return false; +bool SetCfProperties(const string& name, const string& value, ColumnFamilyDescriptor* desc) { + if (desc == NULL) { + return false; + } + if (name == "ttl") { + int32_t ttl; + if (!StringToNumber(value, &ttl) || (ttl < 0)) { + return false; + } + desc->SetTimeToLive(ttl); + } else if (name == "maxversions") { + int32_t versions; + if (!StringToNumber(value, &versions) || (versions <= 0)) { + return false; + } + desc->SetMaxVersions(versions); + } else if (name == "minversions") { + int32_t versions; + if (!StringToNumber(value, &versions) || (versions <= 0)) { + return false; + } + desc->SetMinVersions(versions); + } else if (name == "diskquota") { + int64_t quota; + if (!StringToNumber(value, "a) || (quota <= 0)) { + return false; + } + desc->SetDiskQuota(quota); + } else if (name == "type") { + if (value != "bytes") { + return false; + } + desc->SetType(value); + } else if (name == "gtxn") { + if (value == "on") { + desc->EnableGlobalTransaction(); + } else if (value == "off") { + desc->DisableGlobalTransaction(); + } else { + return false; } - if (name == "ttl") { - int32_t ttl; - if (!StringToNumber(value, &ttl) || (ttl < 0)) { - return false; - } - desc->SetTimeToLive(ttl); - } else if (name == "maxversions") { - int32_t versions; - if (!StringToNumber(value, &versions) || (versions <= 0)) { - return false; - } - desc->SetMaxVersions(versions); - } else if (name == "minversions") { - int32_t versions; - if (!StringToNumber(value, &versions) || (versions <= 0)) { - return false; - } - desc->SetMinVersions(versions); - } else if (name == "diskquota") { - int64_t quota; - if (!StringToNumber(value, "a) || (quota <= 0)) { - return false; - } - desc->SetDiskQuota(quota); - } else if (name == "type") { - if (value != "bytes") { - return false; - } - desc->SetType(value); - } else if (name == "gtxn") { - if (value == "on") { - desc->EnableGlobalTransaction(); - } else if (value == "off") { - desc->DisableGlobalTransaction(); - } else { - return false; - } - } else if (name == "notify") { - if (value == "on") { - desc->EnableNotify(); - } else if (value == "off") { - desc->DisableNotify(); - } else { - return false; - } - }else { - return false; + } else if (name == "notify") { + if (value == "on") { + desc->EnableNotify(); + } else if (value == "off") { + desc->DisableNotify(); + } else { + return false; } - return true; + } else { + return false; + } + return true; } -bool SetLgProperties(const string& name, const string& value, - LocalityGroupDescriptor* desc) { - if (desc == NULL) { - return false; - } - if (name == "compress") { - if (value == "none") { - desc->SetCompress(kNoneCompress); - } else if (value == "snappy") { - desc->SetCompress(kSnappyCompress); - } else { - return false; - } - } else if (name == "storage") { - if (value == "disk") { - desc->SetStore(kInDisk); - } else if (value == "flash") { - desc->SetStore(kInFlash); - } else if (value == "memory") { - desc->SetStore(kInMemory); - } else { - return false; - } - } else if (name == "blocksize") { - int blocksize; - if (!StringToNumber(value, &blocksize) || (blocksize <= 0)){ - return false; - } - desc->SetBlockSize(blocksize); - } else if (name == "use_memtable_on_leveldb") { - if (value == "true") { - desc->SetUseMemtableOnLeveldb(true); - } else if (value == "false") { - desc->SetUseMemtableOnLeveldb(false); - } else { - return false; - } - } else if (name == "memtable_ldb_write_buffer_size") { - int32_t buffer_size; //KB - if (!StringToNumber(value, &buffer_size) || (buffer_size <= 0)) { - return false; - } - desc->SetMemtableLdbWriteBufferSize(buffer_size); - } else if (name == "memtable_ldb_block_size") { - int32_t block_size; //KB - if (!StringToNumber(value, &block_size) || (block_size <= 0)) { - return false; - } - desc->SetMemtableLdbBlockSize(block_size); - } else if (name == "sst_size") { - const int32_t SST_SIZE_MAX = 1024; // MB - int32_t sst_size; - if (!StringToNumber(value, &sst_size) || (sst_size <= 0) || (sst_size > SST_SIZE_MAX) ) { - return false; - } - desc->SetSstSize(sst_size<<20); // display in MB, store in Bytes. +bool SetLgProperties(const string& name, const string& value, LocalityGroupDescriptor* desc) { + if (desc == NULL) { + return false; + } + if (name == "compress") { + if (value == "none") { + desc->SetCompress(kNoneCompress); + } else if (value == "snappy") { + desc->SetCompress(kSnappyCompress); } else { - return false; - } - return true; + return false; + } + } else if (name == "storage") { + if (value == "disk") { + desc->SetStore(kInDisk); + } else if (value == "flash") { + desc->SetStore(kInFlash); + } else if (value == "memory") { + desc->SetStore(kInMemory); + } else { + return false; + } + } else if (name == "blocksize") { + int blocksize; + if (!StringToNumber(value, &blocksize) || (blocksize <= 0)) { + return false; + } + desc->SetBlockSize(blocksize); + } else if (name == "use_memtable_on_leveldb") { + if (value == "true") { + desc->SetUseMemtableOnLeveldb(true); + } else if (value == "false") { + desc->SetUseMemtableOnLeveldb(false); + } else { + return false; + } + } else if (name == "memtable_ldb_write_buffer_size") { + int32_t buffer_size; // KB + if (!StringToNumber(value, &buffer_size) || (buffer_size <= 0)) { + return false; + } + desc->SetMemtableLdbWriteBufferSize(buffer_size); + } else if (name == "memtable_ldb_block_size") { + int32_t block_size; // KB + if (!StringToNumber(value, &block_size) || (block_size <= 0)) { + return false; + } + desc->SetMemtableLdbBlockSize(block_size); + } else if (name == "sst_size") { + const int32_t SST_SIZE_MAX = 1024; // MB + int32_t sst_size; + if (!StringToNumber(value, &sst_size) || (sst_size <= 0) || (sst_size > SST_SIZE_MAX)) { + return false; + } + desc->SetSstSize(sst_size << 20); // display in MB, store in Bytes. + } else { + return false; + } + return true; } -bool SetTableProperties(const string& name, const string& value, - TableDescriptor* desc) { - if (desc == NULL) { - return false; +bool SetTableProperties(const string& name, const string& value, TableDescriptor* desc) { + if (desc == NULL) { + return false; + } + if (name == "rawkey") { + if (value == "readable") { + desc->SetRawKey(kReadable); + } else if (value == "binary") { + desc->SetRawKey(kBinary); + } else if (value == "ttlkv") { + desc->SetRawKey(kTTLKv); + } else if (value == "kv") { + desc->SetRawKey(kGeneralKv); + } else { + return false; + } + } else if (name == "splitsize") { + int splitsize; // MB + if (!StringToNumber(value, &splitsize) || (splitsize < 0)) { + return false; + } + desc->SetSplitSize(splitsize); + } else if (name == "mergesize") { + int mergesize; // MB + if (!StringToNumber(value, &mergesize) || (mergesize < 0)) { // mergesize == 0 : merge closed + return false; + } + desc->SetMergeSize(mergesize); + } else if (name == "admin_group") { + if (!IsValidGroupName(value)) { + return false; + } + desc->SetAdminGroup(value); + } else if (name == "admin") { + if (!IsValidUserName(value)) { + return false; + } + desc->SetAdmin(value); + } else if (name == "wal") { + if (value == "on") { + // do nothing + } else if (value == "off") { + desc->DisableWal(); + } else { + return false; } - if (name == "rawkey") { - if (value == "readable") { - desc->SetRawKey(kReadable); - } else if (value == "binary") { - desc->SetRawKey(kBinary); - } else if (value == "ttlkv") { - desc->SetRawKey(kTTLKv); - } else if (value == "kv") { - desc->SetRawKey(kGeneralKv); - } else { - return false; - } - } else if (name == "splitsize") { - int splitsize; // MB - if (!StringToNumber(value, &splitsize) || (splitsize < 0)) { - return false; - } - desc->SetSplitSize(splitsize); - } else if (name == "mergesize") { - int mergesize; // MB - if (!StringToNumber(value, &mergesize) || (mergesize < 0)) { // mergesize == 0 : merge closed - return false; - } - desc->SetMergeSize(mergesize); - } else if (name == "admin_group"){ - if (!IsValidGroupName(value)) { - return false; - } - desc->SetAdminGroup(value); - } else if (name == "admin") { - if (!IsValidUserName(value)) { - return false; - } - desc->SetAdmin(value); - } else if (name == "wal") { - if (value == "on") { - // do nothing - } else if (value == "off") { - desc->DisableWal(); - } else { - return false; - } - } else if (name == "txn") { - if (value == "on") { - desc->EnableTxn(); - } else if (value == "off") { - // do nothing - } else { - return false; - } + } else if (name == "txn") { + if (value == "on") { + desc->EnableTxn(); + } else if (value == "off") { + // do nothing } else { - return false; + return false; } - return true; + } else if (name == "hash") { + if (value == "on") { + desc->EnableHash(); + } else if (value == "off") { + // do nothing + } else { + return false; + } + } else if (name == "bloom_filter_bits_per_key") { + uint32_t bloom_filter_bits_per_key; + if (!StringToNumber(value, &bloom_filter_bits_per_key)) { + return false; + } + desc->SetBloomFilterBitsPerKey(bloom_filter_bits_per_key); + } else { + return false; + } + return true; } bool CheckTableDescrptor(const TableDescriptor& desc, ErrorCode* err) { - std::stringstream ss; - if (desc.SplitSize() < desc.MergeSize() * 3) { - ss << "splitsize should be 3 times larger than mergesize" - << ", splitsize: " << desc.SplitSize() - << ", mergesize: " << desc.MergeSize(); - if (err != NULL) { - err->SetFailed(ErrorCode::kBadParam, ss.str()); - } - return false; - } - if (!IsValidTableName(desc.TableName())) { - if (err != NULL) { - err->SetFailed(ErrorCode::kBadParam, " invalid tablename "); - } - return false; + std::stringstream ss; + if (desc.SplitSize() < desc.MergeSize() * 3) { + ss << "splitsize should be 3 times larger than mergesize" + << ", splitsize: " << desc.SplitSize() << ", mergesize: " << desc.MergeSize(); + if (err != NULL) { + err->SetFailed(ErrorCode::kBadParam, ss.str()); } - for (int32_t i = 0; i < desc.ColumnFamilyNum(); ++i) { - if (!IsValidColumnFamilyName(desc.ColumnFamily(i)->Name())) { - ss << " invalid columnfamily name:" << desc.ColumnFamily(i)->Name(); - if (err != NULL) { - err->SetFailed(ErrorCode::kBadParam, ss.str()); - } - return false; - } - if (!desc.IsTxnEnabled() && desc.ColumnFamily(i)->GlobalTransaction() == true) { - ss << " columnfamily property: gtxn is valid only when table set 'txn=on') "; - if (err != NULL) { - err->SetFailed(ErrorCode::kBadParam, ss.str()); - } - return false; - } - } - if (desc.IsTxnEnabled() && (desc.RawKey() == kGeneralKv || desc.RawKey() == kTTLKv)) { - ss << "kv and ttlkv don't support txn"; - if (err != NULL) { - err->SetFailed(ErrorCode::kBadParam, ss.str()); - } - return false; + return false; + } + if (!IsValidTableName(desc.TableName())) { + if (err != NULL) { + err->SetFailed(ErrorCode::kBadParam, " invalid tablename "); } - if ((desc.RawKey() == kReadable || desc.RawKey() == kBinary)) { - if (desc.ColumnFamilyNum() == 0) { - ss << "kBinary/kReadable MUST have cf"; - if (err != NULL) { - err->SetFailed(ErrorCode::kBadParam, ss.str()); - } - return false; - } + return false; + } + for (int32_t i = 0; i < desc.ColumnFamilyNum(); ++i) { + if (!IsValidColumnFamilyName(desc.ColumnFamily(i)->Name())) { + ss << " invalid columnfamily name:" << desc.ColumnFamily(i)->Name(); + if (err != NULL) { + err->SetFailed(ErrorCode::kBadParam, ss.str()); + } + return false; + } + if (!desc.IsTxnEnabled() && desc.ColumnFamily(i)->GlobalTransaction() == true) { + ss << " columnfamily property: gtxn is valid only when table set " + "'txn=on') "; + if (err != NULL) { + err->SetFailed(ErrorCode::kBadParam, ss.str()); + } + return false; + } + if (desc.IsTxnEnabled() && desc.ColumnFamily(i)->TimeToLive() != 0) { + ss << " columnfamily property: ttl must be set 0 when table set " + "'txn=on') "; + if (err != NULL) { + err->SetFailed(ErrorCode::kBadParam, ss.str()); + } + } + } + if (desc.IsTxnEnabled() && (desc.RawKey() == kGeneralKv || desc.RawKey() == kTTLKv)) { + ss << "kv and ttlkv don't support txn"; + if (err != NULL) { + err->SetFailed(ErrorCode::kBadParam, ss.str()); } - return true; + return false; + } + if ((desc.RawKey() == kReadable || desc.RawKey() == kBinary)) { + if (desc.ColumnFamilyNum() == 0) { + ss << "kBinary/kReadable MUST have cf"; + if (err != NULL) { + err->SetFailed(ErrorCode::kBadParam, ss.str()); + } + return false; + } + } + return true; } bool UpdateCfProperties(const PropTree::Node* table_node, TableDescriptor* table_desc) { - if (table_node == NULL || table_desc == NULL) { - return false; - } - for (size_t i = 0; i < table_node->children_.size(); ++i) { - PropTree::Node* lg_node = table_node->children_[i]; - LocalityGroupDescriptor* lg_desc; - lg_desc = const_cast - (table_desc->LocalityGroup(lg_node->name_)); - if (lg_desc == NULL) { - LOG(ERROR) << "[update] fail to get locality group: " << lg_node->name_; + if (table_node == NULL || table_desc == NULL) { + return false; + } + for (size_t i = 0; i < table_node->children_.size(); ++i) { + PropTree::Node* lg_node = table_node->children_[i]; + LocalityGroupDescriptor* lg_desc; + lg_desc = const_cast(table_desc->LocalityGroup(lg_node->name_)); + if (lg_desc == NULL) { + LOG(ERROR) << "[update] fail to get locality group: " << lg_node->name_; + return false; + } + // add all column families and properties + for (size_t j = 0; j < lg_node->children_.size(); ++j) { + PropTree::Node* cf_node = lg_node->children_[j]; + ColumnFamilyDescriptor* cf_desc; + cf_desc = const_cast(table_desc->ColumnFamily(cf_node->name_)); + for (std::map::iterator it = cf_node->properties_.begin(); + it != cf_node->properties_.end(); ++it) { + if ((cf_desc == NULL) && (it->first == "op") && (it->second == "add")) { + cf_desc = table_desc->AddColumnFamily(cf_node->name_, lg_desc->Name()); + if (cf_desc == NULL) { + LOG(ERROR) << "[update] fail to add column family"; return false; - } - // add all column families and properties - for (size_t j = 0; j < lg_node->children_.size(); ++j) { - PropTree::Node* cf_node = lg_node->children_[j]; - ColumnFamilyDescriptor* cf_desc; - cf_desc = const_cast - (table_desc->ColumnFamily(cf_node->name_)); - for (std::map::iterator it = cf_node->properties_.begin(); - it != cf_node->properties_.end(); ++it) { - if ((cf_desc == NULL) && (it->first == "op") && (it->second == "add")) { - cf_desc = table_desc->AddColumnFamily(cf_node->name_, lg_desc->Name()); - if(cf_desc == NULL) { - LOG(ERROR) << "[update] fail to add column family"; - return false; - } - LOG(INFO) << "[update] add cf: " << cf_node->name_; - continue; - } else if ((it->first == "op") && (it->second == "del")) { - // del cf - table_desc->RemoveColumnFamily(cf_node->name_); - LOG(INFO) << "[update] try to del cf: " << cf_node->name_; - continue; - } - if (!SetCfProperties(it->first, it->second, cf_desc)) { - LOG(ERROR) << "[update] illegal value: " << it->second - << " for cf property: " << it->first; - return false; - } - } - } - } - return true; + } + LOG(INFO) << "[update] add cf: " << cf_node->name_; + continue; + } else if ((it->first == "op") && (it->second == "del")) { + // del cf + table_desc->RemoveColumnFamily(cf_node->name_); + LOG(INFO) << "[update] try to del cf: " << cf_node->name_; + continue; + } + if (!SetCfProperties(it->first, it->second, cf_desc)) { + LOG(ERROR) << "[update] illegal value: " << it->second + << " for cf property: " << it->first; + return false; + } + } + } + } + return true; } bool UpdateLgProperties(const PropTree::Node* table_node, TableDescriptor* table_desc) { - if (table_node == NULL || table_desc == NULL) { + if (table_node == NULL || table_desc == NULL) { + return false; + } + for (size_t i = 0; i < table_node->children_.size(); ++i) { + PropTree::Node* lg_node = table_node->children_[i]; + LocalityGroupDescriptor* lg_desc; + lg_desc = const_cast(table_desc->LocalityGroup(lg_node->name_)); + if (lg_desc == NULL) { + LOG(ERROR) << "[update] fail to get locality group: " << lg_node->name_; + return false; + } + // set locality group properties + for (std::map::iterator it_lg = lg_node->properties_.begin(); + it_lg != lg_node->properties_.end(); ++it_lg) { + if (!SetLgProperties(it_lg->first, it_lg->second, lg_desc)) { + LOG(ERROR) << "[update] illegal value: " << it_lg->second + << " for lg property: " << it_lg->first; return false; + } } - for (size_t i = 0; i < table_node->children_.size(); ++i) { - PropTree::Node* lg_node = table_node->children_[i]; - LocalityGroupDescriptor* lg_desc; - lg_desc = const_cast - (table_desc->LocalityGroup(lg_node->name_)); - if (lg_desc == NULL) { - LOG(ERROR) << "[update] fail to get locality group: " << lg_node->name_; - return false; - } - // set locality group properties - for (std::map::iterator it_lg = lg_node->properties_.begin(); - it_lg != lg_node->properties_.end(); ++it_lg) { - if (!SetLgProperties(it_lg->first, it_lg->second, lg_desc)) { - LOG(ERROR) << "[update] illegal value: " << it_lg->second - << " for lg property: " << it_lg->first; - return false; - } - } - } - return true; + } + return true; } bool UpdateTableProperties(const PropTree::Node* table_node, TableDescriptor* table_desc) { - if (table_node == NULL || table_desc == NULL) { - return false; - } - for (std::map::const_iterator i = table_node->properties_.begin(); - i != table_node->properties_.end(); ++i) { - if (i->first == "rawkey") { - LOG(ERROR) << "[update] can't reset rawkey!"; - return false; - } - if (!SetTableProperties(i->first, i->second, table_desc)) { - LOG(ERROR) << "[update] illegal value: " << i->second - << " for table property: " << i->first; - return false; - } - } - return true; + if (table_node == NULL || table_desc == NULL) { + return false; + } + for (std::map::const_iterator i = table_node->properties_.begin(); + i != table_node->properties_.end(); ++i) { + if (i->first == "rawkey") { + LOG(ERROR) << "[update] can't reset rawkey!"; + return false; + } + if (i->first == "hash") { + LOG(ERROR) << "[update] can't reset hash options!"; + return false; + } + if (!SetTableProperties(i->first, i->second, table_desc)) { + LOG(ERROR) << "[update] illegal value: " << i->second << " for table property: " << i->first; + return false; + } + } + return true; } bool UpdateKvTableProperties(const PropTree::Node* table_node, TableDescriptor* table_desc) { - if (table_node == NULL || table_desc == NULL) { - return false; - } - LocalityGroupDescriptor* lg_desc = - const_cast(table_desc->LocalityGroup("kv")); - if (lg_desc == NULL) { - LOG(ERROR) << "[update][WARNING] can not get locality group: kv(kv table)"; + if (table_node == NULL || table_desc == NULL) { + return false; + } + LocalityGroupDescriptor* lg_desc = + const_cast(table_desc->LocalityGroup("kv")); + if (lg_desc == NULL) { + LOG(ERROR) << "[update][WARNING] can not get locality group: kv(kv table)"; - // maybe this is a old kv table, it's LocalityGroup name is TableDescImpl::DEFAULT_LG_NAME - lg_desc = - const_cast(table_desc->LocalityGroup(TableDescImpl::DEFAULT_LG_NAME)); - if (lg_desc == NULL) { - LOG(ERROR) << "[update] fail to get locality group: " << TableDescImpl::DEFAULT_LG_NAME; - return false; - } else { - LOG(ERROR) << "[update][WARNING] it seems this is a old-style kv table"; - } - } - for (std::map::const_iterator i = table_node->properties_.begin(); - i != table_node->properties_.end(); ++i) { - if (i->first == "rawkey") { - LOG(ERROR) << "[update] can't reset rawkey!"; - return false; - } - if (SetLgProperties(i->first, i->second, lg_desc)) { - // do nothing - } else if (!SetTableProperties(i->first, i->second, table_desc)) { - LOG(ERROR) << "[update] illegal value: " << i->second - << " for table property: " << i->first; - return false; - } - } - return true; + // maybe this is a old kv table, it's LocalityGroup name is + // TableDescImpl::DEFAULT_LG_NAME + lg_desc = const_cast( + table_desc->LocalityGroup(TableDescImpl::DEFAULT_LG_NAME)); + if (lg_desc == NULL) { + LOG(ERROR) << "[update] fail to get locality group: " << TableDescImpl::DEFAULT_LG_NAME; + return false; + } else { + LOG(ERROR) << "[update][WARNING] it seems this is a old-style kv table"; + } + } + for (std::map::const_iterator i = table_node->properties_.begin(); + i != table_node->properties_.end(); ++i) { + if (i->first == "hash") { + LOG(ERROR) << "[update] can't reset hash options!"; + return false; + } + if (i->first == "rawkey") { + LOG(ERROR) << "[update] can't reset rawkey!"; + return false; + } + if (SetLgProperties(i->first, i->second, lg_desc)) { + // do nothing + } else if (!SetTableProperties(i->first, i->second, table_desc)) { + LOG(ERROR) << "[update] illegal value: " << i->second << " for table property: " << i->first; + return false; + } + } + return true; } bool UpdateTableDescriptor(PropTree& schema_tree, TableDescriptor* table_desc, ErrorCode* err) { - PropTree::Node* table_node = schema_tree.GetRootNode(); - if (table_node == NULL || table_desc == NULL) { - return false; - } - bool is_ok = false; - if (table_desc->RawKey() == kTTLKv || table_desc->RawKey() == kGeneralKv) { - if (schema_tree.MaxDepth() != 1) { - LOG(ERROR) << "invalid schema for kv table: " << table_node->name_; - return false; - } - is_ok = UpdateKvTableProperties(table_node, table_desc); - } else if (schema_tree.MaxDepth() == 1) { - // updates table properties, no updates for lg & cf properties - is_ok = UpdateTableProperties(table_node, table_desc); - } else if (schema_tree.MaxDepth() == 2) { - is_ok = UpdateLgProperties(table_node, table_desc) - && UpdateTableProperties(table_node, table_desc); - } else if (schema_tree.MaxDepth() == 3) { - is_ok = UpdateCfProperties(table_node, table_desc) - && UpdateLgProperties(table_node, table_desc) - && UpdateTableProperties(table_node, table_desc); - } else { - LOG(ERROR) << "invalid schema"; - return false; - } - if (is_ok) { - return CheckTableDescrptor(*table_desc, err); - } + PropTree::Node* table_node = schema_tree.GetRootNode(); + if (table_node == NULL || table_desc == NULL) { return false; + } + bool is_ok = false; + if (table_desc->RawKey() == kTTLKv || table_desc->RawKey() == kGeneralKv) { + if (schema_tree.MaxDepth() != 1) { + LOG(ERROR) << "invalid schema for kv table: " << table_node->name_; + return false; + } + is_ok = UpdateKvTableProperties(table_node, table_desc); + } else if (schema_tree.MaxDepth() == 1) { + // updates table properties, no updates for lg & cf properties + is_ok = UpdateTableProperties(table_node, table_desc); + } else if (schema_tree.MaxDepth() == 2) { + is_ok = + UpdateLgProperties(table_node, table_desc) && UpdateTableProperties(table_node, table_desc); + } else if (schema_tree.MaxDepth() == 3) { + is_ok = UpdateCfProperties(table_node, table_desc) && + UpdateLgProperties(table_node, table_desc) && + UpdateTableProperties(table_node, table_desc); + } else { + LOG(ERROR) << "invalid schema"; + return false; + } + if (is_ok) { + return CheckTableDescrptor(*table_desc, err); + } + return false; } bool FillTableDescriptor(PropTree& schema_tree, TableDescriptor* table_desc) { - PropTree::Node* table_node = schema_tree.GetRootNode(); - if (table_desc->TableName() != "" && - table_desc->TableName() != table_node->name_) { - LOG(ERROR) << "table name error: " << table_desc->TableName() - << ":" << table_node->name_; - return false; + PropTree::Node* table_node = schema_tree.GetRootNode(); + if (table_desc->TableName() != "" && table_desc->TableName() != table_node->name_) { + LOG(ERROR) << "table name error: " << table_desc->TableName() << ":" << table_node->name_; + return false; + } + table_desc->SetTableName(schema_tree.GetRootNode()->name_); + table_desc->SetRawKey(kBinary); + if (schema_tree.MaxDepth() != schema_tree.MinDepth() || schema_tree.MaxDepth() == 0 || + schema_tree.MaxDepth() > 3) { + LOG(ERROR) << "schema error: " << schema_tree.FormatString(); + return false; + } + + if (schema_tree.MaxDepth() == 1) { + // kv mode, only have 1 locality group + // e.g. table1 + table_desc->SetRawKey(kTTLKv); + LocalityGroupDescriptor* lg_desc; + lg_desc = table_desc->AddLocalityGroup("kv"); + if (lg_desc == NULL) { + LOG(ERROR) << "fail to add locality group: kv"; + return false; } - table_desc->SetTableName(schema_tree.GetRootNode()->name_); - table_desc->SetRawKey(kBinary); - if (schema_tree.MaxDepth() != schema_tree.MinDepth() || - schema_tree.MaxDepth() == 0 || schema_tree.MaxDepth() > 3) { - LOG(ERROR) << "schema error: " << schema_tree.FormatString(); + for (std::map::iterator i = table_node->properties_.begin(); + i != table_node->properties_.end(); ++i) { + if (!SetTableProperties(i->first, i->second, table_desc) && + !SetLgProperties(i->first, i->second, lg_desc)) { + LOG(ERROR) << "illegal value: " << i->second << " for table property: " << i->first; return false; + } } - - if (schema_tree.MaxDepth() == 1) { - // kv mode, only have 1 locality group - // e.g. table1 - table_desc->SetRawKey(kTTLKv); - LocalityGroupDescriptor* lg_desc; - lg_desc = table_desc->AddLocalityGroup("kv"); - if (lg_desc == NULL) { - LOG(ERROR) << "fail to add locality group: kv"; - return false; - } - for (std::map::iterator i = table_node->properties_.begin(); - i != table_node->properties_.end(); ++i) { - if (!SetTableProperties(i->first, i->second, table_desc) && - !SetLgProperties(i->first, i->second, lg_desc)) { - LOG(ERROR) << "illegal value: " << i->second - << " for table property: " << i->first; - return false; - } + } else if (schema_tree.MaxDepth() == 2) { + // simple table mode, have 1 default lg + // e.g. table1{cf1, cf2, cf3} + LocalityGroupDescriptor* lg_desc; + lg_desc = table_desc->AddLocalityGroup(TableDescImpl::DEFAULT_LG_NAME); + if (lg_desc == NULL) { + LOG(ERROR) << "fail to add locality group: " << TableDescImpl::DEFAULT_LG_NAME; + return false; + } + // add all column families and properties + for (size_t i = 0; i < table_node->children_.size(); ++i) { + PropTree::Node* cf_node = table_node->children_[i]; + ColumnFamilyDescriptor* cf_desc; + cf_desc = table_desc->AddColumnFamily(cf_node->name_, lg_desc->Name()); + if (cf_desc == NULL) { + LOG(ERROR) << "fail to add column family: " << cf_node->name_; + return false; + } + for (std::map::iterator it = cf_node->properties_.begin(); + it != cf_node->properties_.end(); ++it) { + if (!SetCfProperties(it->first, it->second, cf_desc)) { + LOG(ERROR) << "illegal value: " << it->second << " for cf property: " << it->first; + return false; } - } else if (schema_tree.MaxDepth() == 2) { - // simple table mode, have 1 default lg - // e.g. table1{cf1, cf2, cf3} - LocalityGroupDescriptor* lg_desc; - lg_desc = table_desc->AddLocalityGroup(TableDescImpl::DEFAULT_LG_NAME); - if (lg_desc == NULL) { - LOG(ERROR) << "fail to add locality group: " << TableDescImpl::DEFAULT_LG_NAME; + } + } + // set table properties + for (std::map::iterator i = table_node->properties_.begin(); + i != table_node->properties_.end(); ++i) { + if (!SetTableProperties(i->first, i->second, table_desc)) { + LOG(ERROR) << "illegal value: " << i->second << " for table property: " << i->first; + return false; + } + } + // extend notify locality group and _N_ columnfamily + return ExtendNotifyLgToDescriptor(table_desc); + } else if (schema_tree.MaxDepth() == 3) { + // full mode, all elements are user-defined + // e.g. table1{ + // lg0{ + // cf1, + // cf2 + // }, + // lg1{cf3} + // } + for (size_t i = 0; i < table_node->children_.size(); ++i) { + PropTree::Node* lg_node = table_node->children_[i]; + LocalityGroupDescriptor* lg_desc; + lg_desc = table_desc->AddLocalityGroup(lg_node->name_); + if (lg_desc == NULL) { + LOG(ERROR) << "fail to add locality group: " << lg_node->name_; + return false; + } + // add all column families and properties + for (size_t j = 0; j < lg_node->children_.size(); ++j) { + PropTree::Node* cf_node = lg_node->children_[j]; + ColumnFamilyDescriptor* cf_desc; + cf_desc = table_desc->AddColumnFamily(cf_node->name_, lg_desc->Name()); + if (cf_desc == NULL) { + LOG(ERROR) << "fail to add column family: " << cf_node->name_; + return false; + } + for (std::map::iterator it = cf_node->properties_.begin(); + it != cf_node->properties_.end(); ++it) { + if (!SetCfProperties(it->first, it->second, cf_desc)) { + LOG(ERROR) << "illegal value: " << it->second << " for cf property: " << it->first; return false; + } } - // add all column families and properties - for (size_t i = 0; i < table_node->children_.size(); ++i) { - PropTree::Node* cf_node = table_node->children_[i]; - ColumnFamilyDescriptor* cf_desc; - cf_desc = table_desc->AddColumnFamily(cf_node->name_, lg_desc->Name()); - if (cf_desc == NULL) { - LOG(ERROR) << "fail to add column family: " << cf_node->name_; - return false; - } - for (std::map::iterator it = cf_node->properties_.begin(); - it != cf_node->properties_.end(); ++it) { - if (!SetCfProperties(it->first, it->second, cf_desc)) { - LOG(ERROR) << "illegal value: " << it->second - << " for cf property: " << it->first; - return false; - } - } - } - // set table properties - for (std::map::iterator i = table_node->properties_.begin(); - i != table_node->properties_.end(); ++i) { - if (!SetTableProperties(i->first, i->second, table_desc)) { - LOG(ERROR) << "illegal value: " << i->second - << " for table property: " << i->first; - return false; - } - } - // extend notify locality group and _N_ columnfamily - return ExtendNotifyLgToDescriptor(table_desc); - } else if (schema_tree.MaxDepth() == 3) { - // full mode, all elements are user-defined - // e.g. table1{ - // lg0{ - // cf1, - // cf2 - // }, - // lg1{cf3} - // } - for (size_t i = 0; i < table_node->children_.size(); ++i) { - PropTree::Node* lg_node = table_node->children_[i]; - LocalityGroupDescriptor* lg_desc; - lg_desc = table_desc->AddLocalityGroup(lg_node->name_); - if (lg_desc == NULL) { - LOG(ERROR) << "fail to add locality group: " << lg_node->name_; - return false; - } - // add all column families and properties - for (size_t j = 0; j < lg_node->children_.size(); ++j) { - PropTree::Node* cf_node = lg_node->children_[j]; - ColumnFamilyDescriptor* cf_desc; - cf_desc = table_desc->AddColumnFamily(cf_node->name_, lg_desc->Name()); - if (cf_desc == NULL) { - LOG(ERROR) << "fail to add column family: " << cf_node->name_; - return false; - } - for (std::map::iterator it = cf_node->properties_.begin(); - it != cf_node->properties_.end(); ++it) { - if (!SetCfProperties(it->first, it->second, cf_desc)) { - LOG(ERROR) << "illegal value: " << it->second - << " for cf property: " << it->first; - return false; - } - } - } - // set locality group properties - for (std::map::iterator it_lg = lg_node->properties_.begin(); - it_lg != lg_node->properties_.end(); ++it_lg) { - if (!SetLgProperties(it_lg->first, it_lg->second, lg_desc)) { - LOG(ERROR) << "illegal value: " << it_lg->second - << " for lg property: " << it_lg->first; - return false; - } - } - } - // set table properties - for (std::map::iterator i = table_node->properties_.begin(); - i != table_node->properties_.end(); ++i) { - if (!SetTableProperties(i->first, i->second, table_desc)) { - LOG(ERROR) << "illegal value: " << i->second - << " for table property: " << i->first; - return false; - } + } + // set locality group properties + for (std::map::iterator it_lg = lg_node->properties_.begin(); + it_lg != lg_node->properties_.end(); ++it_lg) { + if (!SetLgProperties(it_lg->first, it_lg->second, lg_desc)) { + LOG(ERROR) << "illegal value: " << it_lg->second << " for lg property: " << it_lg->first; + return false; } - // extend notify locality group and _N_ columnfamily - return ExtendNotifyLgToDescriptor(table_desc); - } else { - LOG(FATAL) << "never here."; + } } - return true; + // set table properties + for (std::map::iterator i = table_node->properties_.begin(); + i != table_node->properties_.end(); ++i) { + if (!SetTableProperties(i->first, i->second, table_desc)) { + LOG(ERROR) << "illegal value: " << i->second << " for table property: " << i->first; + return false; + } + } + // extend notify locality group and _N_ columnfamily + return ExtendNotifyLgToDescriptor(table_desc); + } else { + LOG(FATAL) << "never here."; + } + return true; } bool ParseTableSchema(const string& schema, TableDescriptor* table_desc, ErrorCode* err) { - PropTree schema_tree; - if (!schema_tree.ParseFromString(schema)) { - LOG(ERROR) << schema_tree.State(); - LOG(ERROR) << schema; - return false; - } - - VLOG(10) << "table to create: " << schema_tree.FormatString(); - if (FillTableDescriptor(schema_tree, table_desc) && - CheckTableDescrptor(*table_desc, err)) { - return true; - } + PropTree schema_tree; + if (!schema_tree.ParseFromString(schema)) { + LOG(ERROR) << schema_tree.State(); + LOG(ERROR) << schema; return false; + } + + VLOG(10) << "table to create: " << schema_tree.FormatString(); + return FillTableDescriptor(schema_tree, table_desc) && CheckTableDescrptor(*table_desc, err); } bool ParseTableSchemaFile(const string& file, TableDescriptor* table_desc, ErrorCode* err) { - PropTree schema_tree; - if (!schema_tree.ParseFromFile(file)) { - LOG(ERROR) << schema_tree.State(); - LOG(ERROR) << file; - return false; - } - - VLOG(10) << "table to create: " << schema_tree.FormatString(); - if (FillTableDescriptor(schema_tree, table_desc) && - CheckTableDescrptor(*table_desc, err)) { - return true; - } + PropTree schema_tree; + if (!schema_tree.ParseFromFile(file)) { + LOG(ERROR) << schema_tree.State(); + LOG(ERROR) << file; return false; + } + + VLOG(10) << "table to create: " << schema_tree.FormatString(); + return FillTableDescriptor(schema_tree, table_desc) && CheckTableDescrptor(*table_desc, err); } bool BuildSchema(TableDescriptor* table_desc, string* schema) { - // build schema string from table descriptor - if (schema == NULL) { - LOG(ERROR) << "schema string is NULL."; - return false; - } - if (table_desc == NULL) { - LOG(ERROR) << "table descriptor is NULL."; - return false; - } + // build schema string from table descriptor + if (schema == NULL) { + LOG(ERROR) << "schema string is NULL."; + return false; + } + if (table_desc == NULL) { + LOG(ERROR) << "table descriptor is NULL."; + return false; + } - schema->clear(); - int32_t lg_num = table_desc->LocalityGroupNum(); - int32_t cf_num = table_desc->ColumnFamilyNum(); - for (int32_t lg_no = 0; lg_no < lg_num; ++lg_no) { - const LocalityGroupDescriptor* lg_desc = table_desc->LocalityGroup(lg_no); - string lg_name = lg_desc->Name(); - if (lg_no > 0) { - schema->append("|"); - } - schema->append(lg_name); - schema->append(":"); - int cf_cnt = 0; - for (int32_t cf_no = 0; cf_no < cf_num; ++cf_no) { - const ColumnFamilyDescriptor* cf_desc = table_desc->ColumnFamily(cf_no); - if (cf_desc->LocalityGroup() == lg_name && cf_desc->Name() != "") { - if (cf_cnt++ > 0) { - schema->append(","); - } - schema->append(cf_desc->Name()); - } - } - } - return true; + schema->clear(); + int32_t lg_num = table_desc->LocalityGroupNum(); + int32_t cf_num = table_desc->ColumnFamilyNum(); + for (int32_t lg_no = 0; lg_no < lg_num; ++lg_no) { + const LocalityGroupDescriptor* lg_desc = table_desc->LocalityGroup(lg_no); + string lg_name = lg_desc->Name(); + if (lg_no > 0) { + schema->append("|"); + } + schema->append(lg_name); + schema->append(":"); + int cf_cnt = 0; + for (int32_t cf_no = 0; cf_no < cf_num; ++cf_no) { + const ColumnFamilyDescriptor* cf_desc = table_desc->ColumnFamily(cf_no); + if (cf_desc->LocalityGroup() == lg_name && cf_desc->Name() != "") { + if (cf_cnt++ > 0) { + schema->append(","); + } + schema->append(cf_desc->Name()); + } + } + } + return true; } bool ParseDelimiterFile(const string& filename, std::vector* delims) { - std::ifstream fin(filename.c_str()); - if (fin.fail()) { - LOG(ERROR) << "fail to read delimiter file: " << filename; - return false; - } + std::ifstream fin(filename.c_str()); + if (fin.fail()) { + LOG(ERROR) << "fail to read delimiter file: " << filename; + return false; + } - std::vector delimiters; - string str; - while (fin >> str) { - delimiters.push_back(str); - } + std::vector delimiters; + string str; + string raw_str; + while (fin >> str) { + ParseDebugString(str, &raw_str); + delimiters.push_back(raw_str); + } - bool is_delim_error = false; - for (size_t i = 1; i < delimiters.size(); i++) { - if (delimiters[i] <= delimiters[i-1]) { - LOG(ERROR) << "line[" << i << "]" << " SHOULD less than line[" << i + 1 - << "] (bitwise comparison, maybe LC_ALL=C if you use command sort(1))"; - LOG(ERROR) << "line[" << i << "]: (" << delimiters[i-1] << ")"; - LOG(ERROR) << "line[" << i + 1 << "]: (" << delimiters[i] << ")"; - is_delim_error = true; - // just print the 1st invalid input case, - // if print all invalid input, - // it will print too many log to read/understand - break; - } - } - if (is_delim_error) { - LOG(ERROR) << "create table fail, delimiter error."; - return false; - } - delims->swap(delimiters); - return true; + bool is_delim_error = false; + for (size_t i = 1; i < delimiters.size(); i++) { + if (delimiters[i] <= delimiters[i - 1]) { + LOG(ERROR) << "line[" << i << "]" + << " SHOULD less than line[" << i + 1 + << "] (bitwise comparison, maybe LC_ALL=C if you use command " + "sort(1))"; + LOG(ERROR) << "line[" << i << "]: (" << delimiters[i - 1] << ")"; + LOG(ERROR) << "line[" << i + 1 << "]: (" << delimiters[i] << ")"; + is_delim_error = true; + // just print the 1st invalid input case, + // if print all invalid input, + // it will print too many log to read/understand + break; + } + } + if (is_delim_error) { + LOG(ERROR) << "create table fail, delimiter error."; + return false; + } + delims->swap(delimiters); + return true; } bool IsKvTable(const TableSchema& schema) { - return (schema.kv_only() || - schema.raw_key() == GeneralKv || - schema.raw_key() == TTLKv); + return (schema.kv_only() || schema.raw_key() == GeneralKv || schema.raw_key() == TTLKv); } -bool IsTransactionTable(const TableSchema& schema) { - return schema.enable_txn(); -} +bool IsTransactionTable(const TableSchema& schema) { return schema.enable_txn(); } -void FindGlobalTransactionCfs(const TableSchema& schema, - std::set* column_families) { - size_t cf_num = schema.column_families_size(); - for (size_t cf_no = 0; cf_no < cf_num; ++cf_no) { - const ColumnFamilySchema& cf_schema = schema.column_families(cf_no); - if (cf_schema.gtxn()) { - column_families->insert(cf_schema.name()); - } - } +void FindGlobalTransactionCfs(const TableSchema& schema, std::set* column_families) { + size_t cf_num = schema.column_families_size(); + for (size_t cf_no = 0; cf_no < cf_num; ++cf_no) { + const ColumnFamilySchema& cf_schema = schema.column_families(cf_no); + if (cf_schema.gtxn()) { + column_families->insert(cf_schema.name()); + } + } } bool ExtendNotifyLgToDescriptor(TableDescriptor* desc) { - bool do_extend = false; - bool have_n_cf = false; - for (int32_t i = 0; i < desc->ColumnFamilyNum(); ++i) { - if (desc->ColumnFamily(i)->Name() == kNotifyColumnFamily) { - have_n_cf = true; - } - if (desc->ColumnFamily(i)->IsNotifyEnabled()) { - do_extend = true; - } + bool do_extend = false; + bool have_n_cf = false; + for (int32_t i = 0; i < desc->ColumnFamilyNum(); ++i) { + if (desc->ColumnFamily(i)->Name() == kNotifyColumnFamily) { + have_n_cf = true; } - if (!do_extend) { - return true; - } else if (do_extend && have_n_cf) { - return false; - } - if (desc->LocalityGroup(TableDescImpl::NOTIFY_LG_NAME) != NULL) { - LOG(ERROR) << "already exists locality group: " - << TableDescImpl::NOTIFY_LG_NAME; - return false; - } - LocalityGroupDescriptor* lg_desc - = desc->AddLocalityGroup(TableDescImpl::NOTIFY_LG_NAME); - if (lg_desc == NULL) { - LOG(ERROR) << "fail to add locality group: " - << TableDescImpl::NOTIFY_LG_NAME; - return false; - } - ColumnFamilyDescriptor* cf_desc - = desc->AddColumnFamily(kNotifyColumnFamily, TableDescImpl::NOTIFY_LG_NAME); - if (cf_desc == NULL) { - LOG(ERROR) << "fail to add column family: " << kNotifyColumnFamily; - return false; + if (desc->ColumnFamily(i)->IsNotifyEnabled()) { + do_extend = true; } + } + if (!do_extend) { return true; + } else if (do_extend && have_n_cf) { + return false; + } + if (desc->LocalityGroup(TableDescImpl::NOTIFY_LG_NAME) != NULL) { + LOG(ERROR) << "already exists locality group: " << TableDescImpl::NOTIFY_LG_NAME; + return false; + } + LocalityGroupDescriptor* lg_desc = desc->AddLocalityGroup(TableDescImpl::NOTIFY_LG_NAME); + if (lg_desc == NULL) { + LOG(ERROR) << "fail to add locality group: " << TableDescImpl::NOTIFY_LG_NAME; + return false; + } + ColumnFamilyDescriptor* cf_desc = + desc->AddColumnFamily(kNotifyColumnFamily, TableDescImpl::NOTIFY_LG_NAME); + if (cf_desc == NULL) { + LOG(ERROR) << "fail to add column family: " << kNotifyColumnFamily; + return false; + } + return true; +} + +void GenerateHashDelimiters(int64_t hash_num, std::vector* delims) { + if (!delims) return; + delims->clear(); + if (hash_num <= 1) return; + uint64_t start_key = 0x0; + uint64_t end_key = 0xFFFFFFFFFFFFFFFF; + uint64_t bulk_size = (end_key - start_key) / hash_num; + for (int i = 1; i != hash_num; ++i) { + assert(start_key + bulk_size > start_key); + start_key += bulk_size; + std::stringstream ss; + ss << std::setw(16) << std::setfill('0') << std::hex << start_key; + delims->emplace_back(ss.str()); + } +} + +void SetMutationErrorIfInvalid(const string& field, const FieldType& field_type, ErrorCode* err) { + std::string reason = err->GetReason(); + switch (field_type) { + case FieldType::kRowkey: { + if (field.size() >= kRowkeySize) { + reason.append(" Bad parameters: rowkey should < 64KB"); + err->SetFailed(ErrorCode::kBadParam, reason); + } + } break; + case FieldType::kQualifier: { + if (field.size() >= kQualifierSize) { + reason.append(" Bad parameters: qualifier should < 64KB"); + err->SetFailed(ErrorCode::kBadParam, reason); + } + } break; + case FieldType::kValue: { + if (field.size() >= kValueSize) { + reason.append(" Bad parameters: value should < 32MB"); + err->SetFailed(ErrorCode::kBadParam, reason); + } + } break; + case FieldType::kKVColumnFamily: { + if (field.size() != 0) { + reason.append(" Bad parameters: kv/ttlkv cf should = ''"); + err->SetFailed(ErrorCode::kBadParam, reason); + } + } break; + case FieldType::kKVQualifier: { + if (field.size() != 0) { + reason.append(" Bad parameters: kv/ttlkv qualifier should = ''"); + err->SetFailed(ErrorCode::kBadParam, reason); + } + } break; + default: { abort(); } + } } -} // namespace tera +} // namespace tera diff --git a/src/sdk/sdk_utils.h b/src/sdk/sdk_utils.h index 0e8ddad54..e2b5a725a 100644 --- a/src/sdk/sdk_utils.h +++ b/src/sdk/sdk_utils.h @@ -4,8 +4,8 @@ // // Author: Xu Peilin (xupeilin@baidu.com) -#ifndef TERA_SDK_SDK_UTILS_H_ -#define TERA_SDK_SDK_UTILS_H_ +#ifndef TERA_SDK_SDK_UTILS_H_ +#define TERA_SDK_SDK_UTILS_H_ #include "proto/table_meta.pb.h" #include "tera.h" @@ -25,12 +25,9 @@ void TableDescToSchema(const TableDescriptor& desc, TableSchema* schema); void TableSchemaToDesc(const TableSchema& schema, TableDescriptor* desc); -bool SetCfProperties(const string& name, const string& value, - ColumnFamilyDescriptor* desc); -bool SetLgProperties(const string& name, const string& value, - LocalityGroupDescriptor* desc); -bool SetTableProperties(const string& name, const string& value, - TableDescriptor* desc); +bool SetCfProperties(const string& name, const string& value, ColumnFamilyDescriptor* desc); +bool SetLgProperties(const string& name, const string& value, LocalityGroupDescriptor* desc); +bool SetTableProperties(const string& name, const string& value, TableDescriptor* desc); bool FillTableDescriptor(PropTree& schema_tree, TableDescriptor* desc); bool UpdateTableDescriptor(PropTree& schema_tree, TableDescriptor* table_desc, ErrorCode* err); @@ -56,5 +53,19 @@ bool IsTransactionTable(const TableSchema& schema); void FindGlobalTransactionCfs(const TableSchema& schema, std::set* column_families); -} // namespace tera -#endif // TERA_SDK_SDK_UTILS_H_ +void GenerateHashDelimiters(int64_t hash_num, std::vector* delims); + +enum class FieldType { + kRowkey = 0, + kColumnFamily, + kKVColumnFamily, + kQualifier, + kKVQualifier, + kTimeStamp, + kValue +}; + +void SetMutationErrorIfInvalid(const string& field, const FieldType& field_type, ErrorCode* err); + +} // namespace tera +#endif // TERA_SDK_SDK_UTILS_H_ diff --git a/src/sdk/sdk_zk.cc b/src/sdk/sdk_zk.cc index 97852c1e1..1cb501222 100644 --- a/src/sdk/sdk_zk.cc +++ b/src/sdk/sdk_zk.cc @@ -40,350 +40,331 @@ namespace sdk { static pthread_once_t zk_init_once = PTHREAD_ONCE_INIT; static void InitZkLogOnce() { - zk::ZooKeeperLightAdapter::SetLibraryLogOutput(FLAGS_tera_zk_lib_log_path); + zk::ZooKeeperLightAdapter::SetLibraryLogOutput(FLAGS_tera_zk_lib_log_path); } bool ClientZkAdapter::Init() { - pthread_once(&zk_init_once, InitZkLogOnce); - MutexLock lock(&mutex_); - LOG(INFO) << "try init zk ..."; - int zk_errno = zk::ZE_OK; - int32_t retry_cnt = 0; - int wait_time = 60000; - while (!ZooKeeperAdapter::Init(FLAGS_tera_zk_addr_list, - FLAGS_tera_zk_root_path, - FLAGS_tera_zk_timeout, - "", &zk_errno, wait_time)) { - if (retry_cnt++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to init zk: " << zk::ZkErrnoToString(zk_errno); - return false; - } - LOG(ERROR) << "init zk fail: " << zk::ZkErrnoToString(zk_errno) - << ". retry in " << FLAGS_tera_zk_retry_period << " ms, retry: " - << retry_cnt; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; + pthread_once(&zk_init_once, InitZkLogOnce); + MutexLock lock(&mutex_); + LOG(INFO) << "try init zk ..."; + int zk_errno = zk::ZE_OK; + int32_t retry_cnt = 0; + while (!ZooKeeperAdapter::Init(FLAGS_tera_zk_addr_list, FLAGS_tera_zk_root_path, + FLAGS_tera_zk_timeout, // session timeout + "", &zk_errno, + FLAGS_tera_zk_timeout // connect timeout + )) { + if (retry_cnt++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to init zk: " << zk::ZkErrnoToString(zk_errno); + return false; } - LOG(INFO) << "init zk success"; - return true; + LOG(ERROR) << "init zk fail: " << zk::ZkErrnoToString(zk_errno) << ". retry in " + << FLAGS_tera_zk_retry_period << " ms, retry: " << retry_cnt; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + LOG(INFO) << "init zk success"; + return true; } bool ClientZkAdapter::RegisterClient(std::string* path) { - int64_t session_id = 0; - int zk_errno = zk::ZE_OK; - int32_t retry_cnt = 0; - LOG(INFO) << "try get client sesssion"; - while (!GetSessionId(&session_id, &zk_errno)) { - if (retry_cnt++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to get client session : " - << zk::ZkErrnoToString(zk_errno); - return false; - } - LOG(ERROR) << "get client session fail: " << zk::ZkErrnoToString(zk_errno) - << ". retry in " << FLAGS_tera_zk_retry_period << " ms, retry: " - << retry_cnt; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; + int64_t session_id = 0; + int zk_errno = zk::ZE_OK; + int32_t retry_cnt = 0; + LOG(INFO) << "try get client sesssion"; + while (!GetSessionId(&session_id, &zk_errno)) { + if (retry_cnt++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to get client session : " << zk::ZkErrnoToString(zk_errno); + return false; } - std::string internal_path = utils::GetLocalHostAddr() - + "-" + std::to_string(getpid()) - + "-" + std::to_string(session_id); - LOG(INFO) << "get client session success : " << internal_path; + LOG(ERROR) << "get client session fail: " << zk::ZkErrnoToString(zk_errno) << ". retry in " + << FLAGS_tera_zk_retry_period << " ms, retry: " << retry_cnt; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); zk_errno = zk::ZE_OK; - retry_cnt = 0; - LOG(INFO) << "try create client node : " << internal_path; - while (!CreateEphemeralNode(kClientsNodePath + "/" + internal_path, - "", - &zk_errno)) { - if (retry_cnt++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to create client node : " - << zk::ZkErrnoToString(zk_errno); - return false; - } - LOG(ERROR) << "create client node fail: " << zk::ZkErrnoToString(zk_errno) - << ". retry in " << FLAGS_tera_zk_retry_period << " ms, retry: " - << retry_cnt; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; + } + std::string internal_path = + utils::GetLocalHostAddr() + "-" + std::to_string(getpid()) + "-" + std::to_string(session_id); + LOG(INFO) << "get client session success : " << internal_path; + zk_errno = zk::ZE_OK; + retry_cnt = 0; + LOG(INFO) << "try create client node : " << internal_path; + while (!CreateEphemeralNode(kClientsNodePath + "/" + internal_path, "", &zk_errno)) { + if (retry_cnt++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to create client node : " << zk::ZkErrnoToString(zk_errno); + return false; } - LOG(INFO) << "create client node success"; - *path = internal_path; - return true; + LOG(ERROR) << "create client node fail: " << zk::ZkErrnoToString(zk_errno) << ". retry in " + << FLAGS_tera_zk_retry_period << " ms, retry: " << retry_cnt; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + LOG(INFO) << "create client node success"; + *path = internal_path; + return true; } bool ClientZkAdapter::IsClientAlive(const std::string& path) { - VLOG(12) << "try check client alive : " << path; - int32_t retry_cnt = 0; - int zk_errno = zk::ZE_OK; - bool ret = true; - while (!CheckExist(kClientsNodePath + "/" + path, &ret, &zk_errno)) { - if (retry_cnt++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to check client alive : " - << zk::ZkErrnoToString(zk_errno); - // when zk server error, client should think other client is alive - return true; - } - LOG(ERROR) << "check client alive fail: " << zk::ZkErrnoToString(zk_errno) - << ". retry in " << FLAGS_tera_zk_retry_period << " ms, retry: " - << retry_cnt; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; + VLOG(12) << "try check client alive : " << path; + int32_t retry_cnt = 0; + int zk_errno = zk::ZE_OK; + bool ret = true; + while (!CheckExist(kClientsNodePath + "/" + path, &ret, &zk_errno)) { + if (retry_cnt++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to check client alive : " << zk::ZkErrnoToString(zk_errno); + // when zk server error, client should think other client is alive + return true; } - VLOG(12) << "check client alive success"; - return ret; + LOG(ERROR) << "check client alive fail: " << zk::ZkErrnoToString(zk_errno) << ". retry in " + << FLAGS_tera_zk_retry_period << " ms, retry: " << retry_cnt; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + VLOG(12) << "check client alive success"; + return ret; } bool ClientZkAdapter::ReadNode(const std::string& path, std::string* value) { - VLOG(12) << "try read node : " << path; - int32_t retry_cnt = 0; - int zk_errno = zk::ZE_OK; - while (!ZooKeeperAdapter::ReadNode(path, value, &zk_errno)) { - if (retry_cnt++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to read node : " - << zk::ZkErrnoToString(zk_errno); - return false; - } - LOG(ERROR) << "read node fail: " << zk::ZkErrnoToString(zk_errno) - << ". retry in " << FLAGS_tera_zk_retry_period << " ms, retry: " - << retry_cnt; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; + VLOG(12) << "try read node : " << path; + int32_t retry_cnt = 0; + int zk_errno = zk::ZE_OK; + while (!ZooKeeperAdapter::ReadNode(path, value, &zk_errno)) { + if (retry_cnt++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to read node : " << zk::ZkErrnoToString(zk_errno); + return false; } - VLOG(12) << "read node success"; - return true; + LOG(ERROR) << "read node fail: " << zk::ZkErrnoToString(zk_errno) << ". retry in " + << FLAGS_tera_zk_retry_period << " ms, retry: " << retry_cnt; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + VLOG(12) << "read node success"; + return true; } bool InsClientZkAdapter::Init() { - ins_sdk_ = new galaxy::ins::sdk::InsSDK(FLAGS_tera_ins_addr_list); - ins_sdk_->SetTimeoutTime(FLAGS_tera_sdk_ins_session_timeout); - return true; + ins_sdk_ = new galaxy::ins::sdk::InsSDK(FLAGS_tera_ins_addr_list); + ins_sdk_->SetTimeoutTime(FLAGS_tera_sdk_ins_session_timeout); + return true; } bool InsClientZkAdapter::RegisterClient(std::string* path) { - std::string internal_path = utils::GetLocalHostAddr() - + "-" + std::to_string(getpid()) - + "-" + ins_sdk_->GetSessionID(); - LOG(INFO) << "get client session success : " << internal_path; - std::string client_path = FLAGS_tera_ins_root_path + kClientsNodePath - + "/" + internal_path; - galaxy::ins::sdk::SDKError err; - bool ret = ins_sdk_->Put(client_path, "", &err); - if (ret) { - *path = internal_path; - } - return ret; + std::string internal_path = + utils::GetLocalHostAddr() + "-" + std::to_string(getpid()) + "-" + ins_sdk_->GetSessionID(); + LOG(INFO) << "get client session success : " << internal_path; + std::string client_path = FLAGS_tera_ins_root_path + kClientsNodePath + "/" + internal_path; + galaxy::ins::sdk::SDKError err; + bool ret = ins_sdk_->Put(client_path, "", &err); + if (ret) { + *path = internal_path; + } + return ret; } bool InsClientZkAdapter::IsClientAlive(const std::string& path) { - std::string client_path = kClientsNodePath + "/" + path; - std::string value; - return ReadNode(client_path, &value); + std::string client_path = kClientsNodePath + "/" + path; + std::string value; + return ReadNode(client_path, &value); } bool InsClientZkAdapter::ReadNode(const std::string& path, std::string* value) { - std::string target_path = FLAGS_tera_ins_root_path + path; - galaxy::ins::sdk::SDKError err; - if (!ins_sdk_->Get(target_path, value, &err)) { - LOG(ERROR) << "ins read " << target_path << " fail: " << err; - return false; - } - return true; + std::string target_path = FLAGS_tera_ins_root_path + path; + galaxy::ins::sdk::SDKError err; + if (!ins_sdk_->Get(target_path, value, &err)) { + LOG(ERROR) << "ins read " << target_path << " fail: " << err; + return false; + } + return true; } std::string ClusterFinder::MasterAddr(bool update) { - std::string master_addr; - if (update || master_addr_ == "") { - if (!ReadNode(kMasterNodePath, &master_addr)) { - master_addr = ""; - } - } - if (!master_addr.empty()) { - MutexLock lock(&mutex_); - master_addr_ = master_addr; - LOG(INFO) << "master addr: " << master_addr_; + std::string master_addr; + if (update || master_addr_ == "") { + if (!ReadNode(kMasterNodePath, &master_addr)) { + master_addr = ""; } - return master_addr_; + } + if (!master_addr.empty()) { + MutexLock lock(&mutex_); + master_addr_ = master_addr; + LOG(INFO) << "master addr: " << master_addr_; + } + return master_addr_; } std::string ClusterFinder::TimeoracleAddr(bool update) { - std::string timeoracle_addr; - if (update || timeoracle_addr_ == "") { - if (!ReadNode(kTimeoracleNodePath, &timeoracle_addr)) { - timeoracle_addr = ""; - } - } - if (!timeoracle_addr.empty()) { - MutexLock lock(&mutex_); - timeoracle_addr_ = timeoracle_addr; - LOG(INFO) << "timeoracle addr: " << timeoracle_addr_; + std::string timeoracle_addr; + if (update || timeoracle_addr_ == "") { + if (!ReadNode(kTimeoracleNodePath, &timeoracle_addr)) { + timeoracle_addr = ""; } - return timeoracle_addr_; + } + if (!timeoracle_addr.empty()) { + MutexLock lock(&mutex_); + timeoracle_addr_ = timeoracle_addr; + LOG(INFO) << "timeoracle addr: " << timeoracle_addr_; + } + return timeoracle_addr_; } std::string ClusterFinder::RootTableAddr(bool update) { - std::string root_table_addr; - if (update || root_table_addr_ == "") { - if (!ReadNode(kRootTabletNodePath, &root_table_addr)) { - root_table_addr = ""; - } + std::string root_table_addr; + if (update || root_table_addr_ == "") { + if (!ReadNode(kRootTabletNodePath, &root_table_addr)) { + root_table_addr = ""; } - if (!root_table_addr.empty()) { - MutexLock lock(&mutex_); - root_table_addr_ = root_table_addr; - LOG(INFO) << "root addr: " << root_table_addr_; - } - return root_table_addr_; + } + if (!root_table_addr.empty()) { + MutexLock lock(&mutex_); + root_table_addr_ = root_table_addr; + LOG(INFO) << "root addr: " << root_table_addr_; + } + return root_table_addr_; } std::string ClusterFinder::ClusterId() { - std::string name = Name(); - std::string authority = Authority(); - std::string path = Path(); - std::string cluster_id = name + "://" + authority; - if (path[0] != '/') { - cluster_id += "/"; - } - cluster_id += path; - return cluster_id; + std::string name = Name(); + std::string authority = Authority(); + std::string path = Path(); + std::string cluster_id = name + "://" + authority; + if (path[0] != '/') { + cluster_id += "/"; + } + cluster_id += path; + return cluster_id; } -ZkClusterFinder::ZkClusterFinder(const std::string& zk_root_path, - const std::string& zk_addr_list, +ZkClusterFinder::ZkClusterFinder(const std::string& zk_root_path, const std::string& zk_addr_list, ClientZkAdapterBase* zk_adapter) - : zk_root_path_(zk_root_path), - zk_addr_list_(zk_addr_list), - zk_adapter_(zk_adapter) { -} + : zk_root_path_(zk_root_path), zk_addr_list_(zk_addr_list), zk_adapter_(zk_adapter) {} bool ZkClusterFinder::ReadNode(const std::string& name, std::string* value) { - if (zk_adapter_ == NULL) { - pthread_once(&zk_init_once, InitZkLogOnce); - - int zk_errno = tera::zk::ZE_OK; - zk::ZooKeeperLightAdapter zk_adapter; - if (!zk_adapter.Init(zk_addr_list_, zk_root_path_, 1000 * 15, "", &zk_errno)) { - LOG(ERROR) << "Init zookeeper fail: " << tera::zk::ZkErrnoToString(zk_errno); - return false; - } - - if (!zk_adapter.ReadNode(name, value, &zk_errno)) { - LOG(ERROR) << "zk read " << name << " fail: " << zk::ZkErrnoToString(zk_errno); - return false; - } - return true; - } else { - return zk_adapter_->ReadNode(name, value); + if (zk_adapter_ == NULL) { + pthread_once(&zk_init_once, InitZkLogOnce); + + int zk_errno = tera::zk::ZE_OK; + zk::ZooKeeperLightAdapter zk_adapter; + if (!zk_adapter.Init(zk_addr_list_, zk_root_path_, FLAGS_tera_zk_timeout, "", &zk_errno, + FLAGS_tera_zk_timeout)) { + LOG(ERROR) << "Init zookeeper fail: " << tera::zk::ZkErrnoToString(zk_errno); + return false; + } + + if (!zk_adapter.ReadNode(name, value, &zk_errno)) { + LOG(ERROR) << "zk read " << name << " fail: " << zk::ZkErrnoToString(zk_errno); + return false; } + return true; + } else { + return zk_adapter_->ReadNode(name, value); + } } InsClusterFinder::InsClusterFinder(const std::string& ins_root_path, const std::string& ins_addr_list, ClientZkAdapterBase* zk_adapter) - : ins_root_path_(ins_root_path), - ins_addr_list_(ins_addr_list), - zk_adapter_(zk_adapter) { -} + : ins_root_path_(ins_root_path), ins_addr_list_(ins_addr_list), zk_adapter_(zk_adapter) {} bool InsClusterFinder::ReadNode(const std::string& name, std::string* value) { - if (zk_adapter_ == NULL) { - galaxy::ins::sdk::InsSDK ins_sdk(ins_addr_list_); - galaxy::ins::sdk::SDKError err; - if (!ins_sdk.Get(ins_root_path_ + name, value, &err)) { - LOG(ERROR) << "ins read " << name << " fail: " << err; - return false; - } - return true; - } else { - return zk_adapter_->ReadNode(name, value); + if (zk_adapter_ == NULL) { + galaxy::ins::sdk::InsSDK ins_sdk(ins_addr_list_); + galaxy::ins::sdk::SDKError err; + if (!ins_sdk.Get(ins_root_path_ + name, value, &err)) { + LOG(ERROR) << "ins read " << name << " fail: " << err; + return false; } + return true; + } else { + return zk_adapter_->ReadNode(name, value); + } } FakeZkClusterFinder::FakeZkClusterFinder(const std::string& fake_zk_path_prefix) - : fake_zk_path_prefix_(fake_zk_path_prefix) { -} + : fake_zk_path_prefix_(fake_zk_path_prefix) {} bool FakeZkClusterFinder::ReadNode(const std::string& name, std::string* value) { - return zk::FakeZkUtil::ReadNode(fake_zk_path_prefix_ + name, value); + return zk::FakeZkUtil::ReadNode(fake_zk_path_prefix_ + name, value); } MockTimeoracleClusterFinder::MockTimeoracleClusterFinder(const std::string& mock_root_path) { - mock_root_path_ = mock_root_path; + mock_root_path_ = mock_root_path; } bool MockTimeoracleClusterFinder::ReadNode(const std::string& kpath, std::string* value) { - std::string path = mock_root_path_ + kpath; - int fd = ::open(path.c_str(), O_RDWR); - if (fd < 0) { - return false; - } - - value->resize(1024); - char *buf = &(*value)[0]; - ssize_t len = ::pread(fd, buf, sizeof(buf), 0); - ::close(fd); - if (len < 0) { - return false; - } - value->resize(len); - return true; + std::string path = mock_root_path_ + kpath; + int fd = ::open(path.c_str(), O_RDWR); + if (fd < 0) { + return false; + } + + value->resize(1024); + char* buf = &(*value)[0]; + ssize_t len = ::pread(fd, buf, sizeof(buf), 0); + ::close(fd); + if (len < 0) { + return false; + } + value->resize(len); + return true; } ClientZkAdapterBase* NewClientZkAdapter() { - if (FLAGS_tera_coord_type.empty()) { - LOG(ERROR) << "Note: We don't recommend that use '--tera_[zk|ins|mock_zk|mock_ins]_enabled' flag for your cluster coord" - << " replace by '--tera_coord_type=[zk|ins|mock_zk|mock_ins|fake_zk]' flag is usually recommended."; - } - - if (FLAGS_tera_coord_type == "zk" - || (FLAGS_tera_coord_type.empty() && FLAGS_tera_zk_enabled)) { - return new sdk::ClientZkAdapter(); - } else if (FLAGS_tera_coord_type == "ins" - || (FLAGS_tera_coord_type.empty() && FLAGS_tera_ins_enabled)) { - return new sdk::InsClientZkAdapter(); - } else if (FLAGS_tera_coord_type == "mock_zk" - || (FLAGS_tera_coord_type.empty() && FLAGS_tera_mock_zk_enabled)) { - return new sdk::MockClientZkAdapter(); - } else if (FLAGS_tera_coord_type == "mock_ins" - || (FLAGS_tera_coord_type.empty() && FLAGS_tera_mock_ins_enabled)) { - return new sdk::MockInsClientZkAdapter(); - } - return NULL; + if (FLAGS_tera_coord_type.empty()) { + LOG(ERROR) << "Note: We don't recommend that use '--tera_[zk|ins|mock_zk|mock_ins]_enabled' " + "flag for your cluster coord" + << " replace by '--tera_coord_type=[zk|ins|mock_zk|mock_ins|fake_zk]' flag is " + "usually recommended."; + } + + if (FLAGS_tera_coord_type == "zk" || (FLAGS_tera_coord_type.empty() && FLAGS_tera_zk_enabled)) { + return new sdk::ClientZkAdapter(); + } else if (FLAGS_tera_coord_type == "ins" || + (FLAGS_tera_coord_type.empty() && FLAGS_tera_ins_enabled)) { + return new sdk::InsClientZkAdapter(); + } else if (FLAGS_tera_coord_type == "mock_zk" || + (FLAGS_tera_coord_type.empty() && FLAGS_tera_mock_zk_enabled)) { + return new sdk::MockClientZkAdapter(); + } else if (FLAGS_tera_coord_type == "mock_ins" || + (FLAGS_tera_coord_type.empty() && FLAGS_tera_mock_ins_enabled)) { + return new sdk::MockInsClientZkAdapter(); + } + return NULL; } ClusterFinder* NewClusterFinder(ClientZkAdapterBase* zk_adapter) { - if (FLAGS_tera_coord_type.empty()) { - LOG(ERROR) << "Note: We don't recommend that use '--tera_[zk|ins|mock_zk|mock_ins]_enabled' flag for your cluster coord" - << " replace by '--tera_coord_type=[zk|ins|mock_zk|mock_ins|fake_zk]' flag is usually recommended."; - } - if (FLAGS_tera_coord_type == "zk" - || (FLAGS_tera_coord_type.empty() && FLAGS_tera_zk_enabled)) { - return new sdk::ZkClusterFinder(FLAGS_tera_zk_root_path, FLAGS_tera_zk_addr_list, zk_adapter); - } else if (FLAGS_tera_coord_type == "ins" - || (FLAGS_tera_coord_type.empty() && FLAGS_tera_ins_enabled)) { - return new sdk::InsClusterFinder(FLAGS_tera_ins_root_path, FLAGS_tera_ins_addr_list, zk_adapter); - } else if (FLAGS_tera_coord_type == "mock_zk" - || (FLAGS_tera_coord_type.empty() && FLAGS_tera_mock_zk_enabled)) { - return new sdk::MockZkClusterFinder(FLAGS_tera_zk_root_path, FLAGS_tera_zk_addr_list); - } else if (FLAGS_tera_coord_type == "mock_ins" - || (FLAGS_tera_coord_type.empty() && FLAGS_tera_mock_ins_enabled)) { - return new sdk::MockInsClusterFinder(FLAGS_tera_ins_root_path, FLAGS_tera_ins_addr_list); - } else if (FLAGS_tera_coord_type == "fake_zk" - || FLAGS_tera_coord_type.empty()) { - return new sdk::FakeZkClusterFinder(FLAGS_tera_fake_zk_path_prefix); - } - return nullptr; + if (FLAGS_tera_coord_type.empty()) { + LOG(ERROR) << "Note: We don't recommend that use '--tera_[zk|ins|mock_zk|mock_ins]_enabled' " + "flag for your cluster coord" + << " replace by '--tera_coord_type=[zk|ins|mock_zk|mock_ins|fake_zk]' flag is " + "usually recommended."; + } + if (FLAGS_tera_coord_type == "zk" || (FLAGS_tera_coord_type.empty() && FLAGS_tera_zk_enabled)) { + return new sdk::ZkClusterFinder(FLAGS_tera_zk_root_path, FLAGS_tera_zk_addr_list, zk_adapter); + } else if (FLAGS_tera_coord_type == "ins" || + (FLAGS_tera_coord_type.empty() && FLAGS_tera_ins_enabled)) { + return new sdk::InsClusterFinder(FLAGS_tera_ins_root_path, FLAGS_tera_ins_addr_list, + zk_adapter); + } else if (FLAGS_tera_coord_type == "mock_zk" || + (FLAGS_tera_coord_type.empty() && FLAGS_tera_mock_zk_enabled)) { + return new sdk::MockZkClusterFinder(FLAGS_tera_zk_root_path, FLAGS_tera_zk_addr_list); + } else if (FLAGS_tera_coord_type == "mock_ins" || + (FLAGS_tera_coord_type.empty() && FLAGS_tera_mock_ins_enabled)) { + return new sdk::MockInsClusterFinder(FLAGS_tera_ins_root_path, FLAGS_tera_ins_addr_list); + } else if (FLAGS_tera_coord_type == "fake_zk" || FLAGS_tera_coord_type.empty()) { + return new sdk::FakeZkClusterFinder(FLAGS_tera_fake_zk_path_prefix); + } + return nullptr; } ClusterFinder* NewTimeoracleClusterFinder() { - if (FLAGS_tera_timeoracle_mock_enabled) { - return new sdk::MockTimeoracleClusterFinder(FLAGS_tera_timeoracle_mock_root_path); - } else if (FLAGS_tera_coord_type == "zk") { - return new sdk::ZkClusterFinder(FLAGS_tera_zk_root_path, FLAGS_tera_zk_addr_list); - } else if (FLAGS_tera_coord_type == "ins") { - return new sdk::InsClusterFinder(FLAGS_tera_ins_root_path, FLAGS_tera_ins_addr_list); - } - - return nullptr; + if (FLAGS_tera_timeoracle_mock_enabled) { + return new sdk::MockTimeoracleClusterFinder(FLAGS_tera_timeoracle_mock_root_path); + } else if (FLAGS_tera_coord_type == "zk") { + return new sdk::ZkClusterFinder(FLAGS_tera_zk_root_path, FLAGS_tera_zk_addr_list); + } else if (FLAGS_tera_coord_type == "ins") { + return new sdk::InsClusterFinder(FLAGS_tera_ins_root_path, FLAGS_tera_ins_addr_list); + } + + return nullptr; } } // namespace sdk diff --git a/src/sdk/sdk_zk.h b/src/sdk/sdk_zk.h index 8ad026ebd..8de1d1cc6 100644 --- a/src/sdk/sdk_zk.h +++ b/src/sdk/sdk_zk.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_SDK_SDK_ZK_H_ -#define TERA_SDK_SDK_ZK_H_ +#ifndef TERA_SDK_SDK_ZK_H_ +#define TERA_SDK_SDK_ZK_H_ #include #include @@ -12,10 +12,10 @@ #include "ins_sdk.h" #include "zk/zk_adapter.h" -namespace galaxy{ -namespace ins{ +namespace galaxy { +namespace ins { namespace sdk { - class InsSDK; +class InsSDK; } } } @@ -24,177 +24,181 @@ namespace tera { namespace sdk { class ClientZkAdapterBase : public zk::ZooKeeperLightAdapter { -public: - virtual ~ClientZkAdapterBase() {}; - virtual bool Init() = 0; - virtual bool RegisterClient(std::string* session_str) = 0; - virtual bool IsClientAlive(const std::string& path) = 0; - virtual bool ReadNode(const std::string& path, std::string* value) = 0; + public: + virtual ~ClientZkAdapterBase(){}; + virtual bool Init() = 0; + virtual bool RegisterClient(std::string* session_str) = 0; + virtual bool IsClientAlive(const std::string& path) = 0; + virtual bool ReadNode(const std::string& path, std::string* value) = 0; }; class ClientZkAdapter : public ClientZkAdapterBase { -public: - ClientZkAdapter() {} - virtual ~ClientZkAdapter() {} - virtual bool Init(); - virtual bool RegisterClient(std::string* session_str); - virtual bool IsClientAlive(const std::string& path); - virtual bool ReadNode(const std::string& path, std::string* value); -private: - mutable Mutex mutex_; + public: + ClientZkAdapter() {} + virtual ~ClientZkAdapter() {} + virtual bool Init(); + virtual bool RegisterClient(std::string* session_str); + virtual bool IsClientAlive(const std::string& path); + virtual bool ReadNode(const std::string& path, std::string* value); + + private: + mutable Mutex mutex_; }; class MockClientZkAdapter : public ClientZkAdapter { -public: - MockClientZkAdapter(): ClientZkAdapter() {} - virtual ~MockClientZkAdapter() {} - virtual bool Init() { return true; } - virtual bool RegisterClient(std::string* session_str) { - *session_str = "localhost"; - return true; - } - virtual bool IsClientAlive(const std::string& path) { - return true; - } - virtual bool ReadNode(const std::string& path, std::string* value) { - *value = "mock_zk_value"; - return true; - } + public: + MockClientZkAdapter() : ClientZkAdapter() {} + virtual ~MockClientZkAdapter() {} + virtual bool Init() { return true; } + virtual bool RegisterClient(std::string* session_str) { + *session_str = "localhost"; + return true; + } + virtual bool IsClientAlive(const std::string& path) { return true; } + virtual bool ReadNode(const std::string& path, std::string* value) { + *value = "mock_zk_value"; + return true; + } }; class InsClientZkAdapter : public ClientZkAdapterBase { -public: - InsClientZkAdapter() : ins_sdk_(NULL) {} - virtual ~InsClientZkAdapter() { - if (ins_sdk_ != NULL) { - delete ins_sdk_; - } + public: + InsClientZkAdapter() : ins_sdk_(NULL) {} + virtual ~InsClientZkAdapter() { + if (ins_sdk_ != NULL) { + delete ins_sdk_; } - virtual bool Init (); - virtual bool RegisterClient(std::string* session_str); - virtual bool IsClientAlive(const std::string& path); - virtual bool ReadNode(const std::string& path, std::string* value); -private: - galaxy::ins::sdk::InsSDK* ins_sdk_; + } + virtual bool Init(); + virtual bool RegisterClient(std::string* session_str); + virtual bool IsClientAlive(const std::string& path); + virtual bool ReadNode(const std::string& path, std::string* value); + + private: + galaxy::ins::sdk::InsSDK* ins_sdk_; }; class MockInsClientZkAdapter : public InsClientZkAdapter { -public: - MockInsClientZkAdapter() : InsClientZkAdapter() {} - virtual ~MockInsClientZkAdapter() {} - virtual bool Init() { return true; } - virtual bool RegisterClient(std::string* session_str) { - *session_str = "localhost"; - return true; - } - virtual bool IsClientAlive(const std::string& path) { - return true; - } - virtual bool ReadNode(const std::string& path, std::string* value) { - *value = "mock_ins_value"; - return true; - } + public: + MockInsClientZkAdapter() : InsClientZkAdapter() {} + virtual ~MockInsClientZkAdapter() {} + virtual bool Init() { return true; } + virtual bool RegisterClient(std::string* session_str) { + *session_str = "localhost"; + return true; + } + virtual bool IsClientAlive(const std::string& path) { return true; } + virtual bool ReadNode(const std::string& path, std::string* value) { + *value = "mock_ins_value"; + return true; + } }; ClientZkAdapterBase* NewClientZkAdapter(); -class ClusterFinder -{ -public: - ClusterFinder() {} - virtual ~ClusterFinder() {} - std::string MasterAddr(bool update = false); - std::string RootTableAddr(bool update = false); - std::string TimeoracleAddr(bool update = false); - std::string ClusterId(); // cluster URI: :/// - -protected: - virtual bool ReadNode(const std::string& path, std::string* value) = 0; - virtual std::string Name() = 0; - virtual std::string Authority() = 0; - virtual std::string Path() = 0; - -private: - mutable Mutex mutex_; - std::string master_addr_; - std::string timeoracle_addr_; - std::string root_table_addr_; +class ClusterFinder { + public: + ClusterFinder() {} + virtual ~ClusterFinder() {} + std::string MasterAddr(bool update = false); + std::string RootTableAddr(bool update = false); + std::string TimeoracleAddr(bool update = false); + std::string ClusterId(); // cluster URI: :/// + + protected: + virtual bool ReadNode(const std::string& path, std::string* value) = 0; + virtual std::string Name() = 0; + virtual std::string Authority() = 0; + virtual std::string Path() = 0; + + private: + mutable Mutex mutex_; + std::string master_addr_; + std::string timeoracle_addr_; + std::string root_table_addr_; }; class ZkClusterFinder : public ClusterFinder { -public: - ZkClusterFinder(const std::string& zk_root_path, - const std::string& zk_addr_list, - ClientZkAdapterBase* zk_adapter = NULL); -protected: - virtual bool ReadNode(const std::string& path, std::string* value); - virtual std::string Name() { return "zk"; }; - virtual std::string Authority() { return zk_addr_list_; } - virtual std::string Path() { return zk_root_path_; } -private: - std::string zk_root_path_; - std::string zk_addr_list_; - ClientZkAdapterBase* zk_adapter_; + public: + ZkClusterFinder(const std::string& zk_root_path, const std::string& zk_addr_list, + ClientZkAdapterBase* zk_adapter = NULL); + + protected: + virtual bool ReadNode(const std::string& path, std::string* value); + virtual std::string Name() { return "zk"; }; + virtual std::string Authority() { return zk_addr_list_; } + virtual std::string Path() { return zk_root_path_; } + + private: + std::string zk_root_path_; + std::string zk_addr_list_; + ClientZkAdapterBase* zk_adapter_; }; class MockZkClusterFinder : public ZkClusterFinder { -public: - MockZkClusterFinder(const std::string& zk_root_path, const std::string& zk_addr_list) : - ZkClusterFinder(zk_root_path, zk_addr_list) {} -protected: - virtual std::string Name() { return "mock zk"; } + public: + MockZkClusterFinder(const std::string& zk_root_path, const std::string& zk_addr_list) + : ZkClusterFinder(zk_root_path, zk_addr_list) {} + + protected: + virtual std::string Name() { return "mock zk"; } }; class InsClusterFinder : public ClusterFinder { -public: - InsClusterFinder(const std::string& ins_root_path, - const std::string& ins_addr_list, - ClientZkAdapterBase* zk_adapter = NULL); -protected: - virtual bool ReadNode(const std::string& path, std::string* value); - virtual std::string Name() { return "ins"; } - virtual std::string Authority() { return ins_addr_list_; } - virtual std::string Path() { return ins_root_path_; } -private: - std::string ins_root_path_; - std::string ins_addr_list_; - ClientZkAdapterBase* zk_adapter_; + public: + InsClusterFinder(const std::string& ins_root_path, const std::string& ins_addr_list, + ClientZkAdapterBase* zk_adapter = NULL); + + protected: + virtual bool ReadNode(const std::string& path, std::string* value); + virtual std::string Name() { return "ins"; } + virtual std::string Authority() { return ins_addr_list_; } + virtual std::string Path() { return ins_root_path_; } + + private: + std::string ins_root_path_; + std::string ins_addr_list_; + ClientZkAdapterBase* zk_adapter_; }; class MockInsClusterFinder : public InsClusterFinder { -public: - MockInsClusterFinder(const std::string& ins_root_path, const std::string& ins_addr_list) : - InsClusterFinder(ins_root_path, ins_addr_list) {} -protected: - virtual std::string Name() { return "mock ins"; } + public: + MockInsClusterFinder(const std::string& ins_root_path, const std::string& ins_addr_list) + : InsClusterFinder(ins_root_path, ins_addr_list) {} + + protected: + virtual std::string Name() { return "mock ins"; } }; class FakeZkClusterFinder : public ClusterFinder { -public: - FakeZkClusterFinder(const std::string& fake_zk_path_prefix); -protected: - virtual bool ReadNode(const std::string& path, std::string* value); - virtual std::string Name() { return "fakezk"; }; - virtual std::string Authority() { return "localhost"; } - virtual std::string Path() { return fake_zk_path_prefix_; } -private: - std::string fake_zk_path_prefix_; + public: + FakeZkClusterFinder(const std::string& fake_zk_path_prefix); + + protected: + virtual bool ReadNode(const std::string& path, std::string* value); + virtual std::string Name() { return "fakezk"; }; + virtual std::string Authority() { return "localhost"; } + virtual std::string Path() { return fake_zk_path_prefix_; } + + private: + std::string fake_zk_path_prefix_; }; class MockTimeoracleClusterFinder : public ClusterFinder { -public: - MockTimeoracleClusterFinder(const std::string& mock_root_path); + public: + MockTimeoracleClusterFinder(const std::string& mock_root_path); + + protected: + virtual bool ReadNode(const std::string& path, std::string* value); -protected: - virtual bool ReadNode(const std::string& path, std::string* value); + virtual std::string Name() { return "fakezk"; }; - virtual std::string Name() { return "fakezk"; }; + virtual std::string Authority() { return "localhost"; } - virtual std::string Authority() { return "localhost"; } + virtual std::string Path() { return mock_root_path_; } - virtual std::string Path() { return mock_root_path_; } -private: - std::string mock_root_path_; + private: + std::string mock_root_path_; }; ClusterFinder* NewTimeoracleClusterFinder(); diff --git a/src/sdk/single_row_txn.cc b/src/sdk/single_row_txn.cc index 51a84cae2..25381e4b0 100644 --- a/src/sdk/single_row_txn.cc +++ b/src/sdk/single_row_txn.cc @@ -34,280 +34,265 @@ SingleRowTxn::SingleRowTxn(std::shared_ptr table_impl, const std::str mutation_buffer_(table_impl_.get(), row_key), user_commit_callback_(NULL), user_commit_context_(NULL) { - start_timestamp_ = get_micros(); + start_timestamp_ = get_micros(); } -SingleRowTxn::~SingleRowTxn() { -} +SingleRowTxn::~SingleRowTxn() {} bool SingleRowTxn::MarkHasRead() { - MutexLock l(&mu_); - if (has_read_) { - return false; - } else { - has_read_ = true; - return true; - } + MutexLock l(&mu_); + if (has_read_) { + return false; + } else { + has_read_ = true; + return true; + } } void SingleRowTxn::MarkNoRead() { - MutexLock l(&mu_); - assert(has_read_ == true); - has_read_ = false; + MutexLock l(&mu_); + assert(has_read_ == true); + has_read_ = false; } /// 提交一个修改操作 void SingleRowTxn::ApplyMutation(RowMutation* row_mu) { - RowMutationImpl* row_mu_impl = static_cast(row_mu); - row_mu_impl->SetTransaction(this); - - if (row_mu->RowKey() == row_key_) { - mutation_buffer_.Concatenate(*row_mu_impl); - row_mu_impl->SetError(ErrorCode::kOK); - } else { - row_mu_impl->SetError(ErrorCode::kBadParam, "not same row"); - } - - if (row_mu->IsAsync()) { - ThreadPool::Task task = std::bind(&RowMutationImpl::RunCallback, row_mu_impl); - thread_pool_->AddTask(task); - } + RowMutationImpl* row_mu_impl = static_cast(row_mu); + row_mu_impl->SetTransaction(this); + + if (row_mu->RowKey() == row_key_) { + mutation_buffer_.Concatenate(*row_mu_impl); + row_mu_impl->SetError(ErrorCode::kOK); + } else { + row_mu_impl->SetError(ErrorCode::kBadParam, "not same row"); + } + + if (row_mu->IsAsync()) { + ThreadPool::Task task = std::bind(&RowMutationImpl::RunCallback, row_mu_impl); + thread_pool_->AddTask(task); + } } void ReadCallbackWrapper(RowReader* row_reader) { - RowReaderImpl* reader_impl = static_cast(row_reader); - SingleRowTxn* txn_impl = static_cast(reader_impl->GetContext()); - txn_impl->ReadCallback(reader_impl); + RowReaderImpl* reader_impl = static_cast(row_reader); + SingleRowTxn* txn_impl = static_cast(reader_impl->GetContext()); + txn_impl->ReadCallback(reader_impl); } /// 读取操作 ErrorCode SingleRowTxn::Get(RowReader* row_reader) { - RowReaderImpl* reader_impl = static_cast(row_reader); - reader_impl->SetTransaction(this); - int64_t odd_time_ms = ttl_timestamp_ms_ - get_millis(); - if (odd_time_ms < reader_impl->TimeOut()) { - reader_impl->SetTimeOut(odd_time_ms > 0 ? odd_time_ms : 1); - } - bool is_async = reader_impl->IsAsync(); - - // safe check - if (reader_impl->RowName() != row_key_) { - reader_impl->SetError(ErrorCode::kBadParam, "not same row"); - } else if (!MarkHasRead()) { - reader_impl->SetError(ErrorCode::kBadParam, "not support read more than once in txn"); - } else if (reader_impl->GetSnapshot() != 0) { - reader_impl->SetError(ErrorCode::kBadParam, "not support read a snapshot in txn"); - } - if (reader_impl->GetError().GetType() != ErrorCode::kOK) { - if (is_async) { - ThreadPool::Task task = std::bind(&RowReaderImpl::RunCallback, reader_impl); - thread_pool_->AddTask(task); - return ErrorCode(); - } else { - return reader_impl->GetError(); - } - } - - int64_t ts_start = 0, ts_end = 0; - reader_impl->GetTimeRange(&ts_start, &ts_end); - reader_start_timestamp_ = ts_start; - reader_end_timestamp_ = ts_end; - reader_max_versions_ = reader_impl->GetMaxVersions(); - - // save user's callback & context - user_reader_callback_ = reader_impl->GetCallBack(); - user_reader_context_ = reader_impl->GetContext(); - - // use our callback wrapper - reader_impl->SetCallBack(ReadCallbackWrapper); - reader_impl->SetContext(this); - - table_impl_->Get(reader_impl); + RowReaderImpl* reader_impl = static_cast(row_reader); + reader_impl->SetTransaction(this); + int64_t odd_time_ms = ttl_timestamp_ms_ - get_millis(); + if (odd_time_ms < reader_impl->TimeOut()) { + reader_impl->SetTimeOut(odd_time_ms > 0 ? odd_time_ms : 1); + } + bool is_async = reader_impl->IsAsync(); + + // safe check + if (reader_impl->RowName() != row_key_) { + reader_impl->SetError(ErrorCode::kBadParam, "not same row"); + } else if (!MarkHasRead()) { + reader_impl->SetError(ErrorCode::kBadParam, "not support read more than once in txn"); + } else if (reader_impl->GetSnapshot() != 0) { + reader_impl->SetError(ErrorCode::kBadParam, "not support read a snapshot in txn"); + } + if (reader_impl->GetError().GetType() != ErrorCode::kOK) { if (is_async) { - return ErrorCode(); + ThreadPool::Task task = std::bind(&RowReaderImpl::RunCallback, reader_impl); + thread_pool_->AddTask(task); + return ErrorCode(); } else { - reader_impl->Wait(); - return reader_impl->GetError(); + return reader_impl->GetError(); } + } + + int64_t ts_start = 0, ts_end = 0; + reader_impl->GetTimeRange(&ts_start, &ts_end); + reader_start_timestamp_ = ts_start; + reader_end_timestamp_ = ts_end; + reader_max_versions_ = reader_impl->GetMaxVersions(); + + // save user's callback & context + user_reader_callback_ = reader_impl->GetCallBack(); + user_reader_context_ = reader_impl->GetContext(); + + // use our callback wrapper + reader_impl->SetCallBack(ReadCallbackWrapper); + reader_impl->SetContext(this); + + table_impl_->Get(reader_impl); + if (is_async) { + return ErrorCode(); + } else { + reader_impl->Wait(); + return reader_impl->GetError(); + } } /// 设置提交回调, 提交操作会异步返回 -void SingleRowTxn::SetCommitCallback(Callback callback) { - user_commit_callback_ = callback; -} +void SingleRowTxn::SetCommitCallback(Callback callback) { user_commit_callback_ = callback; } /// 获取提交回调 -Transaction::Callback SingleRowTxn::GetCommitCallback() { - return user_commit_callback_; -} +Transaction::Callback SingleRowTxn::GetCommitCallback() { return user_commit_callback_; } /// 设置用户上下文,可在回调函数中获取 -void SingleRowTxn::SetContext(void* context) { - user_commit_context_ = context; -} +void SingleRowTxn::SetContext(void* context) { user_commit_context_ = context; } /// 获取用户上下文 -void* SingleRowTxn::GetContext() { - return user_commit_context_; -} +void* SingleRowTxn::GetContext() { return user_commit_context_; } /// 获得结果错误码 -const ErrorCode& SingleRowTxn::GetError() { - return mutation_buffer_.GetError(); -} +const ErrorCode& SingleRowTxn::GetError() { return mutation_buffer_.GetError(); } /// 内部读操作回调 void SingleRowTxn::ReadCallback(RowReaderImpl* reader_impl) { - // restore user's callback & context - reader_impl->SetCallBack(user_reader_callback_); - reader_impl->SetContext(user_reader_context_); - - // save results for commit check - ErrorCode::ErrorCodeType code = reader_impl->GetError().GetType(); - if (code == ErrorCode::kOK || code == ErrorCode::kNotFound) { - // copy read_column_list - read_column_list_ = reader_impl->GetReadColumnList(); - - // copy read result (not including value) - while (!reader_impl->Done()) { - const std::string& family = reader_impl->Family(); - const std::string& qualifier = reader_impl->Qualifier(); - int64_t timestamp = reader_impl->Timestamp(); - read_result_[family][qualifier][timestamp] = reader_impl->Value(); - reader_impl->Next(); - } - reader_impl->ResetResultPos(); - } else { - MarkNoRead(); + // restore user's callback & context + reader_impl->SetCallBack(user_reader_callback_); + reader_impl->SetContext(user_reader_context_); + + // save results for commit check + ErrorCode::ErrorCodeType code = reader_impl->GetError().GetType(); + if (code == ErrorCode::kOK || code == ErrorCode::kNotFound) { + // copy read_column_list + read_column_list_ = reader_impl->GetReadColumnList(); + + // copy read result (not including value) + while (!reader_impl->Done()) { + const std::string& family = reader_impl->Family(); + const std::string& qualifier = reader_impl->Qualifier(); + int64_t timestamp = reader_impl->Timestamp(); + read_result_[family][qualifier][timestamp] = reader_impl->Value(); + reader_impl->Next(); } + reader_impl->ResetResultPos(); + } else { + MarkNoRead(); + } - // run user's callback - reader_impl->RunCallback(); + // run user's callback + reader_impl->RunCallback(); } void CommitCallbackWrapper(RowMutation* row_mu) { - RowMutationImpl* mu_impl = static_cast(row_mu); - SingleRowTxn* txn_impl = static_cast(row_mu->GetContext()); - txn_impl->CommitCallback(mu_impl); + RowMutationImpl* mu_impl = static_cast(row_mu); + SingleRowTxn* txn_impl = static_cast(row_mu->GetContext()); + txn_impl->CommitCallback(mu_impl); } /// 提交事务 ErrorCode SingleRowTxn::Commit() { - int64_t odd_time_ms = ttl_timestamp_ms_ - get_millis(); - if (odd_time_ms < mutation_buffer_.TimeOut()) { - mutation_buffer_.SetTimeOut(odd_time_ms > 0 ? odd_time_ms : 1); + int64_t odd_time_ms = ttl_timestamp_ms_ - get_millis(); + if (odd_time_ms < mutation_buffer_.TimeOut()) { + mutation_buffer_.SetTimeOut(odd_time_ms > 0 ? odd_time_ms : 1); + } + commit_timestamp_ = get_micros(); + InternalNotify(); + if (mutation_buffer_.MutationNum() > 0) { + if (user_commit_callback_ != NULL) { + // use our callback wrapper + mutation_buffer_.SetCallBack(CommitCallbackWrapper); + mutation_buffer_.SetContext(this); } - commit_timestamp_ = get_micros(); - InternalNotify(); - if (mutation_buffer_.MutationNum() > 0) { - if (user_commit_callback_ != NULL) { - // use our callback wrapper - mutation_buffer_.SetCallBack(CommitCallbackWrapper); - mutation_buffer_.SetContext(this); - } - mutation_buffer_.SetTransaction(this); - table_impl_->ApplyMutation(&mutation_buffer_); - if (mutation_buffer_.IsAsync()) { - return ErrorCode(); - } else { - return mutation_buffer_.GetError(); - } + mutation_buffer_.SetTransaction(this); + table_impl_->ApplyMutation(&mutation_buffer_); + if (mutation_buffer_.IsAsync()) { + return ErrorCode(); } else { - if (user_commit_callback_ != NULL) { - ThreadPool::Task task = std::bind(user_commit_callback_, this); - thread_pool_->AddTask(task); - } - return ErrorCode(); + return mutation_buffer_.GetError(); + } + } else { + if (user_commit_callback_ != NULL) { + ThreadPool::Task task = std::bind(user_commit_callback_, this); + thread_pool_->AddTask(task); } + return ErrorCode(); + } } /// 内部提交回调 void SingleRowTxn::CommitCallback(RowMutationImpl* mu_impl) { - CHECK_EQ(&mutation_buffer_, mu_impl); - CHECK_NOTNULL(user_commit_callback_); - // run user's commit callback - user_commit_callback_(this); + CHECK_EQ(&mutation_buffer_, mu_impl); + CHECK_NOTNULL(user_commit_callback_); + // run user's commit callback + user_commit_callback_(this); } /// 序列化 void SingleRowTxn::Serialize(RowMutationSequence* mu_seq) { - SingleRowTxnReadInfo* pb_read_info = mu_seq->mutable_txn_read_info(); - pb_read_info->set_has_read(has_read_); - assert(reader_max_versions_ >= 1); - pb_read_info->set_max_versions(reader_max_versions_); - if (reader_start_timestamp_ != kOldestTs) { - pb_read_info->set_start_timestamp(reader_start_timestamp_); + SingleRowTxnReadInfo* pb_read_info = mu_seq->mutable_txn_read_info(); + pb_read_info->set_has_read(has_read_); + assert(reader_max_versions_ >= 1); + pb_read_info->set_max_versions(reader_max_versions_); + if (reader_start_timestamp_ != kOldestTs) { + pb_read_info->set_start_timestamp(reader_start_timestamp_); + } + if (reader_end_timestamp_ != kLatestTs) { + pb_read_info->set_end_timestamp(reader_end_timestamp_); + } + + // serialize read_clumn_list + RowReader::ReadColumnList::iterator column_it = read_column_list_.begin(); + for (; column_it != read_column_list_.end(); ++column_it) { + const std::string& family = column_it->first; + std::set& qualifier_set = column_it->second; + + ColumnFamily* pb_column_info = pb_read_info->add_read_column_list(); + pb_column_info->set_family_name(family); + + std::set::iterator cq_it = qualifier_set.begin(); + for (; cq_it != qualifier_set.end(); ++cq_it) { + pb_column_info->add_qualifier_list(*cq_it); } - if (reader_end_timestamp_ != kLatestTs) { - pb_read_info->set_end_timestamp(reader_end_timestamp_); - } - - // serialize read_clumn_list - RowReader::ReadColumnList::iterator column_it = read_column_list_.begin(); - for (; column_it != read_column_list_.end(); ++column_it) { - const std::string& family = column_it->first; - std::set& qualifier_set = column_it->second; - - ColumnFamily* pb_column_info = pb_read_info->add_read_column_list(); - pb_column_info->set_family_name(family); - - std::set::iterator cq_it = qualifier_set.begin(); - for (; cq_it != qualifier_set.end(); ++cq_it) { - pb_column_info->add_qualifier_list(*cq_it); - } - } - - // serialize read_result (family & qualifier & timestamp & value) - ReadResult::iterator cf_it = read_result_.begin(); - for (; cf_it != read_result_.end(); ++cf_it) { - const std::string& family = cf_it->first; - auto& qualifier_map = cf_it->second; - - auto cq_it = qualifier_map.begin(); - for (; cq_it != qualifier_map.end(); ++cq_it) { - const std::string& qualifier = cq_it->first; - auto& cell_map = cq_it->second; - - auto it = cell_map.rbegin(); - for (; it != cell_map.rend(); ++it) { - KeyValuePair* kv = pb_read_info->mutable_read_result()->add_key_values(); - kv->set_column_family(family); - kv->set_qualifier(qualifier); - kv->set_timestamp(it->first); - kv->set_value(it->second); - } - } + } + + // serialize read_result (family & qualifier & timestamp & value) + ReadResult::iterator cf_it = read_result_.begin(); + for (; cf_it != read_result_.end(); ++cf_it) { + const std::string& family = cf_it->first; + auto& qualifier_map = cf_it->second; + + auto cq_it = qualifier_map.begin(); + for (; cq_it != qualifier_map.end(); ++cq_it) { + const std::string& qualifier = cq_it->first; + auto& cell_map = cq_it->second; + + auto it = cell_map.rbegin(); + for (; it != cell_map.rend(); ++it) { + KeyValuePair* kv = pb_read_info->mutable_read_result()->add_key_values(); + kv->set_column_family(family); + kv->set_qualifier(qualifier); + kv->set_timestamp(it->first); + kv->set_value(it->second); + } } + } } -void SingleRowTxn::Ack(Table* t, - const std::string& row_key, - const std::string& column_family, - const std::string& qualifier) { - std::unique_ptr mutation(t->NewRowMutation(row_key)); - std::string notify_qulifier = PackNotifyName(column_family, qualifier); - mutation->DeleteColumns(kNotifyColumnFamily, notify_qulifier, start_timestamp_); - this->ApplyMutation(mutation.get()); +void SingleRowTxn::Ack(Table* t, const std::string& row_key, const std::string& column_family, + const std::string& qualifier) { + std::unique_ptr mutation(t->NewRowMutation(row_key)); + std::string notify_qulifier = PackNotifyName(column_family, qualifier); + mutation->DeleteColumns(kNotifyColumnFamily, notify_qulifier, start_timestamp_); + this->ApplyMutation(mutation.get()); } -void SingleRowTxn::Notify(Table* t, - const std::string& row_key, - const std::string& column_family, - const std::string& qualifier) { - Cell cell(t, row_key, column_family, qualifier); - notify_cells_.push_back(cell); +void SingleRowTxn::Notify(Table* t, const std::string& row_key, const std::string& column_family, + const std::string& qualifier) { + Cell cell(t, row_key, column_family, qualifier); + notify_cells_.push_back(cell); } void SingleRowTxn::InternalNotify() { - for (auto cell : notify_cells_) { - std::unique_ptr mutation(cell.Table()->NewRowMutation(cell.RowKey())); - std::string notify_qulifier = PackNotifyName(cell.ColFamily(), cell.Qualifier()); - mutation->Put(kNotifyColumnFamily, notify_qulifier, commit_timestamp_); - // single row transaction may notify different rows - cell.Table()->ApplyMutation(mutation.get()); - } + for (auto cell : notify_cells_) { + std::unique_ptr mutation(cell.Table()->NewRowMutation(cell.RowKey())); + std::string notify_qulifier = PackNotifyName(cell.ColFamily(), cell.Qualifier()); + mutation->Put(kNotifyColumnFamily, notify_qulifier, commit_timestamp_); + // single row transaction may notify different rows + cell.Table()->ApplyMutation(mutation.get()); + } } -} // namespace tera +} // namespace tera /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/sdk/single_row_txn.h b/src/sdk/single_row_txn.h index 89fc0cb4b..b5281d492 100644 --- a/src/sdk/single_row_txn.h +++ b/src/sdk/single_row_txn.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_SDK_SINGLE_ROW_TXN_H_ -#define TERA_SDK_SINGLE_ROW_TXN_H_ +#ifndef TERA_SDK_SINGLE_ROW_TXN_H_ +#define TERA_SDK_SINGLE_ROW_TXN_H_ #include @@ -22,101 +22,96 @@ class TableImpl; class Cell; class SingleRowTxn : public Transaction { -public: - SingleRowTxn(std::shared_ptr table_impl, const std::string& row_key, - common::ThreadPool* thread_pool); - virtual ~SingleRowTxn(); - - /// 提交一个修改操作 - virtual void ApplyMutation(RowMutation* row_mu); - /// 读取操作 - virtual ErrorCode Get(RowReader* row_reader); - - /// 设置提交回调, 提交操作会异步返回 - virtual void SetCommitCallback(Callback callback); - /// 获取提交回调 - virtual Callback GetCommitCallback(); - - /// 设置用户上下文,可在回调函数中获取 - virtual void SetContext(void* context); - /// 获取用户上下文 - virtual void* GetContext(); - - /// 获得结果错误码 - virtual const ErrorCode& GetError(); - - /// 提交事务 - virtual ErrorCode Commit(); - - virtual int64_t GetStartTimestamp() { return start_timestamp_; } - - virtual int64_t GetCommitTimestamp() { return commit_timestamp_; } - - virtual void Ack(Table* t, - const std::string& row_key, - const std::string& column_family, - const std::string& qualifier); - - virtual void Notify(Table* t, - const std::string& row_key, - const std::string& column_family, - const std::string& qualifier); - - // not support - virtual void SetIsolation(const IsolationLevel& isolation_level) { abort(); } - - // use default isolation level snapshot - virtual IsolationLevel Isolation() { return IsolationLevel::kSnapshot; } - - virtual void SetTimeout(int64_t timeout_ms) { - mutation_buffer_.SetTimeOut(timeout_ms); - } - -public: - /// 内部读操作回调 - void ReadCallback(RowReaderImpl* reader_impl); - /// 内部提交回调 - void CommitCallback(RowMutationImpl* mu_impl); - /// 序列化 - void Serialize(RowMutationSequence* mu_seq); - -private: - // prevent users from reading more than once in one single-row-txn - bool MarkHasRead(); - - void MarkNoRead(); - - void InternalNotify(); -private: - std::shared_ptr table_impl_; - const std::string row_key_; - common::ThreadPool* thread_pool_; - - bool has_read_; - RowReader::Callback user_reader_callback_; - void* user_reader_context_; - RowReader::ReadColumnList read_column_list_; - // columnfamily qualifier timestamp value - typedef std::map> > ReadResult; - ReadResult read_result_; - uint32_t reader_max_versions_; - int64_t reader_start_timestamp_; - int64_t reader_end_timestamp_; - - int64_t start_timestamp_; - int64_t commit_timestamp_; - - int64_t ttl_timestamp_ms_; - - RowMutationImpl mutation_buffer_; - Callback user_commit_callback_; - void* user_commit_context_; - - std::vector notify_cells_; - - mutable Mutex mu_; + public: + SingleRowTxn(std::shared_ptr table_impl, const std::string& row_key, + common::ThreadPool* thread_pool); + virtual ~SingleRowTxn(); + + /// 提交一个修改操作 + virtual void ApplyMutation(RowMutation* row_mu); + /// 读取操作 + virtual ErrorCode Get(RowReader* row_reader); + + /// 设置提交回调, 提交操作会异步返回 + virtual void SetCommitCallback(Callback callback); + /// 获取提交回调 + virtual Callback GetCommitCallback(); + + /// 设置用户上下文,可在回调函数中获取 + virtual void SetContext(void* context); + /// 获取用户上下文 + virtual void* GetContext(); + + /// 获得结果错误码 + virtual const ErrorCode& GetError(); + + /// 提交事务 + virtual ErrorCode Commit(); + + virtual int64_t GetStartTimestamp() { return start_timestamp_; } + + virtual int64_t GetCommitTimestamp() { return commit_timestamp_; } + + virtual void Ack(Table* t, const std::string& row_key, const std::string& column_family, + const std::string& qualifier); + + virtual void Notify(Table* t, const std::string& row_key, const std::string& column_family, + const std::string& qualifier); + + // not support + virtual void SetIsolation(const IsolationLevel& isolation_level) { abort(); } + + // use default isolation level snapshot + virtual IsolationLevel Isolation() { return IsolationLevel::kSnapshot; } + + virtual void SetTimeout(int64_t timeout_ms) { mutation_buffer_.SetTimeOut(timeout_ms); } + + public: + /// 内部读操作回调 + void ReadCallback(RowReaderImpl* reader_impl); + /// 内部提交回调 + void CommitCallback(RowMutationImpl* mu_impl); + /// 序列化 + void Serialize(RowMutationSequence* mu_seq); + + private: + // prevent users from reading more than once in one single-row-txn + bool MarkHasRead(); + + void MarkNoRead(); + + void InternalNotify(); + + private: + std::shared_ptr table_impl_; + const std::string row_key_; + common::ThreadPool* thread_pool_; + + bool has_read_; + RowReader::Callback user_reader_callback_; + void* user_reader_context_; + RowReader::ReadColumnList read_column_list_; + // columnfamily qualifier timestamp value + typedef std::map>> ReadResult; + ReadResult read_result_; + uint32_t reader_max_versions_; + int64_t reader_start_timestamp_; + int64_t reader_end_timestamp_; + + int64_t start_timestamp_; + int64_t commit_timestamp_; + + int64_t ttl_timestamp_ms_; + + RowMutationImpl mutation_buffer_; + Callback user_commit_callback_; + void* user_commit_context_; + + std::vector notify_cells_; + + mutable Mutex mu_; }; -} // namespace tera +} // namespace tera #endif // TERA_SDK_SINGLE_ROW_TXN_H_ diff --git a/src/sdk/stat_table.cc b/src/sdk/stat_table.cc index 14f944b3d..4c2519671 100644 --- a/src/sdk/stat_table.cc +++ b/src/sdk/stat_table.cc @@ -10,185 +10,223 @@ DECLARE_bool(tera_stat_table_enabled); DECLARE_int64(tera_stat_table_ttl); DECLARE_int64(tera_stat_table_splitsize); +DECLARE_string(tera_auth_policy); namespace tera { namespace sdk { -StatTable::StatTable(ThreadPool* thread_pool, - const StatTableCustomer& c, - const std::string& local_addr) +StatTable::StatTable(ThreadPool* thread_pool, std::shared_ptr access_builder, + const StatTableCustomer& c, const std::string& local_addr) : created_(false), opened_(false), local_addr_(local_addr), customer_type_(c), - thread_pool_(thread_pool) { } - -void StatTable::SelectTabletsFailMessages(const std::string& ts_addr, - const std::string& tablet, - int64_t start_ts, - int64_t end_ts) { - ErrorCode error_code; - - tera::ScanDescriptor scan_desc("!"); - scan_desc.SetEnd("\""); - scan_desc.AddColumn("tsinfo", "corrupt"); - scan_desc.SetMaxVersions(1); - scan_desc.SetTimeRange(end_ts, start_ts); - - tera::TPrinter printer; - tera::TPrinter::PrintOpt printer_opt; - printer_opt.print_head = true; - int cols = 6; - int row_cnt = 0; + thread_pool_(thread_pool), + access_builder_(access_builder) { + if (!access_builder_) { + access_builder_.reset(new auth::AccessBuilder(FLAGS_tera_auth_policy)); + access_builder_->Login(auth::kInternalGroup, "", nullptr); + } +} + +void StatTable::SelectTabletsFailMessages(const std::vector& filters, bool is_detail) { + CorruptPhase phase = CorruptPhase::kUnknown; + if (filters[0] == "Load") { + phase = CorruptPhase::kLoading; + } else if (filters[0] == "Comp") { + phase = CorruptPhase::kCompacting; + } + const std::string& time_range = filters[4]; + int64_t start_ts = kOldestTs, end_ts = kLatestTs; + std::size_t found = time_range.find(","); + if (found != std::string::npos) { + start_ts = get_timestamp_from_str(time_range.substr(0, found)); + end_ts = get_timestamp_from_str(time_range.substr(found + 1, time_range.size() - 1)); + if (start_ts != 0 && end_ts != 0) { + start_ts *= 1000000; + end_ts *= 1000000; + } + } + SelectTabletsFailMessages(phase, filters[1], filters[2], filters[3], start_ts, end_ts, is_detail); +} + +void StatTable::SelectTabletsFailMessages(const CorruptPhase& phase, const std::string& ts_addr, + const std::string& tablename, const std::string& tablet, + int64_t start_ts, int64_t end_ts, bool is_detail) { + ErrorCode error_code; + + tera::ScanDescriptor scan_desc("!"); + scan_desc.SetEnd("\""); + scan_desc.AddColumn("tsinfo", "corrupt"); + scan_desc.SetMaxVersions(1); + scan_desc.SetTimeRange(end_ts, start_ts); + + tera::TPrinter printer; + tera::TPrinter::PrintOpt printer_opt; + printer_opt.print_head = true; + int cols = 6; + int row_cnt = 0; + if (is_detail) { printer.Reset(cols, " ", "tablet", "server_addr", "time", "phase", "detail_msg"); - std::vector row; - tera::ResultStream* scanner = stat_table_->Scan(scan_desc, &error_code); - for (scanner->LookUp("!"); !scanner->Done(); scanner->Next()) { - tera::TabletCorruptMessage corrupt_msg; - DeserializeCorrupt(scanner->Value(), &corrupt_msg); - int64_t record_time = scanner->Timestamp() / 1000 / 1000; - if ((ts_addr != "" && corrupt_msg.tabletnode() != ts_addr) || - (tablet != "" && corrupt_msg.tablet() != tablet)) { - continue; - } - std::string corrupt_phase = - static_cast(corrupt_msg.corrupt_phase()) == CorruptPhase::kLoading ? "Load" : "Comp"; - row.clear(); - row.push_back(std::to_string(row_cnt++)); - row.push_back(corrupt_msg.tablet()); - row.push_back(corrupt_msg.tabletnode()); - row.push_back(get_time_str(record_time)); - row.push_back(corrupt_phase); - row.push_back(corrupt_msg.detail_message()); - printer.AddRow(row); + } + std::vector row; + tera::ResultStream* scanner = stat_table_->Scan(scan_desc, &error_code); + for (scanner->LookUp("!"); !scanner->Done(); scanner->Next()) { + tera::TabletCorruptMessage corrupt_msg; + DeserializeCorrupt(scanner->Value(), &corrupt_msg); + int64_t record_time = scanner->Timestamp() / 1000 / 1000; + if ((ts_addr != "" && corrupt_msg.tabletnode() != ts_addr) || + (tablet != "" && corrupt_msg.tablet() != tablet) || + (tablename != "" && corrupt_msg.tablet().find(tablename + "/") == std::string::npos)) { + continue; + } + if (phase != CorruptPhase::kUnknown && + static_cast(corrupt_msg.corrupt_phase()) != phase) { + continue; + } + std::string corrupt_phase = + static_cast(corrupt_msg.corrupt_phase()) == CorruptPhase::kLoading ? "Load" + : "Comp"; + row.clear(); + row.push_back(std::to_string(row_cnt++)); + row.push_back(corrupt_msg.tablet()); + row.push_back(corrupt_msg.tabletnode()); + row.push_back(get_time_str(record_time)); + row.push_back(corrupt_phase); + row.push_back(corrupt_msg.detail_message()); + if (is_detail) { + printer.AddRow(row); } + } + if (is_detail) { printer.Print(printer_opt); - delete scanner; + } else { + std::cout << "corruption tablet count :" << row_cnt << std::endl; + } + delete scanner; } -void StatTable::RecordTabletCorrupt(const std::string& tablet, - const std::string& corrupt_msg) { - if (!opened_) { - LOG(WARNING) << "stat_table not opened"; - return; - } - std::string key = "!" + tablet; - RowMutation* mutation = stat_table_->NewRowMutation(key); - mutation->Put("tsinfo", "corrupt", corrupt_msg); - mutation->SetCallBack(&RecordStatTableCallBack); - stat_table_->ApplyMutation(mutation); +void StatTable::RecordTabletCorrupt(const std::string& tablet, const std::string& corrupt_msg) { + if (!opened_) { + LOG(WARNING) << "stat_table not opened"; + return; + } + std::string key = "!" + tablet; + RowMutation* mutation = stat_table_->NewRowMutation(key); + mutation->Put("tsinfo", "corrupt", corrupt_msg); + mutation->SetCallBack(&RecordStatTableCallBack); + stat_table_->ApplyMutation(mutation); } void StatTable::ErasureTabletCorrupt(const std::string& tablet) { - if (!opened_) { - LOG(WARNING) << "stat_table not opened"; - return; - } - std::string key = "!" + tablet; - RowMutation* mutation = stat_table_->NewRowMutation(key); - mutation->DeleteRow(-1); - mutation->SetCallBack(&RecordStatTableCallBack); - stat_table_->ApplyMutation(mutation); + if (!opened_) { + LOG(WARNING) << "stat_table not opened"; + return; + } + std::string key = "!" + tablet; + RowMutation* mutation = stat_table_->NewRowMutation(key); + mutation->DeleteRow(-1); + mutation->SetCallBack(&RecordStatTableCallBack); + stat_table_->ApplyMutation(mutation); } std::string StatTable::SerializeLoadContext(const LoadTabletRequest& request, const std::string& tabletnode_session_id) { - tera::TabletLoadContext load_ctx; - std::string load_context_str; - LoadTabletRequest* req = load_ctx.mutable_load_request(); - req->CopyFrom(request); - load_ctx.set_tabletnode_session_id(tabletnode_session_id); - load_ctx.SerializeToString(&load_context_str); - return load_context_str; + tera::TabletLoadContext load_ctx; + std::string load_context_str; + LoadTabletRequest* req = load_ctx.mutable_load_request(); + req->CopyFrom(request); + load_ctx.set_tabletnode_session_id(tabletnode_session_id); + load_ctx.SerializeToString(&load_context_str); + return load_context_str; } - -std::string StatTable::SerializeCorrupt(CorruptPhase phase, - const std::string& tabletnode, + +std::string StatTable::SerializeCorrupt(CorruptPhase phase, const std::string& tabletnode, const std::string& tablet, const std::string& load_context_str, const std::string& msg) { - tera::TabletCorruptMessage corrupt_msg; - std::string corrupt_msg_str; - corrupt_msg.set_tablet(tablet); - corrupt_msg.set_tabletnode(tabletnode); - corrupt_msg.set_corrupt_phase(static_cast(phase)); - corrupt_msg.set_corrupt_type(0); - corrupt_msg.set_locality_group(""); - corrupt_msg.set_detail_message(msg); - corrupt_msg.set_load_context(load_context_str); - corrupt_msg.SerializeToString(&corrupt_msg_str); - return corrupt_msg_str; + tera::TabletCorruptMessage corrupt_msg; + std::string corrupt_msg_str; + corrupt_msg.set_tablet(tablet); + corrupt_msg.set_tabletnode(tabletnode); + corrupt_msg.set_corrupt_phase(static_cast(phase)); + corrupt_msg.set_corrupt_type(0); + corrupt_msg.set_locality_group(""); + corrupt_msg.set_detail_message(msg); + corrupt_msg.set_load_context(load_context_str); + corrupt_msg.SerializeToString(&corrupt_msg_str); + return corrupt_msg_str; } void StatTable::DeserializeCorrupt(const std::string& corrupt_msg_str, - tera::TabletCorruptMessage* corrupt_msg) { - corrupt_msg->ParseFromString(corrupt_msg_str); + tera::TabletCorruptMessage* corrupt_msg) { + corrupt_msg->ParseFromString(corrupt_msg_str); } bool StatTable::CreateStatTable() { - master::MasterClient master_client(local_addr_); - CreateTableRequest request; - CreateTableResponse response; - request.set_sequence_id(0); - request.set_table_name(kStatTableName); - TableSchema* schema = request.mutable_schema(); - schema->set_name(kStatTableName); - schema->set_raw_key(Binary); - schema->set_split_size(FLAGS_tera_stat_table_splitsize); - LocalityGroupSchema* lg = schema->add_locality_groups(); - lg->set_name("lg0"); - lg->set_store_type(FlashStore); - lg->set_id(0); - ColumnFamilySchema* cf = schema->add_column_families(); - cf->set_name("tsinfo"); - cf->set_time_to_live(FLAGS_tera_stat_table_ttl); - cf->set_locality_group("lg0"); - master_client.CreateTable(&request, &response); - switch (response.status()) { - case kMasterOk: - return true; - case kTableExist: - return true; - default: - return false; - } + master::MasterClient master_client(local_addr_); + CreateTableRequest request; + CreateTableResponse response; + request.set_sequence_id(0); + request.set_table_name(kStatTableName); + access_builder_->BuildInternalGroupRequest(&request); + TableSchema* schema = request.mutable_schema(); + schema->set_name(kStatTableName); + schema->set_raw_key(Binary); + schema->set_split_size(FLAGS_tera_stat_table_splitsize); + LocalityGroupSchema* lg = schema->add_locality_groups(); + lg->set_name("lg0"); + lg->set_store_type(FlashStore); + lg->set_id(0); + ColumnFamilySchema* cf = schema->add_column_families(); + cf->set_name("tsinfo"); + cf->set_time_to_live(FLAGS_tera_stat_table_ttl); + cf->set_locality_group("lg0"); + master_client.CreateTable(&request, &response); + switch (response.status()) { + case kMasterOk: + return true; + case kTableExist: + return true; + default: + return false; + } } bool StatTable::OpenStatTable() { - MutexLock locker(&mutex_); - // ts will not access stat_table - if (customer_type_ == StatTableCustomer::kTabletNode || opened_) { - return true; + MutexLock locker(&mutex_); + // ts will not access stat_table + if (customer_type_ == StatTableCustomer::kTabletNode || opened_) { + return true; + } + if (customer_type_ == StatTableCustomer::kMaster && !created_) { + created_ = CreateStatTable(); + if (!created_) { + return false; } - if (customer_type_ == StatTableCustomer::kMaster && !created_) { - created_ = CreateStatTable(); - if (!created_) { - return false; - } - } - ErrorCode err; - stat_table_.reset(new TableImpl(kStatTableName, thread_pool_, std::shared_ptr())); - if (stat_table_->OpenInternal(&err)) { - opened_ = true; - return true; - } else { - opened_ = false; - stat_table_.reset(); - LOG(ERROR) << "fail to open stat_table."; - } - return false; + } + ErrorCode err; + stat_table_.reset(new TableImpl(kStatTableName, thread_pool_, std::shared_ptr())); + if (stat_table_->OpenInternal({}, &err)) { + opened_ = true; + return true; + } else { + opened_ = false; + stat_table_.reset(); + LOG(ERROR) << "fail to open stat_table."; + } + return false; } - + void StatTable::RecordStatTableCallBack(RowMutation* mutation) { - const ErrorCode& error_code = mutation->GetError(); - if (error_code.GetType() != ErrorCode::kOK) { - LOG(WARNING) << "dump stat exception occured, reason:" - << error_code.GetReason(); - } else { - LOG(INFO) << "dump stat success:" << mutation->RowKey(); - } - delete mutation; + const ErrorCode& error_code = mutation->GetError(); + if (error_code.GetType() != ErrorCode::kOK) { + LOG(WARNING) << "dump stat exception occured, reason:" << error_code.GetReason(); + } else { + LOG(INFO) << "dump stat success:" << mutation->RowKey(); + } + delete mutation; } -} // namespace sdk -} // namespace tera +} // namespace sdk +} // namespace tera diff --git a/src/sdk/stat_table.h b/src/sdk/stat_table.h index 545e028b4..43a7b62c5 100644 --- a/src/sdk/stat_table.h +++ b/src/sdk/stat_table.h @@ -20,66 +20,68 @@ namespace tera { namespace sdk { enum class StatTableCustomer { - kMaster = 0, - kTabletNode = 1, - kClient = 2, + kMaster = 0, + kTabletNode = 1, + kClient = 2, }; enum class CorruptPhase { - kLoading = 0, - kCompacting = 1, + kLoading = 0, + kCompacting = 1, + kUnknown = 10, }; class StatTable { -public: - enum class CorruptType { - kUnknown = 0, - kSst = 1, - kCurrent = 2, - kManifest = 3, - kLoadlock = 4, - }; - // master and ts need set custmer explicit - StatTable(ThreadPool* thread_pool, - const StatTableCustomer& c = StatTableCustomer::kClient, - const std::string& local_addr = ""); - // default select all fail msg - // set args to limit ts/tablet/timerange - void SelectTabletsFailMessages(const std::string& ts_addr = "", - const std::string& tablet = "", - int64_t start_ts = kOldestTs, - int64_t end_ts = kLatestTs); - // record by tabletserver - void RecordTabletCorrupt(const std::string& tablet, - const std::string& fail_msg); - - void ErasureTabletCorrupt(const std::string& tablet); - - static std::string SerializeLoadContext(const LoadTabletRequest& request, - const std::string& tabletnode_session_id); + public: + enum class CorruptType { + kUnknown = 0, + kSst = 1, + kCurrent = 2, + kManifest = 3, + kLoadlock = 4, + }; + // master and ts need set custmer explicit + StatTable(ThreadPool* thread_pool, std::shared_ptr access_builder, + const StatTableCustomer& c = StatTableCustomer::kClient, + const std::string& local_addr = ""); - static std::string SerializeCorrupt(CorruptPhase phase, - const std::string& tabletnode, - const std::string& tablet, - const std::string& context_str, - const std::string& msg); + void SelectTabletsFailMessages(const std::vector& filters, bool is_detail); - void DeserializeCorrupt(const string& corrupt_str, - tera::TabletCorruptMessage* corrupt_msg); - - bool OpenStatTable(); - -private: - bool CreateStatTable(); - static void RecordStatTableCallBack(RowMutation* mutation); -private: - std::shared_ptr stat_table_; - std::atomic created_; - std::atomic opened_; - std::string local_addr_; - StatTableCustomer customer_type_; - mutable Mutex mutex_; - ThreadPool* thread_pool_; + // default select all fail msg + // set args to limit ts/tablet/timerange + void SelectTabletsFailMessages(const CorruptPhase& phase = CorruptPhase::kUnknown, + const std::string& ts_addr = "", const std::string& tablename = "", + const std::string& tablet = "", int64_t start_ts = kOldestTs, + int64_t end_ts = kLatestTs, bool is_detail = false); + // record by tabletserver + void RecordTabletCorrupt(const std::string& tablet, const std::string& fail_msg); + + void ErasureTabletCorrupt(const std::string& tablet); + + static std::string SerializeLoadContext(const LoadTabletRequest& request, + const std::string& tabletnode_session_id); + + static std::string SerializeCorrupt(CorruptPhase phase, const std::string& tabletnode, + const std::string& tablet, const std::string& context_str, + const std::string& msg); + + void DeserializeCorrupt(const string& corrupt_str, tera::TabletCorruptMessage* corrupt_msg); + + bool OpenStatTable(); + + private: + bool CreateStatTable(); + static void RecordStatTableCallBack(RowMutation* mutation); + + private: + std::shared_ptr stat_table_; + std::atomic created_; + std::atomic opened_; + std::string local_addr_; + StatTableCustomer customer_type_; + mutable Mutex mutex_; + ThreadPool* thread_pool_; + std::shared_ptr access_builder_; }; } // namespace sdk } // namespace tera diff --git a/src/sdk/table_impl.cc b/src/sdk/table_impl.cc index 0e680964b..6c117d799 100644 --- a/src/sdk/table_impl.cc +++ b/src/sdk/table_impl.cc @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -24,6 +25,7 @@ #include "proto/kv_helper.h" #include "proto/proto_helper.h" #include "proto/tabletnode_client.h" +#include "sdk/batch_mutation_impl.h" #include "sdk/cookie.h" #include "sdk/mutate_impl.h" #include "sdk/read_impl.h" @@ -36,10 +38,12 @@ #include "utils/string_util.h" #include "common/timer.h" #include "sdk/transaction_wrapper.h" +#include "common/event.h" +#include "common/this_thread.h" DECLARE_string(tera_master_meta_table_name); DECLARE_int32(tera_sdk_delay_send_internal); -DECLARE_int32(tera_sdk_retry_times); +DECLARE_int32(tera_sdk_meta_read_retry_times); DECLARE_int32(tera_sdk_retry_period); DECLARE_int32(tera_sdk_update_meta_internal); DECLARE_bool(tera_sdk_write_sync); @@ -49,7 +53,8 @@ DECLARE_int32(tera_sdk_read_send_interval); DECLARE_int64(tera_sdk_max_mutation_pending_num); DECLARE_int64(tera_sdk_max_reader_pending_num); DECLARE_bool(tera_sdk_async_blocking_enabled); -DECLARE_int32(tera_sdk_timeout); +DECLARE_int32(tera_sdk_read_timeout); +DECLARE_int32(tera_sdk_write_timeout); DECLARE_int32(tera_sdk_scan_buffer_limit); DECLARE_int32(tera_sdk_update_meta_concurrency); DECLARE_int32(tera_sdk_update_meta_buffer_limit); @@ -59,18 +64,21 @@ DECLARE_int32(tera_sdk_cookie_update_interval); DECLARE_bool(tera_sdk_perf_counter_enabled); DECLARE_int64(tera_sdk_perf_counter_log_interval); DECLARE_int32(tera_rpc_timeout_period); +DECLARE_string(tera_auth_policy); +DECLARE_int32(tera_sdk_get_tablet_retry_times); +DECLARE_int32(tera_sdk_update_meta_rpc_timeout_max_ms); using namespace std::placeholders; namespace tera { -TableImpl::TableImpl(const std::string& table_name, - common::ThreadPool* thread_pool, +TableImpl::TableImpl(const std::string& table_name, common::ThreadPool* thread_pool, std::shared_ptr client_impl) : name_(table_name), create_time_(0), last_sequence_id_(0), - timeout_(FLAGS_tera_sdk_timeout), + write_timeout_(FLAGS_tera_sdk_write_timeout), + read_timeout_(FLAGS_tera_sdk_read_timeout), client_impl_(client_impl), commit_size_(FLAGS_tera_sdk_batch_size), write_commit_timeout_(FLAGS_tera_sdk_write_send_interval), @@ -86,2085 +94,2256 @@ TableImpl::TableImpl(const std::string& table_name, tabletnode_client_(NULL), thread_pool_(thread_pool), cluster_(NULL), - cluster_private_(false), - pending_timeout_ms_(FLAGS_tera_rpc_timeout_period) { - if (client_impl) { - cluster_ = client_impl->GetClusterFinder(); - } - if (cluster_ == NULL) { - cluster_ = sdk::NewClusterFinder(); - cluster_private_ = true; - } + cluster_private_(false) { + hash_method_ = [](const std::string& key) { return MurmurHash(key) + key; }; + if (client_impl) { + cluster_ = client_impl->GetClusterFinder(); + access_builder_ = client_impl->GetAccessBuilder(); + } else { + cluster_ = sdk::NewClusterFinder(); + cluster_private_ = true; + access_builder_.reset(new auth::AccessBuilder(FLAGS_tera_auth_policy)); + access_builder_->Login(auth::kInternalGroup, "", nullptr); + } } TableImpl::~TableImpl() { - ClearDelayTask(); - if (FLAGS_tera_sdk_cookie_enabled) { - DoDumpCookie(); - } - if (cluster_private_) { - delete cluster_; - } + ClearDelayTask(); + if (FLAGS_tera_sdk_cookie_enabled) { + DoDumpCookie(); + } + if (cluster_private_) { + delete cluster_; + } } RowMutation* TableImpl::NewRowMutation(const std::string& row_key) { - RowMutationImpl* row_mu = new RowMutationImpl(this, row_key); - return row_mu; + RowMutationImpl* row_mu = new RowMutationImpl(this, row_key); + return row_mu; } +BatchMutation* TableImpl::NewBatchMutation() { return new BatchMutationImpl(this); } + RowReader* TableImpl::NewRowReader(const std::string& row_key) { - RowReaderImpl* row_rd = new RowReaderImpl(this, row_key); - return row_rd; + RowReaderImpl* row_rd = new RowReaderImpl(this, row_key); + return row_rd; } -void TableImpl::Put(RowMutation* row_mu) { - ApplyMutation(row_mu); -} +void TableImpl::Put(RowMutation* row_mu) { ApplyMutation(row_mu); } void TableImpl::Put(const std::vector& row_mutations) { - ApplyMutation(row_mutations); + ApplyMutation(row_mutations); } void OpStatCallback(Table* table, SdkTask* task) { - if (task->Type() == SdkTask::MUTATION) { - ((TableImpl*)table)->StatUserPerfCounter(task->Type(), - ((RowMutationImpl*)task)->GetError().GetType(), - get_micros() - ((RowMutationImpl*)task)->GetStartTime()); - } else if (task->Type() == SdkTask::READ) { - ((TableImpl*)table)->StatUserPerfCounter(task->Type(), - ((RowReaderImpl*)task)->GetError().GetType(), - get_micros() - ((RowReaderImpl*)task)->GetStartTime()); - } + if (task->Type() == SdkTask::MUTATION) { + ((TableImpl*)table) + ->StatUserPerfCounter(task->Type(), ((RowMutationImpl*)task)->GetError().GetType(), + get_micros() - ((RowMutationImpl*)task)->GetStartTime()); + } else if (task->Type() == SdkTask::READ) { + ((TableImpl*)table) + ->StatUserPerfCounter(task->Type(), ((RowReaderImpl*)task)->GetError().GetType(), + get_micros() - ((RowReaderImpl*)task)->GetStartTime()); + } else if (task->Type() == SdkTask::BATCH_MUTATION) { + ((TableImpl*)table) + ->StatUserPerfCounter(task->Type(), ((BatchMutationImpl*)task)->GetError().GetType(), + get_micros() - ((BatchMutationImpl*)task)->GetStartTime()); + } } void TableImpl::ApplyMutation(RowMutation* row_mu) { - perf_counter_.user_mu_cnt.Add(1); - ((RowMutationImpl*)row_mu)->Prepare(OpStatCallback); - if (row_mu->GetError().GetType() != ErrorCode::kOK) { // local check fail - if (!((RowMutationImpl*)row_mu)->IsAsync()) { - ((RowMutationImpl*)row_mu)->RunCallback(); - return; - } - ThreadPool::Task task = - std::bind(&RowMutationImpl::RunCallback, - static_cast(row_mu)); - thread_pool_->AddTask(task); - return; + perf_counter_.user_mu_cnt.Add(1); + ((RowMutationImpl*)row_mu)->Prepare(OpStatCallback); + if (row_mu->GetError().GetType() != ErrorCode::kOK) { // local check fail + if (!((RowMutationImpl*)row_mu)->IsAsync()) { + ((RowMutationImpl*)row_mu)->RunCallback(); + return; } - std::vector task_list; - task_list.push_back(static_cast((RowMutationImpl*)row_mu)); - int64_t ts = get_micros(); - DistributeTasks(task_list, true, SdkTask::MUTATION); - perf_counter_.hist_async_cost.Add(get_micros() - ts); + ThreadPool::Task task = + std::bind(&RowMutationImpl::RunCallback, static_cast(row_mu)); + thread_pool_->AddTask(task); + return; + } + std::vector task_list; + task_list.push_back(static_cast((RowMutationImpl*)row_mu)); + int64_t ts = get_micros(); + DistributeTasks(task_list, true, SdkTask::MUTATION); + perf_counter_.hist_async_cost.Add(get_micros() - ts); +} + +void TableImpl::ApplyMutation(BatchMutation* batch_mu) { + perf_counter_.user_mu_cnt.Add(1); + BatchMutationImpl* batch_mu_impl = static_cast(batch_mu); + batch_mu_impl->Prepare(OpStatCallback); + if (batch_mu->GetError().GetType() != ErrorCode::kOK) { + if (!batch_mu_impl->IsAsync()) { + batch_mu_impl->RunCallback(); + return; + } + ThreadPool::Task task = std::bind(&BatchMutationImpl::RunCallback, batch_mu_impl); + thread_pool_->AddTask(task); + return; + } + std::vector task_list; + task_list.push_back(static_cast(batch_mu_impl)); + int64_t ts = get_micros(); + DistributeTasks(task_list, true, SdkTask::BATCH_MUTATION); + perf_counter_.hist_async_cost.Add(get_micros() - ts); } void TableImpl::ApplyMutation(const std::vector& row_mutations) { - std::vector task_list; - for (uint32_t i = 0; i < row_mutations.size(); i++) { - perf_counter_.user_mu_cnt.Add(1); - ((RowMutationImpl*)row_mutations[i])->Prepare(OpStatCallback); - if (row_mutations[i]->GetError().GetType() != ErrorCode::kOK) { // local check fail - if (!((RowMutationImpl*)row_mutations[i])->IsAsync()) { - ((RowMutationImpl*)row_mutations[i])->RunCallback(); - continue; - } - ThreadPool::Task task = - std::bind(&RowMutationImpl::RunCallback, - static_cast(row_mutations[i])); - thread_pool_->AddTask(task); - continue; - } - task_list.push_back(static_cast((RowMutationImpl*)row_mutations[i])); - } - int64_t ts = get_micros(); - DistributeTasks(task_list, true, SdkTask::MUTATION); - perf_counter_.hist_async_cost.Add(get_micros() - ts); + std::vector task_list; + for (uint32_t i = 0; i < row_mutations.size(); i++) { + perf_counter_.user_mu_cnt.Add(1); + ((RowMutationImpl*)row_mutations[i])->Prepare(OpStatCallback); + if (row_mutations[i]->GetError().GetType() != ErrorCode::kOK) { // local check fail + if (!((RowMutationImpl*)row_mutations[i])->IsAsync()) { + ((RowMutationImpl*)row_mutations[i])->RunCallback(); + continue; + } + ThreadPool::Task task = + std::bind(&RowMutationImpl::RunCallback, static_cast(row_mutations[i])); + thread_pool_->AddTask(task); + continue; + } + task_list.push_back(static_cast((RowMutationImpl*)row_mutations[i])); + } + int64_t ts = get_micros(); + DistributeTasks(task_list, true, SdkTask::MUTATION); + perf_counter_.hist_async_cost.Add(get_micros() - ts); } bool TableImpl::Put(const std::string& row_key, const std::string& family, - const std::string& qualifier, const int64_t value, - ErrorCode* err) { - std::string value_str((char*)&value, sizeof(int64_t)); - return Put(row_key, family, qualifier, value_str, err); + const std::string& qualifier, const int64_t value, ErrorCode* err) { + std::string value_str((char*)&value, sizeof(int64_t)); + return Put(row_key, family, qualifier, value_str, err); } bool TableImpl::Put(const std::string& row_key, const std::string& family, - const std::string& qualifier, const std::string& value, - ErrorCode* err) { - RowMutation* row_mu = NewRowMutation(row_key); - row_mu->Put(family, qualifier, value); - ApplyMutation(row_mu); - *err = row_mu->GetError(); - delete row_mu; - return (err->GetType() == ErrorCode::kOK ? true : false); + const std::string& qualifier, const std::string& value, ErrorCode* err) { + RowMutation* row_mu = NewRowMutation(row_key); + row_mu->Put(family, qualifier, value); + ApplyMutation(row_mu); + *err = row_mu->GetError(); + delete row_mu; + return (err->GetType() == ErrorCode::kOK ? true : false); } bool TableImpl::Put(const std::string& row_key, const std::string& family, - const std::string& qualifier, const std::string& value, - int64_t timestamp, ErrorCode* err) { - RowMutation* row_mu = NewRowMutation(row_key); - row_mu->Put(family, qualifier, timestamp, value); - ApplyMutation(row_mu); - *err = row_mu->GetError(); - delete row_mu; - return (err->GetType() == ErrorCode::kOK ? true : false); + const std::string& qualifier, const std::string& value, int64_t timestamp, + ErrorCode* err) { + RowMutation* row_mu = NewRowMutation(row_key); + row_mu->Put(family, qualifier, timestamp, value); + ApplyMutation(row_mu); + *err = row_mu->GetError(); + delete row_mu; + return (err->GetType() == ErrorCode::kOK ? true : false); } bool TableImpl::Put(const std::string& row_key, const std::string& family, - const std::string& qualifier, const std::string& value, - int32_t ttl, ErrorCode* err) { - RowMutation* row_mu = NewRowMutation(row_key); - row_mu->Put(family, qualifier, value, ttl); - ApplyMutation(row_mu); - *err = row_mu->GetError(); - delete row_mu; - return (err->GetType() == ErrorCode::kOK ? true : false); + const std::string& qualifier, const std::string& value, int32_t ttl, + ErrorCode* err) { + RowMutation* row_mu = NewRowMutation(row_key); + row_mu->Put(family, qualifier, value, ttl); + ApplyMutation(row_mu); + *err = row_mu->GetError(); + delete row_mu; + return (err->GetType() == ErrorCode::kOK ? true : false); } bool TableImpl::Put(const std::string& row_key, const std::string& family, - const std::string& qualifier, const std::string& value, - int64_t timestamp, int32_t ttl, ErrorCode* err) { - RowMutation* row_mu = NewRowMutation(row_key); - row_mu->Put(family, qualifier, timestamp, value, ttl); - ApplyMutation(row_mu); - *err = row_mu->GetError(); - delete row_mu; - return (err->GetType() == ErrorCode::kOK ? true : false); + const std::string& qualifier, const std::string& value, int64_t timestamp, + int32_t ttl, ErrorCode* err) { + RowMutation* row_mu = NewRowMutation(row_key); + row_mu->Put(family, qualifier, timestamp, value, ttl); + ApplyMutation(row_mu); + *err = row_mu->GetError(); + delete row_mu; + return (err->GetType() == ErrorCode::kOK ? true : false); } bool TableImpl::Add(const std::string& row_key, const std::string& family, const std::string& qualifier, int64_t delta, ErrorCode* err) { - RowMutation* row_mu = NewRowMutation(row_key); - row_mu->Add(family, qualifier, delta); - ApplyMutation(row_mu); - *err = row_mu->GetError(); - delete row_mu; - return (err->GetType() == ErrorCode::kOK ? true : false); + RowMutation* row_mu = NewRowMutation(row_key); + row_mu->Add(family, qualifier, delta); + ApplyMutation(row_mu); + *err = row_mu->GetError(); + delete row_mu; + return (err->GetType() == ErrorCode::kOK ? true : false); } bool TableImpl::AddInt64(const std::string& row_key, const std::string& family, - const std::string& qualifier, int64_t delta, ErrorCode* err) { - RowMutation* row_mu = NewRowMutation(row_key); - row_mu->AddInt64(family, qualifier, delta); - ApplyMutation(row_mu); - *err = row_mu->GetError(); - delete row_mu; - return (err->GetType() == ErrorCode::kOK ? true : false); + const std::string& qualifier, int64_t delta, ErrorCode* err) { + RowMutation* row_mu = NewRowMutation(row_key); + row_mu->AddInt64(family, qualifier, delta); + ApplyMutation(row_mu); + *err = row_mu->GetError(); + delete row_mu; + return (err->GetType() == ErrorCode::kOK ? true : false); } bool TableImpl::PutIfAbsent(const std::string& row_key, const std::string& family, const std::string& qualifier, const std::string& value, ErrorCode* err) { - RowMutation* row_mu = NewRowMutation(row_key); - row_mu->PutIfAbsent(family, qualifier, value); - ApplyMutation(row_mu); - *err = row_mu->GetError(); - delete row_mu; - return (err->GetType() == ErrorCode::kOK ? true : false); + RowMutation* row_mu = NewRowMutation(row_key); + row_mu->PutIfAbsent(family, qualifier, value); + ApplyMutation(row_mu); + *err = row_mu->GetError(); + delete row_mu; + return (err->GetType() == ErrorCode::kOK ? true : false); } bool TableImpl::Append(const std::string& row_key, const std::string& family, - const std::string& qualifier, const std::string& value, - ErrorCode* err) { - RowMutation* row_mu = NewRowMutation(row_key); - row_mu->Append(family, qualifier, value); - ApplyMutation(row_mu); - *err = row_mu->GetError(); - delete row_mu; - return (err->GetType() == ErrorCode::kOK ? true : false); + const std::string& qualifier, const std::string& value, ErrorCode* err) { + RowMutation* row_mu = NewRowMutation(row_key); + row_mu->Append(family, qualifier, value); + ApplyMutation(row_mu); + *err = row_mu->GetError(); + delete row_mu; + return (err->GetType() == ErrorCode::kOK ? true : false); } -bool TableImpl::Flush() { - return false; -} +bool TableImpl::Flush() { return false; } bool TableImpl::CheckAndApply(const std::string& rowkey, const std::string& cf_c, - const std::string& value, const RowMutation& row_mu, - ErrorCode* err) { - err->SetFailed(ErrorCode::kNotImpl); - return false; + const std::string& value, const RowMutation& row_mu, ErrorCode* err) { + err->SetFailed(ErrorCode::kNotImpl); + return false; } -int64_t TableImpl::IncrementColumnValue(const std::string& row, - const std::string& family, - const std::string& qualifier, - int64_t amount, ErrorCode* err) { - err->SetFailed(ErrorCode::kNotImpl); - return 0L; +int64_t TableImpl::IncrementColumnValue(const std::string& row, const std::string& family, + const std::string& qualifier, int64_t amount, + ErrorCode* err) { + err->SetFailed(ErrorCode::kNotImpl); + return 0L; } -void TableImpl::SetWriteTimeout(int64_t timeout_ms) { -} +void TableImpl::SetWriteTimeout(int64_t timeout_ms) {} void TableImpl::Get(RowReader* row_reader) { - perf_counter_.user_read_cnt.Add(1); - ((RowReaderImpl*)row_reader)->Prepare(OpStatCallback); - std::vector row_reader_list; - row_reader_list.push_back(static_cast(row_reader)); - DistributeReaders(row_reader_list, true); + perf_counter_.user_read_cnt.Add(1); + ((RowReaderImpl*)row_reader)->Prepare(OpStatCallback); + std::vector row_reader_list; + row_reader_list.push_back(static_cast(row_reader)); + DistributeReaders(row_reader_list, true); } void TableImpl::Get(const std::vector& row_readers) { - std::vector row_reader_list(row_readers.size()); - for (uint32_t i = 0; i < row_readers.size(); ++i) { - perf_counter_.user_read_cnt.Add(1); - ((RowReaderImpl*)row_readers[i])->Prepare(OpStatCallback); - row_reader_list[i] = static_cast(row_readers[i]); - } - DistributeReaders(row_reader_list, true); + std::vector row_reader_list(row_readers.size()); + for (uint32_t i = 0; i < row_readers.size(); ++i) { + perf_counter_.user_read_cnt.Add(1); + ((RowReaderImpl*)row_readers[i])->Prepare(OpStatCallback); + row_reader_list[i] = static_cast(row_readers[i]); + } + DistributeReaders(row_reader_list, true); } bool TableImpl::Get(const std::string& row_key, const std::string& family, - const std::string& qualifier, int64_t* value, - ErrorCode* err) { - return Get(row_key, family, qualifier, value, 0, err); + const std::string& qualifier, int64_t* value, ErrorCode* err) { + return Get(row_key, family, qualifier, value, 0, err); } bool TableImpl::Get(const std::string& row_key, const std::string& family, - const std::string& qualifier, std::string* value, - ErrorCode* err) { - return Get(row_key, family, qualifier, value, 0, err); + const std::string& qualifier, std::string* value, ErrorCode* err) { + return Get(row_key, family, qualifier, value, 0, err); } bool TableImpl::Get(const std::string& row_key, const std::string& family, - const std::string& qualifier, int64_t* value, - ErrorCode* err, uint64_t snapshot_id) { - return Get(row_key, family, qualifier, value, snapshot_id, err); + const std::string& qualifier, int64_t* value, ErrorCode* err, + uint64_t snapshot_id) { + return Get(row_key, family, qualifier, value, snapshot_id, err); } bool TableImpl::Get(const std::string& row_key, const std::string& family, - const std::string& qualifier, int64_t* value, - uint64_t snapshot_id, ErrorCode* err) { - std::string value_str; - if (Get(row_key, family, qualifier, &value_str, err, snapshot_id) - && value_str.size() == sizeof(int64_t)) { - *value = *(int64_t*)value_str.c_str(); - return true; - } - return false; + const std::string& qualifier, int64_t* value, uint64_t snapshot_id, + ErrorCode* err) { + std::string value_str; + if (Get(row_key, family, qualifier, &value_str, err, snapshot_id) && + value_str.size() == sizeof(int64_t)) { + *value = *(int64_t*)value_str.c_str(); + return true; + } + return false; } bool TableImpl::Get(const std::string& row_key, const std::string& family, - const std::string& qualifier, std::string* value, - ErrorCode* err, uint64_t snapshot_id) { - return Get(row_key, family, qualifier, value, snapshot_id, err); + const std::string& qualifier, std::string* value, ErrorCode* err, + uint64_t snapshot_id) { + return Get(row_key, family, qualifier, value, snapshot_id, err); } bool TableImpl::Get(const std::string& row_key, const std::string& family, - const std::string& qualifier, std::string* value, - uint64_t snapshot_id, ErrorCode* err) { - RowReader* row_reader = NewRowReader(row_key); - row_reader->AddColumn(family, qualifier); - row_reader->SetSnapshot(snapshot_id); - Get(row_reader); - *err = row_reader->GetError(); - if (err->GetType() == ErrorCode::kOK) { - *value = row_reader->Value(); - delete row_reader; - return true; - } + const std::string& qualifier, std::string* value, uint64_t snapshot_id, + ErrorCode* err) { + RowReader* row_reader = NewRowReader(row_key); + row_reader->AddColumn(family, qualifier); + row_reader->SetSnapshot(snapshot_id); + Get(row_reader); + *err = row_reader->GetError(); + if (err->GetType() == ErrorCode::kOK) { + *value = row_reader->Value(); delete row_reader; - return false; + return true; + } + delete row_reader; + return false; } ResultStream* TableImpl::Scan(const ScanDescriptor& desc, ErrorCode* err) { - ScanDescImpl * impl = desc.GetImpl(); - impl->SetTableSchema(table_schema_); - ResultStream * results = NULL; - if (desc.IsAsync() && - (table_schema_.raw_key() == Binary || table_schema_.raw_key() == Readable)) { - VLOG(6) << "activate async-scan"; - results = new ResultStreamBatchImpl(this, impl); - } else { - VLOG(6) << "activate sync-scan"; - results = new ResultStreamSyncImpl(this, impl); - } - return results; -} - -void TableImpl::ScanTabletSync(ResultStreamSyncImpl* stream) { - ScanTabletAsync(stream); - stream->Wait(); + ScanDescImpl impl(*desc.GetImpl()); + if (impl.GetFilterDesc() && impl.GetMaxVersion() != 1) { + LOG(ERROR) << "when scan with a filter, only support max_version of scanner == 1"; + return NULL; + } + if (IsHashTable()) { + if (!impl.GetStartRowKey().empty()) { + impl.SetStart(hash_method_(impl.GetStartRowKey()), impl.GetStartColumnFamily(), + impl.GetStartQualifier(), impl.GetStartTimeStamp()); + } + if (!impl.GetEndRowKey().empty()) { + impl.SetEnd(hash_method_(impl.GetEndRowKey())); + } + } + impl.SetTableSchema(table_schema_); + ResultStream* results = NULL; + VLOG(6) << "activate async-scan"; + results = new ResultStreamImpl(this, &impl); + return results; } void TableImpl::ScanTabletAsync(ResultStreamImpl* stream) { - ScanTask* scan_task = new ScanTask; - scan_task->stream = stream; - stream->GetRpcHandle(&scan_task->request, &scan_task->response); - ScanTabletAsync(scan_task, true); + ScanTask* scan_task = new ScanTask; + scan_task->stream = stream; + stream->GetRpcHandle(&scan_task->request, &scan_task->response); + ScanTabletAsync(scan_task, true); } void TableImpl::ScanTabletAsync(ScanTask* scan_task, bool called_by_user) { - if (called_by_user) { - scan_task->SetId(next_task_id_.Inc()); - task_pool_.PutTask(scan_task); - } - - const std::string& row_key = scan_task->stream->GetScanDesc()->GetStartRowKey(); - std::string server_addr; - if (GetTabletAddrOrScheduleUpdateMeta(row_key, scan_task, &server_addr)) { - CommitScan(scan_task, server_addr); - } -} - -void TableImpl::CommitScan(ScanTask* scan_task, - const std::string& server_addr) { - tabletnode::TabletNodeClient tabletnode_client(thread_pool_, server_addr); - ResultStreamImpl* stream = scan_task->stream; - ScanTabletRequest* request = scan_task->request; - ScanTabletResponse* response = scan_task->response; - response->Clear(); - - ScanDescImpl* impl = stream->GetScanDesc(); - request->set_sequence_id(last_sequence_id_++); - request->set_table_name(name_); - request->set_start(impl->GetStartRowKey()); - request->set_end(impl->GetEndRowKey()); - request->set_snapshot_id(impl->GetSnapshot()); - request->set_timeout(impl->GetPackInterval()); - if (impl->GetStartColumnFamily() != "") { - request->set_start_family(impl->GetStartColumnFamily()); - } - if (impl->GetStartQualifier() != "") { - request->set_start_qualifier(impl->GetStartQualifier()); - } - if (impl->GetStartTimeStamp() != 0) { - request->set_start_timestamp(impl->GetStartTimeStamp()); - } - if (impl->GetMaxVersion() != 0) { - request->set_max_version(impl->GetMaxVersion()); - } - request->set_max_qualifiers(impl->GetMaxQualifiers()); - if (impl->GetBufferSize() != 0) { - request->set_buffer_limit(impl->GetBufferSize()); - } - if (impl->GetNumberLimit() != 0) { - request->set_number_limit(impl->GetNumberLimit()); - } - if (impl->GetTimerRange() != NULL) { - TimeRange* time_range = request->mutable_timerange(); - time_range->CopyFrom(*(impl->GetTimerRange())); - } - if (impl->GetFilterString().size() > 0) { - FilterList* filter_list = request->mutable_filter_list(); - filter_list->CopyFrom(impl->GetFilterList()); - } - for (int32_t i = 0; i < impl->GetSizeofColumnFamilyList(); ++i) { - tera::ColumnFamily* column_family = request->add_cf_list(); - column_family->CopyFrom(*(impl->GetColumnFamily(i))); - } - - VLOG(20) << "table " << request->table_name() - << ", start_key " << request->start() - << ", end_key " << request->end() - << ", scan to " << server_addr; - request->set_timestamp(get_micros()); - std::function done = - std::bind(&TableImpl::ScanCallBackWrapper, std::weak_ptr(shared_from_this()), - scan_task, _1, _2, _3, _4); - tabletnode_client.ScanTablet(request, response, done); -} - -void TableImpl::ScanCallBackWrapper(std::weak_ptr weak_ptr_table, - ScanTask* scan_task, - ScanTabletRequest* request, - ScanTabletResponse* response, + if (called_by_user) { + scan_task->SetId(next_task_id_.Inc()); + task_pool_.PutTask(scan_task); + } + + const std::string& row_key = scan_task->stream->GetScanDesc()->GetStartRowKey(); + std::string server_addr; + if (GetTabletAddrOrScheduleUpdateMeta(row_key, scan_task, &server_addr)) { + CommitScan(scan_task, server_addr); + } +} + +void TableImpl::CommitScan(ScanTask* scan_task, const std::string& server_addr) { + tabletnode::TabletNodeClient tabletnode_client(thread_pool_, server_addr); + ResultStreamImpl* stream = scan_task->stream; + ScanTabletRequest* request = scan_task->request; + ScanTabletResponse* response = scan_task->response; + response->Clear(); + + ScanDescImpl* impl = stream->GetScanDesc(); + request->set_sequence_id(last_sequence_id_++); + request->set_table_name(name_); + request->set_start(impl->GetStartRowKey()); + request->set_end(impl->GetEndRowKey()); + request->set_snapshot_id(impl->GetSnapshot()); + request->set_timeout(impl->GetPackInterval()); + if (impl->GetStartColumnFamily() != "") { + request->set_start_family(impl->GetStartColumnFamily()); + } + if (impl->GetStartQualifier() != "") { + request->set_start_qualifier(impl->GetStartQualifier()); + } + if (impl->GetStartTimeStamp() != 0) { + request->set_start_timestamp(impl->GetStartTimeStamp()); + } + if (impl->GetMaxVersion() != 0) { + request->set_max_version(impl->GetMaxVersion()); + } + request->set_max_qualifiers(impl->GetMaxQualifiers()); + if (impl->GetBufferSize() != 0) { + request->set_buffer_limit(impl->GetBufferSize()); + } + if (impl->GetNumberLimit() != 0) { + request->set_number_limit(impl->GetNumberLimit()); + } + if (impl->GetTimerRange() != NULL) { + TimeRange* time_range = request->mutable_timerange(); + time_range->CopyFrom(*(impl->GetTimerRange())); + } + if (impl->GetFilterDesc()) { + request->mutable_filter()->CopyFrom(*(impl->GetFilterDesc())); + } + for (int32_t i = 0; i < impl->GetSizeofColumnFamilyList(); ++i) { + tera::ColumnFamily* column_family = request->add_cf_list(); + column_family->CopyFrom(*(impl->GetColumnFamily(i))); + } + + VLOG(20) << "table " << request->table_name() << ", start_key " << request->start() + << ", end_key " << request->end() << ", scan to " << server_addr + << "timeout:" << request->timeout(); + request->set_timestamp(get_micros()); + + access_builder_->BuildRequest(request); + + std::function done = + std::bind(&TableImpl::ScanCallBackWrapper, std::weak_ptr(shared_from_this()), + scan_task, _1, _2, _3, _4); + tabletnode_client.ScanTablet(request, response, done); +} + +void TableImpl::ScanCallBackWrapper(std::weak_ptr weak_ptr_table, ScanTask* scan_task, + ScanTabletRequest* request, ScanTabletResponse* response, bool failed, int error_code) { - auto table = weak_ptr_table.lock(); - if (!table) { - return; - } - table->ScanCallBack(scan_task, request, response, failed, error_code); -} - -void TableImpl::ScanCallBack(ScanTask* scan_task, - ScanTabletRequest* request, - ScanTabletResponse* response, - bool failed, int error_code) { - perf_counter_.rpc_s.Add(get_micros() - request->timestamp()); - perf_counter_.rpc_s_cnt.Inc(); - ResultStreamImpl* stream = scan_task->stream; - - if (failed) { - if (error_code == sofa::pbrpc::RPC_ERROR_SERVER_SHUTDOWN || - error_code == sofa::pbrpc::RPC_ERROR_SERVER_UNREACHABLE || - error_code == sofa::pbrpc::RPC_ERROR_SERVER_UNAVAILABLE) { - response->set_status(kServerError); - } else if (error_code == sofa::pbrpc::RPC_ERROR_REQUEST_CANCELED || - error_code == sofa::pbrpc::RPC_ERROR_SEND_BUFFER_FULL) { - response->set_status(kClientError); - } else if (error_code == sofa::pbrpc::RPC_ERROR_CONNECTION_CLOSED || - error_code == sofa::pbrpc::RPC_ERROR_RESOLVE_ADDRESS) { - response->set_status(kConnectError); - } else if (error_code == sofa::pbrpc::RPC_ERROR_REQUEST_TIMEOUT) { - response->set_status(kRPCTimeout); - } else { - response->set_status(kRPCError); - } + auto table = weak_ptr_table.lock(); + if (!table) { + return; + } + table->ScanCallBack(scan_task, request, response, failed, error_code); +} + +void TableImpl::ScanCallBack(ScanTask* scan_task, ScanTabletRequest* request, + ScanTabletResponse* response, bool failed, int error_code) { + perf_counter_.rpc_s.Add(get_micros() - request->timestamp()); + perf_counter_.rpc_s_cnt.Inc(); + ResultStreamImpl* stream = scan_task->stream; + + if (failed) { + if (error_code == sofa::pbrpc::RPC_ERROR_SERVER_SHUTDOWN || + error_code == sofa::pbrpc::RPC_ERROR_SERVER_UNREACHABLE || + error_code == sofa::pbrpc::RPC_ERROR_SERVER_UNAVAILABLE) { + response->set_status(kServerError); + } else if (error_code == sofa::pbrpc::RPC_ERROR_REQUEST_CANCELED || + error_code == sofa::pbrpc::RPC_ERROR_SEND_BUFFER_FULL) { + response->set_status(kClientError); + } else if (error_code == sofa::pbrpc::RPC_ERROR_CONNECTION_CLOSED || + error_code == sofa::pbrpc::RPC_ERROR_RESOLVE_ADDRESS) { + response->set_status(kConnectError); + } else if (error_code == sofa::pbrpc::RPC_ERROR_REQUEST_TIMEOUT) { + response->set_status(kRPCTimeout); + } else { + response->set_status(kRPCError); } + } - StatusCode err = response->status(); - if (err != kTabletNodeOk && err != kSnapshotNotExist) { - VLOG(10) << "fail to scan table: " << name_ - << " errcode: " << StatusCodeToString(err); - } + StatusCode err = response->status(); + if (err != kTabletNodeOk && err != kSnapshotNotExist) { + VLOG(10) << "fail to scan table: " << name_ << " errcode: " << StatusCodeToString(err); + } - scan_task->SetInternalError(err); - if (err == kTabletNodeOk || - err == kSnapshotNotExist || - stream->GetScanDesc()->IsAsync() || // batch scan retry internal - scan_task->RetryTimes() >= static_cast(FLAGS_tera_sdk_retry_times)) { - if (err == kKeyNotInRange || err == kConnectError) { - ScheduleUpdateMeta(stream->GetScanDesc()->GetStartRowKey(), - scan_task->GetMetaTimeStamp()); - } - stream->OnFinish(request, response); - stream->ReleaseRpcHandle(request, response); - task_pool_.PopTask(scan_task->GetId()); - CHECK_EQ(scan_task->GetRef(), 2); - delete scan_task; - } else { - scan_task->IncRetryTimes(); - ThreadPool::Task retry_task = - std::bind((void (TableImpl::*)(ScanTask*, bool))&TableImpl::ScanTabletAsync, - this, scan_task, false); - CHECK(scan_task->RetryTimes() > 0); - int64_t retry_interval = - static_cast(pow(FLAGS_tera_sdk_delay_send_internal, - scan_task->RetryTimes() - 1) * 1000); - thread_pool_->DelayTask(retry_interval, retry_task); - } + scan_task->SetInternalError(err); + if (err == kKeyNotInRange || err == kConnectError) { + ScheduleUpdateMeta(stream->GetScanDesc()->GetStartRowKey(), scan_task->GetMetaTimeStamp()); + } + if (err == kNotPermission) { + // Couldn't stop session_retry + // TODO: scan add Cancel() method for this scaning tablet + VLOG(10) << "fail to scan table: " << name_ << " errcode: " << StatusCodeToString(err); + } + stream->OnFinish(request, response); + stream->ReleaseRpcHandle(request, response); + task_pool_.PopTask(scan_task->GetId()); + CHECK_EQ(scan_task->GetRef(), 2); + delete scan_task; } -void TableImpl::SetReadTimeout(int64_t timeout_ms) { -} +void TableImpl::SetReadTimeout(int64_t timeout_ms) {} bool TableImpl::LockRow(const std::string& rowkey, RowLock* lock, ErrorCode* err) { - err->SetFailed(ErrorCode::kNotImpl); - return false; + err->SetFailed(ErrorCode::kNotImpl); + return false; } -bool TableImpl::GetStartEndKeys(std::string* start_key, std::string* end_key, - ErrorCode* err) { - err->SetFailed(ErrorCode::kNotImpl); - return false; +bool TableImpl::GetStartEndKeys(std::string* start_key, std::string* end_key, ErrorCode* err) { + err->SetFailed(ErrorCode::kNotImpl); + return false; } -bool TableImpl::OpenInternal(ErrorCode* err) { - if (!UpdateTableMeta(err)) { - LOG(ERROR) << "fail to update table meta."; - return false; - } - if (FLAGS_tera_sdk_cookie_enabled) { - if (!RestoreCookie()) { - LOG(ERROR) << "fail to restore cookie."; - return false; - } - EnableCookieUpdateTimer(); - } - if (FLAGS_tera_sdk_perf_counter_enabled) { - DumpPerfCounterLogDelay(); - } - LOG(INFO) << "open table " << name_ << " at cluster " << cluster_->ClusterId(); - return true; -} +bool TableImpl::OpenInternal(std::function hash_method, + ErrorCode* err) { + if (!UpdateTableMeta(err)) { + LOG(ERROR) << "fail to update table meta."; + return false; + } -void TableImpl::DistributeTasks(const std::vector& task_list, - bool called_by_user, - SdkTask::TYPE task_type) { - typedef std::map > TsTaskMap; - TsTaskMap ts_task_list; - int64_t sync_min_timeout = -1; - std::vector sync_task_list; - - int64_t max_pending_counter; - Counter* task_cnt = NULL; - Counter* pending_counter = NULL; - SdkTask::TimeoutFunc timeout_task; - std::string err_reason; - if (task_type == SdkTask::MUTATION) { - task_cnt = &(perf_counter_.mutate_cnt); - pending_counter = &(cur_commit_pending_counter_); - max_pending_counter = max_commit_pending_num_; - err_reason = "pending too much mutations, try it later."; - timeout_task = std::bind(&TableImpl::MutationTimeout, this, _1); - } else if (task_type == SdkTask::READ) { - task_cnt = &(perf_counter_.reader_cnt); - pending_counter = &(cur_reader_pending_counter_); - max_pending_counter = max_reader_pending_num_; - err_reason = "pending too much readers, try it later."; - timeout_task = std::bind(&TableImpl::ReaderTimeout, this, _1); - } else { - assert(0); - } + if (IsHashTable() && hash_method) { + hash_method_ = hash_method; + } - for (uint32_t i = 0; called_by_user && i < task_list.size(); i++) { - SdkTask* task = (SdkTask*)task_list[i]; - if (!task->IsAsync()) { - sync_task_list.push_back(task); - int64_t task_timeout = task->TimeOut() > 0 ? task->TimeOut() : timeout_; - if (task_timeout > 0 && (sync_min_timeout <= 0 || sync_min_timeout > task_timeout)) { - sync_min_timeout = task_timeout; - } - } + if (FLAGS_tera_sdk_cookie_enabled) { + if (!RestoreCookie()) { + LOG(ERROR) << "fail to restore cookie."; + return false; } + EnableCookieUpdateTimer(); + } + if (FLAGS_tera_sdk_perf_counter_enabled) { + DumpPerfCounterLogDelay(); + } + LOG(INFO) << "open table " << name_ << " at cluster " << cluster_->ClusterId(); - for (uint32_t i = 0; i < task_list.size(); i++) { - SdkTask* task = (SdkTask*)task_list[i]; - task_cnt->Inc(); - if (called_by_user) { - task->SetId(next_task_id_.Inc()); - - int64_t task_timeout = -1; - if (!task->IsAsync()) { - task_timeout = sync_min_timeout; - } else { - task_timeout = task->TimeOut() > 0 ? task->TimeOut() : timeout_; - } - perf_counter_.total_task_cnt.Inc(); - task_pool_.PutTask(task, task_timeout, timeout_task); - } + return true; +} - // flow control - if (called_by_user - && pending_counter->Inc() > max_pending_counter - && task->IsAsync()) { - if (FLAGS_tera_sdk_async_blocking_enabled) { - while (pending_counter->Get() > max_pending_counter) { - usleep(100000); - } - } else { - pending_counter->Dec(); - task->SetError(ErrorCode::kBusy, err_reason); - ThreadPool::Task break_task = - std::bind(&TableImpl::BreakRequest, this, task->GetId()); - task->DecRef(); - thread_pool_->AddTask(break_task); - continue; - } +void TableImpl::DistributeTasks(const std::vector& task_list, bool called_by_user, + SdkTask::TYPE task_type) { + typedef std::map > TsTaskMap; + TsTaskMap ts_task_list; + int64_t read_sync_min_timeout = -1; + int64_t write_sync_min_timeout = -1; + std::vector sync_task_list; + + int64_t max_pending_counter; + Counter* pending_counter = NULL; + SdkTask::TimeoutFunc timeout_task = std::bind(&TableImpl::TaskTimeout, this, _1); + if (task_type == SdkTask::MUTATION) { + pending_counter = &(cur_commit_pending_counter_); + max_pending_counter = max_commit_pending_num_; + } else if (task_type == SdkTask::READ) { + pending_counter = &(cur_reader_pending_counter_); + max_pending_counter = max_reader_pending_num_; + } else if (task_type == SdkTask::BATCH_MUTATION) { + pending_counter = &(cur_commit_pending_counter_); + max_pending_counter = max_commit_pending_num_; + } else { + assert(0); + } + + for (uint32_t i = 0; called_by_user && i < task_list.size(); i++) { + SdkTask* task = (SdkTask*)task_list[i]; + if (!task->IsAsync()) { + sync_task_list.push_back(task); + int64_t task_timeout; + if (task->Type() == SdkTask::READ) { + task_timeout = task->TimeOut() > 0 ? task->TimeOut() : read_timeout_; + if (task_timeout > 0 && + (read_sync_min_timeout <= 0 || read_sync_min_timeout > task_timeout)) { + read_sync_min_timeout = task_timeout; } - - std::string server_addr; - if (!GetTabletAddrOrScheduleUpdateMeta(task->RowKey(), - task, &server_addr)) { - perf_counter_.meta_sched_cnt.Inc(); - continue; + } else { + task_timeout = task->TimeOut() > 0 ? task->TimeOut() : write_timeout_; + if (task_timeout > 0 && + (write_sync_min_timeout <= 0 || write_sync_min_timeout > task_timeout)) { + write_sync_min_timeout = task_timeout; } - ts_task_list[server_addr].push_back(task); - } - - TsTaskMap::iterator it = ts_task_list.begin(); - for (; it != ts_task_list.end(); ++it) { - PackSdkTasks(it->first, it->second, task_type); + } } + } - // 从现在开始,所有异步的row_mutation都不可以再操作了,因为随时会被用户释放 - // 不是用户调用的,立即返回 - if (!called_by_user) { - return; - } - - // 等待同步操作返回或超时 - for (uint32_t i = 0; i < sync_task_list.size(); i++) { + for (uint32_t i = 0; i < task_list.size(); i++) { + SdkTask* task = (SdkTask*)task_list[i]; + perf_counter_.GetTaskCnt(task).Inc(); + if (called_by_user) { + task->SetId(next_task_id_.Inc()); + + int64_t task_timeout = -1; + if (!task->IsAsync()) { + task_timeout = + task->Type() == SdkTask::READ ? read_sync_min_timeout : write_sync_min_timeout; + } else { + task_timeout = task->TimeOut() > 0 + ? task->TimeOut() + : (task->Type() == SdkTask::READ ? read_timeout_ : write_timeout_); + } + perf_counter_.total_task_cnt.Inc(); + task_pool_.PutTask(task, task_timeout, timeout_task); + } + + // flow control + if (called_by_user && pending_counter->Inc() > max_pending_counter && task->IsAsync()) { + if (FLAGS_tera_sdk_async_blocking_enabled) { while (pending_counter->Get() > max_pending_counter) { - usleep(100000); + usleep(100000); } - SdkTask* task = (SdkTask*)sync_task_list[i]; - task->Wait(); + } else { + pending_counter->Dec(); + const std::string& err_reason = + "pending too much " + SdkTask::GetTypeName(task_type) + ", try it later."; + task->SetError(ErrorCode::kBusy, err_reason); + ThreadPool::Task break_task = std::bind(&TableImpl::BreakRequest, this, task->GetId()); + task->DecRef(); + thread_pool_->AddTask(break_task); + continue; + } + } + + std::string server_addr; + if (!GetTabletAddrOrScheduleUpdateMeta(task->InternalRowKey(), task, &server_addr)) { + perf_counter_.meta_sched_cnt.Inc(); + continue; + } + task->SetServerAddr(server_addr); + ts_task_list[server_addr].push_back(task); + } + + TsTaskMap::iterator it = ts_task_list.begin(); + for (; it != ts_task_list.end(); ++it) { + PackSdkTasks(it->first, it->second, task_type); + } + + // 从现在开始,所有异步的row_mutation都不可以再操作了,因为随时会被用户释放 + // 不是用户调用的,立即返回 + if (!called_by_user) { + return; + } + + // 等待同步操作返回或超时 + for (uint32_t i = 0; i < sync_task_list.size(); i++) { + while (pending_counter->Get() > max_pending_counter) { + usleep(100000); + } + SdkTask* task = (SdkTask*)sync_task_list[i]; + task->Wait(); + } +} + +void TableImpl::DistributeDelayTasks( + const std::map*>& retry_times_list, SdkTask::TYPE task_type) { + for (auto it = retry_times_list.begin(); it != retry_times_list.end(); ++it) { + int64_t retry_interval = + static_cast(pow(FLAGS_tera_sdk_delay_send_internal, it->first) * 1000); + ThreadPool::Task retry_task = + std::bind(&TableImpl::DistributeTasksById, this, it->second, task_type); + thread_pool_->DelayTask(retry_interval, retry_task); + } +} + +void TableImpl::CollectFailedTasks(int64_t task_id, SdkTask::TYPE type, StatusCode err, + std::vector* not_in_range_list, + std::map*>* retry_times_list) { + perf_counter_.GetTaskFailCnt(type).Inc(); + SdkTask* task = task_pool_.GetTask(task_id); + if (task == NULL) { + VLOG(10) << SdkTask::GetTypeName(type) << task_id << " fail but timeout"; + return; + } + + VLOG(10) << "fail to " << SdkTask::GetTypeName(type) << " table: " << name_ + << " errcode: " << StatusCodeToString(err); + + CHECK_EQ(task->Type(), type); + task->SetInternalError(err); + + task->IncRetryTimes(); + if (err == kKeyNotInRange) { + perf_counter_.GetRangeCnt(task).Inc(); + not_in_range_list->push_back(task); + } else { + std::vector* retry_task_id_list = NULL; + std::map*>::iterator it = + retry_times_list->find(task->RetryTimes()); + if (it != retry_times_list->end()) { + retry_task_id_list = it->second; + } else { + retry_task_id_list = new std::vector; + (*retry_times_list)[task->RetryTimes()] = retry_task_id_list; + } + retry_task_id_list->push_back(task->GetId()); + task->DecRef(); + } +} + +void TableImpl::CommitBatchMutations(const std::string& server_addr, + std::vector& mu_list) { + tabletnode::TabletNodeClient tabletnode_client_async(thread_pool_, server_addr); + WriteTabletRequest* request = new WriteTabletRequest; + WriteTabletResponse* response = new WriteTabletResponse; + request->set_sequence_id(last_sequence_id_++); + request->set_tablet_name(name_); + request->set_is_sync(FLAGS_tera_sdk_write_sync); + + access_builder_->BuildRequest(request); + + bool is_instant = false; + std::vector* mu_id_list = new std::vector; + for (uint32_t i = 0; i < mu_list.size(); ++i) { + BatchMutationImpl* batch_mutation = mu_list[i]; + if (!request->has_client_timeout_ms() || + (request->has_client_timeout_ms() && + request->client_timeout_ms() > batch_mutation->TimeOut())) { + request->set_client_timeout_ms(batch_mutation->TimeOut()); + } + for (const auto& row_key : batch_mutation->GetRows()) { + RowMutationSequence* mu_seq = request->add_row_list(); + if (IsHashTable()) { + mu_seq->set_row_key(GetHashMethod()(row_key)); + } else { + mu_seq->set_row_key(row_key); + } + for (uint32_t j = 0; j < batch_mutation->MutationNum(row_key); j++) { + const RowMutation::Mutation& mu = batch_mutation->GetMutation(row_key, j); + tera::Mutation* mutation = mu_seq->add_mutation_sequence(); + SerializeMutation(mu, mutation); + } + } + mu_id_list->push_back(batch_mutation->GetId()); + is_instant |= !batch_mutation->IsAsync(); + batch_mutation->AddCommitTimes(); + batch_mutation->DecRef(); + } + request->set_is_instant(is_instant); + + VLOG(20) << "commit " << mu_list.size() << " batch mutations to " << server_addr; + request->set_timestamp(get_micros()); + std::function done = + std::bind(&TableImpl::BatchMutateCallBackWrapper, + std::weak_ptr(shared_from_this()), mu_id_list, _1, _2, _3, _4); + tabletnode_client_async.WriteTablet(request, response, done); +} + +void TableImpl::BatchMutateCallBackWrapper(std::weak_ptr weak_ptr_table, + std::vector* mu_id_list, + WriteTabletRequest* request, + WriteTabletResponse* response, bool failed, + int error_code) { + auto table = weak_ptr_table.lock(); + if (!table) { + return; + } + table->BatchMutateCallBack(mu_id_list, request, response, failed, error_code); +} + +void TableImpl::BatchMutateCallBack(std::vector* mu_id_list, WriteTabletRequest* request, + WriteTabletResponse* response, bool failed, int error_code) { + perf_counter_.rpc_w.Add(get_micros() - request->timestamp()); + perf_counter_.rpc_w_cnt.Inc(); + if (failed) { + if (error_code == sofa::pbrpc::RPC_ERROR_SERVER_SHUTDOWN || + error_code == sofa::pbrpc::RPC_ERROR_SERVER_UNREACHABLE || + error_code == sofa::pbrpc::RPC_ERROR_SERVER_UNAVAILABLE) { + response->set_status(kServerError); + } else if (error_code == sofa::pbrpc::RPC_ERROR_REQUEST_CANCELED || + error_code == sofa::pbrpc::RPC_ERROR_SEND_BUFFER_FULL) { + response->set_status(kClientError); + } else if (error_code == sofa::pbrpc::RPC_ERROR_CONNECTION_CLOSED || + error_code == sofa::pbrpc::RPC_ERROR_RESOLVE_ADDRESS) { + response->set_status(kConnectError); + } else if (error_code == sofa::pbrpc::RPC_ERROR_REQUEST_TIMEOUT) { + response->set_status(kRPCTimeout); + } else { + response->set_status(kRPCError); + } + } + + bool rpc_timeout_timer_reset = (kRPCTimeout != response->status()); + std::map*> retry_times_list; + std::vector not_in_range_list; + for (uint32_t i = 0; i < mu_id_list->size(); ++i) { + const std::string& row = request->row_list(i).row_key(); + int64_t mu_id = (*mu_id_list)[i]; + if (rpc_timeout_timer_reset) { + SdkTask* task = task_pool_.GetTask(mu_id); + if (task == NULL) { + VLOG(10) << "mutation " << mu_id << " finish but timeout"; + } else if (!task->GetServerAddr().empty()) { + MutexLock lock(&rpc_timeout_duration_mutex_); + rpc_timeout_duration_[task->GetServerAddr()] = get_millis(); + rpc_timeout_timer_reset = false; + } else { + VLOG(20) << "task ServerAddr is not assigned"; + } + if (task != NULL) { + task->DecRef(); + } } + StatusCode err = response->status(); + if (err == kTabletNodeOk) { + err = response->row_status_list(i); + } + + if (err == kTabletNodeOk || err == kTxnFail || err == kTableInvalidArg) { + perf_counter_.mutate_ok_cnt.Inc(); + SdkTask* task = task_pool_.PopTask(mu_id); + if (task == NULL) { + VLOG(10) << "mutation " << mu_id << " finish but timeout: " << DebugString(row); + continue; + } + CHECK_EQ(task->Type(), SdkTask::BATCH_MUTATION); + CHECK_EQ(task->GetRef(), 1); + BatchMutationImpl* batch_mutation = (BatchMutationImpl*)task; + if (err == kTabletNodeOk) { + batch_mutation->SetError(ErrorCode::kOK); + } else if (err == kTxnFail) { + batch_mutation->SetError(ErrorCode::kTxnFail, "transaction commit fail"); + } else { + batch_mutation->SetError(ErrorCode::kBadParam, "illegal arg error"); + } + + // only for flow control + cur_commit_pending_counter_.Dec(); + int64_t perf_time = get_micros(); + batch_mutation->RunCallback(); + perf_counter_.user_callback.Add(get_micros() - perf_time); + perf_counter_.user_callback_cnt.Inc(); + continue; + } + CollectFailedTasks(mu_id, SdkTask::BATCH_MUTATION, err, ¬_in_range_list, &retry_times_list); + } + + if (not_in_range_list.size() > 0) { + DistributeTasks(not_in_range_list, false, SdkTask::BATCH_MUTATION); + } + DistributeDelayTasks(retry_times_list, SdkTask::BATCH_MUTATION); + + delete request; + delete response; + delete mu_id_list; } void TableImpl::CommitMutations(const std::string& server_addr, std::vector& mu_list) { - tabletnode::TabletNodeClient tabletnode_client_async(thread_pool_, server_addr); - WriteTabletRequest* request = new WriteTabletRequest; - WriteTabletResponse* response = new WriteTabletResponse; - request->set_sequence_id(last_sequence_id_++); - request->set_tablet_name(name_); - request->set_is_sync(FLAGS_tera_sdk_write_sync); - - bool is_instant = false; - std::vector* mu_id_list = new std::vector; - for (uint32_t i = 0; i < mu_list.size(); ++i) { - RowMutationImpl* row_mutation = mu_list[i]; - RowMutationSequence* mu_seq = request->add_row_list(); - mu_seq->set_row_key(row_mutation->RowKey()); - for (uint32_t j = 0; j < row_mutation->MutationNum(); j++) { - const RowMutation::Mutation& mu = row_mutation->GetMutation(j); - tera::Mutation* mutation = mu_seq->add_mutation_sequence(); - SerializeMutation(mu, mutation); - } - SingleRowTxn* txn = (SingleRowTxn*)(row_mutation->GetTransaction()); - if (txn != NULL) { - txn->Serialize(mu_seq); - } - mu_id_list->push_back(row_mutation->GetId()); - is_instant |= !row_mutation->IsAsync(); - row_mutation->AddCommitTimes(); - row_mutation->DecRef(); - } - request->set_is_instant(is_instant); - - VLOG(20) << "commit " << mu_list.size() << " mutations to " << server_addr; - request->set_timestamp(get_micros()); - std::function done = - std::bind(&TableImpl::MutateCallBackWrapper, std::weak_ptr(shared_from_this()), - mu_id_list, _1, _2, _3, _4); - tabletnode_client_async.WriteTablet(request, response, done); + tabletnode::TabletNodeClient tabletnode_client_async(thread_pool_, server_addr); + WriteTabletRequest* request = new WriteTabletRequest; + WriteTabletResponse* response = new WriteTabletResponse; + request->set_sequence_id(last_sequence_id_++); + request->set_tablet_name(name_); + request->set_is_sync(FLAGS_tera_sdk_write_sync); + + access_builder_->BuildRequest(request); + + bool is_instant = false; + std::vector* mu_id_list = new std::vector; + for (uint32_t i = 0; i < mu_list.size(); ++i) { + RowMutationImpl* row_mutation = mu_list[i]; + RowMutationSequence* mu_seq = request->add_row_list(); + if (!request->has_client_timeout_ms() || + (request->has_client_timeout_ms() && + request->client_timeout_ms() > row_mutation->TimeOut())) { + request->set_client_timeout_ms(row_mutation->TimeOut()); + } + mu_seq->set_row_key(row_mutation->InternalRowKey()); + for (uint32_t j = 0; j < row_mutation->MutationNum(); j++) { + const RowMutation::Mutation& mu = row_mutation->GetMutation(j); + tera::Mutation* mutation = mu_seq->add_mutation_sequence(); + SerializeMutation(mu, mutation); + } + SingleRowTxn* txn = (SingleRowTxn*)(row_mutation->GetTransaction()); + if (txn != NULL) { + txn->Serialize(mu_seq); + } + mu_id_list->push_back(row_mutation->GetId()); + is_instant |= !row_mutation->IsAsync(); + row_mutation->AddCommitTimes(); + row_mutation->DecRef(); + } + request->set_is_instant(is_instant); + + VLOG(20) << "commit " << mu_list.size() << " mutations to " << server_addr + << "timeout:" << request->client_timeout_ms(); + request->set_timestamp(get_micros()); + std::function done = + std::bind(&TableImpl::MutateCallBackWrapper, std::weak_ptr(shared_from_this()), + mu_id_list, _1, _2, _3, _4); + tabletnode_client_async.WriteTablet(request, response, done); } void TableImpl::MutateCallBackWrapper(std::weak_ptr weak_ptr_table, - std::vector* mu_id_list, - WriteTabletRequest* request, - WriteTabletResponse* response, - bool failed, int error_code) { - auto table = weak_ptr_table.lock(); - if (!table) { - return; - } - table->MutateCallBack(mu_id_list, request, response, failed, error_code); -} - -void TableImpl::MutateCallBack(std::vector* mu_id_list, - WriteTabletRequest* request, - WriteTabletResponse* response, - bool failed, int error_code) { - perf_counter_.rpc_w.Add(get_micros() - request->timestamp()); - perf_counter_.rpc_w_cnt.Inc(); - if (failed) { - if (error_code == sofa::pbrpc::RPC_ERROR_SERVER_SHUTDOWN || - error_code == sofa::pbrpc::RPC_ERROR_SERVER_UNREACHABLE || - error_code == sofa::pbrpc::RPC_ERROR_SERVER_UNAVAILABLE) { - response->set_status(kServerError); - } else if (error_code == sofa::pbrpc::RPC_ERROR_REQUEST_CANCELED || - error_code == sofa::pbrpc::RPC_ERROR_SEND_BUFFER_FULL) { - response->set_status(kClientError); - } else if (error_code == sofa::pbrpc::RPC_ERROR_CONNECTION_CLOSED || - error_code == sofa::pbrpc::RPC_ERROR_RESOLVE_ADDRESS) { - response->set_status(kConnectError); - } else if (error_code == sofa::pbrpc::RPC_ERROR_REQUEST_TIMEOUT) { - response->set_status(kRPCTimeout); - } else { - response->set_status(kRPCError); - } - } - - std::map* > retry_times_list; - std::vector not_in_range_list; - for (uint32_t i = 0; i < mu_id_list->size(); ++i) { - const std::string& row = request->row_list(i).row_key(); - int64_t mu_id = (*mu_id_list)[i]; - StatusCode err = response->status(); - if (err == kTabletNodeOk) { - err = response->row_status_list(i); - } - - if (err == kTabletNodeOk || err == kTxnFail || err == kTableInvalidArg) { - perf_counter_.mutate_ok_cnt.Inc(); - SdkTask* task = task_pool_.PopTask(mu_id); - if (task == NULL) { - VLOG(10) << "mutation " << mu_id << " finish but timeout: " << DebugString(row); - continue; - } - CHECK_EQ(task->Type(), SdkTask::MUTATION); - CHECK_EQ(task->GetRef(), 1); - RowMutationImpl* row_mutation = (RowMutationImpl*)task; - if (err == kTabletNodeOk) { - row_mutation->SetError(ErrorCode::kOK); - } else if (err == kTxnFail) { - row_mutation->SetError(ErrorCode::kTxnFail, "transaction commit fail"); - } else { - row_mutation->SetError(ErrorCode::kBadParam, "illegal arg error"); - } - - // only for flow control - cur_commit_pending_counter_.Dec(); - int64_t perf_time = get_micros(); - row_mutation->RunCallback(); - perf_counter_.user_callback.Add(get_micros() - perf_time); - perf_counter_.user_callback_cnt.Inc(); - continue; - } - perf_counter_.mutate_fail_cnt.Inc(); - - VLOG(10) << "fail to mutate table: " << name_ - << " row: " << DebugString(row) - << " errcode: " << StatusCodeToString(err); - - SdkTask* task = task_pool_.GetTask(mu_id); - if (task == NULL) { - VLOG(10) << "mutation " << mu_id << " timeout: " << DebugString(row); - continue; - } - CHECK_EQ(task->Type(), SdkTask::MUTATION); - RowMutationImpl* row_mutation = (RowMutationImpl*)task; - row_mutation->SetInternalError(err); - - if (err == kKeyNotInRange) { - perf_counter_.mutate_range_cnt.Inc(); - row_mutation->IncRetryTimes(); - not_in_range_list.push_back(task); - } else { - row_mutation->IncRetryTimes(); - std::vector* retry_mu_id_list = NULL; - std::map* >::iterator it = - retry_times_list.find(row_mutation->RetryTimes()); - if (it != retry_times_list.end()) { - retry_mu_id_list = it->second; - } else { - retry_mu_id_list = new std::vector; - retry_times_list[row_mutation->RetryTimes()] = retry_mu_id_list; - } - retry_mu_id_list->push_back(mu_id); - row_mutation->DecRef(); - } - } - - if (not_in_range_list.size() > 0) { - DistributeTasks(not_in_range_list, false, SdkTask::MUTATION); - } - std::map* >::iterator it; - for (it = retry_times_list.begin(); it != retry_times_list.end(); ++it) { - int64_t retry_interval = - static_cast(pow(FLAGS_tera_sdk_delay_send_internal, it->first) * 1000); - ThreadPool::Task retry_task = - std::bind(&TableImpl::DistributeMutationsById, this, it->second); - thread_pool_->DelayTask(retry_interval, retry_task); - } - - delete request; - delete response; - delete mu_id_list; -} - -void TableImpl::DistributeMutationsById(std::vector* mu_id_list) { - std::vector task_list; - for (uint32_t i = 0; i < mu_id_list->size(); ++i) { - int64_t mu_id = (*mu_id_list)[i]; - SdkTask* task = task_pool_.GetTask(mu_id); - if (task == NULL) { - VLOG(10) << "mutation " << mu_id << " timeout when retry mutate";; - continue; - } - CHECK_EQ(task->Type(), SdkTask::MUTATION); - task_list.push_back(task); - } - DistributeTasks(task_list, false, SdkTask::MUTATION); - delete mu_id_list; -} - -void TableImpl::MutationTimeout(SdkTask* task) { - perf_counter_.mutate_timeout_cnt.Inc(); - CHECK_NOTNULL(task); - CHECK_EQ(task->Type(), SdkTask::MUTATION); - - RowMutationImpl* row_mutation = (RowMutationImpl*)task; - row_mutation->ExcludeOtherRef(); - - StatusCode err = row_mutation->GetInternalError(); - if (err == kKeyNotInRange || err == kConnectError) { - ScheduleUpdateMeta(row_mutation->RowKey(), - row_mutation->GetMetaTimeStamp()); - } - - std::string err_reason; - if (row_mutation->RetryTimes() == 0) { - perf_counter_.mutate_queue_timeout_cnt.Inc(); - err_reason = StringFormat("commit %lld times, retry 0 times, in %u ms.", - row_mutation->GetCommitTimes(), timeout_); + std::vector* mu_id_list, WriteTabletRequest* request, + WriteTabletResponse* response, bool failed, int error_code) { + auto table = weak_ptr_table.lock(); + if (!table) { + return; + } + table->MutateCallBack(mu_id_list, request, response, failed, error_code); +} + +void TableImpl::MutateCallBack(std::vector* mu_id_list, WriteTabletRequest* request, + WriteTabletResponse* response, bool failed, int error_code) { + perf_counter_.rpc_w.Add(get_micros() - request->timestamp()); + perf_counter_.rpc_w_cnt.Inc(); + if (failed) { + if (error_code == sofa::pbrpc::RPC_ERROR_SERVER_SHUTDOWN || + error_code == sofa::pbrpc::RPC_ERROR_SERVER_UNREACHABLE || + error_code == sofa::pbrpc::RPC_ERROR_SERVER_UNAVAILABLE) { + response->set_status(kServerError); + } else if (error_code == sofa::pbrpc::RPC_ERROR_REQUEST_CANCELED || + error_code == sofa::pbrpc::RPC_ERROR_SEND_BUFFER_FULL) { + response->set_status(kClientError); + } else if (error_code == sofa::pbrpc::RPC_ERROR_CONNECTION_CLOSED || + error_code == sofa::pbrpc::RPC_ERROR_RESOLVE_ADDRESS) { + response->set_status(kConnectError); + } else if (error_code == sofa::pbrpc::RPC_ERROR_REQUEST_TIMEOUT) { + response->set_status(kRPCTimeout); } else { - err_reason = StringFormat("commit %lld times, retry %u times, in %u ms. last error: %s", - row_mutation->GetCommitTimes(), row_mutation->RetryTimes(), - timeout_, StatusCodeToString(err).c_str()); + response->set_status(kRPCError); + } + } + + bool rpc_timeout_timer_reset = (kRPCTimeout != response->status()); + std::map*> retry_times_list; + std::vector not_in_range_list; + for (uint32_t i = 0; i < mu_id_list->size(); ++i) { + const std::string& row = request->row_list(i).row_key(); + int64_t mu_id = (*mu_id_list)[i]; + if (rpc_timeout_timer_reset) { + SdkTask* task = task_pool_.GetTask(mu_id); + if (task == NULL) { + VLOG(10) << "mutation " << mu_id << " finish but timeout"; + } else if (!task->GetServerAddr().empty()) { + MutexLock lock(&rpc_timeout_duration_mutex_); + rpc_timeout_duration_[task->GetServerAddr()] = get_millis(); + rpc_timeout_timer_reset = false; + } else { + VLOG(20) << "task ServerAddr is not assigned"; + } + if (task != NULL) { + task->DecRef(); + } } - row_mutation->SetError(ErrorCode::kTimeout, err_reason); - // only for flow control - cur_commit_pending_counter_.Dec(); - int64_t perf_time = get_micros(); - row_mutation->RunCallback(); - perf_counter_.user_callback.Add(get_micros() - perf_time); - perf_counter_.user_callback_cnt.Inc(); + StatusCode err = response->status(); + if (err == kTabletNodeOk) { + err = response->row_status_list(i); + } + if (err == kTabletNodeOk || err == kTxnFail || err == kTableInvalidArg || + err == kNotPermission) { + perf_counter_.mutate_ok_cnt.Inc(); + SdkTask* task = task_pool_.PopTask(mu_id); + if (task == NULL) { + VLOG(10) << "mutation " << mu_id << " finish but timeout: " << DebugString(row); + continue; + } + CHECK_EQ(task->Type(), SdkTask::MUTATION); + CHECK_EQ(task->GetRef(), 1); + RowMutationImpl* row_mutation = (RowMutationImpl*)task; + if (err == kTabletNodeOk) { + row_mutation->SetError(ErrorCode::kOK); + } else if (err == kTxnFail) { + row_mutation->SetError(ErrorCode::kTxnFail, "transaction commit fail"); + } else if (err == kNotPermission) { + row_mutation->SetError(ErrorCode::kNoAuth, "not permissions"); + } else { + row_mutation->SetError(ErrorCode::kBadParam, "illegal arg error"); + } + + // only for flow control + cur_commit_pending_counter_.Dec(); + int64_t perf_time = get_micros(); + row_mutation->RunCallback(); + perf_counter_.user_callback.Add(get_micros() - perf_time); + perf_counter_.user_callback_cnt.Inc(); + continue; + } + CollectFailedTasks(mu_id, SdkTask::MUTATION, err, ¬_in_range_list, &retry_times_list); + } + + if (not_in_range_list.size() > 0) { + DistributeTasks(not_in_range_list, false, SdkTask::MUTATION); + } + DistributeDelayTasks(retry_times_list, SdkTask::MUTATION); + + delete request; + delete response; + delete mu_id_list; +} + +void TableImpl::DistributeTasksById(std::vector* task_id_list, SdkTask::TYPE type) { + std::vector task_list; + for (uint32_t i = 0; i < task_id_list->size(); ++i) { + int64_t task_id = (*task_id_list)[i]; + SdkTask* task = task_pool_.GetTask(task_id); + if (task == NULL) { + VLOG(10) << SdkTask::GetTypeName(type) << " " << task_id << " timeout when retry"; + continue; + } + task_list.push_back(task); + } + DistributeTasks(task_list, false, type); + delete task_id_list; +} + +void TableImpl::TaskTimeout(SdkTask* task) { + perf_counter_.GetTimeoutCnt(task).Inc(); + CHECK_NOTNULL(task); + + task->ExcludeOtherRef(); + + StatusCode err = task->GetInternalError(); + if (err == kKeyNotInRange || err == kConnectError) { + ScheduleUpdateMeta(task->InternalRowKey(), task->GetMetaTimeStamp()); + } else if (err == kRPCTimeout && !task->GetServerAddr().empty()) { + MutexLock lock(&rpc_timeout_duration_mutex_); + if (rpc_timeout_duration_.find(task->GetServerAddr()) == rpc_timeout_duration_.end()) { + rpc_timeout_duration_[task->GetServerAddr()] = get_millis(); + } else if (get_millis() - rpc_timeout_duration_[task->GetServerAddr()] >= + FLAGS_tera_sdk_update_meta_rpc_timeout_max_ms) { + LOG(WARNING) << "requests on server<" << task->GetServerAddr() << "> continuous " + << "over " << FLAGS_tera_sdk_update_meta_rpc_timeout_max_ms / 1000 + << "s with the response of kRpcTimeout, last succ request time(" + << rpc_timeout_duration_[task->GetServerAddr()] / 1000 + << "), may be the tabletserver is zombie, try to update meta."; + rpc_timeout_duration_[task->GetServerAddr()] = get_millis(); + rpc_timeout_duration_mutex_.Unlock(); + ScheduleUpdateMeta(task->InternalRowKey(), task->GetMetaTimeStamp()); + rpc_timeout_duration_mutex_.Lock(); + } + } + + std::string err_reason; + if (task->RetryTimes() == 0) { + perf_counter_.GetQueueTimeoutCnt(task).Inc(); + err_reason = StringFormat("commit lld times, retry 0 times, in %u ms.", task->GetCommitTimes(), + task->Type() == SdkTask::READ ? read_timeout_ : write_timeout_); + } else { + err_reason = StringFormat("commit %lld times, retry %u times, in %u ms. last error: %s", + task->GetCommitTimes(), task->RetryTimes(), + task->Type() == SdkTask::READ ? read_timeout_ : write_timeout_, + StatusCodeToString(err).c_str()); + } + switch (task->Type()) { + case SdkTask::READ: { + RowReaderImpl* row_reader = (RowReaderImpl*)task; + row_reader->SetError(ErrorCode::kTimeout, err_reason); + cur_reader_pending_counter_.Dec(); + } break; + case SdkTask::BATCH_MUTATION: { + BatchMutationImpl* batch_mutation = (BatchMutationImpl*)task; + batch_mutation->SetError(ErrorCode::kTimeout, err_reason); + cur_commit_pending_counter_.Dec(); + } break; + case SdkTask::MUTATION: { + RowMutationImpl* row_mutation = (RowMutationImpl*)task; + row_mutation->SetError(ErrorCode::kTimeout, err_reason); + cur_commit_pending_counter_.Dec(); + } break; + default: + abort(); + } + int64_t perf_time = get_micros(); + task->RunCallback(); + perf_counter_.user_callback.Add(get_micros() - perf_time); + perf_counter_.user_callback_cnt.Inc(); } void TableImpl::DistributeReaders(const std::vector& row_reader_list, bool called_by_user) { - std::vector task_list; - for (size_t i = 0; i < row_reader_list.size(); ++i) { - task_list.push_back((SdkTask*)(row_reader_list[i])); - } - DistributeTasks(task_list, called_by_user, SdkTask::READ); + std::vector task_list; + for (size_t i = 0; i < row_reader_list.size(); ++i) { + task_list.push_back((SdkTask*)(row_reader_list[i])); + } + DistributeTasks(task_list, called_by_user, SdkTask::READ); } -void TableImpl::CommitReaders(const std::string server_addr, +void TableImpl::CommitReaders(const std::string& server_addr, std::vector& reader_list) { - std::vector* reader_id_list = new std::vector; - tabletnode::TabletNodeClient tabletnode_client_async(thread_pool_, server_addr); - ReadTabletRequest* request = new ReadTabletRequest; - ReadTabletResponse* response = new ReadTabletResponse; - request->set_sequence_id(last_sequence_id_++); - request->set_tablet_name(name_); - request->set_client_timeout_ms(pending_timeout_ms_); - for (uint32_t i = 0; i < reader_list.size(); ++i) { - RowReaderImpl* row_reader = reader_list[i]; - RowReaderInfo* row_reader_info = request->add_row_info_list(); - request->set_snapshot_id(row_reader->GetSnapshot()); - row_reader->ToProtoBuf(row_reader_info); - // row_reader_info->CopyFrom(row_reader->GetRowReaderInfo()); - reader_id_list->push_back(row_reader->GetId()); - row_reader->AddCommitTimes(); - row_reader->DecRef(); - } - VLOG(20) << "commit " << reader_list.size() << " reads to " << server_addr; - request->set_timestamp(get_micros()); - std::function done = - std::bind(&TableImpl::ReaderCallBackWrapper, std::weak_ptr(shared_from_this()), - reader_id_list, _1, _2, _3, _4); - tabletnode_client_async.ReadTablet(request, response, done); + std::vector* reader_id_list = new std::vector; + tabletnode::TabletNodeClient tabletnode_client_async(thread_pool_, server_addr); + ReadTabletRequest* request = new ReadTabletRequest; + ReadTabletResponse* response = new ReadTabletResponse; + request->set_sequence_id(last_sequence_id_++); + request->set_tablet_name(name_); + + access_builder_->BuildRequest(request); + + for (uint32_t i = 0; i < reader_list.size(); ++i) { + RowReaderImpl* row_reader = reader_list[i]; + RowReaderInfo* row_reader_info = request->add_row_info_list(); + if (!request->has_client_timeout_ms() || + (request->has_client_timeout_ms() && + request->client_timeout_ms() > row_reader->TimeOut())) { + request->set_client_timeout_ms(row_reader->TimeOut()); + } + request->set_snapshot_id(row_reader->GetSnapshot()); + row_reader->ToProtoBuf(row_reader_info); + // row_reader_info->CopyFrom(row_reader->GetRowReaderInfo()); + reader_id_list->push_back(row_reader->GetId()); + row_reader->AddCommitTimes(); + row_reader->DecRef(); + } + VLOG(20) << "commit " << reader_list.size() << " reads to " << server_addr + << "timeout:" << request->client_timeout_ms(); + request->set_timestamp(get_micros()); + std::function done = + std::bind(&TableImpl::ReaderCallBackWrapper, std::weak_ptr(shared_from_this()), + reader_id_list, _1, _2, _3, _4); + tabletnode_client_async.ReadTablet(request, response, done); } void TableImpl::ReaderCallBackWrapper(std::weak_ptr weak_ptr_table, std::vector* reader_id_list, - ReadTabletRequest* request, - ReadTabletResponse* response, + ReadTabletRequest* request, ReadTabletResponse* response, bool failed, int error_code) { - auto table = weak_ptr_table.lock(); - if (!table) { - return; - } - table->ReaderCallBack(reader_id_list, request, response, failed, error_code); -} -void TableImpl::ReaderCallBack(std::vector* reader_id_list, - ReadTabletRequest* request, - ReadTabletResponse* response, - bool failed, int error_code) { - perf_counter_.rpc_r.Add(get_micros() - request->timestamp()); - perf_counter_.rpc_r_cnt.Inc(); - if (failed) { - if (error_code == sofa::pbrpc::RPC_ERROR_SERVER_SHUTDOWN || - error_code == sofa::pbrpc::RPC_ERROR_SERVER_UNREACHABLE || - error_code == sofa::pbrpc::RPC_ERROR_SERVER_UNAVAILABLE) { - response->set_status(kServerError); - } else if (error_code == sofa::pbrpc::RPC_ERROR_REQUEST_CANCELED || - error_code == sofa::pbrpc::RPC_ERROR_SEND_BUFFER_FULL) { - response->set_status(kClientError); - } else if (error_code == sofa::pbrpc::RPC_ERROR_CONNECTION_CLOSED || - error_code == sofa::pbrpc::RPC_ERROR_RESOLVE_ADDRESS) { - response->set_status(kConnectError); - } else if (error_code == sofa::pbrpc::RPC_ERROR_REQUEST_TIMEOUT) { - response->set_status(kRPCTimeout); - } else { - response->set_status(kRPCError); - } + auto table = weak_ptr_table.lock(); + if (!table) { + return; + } + table->ReaderCallBack(reader_id_list, request, response, failed, error_code); +} +void TableImpl::ReaderCallBack(std::vector* reader_id_list, ReadTabletRequest* request, + ReadTabletResponse* response, bool failed, int error_code) { + perf_counter_.rpc_r.Add(get_micros() - request->timestamp()); + perf_counter_.rpc_r_cnt.Inc(); + if (failed) { + if (error_code == sofa::pbrpc::RPC_ERROR_SERVER_SHUTDOWN || + error_code == sofa::pbrpc::RPC_ERROR_SERVER_UNREACHABLE || + error_code == sofa::pbrpc::RPC_ERROR_SERVER_UNAVAILABLE) { + response->set_status(kServerError); + } else if (error_code == sofa::pbrpc::RPC_ERROR_REQUEST_CANCELED || + error_code == sofa::pbrpc::RPC_ERROR_SEND_BUFFER_FULL) { + response->set_status(kClientError); + } else if (error_code == sofa::pbrpc::RPC_ERROR_CONNECTION_CLOSED || + error_code == sofa::pbrpc::RPC_ERROR_RESOLVE_ADDRESS) { + response->set_status(kConnectError); + } else if (error_code == sofa::pbrpc::RPC_ERROR_REQUEST_TIMEOUT) { + response->set_status(kRPCTimeout); + } else { + response->set_status(kRPCError); + } + } + bool rpc_timeout_timer_reset = (kRPCTimeout != response->status()); + std::map*> retry_times_list; + std::vector not_in_range_list; + uint32_t row_result_index = 0; + for (uint32_t i = 0; i < reader_id_list->size(); ++i) { + int64_t reader_id = (*reader_id_list)[i]; + if (rpc_timeout_timer_reset) { + SdkTask* task = task_pool_.GetTask(reader_id); + if (task == NULL) { + VLOG(10) << "reader " << reader_id << " success but timeout"; + } else if (!task->GetServerAddr().empty()) { + MutexLock lock(&rpc_timeout_duration_mutex_); + rpc_timeout_duration_[task->GetServerAddr()] = get_millis(); + rpc_timeout_timer_reset = false; + } else { + VLOG(20) << "task ServerAddr is not assigned"; + } + if (task != NULL) { + task->DecRef(); + } } - - std::map* > retry_times_list; - std::vector not_in_range_list; - uint32_t row_result_index = 0; - for (uint32_t i = 0; i < reader_id_list->size(); ++i) { - int64_t reader_id = (*reader_id_list)[i]; - - StatusCode err = response->status(); + StatusCode err = response->status(); + if (err == kTabletNodeOk) { + err = response->detail().status(i); + } + if (err == kTabletNodeOk || err == kKeyNotExist || err == kSnapshotNotExist || + err == kNotPermission) { + perf_counter_.reader_ok_cnt.Inc(); + SdkTask* task = task_pool_.PopTask(reader_id); + if (task == NULL) { + VLOG(10) << "reader " << reader_id << " success but timeout"; if (err == kTabletNodeOk) { - err = response->detail().status(i); - } - if (err == kTabletNodeOk || err == kKeyNotExist || err == kSnapshotNotExist) { - perf_counter_.reader_ok_cnt.Inc(); - SdkTask* task = task_pool_.PopTask(reader_id); - if (task == NULL) { - VLOG(10) << "reader " << reader_id << " success but timeout"; - if (err == kTabletNodeOk) { - // result is timeout, discard it - row_result_index++; - } - continue; - } - CHECK_EQ(task->Type(), SdkTask::READ); - CHECK_EQ(task->GetRef(), 1); - - RowReaderImpl* row_reader = (RowReaderImpl*)task; - if (err == kTabletNodeOk) { - row_reader->SetResult(response->detail().row_result(row_result_index++)); - row_reader->SetError(ErrorCode::kOK); - } else if (err == kKeyNotExist) { - row_reader->SetError(ErrorCode::kNotFound, "not found"); - } else { // err == kSnapshotNotExist - row_reader->SetError(ErrorCode::kNotFound, "snapshot not found"); - } - int64_t perf_time = get_micros(); - row_reader->RunCallback(); - perf_counter_.user_callback.Add(get_micros() - perf_time); - perf_counter_.user_callback_cnt.Inc(); - // only for flow control - cur_reader_pending_counter_.Dec(); - continue; - } - perf_counter_.reader_fail_cnt.Inc(); - - VLOG(10) << "fail to read table: " << name_ - << " errcode: " << StatusCodeToString(err); - - SdkTask* task = task_pool_.GetTask(reader_id); - if (task == NULL) { - VLOG(10) << "reader " << reader_id << " fail but timeout"; - continue; - } - CHECK_EQ(task->Type(), SdkTask::READ); - RowReaderImpl* row_reader = (RowReaderImpl*)task; - row_reader->SetInternalError(err); - - if (err == kKeyNotInRange) { - perf_counter_.reader_range_cnt.Inc(); - row_reader->IncRetryTimes(); - not_in_range_list.push_back(row_reader); - } else { - row_reader->IncRetryTimes(); - std::vector* retry_reader_id_list = NULL; - std::map* >::iterator it = - retry_times_list.find(row_reader->RetryTimes()); - if (it != retry_times_list.end()) { - retry_reader_id_list = it->second; - } else { - retry_reader_id_list = new std::vector; - retry_times_list[row_reader->RetryTimes()] = retry_reader_id_list; - } - retry_reader_id_list->push_back(row_reader->GetId()); - row_reader->DecRef(); - } - } - - if (not_in_range_list.size() > 0) { - DistributeReaders(not_in_range_list, false); - } - std::map* >::iterator it; - for (it = retry_times_list.begin(); it != retry_times_list.end(); ++it) { - int64_t retry_interval = - static_cast(pow(FLAGS_tera_sdk_delay_send_internal, it->first) * 1000); - ThreadPool::Task retry_task = - std::bind(&TableImpl::DistributeReadersById, this, it->second); - thread_pool_->DelayTask(retry_interval, retry_task); - } - - delete request; - delete response; - delete reader_id_list; -} - -void TableImpl::DistributeReadersById(std::vector* reader_id_list) { - std::vector reader_list; - for (size_t i = 0; i < reader_id_list->size(); ++i) { - int64_t reader_id = (*reader_id_list)[i]; - SdkTask* task = task_pool_.GetTask(reader_id); - if (task == NULL) { - VLOG(10) << "reader " << reader_id << " timeout when retry read"; - continue; + // result is timeout, discard it + row_result_index++; } - CHECK_EQ(task->Type(), SdkTask::READ); - reader_list.push_back((RowReaderImpl*)task); - } - DistributeReaders(reader_list, false); - delete reader_id_list; -} - -void TableImpl::ReaderTimeout(SdkTask* task) { - perf_counter_.reader_timeout_cnt.Inc(); - CHECK_NOTNULL(task); - CHECK_EQ(task->Type(), SdkTask::READ); - - RowReaderImpl* row_reader = (RowReaderImpl*)task; - row_reader->ExcludeOtherRef(); - - StatusCode err = row_reader->GetInternalError(); - if (err == kKeyNotInRange || err == kConnectError) { - ScheduleUpdateMeta(row_reader->RowName(), - row_reader->GetMetaTimeStamp()); - } - - std::string err_reason; - if (row_reader->RetryTimes() == 0) { - perf_counter_.reader_queue_timeout_cnt.Inc(); - err_reason = StringFormat("commit %lld times, retry 0 times, in %u ms.", - row_reader->GetCommitTimes(), timeout_); - } else { - err_reason = StringFormat("commit %lld times, retry %u times, in %u ms. last error: %s", - row_reader->GetCommitTimes(), row_reader->RetryTimes(), - timeout_, StatusCodeToString(err).c_str()); - } - row_reader->SetError(ErrorCode::kTimeout, err_reason); - int64_t perf_time = get_micros(); - row_reader->RunCallback(); - perf_counter_.user_callback.Add(get_micros() - perf_time); - perf_counter_.user_callback_cnt.Inc(); - // only for flow control - cur_reader_pending_counter_.Dec(); -} - -void TableImpl::PackSdkTasks(const std::string& server_addr, - std::vector& task_list, + continue; + } + CHECK_EQ(task->Type(), SdkTask::READ); + CHECK_EQ(task->GetRef(), 1); + + RowReaderImpl* row_reader = (RowReaderImpl*)task; + if (err == kTabletNodeOk) { + row_reader->SetResult(response->detail().row_result(row_result_index++)); + row_reader->SetError(ErrorCode::kOK); + } else if (err == kKeyNotExist) { + row_reader->SetError(ErrorCode::kNotFound, "not found"); + } else if (err == kNotPermission) { + row_reader->SetError(ErrorCode::kNoAuth, "not permissions"); + } else { // err == kSnapshotNotExist + row_reader->SetError(ErrorCode::kNotFound, "snapshot not found"); + } + int64_t perf_time = get_micros(); + row_reader->RunCallback(); + perf_counter_.user_callback.Add(get_micros() - perf_time); + perf_counter_.user_callback_cnt.Inc(); + // only for flow control + cur_reader_pending_counter_.Dec(); + continue; + } + CollectFailedTasks(reader_id, SdkTask::READ, err, ¬_in_range_list, &retry_times_list); + } + + if (not_in_range_list.size() > 0) { + DistributeTasks(not_in_range_list, false, SdkTask::READ); + } + DistributeDelayTasks(retry_times_list, SdkTask::READ); + + delete request; + delete response; + delete reader_id_list; +} + +void TableImpl::PackSdkTasks(const std::string& server_addr, std::vector& task_list, SdkTask::TYPE task_type) { - Mutex* mutex = NULL; - std::map* task_batch_map = NULL; - SdkTask::TimeoutFunc task; - uint64_t commit_timeout = 10000; - uint32_t commit_size = commit_size_; - if (task_type == SdkTask::MUTATION) { - mutex = &mutation_batch_mutex_; - task_batch_map = &mutation_batch_map_; - commit_timeout = write_commit_timeout_; - } else if (task_type == SdkTask::READ) { - mutex = &reader_batch_mutex_; - task_batch_map = &reader_batch_map_; - commit_timeout = read_commit_timeout_; - } else { - assert(0); - } - - TaskBatch* task_batch = NULL; - bool is_instant = false; - MutexLock lock(mutex); - for (size_t i = 0; i < task_list.size(); ++i) { - // find existing batch or create a new batch - if (task_batch == NULL) { - std::map::iterator it = task_batch_map->find(server_addr); - if (it != task_batch_map->end()) { - task_batch = it->second; - } else { - task_batch = new TaskBatch; - task_batch->type = task_type; - task_batch->mutex = mutex; - task_batch->task_batch_map = task_batch_map; - task_batch->byte_size = 0; - task_batch->server_addr = server_addr; - task_batch->row_id_list = new std::vector; - - task_batch->SetId(next_task_id_.Inc()); - (*task_batch_map)[server_addr] = task_batch; - SdkTask::TimeoutFunc task = std::bind(&TableImpl::TaskBatchTimeout, this, _1); - task_pool_.PutTask(task_batch, commit_timeout, task); - task_batch->DecRef(); - } - } - - // put task into the batch - SdkTask* sdk_task = task_list[i]; - task_batch->row_id_list->push_back(sdk_task->GetId()); - task_batch->byte_size += sdk_task->Size(); - is_instant |= !sdk_task->IsAsync(); - sdk_task->DecRef(); - - // commit the batch if: - // 1) batch_byte_size >= max_rpc_byte_size - // for the *LAST* batch, commit it if: - // 2) any mutation is sync (flush == true) - // 3) batch_row_num >= min_batch_row_num - // 4) commit timeout - if (task_batch->byte_size >= kMaxRpcSize || - ((i == task_list.size() - 1) && - (is_instant || - (task_batch->row_id_list->size() >= commit_size)))) { - std::vector* task_id_list = task_batch->row_id_list; - task_batch->row_id_list = NULL; - task_batch_map->erase(server_addr); - mutex->Unlock(); - - CommitTasksById(server_addr, *task_id_list, task_type); - delete task_id_list; - task_batch = NULL; - is_instant = false; - mutex->Lock(); - } - } + Mutex* mutex = NULL; + std::map* task_batch_map = NULL; + SdkTask::TimeoutFunc task; + uint64_t commit_timeout = 10000; + uint32_t commit_size = commit_size_; + if (task_type == SdkTask::MUTATION) { + mutex = &mutation_batch_mutex_; + task_batch_map = &mutation_batch_map_; + commit_timeout = write_commit_timeout_; + } else if (task_type == SdkTask::READ) { + mutex = &reader_batch_mutex_; + task_batch_map = &reader_batch_map_; + commit_timeout = read_commit_timeout_; + } else if (task_type == SdkTask::BATCH_MUTATION) { + mutex = &mutation_batch_mutex_; + task_batch_map = &mutation_batch_map_; + commit_timeout = write_commit_timeout_; + } else { + assert(0); + } + + TaskBatch* task_batch = NULL; + bool is_instant = false; + MutexLock lock(mutex); + for (size_t i = 0; i < task_list.size(); ++i) { + // find existing batch or create a new batch + if (task_batch == NULL) { + std::map::iterator it = task_batch_map->find(server_addr); + if (it != task_batch_map->end()) { + task_batch = it->second; + } else { + task_batch = new TaskBatch; + task_batch->type = task_type; + task_batch->mutex = mutex; + task_batch->task_batch_map = task_batch_map; + task_batch->byte_size = 0; + task_batch->server_addr = server_addr; + task_batch->row_id_list = new std::vector; + + task_batch->SetId(next_task_id_.Inc()); + (*task_batch_map)[server_addr] = task_batch; + SdkTask::TimeoutFunc task = std::bind(&TableImpl::TaskBatchTimeout, this, _1); + task_pool_.PutTask(task_batch, commit_timeout, task); + task_batch->DecRef(); + } + } + + // put task into the batch + SdkTask* sdk_task = task_list[i]; + task_batch->row_id_list->push_back(sdk_task->GetId()); + task_batch->byte_size += sdk_task->Size(); + is_instant |= !sdk_task->IsAsync(); + sdk_task->DecRef(); + + // commit the batch if: + // 1) batch_byte_size >= max_rpc_byte_size + // for the *LAST* batch, commit it if: + // 2) any mutation is sync (flush == true) + // 3) batch_row_num >= min_batch_row_num + // 4) commit timeout + if (task_batch->byte_size >= kMaxRpcSize || + ((i == task_list.size() - 1) && + (is_instant || (task_batch->row_id_list->size() >= commit_size)))) { + std::vector* task_id_list = task_batch->row_id_list; + task_batch->row_id_list = NULL; + task_batch_map->erase(server_addr); + mutex->Unlock(); + + CommitTasksById(server_addr, *task_id_list, task_type); + delete task_id_list; + task_batch = NULL; + is_instant = false; + mutex->Lock(); + } + } } void TableImpl::TaskBatchTimeout(SdkTask* task) { - std::vector* task_id_list = NULL; - CHECK_NOTNULL(task); - CHECK_EQ(task->Type(), SdkTask::TASKBATCH); - TaskBatch* task_batch = (TaskBatch*)task; - task_batch->ExcludeOtherRef(); - - const std::string& server_addr = task_batch->server_addr; - SdkTask::TYPE task_type = task_batch->type; - Mutex* mutex = task_batch->mutex; - std::map* task_batch_map = task_batch->task_batch_map; - { - MutexLock lock(mutex); - std::map::iterator it = - task_batch_map->find(server_addr); - if (it != task_batch_map->end() && - task_batch->GetId() == it->second->GetId()) { - task_id_list = task_batch->row_id_list; - task_batch->row_id_list = NULL; - task_batch_map->erase(it); - } + std::vector* task_id_list = NULL; + CHECK_NOTNULL(task); + CHECK_EQ(task->Type(), SdkTask::TASKBATCH); + TaskBatch* task_batch = (TaskBatch*)task; + task_batch->ExcludeOtherRef(); + + const std::string& server_addr = task_batch->server_addr; + SdkTask::TYPE task_type = task_batch->type; + Mutex* mutex = task_batch->mutex; + std::map* task_batch_map = task_batch->task_batch_map; + { + MutexLock lock(mutex); + std::map::iterator it = task_batch_map->find(server_addr); + if (it != task_batch_map->end() && task_batch->GetId() == it->second->GetId()) { + task_id_list = task_batch->row_id_list; + task_batch->row_id_list = NULL; + task_batch_map->erase(it); } + } - if (task_id_list != NULL) { - CommitTasksById(server_addr, *task_id_list, task_type); - delete task_id_list; - } - delete task_batch; + if (task_id_list != NULL) { + CommitTasksById(server_addr, *task_id_list, task_type); + delete task_id_list; + } + delete task_batch; } -void TableImpl::CommitTasksById(const std::string& server_addr, - std::vector& task_id_list, +void TableImpl::CommitTasksById(const std::string& server_addr, std::vector& task_id_list, SdkTask::TYPE task_type) { - std::vector mutation_list; - std::vector reader_list; - - for (size_t i = 0; i < task_id_list.size(); i++) { - int64_t task_id = task_id_list[i]; - SdkTask* task = task_pool_.GetTask(task_id); - if (task == NULL) { - VLOG(10) << "commit task, type " << task_type << ", id " << task_id << " timeout"; - continue; - } - perf_counter_.total_commit_cnt.Inc(); - CHECK_EQ(task->Type(), task_type); - if (task_type == SdkTask::MUTATION) { - mutation_list.push_back((RowMutationImpl*)task); - } else if (task_type == SdkTask::READ) { - reader_list.push_back((RowReaderImpl*)task); - } + std::vector mutation_list; + std::vector reader_list; + std::vector batch_mutation_list; + + for (size_t i = 0; i < task_id_list.size(); i++) { + int64_t task_id = task_id_list[i]; + SdkTask* task = task_pool_.GetTask(task_id); + if (task == NULL) { + VLOG(10) << "commit task, type " << task_type << ", id " << task_id << " timeout"; + continue; } + perf_counter_.total_commit_cnt.Inc(); + CHECK_EQ(task->Type(), task_type); if (task_type == SdkTask::MUTATION) { - CommitMutations(server_addr, mutation_list); + mutation_list.push_back((RowMutationImpl*)task); } else if (task_type == SdkTask::READ) { - CommitReaders(server_addr, reader_list); + reader_list.push_back((RowReaderImpl*)task); + } else if (task_type == SdkTask::BATCH_MUTATION) { + batch_mutation_list.push_back((BatchMutationImpl*)task); } + } + if (task_type == SdkTask::MUTATION) { + CommitMutations(server_addr, mutation_list); + } else if (task_type == SdkTask::READ) { + CommitReaders(server_addr, reader_list); + } else if (task_type == SdkTask::BATCH_MUTATION) { + CommitBatchMutations(server_addr, batch_mutation_list); + } } bool TableImpl::GetTabletMetaForKey(const std::string& key, TabletMeta* meta) { - MutexLock lock(&meta_mutex_); - TabletMetaNode* node = GetTabletMetaNodeForKey(key); - if (node == NULL) { - VLOG(10) << "no meta for key: " << key; - return false; + MutexLock lock(&meta_mutex_); + TabletMetaNode* node = GetTabletMetaNodeForKey(key); + if (node == NULL) { + VLOG(10) << "no meta for key: " << key; + return false; + } + meta->CopyFrom(node->meta); + return true; +} + +bool TableImpl::GetTablet(const std::string& row_key, std::string* tablet) { + TabletMeta meta; + int retry_times = 0; + int wait_time_ms = 10; + bool success = false; + while (retry_times < FLAGS_tera_sdk_get_tablet_retry_times) { + if (IsHashTable()) { + if (!GetTabletMetaForKey(hash_method_(row_key), &meta) || meta.path().empty()) { + ScheduleUpdateMeta(hash_method_(row_key), 0); + } else { + success = true; + } + } else { + if (!GetTabletMetaForKey(row_key, &meta) || meta.path().empty()) { + ScheduleUpdateMeta(row_key, 0); + } else { + success = true; + } + } + + if (!success) { + ++retry_times; + LOG(INFO) << "Get Tablet Failed, retry times: " << retry_times << " row_key: " << row_key + << " wait " << wait_time_ms << " ms for update meta"; + ThisThread::Sleep(wait_time_ms); + wait_time_ms = std::min(wait_time_ms << 1, 10000); + continue; + } else { + *tablet = meta.path(); + return true; } - meta->CopyFrom(node->meta); - return true; -} + } + return false; +}; void TableImpl::BreakScan(ScanTask* scan_task) { - ResultStreamImpl* stream = scan_task->stream; - stream->OnFinish(scan_task->request, - scan_task->response); - stream->ReleaseRpcHandle(scan_task->request, - scan_task->response); - delete scan_task; + ResultStreamImpl* stream = scan_task->stream; + stream->OnFinish(scan_task->request, scan_task->response); + stream->ReleaseRpcHandle(scan_task->request, scan_task->response); + delete scan_task; } -bool TableImpl::GetTabletAddrOrScheduleUpdateMeta(const std::string& row, - SdkTask* task, +bool TableImpl::GetTabletAddrOrScheduleUpdateMeta(const std::string& row, SdkTask* task, std::string* server_addr) { - CHECK_NOTNULL(task); - MutexLock lock(&meta_mutex_); - TabletMetaNode* node = GetTabletMetaNodeForKey(row); - if (node == NULL) { - VLOG(10) << "no meta for key: " << row; - pending_task_id_list_[row].push_back(task->GetId()); - task->DecRef(); - TabletMetaNode& new_node = tablet_meta_list_[row]; - new_node.meta.mutable_key_range()->set_key_start(row); - new_node.meta.mutable_key_range()->set_key_end(row + '\0'); - new_node.status = WAIT_UPDATE; - UpdateMetaAsync(); - return false; - } - if (node->status != NORMAL) { - VLOG(10) << "abnormal meta for key: " << row; - pending_task_id_list_[row].push_back(task->GetId()); - task->DecRef(); - return false; - } - if ((task->GetInternalError() == kKeyNotInRange || task->GetInternalError() == kConnectError) - && task->GetMetaTimeStamp() >= node->update_time) { - pending_task_id_list_[row].push_back(task->GetId()); - task->DecRef(); - int64_t update_interval = node->update_time - + FLAGS_tera_sdk_update_meta_internal - get_micros() / 1000; - if (update_interval <= 0) { - VLOG(10) << "update meta now for key: " << row; - node->status = WAIT_UPDATE; - UpdateMetaAsync(); - } else { - VLOG(10) << "update meta in " << update_interval << " (ms) for key:" << row; - node->status = DELAY_UPDATE; - ThreadPool::Task delay_task = - std::bind(&TableImpl::DelayUpdateMeta, this, - node->meta.key_range().key_start(), - node->meta.key_range().key_end()); - thread_pool_->DelayTask(update_interval, delay_task); - } - return false; - } - CHECK_EQ(node->status, NORMAL); - task->SetMetaTimeStamp(node->update_time); - *server_addr = node->meta.server_addr(); - return true; -} - -TableImpl::TabletMetaNode* TableImpl::GetTabletMetaNodeForKey(const std::string& key) { - meta_mutex_.AssertHeld(); - if (tablet_meta_list_.size() == 0) { - VLOG(10) << "the meta list is empty"; - return NULL; - } - std::map::iterator it = - tablet_meta_list_.upper_bound(key); - if (it == tablet_meta_list_.begin()) { - return NULL; + CHECK_NOTNULL(task); + MutexLock lock(&meta_mutex_); + TabletMetaNode* node = GetTabletMetaNodeForKey(row); + if (node == NULL) { + VLOG(10) << "no meta for key: " << row; + pending_task_id_list_[row].push_back(task->GetId()); + task->DecRef(); + TabletMetaNode& new_node = tablet_meta_list_[row]; + new_node.meta.mutable_key_range()->set_key_start(row); + new_node.meta.mutable_key_range()->set_key_end(row + '\0'); + new_node.status = WAIT_UPDATE; + UpdateMetaAsync(); + return false; + } + if (node->status != NORMAL) { + VLOG(10) << "abnormal meta for key: " << row; + pending_task_id_list_[row].push_back(task->GetId()); + task->DecRef(); + return false; + } + if ((task->GetInternalError() == kKeyNotInRange || task->GetInternalError() == kConnectError) && + task->GetMetaTimeStamp() >= node->update_time) { + pending_task_id_list_[row].push_back(task->GetId()); + task->DecRef(); + int64_t update_interval = + node->update_time + FLAGS_tera_sdk_update_meta_internal - get_micros() / 1000; + if (update_interval <= 0) { + VLOG(10) << "update meta now for key: " << row; + node->status = WAIT_UPDATE; + UpdateMetaAsync(); } else { - --it; - } - const std::string& end_key = it->second.meta.key_range().key_end(); - if (end_key != "" && end_key <= key) { - return NULL; + VLOG(10) << "update meta in " << update_interval << " (ms) for key:" << row; + node->status = DELAY_UPDATE; + ThreadPool::Task delay_task = + std::bind(&TableImpl::DelayUpdateMeta, this, node->meta.key_range().key_start(), + node->meta.key_range().key_end()); + thread_pool_->DelayTask(update_interval, delay_task); } - return &(it->second); + return false; + } + CHECK_EQ(node->status, NORMAL); + task->SetMetaTimeStamp(node->update_time); + *server_addr = node->meta.server_addr(); + return true; } -void TableImpl::DelayUpdateMeta(std::string start_key, std::string end_key) { - MutexLock lock(&meta_mutex_); - std::map::iterator it = - tablet_meta_list_.lower_bound(start_key); - for (; it != tablet_meta_list_.end(); ++it) { - TabletMetaNode& node = it->second; - if (node.meta.key_range().key_end() > end_key) { - break; - } - if (node.status != DELAY_UPDATE) { - continue; - } - node.status = WAIT_UPDATE; - } - UpdateMetaAsync(); +TableImpl::TabletMetaNode* TableImpl::GetTabletMetaNodeForKey(const std::string& key) { + meta_mutex_.AssertHeld(); + if (tablet_meta_list_.size() == 0) { + VLOG(10) << "the meta list is empty"; + return NULL; + } + std::map::iterator it = tablet_meta_list_.upper_bound(key); + if (it == tablet_meta_list_.begin()) { + return NULL; + } else { + --it; + } + const std::string& end_key = it->second.meta.key_range().key_end(); + if (end_key != "" && end_key <= key) { + return NULL; + } + return &(it->second); +} + +void TableImpl::DelayUpdateMeta(const std::string& start_key, const std::string& end_key) { + MutexLock lock(&meta_mutex_); + std::map::iterator it = tablet_meta_list_.lower_bound(start_key); + for (; it != tablet_meta_list_.end(); ++it) { + TabletMetaNode& node = it->second; + if (node.meta.key_range().key_end() > end_key) { + break; + } + if (node.status != DELAY_UPDATE) { + continue; + } + node.status = WAIT_UPDATE; + } + UpdateMetaAsync(); } void TableImpl::UpdateMetaAsync() { - meta_mutex_.AssertHeld(); - if (meta_updating_count_ >= static_cast(FLAGS_tera_sdk_update_meta_concurrency)) { - return; - } - bool need_update = false; - std::string update_start_key; - std::string update_end_key; - std::string update_expand_end_key; // update more tablet than need - std::map::iterator it = tablet_meta_list_.begin(); - for (; it != tablet_meta_list_.end(); ++it) { - TabletMetaNode& node = it->second; - if (node.status != WAIT_UPDATE && need_update) { - update_expand_end_key = node.meta.key_range().key_start(); - break; - } else if (node.status != WAIT_UPDATE) { - continue; - } else if (!need_update) { - need_update = true; - update_start_key = node.meta.key_range().key_start(); - update_end_key = node.meta.key_range().key_end(); - } else if (node.meta.key_range().key_start() == update_end_key) { - update_end_key = node.meta.key_range().key_end(); - } else { - CHECK_GT(node.meta.key_range().key_start(), update_end_key); - update_expand_end_key = node.meta.key_range().key_start(); - break; - } - node.status = UPDATING; - } - if (!need_update) { - return; + meta_mutex_.AssertHeld(); + if (meta_updating_count_ >= static_cast(FLAGS_tera_sdk_update_meta_concurrency)) { + return; + } + bool need_update = false; + std::string update_start_key; + std::string update_end_key; + std::string update_expand_end_key; // update more tablet than need + std::map::iterator it = tablet_meta_list_.begin(); + for (; it != tablet_meta_list_.end(); ++it) { + TabletMetaNode& node = it->second; + if (node.status != WAIT_UPDATE && need_update) { + update_expand_end_key = node.meta.key_range().key_start(); + break; + } else if (node.status != WAIT_UPDATE) { + continue; + } else if (!need_update) { + need_update = true; + update_start_key = node.meta.key_range().key_start(); + update_end_key = node.meta.key_range().key_end(); + } else if (node.meta.key_range().key_start() == update_end_key) { + update_end_key = node.meta.key_range().key_end(); + } else { + CHECK_GT(node.meta.key_range().key_start(), update_end_key); + update_expand_end_key = node.meta.key_range().key_start(); + break; } - meta_updating_count_++; - ScanMetaTableAsync(update_start_key, update_end_key, update_expand_end_key, false); + node.status = UPDATING; + } + if (!need_update) { + return; + } + meta_updating_count_++; + ScanMetaTableAsync(update_start_key, update_end_key, update_expand_end_key, false); } -void TableImpl::ScanMetaTable(const std::string& key_start, - const std::string& key_end) { - MutexLock lock(&meta_mutex_); - meta_updating_count_++; - ScanMetaTableAsync(key_start, key_end, key_end, false); - while (meta_updating_count_ > 0) { - meta_cond_.Wait(); - } +void TableImpl::ScanMetaTable(const std::string& key_start, const std::string& key_end) { + MutexLock lock(&meta_mutex_); + meta_updating_count_++; + ScanMetaTableAsync(key_start, key_end, key_end, false); + while (meta_updating_count_ > 0) { + meta_cond_.Wait(); + } } -void TableImpl::ScanMetaTableAsyncInLock(std::string key_start, std::string key_end, - std::string expand_key_end, bool zk_access) { - MutexLock lock(&meta_mutex_); - ScanMetaTableAsync(key_start, key_end, expand_key_end, zk_access); +void TableImpl::ScanMetaTableAsyncInLock(const std::string& key_start, const std::string& key_end, + const std::string& expand_key_end, bool zk_access) { + MutexLock lock(&meta_mutex_); + ScanMetaTableAsync(key_start, key_end, expand_key_end, zk_access); } void TableImpl::ScanMetaTableAsync(const std::string& key_start, const std::string& key_end, const std::string& expand_key_end, bool zk_access) { - meta_mutex_.AssertHeld(); - CHECK(expand_key_end == "" || expand_key_end >= key_end); - - std::string meta_addr = cluster_->RootTableAddr(zk_access); - if (meta_addr.empty() && !zk_access) { - meta_addr = cluster_->RootTableAddr(true); - } - - if (meta_addr.empty()) { - VLOG(6) << "root is empty"; - - ThreadPool::Task retry_task = - std::bind(&TableImpl::ScanMetaTableAsyncInLock, this, key_start, key_end, - expand_key_end, true); - thread_pool_->DelayTask(FLAGS_tera_sdk_update_meta_internal, retry_task); - return; - } - - VLOG(6) << "root: " << meta_addr; - tabletnode::TabletNodeClient tabletnode_client_async(thread_pool_, meta_addr); - ScanTabletRequest* request = new ScanTabletRequest; - ScanTabletResponse* response = new ScanTabletResponse; - request->set_sequence_id(last_sequence_id_++); - request->set_table_name(FLAGS_tera_master_meta_table_name); - MetaTableScanRange(name_, key_start, expand_key_end, - request->mutable_start(), - request->mutable_end()); - request->set_buffer_limit(FLAGS_tera_sdk_update_meta_buffer_limit); - request->set_round_down(true); - - std::function done = - std::bind(&TableImpl::ScanMetaTableCallBackWrapper, std::weak_ptr(shared_from_this()), - key_start, key_end, expand_key_end, get_micros(), _1, _2, _3, _4); - tabletnode_client_async.ScanTablet(request, response, done); + meta_mutex_.AssertHeld(); + CHECK(expand_key_end == "" || expand_key_end >= key_end); + + std::string meta_addr = cluster_->RootTableAddr(zk_access); + if (meta_addr.empty() && !zk_access) { + meta_addr = cluster_->RootTableAddr(true); + } + + if (meta_addr.empty()) { + VLOG(6) << "root is empty"; + + ThreadPool::Task retry_task = std::bind(&TableImpl::ScanMetaTableAsyncInLock, this, key_start, + key_end, expand_key_end, true); + thread_pool_->DelayTask(FLAGS_tera_sdk_update_meta_internal, retry_task); + return; + } + + VLOG(6) << "root: " << meta_addr; + tabletnode::TabletNodeClient tabletnode_client_async(thread_pool_, meta_addr); + ScanTabletRequest* request = new ScanTabletRequest; + ScanTabletResponse* response = new ScanTabletResponse; + request->set_sequence_id(last_sequence_id_++); + request->set_table_name(FLAGS_tera_master_meta_table_name); + MetaTableScanRange(name_, key_start, expand_key_end, request->mutable_start(), + request->mutable_end()); + request->set_buffer_limit(FLAGS_tera_sdk_update_meta_buffer_limit); + request->set_round_down(true); + + access_builder_->BuildInternalGroupRequest(request); + + std::function done = std::bind( + &TableImpl::ScanMetaTableCallBackWrapper, std::weak_ptr(shared_from_this()), + key_start, key_end, expand_key_end, get_micros(), _1, _2, _3, _4); + tabletnode_client_async.ScanTablet(request, response, done); } void TableImpl::ScanMetaTableCallBackWrapper(std::weak_ptr weak_ptr_table, - std::string key_start, - std::string key_end, - std::string expand_key_end, - int64_t start_time, + std::string key_start, std::string key_end, + std::string expand_key_end, int64_t start_time, ScanTabletRequest* request, - ScanTabletResponse* response, - bool failed, int error_code) { - auto table = weak_ptr_table.lock(); - if (!table) { - return; - } - table->ScanMetaTableCallBack(key_start, key_end, expand_key_end, - start_time, request, response, failed, error_code); -} - -void TableImpl::ScanMetaTableCallBack(std::string key_start, - std::string key_end, - std::string expand_key_end, - int64_t start_time, - ScanTabletRequest* request, - ScanTabletResponse* response, + ScanTabletResponse* response, bool failed, + int error_code) { + auto table = weak_ptr_table.lock(); + if (!table) { + return; + } + table->ScanMetaTableCallBack(key_start, key_end, expand_key_end, start_time, request, response, + failed, error_code); +} + +void TableImpl::ScanMetaTableCallBack(std::string key_start, std::string key_end, + std::string expand_key_end, int64_t start_time, + ScanTabletRequest* request, ScanTabletResponse* response, bool failed, int error_code) { - perf_counter_.get_meta.Add(get_micros() - start_time); - perf_counter_.get_meta_cnt.Inc(); - if (failed) { - if (error_code == sofa::pbrpc::RPC_ERROR_SERVER_SHUTDOWN || - error_code == sofa::pbrpc::RPC_ERROR_SERVER_UNREACHABLE || - error_code == sofa::pbrpc::RPC_ERROR_SERVER_UNAVAILABLE) { - response->set_status(kServerError); - } else if (error_code == sofa::pbrpc::RPC_ERROR_REQUEST_CANCELED || - error_code == sofa::pbrpc::RPC_ERROR_SEND_BUFFER_FULL) { - response->set_status(kClientError); - } else if (error_code == sofa::pbrpc::RPC_ERROR_CONNECTION_CLOSED || - error_code == sofa::pbrpc::RPC_ERROR_RESOLVE_ADDRESS) { - response->set_status(kConnectError); - } else if (error_code == sofa::pbrpc::RPC_ERROR_REQUEST_TIMEOUT) { - response->set_status(kRPCTimeout); - } else { - response->set_status(kRPCError); - } + perf_counter_.get_meta.Add(get_micros() - start_time); + perf_counter_.get_meta_cnt.Inc(); + if (failed) { + if (error_code == sofa::pbrpc::RPC_ERROR_SERVER_SHUTDOWN || + error_code == sofa::pbrpc::RPC_ERROR_SERVER_UNREACHABLE || + error_code == sofa::pbrpc::RPC_ERROR_SERVER_UNAVAILABLE) { + response->set_status(kServerError); + } else if (error_code == sofa::pbrpc::RPC_ERROR_REQUEST_CANCELED || + error_code == sofa::pbrpc::RPC_ERROR_SEND_BUFFER_FULL) { + response->set_status(kClientError); + } else if (error_code == sofa::pbrpc::RPC_ERROR_CONNECTION_CLOSED || + error_code == sofa::pbrpc::RPC_ERROR_RESOLVE_ADDRESS) { + response->set_status(kConnectError); + } else if (error_code == sofa::pbrpc::RPC_ERROR_REQUEST_TIMEOUT) { + response->set_status(kRPCTimeout); + } else { + response->set_status(kRPCError); } + } - StatusCode err = response->status(); - if (err != kTabletNodeOk) { - VLOG(10) << "fail to scan meta table [" << request->start() - << ", " << request->end() << "]: " << StatusCodeToString(err); - { - MutexLock lock(&meta_mutex_); - GiveupUpdateTabletMeta(key_start, key_end); - } - ThreadPool::Task retry_task = - std::bind(&TableImpl::ScanMetaTableAsyncInLock, this, key_start, key_end, - expand_key_end, true); - thread_pool_->DelayTask(FLAGS_tera_sdk_update_meta_internal, retry_task); - delete request; - delete response; - return; + StatusCode err = response->status(); + if (err != kTabletNodeOk) { + VLOG(10) << "fail to scan meta table [" << request->start() << ", " << request->end() + << "]: " << StatusCodeToString(err); + { + MutexLock lock(&meta_mutex_); + GiveupUpdateTabletMeta(key_start, key_end); } + ThreadPool::Task retry_task = std::bind(&TableImpl::ScanMetaTableAsyncInLock, this, key_start, + key_end, expand_key_end, true); + thread_pool_->DelayTask(FLAGS_tera_sdk_update_meta_internal, retry_task); + delete request; + delete response; + return; + } - std::string return_start, return_end; - const RowResult& scan_result = response->results(); - for (int32_t i = 0; i < scan_result.key_values_size(); i++) { - const KeyValuePair& kv = scan_result.key_values(i); - - TabletMeta meta; - ParseMetaTableKeyValue(kv.key(), kv.value(), &meta); + std::string return_start, return_end; + const RowResult& scan_result = response->results(); + for (int32_t i = 0; i < scan_result.key_values_size(); i++) { + const KeyValuePair& kv = scan_result.key_values(i); - if (i == 0) { - return_start = meta.key_range().key_start(); - } - if (i == scan_result.key_values_size() - 1) { - return_end = meta.key_range().key_end(); - } + TabletMeta meta; + ParseMetaTableKeyValue(kv.key(), kv.value(), &meta); - MutexLock lock(&meta_mutex_); - UpdateTabletMetaList(meta); + if (i == 0) { + return_start = meta.key_range().key_start(); } - VLOG(10) << "scan meta table [" << request->start() - << ", " << request->end() << "] success: return " - << scan_result.key_values_size() << " records, is_complete: " << response->complete(); - bool scan_meta_error = false; - if (scan_result.key_values_size() == 0 - || return_start > key_start - || (response->complete() && !return_end.empty() && (key_end.empty() || return_end < key_end))) { - LOG(ERROR) << "scan meta table [" << key_start << ", " << key_end - << "] return [" << return_start << ", " << return_end << "]"; - // TODO(lk): process omitted tablets - scan_meta_error = true; + if (i == scan_result.key_values_size() - 1) { + return_end = meta.key_range().key_end(); } MutexLock lock(&meta_mutex_); - if (scan_meta_error) { - ScanMetaTableAsync(key_start, key_end, expand_key_end, false); - } else if (!return_end.empty() && (key_end.empty() || return_end < key_end)) { - CHECK(!response->complete()); - ScanMetaTableAsync(return_end, key_end, expand_key_end, false); - } else { - meta_updating_count_--; - meta_cond_.Signal(); - UpdateMetaAsync(); + UpdateTabletMetaList(meta); + } + VLOG(10) << "scan meta table [" << request->start() << ", " << request->end() + << "] success: return " << scan_result.key_values_size() + << " records, is_complete: " << response->complete(); + bool scan_meta_error = false; + if (scan_result.key_values_size() == 0 || return_start > key_start || + (response->complete() && !return_end.empty() && (key_end.empty() || return_end < key_end))) { + LOG(ERROR) << "scan meta table [" << key_start << ", " << key_end << "] return [" + << return_start << ", " << return_end << "]"; + // TODO(lk): process omitted tablets + scan_meta_error = true; + } + + MutexLock lock(&meta_mutex_); + if (scan_meta_error) { + ScanMetaTableAsync(key_start, key_end, expand_key_end, false); + } else if (!return_end.empty() && (key_end.empty() || return_end < key_end)) { + CHECK(!response->complete()); + ScanMetaTableAsync(return_end, key_end, expand_key_end, false); + } else { + meta_updating_count_--; + meta_cond_.Signal(); + UpdateMetaAsync(); + } + delete request; + delete response; +} + +void TableImpl::GiveupUpdateTabletMeta(const std::string& key_start, const std::string& key_end) { + std::map >::iterator ilist = + pending_task_id_list_.lower_bound(key_start); + while (ilist != pending_task_id_list_.end()) { + if (!key_end.empty() && ilist->first >= key_end) { + break; + } + std::list& task_id_list = ilist->second; + for (std::list::iterator itask = task_id_list.begin(); itask != task_id_list.end();) { + int64_t task_id = *itask; + SdkTask* task = task_pool_.GetTask(task_id); + if (task == NULL) { + VLOG(10) << "task " << task_id << " timeout when update meta fail"; + itask = task_id_list.erase(itask); + } else { + task->DecRef(); + } + ++itask; } - delete request; - delete response; -} - -void TableImpl::GiveupUpdateTabletMeta(const std::string& key_start, - const std::string& key_end) { - std::map >::iterator ilist = - pending_task_id_list_.lower_bound(key_start); - while (ilist != pending_task_id_list_.end()) { - if (!key_end.empty() && ilist->first >= key_end) { - break; - } - std::list& task_id_list = ilist->second; - for (std::list::iterator itask = task_id_list.begin(); - itask != task_id_list.end();) { - int64_t task_id = *itask; - SdkTask* task = task_pool_.GetTask(task_id); - if (task == NULL) { - VLOG(10) << "task " << task_id << " timeout when update meta fail"; - itask = task_id_list.erase(itask); - } else { - task->DecRef(); - } - ++itask; - } - if (task_id_list.empty()) { - pending_task_id_list_.erase(ilist++); - } else { - ++ilist; - } + if (task_id_list.empty()) { + pending_task_id_list_.erase(ilist++); + } else { + ++ilist; } + } } void TableImpl::UpdateTabletMetaList(const TabletMeta& new_meta) { - meta_mutex_.AssertHeld(); - const std::string& new_start = new_meta.key_range().key_start(); - const std::string& new_end = new_meta.key_range().key_end(); - std::map::iterator it = - tablet_meta_list_.upper_bound(new_start); - if (tablet_meta_list_.size() > 0 && it != tablet_meta_list_.begin()) { - --it; - } - while (it != tablet_meta_list_.end()) { - TabletMetaNode& old_node = it->second; - std::map::iterator tmp = it; - ++it; - - const std::string& old_start = old_node.meta.key_range().key_start(); - const std::string& old_end = old_node.meta.key_range().key_end(); - // update overlaped old nodes - if (old_start < new_start) { - if (!old_end.empty() && old_end <= new_start) { - //************************************************* - //* |---old---| * - //* |------new------| * - //************************************************* - } else if (new_end.empty() || (!old_end.empty() && old_end <= new_end)) { - //************************************************* - //* |---old---| * - //* |------new------| * - //************************************************* - VLOG(10) << "meta [" << old_start << ", " << old_end << "] " - << "shrink to [" << old_start << ", " << new_start << "]"; - old_node.meta.mutable_key_range()->set_key_end(new_start); - } else { - //************************************************* - //* |----------old-----------| * - //* |------new------| * - //************************************************* - VLOG(10) << "meta [" << old_start << ", " << old_end << "] " - << "split to [" << old_start << ", " << new_start << "] " - << "and [" << new_end << ", " << old_end << "]"; - TabletMetaNode& copy_node = tablet_meta_list_[new_end]; - copy_node = old_node; - copy_node.meta.mutable_key_range()->set_key_start(new_end); - old_node.meta.mutable_key_range()->set_key_end(new_start); - } - } else if (new_end.empty() || old_start < new_end) { - if (new_end.empty() || (!old_end.empty() && old_end <= new_end)) { - //************************************************* - //* |---old---| * - //* |------new------| * - //************************************************* - VLOG(10) << "meta [" << old_start << ", " << old_end << "] " - << "is covered by [" << new_start << ", " << new_end << "]"; - tablet_meta_list_.erase(tmp); - } else { - //************************************************* - //* |-----old------| * - //* |------new------| * - //************************************************* - VLOG(10) << "meta [" << old_start << ", " << old_end << "] " - << "shrink to [" << new_end << ", " << old_end << "]"; - TabletMetaNode& copy_node = tablet_meta_list_[new_end]; - copy_node = old_node; - copy_node.meta.mutable_key_range()->set_key_start(new_end); - tablet_meta_list_.erase(tmp); - } - } else { // !new_end.empty() && old_start >= new_end - //***************************************************** - //* |---old---| * - //* |------new------| * - //***************************************************** - break; - } - } - - TabletMetaNode& new_node = tablet_meta_list_[new_start]; - new_node.meta.CopyFrom(new_meta); - new_node.status = NORMAL; - new_node.update_time = get_micros() / 1000; - VLOG(10) << "add new meta [" << new_start << ", " << new_end << "]: " - << new_meta.server_addr(); - WakeUpPendingRequest(new_node); + meta_mutex_.AssertHeld(); + const std::string& new_start = new_meta.key_range().key_start(); + const std::string& new_end = new_meta.key_range().key_end(); + std::map::iterator it = tablet_meta_list_.upper_bound(new_start); + if (tablet_meta_list_.size() > 0 && it != tablet_meta_list_.begin()) { + --it; + } + while (it != tablet_meta_list_.end()) { + TabletMetaNode& old_node = it->second; + std::map::iterator tmp = it; + ++it; + + const std::string& old_start = old_node.meta.key_range().key_start(); + const std::string& old_end = old_node.meta.key_range().key_end(); + // update overlaped old nodes + if (old_start < new_start) { + if (!old_end.empty() && old_end <= new_start) { + //************************************************* + //* |---old---| * + //* |------new------| * + //************************************************* + } else if (new_end.empty() || (!old_end.empty() && old_end <= new_end)) { + //************************************************* + //* |---old---| * + //* |------new------| * + //************************************************* + VLOG(10) << "meta [" << old_start << ", " << old_end << "] " + << "shrink to [" << old_start << ", " << new_start << "]"; + old_node.meta.mutable_key_range()->set_key_end(new_start); + } else { + //************************************************* + //* |----------old-----------| * + //* |------new------| * + //************************************************* + VLOG(10) << "meta [" << old_start << ", " << old_end << "] " + << "split to [" << old_start << ", " << new_start << "] " + << "and [" << new_end << ", " << old_end << "]"; + TabletMetaNode& copy_node = tablet_meta_list_[new_end]; + copy_node = old_node; + copy_node.meta.mutable_key_range()->set_key_start(new_end); + old_node.meta.mutable_key_range()->set_key_end(new_start); + } + } else if (new_end.empty() || old_start < new_end) { + if (new_end.empty() || (!old_end.empty() && old_end <= new_end)) { + //************************************************* + //* |---old---| * + //* |------new------| * + //************************************************* + VLOG(10) << "meta [" << old_start << ", " << old_end << "] " + << "is covered by [" << new_start << ", " << new_end << "]"; + tablet_meta_list_.erase(tmp); + } else { + //************************************************* + //* |-----old------| * + //* |------new------| * + //************************************************* + VLOG(10) << "meta [" << old_start << ", " << old_end << "] " + << "shrink to [" << new_end << ", " << old_end << "]"; + TabletMetaNode& copy_node = tablet_meta_list_[new_end]; + copy_node = old_node; + copy_node.meta.mutable_key_range()->set_key_start(new_end); + tablet_meta_list_.erase(tmp); + } + } else { // !new_end.empty() && old_start >= new_end + //***************************************************** + //* |---old---| * + //* |------new------| * + //***************************************************** + break; + } + } + + TabletMetaNode& new_node = tablet_meta_list_[new_start]; + new_node.meta.CopyFrom(new_meta); + new_node.status = NORMAL; + new_node.update_time = get_micros() / 1000; + VLOG(10) << "add new meta [" << new_start << ", " << new_end << "]: " << new_meta.server_addr(); + WakeUpPendingRequest(new_node); } void TableImpl::WakeUpPendingRequest(const TabletMetaNode& node) { - meta_mutex_.AssertHeld(); - const std::string& start_key = node.meta.key_range().key_start(); - const std::string& end_key = node.meta.key_range().key_end(); - const std::string& server_addr = node.meta.server_addr(); - int64_t meta_timestamp = node.update_time; - - std::vector mutation_list; - std::vector reader_list; - - std::map >::iterator it = - pending_task_id_list_.lower_bound(start_key); - while (it != pending_task_id_list_.end()) { - if (!end_key.empty() && it->first >= end_key) { - break; - } - std::list& task_id_list = it->second; - for (std::list::iterator itask = task_id_list.begin(); - itask != task_id_list.end(); ++itask) { - perf_counter_.meta_update_cnt.Inc(); - int64_t task_id = *itask; - SdkTask* task = task_pool_.GetTask(task_id); - if (task == NULL) { - VLOG(10) << "task " << task_id << " timeout when update meta success"; - continue; - } - task->SetMetaTimeStamp(meta_timestamp); - - switch (task->Type()) { - case SdkTask::READ: { - reader_list.push_back(task); - } break; - case SdkTask::MUTATION: { - mutation_list.push_back(task); - } break; - case SdkTask::SCAN: { - ScanTask* scan_task = (ScanTask*)task; - CommitScan(scan_task, server_addr); - } break; - default: - CHECK(false); - break; - } - } - std::map >::iterator tmp = it; - ++it; - pending_task_id_list_.erase(tmp); - } - - if (mutation_list.size() > 0) { - PackSdkTasks(server_addr, mutation_list, SdkTask::MUTATION); - } - if (reader_list.size() > 0) { - PackSdkTasks(server_addr, reader_list, SdkTask::READ); - } -} - -void TableImpl::ScheduleUpdateMeta(const std::string& row, - int64_t meta_timestamp) { - MutexLock lock(&meta_mutex_); - TabletMetaNode* node = GetTabletMetaNodeForKey(row); - if (node == NULL) { - TabletMetaNode& new_node = tablet_meta_list_[row]; - new_node.meta.mutable_key_range()->set_key_start(row); - new_node.meta.mutable_key_range()->set_key_end(row + '\0'); - new_node.status = WAIT_UPDATE; - UpdateMetaAsync(); - return; - } - if (node->status == NORMAL && meta_timestamp >= node->update_time) { - int64_t update_interval = node->update_time - + FLAGS_tera_sdk_update_meta_internal - get_micros() / 1000; - if (update_interval <= 0) { - node->status = WAIT_UPDATE; - UpdateMetaAsync(); - } else { - node->status = DELAY_UPDATE; - ThreadPool::Task delay_task = - std::bind(&TableImpl::DelayUpdateMeta, this, - node->meta.key_range().key_start(), - node->meta.key_range().key_end()); - thread_pool_->DelayTask(update_interval, delay_task); - } + meta_mutex_.AssertHeld(); + const std::string& start_key = node.meta.key_range().key_start(); + const std::string& end_key = node.meta.key_range().key_end(); + const std::string& server_addr = node.meta.server_addr(); + int64_t meta_timestamp = node.update_time; + + std::vector mutation_list; + std::vector reader_list; + std::vector batch_mutation_list; + + std::map >::iterator it = + pending_task_id_list_.lower_bound(start_key); + while (it != pending_task_id_list_.end()) { + if (!end_key.empty() && it->first >= end_key) { + break; + } + std::list& task_id_list = it->second; + for (std::list::iterator itask = task_id_list.begin(); itask != task_id_list.end(); + ++itask) { + perf_counter_.meta_update_cnt.Inc(); + int64_t task_id = *itask; + SdkTask* task = task_pool_.GetTask(task_id); + if (task == NULL) { + VLOG(10) << "task " << task_id << " timeout when update meta success"; + continue; + } + task->SetMetaTimeStamp(meta_timestamp); + + switch (task->Type()) { + case SdkTask::READ: { + reader_list.push_back(task); + } break; + case SdkTask::MUTATION: { + mutation_list.push_back(task); + } break; + case SdkTask::BATCH_MUTATION: { + batch_mutation_list.push_back(task); + } break; + case SdkTask::SCAN: { + ScanTask* scan_task = (ScanTask*)task; + CommitScan(scan_task, server_addr); + } break; + default: + CHECK(false); + break; + } + } + std::map >::iterator tmp = it; + ++it; + pending_task_id_list_.erase(tmp); + } + + if (mutation_list.size() > 0) { + PackSdkTasks(server_addr, mutation_list, SdkTask::MUTATION); + } + if (reader_list.size() > 0) { + PackSdkTasks(server_addr, reader_list, SdkTask::READ); + } + if (batch_mutation_list.size() > 0) { + PackSdkTasks(server_addr, batch_mutation_list, SdkTask::BATCH_MUTATION); + } +} + +void TableImpl::ScheduleUpdateMeta(const std::string& row, int64_t meta_timestamp) { + MutexLock lock(&meta_mutex_); + TabletMetaNode* node = GetTabletMetaNodeForKey(row); + if (node == NULL) { + TabletMetaNode& new_node = tablet_meta_list_[row]; + new_node.meta.mutable_key_range()->set_key_start(row); + new_node.meta.mutable_key_range()->set_key_end(row + '\0'); + new_node.status = WAIT_UPDATE; + UpdateMetaAsync(); + return; + } + if (node->status == NORMAL && meta_timestamp >= node->update_time) { + int64_t update_interval = + node->update_time + FLAGS_tera_sdk_update_meta_internal - get_micros() / 1000; + if (update_interval <= 0) { + node->status = WAIT_UPDATE; + UpdateMetaAsync(); + } else { + node->status = DELAY_UPDATE; + ThreadPool::Task delay_task = + std::bind(&TableImpl::DelayUpdateMeta, this, node->meta.key_range().key_start(), + node->meta.key_range().key_end()); + thread_pool_->DelayTask(update_interval, delay_task); } + } } bool TableImpl::UpdateTableMeta(ErrorCode* err) { - MutexLock lock(&table_meta_mutex_); - table_meta_updating_ = true; + MutexLock lock(&table_meta_mutex_); + table_meta_updating_ = true; - table_meta_mutex_.Unlock(); - ReadTableMetaAsync(err, 0, false); - table_meta_mutex_.Lock(); + table_meta_mutex_.Unlock(); + ReadTableMetaAsync(err, 0, false); + table_meta_mutex_.Lock(); - while (table_meta_updating_) { - table_meta_cond_.Wait(); - } - if (err->GetType() != ErrorCode::kOK) { - return false; - } - return true; + while (table_meta_updating_) { + table_meta_cond_.Wait(); + } + if (err->GetType() != ErrorCode::kOK) { + return false; + } + return true; } -void TableImpl::ReadTableMetaAsync(ErrorCode* ret_err, int32_t retry_times, - bool zk_access) { - std::string meta_server = cluster_->RootTableAddr(zk_access); - if (meta_server.empty() && !zk_access) { - meta_server = cluster_->RootTableAddr(true); - } - if (meta_server.empty()) { - VLOG(10) << "root is empty"; - - MutexLock lock(&table_meta_mutex_); - CHECK(table_meta_updating_); - if (retry_times >= FLAGS_tera_sdk_retry_times) { - ret_err->SetFailed(ErrorCode::kSystem); - table_meta_updating_ = false; - table_meta_cond_.Signal(); - } else { - int64_t retry_interval = - static_cast(pow(FLAGS_tera_sdk_delay_send_internal, retry_times) * 1000); - ThreadPool::Task retry_task = - std::bind(&TableImpl::ReadTableMetaAsync, this, ret_err, retry_times + 1, true); - thread_pool_->DelayTask(retry_interval, retry_task); - } - return; +void TableImpl::ReadTableMetaAsync(ErrorCode* ret_err, int32_t retry_times, bool zk_access) { + std::string meta_server = cluster_->RootTableAddr(zk_access); + if (meta_server.empty() && !zk_access) { + meta_server = cluster_->RootTableAddr(true); + } + if (meta_server.empty()) { + VLOG(10) << "root is empty"; + + MutexLock lock(&table_meta_mutex_); + CHECK(table_meta_updating_); + if (retry_times >= FLAGS_tera_sdk_meta_read_retry_times) { + ret_err->SetFailed(ErrorCode::kSystem); + table_meta_updating_ = false; + table_meta_cond_.Signal(); + } else { + int64_t retry_interval = + static_cast(pow(FLAGS_tera_sdk_delay_send_internal, retry_times) * 1000); + ThreadPool::Task retry_task = + std::bind(&TableImpl::ReadTableMetaAsync, this, ret_err, retry_times + 1, true); + thread_pool_->DelayTask(retry_interval, retry_task); } + return; + } + + tabletnode::TabletNodeClient tabletnode_client_async(thread_pool_, meta_server); + ReadTabletRequest* request = new ReadTabletRequest; + ReadTabletResponse* response = new ReadTabletResponse; + request->set_sequence_id(last_sequence_id_++); + request->set_tablet_name(FLAGS_tera_master_meta_table_name); + RowReaderInfo* row_info = request->add_row_info_list(); + MakeMetaTableKey(name_, row_info->mutable_key()); - tabletnode::TabletNodeClient tabletnode_client_async(thread_pool_, meta_server); - ReadTabletRequest* request = new ReadTabletRequest; - ReadTabletResponse* response = new ReadTabletResponse; - request->set_sequence_id(last_sequence_id_++); - request->set_tablet_name(FLAGS_tera_master_meta_table_name); - RowReaderInfo* row_info = request->add_row_info_list(); - MakeMetaTableKey(name_, row_info->mutable_key()); + access_builder_->BuildInternalGroupRequest(request); - std::function done = - std::bind(&TableImpl::ReadTableMetaCallBackWrapper, std::weak_ptr(shared_from_this()), - ret_err, retry_times, _1, _2, _3, _4); - tabletnode_client_async.ReadTablet(request, response, done); + std::function done = + std::bind(&TableImpl::ReadTableMetaCallBackWrapper, + std::weak_ptr(shared_from_this()), ret_err, retry_times, _1, _2, _3, _4); + tabletnode_client_async.ReadTablet(request, response, done); } void TableImpl::ReadTableMetaCallBackWrapper(std::weak_ptr weak_ptr_table, ErrorCode* ret_err, int32_t retry_times, ReadTabletRequest* request, - ReadTabletResponse* response, - bool failed, int error_code) { - auto table = weak_ptr_table.lock(); - if (!table) { - return; - } - table->ReadTableMetaCallBack(ret_err, retry_times, request, response, failed, error_code); + ReadTabletResponse* response, bool failed, + int error_code) { + auto table = weak_ptr_table.lock(); + if (!table) { + return; + } + table->ReadTableMetaCallBack(ret_err, retry_times, request, response, failed, error_code); } -void TableImpl::ReadTableMetaCallBack(ErrorCode* ret_err, - int32_t retry_times, - ReadTabletRequest* request, - ReadTabletResponse* response, +void TableImpl::ReadTableMetaCallBack(ErrorCode* ret_err, int32_t retry_times, + ReadTabletRequest* request, ReadTabletResponse* response, bool failed, int error_code) { - if (failed) { - if (error_code == sofa::pbrpc::RPC_ERROR_SERVER_SHUTDOWN || - error_code == sofa::pbrpc::RPC_ERROR_SERVER_UNREACHABLE || - error_code == sofa::pbrpc::RPC_ERROR_SERVER_UNAVAILABLE) { - response->set_status(kServerError); - } else if (error_code == sofa::pbrpc::RPC_ERROR_REQUEST_CANCELED || - error_code == sofa::pbrpc::RPC_ERROR_SEND_BUFFER_FULL) { - response->set_status(kClientError); - } else if (error_code == sofa::pbrpc::RPC_ERROR_CONNECTION_CLOSED || - error_code == sofa::pbrpc::RPC_ERROR_RESOLVE_ADDRESS) { - response->set_status(kConnectError); - } else if (error_code == sofa::pbrpc::RPC_ERROR_REQUEST_TIMEOUT) { - response->set_status(kRPCTimeout); - } else { - response->set_status(kRPCError); - } - } - - StatusCode err = response->status(); - if (err == kTabletNodeOk && response->detail().status_size() < 1) { - err = kKeyNotExist; - LOG(ERROR) << "read table meta: status size is 0"; - } - if (err == kTabletNodeOk) { - err = response->detail().status(0); - } - if (err == kTabletNodeOk && response->detail().row_result_size() < 1) { - err = kKeyNotExist; - LOG(ERROR) << "read table meta: row result size is 0"; - } - if (err == kTabletNodeOk && response->detail().row_result(0).key_values_size() < 1) { - err = kKeyNotExist; - LOG(ERROR) << "read table meta: row result kv size is 0"; - } - - if (err != kTabletNodeOk && err != kKeyNotExist && err != kSnapshotNotExist) { - VLOG(10) << "fail to read meta table, retry: " << retry_times - << ", errcode: " << StatusCodeToString(err); - } - - MutexLock lock(&table_meta_mutex_); - CHECK(table_meta_updating_); - - if (err == kTabletNodeOk) { - TableMeta table_meta; - const KeyValuePair& kv = response->detail().row_result(0).key_values(0); - ParseMetaTableKeyValue(kv.key(), kv.value(), &table_meta); - table_schema_.CopyFrom(table_meta.schema()); - create_time_ = table_meta.create_time(); - ret_err->SetFailed(ErrorCode::kOK); - table_meta_updating_ = false; - table_meta_cond_.Signal(); - } else if (err == kKeyNotExist || err == kSnapshotNotExist) { - ret_err->SetFailed(ErrorCode::kNotFound); - table_meta_updating_ = false; - table_meta_cond_.Signal(); - } else if (retry_times >= FLAGS_tera_sdk_retry_times) { - ret_err->SetFailed(ErrorCode::kSystem); - table_meta_updating_ = false; - table_meta_cond_.Signal(); + if (failed) { + if (error_code == sofa::pbrpc::RPC_ERROR_SERVER_SHUTDOWN || + error_code == sofa::pbrpc::RPC_ERROR_SERVER_UNREACHABLE || + error_code == sofa::pbrpc::RPC_ERROR_SERVER_UNAVAILABLE) { + response->set_status(kServerError); + } else if (error_code == sofa::pbrpc::RPC_ERROR_REQUEST_CANCELED || + error_code == sofa::pbrpc::RPC_ERROR_SEND_BUFFER_FULL) { + response->set_status(kClientError); + } else if (error_code == sofa::pbrpc::RPC_ERROR_CONNECTION_CLOSED || + error_code == sofa::pbrpc::RPC_ERROR_RESOLVE_ADDRESS) { + response->set_status(kConnectError); + } else if (error_code == sofa::pbrpc::RPC_ERROR_REQUEST_TIMEOUT) { + response->set_status(kRPCTimeout); } else { - int64_t retry_interval = - static_cast(pow(FLAGS_tera_sdk_delay_send_internal, retry_times) * 1000); - ThreadPool::Task retry_task = - std::bind(&TableImpl::ReadTableMetaAsync, this, ret_err, retry_times + 1, true); - thread_pool_->DelayTask(retry_interval, retry_task); - } - - delete request; - delete response; + response->set_status(kRPCError); + } + } + + StatusCode err = response->status(); + if (err == kTabletNodeOk && response->detail().status_size() < 1) { + err = kKeyNotExist; + LOG(ERROR) << "read table meta: status size is 0"; + } + if (err == kTabletNodeOk) { + err = response->detail().status(0); + } + if (err == kTabletNodeOk && response->detail().row_result_size() < 1) { + err = kKeyNotExist; + LOG(ERROR) << "read table meta: row result size is 0"; + } + if (err == kTabletNodeOk && response->detail().row_result(0).key_values_size() < 1) { + err = kKeyNotExist; + LOG(ERROR) << "read table meta: row result kv size is 0"; + } + + if (err != kTabletNodeOk && err != kKeyNotExist && err != kSnapshotNotExist) { + VLOG(10) << "fail to read meta table, retry: " << retry_times + << ", errcode: " << StatusCodeToString(err); + } + + MutexLock lock(&table_meta_mutex_); + CHECK(table_meta_updating_); + + if (err == kTabletNodeOk) { + TableMeta table_meta; + const KeyValuePair& kv = response->detail().row_result(0).key_values(0); + ParseMetaTableKeyValue(kv.key(), kv.value(), &table_meta); + table_schema_.CopyFrom(table_meta.schema()); + + if (table_schema_.has_enable_hash() && table_schema_.enable_hash()) { + is_hash_table_ = true; + } else { + is_hash_table_ = false; + } + + create_time_ = table_meta.create_time(); + ret_err->SetFailed(ErrorCode::kOK); + table_meta_updating_ = false; + table_meta_cond_.Signal(); + } else if (err == kKeyNotExist || err == kSnapshotNotExist) { + ret_err->SetFailed(ErrorCode::kNotFound); + table_meta_updating_ = false; + table_meta_cond_.Signal(); + } else if (retry_times >= FLAGS_tera_sdk_meta_read_retry_times) { + ret_err->SetFailed(ErrorCode::kSystem); + table_meta_updating_ = false; + table_meta_cond_.Signal(); + } else { + int64_t retry_interval = + static_cast(pow(FLAGS_tera_sdk_delay_send_internal, retry_times) * 1000); + ThreadPool::Task retry_task = + std::bind(&TableImpl::ReadTableMetaAsync, this, ret_err, retry_times + 1, true); + thread_pool_->DelayTask(retry_interval, retry_task); + } + + delete request; + delete response; } bool TableImpl::RestoreCookie() { - const std::string& cookie_dir = FLAGS_tera_sdk_cookie_path; - if (!IsExist(cookie_dir)) { - if (!CreateDirWithRetry(cookie_dir)) { - LOG(INFO) << "[SDK COOKIE] fail to create cookie dir: " << cookie_dir; - return false; - } else { - return true; - } - } - SdkCookie cookie; - std::string cookie_file = GetCookieFilePathName(); - if (!::tera::sdk::RestoreCookie(cookie_file, true, &cookie)) { - return true; - } - if (cookie.table_name() != name_) { - LOG(INFO) << "[SDK COOKIE] cookie name error: " << cookie.table_name() - << ", should be: " << name_; - return true; - } - - MutexLock lock(&meta_mutex_); - for (int i = 0; i < cookie.tablets_size(); ++i) { - const TabletMeta& meta = cookie.tablets(i).meta(); - const std::string& start_key = meta.key_range().key_start(); - LOG(INFO) << "[SDK COOKIE] restore:" << meta.path() - << " range [" << DebugString(start_key) - << " : " << DebugString(meta.key_range().key_end()) << "]"; - TabletMetaNode& node = tablet_meta_list_[start_key]; - node.meta = meta; - node.update_time = cookie.tablets(i).update_time(); - node.status = NORMAL; + const std::string& cookie_dir = FLAGS_tera_sdk_cookie_path; + if (!IsExist(cookie_dir)) { + if (!CreateDirWithRetry(cookie_dir)) { + LOG(INFO) << "[SDK COOKIE] fail to create cookie dir: " << cookie_dir; + return false; + } else { + return true; } - LOG(INFO) << "[SDK COOKIE] restore finished, tablet num: " << cookie.tablets_size(); + } + SdkCookie cookie; + std::string cookie_file = GetCookieFilePathName(); + if (!::tera::sdk::RestoreCookie(cookie_file, true, &cookie)) { + return true; + } + if (cookie.table_name() != name_) { + LOG(INFO) << "[SDK COOKIE] cookie name error: " << cookie.table_name() + << ", should be: " << name_; return true; + } + + MutexLock lock(&meta_mutex_); + for (int i = 0; i < cookie.tablets_size(); ++i) { + const TabletMeta& meta = cookie.tablets(i).meta(); + const std::string& start_key = meta.key_range().key_start(); + LOG(INFO) << "[SDK COOKIE] restore:" << meta.path() << " range [" << DebugString(start_key) + << " : " << DebugString(meta.key_range().key_end()) << "]"; + TabletMetaNode& node = tablet_meta_list_[start_key]; + node.meta = meta; + node.update_time = cookie.tablets(i).update_time(); + node.status = NORMAL; + } + LOG(INFO) << "[SDK COOKIE] restore finished, tablet num: " << cookie.tablets_size(); + return true; } std::string TableImpl::GetCookieFilePathName(void) { - return FLAGS_tera_sdk_cookie_path + "/" - + GetCookieFileName(name_, cluster_->ClusterId(), create_time_); + return FLAGS_tera_sdk_cookie_path + "/" + + GetCookieFileName(name_, cluster_->ClusterId(), create_time_); } -std::string TableImpl::GetCookieLockFilePathName(void) { - return GetCookieFilePathName() + ".LOCK"; -} +std::string TableImpl::GetCookieLockFilePathName(void) { return GetCookieFilePathName() + ".LOCK"; } void TableImpl::DoDumpCookie() { - std::string cookie_file = GetCookieFilePathName(); - std::string cookie_lock_file = GetCookieLockFilePathName(); - SdkCookie cookie; - cookie.set_table_name(name_); - { - MutexLock lock(&meta_mutex_); - std::map::iterator it = tablet_meta_list_.begin(); - for (; it != tablet_meta_list_.end(); ++it) { - const TabletMetaNode& node = it->second; - if (!node.meta.has_table_name() || !node.meta.has_path()) { - continue; - } - SdkTabletCookie* tablet = cookie.add_tablets(); - tablet->mutable_meta()->CopyFrom(node.meta); - tablet->set_update_time(node.update_time); - tablet->set_status(node.status); - } - } - if (!IsExist(FLAGS_tera_sdk_cookie_path) && !CreateDirWithRetry(FLAGS_tera_sdk_cookie_path)) { - LOG(ERROR) << "[SDK COOKIE] fail to create cookie dir: " << FLAGS_tera_sdk_cookie_path; - return; - } - ::tera::sdk::DumpCookie(cookie_file, cookie_lock_file, cookie); + std::string cookie_file = GetCookieFilePathName(); + std::string cookie_lock_file = GetCookieLockFilePathName(); + SdkCookie cookie; + cookie.set_table_name(name_); + { + MutexLock lock(&meta_mutex_); + std::map::iterator it = tablet_meta_list_.begin(); + for (; it != tablet_meta_list_.end(); ++it) { + const TabletMetaNode& node = it->second; + if (!node.meta.has_table_name() || !node.meta.has_path()) { + continue; + } + SdkTabletCookie* tablet = cookie.add_tablets(); + tablet->mutable_meta()->CopyFrom(node.meta); + tablet->set_update_time(node.update_time); + tablet->set_status(node.status); + } + } + if (!IsExist(FLAGS_tera_sdk_cookie_path) && !CreateDirWithRetry(FLAGS_tera_sdk_cookie_path)) { + LOG(ERROR) << "[SDK COOKIE] fail to create cookie dir: " << FLAGS_tera_sdk_cookie_path; + return; + } + ::tera::sdk::DumpCookie(cookie_file, cookie_lock_file, cookie); } void TableImpl::DumpCookie() { - DoDumpCookie(); - ThreadPool::Task task = std::bind(&TableImpl::DumpCookie, this); - AddDelayTask(FLAGS_tera_sdk_cookie_update_interval * 1000LL, task); + DoDumpCookie(); + ThreadPool::Task task = std::bind(&TableImpl::DumpCookie, this); + AddDelayTask(FLAGS_tera_sdk_cookie_update_interval * 1000LL, task); } void TableImpl::EnableCookieUpdateTimer() { - ThreadPool::Task task = std::bind(&TableImpl::DumpCookie, this); - AddDelayTask(FLAGS_tera_sdk_cookie_update_interval * 1000LL, task); + ThreadPool::Task task = std::bind(&TableImpl::DumpCookie, this); + AddDelayTask(FLAGS_tera_sdk_cookie_update_interval * 1000LL, task); } std::string TableImpl::GetCookieFileName(const std::string& tablename, - const std::string& cluster_id, - int64_t create_time) { - uint32_t hash = 0; - if (GetHashNumber(cluster_id, hash, &hash) != 0) { - LOG(FATAL) << "invalid arguments"; - } - char hash_str[9] = {'\0'}; - sprintf(hash_str, "%08x", hash); - std::stringstream fname; - fname << tablename << "-" << create_time << "-" << hash_str; - return fname.str(); + const std::string& cluster_id, int64_t create_time) { + uint32_t hash = 0; + if (GetHashNumber(cluster_id, hash, &hash) != 0) { + LOG(FATAL) << "invalid arguments"; + } + char hash_str[9] = {'\0'}; + sprintf(hash_str, "%08x", hash); + std::stringstream fname; + fname << tablename << "-" << create_time << "-" << hash_str; + return fname.str(); } void TableImpl::DumpPerfCounterLogDelay() { - DoDumpPerfCounterLog(); - ThreadPool::Task task = - std::bind(&TableImpl::DumpPerfCounterLogDelay, this); - AddDelayTask(FLAGS_tera_sdk_perf_counter_log_interval * 1000, task); + DoDumpPerfCounterLog(); + ThreadPool::Task task = std::bind(&TableImpl::DumpPerfCounterLogDelay, this); + AddDelayTask(FLAGS_tera_sdk_perf_counter_log_interval * 1000, task); } void TableImpl::DoDumpPerfCounterLog() { - LOG(INFO) << "[table " << name_ << " PerfCounter][pending]" - << " pending_r: " << cur_reader_pending_counter_.Get() - << " pending_w: " << cur_commit_pending_counter_.Get(); - perf_counter_.DoDumpPerfCounterLog("[table " + name_ + " PerfCounter]"); + LOG(INFO) << "[table " << name_ << " PerfCounter][pending]" + << " pending_r: " << cur_reader_pending_counter_.Get() + << " pending_w: " << cur_commit_pending_counter_.Get(); + perf_counter_.DoDumpPerfCounterLog("[table " + name_ + " PerfCounter]"); } void TableImpl::PerfCounter::DoDumpPerfCounterLog(const std::string& log_prefix) { - LOG(INFO) << log_prefix << "[delay](ms)" - << " get meta: " << (get_meta_cnt.Get() > 0 ? get_meta.Clear() / get_meta_cnt.Clear() / 1000 : 0) - << " callback: " << (user_callback_cnt.Get() > 0 ? user_callback.Clear() / user_callback_cnt.Clear() / 1000 : 0) - << " rpc_r: " << (rpc_r_cnt.Get() > 0 ? rpc_r.Clear() / rpc_r_cnt.Clear() / 1000 : 0) - << " rpc_w: " << (rpc_w_cnt.Get() > 0 ? rpc_w.Clear() / rpc_w_cnt.Clear() / 1000 : 0) - << " rpc_s: " << (rpc_s_cnt.Get() > 0 ? rpc_s.Clear() / rpc_s_cnt.Clear() / 1000 : 0); - - LOG(INFO) << log_prefix << "[mutation]" - << " all: " << mutate_cnt.Clear() - << " ok: " << mutate_ok_cnt.Clear() - << " fail: " << mutate_fail_cnt.Clear() - << " range: " << mutate_range_cnt.Clear() - << " timeout: " << mutate_timeout_cnt.Clear() - << " queue_timeout: " << mutate_queue_timeout_cnt.Clear(); - - LOG(INFO) << log_prefix << "[reader]" - << " all: " << reader_cnt.Clear() - << " ok: " << reader_ok_cnt.Clear() - << " fail: " << reader_fail_cnt.Clear() - << " range: " << reader_range_cnt.Clear() - << " timeout: " << reader_timeout_cnt.Clear() - << " queue_timeout: " << reader_queue_timeout_cnt.Clear(); - - LOG(INFO) << log_prefix << "[user_mu]" - << " cnt: " << user_mu_cnt.Clear() - << " suc: " << user_mu_suc.Clear() - << " fail: " << user_mu_fail.Clear(); - LOG(INFO) << log_prefix << "[user_mu_cost]" << std::fixed << std::setprecision(2) - << " cost_ave: " << hist_mu_cost.Average() - << " cost_50: " << hist_mu_cost.Percentile(50) - << " cost_90: " << hist_mu_cost.Percentile(90) - << " cost_99: " << hist_mu_cost.Percentile(99); - hist_mu_cost.Clear(); - - LOG(INFO) << log_prefix << "[user_rd]" - << " cnt: " << user_read_cnt.Clear() - << " suc: " << user_read_suc.Clear() - << " notfound: " << user_read_notfound.Clear() - << " fail: " << user_read_fail.Clear(); - LOG(INFO) << log_prefix << "[user_rd_cost]" << std::fixed << std::setprecision(2) - << " cost_ave: " << hist_read_cost.Average() - << " cost_50: " << hist_read_cost.Percentile(50) - << " cost_90: " << hist_read_cost.Percentile(90) - << " cost_99: " << hist_read_cost.Percentile(99); - hist_read_cost.Clear(); - - LOG(INFO) << log_prefix << "[hist_async_cost]" - << " cost_ave: " << hist_async_cost.Average() - << " cost_50: " << hist_async_cost.Percentile(50) - << " cost_90: " << hist_async_cost.Percentile(90) - << " cost_99: " << hist_async_cost.Percentile(99); - hist_async_cost.Clear(); - - LOG(INFO) << log_prefix << "[total]" - << " meta_sched_cnt: " << meta_sched_cnt.Get() - << " meta_update_cnt: " << meta_update_cnt.Get() - << " total_task_cnt: " << total_task_cnt.Get() - << " total_commit_cnt: " << total_commit_cnt.Get(); + LOG(INFO) << log_prefix << "[delay](ms)" + << " get meta: " + << (get_meta_cnt.Get() > 0 ? get_meta.Clear() / get_meta_cnt.Clear() / 1000 : 0) + << " callback: " << (user_callback_cnt.Get() > 0 + ? user_callback.Clear() / user_callback_cnt.Clear() / 1000 + : 0) + << " rpc_r: " << (rpc_r_cnt.Get() > 0 ? rpc_r.Clear() / rpc_r_cnt.Clear() / 1000 : 0) + << " rpc_w: " << (rpc_w_cnt.Get() > 0 ? rpc_w.Clear() / rpc_w_cnt.Clear() / 1000 : 0) + << " rpc_s: " << (rpc_s_cnt.Get() > 0 ? rpc_s.Clear() / rpc_s_cnt.Clear() / 1000 : 0); + + LOG(INFO) << log_prefix << "[mutation]" + << " all: " << mutate_cnt.Clear() << " ok: " << mutate_ok_cnt.Clear() + << " fail: " << mutate_fail_cnt.Clear() << " range: " << mutate_range_cnt.Clear() + << " timeout: " << mutate_timeout_cnt.Clear() + << " queue_timeout: " << mutate_queue_timeout_cnt.Clear(); + + LOG(INFO) << log_prefix << "[reader]" + << " all: " << reader_cnt.Clear() << " ok: " << reader_ok_cnt.Clear() + << " fail: " << reader_fail_cnt.Clear() << " range: " << reader_range_cnt.Clear() + << " timeout: " << reader_timeout_cnt.Clear() + << " queue_timeout: " << reader_queue_timeout_cnt.Clear(); + + LOG(INFO) << log_prefix << "[user_mu]" + << " cnt: " << user_mu_cnt.Clear() << " suc: " << user_mu_suc.Clear() + << " fail: " << user_mu_fail.Clear(); + LOG(INFO) << log_prefix << "[user_mu_cost]" << std::fixed << std::setprecision(2) + << " cost_ave: " << hist_mu_cost.Average() + << " cost_50: " << hist_mu_cost.Percentile(50) + << " cost_90: " << hist_mu_cost.Percentile(90) + << " cost_99: " << hist_mu_cost.Percentile(99); + hist_mu_cost.Clear(); + + LOG(INFO) << log_prefix << "[user_rd]" + << " cnt: " << user_read_cnt.Clear() << " suc: " << user_read_suc.Clear() + << " notfound: " << user_read_notfound.Clear() << " fail: " << user_read_fail.Clear(); + LOG(INFO) << log_prefix << "[user_rd_cost]" << std::fixed << std::setprecision(2) + << " cost_ave: " << hist_read_cost.Average() + << " cost_50: " << hist_read_cost.Percentile(50) + << " cost_90: " << hist_read_cost.Percentile(90) + << " cost_99: " << hist_read_cost.Percentile(99); + hist_read_cost.Clear(); + + LOG(INFO) << log_prefix << "[hist_async_cost]" + << " cost_ave: " << hist_async_cost.Average() + << " cost_50: " << hist_async_cost.Percentile(50) + << " cost_90: " << hist_async_cost.Percentile(90) + << " cost_99: " << hist_async_cost.Percentile(99); + hist_async_cost.Clear(); + + LOG(INFO) << log_prefix << "[total]" + << " meta_sched_cnt: " << meta_sched_cnt.Get() + << " meta_update_cnt: " << meta_update_cnt.Get() + << " total_task_cnt: " << total_task_cnt.Get() + << " total_commit_cnt: " << total_commit_cnt.Get(); } void TableImpl::DelayTaskWrapper(ThreadPool::Task task, int64_t task_id) { - task(task_id); - { - MutexLock lock(&delay_task_id_mutex_); - delay_task_ids_.erase(task_id); - } + task(task_id); + { + MutexLock lock(&delay_task_id_mutex_); + delay_task_ids_.erase(task_id); + } } int64_t TableImpl::AddDelayTask(int64_t delay_time, ThreadPool::Task task) { - MutexLock lock(&delay_task_id_mutex_); - ThreadPool::Task t = - std::bind(&TableImpl::DelayTaskWrapper, this, task, _1); - int64_t t_id = thread_pool_->DelayTask(delay_time, t); - delay_task_ids_.insert(t_id); - return t_id; + MutexLock lock(&delay_task_id_mutex_); + ThreadPool::Task t = std::bind(&TableImpl::DelayTaskWrapper, this, task, _1); + int64_t t_id = thread_pool_->DelayTask(delay_time, t); + delay_task_ids_.insert(t_id); + return t_id; } void TableImpl::ClearDelayTask() { - MutexLock lock(&delay_task_id_mutex_); - std::set::iterator it = delay_task_ids_.begin(); - while (it != delay_task_ids_.end()) { - int64_t task_id = *it; - // may deadlock, MUST unlock - delay_task_id_mutex_.Unlock(); - bool cancelled = thread_pool_->CancelTask(*it); - delay_task_id_mutex_.Lock(); - if (cancelled) { - delay_task_ids_.erase(task_id); - } - it = delay_task_ids_.begin(); + MutexLock lock(&delay_task_id_mutex_); + std::set::iterator it = delay_task_ids_.begin(); + while (it != delay_task_ids_.end()) { + int64_t task_id = *it; + // may deadlock, MUST unlock + delay_task_id_mutex_.Unlock(); + bool cancelled = thread_pool_->CancelTask(*it); + delay_task_id_mutex_.Lock(); + if (cancelled) { + delay_task_ids_.erase(task_id); } + it = delay_task_ids_.begin(); + } } void TableImpl::BreakRequest(int64_t task_id) { - SdkTask* task = task_pool_.PopTask(task_id); - if (task == NULL) { - VLOG(10) << "task " << task_id << " timeout when brankrequest"; - return; - } - CHECK_EQ(task->GetRef(), 1); - switch (task->Type()) { + SdkTask* task = task_pool_.PopTask(task_id); + if (task == NULL) { + VLOG(10) << "task " << task_id << " timeout when brankrequest"; + return; + } + CHECK_EQ(task->GetRef(), 1); + switch (task->Type()) { case SdkTask::MUTATION: - ((RowMutationImpl*)task)->RunCallback(); - break; + ((RowMutationImpl*)task)->RunCallback(); + break; case SdkTask::READ: - ((RowReaderImpl*)task)->RunCallback(); - break; + ((RowReaderImpl*)task)->RunCallback(); + break; + case SdkTask::BATCH_MUTATION: + ((BatchMutationImpl*)task)->RunCallback(); default: - CHECK(false); - break; - } + CHECK(false); + break; + } } -void TableImpl::StatUserPerfCounter(enum SdkTask::TYPE op, ErrorCode::ErrorCodeType code, int64_t cost_time) { - switch (op) { +void TableImpl::StatUserPerfCounter(enum SdkTask::TYPE op, ErrorCode::ErrorCodeType code, + int64_t cost_time) { + switch (op) { case SdkTask::MUTATION: - if (code == ErrorCode::kOK) { - perf_counter_.user_mu_suc.Inc(); - } else { - perf_counter_.user_mu_fail.Inc(); - } - perf_counter_.hist_mu_cost.Add(cost_time); - break; + case SdkTask::BATCH_MUTATION: + if (code == ErrorCode::kOK) { + perf_counter_.user_mu_suc.Inc(); + } else { + perf_counter_.user_mu_fail.Inc(); + } + perf_counter_.hist_mu_cost.Add(cost_time); + break; case SdkTask::READ: - if (code == ErrorCode::kOK) { - perf_counter_.user_read_suc.Inc(); - } else if (code == ErrorCode::kNotFound) { - perf_counter_.user_read_notfound.Inc(); - } else { - perf_counter_.user_read_fail.Inc(); - } - perf_counter_.hist_read_cost.Add(cost_time); - break; + if (code == ErrorCode::kOK) { + perf_counter_.user_read_suc.Inc(); + } else if (code == ErrorCode::kNotFound) { + perf_counter_.user_read_notfound.Inc(); + } else { + perf_counter_.user_read_fail.Inc(); + } + perf_counter_.hist_read_cost.Add(cost_time); + break; default: - break; - } + break; + } } -bool TableImpl::GetTabletLocation(std::vector* tablets, - ErrorCode* err) { - return false; +bool TableImpl::GetTabletLocation(std::vector* tablets, ErrorCode* err) { + return false; } -bool TableImpl::GetDescriptor(TableDescriptor* desc, ErrorCode* err) { - return false; -} +bool TableImpl::GetDescriptor(TableDescriptor* desc, ErrorCode* err) { return false; } /// 创建事务 Transaction* TableImpl::StartRowTransaction(const std::string& row_key) { - std::shared_ptr sigle_row_txn_shared_ptr( - new SingleRowTxn(shared_from_this(), row_key, thread_pool_)); - return new tera::TransactionWrapper(sigle_row_txn_shared_ptr); + std::shared_ptr sigle_row_txn_shared_ptr( + new SingleRowTxn(shared_from_this(), row_key, thread_pool_)); + return new tera::TransactionWrapper(sigle_row_txn_shared_ptr); } /// 提交事务 -void TableImpl::CommitRowTransaction(Transaction* transaction) { - transaction->Commit(); -} +void TableImpl::CommitRowTransaction(Transaction* transaction) { transaction->Commit(); } std::string CounterCoding::EncodeCounter(int64_t counter) { - char counter_buf[sizeof(int64_t)]; - io::EncodeBigEndian(counter_buf, counter); - return std::string(counter_buf, sizeof(counter_buf)); + char counter_buf[sizeof(int64_t)]; + io::EncodeBigEndian(counter_buf, counter); + return std::string(counter_buf, sizeof(counter_buf)); } -bool CounterCoding::DecodeCounter(const std::string& buf, - int64_t* counter) { - assert(counter); - if (buf.size() != sizeof(int64_t)) { - *counter = 0; - return false; - } - *counter = io::DecodeBigEndainSign(buf.data()); - return true; +bool CounterCoding::DecodeCounter(const std::string& buf, int64_t* counter) { + assert(counter); + if (buf.size() != sizeof(int64_t)) { + *counter = 0; + return false; + } + *counter = io::DecodeBigEndainSign(buf.data()); + return true; } - -} // namespace tera +} // namespace tera diff --git a/src/sdk/table_impl.h b/src/sdk/table_impl.h index d895d3766..68d00d677 100644 --- a/src/sdk/table_impl.h +++ b/src/sdk/table_impl.h @@ -2,8 +2,10 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_SDK_TABLE_IMPL_H_ -#define TERA_SDK_TABLE_IMPL_H_ +#ifndef TERA_SDK_TABLE_IMPL_H_ +#define TERA_SDK_TABLE_IMPL_H_ +#include +#include #include "common/mutex.h" #include "common/timer.h" @@ -17,7 +19,6 @@ #include "sdk/sdk_zk.h" #include "tera.h" #include "common/counter.h" -#include namespace tera { @@ -31,8 +32,9 @@ class TabletNodeClient; class RowMutation; class RowMutationImpl; +class BatchMutation; +class BatchMutationImpl; class ResultStreamImpl; -class ResultStreamSyncImpl; class ScanTask; class ScanDescImpl; class WriteTabletRequest; @@ -41,658 +43,633 @@ class RowReaderImpl; class ReadTabletRequest; class ReadTabletResponse; -class SyncMutationBatch { -public: - std::vector row_list_; - mutable Mutex finish_mutex_; - common::CondVar finish_cond_; - uint32_t unfinished_count_; - - SyncMutationBatch(const std::vector& row_list) - : finish_cond_(&finish_mutex_) { - for (uint32_t i = 0; i < row_list.size(); i++) { - RowMutation* mutation = row_list[i]; - if (!mutation->IsAsync()) { - row_list_.push_back(mutation); - } - } - unfinished_count_ = row_list_.size(); - } +class TableImpl : public Table, public std::enable_shared_from_this { + friend class MutationCommitBuffer; + friend class RowMutationImpl; + friend class RowReaderImpl; + friend class BatchMutationImpl; - void AddMutation(RowMutation* mutation) { - MutexLock lock(&finish_mutex_); - row_list_.push_back(mutation); - unfinished_count_++; - } + public: + TableImpl(const std::string& table_name, ThreadPool* thread_pool, + std::shared_ptr client_impl); + + virtual ~TableImpl(); - void WaitUntilFinish() { - finish_mutex_.Lock(); - while (0 != unfinished_count_) { - finish_cond_.Wait(); - } - finish_mutex_.Unlock(); - } + virtual RowMutation* NewRowMutation(const std::string& row_key); + virtual BatchMutation* NewBatchMutation(); - void OnFinishOne() { - MutexLock lock(&finish_mutex_); - if (--unfinished_count_ == 0) { - finish_cond_.Signal(); - } - } -}; + virtual RowReader* NewRowReader(const std::string& row_key); -class TableImpl : public Table, public std::enable_shared_from_this { - friend class MutationCommitBuffer; - friend class RowMutationImpl; - friend class RowReaderImpl; -public: - TableImpl(const std::string& table_name, - ThreadPool* thread_pool, - std::shared_ptr client_impl); - - virtual ~TableImpl(); - - virtual RowMutation* NewRowMutation(const std::string& row_key); - - virtual RowReader* NewRowReader(const std::string& row_key); - - virtual void ApplyMutation(RowMutation* row_mu); - virtual void ApplyMutation(const std::vector& row_mutations); - - virtual void Put(RowMutation* row_mu); - virtual void Put(const std::vector& row_mutations); - - virtual bool Put(const std::string& row_key, const std::string& family, - const std::string& qualifier, const std::string& value, - ErrorCode* err); - virtual bool Put(const std::string& row_key, const std::string& family, - const std::string& qualifier, const std::string& value, - int64_t timestamp, ErrorCode* err); - virtual bool Put(const std::string& row_key, const std::string& family, - const std::string& qualifier, const int64_t value, - ErrorCode* err); - virtual bool Put(const std::string& row_key, const std::string& family, - const std::string& qualifier, const std::string& value, - int32_t ttl, ErrorCode* err); - virtual bool Put(const std::string& row_key, const std::string& family, - const std::string& qualifier, const std::string& value, - int64_t timestamp, int32_t ttl, ErrorCode* err); - - virtual bool Add(const std::string& row_key, - const std::string& family, - const std::string& qualifier, - int64_t delta, - ErrorCode* err); - virtual bool AddInt64(const std::string& row_key, - const std::string& family, - const std::string& qualifier, - int64_t delta, - ErrorCode* err); - - virtual bool PutIfAbsent(const std::string& row_key, - const std::string& family, - const std::string& qualifier, - const std::string& value, - ErrorCode* err); - - /// 原子操作:追加内容到一个Cell - virtual bool Append(const std::string& row_key, const std::string& family, - const std::string& qualifier, const std::string& value, - ErrorCode* err); - - virtual void Get(RowReader* row_reader); - virtual void Get(const std::vector& row_readers); - virtual bool Get(const std::string& row_key, const std::string& family, - const std::string& qualifier, std::string* value, - ErrorCode* err); - virtual bool Get(const std::string& row_key, const std::string& family, - const std::string& qualifier, int64_t* value, - ErrorCode* err); - virtual bool Get(const std::string& row_key, const std::string& family, - const std::string& qualifier, std::string* value, - uint64_t snapshot_id, ErrorCode* err); - virtual bool Get(const std::string& row_key, const std::string& family, - const std::string& qualifier, int64_t* value, - uint64_t snapshot_id, ErrorCode* err); - virtual bool Get(const std::string& row_key, const std::string& family, - const std::string& qualifier, std::string* value, - ErrorCode* err, uint64_t snapshot_id); - virtual bool Get(const std::string& row_key, const std::string& family, - const std::string& qualifier, int64_t* value, - ErrorCode* err, uint64_t snapshot_id); - - virtual bool IsPutFinished() { return cur_commit_pending_counter_.Get() == 0; } - - virtual bool IsGetFinished() { return cur_reader_pending_counter_.Get() == 0; } - - virtual ResultStream* Scan(const ScanDescriptor& desc, ErrorCode* err); - - virtual const std::string GetName() { return name_; } - - virtual bool Flush(); - - virtual bool CheckAndApply(const std::string& rowkey, const std::string& cf_c, - const std::string& value, const RowMutation& row_mu, - ErrorCode* err); - - virtual int64_t IncrementColumnValue(const std::string& row, const std::string& family, - const std::string& qualifier, int64_t amount, - ErrorCode* err); - - /// 创建事务 - virtual Transaction* StartRowTransaction(const std::string& row_key); - /// 提交事务 - virtual void CommitRowTransaction(Transaction* transaction); - - virtual void SetWriteTimeout(int64_t timeout_ms); - virtual void SetReadTimeout(int64_t timeout_ms); - - virtual bool LockRow(const std::string& rowkey, RowLock* lock, ErrorCode* err); - - virtual bool GetStartEndKeys(std::string* start_key, std::string* end_key, - ErrorCode* err); - - virtual bool GetTabletLocation(std::vector* tablets, - ErrorCode* err); - - virtual bool GetDescriptor(TableDescriptor* desc, ErrorCode* err); - - virtual void SetMaxMutationPendingNum(uint64_t max_pending_num) { - max_commit_pending_num_ = max_pending_num; - } - virtual void SetMaxReaderPendingNum(uint64_t max_pending_num) { - max_reader_pending_num_ = max_pending_num; - } + virtual void ApplyMutation(RowMutation* row_mu); + virtual void ApplyMutation(const std::vector& row_mutations); + virtual void ApplyMutation(BatchMutation* batch_mutation); -public: - bool OpenInternal(ErrorCode* err); - - void ScanTabletSync(ResultStreamSyncImpl* stream); - void ScanTabletAsync(ResultStreamImpl* stream); - - void ScanMetaTable(const std::string& key_start, - const std::string& key_end); - - bool GetTabletMetaForKey(const std::string& key, TabletMeta* meta); - - uint64_t GetMaxMutationPendingNum() { return max_commit_pending_num_; } - uint64_t GetMaxReaderPendingNum() { return max_reader_pending_num_; } - TableSchema GetTableSchema() { return table_schema_; } - - void StatUserPerfCounter(enum SdkTask::TYPE op, ErrorCode::ErrorCodeType code, int64_t cost_time); - struct PerfCounter { - int64_t start_time; - Counter rpc_r; // 读取的耗时 - Counter rpc_r_cnt; // 读取的次数 - - Counter rpc_w; // 写入的耗时 - Counter rpc_w_cnt; // 写入的次数 - - Counter rpc_s; // scan的耗时 - Counter rpc_s_cnt; // scan的次数 - - Counter user_callback; // 运行用户callback的耗时 - Counter user_callback_cnt; // 运行用户callback的次数 - - Counter get_meta; // 更新meta的耗时 - Counter get_meta_cnt; // 更新meta的次数 - - Counter mutate_cnt; // 分发mutation的次数 - Counter mutate_ok_cnt; // mutation回调成功的次数 - Counter mutate_fail_cnt; // mutation回调失败的次数 - Counter mutate_range_cnt; // mutation回调失败-原因为not in range - Counter mutate_timeout_cnt; // mutation在sdk队列中超时 - Counter mutate_queue_timeout_cnt; // mutation在sdk队列中超时,且之前从未被重试过 - - Counter reader_cnt; // 分发reader的次数 - Counter reader_ok_cnt; // reader回调成功的次数 - Counter reader_fail_cnt; // reader回调失败的次数 - Counter reader_range_cnt; // reader回调失败-原因为not in range - Counter reader_timeout_cnt; // reader在sdk队列中超时 - Counter reader_queue_timeout_cnt; // raader在sdk队列中超时,且之前从未被重试过 - - Counter user_mu_cnt; - Counter user_mu_suc; - Counter user_mu_fail; - ::leveldb::Histogram hist_mu_cost; - - Counter user_read_cnt; - Counter user_read_suc; - Counter user_read_notfound; - Counter user_read_fail; - ::leveldb::Histogram hist_read_cost; - - ::leveldb::Histogram hist_async_cost; - Counter meta_sched_cnt; - Counter meta_update_cnt; - Counter total_task_cnt; - Counter total_commit_cnt; - - void DoDumpPerfCounterLog(const std::string& log_prefix); - - PerfCounter() { - start_time = get_micros(); - } - }; -private: - bool ScanTabletNode(const TabletMeta & tablet_meta, - const std::string& key_start, - const std::string& key_end, - std::vector* kv_list, - ErrorCode* err); - - void DistributeTasks(const std::vector& task_list, - bool called_by_user, - SdkTask::TYPE task_type); - - void DistributeMutationsById(std::vector* retry_mu_id_list); - - // 通过异步RPC将mutation提交至TS - void CommitMutations(const std::string& server_addr, - std::vector& mu_list); - - // mutate RPC回调 - static void MutateCallBackWrapper(std::weak_ptr weak_ptr_table, - std::vector* mu_id_list, - WriteTabletRequest* request, - WriteTabletResponse* response, - bool failed, int error_code); - void MutateCallBack(std::vector* mu_id_list, - WriteTabletRequest* request, - WriteTabletResponse* response, - bool failed, int error_code); - - // mutation到达用户设置的超时时间但尚未处理完 - void MutationTimeout(SdkTask* sdk_task); - - // 将一批reader根据rowkey分配给各个TS - void DistributeReaders(const std::vector& row_reader_list, - bool called_by_user); - - // 通过异步RPC将reader提交至TS - void CommitReaders(const std::string server_addr, - std::vector& reader_list); - - void DistributeReadersById(std::vector* reader_id_list); - - // reader RPC回调 - static void ReaderCallBackWrapper(std::weak_ptr weak_ptr_table, - std::vector* reader_id_list, - ReadTabletRequest* request, - ReadTabletResponse* response, - bool failed, int error_code); - void ReaderCallBack(std::vector* reader_id_list, - ReadTabletRequest* request, - ReadTabletResponse* response, - bool failed, int error_code); - - // reader到达用户设置的超时时间但尚未处理完 - void ReaderTimeout(SdkTask* sdk_task); - - void PackSdkTasks(const std::string& server_addr, - std::vector& task_list, - SdkTask::TYPE task_type); - void TaskBatchTimeout(SdkTask* task); - void CommitTasksById(const std::string& server_addr, - std::vector& task_id_list, - SdkTask::TYPE task_type); - - void ScanTabletAsync(ScanTask* scan_task, bool called_by_user); - - void CommitScan(ScanTask* scan_task, const std::string& server_addr); - - static void ScanCallBackWrapper(std::weak_ptr weak_ptr_table, - ScanTask* scan_task, - ScanTabletRequest* request, - ScanTabletResponse* response, - bool failed, int error_code); - void ScanCallBack(ScanTask* scan_task, ScanTabletRequest* request, - ScanTabletResponse* response, bool failed, int error_code); - - void BreakRequest(int64_t task_id); - void BreakScan(ScanTask* scan_task); - - enum TabletMetaStatus { - NORMAL, - DELAY_UPDATE, - WAIT_UPDATE, - UPDATING - }; - struct TabletMetaNode { - TabletMeta meta; - int64_t update_time; - TabletMetaStatus status; - - TabletMetaNode() : update_time(0), status(NORMAL) {} - }; - - bool GetTabletAddrOrScheduleUpdateMeta(const std::string& row, - SdkTask* request, - std::string* server_addr); - - TabletMetaNode* GetTabletMetaNodeForKey(const std::string& key); - - void DelayUpdateMeta(std::string start_key, std::string end_key); - - void UpdateMetaAsync(); - - void ScanMetaTableAsync(const std::string& key_start, const std::string& key_end, - const std::string& expand_key_end, bool zk_access); - - void ScanMetaTableAsyncInLock(std::string key_start, std::string key_end, - std::string expand_key_end, bool zk_access); - static void ScanMetaTableCallBackWrapper(std::weak_ptr weak_ptr_table, - std::string key_start, - std::string key_end, - std::string expand_key_end, - int64_t start_time, - ScanTabletRequest* request, - ScanTabletResponse* response, - bool failed, int error_code); - void ScanMetaTableCallBack(std::string key_start, - std::string key_end, - std::string expand_key_end, - int64_t start_time, - ScanTabletRequest* request, - ScanTabletResponse* response, - bool failed, int error_code); - - void UpdateTabletMetaList(const TabletMeta& meta); - - void GiveupUpdateTabletMeta(const std::string& key_start, const std::string& key_end); - - void WakeUpPendingRequest(const TabletMetaNode& node); - - void ScheduleUpdateMeta(const std::string& row, int64_t meta_timestamp); - - bool UpdateTableMeta(ErrorCode* err); - void ReadTableMetaAsync(ErrorCode* ret_err, int32_t retry_times, bool zk_access); - - static void ReadTableMetaCallBackWrapper(std::weak_ptr weak_ptr_table, - ErrorCode* ret_err, int32_t retry_times, - ReadTabletRequest* request, - ReadTabletResponse* response, - bool failed, int error_code); - void ReadTableMetaCallBack(ErrorCode* ret_err, int32_t retry_times, - ReadTabletRequest* request, - ReadTabletResponse* response, - bool failed, int error_code); - bool RestoreCookie(); - void EnableCookieUpdateTimer(); - void DumpCookie(); - void DoDumpCookie(); - std::string GetCookieFileName(const std::string& tablename, - const std::string& cluster_id, - int64_t create_time); - std::string GetCookieFilePathName(); - std::string GetCookieLockFilePathName(); - void DeleteLegacyCookieLockFile(const std::string& lock_file, int timeout_seconds); - void CloseAndRemoveCookieLockFile(int lock_fd, const std::string& cookie_lock_file); - - void DumpPerfCounterLogDelay(); - void DoDumpPerfCounterLog(); - - void DelayTaskWrapper(ThreadPool::Task task, int64_t task_id); - int64_t AddDelayTask(int64_t delay_time, ThreadPool::Task task); - void ClearDelayTask(); - -private: - TableImpl(const TableImpl&); - void operator=(const TableImpl&); - - struct TaskBatch : public SdkTask { - uint64_t byte_size; - std::string server_addr; - SdkTask::TYPE type; - Mutex* mutex; - std::map* task_batch_map; - std::vector* row_id_list; - - TaskBatch() : SdkTask(SdkTask::TASKBATCH) {} - virtual bool IsAsync() { return false; } - virtual uint32_t Size() { return 0; } - virtual int64_t TimeOut() { return 0; } - virtual void Wait() {} - virtual void SetError(ErrorCode::ErrorCodeType err, - const std::string& reason) {} - virtual const std::string& RowKey() { return server_addr; } - }; - - std::string name_; - int64_t create_time_; - uint64_t last_sequence_id_; - uint32_t timeout_; - - std::shared_ptr client_impl_; - - mutable Mutex mutation_batch_mutex_; - mutable Mutex reader_batch_mutex_; - uint32_t commit_size_; - uint64_t write_commit_timeout_; - uint64_t read_commit_timeout_; - std::map mutation_batch_map_; - std::map reader_batch_map_; - Counter cur_commit_pending_counter_; - Counter cur_reader_pending_counter_; - int64_t max_commit_pending_num_; - int64_t max_reader_pending_num_; - - // meta management - mutable Mutex meta_mutex_; - common::CondVar meta_cond_; - std::map > pending_task_id_list_; - uint32_t meta_updating_count_; - std::map tablet_meta_list_; - // end of meta management - - // table meta managerment - mutable Mutex table_meta_mutex_; - common::CondVar table_meta_cond_; - bool table_meta_updating_; - TableSchema table_schema_; - // end of table meta managerment - - SdkTimeoutManager task_pool_; - Counter next_task_id_; - - master::MasterClient* master_client_; - tabletnode::TabletNodeClient* tabletnode_client_; - - ThreadPool* thread_pool_; - mutable Mutex delay_task_id_mutex_; - std::set delay_task_ids_; - /// cluster_ could cache the master_addr & root_table_addr. - /// if there is no cluster_, - /// we have to access zookeeper whenever we need master_addr or root_table_addr. - /// if there is cluster_, - /// we save master_addr & root_table_addr in cluster_, access zookeeper only once. - sdk::ClusterFinder* cluster_; - bool cluster_private_; - - PerfCounter perf_counter_; // calc time consumption, for performance analysis - - /// read request will contain this member, - /// so tabletnodes can drop the read-request that timeouted - uint64_t pending_timeout_ms_; -}; + virtual void Put(RowMutation* row_mu); + virtual void Put(const std::vector& row_mutations); -class TableWrapper: public Table { -public: - explicit TableWrapper(std::shared_ptr impl) - : impl_(impl) {} - virtual ~TableWrapper() {} - virtual RowMutation* NewRowMutation(const std::string& row_key) { - return impl_->NewRowMutation(row_key); - } - virtual RowReader* NewRowReader(const std::string& row_key) { - return impl_->NewRowReader(row_key); - } - virtual void Put(RowMutation* row_mu) { - impl_->Put(row_mu); - } - virtual void Put(const std::vector& row_mu_list) { - impl_->Put(row_mu_list); - } - virtual void ApplyMutation(RowMutation* row_mu) { - impl_->ApplyMutation(row_mu); - } - virtual void ApplyMutation(const std::vector& row_mu_list) { - impl_->ApplyMutation(row_mu_list); - } - virtual bool Put(const std::string& row_key, const std::string& family, - const std::string& qualifier, const std::string& value, - ErrorCode* err) { - return impl_->Put(row_key, family, qualifier, value, err); - } - virtual bool Put(const std::string& row_key, const std::string& family, - const std::string& qualifier, const int64_t value, - ErrorCode* err) { - return impl_->Put(row_key, family, qualifier, value, err); - } - virtual bool Put(const std::string& row_key, const std::string& family, - const std::string& qualifier, const std::string& value, - int32_t ttl, ErrorCode* err) { - return impl_->Put(row_key, family, qualifier, value, ttl, err); - } - virtual bool Put(const std::string& row_key, const std::string& family, - const std::string& qualifier, const std::string& value, - int64_t timestamp, int32_t ttl, ErrorCode* err) { - return impl_->Put(row_key, family, qualifier, value, timestamp, ttl, err); - } - virtual bool Add(const std::string& row_key, const std::string& family, - const std::string& qualifier, int64_t delta, - ErrorCode* err) { - return impl_->Add(row_key, family, qualifier, delta, err); - } - virtual bool AddInt64(const std::string& row_key, const std::string& family, - const std::string& qualifier, int64_t delta, - ErrorCode* err) { - return impl_->AddInt64(row_key, family, qualifier, delta, err); - } + virtual bool Put(const std::string& row_key, const std::string& family, + const std::string& qualifier, const std::string& value, ErrorCode* err); + virtual bool Put(const std::string& row_key, const std::string& family, + const std::string& qualifier, const std::string& value, int64_t timestamp, + ErrorCode* err); + virtual bool Put(const std::string& row_key, const std::string& family, + const std::string& qualifier, const int64_t value, ErrorCode* err); + virtual bool Put(const std::string& row_key, const std::string& family, + const std::string& qualifier, const std::string& value, int32_t ttl, + ErrorCode* err); + virtual bool Put(const std::string& row_key, const std::string& family, + const std::string& qualifier, const std::string& value, int64_t timestamp, + int32_t ttl, ErrorCode* err); - virtual bool PutIfAbsent(const std::string& row_key, - const std::string& family, - const std::string& qualifier, - const std::string& value, - ErrorCode* err) { - return impl_->PutIfAbsent(row_key, family, qualifier, value, err); - } + virtual bool Add(const std::string& row_key, const std::string& family, + const std::string& qualifier, int64_t delta, ErrorCode* err); + virtual bool AddInt64(const std::string& row_key, const std::string& family, + const std::string& qualifier, int64_t delta, ErrorCode* err); - virtual bool Append(const std::string& row_key, const std::string& family, - const std::string& qualifier, const std::string& value, - ErrorCode* err) { - return impl_->Append(row_key, family, qualifier, value, err); - } - virtual void Get(RowReader* row_reader) { - impl_->Get(row_reader); - } - virtual void Get(const std::vector& row_readers) { - impl_->Get(row_readers); - } - virtual bool Get(const std::string& row_key, const std::string& family, - const std::string& qualifier, std::string* value, - ErrorCode* err) { - return impl_->Get(row_key, family, qualifier, value, err); - } - virtual bool Get(const std::string& row_key, const std::string& family, - const std::string& qualifier, int64_t* value, - ErrorCode* err) { - return impl_->Get(row_key, family, qualifier, value, err); - } - virtual bool Get(const std::string& row_key, const std::string& family, - const std::string& qualifier, std::string* value, - ErrorCode* err, uint64_t snapshot_id) { - return impl_->Get(row_key, family, qualifier, value, snapshot_id, err); - } - virtual bool Get(const std::string& row_key, const std::string& family, - const std::string& qualifier, std::string* value, - uint64_t snapshot_id, ErrorCode* err) { - return impl_->Get(row_key, family, qualifier, value, snapshot_id, err); - } - virtual bool Get(const std::string& row_key, const std::string& family, - const std::string& qualifier, int64_t* value, - ErrorCode* err, uint64_t snapshot_id) { - return impl_->Get(row_key, family, qualifier, value, snapshot_id, err); - } - virtual bool Get(const std::string& row_key, const std::string& family, - const std::string& qualifier, int64_t* value, - uint64_t snapshot_id, ErrorCode* err) { - return impl_->Get(row_key, family, qualifier, value, snapshot_id, err); - } - - virtual bool IsPutFinished() { - return impl_->IsPutFinished(); - } - virtual bool IsGetFinished() { - return impl_->IsGetFinished(); - } + virtual bool PutIfAbsent(const std::string& row_key, const std::string& family, + const std::string& qualifier, const std::string& value, ErrorCode* err); - virtual ResultStream* Scan(const ScanDescriptor& desc, ErrorCode* err) { - return impl_->Scan(desc, err); - } + /// 原子操作:追加内容到一个Cell + virtual bool Append(const std::string& row_key, const std::string& family, + const std::string& qualifier, const std::string& value, ErrorCode* err); - virtual const std::string GetName() { - return impl_->GetName(); - } - - virtual bool Flush() { - return impl_->Flush(); - } - virtual bool CheckAndApply(const std::string& rowkey, const std::string& cf_c, - const std::string& value, const RowMutation& row_mu, - ErrorCode* err) { - return impl_->CheckAndApply(rowkey, cf_c, value, row_mu, err); - } - virtual int64_t IncrementColumnValue(const std::string& row, const std::string& family, - const std::string& qualifier, int64_t amount, - ErrorCode* err) { - return impl_->IncrementColumnValue(row, family, qualifier, amount, err); - } - virtual Transaction* StartRowTransaction(const std::string& row_key) { - return impl_->StartRowTransaction(row_key); - } - virtual void CommitRowTransaction(Transaction* transaction) { - impl_->CommitRowTransaction(transaction); - } - virtual void SetWriteTimeout(int64_t timeout_ms) { - impl_->SetWriteTimeout(timeout_ms); - } - virtual void SetReadTimeout(int64_t timeout_ms) { - impl_->SetReadTimeout(timeout_ms); - } + virtual void Get(RowReader* row_reader); + virtual void Get(const std::vector& row_readers); + virtual bool Get(const std::string& row_key, const std::string& family, + const std::string& qualifier, std::string* value, ErrorCode* err); + virtual bool Get(const std::string& row_key, const std::string& family, + const std::string& qualifier, int64_t* value, ErrorCode* err); + virtual bool Get(const std::string& row_key, const std::string& family, + const std::string& qualifier, std::string* value, uint64_t snapshot_id, + ErrorCode* err); + virtual bool Get(const std::string& row_key, const std::string& family, + const std::string& qualifier, int64_t* value, uint64_t snapshot_id, + ErrorCode* err); + virtual bool Get(const std::string& row_key, const std::string& family, + const std::string& qualifier, std::string* value, ErrorCode* err, + uint64_t snapshot_id); + virtual bool Get(const std::string& row_key, const std::string& family, + const std::string& qualifier, int64_t* value, ErrorCode* err, + uint64_t snapshot_id); + + virtual bool IsPutFinished() { return cur_commit_pending_counter_.Get() == 0; } + + virtual bool IsGetFinished() { return cur_reader_pending_counter_.Get() == 0; } + + virtual ResultStream* Scan(const ScanDescriptor& desc, ErrorCode* err); - virtual bool LockRow(const std::string& rowkey, RowLock* lock, ErrorCode* err) { - return impl_->LockRow(rowkey, lock, err); - } + virtual const std::string GetName() { return name_; } + + virtual bool Flush(); + + virtual bool CheckAndApply(const std::string& rowkey, const std::string& cf_c, + const std::string& value, const RowMutation& row_mu, ErrorCode* err); + + virtual int64_t IncrementColumnValue(const std::string& row, const std::string& family, + const std::string& qualifier, int64_t amount, + ErrorCode* err); + + /// 创建事务 + virtual Transaction* StartRowTransaction(const std::string& row_key); + /// 提交事务 + virtual void CommitRowTransaction(Transaction* transaction); + + virtual void SetWriteTimeout(int64_t timeout_ms); + virtual void SetReadTimeout(int64_t timeout_ms); + + virtual bool LockRow(const std::string& rowkey, RowLock* lock, ErrorCode* err); + + virtual bool GetStartEndKeys(std::string* start_key, std::string* end_key, ErrorCode* err); + + virtual bool GetTabletLocation(std::vector* tablets, ErrorCode* err); + + virtual bool GetTablet(const std::string& row_key, std::string* tablet); + + virtual bool GetDescriptor(TableDescriptor* desc, ErrorCode* err); + + virtual void SetMaxMutationPendingNum(uint64_t max_pending_num) { + max_commit_pending_num_ = max_pending_num; + } + virtual void SetMaxReaderPendingNum(uint64_t max_pending_num) { + max_reader_pending_num_ = max_pending_num; + } + + public: + bool OpenInternal(std::function hash_method, ErrorCode* err); + + virtual void ScanTabletAsync(ResultStreamImpl* stream); + + void ScanMetaTable(const std::string& key_start, const std::string& key_end); + + bool GetTabletMetaForKey(const std::string& key, TabletMeta* meta); + + uint64_t GetMaxMutationPendingNum() { return max_commit_pending_num_; } + uint64_t GetMaxReaderPendingNum() { return max_reader_pending_num_; } + TableSchema GetTableSchema() { return table_schema_; } + + void StatUserPerfCounter(enum SdkTask::TYPE op, ErrorCode::ErrorCodeType code, int64_t cost_time); + struct PerfCounter { + int64_t start_time; + Counter rpc_r; // 读取的耗时 + Counter rpc_r_cnt; // 读取的次数 + + Counter rpc_w; // 写入的耗时 + Counter rpc_w_cnt; // 写入的次数 + + Counter rpc_s; // scan的耗时 + Counter rpc_s_cnt; // scan的次数 + + Counter user_callback; // 运行用户callback的耗时 + Counter user_callback_cnt; // 运行用户callback的次数 + + Counter get_meta; // 更新meta的耗时 + Counter get_meta_cnt; // 更新meta的次数 + + Counter mutate_cnt; // 分发mutation的次数 + Counter mutate_ok_cnt; // mutation回调成功的次数 + Counter mutate_fail_cnt; // mutation回调失败的次数 + Counter mutate_range_cnt; // mutation回调失败-原因为not in range + Counter mutate_timeout_cnt; // mutation在sdk队列中超时 + Counter mutate_queue_timeout_cnt; // mutation在sdk队列中超时,且之前从未被重试过 + + Counter reader_cnt; // 分发reader的次数 + Counter reader_ok_cnt; // reader回调成功的次数 + Counter reader_fail_cnt; // reader回调失败的次数 + Counter reader_range_cnt; // reader回调失败-原因为not in range + Counter reader_timeout_cnt; // reader在sdk队列中超时 + Counter reader_queue_timeout_cnt; // raader在sdk队列中超时,且之前从未被重试过 + + Counter user_mu_cnt; + Counter user_mu_suc; + Counter user_mu_fail; + ::leveldb::Histogram hist_mu_cost; + + Counter user_read_cnt; + Counter user_read_suc; + Counter user_read_notfound; + Counter user_read_fail; + ::leveldb::Histogram hist_read_cost; - virtual bool GetStartEndKeys(std::string* start_key, std::string* end_key, - ErrorCode* err) { - return impl_->GetStartEndKeys(start_key, end_key, err); - } + ::leveldb::Histogram hist_async_cost; + Counter meta_sched_cnt; + Counter meta_update_cnt; + Counter total_task_cnt; + Counter total_commit_cnt; - virtual bool GetTabletLocation(std::vector* tablets, - ErrorCode* err) { - return impl_->GetTabletLocation(tablets, err); - } - virtual bool GetDescriptor(TableDescriptor* desc, ErrorCode* err) { - return impl_->GetDescriptor(desc, err); - } + void DoDumpPerfCounterLog(const std::string& log_prefix); - virtual void SetMaxMutationPendingNum(uint64_t max_pending_num) { - impl_->SetMaxMutationPendingNum(max_pending_num); - } - virtual void SetMaxReaderPendingNum(uint64_t max_pending_num) { - impl_->SetMaxReaderPendingNum(max_pending_num); - } + Counter GetTimeoutCnt(SdkTask* task) { + switch (task->Type()) { + case SdkTask::READ: + return reader_timeout_cnt; + case SdkTask::MUTATION: + case SdkTask::BATCH_MUTATION: + return mutate_timeout_cnt; + default: + abort(); + } + } + + Counter GetRangeCnt(SdkTask* task) { + switch (task->Type()) { + case SdkTask::READ: + return reader_range_cnt; + case SdkTask::MUTATION: + case SdkTask::BATCH_MUTATION: + return mutate_range_cnt; + default: + abort(); + } + } + + Counter GetTaskFailCnt(SdkTask::TYPE type) { + switch (type) { + case SdkTask::READ: + return reader_range_cnt; + case SdkTask::MUTATION: + case SdkTask::BATCH_MUTATION: + return mutate_range_cnt; + default: + abort(); + } + } + + Counter GetQueueTimeoutCnt(SdkTask* task) { + switch (task->Type()) { + case SdkTask::READ: + return reader_queue_timeout_cnt; + case SdkTask::MUTATION: + case SdkTask::BATCH_MUTATION: + return mutate_queue_timeout_cnt; + default: + abort(); + } + } + + Counter GetTaskCnt(SdkTask* task) { + switch (task->Type()) { + case SdkTask::READ: + return reader_cnt; + case SdkTask::MUTATION: + case SdkTask::BATCH_MUTATION: + return mutate_cnt; + default: + abort(); + } + } + + PerfCounter() { start_time = get_micros(); } + }; + + bool IsHashTable() override { return is_hash_table_.load(); } + std::function GetHashMethod() override { return hash_method_; } + + private: + bool ScanTabletNode(const TabletMeta& tablet_meta, const std::string& key_start, + const std::string& key_end, std::vector* kv_list, + ErrorCode* err); + + void DistributeTasks(const std::vector& task_list, bool called_by_user, + SdkTask::TYPE task_type); + + // 通过异步RPC将mutation提交至TS + void CommitMutations(const std::string& server_addr, std::vector& mu_list); + + void CommitBatchMutations(const std::string& server_addr, + std::vector& mu_list); + + // mutate RPC回调 + static void MutateCallBackWrapper(std::weak_ptr weak_ptr_table, + std::vector* mu_id_list, WriteTabletRequest* request, + WriteTabletResponse* response, bool failed, int error_code); + void MutateCallBack(std::vector* mu_id_list, WriteTabletRequest* request, + WriteTabletResponse* response, bool failed, int error_code); + + static void BatchMutateCallBackWrapper(std::weak_ptr weak_ptr_table, + std::vector* mu_id_list, + WriteTabletRequest* request, WriteTabletResponse* response, + bool failed, int error_code); + + void BatchMutateCallBack(std::vector* mu_id_list, WriteTabletRequest* request, + WriteTabletResponse* response, bool failed, int error_code); + + void TaskTimeout(SdkTask* sdk_task); + + // 将一批reader根据rowkey分配给各个TS + void DistributeReaders(const std::vector& row_reader_list, bool called_by_user); + + // 通过异步RPC将reader提交至TS + void CommitReaders(const std::string& server_addr, std::vector& reader_list); + + void DistributeTasksById(std::vector* task_id_list, SdkTask::TYPE task_type); + + void DistributeDelayTasks(const std::map*>& retry_times_list, + SdkTask::TYPE task_type); + + void CollectFailedTasks(int64_t task_id, SdkTask::TYPE type, StatusCode err, + std::vector* not_in_range_list, + std::map*>* retry_times_list); + + // reader RPC回调 + static void ReaderCallBackWrapper(std::weak_ptr weak_ptr_table, + std::vector* reader_id_list, + ReadTabletRequest* request, ReadTabletResponse* response, + bool failed, int error_code); - std::shared_ptr GetTableImpl() { - return impl_; - } + void ReaderCallBack(std::vector* reader_id_list, ReadTabletRequest* request, + ReadTabletResponse* response, bool failed, int error_code); + + void PackSdkTasks(const std::string& server_addr, std::vector& task_list, + SdkTask::TYPE task_type); + void TaskBatchTimeout(SdkTask* task); + void CommitTasksById(const std::string& server_addr, std::vector& task_id_list, + SdkTask::TYPE task_type); + + void ScanTabletAsync(ScanTask* scan_task, bool called_by_user); + + void CommitScan(ScanTask* scan_task, const std::string& server_addr); + + static void ScanCallBackWrapper(std::weak_ptr weak_ptr_table, ScanTask* scan_task, + ScanTabletRequest* request, ScanTabletResponse* response, + bool failed, int error_code); + void ScanCallBack(ScanTask* scan_task, ScanTabletRequest* request, ScanTabletResponse* response, + bool failed, int error_code); + + void BreakRequest(int64_t task_id); + void BreakScan(ScanTask* scan_task); + + enum TabletMetaStatus { NORMAL, DELAY_UPDATE, WAIT_UPDATE, UPDATING }; + struct TabletMetaNode { + TabletMeta meta; + int64_t update_time; + TabletMetaStatus status; + + TabletMetaNode() : update_time(0), status(NORMAL) {} + }; + + bool GetTabletAddrOrScheduleUpdateMeta(const std::string& row, SdkTask* request, + std::string* server_addr); + + TabletMetaNode* GetTabletMetaNodeForKey(const std::string& key); + + void DelayUpdateMeta(const std::string& start_key, const std::string& end_key); + + void UpdateMetaAsync(); + + void ScanMetaTableAsync(const std::string& key_start, const std::string& key_end, + const std::string& expand_key_end, bool zk_access); + + void ScanMetaTableAsyncInLock(const std::string& key_start, const std::string& key_end, + const std::string& expand_key_end, bool zk_access); + static void ScanMetaTableCallBackWrapper(std::weak_ptr weak_ptr_table, + std::string key_start, std::string key_end, + std::string expand_key_end, int64_t start_time, + ScanTabletRequest* request, ScanTabletResponse* response, + bool failed, int error_code); + void ScanMetaTableCallBack(std::string key_start, std::string key_end, std::string expand_key_end, + int64_t start_time, ScanTabletRequest* request, + ScanTabletResponse* response, bool failed, int error_code); + + void UpdateTabletMetaList(const TabletMeta& meta); + + void GiveupUpdateTabletMeta(const std::string& key_start, const std::string& key_end); + + void WakeUpPendingRequest(const TabletMetaNode& node); + + void ScheduleUpdateMeta(const std::string& row, int64_t meta_timestamp); + + bool UpdateTableMeta(ErrorCode* err); + void ReadTableMetaAsync(ErrorCode* ret_err, int32_t retry_times, bool zk_access); + + static void ReadTableMetaCallBackWrapper(std::weak_ptr weak_ptr_table, + ErrorCode* ret_err, int32_t retry_times, + ReadTabletRequest* request, ReadTabletResponse* response, + bool failed, int error_code); + void ReadTableMetaCallBack(ErrorCode* ret_err, int32_t retry_times, ReadTabletRequest* request, + ReadTabletResponse* response, bool failed, int error_code); + bool RestoreCookie(); + void EnableCookieUpdateTimer(); + void DumpCookie(); + void DoDumpCookie(); + std::string GetCookieFileName(const std::string& tablename, const std::string& cluster_id, + int64_t create_time); + std::string GetCookieFilePathName(); + std::string GetCookieLockFilePathName(); + void DeleteLegacyCookieLockFile(const std::string& lock_file, int timeout_seconds); + void CloseAndRemoveCookieLockFile(int lock_fd, const std::string& cookie_lock_file); + + void DumpPerfCounterLogDelay(); + void DoDumpPerfCounterLog(); + + void DelayTaskWrapper(ThreadPool::Task task, int64_t task_id); + int64_t AddDelayTask(int64_t delay_time, ThreadPool::Task task); + void ClearDelayTask(); + + private: + TableImpl(const TableImpl&); + void operator=(const TableImpl&); + + struct TaskBatch : public SdkTask { + uint64_t byte_size = 0; + std::string server_addr; + SdkTask::TYPE type; + Mutex* mutex = nullptr; + std::map* task_batch_map = nullptr; + std::vector* row_id_list = nullptr; + + TaskBatch() : SdkTask(SdkTask::TASKBATCH) {} + virtual bool IsAsync() { return false; } + virtual uint32_t Size() { return 0; } + virtual void SetTimeOut(int64_t timeout) {} + virtual int64_t TimeOut() { return 0; } + virtual void Wait() {} + virtual void SetError(ErrorCode::ErrorCodeType err, const std::string& reason) {} + virtual std::string InternalRowKey() { return server_addr; } + // task batch not implement this interface + virtual int64_t GetCommitTimes() { return 0; } + // task batch not implement this interface + virtual void RunCallback() { abort(); } + }; + + std::string name_; + int64_t create_time_; + uint64_t last_sequence_id_; + uint32_t write_timeout_; + uint32_t read_timeout_; + + std::shared_ptr client_impl_; + std::shared_ptr access_builder_; + + mutable Mutex mutation_batch_mutex_; + mutable Mutex reader_batch_mutex_; + uint32_t commit_size_; + uint64_t write_commit_timeout_; + uint64_t read_commit_timeout_; + std::map mutation_batch_map_; + std::map reader_batch_map_; + Counter cur_commit_pending_counter_; + Counter cur_reader_pending_counter_; + int64_t max_commit_pending_num_; + int64_t max_reader_pending_num_; + + // meta management + mutable Mutex meta_mutex_; + common::CondVar meta_cond_; + std::map> pending_task_id_list_; + uint32_t meta_updating_count_; + std::map tablet_meta_list_; + // end of meta management + + // table meta managerment + mutable Mutex table_meta_mutex_; + common::CondVar table_meta_cond_; + bool table_meta_updating_; + TableSchema table_schema_; + // end of table meta managerment + + SdkTimeoutManager task_pool_; + Counter next_task_id_; + + master::MasterClient* master_client_; + tabletnode::TabletNodeClient* tabletnode_client_; + + ThreadPool* thread_pool_; + mutable Mutex delay_task_id_mutex_; + std::set delay_task_ids_; + /// cluster_ could cache the master_addr & root_table_addr. + /// if there is no cluster_, + /// we have to access zookeeper whenever we need master_addr or + /// root_table_addr. + /// if there is cluster_, + /// we save master_addr & root_table_addr in cluster_, access zookeeper + /// only once. + sdk::ClusterFinder* cluster_; + bool cluster_private_; + + PerfCounter perf_counter_; // calc time consumption, for performance analysis + + std::atomic is_hash_table_{false}; + std::function hash_method_; + + // server_addr, rpc_timeout_duration + // Records the last time(ms) of the server response with non-rpctimeout. + std::unordered_map rpc_timeout_duration_; + mutable Mutex rpc_timeout_duration_mutex_; +}; -private: - std::shared_ptr impl_; +class TableWrapper : public Table { + public: + explicit TableWrapper(const std::shared_ptr& impl) : impl_(impl) {} + virtual ~TableWrapper() {} + virtual RowMutation* NewRowMutation(const std::string& row_key) { + return impl_->NewRowMutation(row_key); + } + virtual BatchMutation* NewBatchMutation() { return impl_->NewBatchMutation(); } + virtual RowReader* NewRowReader(const std::string& row_key) { + return impl_->NewRowReader(row_key); + } + virtual void Put(RowMutation* row_mu) { impl_->Put(row_mu); } + virtual void Put(const std::vector& row_mu_list) { impl_->Put(row_mu_list); } + virtual void ApplyMutation(RowMutation* row_mu) { impl_->ApplyMutation(row_mu); } + virtual void ApplyMutation(const std::vector& row_mu_list) { + impl_->ApplyMutation(row_mu_list); + } + virtual void ApplyMutation(BatchMutation* batch_mutation) { + impl_->ApplyMutation(batch_mutation); + } + virtual bool Put(const std::string& row_key, const std::string& family, + const std::string& qualifier, const std::string& value, ErrorCode* err) { + return impl_->Put(row_key, family, qualifier, value, err); + } + virtual bool Put(const std::string& row_key, const std::string& family, + const std::string& qualifier, const int64_t value, ErrorCode* err) { + return impl_->Put(row_key, family, qualifier, value, err); + } + virtual bool Put(const std::string& row_key, const std::string& family, + const std::string& qualifier, const std::string& value, int32_t ttl, + ErrorCode* err) { + return impl_->Put(row_key, family, qualifier, value, ttl, err); + } + virtual bool Put(const std::string& row_key, const std::string& family, + const std::string& qualifier, const std::string& value, int64_t timestamp, + int32_t ttl, ErrorCode* err) { + return impl_->Put(row_key, family, qualifier, value, timestamp, ttl, err); + } + virtual bool Add(const std::string& row_key, const std::string& family, + const std::string& qualifier, int64_t delta, ErrorCode* err) { + return impl_->Add(row_key, family, qualifier, delta, err); + } + virtual bool AddInt64(const std::string& row_key, const std::string& family, + const std::string& qualifier, int64_t delta, ErrorCode* err) { + return impl_->AddInt64(row_key, family, qualifier, delta, err); + } + + virtual bool PutIfAbsent(const std::string& row_key, const std::string& family, + const std::string& qualifier, const std::string& value, ErrorCode* err) { + return impl_->PutIfAbsent(row_key, family, qualifier, value, err); + } + + virtual bool Append(const std::string& row_key, const std::string& family, + const std::string& qualifier, const std::string& value, ErrorCode* err) { + return impl_->Append(row_key, family, qualifier, value, err); + } + virtual void Get(RowReader* row_reader) { impl_->Get(row_reader); } + virtual void Get(const std::vector& row_readers) { impl_->Get(row_readers); } + virtual bool Get(const std::string& row_key, const std::string& family, + const std::string& qualifier, std::string* value, ErrorCode* err) { + return impl_->Get(row_key, family, qualifier, value, err); + } + virtual bool Get(const std::string& row_key, const std::string& family, + const std::string& qualifier, int64_t* value, ErrorCode* err) { + return impl_->Get(row_key, family, qualifier, value, err); + } + virtual bool Get(const std::string& row_key, const std::string& family, + const std::string& qualifier, std::string* value, ErrorCode* err, + uint64_t snapshot_id) { + return impl_->Get(row_key, family, qualifier, value, snapshot_id, err); + } + virtual bool Get(const std::string& row_key, const std::string& family, + const std::string& qualifier, std::string* value, uint64_t snapshot_id, + ErrorCode* err) { + return impl_->Get(row_key, family, qualifier, value, snapshot_id, err); + } + virtual bool Get(const std::string& row_key, const std::string& family, + const std::string& qualifier, int64_t* value, ErrorCode* err, + uint64_t snapshot_id) { + return impl_->Get(row_key, family, qualifier, value, snapshot_id, err); + } + virtual bool Get(const std::string& row_key, const std::string& family, + const std::string& qualifier, int64_t* value, uint64_t snapshot_id, + ErrorCode* err) { + return impl_->Get(row_key, family, qualifier, value, snapshot_id, err); + } + + virtual bool IsPutFinished() { return impl_->IsPutFinished(); } + virtual bool IsGetFinished() { return impl_->IsGetFinished(); } + + virtual ResultStream* Scan(const ScanDescriptor& desc, ErrorCode* err) { + return impl_->Scan(desc, err); + } + + virtual const std::string GetName() { return impl_->GetName(); } + + virtual bool Flush() { return impl_->Flush(); } + virtual bool CheckAndApply(const std::string& rowkey, const std::string& cf_c, + const std::string& value, const RowMutation& row_mu, ErrorCode* err) { + return impl_->CheckAndApply(rowkey, cf_c, value, row_mu, err); + } + virtual int64_t IncrementColumnValue(const std::string& row, const std::string& family, + const std::string& qualifier, int64_t amount, + ErrorCode* err) { + return impl_->IncrementColumnValue(row, family, qualifier, amount, err); + } + virtual Transaction* StartRowTransaction(const std::string& row_key) { + return impl_->StartRowTransaction(row_key); + } + virtual void CommitRowTransaction(Transaction* transaction) { + impl_->CommitRowTransaction(transaction); + } + virtual void SetWriteTimeout(int64_t timeout_ms) { impl_->SetWriteTimeout(timeout_ms); } + virtual void SetReadTimeout(int64_t timeout_ms) { impl_->SetReadTimeout(timeout_ms); } + + virtual bool LockRow(const std::string& rowkey, RowLock* lock, ErrorCode* err) { + return impl_->LockRow(rowkey, lock, err); + } + + virtual bool GetStartEndKeys(std::string* start_key, std::string* end_key, ErrorCode* err) { + return impl_->GetStartEndKeys(start_key, end_key, err); + } + + virtual bool GetTabletLocation(std::vector* tablets, ErrorCode* err) { + return impl_->GetTabletLocation(tablets, err); + } + virtual bool GetDescriptor(TableDescriptor* desc, ErrorCode* err) { + return impl_->GetDescriptor(desc, err); + } + + virtual void SetMaxMutationPendingNum(uint64_t max_pending_num) { + impl_->SetMaxMutationPendingNum(max_pending_num); + } + virtual void SetMaxReaderPendingNum(uint64_t max_pending_num) { + impl_->SetMaxReaderPendingNum(max_pending_num); + } + + virtual bool IsHashTable() { return impl_->IsHashTable(); } + + virtual std::function GetHashMethod() { + return impl_->GetHashMethod(); + } + + virtual bool GetTablet(const std::string& row_key, std::string* tablet) override { + return impl_->GetTablet(row_key, tablet); + } + + std::shared_ptr GetTableImpl() { return impl_; } + + private: + std::shared_ptr impl_; }; -} // namespace tera +} // namespace tera #endif // TERA_SDK_TABLE_IMPL_H_ diff --git a/src/sdk/tera.cc b/src/sdk/tera.cc index d01bce0fe..4746037c3 100644 --- a/src/sdk/tera.cc +++ b/src/sdk/tera.cc @@ -9,111 +9,109 @@ namespace tera { static const char* strerr(ErrorCode::ErrorCodeType type) { - const char* ret = "Unknown error"; - switch (type) { + const char* ret = "Unknown error"; + switch (type) { case ErrorCode::kOK: - ret = "OK"; - break; + ret = "OK"; + break; case ErrorCode::kNotFound: - ret = "Not Found"; - break; + ret = "Not Found"; + break; case ErrorCode::kBadParam: - ret = "Bad Parameter"; - break; + ret = "Bad Parameter"; + break; case ErrorCode::kSystem: - ret = "SystemError"; - break; + ret = "SystemError"; + break; case ErrorCode::kTimeout: - ret = "Timeout"; - break; + ret = "Timeout"; + break; case ErrorCode::kBusy: - ret = "SystemBusy"; - break; + ret = "SystemBusy"; + break; case ErrorCode::kNoQuota: - ret = "UserNoQuota"; - break; + ret = "UserNoQuota"; + break; case ErrorCode::kNoAuth: - ret = "UserUnauthorized"; - break; + ret = "UserUnauthorized"; + break; case ErrorCode::kNotImpl: - ret = "Not Implement"; - break; + ret = "Not Implement"; + break; case ErrorCode::kTxnFail: - ret = "TransactionFail"; - break; + ret = "TransactionFail"; + break; + case ErrorCode::kAuthBadParam: + ret = "Auth Bad Parameter"; + break; + case ErrorCode::kAuthLoginFailed: + ret = "Auth login failed"; + break; case ErrorCode::kGTxnDataTooLarge: - ret = "GlobalTransactionDataTooLarge"; - break; + ret = "GlobalTransactionDataTooLarge"; + break; case ErrorCode::kGTxnNotSupport: - ret = "GlobalTransactionNotSupport"; - break; + ret = "GlobalTransactionNotSupport"; + break; case ErrorCode::kGTxnSchemaError: - ret = "GlobalTransactionSchemaError"; - break; + ret = "GlobalTransactionSchemaError"; + break; case ErrorCode::kGTxnOpAfterCommit: - ret = "GlobalTransactionOpAfterCommit"; - break; + ret = "GlobalTransactionOpAfterCommit"; + break; case ErrorCode::kGTxnPrimaryLost: - ret = "GlobalTransactionPrimaryLost"; - break; + ret = "GlobalTransactionPrimaryLost"; + break; case ErrorCode::kGTxnWriteConflict: - ret = "GlobalTransactionWriteConflict"; - break; + ret = "GlobalTransactionWriteConflict"; + break; case ErrorCode::kGTxnLockConflict: - ret = "GlobalTransactionLockConflict"; - break; + ret = "GlobalTransactionLockConflict"; + break; case ErrorCode::kGTxnOKButAckFailed: - ret = "GlobalTransactionOkButAckFailed"; - break; + ret = "GlobalTransactionOkButAckFailed"; + break; case ErrorCode::kGTxnOKButNotifyFailed: - ret = "GlobalTransactionOKButNotifyFailed"; - break; + ret = "GlobalTransactionOKButNotifyFailed"; + break; case ErrorCode::kGTxnPrewriteTimeout: - ret = "GlobalTransactionPrewriteTimeout"; - break; + ret = "GlobalTransactionPrewriteTimeout"; + break; case ErrorCode::kGTxnPrimaryCommitTimeout: - ret = "GlobalTransactionPrimaryCommitTimeout"; - break; + ret = "GlobalTransactionPrimaryCommitTimeout"; + break; case ErrorCode::kGTxnTimestampLost: - ret = "GlobalTransactionTimestampLost"; - break; + ret = "GlobalTransactionTimestampLost"; + break; default: - ret = "UnkownError"; - } - return ret; + ret = "UnkownError"; + } + return ret; } -ErrorCode::ErrorCode() : err_(kOK) { -} +ErrorCode::ErrorCode() : err_(kOK) {} void ErrorCode::SetFailed(ErrorCodeType err, const std::string& reason) { - err_ = err; - reason_ = reason; + err_ = err; + reason_ = reason; } std::string ErrorCode::ToString() const { - std::string ret; - ret.append("type ["); - ret.append(strerr(err_)); - ret.append("], reason ["); - ret.append(reason_); - ret.append("]."); - return ret; + std::string ret; + ret.append("type ["); + ret.append(strerr(err_)); + ret.append("], reason ["); + ret.append(reason_); + ret.append("]."); + return ret; } -std::string ErrorCode::GetReason() const { - return reason_; -} +std::string ErrorCode::GetReason() const { return reason_; } -ErrorCode::ErrorCodeType ErrorCode::GetType() const { - return err_; -} +ErrorCode::ErrorCodeType ErrorCode::GetType() const { return err_; } -const char* strerr(ErrorCode error_code) { - return strerr(error_code.GetType()); -} +const char* strerr(ErrorCode error_code) { return strerr(error_code.GetType()); } const int64_t kLatestTimestamp = std::numeric_limits::max(); const int64_t kOldestTimestamp = std::numeric_limits::min(); - } diff --git a/src/sdk/tera_easy.cc b/src/sdk/tera_easy.cc index 238f87031..7f5eece7c 100644 --- a/src/sdk/tera_easy.cc +++ b/src/sdk/tera_easy.cc @@ -24,205 +24,198 @@ DECLARE_string(flagfile); namespace teraeasy { class TableImpl : public Table { -public: - TableImpl(tera::Table* table, tera::Client* client) - : table_(table), - client_(client), - scanner_(NULL) { - ThreadPool::Task task = std::bind(&TableImpl::PrintStatus, this); - thread_pool_.DelayTask(1000, task); - } - - ~TableImpl() { - Flush(); - delete scanner_; - delete table_; - delete client_; - } - - bool Read(const Key& key, Record* record) { - std::string value; - tera::ErrorCode err; - if (!table_->Get(key, "", "", &value, &err)) { - LOG(ERROR) << "fail to read: " << key - << ", reason: " << err.GetReason(); - return false; - } - return DeSerializeRecord(value, record); - } - - bool Write(const Key& key, const Record& record) { - CHECK(s_pending_num_.Get() >= 0) << "pending num < 0: " << s_pending_num_.Get(); - CHECK(s_pending_size_.Get() >= 0) << "pending size < 0: " << s_pending_size_.Get(); - while (s_pending_num_.Get() > FLAGS_tera_sdk_rpc_max_pending_num || - s_pending_size_.Get() > FLAGS_tera_sdk_rpc_max_pending_buffer_size * 1024 * 1024) { - usleep(1000000); - } - - std::string value; - SerializeRecord(record, &value); - - { - tera::RowMutation* mutation = table_->NewRowMutation(key); - mutation->Put(value, FLAGS_tera_easy_ttl); - mutation->SetCallBack(TableImpl::WriteCallback); - table_->ApplyMutation(mutation); - s_pending_num_.Inc(); - s_pending_size_.Add(mutation->Size()); - } - return true; - } - - void Flush() { - while (s_pending_num_.Get() > 0) { - usleep(10000); - } - } - - // sync delete - bool Delete(const Key& key) { - tera::RowMutation* mutation = table_->NewRowMutation(key); - mutation->DeleteRow(); - table_->ApplyMutation(mutation); - return true; - } - - bool SetScanner(const Key& start, const Key& end) { - if (scanner_ != NULL) { - delete scanner_; - } - tera::ErrorCode err; - tera::ScanDescriptor desc(start); - desc.SetEnd(end); - desc.SetAsync(false); - - if ((scanner_ = table_->Scan(desc, &err)) == NULL) { - LOG(ERROR) << "fail to scan the table, reason:" << err.GetReason(); - return false; - } - return true; - } - - bool NextPair(KVPair* pair) { - if (scanner_ == NULL) { - LOG(ERROR) << "scanner is empty!"; - return false; - } - if (!scanner_->Done()) { - Record record; - DeSerializeRecord(scanner_->Value(), &record); - *pair = std::make_pair(scanner_->RowName(), record); - scanner_->Next(); - return true; - } - delete scanner_; - scanner_ = NULL; - return false; - } - - static void WriteCallback(tera::RowMutation* mutation) { - const tera::ErrorCode& error_code = mutation->GetError(); - if (error_code.GetType() != tera::ErrorCode::kOK) { - s_write_fail_num_.Inc(); - VLOG(5)<< "write key failed: key(" << mutation->RowKey() - << "), reason:" << error_code.GetReason(); - } else { - s_write_succ_num_.Inc(); - } - - s_pending_num_.Dec(); - s_pending_size_.Sub(mutation->Size()); - delete mutation; - } - -private: - union Fix32Converter { - int32_t v; - char buf[sizeof(v)]; - }; - - void AppendFix32(int32_t v, std::string* str) { - Fix32Converter u; - u.v = v; - str->append(u.buf, sizeof(v)); + public: + TableImpl(tera::Table* table, tera::Client* client) + : table_(table), client_(client), scanner_(NULL) { + ThreadPool::Task task = std::bind(&TableImpl::PrintStatus, this); + thread_pool_.DelayTask(1000, task); + } + + ~TableImpl() { + Flush(); + delete scanner_; + delete table_; + delete client_; + } + + bool Read(const Key& key, Record* record) { + std::string value; + tera::ErrorCode err; + if (!table_->Get(key, "", "", &value, &err)) { + LOG(ERROR) << "fail to read: " << key << ", reason: " << err.GetReason(); + return false; } + return DeSerializeRecord(value, record); + } - int32_t GetFix32(const char* buf) { - return *(reinterpret_cast(buf)); + bool Write(const Key& key, const Record& record) { + CHECK(s_pending_num_.Get() >= 0) << "pending num < 0: " << s_pending_num_.Get(); + CHECK(s_pending_size_.Get() >= 0) << "pending size < 0: " << s_pending_size_.Get(); + while (s_pending_num_.Get() > FLAGS_tera_sdk_rpc_max_pending_num || + s_pending_size_.Get() > FLAGS_tera_sdk_rpc_max_pending_buffer_size * 1024 * 1024) { + usleep(1000000); } - bool SerializeColumn(const Column& column, std::string* buf) { - Column::const_iterator it = column.begin(); - AppendFix32(column.size(), buf); - for (; it != column.end(); ++it) { - AppendFix32(it->first, buf); - AppendFix32(it->second.size(), buf); - buf->append(it->second); - } - return true; - } + std::string value; + SerializeRecord(record, &value); - int32_t DeSerializeColumn(const char* buf, Column* column) { - int num = GetFix32(buf); - int offset = sizeof(num); - for (int i = 0; i < num; i++) { - int ts = GetFix32(buf + offset); - offset += 4; - int len = GetFix32(buf + offset); - offset += 4; - (*column)[ts].assign(buf + offset, len); - offset += len; - } - return offset; + { + tera::RowMutation* mutation = table_->NewRowMutation(key); + mutation->Put(value, FLAGS_tera_easy_ttl); + mutation->SetCallBack(TableImpl::WriteCallback); + table_->ApplyMutation(mutation); + s_pending_num_.Inc(); + s_pending_size_.Add(mutation->Size()); } + return true; + } - bool SerializeRecord(const Record& record, std::string* buf) { - Record::const_iterator it = record.begin(); - AppendFix32(record.size(), buf); - for (; it != record.end(); ++it) { - AppendFix32(it->first.size(), buf); - buf->append(it->first); - SerializeColumn(it->second, buf); - } - return true; + void Flush() { + while (s_pending_num_.Get() > 0) { + usleep(10000); } + } - int32_t DeSerializeRecord(const std::string& buf, Record* record) { - int num = GetFix32(buf.data()); - int offset = sizeof(num); - for (int i = 0; i < num; i++) { - Key key; - Column column; - int len = GetFix32(buf.data()+offset); - offset += 4; - key.assign(buf.data()+offset, len); - offset += len; - len = DeSerializeColumn(buf.data()+offset, &column); - offset += len; - (*record)[key] = column; - } - return offset; - } + // sync delete + bool Delete(const Key& key) { + tera::RowMutation* mutation = table_->NewRowMutation(key); + mutation->DeleteRow(); + table_->ApplyMutation(mutation); + return true; + } - void PrintStatus() { - LOG(INFO) << "[TeraEasy] pending num " << s_pending_num_.Get() - << ", pending size " << s_pending_size_.Get() - << ", success " << s_write_succ_num_.Clear() - << ", fail " << s_write_fail_num_.Clear(); - ThreadPool::Task task = std::bind(&TableImpl::PrintStatus, this); - thread_pool_.DelayTask(1000, task); + bool SetScanner(const Key& start, const Key& end) { + if (scanner_ != NULL) { + delete scanner_; } - -private: - static tera::Counter s_pending_num_; - static tera::Counter s_pending_size_; - static tera::Counter s_write_fail_num_; - static tera::Counter s_write_succ_num_; - - tera::Table* table_; - tera::Client* client_; - tera::ResultStream* scanner_; - ThreadPool thread_pool_; + tera::ErrorCode err; + tera::ScanDescriptor desc(start); + desc.SetEnd(end); + + if ((scanner_ = table_->Scan(desc, &err)) == NULL) { + LOG(ERROR) << "fail to scan the table, reason:" << err.GetReason(); + return false; + } + return true; + } + + bool NextPair(KVPair* pair) { + if (scanner_ == NULL) { + LOG(ERROR) << "scanner is empty!"; + return false; + } + if (!scanner_->Done()) { + Record record; + DeSerializeRecord(scanner_->Value(), &record); + *pair = std::make_pair(scanner_->RowName(), record); + scanner_->Next(); + return true; + } + delete scanner_; + scanner_ = NULL; + return false; + } + + static void WriteCallback(tera::RowMutation* mutation) { + const tera::ErrorCode& error_code = mutation->GetError(); + if (error_code.GetType() != tera::ErrorCode::kOK) { + s_write_fail_num_.Inc(); + VLOG(5) << "write key failed: key(" << mutation->RowKey() + << "), reason:" << error_code.GetReason(); + } else { + s_write_succ_num_.Inc(); + } + + s_pending_num_.Dec(); + s_pending_size_.Sub(mutation->Size()); + delete mutation; + } + + private: + union Fix32Converter { + int32_t v; + char buf[sizeof(v)]; + }; + + void AppendFix32(int32_t v, std::string* str) { + Fix32Converter u; + u.v = v; + str->append(u.buf, sizeof(v)); + } + + int32_t GetFix32(const char* buf) { return *(reinterpret_cast(buf)); } + + bool SerializeColumn(const Column& column, std::string* buf) { + Column::const_iterator it = column.begin(); + AppendFix32(column.size(), buf); + for (; it != column.end(); ++it) { + AppendFix32(it->first, buf); + AppendFix32(it->second.size(), buf); + buf->append(it->second); + } + return true; + } + + int32_t DeSerializeColumn(const char* buf, Column* column) { + int num = GetFix32(buf); + int offset = sizeof(num); + for (int i = 0; i < num; i++) { + int ts = GetFix32(buf + offset); + offset += 4; + int len = GetFix32(buf + offset); + offset += 4; + (*column)[ts].assign(buf + offset, len); + offset += len; + } + return offset; + } + + bool SerializeRecord(const Record& record, std::string* buf) { + Record::const_iterator it = record.begin(); + AppendFix32(record.size(), buf); + for (; it != record.end(); ++it) { + AppendFix32(it->first.size(), buf); + buf->append(it->first); + SerializeColumn(it->second, buf); + } + return true; + } + + int32_t DeSerializeRecord(const std::string& buf, Record* record) { + int num = GetFix32(buf.data()); + int offset = sizeof(num); + for (int i = 0; i < num; i++) { + Key key; + Column column; + int len = GetFix32(buf.data() + offset); + offset += 4; + key.assign(buf.data() + offset, len); + offset += len; + len = DeSerializeColumn(buf.data() + offset, &column); + offset += len; + (*record)[key] = column; + } + return offset; + } + + void PrintStatus() { + LOG(INFO) << "[TeraEasy] pending num " << s_pending_num_.Get() << ", pending size " + << s_pending_size_.Get() << ", success " << s_write_succ_num_.Clear() << ", fail " + << s_write_fail_num_.Clear(); + ThreadPool::Task task = std::bind(&TableImpl::PrintStatus, this); + thread_pool_.DelayTask(1000, task); + } + + private: + static tera::Counter s_pending_num_; + static tera::Counter s_pending_size_; + static tera::Counter s_write_fail_num_; + static tera::Counter s_write_succ_num_; + + tera::Table* table_; + tera::Client* client_; + tera::ResultStream* scanner_; + ThreadPool thread_pool_; }; tera::Counter TableImpl::s_pending_num_; @@ -231,21 +224,21 @@ tera::Counter TableImpl::s_write_fail_num_; tera::Counter TableImpl::s_write_succ_num_; Table* OpenTable(const std::string& table_name, const std::string& conf_path) { - std::string conf; - if (conf_path == "") { - conf = "./tera.flag"; - } else { - conf = conf_path; - } - - tera::Client* client = tera::Client::NewClient(conf, "tera"); - - tera::ErrorCode err; - tera::Table* table = NULL; - if (client == NULL || (table = client->OpenTable(table_name, &err)) == NULL) { - LOG(ERROR) << "fail to open table: " << table_name; - return NULL; - } - return new TableImpl(table, client); + std::string conf; + if (conf_path == "") { + conf = "./tera.flag"; + } else { + conf = conf_path; + } + + tera::Client* client = tera::Client::NewClient(conf, "tera"); + + tera::ErrorCode err; + tera::Table* table = NULL; + if (client == NULL || (table = client->OpenTable(table_name, &err)) == NULL) { + LOG(ERROR) << "fail to open table: " << table_name; + return NULL; + } + return new TableImpl(table, client); } } diff --git a/src/sdk/tera_easy.h b/src/sdk/tera_easy.h index dd2362664..bad807a87 100644 --- a/src/sdk/tera_easy.h +++ b/src/sdk/tera_easy.h @@ -4,8 +4,8 @@ // // Author: yanshiguang02@baidu.com -#ifndef TERA_TERA_EASY_H_ -#define TERA_TERA_EASY_H_ +#ifndef TERA_TERA_EASY_H_ +#define TERA_TERA_EASY_H_ #include @@ -25,26 +25,26 @@ typedef std::map Record; typedef std::pair KVPair; class Table { -public: - Table() {} + public: + Table() {} - virtual ~Table() {} + virtual ~Table() {} - virtual bool Read(const Key& row_key, Record* record) = 0; + virtual bool Read(const Key& row_key, Record* record) = 0; - virtual bool Write(const Key& row_key, const Record& record) = 0; + virtual bool Write(const Key& row_key, const Record& record) = 0; - virtual void Flush() = 0; + virtual void Flush() = 0; - virtual bool Delete(const Key& row_key) = 0; + virtual bool Delete(const Key& row_key) = 0; - virtual bool SetScanner(const Key& start, const Key& end) = 0; + virtual bool SetScanner(const Key& start, const Key& end) = 0; - virtual bool NextPair(KVPair* kv_pair) = 0; + virtual bool NextPair(KVPair* kv_pair) = 0; -private: - Table(const Table&); - void operator=(const Table&); + private: + Table(const Table&); + void operator=(const Table&); }; Table* OpenTable(const std::string& table_name, const std::string& conf_path = ""); diff --git a/src/sdk/tera_hash.cc b/src/sdk/tera_hash.cc deleted file mode 100644 index 1324215f0..000000000 --- a/src/sdk/tera_hash.cc +++ /dev/null @@ -1,399 +0,0 @@ -// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "tera_hash.h" - -#include - -#include "common/base/string_format.h" -#include "common/mutex.h" -#include "gflags/gflags.h" -#include "glog/logging.h" - -DEFINE_int32(tera_hash_sdk_rpc_max_pending_num, 1024 * 1024, "max num of pending kv"); -DEFINE_int64(tera_hash_sdk_scan_buffer_size, 128 * 1024, "max buffer size of scan in hask sdk"); -DECLARE_int32(tera_sdk_rpc_max_pending_buffer_size); - -namespace tera { - -class NullHashMethod : public HashMethod { -public: - NullHashMethod(int32_t bulk_num = -1) : HashMethod(bulk_num) {} - ~NullHashMethod() {} - std::string HashKey(const std::string& key) { - return key; - } - std::string Key(const std::string& hash_key) { - return hash_key; - } -}; - - -Mutex s_mutex_; -int32_t s_pending_num_ = 0; -int32_t s_pending_size_ = 0; - -HashClient::HashClient(HashMethod* hash_method, - const std::string& table_name, - Client* client_impl) - : table_(NULL), scan_stream_(NULL), table_name_(table_name), - hash_method_(hash_method), is_created_client_(false), - is_created_hash_method_(false) { - if (hash_method_ == NULL) { - hash_method_ = new NullHashMethod(); - is_created_hash_method_ = true; - } - ErrorCode err; - if (client_impl) { - client_ = client_impl; - } else { - client_ = Client::NewClient("./tera.flag", "tera_hash", &err); - is_created_client_ = true; - } - CHECK(client_ && err.GetType() == ErrorCode::kOK) << strerr(err); -} - -HashClient::~HashClient() { - if (is_created_hash_method_) { - delete hash_method_; - } - if (is_created_client_) { - delete client_; - } - - if (table_) { - delete table_; - } -} - -bool HashClient::OpenTable(ErrorCode* err) { - if (table_) { - return true; - } - - if (!client_->IsTableExist(table_name_, err)) { - return false; - } - - table_ = client_->OpenTable(table_name_, err); - if (table_ == NULL) { - return false; - } - - CHECK(GetColumnFamilyList(&field_types_, err)); - - return true; -} - -bool HashClient::Put(const std::string& row_key, - const std::string& value, - ErrorCode* err) { - return Put(row_key, "", "", value, err); -} - -bool HashClient::Put(const std::string& row_key, const std::string& family, - const std::string& qualifier, const std::string& value, - ErrorCode* err) { - if (!table_) { - LOG(ERROR) << "table not open: " << table_name_; - SetErrorCode(err, ErrorCode::kSystem, "tail not open: " + table_name_); - return false; - } - return table_->Put(hash_method_->HashKey(row_key), family, qualifier, value, err); -} - -bool HashClient::Write(const std::string& row_key, const std::string& family, - const std::string& qualifier, const std::string& value, - ErrorCode* err) { - return Write(row_key, family, qualifier, value, NULL, err); -} - -bool HashClient::Write(const std::string& row_key, const std::string& family, - const std::string& qualifier, const std::string& value, - UserContext* context, ErrorCode* err) { - if (!table_) { - LOG(ERROR) << "table not open: " << table_name_; - SetErrorCode(err, ErrorCode::kSystem, "tail not open: " + table_name_); - return false; - } - - while (s_pending_num_ > FLAGS_tera_hash_sdk_rpc_max_pending_num - || s_pending_size_ > FLAGS_tera_sdk_rpc_max_pending_buffer_size * 1024 * 1024) { - usleep(1000000); - } - - - { - tera::RowMutation* mutation = NewMutation(row_key); - mutation->Put(family, qualifier, value); - ApplyMutation(context, mutation, row_key.length() + family.length() - + qualifier.length() + value.length()); - } - return true; -} - -void HashClient::Flush(uint64_t sleep_time) { - while (s_pending_num_ > 0) { - usleep(sleep_time); - } -} - -RowMutation* HashClient::NewMutation(const std::string& row_key) { - return table_->NewRowMutation(hash_method_->HashKey(row_key)); -} - -void HashClient::ApplyMutation(UserContext* context, RowMutation* mutation, - int32_t value_size) { - mutation->SetCallBack(HashClient::WriteCallback); - if (context) { - mutation->SetContext(context); - } else { - mutation->SetContext(NULL); - } - MutexLock locker(&s_mutex_); - s_pending_num_++; - s_pending_size_ += value_size; - table_->ApplyMutation(mutation); -} - -void HashClient::WriteCallback(tera::RowMutation* mutation) { - MutexLock locker(&s_mutex_); - const tera::ErrorCode& error_code = mutation->GetError(); - if (error_code.GetType() != tera::ErrorCode::kOK) { - LOG(ERROR) << "write failed: key = " << mutation->RowKey() - << "), reason:" << error_code.GetReason(); - } - - UserContext* context = reinterpret_cast(mutation->GetContext()); - if (context) { - if (context->callback) { - context->callback(context->param, error_code.GetType() == tera::ErrorCode::kOK); - } - delete context; - } - - s_pending_num_--; - s_pending_size_ -= mutation->Size(); - delete mutation; -} - -bool HashClient::Get(const std::string& row_key, - std::string* value, ErrorCode* err) { - return Get(row_key, "", "", value, err); -} - -bool HashClient::Get(const std::string& row_key, const std::string& family, - const std::string& qualifier, std::string* value, - ErrorCode* err) { - if (!table_) { - LOG(ERROR) << "table not open: " << table_name_; - SetErrorCode(err, ErrorCode::kSystem, "tail not open: " + table_name_); - return false; - } - - return table_->Get(hash_method_->HashKey(row_key), family, qualifier, value, err); -} - -bool HashClient::Get(const std::string& row_key, void* obj, - void (*callback)(void*, const std::string& family, const std::string& qualifer, - const std::string& value, const std::string& value_type)) { - RowReader* row_reader = table_->NewRowReader(hash_method_->HashKey(row_key)); - row_reader->SetMaxVersions(1); - row_reader->SetTimeOut(5000); - table_->Get(row_reader); - - while(!row_reader->Done()) { - std::string type; - std::map::iterator it = field_types_.find(row_reader->Family()); - if (it != field_types_.end()) { - type = it->second; - } - callback(obj, row_reader->Family(), row_reader->Qualifier(), row_reader->Value(), type); - row_reader->Next(); - } - delete row_reader; - - return true; -} - -bool HashClient::Delete(const std::string& row_key, ErrorCode* err) { - RowMutation* mutation = table_->NewRowMutation(hash_method_->HashKey(row_key)); - mutation->DeleteRow(); - table_->ApplyMutation(mutation); - delete mutation; - return true; -} - - -bool HashClient::Seek(const HashScanDesc& desc, ErrorCode* err) { - ScanDescriptor scan_desc(desc.start_rowkey); - scan_desc.SetEnd(desc.end_rowkey); - if (desc.buffer_size > 0) { - scan_desc.SetBufferSize(desc.buffer_size); - } else if (FLAGS_tera_hash_sdk_scan_buffer_size > 0) { - scan_desc.SetBufferSize(FLAGS_tera_hash_sdk_scan_buffer_size); - } - if (!desc.filter_expression.empty()) { - scan_desc.SetFilter(desc.filter_expression); - } - - if (desc.converter) { - scan_desc.SetValueConverter(desc.converter); - } - scan_desc.SetAsync(desc.is_async); - - std::string::size_type pos; - std::string cf, col; - for (size_t i = 0; i < desc.fields.size(); ++i) { - if ((pos = desc.fields[i].find(":", 0)) == std::string::npos) { - // add columnfamily - scan_desc.AddColumnFamily(desc.fields[i]); - VLOG(10) << "add cf: " << desc.fields[i] << " to scan descriptor"; - } else { - // add column - cf = desc.fields[i].substr(0, pos); - col = desc.fields[i].substr(pos + 1); - scan_desc.AddColumn(cf, col); - VLOG(10) << "add column: " << cf << ":" << col << " to scan descriptor"; - } - } - - scan_stream_ = table_->Scan(scan_desc, err); - return scan_stream_ != NULL; -} - -bool HashClient::Current(std::string* key, std::string* value, - ErrorCode* err) { - if (scan_stream_->Done()) { - SetErrorCode(err, ErrorCode::kSystem, "not more record"); - return false; - } - *key = hash_method_->Key(scan_stream_->RowName()); - *value = scan_stream_->Value(); - return true; -} - -bool HashClient::Current(std::string* row_key, std::string* family, std::string* qualifier, - std::string* value, ErrorCode* err) { - if (!scan_stream_) { - SetErrorCode(err, ErrorCode::kSystem, "scan not ready"); - return false; - } - if (scan_stream_->Done()) { - SetErrorCode(err, ErrorCode::kSystem, "not more record"); - return false; - } - - *row_key = hash_method_->Key(scan_stream_->RowName()); - if (family) { - *family = scan_stream_->Family(); - } - if (qualifier) { - *qualifier = scan_stream_->Qualifier(); - } - if (value) { - *value = scan_stream_->Value(); - } - return true; -} - -bool HashClient::Current(std::string* row_key, void* obj, - void (*callback)(void*, const std::string& family, - const std::string& qualifier, - const std::string& value, - const std::string& type)) { - if (scan_stream_->Done()) { - return false; - } - std::string type; - std::map::iterator it = field_types_.find(scan_stream_->Family()); - if (it != field_types_.end()) { - type = it->second; - } - *row_key = hash_method_->Key(scan_stream_->RowName()); - callback(obj, scan_stream_->Family(), scan_stream_->Qualifier(), - scan_stream_->Value(), type); - return true; -} - -bool HashClient::Next(ErrorCode* err) { - if (scan_stream_->Done()) { - SetErrorCode(err, ErrorCode::kSystem, "not more record"); - return false; - } - scan_stream_->Next(); - - if (scan_stream_->Done()) { - return false; - } - return true; -} - -const Table* HashClient::GetTable() { - return table_; -} - -bool HashClient::CreateTable(const std::map& cf_list, - ErrorCode* err) { - if (client_->IsTableExist(table_name_, err)) { - LOG(ERROR) << "table '" << table_name_ << "' already exist"; - return false; - } - - TableDescriptor table_desc(table_name_); - table_desc.SetRawKey(kBinary); - - std::string sf_lg_name = "sf_lg"; - if (!table_desc.AddLocalityGroup(sf_lg_name)) { - LOG(ERROR) << "fail to add locality group: " << sf_lg_name; - return false; - } - std::map::const_iterator it = cf_list.begin(); - for (; it != cf_list.end(); ++it) { - ColumnFamilyDescriptor* cf_desc = - table_desc.AddColumnFamily(it->first, sf_lg_name); - if (!cf_desc) { - LOG(ERROR) << "fail to add column family: " << it->first; - continue; - } - cf_desc->SetType(it->second); - } - - std::vector delimiters; - for (int32_t i = 0; i < hash_method_->GetBulkNum(); ++i) { - delimiters.push_back(StringFormat("%08llu", i)); - } - - if (!client_->CreateTable(table_desc, delimiters, err)) { - return false; - } - return true; -} - -bool HashClient::DeleteTable(ErrorCode* err) { - return true; -} - -bool HashClient::GetColumnFamilyList(std::map* cf_list, - ErrorCode* err) { - TableDescriptor* table_desc = client_->GetTableDescriptor(table_name_, err); - if (!table_desc) { - return false; - } - - for (int32_t i = 0; i < table_desc->ColumnFamilyNum(); ++i) { - const ColumnFamilyDescriptor* cf_desc = table_desc->ColumnFamily(i); - (*cf_list)[cf_desc->Name()] = cf_desc->Type(); - } - return true; -} - -void HashClient::SetErrorCode(ErrorCode* err, ErrorCode::ErrorCodeType value, - const std::string& err_reason) { - if (err) { - err->SetFailed(value, err_reason); - } -} - -} // namespace tera diff --git a/src/sdk/tera_hash.h b/src/sdk/tera_hash.h deleted file mode 100644 index 40b748365..000000000 --- a/src/sdk/tera_hash.h +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef TERA_SDK_TERA_HASH_H_ -#define TERA_SDK_TERA_HASH_H_ - -#include - -#include "tera.h" - -#pragma GCC visibility push(default) - -namespace tera { - -class HashMethod { -public: - HashMethod(int32_t bulk_num = 0) - : bulk_num_(bulk_num) {} - virtual ~HashMethod() {} - - virtual std::string HashKey(const std::string& key) = 0; - virtual std::string Key(const std::string& hash_key) = 0; - - int32_t GetBulkNum() const { - return bulk_num_; - } - -protected: - int32_t bulk_num_; -}; - -struct HashScanDesc { - std::string start_rowkey; - std::string end_rowkey; - std::vector fields; - std::string filter_expression; - int64_t buffer_size; - ScanDescriptor::ValueConverter converter; - bool is_async; - - HashScanDesc() : buffer_size(-1), converter(NULL), - is_async(true) {} -}; - -struct UserContext { - void (*callback)(void*, bool); - void* param; - - UserContext() : callback(NULL), param(NULL) {} -}; - -class HashClient { -public: - HashClient(HashMethod* hash_method, - const std::string& table_name = "sf_table", - Client* client_impl = NULL); - ~HashClient(); - - - bool GetColumnFamilyList(std::map* cf_list, - ErrorCode* err = NULL); - - bool Put(const std::string& row_key, const std::string& value, - ErrorCode* err = NULL); - bool Put(const std::string& row_key, const std::string& family, - const std::string& qualifier, const std::string& value, - ErrorCode* err = NULL); - - bool Get(const std::string& row_key, - std::string* value, ErrorCode* err = NULL); - bool Get(const std::string& row_key, const std::string& family, - const std::string& qualifier, std::string* value, - ErrorCode* err = NULL); - bool Get(const std::string& row_key, void* obj, - void (*callback)(void*, const std::string& family, const std::string& qualifier, - const std::string& value, const std::string& value_type)); - - bool Delete(const std::string& row_key, ErrorCode* err = NULL); - - bool Write(const std::string& row_key, const std::string& family, - const std::string& qualifier, const std::string& value, - ErrorCode* err = NULL); - bool Write(const std::string& row_key, const std::string& family, - const std::string& qualifier, const std::string& value, - UserContext* context = NULL, ErrorCode* err = NULL); - void Flush(uint64_t sleep_time = 10000); - - bool Seek(const HashScanDesc& desc, - ErrorCode* err = NULL); - bool Current(std::string* key, std::string* value, - ErrorCode* err = NULL); - bool Current(std::string* row_key, std::string* family, std::string* qualifier, - std::string* value, ErrorCode* err = NULL); - bool Current(std::string* row_key, void* obj, - void (*callback)(void*, const std::string& family, const std::string& qualifier, - const std::string& value, const std::string& value_type)); - bool Next(ErrorCode* err = NULL); - - const Table* GetTable(); - - bool CreateTable(const std::map& cf_list, - ErrorCode* err = NULL); - bool DeleteTable(ErrorCode* err = NULL); - - bool OpenTable(ErrorCode* err = NULL); - - static void WriteCallback(tera::RowMutation* mutation); - - RowMutation* NewMutation(const std::string& row_key); - void ApplyMutation(UserContext* context, RowMutation* mutation, - int32_t value_size); - -private: - void SetErrorCode(ErrorCode* err, ErrorCode::ErrorCodeType value, - const std::string& err_reason); - -private: - Client* client_; - Table* table_; - ResultStream* scan_stream_; - std::string table_name_; - - HashMethod* hash_method_; - int32_t bulk_num_; - bool is_created_client_; - bool is_created_hash_method_; - std::map field_types_; -}; - -} // namespace tera - -#pragma GCC visibility pop - -#endif // TERA_SDK_TERA_HASH_H_ diff --git a/src/sdk/tera_replication.cc b/src/sdk/tera_replication.cc index 822d961bd..750215afb 100644 --- a/src/sdk/tera_replication.cc +++ b/src/sdk/tera_replication.cc @@ -8,417 +8,384 @@ #include "types.h" #include "utils/config_utils.h" -DEFINE_bool(tera_replication_read_try_all, false, "try to read all replicas instread of randomly choose one"); -DEFINE_bool(tera_replication_write_need_all_success, false, "return OK only if all replicas write success"); -DEFINE_string(tera_replication_conf_paths, "../conf/tera.flag", "paths for flag files. use \';\' to split"); +DEFINE_bool(tera_replication_read_try_all, false, + "try to read all replicas instread of randomly choose one"); +DEFINE_bool(tera_replication_write_need_all_success, false, + "return OK only if all replicas write success"); +DEFINE_string(tera_replication_conf_paths, "../conf/tera.flag", + "paths for flag files. use \';\' to split"); namespace tera { class RowMutationReplicateImpl : public RowMutationReplicate { -public: - RowMutationReplicateImpl(const std::vector& row_mutations, - const std::vector& tables) - : row_mutations_(row_mutations), - tables_(tables), - user_callback_(NULL), - user_context_(NULL), - finish_cond_(&mutex_), - finish_count_(0), - success_row_mutation_(NULL), - fail_row_mutation_(NULL) { - CHECK_GT(row_mutations_.size(), 0u); - for (size_t i = 0; i < row_mutations_.size(); i++) { - row_mutations_[i]->SetCallBack(RowMutationCallback); - row_mutations_[i]->SetContext(this); - } - } - - virtual ~RowMutationReplicateImpl() { - for (size_t i = 0; i < row_mutations_.size(); i++) { - delete row_mutations_[i]; - } - } - - virtual const std::string& RowKey() { - return row_mutations_[0]->RowKey(); - } - - virtual void Put(const std::string& value) { - for (size_t i = 0; i < row_mutations_.size(); i++) { - row_mutations_[i]->Put(value); - } - } - - virtual void Put(const std::string& value, int32_t ttl) { - for (size_t i = 0; i < row_mutations_.size(); i++) { - row_mutations_[i]->Put(value, ttl); - } - } - - virtual void DeleteRow() { - for (size_t i = 0; i < row_mutations_.size(); i++) { - row_mutations_[i]->DeleteRow(); - } - } - - virtual void SetCallBack(Callback callback) { - user_callback_ = callback; + public: + RowMutationReplicateImpl(const std::vector& row_mutations, + const std::vector& tables) + : row_mutations_(row_mutations), + tables_(tables), + user_callback_(NULL), + user_context_(NULL), + finish_cond_(&mutex_), + finish_count_(0), + success_row_mutation_(NULL), + fail_row_mutation_(NULL) { + CHECK_GT(row_mutations_.size(), 0u); + for (size_t i = 0; i < row_mutations_.size(); i++) { + row_mutations_[i]->SetCallBack(RowMutationCallback); + row_mutations_[i]->SetContext(this); } + } - virtual Callback GetCallBack() { - return user_callback_; + virtual ~RowMutationReplicateImpl() { + for (size_t i = 0; i < row_mutations_.size(); i++) { + delete row_mutations_[i]; } + } - virtual void SetContext(void* context) { - user_context_ = context; - } + virtual const std::string& RowKey() { return row_mutations_[0]->RowKey(); } - virtual void* GetContext() { - return user_context_; + virtual void Put(const std::string& value) { + for (size_t i = 0; i < row_mutations_.size(); i++) { + row_mutations_[i]->Put(value); } + } - virtual const ErrorCode& GetError() { - if (fail_row_mutation_ == NULL) { - CHECK_NOTNULL(success_row_mutation_); - return success_row_mutation_->GetError(); - } - if (success_row_mutation_ == NULL) { - CHECK_NOTNULL(fail_row_mutation_); - return fail_row_mutation_->GetError(); - } - if (FLAGS_tera_replication_write_need_all_success) { - return fail_row_mutation_->GetError(); - } else { - return success_row_mutation_->GetError(); - } + virtual void Put(const std::string& value, int32_t ttl) { + for (size_t i = 0; i < row_mutations_.size(); i++) { + row_mutations_[i]->Put(value, ttl); } + } -public: - const std::vector& GetRowMutationList() { - return row_mutations_; + virtual void DeleteRow() { + for (size_t i = 0; i < row_mutations_.size(); i++) { + row_mutations_[i]->DeleteRow(); } + } - const std::vector& GetTableList() { - return tables_; - } + virtual void SetCallBack(Callback callback) { user_callback_ = callback; } - bool IsAsync() { - return (user_callback_ != NULL); - } + virtual Callback GetCallBack() { return user_callback_; } - void Wait() { - CHECK(user_callback_ == NULL); - MutexLock l(&mutex_); - while (finish_count_ < row_mutations_.size()) { - finish_cond_.Wait(); - } - } + virtual void SetContext(void* context) { user_context_ = context; } -private: - RowMutationReplicateImpl(const RowMutationReplicateImpl&); - void operator=(const RowMutationReplicateImpl&); + virtual void* GetContext() { return user_context_; } - static void RowMutationCallback(RowMutation* mutation) { - RowMutationReplicateImpl* mutation_rep = (RowMutationReplicateImpl*)mutation->GetContext(); - mutation_rep->ProcessCallback(mutation); + virtual const ErrorCode& GetError() { + if (fail_row_mutation_ == NULL) { + CHECK_NOTNULL(success_row_mutation_); + return success_row_mutation_->GetError(); } - - void ProcessCallback(RowMutation* mutation) { - mutex_.Lock(); - if (mutation->GetError().GetType() == tera::ErrorCode::kOK) { - if (success_row_mutation_ == NULL) { - success_row_mutation_ = mutation; - } - } else { - if (fail_row_mutation_ == NULL) { - fail_row_mutation_ = mutation; - } - } - if (++finish_count_ == row_mutations_.size()) { - if (user_callback_ != NULL) { - mutex_.Unlock(); // remember to unlock - user_callback_(this); - return; // remember to return - } else { - finish_cond_.Signal(); - } - } - mutex_.Unlock(); + if (success_row_mutation_ == NULL) { + CHECK_NOTNULL(fail_row_mutation_); + return fail_row_mutation_->GetError(); } - - std::vector row_mutations_; - std::vector tables_; - RowMutationReplicate::Callback user_callback_; - void* user_context_; - - Mutex mutex_; - CondVar finish_cond_; - uint32_t finish_count_; - RowMutation* success_row_mutation_; - RowMutation* fail_row_mutation_; -}; - -class RowReaderReplicateImpl : public RowReaderReplicate { -public: - RowReaderReplicateImpl(const std::vector& row_readers, - const std::vector& tables) - : row_readers_(row_readers), - tables_(tables), - user_callback_(NULL), - user_context_(NULL), - finish_cond_(&mutex_), - finish_count_(0), - valid_row_reader_(NULL) { - CHECK_GT(row_readers_.size(), 0u); - for (size_t i = 0; i < row_readers_.size(); i++) { - row_readers_[i]->SetCallBack(RowReaderCallback); - row_readers_[i]->SetContext(this); - } + if (FLAGS_tera_replication_write_need_all_success) { + return fail_row_mutation_->GetError(); + } else { + return success_row_mutation_->GetError(); } + } - virtual ~RowReaderReplicateImpl() { - for (size_t i = 0; i < row_readers_.size(); i++) { - delete row_readers_[i]; - } - } + public: + const std::vector& GetRowMutationList() { return row_mutations_; } - virtual const std::string& RowName() { - return row_readers_[0]->RowName(); - } + const std::vector& GetTableList() { return tables_; } - virtual void SetCallBack(Callback callback) { - user_callback_ = callback; - } + bool IsAsync() { return (user_callback_ != NULL); } - virtual void SetContext(void* context) { - user_context_ = context; + void Wait() { + CHECK(user_callback_ == NULL); + MutexLock l(&mutex_); + while (finish_count_ < row_mutations_.size()) { + finish_cond_.Wait(); } + } - virtual void* GetContext() { - return user_context_; - } + private: + RowMutationReplicateImpl(const RowMutationReplicateImpl&); + void operator=(const RowMutationReplicateImpl&); - virtual ErrorCode GetError() { - CHECK_NOTNULL(valid_row_reader_); - return valid_row_reader_->GetError(); - } + static void RowMutationCallback(RowMutation* mutation) { + RowMutationReplicateImpl* mutation_rep = (RowMutationReplicateImpl*)mutation->GetContext(); + mutation_rep->ProcessCallback(mutation); + } - virtual std::string Value() { - CHECK_NOTNULL(valid_row_reader_); - return valid_row_reader_->Value(); + void ProcessCallback(RowMutation* mutation) { + mutex_.Lock(); + if (mutation->GetError().GetType() == tera::ErrorCode::kOK) { + if (success_row_mutation_ == NULL) { + success_row_mutation_ = mutation; + } + } else { + if (fail_row_mutation_ == NULL) { + fail_row_mutation_ = mutation; + } } - -public: - const std::vector& GetRowReaderList() { - return row_readers_; + if (++finish_count_ == row_mutations_.size()) { + if (user_callback_ != NULL) { + mutex_.Unlock(); // remember to unlock + user_callback_(this); + return; // remember to return + } else { + finish_cond_.Signal(); + } } + mutex_.Unlock(); + } - const std::vector& GetTableList() { - return tables_; - } + std::vector row_mutations_; + std::vector tables_; + RowMutationReplicate::Callback user_callback_; + void* user_context_; - bool IsAsync() { - return (user_callback_ != NULL); - } - - void Wait() { - CHECK(user_callback_ == NULL); - MutexLock l(&mutex_); - while (finish_count_ < row_readers_.size()) { - finish_cond_.Wait(); - } - } - -private: - RowReaderReplicateImpl(const RowReaderReplicateImpl&); - void operator=(const RowReaderReplicateImpl&); - - static void RowReaderCallback(RowReader* reader) { - RowReaderReplicateImpl* reader_rep = (RowReaderReplicateImpl*)reader->GetContext(); - reader_rep->ProcessCallback(reader); - } - - void ProcessCallback(RowReader* reader) { - mutex_.Lock(); - if (valid_row_reader_ == NULL && reader->GetError().GetType() == tera::ErrorCode::kOK) { - valid_row_reader_ = reader; - } - if (++finish_count_ == row_readers_.size()) { - // if all readers fail, use readers[0] - if (valid_row_reader_ == NULL) { - valid_row_reader_ = row_readers_[0]; - } - if (user_callback_ != NULL) { - mutex_.Unlock(); // remember to unlock - user_callback_(this); - return; // remember to return - } else { - finish_cond_.Signal(); - } - } - mutex_.Unlock(); - } - - std::vector row_readers_; - std::vector tables_; - RowReaderReplicate::Callback user_callback_; - void* user_context_; - - Mutex mutex_; - CondVar finish_cond_; - uint32_t finish_count_; - RowReader* valid_row_reader_; + Mutex mutex_; + CondVar finish_cond_; + uint32_t finish_count_; + RowMutation* success_row_mutation_; + RowMutation* fail_row_mutation_; }; +class RowReaderReplicateImpl : public RowReaderReplicate { + public: + RowReaderReplicateImpl(const std::vector& row_readers, + const std::vector& tables) + : row_readers_(row_readers), + tables_(tables), + user_callback_(NULL), + user_context_(NULL), + finish_cond_(&mutex_), + finish_count_(0), + valid_row_reader_(NULL) { + CHECK_GT(row_readers_.size(), 0u); + for (size_t i = 0; i < row_readers_.size(); i++) { + row_readers_[i]->SetCallBack(RowReaderCallback); + row_readers_[i]->SetContext(this); + } + } + + virtual ~RowReaderReplicateImpl() { + for (size_t i = 0; i < row_readers_.size(); i++) { + delete row_readers_[i]; + } + } + + virtual const std::string& RowName() { return row_readers_[0]->RowName(); } + + virtual void SetCallBack(Callback callback) { user_callback_ = callback; } + + virtual void SetContext(void* context) { user_context_ = context; } + + virtual void* GetContext() { return user_context_; } + + virtual ErrorCode GetError() { + CHECK_NOTNULL(valid_row_reader_); + return valid_row_reader_->GetError(); + } + + virtual std::string Value() { + CHECK_NOTNULL(valid_row_reader_); + return valid_row_reader_->Value(); + } + + public: + const std::vector& GetRowReaderList() { return row_readers_; } + + const std::vector& GetTableList() { return tables_; } + + bool IsAsync() { return (user_callback_ != NULL); } + + void Wait() { + CHECK(user_callback_ == NULL); + MutexLock l(&mutex_); + while (finish_count_ < row_readers_.size()) { + finish_cond_.Wait(); + } + } + + private: + RowReaderReplicateImpl(const RowReaderReplicateImpl&); + void operator=(const RowReaderReplicateImpl&); + + static void RowReaderCallback(RowReader* reader) { + RowReaderReplicateImpl* reader_rep = (RowReaderReplicateImpl*)reader->GetContext(); + reader_rep->ProcessCallback(reader); + } + + void ProcessCallback(RowReader* reader) { + mutex_.Lock(); + if (valid_row_reader_ == NULL && reader->GetError().GetType() == tera::ErrorCode::kOK) { + valid_row_reader_ = reader; + } + if (++finish_count_ == row_readers_.size()) { + // if all readers fail, use readers[0] + if (valid_row_reader_ == NULL) { + valid_row_reader_ = row_readers_[0]; + } + if (user_callback_ != NULL) { + mutex_.Unlock(); // remember to unlock + user_callback_(this); + return; // remember to return + } else { + finish_cond_.Signal(); + } + } + mutex_.Unlock(); + } + + std::vector row_readers_; + std::vector tables_; + RowReaderReplicate::Callback user_callback_; + void* user_context_; + + Mutex mutex_; + CondVar finish_cond_; + uint32_t finish_count_; + RowReader* valid_row_reader_; +}; /// 表接口 class TableReplicateImpl : public TableReplicate { -public: - TableReplicateImpl(std::vector tables) : tables_(tables) {} - virtual ~TableReplicateImpl() { - for (size_t i = 0; i < tables_.size(); i++) { - delete tables_[i]; - } - } - - virtual RowMutationReplicate* NewRowMutation(const std::string& row_key) { - std::vector row_mutations; - for (size_t i = 0; i < tables_.size(); i++) { - row_mutations.push_back(tables_[i]->NewRowMutation(row_key)); - } - return new RowMutationReplicateImpl(row_mutations, tables_); - } - - virtual void ApplyMutation(RowMutationReplicate* mutation_rep) { - RowMutationReplicateImpl* mutation_rep_impl = (RowMutationReplicateImpl*)mutation_rep; - bool is_async = mutation_rep_impl->IsAsync(); - const std::vector& mutation_list = mutation_rep_impl->GetRowMutationList(); - const std::vector& table_list = mutation_rep_impl->GetTableList(); - // in async mode, after the last call of ApplyMutation, we should not access - // 'mutation_rep_impl' anymore, that's why we assign the value of 'mutation_list.size()' - // to a local variable 'mutation_num' - size_t mutation_num = mutation_list.size(); - for (size_t i = 0; i < mutation_num; i++) { - table_list[i]->ApplyMutation(mutation_list[i]); - } - if (!is_async) { - mutation_rep_impl->Wait(); - } - } - - virtual RowReaderReplicate* NewRowReader(const std::string& row_key) { - std::vector row_readers; - std::vector tables; - if (FLAGS_tera_replication_read_try_all) { - for (size_t i = 0; i < tables_.size(); i++) { - row_readers.push_back(tables_[i]->NewRowReader(row_key)); - tables.push_back(tables_[i]); - } - } else { - size_t i = random() % tables_.size(); - row_readers.push_back(tables_[i]->NewRowReader(row_key)); - tables.push_back(tables_[i]); - } - return new RowReaderReplicateImpl(row_readers, tables); - } - - virtual void Get(RowReaderReplicate* reader_rep) { - RowReaderReplicateImpl* reader_rep_impl = (RowReaderReplicateImpl*)reader_rep; - bool is_async = reader_rep_impl->IsAsync(); - const std::vector& reader_list = reader_rep_impl->GetRowReaderList(); - const std::vector& table_list = reader_rep_impl->GetTableList(); - size_t reader_num = reader_list.size(); - for (size_t i = 0; i < reader_num; i++) { - table_list[i]->Get(reader_list[i]); - } - if (!is_async) { - reader_rep_impl->Wait(); - } - } - -private: - TableReplicateImpl(const TableReplicateImpl&); - void operator=(const TableReplicateImpl&); - - std::vector tables_; + public: + TableReplicateImpl(const std::vector& tables) : tables_(tables) {} + virtual ~TableReplicateImpl() { + for (size_t i = 0; i < tables_.size(); i++) { + delete tables_[i]; + } + } + + virtual RowMutationReplicate* NewRowMutation(const std::string& row_key) { + std::vector row_mutations; + for (size_t i = 0; i < tables_.size(); i++) { + row_mutations.push_back(tables_[i]->NewRowMutation(row_key)); + } + return new RowMutationReplicateImpl(row_mutations, tables_); + } + + virtual void ApplyMutation(RowMutationReplicate* mutation_rep) { + RowMutationReplicateImpl* mutation_rep_impl = (RowMutationReplicateImpl*)mutation_rep; + bool is_async = mutation_rep_impl->IsAsync(); + const std::vector& mutation_list = mutation_rep_impl->GetRowMutationList(); + const std::vector& table_list = mutation_rep_impl->GetTableList(); + // in async mode, after the last call of ApplyMutation, we should not access + // 'mutation_rep_impl' anymore, that's why we assign the value of + // 'mutation_list.size()' + // to a local variable 'mutation_num' + size_t mutation_num = mutation_list.size(); + for (size_t i = 0; i < mutation_num; i++) { + table_list[i]->ApplyMutation(mutation_list[i]); + } + if (!is_async) { + mutation_rep_impl->Wait(); + } + } + + virtual RowReaderReplicate* NewRowReader(const std::string& row_key) { + std::vector row_readers; + std::vector tables; + if (FLAGS_tera_replication_read_try_all) { + for (size_t i = 0; i < tables_.size(); i++) { + row_readers.push_back(tables_[i]->NewRowReader(row_key)); + tables.push_back(tables_[i]); + } + } else { + size_t i = random() % tables_.size(); + row_readers.push_back(tables_[i]->NewRowReader(row_key)); + tables.push_back(tables_[i]); + } + return new RowReaderReplicateImpl(row_readers, tables); + } + + virtual void Get(RowReaderReplicate* reader_rep) { + RowReaderReplicateImpl* reader_rep_impl = (RowReaderReplicateImpl*)reader_rep; + bool is_async = reader_rep_impl->IsAsync(); + const std::vector& reader_list = reader_rep_impl->GetRowReaderList(); + const std::vector& table_list = reader_rep_impl->GetTableList(); + size_t reader_num = reader_list.size(); + for (size_t i = 0; i < reader_num; i++) { + table_list[i]->Get(reader_list[i]); + } + if (!is_async) { + reader_rep_impl->Wait(); + } + } + + private: + TableReplicateImpl(const TableReplicateImpl&); + void operator=(const TableReplicateImpl&); + + std::vector tables_; }; class ClientReplicateImpl : public ClientReplicate { -public: - /// 打开表格, 失败返回NULL - virtual TableReplicate* OpenTable(const std::string& table_name, ErrorCode* err) { - std::vector tables; - for (size_t i = 0; i < clients_.size(); i++) { - Table* table = clients_[i]->OpenTable(table_name, err); - if (table == NULL) { - for (size_t j = 0; j < tables.size(); j++) { - delete tables[j]; - } - return NULL; - } - tables.push_back(table); + public: + /// 打开表格, 失败返回NULL + virtual TableReplicate* OpenTable(const std::string& table_name, ErrorCode* err) { + std::vector tables; + for (size_t i = 0; i < clients_.size(); i++) { + Table* table = clients_[i]->OpenTable(table_name, err); + if (table == NULL) { + for (size_t j = 0; j < tables.size(); j++) { + delete tables[j]; } - return new TableReplicateImpl(tables); + return NULL; + } + tables.push_back(table); } + return new TableReplicateImpl(tables); + } - ClientReplicateImpl(const std::vector& clients) : clients_(clients) {} - virtual ~ClientReplicateImpl() { - for (size_t i = 0; i < clients_.size(); i++) { - delete clients_[i]; - } + ClientReplicateImpl(const std::vector& clients) : clients_(clients) {} + virtual ~ClientReplicateImpl() { + for (size_t i = 0; i < clients_.size(); i++) { + delete clients_[i]; } + } -private: - ClientReplicateImpl(const ClientReplicateImpl&); - void operator=(const ClientReplicateImpl&); + private: + ClientReplicateImpl(const ClientReplicateImpl&); + void operator=(const ClientReplicateImpl&); - std::vector clients_; + std::vector clients_; }; -void ClientReplicate::SetGlogIsInitialized() { - Client::SetGlogIsInitialized(); -} +void ClientReplicate::SetGlogIsInitialized() { Client::SetGlogIsInitialized(); } ClientReplicate* ClientReplicate::NewClient(const std::string& confpath, - const std::string& log_prefix, - ErrorCode* err) { - utils::LoadFlagFile(confpath); - std::string conf_paths = FLAGS_tera_replication_conf_paths; - std::vector confs; - size_t token_pos = 0; - while (token_pos < conf_paths.size()) { - size_t delim_pos = conf_paths.find(';', token_pos); - std::string token(conf_paths, token_pos, delim_pos - token_pos); - if (!token.empty()) { - confs.push_back(token); - } - if (delim_pos == std::string::npos) { - break; - } - token_pos = delim_pos + 1; - } - - std::vector clients; - for (size_t i = 0; i < confs.size(); i++) { - Client* client = Client::NewClient(confs[i], log_prefix, err); - if (client == NULL) { - for (size_t j = 0; j < clients.size(); j++) { - delete clients[j]; - } - return NULL; - } - clients.push_back(client); - } - return new ClientReplicateImpl(clients); + const std::string& log_prefix, ErrorCode* err) { + utils::LoadFlagFile(confpath); + std::string conf_paths = FLAGS_tera_replication_conf_paths; + std::vector confs; + size_t token_pos = 0; + while (token_pos < conf_paths.size()) { + size_t delim_pos = conf_paths.find(';', token_pos); + std::string token(conf_paths, token_pos, delim_pos - token_pos); + if (!token.empty()) { + confs.push_back(token); + } + if (delim_pos == std::string::npos) { + break; + } + token_pos = delim_pos + 1; + } + + std::vector clients; + for (size_t i = 0; i < confs.size(); i++) { + Client* client = Client::NewClient(confs[i], log_prefix, err); + if (client == NULL) { + for (size_t j = 0; j < clients.size(); j++) { + delete clients[j]; + } + return NULL; + } + clients.push_back(client); + } + return new ClientReplicateImpl(clients); } ClientReplicate* ClientReplicate::NewClient(const std::string& confpath, ErrorCode* err) { - return NewClient(confpath, "tera", err); -} - -ClientReplicate* ClientReplicate::NewClient() { - return NewClient("", NULL); + return NewClient(confpath, "tera", err); } -} // namespace tera +ClientReplicate* ClientReplicate::NewClient() { return NewClient("", NULL); } +} // namespace tera diff --git a/src/sdk/tera_replication.h b/src/sdk/tera_replication.h index 80f72d8a6..5b1830861 100644 --- a/src/sdk/tera_replication.h +++ b/src/sdk/tera_replication.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_TERA_REPLICATION_H_ -#define TERA_TERA_REPLICATION_H_ +#ifndef TERA_TERA_REPLICATION_H_ +#define TERA_TERA_REPLICATION_H_ #include @@ -14,113 +14,111 @@ namespace tera { /// 修改操作 class RowMutationReplicate { -public: - RowMutationReplicate() {} - virtual ~RowMutationReplicate() {} + public: + RowMutationReplicate() {} + virtual ~RowMutationReplicate() {} - // 获得row key - virtual const std::string& RowKey() = 0; + // 获得row key + virtual const std::string& RowKey() = 0; - /// 修改默认列 - virtual void Put(const std::string& value) = 0; + /// 修改默认列 + virtual void Put(const std::string& value) = 0; - /// 带TTL的修改默认列 - virtual void Put(const std::string& value, int32_t ttl) = 0; + /// 带TTL的修改默认列 + virtual void Put(const std::string& value, int32_t ttl) = 0; - /// 删除整行的全部数据 - virtual void DeleteRow() = 0; + /// 删除整行的全部数据 + virtual void DeleteRow() = 0; - /// 设置异步回调, 操作会异步返回 - typedef void (*Callback)(RowMutationReplicate* param); - virtual void SetCallBack(Callback callback) = 0; + /// 设置异步回调, 操作会异步返回 + typedef void (*Callback)(RowMutationReplicate* param); + virtual void SetCallBack(Callback callback) = 0; - /// 设置用户上下文,可在回调函数中获取 - virtual void SetContext(void* context) = 0; - /// 获得用户上下文 - virtual void* GetContext() = 0; + /// 设置用户上下文,可在回调函数中获取 + virtual void SetContext(void* context) = 0; + /// 获得用户上下文 + virtual void* GetContext() = 0; - /// 获得结果错误码 - virtual const ErrorCode& GetError() = 0; + /// 获得结果错误码 + virtual const ErrorCode& GetError() = 0; -private: - RowMutationReplicate(const RowMutationReplicate&); - void operator=(const RowMutationReplicate&); + private: + RowMutationReplicate(const RowMutationReplicate&); + void operator=(const RowMutationReplicate&); }; class RowReaderReplicate { -public: - RowReaderReplicate() {}; - virtual ~RowReaderReplicate() {}; - - /// 获得row key - virtual const std::string& RowName() = 0; - - /// 设置异步回调, 操作会异步返回 - typedef void (*Callback)(RowReaderReplicate* param); - virtual void SetCallBack(Callback callback) = 0; - - /// 设置用户上下文,可在回调函数中获取 - virtual void SetContext(void* context) = 0; - /// 获得用户上下文 - virtual void* GetContext() = 0; - - /// 获得结果错误码 - virtual ErrorCode GetError() = 0; - /// 读取的结果 - virtual std::string Value() = 0; - -private: - RowReaderReplicate(const RowReaderReplicate&); - void operator=(const RowReaderReplicate&); + public: + RowReaderReplicate(){}; + virtual ~RowReaderReplicate(){}; + + /// 获得row key + virtual const std::string& RowName() = 0; + + /// 设置异步回调, 操作会异步返回 + typedef void (*Callback)(RowReaderReplicate* param); + virtual void SetCallBack(Callback callback) = 0; + + /// 设置用户上下文,可在回调函数中获取 + virtual void SetContext(void* context) = 0; + /// 获得用户上下文 + virtual void* GetContext() = 0; + + /// 获得结果错误码 + virtual ErrorCode GetError() = 0; + /// 读取的结果 + virtual std::string Value() = 0; + + private: + RowReaderReplicate(const RowReaderReplicate&); + void operator=(const RowReaderReplicate&); }; - /// 表接口 class TableReplicate { -public: - TableReplicate() {} - virtual ~TableReplicate() {} - - /// 返回一个新的RowMutation - virtual RowMutationReplicate* NewRowMutation(const std::string& row_key) = 0; - /// 提交一个修改操作, 同步操作返回是否成功, 异步操作永远返回true - virtual void ApplyMutation(RowMutationReplicate* row_mu) = 0; - - /// 返回一个新的RowReader - virtual RowReaderReplicate* NewRowReader(const std::string& row_key) = 0; - /// 读取一个指定行 - virtual void Get(RowReaderReplicate* row_reader) = 0; - -private: - TableReplicate(const TableReplicate&); - void operator=(const TableReplicate&); + public: + TableReplicate() {} + virtual ~TableReplicate() {} + + /// 返回一个新的RowMutation + virtual RowMutationReplicate* NewRowMutation(const std::string& row_key) = 0; + /// 提交一个修改操作, 同步操作返回是否成功, 异步操作永远返回true + virtual void ApplyMutation(RowMutationReplicate* row_mu) = 0; + + /// 返回一个新的RowReader + virtual RowReaderReplicate* NewRowReader(const std::string& row_key) = 0; + /// 读取一个指定行 + virtual void Get(RowReaderReplicate* row_reader) = 0; + + private: + TableReplicate(const TableReplicate&); + void operator=(const TableReplicate&); }; class ClientReplicate { -public: - /// 使用glog的用户必须调用此接口,避免glog被重复初始化 - static void SetGlogIsInitialized(); + public: + /// 使用glog的用户必须调用此接口,避免glog被重复初始化 + static void SetGlogIsInitialized(); - static ClientReplicate* NewClient(const std::string& confpath, - const std::string& log_prefix, - ErrorCode* err = NULL); + static ClientReplicate* NewClient(const std::string& confpath, const std::string& log_prefix, + ErrorCode* err = NULL); - static ClientReplicate* NewClient(const std::string& confpath, ErrorCode* err = NULL); + static ClientReplicate* NewClient(const std::string& confpath, ErrorCode* err = NULL); - static ClientReplicate* NewClient(); + static ClientReplicate* NewClient(); - /// 打开表格, 失败返回NULL - virtual TableReplicate* OpenTable(const std::string& table_name, ErrorCode* err) = 0; + /// 打开表格, 失败返回NULL + virtual TableReplicate* OpenTable(const std::string& table_name, ErrorCode* err) = 0; - ClientReplicate() {} - virtual ~ClientReplicate() {} + ClientReplicate() {} + virtual ~ClientReplicate() {} -private: - ClientReplicate(const ClientReplicate&); - void operator=(const ClientReplicate&); + private: + ClientReplicate(const ClientReplicate&); + void operator=(const ClientReplicate&); }; -} // namespace tera +} // namespace tera #pragma GCC visibility pop #endif // TERA_TERA_REPLICATION_H_ diff --git a/src/sdk/test/batch_muation_test.cc b/src/sdk/test/batch_muation_test.cc new file mode 100644 index 000000000..7ba2da4ac --- /dev/null +++ b/src/sdk/test/batch_muation_test.cc @@ -0,0 +1,145 @@ +// Copyright (c) 2015-2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Author: baorenyi@baidu.com + +#include +#include +#include +#include +#include + +#include "gflags/gflags.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +#include "sdk/batch_mutation_impl.h" +#include "tera.h" +#include "sdk/sdk_task.h" +#include "sdk/sdk_zk.h" +#include "sdk/table_impl.h" +#include "sdk/test/mock_table.h" + +#include "tera.h" + +DECLARE_string(tera_coord_type); + +namespace tera { + +class BatchMutationTest : public ::testing::Test { + public: + BatchMutationTest() : batch_mu_(NULL) { + batch_mu_ = static_cast(OpenTable("batch_mu_test")->NewBatchMutation()); + } + virtual ~BatchMutationTest() {} + + std::shared_ptr
OpenTable(const std::string& tablename) { + FLAGS_tera_coord_type = "fake_zk"; + std::shared_ptr table_(new MockTable(tablename, &thread_pool_)); + return table_; + } + + BatchMutationImpl* batch_mu_; + common::ThreadPool thread_pool_; +}; + +TEST_F(BatchMutationTest, Put0) { + batch_mu_->Put("rowkey", "value", 12); + batch_mu_->Put("rowkey", "value1", 22); + EXPECT_EQ(batch_mu_->mu_map_.size(), 1); + EXPECT_EQ(batch_mu_->GetRows().size(), 1); +} + +TEST_F(BatchMutationTest, Put1) { + batch_mu_->Put("rowkey", "value", 12); + batch_mu_->Put("rowkey1", "value1", 22); + EXPECT_EQ(batch_mu_->mu_map_.size(), 2); + EXPECT_EQ(batch_mu_->GetRows().size(), 2); +} + +TEST_F(BatchMutationTest, Put2) { + batch_mu_->Put("rowkey", "cf", "qu", "value"); + batch_mu_->Put("rowkey", "", "qu", "value"); + batch_mu_->Put("rowkey2", "cf", "", "value"); + batch_mu_->Put("rowkey3", "", "", "value"); + batch_mu_->Put("rowkey4", "cf", "qu", "value", 0); + batch_mu_->Put("rowkey5", "cf", "qu", "value", 1); + batch_mu_->Put("rowkey6", "cf", "qu", "value", -1); + EXPECT_EQ(batch_mu_->mu_map_.size(), 6); + EXPECT_EQ(batch_mu_->GetRows().size(), 6); + EXPECT_EQ(batch_mu_->mu_map_["rowkey"].size(), 2); + EXPECT_EQ(batch_mu_->mu_map_["rowkey1"].size(), 0); +} + +TEST_F(BatchMutationTest, OtherOps) { + batch_mu_->Add("rowkey", "cf", "qu", 12); + EXPECT_EQ(batch_mu_->mu_map_["rowkey"].back().type, RowMutation::kAdd); + + batch_mu_->PutIfAbsent("rowkey", "cf", "qu", "value"); + EXPECT_EQ(batch_mu_->mu_map_["rowkey"].back().type, RowMutation::kPutIfAbsent); + + batch_mu_->Append("rowkey", "cf", "qu", "value"); + EXPECT_EQ(batch_mu_->mu_map_["rowkey"].back().type, RowMutation::kAppend); + + batch_mu_->DeleteRow("rowkey"); + EXPECT_EQ(batch_mu_->mu_map_["rowkey"].back().type, RowMutation::kDeleteRow); + EXPECT_EQ(batch_mu_->mu_map_["rowkey"].back().timestamp, kLatestTimestamp); + + batch_mu_->DeleteFamily("rowkey", "cf"); + EXPECT_EQ(batch_mu_->mu_map_["rowkey"].back().type, RowMutation::kDeleteFamily); + EXPECT_EQ(batch_mu_->mu_map_["rowkey"].back().timestamp, kLatestTimestamp); + + batch_mu_->DeleteColumns("rowkey", "cf", "qu"); + EXPECT_EQ(batch_mu_->mu_map_["rowkey"].back().type, RowMutation::kDeleteColumns); + EXPECT_EQ(batch_mu_->mu_map_["rowkey"].back().timestamp, kLatestTimestamp); + + batch_mu_->DeleteColumn("rowkey", "cf", "qu", -1); + EXPECT_EQ(batch_mu_->mu_map_["rowkey"].back().type, RowMutation::kDeleteColumn); + EXPECT_EQ(batch_mu_->mu_map_["rowkey"].back().timestamp, kLatestTimestamp); + + const std::string& huge_str = std::string(1 + (32 << 20), 'h'); + batch_mu_->Put(huge_str, "cf", "qu", "v"); + EXPECT_EQ(batch_mu_->GetError().GetType(), ErrorCode::kBadParam); + batch_mu_->Put("r", "cf", huge_str, "v"); + EXPECT_EQ(batch_mu_->GetError().GetType(), ErrorCode::kBadParam); + batch_mu_->Put("r", "cf", "qu", huge_str); + EXPECT_EQ(batch_mu_->GetError().GetType(), ErrorCode::kBadParam); +} + +void MockOpStatCallback(Table* table, SdkTask* task) { + // Nothing to do +} + +TEST_F(BatchMutationTest, RunCallback) { + EXPECT_FALSE(batch_mu_->IsAsync()); + std::shared_ptr
table = OpenTable("test"); + batch_mu_->Prepare(MockOpStatCallback); + EXPECT_TRUE(batch_mu_->on_finish_callback_ != NULL); + EXPECT_TRUE(batch_mu_->start_ts_ > 0); + // set OpStatCallback + batch_mu_->RunCallback(); + EXPECT_TRUE(batch_mu_->finish_); + EXPECT_TRUE(batch_mu_->IsFinished()); +} + +TEST_F(BatchMutationTest, Size) { + EXPECT_EQ(batch_mu_->Size(), 0); + int64_t ts = -1; + batch_mu_->Put("r", "cf", "qu", "v"); + EXPECT_EQ(batch_mu_->Size(), 6 + sizeof(ts)); + batch_mu_->Put("r", "cf", "qu", "v"); + // only calc one rowkey + EXPECT_EQ(batch_mu_->Size(), (6 + sizeof(ts)) * 2 - 1); + batch_mu_->Put("R", "cf", "qu", "v"); + EXPECT_EQ(batch_mu_->Size(), (6 + sizeof(ts)) * 3 - 1); +} + +TEST_F(BatchMutationTest, GetMutation) { + batch_mu_->Put("r", "cf", "qu", "v"); + batch_mu_->Put("r", "cf", "qu1", "v"); + batch_mu_->Put("r2", "cf", "qu", "v"); + batch_mu_->Put("r3", "cf", "qu", "v"); + EXPECT_EQ((batch_mu_->GetMutation("r", 1)).qualifier, "qu1"); +} +} diff --git a/src/sdk/test/filter_list_test.cc b/src/sdk/test/filter_list_test.cc new file mode 100644 index 000000000..fdc9ddb7c --- /dev/null +++ b/src/sdk/test/filter_list_test.cc @@ -0,0 +1,324 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "gtest/gtest.h" +#include "tera.h" +#include "sdk/scan_impl.h" +#include "proto/filter.pb.h" +#include "io/coding.h" +#include "io/tablet_io.h" +#include "proto/tabletnode_rpc.pb.h" +#include "sdk/filter_list_base.h" +#include "leveldb/raw_key_operator.h" +#include "leveldb/db/dbformat.h" +#include "leveldb/comparator.h" +#include "leveldb/db/memtable.h" + +namespace tera { +namespace filter { + +class FilterListTest : public ::testing::Test { + private: + void SetFilterListForCheck(ScanDescriptor* desc); + void CheckFilterList(const ScanDescriptor& desc); + void SetFilterListForFilterOneRow(ScanDescriptor* desc); + void FilterOneRowCase1(tera::io::TabletIO& tablet_io, const tera::io::ScanOptions& scan_options); + void FilterOneRowCase2(tera::io::TabletIO& tablet_io, const tera::io::ScanOptions& scan_options); + void FilterOneRowCase3(tera::io::TabletIO& tablet_io, const tera::io::ScanOptions& scan_options); + void FilterOneRowCase4(tera::io::TabletIO& tablet_io, const tera::io::ScanOptions& scan_options); + void FilterOneRowCase5(tera::io::TabletIO& tablet_io, const tera::io::ScanOptions& scan_options); + void FilterOneRowForEmptyRow(tera::io::TabletIO& tablet_io, + const tera::io::ScanOptions& scan_options); + void FilterOneRowForEmptyFilter(tera::io::TabletIO& tablet_io, + const tera::io::ScanOptions& scan_options); + void FilterHalfRowCase1(tera::io::TabletIO& tablet_io, const tera::io::ScanOptions& scan_options, + const tera::io::SingleRowBuffer& row_buf); + void MakeOldFilterList(tera::FilterList* old_filter_list_desc); +}; + +void FilterListTest::SetFilterListForCheck(ScanDescriptor* desc) { + int64_t ref_value_1 = 10; + IntegerComparatorPtr comparator_1 = + std::make_shared(IntegerValueType::kInt64, ref_value_1); + ValueFilterPtr value_filter_1 = + std::make_shared(CompareOperator::kGreaterOrEqual, comparator_1); + int64_t ref_value_2 = 5; + IntegerComparatorPtr comparator_2 = + std::make_shared(IntegerValueType::kInt64, ref_value_2); + ValueFilterPtr value_filter_2 = + std::make_shared(CompareOperator::kLess, comparator_2); + int64_t ref_value_3 = 0; + IntegerComparatorPtr comparator_3 = + std::make_shared(IntegerValueType::kInt64, ref_value_3); + ValueFilterPtr value_filter_3 = + std::make_shared(CompareOperator::kGreater, comparator_3); + + FilterListPtr sub_filter_list = std::make_shared(FilterList::kOr); + sub_filter_list->AddFilter(value_filter_1); + sub_filter_list->AddFilter(value_filter_2); + FilterListPtr filter_list = std::make_shared(FilterList::kAnd); + filter_list->AddFilter(value_filter_3); + filter_list->AddFilter(sub_filter_list); + + ASSERT_TRUE(desc->SetFilter(filter_list)); +} + +void FilterListTest::CheckFilterList(const ScanDescriptor& desc) { + ScanDescImpl* scan_desc_impl = desc.GetImpl(); + ASSERT_TRUE(scan_desc_impl); + FilterDesc* filter_desc = scan_desc_impl->GetFilterDesc(); + ASSERT_TRUE(filter_desc); + EXPECT_EQ(filter_desc->type(), FilterDesc::kFilterList); + FilterListPtr filter_list = std::make_shared(); + ASSERT_TRUE(filter_list->ParseFrom(filter_desc->serialized_filter())); + EXPECT_EQ(filter_list->op_, FilterList::kAnd); + + const std::vector& filters = filter_list->filter_list_base_->GetFilters(); + EXPECT_EQ(filters[0]->Type(), kValueFilter); + ValueFilter* value_filter = dynamic_cast(filters[0].get()); + IntegerComparator* cp = dynamic_cast(value_filter->comparator_.get()); + int64_t ref_value_3 = 0; + EXPECT_EQ((int64_t)(cp->integer_value_), ref_value_3); + EXPECT_EQ(filters[1]->Type(), kFilterList); + FilterList* sub_filter_list = dynamic_cast(filters[1].get()); + const std::vector& sub_filters = sub_filter_list->filter_list_base_->GetFilters(); + EXPECT_EQ(sub_filters[0]->Type(), kValueFilter); + value_filter = dynamic_cast(sub_filters[0].get()); + cp = dynamic_cast(value_filter->comparator_.get()); + int64_t ref_value_1 = 10; + EXPECT_EQ((int64_t)(cp->integer_value_), ref_value_1); + EXPECT_EQ(sub_filters[1]->Type(), kValueFilter); + value_filter = dynamic_cast(sub_filters[1].get()); + cp = dynamic_cast(value_filter->comparator_.get()); + int64_t ref_value_2 = 5; + EXPECT_EQ((int64_t)(cp->integer_value_), ref_value_2); +} + +TEST_F(FilterListTest, SetFilterList) { + ScanDescriptor scan_desc(""); + SetFilterListForCheck(&scan_desc); + CheckFilterList(scan_desc); +} + +void FilterListTest::SetFilterListForFilterOneRow(ScanDescriptor* desc) { + BinaryComparatorPtr comparator_1 = std::make_shared("d"); + ValueFilterPtr value_filter_1 = + std::make_shared(CompareOperator::kGreaterOrEqual, comparator_1); + value_filter_1->SetColumnFamily("cf5"); + value_filter_1->SetColumnQualifier("qu5"); + BinaryComparatorPtr comparator_2 = std::make_shared("m"); + ValueFilterPtr value_filter_2 = + std::make_shared(CompareOperator::kLess, comparator_2); + value_filter_2->SetColumnFamily("cf5"); + value_filter_2->SetColumnQualifier("qu5"); + BinaryComparatorPtr comparator_3 = std::make_shared("v"); + ValueFilterPtr value_filter_3 = + std::make_shared(CompareOperator::kGreater, comparator_3); + value_filter_3->SetColumnFamily("cf5"); + value_filter_3->SetColumnQualifier("qu5"); + + FilterListPtr sub_filter_list = std::make_shared(FilterList::kAnd); + sub_filter_list->AddFilter(value_filter_1); + sub_filter_list->AddFilter(value_filter_2); + FilterListPtr filter_list = std::make_shared(FilterList::kOr); + filter_list->AddFilter(sub_filter_list); + filter_list->AddFilter(value_filter_3); + + ASSERT_TRUE(desc->SetFilter(filter_list)); +} + +void FilterListTest::FilterOneRowCase1(tera::io::TabletIO& tablet_io, + const tera::io::ScanOptions& scan_options) { + tera::io::SingleRowBuffer row_buf; + row_buf.Add("key", "cf1", "qu1", "a", 0); + row_buf.Add("key", "cf2", "qu2", "a", 0); + row_buf.Add("key", "cf5", "qu5", "a", 0); + row_buf.Add("key", "cf7", "qu7", "a", 0); + + EXPECT_EQ(tablet_io.ShouldFilterRowBuffer(row_buf, scan_options), true); +} + +void FilterListTest::FilterOneRowCase2(tera::io::TabletIO& tablet_io, + const tera::io::ScanOptions& scan_options) { + tera::io::SingleRowBuffer row_buf; + row_buf.Add("key", "cf1", "qu1", "a", 0); + row_buf.Add("key", "cf2", "qu2", "a", 0); + row_buf.Add("key", "cf5", "qu5", "d", 0); + row_buf.Add("key", "cf7", "qu7", "a", 0); + + EXPECT_EQ(tablet_io.ShouldFilterRowBuffer(row_buf, scan_options), false); +} + +void FilterListTest::FilterOneRowCase3(tera::io::TabletIO& tablet_io, + const tera::io::ScanOptions& scan_options) { + tera::io::SingleRowBuffer row_buf; + row_buf.Add("key", "cf1", "qu1", "a", 0); + row_buf.Add("key", "cf2", "qu2", "a", 0); + row_buf.Add("key", "cf5", "qu5", "m", 0); + row_buf.Add("key", "cf7", "qu7", "a", 0); + + EXPECT_EQ(tablet_io.ShouldFilterRowBuffer(row_buf, scan_options), true); +} + +void FilterListTest::FilterOneRowCase4(tera::io::TabletIO& tablet_io, + const tera::io::ScanOptions& scan_options) { + tera::io::SingleRowBuffer row_buf; + row_buf.Add("key", "cf1", "qu1", "a", 0); + row_buf.Add("key", "cf2", "qu2", "a", 0); + row_buf.Add("key", "cf5", "qu5", "v", 0); + row_buf.Add("key", "cf7", "qu7", "a", 0); + + EXPECT_EQ(tablet_io.ShouldFilterRowBuffer(row_buf, scan_options), true); +} + +void FilterListTest::FilterOneRowCase5(tera::io::TabletIO& tablet_io, + const tera::io::ScanOptions& scan_options) { + tera::io::SingleRowBuffer row_buf; + row_buf.Add("key", "cf1", "qu1", "a", 0); + row_buf.Add("key", "cf2", "qu2", "a", 0); + row_buf.Add("key", "cf5", "qu5", "x", 0); + row_buf.Add("key", "cf7", "qu7", "a", 0); + + EXPECT_EQ(tablet_io.ShouldFilterRowBuffer(row_buf, scan_options), false); +} + +void FilterListTest::FilterOneRowForEmptyRow(tera::io::TabletIO& tablet_io, + const tera::io::ScanOptions& scan_options) { + tera::io::SingleRowBuffer row_buf; + + EXPECT_EQ(tablet_io.ShouldFilterRowBuffer(row_buf, scan_options), true); +} + +void FilterListTest::FilterOneRowForEmptyFilter(tera::io::TabletIO& tablet_io, + const tera::io::ScanOptions& scan_options) { + tera::io::SingleRowBuffer row_buf; + row_buf.Add("key", "cf1", "qu1", "a", 0); + row_buf.Add("key", "cf2", "qu2", "a", 0); + row_buf.Add("key", "cf5", "qu5", "x", 0); + row_buf.Add("key", "cf7", "qu7", "a", 0); + + EXPECT_EQ(tablet_io.ShouldFilterRowBuffer(row_buf, scan_options), false); +} + +TEST_F(FilterListTest, FilterOneRow) { + ScanDescriptor scan_desc(""); + SetFilterListForFilterOneRow(&scan_desc); + ScanDescImpl* scan_desc_impl = scan_desc.GetImpl(); + ASSERT_TRUE(scan_desc_impl); + FilterDesc* filter_desc = scan_desc_impl->GetFilterDesc(); + ASSERT_TRUE(filter_desc); + + tera::io::ScanOptions scan_options; + tera::io::TabletIO tablet_io("", "", ""); + + FilterOneRowForEmptyFilter(tablet_io, scan_options); + + ASSERT_TRUE(tablet_io.SetupFilter(*filter_desc, &scan_options)); + + FilterOneRowCase1(tablet_io, scan_options); + FilterOneRowCase2(tablet_io, scan_options); + FilterOneRowCase3(tablet_io, scan_options); + FilterOneRowCase4(tablet_io, scan_options); + FilterOneRowCase5(tablet_io, scan_options); + FilterOneRowForEmptyRow(tablet_io, scan_options); +} + +void FilterListTest::MakeOldFilterList(tera::FilterList* old_filter_list_desc) { + tera::Filter* old_filter_desc = old_filter_list_desc->add_filter(); + old_filter_desc->set_type(tera::BinComp); + old_filter_desc->set_bin_comp_op(tera::GE); + old_filter_desc->set_field(tera::ValueFilter); + old_filter_desc->set_content("cf3"); + std::string ref_value_1; + int64_t value_int64 = 11; + ref_value_1.assign((char*)&value_int64, sizeof(int64_t)); + old_filter_desc->set_ref_value(ref_value_1); + old_filter_desc->set_value_type(tera::kINT64); + + old_filter_desc = old_filter_list_desc->add_filter(); + old_filter_desc->set_type(tera::BinComp); + old_filter_desc->set_bin_comp_op(tera::LT); + old_filter_desc->set_field(tera::ValueFilter); + old_filter_desc->set_content("cf5"); + std::string ref_value_2; + value_int64 = 20; + ref_value_2.assign((char*)&value_int64, sizeof(int64_t)); + old_filter_desc->set_ref_value(ref_value_2); + old_filter_desc->set_value_type(tera::kINT64); +} + +TEST_F(FilterListTest, TransFilter) { + tera::FilterList old_filter_list_desc; + MakeOldFilterList(&old_filter_list_desc); + tera::io::ScanOptions scan_options; + tera::io::TabletIO tablet_io("", "", ""); + ASSERT_TRUE(tablet_io.TransFilter(old_filter_list_desc, &scan_options)); + + tera::io::SingleRowBuffer row_buf; + IntegerComparatorPtr comparator = std::make_shared(); + std::string value; + comparator->EncodeInteger(IntegerValueType::kInt64, 11, &value); + row_buf.Add("key", "cf1", "", value, 0); + row_buf.Add("key", "cf3", "", value, 0); + row_buf.Add("key", "cf5", "", value, 0); + row_buf.Add("key", "cf7", "", value, 0); + + EXPECT_EQ(tablet_io.ShouldFilterRowBuffer(row_buf, scan_options), false); +} + +TEST_F(FilterListTest, TransFilterAbnormal) { + tera::io::TabletIO tablet_io("", "", ""); + tera::FilterList old_filter_list_desc; + tera::Filter* old_filter_desc = old_filter_list_desc.add_filter(); + { + tera::io::ScanOptions scan_options; + EXPECT_EQ(tablet_io.TransFilter(old_filter_list_desc, &scan_options), false); + } + old_filter_desc->set_type(tera::Regex); + { + tera::io::ScanOptions scan_options; + EXPECT_EQ(tablet_io.TransFilter(old_filter_list_desc, &scan_options), false); + } + old_filter_desc->set_type(tera::BinComp); + { + tera::io::ScanOptions scan_options; + EXPECT_EQ(tablet_io.TransFilter(old_filter_list_desc, &scan_options), false); + } + old_filter_desc->set_field(tera::RowFilter); + { + tera::io::ScanOptions scan_options; + EXPECT_EQ(tablet_io.TransFilter(old_filter_list_desc, &scan_options), false); + } + old_filter_desc->set_field(tera::ValueFilter); + { + tera::io::ScanOptions scan_options; + EXPECT_EQ(tablet_io.TransFilter(old_filter_list_desc, &scan_options), false); + } + old_filter_desc->set_value_type(tera::kINT64); + { + tera::io::ScanOptions scan_options; + EXPECT_EQ(tablet_io.TransFilter(old_filter_list_desc, &scan_options), false); + } + old_filter_desc->set_content("cf1"); + { + tera::io::ScanOptions scan_options; + EXPECT_EQ(tablet_io.TransFilter(old_filter_list_desc, &scan_options), false); + } + std::string ref_value_1; + int64_t value_int64 = 11; + ref_value_1.assign((char*)&value_int64, sizeof(int64_t)); + old_filter_desc->set_ref_value(ref_value_1); + { + tera::io::ScanOptions scan_options; + EXPECT_EQ(tablet_io.TransFilter(old_filter_list_desc, &scan_options), false); + } + old_filter_desc->set_bin_comp_op(tera::GT); + { + tera::io::ScanOptions scan_options; + EXPECT_EQ(tablet_io.TransFilter(old_filter_list_desc, &scan_options), true); + } +} + +} // namespace filter +} // namespace tera diff --git a/src/sdk/test/filter_utils_test.cc b/src/sdk/test/filter_utils_test.cc index 456d406e7..5785c7c7f 100644 --- a/src/sdk/test/filter_utils_test.cc +++ b/src/sdk/test/filter_utils_test.cc @@ -9,54 +9,28 @@ namespace tera { void PrintBytes(const std::string& c, int n) { - fprintf(stderr, "-------------------"); - for (int i = 0; i < n; ++i) { - fprintf(stderr, "%2x ", (unsigned char)c[i]); - } - fprintf(stderr, "\n"); + fprintf(stderr, "-------------------"); + for (int i = 0; i < n; ++i) { + fprintf(stderr, "%2x ", (unsigned char)c[i]); + } + fprintf(stderr, "\n"); } TEST(FilterUtils, RemoveInvisibleChar) { - string schema = ""; - schema = RemoveInvisibleChar(schema); - EXPECT_TRUE(schema == ""); + string schema = ""; + schema = RemoveInvisibleChar(schema); + EXPECT_TRUE(schema == ""); - schema = " "; - schema = RemoveInvisibleChar(schema); - EXPECT_TRUE(schema == ""); + schema = " "; + schema = RemoveInvisibleChar(schema); + EXPECT_TRUE(schema == ""); - schema = "a "; - schema = RemoveInvisibleChar(schema); - EXPECT_TRUE(schema == "a"); + schema = "a "; + schema = RemoveInvisibleChar(schema); + EXPECT_TRUE(schema == "a"); - schema = "a\n \t "; - schema = RemoveInvisibleChar(schema); - EXPECT_TRUE(schema == "a"); + schema = "a\n \t "; + schema = RemoveInvisibleChar(schema); + EXPECT_TRUE(schema == "a"); } - -TEST(FilterUtils, DefaultValueConverter) { - string in, type, out, out_p; - - EXPECT_FALSE(DefaultValueConverter("", "", NULL)); - - in = "8"; - out_p = string("\x08\x0\x0\x0\x0\x0\x0\x0", 8); - type = "int64"; - - EXPECT_TRUE(DefaultValueConverter(in, type, &out)); - EXPECT_EQ(out, out_p); - - in = "-8"; - out_p = string("\xF8\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 8); - type = "int64"; - EXPECT_TRUE(DefaultValueConverter(in, type, &out)); - EXPECT_EQ(out, out_p); - - in = "-8"; - type = "string"; - EXPECT_FALSE(DefaultValueConverter(in, type, &out)); - - type = "illegal"; - EXPECT_FALSE(DefaultValueConverter(in, type, &out)); -} -} // namespace tera +} // namespace tera diff --git a/src/sdk/test/global_txn_batch_op.cc b/src/sdk/test/global_txn_batch_op.cc index 3e1d14af6..6315c3f2d 100644 --- a/src/sdk/test/global_txn_batch_op.cc +++ b/src/sdk/test/global_txn_batch_op.cc @@ -27,414 +27,415 @@ using std::unique_ptr; using std::unordered_map; using std::function; -using TxnPtr = shared_ptr; -using RowMutationPtr = shared_ptr; -using ClientPtr = shared_ptr; -using TablePtr = shared_ptr; - -struct RowkeyCfQu{ - RowkeyCfQu()=default; - RowkeyCfQu(string rowkey, string cf, string qu): - rowkey_(rowkey), - cf_(cf), - qu_(qu) - {} - - string rowkey_, cf_, qu_; +using TxnPtr = shared_ptr; +using RowMutationPtr = shared_ptr; +using ClientPtr = shared_ptr; +using TablePtr = shared_ptr; + +struct RowkeyCfQu { + RowkeyCfQu() = default; + RowkeyCfQu(string rowkey, string cf, string qu) : rowkey_(rowkey), cf_(cf), qu_(qu) {} + + string rowkey_, cf_, qu_; }; -//Used for parsing operator string -using OperatorStructure = vector - vector>>; //vector of rowkey-cf-qus in a table +// Used for parsing operator string +using OperatorStructure = + vector + vector>>; // vector of rowkey-cf-qus in a table static unordered_map& GetHelpCommand() { - static unordered_map help_commands; - return help_commands; + static unordered_map help_commands; + return help_commands; } static void InitHelpCommand() { - auto& help_commands = GetHelpCommand(); - help_commands["cas"] = "Compare and set old_vals to new_vals across different Tables, Rows, and Columns atomically, usage: \n" - " cas "; - help_commands["get"] = "Get values across different Tables, Rows, and Columns atomically, usage: \n" - " get "; - help_commands["put"] = "Put values across different Tables, Rows, and Columns atomically, usage: \n" - " put "; + auto& help_commands = GetHelpCommand(); + help_commands["cas"] = + "Compare and set old_vals to new_vals across different Tables, Rows, and " + "Columns atomically, usage: \n" + " cas " + " "; + help_commands["get"] = + "Get values across different Tables, Rows, and Columns atomically, " + "usage: \n" + " get "; + help_commands["put"] = + "Put values across different Tables, Rows, and Columns atomically, " + "usage: \n" + " put " + ""; } static void PrintHelp(const string& str = "") { - auto& help_commands = GetHelpCommand(); - if (str == "" || help_commands.find(str) == help_commands.end()) { - for (auto& help_info : help_commands) { - cout << help_info.first << " " << help_info.second << endl; - } - } else { - cout << str << ": " << help_commands[str] << endl; + auto& help_commands = GetHelpCommand(); + if (str == "" || help_commands.find(str) == help_commands.end()) { + for (auto& help_info : help_commands) { + cout << help_info.first << " " << help_info.second << endl; } + } else { + cout << str << ": " << help_commands[str] << endl; + } } static vector split(const string& str, const char delimiter) { - vector res; - string::size_type pos = 0; - while (pos < str.size()) { - string::size_type new_pos = str.find(delimiter, pos); - if (new_pos == string::npos) { - res.emplace_back(str.begin() + pos, str.end()); - break; - } else { - res.emplace_back(str.begin() + pos, str.begin() + new_pos); - } - pos = new_pos + 1; + vector res; + string::size_type pos = 0; + while (pos < str.size()) { + string::size_type new_pos = str.find(delimiter, pos); + if (new_pos == string::npos) { + res.emplace_back(str.begin() + pos, str.end()); + break; + } else { + res.emplace_back(str.begin() + pos, str.begin() + new_pos); } - return res; + pos = new_pos + 1; + } + return res; } static int64_t ParseOperatorStructure(const string& str, OperatorStructure& opst, size_t& num) { - opst.clear(); - num = 0; - vector table_operations = split(str, '#'); - for (auto& table_op : table_operations) { - vector table_rowkey = split(table_op, '-'); - if (table_rowkey.size() != 2) { - return -1; - } - - opst.emplace_back(table_rowkey[0], vector()); - vector row_operations = split(table_rowkey[1], ':'); - for (auto& row_op : row_operations) { - vector rowkey_cf_qu = split(row_op, '.'); - if (rowkey_cf_qu.size() < 2 || - rowkey_cf_qu.size() > 3) { - return -1; - } - - if (rowkey_cf_qu.size() == 3) { - opst.back().second.emplace_back(rowkey_cf_qu[0], rowkey_cf_qu[1], rowkey_cf_qu[2]); - } else { - opst.back().second.emplace_back(rowkey_cf_qu[0], rowkey_cf_qu[1], ""); - } - ++num; - } + opst.clear(); + num = 0; + vector table_operations = split(str, '#'); + for (auto& table_op : table_operations) { + vector table_rowkey = split(table_op, '-'); + if (table_rowkey.size() != 2) { + return -1; } - return 0; -} -static int64_t OpenTables(ClientPtr client, - const OperatorStructure& opst, - unordered_map& tables) { - tables.clear(); - tera::ErrorCode ec; - for (auto& table : opst) { - string tablename = table.first; - if (tables.find(table.first) == tables.end()) { - tables.emplace(table.first, TablePtr(client->OpenTable(table.first, &ec))); - if (!tables[table.first]) { - cout << "open table: " << table.first << " failed" << endl; - cout << ec.ToString() << endl; - return -1; - } - } + opst.emplace_back(table_rowkey[0], vector()); + vector row_operations = split(table_rowkey[1], ':'); + for (auto& row_op : row_operations) { + vector rowkey_cf_qu = split(row_op, '.'); + if (rowkey_cf_qu.size() < 2 || rowkey_cf_qu.size() > 3) { + return -1; + } + + if (rowkey_cf_qu.size() == 3) { + opst.back().second.emplace_back(rowkey_cf_qu[0], rowkey_cf_qu[1], rowkey_cf_qu[2]); + } else { + opst.back().second.emplace_back(rowkey_cf_qu[0], rowkey_cf_qu[1], ""); + } + ++num; } - return 0; + } + return 0; } -static int64_t PutOp(ClientPtr client, const vector& args) { - if (args.size() != 4) { - cout << "Arguments Error: " << args.size() << ", need 4" << endl; - PrintHelp(args[1]); - return -1; - } - - OperatorStructure opst; - size_t op_num = 0; - if (ParseOperatorStructure(args[2], opst, op_num) != 0) { - cout << "Parse Arguments Error" << endl; - PrintHelp(args[1]); +static int64_t OpenTables(ClientPtr client, const OperatorStructure& opst, + unordered_map& tables) { + tables.clear(); + tera::ErrorCode ec; + for (auto& table : opst) { + string tablename = table.first; + if (tables.find(table.first) == tables.end()) { + tables.emplace(table.first, TablePtr(client->OpenTable(table.first, &ec))); + if (!tables[table.first]) { + cout << "open table: " << table.first << " failed" << endl; + cout << ec.ToString() << endl; return -1; + } } + } + return 0; +} - vector val = split(args[3], ':'); - if (op_num != val.size()) { - cout << "op size is not equal to val size" << endl; +static int64_t PutOp(ClientPtr client, const vector& args) { + if (args.size() != 4) { + cout << "Arguments Error: " << args.size() << ", need 4" << endl; + PrintHelp(args[1]); + return -1; + } + + OperatorStructure opst; + size_t op_num = 0; + if (ParseOperatorStructure(args[2], opst, op_num) != 0) { + cout << "Parse Arguments Error" << endl; + PrintHelp(args[1]); + return -1; + } + + vector val = split(args[3], ':'); + if (op_num != val.size()) { + cout << "op size is not equal to val size" << endl; + return -1; + } + + unordered_map tables; + if (OpenTables(client, opst, tables) != 0) { + return -1; + } + + TxnPtr g_txn(client->NewGlobalTransaction()); + if (!g_txn) { + cout << "open txn failed" << endl; + return -1; + } + + string result; + for (auto& table : opst) { + const string& tablename = table.first; + const auto& row_cf_qu_list = table.second; + for (auto& row_cf_qu : row_cf_qu_list) { + const string& rowkey = row_cf_qu.rowkey_; + const string& cf = row_cf_qu.cf_; + const string& qu = row_cf_qu.qu_; + + unique_ptr reader(tables[tablename]->NewRowReader(rowkey)); + reader->AddColumn(cf, qu); + g_txn->Get(reader.get()); + if (reader->GetError().GetType() != tera::ErrorCode::kOK && + reader->GetError().GetType() != tera::ErrorCode::kNotFound) { + std::cout << reader->GetError().ToString() << std::endl; return -1; - } + } - unordered_map tables; - if (OpenTables(client, opst, tables) != 0) { - return -1; + if (reader->Done()) { + result += ":"; + } else { + result += reader->Value() + ":"; + } } - - TxnPtr g_txn(client->NewGlobalTransaction()); - if (!g_txn) { - cout << "open txn failed" << endl; - return -1; + } + + if (!result.empty()) result.pop_back(); + + auto val_iter = val.begin(); + for (auto& table : opst) { + const string& tablename = table.first; + const auto& row_cf_qu_list = table.second; + unordered_map row_mutations; + + for (auto& row_cf_qu : row_cf_qu_list) { + const string& rowkey = row_cf_qu.rowkey_; + const string& cf = row_cf_qu.cf_; + const string& qu = row_cf_qu.qu_; + + if (row_mutations.find(rowkey) == row_mutations.end()) { + RowMutationPtr row_mutation(tables[tablename]->NewRowMutation(rowkey)); + row_mutations[rowkey] = row_mutation; + } + row_mutations[rowkey]->Put(cf, qu, *(val_iter++)); } - string result; - for (auto& table : opst) { - const string& tablename = table.first; - const auto& row_cf_qu_list = table.second; - for (auto& row_cf_qu : row_cf_qu_list) { - const string& rowkey = row_cf_qu.rowkey_; - const string& cf = row_cf_qu.cf_ ; - const string& qu = row_cf_qu.qu_ ; - - unique_ptr reader(tables[tablename]->NewRowReader(rowkey)); - reader->AddColumn(cf, qu); - g_txn->Get(reader.get()); - if (reader->GetError().GetType() != tera::ErrorCode::kOK && - reader->GetError().GetType() != tera::ErrorCode::kNotFound) { - std::cout << reader->GetError().ToString() << std::endl; - return -1; - } - - if (reader->Done()) { - result += ":"; - } else { - result += reader->Value() + ":"; - } - } - } - - if (!result.empty()) result.pop_back(); - - auto val_iter = val.begin(); - for (auto& table : opst) { - const string& tablename = table.first; - const auto& row_cf_qu_list = table.second; - unordered_map row_mutations; - - for (auto& row_cf_qu : row_cf_qu_list) { - const string& rowkey = row_cf_qu.rowkey_; - const string& cf = row_cf_qu.cf_ ; - const string& qu = row_cf_qu.qu_ ; - - if (row_mutations.find(rowkey) == row_mutations.end()) { - RowMutationPtr row_mutation(tables[tablename]->NewRowMutation(rowkey)); - row_mutations[rowkey] = row_mutation; - } - row_mutations[rowkey]->Put(cf, qu, *(val_iter++)); - } - - for (auto mutation : row_mutations) { - g_txn->ApplyMutation(mutation.second.get()); - } + for (auto mutation : row_mutations) { + g_txn->ApplyMutation(mutation.second.get()); } + } - - g_txn->Commit(); - if (g_txn->GetError().GetType() != tera::ErrorCode::kOK) { - std::cout << "commit failed: " << g_txn->GetError().ToString() << std::endl; - cout << result << endl; - return -1; - } - std::cout << "commit success" << std::endl; + g_txn->Commit(); + if (g_txn->GetError().GetType() != tera::ErrorCode::kOK) { + std::cout << "commit failed: " << g_txn->GetError().ToString() << std::endl; + cout << result << endl; + return -1; + } + std::cout << "commit success" << std::endl; - return 0; + return 0; } static int64_t GetOp(ClientPtr client, const vector& args) { - if (args.size() != 3) { - cout << "Arguments Error: " << args.size() << ", need 3" << endl; - PrintHelp(args[1]); + if (args.size() != 3) { + cout << "Arguments Error: " << args.size() << ", need 3" << endl; + PrintHelp(args[1]); + return -1; + } + + OperatorStructure opst; + size_t op_num = 0; + if (ParseOperatorStructure(args[2], opst, op_num) != 0) { + cout << "Parse Arguments Error" << endl; + PrintHelp(args[1]); + return -1; + } + + unordered_map tables; + if (OpenTables(client, opst, tables) != 0) { + return -1; + } + + TxnPtr g_txn(client->NewGlobalTransaction()); + if (!g_txn) { + cout << "open txn failed" << endl; + return -1; + } + + string result; + for (auto& table : opst) { + const string& tablename = table.first; + const auto& row_cf_qu_list = table.second; + for (auto& row_cf_qu : row_cf_qu_list) { + const string& rowkey = row_cf_qu.rowkey_; + const string& cf = row_cf_qu.cf_; + const string& qu = row_cf_qu.qu_; + + unique_ptr reader(tables[tablename]->NewRowReader(rowkey)); + reader->AddColumn(cf, qu); + g_txn->Get(reader.get()); + if (reader->GetError().GetType() != tera::ErrorCode::kOK && + reader->GetError().GetType() != tera::ErrorCode::kNotFound) { + std::cout << reader->GetError().ToString() << std::endl; return -1; - } - - OperatorStructure opst; - size_t op_num = 0; - if (ParseOperatorStructure(args[2], opst, op_num) != 0) { - cout << "Parse Arguments Error" << endl; - PrintHelp(args[1]); - return -1; - } - - unordered_map tables; - if (OpenTables(client, opst, tables) != 0) { - return -1; - } + } - TxnPtr g_txn(client->NewGlobalTransaction()); - if (!g_txn) { - cout << "open txn failed" << endl; - return -1; + if (reader->Done()) { + result += ":"; + } else { + result += reader->Value() + ":"; + } } + } - string result; - for (auto& table : opst) { - const string& tablename = table.first; - const auto& row_cf_qu_list = table.second; - for (auto& row_cf_qu : row_cf_qu_list) { - const string& rowkey = row_cf_qu.rowkey_; - const string& cf = row_cf_qu.cf_ ; - const string& qu = row_cf_qu.qu_ ; - - unique_ptr reader(tables[tablename]->NewRowReader(rowkey)); - reader->AddColumn(cf, qu); - g_txn->Get(reader.get()); - if (reader->GetError().GetType() != tera::ErrorCode::kOK && - reader->GetError().GetType() != tera::ErrorCode::kNotFound) { - std::cout << reader->GetError().ToString() << std::endl; - return -1; - } - - if (reader->Done()) { - result += ":"; - } else { - result += reader->Value() + ":"; - } - } - } - - if (!result.empty()) result.pop_back(); - cout << result << endl; - return 0; + if (!result.empty()) result.pop_back(); + cout << result << endl; + return 0; } static int64_t CasOp(ClientPtr client, const vector& args) { - if (args.size() != 5) { - cout << "Arguments Error: " << args.size() << ", need 5" << endl; - PrintHelp(args[1]); - return -1; - } - - OperatorStructure opst; - size_t op_num = 0; - if (ParseOperatorStructure(args[2], opst, op_num) != 0) { - cout << "Parse Arguments Error" << endl; - PrintHelp(args[1]); - return -1; - } - - unordered_map tables; - if (OpenTables(client, opst, tables) != 0) { - return -1; - } - - TxnPtr g_txn(client->NewGlobalTransaction()); - if (!g_txn) { - cout << "open txn failed" << endl; + if (args.size() != 5) { + cout << "Arguments Error: " << args.size() << ", need 5" << endl; + PrintHelp(args[1]); + return -1; + } + + OperatorStructure opst; + size_t op_num = 0; + if (ParseOperatorStructure(args[2], opst, op_num) != 0) { + cout << "Parse Arguments Error" << endl; + PrintHelp(args[1]); + return -1; + } + + unordered_map tables; + if (OpenTables(client, opst, tables) != 0) { + return -1; + } + + TxnPtr g_txn(client->NewGlobalTransaction()); + if (!g_txn) { + cout << "open txn failed" << endl; + return -1; + } + + string cur_val; + const string& old_val = args[3]; + const string& new_val = args[4]; + for (auto& table : opst) { + const string& tablename = table.first; + const auto& row_cf_qu_list = table.second; + for (auto& row_cf_qu : row_cf_qu_list) { + const string& rowkey = row_cf_qu.rowkey_; + const string& cf = row_cf_qu.cf_; + const string& qu = row_cf_qu.qu_; + + unique_ptr reader(tables[tablename]->NewRowReader(rowkey)); + reader->AddColumn(cf, qu); + g_txn->Get(reader.get()); + if (g_txn->GetError().GetType() != tera::ErrorCode::kOK) { + std::cout << g_txn->GetError().ToString() << std::endl; return -1; - } + } - string cur_val; - const string& old_val = args[3]; - const string& new_val = args[4]; - for (auto& table : opst) { - const string& tablename = table.first; - const auto& row_cf_qu_list = table.second; - for (auto& row_cf_qu : row_cf_qu_list) { - const string& rowkey = row_cf_qu.rowkey_; - const string& cf = row_cf_qu.cf_ ; - const string& qu = row_cf_qu.qu_ ; - - unique_ptr reader(tables[tablename]->NewRowReader(rowkey)); - reader->AddColumn(cf, qu); - g_txn->Get(reader.get()); - if (g_txn->GetError().GetType() != tera::ErrorCode::kOK) { - std::cout << g_txn->GetError().ToString() << std::endl; - return -1; - } - - if (reader->Done()) { - cur_val += ":"; - } else { - cur_val += reader->Value() + ":"; - } - } + if (reader->Done()) { + cur_val += ":"; + } else { + cur_val += reader->Value() + ":"; + } } - - if (!cur_val.empty()) cur_val.pop_back(); - - if (old_val != cur_val) { - cout << "cas failed: NotEqual" << endl; - return -1; + } + + if (!cur_val.empty()) cur_val.pop_back(); + + if (old_val != cur_val) { + cout << "cas failed: NotEqual" << endl; + return -1; + } + + vector new_val_list = split(new_val, ':'); + if (op_num != new_val_list.size()) { + cout << "op size is not equal to val size" << endl; + return -1; + } + + auto val_iter = new_val_list.begin(); + for (auto& table : opst) { + const string& tablename = table.first; + const auto& row_cf_qu_list = table.second; + unordered_map row_mutations; + + for (auto& row_cf_qu : row_cf_qu_list) { + const string& rowkey = row_cf_qu.rowkey_; + const string& cf = row_cf_qu.cf_; + const string& qu = row_cf_qu.qu_; + + if (row_mutations.find(rowkey) == row_mutations.end()) { + RowMutationPtr row_mutation(tables[tablename]->NewRowMutation(rowkey)); + row_mutations[rowkey] = row_mutation; + } + + row_mutations[rowkey]->Put(cf, qu, *(val_iter++)); } - vector new_val_list = split(new_val, ':'); - if (op_num != new_val_list.size()) { - cout << "op size is not equal to val size" << endl; - return -1; + for (auto mutation : row_mutations) { + g_txn->ApplyMutation(mutation.second.get()); } + } - auto val_iter = new_val_list.begin(); - for (auto& table : opst) { - const string& tablename = table.first; - const auto& row_cf_qu_list = table.second; - unordered_map row_mutations; + g_txn->Commit(); + if (g_txn->GetError().GetType() != tera::ErrorCode::kOK) { + std::cout << "cas failed: " << g_txn->GetError().ToString() << std::endl; + return -1; + } else { + std::cout << "cas success" << endl; + } - for (auto& row_cf_qu : row_cf_qu_list) { - const string& rowkey = row_cf_qu.rowkey_; - const string& cf = row_cf_qu.cf_ ; - const string& qu = row_cf_qu.qu_ ; + return 0; +} - if (row_mutations.find(rowkey) == row_mutations.end()) { - RowMutationPtr row_mutation(tables[tablename]->NewRowMutation(rowkey)); - row_mutations[rowkey] = row_mutation; - } +static void SignalHandler(int) { _exit(0); } - row_mutations[rowkey]->Put(cf, qu, *(val_iter++)); - } +int main(int argc, char* argv[]) { + signal(SIGINT, SignalHandler); + signal(SIGTERM, SignalHandler); + ::google::ParseCommandLineFlags(&argc, &argv, true); - for (auto mutation : row_mutations) { - g_txn->ApplyMutation(mutation.second.get()); - } - } + vector args(argv, argv + argc); + InitHelpCommand(); - g_txn->Commit(); - if (g_txn->GetError().GetType() != tera::ErrorCode::kOK) { - std::cout << "cas failed: " << g_txn->GetError().ToString() << std::endl; - return -1; + if (args.size() < 2) { + PrintHelp(); + return 0; + } else if (args[1] == "help") { + if (args.size() > 2) { + PrintHelp(args[2]); + return 0; } else { - std::cout << "cas success" << endl; + PrintHelp(); + return 0; } - + } else if (args[1] == "version") { + PrintSystemVersion(); return 0; -} - -static void SignalHandler(int){ - _exit(0); -} - -int main(int argc, char *argv[]) { - signal(SIGINT, SignalHandler); - signal(SIGTERM, SignalHandler); - ::google::ParseCommandLineFlags(&argc, &argv, true); - - vector args(argv, argv + argc); - InitHelpCommand(); - - if (args.size() < 2) { - PrintHelp(); - return 0; - } else if (args[1] == "help") { - if (args.size() > 2) { - PrintHelp(args[2]); - return 0; - } else { - PrintHelp(); - return 0; - } - } else if (args[1] == "version") { - PrintSystemVersion(); - return 0; - } - - unordered_map& args)>> command_table; - command_table["put"] = PutOp; - command_table["get"] = GetOp; - command_table["cas"] = CasOp; - - if (command_table.find(args[1]) == command_table.end()) { - cout << "Wrong Command" << endl; - PrintHelp(); - return -1; - } - - tera::ErrorCode ec; - ClientPtr client(tera::Client::NewClient(FLAGS_flagfile, args[1], &ec)); - if (!client) { - cout << "Create Client Failed: " << ec.ToString() << endl; - return -1; - } - - return command_table[args[1]](client, args); + } + + unordered_map& args)>> + command_table; + command_table["put"] = PutOp; + command_table["get"] = GetOp; + command_table["cas"] = CasOp; + + if (command_table.find(args[1]) == command_table.end()) { + cout << "Wrong Command" << endl; + PrintHelp(); + return -1; + } + + tera::ErrorCode ec; + ClientPtr client(tera::Client::NewClient(FLAGS_flagfile, args[1], &ec)); + if (!client) { + cout << "Create Client Failed: " << ec.ToString() << endl; + return -1; + } + + return command_table[args[1]](client, args); } diff --git a/src/sdk/test/global_txn_internal_test.cc b/src/sdk/test/global_txn_internal_test.cc index 91719018c..86fab6505 100644 --- a/src/sdk/test/global_txn_internal_test.cc +++ b/src/sdk/test/global_txn_internal_test.cc @@ -19,735 +19,724 @@ #include "tera.h" DECLARE_string(tera_coord_type); -DECLARE_int32(tera_sdk_timeout); +DECLARE_int32(tera_sdk_read_timeout); DECLARE_int32(tera_gtxn_all_puts_size_limit); namespace tera { class GlobalTxnInternalTest : public ::testing::Test { -public: - GlobalTxnInternalTest() - : start_ts_(100), thread_pool_(2), gtxn_internal_(std::shared_ptr()) { - gtxn_internal_.SetStartTimestamp(start_ts_); - } - - ~GlobalTxnInternalTest() {} - - std::shared_ptr
OpenTable(const std::string& tablename) { - FLAGS_tera_coord_type = "fake_zk"; - std::shared_ptr table_(new MockTable(tablename, &thread_pool_)); - return table_; - } - - void MakeKvPair(const std::string& row, - const std::string& cf, - const std::string& qu, - int64_t ts, - const std::string& val, - RowResult* value_list) { - - value_list->clear_key_values(); - KeyValuePair* kv = value_list->add_key_values(); - kv->set_key(row); - kv->set_column_family(cf); - kv->set_qualifier(qu); - kv->set_timestamp(ts); - kv->set_value(val); - } - - void SetSchema(Table* table, const TableSchema& table_schema) { - TableImpl* table_impl = static_cast(table); - table_impl->table_schema_ = table_schema; - } - - void BuildResult(RowReaderImpl* reader_impl, - const RowResult& value_list, - RowReader::TRow *row) { - - reader_impl->result_.clear_key_values(); - reader_impl->SetResult(value_list); - row->clear(); - reader_impl->ToMap(row); - } - -private: - int64_t start_ts_; - common::ThreadPool thread_pool_; - GlobalTxnInternal gtxn_internal_; + public: + GlobalTxnInternalTest() + : start_ts_(100), thread_pool_(2), gtxn_internal_(std::shared_ptr()) { + gtxn_internal_.SetStartTimestamp(start_ts_); + } + + ~GlobalTxnInternalTest() {} + + std::shared_ptr
OpenTable(const std::string& tablename) { + FLAGS_tera_coord_type = "fake_zk"; + std::shared_ptr table_(new MockTable(tablename, &thread_pool_)); + return table_; + } + + void MakeKvPair(const std::string& row, const std::string& cf, const std::string& qu, int64_t ts, + const std::string& val, RowResult* value_list) { + value_list->clear_key_values(); + KeyValuePair* kv = value_list->add_key_values(); + kv->set_key(row); + kv->set_column_family(cf); + kv->set_qualifier(qu); + kv->set_timestamp(ts); + kv->set_value(val); + } + + void SetSchema(Table* table, const TableSchema& table_schema) { + TableImpl* table_impl = static_cast(table); + table_impl->table_schema_ = table_schema; + } + + void BuildResult(RowReaderImpl* reader_impl, const RowResult& value_list, RowReader::TRow* row) { + reader_impl->result_.clear_key_values(); + reader_impl->SetResult(value_list); + row->clear(); + reader_impl->ToMap(row); + } + + private: + int64_t start_ts_; + common::ThreadPool thread_pool_; + GlobalTxnInternal gtxn_internal_; }; TEST_F(GlobalTxnInternalTest, CheckTable) { - ErrorCode status; - std::shared_ptr
t1 = OpenTable("t1"); - std::shared_ptr
t2 = OpenTable("t2"); - std::shared_ptr
t3 = OpenTable("t3"); - std::shared_ptr
t4 = OpenTable("t4"); - EXPECT_FALSE(t1.get() == NULL); - EXPECT_FALSE(t2.get() == NULL); - EXPECT_FALSE(t3.get() == NULL); - EXPECT_FALSE(t4.get() == NULL); - // table and exist cf - TableDescriptor desc("t1"); - desc.EnableTxn(); - desc.AddLocalityGroup("lg0"); - ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); - cfd1->EnableGlobalTransaction(); - TableSchema schema; - TableDescToSchema(desc, &schema); - SetSchema(t1.get(), schema); - - EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); - // table and not exist cf - TableDescriptor desc1("t1"); - desc1.EnableTxn(); - desc1.AddLocalityGroup("lg0"); - ColumnFamilyDescriptor* cfd11 = desc1.AddColumnFamily("cf1"); - cfd11->DisableGlobalTransaction(); - TableSchema schema1; - TableDescToSchema(desc1, &schema1); - SetSchema(t2.get(), schema1); - EXPECT_FALSE(gtxn_internal_.CheckTable(t2.get(), &status)); - - // table and exist cf - TableDescriptor desc2("t1"); - desc2.AddLocalityGroup("lg0"); - ColumnFamilyDescriptor* cfd12 = desc2.AddColumnFamily("cf1"); - cfd12->EnableGlobalTransaction(); - TableSchema schema2; - TableDescToSchema(desc2, &schema2); - SetSchema(t3.get(), schema2); - EXPECT_FALSE(gtxn_internal_.CheckTable(t3.get(), &status)); - - // table and not exist cf - TableDescriptor desc3("t1"); - desc3.AddLocalityGroup("lg0"); - ColumnFamilyDescriptor* cfd13 = desc3.AddColumnFamily("cf1"); - cfd13->DisableGlobalTransaction(); - TableSchema schema3; - TableDescToSchema(desc3, &schema3); - SetSchema(t4.get(), schema3); - EXPECT_FALSE(gtxn_internal_.CheckTable(t4.get(), &status)); - + ErrorCode status; + std::shared_ptr
t1 = OpenTable("t1"); + std::shared_ptr
t2 = OpenTable("t2"); + std::shared_ptr
t3 = OpenTable("t3"); + std::shared_ptr
t4 = OpenTable("t4"); + EXPECT_FALSE(t1.get() == NULL); + EXPECT_FALSE(t2.get() == NULL); + EXPECT_FALSE(t3.get() == NULL); + EXPECT_FALSE(t4.get() == NULL); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1.get(), schema); + + EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); + // table and not exist cf + TableDescriptor desc1("t1"); + desc1.EnableTxn(); + desc1.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd11 = desc1.AddColumnFamily("cf1"); + cfd11->DisableGlobalTransaction(); + TableSchema schema1; + TableDescToSchema(desc1, &schema1); + SetSchema(t2.get(), schema1); + EXPECT_FALSE(gtxn_internal_.CheckTable(t2.get(), &status)); + + // table and exist cf + TableDescriptor desc2("t1"); + desc2.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd12 = desc2.AddColumnFamily("cf1"); + cfd12->EnableGlobalTransaction(); + TableSchema schema2; + TableDescToSchema(desc2, &schema2); + SetSchema(t3.get(), schema2); + EXPECT_FALSE(gtxn_internal_.CheckTable(t3.get(), &status)); + + // table and not exist cf + TableDescriptor desc3("t1"); + desc3.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd13 = desc3.AddColumnFamily("cf1"); + cfd13->DisableGlobalTransaction(); + TableSchema schema3; + TableDescToSchema(desc3, &schema3); + SetSchema(t4.get(), schema3); + EXPECT_FALSE(gtxn_internal_.CheckTable(t4.get(), &status)); } TEST_F(GlobalTxnInternalTest, IsLockedByOthers) { - std::shared_ptr
t1 = OpenTable("t1"); - - Cell cell1(t1.get(), "row1", "cf1", "qu1", start_ts_, "val"); - - RowReader* reader = t1->NewRowReader("row1"); - RowReaderImpl* reader_impl = (RowReaderImpl*)reader; - RowResult value_list; - // exist lock col && ts < start_ts_ - // 12 < 100 less than start_ts - MakeKvPair("row1", "cf1", PackLockName("qu1"), 12, "", &value_list); - RowReader::TRow row; - BuildResult(reader_impl, value_list, &row); - EXPECT_TRUE(gtxn_internal_.IsLockedByOthers(row, cell1)); - - // not exist lock col - value_list.clear_key_values(); - MakeKvPair("row1", "cf1", "qu1", 120, "", &value_list); - BuildResult(reader_impl, value_list, &row); - EXPECT_FALSE(gtxn_internal_.IsLockedByOthers(row, cell1)); - - // exist lock col && ts > start_ts_ - value_list.clear_key_values(); - // 120 > 100 - MakeKvPair("row1", "cf1", PackLockName("qu1"), 120, "", &value_list); - BuildResult(reader_impl, value_list, &row); - - EXPECT_FALSE(gtxn_internal_.IsLockedByOthers(row, cell1)); + std::shared_ptr
t1 = OpenTable("t1"); + + Cell cell1(t1.get(), "row1", "cf1", "qu1", start_ts_, "val"); + + RowReader* reader = t1->NewRowReader("row1"); + RowReaderImpl* reader_impl = (RowReaderImpl*)reader; + RowResult value_list; + // exist lock col && ts < start_ts_ + // 12 < 100 less than start_ts + MakeKvPair("row1", "cf1", PackLockName("qu1"), 12, "", &value_list); + RowReader::TRow row; + BuildResult(reader_impl, value_list, &row); + EXPECT_TRUE(gtxn_internal_.IsLockedByOthers(row, cell1)); + + // not exist lock col + value_list.clear_key_values(); + MakeKvPair("row1", "cf1", "qu1", 120, "", &value_list); + BuildResult(reader_impl, value_list, &row); + EXPECT_FALSE(gtxn_internal_.IsLockedByOthers(row, cell1)); + + // exist lock col && ts > start_ts_ + value_list.clear_key_values(); + // 120 > 100 + MakeKvPair("row1", "cf1", PackLockName("qu1"), 120, "", &value_list); + BuildResult(reader_impl, value_list, &row); + + EXPECT_FALSE(gtxn_internal_.IsLockedByOthers(row, cell1)); } TEST_F(GlobalTxnInternalTest, IsPrimary) { - std::shared_ptr
t1 = OpenTable("t1"); - EXPECT_FALSE(t1.get() == NULL); - Cell cell1(t1.get(), "row1", "cf1", "qu1", start_ts_, "val"); - Cell cell2(t1.get(), "row1", "cf2", "qu1", start_ts_, "val"); - - PrimaryInfo info2; - info2.set_table_name("t1"); - info2.set_row_key("row1"); - info2.set_column_family("cf1"); - info2.set_qualifier("qu1"); - info2.set_gtxn_start_ts(200); - - EXPECT_TRUE(gtxn_internal_.IsPrimary(cell1, info2)); - EXPECT_FALSE(gtxn_internal_.IsPrimary(cell2, info2)); - + std::shared_ptr
t1 = OpenTable("t1"); + EXPECT_FALSE(t1.get() == NULL); + Cell cell1(t1.get(), "row1", "cf1", "qu1", start_ts_, "val"); + Cell cell2(t1.get(), "row1", "cf2", "qu1", start_ts_, "val"); + + PrimaryInfo info2; + info2.set_table_name("t1"); + info2.set_row_key("row1"); + info2.set_column_family("cf1"); + info2.set_qualifier("qu1"); + info2.set_gtxn_start_ts(200); + + EXPECT_TRUE(gtxn_internal_.IsPrimary(cell1, info2)); + EXPECT_FALSE(gtxn_internal_.IsPrimary(cell2, info2)); } TEST_F(GlobalTxnInternalTest, FindTable) { - std::shared_ptr
t1 = OpenTable("t1"); - EXPECT_FALSE(t1.get() == NULL); - - TableDescriptor desc("t1"); - desc.EnableTxn(); - desc.AddLocalityGroup("lg0"); - ColumnFamilyDescriptor* cfd = desc.AddColumnFamily("cf2"); - cfd->EnableGlobalTransaction(); - - TableSchema schema; - TableDescToSchema(desc, &schema); - SetSchema(t1.get(), schema); - - // call CheckTable(t1.get()) - ErrorCode status; - EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); - - // t1 in tables_ - Table* t11 = gtxn_internal_.FindTable("t1"); - EXPECT_TRUE(t11->GetName() == t1->GetName()); - + std::shared_ptr
t1 = OpenTable("t1"); + EXPECT_FALSE(t1.get() == NULL); + + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd = desc.AddColumnFamily("cf2"); + cfd->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1.get(), schema); + + // call CheckTable(t1.get()) + ErrorCode status; + EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); + + // t1 in tables_ + Table* t11 = gtxn_internal_.FindTable("t1"); + EXPECT_TRUE(t11->GetName() == t1->GetName()); } TEST_F(GlobalTxnInternalTest, ConflictWithOtherWrite) { - std::shared_ptr
t1 = OpenTable("t1"); - RowReader* r = t1->NewRowReader("row1"); - RowReaderImpl* reader_impl = (RowReaderImpl*)r; - RowResult value_list; - // 12 < 100 less than start_ts - MakeKvPair("row1", "cf1", "qu1", 12, "", &value_list); - reader_impl->SetResult(value_list); - ErrorCode status; - std::vector ws; - // ws is empty - std::unique_ptr reader(reader_impl); - EXPECT_FALSE(gtxn_internal_.ConflictWithOtherWrite(&ws, reader, &status)); - - // different row writes - for(int i = 0; i < 3; ++i) { - Cell cell(t1.get(), "row2", "cf" + std::to_string(i), - "qu" + std::to_string(i), start_ts_, "val"); - Write w(cell); - ws.push_back(w); - } - EXPECT_FALSE(gtxn_internal_.ConflictWithOtherWrite(&ws, reader, &status)); - - // same row, but not exist target cf - ws.clear(); - for(int i = 0; i < 3; ++i) { - Cell cell(t1.get(), "row1", "cf0", "qu" + std::to_string(i), start_ts_, "val"); - Write w(cell); - ws.push_back(w); - } - EXPECT_FALSE(gtxn_internal_.ConflictWithOtherWrite(&ws, reader, &status)); - - // same row,cf, but not exist write_col, lock_col - ws.clear(); - for(int i = 0; i < 3; ++i) { - Cell cell(t1.get(), "row1", "cf1", "qu" + std::to_string(i), start_ts_, "val"); - Write w(cell); - ws.push_back(w); - } - EXPECT_FALSE(gtxn_internal_.ConflictWithOtherWrite(&ws, reader, &status)); - - // same row, cf && exist write_col(latest_ts >= start_ts_) - value_list.clear_key_values(); - // 120 > 100 - MakeKvPair("row1", "cf1", PackWriteName("qu1"), 120, "", &value_list); - reader_impl->result_.clear_key_values(); - reader_impl->SetResult(value_list); - - EXPECT_TRUE(gtxn_internal_.ConflictWithOtherWrite(&ws, reader, &status)); - EXPECT_TRUE(status.GetType() == ErrorCode::kGTxnWriteConflict); - - // same row, cf && exist write_col(latest_ts < start_ts_) - // not exist lock_col - value_list.clear_key_values(); - // 20 < 100 less than start_ts - MakeKvPair("row1", "cf1", PackWriteName("qu1"), 20, "", &value_list); - reader_impl->result_.clear_key_values(); - reader_impl->SetResult(value_list); - - EXPECT_FALSE(gtxn_internal_.ConflictWithOtherWrite(&ws, reader, &status)); - - // same row, cf && exist write_col(latest_ts < start_ts_) - // not exist lock_col - value_list.clear_key_values(); - // 20 < 100 less than start_ts - MakeKvPair("row1", "cf1", PackWriteName("qu1"), 20, "", &value_list); - MakeKvPair("row1", "cf1", PackLockName("qu1"), 20, "", &value_list); - reader_impl->result_.clear_key_values(); - reader_impl->SetResult(value_list); - - EXPECT_TRUE(gtxn_internal_.ConflictWithOtherWrite(&ws, reader, &status)); - EXPECT_TRUE(status.GetType() == ErrorCode::kGTxnLockConflict); + std::shared_ptr
t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("row1"); + RowReaderImpl* reader_impl = (RowReaderImpl*)r; + RowResult value_list; + // 12 < 100 less than start_ts + MakeKvPair("row1", "cf1", "qu1", 12, "", &value_list); + reader_impl->SetResult(value_list); + ErrorCode status; + std::vector ws; + // ws is empty + std::unique_ptr reader(reader_impl); + EXPECT_FALSE(gtxn_internal_.ConflictWithOtherWrite(&ws, reader, &status)); + + // different row writes + for (int i = 0; i < 3; ++i) { + Cell cell(t1.get(), "row2", "cf" + std::to_string(i), "qu" + std::to_string(i), start_ts_, + "val"); + Write w(cell); + ws.push_back(w); + } + EXPECT_FALSE(gtxn_internal_.ConflictWithOtherWrite(&ws, reader, &status)); + + // same row, but not exist target cf + ws.clear(); + for (int i = 0; i < 3; ++i) { + Cell cell(t1.get(), "row1", "cf0", "qu" + std::to_string(i), start_ts_, "val"); + Write w(cell); + ws.push_back(w); + } + EXPECT_FALSE(gtxn_internal_.ConflictWithOtherWrite(&ws, reader, &status)); + + // same row,cf, but not exist write_col, lock_col + ws.clear(); + for (int i = 0; i < 3; ++i) { + Cell cell(t1.get(), "row1", "cf1", "qu" + std::to_string(i), start_ts_, "val"); + Write w(cell); + ws.push_back(w); + } + EXPECT_FALSE(gtxn_internal_.ConflictWithOtherWrite(&ws, reader, &status)); + + // same row, cf && exist write_col(latest_ts >= start_ts_) + value_list.clear_key_values(); + // 120 > 100 + MakeKvPair("row1", "cf1", PackWriteName("qu1"), 120, "", &value_list); + reader_impl->result_.clear_key_values(); + reader_impl->SetResult(value_list); + + EXPECT_TRUE(gtxn_internal_.ConflictWithOtherWrite(&ws, reader, &status)); + EXPECT_TRUE(status.GetType() == ErrorCode::kGTxnWriteConflict); + + // same row, cf && exist write_col(latest_ts < start_ts_) + // not exist lock_col + value_list.clear_key_values(); + // 20 < 100 less than start_ts + MakeKvPair("row1", "cf1", PackWriteName("qu1"), 20, "", &value_list); + reader_impl->result_.clear_key_values(); + reader_impl->SetResult(value_list); + + EXPECT_FALSE(gtxn_internal_.ConflictWithOtherWrite(&ws, reader, &status)); + + // same row, cf && exist write_col(latest_ts < start_ts_) + // not exist lock_col + value_list.clear_key_values(); + // 20 < 100 less than start_ts + MakeKvPair("row1", "cf1", PackWriteName("qu1"), 20, "", &value_list); + MakeKvPair("row1", "cf1", PackLockName("qu1"), 20, "", &value_list); + reader_impl->result_.clear_key_values(); + reader_impl->SetResult(value_list); + + EXPECT_TRUE(gtxn_internal_.ConflictWithOtherWrite(&ws, reader, &status)); + EXPECT_TRUE(status.GetType() == ErrorCode::kGTxnLockConflict); } TEST_F(GlobalTxnInternalTest, IsGTxnColumnFamily) { - const std::string cf1 = "cf1", cf2 = "cf2"; - - std::shared_ptr
t1 = OpenTable("t1"); - EXPECT_FALSE(t1.get() == NULL); - - TableDescriptor desc("t1"); - desc.EnableTxn(); - desc.AddLocalityGroup("lg0"); - ColumnFamilyDescriptor* cfd = desc.AddColumnFamily(cf1); - cfd->DisableGlobalTransaction(); - ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily(cf2); - cfd1->EnableGlobalTransaction(); - - TableSchema schema; - TableDescToSchema(desc, &schema); - SetSchema(t1.get(), schema); - - // IsGTxnColumnFamily(t1, xxx) must be call after CheckTable(t1.get()) - EXPECT_FALSE(gtxn_internal_.IsGTxnColumnFamily("t1", cf1)); - EXPECT_FALSE(gtxn_internal_.IsGTxnColumnFamily("t1", cf2)); - EXPECT_FALSE(gtxn_internal_.IsGTxnColumnFamily("t2", cf1)); - // call CheckTable(t1.get()) - ErrorCode status; - EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); - - // call IsGTxnColumnFamily(t1, xxx) cf1 is gtxn=false - EXPECT_FALSE(gtxn_internal_.IsGTxnColumnFamily("t1", cf1)); - - // call IsGTxnColumnFamily(t1, xxx) cf2 is gtxn=true - EXPECT_TRUE(gtxn_internal_.IsGTxnColumnFamily("t1", cf2)); - - // call IsGTxnColumnFamily(t2, xxx) - EXPECT_FALSE(gtxn_internal_.IsGTxnColumnFamily("t2", cf1)); -} + const std::string cf1 = "cf1", cf2 = "cf2"; + + std::shared_ptr
t1 = OpenTable("t1"); + EXPECT_FALSE(t1.get() == NULL); + + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd = desc.AddColumnFamily(cf1); + cfd->DisableGlobalTransaction(); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily(cf2); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1.get(), schema); + + // IsGTxnColumnFamily(t1, xxx) must be call after CheckTable(t1.get()) + EXPECT_FALSE(gtxn_internal_.IsGTxnColumnFamily("t1", cf1)); + EXPECT_FALSE(gtxn_internal_.IsGTxnColumnFamily("t1", cf2)); + EXPECT_FALSE(gtxn_internal_.IsGTxnColumnFamily("t2", cf1)); + // call CheckTable(t1.get()) + ErrorCode status; + EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); + + // call IsGTxnColumnFamily(t1, xxx) cf1 is gtxn=false + EXPECT_FALSE(gtxn_internal_.IsGTxnColumnFamily("t1", cf1)); + + // call IsGTxnColumnFamily(t1, xxx) cf2 is gtxn=true + EXPECT_TRUE(gtxn_internal_.IsGTxnColumnFamily("t1", cf2)); + + // call IsGTxnColumnFamily(t2, xxx) + EXPECT_FALSE(gtxn_internal_.IsGTxnColumnFamily("t2", cf1)); +} TEST_F(GlobalTxnInternalTest, SetInternalSdkTaskTimeout) { - std::shared_ptr
t1 = OpenTable("t1"); - RowReader* reader = t1->NewRowReader("row1"); - RowReaderImpl* reader_impl = (RowReaderImpl*)reader; - - EXPECT_TRUE(gtxn_internal_.terminal_time_ == 0); - gtxn_internal_.SetCommitDuration(1000); - EXPECT_TRUE(gtxn_internal_.terminal_time_ > 1000); - - gtxn_internal_.SetInternalSdkTaskTimeout(reader); - EXPECT_TRUE(reader_impl->TimeOut() == 1000); - - sleep(2); - gtxn_internal_.SetInternalSdkTaskTimeout(reader); - EXPECT_TRUE(reader_impl->TimeOut() == 1); - EXPECT_TRUE(gtxn_internal_.IsTimeOut() == true); - - gtxn_internal_.is_timeout_ = false; - EXPECT_FALSE(gtxn_internal_.terminal_time_ == 0); - gtxn_internal_.SetCommitDuration(1000000); - EXPECT_TRUE(gtxn_internal_.terminal_time_ > 1000000); - - gtxn_internal_.SetInternalSdkTaskTimeout(reader); - EXPECT_TRUE(reader_impl->TimeOut() == FLAGS_tera_sdk_timeout); - EXPECT_TRUE(gtxn_internal_.IsTimeOut() == false); + std::shared_ptr
t1 = OpenTable("t1"); + RowReader* reader = t1->NewRowReader("row1"); + RowReaderImpl* reader_impl = (RowReaderImpl*)reader; + + EXPECT_TRUE(gtxn_internal_.terminal_time_ == 0); + gtxn_internal_.SetCommitDuration(1000); + EXPECT_TRUE(gtxn_internal_.terminal_time_ > 1000); + + gtxn_internal_.SetInternalSdkTaskTimeout(reader); + EXPECT_TRUE(reader_impl->TimeOut() == 1000); + + sleep(2); + gtxn_internal_.SetInternalSdkTaskTimeout(reader); + EXPECT_TRUE(reader_impl->TimeOut() == 1); + EXPECT_TRUE(gtxn_internal_.IsTimeOut() == true); + + gtxn_internal_.is_timeout_ = false; + EXPECT_FALSE(gtxn_internal_.terminal_time_ == 0); + gtxn_internal_.SetCommitDuration(1000000); + EXPECT_TRUE(gtxn_internal_.terminal_time_ > 1000000); + + gtxn_internal_.SetInternalSdkTaskTimeout(reader); + EXPECT_TRUE(reader_impl->TimeOut() == FLAGS_tera_sdk_read_timeout); + EXPECT_TRUE(gtxn_internal_.IsTimeOut() == false); } TEST_F(GlobalTxnInternalTest, VerifyWritesSize0) { - std::shared_ptr
t1 = OpenTable("t1"); - RowMutation* mu = t1->NewRowMutation("r1"); - int64_t writes_size = 0; - bool ret = gtxn_internal_.VerifyWritesSize(mu, &writes_size); - EXPECT_TRUE(writes_size == 0); - EXPECT_FALSE(ret); - EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kBadParam); - delete mu; + std::shared_ptr
t1 = OpenTable("t1"); + RowMutation* mu = t1->NewRowMutation("r1"); + int64_t writes_size = 0; + bool ret = gtxn_internal_.VerifyWritesSize(mu, &writes_size); + EXPECT_TRUE(writes_size == 0); + EXPECT_FALSE(ret); + EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kBadParam); + delete mu; } TEST_F(GlobalTxnInternalTest, VerifyWritesSize1) { - std::shared_ptr
t1 = OpenTable("t1"); - RowMutation* mu = t1->NewRowMutation("r1"); - mu->Put("cf0", "qu1", "value", (int64_t)(5)); - mu->Put("cf0", "qu2", "value", (int64_t)(5)); - mu->Put("cf0", "qu3", "value", (int64_t)(5)); - mu->Put("cf0", "qu4", "value", (int64_t)(5)); - mu->DeleteColumns("cf1", "qu5", (int64_t)(5)); - mu->DeleteColumns("cf1", "qu6", (int64_t)(5)); - mu->DeleteColumns("cf1", "qu7", (int64_t)(5)); - - int64_t writes_size = 0; - FLAGS_tera_gtxn_all_puts_size_limit = 10; - bool ret = gtxn_internal_.VerifyWritesSize(mu, &writes_size); - RowMutationImpl* row_mu_impl = static_cast(mu); - EXPECT_TRUE(row_mu_impl->Size() == writes_size); - EXPECT_FALSE(ret); - EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kGTxnDataTooLarge); - delete mu; + std::shared_ptr
t1 = OpenTable("t1"); + RowMutation* mu = t1->NewRowMutation("r1"); + mu->Put("cf0", "qu1", "value", (int64_t)(5)); + mu->Put("cf0", "qu2", "value", (int64_t)(5)); + mu->Put("cf0", "qu3", "value", (int64_t)(5)); + mu->Put("cf0", "qu4", "value", (int64_t)(5)); + mu->DeleteColumns("cf1", "qu5", (int64_t)(5)); + mu->DeleteColumns("cf1", "qu6", (int64_t)(5)); + mu->DeleteColumns("cf1", "qu7", (int64_t)(5)); + + int64_t writes_size = 0; + FLAGS_tera_gtxn_all_puts_size_limit = 10; + bool ret = gtxn_internal_.VerifyWritesSize(mu, &writes_size); + RowMutationImpl* row_mu_impl = static_cast(mu); + EXPECT_TRUE(row_mu_impl->Size() == writes_size); + EXPECT_FALSE(ret); + EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kGTxnDataTooLarge); + delete mu; } TEST_F(GlobalTxnInternalTest, VerifyWritesSize2) { - std::shared_ptr
t1 = OpenTable("t1"); - RowMutation* mu = t1->NewRowMutation("r1"); - mu->Put("cf0", "qu1", "value", (int64_t)(5)); - - int64_t writes_size = 0; - FLAGS_tera_gtxn_all_puts_size_limit = 100000; - bool ret = gtxn_internal_.VerifyWritesSize(mu, &writes_size); - RowMutationImpl* row_mu_impl = static_cast(mu); - EXPECT_TRUE(row_mu_impl->Size() == writes_size); - EXPECT_TRUE(ret); - EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kOK); - delete mu; + std::shared_ptr
t1 = OpenTable("t1"); + RowMutation* mu = t1->NewRowMutation("r1"); + mu->Put("cf0", "qu1", "value", (int64_t)(5)); + + int64_t writes_size = 0; + FLAGS_tera_gtxn_all_puts_size_limit = 100000; + bool ret = gtxn_internal_.VerifyWritesSize(mu, &writes_size); + RowMutationImpl* row_mu_impl = static_cast(mu); + EXPECT_TRUE(row_mu_impl->Size() == writes_size); + EXPECT_TRUE(ret); + EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kOK); + delete mu; } TEST_F(GlobalTxnInternalTest, BadQualifier) { - bool ret = BadQualifier(""); - EXPECT_FALSE(ret); - ret = BadQualifier("aaaaaaaaaaaaaaa"); - EXPECT_FALSE(ret); - ret = BadQualifier("!*_"); - EXPECT_TRUE(ret); - ret = BadQualifier("!!!!!!!*_"); - EXPECT_TRUE(ret); - ret = BadQualifier("!!!!!"); - EXPECT_TRUE(ret); - ret = BadQualifier("A!"); - EXPECT_FALSE(ret); + bool ret = BadQualifier(""); + EXPECT_FALSE(ret); + ret = BadQualifier("aaaaaaaaaaaaaaa"); + EXPECT_FALSE(ret); + ret = BadQualifier("!*_"); + EXPECT_TRUE(ret); + ret = BadQualifier("!!!!!!!*_"); + EXPECT_TRUE(ret); + ret = BadQualifier("!!!!!"); + EXPECT_TRUE(ret); + ret = BadQualifier("A!"); + EXPECT_FALSE(ret); } TEST_F(GlobalTxnInternalTest, VerifyUserRowMutation0) { - std::shared_ptr
t1 = OpenTable("t1"); - RowMutation* mu = t1->NewRowMutation("r1"); - bool ret = gtxn_internal_.VerifyUserRowMutation(mu); - EXPECT_FALSE(ret); - EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kBadParam); - delete mu; + std::shared_ptr
t1 = OpenTable("t1"); + RowMutation* mu = t1->NewRowMutation("r1"); + bool ret = gtxn_internal_.VerifyUserRowMutation(mu); + EXPECT_FALSE(ret); + EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kBadParam); + delete mu; } TEST_F(GlobalTxnInternalTest, VerifyUserRowMutation1) { - // set a table to tables_ - ErrorCode status; - std::shared_ptr
t1 = OpenTable("t1"); - // table and exist cf - TableDescriptor desc("t1"); - desc.EnableTxn(); - desc.AddLocalityGroup("lg0"); - ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); - cfd1->EnableGlobalTransaction(); - - TableSchema schema; - TableDescToSchema(desc, &schema); - SetSchema(t1.get(), schema); - - EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); - - RowMutation* mu = t1->NewRowMutation("r1"); - mu->Put("cf1", "qu1", "value", (int64_t)(5)); - mu->Put("cf1", "!Nqu1", "value", (int64_t)(5)); - mu->Put("cf1", "qu2", "value", (int64_t)(5)); - bool ret = gtxn_internal_.VerifyUserRowMutation(mu); - EXPECT_FALSE(ret); - EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kBadParam); - delete mu; + // set a table to tables_ + ErrorCode status; + std::shared_ptr
t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1.get(), schema); + + EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); + + RowMutation* mu = t1->NewRowMutation("r1"); + mu->Put("cf1", "qu1", "value", (int64_t)(5)); + mu->Put("cf1", "!Nqu1", "value", (int64_t)(5)); + mu->Put("cf1", "qu2", "value", (int64_t)(5)); + bool ret = gtxn_internal_.VerifyUserRowMutation(mu); + EXPECT_FALSE(ret); + EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kBadParam); + delete mu; } TEST_F(GlobalTxnInternalTest, VerifyUserRowMutation2) { - // set a table to tables_ - ErrorCode status; - std::shared_ptr
t1 = OpenTable("t1"); - // table and exist cf - TableDescriptor desc("t1"); - desc.EnableTxn(); - desc.AddLocalityGroup("lg0"); - ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); - cfd1->EnableGlobalTransaction(); - - TableSchema schema; - TableDescToSchema(desc, &schema); - SetSchema(t1.get(), schema); - - EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); - - RowMutation* mu = t1->NewRowMutation("r1"); - mu->Put("cf0", "qu1", "value", (int64_t)(5)); - mu->Put("cf1", "qu1_N_", "value", (int64_t)(5)); - mu->Put("cf1", "qu2", "value", (int64_t)(5)); - bool ret = gtxn_internal_.VerifyUserRowMutation(mu); - EXPECT_FALSE(ret); - EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kBadParam); - delete mu; + // set a table to tables_ + ErrorCode status; + std::shared_ptr
t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1.get(), schema); + + EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); + + RowMutation* mu = t1->NewRowMutation("r1"); + mu->Put("cf0", "qu1", "value", (int64_t)(5)); + mu->Put("cf1", "qu1_N_", "value", (int64_t)(5)); + mu->Put("cf1", "qu2", "value", (int64_t)(5)); + bool ret = gtxn_internal_.VerifyUserRowMutation(mu); + EXPECT_FALSE(ret); + EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kBadParam); + delete mu; } TEST_F(GlobalTxnInternalTest, VerifyUserRowMutation3) { - // set a table to tables_ - ErrorCode status; - std::shared_ptr
t1 = OpenTable("t1"); - // table and exist cf - TableDescriptor desc("t1"); - desc.EnableTxn(); - desc.AddLocalityGroup("lg0"); - ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); - cfd1->EnableGlobalTransaction(); - - TableSchema schema; - TableDescToSchema(desc, &schema); - SetSchema(t1.get(), schema); - - EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); - - RowMutation* mu = t1->NewRowMutation("r1"); - mu->Put("cf1", "qu1", "value", (int64_t)(5)); - mu->DeleteColumns("cf1", "qu1", (int64_t)(5)); - mu->DeleteColumn("cf1", "qu2", (int64_t)(5)); - mu->DeleteFamily("cf1", (int64_t)(5)); - bool ret = gtxn_internal_.VerifyUserRowMutation(mu); - EXPECT_FALSE(ret); - EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kGTxnNotSupport); - delete mu; + // set a table to tables_ + ErrorCode status; + std::shared_ptr
t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1.get(), schema); + + EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); + + RowMutation* mu = t1->NewRowMutation("r1"); + mu->Put("cf1", "qu1", "value", (int64_t)(5)); + mu->DeleteColumns("cf1", "qu1", (int64_t)(5)); + mu->DeleteColumn("cf1", "qu2", (int64_t)(5)); + mu->DeleteFamily("cf1", (int64_t)(5)); + bool ret = gtxn_internal_.VerifyUserRowMutation(mu); + EXPECT_FALSE(ret); + EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kGTxnNotSupport); + delete mu; } TEST_F(GlobalTxnInternalTest, VerifyUserRowMutation4) { - // set a table to tables_ - ErrorCode status; - std::shared_ptr
t1 = OpenTable("t1"); - // table and exist cf - TableDescriptor desc("t1"); - desc.EnableTxn(); - desc.AddLocalityGroup("lg0"); - ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); - cfd1->EnableGlobalTransaction(); - - TableSchema schema; - TableDescToSchema(desc, &schema); - SetSchema(t1.get(), schema); - - EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); - - RowMutation* mu = t1->NewRowMutation("r1"); - mu->Put("cf1", "qu1", "value", (int64_t)(5)); - mu->DeleteColumns("cf1", "qu1", (int64_t)(5)); - mu->DeleteColumn("cf1", "qu2", (int64_t)(5)); - bool ret = gtxn_internal_.VerifyUserRowMutation(mu); - EXPECT_TRUE(ret); - EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kOK); - delete mu; + // set a table to tables_ + ErrorCode status; + std::shared_ptr
t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1.get(), schema); + + EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); + + RowMutation* mu = t1->NewRowMutation("r1"); + mu->Put("cf1", "qu1", "value", (int64_t)(5)); + mu->DeleteColumns("cf1", "qu1", (int64_t)(5)); + mu->DeleteColumn("cf1", "qu2", (int64_t)(5)); + bool ret = gtxn_internal_.VerifyUserRowMutation(mu); + EXPECT_TRUE(ret); + EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kOK); + delete mu; } TEST_F(GlobalTxnInternalTest, VerifyUserRowReader0) { - std::shared_ptr
t1 = OpenTable("t1"); - RowReader* r = t1->NewRowReader("r1"); - bool ret = gtxn_internal_.VerifyUserRowReader(r); - EXPECT_FALSE(ret); - EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kBadParam); - delete r; + std::shared_ptr
t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + bool ret = gtxn_internal_.VerifyUserRowReader(r); + EXPECT_FALSE(ret); + EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kBadParam); + delete r; } TEST_F(GlobalTxnInternalTest, VerifyUserRowReader1) { - // set a table to tables_ - ErrorCode status; - std::shared_ptr
t1 = OpenTable("t1"); - // table and exist cf - TableDescriptor desc("t1"); - desc.EnableTxn(); - desc.AddLocalityGroup("lg0"); - ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); - //cfd1->EnableGlobalTransaction(); - - TableSchema schema; - TableDescToSchema(desc, &schema); - SetSchema(t1.get(), schema); - - EXPECT_FALSE(gtxn_internal_.CheckTable(t1.get(), &status)); - - RowReader* r = t1->NewRowReader("r1"); - r->AddColumn("cf1", "qu"); - bool ret = gtxn_internal_.VerifyUserRowReader(r); - EXPECT_FALSE(ret); - EXPECT_TRUE(r->GetError().GetType() == status.GetType()); - delete r; + // set a table to tables_ + ErrorCode status; + std::shared_ptr
t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + // cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1.get(), schema); + + EXPECT_FALSE(gtxn_internal_.CheckTable(t1.get(), &status)); + + RowReader* r = t1->NewRowReader("r1"); + r->AddColumn("cf1", "qu"); + bool ret = gtxn_internal_.VerifyUserRowReader(r); + EXPECT_FALSE(ret); + EXPECT_TRUE(r->GetError().GetType() == status.GetType()); + delete r; } TEST_F(GlobalTxnInternalTest, VerifyUserRowReader2) { - // set a table to tables_ - ErrorCode status; - std::shared_ptr
t1 = OpenTable("t1"); - // table and exist cf - TableDescriptor desc("t1"); - desc.EnableTxn(); - desc.AddLocalityGroup("lg0"); - ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); - cfd1->EnableGlobalTransaction(); - - TableSchema schema; - TableDescToSchema(desc, &schema); - SetSchema(t1.get(), schema); - - EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); - - RowReader* r = t1->NewRowReader("r1"); - r->AddColumn("cf1", "qu"); - r->SetSnapshot(10); - bool ret = gtxn_internal_.VerifyUserRowReader(r); - EXPECT_FALSE(ret); - EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kBadParam); - delete r; + // set a table to tables_ + ErrorCode status; + std::shared_ptr
t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1.get(), schema); + + EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); + + RowReader* r = t1->NewRowReader("r1"); + r->AddColumn("cf1", "qu"); + r->SetSnapshot(10); + bool ret = gtxn_internal_.VerifyUserRowReader(r); + EXPECT_FALSE(ret); + EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kBadParam); + delete r; } TEST_F(GlobalTxnInternalTest, VerifyUserRowReader3) { - // set a table to tables_ - ErrorCode status; - std::shared_ptr
t1 = OpenTable("t1"); - // table and exist cf - TableDescriptor desc("t1"); - desc.EnableTxn(); - desc.AddLocalityGroup("lg0"); - ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); - cfd1->EnableGlobalTransaction(); - - TableSchema schema; - TableDescToSchema(desc, &schema); - SetSchema(t1.get(), schema); - - EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); - - RowReader* r = t1->NewRowReader("r1"); - r->AddColumnFamily("cf1"); - bool ret = gtxn_internal_.VerifyUserRowReader(r); - EXPECT_FALSE(ret); - EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kBadParam); - delete r; + // set a table to tables_ + ErrorCode status; + std::shared_ptr
t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1.get(), schema); + + EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); + + RowReader* r = t1->NewRowReader("r1"); + r->AddColumnFamily("cf1"); + bool ret = gtxn_internal_.VerifyUserRowReader(r); + EXPECT_FALSE(ret); + EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kBadParam); + delete r; } TEST_F(GlobalTxnInternalTest, VerifyUserRowReader4) { - // set a table to tables_ - ErrorCode status; - std::shared_ptr
t1 = OpenTable("t1"); - // table and exist cf - TableDescriptor desc("t1"); - desc.EnableTxn(); - desc.AddLocalityGroup("lg0"); - ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); - cfd1->EnableGlobalTransaction(); - - TableSchema schema; - TableDescToSchema(desc, &schema); - SetSchema(t1.get(), schema); - - EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); - - RowReader* r = t1->NewRowReader("r1"); - r->AddColumn("cf0", "qu"); - bool ret = gtxn_internal_.VerifyUserRowReader(r); - EXPECT_FALSE(ret); - EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kBadParam); - delete r; + // set a table to tables_ + ErrorCode status; + std::shared_ptr
t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1.get(), schema); + + EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); + + RowReader* r = t1->NewRowReader("r1"); + r->AddColumn("cf0", "qu"); + bool ret = gtxn_internal_.VerifyUserRowReader(r); + EXPECT_FALSE(ret); + EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kBadParam); + delete r; } TEST_F(GlobalTxnInternalTest, VerifyUserRowReader5) { - // set a table to tables_ - ErrorCode status; - std::shared_ptr
t1 = OpenTable("t1"); - // table and exist cf - TableDescriptor desc("t1"); - desc.EnableTxn(); - desc.AddLocalityGroup("lg0"); - ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); - cfd1->EnableGlobalTransaction(); - - TableSchema schema; - TableDescToSchema(desc, &schema); - SetSchema(t1.get(), schema); - - EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); - - RowReader* r = t1->NewRowReader("r1"); - r->AddColumn("cf1", "!qu"); - bool ret = gtxn_internal_.VerifyUserRowReader(r); - EXPECT_FALSE(ret); - EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kBadParam); - delete r; + // set a table to tables_ + ErrorCode status; + std::shared_ptr
t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1.get(), schema); + + EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); + + RowReader* r = t1->NewRowReader("r1"); + r->AddColumn("cf1", "!qu"); + bool ret = gtxn_internal_.VerifyUserRowReader(r); + EXPECT_FALSE(ret); + EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kBadParam); + delete r; } TEST_F(GlobalTxnInternalTest, VerifyUserRowReader6) { - // set a table to tables_ - ErrorCode status; - std::shared_ptr
t1 = OpenTable("t1"); - // table and exist cf - TableDescriptor desc("t1"); - desc.EnableTxn(); - desc.AddLocalityGroup("lg0"); - ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); - cfd1->EnableGlobalTransaction(); - - TableSchema schema; - TableDescToSchema(desc, &schema); - SetSchema(t1.get(), schema); - - EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); - - RowReader* r = t1->NewRowReader("r1"); - r->AddColumn("cf1", "qu"); - r->AddColumn("cf1", "q1"); - r->AddColumn("cf1", "q2"); - bool ret = gtxn_internal_.VerifyUserRowReader(r); - EXPECT_TRUE(ret); - EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kOK); - delete r; + // set a table to tables_ + ErrorCode status; + std::shared_ptr
t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1.get(), schema); + + EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); + + RowReader* r = t1->NewRowReader("r1"); + r->AddColumn("cf1", "qu"); + r->AddColumn("cf1", "q1"); + r->AddColumn("cf1", "q2"); + bool ret = gtxn_internal_.VerifyUserRowReader(r); + EXPECT_TRUE(ret); + EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kOK); + delete r; } TEST_F(GlobalTxnInternalTest, PrimaryIsLocked1) { - // bad case b. read primary lock failed - ErrorCode status; - std::shared_ptr
t1 = OpenTable("t1"); - // table and exist cf - TableDescriptor desc("t1"); - desc.EnableTxn(); - desc.AddLocalityGroup("lg0"); - ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); - cfd1->EnableGlobalTransaction(); - - TableSchema schema; - TableDescToSchema(desc, &schema); - SetSchema(t1.get(), schema); - - tera::PrimaryInfo info2; - std::string info2_str; - info2.set_table_name("t1"); - info2.set_row_key("row1"); - info2.set_column_family("cf1"); - info2.set_qualifier("qu1"); - info2.set_gtxn_start_ts(100); - info2.SerializeToString(&info2_str); - EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); - - ErrorCode mock_status; - mock_status.SetFailed(ErrorCode::kSystem,""); - std::vector reader_errs; - reader_errs.push_back(mock_status); - (static_cast(t1.get()))->AddReaderErrors(reader_errs); - - EXPECT_FALSE(gtxn_internal_.PrimaryIsLocked(info2, 12, &status)); - EXPECT_TRUE(status.GetType() == ErrorCode::kSystem); + // bad case b. read primary lock failed + ErrorCode status; + std::shared_ptr
t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1.get(), schema); + + tera::PrimaryInfo info2; + std::string info2_str; + info2.set_table_name("t1"); + info2.set_row_key("row1"); + info2.set_column_family("cf1"); + info2.set_qualifier("qu1"); + info2.set_gtxn_start_ts(100); + info2.SerializeToString(&info2_str); + EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); + + ErrorCode mock_status; + mock_status.SetFailed(ErrorCode::kSystem, ""); + std::vector reader_errs; + reader_errs.push_back(mock_status); + (static_cast(t1.get()))->AddReaderErrors(reader_errs); + + EXPECT_FALSE(gtxn_internal_.PrimaryIsLocked(info2, 12, &status)); + EXPECT_TRUE(status.GetType() == ErrorCode::kSystem); } TEST_F(GlobalTxnInternalTest, PrimaryIsLocked2) { - // bad case a. read primary lock notfound - ErrorCode status; - std::shared_ptr
t1 = OpenTable("t1"); - // table and exist cf - TableDescriptor desc("t1"); - desc.EnableTxn(); - desc.AddLocalityGroup("lg0"); - ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); - cfd1->EnableGlobalTransaction(); - - TableSchema schema; - TableDescToSchema(desc, &schema); - SetSchema(t1.get(), schema); - - tera::PrimaryInfo info2; - std::string info2_str; - info2.set_table_name("t1"); - info2.set_row_key("row1"); - info2.set_column_family("cf1"); - info2.set_qualifier("qu1"); - info2.set_gtxn_start_ts(100); - info2.SerializeToString(&info2_str); - EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); - - ErrorCode mock_status; - mock_status.SetFailed(ErrorCode::kNotFound,""); - std::vector reader_errs; - reader_errs.push_back(mock_status); - (static_cast(t1.get()))->AddReaderErrors(reader_errs); - - EXPECT_FALSE(gtxn_internal_.PrimaryIsLocked(info2, 12, &status)); + // bad case a. read primary lock notfound + ErrorCode status; + std::shared_ptr
t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1.get(), schema); + + tera::PrimaryInfo info2; + std::string info2_str; + info2.set_table_name("t1"); + info2.set_row_key("row1"); + info2.set_column_family("cf1"); + info2.set_qualifier("qu1"); + info2.set_gtxn_start_ts(100); + info2.SerializeToString(&info2_str); + EXPECT_TRUE(gtxn_internal_.CheckTable(t1.get(), &status)); + + ErrorCode mock_status; + mock_status.SetFailed(ErrorCode::kNotFound, ""); + std::vector reader_errs; + reader_errs.push_back(mock_status); + (static_cast(t1.get()))->AddReaderErrors(reader_errs); + + EXPECT_FALSE(gtxn_internal_.PrimaryIsLocked(info2, 12, &status)); } -} // namespace tera +} // namespace tera diff --git a/src/sdk/test/global_txn_test.cc b/src/sdk/test/global_txn_test.cc index 606813eeb..4d3ac0a43 100644 --- a/src/sdk/test/global_txn_test.cc +++ b/src/sdk/test/global_txn_test.cc @@ -28,1238 +28,1237 @@ DECLARE_string(tera_coord_type); namespace tera { class GlobalTxnTest : public ::testing::Test { -public: - GlobalTxnTest() : - thread_pool_(2), - gtxn_(std::shared_ptr(), &thread_pool_, (new sdk::MockTimeoracleClusterFinder(""))) { - gtxn_.status_.SetFailed(ErrorCode::kOK); - gtxn_.status_returned_ = false; - } - - ~GlobalTxnTest() {} - - void SetSchema(Table* table, const TableSchema& table_schema) { - TableImpl* table_impl = static_cast(table); - table_impl->table_schema_ = table_schema; - } - - std::shared_ptr
OpenTable(const std::string& tablename) { - FLAGS_tera_coord_type = "fake_zk"; - std::shared_ptr table_(new MockTable(tablename, &thread_pool_)); - return table_; - } - -private: - common::ThreadPool thread_pool_; - GlobalTxn gtxn_; - //std::vector> table_vec; + public: + GlobalTxnTest() + : thread_pool_(2), + gtxn_(std::shared_ptr(), &thread_pool_, + (new sdk::MockTimeoracleClusterFinder(""))) { + gtxn_.status_.SetFailed(ErrorCode::kOK); + gtxn_.status_returned_ = false; + } + + ~GlobalTxnTest() {} + + void SetSchema(Table* table, const TableSchema& table_schema) { + TableImpl* table_impl = static_cast(table); + table_impl->table_schema_ = table_schema; + } + + std::shared_ptr
OpenTable(const std::string& tablename) { + FLAGS_tera_coord_type = "fake_zk"; + std::shared_ptr table_(new MockTable(tablename, &thread_pool_)); + return table_; + } + + private: + common::ThreadPool thread_pool_; + GlobalTxn gtxn_; + // std::vector> table_vec; }; TEST_F(GlobalTxnTest, Commit) { - - // sync commit ut - gtxn_.user_commit_callback_ = NULL; - // mutation haven't apply - gtxn_.finish_ = false; - gtxn_.status_returned_ = false; - gtxn_.put_fail_cnt_.Set(10); - gtxn_.has_commited_ = false; - EXPECT_TRUE(gtxn_.Commit().GetType() == ErrorCode::kGTxnOpAfterCommit); - EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnOpAfterCommit); - EXPECT_TRUE(gtxn_.finish_ == true); - EXPECT_TRUE(gtxn_.has_commited_ == false); - - // have commited - gtxn_.finish_ = false; - gtxn_.status_returned_ = false; - gtxn_.put_fail_cnt_.Set(0); - gtxn_.has_commited_ = true; - EXPECT_TRUE(gtxn_.Commit().GetType() == ErrorCode::kGTxnOpAfterCommit); - EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnOpAfterCommit); - EXPECT_TRUE(gtxn_.finish_ == true); - EXPECT_TRUE(gtxn_.has_commited_ == true); - - // run commit in the legal state - gtxn_.finish_ = false; - gtxn_.status_returned_ = false; - gtxn_.writes_.clear(); - gtxn_.put_fail_cnt_.Set(0); - gtxn_.has_commited_ = false; - EXPECT_TRUE(gtxn_.Commit().GetType() == ErrorCode::kOK); - EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kOK); - EXPECT_TRUE(gtxn_.finish_ == true); - EXPECT_TRUE(gtxn_.has_commited_ == true); + // sync commit ut + gtxn_.user_commit_callback_ = NULL; + // mutation haven't apply + gtxn_.finish_ = false; + gtxn_.status_returned_ = false; + gtxn_.put_fail_cnt_.Set(10); + gtxn_.has_commited_ = false; + EXPECT_TRUE(gtxn_.Commit().GetType() == ErrorCode::kGTxnOpAfterCommit); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnOpAfterCommit); + EXPECT_TRUE(gtxn_.finish_ == true); + EXPECT_TRUE(gtxn_.has_commited_ == false); + + // have commited + gtxn_.finish_ = false; + gtxn_.status_returned_ = false; + gtxn_.put_fail_cnt_.Set(0); + gtxn_.has_commited_ = true; + EXPECT_TRUE(gtxn_.Commit().GetType() == ErrorCode::kGTxnOpAfterCommit); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnOpAfterCommit); + EXPECT_TRUE(gtxn_.finish_ == true); + EXPECT_TRUE(gtxn_.has_commited_ == true); + + // run commit in the legal state + gtxn_.finish_ = false; + gtxn_.status_returned_ = false; + gtxn_.writes_.clear(); + gtxn_.put_fail_cnt_.Set(0); + gtxn_.has_commited_ = false; + EXPECT_TRUE(gtxn_.Commit().GetType() == ErrorCode::kOK); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kOK); + EXPECT_TRUE(gtxn_.finish_ == true); + EXPECT_TRUE(gtxn_.has_commited_ == true); } TEST_F(GlobalTxnTest, DoVerifyPrimaryLockedCallback) { - RowReaderImpl* reader_impl = new RowReaderImpl(NULL, "rowkey"); - SingleRowTxn* txn = new SingleRowTxn(NULL, "rowkey", NULL); - reader_impl->txn_ = txn; - - // not found primary - reader_impl->error_code_.SetFailed(ErrorCode::kNotFound, ""); - PrimaryTxnContext* ctx = new PrimaryTxnContext(>xn_, txn); - reader_impl->SetContext(ctx); - - RowReader* reader = static_cast(reader_impl); - gtxn_.DoVerifyPrimaryLockedCallback(reader); - EXPECT_TRUE(gtxn_.finish_ == true); - EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnPrimaryLost); + RowReaderImpl* reader_impl = new RowReaderImpl(NULL, "rowkey"); + SingleRowTxn* txn = new SingleRowTxn(NULL, "rowkey", NULL); + reader_impl->txn_ = txn; + + // not found primary + reader_impl->error_code_.SetFailed(ErrorCode::kNotFound, ""); + PrimaryTxnContext* ctx = new PrimaryTxnContext(>xn_, txn); + reader_impl->SetContext(ctx); + + RowReader* reader = static_cast(reader_impl); + gtxn_.DoVerifyPrimaryLockedCallback(reader); + EXPECT_TRUE(gtxn_.finish_ == true); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnPrimaryLost); } TEST_F(GlobalTxnTest, DoVerifyPrimaryLockedCallback1) { - RowReaderImpl* reader_impl = new RowReaderImpl(NULL, "rowkey"); - SingleRowTxn* txn = new SingleRowTxn(NULL, "rowkey", NULL); - reader_impl->txn_ = txn; - - PrimaryTxnContext* ctx = new PrimaryTxnContext(>xn_, txn); - reader_impl->SetContext(ctx); - // reader timeout - reader_impl->error_code_.SetFailed(ErrorCode::kTimeout, ""); - RowReader* reader = static_cast(reader_impl); - gtxn_.DoVerifyPrimaryLockedCallback(reader); - EXPECT_TRUE(gtxn_.finish_ == true); - EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnPrimaryCommitTimeout); + RowReaderImpl* reader_impl = new RowReaderImpl(NULL, "rowkey"); + SingleRowTxn* txn = new SingleRowTxn(NULL, "rowkey", NULL); + reader_impl->txn_ = txn; + + PrimaryTxnContext* ctx = new PrimaryTxnContext(>xn_, txn); + reader_impl->SetContext(ctx); + // reader timeout + reader_impl->error_code_.SetFailed(ErrorCode::kTimeout, ""); + RowReader* reader = static_cast(reader_impl); + gtxn_.DoVerifyPrimaryLockedCallback(reader); + EXPECT_TRUE(gtxn_.finish_ == true); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnPrimaryCommitTimeout); } TEST_F(GlobalTxnTest, DoVerifyPrimaryLockedCallback2) { - RowReaderImpl* reader_impl = new RowReaderImpl(NULL, "rowkey"); - SingleRowTxn* txn = new SingleRowTxn(NULL, "rowkey", NULL); - reader_impl->txn_ = txn; - - PrimaryTxnContext* ctx = new PrimaryTxnContext(>xn_, txn); - reader_impl->SetContext(ctx); - // reader other error - reader_impl->error_code_.SetFailed(ErrorCode::kSystem, ""); - RowReader* reader = static_cast(reader_impl); - gtxn_.DoVerifyPrimaryLockedCallback(reader); - EXPECT_TRUE(gtxn_.finish_ == true); - EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kSystem); + RowReaderImpl* reader_impl = new RowReaderImpl(NULL, "rowkey"); + SingleRowTxn* txn = new SingleRowTxn(NULL, "rowkey", NULL); + reader_impl->txn_ = txn; + + PrimaryTxnContext* ctx = new PrimaryTxnContext(>xn_, txn); + reader_impl->SetContext(ctx); + // reader other error + reader_impl->error_code_.SetFailed(ErrorCode::kSystem, ""); + RowReader* reader = static_cast(reader_impl); + gtxn_.DoVerifyPrimaryLockedCallback(reader); + EXPECT_TRUE(gtxn_.finish_ == true); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kSystem); } TEST_F(GlobalTxnTest, CheckPrimaryStatusAndCommmitSecondaries) { - std::shared_ptr
t1 = OpenTable("t1"); - SingleRowTxn* txn = new SingleRowTxn(static_cast(t1.get())->GetTableImpl(), "rowkey", NULL); - PrimaryTxnContext* ctx = new PrimaryTxnContext(>xn_, txn); - txn->SetContext(ctx); - - // primary commit timeout - gtxn_.finish_ = false; - gtxn_.status_returned_ = false; - txn->mutation_buffer_.SetError(ErrorCode::kTimeout,""); - gtxn_.CheckPrimaryStatusAndCommmitSecondaries(txn); - EXPECT_TRUE(gtxn_.finish_ == true); - EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnPrimaryCommitTimeout); - - // primary commit other error - gtxn_.finish_ = false; - gtxn_.status_returned_ = false; - txn = new SingleRowTxn(static_cast(t1.get())->GetTableImpl(), "rowkey", NULL); - ctx = new PrimaryTxnContext(>xn_, txn); - txn->SetContext(ctx); - txn->mutation_buffer_.SetError(ErrorCode::kSystem, ""); - gtxn_.CheckPrimaryStatusAndCommmitSecondaries(txn); - - EXPECT_TRUE(gtxn_.finish_ == true); - EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kSystem); - - // primary done run next step - gtxn_.finish_ = false; - gtxn_.status_returned_ = false; - txn = new SingleRowTxn(static_cast(t1.get())->GetTableImpl(), "rowkey", NULL); - ctx = new PrimaryTxnContext(>xn_, txn); - txn->SetContext(ctx); - txn->mutation_buffer_.SetError(ErrorCode::kOK, ""); - gtxn_.writes_.clear(); - const std::string tablename = "test_t"; - std::shared_ptr
t = OpenTable(tablename); - Cell cell(t.get(), "r1", "cf", "qu", 1, "val"); - Write w(cell); - // insert a 'Write' - gtxn_.SaveWrite(tablename, "r1", w); - - gtxn_.acks_.clear(); - gtxn_.notifies_.clear(); - gtxn_.CheckPrimaryStatusAndCommmitSecondaries(txn); - - EXPECT_TRUE(gtxn_.finish_ == true); - EXPECT_TRUE(gtxn_.status_returned_ == true); - EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kOK); + std::shared_ptr
t1 = OpenTable("t1"); + SingleRowTxn* txn = + new SingleRowTxn(static_cast(t1.get())->GetTableImpl(), "rowkey", NULL); + PrimaryTxnContext* ctx = new PrimaryTxnContext(>xn_, txn); + txn->SetContext(ctx); + + // primary commit timeout + gtxn_.finish_ = false; + gtxn_.status_returned_ = false; + txn->mutation_buffer_.SetError(ErrorCode::kTimeout, ""); + gtxn_.CheckPrimaryStatusAndCommmitSecondaries(txn); + EXPECT_TRUE(gtxn_.finish_ == true); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnPrimaryCommitTimeout); + + // primary commit other error + gtxn_.finish_ = false; + gtxn_.status_returned_ = false; + txn = new SingleRowTxn(static_cast(t1.get())->GetTableImpl(), "rowkey", NULL); + ctx = new PrimaryTxnContext(>xn_, txn); + txn->SetContext(ctx); + txn->mutation_buffer_.SetError(ErrorCode::kSystem, ""); + gtxn_.CheckPrimaryStatusAndCommmitSecondaries(txn); + + EXPECT_TRUE(gtxn_.finish_ == true); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kSystem); + + // primary done run next step + gtxn_.finish_ = false; + gtxn_.status_returned_ = false; + txn = new SingleRowTxn(static_cast(t1.get())->GetTableImpl(), "rowkey", NULL); + ctx = new PrimaryTxnContext(>xn_, txn); + txn->SetContext(ctx); + txn->mutation_buffer_.SetError(ErrorCode::kOK, ""); + gtxn_.writes_.clear(); + const std::string tablename = "test_t"; + std::shared_ptr
t = OpenTable(tablename); + Cell cell(t.get(), "r1", "cf", "qu", 1, "val"); + Write w(cell); + // insert a 'Write' + gtxn_.SaveWrite(tablename, "r1", w); + + gtxn_.acks_.clear(); + gtxn_.notifies_.clear(); + gtxn_.CheckPrimaryStatusAndCommmitSecondaries(txn); + + EXPECT_TRUE(gtxn_.finish_ == true); + EXPECT_TRUE(gtxn_.status_returned_ == true); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kOK); } TEST_F(GlobalTxnTest, SaveWrite) { - const std::string tablename = "test_t", tablename4 = "test_t4"; - std::shared_ptr
t = OpenTable(tablename); - const std::string row_key = "r1", row_key4 = "r2"; - Cell cell(t.get(), row_key, "cf", "qu", 1, "val"); - Write w(cell); - gtxn_.writes_.clear(); - // insert a 'Write' - gtxn_.SaveWrite(tablename, row_key, w); - GlobalTxn::TableWithRowkey twr(tablename, row_key); - auto w1 = gtxn_.writes_.find(twr); - EXPECT_TRUE(w1 != gtxn_.writes_.end()); - - // insert a same 'Write' - gtxn_.SaveWrite(tablename, row_key, w); - EXPECT_TRUE(gtxn_.writes_.size() == 1); - - // insert a delete type 'Write' at same Cell - Cell cell2(t.get(), row_key, "cf", "qu", 1); - Write w2(cell2); - gtxn_.SaveWrite(tablename, row_key, w2); - EXPECT_TRUE(gtxn_.writes_.size() == 1); + const std::string tablename = "test_t", tablename4 = "test_t4"; + std::shared_ptr
t = OpenTable(tablename); + const std::string row_key = "r1", row_key4 = "r2"; + Cell cell(t.get(), row_key, "cf", "qu", 1, "val"); + Write w(cell); + gtxn_.writes_.clear(); + // insert a 'Write' + gtxn_.SaveWrite(tablename, row_key, w); + GlobalTxn::TableWithRowkey twr(tablename, row_key); + auto w1 = gtxn_.writes_.find(twr); + EXPECT_TRUE(w1 != gtxn_.writes_.end()); + + // insert a same 'Write' + gtxn_.SaveWrite(tablename, row_key, w); + EXPECT_TRUE(gtxn_.writes_.size() == 1); + + // insert a delete type 'Write' at same Cell + Cell cell2(t.get(), row_key, "cf", "qu", 1); + Write w2(cell2); + gtxn_.SaveWrite(tablename, row_key, w2); + EXPECT_TRUE(gtxn_.writes_.size() == 1); } TEST_F(GlobalTxnTest, DoAckCallback) { - const std::string tablename = "test_t1", tablename5 = "test_t5"; - std::shared_ptr
t1 = OpenTable(tablename); - std::shared_ptr
t5 = OpenTable(tablename5); - - // test acks cnt = 2 && not notify - RowMutation* mu1 = t1->NewRowMutation("r1"); - RowMutation* mu5 = t5->NewRowMutation("r1"); - gtxn_.finish_ = false; - gtxn_.ack_done_cnt_.Set(0); - gtxn_.acks_cnt_.Set(2); - gtxn_.notifies_cnt_.Set(0); - gtxn_.DoAckCallback(mu1); - EXPECT_TRUE(gtxn_.finish_ == false); - gtxn_.DoAckCallback(mu5); - EXPECT_TRUE(gtxn_.finish_ == true); - - // test acks cnt = 2 && notify cnt > 0 - RowMutation* mu11 = t1->NewRowMutation("r1"); - RowMutation* mu55 = t5->NewRowMutation("r1"); - gtxn_.finish_ = false; - gtxn_.ack_done_cnt_.Set(0); - gtxn_.acks_cnt_.Set(2); - gtxn_.notifies_cnt_.Set(1); - - gtxn_.DoAckCallback(mu11); - EXPECT_TRUE(gtxn_.finish_ == false); - gtxn_.DoAckCallback(mu55); - EXPECT_TRUE(gtxn_.finish_ == false); + const std::string tablename = "test_t1", tablename5 = "test_t5"; + std::shared_ptr
t1 = OpenTable(tablename); + std::shared_ptr
t5 = OpenTable(tablename5); + + // test acks cnt = 2 && not notify + RowMutation* mu1 = t1->NewRowMutation("r1"); + RowMutation* mu5 = t5->NewRowMutation("r1"); + gtxn_.finish_ = false; + gtxn_.ack_done_cnt_.Set(0); + gtxn_.acks_cnt_.Set(2); + gtxn_.notifies_cnt_.Set(0); + gtxn_.DoAckCallback(mu1); + EXPECT_TRUE(gtxn_.finish_ == false); + gtxn_.DoAckCallback(mu5); + EXPECT_TRUE(gtxn_.finish_ == true); + + // test acks cnt = 2 && notify cnt > 0 + RowMutation* mu11 = t1->NewRowMutation("r1"); + RowMutation* mu55 = t5->NewRowMutation("r1"); + gtxn_.finish_ = false; + gtxn_.ack_done_cnt_.Set(0); + gtxn_.acks_cnt_.Set(2); + gtxn_.notifies_cnt_.Set(1); + + gtxn_.DoAckCallback(mu11); + EXPECT_TRUE(gtxn_.finish_ == false); + gtxn_.DoAckCallback(mu55); + EXPECT_TRUE(gtxn_.finish_ == false); } TEST_F(GlobalTxnTest, DoNotifyCallback) { - const std::string tablename = "test_t11", tablename5 = "test_t55"; - std::shared_ptr
t11 = OpenTable(tablename); - std::shared_ptr
t55 = OpenTable(tablename5); - - // test notifies cnt = 2 - RowMutation* mu1 = t11->NewRowMutation("r1"); - RowMutation* mu5 = t55->NewRowMutation("r1"); - gtxn_.finish_ = false; - gtxn_.notify_done_cnt_.Set(0); - gtxn_.notifies_cnt_.Set(2); - gtxn_.all_task_pushed_ = true; - gtxn_.DoNotifyCallback(mu1); - EXPECT_TRUE(gtxn_.finish_ == false); - gtxn_.DoNotifyCallback(mu5); - EXPECT_TRUE(gtxn_.finish_ == true); + const std::string tablename = "test_t11", tablename5 = "test_t55"; + std::shared_ptr
t11 = OpenTable(tablename); + std::shared_ptr
t55 = OpenTable(tablename5); + + // test notifies cnt = 2 + RowMutation* mu1 = t11->NewRowMutation("r1"); + RowMutation* mu5 = t55->NewRowMutation("r1"); + gtxn_.finish_ = false; + gtxn_.notify_done_cnt_.Set(0); + gtxn_.notifies_cnt_.Set(2); + gtxn_.all_task_pushed_ = true; + gtxn_.DoNotifyCallback(mu1); + EXPECT_TRUE(gtxn_.finish_ == false); + gtxn_.DoNotifyCallback(mu5); + EXPECT_TRUE(gtxn_.finish_ == true); } -void NotifyWarpper(GlobalTxn* gtxn, - Table* t, - const std::string& row_key, - const std::string& column_family, - const std::string& qualifier) { - gtxn->Notify(t, row_key, column_family, qualifier); +void NotifyWarpper(GlobalTxn* gtxn, Table* t, const std::string& row_key, + const std::string& column_family, const std::string& qualifier) { + gtxn->Notify(t, row_key, column_family, qualifier); } TEST_F(GlobalTxnTest, Notify) { - size_t notify_thread_cnt = 30; - std::vector threads; - // all Table* is NULL - gtxn_.notifies_.clear(); - gtxn_.notifies_cnt_.Set(0); - EXPECT_TRUE(0 == gtxn_.notifies_.size()); - EXPECT_TRUE(gtxn_.notifies_cnt_.Get() == 0); - threads.reserve(notify_thread_cnt); - Table* t0 = NULL; - for (int i = 0; i < notify_thread_cnt; ++i) { - threads.emplace_back(std::thread(NotifyWarpper, >xn_, t0, "", "", "")); - } - for (int i = 0; i < threads.size(); ++i) { - threads[i].join(); - } - threads.clear(); - EXPECT_TRUE(0 == gtxn_.notifies_.size()); - EXPECT_TRUE(gtxn_.notifies_cnt_.Get() == 0); - - // same table and same row - gtxn_.notifies_.clear(); - gtxn_.notifies_cnt_.Set(0); - EXPECT_TRUE(0 == gtxn_.notifies_.size()); - EXPECT_TRUE(gtxn_.notifies_cnt_.Get() == 0); - std::shared_ptr
t1 = OpenTable("t1"); - threads.reserve(30); - for (int i = 0; i < notify_thread_cnt; ++i) { - threads.emplace_back(std::thread(NotifyWarpper, >xn_, t1.get(), "r1", "", "")); - } - for (int i = 0; i < threads.size(); ++i) { - threads[i].join(); - } - threads.clear(); - EXPECT_TRUE(1 == gtxn_.notifies_.size()); - EXPECT_TRUE(gtxn_.notifies_cnt_.Get() == 1); - GlobalTxn::TableWithRowkey twr("t1", "r1"); - EXPECT_TRUE(gtxn_.notifies_[twr].size() == notify_thread_cnt); - - // same table and diff row - gtxn_.notifies_.clear(); - gtxn_.notifies_cnt_.Set(0); - EXPECT_TRUE(0 == gtxn_.notifies_.size()); - EXPECT_TRUE(gtxn_.notifies_cnt_.Get() == 0); - for (int i = 0; i < notify_thread_cnt; ++i) { - threads.emplace_back(std::thread(NotifyWarpper, >xn_, t1.get(), "r" + std::to_string(i), "", "")); - } - for (int i = 0; i < threads.size(); ++i) { - threads[i].join(); - } - threads.clear(); - EXPECT_TRUE(notify_thread_cnt == gtxn_.notifies_.size()); - EXPECT_TRUE(gtxn_.notifies_cnt_.Get() == notify_thread_cnt); - - for (int i = 0; i < notify_thread_cnt; ++i) { - GlobalTxn::TableWithRowkey twr1("t1", "r" + std::to_string(i)); - EXPECT_TRUE(gtxn_.notifies_[twr1].size() == 1); - } + size_t notify_thread_cnt = 30; + std::vector threads; + // all Table* is NULL + gtxn_.notifies_.clear(); + gtxn_.notifies_cnt_.Set(0); + EXPECT_TRUE(0 == gtxn_.notifies_.size()); + EXPECT_TRUE(gtxn_.notifies_cnt_.Get() == 0); + threads.reserve(notify_thread_cnt); + Table* t0 = NULL; + for (int i = 0; i < notify_thread_cnt; ++i) { + threads.emplace_back(std::thread(NotifyWarpper, >xn_, t0, "", "", "")); + } + for (int i = 0; i < threads.size(); ++i) { + threads[i].join(); + } + threads.clear(); + EXPECT_TRUE(0 == gtxn_.notifies_.size()); + EXPECT_TRUE(gtxn_.notifies_cnt_.Get() == 0); + + // same table and same row + gtxn_.notifies_.clear(); + gtxn_.notifies_cnt_.Set(0); + EXPECT_TRUE(0 == gtxn_.notifies_.size()); + EXPECT_TRUE(gtxn_.notifies_cnt_.Get() == 0); + std::shared_ptr
t1 = OpenTable("t1"); + threads.reserve(30); + for (int i = 0; i < notify_thread_cnt; ++i) { + threads.emplace_back(std::thread(NotifyWarpper, >xn_, t1.get(), "r1", "", "")); + } + for (int i = 0; i < threads.size(); ++i) { + threads[i].join(); + } + threads.clear(); + EXPECT_TRUE(1 == gtxn_.notifies_.size()); + EXPECT_TRUE(gtxn_.notifies_cnt_.Get() == 1); + GlobalTxn::TableWithRowkey twr("t1", "r1"); + EXPECT_TRUE(gtxn_.notifies_[twr].size() == notify_thread_cnt); + + // same table and diff row + gtxn_.notifies_.clear(); + gtxn_.notifies_cnt_.Set(0); + EXPECT_TRUE(0 == gtxn_.notifies_.size()); + EXPECT_TRUE(gtxn_.notifies_cnt_.Get() == 0); + for (int i = 0; i < notify_thread_cnt; ++i) { + threads.emplace_back( + std::thread(NotifyWarpper, >xn_, t1.get(), "r" + std::to_string(i), "", "")); + } + for (int i = 0; i < threads.size(); ++i) { + threads[i].join(); + } + threads.clear(); + EXPECT_TRUE(notify_thread_cnt == gtxn_.notifies_.size()); + EXPECT_TRUE(gtxn_.notifies_cnt_.Get() == notify_thread_cnt); + + for (int i = 0; i < notify_thread_cnt; ++i) { + GlobalTxn::TableWithRowkey twr1("t1", "r" + std::to_string(i)); + EXPECT_TRUE(gtxn_.notifies_[twr1].size() == 1); + } } -void AckWarpper(GlobalTxn* gtxn, Table* t, - const std::string& row_key, - const std::string& column_family, - const std::string& qualifier) { - gtxn->Ack(t, row_key, column_family, qualifier); +void AckWarpper(GlobalTxn* gtxn, Table* t, const std::string& row_key, + const std::string& column_family, const std::string& qualifier) { + gtxn->Ack(t, row_key, column_family, qualifier); } TEST_F(GlobalTxnTest, Ack) { - size_t ack_thread_cnt = 30; - std::vector threads; - // all Table* is NULL - gtxn_.acks_.clear(); - gtxn_.acks_cnt_.Set(0); - EXPECT_TRUE(0 == gtxn_.acks_.size()); - EXPECT_TRUE(gtxn_.acks_cnt_.Get() == 0); - threads.reserve(ack_thread_cnt); - Table* t0 = NULL; - for (int i = 0; i < ack_thread_cnt; ++i) { - threads.emplace_back(std::thread(AckWarpper, >xn_, t0, "", "", "")); - } - for (int i = 0; i < threads.size(); ++i) { - threads[i].join(); - } - threads.clear(); - EXPECT_TRUE(0 == gtxn_.acks_.size()); - EXPECT_TRUE(gtxn_.acks_cnt_.Get() == 0); - - // same table and same row - gtxn_.acks_.clear(); - gtxn_.acks_cnt_.Set(0); - EXPECT_TRUE(0 == gtxn_.acks_.size()); - EXPECT_TRUE(gtxn_.acks_cnt_.Get() == 0); - std::shared_ptr
t1 = OpenTable("t1"); - threads.reserve(30); - for (int i = 0; i < ack_thread_cnt; ++i) { - threads.emplace_back(std::thread(AckWarpper, >xn_, t1.get(), "r1", "", "")); - } - for (int i = 0; i < threads.size(); ++i) { - threads[i].join(); - } - threads.clear(); - EXPECT_TRUE(1 == gtxn_.acks_.size()); - EXPECT_TRUE(gtxn_.acks_cnt_.Get() == 1); - GlobalTxn::TableWithRowkey twr("t1", "r1"); - EXPECT_TRUE(gtxn_.acks_[twr].size() == ack_thread_cnt); - - // same table and diff row - gtxn_.acks_.clear(); - gtxn_.acks_cnt_.Set(0); - EXPECT_TRUE(0 == gtxn_.acks_.size()); - EXPECT_TRUE(gtxn_.acks_cnt_.Get() == 0); - for (int i = 0; i < ack_thread_cnt; ++i) { - threads.emplace_back(std::thread(AckWarpper, >xn_, t1.get(), "r" + std::to_string(i), "", "")); - } - for (int i = 0; i < threads.size(); ++i) { - threads[i].join(); - } - threads.clear(); - EXPECT_TRUE(ack_thread_cnt == gtxn_.acks_.size()); - EXPECT_TRUE(gtxn_.acks_cnt_.Get() == ack_thread_cnt); - - for (int i = 0; i < ack_thread_cnt; ++i) { - GlobalTxn::TableWithRowkey twr1("t1", "r" + std::to_string(i)); - EXPECT_TRUE(gtxn_.acks_[twr1].size() == 1); - } + size_t ack_thread_cnt = 30; + std::vector threads; + // all Table* is NULL + gtxn_.acks_.clear(); + gtxn_.acks_cnt_.Set(0); + EXPECT_TRUE(0 == gtxn_.acks_.size()); + EXPECT_TRUE(gtxn_.acks_cnt_.Get() == 0); + threads.reserve(ack_thread_cnt); + Table* t0 = NULL; + for (int i = 0; i < ack_thread_cnt; ++i) { + threads.emplace_back(std::thread(AckWarpper, >xn_, t0, "", "", "")); + } + for (int i = 0; i < threads.size(); ++i) { + threads[i].join(); + } + threads.clear(); + EXPECT_TRUE(0 == gtxn_.acks_.size()); + EXPECT_TRUE(gtxn_.acks_cnt_.Get() == 0); + + // same table and same row + gtxn_.acks_.clear(); + gtxn_.acks_cnt_.Set(0); + EXPECT_TRUE(0 == gtxn_.acks_.size()); + EXPECT_TRUE(gtxn_.acks_cnt_.Get() == 0); + std::shared_ptr
t1 = OpenTable("t1"); + threads.reserve(30); + for (int i = 0; i < ack_thread_cnt; ++i) { + threads.emplace_back(std::thread(AckWarpper, >xn_, t1.get(), "r1", "", "")); + } + for (int i = 0; i < threads.size(); ++i) { + threads[i].join(); + } + threads.clear(); + EXPECT_TRUE(1 == gtxn_.acks_.size()); + EXPECT_TRUE(gtxn_.acks_cnt_.Get() == 1); + GlobalTxn::TableWithRowkey twr("t1", "r1"); + EXPECT_TRUE(gtxn_.acks_[twr].size() == ack_thread_cnt); + + // same table and diff row + gtxn_.acks_.clear(); + gtxn_.acks_cnt_.Set(0); + EXPECT_TRUE(0 == gtxn_.acks_.size()); + EXPECT_TRUE(gtxn_.acks_cnt_.Get() == 0); + for (int i = 0; i < ack_thread_cnt; ++i) { + threads.emplace_back( + std::thread(AckWarpper, >xn_, t1.get(), "r" + std::to_string(i), "", "")); + } + for (int i = 0; i < threads.size(); ++i) { + threads[i].join(); + } + threads.clear(); + EXPECT_TRUE(ack_thread_cnt == gtxn_.acks_.size()); + EXPECT_TRUE(gtxn_.acks_cnt_.Get() == ack_thread_cnt); + + for (int i = 0; i < ack_thread_cnt; ++i) { + GlobalTxn::TableWithRowkey twr1("t1", "r" + std::to_string(i)); + EXPECT_TRUE(gtxn_.acks_[twr1].size() == 1); + } } TEST_F(GlobalTxnTest, DoCommitSecondariesCallback0) { - // mutation error is kOK will finish - std::vector threads; - size_t secondaries_thread_cnt = 10; - gtxn_.all_task_pushed_ = true; - gtxn_.status_.SetFailed(ErrorCode::kOK); - gtxn_.acks_cnt_.Set(0); - gtxn_.ack_done_cnt_.Set(0); - gtxn_.notifies_cnt_.Set(0); - gtxn_.notify_done_cnt_.Set(0); - gtxn_.writes_cnt_.Set(secondaries_thread_cnt); - for (int i = 0; i < secondaries_thread_cnt; ++i) { - RowMutationImpl* mu_impl = new RowMutationImpl(NULL, "rowkey"); - mu_impl->error_code_.SetFailed(ErrorCode::kOK, ""); - RowMutation* mu = static_cast(mu_impl); - auto func = std::bind(&GlobalTxn::DoCommitSecondariesCallback, >xn_, mu); - threads.emplace_back(std::thread(func)); - } - for (int i = 0; i < threads.size(); ++i) { - threads[i].join(); - } - threads.clear(); - EXPECT_TRUE(gtxn_.finish_ == true); + // mutation error is kOK will finish + std::vector threads; + size_t secondaries_thread_cnt = 10; + gtxn_.all_task_pushed_ = true; + gtxn_.status_.SetFailed(ErrorCode::kOK); + gtxn_.acks_cnt_.Set(0); + gtxn_.ack_done_cnt_.Set(0); + gtxn_.notifies_cnt_.Set(0); + gtxn_.notify_done_cnt_.Set(0); + gtxn_.writes_cnt_.Set(secondaries_thread_cnt); + for (int i = 0; i < secondaries_thread_cnt; ++i) { + RowMutationImpl* mu_impl = new RowMutationImpl(NULL, "rowkey"); + mu_impl->error_code_.SetFailed(ErrorCode::kOK, ""); + RowMutation* mu = static_cast(mu_impl); + auto func = std::bind(&GlobalTxn::DoCommitSecondariesCallback, >xn_, mu); + threads.emplace_back(std::thread(func)); + } + for (int i = 0; i < threads.size(); ++i) { + threads[i].join(); + } + threads.clear(); + EXPECT_TRUE(gtxn_.finish_ == true); } TEST_F(GlobalTxnTest, DoCommitSecondariesCallback1) { - // mutation error is kOK not last one - size_t secondaries_thread_cnt = 50; - std::vector threads; - threads.reserve(secondaries_thread_cnt); - gtxn_.status_.SetFailed(ErrorCode::kOK); - gtxn_.acks_cnt_.Set(0); - gtxn_.ack_done_cnt_.Set(0); - gtxn_.notifies_cnt_.Set(0); - gtxn_.notify_done_cnt_.Set(0); - gtxn_.writes_cnt_.Set(secondaries_thread_cnt + 1); - for (int i = 0; i < secondaries_thread_cnt; ++i) { - RowMutationImpl* mu_impl = new RowMutationImpl(NULL, "rowkey"); - mu_impl->error_code_.SetFailed(ErrorCode::kOK, ""); - RowMutation* mu = static_cast(mu_impl); - auto func = std::bind(&GlobalTxn::DoCommitSecondariesCallback, >xn_, mu); - threads.emplace_back(std::thread(func)); - } - for (int i = 0; i < threads.size(); ++i) { - threads[i].join(); - } - threads.clear(); - EXPECT_TRUE(gtxn_.finish_ == false); + // mutation error is kOK not last one + size_t secondaries_thread_cnt = 50; + std::vector threads; + threads.reserve(secondaries_thread_cnt); + gtxn_.status_.SetFailed(ErrorCode::kOK); + gtxn_.acks_cnt_.Set(0); + gtxn_.ack_done_cnt_.Set(0); + gtxn_.notifies_cnt_.Set(0); + gtxn_.notify_done_cnt_.Set(0); + gtxn_.writes_cnt_.Set(secondaries_thread_cnt + 1); + for (int i = 0; i < secondaries_thread_cnt; ++i) { + RowMutationImpl* mu_impl = new RowMutationImpl(NULL, "rowkey"); + mu_impl->error_code_.SetFailed(ErrorCode::kOK, ""); + RowMutation* mu = static_cast(mu_impl); + auto func = std::bind(&GlobalTxn::DoCommitSecondariesCallback, >xn_, mu); + threads.emplace_back(std::thread(func)); + } + for (int i = 0; i < threads.size(); ++i) { + threads[i].join(); + } + threads.clear(); + EXPECT_TRUE(gtxn_.finish_ == false); } TEST_F(GlobalTxnTest, DoCommitSecondariesCallback2) { - // mutation error is not kOK but status_ is not changed - size_t secondaries_thread_cnt = 10; - std::vector threads; - threads.reserve(secondaries_thread_cnt); - gtxn_.all_task_pushed_ = true; - gtxn_.status_.SetFailed(ErrorCode::kOK); - gtxn_.acks_cnt_.Set(0); - gtxn_.ack_done_cnt_.Set(0); - gtxn_.notifies_cnt_.Set(0); - gtxn_.notify_done_cnt_.Set(0); - gtxn_.writes_cnt_.Set(secondaries_thread_cnt); - for (int i = 0; i < secondaries_thread_cnt; ++i) { - RowMutationImpl* mu_impl = new RowMutationImpl(NULL, "rowkey"); - mu_impl->error_code_.SetFailed(ErrorCode::kSystem, ""); - RowMutation* mu = static_cast(mu_impl); - auto func = std::bind(&GlobalTxn::DoCommitSecondariesCallback, >xn_, mu); - threads.emplace_back(std::thread(func)); - } - for (int i = 0; i < threads.size(); ++i) { - threads[i].join(); - } - threads.clear(); - EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kOK); - EXPECT_TRUE(gtxn_.finish_ == true); + // mutation error is not kOK but status_ is not changed + size_t secondaries_thread_cnt = 10; + std::vector threads; + threads.reserve(secondaries_thread_cnt); + gtxn_.all_task_pushed_ = true; + gtxn_.status_.SetFailed(ErrorCode::kOK); + gtxn_.acks_cnt_.Set(0); + gtxn_.ack_done_cnt_.Set(0); + gtxn_.notifies_cnt_.Set(0); + gtxn_.notify_done_cnt_.Set(0); + gtxn_.writes_cnt_.Set(secondaries_thread_cnt); + for (int i = 0; i < secondaries_thread_cnt; ++i) { + RowMutationImpl* mu_impl = new RowMutationImpl(NULL, "rowkey"); + mu_impl->error_code_.SetFailed(ErrorCode::kSystem, ""); + RowMutation* mu = static_cast(mu_impl); + auto func = std::bind(&GlobalTxn::DoCommitSecondariesCallback, >xn_, mu); + threads.emplace_back(std::thread(func)); + } + for (int i = 0; i < threads.size(); ++i) { + threads[i].join(); + } + threads.clear(); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kOK); + EXPECT_TRUE(gtxn_.finish_ == true); } TEST_F(GlobalTxnTest, DoVerifyPrimaryLockedCallback3) { - // mutation error is not kOK but status_ is not changed - size_t secondaries_thread_cnt = 30; - std::vector threads; - - threads.reserve(secondaries_thread_cnt); - gtxn_.status_.SetFailed(ErrorCode::kOK); - gtxn_.acks_cnt_.Set(10); - gtxn_.ack_done_cnt_.Set(9); - gtxn_.notifies_cnt_.Set(10); - gtxn_.notify_done_cnt_.Set(10); - gtxn_.writes_cnt_.Set(secondaries_thread_cnt); - for (int i = 0; i < secondaries_thread_cnt; ++i) { - RowMutationImpl* mu_impl = new RowMutationImpl(NULL, "rowkey"); - mu_impl->error_code_.SetFailed(ErrorCode::kOK, ""); - RowMutation* mu = static_cast(mu_impl); - auto func = std::bind(&GlobalTxn::DoCommitSecondariesCallback, >xn_, mu); - threads.emplace_back(std::thread(func)); - } - for (int i = 0; i < threads.size(); ++i) { - threads[i].join(); - } - threads.clear(); - EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kOK); - EXPECT_TRUE(gtxn_.finish_ == false); - + // mutation error is not kOK but status_ is not changed + size_t secondaries_thread_cnt = 30; + std::vector threads; + + threads.reserve(secondaries_thread_cnt); + gtxn_.status_.SetFailed(ErrorCode::kOK); + gtxn_.acks_cnt_.Set(10); + gtxn_.ack_done_cnt_.Set(9); + gtxn_.notifies_cnt_.Set(10); + gtxn_.notify_done_cnt_.Set(10); + gtxn_.writes_cnt_.Set(secondaries_thread_cnt); + for (int i = 0; i < secondaries_thread_cnt; ++i) { + RowMutationImpl* mu_impl = new RowMutationImpl(NULL, "rowkey"); + mu_impl->error_code_.SetFailed(ErrorCode::kOK, ""); + RowMutation* mu = static_cast(mu_impl); + auto func = std::bind(&GlobalTxn::DoCommitSecondariesCallback, >xn_, mu); + threads.emplace_back(std::thread(func)); + } + for (int i = 0; i < threads.size(); ++i) { + threads[i].join(); + } + threads.clear(); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kOK); + EXPECT_TRUE(gtxn_.finish_ == false); } std::atomic g_callback_run_cnt(0); static void EmptyMutationCallback(RowMutation* mu) { - LOG(INFO) << "run empty mutation callback"; - ++g_callback_run_cnt; -} + LOG(INFO) << "run empty mutation callback"; + ++g_callback_run_cnt; +} // has_commited == true && status_returned_ == false && set mutation callback TEST_F(GlobalTxnTest, ApplyMutation0) { - g_callback_run_cnt = 0; - gtxn_.has_commited_ = true; - gtxn_.status_returned_ = false; - - RowMutationImpl* mu_impl = new RowMutationImpl(NULL, "rowkey"); - RowMutation* mu = static_cast(mu_impl); - mu->SetCallBack(EmptyMutationCallback); - gtxn_.ApplyMutation(mu); - thread_pool_.Stop(true); - EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kGTxnOpAfterCommit); - EXPECT_TRUE(gtxn_.status_returned_ == true); - EXPECT_TRUE(gtxn_.put_fail_cnt_.Get() == 0); - EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnOpAfterCommit); - EXPECT_TRUE(g_callback_run_cnt == 1); + g_callback_run_cnt = 0; + gtxn_.has_commited_ = true; + gtxn_.status_returned_ = false; + + RowMutationImpl* mu_impl = new RowMutationImpl(NULL, "rowkey"); + RowMutation* mu = static_cast(mu_impl); + mu->SetCallBack(EmptyMutationCallback); + gtxn_.ApplyMutation(mu); + thread_pool_.Stop(true); + EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kGTxnOpAfterCommit); + EXPECT_TRUE(gtxn_.status_returned_ == true); + EXPECT_TRUE(gtxn_.put_fail_cnt_.Get() == 0); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnOpAfterCommit); + EXPECT_TRUE(g_callback_run_cnt == 1); } -// has_commited == true && status_returned_ == false && don't set mutation callback +// has_commited == true && status_returned_ == false && don't set mutation +// callback TEST_F(GlobalTxnTest, ApplyMutation1) { - g_callback_run_cnt = 0; - gtxn_.has_commited_ = true; - gtxn_.status_returned_ = false; - - RowMutationImpl* mu_impl = new RowMutationImpl(NULL, "rowkey"); - RowMutation* mu = static_cast(mu_impl); - gtxn_.ApplyMutation(mu); - thread_pool_.Stop(true); - EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kGTxnOpAfterCommit); - EXPECT_TRUE(gtxn_.status_returned_ == true); - EXPECT_TRUE(gtxn_.put_fail_cnt_.Get() == 0); - EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnOpAfterCommit); - EXPECT_TRUE(g_callback_run_cnt == 0); + g_callback_run_cnt = 0; + gtxn_.has_commited_ = true; + gtxn_.status_returned_ = false; + + RowMutationImpl* mu_impl = new RowMutationImpl(NULL, "rowkey"); + RowMutation* mu = static_cast(mu_impl); + gtxn_.ApplyMutation(mu); + thread_pool_.Stop(true); + EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kGTxnOpAfterCommit); + EXPECT_TRUE(gtxn_.status_returned_ == true); + EXPECT_TRUE(gtxn_.put_fail_cnt_.Get() == 0); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnOpAfterCommit); + EXPECT_TRUE(g_callback_run_cnt == 0); } TEST_F(GlobalTxnTest, SetReaderStatusAndRunCallback0) { - RowReaderImpl* reader_impl = new RowReaderImpl(NULL, "rowkey"); - ErrorCode status; - status.SetFailed(ErrorCode::kSystem, ""); - gtxn_.SetReaderStatusAndRunCallback(reader_impl,&status); - RowReader* r = static_cast(reader_impl); - thread_pool_.Stop(true); - EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kSystem); - EXPECT_TRUE(r->IsFinished()); + RowReaderImpl* reader_impl = new RowReaderImpl(NULL, "rowkey"); + ErrorCode status; + status.SetFailed(ErrorCode::kSystem, ""); + gtxn_.SetReaderStatusAndRunCallback(reader_impl, &status); + RowReader* r = static_cast(reader_impl); + thread_pool_.Stop(true); + EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kSystem); + EXPECT_TRUE(r->IsFinished()); } TEST_F(GlobalTxnTest, SetReaderStatusAndRunCallback1) { - RowReaderImpl* reader_impl = new RowReaderImpl(NULL, "rowkey"); - reader_impl->SetCallBack([](RowReader* r) { - EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kSystem); - delete r; - }); - ErrorCode status; - status.SetFailed(ErrorCode::kSystem, ""); - gtxn_.SetReaderStatusAndRunCallback(reader_impl,&status); - thread_pool_.Stop(true); + RowReaderImpl* reader_impl = new RowReaderImpl(NULL, "rowkey"); + reader_impl->SetCallBack([](RowReader* r) { + EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kSystem); + delete r; + }); + ErrorCode status; + status.SetFailed(ErrorCode::kSystem, ""); + gtxn_.SetReaderStatusAndRunCallback(reader_impl, &status); + thread_pool_.Stop(true); } TEST_F(GlobalTxnTest, Get0) { - gtxn_.has_commited_ = true; - RowReaderImpl* reader_impl = new RowReaderImpl(NULL, "rowkey"); - RowReader* r = static_cast(reader_impl); - EXPECT_TRUE(gtxn_.Get(r).GetType() == ErrorCode::kGTxnOpAfterCommit); - thread_pool_.Stop(true); - EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kGTxnOpAfterCommit); - EXPECT_TRUE(r->IsFinished()); - delete r; + gtxn_.has_commited_ = true; + RowReaderImpl* reader_impl = new RowReaderImpl(NULL, "rowkey"); + RowReader* r = static_cast(reader_impl); + EXPECT_TRUE(gtxn_.Get(r).GetType() == ErrorCode::kGTxnOpAfterCommit); + thread_pool_.Stop(true); + EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kGTxnOpAfterCommit); + EXPECT_TRUE(r->IsFinished()); + delete r; } TEST_F(GlobalTxnTest, Get1) { - // set a table to tables_ - ErrorCode status; - std::shared_ptr
t1 = OpenTable("t1"); - // table and exist cf - TableDescriptor desc("t1"); - desc.EnableTxn(); - desc.AddLocalityGroup("lg0"); - ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); - cfd1->EnableGlobalTransaction(); - - TableSchema schema; - TableDescToSchema(desc, &schema); - SetSchema(t1.get(), schema); - - EXPECT_TRUE(gtxn_.gtxn_internal_->CheckTable(t1.get(), &status)); - - RowReader* r = t1->NewRowReader("r1"); - bool ret = gtxn_.gtxn_internal_->VerifyUserRowReader(r); - EXPECT_FALSE(ret); - - gtxn_.has_commited_ = false; - EXPECT_TRUE(gtxn_.Get(r).GetType() == ErrorCode::kBadParam); - thread_pool_.Stop(true); - EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kBadParam); - EXPECT_TRUE(r->IsFinished()); - delete r; + // set a table to tables_ + ErrorCode status; + std::shared_ptr
t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1.get(), schema); + + EXPECT_TRUE(gtxn_.gtxn_internal_->CheckTable(t1.get(), &status)); + + RowReader* r = t1->NewRowReader("r1"); + bool ret = gtxn_.gtxn_internal_->VerifyUserRowReader(r); + EXPECT_FALSE(ret); + + gtxn_.has_commited_ = false; + EXPECT_TRUE(gtxn_.Get(r).GetType() == ErrorCode::kBadParam); + thread_pool_.Stop(true); + EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kBadParam); + EXPECT_TRUE(r->IsFinished()); + delete r; } TEST_F(GlobalTxnTest, DoGetCellReaderCallback0) { - std::shared_ptr
t1 = OpenTable("t1"); - RowReader* r = t1->NewRowReader("r1"); - RowReaderImpl* r_impl = static_cast(r); - InternalReaderContext* ctx = new InternalReaderContext(2, r_impl, >xn_); - r->SetContext(ctx); - std::vector cells; - cells.push_back(new Cell(t1.get(), "r1", "cf1", "qu")); - cells.push_back(new Cell(t1.get(), "r1", "cf2", "qu")); - for(auto& cell : cells) { - ctx->cell_map[cell] = 0; - } - RowReader* inter_r = t1->NewRowReader("r1"); - inter_r->SetContext(new CellReaderContext(cells[0], ctx)); - RowReaderImpl* inter_r_impl = static_cast(inter_r); - inter_r_impl->error_code_.SetFailed(ErrorCode::kNotFound, ""); - gtxn_.DoGetCellReaderCallback(inter_r); - EXPECT_TRUE(ctx->not_found_cnt == 1); - EXPECT_TRUE(ctx->fail_cell_cnt == 0); - EXPECT_TRUE(ctx->active_cell_cnt == 1); - thread_pool_.Stop(true); - EXPECT_FALSE(r_impl->IsFinished()); + std::shared_ptr
t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + InternalReaderContext* ctx = new InternalReaderContext(2, r_impl, >xn_); + r->SetContext(ctx); + std::vector cells; + cells.push_back(new Cell(t1.get(), "r1", "cf1", "qu")); + cells.push_back(new Cell(t1.get(), "r1", "cf2", "qu")); + for (auto& cell : cells) { + ctx->cell_map[cell] = 0; + } + RowReader* inter_r = t1->NewRowReader("r1"); + inter_r->SetContext(new CellReaderContext(cells[0], ctx)); + RowReaderImpl* inter_r_impl = static_cast(inter_r); + inter_r_impl->error_code_.SetFailed(ErrorCode::kNotFound, ""); + gtxn_.DoGetCellReaderCallback(inter_r); + EXPECT_TRUE(ctx->not_found_cnt == 1); + EXPECT_TRUE(ctx->fail_cell_cnt == 0); + EXPECT_TRUE(ctx->active_cell_cnt == 1); + thread_pool_.Stop(true); + EXPECT_FALSE(r_impl->IsFinished()); } TEST_F(GlobalTxnTest, DoGetCellReaderCallback1) { - std::shared_ptr
t1 = OpenTable("t1"); - RowReader* r = t1->NewRowReader("r1"); - RowReaderImpl* r_impl = static_cast(r); - InternalReaderContext* ctx = new InternalReaderContext(2, r_impl, >xn_); - r->SetContext(ctx); - std::vector cells; - cells.push_back(new Cell(t1.get(), "r1", "cf1", "qu")); - cells.push_back(new Cell(t1.get(), "r1", "cf2", "qu")); - for(auto& cell : cells) { - ctx->cell_map[cell] = 0; - } - RowReader* inter_r = t1->NewRowReader("r1"); - inter_r->SetContext(new CellReaderContext(cells[0], ctx)); - RowReaderImpl* inter_r_impl = static_cast(inter_r); - inter_r_impl->error_code_.SetFailed(ErrorCode::kOK, ""); - gtxn_.DoGetCellReaderCallback(inter_r); - EXPECT_TRUE(ctx->fail_cell_cnt == 0); - EXPECT_TRUE(ctx->not_found_cnt == 1); - EXPECT_TRUE(ctx->active_cell_cnt == 1); - thread_pool_.Stop(true); - EXPECT_FALSE(r_impl->IsFinished()); + std::shared_ptr
t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + InternalReaderContext* ctx = new InternalReaderContext(2, r_impl, >xn_); + r->SetContext(ctx); + std::vector cells; + cells.push_back(new Cell(t1.get(), "r1", "cf1", "qu")); + cells.push_back(new Cell(t1.get(), "r1", "cf2", "qu")); + for (auto& cell : cells) { + ctx->cell_map[cell] = 0; + } + RowReader* inter_r = t1->NewRowReader("r1"); + inter_r->SetContext(new CellReaderContext(cells[0], ctx)); + RowReaderImpl* inter_r_impl = static_cast(inter_r); + inter_r_impl->error_code_.SetFailed(ErrorCode::kOK, ""); + gtxn_.DoGetCellReaderCallback(inter_r); + EXPECT_TRUE(ctx->fail_cell_cnt == 0); + EXPECT_TRUE(ctx->not_found_cnt == 1); + EXPECT_TRUE(ctx->active_cell_cnt == 1); + thread_pool_.Stop(true); + EXPECT_FALSE(r_impl->IsFinished()); } TEST_F(GlobalTxnTest, DoGetCellReaderCallback2) { - std::shared_ptr
t1 = OpenTable("t1"); - RowReader* r = t1->NewRowReader("r1"); - RowReaderImpl* r_impl = static_cast(r); - InternalReaderContext* ctx = new InternalReaderContext(2, r_impl, >xn_); - r->SetContext(ctx); - std::vector cells; - cells.push_back(new Cell(t1.get(), "r1", "cf1", "qu")); - cells.push_back(new Cell(t1.get(), "r1", "cf2", "qu")); - for(auto& cell : cells) { - ctx->cell_map[cell] = 0; - } - RowReader* inter_r = t1->NewRowReader("r1"); - inter_r->SetContext(new CellReaderContext(cells[0], ctx)); - RowReaderImpl* inter_r_impl = static_cast(inter_r); - inter_r_impl->error_code_.SetFailed(ErrorCode::kSystem, ""); - gtxn_.DoGetCellReaderCallback(inter_r); - EXPECT_TRUE(ctx->fail_cell_cnt == 1); - EXPECT_TRUE(ctx->not_found_cnt == 0); - EXPECT_TRUE(ctx->active_cell_cnt == 1); - thread_pool_.Stop(true); - EXPECT_FALSE(r_impl->IsFinished()); + std::shared_ptr
t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + InternalReaderContext* ctx = new InternalReaderContext(2, r_impl, >xn_); + r->SetContext(ctx); + std::vector cells; + cells.push_back(new Cell(t1.get(), "r1", "cf1", "qu")); + cells.push_back(new Cell(t1.get(), "r1", "cf2", "qu")); + for (auto& cell : cells) { + ctx->cell_map[cell] = 0; + } + RowReader* inter_r = t1->NewRowReader("r1"); + inter_r->SetContext(new CellReaderContext(cells[0], ctx)); + RowReaderImpl* inter_r_impl = static_cast(inter_r); + inter_r_impl->error_code_.SetFailed(ErrorCode::kSystem, ""); + gtxn_.DoGetCellReaderCallback(inter_r); + EXPECT_TRUE(ctx->fail_cell_cnt == 1); + EXPECT_TRUE(ctx->not_found_cnt == 0); + EXPECT_TRUE(ctx->active_cell_cnt == 1); + thread_pool_.Stop(true); + EXPECT_FALSE(r_impl->IsFinished()); } TEST_F(GlobalTxnTest, DoGetCellReaderCallback3) { - std::shared_ptr
t1(OpenTable("t1")); - RowReader* r = t1->NewRowReader("r1"); - RowReaderImpl* r_impl = static_cast(r); - InternalReaderContext* ctx = new InternalReaderContext(1, r_impl, >xn_); - r->SetContext(ctx); - std::vector cells; - cells.push_back(new Cell(t1.get(), "r1", "cf1", "qu")); - for(auto& cell : cells) { - ctx->cell_map[cell] = 0; - } - RowReader* inter_r = t1->NewRowReader("r1"); - inter_r->SetContext(new CellReaderContext(cells[0], ctx)); - RowReaderImpl* inter_r_impl = static_cast(inter_r); - inter_r_impl->error_code_.SetFailed(ErrorCode::kSystem, ""); - gtxn_.DoGetCellReaderCallback(inter_r); - thread_pool_.Stop(true); - EXPECT_TRUE(r_impl->IsFinished()); + std::shared_ptr
t1(OpenTable("t1")); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + InternalReaderContext* ctx = new InternalReaderContext(1, r_impl, >xn_); + r->SetContext(ctx); + std::vector cells; + cells.push_back(new Cell(t1.get(), "r1", "cf1", "qu")); + for (auto& cell : cells) { + ctx->cell_map[cell] = 0; + } + RowReader* inter_r = t1->NewRowReader("r1"); + inter_r->SetContext(new CellReaderContext(cells[0], ctx)); + RowReaderImpl* inter_r_impl = static_cast(inter_r); + inter_r_impl->error_code_.SetFailed(ErrorCode::kSystem, ""); + gtxn_.DoGetCellReaderCallback(inter_r); + thread_pool_.Stop(true); + EXPECT_TRUE(r_impl->IsFinished()); } TEST_F(GlobalTxnTest, MergeCellToRow) { - std::shared_ptr
t1 = OpenTable("t1"); - RowReader* r = t1->NewRowReader("r1"); - RowReaderImpl* r_impl = static_cast(r); - InternalReaderContext* ctx = new InternalReaderContext(1, r_impl, >xn_); - r->SetContext(ctx); - std::vector cells; - cells.push_back(new Cell(t1.get(), "r1", "cf1", "qu")); - for(auto& cell : cells) { - ctx->cell_map[cell] = 0; - } - RowReader* inter_r = t1->NewRowReader("r1"); - inter_r->SetContext(new CellReaderContext(cells[0], ctx)); - ErrorCode status; - status.SetFailed(ErrorCode::kSystem, ""); - gtxn_.MergeCellToRow(inter_r, status); - thread_pool_.Stop(true); - EXPECT_TRUE(r_impl->IsFinished()); + std::shared_ptr
t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + InternalReaderContext* ctx = new InternalReaderContext(1, r_impl, >xn_); + r->SetContext(ctx); + std::vector cells; + cells.push_back(new Cell(t1.get(), "r1", "cf1", "qu")); + for (auto& cell : cells) { + ctx->cell_map[cell] = 0; + } + RowReader* inter_r = t1->NewRowReader("r1"); + inter_r->SetContext(new CellReaderContext(cells[0], ctx)); + ErrorCode status; + status.SetFailed(ErrorCode::kSystem, ""); + gtxn_.MergeCellToRow(inter_r, status); + thread_pool_.Stop(true); + EXPECT_TRUE(r_impl->IsFinished()); } TEST_F(GlobalTxnTest, GetCellCallback) { - std::shared_ptr
t1 = OpenTable("t1"); - RowReader* r = t1->NewRowReader("r1"); - RowReaderImpl* r_impl = static_cast(r); - InternalReaderContext* ctx = new InternalReaderContext(1, r_impl, >xn_); - r->SetContext(ctx); - std::vector cells; - cells.push_back(new Cell(t1.get(), "r1", "cf1", "qu")); - for(auto& cell : cells) { - ctx->cell_map[cell] = 0; - } - RowReader* inter_r = t1->NewRowReader("r1"); - inter_r->SetContext(new CellReaderContext(cells[0], ctx)); - RowReaderImpl* inter_r_impl = static_cast(inter_r); - inter_r_impl->error_code_.SetFailed(ErrorCode::kSystem, ""); - gtxn_.GetCellCallback((CellReaderContext*)inter_r->GetContext()); - thread_pool_.Stop(true); - EXPECT_TRUE(r_impl->IsFinished()); + std::shared_ptr
t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + InternalReaderContext* ctx = new InternalReaderContext(1, r_impl, >xn_); + r->SetContext(ctx); + std::vector cells; + cells.push_back(new Cell(t1.get(), "r1", "cf1", "qu")); + for (auto& cell : cells) { + ctx->cell_map[cell] = 0; + } + RowReader* inter_r = t1->NewRowReader("r1"); + inter_r->SetContext(new CellReaderContext(cells[0], ctx)); + RowReaderImpl* inter_r_impl = static_cast(inter_r); + inter_r_impl->error_code_.SetFailed(ErrorCode::kSystem, ""); + gtxn_.GetCellCallback((CellReaderContext*)inter_r->GetContext()); + thread_pool_.Stop(true); + EXPECT_TRUE(r_impl->IsFinished()); } TEST_F(GlobalTxnTest, RollForward) { - // can't find primary write cell - std::shared_ptr
t1 = OpenTable("t1"); - Cell cell(t1.get(), "r1", "cf1", "qu"); - tera::PrimaryInfo primary; - primary.set_table_name("t1"); - primary.set_row_key("r1"); - primary.set_column_family("cf1"); - primary.set_qualifier("qu"); - primary.set_gtxn_start_ts(12); - ErrorCode status; - - std::set gtxn_cfs; - gtxn_.gtxn_internal_->tables_["t1"] = - std::pair>(t1.get(), gtxn_cfs); - ErrorCode mock_status; - mock_status.SetFailed(ErrorCode::kNotFound,""); - std::vector reader_errs; - reader_errs.push_back(mock_status); - (static_cast(t1.get()))->AddReaderErrors(reader_errs); - gtxn_.RollForward(cell, primary, 0, &status); - EXPECT_TRUE(ErrorCode::kGTxnPrimaryLost == status.GetType()); + // can't find primary write cell + std::shared_ptr
t1 = OpenTable("t1"); + Cell cell(t1.get(), "r1", "cf1", "qu"); + tera::PrimaryInfo primary; + primary.set_table_name("t1"); + primary.set_row_key("r1"); + primary.set_column_family("cf1"); + primary.set_qualifier("qu"); + primary.set_gtxn_start_ts(12); + ErrorCode status; + + std::set gtxn_cfs; + gtxn_.gtxn_internal_->tables_["t1"] = + std::pair>(t1.get(), gtxn_cfs); + ErrorCode mock_status; + mock_status.SetFailed(ErrorCode::kNotFound, ""); + std::vector reader_errs; + reader_errs.push_back(mock_status); + (static_cast(t1.get()))->AddReaderErrors(reader_errs); + gtxn_.RollForward(cell, primary, 0, &status); + EXPECT_TRUE(ErrorCode::kGTxnPrimaryLost == status.GetType()); } TEST_F(GlobalTxnTest, CleanLock0) { - // cell same as primary - std::shared_ptr
t1 = OpenTable("t1"); - Cell cell(t1.get(), "r1", "cf1", "qu"); - tera::PrimaryInfo primary; - primary.set_table_name("t1"); - primary.set_row_key("r1"); - primary.set_column_family("cf1"); - primary.set_qualifier("qu"); - primary.set_gtxn_start_ts(12); - // init status is OK - ErrorCode status; - status.SetFailed(ErrorCode::kOK); - std::set gtxn_cfs; - gtxn_.gtxn_internal_->tables_["t1"] = - std::pair>(t1.get(), gtxn_cfs); - // only this cell will call mutation - ErrorCode mock_status1; - mock_status1.SetFailed(ErrorCode::kSystem,""); - std::vector mu_errs; - mu_errs.push_back(mock_status1); - (static_cast(t1.get()))->AddMutationErrors(mu_errs); - // run test - gtxn_.CleanLock(cell, primary, &status); - EXPECT_TRUE(mock_status1.GetType() == status.GetType()); + // cell same as primary + std::shared_ptr
t1 = OpenTable("t1"); + Cell cell(t1.get(), "r1", "cf1", "qu"); + tera::PrimaryInfo primary; + primary.set_table_name("t1"); + primary.set_row_key("r1"); + primary.set_column_family("cf1"); + primary.set_qualifier("qu"); + primary.set_gtxn_start_ts(12); + // init status is OK + ErrorCode status; + status.SetFailed(ErrorCode::kOK); + std::set gtxn_cfs; + gtxn_.gtxn_internal_->tables_["t1"] = + std::pair>(t1.get(), gtxn_cfs); + // only this cell will call mutation + ErrorCode mock_status1; + mock_status1.SetFailed(ErrorCode::kSystem, ""); + std::vector mu_errs; + mu_errs.push_back(mock_status1); + (static_cast(t1.get()))->AddMutationErrors(mu_errs); + // run test + gtxn_.CleanLock(cell, primary, &status, 12); + EXPECT_TRUE(mock_status1.GetType() == status.GetType()); } TEST_F(GlobalTxnTest, CleanLock1) { - // cell diff with primary - std::shared_ptr
t1 = OpenTable("t1"); - Cell cell(t1.get(), "r1", "cf1", "qu"); - tera::PrimaryInfo primary; - primary.set_table_name("t1"); - primary.set_row_key("r2"); // diff row - primary.set_column_family("cf1"); - primary.set_qualifier("qu"); - primary.set_gtxn_start_ts(12); - // init status is OK - ErrorCode status; - status.SetFailed(ErrorCode::kOK); - std::set gtxn_cfs; - gtxn_.gtxn_internal_->tables_["t1"] = - std::pair>(t1.get(), gtxn_cfs); - // mock primary return kSystem but cell kOK - // will get kSystem - ErrorCode mock_status1; - ErrorCode mock_status2; - mock_status1.SetFailed(ErrorCode::kSystem,""); - mock_status2.SetFailed(ErrorCode::kOK,""); - std::vector mu_errs; - mu_errs.push_back(mock_status1); - mu_errs.push_back(mock_status2); - (static_cast(t1.get()))->AddMutationErrors(mu_errs); - // run test - gtxn_.CleanLock(cell, primary, &status); - EXPECT_TRUE(mock_status1.GetType() == status.GetType()); - EXPECT_TRUE(mock_status2.GetType() != status.GetType()); + // cell diff with primary + std::shared_ptr
t1 = OpenTable("t1"); + Cell cell(t1.get(), "r1", "cf1", "qu"); + tera::PrimaryInfo primary; + primary.set_table_name("t1"); + primary.set_row_key("r2"); // diff row + primary.set_column_family("cf1"); + primary.set_qualifier("qu"); + primary.set_gtxn_start_ts(12); + // init status is OK + ErrorCode status; + status.SetFailed(ErrorCode::kOK); + std::set gtxn_cfs; + gtxn_.gtxn_internal_->tables_["t1"] = + std::pair>(t1.get(), gtxn_cfs); + // mock primary return kSystem but cell kOK + // will get kSystem + ErrorCode mock_status1; + ErrorCode mock_status2; + mock_status1.SetFailed(ErrorCode::kSystem, ""); + mock_status2.SetFailed(ErrorCode::kOK, ""); + std::vector mu_errs; + mu_errs.push_back(mock_status1); + mu_errs.push_back(mock_status2); + (static_cast(t1.get()))->AddMutationErrors(mu_errs); + // run test + gtxn_.CleanLock(cell, primary, &status, 12); + EXPECT_TRUE(mock_status1.GetType() == status.GetType()); + EXPECT_TRUE(mock_status2.GetType() != status.GetType()); } TEST_F(GlobalTxnTest, CleanLock2) { - // cell diff with primary - std::shared_ptr
t1 = OpenTable("t1"); - Cell cell(t1.get(), "r1", "cf1", "qu"); - tera::PrimaryInfo primary; - primary.set_table_name("t1"); - primary.set_row_key("r2"); // diff row - primary.set_column_family("cf1"); - primary.set_qualifier("qu"); - primary.set_gtxn_start_ts(12); - // init status is OK - ErrorCode status; - status.SetFailed(ErrorCode::kOK); - std::set gtxn_cfs; - gtxn_.gtxn_internal_->tables_["t1"] = - std::pair>(t1.get(), gtxn_cfs); - // mock primary return kOk but cell kSystem - // will get kSystem - ErrorCode mock_status1; - ErrorCode mock_status2; - mock_status1.SetFailed(ErrorCode::kOK,""); - mock_status2.SetFailed(ErrorCode::kSystem,""); - std::vector mu_errs; - mu_errs.push_back(mock_status1); - mu_errs.push_back(mock_status2); - (static_cast(t1.get()))->AddMutationErrors(mu_errs); - // run test - gtxn_.CleanLock(cell, primary, &status); - EXPECT_TRUE(mock_status1.GetType() != status.GetType()); - EXPECT_TRUE(mock_status2.GetType() == status.GetType()); + // cell diff with primary + std::shared_ptr
t1 = OpenTable("t1"); + Cell cell(t1.get(), "r1", "cf1", "qu"); + tera::PrimaryInfo primary; + primary.set_table_name("t1"); + primary.set_row_key("r2"); // diff row + primary.set_column_family("cf1"); + primary.set_qualifier("qu"); + primary.set_gtxn_start_ts(12); + // init status is OK + ErrorCode status; + status.SetFailed(ErrorCode::kOK); + std::set gtxn_cfs; + gtxn_.gtxn_internal_->tables_["t1"] = + std::pair>(t1.get(), gtxn_cfs); + // mock primary return kOk but cell kSystem + // will get kSystem + ErrorCode mock_status1; + ErrorCode mock_status2; + mock_status1.SetFailed(ErrorCode::kOK, ""); + mock_status2.SetFailed(ErrorCode::kSystem, ""); + std::vector mu_errs; + mu_errs.push_back(mock_status1); + mu_errs.push_back(mock_status2); + (static_cast(t1.get()))->AddMutationErrors(mu_errs); + // run test + gtxn_.CleanLock(cell, primary, &status, 12); + EXPECT_TRUE(mock_status1.GetType() != status.GetType()); + EXPECT_TRUE(mock_status2.GetType() == status.GetType()); } TEST_F(GlobalTxnTest, CleanLock3) { - // cell diff with primary - std::shared_ptr
t1 = OpenTable("t1"); - Cell cell(t1.get(), "r1", "cf1", "qu"); - tera::PrimaryInfo primary; - primary.set_table_name("t1"); - primary.set_row_key("r2"); // diff row - primary.set_column_family("cf1"); - primary.set_qualifier("qu"); - primary.set_gtxn_start_ts(12); - // init status is OK - ErrorCode status; - status.SetFailed(ErrorCode::kOK); - std::set gtxn_cfs; - gtxn_.gtxn_internal_->tables_["t1"] = - std::pair>(t1.get(), gtxn_cfs); - // mock primary return kTimeout but cell kSystem - // will get kSystem, the latest error will return - ErrorCode mock_status1; - ErrorCode mock_status2; - mock_status1.SetFailed(ErrorCode::kTimeout,""); - mock_status2.SetFailed(ErrorCode::kSystem,""); - std::vector mu_errs; - mu_errs.push_back(mock_status1); - mu_errs.push_back(mock_status2); - (static_cast(t1.get()))->AddMutationErrors(mu_errs); - // run test - gtxn_.CleanLock(cell, primary, &status); - EXPECT_TRUE(mock_status1.GetType() != status.GetType()); - EXPECT_TRUE(mock_status2.GetType() == status.GetType()); + // cell diff with primary + std::shared_ptr
t1 = OpenTable("t1"); + Cell cell(t1.get(), "r1", "cf1", "qu"); + tera::PrimaryInfo primary; + primary.set_table_name("t1"); + primary.set_row_key("r2"); // diff row + primary.set_column_family("cf1"); + primary.set_qualifier("qu"); + primary.set_gtxn_start_ts(12); + // init status is OK + ErrorCode status; + status.SetFailed(ErrorCode::kOK); + std::set gtxn_cfs; + gtxn_.gtxn_internal_->tables_["t1"] = + std::pair>(t1.get(), gtxn_cfs); + // mock primary return kTimeout but cell kSystem + // will get kSystem, the latest error will return + ErrorCode mock_status1; + ErrorCode mock_status2; + mock_status1.SetFailed(ErrorCode::kTimeout, ""); + mock_status2.SetFailed(ErrorCode::kSystem, ""); + std::vector mu_errs; + mu_errs.push_back(mock_status1); + mu_errs.push_back(mock_status2); + (static_cast(t1.get()))->AddMutationErrors(mu_errs); + // run test + gtxn_.CleanLock(cell, primary, &status, 12); + EXPECT_TRUE(mock_status1.GetType() != status.GetType()); + EXPECT_TRUE(mock_status2.GetType() == status.GetType()); } -void AddKeyValueToResult(const std::string& key, const std::string& cf, - const std::string& qu, int64_t timestamp, - const std::string& value, RowResult* result) { - KeyValuePair* kv = result->add_key_values(); - kv->set_key(key); - kv->set_column_family(cf); - kv->set_qualifier(qu); - kv->set_timestamp(timestamp); - kv->set_value(value); +void AddKeyValueToResult(const std::string& key, const std::string& cf, const std::string& qu, + int64_t timestamp, const std::string& value, RowResult* result) { + KeyValuePair* kv = result->add_key_values(); + kv->set_key(key); + kv->set_column_family(cf); + kv->set_qualifier(qu); + kv->set_timestamp(timestamp); + kv->set_value(value); } TEST_F(GlobalTxnTest, EncodeWriteValue) { - std::string ret = EncodeWriteValue(1, 100); - int type; - int64_t ts; - DecodeWriteValue(ret, &type, &ts); + std::string ret = EncodeWriteValue(1, 100); + int type; + int64_t ts; + DecodeWriteValue(ret, &type, &ts); - EXPECT_TRUE(type == 1); - EXPECT_TRUE(ts == 100); + EXPECT_TRUE(type == 1); + EXPECT_TRUE(ts == 100); } TEST_F(GlobalTxnTest, DecodeWriteValue) { - // a int bigger than mutaion type - std::string ret = EncodeWriteValue(99, 1000000); - int type; - int64_t ts; - DecodeWriteValue(ret, &type, &ts); - - EXPECT_TRUE(type == 99); - EXPECT_TRUE(ts == 1000000); + // a int bigger than mutaion type + std::string ret = EncodeWriteValue(99, 1000000); + int type; + int64_t ts; + DecodeWriteValue(ret, &type, &ts); + + EXPECT_TRUE(type == 99); + EXPECT_TRUE(ts == 1000000); } TEST_F(GlobalTxnTest, FindValueFromResultRow0) { - // the success case - std::shared_ptr
t1 = OpenTable("t1"); - RowReader* r = t1->NewRowReader("r1"); - RowReaderImpl* r_impl = static_cast(r); - - // build RowReader::TRow - // cf must exist before call FindValueFromResultRow - RowResult result; - gtxn_.start_ts_ = 14; - std::string qu = "qu1"; - AddKeyValueToResult("r1", "cf1", PackDataName(qu), 9, "v1", &result); - AddKeyValueToResult("r1", "cf1", PackDataName(qu), 13, "v2", &result); - - AddKeyValueToResult("r1", "cf1", PackWriteName(qu), 15, EncodeWriteValue(0, 13), &result); - AddKeyValueToResult("r1", "cf1", PackWriteName(qu), 12, EncodeWriteValue(0, 9), &result); - r_impl->SetResult(result); - RowReader::TRow row; - r->ToMap(&row); - - for (auto& cf : row) { - std::cout << cf.first << "\n"; - for (auto& qu : cf.second) { - std::cout << "\t" << qu.first << "\n"; - for (auto& v : qu.second) { - std::cout << "\t\tts=" << v.first << ",v=" << v.second << "\n"; - } - } + // the success case + std::shared_ptr
t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + + // build RowReader::TRow + // cf must exist before call FindValueFromResultRow + RowResult result; + gtxn_.start_ts_ = 14; + std::string qu = "qu1"; + AddKeyValueToResult("r1", "cf1", PackDataName(qu), 9, "v1", &result); + AddKeyValueToResult("r1", "cf1", PackDataName(qu), 13, "v2", &result); + + AddKeyValueToResult("r1", "cf1", PackWriteName(qu), 15, EncodeWriteValue(0, 13), &result); + AddKeyValueToResult("r1", "cf1", PackWriteName(qu), 12, EncodeWriteValue(0, 9), &result); + r_impl->SetResult(result); + RowReader::TRow row; + r->ToMap(&row); + + for (auto& cf : row) { + std::cout << cf.first << "\n"; + for (auto& qu : cf.second) { + std::cout << "\t" << qu.first << "\n"; + for (auto& v : qu.second) { + std::cout << "\t\tts=" << v.first << ",v=" << v.second << "\n"; + } } + } - // build target_cell - Cell target_cell(t1.get(), "r1", "cf1", PackDataName(qu)); - - // run test - EXPECT_TRUE(gtxn_.FindValueFromResultRow(row, &target_cell)); - EXPECT_TRUE(target_cell.Timestamp() == 9); - EXPECT_TRUE(target_cell.Value() == "v1"); + // build target_cell + Cell target_cell(t1.get(), "r1", "cf1", PackDataName(qu)); - delete r; + // run test + EXPECT_TRUE(gtxn_.FindValueFromResultRow(row, &target_cell)); + EXPECT_TRUE(target_cell.Timestamp() == 9); + EXPECT_TRUE(target_cell.Value() == "v1"); + + delete r; } TEST_F(GlobalTxnTest, FindValueFromResultRow1) { - // the not found - std::shared_ptr
t1 = OpenTable("t1"); - RowReader* r = t1->NewRowReader("r1"); - RowReaderImpl* r_impl = static_cast(r); - - // build RowReader::TRow - // cf must exist before call FindValueFromResultRow - RowResult result; - r_impl->SetResult(result); - gtxn_.start_ts_ = 11; - RowReader::TRow row; - r->ToMap(&row); - - // build target_cell - Cell target_cell(t1.get(), "r1", "cf1", "qu1"); - - // run test - EXPECT_FALSE(gtxn_.FindValueFromResultRow(row, &target_cell)); - - delete r; + // the not found + std::shared_ptr
t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + + // build RowReader::TRow + // cf must exist before call FindValueFromResultRow + RowResult result; + r_impl->SetResult(result); + gtxn_.start_ts_ = 11; + RowReader::TRow row; + r->ToMap(&row); + + // build target_cell + Cell target_cell(t1.get(), "r1", "cf1", "qu1"); + + // run test + EXPECT_FALSE(gtxn_.FindValueFromResultRow(row, &target_cell)); + + delete r; } TEST_F(GlobalTxnTest, FindValueFromResultRow2) { - // the not found write col - std::shared_ptr
t1 = OpenTable("t1"); - RowReader* r = t1->NewRowReader("r1"); - RowReaderImpl* r_impl = static_cast(r); - - // build RowReader::TRow - // cf must exist before call FindValueFromResultRow - RowResult result; - gtxn_.start_ts_ = 11; - - AddKeyValueToResult("r1", "cf1", "qu1", 9, "v1", &result); - AddKeyValueToResult("r1", "cf1", "qu1", 13, "v2", &result); - r_impl->SetResult(result); - - RowReader::TRow row; - r->ToMap(&row); - - // build target_cell - Cell target_cell(t1.get(), "r1", "cf1", "qu1"); - - // run test - EXPECT_FALSE(gtxn_.FindValueFromResultRow(row, &target_cell)); + // the not found write col + std::shared_ptr
t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); - delete r; + // build RowReader::TRow + // cf must exist before call FindValueFromResultRow + RowResult result; + gtxn_.start_ts_ = 11; + + AddKeyValueToResult("r1", "cf1", "qu1", 9, "v1", &result); + AddKeyValueToResult("r1", "cf1", "qu1", 13, "v2", &result); + r_impl->SetResult(result); + + RowReader::TRow row; + r->ToMap(&row); + + // build target_cell + Cell target_cell(t1.get(), "r1", "cf1", "qu1"); + + // run test + EXPECT_FALSE(gtxn_.FindValueFromResultRow(row, &target_cell)); + + delete r; } TEST_F(GlobalTxnTest, FindValueFromResultRow3) { - // the not found rigth version - std::shared_ptr
t1 = OpenTable("t1"); - RowReader* r = t1->NewRowReader("r1"); - RowReaderImpl* r_impl = static_cast(r); - - // build RowReader::TRow - // cf must exist before call FindValueFromResultRow - RowResult result; - gtxn_.start_ts_ = 11; - - AddKeyValueToResult("r1", "cf1", "qu1", 9, "v1", &result); - AddKeyValueToResult("r1", "cf1", "qu1", 13, "v2", &result); - - AddKeyValueToResult("r1", "cf1", PackWriteName("qu1"), 15, EncodeWriteValue(0, 13), &result); - // make ts = 9 v1 is deleted before this function called - AddKeyValueToResult("r1", "cf1", PackWriteName("qu1"), 12, EncodeWriteValue(1, 9), &result); - r_impl->SetResult(result); - RowReader::TRow row; - r->ToMap(&row); - - // build target_cell - Cell target_cell(t1.get(), "r1", "cf1", "qu1"); - // run test - EXPECT_FALSE(gtxn_.FindValueFromResultRow(row, &target_cell)); - - delete r; + // the not found rigth version + std::shared_ptr
t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + + // build RowReader::TRow + // cf must exist before call FindValueFromResultRow + RowResult result; + gtxn_.start_ts_ = 11; + + AddKeyValueToResult("r1", "cf1", "qu1", 9, "v1", &result); + AddKeyValueToResult("r1", "cf1", "qu1", 13, "v2", &result); + + AddKeyValueToResult("r1", "cf1", PackWriteName("qu1"), 15, EncodeWriteValue(0, 13), &result); + // make ts = 9 v1 is deleted before this function called + AddKeyValueToResult("r1", "cf1", PackWriteName("qu1"), 12, EncodeWriteValue(1, 9), &result); + r_impl->SetResult(result); + RowReader::TRow row; + r->ToMap(&row); + + // build target_cell + Cell target_cell(t1.get(), "r1", "cf1", "qu1"); + // run test + EXPECT_FALSE(gtxn_.FindValueFromResultRow(row, &target_cell)); + + delete r; } TEST_F(GlobalTxnTest, FindValueFromResultRow4) { - // the not found rigth version - std::shared_ptr
t1 = OpenTable("t1"); - RowReader* r = t1->NewRowReader("r1"); - RowReaderImpl* r_impl = static_cast(r); - - // build RowReader::TRow - // cf must exist before call FindValueFromResultRow - RowResult result; - gtxn_.start_ts_ = 11; - - AddKeyValueToResult("r1", "cf1", "qu1", 9, "v1", &result); - AddKeyValueToResult("r1", "cf1", "qu1", 13, "v2", &result); - - // maybe other older version clean by gc, before this function called - AddKeyValueToResult("r1", "cf1", PackWriteName("qu1"), 15, EncodeWriteValue(0, 13), &result); - r_impl->SetResult(result); - RowReader::TRow row; - r->ToMap(&row); - - // build target_cell - Cell target_cell(t1.get(), "r1", "cf1", "qu1"); - // run test - EXPECT_FALSE(gtxn_.FindValueFromResultRow(row, &target_cell)); - - delete r; + // the not found rigth version + std::shared_ptr
t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + + // build RowReader::TRow + // cf must exist before call FindValueFromResultRow + RowResult result; + gtxn_.start_ts_ = 11; + + AddKeyValueToResult("r1", "cf1", "qu1", 9, "v1", &result); + AddKeyValueToResult("r1", "cf1", "qu1", 13, "v2", &result); + + // maybe other older version clean by gc, before this function called + AddKeyValueToResult("r1", "cf1", PackWriteName("qu1"), 15, EncodeWriteValue(0, 13), &result); + r_impl->SetResult(result); + RowReader::TRow row; + r->ToMap(&row); + + // build target_cell + Cell target_cell(t1.get(), "r1", "cf1", "qu1"); + // run test + EXPECT_FALSE(gtxn_.FindValueFromResultRow(row, &target_cell)); + + delete r; } TEST_F(GlobalTxnTest, FindValueFromResultRow5) { - // the not found rigth version - std::shared_ptr
t1 = OpenTable("t1"); - RowReader* r = t1->NewRowReader("r1"); - RowReaderImpl* r_impl = static_cast(r); - - // build RowReader::TRow - // cf must exist before call FindValueFromResultRow - RowResult result; - gtxn_.start_ts_ = 11; - - // maybe version 1 was clean by gc, before this function called - AddKeyValueToResult("r1", "cf1", "qu1", 13, "v2", &result); - - AddKeyValueToResult("r1", "cf1", PackWriteName("qu1"), 15, EncodeWriteValue(0, 13), &result); - AddKeyValueToResult("r1", "cf1", PackWriteName("qu1"), 12, EncodeWriteValue(0, 9), &result); - r_impl->SetResult(result); - RowReader::TRow row; - r->ToMap(&row); - - // build target_cell - Cell target_cell(t1.get(), "r1", "cf1", "qu1"); - // run test - EXPECT_FALSE(gtxn_.FindValueFromResultRow(row, &target_cell)); - - // delete t1; - //delete r; + // the not found rigth version + std::shared_ptr
t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + + // build RowReader::TRow + // cf must exist before call FindValueFromResultRow + RowResult result; + gtxn_.start_ts_ = 11; + + // maybe version 1 was clean by gc, before this function called + AddKeyValueToResult("r1", "cf1", "qu1", 13, "v2", &result); + + AddKeyValueToResult("r1", "cf1", PackWriteName("qu1"), 15, EncodeWriteValue(0, 13), &result); + AddKeyValueToResult("r1", "cf1", PackWriteName("qu1"), 12, EncodeWriteValue(0, 9), &result); + r_impl->SetResult(result); + RowReader::TRow row; + r->ToMap(&row); + + // build target_cell + Cell target_cell(t1.get(), "r1", "cf1", "qu1"); + // run test + EXPECT_FALSE(gtxn_.FindValueFromResultRow(row, &target_cell)); + + // delete t1; + // delete r; } TEST_F(GlobalTxnTest, SetLastStatus) { - ErrorCode status; - status.SetFailed(ErrorCode::kOK, ""); - gtxn_.status_returned_ = false; - gtxn_.SetLastStatus(&status); - EXPECT_TRUE(gtxn_.status_returned_); - EXPECT_TRUE(gtxn_.status_.GetType() == status.GetType()); - - status.SetFailed(ErrorCode::kTimeout, ""); - gtxn_.status_returned_ = true; - gtxn_.SetLastStatus(&status); - EXPECT_TRUE(gtxn_.status_returned_); - EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kOK); + ErrorCode status; + status.SetFailed(ErrorCode::kOK, ""); + gtxn_.status_returned_ = false; + gtxn_.SetLastStatus(&status); + EXPECT_TRUE(gtxn_.status_returned_); + EXPECT_TRUE(gtxn_.status_.GetType() == status.GetType()); + + status.SetFailed(ErrorCode::kTimeout, ""); + gtxn_.status_returned_ = true; + gtxn_.SetLastStatus(&status); + EXPECT_TRUE(gtxn_.status_returned_); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kOK); } static bool g_callback_run_flag = false; TEST_F(GlobalTxnTest, RunUserCallback0) { - g_callback_run_flag = false; - gtxn_.SetCommitCallback([](Transaction* t) {g_callback_run_flag = true;}); - gtxn_.RunUserCallback(); - EXPECT_TRUE(g_callback_run_flag); + g_callback_run_flag = false; + gtxn_.SetCommitCallback([](Transaction* t) { g_callback_run_flag = true; }); + gtxn_.RunUserCallback(); + EXPECT_TRUE(g_callback_run_flag); } static void WaitWapper(GlobalTxn* gtxn) { - gtxn->WaitForComplete(); - g_callback_run_flag = true; + gtxn->WaitForComplete(); + g_callback_run_flag = true; } TEST_F(GlobalTxnTest, RunUserCallback1) { - g_callback_run_flag = false; - thread_pool_.AddTask(std::bind(&WaitWapper, >xn_)); - gtxn_.RunUserCallback(); - EXPECT_TRUE(gtxn_.finish_); - thread_pool_.Stop(true); - EXPECT_TRUE(g_callback_run_flag); + g_callback_run_flag = false; + thread_pool_.AddTask(std::bind(&WaitWapper, >xn_)); + gtxn_.RunUserCallback(); + EXPECT_TRUE(gtxn_.finish_); + thread_pool_.Stop(true); + EXPECT_TRUE(g_callback_run_flag); } TEST_F(GlobalTxnTest, BackoffAndMaybeCleanupLock0) { - bool try_clean = false; - ErrorCode status; - // make sure have lock_ts < start_ts - // can't found primary - std::shared_ptr
t1 = OpenTable("t1"); - RowReader* r = t1->NewRowReader("r1"); - RowReaderImpl* r_impl = static_cast(r); - - // build RowReader::TRow - // cf must exist before call FindValueFromResultRow - RowResult result; - gtxn_.start_ts_ = 11; - - // start_ts > lock ts and primary info is bad for parse - AddKeyValueToResult("r1", "cf1", PackLockName("qu1"), 9, "primary info", &result); - r_impl->SetResult(result); - RowReader::TRow row; - r->ToMap(&row); - - // build target_cell - Cell target_cell(t1.get(), "r1", "cf1", "qu1"); - // run test - gtxn_.BackoffAndMaybeCleanupLock(row, target_cell, try_clean, &status); - EXPECT_TRUE(status.GetType() == ErrorCode::kGTxnPrimaryLost); - delete r; + bool try_clean = false; + ErrorCode status; + // make sure have lock_ts < start_ts + // can't found primary + std::shared_ptr
t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + + // build RowReader::TRow + // cf must exist before call FindValueFromResultRow + RowResult result; + gtxn_.start_ts_ = 11; + + // start_ts > lock ts and primary info is bad for parse + AddKeyValueToResult("r1", "cf1", PackLockName("qu1"), 9, "primary info", &result); + r_impl->SetResult(result); + RowReader::TRow row; + r->ToMap(&row); + + // build target_cell + Cell target_cell(t1.get(), "r1", "cf1", "qu1"); + // run test + gtxn_.BackoffAndMaybeCleanupLock(row, target_cell, try_clean, &status); + EXPECT_TRUE(status.GetType() == ErrorCode::kGTxnPrimaryLost); + delete r; } TEST_F(GlobalTxnTest, RunAfterPrewriteFailed0) { - std::shared_ptr
t = OpenTable("t1"); - Cell cell(t.get(), "r1", "cf", "qu", 1, "val"); - Write w(cell); - std::vector ws; - ws.push_back(w); - PrewriteContext* ctx = new PrewriteContext(&ws, >xn_, NULL, w.TableName(), w.RowKey()); - ctx->status.SetFailed(ErrorCode::kOK, ""); - gtxn_.RunAfterPrewriteFailed(ctx); - EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kOK); + std::shared_ptr
t = OpenTable("t1"); + Cell cell(t.get(), "r1", "cf", "qu", 1, "val"); + Write w(cell); + std::vector ws; + ws.push_back(w); + PrewriteContext* ctx = new PrewriteContext(&ws, >xn_, NULL, w.TableName(), w.RowKey()); + ctx->status.SetFailed(ErrorCode::kOK, ""); + gtxn_.RunAfterPrewriteFailed(ctx); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kOK); } TEST_F(GlobalTxnTest, RunAfterPrewriteFailed1) { - std::shared_ptr
t = OpenTable("t1"); - Cell cell(t.get(), "r1", "cf", "qu", 1, "val"); - Write w(cell); - std::vector ws; - ws.push_back(w); - PrewriteContext* ctx = new PrewriteContext(&ws, >xn_, NULL, w.TableName(), w.RowKey()); - ctx->status.SetFailed(ErrorCode::kTimeout, ""); - gtxn_.RunAfterPrewriteFailed(ctx); - EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnPrewriteTimeout); + std::shared_ptr
t = OpenTable("t1"); + Cell cell(t.get(), "r1", "cf", "qu", 1, "val"); + Write w(cell); + std::vector ws; + ws.push_back(w); + PrewriteContext* ctx = new PrewriteContext(&ws, >xn_, NULL, w.TableName(), w.RowKey()); + ctx->status.SetFailed(ErrorCode::kTimeout, ""); + gtxn_.RunAfterPrewriteFailed(ctx); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnPrewriteTimeout); } TEST_F(GlobalTxnTest, RunAfterPrewriteFailed2) { - std::shared_ptr
t = OpenTable("t1"); - Cell cell(t.get(), "r1", "cf", "qu", 1, "val"); - Write w(cell); - std::vector ws; - ws.push_back(w); - PrewriteContext* ctx = new PrewriteContext(&ws, >xn_, NULL, w.TableName(), w.RowKey()); - gtxn_.gtxn_internal_->is_timeout_ = true; - gtxn_.RunAfterPrewriteFailed(ctx); - EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnPrewriteTimeout); + std::shared_ptr
t = OpenTable("t1"); + Cell cell(t.get(), "r1", "cf", "qu", 1, "val"); + Write w(cell); + std::vector ws; + ws.push_back(w); + PrewriteContext* ctx = new PrewriteContext(&ws, >xn_, NULL, w.TableName(), w.RowKey()); + gtxn_.gtxn_internal_->is_timeout_ = true; + gtxn_.RunAfterPrewriteFailed(ctx); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnPrewriteTimeout); } TEST_F(GlobalTxnTest, DoPrewriteCallback0) { - // case a. global timeout - std::shared_ptr
t = OpenTable("t1"); - Transaction* txn = t->StartRowTransaction("r1"); - SingleRowTxn* stxn = static_cast*>(txn)->GetTransactionPtr().get(); - Cell cell(t.get(), "r1", "cf", "qu", 1, "val"); - Write w(cell); - std::vector ws; - ws.push_back(w); - PrewriteContext* ctx = new PrewriteContext(&ws, >xn_, NULL, w.TableName(), w.RowKey()); - stxn->SetContext(ctx); - gtxn_.gtxn_internal_->is_timeout_ = true; - gtxn_.DoPrewriteCallback(stxn); - EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnPrewriteTimeout); + // case a. global timeout + std::shared_ptr
t = OpenTable("t1"); + Transaction* txn = t->StartRowTransaction("r1"); + SingleRowTxn* stxn = + static_cast*>(txn)->GetTransactionPtr().get(); + Cell cell(t.get(), "r1", "cf", "qu", 1, "val"); + Write w(cell); + std::vector ws; + ws.push_back(w); + PrewriteContext* ctx = new PrewriteContext(&ws, >xn_, NULL, w.TableName(), w.RowKey()); + stxn->SetContext(ctx); + gtxn_.gtxn_internal_->is_timeout_ = true; + gtxn_.DoPrewriteCallback(stxn); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnPrewriteTimeout); } TEST_F(GlobalTxnTest, DoPrewriteCallback1) { - // case b. this operator timeout - std::shared_ptr
t = OpenTable("t1"); - Transaction* txn = t->StartRowTransaction("r1"); - SingleRowTxn* stxn = static_cast*>(txn)->GetTransactionPtr().get(); - Cell cell(t.get(), "r1", "cf", "qu", 1, "val"); - Write w(cell); - std::vector ws; - ws.push_back(w); - PrewriteContext* ctx = new PrewriteContext(&ws, >xn_, NULL, w.TableName(), w.RowKey()); - stxn->SetContext(ctx); - stxn->mutation_buffer_.SetError(ErrorCode::kTimeout,""); - gtxn_.gtxn_internal_->is_timeout_ = false; - gtxn_.DoPrewriteCallback(stxn); - EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnPrewriteTimeout); + // case b. this operator timeout + std::shared_ptr
t = OpenTable("t1"); + Transaction* txn = t->StartRowTransaction("r1"); + SingleRowTxn* stxn = + static_cast*>(txn)->GetTransactionPtr().get(); + Cell cell(t.get(), "r1", "cf", "qu", 1, "val"); + Write w(cell); + std::vector ws; + ws.push_back(w); + PrewriteContext* ctx = new PrewriteContext(&ws, >xn_, NULL, w.TableName(), w.RowKey()); + stxn->SetContext(ctx); + stxn->mutation_buffer_.SetError(ErrorCode::kTimeout, ""); + gtxn_.gtxn_internal_->is_timeout_ = false; + gtxn_.DoPrewriteCallback(stxn); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnPrewriteTimeout); } TEST_F(GlobalTxnTest, DoPrewriteCallback2) { - // case b. this operator error - std::shared_ptr
t = OpenTable("t1"); - Transaction* txn = t->StartRowTransaction("r1"); - SingleRowTxn* stxn = static_cast*>(txn)->GetTransactionPtr().get(); - Cell cell(t.get(), "r1", "cf", "qu", 1, "val"); - Write w(cell); - std::vector ws; - ws.push_back(w); - PrewriteContext* ctx = new PrewriteContext(&ws, >xn_, NULL, w.TableName(), w.RowKey()); - stxn->SetContext(ctx); - stxn->mutation_buffer_.SetError(ErrorCode::kSystem,""); - gtxn_.gtxn_internal_->is_timeout_ = false; - gtxn_.DoPrewriteCallback(stxn); - EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kSystem); + // case b. this operator error + std::shared_ptr
t = OpenTable("t1"); + Transaction* txn = t->StartRowTransaction("r1"); + SingleRowTxn* stxn = + static_cast*>(txn)->GetTransactionPtr().get(); + Cell cell(t.get(), "r1", "cf", "qu", 1, "val"); + Write w(cell); + std::vector ws; + ws.push_back(w); + PrewriteContext* ctx = new PrewriteContext(&ws, >xn_, NULL, w.TableName(), w.RowKey()); + stxn->SetContext(ctx); + stxn->mutation_buffer_.SetError(ErrorCode::kSystem, ""); + gtxn_.gtxn_internal_->is_timeout_ = false; + gtxn_.DoPrewriteCallback(stxn); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kSystem); } TEST_F(GlobalTxnTest, VerifyPrimaryLocked) { - std::shared_ptr
t = OpenTable("t1"); - Cell cell(t.get(), "r1", "cf", "qu", 1, "val"); - Write w(cell); - gtxn_.primary_write_ = &w; - - ErrorCode mock_status; - mock_status.SetFailed(ErrorCode::kNotFound,""); - std::vector reader_errs; - reader_errs.push_back(mock_status); - (static_cast(t.get()))->AddReaderErrors(reader_errs); - - gtxn_.VerifyPrimaryLocked(); - EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnPrimaryLost); + std::shared_ptr
t = OpenTable("t1"); + Cell cell(t.get(), "r1", "cf", "qu", 1, "val"); + Write w(cell); + gtxn_.primary_write_ = &w; + + ErrorCode mock_status; + mock_status.SetFailed(ErrorCode::kNotFound, ""); + std::vector reader_errs; + reader_errs.push_back(mock_status); + (static_cast(t.get()))->AddReaderErrors(reader_errs); + + gtxn_.VerifyPrimaryLocked(); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnPrimaryLost); } - -} // namespace tera +} // namespace tera diff --git a/src/sdk/test/global_txn_test_tool.cc b/src/sdk/test/global_txn_test_tool.cc index 3a54c7393..b552296e3 100644 --- a/src/sdk/test/global_txn_test_tool.cc +++ b/src/sdk/test/global_txn_test_tool.cc @@ -1,7 +1,7 @@ // Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -// +// // Author: baorenyi@baidu.com #include "sdk/test/global_txn_test_tool.h" @@ -42,7 +42,7 @@ namespace tera { * * CONF_ROOR/cases/1/T_1/op_list [operations list] * Format of op_list: - * + * * TABLES:table_1,table_2,table_3 * GET table_1 r1 cf1 qu1 * PUT table_2 r2 cf2 qu2 valuex @@ -57,699 +57,723 @@ namespace tera { * **/ bool GlobalTxnTestTool::LoadTestConf() { - // list cases - const std::string case_dir = FLAGS_gtxn_test_case_dir; - std::vector file_list; - if (IsEmpty(case_dir) || !ListCurrentDir(case_dir, &file_list)) { - LOG(ERROR) << "list cases failed, dir:" << case_dir; - return false; - } - for (auto it = file_list.begin(); it != file_list.end(); ++it) { - if (FLAGS_case_number != "" && (*it) != FLAGS_case_number) { - continue; - } - const std::string& dir_name = case_dir + (*it); - - if (!IsDir(dir_name)) { - continue; - } - - int case_num = atoi((*it).c_str()); - if (case_num <= 0) { - LOG(ERROR) << "load case failed, dir:" << dir_name; - return false; - } - // list cases/x/schemas/ - std::vector schema_files; - const std::string& schema_dir = dir_name + "/schemas/"; - if (IsEmpty(schema_dir) || !ListCurrentDir(schema_dir, &schema_files)) { - LOG(ERROR) << "list case(" << dir_name << ") schemas failed"; - return false; - } - int schema_cnt = 0; - for (auto sit = schema_files.begin(); sit != schema_files.end(); ++sit) { - const std::string& schema_file = schema_dir + (*sit); - if (IsDir(schema_file)) { - continue; - } - // load schemas - TableDescriptor* desc = new TableDescriptor(); - if (LoadDescriptor(schema_file, desc)) { - if (case_desc_map_.find(case_num) == case_desc_map_.end()) { - case_desc_map_[case_num] = std::vector(); - } - case_desc_map_[case_num].push_back(desc); - ++schema_cnt; - } else { - delete desc; - LOG(ERROR) << "load schema failed, schema_file:" << schema_file; - break; - } - } - if (schema_cnt == 0) { - LOG(ERROR) << "schemafile not found"; - return false; - } - - // mark cases/x/T_xx/ - std::vector txn_list; - if (!ListCurrentDir(dir_name, &txn_list)) { - LOG(ERROR) << "find txn dir failed, dir:" << dir_name; - return false; - } - int reg_cnt = 0; - for(auto it = txn_list.begin(); it != txn_list.end(); ++it) { - if (!IsDir(dir_name + "/" + (*it)) || *it == "schemas") { - continue; - } - if ((*it).find("T_") != std::string::npos) { - // find transaction - int gtxn_id = atoi(((*it).substr(2)).c_str()); - if (gtxn_id <= 0) { - LOG(ERROR) << "mark gtxn conf failed, dir:" - << case_dir << "/" << dir_name; - return false; - } else { - CaseRegister(case_num, gtxn_id); - ++reg_cnt; - } - } - } - if (reg_cnt == 0) { - LOG(ERROR) << "transaction not found"; - return false; + // list cases + const std::string case_dir = FLAGS_gtxn_test_case_dir; + std::vector file_list; + if (IsEmpty(case_dir) || !ListCurrentDir(case_dir, &file_list)) { + LOG(ERROR) << "list cases failed, dir:" << case_dir; + return false; + } + for (auto it = file_list.begin(); it != file_list.end(); ++it) { + if (FLAGS_case_number != "all" && (*it) != FLAGS_case_number) { + continue; + } + const std::string& dir_name = case_dir + (*it); + + if (!IsDir(dir_name)) { + continue; + } + + int case_num = atoi((*it).c_str()); + if (case_num <= 0) { + LOG(ERROR) << "load case failed, dir:" << dir_name; + return false; + } + // list cases/x/schemas/ + std::vector schema_files; + const std::string& schema_dir = dir_name + "/schemas/"; + if (IsEmpty(schema_dir) || !ListCurrentDir(schema_dir, &schema_files)) { + LOG(ERROR) << "list case(" << dir_name << ") schemas failed"; + return false; + } + int schema_cnt = 0; + for (auto sit = schema_files.begin(); sit != schema_files.end(); ++sit) { + const std::string& schema_file = schema_dir + (*sit); + if (IsDir(schema_file)) { + continue; + } + // load schemas + TableDescriptor* desc = new TableDescriptor(); + if (LoadDescriptor(schema_file, desc)) { + if (case_desc_map_.find(case_num) == case_desc_map_.end()) { + case_desc_map_[case_num] = std::vector(); + } + case_desc_map_[case_num].push_back(desc); + ++schema_cnt; + } else { + delete desc; + LOG(ERROR) << "load schema failed, schema_file:" << schema_file; + break; + } + } + if (schema_cnt == 0) { + LOG(ERROR) << "schemafile not found"; + return false; + } + + // mark cases/x/T_xx/ + std::vector txn_list; + if (!ListCurrentDir(dir_name, &txn_list)) { + LOG(ERROR) << "find txn dir failed, dir:" << dir_name; + return false; + } + int reg_cnt = 0; + for (auto it = txn_list.begin(); it != txn_list.end(); ++it) { + if (!IsDir(dir_name + "/" + (*it)) || *it == "schemas") { + continue; + } + if ((*it).find("T_") != std::string::npos) { + // find transaction + int gtxn_id = atoi(((*it).substr(2)).c_str()); + if (gtxn_id <= 0) { + LOG(ERROR) << "mark gtxn conf failed, dir:" << case_dir << "/" << dir_name; + return false; + } else { + CaseRegister(case_num, gtxn_id); + ++reg_cnt; } + } } - return true; + if (reg_cnt == 0) { + LOG(ERROR) << "transaction not found"; + return false; + } + } + return true; } void GlobalTxnTestTool::CaseRegister(const int case_num, const int gtxn_id) { - CasePair case_pair(case_num, gtxn_id); - case_list_.push_back(case_pair); + CasePair case_pair(case_num, gtxn_id); + case_list_.push_back(case_pair); } -bool GlobalTxnTestTool::LoadDescriptor(const std::string& schema_file, +bool GlobalTxnTestTool::LoadDescriptor(const std::string& schema_file, TableDescriptor* table_desc) { - ErrorCode err; - if (!ParseTableSchemaFile(schema_file, table_desc, &err)) { - LOG(ERROR) << "fail to parse input table schema." << schema_file; - return false; - } - //ShowTableDescriptor(*table_desc, true); - return true; + ErrorCode err; + if (!ParseTableSchemaFile(schema_file, table_desc, &err)) { + LOG(ERROR) << "fail to parse input table schema." << schema_file; + return false; + } + // ShowTableDescriptor(*table_desc, true); + return true; } -GlobalTxnTestTool::GlobalTxnTestTool(Client* client): - thread_pool_(FLAGS_gtxn_test_thread_pool_size), - client_(client) { -} +GlobalTxnTestTool::GlobalTxnTestTool(Client* client) + : thread_pool_(FLAGS_gtxn_test_thread_pool_size), client_(client) {} void GlobalTxnTestTool::RunTest(tera::Client* client, int case_number) { - do_cnt_.Set(0); - done_cnt_.Set(0); - done_fail_cnt_.Set(0); - for (auto it = case_list_.begin(); it != case_list_.end(); ++it) { - CasePair case_pair = *it; - int case_num = case_pair.first; - if (case_number != -1 && case_num != case_number) { - continue; - } - int gtxn_id = case_pair.second; - - const std::string case_dir = FLAGS_gtxn_test_case_dir; - const std::string conf_dir = case_dir + std::to_string(case_num) - + "/T_" + std::to_string(gtxn_id); - const std::string& op_list_file = conf_dir + "/op_list"; - std::vector op_list; - std::ifstream ifile(op_list_file); - std::string line; - int cnt = 0; - while (std::getline(ifile, line)) { - if (cnt == 0) { - std::size_t found = line.find("TABLES:"); - if (found!=std::string::npos) { - std::vector tables; - SplitString(line.substr(found + 7), ",", &tables); - if (!OpenTestTables(tables)) { - return; - } - } - } else { - op_list.push_back(line); - } - ++cnt; - } - ifile.close(); - if (cnt < 1) { - LOG(ERROR) << "no operations in op_list"; + do_cnt_.Set(0); + done_cnt_.Set(0); + done_fail_cnt_.Set(0); + for (auto it = case_list_.begin(); it != case_list_.end(); ++it) { + CasePair case_pair = *it; + int case_num = case_pair.first; + if (case_number != -1 && case_num != case_number) { + continue; + } + int gtxn_id = case_pair.second; + + const std::string case_dir = FLAGS_gtxn_test_case_dir; + const std::string conf_dir = + case_dir + std::to_string(case_num) + "/T_" + std::to_string(gtxn_id); + const std::string& op_list_file = conf_dir + "/op_list"; + std::vector op_list; + std::ifstream ifile(op_list_file); + std::string line; + int cnt = 0; + while (std::getline(ifile, line)) { + if (cnt == 0) { + std::size_t found = line.find("TABLES:"); + if (found != std::string::npos) { + std::vector tables; + SplitString(line.substr(found + 7), ",", &tables); + if (!OpenTestTables(tables)) { + return; + } } - do_cnt_.Inc(); - ThreadPool::Task task = std::bind(&GlobalTxnTestTool::RunTestInternal, - this, client, case_num, gtxn_id, op_list); - thread_pool_.AddTask(task); + } else { + op_list.push_back(line); + } + ++cnt; + } + ifile.close(); + if (cnt < 1) { + LOG(ERROR) << "no operations in op_list"; } + do_cnt_.Inc(); + ThreadPool::Task task = + std::bind(&GlobalTxnTestTool::RunTestInternal, this, client, case_num, gtxn_id, op_list); + thread_pool_.AddTask(task); + } } -void GlobalTxnTestTool::RunTestInternal(tera::Client* client, const int case_num, const int gtxn_id, +void GlobalTxnTestTool::RunTestInternal(tera::Client* client, const int case_num, const int gtxn_id, const std::vector& op_list) { - const std::string case_dir = FLAGS_gtxn_test_case_dir; - const std::string conf_dir = case_dir + std::to_string(case_num) - + "/T_" + std::to_string(gtxn_id); - - // make sure flagfile only service for this transaction - tera::Transaction* gtxn = nullptr; - { - MutexLock lock(&mu_); - FLAGS_tera_gtxn_test_flagfile = conf_dir + "/gtxn.flag"; - gtxn = client->NewGlobalTransaction(); - } - - if (!FLAGS_gtxn_test_async_mode) { - std::vector result; - for (auto it = op_list.begin(); it != op_list.end(); ++it) { - const std::string& op_str = *it; - VLOG(12) << "OPERATION:" << op_str; - OpType op_type; - std::vector op_args; - if (!ParseOp(op_str, &op_type, &op_args) - || !DoOp(gtxn, op_type, op_args, &result)) { - LOG(ERROR) << gtxn->GetError().ToString(); - delete gtxn; - done_cnt_.Inc(); - return; - } - } - gtxn->Commit(); - result.push_back(std::to_string(gtxn->GetError().GetType())); - if(!CheckResult(case_num, gtxn_id, result)) { - done_fail_cnt_.Inc(); - } + LOG(INFO) << "Run GTXN Test BEGIN, T_" << gtxn_id; + const std::string case_dir = FLAGS_gtxn_test_case_dir; + const std::string conf_dir = + case_dir + std::to_string(case_num) + "/T_" + std::to_string(gtxn_id); + + // make sure flagfile only service for this transaction + tera::Transaction* gtxn = nullptr; + { + MutexLock lock(&mu_); + FLAGS_tera_gtxn_test_flagfile = conf_dir + "/gtxn.flag"; + gtxn = client->NewGlobalTransaction(); + } + + if (!FLAGS_gtxn_test_async_mode) { + std::vector result; + for (auto it = op_list.begin(); it != op_list.end(); ++it) { + const std::string& op_str = *it; + VLOG(12) << "OPERATION:" << op_str; + OpType op_type; + std::vector op_args; + if (!ParseOp(op_str, &op_type, &op_args) || !DoOp(gtxn, op_type, op_args, &result)) { + LOG(ERROR) << gtxn->GetError().ToString(); delete gtxn; done_cnt_.Inc(); + return; + } + } + LOG(INFO) << "Global Transaction Begin, T_" << gtxn_id; + gtxn->Commit(); + LOG(INFO) << "Global Transaction End, T_" << gtxn_id; + result.push_back(std::to_string(gtxn->GetError().GetType())); + if (!CheckResult(case_num, gtxn_id, result)) { + done_fail_cnt_.Inc(); + } + delete gtxn; + done_cnt_.Inc(); + } else { + if (op_list.size() > 0) { + GTxnTestContext* ctx = new GTxnTestContext(); + ctx->tool = this; + ctx->gtxn = gtxn; + ctx->op_list = op_list; + ctx->case_num = case_num; + ctx->gtxn_id = gtxn_id; + ctx->it = ctx->op_list.begin(); + const std::string& op_str = *(ctx->it); + VLOG(12) << "OPERATION:" << op_str; + OpType op_type; + std::vector op_args; + if (!ParseOp(op_str, &op_type, &op_args)) { + LOG(ERROR) << "parse op failed"; + delete ctx->gtxn; + delete ctx; + done_cnt_.Inc(); + return; + } + DoOpAsync(ctx, op_type, op_args); } else { - if (op_list.size() > 0) { - GTxnTestContext* ctx = new GTxnTestContext(); - ctx->tool = this; - ctx->gtxn = gtxn; - ctx->op_list = op_list; - ctx->case_num = case_num; - ctx->gtxn_id = gtxn_id; - ctx->it = ctx->op_list.begin(); - const std::string& op_str = *(ctx->it); - VLOG(12) << "OPERATION:" << op_str; - OpType op_type; - std::vector op_args; - if (!ParseOp(op_str, &op_type, &op_args)) { - LOG(ERROR) << "parse op failed"; - delete ctx->gtxn; - delete ctx; - done_cnt_.Inc(); - return; - } - DoOpAsync(ctx, op_type, op_args); - } else { - LOG(ERROR) << "not set operators"; - delete gtxn; - done_cnt_.Inc(); - } + LOG(ERROR) << "not set operators"; + delete gtxn; + done_cnt_.Inc(); } + } } bool GlobalTxnTestTool::OpenTestTables(const std::vector& tables) { - ErrorCode err; - MutexLock lock(&mu_); - for(auto it = tables.begin(); it != tables.end(); ++it) { - const std::string tablename = *it; - if (tables_.find(tablename) == tables_.end()) { - Table* table = client_->OpenTable(tablename, &err); - if (table == NULL) { - return false; - } - tables_[tablename] = table; - } + ErrorCode err; + MutexLock lock(&mu_); + for (auto it = tables.begin(); it != tables.end(); ++it) { + const std::string tablename = *it; + if (tables_.find(tablename) == tables_.end()) { + Table* table = client_->OpenTable(tablename, &err); + if (table == NULL) { + return false; + } + tables_[tablename] = table; } - return true; + } + return true; } -void GlobalTxnTestTool::DoOpAsync(GTxnTestContext* ctx, - const OpType& op_type, +void GlobalTxnTestTool::DoOpAsync(GTxnTestContext* ctx, const OpType& op_type, const std::vector& op_args) { - if (op_args.size() < 4) { + if (op_args.size() < 4) { + return; + } + Table* table = nullptr; + const std::string tablename = op_args[0]; + auto table_it = tables_.find(tablename); + if (table_it != tables_.end()) { + table = table_it->second; + } else { + return; + } + const std::string row = op_args[1]; + const std::string cf = op_args[2]; + const std::string qu = op_args[3]; + if (op_type == OpType::PUT && op_args.size() == 5) { + const std::string value = op_args[4]; + tera::RowMutation* m = table->NewRowMutation(row); + m->Put(cf, qu, value); + ctx->gtxn->ApplyMutation(m); + ctx->result.push_back("PUT: " + std::to_string(ctx->gtxn->GetError().GetType())); + delete m; + } else if (op_type == OpType::GET && op_args.size() == 4) { + tera::RowReader* r = table->NewRowReader(row); + r->AddColumn(cf, qu); + r->SetCallBack( + [](RowReader* r) { ((GTxnTestContext*)r->GetContext())->tool->DoOpAsyncCallback(r); }); + r->SetContext(ctx); + ctx->gtxn->Get(r); + return; + } else if (op_type == OpType::DEL && op_args.size() == 4) { + tera::RowMutation* m = table->NewRowMutation(row); + m->DeleteColumns(cf, qu); + ctx->gtxn->ApplyMutation(m); + ctx->result.push_back("DEL: " + std::to_string(ctx->gtxn->GetError().GetType())); + delete m; + } + + // this operation is muation , run next operation + if (op_type == OpType::PUT || op_type == OpType::DEL) { + if (++ctx->it != ctx->op_list.end()) { + const std::string& op_str = *(ctx->it); + VLOG(12) << "OPERATION:" << op_str; + OpType next_op_type; + std::vector next_op_args; + if (!ParseOp(op_str, &next_op_type, &next_op_args)) { + LOG(ERROR) << "parse op failed"; + delete ctx->gtxn; + delete ctx; + done_cnt_.Inc(); return; - } - Table* table = nullptr; - const std::string tablename = op_args[0]; - auto table_it = tables_.find(tablename); - if (table_it != tables_.end()) { - table = table_it->second; + } + DoOpAsync(ctx, next_op_type, next_op_args); } else { - return; - } - const std::string row = op_args[1]; - const std::string cf = op_args[2]; - const std::string qu = op_args[3]; - if (op_type == OpType::PUT && op_args.size() == 5) { - const std::string value = op_args[4]; - tera::RowMutation* m = table->NewRowMutation(row); - m->Put(cf, qu, value); - ctx->gtxn->ApplyMutation(m); - ctx->result.push_back("PUT: " + std::to_string(ctx->gtxn->GetError().GetType())); - delete m; - } else if (op_type == OpType::GET && op_args.size() == 4) { - tera::RowReader* r = table->NewRowReader(row); - r->AddColumn(cf, qu); - r->SetCallBack([] (RowReader* r) { - ((GTxnTestContext*)r->GetContext())->tool->DoOpAsyncCallback(r); - }); - r->SetContext(ctx); - ctx->gtxn->Get(r); - return; - } else if (op_type == OpType::DEL && op_args.size() == 4) { - tera::RowMutation* m = table->NewRowMutation(row); - m->DeleteColumns(cf, qu); - ctx->gtxn->ApplyMutation(m); - ctx->result.push_back("DEL: " + std::to_string(ctx->gtxn->GetError().GetType())); - delete m; - } - - // this operation is muation , run next operation - if (op_type == OpType::PUT || op_type == OpType::DEL) { - if (++ctx->it != ctx->op_list.end()) { - const std::string& op_str = *(ctx->it); - VLOG(12) << "OPERATION:" << op_str; - OpType next_op_type; - std::vector next_op_args; - if (!ParseOp(op_str, &next_op_type, &next_op_args)) { - LOG(ERROR) << "parse op failed"; - delete ctx->gtxn; - delete ctx; - done_cnt_.Inc(); - return; - } - DoOpAsync(ctx, next_op_type, next_op_args); - } else { - ctx->gtxn->SetCommitCallback([] (Transaction* t) { - ((GTxnTestContext*)t->GetContext())->tool->DoCommitCallback(t); - }); - ctx->gtxn->SetContext(ctx); - ctx->gtxn->Commit(); - } + ctx->gtxn->SetCommitCallback( + [](Transaction* t) { ((GTxnTestContext*)t->GetContext())->tool->DoCommitCallback(t); }); + ctx->gtxn->SetContext(ctx); + ctx->gtxn->Commit(); } + } } void GlobalTxnTestTool::DoOpAsyncCallback(RowReader* r) { - GTxnTestContext* ctx = (GTxnTestContext*)r->GetContext(); - if (r->GetError().GetType() == ErrorCode::kOK) { - while (!r->Done()) { - const std::string& result_item = "GET: " - + std::to_string(r->GetError().GetType()) + " " - + std::to_string(r->Timestamp()) + ":" + r->Value(); - ctx->result.push_back(result_item); - r->Next(); - } - } else if (r->GetError().GetType() == ErrorCode::kNotFound) { - ctx->result.push_back("GET: " + std::to_string(r->GetError().GetType())); - } else { - ctx->result.push_back("GET: " + std::to_string(r->GetError().GetType())); - } - delete r; - // if not last, call next operation - if (++ctx->it != ctx->op_list.end()) { - const std::string& op_str = *(ctx->it); - VLOG(12) << "OPERATION:" << op_str; - OpType next_op_type; - std::vector next_op_args; - if (!ParseOp(op_str, &next_op_type, &next_op_args)) { - LOG(ERROR) << "parse op failed"; - delete ctx->gtxn; - delete ctx; - done_cnt_.Inc(); - return; - } - DoOpAsync(ctx, next_op_type, next_op_args); - } else { - ctx->gtxn->SetCommitCallback([] (Transaction* t) { - ((GTxnTestContext*)t->GetContext())->tool->DoCommitCallback(t); - }); - ctx->gtxn->SetContext(ctx); - ctx->gtxn->Commit(); - } + GTxnTestContext* ctx = (GTxnTestContext*)r->GetContext(); + if (r->GetError().GetType() == ErrorCode::kOK) { + while (!r->Done()) { + const std::string& result_item = "GET: " + std::to_string(r->GetError().GetType()) + " " + + std::to_string(r->Timestamp()) + ":" + r->Value(); + ctx->result.push_back(result_item); + r->Next(); + } + } else if (r->GetError().GetType() == ErrorCode::kNotFound) { + ctx->result.push_back("GET: " + std::to_string(r->GetError().GetType())); + } else { + ctx->result.push_back("GET: " + std::to_string(r->GetError().GetType())); + } + delete r; + // if not last, call next operation + if (++ctx->it != ctx->op_list.end()) { + const std::string& op_str = *(ctx->it); + VLOG(12) << "OPERATION:" << op_str; + OpType next_op_type; + std::vector next_op_args; + if (!ParseOp(op_str, &next_op_type, &next_op_args)) { + LOG(ERROR) << "parse op failed"; + delete ctx->gtxn; + delete ctx; + done_cnt_.Inc(); + return; + } + DoOpAsync(ctx, next_op_type, next_op_args); + } else { + ctx->gtxn->SetCommitCallback( + [](Transaction* t) { ((GTxnTestContext*)t->GetContext())->tool->DoCommitCallback(t); }); + ctx->gtxn->SetContext(ctx); + ctx->gtxn->Commit(); + } } void GlobalTxnTestTool::DoCommitCallback(Transaction* t) { - GTxnTestContext* ctx = (GTxnTestContext*)t->GetContext(); - - ctx->result.push_back(std::to_string(t->GetError().GetType())); - if (!CheckResult(ctx->case_num, ctx->gtxn_id, ctx->result)) { - done_fail_cnt_.Inc(); - } - delete ctx; - delete t; - done_cnt_.Inc(); + GTxnTestContext* ctx = (GTxnTestContext*)t->GetContext(); + + ctx->result.push_back(std::to_string(t->GetError().GetType())); + if (!CheckResult(ctx->case_num, ctx->gtxn_id, ctx->result)) { + done_fail_cnt_.Inc(); + } + delete ctx; + delete t; + done_cnt_.Inc(); } -bool GlobalTxnTestTool::DoOp(tera::Transaction* gtxn, - const OpType& op_type, +bool GlobalTxnTestTool::DoOp(tera::Transaction* gtxn, const OpType& op_type, const std::vector& op_args, std::vector* result) { - if (op_args.size() < 4) { - return false; - } - Table* table = nullptr; - const std::string tablename = op_args[0]; - auto table_it = tables_.find(tablename); - if (table_it != tables_.end()) { - table = table_it->second; - } else { - return false; - } - const std::string row = op_args[1]; - const std::string cf = op_args[2]; - const std::string qu = op_args[3]; - if (op_type == OpType::PUT && op_args.size() == 5) { - const std::string value = op_args[4]; - std::unique_ptr m(table->NewRowMutation(row)); - m->Put(cf, qu, value); - gtxn->ApplyMutation(m.get()); - result->push_back("PUT: " + std::to_string(gtxn->GetError().GetType())); - return true; - } else if (op_type == OpType::GET && op_args.size() == 4) { - std::unique_ptr r(table->NewRowReader(row)); - r->AddColumn(cf, qu); - gtxn->Get(r.get()); - if (r->GetError().GetType() == ErrorCode::kOK) { - while (!r->Done()) { - const std::string& result_item = "GET: " - + std::to_string(r->GetError().GetType()) + " " - + std::to_string(r->Timestamp()) + ":" + r->Value(); - result->push_back(result_item); - r->Next(); - } - return true; - } else if (r->GetError().GetType() == ErrorCode::kNotFound) { - result->push_back("GET: " + std::to_string(r->GetError().GetType())); - return true; - } else { - result->push_back("GET: " + std::to_string(r->GetError().GetType())); - } - } else if (op_type == OpType::DEL && op_args.size() == 4) { - std::unique_ptr m(table->NewRowMutation(row)); - m->DeleteColumns(cf, qu); - gtxn->ApplyMutation(m.get()); - result->push_back("DEL: " + std::to_string(gtxn->GetError().GetType())); - return true; - } + if (op_args.size() < 4) { return false; -} - -bool GlobalTxnTestTool::ParseOp(const std::string& op_str, - OpType* op_type, std::vector* op_args) { - std::vector args; - SplitString(op_str, " ", &args); - if (TrimString(args[0]) == "PUT") { - *op_type = OpType::PUT; - } else if (TrimString(args[0]) == "GET") { - *op_type = OpType::GET; - } else if (TrimString(args[0]) == "DEL") { - *op_type = OpType::DEL; + } + Table* table = nullptr; + const std::string tablename = op_args[0]; + auto table_it = tables_.find(tablename); + if (table_it != tables_.end()) { + table = table_it->second; + } else { + return false; + } + const std::string row = op_args[1]; + const std::string cf = op_args[2]; + const std::string qu = op_args[3]; + if (op_type == OpType::PUT && op_args.size() == 5) { + const std::string value = op_args[4]; + std::unique_ptr m(table->NewRowMutation(row)); + m->Put(cf, qu, value); + gtxn->ApplyMutation(m.get()); + result->push_back("PUT: " + std::to_string(gtxn->GetError().GetType())); + return true; + } else if (op_type == OpType::GET && op_args.size() == 4) { + std::unique_ptr r(table->NewRowReader(row)); + r->AddColumn(cf, qu); + gtxn->Get(r.get()); + LOG(INFO) << "TEST: " << tablename << " " << row << " " << cf << " " << qu; + if (r->GetError().GetType() == ErrorCode::kOK) { + while (!r->Done()) { + const std::string& result_item = "GET: " + std::to_string(r->GetError().GetType()) + " " + + std::to_string(r->Timestamp()) + ":" + r->Value(); + result->push_back(result_item); + r->Next(); + } + return true; + } else if (r->GetError().GetType() == ErrorCode::kNotFound) { + result->push_back("GET: " + std::to_string(r->GetError().GetType())); + return true; } else { - LOG(ERROR) << "operation type not support :[" << TrimString(args[0]) << "]"; - return false; - } - for (size_t i = 1; i < args.size(); ++i) { - op_args->push_back(TrimString(args[i])); + result->push_back("GET: " + std::to_string(r->GetError().GetType())); } + } else if (op_type == OpType::DEL && op_args.size() == 4) { + std::unique_ptr m(table->NewRowMutation(row)); + m->DeleteColumns(cf, qu); + gtxn->ApplyMutation(m.get()); + result->push_back("DEL: " + std::to_string(gtxn->GetError().GetType())); return true; + } + return false; +} + +bool GlobalTxnTestTool::ParseOp(const std::string& op_str, OpType* op_type, + std::vector* op_args) { + std::vector args; + SplitString(op_str, " ", &args); + if (TrimString(args[0]) == "PUT") { + *op_type = OpType::PUT; + } else if (TrimString(args[0]) == "GET") { + *op_type = OpType::GET; + } else if (TrimString(args[0]) == "DEL") { + *op_type = OpType::DEL; + } else { + LOG(ERROR) << "operation type not support :[" << TrimString(args[0]) << "]"; + return false; + } + for (size_t i = 1; i < args.size(); ++i) { + op_args->push_back(TrimString(args[i])); + } + return true; } void GlobalTxnTestTool::DebugOpList(const std::string& op_list_file) { - std::vector op_list; - std::ifstream ofile(op_list_file); - std::string line; - int cnt = 0; - while (std::getline(ofile, line)) { - op_list.push_back(line); - ++cnt; - } - ofile.close(); - if (cnt < 1) { - LOG(ERROR) << "no operators in op_list"; - } - std::cout << "OpList:" << std::endl; - for (auto l : op_list) { - std::cout << l < op_list; + std::ifstream ofile(op_list_file); + std::string line; + int cnt = 0; + while (std::getline(ofile, line)) { + op_list.push_back(line); + ++cnt; + } + ofile.close(); + if (cnt < 1) { + LOG(ERROR) << "no operators in op_list"; + } + std::cout << "OpList:" << std::endl; + for (auto l : op_list) { + std::cout << l << std::endl; + } + std::cout << "-------------------------------------------" << std::endl; } void GlobalTxnTestTool::DebugFlagFile(const std::string& flag_file) { - std::vector flag_list; - std::ifstream ofile(flag_file); - std::string line; - int cnt = 0; - while (std::getline(ofile, line)) { - flag_list.push_back(line); - ++cnt; - } - ofile.close(); - if (cnt < 1) { - LOG(ERROR) << "no flags in gtxn.flag"; - } - std::cout << "FLAGS:" << std::endl; - for (auto f : flag_list) { - std::string flag = TrimString(f); - if (flag.length() > 0 && flag[0] == '#') { - continue; - } - std::cout << flag < flag_list; + std::ifstream ofile(flag_file); + std::string line; + int cnt = 0; + while (std::getline(ofile, line)) { + flag_list.push_back(line); + ++cnt; + } + ofile.close(); + if (cnt < 1) { + LOG(ERROR) << "no flags in gtxn.flag"; + } + std::cout << "FLAGS:" << std::endl; + for (auto f : flag_list) { + std::string flag = TrimString(f); + if (flag.length() > 0 && flag[0] == '#') { + continue; + } + std::cout << flag << std::endl; + } + std::cout << "-------------------------------------------" << std::endl; +} -bool GlobalTxnTestTool::CheckResult(const int case_num, const int gtxn_id, +bool GlobalTxnTestTool::CheckResult(const int case_num, const int gtxn_id, const std::vector& result) { - MutexLock lock(&mu_); - const std::string case_dir = FLAGS_gtxn_test_case_dir; - const std::string conf_dir = case_dir + std::to_string(case_num) - + "/T_" + std::to_string(gtxn_id); - std::cout << "===========================================" << std::endl; - std::cout << "CASE:" << case_num << " GTXN_ID:" << gtxn_id << std::endl; - if (FLAGS_gtxn_test_debug_opened) { - const std::string& op_list_file = conf_dir + "/op_list"; - const std::string& flag_file = conf_dir + "/gtxn.flag"; - DebugOpList(op_list_file); - DebugFlagFile(flag_file); - std::cout << "Result Printing:" << std::endl; - for (auto it = result.begin(); it != result.end(); ++it) { - std::cout << "RESULT:" << *it << std::endl; - } - std::cout << "-------------------------------------------" << std::endl; - } - - VLOG(12) << "case:" << case_num - << " gtxn_id:" << gtxn_id << " Printing"; + MutexLock lock(&mu_); + const std::string case_dir = FLAGS_gtxn_test_case_dir; + const std::string conf_dir = + case_dir + std::to_string(case_num) + "/T_" + std::to_string(gtxn_id); + std::cout << "===========================================" << std::endl; + std::cout << "CASE:" << case_num << " GTXN_ID:" << gtxn_id << std::endl; + if (FLAGS_gtxn_test_debug_opened) { + const std::string& op_list_file = conf_dir + "/op_list"; + const std::string& flag_file = conf_dir + "/gtxn.flag"; + DebugOpList(op_list_file); + DebugFlagFile(flag_file); + std::cout << "Result Printing:" << std::endl; for (auto it = result.begin(); it != result.end(); ++it) { - VLOG(12) << "RESULT:" << *it; - } - - const std::string& result_list_file = conf_dir + "/result_list"; - std::vector result_list; - std::ifstream ofile(result_list_file); - std::string line; - int cnt = 0; - while (std::getline(ofile, line)) { - result_list.push_back(line); - ++cnt; - } - ofile.close(); - if (cnt < 1) { - LOG(ERROR) << "no results in result_list"; - return false; - } + std::cout << "RESULT:" << *it << std::endl; + } + std::cout << "-------------------------------------------" << std::endl; + } + + VLOG(12) << "case:" << case_num << " gtxn_id:" << gtxn_id << " Printing"; + for (auto it = result.begin(); it != result.end(); ++it) { + VLOG(12) << "RESULT:" << *it; + } + + const std::string& result_list_file = conf_dir + "/result_list"; + std::vector result_list; + std::ifstream ofile(result_list_file); + std::string line; + int cnt = 0; + while (std::getline(ofile, line)) { + result_list.push_back(line); + ++cnt; + } + ofile.close(); + if (cnt < 1) { + LOG(ERROR) << "no results in result_list"; + return false; + } - if (result_list.size() != result.size()) { - std::cout << "\tERROR[expect_line_count: " << result_list.size() << " actual_line_count: " << result.size() << "]\n"; - return false; - } else { - int have_diff = 0; - for (size_t i = 0; i < result.size(); ++i) { - const std::string& ret = result[i]; - const std::string& default_ret = result_list[i]; - if (TrimString(ret) != TrimString(default_ret)) { - std::cout << "\tERROR[expect: (" << default_ret << ") actual: (" << ret << ")]\n"; - ++have_diff; - } - } - if (have_diff > 0) { - std::cout << "FAILED :" << have_diff << std::endl; - return false; - } - } - std::cout << "SUCCEED" << std::endl; - return true; + if (result_list.size() != result.size()) { + std::cout << "\tERROR[expect_line_count: " << result_list.size() + << " actual_line_count: " << result.size() << "]\n"; + return false; + } else { + int have_diff = 0; + for (size_t i = 0; i < result.size(); ++i) { + const std::string& ret = result[i]; + const std::string& default_ret = result_list[i]; + if (TrimString(ret) != TrimString(default_ret)) { + std::cout << "\tERROR[expect: (" << default_ret << ") actual: (" << ret << ")]\n"; + ++have_diff; + } + } + if (have_diff > 0) { + std::cout << "FAILED :" << have_diff << std::endl; + return false; + } + } + std::cout << "SUCCEED" << std::endl; + return true; } bool GlobalTxnTestTool::InitTestTables(int case_num) { - ErrorCode err; - std::unordered_map table_map; - for (auto it = case_desc_map_.begin(); it != case_desc_map_.end(); ++it) { - if (case_num != -1 && case_num != it->first) { - continue; - } - std::vector& desc_list = it->second; - for (auto dit = desc_list.begin(); dit != desc_list.end(); ++dit) { - TableDescriptor* desc = (*dit); - const std::string& tablename = desc->TableName(); - if (table_map.find(tablename) == table_map.end()) { - table_map[tablename] = desc; - } - } - } - - for (auto& table : table_map) { - if (client_->CreateTable(*(table.second), &err) && err.GetType() == ErrorCode::kOK) { - VLOG(12) << "create table " << table.first << " ok"; - } else { - LOG(ERROR) << "create table " << table.first << " failed"; - return false; - } - } - return true; + ErrorCode err; + std::unordered_map table_map; + for (auto it = case_desc_map_.begin(); it != case_desc_map_.end(); ++it) { + if (case_num != -1 && case_num != it->first) { + continue; + } + std::vector& desc_list = it->second; + for (auto dit = desc_list.begin(); dit != desc_list.end(); ++dit) { + TableDescriptor* desc = (*dit); + const std::string& tablename = desc->TableName(); + if (table_map.find(tablename) == table_map.end()) { + table_map[tablename] = desc; + } + } + } + + for (auto& table : table_map) { + if (client_->CreateTable(*(table.second), &err) && err.GetType() == ErrorCode::kOK) { + VLOG(12) << "create table " << table.first << " ok"; + } else { + LOG(ERROR) << "create table " << table.first << " failed"; + return false; + } + const std::string& tablename = table.first; + std::shared_ptr client_impl( + (static_cast(client_))->GetClientImpl()); + TableMeta table_meta; + TabletMetaList tablet_list; + while (true) { + if (!client_impl->ShowTablesInfo(tablename, &table_meta, &tablet_list, &err)) { + LOG(ERROR) << "table not exist: " << tablename; + continue; + } + uint64_t tablet_num = tablet_list.meta_size(); + uint64_t tablet_cnt = 0; + for (int32_t i = 0; i < tablet_list.meta_size(); ++i) { + const TabletMeta& tablet = tablet_list.meta(i); + if (tablet.status() == TabletMeta::kTabletReady) { + tablet_cnt++; + } + } + if (tablet_cnt == tablet_num) { + // disable finish + break; + } + usleep(10000); + } + } + return true; } bool GlobalTxnTestTool::DropTestTables(int case_num) { - ErrorCode err; - std::unordered_map table_map; - for (auto it = case_desc_map_.begin(); it != case_desc_map_.end(); ++it) { - if (case_num != -1 && case_num != it->first) { - continue; - } - std::vector& desc_list = it->second; - for (auto dit = desc_list.begin(); dit != desc_list.end(); ++dit) { - TableDescriptor* desc = (*dit); - const std::string& tablename = desc->TableName(); - if (table_map.find(tablename) == table_map.end()) { - table_map[tablename] = desc; - } - } - } - - for (auto& table : table_map) { - const std::string& tablename = table.first; - if (!client_->DisableTable(tablename, &err)) { - LOG(ERROR) << "disable table failed, table: " << tablename; - return false; - } - TableMeta table_meta; - TabletMetaList tablet_list; - tera::ClientImpl* client_impl = static_cast(client_); - if (!client_impl->ShowTablesInfo(tablename, &table_meta, &tablet_list, &err)) { - LOG(ERROR) << "table not exist: " << tablename; - return false; - } - - uint64_t tablet_num = tablet_list.meta_size(); - while (true) { - if (!client_impl->ShowTablesInfo(tablename, &table_meta, &tablet_list, &err)) { - LOG(ERROR) << "table not exist: " << tablename; - return false; - } - uint64_t tablet_cnt = 0; - for (int32_t i = 0; i < tablet_list.meta_size(); ++i) { - const TabletMeta& tablet = tablet_list.meta(i); - if (tablet.status() == TabletMeta::kTabletDisable || - tablet.status() == TabletMeta::kTabletOffline) { - tablet_cnt++; - } - } - if (tablet_cnt == tablet_num) { - // disable finish - break; - } - sleep(1); - } - - if (!client_->DropTable(tablename, &err)) { - LOG(ERROR) << "drop table " << tablename << " failed"; - return false; - } - } - return true; + ErrorCode err; + std::unordered_map table_map; + for (auto it = case_desc_map_.begin(); it != case_desc_map_.end(); ++it) { + if (case_num != -1 && case_num != it->first) { + continue; + } + std::vector& desc_list = it->second; + for (auto dit = desc_list.begin(); dit != desc_list.end(); ++dit) { + TableDescriptor* desc = (*dit); + const std::string& tablename = desc->TableName(); + if (table_map.find(tablename) == table_map.end()) { + table_map[tablename] = desc; + } + } + } + + for (auto& table : table_map) { + const std::string& tablename = table.first; + std::shared_ptr client_impl( + (static_cast(client_))->GetClientImpl()); + TableMeta table_meta; + TabletMetaList tablet_list; + while (true) { + if (!client_impl->ShowTablesInfo(tablename, &table_meta, &tablet_list, &err)) { + LOG(ERROR) << "table not exist: " << tablename; + return false; + } + if (table_meta.status() != kTableDisable) { + client_->DisableTable(tablename, &err); + usleep(10000); + } else { + LOG(INFO) << "disable table succ, table: " << tablename; + break; + } + } + uint64_t tablet_num = tablet_list.meta_size(); + while (true) { + if (!client_impl->ShowTablesInfo(tablename, &table_meta, &tablet_list, &err)) { + LOG(ERROR) << "table not exist: " << tablename; + continue; + } + uint64_t tablet_cnt = 0; + for (int32_t i = 0; i < tablet_list.meta_size(); ++i) { + const TabletMeta& tablet = tablet_list.meta(i); + if (tablet.status() == TabletMeta::kTabletDisable || + tablet.status() == TabletMeta::kTabletOffline) { + tablet_cnt++; + } + } + if (tablet_cnt == tablet_num) { + // disable finish + break; + } + usleep(10000); + } + + if (!client_->DropTable(tablename, &err)) { + LOG(ERROR) << "drop table " << tablename << " failed"; + continue; + } + } + return true; } void GlobalTxnTestTool::Wait() { - while(do_cnt_.Get() > done_cnt_.Get()) { - sleep(1); - } + while (do_cnt_.Get() > done_cnt_.Get()) { + sleep(1); + } } void GlobalTxnTestTool::RunCaseOneByOne() { - std::set cases; - for (auto it = case_list_.begin(); it != case_list_.end(); ++it) { - CasePair case_pair = *it; - int case_num = case_pair.first; - cases.insert(case_num); - } - for (auto& case_num : cases) { - LOG(INFO) << "GlobalTxnTest Case " << case_num << " Begin"; - // drop table - if (FLAGS_gtxn_test_drop_table_before) { - DropTestTables(case_num); - } - - if (!InitTestTables(case_num)) { - LOG(ERROR) << "GlobalTxnTest Case " << case_num - << " InitTestTables Failed"; - if (FLAGS_ignore_bad_case == true) { - continue; - } else { - break; - } - } - RunTest(client_, case_num); - Wait(); - LOG(INFO) << "GlobalTxnTest Case " << case_num << " Finish"; - if (done_fail_cnt_.Get() > 0) { - if (FLAGS_ignore_bad_case == true) { - continue; - } else { - break; - } - } - } + std::set cases; + for (auto it = case_list_.begin(); it != case_list_.end(); ++it) { + CasePair case_pair = *it; + int case_num = case_pair.first; + cases.insert(case_num); + } + for (auto& case_num : cases) { + LOG(INFO) << "GlobalTxnTest Case " << case_num << " Begin"; + // drop table + if (FLAGS_gtxn_test_drop_table_before) { + DropTestTables(case_num); + } + if (!InitTestTables(case_num)) { + LOG(ERROR) << "GlobalTxnTest Case " << case_num << " InitTestTables Failed"; + if (FLAGS_ignore_bad_case == true) { + continue; + } else { + break; + } + } + RunTest(client_, case_num); + Wait(); + LOG(INFO) << "GlobalTxnTest Case " << case_num << " Finish"; + if (done_fail_cnt_.Get() > 0) { + if (FLAGS_ignore_bad_case == true) { + continue; + } else { + break; + } + } + } } -} // namespace tera - - -int main(int argc, char *argv[]){ - ::google::ParseCommandLineFlags(&argc, &argv, true); - - if (argc > 1 && std::string(argv[1]) == "version") { - PrintSystemVersion(); - return 0; - } - if (FLAGS_gtxn_test_conf_dir == "") { - LOG(ERROR) << "not set \"--gtxn_test_conf_dir\""; - return -1; - } - if (FLAGS_gtxn_test_case_dir == "") { - LOG(ERROR) << "not set \"--gtxn_test_case_dir\""; - return -1; - } - - tera::ErrorCode error_code; - tera::Client* client = tera::Client::NewClient(FLAGS_gtxn_test_conf_dir + "/tera.flag", - &error_code); - if (client == NULL) { - return -1; - } - - tera::GlobalTxnTestTool gtxn_test_tool(client); - // init table - if (!gtxn_test_tool.LoadTestConf()) { - return -1; - } - gtxn_test_tool.RunCaseOneByOne(); +} // namespace tera + +int main(int argc, char* argv[]) { + ::google::ParseCommandLineFlags(&argc, &argv, true); + if (argc > 1 && std::string(argv[1]) == "version") { + PrintSystemVersion(); return 0; + } + if (FLAGS_gtxn_test_conf_dir == "") { + LOG(ERROR) << "not set \"--gtxn_test_conf_dir\""; + return -1; + } + if (FLAGS_gtxn_test_case_dir == "") { + LOG(ERROR) << "not set \"--gtxn_test_case_dir\""; + return -1; + } + + tera::ErrorCode error_code; + tera::Client* client = + tera::Client::NewClient(FLAGS_gtxn_test_conf_dir + "/tera.flag", &error_code); + if (client == NULL) { + LOG(ERROR) << "Init Client Failed, Check " << FLAGS_gtxn_test_conf_dir << "/tera.flag"; + return -1; + } + + LOG(INFO) << "Start Load GlobalTxnTest Case " << FLAGS_case_number; + + tera::GlobalTxnTestTool gtxn_test_tool(client); + + if (!gtxn_test_tool.LoadTestConf()) { + return -1; + } + + LOG(INFO) << "Start Run GlobalTxnTest Case " << FLAGS_case_number; + gtxn_test_tool.RunCaseOneByOne(); + LOG(INFO) << "Run GlobalTxnTest Case " << FLAGS_case_number << " Succ"; + return 0; } diff --git a/src/sdk/test/global_txn_test_tool.h b/src/sdk/test/global_txn_test_tool.h index 7acf12644..4a03d2f61 100644 --- a/src/sdk/test/global_txn_test_tool.h +++ b/src/sdk/test/global_txn_test_tool.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_SDK_TEST_GLOBAL_TXN_TEST_TOOL_H_ -#define TERA_SDK_TEST_GLOBAL_TXN_TEST_TOOL_H_ +#ifndef TERA_SDK_TEST_GLOBAL_TXN_TEST_TOOL_H_ +#define TERA_SDK_TEST_GLOBAL_TXN_TEST_TOOL_H_ #include @@ -17,79 +17,75 @@ enum OpType { GET, PUT, DEL }; class GlobalTxnTestTool; - struct GTxnTestContext { - GlobalTxnTestTool* tool; - tera::Transaction* gtxn; - std::vector op_list; - std::vector result; - std::vector::iterator it; - int case_num; - int gtxn_id; + GlobalTxnTestTool* tool; + tera::Transaction* gtxn; + std::vector op_list; + std::vector result; + std::vector::iterator it; + int case_num; + int gtxn_id; }; class GlobalTxnTestTool { -public: - GlobalTxnTestTool(Client* client); - ~GlobalTxnTestTool(){} + public: + GlobalTxnTestTool(Client* client); + ~GlobalTxnTestTool() {} + + bool LoadTestConf(); + + bool InitTestTables(int case_num = -1); - bool LoadTestConf(); + bool DropTestTables(int case_num = -1); - bool InitTestTables(int case_num = -1); + void RunTest(tera::Client* client, int case_num = -1); - bool DropTestTables(int case_num = -1); + void Wait(); - void RunTest(tera::Client* client, int case_num = -1); + void RunCaseOneByOne(); - void Wait(); + private: + void RunTestInternal(tera::Client* client, const int case_num, const int gtxn_id, + const std::vector& op_list); - void RunCaseOneByOne(); -private: - void RunTestInternal(tera::Client* client, const int case_num, const int gtxn_id, - const std::vector& op_list); + void CaseRegister(const int case_num, const int gtxn_id); - void CaseRegister(const int case_num, const int gtxn_id); + bool LoadDescriptor(const std::string& schema_file, TableDescriptor* schema); - bool LoadDescriptor(const std::string& schema_file, TableDescriptor* schema); + void DebugOpList(const std::string& op_list_file); - void DebugOpList(const std::string& op_list_file); + void DebugFlagFile(const std::string& flag_file); - void DebugFlagFile(const std::string& flag_file); - - bool CheckResult(const int case_num, const int gtxn_id, - const std::vector& result); + bool CheckResult(const int case_num, const int gtxn_id, const std::vector& result); - bool ParseOp(const std::string& op_str, - OpType* op_type, std::vector* op_args); + bool ParseOp(const std::string& op_str, OpType* op_type, std::vector* op_args); - bool DoOp(tera::Transaction* gtxn, - const OpType& op_type, - const std::vector& op_args, - std::vector* result); + bool DoOp(tera::Transaction* gtxn, const OpType& op_type, const std::vector& op_args, + std::vector* result); - void DoOpAsync(GTxnTestContext* ctx, const OpType& op_type, - const std::vector& op_args); + void DoOpAsync(GTxnTestContext* ctx, const OpType& op_type, + const std::vector& op_args); - void DoOpAsyncCallback(tera::RowReader* r); + void DoOpAsyncCallback(tera::RowReader* r); - void DoCommitCallback(tera::Transaction* t); + void DoCommitCallback(tera::Transaction* t); - bool OpenTestTables(const std::vector& tables); + bool OpenTestTables(const std::vector& tables); -private: - typedef std::pair CasePair; - std::vector case_list_; - typedef std::map> CaseDescMap; - CaseDescMap case_desc_map_; - std::map tables_; - mutable Mutex mu_; - common::ThreadPool thread_pool_; - Client* client_; - Counter do_cnt_; - Counter done_cnt_; - Counter done_fail_cnt_; + private: + typedef std::pair CasePair; + std::vector case_list_; + typedef std::map> CaseDescMap; + CaseDescMap case_desc_map_; + std::map tables_; + mutable Mutex mu_; + common::ThreadPool thread_pool_; + Client* client_; + Counter do_cnt_; + Counter done_cnt_; + Counter done_fail_cnt_; }; -} // namespace tera +} // namespace tera #endif // TERA_SDK_TEST_GLOBAL_TXN_TEST_TOOL_H_ diff --git a/src/sdk/test/global_txn_testutils.cc b/src/sdk/test/global_txn_testutils.cc index c615489d7..6a5d523a2 100644 --- a/src/sdk/test/global_txn_testutils.cc +++ b/src/sdk/test/global_txn_testutils.cc @@ -11,7 +11,8 @@ #include "common/timer.h" DEFINE_bool(tera_gtxn_test_opened, false, "for test gtxn opened"); -DEFINE_bool(tera_gtxn_test_isolation_snapshot, true, "true means Snapshot, false means ReadCommitedSnapshot"); +DEFINE_bool(tera_gtxn_test_isolation_snapshot, true, + "true means Snapshot, false means ReadCommitedSnapshot"); DEFINE_string(tera_gtxn_test_flagfile, "", "gtxn test flagfile"); DEFINE_int64(start_ts, 1, "start ts"); DEFINE_int64(begin_commit_ts, 0, "time to wait before begin commit"); @@ -27,152 +28,146 @@ namespace tera { constexpr int64_t kMillisPerSecond = 1000L; -GlobalTxnTestHelper::GlobalTxnTestHelper(const std::string& conffile) : - pos_(0), get_pos_(0), conf_file_(conffile), - start_ts_(0), prewrite_start_ts_(0), commit_ts_(0), - helper_create_time_(get_millis()) { -} +GlobalTxnTestHelper::GlobalTxnTestHelper(const std::string& conffile) + : pos_(0), + get_pos_(0), + conf_file_(conffile), + start_ts_(0), + prewrite_start_ts_(0), + commit_ts_(0), + helper_create_time_(get_millis()) {} void GlobalTxnTestHelper::LoadTxnConf() { - utils::LoadFlagFile(conf_file_); - ts_[0] = FLAGS_start_ts; - start_ts_ = FLAGS_start_ts; - ts_[1] = FLAGS_begin_commit_ts; - ts_[2] = FLAGS_begin_prewrite_ts; - ts_[3] = FLAGS_end_prewrite_ts; - ts_[4] = FLAGS_commit_ts; - ts_[5] = FLAGS_begin_primary_commit_ts; - ts_[6] = FLAGS_end_primary_commit_ts; - ts_[7] = FLAGS_begin_other_commit_ts; - VLOG(13) << "split get wait ts list begin..."; - SplitString(FLAGS_get_wait_ts_list, ",", &get_ts_list_); - for (auto item : get_ts_list_) { - VLOG(13) << item; - } - VLOG(13) << "split get wait ts list done"; - // if isolation_level == ReadCommitedSnapshot - if (!FLAGS_tera_gtxn_test_isolation_snapshot) { - prewrite_start_ts_ = FLAGS_start_ts + FLAGS_begin_commit_ts + FLAGS_begin_prewrite_ts; - } else { - prewrite_start_ts_ = start_ts_; - } - commit_ts_ = FLAGS_start_ts + FLAGS_begin_commit_ts + FLAGS_begin_prewrite_ts - + FLAGS_end_prewrite_ts + FLAGS_commit_ts; - if (commit_ts_ <= prewrite_start_ts_) { - commit_ts_ = prewrite_start_ts_ + 1; - } - Wait(ts_[0]); + utils::LoadFlagFile(conf_file_); + ts_[0] = FLAGS_start_ts; + start_ts_ = FLAGS_start_ts; + ts_[1] = FLAGS_begin_commit_ts; + ts_[2] = FLAGS_begin_prewrite_ts; + ts_[3] = FLAGS_end_prewrite_ts; + ts_[4] = FLAGS_commit_ts; + ts_[5] = FLAGS_begin_primary_commit_ts; + ts_[6] = FLAGS_end_primary_commit_ts; + ts_[7] = FLAGS_begin_other_commit_ts; + VLOG(13) << "split get wait ts list begin..."; + SplitString(FLAGS_get_wait_ts_list, ",", &get_ts_list_); + for (auto item : get_ts_list_) { + VLOG(13) << item; + } + VLOG(13) << "split get wait ts list done"; + // if isolation_level == ReadCommitedSnapshot + if (!FLAGS_tera_gtxn_test_isolation_snapshot) { + prewrite_start_ts_ = FLAGS_start_ts + FLAGS_begin_commit_ts + FLAGS_begin_prewrite_ts; + } else { + prewrite_start_ts_ = start_ts_; + } + commit_ts_ = FLAGS_start_ts + FLAGS_begin_commit_ts + FLAGS_begin_prewrite_ts + + FLAGS_end_prewrite_ts + FLAGS_commit_ts; + if (commit_ts_ <= prewrite_start_ts_) { + commit_ts_ = prewrite_start_ts_ + 1; + } + Wait(ts_[0]); } -int64_t GlobalTxnTestHelper::GetStartTs() { - return start_ts_; -} +int64_t GlobalTxnTestHelper::GetStartTs() { return start_ts_; } -int64_t GlobalTxnTestHelper::GetPrewriteStartTs() { - return prewrite_start_ts_; -} +int64_t GlobalTxnTestHelper::GetPrewriteStartTs() { return prewrite_start_ts_; } -int64_t GlobalTxnTestHelper::GetCommitTs() { - return commit_ts_; -} +int64_t GlobalTxnTestHelper::GetCommitTs() { return commit_ts_; } void GlobalTxnTestHelper::GetWait(int64_t start_ts) { - if (get_ts_list_.size() == 0) { - // don't wait - VLOG(13) << "[gtxn_helper] [" << start_ts << "] will do get operater immediate"; + if (get_ts_list_.size() == 0) { + // don't wait + VLOG(13) << "[gtxn_helper] [" << start_ts << "] will do get operater immediate"; + } else { + // get operaters in 'get_ts_list' will wait by 'get_ts_list' set, + // not in get_ts_list will immediate GET after the last 'get_ts_list' item + // finished + if (get_pos_ < get_ts_list_.size()) { + int64_t now_millis = tera::get_millis(); + int64_t def_wait_time = stol(get_ts_list_[get_pos_]) * kMillisPerSecond; + int64_t wait_time = helper_create_time_ + def_wait_time - now_millis; + VLOG(13) << "get_pos_:" << get_pos_ << " now_millis:" << now_millis + << " def_wait_time:" << def_wait_time << " size:" << get_ts_list_.size() + << " wait_time:" << wait_time; + if (wait_time > 0) { + VLOG(13) << "[gtxn_helper] [" << start_ts << "] will do get operater(" << (get_pos_ + 1) + << ") after" << wait_time << " ms."; + ThisThread::Sleep(wait_time); + } else { + VLOG(13) << "[gtxn_helper] [" << start_ts << "] will do get operater(" << (get_pos_ + 1) + << ") immediate"; + } } else { - // get operaters in 'get_ts_list' will wait by 'get_ts_list' set, - // not in get_ts_list will immediate GET after the last 'get_ts_list' item finished - if (get_pos_ < get_ts_list_.size()) { - int64_t now_millis = tera::get_millis(); - int64_t def_wait_time = stol(get_ts_list_[get_pos_]) * kMillisPerSecond; - int64_t wait_time = helper_create_time_ + def_wait_time - now_millis; - VLOG(13) << "get_pos_:" << get_pos_ - << " now_millis:" << now_millis - << " def_wait_time:" << def_wait_time - << " size:" << get_ts_list_.size() - << " wait_time:" << wait_time; - if (wait_time > 0) { - VLOG(13) << "[gtxn_helper] [" << start_ts << "] will do get operater(" - << (get_pos_ + 1) << ") after" << wait_time << " ms."; - ThisThread::Sleep(wait_time); - } else { - VLOG(13) << "[gtxn_helper] [" << start_ts << "] will do get operater(" - << (get_pos_ + 1) << ") immediate"; - } - } else { - VLOG(13) << "[gtxn_helper] [" << start_ts << "] will do get operater(" - << (get_pos_ + 1) << ") immediate"; - } - get_pos_++; + VLOG(13) << "[gtxn_helper] [" << start_ts << "] will do get operater(" << (get_pos_ + 1) + << ") immediate"; } + get_pos_++; + } } void GlobalTxnTestHelper::Wait(int64_t start_ts) { - int wait_position = pos_++; - int64_t* info = ts_; - int64_t now_micros = tera::get_micros(); - if (wait_position == 0) { - PrintLog(start_ts, "begin txn", info[wait_position + 1]); - } else { - if (info[wait_position] == -1) { - ExitNow(start_ts, wait_position); - } - int64_t should_wait = info[wait_position] * 1000000L + info[wait_position - 1]; - if (should_wait - now_micros > 10) { - ThisThread::Sleep((should_wait - now_micros) / 1000L); - } else if (info[wait_position] == 0) { - // nothing to do - } else if (should_wait < now_micros) { - LOG(ERROR) << "[gtxn_helper] [" << start_ts << "] txn run timeout, exited"; - _Exit(0); - } - switch (wait_position) { - case 1: - PrintLog(start_ts, "begin commit", info[wait_position + 1]); - break; - case 2: - PrintLog(start_ts, "begin prewrite", info[wait_position + 1]); - break; - case 3: - PrintLog(start_ts, "end prewrite", info[wait_position + 1]); - break; - case 4: - PrintLog(start_ts, "begin real commit", info[wait_position + 1]); - break; - case 5: - PrintLog(start_ts, "begin primary commit", info[wait_position + 1]); - break; - case 6: - PrintLog(start_ts, "end primary commit", info[wait_position + 1]); - break; - case 7: - PrintLog(start_ts, "begin other commit"); - break; - default: - LOG(ERROR) << "overflow position"; - _Exit(0); - } + int wait_position = pos_++; + int64_t* info = ts_; + int64_t now_micros = tera::get_micros(); + if (wait_position == 0) { + PrintLog(start_ts, "begin txn", info[wait_position + 1]); + } else { + if (info[wait_position] == -1) { + ExitNow(start_ts, wait_position); + } + int64_t should_wait = info[wait_position] * 1000000L + info[wait_position - 1]; + if (should_wait - now_micros > 10) { + ThisThread::Sleep((should_wait - now_micros) / 1000L); + } else if (info[wait_position] == 0) { + // nothing to do + } else if (should_wait < now_micros) { + LOG(ERROR) << "[gtxn_helper] [" << start_ts << "] txn run timeout, exited"; + _Exit(0); + } + switch (wait_position) { + case 1: + PrintLog(start_ts, "begin commit", info[wait_position + 1]); + break; + case 2: + PrintLog(start_ts, "begin prewrite", info[wait_position + 1]); + break; + case 3: + PrintLog(start_ts, "end prewrite", info[wait_position + 1]); + break; + case 4: + PrintLog(start_ts, "begin real commit", info[wait_position + 1]); + break; + case 5: + PrintLog(start_ts, "begin primary commit", info[wait_position + 1]); + break; + case 6: + PrintLog(start_ts, "end primary commit", info[wait_position + 1]); + break; + case 7: + PrintLog(start_ts, "begin other commit"); + break; + default: + LOG(ERROR) << "overflow position"; + _Exit(0); } - info[wait_position] = tera::get_micros(); - return; + } + info[wait_position] = tera::get_micros(); + return; } void GlobalTxnTestHelper::ExitNow(int64_t start_ts, int position) { - VLOG(13) << "[gtxn_helper] [" << start_ts << "] exit @ position=" << position; - _Exit(0); // for simulate test gtxn stop at anywhere + VLOG(13) << "[gtxn_helper] [" << start_ts << "] exit @ position=" << position; + _Exit(0); // for simulate test gtxn stop at anywhere } -void GlobalTxnTestHelper::PrintLog(int64_t start_ts, - const std::string& log_str, +void GlobalTxnTestHelper::PrintLog(int64_t start_ts, const std::string& log_str, int64_t next_wait_time) { - if (next_wait_time == -1) { - VLOG(13) << "[gtxn_helper] [" << start_ts << "] " << log_str << ", txn will be done."; - } else { - VLOG(13) << "[gtxn_helper] [" << start_ts << "] " << log_str - << ", next step will begin after [" << next_wait_time << "s]"; - } + if (next_wait_time == -1) { + VLOG(13) << "[gtxn_helper] [" << start_ts << "] " << log_str << ", txn will be done."; + } else { + VLOG(13) << "[gtxn_helper] [" << start_ts << "] " << log_str << ", next step will begin after [" + << next_wait_time << "s]"; + } } -} // namespace tera - +} // namespace tera diff --git a/src/sdk/test/global_txn_testutils.h b/src/sdk/test/global_txn_testutils.h index 278ef8e68..d6528ff1c 100644 --- a/src/sdk/test/global_txn_testutils.h +++ b/src/sdk/test/global_txn_testutils.h @@ -2,40 +2,38 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_SDK_TEST_GLOBAL_TXN_TESTUTILS_H_ -#define TERA_SDK_TEST_GLOBAL_TXN_TESTUTILS_H_ +#ifndef TERA_SDK_TEST_GLOBAL_TXN_TESTUTILS_H_ +#define TERA_SDK_TEST_GLOBAL_TXN_TESTUTILS_H_ #include namespace tera { class GlobalTxnTestHelper { -public: - GlobalTxnTestHelper(const std::string& conffile); - ~GlobalTxnTestHelper(){} - int64_t GetStartTs(); - int64_t GetPrewriteStartTs(); - int64_t GetCommitTs(); - void Wait(int64_t start_ts); - void GetWait(int64_t start_ts); - void LoadTxnConf(); -private: - - void ExitNow(int64_t start_ts, int position); - void PrintLog(int64_t start_ts, - const std::string& log_str, - int64_t next_wait_time = -1); - int pos_; - size_t get_pos_; - std::string conf_file_; - int64_t start_ts_; - int64_t prewrite_start_ts_; - int64_t commit_ts_; - int64_t ts_[8]; - std::vector get_ts_list_; - int64_t helper_create_time_; + public: + GlobalTxnTestHelper(const std::string& conffile); + ~GlobalTxnTestHelper() {} + int64_t GetStartTs(); + int64_t GetPrewriteStartTs(); + int64_t GetCommitTs(); + void Wait(int64_t start_ts); + void GetWait(int64_t start_ts); + void LoadTxnConf(); + + private: + void ExitNow(int64_t start_ts, int position); + void PrintLog(int64_t start_ts, const std::string& log_str, int64_t next_wait_time = -1); + int pos_; + size_t get_pos_; + std::string conf_file_; + int64_t start_ts_; + int64_t prewrite_start_ts_; + int64_t commit_ts_; + int64_t ts_[8]; + std::vector get_ts_list_; + int64_t helper_create_time_; }; -} // namespace tera +} // namespace tera #endif // TERA_SDK_TEST_GLOBAL_TXN_TESTUTILS_H_ diff --git a/src/sdk/test/hash_table_test.cc b/src/sdk/test/hash_table_test.cc new file mode 100644 index 000000000..9cbec3721 --- /dev/null +++ b/src/sdk/test/hash_table_test.cc @@ -0,0 +1,146 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Author: tianye15@baidu.com + +#include +#include + +#include "sdk/sdk_utils.h" +#include "tera.h" +#include "sdk/scan_impl.h" +#include "gtest/gtest.h" +#include "mock_table.h" + +using std::string; +using std::vector; + +namespace tera { +class HashTableTest : public ::testing::Test { + public: + HashTableTest() {} + ~HashTableTest() {} + std::function hash_method; + MockHashTable hash_table_; +}; + +TEST_F(HashTableTest, MutateReaderTest) { + auto mu = dynamic_cast(hash_table_.NewRowMutation("Happy Water")); + EXPECT_EQ(mu->RowKey(), std::string("Happy Water")); + EXPECT_EQ(mu->InternalRowKey(), hash_table_.hash_method_("Happy Water")); + auto rd = dynamic_cast(hash_table_.NewRowReader("Happy Water")); + EXPECT_EQ(rd->RowKey(), std::string("Happy Water")); + EXPECT_EQ(rd->InternalRowKey(), hash_table_.hash_method_("Happy Water")); + mu->Reset("coffee"); + EXPECT_EQ(mu->RowKey(), std::string("coffee")); + EXPECT_EQ(mu->InternalRowKey(), hash_table_.hash_method_("coffee")); + hash_table_.is_hash_table_ = false; + delete mu; + delete rd; + mu = dynamic_cast(hash_table_.NewRowMutation("Happy Water")); + EXPECT_EQ(mu->RowKey(), std::string("Happy Water")); + EXPECT_EQ(mu->InternalRowKey(), std::string("Happy Water")); + rd = dynamic_cast(hash_table_.NewRowReader("Happy Water")); + EXPECT_EQ(rd->RowKey(), std::string("Happy Water")); + EXPECT_EQ(rd->InternalRowKey(), std::string("Happy Water")); + delete mu; + delete rd; +} + +TEST_F(HashTableTest, ScanTest) { + ScanDescriptor desc("coffee"); + desc.SetEnd("Water"); + ErrorCode err; + auto stream_async_hash = dynamic_cast(hash_table_.Scan(desc, &err)); + auto* desc_impl = stream_async_hash->GetScanDesc(); + EXPECT_EQ(desc_impl->GetStartRowKey(), hash_table_.hash_method_("coffee")); + EXPECT_EQ(desc_impl->GetEndRowKey(), hash_table_.hash_method_("Water")); + hash_table_.is_hash_table_ = false; + auto stream_async_normal = dynamic_cast(hash_table_.Scan(desc, &err)); + desc_impl = stream_async_normal->GetScanDesc(); + EXPECT_EQ(desc_impl->GetStartRowKey(), std::string("coffee")); + EXPECT_EQ(desc_impl->GetEndRowKey(), std::string("Water")); +} + +TEST_F(HashTableTest, GenerateHashDelimiters) { + vector delimiters; + GenerateHashDelimiters(2, &delimiters); + EXPECT_EQ(delimiters.size(), 1); + EXPECT_EQ(delimiters[0], std::string{"7fffffffffffffff"}); + GenerateHashDelimiters(1, &delimiters); + EXPECT_TRUE(delimiters.empty()); + GenerateHashDelimiters(-2029, &delimiters); + EXPECT_TRUE(delimiters.empty()); + GenerateHashDelimiters(32, &delimiters); + EXPECT_EQ(delimiters.size(), 31); + EXPECT_EQ(std::stoul(delimiters[2], 0, 16) - std::stoul(delimiters[1], 0, 16), + std::stoul(delimiters[1], 0, 16) - std::stoul(delimiters[0], 0, 16)); + EXPECT_EQ( + 0xFFFFFFFFFFFFFFFFul / (std::stoul(delimiters[2], 0, 16) - std::stoul(delimiters[1], 0, 16)), + 32); +} + +TEST_F(HashTableTest, MurmurhashMethodTest) { + EXPECT_EQ(hash_table_.GetHashMethod()("2UhGxUShBZr1ZBIJqz8g"), + std::string{"91e5c5cc21866b182UhGxUShBZr1ZBIJqz8g"}); + EXPECT_EQ(hash_table_.GetHashMethod()("192zw9vc84dQzR2ptLCm"), + std::string{"0bb8c3e200317607192zw9vc84dQzR2ptLCm"}); + EXPECT_EQ(hash_table_.GetHashMethod()("H4alYbi4yMfyNnDBtt7m"), + std::string{"184b87380d6b1877H4alYbi4yMfyNnDBtt7m"}); + EXPECT_EQ(hash_table_.GetHashMethod()("96SIlaNgrQYyuVjt2Mbc"), + std::string{"7e66efcac7a4de6996SIlaNgrQYyuVjt2Mbc"}); + EXPECT_EQ(hash_table_.GetHashMethod()("0FTkCOZmZcJRaacH9mLX"), + std::string{"07832f92ed43e9740FTkCOZmZcJRaacH9mLX"}); + EXPECT_EQ(hash_table_.GetHashMethod()("wjwMOOh77asWARPKPZBl"), + std::string{"ab159d1c102d398fwjwMOOh77asWARPKPZBl"}); + EXPECT_EQ(hash_table_.GetHashMethod()("KBnFAGwiJRBjrKE4hR9h"), + std::string{"102bd69736f79785KBnFAGwiJRBjrKE4hR9h"}); + EXPECT_EQ(hash_table_.GetHashMethod()("76IfzK8E9y5IF41d0N4J"), + std::string{"9de642f80be247c676IfzK8E9y5IF41d0N4J"}); + EXPECT_EQ(hash_table_.GetHashMethod()("lcrQ3nWyzfNE7TG7i4nl"), + std::string{"ede00a2cbdcd1d63lcrQ3nWyzfNE7TG7i4nl"}); + EXPECT_EQ(hash_table_.GetHashMethod()("3G4XIeg6L963ws670jwe"), + std::string{"9debe002db27ca393G4XIeg6L963ws670jwe"}); + EXPECT_EQ(hash_table_.GetHashMethod()("bhyKndJqNFLDCtmcg4g7"), + std::string{"cc78b8dd79999489bhyKndJqNFLDCtmcg4g7"}); + EXPECT_EQ(hash_table_.GetHashMethod()("qwibdpkqfa5MOuwYiN4g"), + std::string{"e2d20de35e14606eqwibdpkqfa5MOuwYiN4g"}); + EXPECT_EQ(hash_table_.GetHashMethod()("IoZl7jnqJKp4fYwaamAZ"), + std::string{"9d635c9af1e44681IoZl7jnqJKp4fYwaamAZ"}); + EXPECT_EQ(hash_table_.GetHashMethod()("bRttFpTudIQk1OJv3oyQ"), + std::string{"124538f8da7a6f84bRttFpTudIQk1OJv3oyQ"}); + EXPECT_EQ(hash_table_.GetHashMethod()("ap2C59zqWvRyRx2OUrs2"), + std::string{"986eb90637360c00ap2C59zqWvRyRx2OUrs2"}); + EXPECT_EQ(hash_table_.GetHashMethod()("yIbxvdQdS8TBSD4tJXlT"), + std::string{"87b9f235973943c2yIbxvdQdS8TBSD4tJXlT"}); + EXPECT_EQ(hash_table_.GetHashMethod()("Vh0Q2CxhXR0VRu4AtVG9"), + std::string{"f322fa051195cdbdVh0Q2CxhXR0VRu4AtVG9"}); + EXPECT_EQ(hash_table_.GetHashMethod()("T5M7Hu3SuZoPGbN8nIHa"), + std::string{"983bf8eee743fa1bT5M7Hu3SuZoPGbN8nIHa"}); + EXPECT_EQ(hash_table_.GetHashMethod()("Sb2CcBfN21pGlUmiSHhK"), + std::string{"30cb6b5fb2525f05Sb2CcBfN21pGlUmiSHhK"}); + EXPECT_EQ(hash_table_.GetHashMethod()("qKHg43YHRyUblS9oUXBT"), + std::string{"1d0c8442749505ceqKHg43YHRyUblS9oUXBT"}); + EXPECT_EQ(hash_table_.GetHashMethod()("w4XgzUg1gRectsjM4aEp"), + std::string{"31642d6f4f6a2d67w4XgzUg1gRectsjM4aEp"}); + EXPECT_EQ(hash_table_.GetHashMethod()("0SbYixWcXXcHGNLN54c3"), + std::string{"aa6cbe0341e42f830SbYixWcXXcHGNLN54c3"}); + EXPECT_EQ(hash_table_.GetHashMethod()("vZJ9zTljUr6QFku7EkCq"), + std::string{"860600849ef6838fvZJ9zTljUr6QFku7EkCq"}); + EXPECT_EQ(hash_table_.GetHashMethod()("VarWftftOIB6bsGH9hOF"), + std::string{"8e785c31f128a833VarWftftOIB6bsGH9hOF"}); + EXPECT_EQ(hash_table_.GetHashMethod()("V80PBkjgU7oJ8GYM7d7M"), + std::string{"210dd5d1f5e839efV80PBkjgU7oJ8GYM7d7M"}); + EXPECT_EQ(hash_table_.GetHashMethod()("VJeTDjiO4kBxudvWAWIp"), + std::string{"5e76614754cfcd5bVJeTDjiO4kBxudvWAWIp"}); + EXPECT_EQ(hash_table_.GetHashMethod()("AJUm9zqDsA4UQnxQ6SGh"), + std::string{"f0c5c28cd56398f6AJUm9zqDsA4UQnxQ6SGh"}); + EXPECT_EQ(hash_table_.GetHashMethod()("g9lL6kWChRtRk85rUO98"), + std::string{"272ba12c212a8844g9lL6kWChRtRk85rUO98"}); + EXPECT_EQ(hash_table_.GetHashMethod()("e5bv8EmZOR1UpBbN4Eh9"), + std::string{"f189f6f85893524be5bv8EmZOR1UpBbN4Eh9"}); + EXPECT_EQ(hash_table_.GetHashMethod()("K3Ny3yiZJROTY15Imrca"), + std::string{"fadeaf75c024c176K3Ny3yiZJROTY15Imrca"}); +} +} // namespace tera diff --git a/src/sdk/test/mock_table.h b/src/sdk/test/mock_table.h index 3f781412e..5380d90cf 100644 --- a/src/sdk/test/mock_table.h +++ b/src/sdk/test/mock_table.h @@ -4,8 +4,8 @@ // // Author: baorenyi@baidu.com -#ifndef TERA_SDK_TEST_MOCK_TABLE_H_ -#define TERA_SDK_TEST_MOCK_TABLE_H_ +#ifndef TERA_SDK_TEST_MOCK_TABLE_H_ +#define TERA_SDK_TEST_MOCK_TABLE_H_ #include #include @@ -21,64 +21,73 @@ namespace tera { struct MockReaderResult { - RowResult result; - ErrorCode status; + RowResult result; + ErrorCode status; }; -class MockTable: public TableImpl { -public: - MockTable(const std::string& table_name, - common::ThreadPool* thread_pool) - : TableImpl(table_name, thread_pool, std::shared_ptr()), +class MockTable : public TableImpl { + public: + MockTable(const std::string& table_name, common::ThreadPool* thread_pool) + : TableImpl(table_name, thread_pool, std::shared_ptr()), thread_pool_(thread_pool) { - reader_err_.clear(); - mu_err_.clear(); - reader_pos_ = 0; - mu_pos_ = 0; - } + reader_err_.clear(); + mu_err_.clear(); + reader_pos_ = 0; + mu_pos_ = 0; + } - void AddDelayTask(int64_t delay_time, ThreadPool::Task& task) { - thread_pool_->DelayTask(delay_time, task); - } + void AddDelayTask(int64_t delay_time, ThreadPool::Task& task) { + thread_pool_->DelayTask(delay_time, task); + } - void ApplyMutation(RowMutation* row_mu) { - RowMutationImpl* mu = static_cast(row_mu); - mu->SetError(mu_err_[mu_pos_++].GetType(),""); - mu->RunCallback(); - } + void ApplyMutation(RowMutation* row_mu) { + RowMutationImpl* mu = static_cast(row_mu); + mu->SetError(mu_err_[mu_pos_++].GetType(), ""); + mu->RunCallback(); + } - void Get(RowReader* reader) { - RowReaderImpl* r = static_cast(reader); - if (reader_result_.size() > 0) { - r->SetResult(reader_result_[reader_pos_].result); - r->SetError(reader_result_[reader_pos_++].status.GetType(), ""); - } else { - r->SetError(reader_err_[reader_pos_++].GetType(), ""); - } - r->RunCallback(); + void Get(RowReader* reader) { + RowReaderImpl* r = static_cast(reader); + if (reader_result_.size() > 0) { + r->SetResult(reader_result_[reader_pos_].result); + r->SetError(reader_result_[reader_pos_++].status.GetType(), ""); + } else { + r->SetError(reader_err_[reader_pos_++].GetType(), ""); } + r->RunCallback(); + } - void AddReaderResult(const std::vector& results) { - reader_result_.insert(reader_result_.end(), - results.begin(), results.end()); - } - - void AddReaderErrors(const std::vector& errs) { - reader_err_.insert(reader_err_.end(), errs.begin(), errs.end()); - } + void AddReaderResult(const std::vector& results) { + reader_result_.insert(reader_result_.end(), results.begin(), results.end()); + } - void AddMutationErrors(const std::vector& errs) { - mu_err_.insert(mu_err_.end(), errs.begin(), errs.end()); - } -private: - common::ThreadPool* thread_pool_; - std::vector reader_err_; - std::vector mu_err_; - std::vector reader_result_; - int reader_pos_; - int mu_pos_; + void AddReaderErrors(const std::vector& errs) { + reader_err_.insert(reader_err_.end(), errs.begin(), errs.end()); + } + + void AddMutationErrors(const std::vector& errs) { + mu_err_.insert(mu_err_.end(), errs.begin(), errs.end()); + } + + private: + common::ThreadPool* thread_pool_; + std::vector reader_err_; + std::vector mu_err_; + std::vector reader_result_; + int reader_pos_; + int mu_pos_; +}; + +class MockHashTable : public TableImpl { + public: + MockHashTable() : TableImpl("", new ThreadPool, std::shared_ptr()) {} + + void ScanTabletAsync(ResultStreamImpl* stream) override { return; } + + bool IsHashTable() override { return is_hash_table_; } + bool is_hash_table_{true}; }; -} // namespace tera +} // namespace tera #endif // TERA_SDK_TEST_MOCK_TABLE_H_ diff --git a/src/sdk/test/scan_impl_test.cc b/src/sdk/test/scan_impl_test.cc index 475e2ff1c..5da6393a3 100644 --- a/src/sdk/test/scan_impl_test.cc +++ b/src/sdk/test/scan_impl_test.cc @@ -13,89 +13,38 @@ using std::string; namespace tera { class ScanDescImplTest : public ::testing::Test, public ScanDescImpl { -public: - ScanDescImplTest() : ScanDescImpl("row") { - CreateSchema(); - SetTableSchema(table_schema_); - } - - ~ScanDescImplTest() {} - - void CreateSchema() { - table_schema_.set_name("linkcache"); - LocalityGroupSchema* lg = table_schema_.add_locality_groups(); - lg->set_name("lg0"); - ColumnFamilySchema* cf = table_schema_.add_column_families(); - cf->set_name("cf0"); - cf->set_locality_group("lg0"); - cf->set_type("int32"); - - cf = table_schema_.add_column_families(); - cf->set_name("cf1"); - cf->set_locality_group("lg0"); - cf->set_type("uint64"); - - cf = table_schema_.add_column_families(); - cf->set_name("cf2"); - cf->set_locality_group("lg0"); - cf->set_type("binary"); - } - - const TableSchema& GetSchema() const { - return table_schema_; - } - -private: - TableSchema table_schema_; + public: + ScanDescImplTest() : ScanDescImpl("row") { + CreateSchema(); + SetTableSchema(table_schema_); + } + + ~ScanDescImplTest() {} + + void CreateSchema() { + table_schema_.set_enable_hash(false); + table_schema_.set_name("linkcache"); + LocalityGroupSchema* lg = table_schema_.add_locality_groups(); + lg->set_name("lg0"); + ColumnFamilySchema* cf = table_schema_.add_column_families(); + cf->set_name("cf0"); + cf->set_locality_group("lg0"); + cf->set_type("int32"); + + cf = table_schema_.add_column_families(); + cf->set_name("cf1"); + cf->set_locality_group("lg0"); + cf->set_type("uint64"); + + cf = table_schema_.add_column_families(); + cf->set_name("cf2"); + cf->set_locality_group("lg0"); + cf->set_type("binary"); + } + + const TableSchema& GetSchema() const { return table_schema_; } + + private: + TableSchema table_schema_; }; - -TEST_F(ScanDescImplTest, ParseValueCompareFilter) { - string filter_str; - Filter filter; - - EXPECT_FALSE(ParseValueCompareFilter(filter_str, NULL)); - - filter_str = "qualifier==10"; - EXPECT_FALSE(ParseValueCompareFilter(filter_str, &filter)); - - filter_str = "qualifier10"; - EXPECT_FALSE(ParseValueCompareFilter(filter_str, &filter)); - - filter_str = "int64cf0==-10"; - EXPECT_TRUE(ParseValueCompareFilter(filter_str, &filter)); - EXPECT_EQ(filter.type(), BinComp); - EXPECT_EQ(filter.bin_comp_op(), EQ); - EXPECT_EQ(filter.field(), ValueFilter); - EXPECT_EQ(filter.content(), "cf0"); - - filter_str = "int64cf1>1"; - EXPECT_TRUE(ParseValueCompareFilter(filter_str, &filter)); - EXPECT_EQ(filter.bin_comp_op(), GT); - - filter_str = "cf2==hello"; - EXPECT_FALSE(ParseValueCompareFilter(filter_str, &filter)); -} - -TEST_F(ScanDescImplTest, ParseSubFilterString) { - // add more filter types - string filter_str; - Filter filter; - - filter_str = "qu"; - EXPECT_FALSE(ParseSubFilterString(filter_str, &filter)); - - filter_str = "qual@ifier10"; - EXPECT_FALSE(ParseSubFilterString(filter_str, &filter)); - - filter_str = "int64cf0 == -10"; - EXPECT_TRUE(ParseSubFilterString(filter_str, &filter)); - EXPECT_EQ(filter.type(), BinComp); - EXPECT_EQ(filter.bin_comp_op(), EQ); - EXPECT_EQ(filter.field(), ValueFilter); - EXPECT_EQ(filter.content(), "cf0"); - - filter_str = "int64cf1 > 1"; - EXPECT_TRUE(ParseSubFilterString(filter_str, &filter)); - EXPECT_EQ(filter.bin_comp_op(), GT); -} -} // namespace tera +} // namespace tera diff --git a/src/sdk/test/sdk_client_test.cc b/src/sdk/test/sdk_client_test.cc index 58838e995..98198b026 100644 --- a/src/sdk/test/sdk_client_test.cc +++ b/src/sdk/test/sdk_client_test.cc @@ -21,137 +21,137 @@ DECLARE_bool(tera_sdk_mock_enable); namespace tera { -class SdkClientTest : public ::testing::Test { -public: - SdkClientTest() { - FLAGS_tera_coord_type = "mock_zk"; - FLAGS_tera_sdk_mock_enable = true; - } - ~SdkClientTest() {} +class SdkClientTest : public ::testing::Test { + public: + SdkClientTest() { + FLAGS_tera_coord_type = "mock_zk"; + FLAGS_tera_sdk_mock_enable = true; + } + ~SdkClientTest() {} }; TEST_F(SdkClientTest, MultiNewClient) { - Client* client1 = Client::NewClient(); - EXPECT_TRUE(NULL != client1); - Client* client2 = Client::NewClient(); - EXPECT_TRUE(NULL != client2); - EXPECT_TRUE((static_cast(client1))->GetClientImpl() - == (static_cast(client2))->GetClientImpl()); - delete client1; - delete client2; + Client* client1 = Client::NewClient(); + EXPECT_TRUE(NULL != client1); + Client* client2 = Client::NewClient(); + EXPECT_TRUE(NULL != client2); + EXPECT_TRUE((static_cast(client1))->GetClientImpl() == + (static_cast(client2))->GetClientImpl()); + delete client1; + delete client2; } TEST_F(SdkClientTest, SingleClientSingleTable) { - Client* client = Client::NewClient(); - EXPECT_TRUE(NULL != client); - ErrorCode err; - Table* table1 = client->OpenTable("t1", &err); - EXPECT_TRUE(err.GetType() == ErrorCode::kOK); - delete table1; - delete client; + Client* client = Client::NewClient(); + EXPECT_TRUE(NULL != client); + ErrorCode err; + Table* table1 = client->OpenTable("t1", &err); + EXPECT_TRUE(err.GetType() == ErrorCode::kOK); + delete table1; + delete client; } TEST_F(SdkClientTest, SingleClientMutiTable) { - Client* client = Client::NewClient(); - EXPECT_TRUE(NULL != client); - ErrorCode err; - Table* table1 = client->OpenTable("t1", &err); - EXPECT_TRUE(err.GetType() == ErrorCode::kOK); - Table* table2 = client->OpenTable("t1", &err); - EXPECT_TRUE(err.GetType() == ErrorCode::kOK); - - delete table1; - delete client; - delete table2; + Client* client = Client::NewClient(); + EXPECT_TRUE(NULL != client); + ErrorCode err; + Table* table1 = client->OpenTable("t1", &err); + EXPECT_TRUE(err.GetType() == ErrorCode::kOK); + Table* table2 = client->OpenTable("t1", &err); + EXPECT_TRUE(err.GetType() == ErrorCode::kOK); + + delete table1; + delete client; + delete table2; } TEST_F(SdkClientTest, MultiClientMutiTable) { - Client* client1 = Client::NewClient(); - EXPECT_TRUE(NULL != client1); - Client* client2 = Client::NewClient(); - EXPECT_TRUE(NULL != client2); - ErrorCode err; - Table* table1 = client1->OpenTable("t1", &err); - EXPECT_TRUE(err.GetType() == ErrorCode::kOK); - Table* table2 = client2->OpenTable("t2", &err); - EXPECT_TRUE(err.GetType() == ErrorCode::kOK); - - delete table1; - delete client1; - delete table2; - delete client2; + Client* client1 = Client::NewClient(); + EXPECT_TRUE(NULL != client1); + Client* client2 = Client::NewClient(); + EXPECT_TRUE(NULL != client2); + ErrorCode err; + Table* table1 = client1->OpenTable("t1", &err); + EXPECT_TRUE(err.GetType() == ErrorCode::kOK); + Table* table2 = client2->OpenTable("t2", &err); + EXPECT_TRUE(err.GetType() == ErrorCode::kOK); + + delete table1; + delete client1; + delete table2; + delete client2; } static void MultiThreadTable(Client* client) { - ErrorCode err; - Table* table1 = client->OpenTable("t1", &err); - EXPECT_TRUE(err.GetType() == ErrorCode::kOK); - Table* table2 = client->OpenTable("t2", &err); - EXPECT_TRUE(err.GetType() == ErrorCode::kOK); - delete table1; - delete table2; + ErrorCode err; + Table* table1 = client->OpenTable("t1", &err); + EXPECT_TRUE(err.GetType() == ErrorCode::kOK); + Table* table2 = client->OpenTable("t2", &err); + EXPECT_TRUE(err.GetType() == ErrorCode::kOK); + delete table1; + delete table2; } TEST_F(SdkClientTest, MultiClientMutiTableMultiThread) { - common::ThreadPool thread_pool(5); - Client* client = Client::NewClient(); - EXPECT_TRUE(NULL != client); - ThreadPool::Task task_ = std::bind(MultiThreadTable, client); - int cnt = 10; - while (cnt--) { - thread_pool.AddTask(task_); - } - thread_pool.Stop(true); - delete client; // delete client* won't let multi-thread know, - // so delete at last in case client is used after it's deleted. + common::ThreadPool thread_pool(5); + Client* client = Client::NewClient(); + EXPECT_TRUE(NULL != client); + ThreadPool::Task task_ = std::bind(MultiThreadTable, client); + int cnt = 10; + while (cnt--) { + thread_pool.AddTask(task_); + } + thread_pool.Stop(true); + delete client; // delete client* won't let multi-thread know, + // so delete at last in case client is used after it's deleted. } TEST_F(SdkClientTest, MultiClientMutiTableMultiThreadDelayTask) { - common::ThreadPool thread_pool(5); - Client* client = Client::NewClient(); - EXPECT_TRUE(NULL != client); - ThreadPool::Task task_ = std::bind(MultiThreadTable, client); - int cnt = 10; - while (cnt--) { - thread_pool.DelayTask(cnt * 100 /*ms*/, task_); - } - thread_pool.Stop(true); - delete client; + common::ThreadPool thread_pool(5); + Client* client = Client::NewClient(); + EXPECT_TRUE(NULL != client); + ThreadPool::Task task_ = std::bind(MultiThreadTable, client); + int cnt = 10; + while (cnt--) { + thread_pool.DelayTask(cnt * 100 /*ms*/, task_); + } + thread_pool.Stop(true); + delete client; } TEST_F(SdkClientTest, SingleClientTwoGlobalTransaction) { - Client* client = Client::NewClient(); - EXPECT_TRUE(NULL != client); - Transaction* global_transaction1 = client->NewGlobalTransaction(); - EXPECT_TRUE(NULL != global_transaction1); - Transaction* global_transaction2 = client->NewGlobalTransaction(); - EXPECT_TRUE(NULL != global_transaction2); - delete global_transaction1; - delete client; - delete global_transaction2; + Client* client = Client::NewClient(); + EXPECT_TRUE(NULL != client); + Transaction* global_transaction1 = client->NewGlobalTransaction(); + EXPECT_TRUE(NULL != global_transaction1); + Transaction* global_transaction2 = client->NewGlobalTransaction(); + EXPECT_TRUE(NULL != global_transaction2); + delete global_transaction1; + delete client; + delete global_transaction2; } TEST_F(SdkClientTest, SingleClientNULLGlobalTransaction) { - Transaction* global_transaction = GlobalTxn::NewGlobalTxn(std::shared_ptr(), NULL, NULL); - EXPECT_TRUE(NULL == global_transaction); + Transaction* global_transaction = + GlobalTxn::NewGlobalTxn(std::shared_ptr(), NULL, NULL); + EXPECT_TRUE(NULL == global_transaction); } TEST_F(SdkClientTest, SingleClientRowTransaction) { - Client* client = Client::NewClient(); - EXPECT_TRUE(NULL != client); - ErrorCode err; - Table* table = client->OpenTable("t", &err); - EXPECT_TRUE(err.GetType() == ErrorCode::kOK); - std::string row_key("test1"); - Transaction* row_transaction1 = table->StartRowTransaction(row_key); - EXPECT_TRUE(NULL != row_transaction1); - row_key = "test2"; - Transaction* row_transaction2 = table->StartRowTransaction(row_key); - EXPECT_TRUE(NULL != row_transaction2); - delete row_transaction1; - delete table; - delete client; - delete row_transaction2; + Client* client = Client::NewClient(); + EXPECT_TRUE(NULL != client); + ErrorCode err; + Table* table = client->OpenTable("t", &err); + EXPECT_TRUE(err.GetType() == ErrorCode::kOK); + std::string row_key("test1"); + Transaction* row_transaction1 = table->StartRowTransaction(row_key); + EXPECT_TRUE(NULL != row_transaction1); + row_key = "test2"; + Transaction* row_transaction2 = table->StartRowTransaction(row_key); + EXPECT_TRUE(NULL != row_transaction2); + delete row_transaction1; + delete table; + delete client; + delete row_transaction2; } - } \ No newline at end of file diff --git a/src/sdk/test/sdk_table_test.cc b/src/sdk/test/sdk_table_test.cc index 5a4c12a18..da72acf04 100644 --- a/src/sdk/test/sdk_table_test.cc +++ b/src/sdk/test/sdk_table_test.cc @@ -25,85 +25,89 @@ DECLARE_string(tera_coord_type); namespace tera { -class SdkTableTest : public ::testing::Test { -public: - SdkTableTest() : thread_pool_(2) {} - ~SdkTableTest() {} +class SdkTableTest : public ::testing::Test { + public: + SdkTableTest() : thread_pool_(2) {} + ~SdkTableTest() {} - std::shared_ptr OpenTable(const std::string& tablename) { - FLAGS_tera_coord_type = "mock_zk"; - std::shared_ptr table_(new MockTable(tablename, &thread_pool_)); - return table_; - } + std::shared_ptr OpenTable(const std::string& tablename) { + FLAGS_tera_coord_type = "mock_zk"; + std::shared_ptr table_(new MockTable(tablename, &thread_pool_)); + return table_; + } -private: - common::ThreadPool thread_pool_; + private: + common::ThreadPool thread_pool_; }; TEST_F(SdkTableTest, UnnormalLifeCycleReadTableMetaCallBack) { - std::shared_ptr table_ = OpenTable("t1"); - ErrorCode ret_err; - ReadTabletRequest request; - ReadTabletResponse response; - ThreadPool::Task task_ = std::bind(&TableImpl::ReadTableMetaCallBackWrapper, - std::weak_ptr(std::static_pointer_cast(table_)), - &ret_err, 0, &request, &response, false, 0); - table_->AddDelayTask(1 * 1000/*ms*/, task_); - table_.reset(); - sleep(2); + std::shared_ptr table_ = OpenTable("t1"); + ErrorCode ret_err; + ReadTabletRequest request; + ReadTabletResponse response; + ThreadPool::Task task_ = + std::bind(&TableImpl::ReadTableMetaCallBackWrapper, + std::weak_ptr(std::static_pointer_cast(table_)), &ret_err, 0, + &request, &response, false, 0); + table_->AddDelayTask(1 * 1000 /*ms*/, task_); + table_.reset(); + sleep(2); } TEST_F(SdkTableTest, UnnormalLifeCycleScanMetaTableCallBack) { - std::shared_ptr table_ = OpenTable("t1"); - ScanTabletRequest request; - ScanTabletResponse response; - std::string empty_str(""); - int64_t start_time = 0; - ThreadPool::Task task_ = std::bind(&TableImpl::ScanMetaTableCallBackWrapper, - std::weak_ptr(std::static_pointer_cast(table_)), - empty_str, empty_str, empty_str, start_time, &request, &response, false, 0); - table_->AddDelayTask(1 * 1000/*ms*/, task_); - table_.reset(); - sleep(2); + std::shared_ptr table_ = OpenTable("t1"); + ScanTabletRequest request; + ScanTabletResponse response; + std::string empty_str(""); + int64_t start_time = 0; + ThreadPool::Task task_ = + std::bind(&TableImpl::ScanMetaTableCallBackWrapper, + std::weak_ptr(std::static_pointer_cast(table_)), empty_str, + empty_str, empty_str, start_time, &request, &response, false, 0); + table_->AddDelayTask(1 * 1000 /*ms*/, task_); + table_.reset(); + sleep(2); } TEST_F(SdkTableTest, UnnormalLifeCycleReaderCallBack) { - std::shared_ptr table_ = OpenTable("t1"); - std::vector vec; - ReadTabletRequest request; - ReadTabletResponse response; - ThreadPool::Task task_ = std::bind(&TableImpl::ReaderCallBackWrapper, - std::weak_ptr(std::static_pointer_cast(table_)), - &vec, &request, &response, false, 0); - table_->AddDelayTask(1 * 1000/*ms*/, task_); - table_.reset(); - sleep(2); + std::shared_ptr table_ = OpenTable("t1"); + std::vector vec; + ReadTabletRequest request; + ReadTabletResponse response; + ThreadPool::Task task_ = + std::bind(&TableImpl::ReaderCallBackWrapper, + std::weak_ptr(std::static_pointer_cast(table_)), &vec, + &request, &response, false, 0); + table_->AddDelayTask(1 * 1000 /*ms*/, task_); + table_.reset(); + sleep(2); } TEST_F(SdkTableTest, UnnormalLifeCycleMutateCallBack) { - std::shared_ptr table_ = OpenTable("t1"); - std::vector vec; - WriteTabletRequest request; - WriteTabletResponse response; - ThreadPool::Task task_ = std::bind(&TableImpl::MutateCallBackWrapper, - std::weak_ptr(std::static_pointer_cast(table_)), - &vec, &request, &response, false, 0); - table_->AddDelayTask(1 * 1000/*ms*/, task_); - table_.reset(); - sleep(2); + std::shared_ptr table_ = OpenTable("t1"); + std::vector vec; + WriteTabletRequest request; + WriteTabletResponse response; + ThreadPool::Task task_ = + std::bind(&TableImpl::MutateCallBackWrapper, + std::weak_ptr(std::static_pointer_cast(table_)), &vec, + &request, &response, false, 0); + table_->AddDelayTask(1 * 1000 /*ms*/, task_); + table_.reset(); + sleep(2); } TEST_F(SdkTableTest, UnnormalLifeCycleScanCallBack) { - std::shared_ptr table_ = OpenTable("t1"); - ScanTask scan_task; - ScanTabletRequest request; - ScanTabletResponse response; - ThreadPool::Task task_ = std::bind(&TableImpl::ScanCallBackWrapper, - std::weak_ptr(std::static_pointer_cast(table_)), - &scan_task, &request, &response, false, 0); - table_->AddDelayTask(1 * 1000/*ms*/, task_); - table_.reset(); - sleep(2); + std::shared_ptr table_ = OpenTable("t1"); + ScanTask scan_task; + ScanTabletRequest request; + ScanTabletResponse response; + ThreadPool::Task task_ = + std::bind(&TableImpl::ScanCallBackWrapper, + std::weak_ptr(std::static_pointer_cast(table_)), &scan_task, + &request, &response, false, 0); + table_->AddDelayTask(1 * 1000 /*ms*/, task_); + table_.reset(); + sleep(2); } - } \ No newline at end of file diff --git a/src/sdk/test/sdk_test.cc b/src/sdk/test/sdk_test.cc index 7177bdc3a..4e6cb9572 100644 --- a/src/sdk/test/sdk_test.cc +++ b/src/sdk/test/sdk_test.cc @@ -9,8 +9,8 @@ DECLARE_bool(tera_sdk_tso_client_enabled); DECLARE_bool(tera_sdk_client_for_gtxn); int main(int argc, char* argv[]) { - FLAGS_tera_sdk_client_for_gtxn = true; - FLAGS_tera_sdk_tso_client_enabled = false; - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + FLAGS_tera_sdk_client_for_gtxn = true; + FLAGS_tera_sdk_tso_client_enabled = false; + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); } diff --git a/src/sdk/test/sdk_timeout_manager_test.cc b/src/sdk/test/sdk_timeout_manager_test.cc index 84ea5a4c1..a95fc7273 100644 --- a/src/sdk/test/sdk_timeout_manager_test.cc +++ b/src/sdk/test/sdk_timeout_manager_test.cc @@ -17,228 +17,225 @@ DEFINE_int32(perf_test_thead_num, 10, "thread number of put/pop"); DEFINE_int32(perf_test_duration, 2, "seconds for performance test"); namespace tera { -#define YELLOW "\033[33m" /* Yellow */ +#define YELLOW "\033[33m" /* Yellow */ static Counter callback_called_times = Counter(); static Counter task_counter = Counter(); class TestTask : public SdkTask { -public: - std::string dummy_key; - - TestTask() : SdkTask(SdkTask::READ) {} - virtual ~TestTask() {} - - bool IsAsync() { return false; } - uint32_t Size() { return 0; } - int64_t TimeOut() { return 0; } - void Wait() {} - void SetError(ErrorCode::ErrorCodeType err, - const std::string& reason) {} - const std::string& RowKey() { return dummy_key; } + public: + std::string dummy_key; + + TestTask() : SdkTask(SdkTask::READ) {} + virtual ~TestTask() {} + + bool IsAsync() { return false; } + uint32_t Size() { return 0; } + void SetTimeOut(int64_t timeout) {} + int64_t TimeOut() { return 0; } + void Wait() {} + void SetError(ErrorCode::ErrorCodeType err, const std::string& reason) {} + std::string InternalRowKey() { return dummy_key; } + int64_t GetCommitTimes() { return 0; } + void RunCallback() { abort(); } }; class SdkTimeoutManagerTest : public ::testing::Test { -public: - SdkTimeoutManagerTest() : thread_pool_(FLAGS_thread_num), timeout_manager_(NULL) {} - - virtual void SetUp() { - timeout_manager_ = new SdkTimeoutManager(&thread_pool_); - ASSERT_TRUE(timeout_manager_ != NULL); - callback_called_times.Clear(); - task_counter.Clear(); - } - virtual void TearDown() { - delete timeout_manager_; - } - -private: - common::ThreadPool thread_pool_; - SdkTimeoutManager* timeout_manager_ = NULL; + public: + SdkTimeoutManagerTest() : thread_pool_(FLAGS_thread_num), timeout_manager_(NULL) {} + + virtual void SetUp() { + timeout_manager_ = new SdkTimeoutManager(&thread_pool_); + ASSERT_TRUE(timeout_manager_ != NULL); + callback_called_times.Clear(); + task_counter.Clear(); + } + virtual void TearDown() { delete timeout_manager_; } + + private: + common::ThreadPool thread_pool_; + SdkTimeoutManager* timeout_manager_ = NULL; }; -static void TimeoutFunc(SdkTask* task) { - callback_called_times.Add(1); -} +static void TimeoutFunc(SdkTask* task) { callback_called_times.Add(1); } static SdkTask::TimeoutFunc timeout_func = std::bind(TimeoutFunc, _1); TEST_F(SdkTimeoutManagerTest, PutTaskPopTaskTest) { - const int32_t LOOP_CNT = 10000; - int64_t put_start_time = get_micros(); - bool succ = true; - for (int32_t i = 0; i < LOOP_CNT; ++i) { - TestTask* sdk_task = new TestTask(); - sdk_task->SetId(LOOP_CNT - i); - succ &= timeout_manager_->PutTask(sdk_task, 5000, timeout_func); - } - EXPECT_TRUE(succ); - int64_t put_done_time = get_micros(); - - uint32_t task_cnt = 0; - for (uint32_t i = 0; i < SdkTimeoutManager::kShardNum; ++i) { - uint32_t shard_due_cnt = timeout_manager_->map_shard_[i].due_time_map.size(); - EXPECT_EQ(shard_due_cnt, timeout_manager_->map_shard_[i].id_hash_map.size()); - task_cnt += shard_due_cnt; - } - EXPECT_EQ(task_cnt, LOOP_CNT); - - int64_t pop_start_time = get_micros(); - for (uint32_t shard_idx = 0; shard_idx < SdkTimeoutManager::kShardNum; ++shard_idx) { - SdkTimeoutManager::DueTimeMap& due_time_map = - timeout_manager_->map_shard_[shard_idx].due_time_map; - uint32_t shard_task_cnt = due_time_map.size(); - uint32_t shard_pop_cnt = 0; - while (!due_time_map.empty()) { - SdkTask* task = timeout_manager_->PopTask((*due_time_map.begin())->GetId()); - EXPECT_TRUE(task != NULL); - shard_pop_cnt += 1; - delete static_cast(task); - } - EXPECT_EQ(shard_pop_cnt, shard_task_cnt); - } - int64_t pop_done_time = get_micros(); - - std::cout << YELLOW << "SdkTimeoutManager performance(single thread): " - << "\n\t\tPutTask: " << int(LOOP_CNT / ((put_done_time - put_start_time + 1) / 1000000.0)) - << "\n\t\tPopTask: " << int(LOOP_CNT / ((pop_done_time - pop_start_time + 1) / 1000000.0)) - << std::endl; + const int32_t LOOP_CNT = 10000; + int64_t put_start_time = get_micros(); + bool succ = true; + for (int32_t i = 0; i < LOOP_CNT; ++i) { + TestTask* sdk_task = new TestTask(); + sdk_task->SetId(LOOP_CNT - i); + succ &= timeout_manager_->PutTask(sdk_task, 5000, timeout_func); + } + EXPECT_TRUE(succ); + int64_t put_done_time = get_micros(); + + uint32_t task_cnt = 0; + for (uint32_t i = 0; i < SdkTimeoutManager::kShardNum; ++i) { + uint32_t shard_due_cnt = timeout_manager_->map_shard_[i].due_time_map.size(); + EXPECT_EQ(shard_due_cnt, timeout_manager_->map_shard_[i].id_hash_map.size()); + task_cnt += shard_due_cnt; + } + EXPECT_EQ(task_cnt, LOOP_CNT); + + int64_t pop_start_time = get_micros(); + for (uint32_t shard_idx = 0; shard_idx < SdkTimeoutManager::kShardNum; ++shard_idx) { + SdkTimeoutManager::DueTimeMap& due_time_map = + timeout_manager_->map_shard_[shard_idx].due_time_map; + uint32_t shard_task_cnt = due_time_map.size(); + uint32_t shard_pop_cnt = 0; + while (!due_time_map.empty()) { + SdkTask* task = timeout_manager_->PopTask((*due_time_map.begin())->GetId()); + EXPECT_TRUE(task != NULL); + shard_pop_cnt += 1; + delete static_cast(task); + } + EXPECT_EQ(shard_pop_cnt, shard_task_cnt); + } + int64_t pop_done_time = get_micros(); + + std::cout << YELLOW << "SdkTimeoutManager performance(single thread): " + << "\n\t\tPutTask: " + << int(LOOP_CNT / ((put_done_time - put_start_time + 1) / 1000000.0)) + << "\n\t\tPopTask: " + << int(LOOP_CNT / ((pop_done_time - pop_start_time + 1) / 1000000.0)) << std::endl; } TEST_F(SdkTimeoutManagerTest, CheckTimeout) { - const int32_t LOOP_CNT = 10000; - std::vector tasks; - tasks.reserve(LOOP_CNT); - bool succ = true; - for (int32_t i = 0; i < LOOP_CNT; ++i) { - TestTask* sdk_task = new TestTask(); - sdk_task->SetId(i + 1); - succ &= timeout_manager_->PutTask(sdk_task, 500, timeout_func); - tasks.push_back(sdk_task); - } - EXPECT_TRUE(true); - // waiting until all SdkTasks have been check timeout and their TimeoutFunc been put to thread pool to execute - for (uint32_t shard = 0; shard < SdkTimeoutManager::kShardNum; ++shard) { - while (!timeout_manager_->map_shard_[shard].due_time_map.empty()){ - usleep(timeout_manager_->timeout_precision_); - } - } - // waiting another 100ms until all TimeoutFunc in thread_pool have been done - usleep(250000); - EXPECT_EQ(callback_called_times.Get(), LOOP_CNT); - + const int32_t LOOP_CNT = 10000; + std::vector tasks; + tasks.reserve(LOOP_CNT); + bool succ = true; + for (int32_t i = 0; i < LOOP_CNT; ++i) { TestTask* sdk_task = new TestTask(); - sdk_task->SetId(100); - EXPECT_TRUE(timeout_manager_->PutTask(sdk_task, 500, timeout_func)); - tasks.push_back(sdk_task); - EXPECT_FALSE(timeout_manager_->PutTask(sdk_task, 500, timeout_func)); - - sdk_task = new TestTask(); - sdk_task->SetId(100); - EXPECT_FALSE(timeout_manager_->PutTask(sdk_task, 500, timeout_func)); + sdk_task->SetId(i + 1); + succ &= timeout_manager_->PutTask(sdk_task, 500, timeout_func); tasks.push_back(sdk_task); - - usleep(1000); - sdk_task = new TestTask(); - sdk_task->SetId(100); - EXPECT_FALSE(timeout_manager_->PutTask(sdk_task, 500, timeout_func)); - tasks.push_back(sdk_task); - // waiting until all SdkTasks have been check timeout and their TimeoutFunc been put to thread pool to execute - for (uint32_t shard = 0; shard < SdkTimeoutManager::kShardNum; ++shard) { - while (!timeout_manager_->map_shard_[shard].due_time_map.empty()){ - usleep(timeout_manager_->timeout_precision_); - } - } - // waiting another 100ms until all TimeoutFunc in thread_pool have been done - usleep(250000); - EXPECT_EQ(callback_called_times.Get(), 1 + LOOP_CNT); - for (std::size_t i = 0; i < tasks.size(); ++i) { - delete tasks[i]; - } + } + EXPECT_TRUE(true); + // waiting until all SdkTasks have been check timeout and their TimeoutFunc + // been put to thread pool to execute + for (uint32_t shard = 0; shard < SdkTimeoutManager::kShardNum; ++shard) { + while (!timeout_manager_->map_shard_[shard].due_time_map.empty()) { + usleep(timeout_manager_->timeout_precision_); + } + } + // waiting another 100ms until all TimeoutFunc in thread_pool have been done + usleep(250000); + EXPECT_EQ(callback_called_times.Get(), LOOP_CNT); + + TestTask* sdk_task = new TestTask(); + sdk_task->SetId(100); + EXPECT_TRUE(timeout_manager_->PutTask(sdk_task, 500, timeout_func)); + tasks.push_back(sdk_task); + EXPECT_FALSE(timeout_manager_->PutTask(sdk_task, 500, timeout_func)); + + sdk_task = new TestTask(); + sdk_task->SetId(100); + EXPECT_FALSE(timeout_manager_->PutTask(sdk_task, 500, timeout_func)); + tasks.push_back(sdk_task); + + usleep(1000); + sdk_task = new TestTask(); + sdk_task->SetId(100); + EXPECT_FALSE(timeout_manager_->PutTask(sdk_task, 500, timeout_func)); + tasks.push_back(sdk_task); + // waiting until all SdkTasks have been check timeout and their TimeoutFunc + // been put to thread pool to execute + for (uint32_t shard = 0; shard < SdkTimeoutManager::kShardNum; ++shard) { + while (!timeout_manager_->map_shard_[shard].due_time_map.empty()) { + usleep(timeout_manager_->timeout_precision_); + } + } + // waiting another 100ms until all TimeoutFunc in thread_pool have been done + usleep(250000); + EXPECT_EQ(callback_called_times.Get(), 1 + LOOP_CNT); + for (std::size_t i = 0; i < tasks.size(); ++i) { + delete tasks[i]; + } } static bool add_task_run = true; static void AddTaskFunc(SdkTimeoutManager* mgr, int64_t timeout) { - while (add_task_run) { - SdkTask* task = new TestTask(); - task->SetId(task_counter.Add(1)); - mgr->PutTask(task, timeout, timeout_func); - } + while (add_task_run) { + SdkTask* task = new TestTask(); + task->SetId(task_counter.Add(1)); + mgr->PutTask(task, timeout, timeout_func); + } } static void PopTaskFunc(SdkTimeoutManager* mgr) { - int64_t task_id; - while ((task_id = task_counter.Sub(1) + 1) > 0) { - SdkTask* task = mgr->PopTask(task_id); - delete static_cast(task); - } + int64_t task_id; + while ((task_id = task_counter.Sub(1) + 1) > 0) { + SdkTask* task = mgr->PopTask(task_id); + delete static_cast(task); + } } TEST_F(SdkTimeoutManagerTest, PutPopPerformance) { - std::vector threads; - threads.reserve(FLAGS_perf_test_thead_num); - add_task_run = true; - int64_t timeout = FLAGS_perf_test_duration * 1000 + 1000; - for (int32_t i = 0; i < FLAGS_perf_test_thead_num; ++i) { - threads.emplace_back(std::thread(std::bind(&AddTaskFunc, timeout_manager_, timeout))); - } - sleep(FLAGS_perf_test_duration); - add_task_run = false; - for (std::size_t i = 0; i < threads.size(); ++i) { - threads[i].join(); - } - threads.clear(); - int64_t task_cnt = task_counter.Get(); - - int64_t pop_start_time = get_micros(); - for (int i = 0; i < FLAGS_perf_test_thead_num; ++i) { - threads.emplace_back(std::thread(std::bind(PopTaskFunc, timeout_manager_))); - } - for (std::size_t i = 0; i < threads.size(); ++i) { - threads[i].join(); - } - int64_t pop_end_time = get_micros(); - std::cout << YELLOW - << "SdkTimeoutManager performance(" << FLAGS_perf_test_thead_num <<" put/pop threads): " - << "\n\t\tPutTask: " << task_cnt / FLAGS_perf_test_duration - << "\n\t\tPopTask: " << int(task_cnt / ((pop_end_time - pop_start_time) / 1000000.0)) - << std::endl; + std::vector threads; + threads.reserve(FLAGS_perf_test_thead_num); + add_task_run = true; + int64_t timeout = FLAGS_perf_test_duration * 1000 + 1000; + for (int32_t i = 0; i < FLAGS_perf_test_thead_num; ++i) { + threads.emplace_back(std::thread(std::bind(&AddTaskFunc, timeout_manager_, timeout))); + } + sleep(FLAGS_perf_test_duration); + add_task_run = false; + for (std::size_t i = 0; i < threads.size(); ++i) { + threads[i].join(); + } + threads.clear(); + int64_t task_cnt = task_counter.Get(); + + int64_t pop_start_time = get_micros(); + for (int i = 0; i < FLAGS_perf_test_thead_num; ++i) { + threads.emplace_back(std::thread(std::bind(PopTaskFunc, timeout_manager_))); + } + for (std::size_t i = 0; i < threads.size(); ++i) { + threads[i].join(); + } + int64_t pop_end_time = get_micros(); + std::cout << YELLOW << "SdkTimeoutManager performance(" << FLAGS_perf_test_thead_num + << " put/pop threads): " + << "\n\t\tPutTask: " << task_cnt / FLAGS_perf_test_duration + << "\n\t\tPopTask: " << int(task_cnt / ((pop_end_time - pop_start_time) / 1000000.0)) + << std::endl; } TEST_F(SdkTimeoutManagerTest, CheckTimeoutPerformance) { - common::ThreadPool thread_pool(FLAGS_thread_num); - SdkTimeoutManager* timeout_mgr = new SdkTimeoutManager(&thread_pool); - - std::vector threads; - threads.reserve(FLAGS_perf_test_thead_num); - add_task_run = true; - // timeout set to 1us - int64_t timeout = 1; - int64_t start_time = get_micros(); - for (int32_t i = 0; i < FLAGS_perf_test_thead_num; ++i) { - threads.emplace_back(std::thread(std::bind(&AddTaskFunc, timeout_mgr, timeout))); - } - sleep(FLAGS_perf_test_duration); - add_task_run = false; - int64_t end_time = get_micros(); - for (std::size_t i = 0; i < threads.size(); ++i) { - threads[i].join(); - } - threads.clear(); - int64_t callback_run_cnt = callback_called_times.Get(); - int64_t pending_cnt = task_counter.Get() - callback_run_cnt; - delete timeout_mgr; - - std::cout << YELLOW - << "SdkTimeoutManager performance@CheckTimeout(" - << FLAGS_perf_test_thead_num <<" put threads, " - << FLAGS_thread_num << "TimeoutFunc run threads): " - << "\n\t\tPutTask: " << task_counter.Get() / FLAGS_perf_test_duration - << "\n\t\tPending: " << pending_cnt / FLAGS_perf_test_duration - << "\n\t\tCheckTimeout: " < threads; + threads.reserve(FLAGS_perf_test_thead_num); + add_task_run = true; + // timeout set to 1us + int64_t timeout = 1; + int64_t start_time = get_micros(); + for (int32_t i = 0; i < FLAGS_perf_test_thead_num; ++i) { + threads.emplace_back(std::thread(std::bind(&AddTaskFunc, timeout_mgr, timeout))); + } + sleep(FLAGS_perf_test_duration); + add_task_run = false; + int64_t end_time = get_micros(); + for (std::size_t i = 0; i < threads.size(); ++i) { + threads[i].join(); + } + threads.clear(); + int64_t callback_run_cnt = callback_called_times.Get(); + int64_t pending_cnt = task_counter.Get() - callback_run_cnt; + delete timeout_mgr; + + std::cout << YELLOW << "SdkTimeoutManager performance@CheckTimeout(" << FLAGS_perf_test_thead_num + << " put threads, " << FLAGS_thread_num << "TimeoutFunc run threads): " + << "\n\t\tPutTask: " << task_counter.Get() / FLAGS_perf_test_duration + << "\n\t\tPending: " << pending_cnt / FLAGS_perf_test_duration + << "\n\t\tCheckTimeout: " << callback_run_cnt / FLAGS_perf_test_duration << "," + << int(task_counter.Get() / ((end_time - start_time) / 1000000.0)) << std::endl; } - -} // namespace tera +} // namespace tera diff --git a/src/sdk/test/sdk_utils_test.cc b/src/sdk/test/sdk_utils_test.cc index 559a7baf2..d31d912b3 100644 --- a/src/sdk/test/sdk_utils_test.cc +++ b/src/sdk/test/sdk_utils_test.cc @@ -11,107 +11,131 @@ namespace tera { class SdkUtilsTest : public ::testing::Test { -public: - SdkUtilsTest() {} - ~SdkUtilsTest() {} + public: + SdkUtilsTest() {} + ~SdkUtilsTest() {} }; TEST(SdkUtilsTest, ExtendNotifyLgToDescriptor0) { - // all disable notify - tera::TableDescriptor schema("t1"); - schema.AddLocalityGroup("lg0"); - tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1"); - cfd1->DisableNotify(); - tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); - cfd2->DisableNotify(); - auto before_num = schema.LocalityGroupNum(); - EXPECT_TRUE(ExtendNotifyLgToDescriptor(&schema)); - EXPECT_TRUE(schema.LocalityGroupNum() == before_num); + // all disable notify + tera::TableDescriptor schema("t1"); + schema.AddLocalityGroup("lg0"); + tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1"); + cfd1->DisableNotify(); + tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); + cfd2->DisableNotify(); + auto before_num = schema.LocalityGroupNum(); + EXPECT_TRUE(ExtendNotifyLgToDescriptor(&schema)); + EXPECT_TRUE(schema.LocalityGroupNum() == before_num); } TEST(SdkUtilsTest, ExtendNotifyLgToDescriptor1) { - // some disable notify - tera::TableDescriptor schema("t1"); - schema.AddLocalityGroup("lg0"); - tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1"); - cfd1->EnableNotify(); - tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); - cfd2->DisableNotify(); - auto before_num = schema.LocalityGroupNum(); - EXPECT_TRUE(ExtendNotifyLgToDescriptor(&schema)); - EXPECT_TRUE(schema.LocalityGroupNum() == before_num + 1); + // some disable notify + tera::TableDescriptor schema("t1"); + schema.AddLocalityGroup("lg0"); + tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1"); + cfd1->EnableNotify(); + tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); + cfd2->DisableNotify(); + auto before_num = schema.LocalityGroupNum(); + EXPECT_TRUE(ExtendNotifyLgToDescriptor(&schema)); + EXPECT_TRUE(schema.LocalityGroupNum() == before_num + 1); } TEST(SdkUtilsTest, ExtendNotifyLgToDescriptor2) { - // some disable notify - tera::TableDescriptor schema("t1"); - schema.AddLocalityGroup("lg0"); - tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1"); - cfd1->DisableNotify(); - tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); - cfd2->EnableNotify(); - auto before_num = schema.LocalityGroupNum(); - EXPECT_TRUE(ExtendNotifyLgToDescriptor(&schema)); - EXPECT_TRUE(schema.LocalityGroupNum() == before_num + 1); + // some disable notify + tera::TableDescriptor schema("t1"); + schema.AddLocalityGroup("lg0"); + tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1"); + cfd1->DisableNotify(); + tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); + cfd2->EnableNotify(); + auto before_num = schema.LocalityGroupNum(); + EXPECT_TRUE(ExtendNotifyLgToDescriptor(&schema)); + EXPECT_TRUE(schema.LocalityGroupNum() == before_num + 1); } TEST(SdkUtilsTest, ExtendNotifyLgToDescriptor3) { - // all enable notify - tera::TableDescriptor schema("t1"); - schema.AddLocalityGroup("lg0"); - tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1"); - cfd1->EnableNotify(); - tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); - cfd2->EnableNotify(); - auto before_num = schema.LocalityGroupNum(); - EXPECT_TRUE(ExtendNotifyLgToDescriptor(&schema)); - EXPECT_TRUE(schema.LocalityGroupNum() == before_num + 1); + // all enable notify + tera::TableDescriptor schema("t1"); + schema.AddLocalityGroup("lg0"); + tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1"); + cfd1->EnableNotify(); + tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); + cfd2->EnableNotify(); + auto before_num = schema.LocalityGroupNum(); + EXPECT_TRUE(ExtendNotifyLgToDescriptor(&schema)); + EXPECT_TRUE(schema.LocalityGroupNum() == before_num + 1); } TEST(SdkUtilsTest, ExtendNotifyLgToDescriptor4) { - // have lg named 'notify' but not set any cf 'notify=on' - tera::TableDescriptor schema("t1"); - schema.AddLocalityGroup("notify"); - tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1", "notify"); - tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2", "notify"); - auto before_num = schema.LocalityGroupNum(); - EXPECT_TRUE(ExtendNotifyLgToDescriptor(&schema)); - EXPECT_TRUE(schema.LocalityGroupNum() == before_num); + // have lg named 'notify' but not set any cf 'notify=on' + tera::TableDescriptor schema("t1"); + schema.AddLocalityGroup("notify"); + tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1", "notify"); + tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2", "notify"); + auto before_num = schema.LocalityGroupNum(); + EXPECT_TRUE(ExtendNotifyLgToDescriptor(&schema)); + EXPECT_TRUE(schema.LocalityGroupNum() == before_num); } TEST(SdkUtilsTest, ExtendNotifyLgToDescriptor5) { - // have lg named 'notify' and set some cf 'notify=on' - tera::TableDescriptor schema("t1"); - schema.AddLocalityGroup("notify"); - tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1", "notify"); - cfd1->EnableNotify(); - tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2", "notify"); - auto before_num = schema.LocalityGroupNum(); - EXPECT_FALSE(ExtendNotifyLgToDescriptor(&schema)); - EXPECT_TRUE(schema.LocalityGroupNum() == before_num); + // have lg named 'notify' and set some cf 'notify=on' + tera::TableDescriptor schema("t1"); + schema.AddLocalityGroup("notify"); + tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1", "notify"); + cfd1->EnableNotify(); + tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2", "notify"); + auto before_num = schema.LocalityGroupNum(); + EXPECT_FALSE(ExtendNotifyLgToDescriptor(&schema)); + EXPECT_TRUE(schema.LocalityGroupNum() == before_num); } TEST(SdkUtilsTest, ExtendNotifyLgToDescriptor6) { - // have cf named '_N_' but not set any cf 'notify=on' - tera::TableDescriptor schema("t1"); - schema.AddLocalityGroup("lg0"); - tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("_N_"); - tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); - auto before_num = schema.LocalityGroupNum(); - EXPECT_TRUE(ExtendNotifyLgToDescriptor(&schema)); - EXPECT_TRUE(schema.LocalityGroupNum() == before_num); + // have cf named '_N_' but not set any cf 'notify=on' + tera::TableDescriptor schema("t1"); + schema.AddLocalityGroup("lg0"); + tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("_N_"); + tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); + auto before_num = schema.LocalityGroupNum(); + EXPECT_TRUE(ExtendNotifyLgToDescriptor(&schema)); + EXPECT_TRUE(schema.LocalityGroupNum() == before_num); } TEST(SdkUtilsTest, ExtendNotifyLgToDescriptor7) { - // have cf named '_N_' but some set cf 'notify=on' - tera::TableDescriptor schema("t1"); - schema.AddLocalityGroup("lg0"); - tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("_N_"); - cfd1->EnableNotify(); - tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); - auto before_num = schema.LocalityGroupNum(); - EXPECT_FALSE(ExtendNotifyLgToDescriptor(&schema)); - EXPECT_TRUE(schema.LocalityGroupNum() == before_num); + // have cf named '_N_' but some set cf 'notify=on' + tera::TableDescriptor schema("t1"); + schema.AddLocalityGroup("lg0"); + tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("_N_"); + cfd1->EnableNotify(); + tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); + auto before_num = schema.LocalityGroupNum(); + EXPECT_FALSE(ExtendNotifyLgToDescriptor(&schema)); + EXPECT_TRUE(schema.LocalityGroupNum() == before_num); } -} // namespace tera +TEST(SdkUtilsTest, SetMutationErrorIfInvalid) { + ErrorCode err; + std::string test_str = std::string(1 + (64 << 10), 'h'); + SetMutationErrorIfInvalid(test_str, FieldType::kRowkey, &err); + EXPECT_TRUE(err.GetType() == ErrorCode::kBadParam); + err.SetFailed(ErrorCode::kOK, ""); + + SetMutationErrorIfInvalid(test_str, FieldType::kKVColumnFamily, &err); + EXPECT_TRUE(err.GetType() == ErrorCode::kBadParam); + err.SetFailed(ErrorCode::kOK, ""); + + SetMutationErrorIfInvalid(test_str, FieldType::kKVQualifier, &err); + EXPECT_TRUE(err.GetType() == ErrorCode::kBadParam); + err.SetFailed(ErrorCode::kOK, ""); + + SetMutationErrorIfInvalid(test_str, FieldType::kQualifier, &err); + EXPECT_TRUE(err.GetType() == ErrorCode::kBadParam); + err.SetFailed(ErrorCode::kOK, ""); + + test_str = std::string(1 + (32 << 20), 'h'); + SetMutationErrorIfInvalid(test_str, FieldType::kValue, &err); + EXPECT_TRUE(err.GetType() == ErrorCode::kBadParam); +} + +} // namespace tera diff --git a/src/sdk/test/value_filter_test.cc b/src/sdk/test/value_filter_test.cc new file mode 100644 index 000000000..743376e8d --- /dev/null +++ b/src/sdk/test/value_filter_test.cc @@ -0,0 +1,324 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "gtest/gtest.h" +#include "tera.h" +#include "sdk/scan_impl.h" +#include "proto/filter.pb.h" +#include "io/coding.h" +#include "proto/tabletnode_rpc.pb.h" + +namespace tera { +namespace filter { + +struct RowData { + std::string column_family; + std::string qualifier; + std::string value; +}; + +class ValueFilterTest : public ::testing::Test {}; + +TEST_F(ValueFilterTest, SetValueFilterINT64) { + ScanDescriptor scan_desc(""); + int64_t ref_value = INT64_MIN; + IntegerComparatorPtr comparator = + std::make_shared(IntegerValueType::kInt64, ref_value); + ValueFilterPtr value_filter_in = + std::make_shared(CompareOperator::kLess, comparator); + value_filter_in->SetColumnFamily("cf1"); + value_filter_in->SetColumnQualifier("qu1"); + value_filter_in->SetFilterIfMissing(true); + ASSERT_TRUE(scan_desc.SetFilter(value_filter_in)); + + ScanDescImpl* scan_desc_impl = scan_desc.GetImpl(); + ASSERT_TRUE(scan_desc_impl); + FilterDesc* filter_desc = scan_desc_impl->GetFilterDesc(); + ASSERT_TRUE(filter_desc); + EXPECT_EQ(filter_desc->type(), FilterDesc::kValueFilter); + ValueFilterPtr value_filter_out = std::make_shared(); + ASSERT_TRUE(value_filter_out->ParseFrom(filter_desc->serialized_filter())); + EXPECT_EQ(value_filter_out->column_family_, "cf1"); + EXPECT_EQ(value_filter_out->column_qualifier_, "qu1"); + EXPECT_EQ(value_filter_out->filter_if_missing_, true); + IntegerComparator* cp = dynamic_cast(value_filter_out->comparator_.get()); + EXPECT_EQ((int64_t)(cp->integer_value_), ref_value); +} + +TEST_F(ValueFilterTest, SetValueFilterUINT64) { + ScanDescriptor scan_desc(""); + uint64_t ref_value = UINT64_MAX; + IntegerComparatorPtr comparator = + std::make_shared(IntegerValueType::kUint64, ref_value); + ValueFilterPtr value_filter_in = + std::make_shared(CompareOperator::kLessOrEqual, comparator); + value_filter_in->SetColumnFamily("cf1"); + value_filter_in->SetFilterIfMissing(true); + ASSERT_TRUE(scan_desc.SetFilter(value_filter_in)); + + ScanDescImpl* scan_desc_impl = scan_desc.GetImpl(); + ASSERT_TRUE(scan_desc_impl); + FilterDesc* filter_desc = scan_desc_impl->GetFilterDesc(); + ASSERT_TRUE(filter_desc); + EXPECT_EQ(filter_desc->type(), FilterDesc::kValueFilter); + ValueFilterPtr value_filter_out = std::make_shared(); + ASSERT_TRUE(value_filter_out->ParseFrom(filter_desc->serialized_filter())); + EXPECT_EQ(value_filter_out->column_family_, "cf1"); + EXPECT_EQ(value_filter_out->column_qualifier_, ""); + EXPECT_EQ(value_filter_out->filter_if_missing_, true); + IntegerComparator* cp = dynamic_cast(value_filter_out->comparator_.get()); + EXPECT_EQ((uint64_t)(cp->integer_value_), ref_value); +} + +TEST_F(ValueFilterTest, SetValueFilterINT32) { + ScanDescriptor scan_desc(""); + int32_t ref_value = INT32_MIN; + IntegerComparatorPtr comparator = + std::make_shared(IntegerValueType::kInt32, ref_value); + ValueFilterPtr value_filter_in = + std::make_shared(CompareOperator::kEqual, comparator); + value_filter_in->SetFilterIfMissing(true); + ASSERT_TRUE(scan_desc.SetFilter(value_filter_in)); + + ScanDescImpl* scan_desc_impl = scan_desc.GetImpl(); + ASSERT_TRUE(scan_desc_impl); + FilterDesc* filter_desc = scan_desc_impl->GetFilterDesc(); + ASSERT_TRUE(filter_desc); + EXPECT_EQ(filter_desc->type(), FilterDesc::kValueFilter); + ValueFilterPtr value_filter_out = std::make_shared(); + ASSERT_TRUE(value_filter_out->ParseFrom(filter_desc->serialized_filter())); + EXPECT_EQ(value_filter_out->column_family_, ""); + EXPECT_EQ(value_filter_out->column_qualifier_, ""); + EXPECT_EQ(value_filter_out->filter_if_missing_, true); + IntegerComparator* cp = dynamic_cast(value_filter_out->comparator_.get()); + EXPECT_EQ((int32_t)(cp->integer_value_), ref_value); +} + +TEST_F(ValueFilterTest, SetValueFilterUINT32) { + ScanDescriptor scan_desc(""); + uint32_t ref_value = UINT32_MAX; + IntegerComparatorPtr comparator = + std::make_shared(IntegerValueType::kUint32, ref_value); + ValueFilterPtr value_filter_in = + std::make_shared(CompareOperator::kNotEqual, comparator); + value_filter_in->SetColumnFamily(""); + value_filter_in->SetColumnQualifier("qu1"); + ASSERT_TRUE(scan_desc.SetFilter(value_filter_in)); + + ScanDescImpl* scan_desc_impl = scan_desc.GetImpl(); + ASSERT_TRUE(scan_desc_impl); + FilterDesc* filter_desc = scan_desc_impl->GetFilterDesc(); + ASSERT_TRUE(filter_desc); + EXPECT_EQ(filter_desc->type(), FilterDesc::kValueFilter); + ValueFilterPtr value_filter_out = std::make_shared(); + ASSERT_TRUE(value_filter_out->ParseFrom(filter_desc->serialized_filter())); + EXPECT_EQ(value_filter_out->column_family_, ""); + EXPECT_EQ(value_filter_out->column_qualifier_, "qu1"); + EXPECT_EQ(value_filter_out->filter_if_missing_, false); + IntegerComparator* cp = dynamic_cast(value_filter_out->comparator_.get()); + EXPECT_EQ((uint32_t)(cp->integer_value_), ref_value); +} + +TEST_F(ValueFilterTest, SetValueFilterDecimal) { + ScanDescriptor scan_desc(""); + float ref_value = 123.456; + DecimalComparatorPtr comparator = std::make_shared(ref_value); + ValueFilterPtr value_filter_in = + std::make_shared(CompareOperator::kGreaterOrEqual, comparator); + value_filter_in->SetColumnFamily("cf1"); + value_filter_in->SetColumnQualifier("qu1"); + ASSERT_TRUE(scan_desc.SetFilter(value_filter_in)); + + ScanDescImpl* scan_desc_impl = scan_desc.GetImpl(); + ASSERT_TRUE(scan_desc_impl); + FilterDesc* filter_desc = scan_desc_impl->GetFilterDesc(); + ASSERT_TRUE(filter_desc); + EXPECT_EQ(filter_desc->type(), FilterDesc::kValueFilter); + ValueFilterPtr value_filter_out = std::make_shared(); + ASSERT_TRUE(value_filter_out->ParseFrom(filter_desc->serialized_filter())); + EXPECT_EQ(value_filter_out->column_family_, "cf1"); + EXPECT_EQ(value_filter_out->column_qualifier_, "qu1"); + EXPECT_EQ(value_filter_out->filter_if_missing_, false); + DecimalComparator* cp = dynamic_cast(value_filter_out->comparator_.get()); + EXPECT_TRUE(cp->decimal_value_ < ref_value + 1e-6 && cp->decimal_value_ > ref_value - 1e-6); +} + +TEST_F(ValueFilterTest, SetValueFilterBinary) { + ScanDescriptor scan_desc(""); + std::string ref_value = "abcdefg"; + BinaryComparatorPtr comparator = std::make_shared(ref_value); + ValueFilterPtr value_filter_in = + std::make_shared(CompareOperator::kGreater, comparator); + value_filter_in->SetColumnFamily("cf1"); + value_filter_in->SetColumnQualifier("qu1"); + ASSERT_TRUE(scan_desc.SetFilter(value_filter_in)); + + ScanDescImpl* scan_desc_impl = scan_desc.GetImpl(); + ASSERT_TRUE(scan_desc_impl); + FilterDesc* filter_desc = scan_desc_impl->GetFilterDesc(); + ASSERT_TRUE(filter_desc); + EXPECT_EQ(filter_desc->type(), FilterDesc::kValueFilter); + ValueFilterPtr value_filter_out = std::make_shared(); + ASSERT_TRUE(value_filter_out->ParseFrom(filter_desc->serialized_filter())); + EXPECT_EQ(value_filter_out->column_family_, "cf1"); + EXPECT_EQ(value_filter_out->column_qualifier_, "qu1"); + EXPECT_EQ(value_filter_out->filter_if_missing_, false); + BinaryComparator* cp = dynamic_cast(value_filter_out->comparator_.get()); + EXPECT_EQ(cp->value_, ref_value); +} + +TEST_F(ValueFilterTest, FilterCase) { + int64_t ref_value_1 = 10; + IntegerComparatorPtr comparator1 = + std::make_shared(IntegerValueType::kInt64, ref_value_1); + ValueFilterPtr value_filter_1 = + std::make_shared(CompareOperator::kLess, comparator1); + value_filter_1->SetColumnFamily("cf1"); + value_filter_1->SetColumnQualifier("qu1"); + value_filter_1->SetFilterIfMissing(true); + + float ref_value_2 = 123.456; + DecimalComparatorPtr comparator2 = std::make_shared(ref_value_2); + ValueFilterPtr value_filter_2 = + std::make_shared(CompareOperator::kGreaterOrEqual, comparator2); + value_filter_2->SetColumnFamily("cf2"); + value_filter_2->SetColumnQualifier("qu2"); + + std::string ref_value_3 = "abc"; + BinaryComparatorPtr comparator3 = std::make_shared(ref_value_3); + ValueFilterPtr value_filter_3 = + std::make_shared(CompareOperator::kGreater, comparator3); + value_filter_3->SetColumnFamily("cf3"); + value_filter_3->SetColumnQualifier("qu3"); + + int64_t ref_value_4 = 10; + IntegerComparatorPtr comparator4 = + std::make_shared(IntegerValueType::kInt64, ref_value_4); + ValueFilterPtr value_filter_4 = + std::make_shared(CompareOperator::kLess, comparator1); + value_filter_4->SetColumnFamily("cf4"); + value_filter_4->SetColumnQualifier("qu4"); + value_filter_4->SetFilterIfMissing(true); + + float ref_value_5 = 123.456; + DecimalComparatorPtr comparator5 = std::make_shared(ref_value_5); + ValueFilterPtr value_filter_5 = + std::make_shared(CompareOperator::kGreaterOrEqual, comparator5); + value_filter_5->SetColumnFamily("cf5"); + value_filter_5->SetColumnQualifier("qu5"); + + int64_t ref_value_6 = 11; + IntegerComparatorPtr comparator6 = + std::make_shared(IntegerValueType::kInt64, ref_value_6); + ValueFilterPtr value_filter_6 = + std::make_shared(CompareOperator::kGreater, comparator6); + value_filter_6->SetColumnFamily("cf1"); + + int64_t ref_value_7 = 13; + IntegerComparatorPtr comparator7 = + std::make_shared(IntegerValueType::kInt64, ref_value_7); + ValueFilterPtr value_filter_7 = + std::make_shared(CompareOperator::kLess, comparator7); + value_filter_7->SetColumnFamily("cf1"); + + RowData row_buf[5]; + + row_buf[0].column_family = "cf1"; + row_buf[0].qualifier = ""; + std::string value; + comparator1->EncodeInteger(IntegerValueType::kInt64, 12, &value); + row_buf[0].value = value; + + row_buf[1].column_family = "cf1"; + row_buf[1].qualifier = "qu0"; + comparator1->EncodeInteger(IntegerValueType::kInt64, 11, &value); + row_buf[1].value = value; + + row_buf[2].column_family = "cf1"; + row_buf[2].qualifier = "qu1"; + comparator1->EncodeInteger(IntegerValueType::kInt64, 9, &value); + row_buf[2].value = value; + + row_buf[3].column_family = "cf2"; + row_buf[3].qualifier = "qu2"; + value = comparator2->EncodeDecimal(123.455); + row_buf[3].value = value; + + row_buf[4].column_family = "cf3"; + row_buf[4].qualifier = "qu3"; + row_buf[4].value = "abd"; + + value_filter_1->Reset(); + for (size_t i = 0; i < 5; ++i) { + Filter::ReturnCode rc = value_filter_1->FilterCell(row_buf[i].column_family, + row_buf[i].qualifier, row_buf[i].value); + if (rc == Filter::kNotIncludeCurAndLeftCellOfRow) { + break; + } + } + EXPECT_EQ(value_filter_1->FilterRow(), false); + + value_filter_2->Reset(); + for (size_t i = 0; i < 5; ++i) { + Filter::ReturnCode rc = value_filter_2->FilterCell(row_buf[i].column_family, + row_buf[i].qualifier, row_buf[i].value); + if (rc == Filter::kNotIncludeCurAndLeftCellOfRow) { + break; + } + } + EXPECT_EQ(value_filter_2->FilterRow(), true); + + value_filter_3->Reset(); + for (size_t i = 0; i < 5; ++i) { + Filter::ReturnCode rc = value_filter_3->FilterCell(row_buf[i].column_family, + row_buf[i].qualifier, row_buf[i].value); + if (rc == Filter::kNotIncludeCurAndLeftCellOfRow) { + break; + } + } + EXPECT_EQ(value_filter_3->FilterRow(), false); + + value_filter_4->Reset(); + for (size_t i = 0; i < 5; ++i) { + Filter::ReturnCode rc = value_filter_4->FilterCell(row_buf[i].column_family, + row_buf[i].qualifier, row_buf[i].value); + if (rc == Filter::kNotIncludeCurAndLeftCellOfRow) { + break; + } + } + EXPECT_EQ(value_filter_4->FilterRow(), true); + + value_filter_5->Reset(); + for (size_t i = 0; i < 5; ++i) { + Filter::ReturnCode rc = value_filter_5->FilterCell(row_buf[i].column_family, + row_buf[i].qualifier, row_buf[i].value); + if (rc == Filter::kNotIncludeCurAndLeftCellOfRow) { + break; + } + } + EXPECT_EQ(value_filter_5->FilterRow(), false); + + value_filter_6->Reset(); + for (size_t i = 0; i < 5; ++i) { + Filter::ReturnCode rc = value_filter_6->FilterCell(row_buf[i].column_family, + row_buf[i].qualifier, row_buf[i].value); + if (rc == Filter::kNotIncludeCurAndLeftCellOfRow) { + break; + } + } + EXPECT_EQ(value_filter_6->FilterRow(), true); + + value_filter_7->Reset(); + for (size_t i = 0; i < 5; ++i) { + Filter::ReturnCode rc = value_filter_7->FilterCell(row_buf[i].column_family, + row_buf[i].qualifier, row_buf[i].value); + if (rc == Filter::kNotIncludeCurAndLeftCellOfRow) { + break; + } + } + EXPECT_EQ(value_filter_7->FilterRow(), false); +} + +} // namespace filter +} // namespace tera diff --git a/src/sdk/timeoracle_client_impl.cc b/src/sdk/timeoracle_client_impl.cc index 7f0e16b6e..8a14768ef 100644 --- a/src/sdk/timeoracle_client_impl.cc +++ b/src/sdk/timeoracle_client_impl.cc @@ -12,107 +12,94 @@ namespace tera { namespace timeoracle { TimeoracleClientImpl::TimeoracleClientImpl(ThreadPool* thread_pool, - sdk::ClusterFinder* cluster_finder, - int32_t rpc_timeout) : - RpcClient(cluster_finder->TimeoracleAddr()), - thread_pool_(thread_pool), - rpc_timeout_(rpc_timeout), - update_timestamp_(0), - cluster_finder_(cluster_finder) {} + sdk::ClusterFinder* cluster_finder, int32_t rpc_timeout) + : RpcClient(cluster_finder->TimeoracleAddr()), + thread_pool_(thread_pool), + rpc_timeout_(rpc_timeout), + update_timestamp_(0), + cluster_finder_(cluster_finder) {} void TimeoracleClientImpl::refresh_timeoracle_address(int64_t last_timestamp) { - std::unique_lock lock_guard(mutex_); - if (last_timestamp > 0 && last_timestamp < update_timestamp_) { - return; - } - - LOG(INFO) << "TimeoracleClientImpl try to update cluster, before is " << GetConnectAddr(); - std::string addr = cluster_finder_->TimeoracleAddr(true); - ResetClient(addr); - LOG(INFO) << "TimeoracleClientImpl update cluster, current is " << GetConnectAddr(); - update_timestamp_ = get_micros(); + std::unique_lock lock_guard(mutex_); + if (last_timestamp > 0 && last_timestamp < update_timestamp_) { + return; + } + + LOG(INFO) << "TimeoracleClientImpl try to update cluster, before is " << GetConnectAddr(); + std::string addr = cluster_finder_->TimeoracleAddr(true); + ResetClient(addr); + LOG(INFO) << "TimeoracleClientImpl update cluster, current is " << GetConnectAddr(); + update_timestamp_ = get_micros(); } int64_t TimeoracleClientImpl::GetTimestamp(uint32_t count) { - GetTimestampRequest request; - GetTimestampResponse response; - - request.set_count(count); - - std::function done; - - if (SendMessageWithRetry(&TimeoracleServer::Stub::GetTimestamp, - &request, - &response, - done, - "GetTimestamp", - rpc_timeout_, - thread_pool_)) { - int code = response.status(); - if (code != kTimeoracleOk) { - // Internel Error - return 0; - } - return response.start_timestamp(); - } + GetTimestampRequest request; + GetTimestampResponse response; - // Rpc Failed - refresh_timeoracle_address(0); - return 0; -} + request.set_count(count); + + std::function done; -bool TimeoracleClientImpl::GetTimestamp(uint32_t count, std::function callback) { - auto request = new GetTimestampRequest(); - auto response = new GetTimestampResponse(); - request->set_count(count); - int64_t start_time = get_micros(); - - std::function done - = std::bind(&TimeoracleClientImpl::OnRpcFinished, this, start_time, callback, - std::placeholders::_1, std::placeholders::_2, - std::placeholders::_3, std::placeholders::_4); - - if (SendMessageWithRetry(&TimeoracleServer::Stub::GetTimestamp, - request, - response, - done, - "GetTimestamp", - rpc_timeout_, - thread_pool_)) { - return true; + if (SendMessageWithRetry(&TimeoracleServer::Stub::GetTimestamp, &request, &response, done, + "GetTimestamp", rpc_timeout_, thread_pool_)) { + int code = response.status(); + if (code != kTimeoracleOk) { + // Internel Error + return 0; } + return response.start_timestamp(); + } - // Rpc Failed - refresh_timeoracle_address(0); - return false; + // Rpc Failed + refresh_timeoracle_address(0); + return 0; } -void TimeoracleClientImpl::OnRpcFinished(int64_t start_time, - std::function callback, +bool TimeoracleClientImpl::GetTimestamp(uint32_t count, std::function callback) { + auto request = new GetTimestampRequest(); + auto response = new GetTimestampResponse(); + request->set_count(count); + int64_t start_time = get_micros(); + + std::function done = + std::bind(&TimeoracleClientImpl::OnRpcFinished, this, start_time, callback, + std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, + std::placeholders::_4); + + if (SendMessageWithRetry(&TimeoracleServer::Stub::GetTimestamp, request, response, done, + "GetTimestamp", rpc_timeout_, thread_pool_)) { + return true; + } + + // Rpc Failed + refresh_timeoracle_address(0); + return false; +} + +void TimeoracleClientImpl::OnRpcFinished(int64_t start_time, std::function callback, const GetTimestampRequest* request, - GetTimestampResponse* response, - bool rpc_error, - int error_code){ - std::unique_ptr req_hold(request); - std::unique_ptr res_hold(response); - - if (rpc_error) { - LOG(ERROR) << "RpcRequest failed for GetTimestamp, errno=" << error_code; - callback(0); - refresh_timeoracle_address(start_time); - return ; - } + GetTimestampResponse* response, bool rpc_error, + int error_code) { + std::unique_ptr req_hold(request); + std::unique_ptr res_hold(response); - int64_t ts = response->start_timestamp(); + if (rpc_error) { + LOG(ERROR) << "RpcRequest failed for GetTimestamp, errno=" << error_code; + callback(0); + refresh_timeoracle_address(start_time); + return; + } - int code = response->status(); + int64_t ts = response->start_timestamp(); - if (code != kTimeoracleOk) { - ts = 0; - } + int code = response->status(); + + if (code != kTimeoracleOk) { + ts = 0; + } - callback(ts); + callback(ts); } -} // namespace timeoracle -} // namespace tera +} // namespace timeoracle +} // namespace tera diff --git a/src/sdk/timeoracle_client_impl.h b/src/sdk/timeoracle_client_impl.h index e47fe9995..6e2780502 100644 --- a/src/sdk/timeoracle_client_impl.h +++ b/src/sdk/timeoracle_client_impl.h @@ -20,37 +20,33 @@ namespace tera { namespace timeoracle { class TimeoracleClientImpl : public RpcClient { -public: - TimeoracleClientImpl(ThreadPool* thread_pool, - sdk::ClusterFinder* cluster_finder, - int32_t rpc_timeout = FLAGS_tera_rpc_timeout_period); + public: + TimeoracleClientImpl(ThreadPool* thread_pool, sdk::ClusterFinder* cluster_finder, + int32_t rpc_timeout = FLAGS_tera_rpc_timeout_period); - ~TimeoracleClientImpl() {} + ~TimeoracleClientImpl() {} - int64_t GetTimestamp(uint32_t count); + int64_t GetTimestamp(uint32_t count); - bool GetTimestamp(uint32_t count, std::function callback); + bool GetTimestamp(uint32_t count, std::function callback); -private: - void refresh_timeoracle_address(int64_t last_timestamp); + private: + void refresh_timeoracle_address(int64_t last_timestamp); - void OnRpcFinished(int64_t start_time, - std::function callback, - const GetTimestampRequest* request, - GetTimestampResponse* response, - bool rpc_error, - int error_code); + void OnRpcFinished(int64_t start_time, std::function callback, + const GetTimestampRequest* request, GetTimestampResponse* response, + bool rpc_error, int error_code); -private: - ThreadPool* thread_pool_; - int32_t rpc_timeout_; + private: + ThreadPool* thread_pool_; + int32_t rpc_timeout_; - std::mutex mutex_; - int64_t update_timestamp_; - sdk::ClusterFinder* cluster_finder_; + std::mutex mutex_; + int64_t update_timestamp_; + sdk::ClusterFinder* cluster_finder_; }; -} // namespace timeoracle -} // namespace tera +} // namespace timeoracle +} // namespace tera -#endif // TERA_SDK_TIMEORACLE_CLIENT_IMPL_H_ +#endif // TERA_SDK_TIMEORACLE_CLIENT_IMPL_H_ diff --git a/src/sdk/transaction_wrapper.h b/src/sdk/transaction_wrapper.h index e44a6b789..1872e2263 100644 --- a/src/sdk/transaction_wrapper.h +++ b/src/sdk/transaction_wrapper.h @@ -12,68 +12,38 @@ namespace tera { template class TransactionWrapper : public Transaction { -public: - TransactionWrapper(const std::shared_ptr& transaction_impl) - : transaction_impl_(transaction_impl) {} - ~TransactionWrapper() {} - void ApplyMutation(RowMutation* row_mu) { - transaction_impl_->ApplyMutation(row_mu); - } - ErrorCode Get(RowReader* row_reader) { - return transaction_impl_->Get(row_reader); - } - void SetCommitCallback(Callback callback) { - transaction_impl_->SetCommitCallback(callback); - } - Callback GetCommitCallback() { - return transaction_impl_->GetCommitCallback(); - } - void SetContext(void* context) { - transaction_impl_->SetContext(context); - } - void* GetContext() { - return transaction_impl_->GetContext(); - } - const ErrorCode& GetError() { - return transaction_impl_->GetError(); - } - ErrorCode Commit() { - return transaction_impl_->Commit(); - } - int64_t GetStartTimestamp() { - return transaction_impl_->GetStartTimestamp(); - } - int64_t GetCommitTimestamp() { - return transaction_impl_->GetCommitTimestamp(); - } - void Ack(Table* t, - const std::string& row_key, - const std::string& column_family, - const std::string& qualifier) { - transaction_impl_->Ack(t, row_key, column_family, qualifier); - } - void Notify(Table* t, - const std::string& row_key, - const std::string& column_family, - const std::string& qualifier) { - transaction_impl_->Notify(t, row_key, column_family, qualifier); - } - void SetIsolation(const IsolationLevel& isolation_level) { - transaction_impl_->SetIsolation(isolation_level); - } - IsolationLevel Isolation() { - return transaction_impl_->Isolation(); - } - void SetTimeout(int64_t timeout_ms) { - transaction_impl_->SetTimeout(timeout_ms); - } - std::shared_ptr GetTransactionPtr() { - return transaction_impl_; - } -private: - std::shared_ptr transaction_impl_; + public: + TransactionWrapper(const std::shared_ptr& transaction_impl) + : transaction_impl_(transaction_impl) {} + ~TransactionWrapper() {} + void ApplyMutation(RowMutation* row_mu) { transaction_impl_->ApplyMutation(row_mu); } + ErrorCode Get(RowReader* row_reader) { return transaction_impl_->Get(row_reader); } + void SetCommitCallback(Callback callback) { transaction_impl_->SetCommitCallback(callback); } + Callback GetCommitCallback() { return transaction_impl_->GetCommitCallback(); } + void SetContext(void* context) { transaction_impl_->SetContext(context); } + void* GetContext() { return transaction_impl_->GetContext(); } + const ErrorCode& GetError() { return transaction_impl_->GetError(); } + ErrorCode Commit() { return transaction_impl_->Commit(); } + int64_t GetStartTimestamp() { return transaction_impl_->GetStartTimestamp(); } + int64_t GetCommitTimestamp() { return transaction_impl_->GetCommitTimestamp(); } + void Ack(Table* t, const std::string& row_key, const std::string& column_family, + const std::string& qualifier) { + transaction_impl_->Ack(t, row_key, column_family, qualifier); + } + void Notify(Table* t, const std::string& row_key, const std::string& column_family, + const std::string& qualifier) { + transaction_impl_->Notify(t, row_key, column_family, qualifier); + } + void SetIsolation(const IsolationLevel& isolation_level) { + transaction_impl_->SetIsolation(isolation_level); + } + IsolationLevel Isolation() { return transaction_impl_->Isolation(); } + void SetTimeout(int64_t timeout_ms) { transaction_impl_->SetTimeout(timeout_ms); } + std::shared_ptr GetTransactionPtr() { return transaction_impl_; } + private: + std::shared_ptr transaction_impl_; }; -} // namespace tera +} // namespace tera -#endif // TERA_SDK_TRANSACTION_WRAPPER_H_ \ No newline at end of file +#endif // TERA_SDK_TRANSACTION_WRAPPER_H_ \ No newline at end of file diff --git a/src/sdk/value_filter.cc b/src/sdk/value_filter.cc new file mode 100644 index 000000000..fc33e9ee0 --- /dev/null +++ b/src/sdk/value_filter.cc @@ -0,0 +1,208 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include +#include "tera.h" +#include "proto/filter.pb.h" +#include "filter_utils.h" +#include "gflags/gflags.h" +#include "glog/logging.h" + +namespace tera { +namespace filter { + +ValueFilter::ValueFilter() + : op_(CompareOperator::kNoOp), filter_if_missing_(false), match_status_(kNotMatchAnything) {} + +ValueFilter::ValueFilter(CompareOperator op, const FilterComparatorPtr& comparator) + : column_family_(""), + column_qualifier_(""), + op_(CompareOperator::kNoOp), + comparator_(comparator), + filter_if_missing_(false), + match_status_(kNotMatchAnything) { + if (op == CompareOperator::kLess || op == CompareOperator::kLessOrEqual || + op == CompareOperator::kEqual || op == CompareOperator::kNotEqual || + op == CompareOperator::kGreaterOrEqual || op == CompareOperator::kGreater) { + op_ = op; + } +} +ValueFilter::~ValueFilter() {} + +FilterType ValueFilter::Type() { return kValueFilter; } + +void ValueFilter::Reset() { match_status_ = kNotMatchAnything; } + +bool ValueFilter::MatchOp(int compare_result) { + switch (op_) { + case CompareOperator::kLess: + return compare_result < 0; + case CompareOperator::kLessOrEqual: + return compare_result <= 0; + case CompareOperator::kEqual: + return compare_result == 0; + case CompareOperator::kNotEqual: + return compare_result != 0; + case CompareOperator::kGreaterOrEqual: + return compare_result >= 0; + case CompareOperator::kGreater: + return compare_result > 0; + default: + LOG(ERROR) << "not support CompareOperator"; + return false; + } +} + +bool ValueFilter::MatchValue(const std::string& value) { + int compare_result = comparator_->CompareWith(value); + return MatchOp(compare_result); +} + +Filter::ReturnCode ValueFilter::FilterCell(const std::string& column_family, + const std::string& column_qualifier, + const std::string& value) { + /* + * The behavior of ValueFilter is different then qu is empty or not. + */ + if (column_qualifier_.empty()) { + /* + * The behavior for empty qu is compatible with old filter. The FilterCell will compare + * the value of each qu for the specified cf till the condition is not satisfied. + */ + return FilterCellWithEmptyQualifier(column_family, column_qualifier, value); + } else { + /* + * When the qu is specified, not empty. The FilterCell just does the comparation for the value + * of the specified cf and qu. + */ + return FilterCellWithNotEmptyQualifier(column_family, column_qualifier, value); + } +} + +Filter::ReturnCode ValueFilter::FilterCellWithEmptyQualifier(const std::string& column_family, + const std::string& column_qualifier, + const std::string& value) { + if (match_status_ == kMatchColumnButNotValue) { + return kNotIncludeCurAndLeftCellOfRow; + } + if (column_family != column_family_) { + return kIncludeCurCell; + } + if (MatchValue(value)) { + match_status_ = kMatchColumnAndValue; + return kIncludeCurCell; + } else { + match_status_ = kMatchColumnButNotValue; + return kNotIncludeCurAndLeftCellOfRow; + } +} + +Filter::ReturnCode ValueFilter::FilterCellWithNotEmptyQualifier(const std::string& column_family, + const std::string& column_qualifier, + const std::string& value) { + if (match_status_ == kMatchColumnAndValue) { + return kIncludeCurCell; + } else if (match_status_ == kMatchColumnButNotValue) { + return kNotIncludeCurAndLeftCellOfRow; + } + if (column_family != column_family_ || column_qualifier != column_qualifier_) { + return kIncludeCurCell; + } + if (MatchValue(value)) { + match_status_ = kMatchColumnAndValue; + return kIncludeCurCell; + } else { + match_status_ = kMatchColumnButNotValue; + return kNotIncludeCurAndLeftCellOfRow; + } +} + +bool ValueFilter::FilterRow() { + if (match_status_ == kNotMatchAnything) { + return filter_if_missing_; + } else if (match_status_ == kMatchColumnButNotValue) { + return true; + } else { + return false; + } +} + +bool ValueFilter::SerializeTo(std::string* serialized_filter) { + ValueFilterDesc value_filter_desc; + value_filter_desc.set_column_family(column_family_); + value_filter_desc.set_column_qualifier(column_qualifier_); + if (op_ == CompareOperator::kNoOp) { + LOG(ERROR) << "not support CompareOperator"; + return false; + } + value_filter_desc.set_compare_op(TransCompareOperator(op_)); + ComparatorDesc* comparator_desc = new ComparatorDesc(); + ComparatorDesc::ComparatorType comp_type = TransComparatorType(comparator_->Type()); + if (comp_type == ComparatorDesc::kUnknownComparator) { + return false; + } + comparator_desc->set_type(comp_type); + int ret = comparator_->SerializeTo(comparator_desc->mutable_serialized_comparator()); + if (!ret) { + return false; + } + value_filter_desc.set_allocated_comparator(comparator_desc); + value_filter_desc.set_filter_if_missing(filter_if_missing_); + return value_filter_desc.SerializeToString(serialized_filter); +} + +bool ValueFilter::ParseFrom(const std::string& serialized_filter) { + ValueFilterDesc value_filter_desc; + int ret = value_filter_desc.ParseFromString(serialized_filter); + if (!ret) { + return false; + } + column_family_ = value_filter_desc.column_family(); + column_qualifier_ = value_filter_desc.column_qualifier(); + op_ = TransCompareType(value_filter_desc.compare_op()); + if (op_ == CompareOperator::kNoOp) { + LOG(ERROR) << "not support CompareOperator"; + return false; + } + switch (value_filter_desc.comparator().type()) { + case ComparatorDesc::kIntegerComparator: + comparator_ = std::make_shared(); + break; + case ComparatorDesc::kDecimalComparator: + comparator_ = std::make_shared(); + break; + case ComparatorDesc::kBinaryComparator: + comparator_ = std::make_shared(); + break; + default: + LOG(WARNING) << "not support comparator type"; + return false; + } + ret = comparator_->ParseFrom(value_filter_desc.comparator().serialized_comparator()); + if (!ret) { + return false; + } + filter_if_missing_ = value_filter_desc.filter_if_missing(); + return true; +} + +void ValueFilter::SetColumnFamily(const std::string& column_family) { + column_family_ = column_family; +} + +void ValueFilter::SetColumnQualifier(const std::string& column_qualifier) { + column_qualifier_ = column_qualifier; +} + +void ValueFilter::SetFilterIfMissing(bool filter_if_missing) { + filter_if_missing_ = filter_if_missing; +} + +void ValueFilter::GetAllColumn(ColumnSet* column_set) { + column_set->insert(std::make_pair(column_family_, column_qualifier_)); +} + +} // namesapce filter +} // namesapce tera diff --git a/src/tabletnode/remote_tabletnode.cc b/src/tabletnode/remote_tabletnode.cc index 7f37f2146..b783554fa 100644 --- a/src/tabletnode/remote_tabletnode.cc +++ b/src/tabletnode/remote_tabletnode.cc @@ -14,63 +14,93 @@ #include "common/metric/ratio_subscriber.h" #include "common/metric/prometheus_subscriber.h" #include "common/metric/percentile_counter.h" +#include "common/timer.h" #include "tabletnode/tabletnode_impl.h" #include "tabletnode/tabletnode_metric_name.h" #include "utils/network_utils.h" -#include "common/timer.h" +#include "quota/ts_write_flow_controller.h" DECLARE_int32(tera_tabletnode_ctrl_thread_num); +DECLARE_int32(tera_tabletnode_lightweight_ctrl_thread_num); DECLARE_int32(tera_tabletnode_write_thread_num); DECLARE_int32(tera_tabletnode_read_thread_num); DECLARE_int32(tera_tabletnode_scan_thread_num); DECLARE_int32(tera_tabletnode_manual_compact_thread_num); DECLARE_int32(tera_request_pending_limit); DECLARE_int32(tera_scan_request_pending_limit); +DECLARE_string(tera_auth_policy); +DECLARE_double(tera_quota_unlimited_pending_ratio); +DECLARE_int32(tera_quota_scan_max_retry_times); +DECLARE_int32(tera_quota_scan_retry_delay_interval); +DECLARE_uint64(tera_quota_max_retry_queue_length); namespace tera { namespace tabletnode { -//Add SubscriberType::SUM for caculating SLA -tera::MetricCounter read_request_counter(kRequestCountMetric, kApiLabelRead, +// Add SubscriberType::SUM for caculating SLA +tera::MetricCounter read_request_counter(kRequestCountMetric, kApiLabelRead, {SubscriberType::QPS, SubscriberType::SUM}); -tera::MetricCounter write_request_counter(kRequestCountMetric, kApiLabelWrite, +tera::MetricCounter write_request_counter(kRequestCountMetric, kApiLabelWrite, {SubscriberType::QPS, SubscriberType::SUM}); tera::MetricCounter scan_request_counter(kRequestCountMetric, kApiLabelScan, {SubscriberType::QPS}); -tera::MetricCounter read_pending_counter(kPendingCountMetric, kApiLabelRead, {SubscriberType::LATEST}, false); -tera::MetricCounter write_pending_counter(kPendingCountMetric, kApiLabelWrite, {SubscriberType::LATEST}, false); -tera::MetricCounter scan_pending_counter(kPendingCountMetric, kApiLabelScan, {SubscriberType::LATEST}, false); -tera::MetricCounter compact_pending_counter(kPendingCountMetric, kApiLabelCompact, {SubscriberType::LATEST}, false); - -//Add SubscriberType::SUM for caculating SLA -tera::MetricCounter read_reject_counter(kRejectCountMetric, kApiLabelRead, +tera::MetricCounter read_pending_counter(kPendingCountMetric, kApiLabelRead, + {SubscriberType::LATEST}, false); +tera::MetricCounter write_pending_counter(kPendingCountMetric, kApiLabelWrite, + {SubscriberType::LATEST}, false); +tera::MetricCounter scan_pending_counter(kPendingCountMetric, kApiLabelScan, + {SubscriberType::LATEST}, false); +tera::MetricCounter compact_pending_counter(kPendingCountMetric, kApiLabelCompact, + {SubscriberType::LATEST}, false); + +// Add SubscriberType::SUM for caculating SLA +tera::MetricCounter read_reject_counter(kRejectCountMetric, kApiLabelRead, {SubscriberType::QPS, SubscriberType::SUM}); -tera::MetricCounter write_reject_counter(kRejectCountMetric, kApiLabelWrite, +tera::MetricCounter write_reject_counter(kRejectCountMetric, kApiLabelWrite, {SubscriberType::QPS, SubscriberType::SUM}); tera::MetricCounter scan_reject_counter(kRejectCountMetric, kApiLabelScan, {SubscriberType::QPS}); -tera::MetricCounter finished_read_request_counter(kFinishedRequestCountMetric, kApiLabelRead, {SubscriberType::QPS}); -tera::MetricCounter finished_write_request_counter(kFinishedRequestCountMetric, kApiLabelWrite, {SubscriberType::QPS}); -tera::MetricCounter finished_scan_request_counter(kFinishedRequestCountMetric, kApiLabelScan, {SubscriberType::QPS}); +tera::MetricCounter read_quota_rejest_counter(kQuotaRejectCountMetric, kApiLabelRead, + {SubscriberType::QPS, SubscriberType::SUM}); +tera::MetricCounter write_quota_reject_counter(kQuotaRejectCountMetric, kApiLabelWrite, + {SubscriberType::QPS, SubscriberType::SUM}); +tera::MetricCounter scan_quota_reject_counter(kQuotaRejectCountMetric, kApiLabelScan, + {SubscriberType::QPS}); + +tera::MetricCounter finished_read_request_counter(kFinishedRequestCountMetric, kApiLabelRead, + {SubscriberType::QPS}); +tera::MetricCounter finished_write_request_counter(kFinishedRequestCountMetric, kApiLabelWrite, + {SubscriberType::QPS}); +tera::MetricCounter finished_scan_request_counter(kFinishedRequestCountMetric, kApiLabelScan, + {SubscriberType::QPS}); tera::MetricCounter read_delay(kRequestDelayMetric, kApiLabelRead, {}); tera::MetricCounter write_delay(kRequestDelayMetric, kApiLabelWrite, {}); tera::MetricCounter scan_delay(kRequestDelayMetric, kApiLabelScan, {}); -tera::AutoSubscriberRegister rand_read_delay_per_request(std::unique_ptr(new tera::RatioSubscriber( - MetricId(kRequestDelayAvgMetric, kApiLabelRead), - std::unique_ptr(new tera::PrometheusSubscriber(MetricId(kRequestDelayMetric, kApiLabelRead), SubscriberType::SUM)), - std::unique_ptr(new tera::PrometheusSubscriber(MetricId(kFinishedRequestCountMetric, kApiLabelRead), SubscriberType::SUM))))); - -tera::AutoSubscriberRegister write_delay_per_request(std::unique_ptr(new tera::RatioSubscriber( - MetricId(kRequestDelayAvgMetric, kApiLabelWrite), - std::unique_ptr(new tera::PrometheusSubscriber(MetricId(kRequestDelayMetric, kApiLabelWrite), SubscriberType::SUM)), - std::unique_ptr(new tera::PrometheusSubscriber(MetricId(kFinishedRequestCountMetric, kApiLabelWrite), SubscriberType::SUM))))); - -tera::AutoSubscriberRegister scan_delay_per_request(std::unique_ptr(new tera::RatioSubscriber( - MetricId(kRequestDelayAvgMetric, kApiLabelScan), - std::unique_ptr(new tera::PrometheusSubscriber(MetricId(kRequestDelayMetric, kApiLabelScan), SubscriberType::SUM)), - std::unique_ptr(new tera::PrometheusSubscriber(MetricId(kFinishedRequestCountMetric, kApiLabelScan), SubscriberType::SUM))))); +tera::AutoSubscriberRegister rand_read_delay_per_request( + std::unique_ptr(new tera::RatioSubscriber( + MetricId(kRequestDelayAvgMetric, kApiLabelRead), + std::unique_ptr(new tera::PrometheusSubscriber( + MetricId(kRequestDelayMetric, kApiLabelRead), SubscriberType::SUM)), + std::unique_ptr(new tera::PrometheusSubscriber( + MetricId(kFinishedRequestCountMetric, kApiLabelRead), SubscriberType::SUM))))); + +tera::AutoSubscriberRegister write_delay_per_request( + std::unique_ptr(new tera::RatioSubscriber( + MetricId(kRequestDelayAvgMetric, kApiLabelWrite), + std::unique_ptr(new tera::PrometheusSubscriber( + MetricId(kRequestDelayMetric, kApiLabelWrite), SubscriberType::SUM)), + std::unique_ptr(new tera::PrometheusSubscriber( + MetricId(kFinishedRequestCountMetric, kApiLabelWrite), SubscriberType::SUM))))); + +tera::AutoSubscriberRegister scan_delay_per_request( + std::unique_ptr(new tera::RatioSubscriber( + MetricId(kRequestDelayAvgMetric, kApiLabelScan), + std::unique_ptr(new tera::PrometheusSubscriber( + MetricId(kRequestDelayMetric, kApiLabelScan), SubscriberType::SUM)), + std::unique_ptr(new tera::PrometheusSubscriber( + MetricId(kFinishedRequestCountMetric, kApiLabelScan), SubscriberType::SUM))))); tera::PercentileCounter write_95(kRequestDelayPercentileMetric, kWriteLabelPercentile95, 95); tera::PercentileCounter write_99(kRequestDelayPercentileMetric, kWriteLabelPercentile99, 99); @@ -79,472 +109,685 @@ tera::PercentileCounter read_99(kRequestDelayPercentileMetric, kReadLabelPercent tera::PercentileCounter scan_95(kRequestDelayPercentileMetric, kScanLabelPercentile95, 95); tera::PercentileCounter scan_99(kRequestDelayPercentileMetric, kScanLabelPercentile99, 99); - void ReadDoneWrapper::Run() { - int64_t now_us = get_micros(); - int64_t used_us = now_us - start_micros_; - int64_t row_num = request_->row_info_list_size(); - if (used_us <= 0) { - LOG(ERROR) << "now us: "<< now_us << " start_us: "<< start_micros_; - } - finished_read_request_counter.Add(row_num); - read_delay.Add(used_us); - if (row_num > 0) { - read_95.Append(used_us / row_num); - read_99.Append(used_us / row_num); + int64_t now_us = get_micros(); + int64_t used_us = now_us - start_micros_; + int64_t row_num = request_->row_info_list_size(); + if (used_us < 0) { + LOG(ERROR) << "now us: " << now_us << " start_us: " << start_micros_; + } + finished_read_request_counter.Add(row_num); + read_delay.Add(used_us); + if (row_num > 0) { + read_95.Append(used_us / row_num); + read_99.Append(used_us / row_num); + } + + // quota entry adjuest + if (response_->has_detail() && response_->success_num() > 0) { + int64_t success_num = response_->success_num(); + int64_t sum_read_bytes = 0; + int32_t row_result_size = response_->detail().row_result_size(); + for (int32_t row_result_index = 0; row_result_index < row_result_size; ++row_result_index) { + sum_read_bytes += response_->detail().row_result(row_result_index).ByteSize(); } - delete this; + quota_entry_->Adjust(request_->tablet_name(), kQuotaReadBytes, sum_read_bytes / success_num); + } + delete this; } void WriteDoneWrapper::Run() { + int64_t now_us = get_micros(); + int64_t used_us = now_us - start_micros_; + int64_t row_num = request_->row_list_size(); + if (used_us < 0) { + LOG(ERROR) << "now us: " << now_us << " start_us: " << start_micros_; + } + finished_write_request_counter.Add(row_num); + write_delay.Add(used_us); + if (row_num > 0) { + write_95.Append(used_us / row_num); + write_99.Append(used_us / row_num); + } + delete this; +} + +void ScanDoneWrapper::Run() { + if (response_->has_results()) { int64_t now_us = get_micros(); - int64_t used_us = now_us - start_micros_; - int64_t row_num = request_->row_list_size(); - if (used_us <= 0) { - LOG(ERROR) << "now us: "<< now_us << " start_us: "<< start_micros_; + int64_t used_us = now_us - start_micros_; + if (used_us < 0) { + LOG(ERROR) << "now us: " << now_us << " start_us: " << start_micros_; } - finished_write_request_counter.Add(row_num); - write_delay.Add(used_us); + int64_t row_num = response_->results().key_values_size(); + finished_scan_request_counter.Add(row_num); + scan_delay.Add(used_us); if (row_num > 0) { - write_95.Append(used_us / row_num); - write_99.Append(used_us / row_num); + scan_95.Append(used_us / row_num); + scan_99.Append(used_us / row_num); } - delete this; -} -void ScanDoneWrapper::Run() { - if (response_->has_results()) { - int64_t now_us = get_micros(); - int64_t used_us = now_us - start_micros_; - if (used_us <= 0) { - LOG(ERROR) << "now us: "<< now_us << " start_us: "<< start_micros_; - } - int64_t row_num = response_->results().key_values_size(); - finished_scan_request_counter.Add(row_num); - scan_delay.Add(used_us); - if (row_num > 0) { - scan_95.Append(used_us / row_num); - scan_99.Append(used_us / row_num); - } + if (response_->has_row_count() && response_->row_count() > 0) { + quota_entry_->Adjust(request_->table_name(), kQuotaScanReqs, response_->row_count()); } - delete this; + if (response_->has_data_size() && response_->data_size() > 0) { + quota_entry_->Adjust(request_->table_name(), kQuotaScanBytes, response_->data_size()); + } + } + delete this; } -enum RpcType { - RPC_READ = 1, - RPC_SCAN = 2 -}; +enum RpcType { RPC_READ = 1, RPC_SCAN = 2 }; struct ReadRpc : public RpcTask { - google::protobuf::RpcController* controller; - const ReadTabletRequest* request; - ReadTabletResponse* response; - google::protobuf::Closure* done; - ReadRpcTimer* timer; - int64_t start_micros; - - ReadRpc(google::protobuf::RpcController* ctrl, - const ReadTabletRequest* req, ReadTabletResponse* resp, - google::protobuf::Closure* done, ReadRpcTimer* timer, - int64_t start_micros) - : RpcTask(RPC_READ), controller(ctrl), request(req), - response(resp), done(done), timer(timer), + google::protobuf::RpcController* controller; + const ReadTabletRequest* request; + ReadTabletResponse* response; + google::protobuf::Closure* done; + ReadRpcTimer* timer; + int64_t start_micros; + + ReadRpc(google::protobuf::RpcController* ctrl, const ReadTabletRequest* req, + ReadTabletResponse* resp, google::protobuf::Closure* done, ReadRpcTimer* timer, + int64_t start_micros) + : RpcTask(RPC_READ), + controller(ctrl), + request(req), + response(resp), + done(done), + timer(timer), start_micros(start_micros) {} }; struct ScanRpc : public RpcTask { - google::protobuf::RpcController* controller; - const ScanTabletRequest* request; - ScanTabletResponse* response; - google::protobuf::Closure* done; - - ScanRpc(google::protobuf::RpcController* ctrl, - const ScanTabletRequest* req, ScanTabletResponse* resp, - google::protobuf::Closure* done) - : RpcTask(RPC_SCAN), controller(ctrl), request(req), - response(resp), done(done) {} + google::protobuf::RpcController* controller; + const ScanTabletRequest* request; + ScanTabletResponse* response; + google::protobuf::Closure* done; + int64_t retry_time; + + ScanRpc(google::protobuf::RpcController* ctrl, const ScanTabletRequest* req, + ScanTabletResponse* resp, google::protobuf::Closure* done) + : RpcTask(RPC_SCAN), + controller(ctrl), + request(req), + response(resp), + done(done), + retry_time(0) {} }; RemoteTabletNode::RemoteTabletNode(TabletNodeImpl* tabletnode_impl) : tabletnode_impl_(tabletnode_impl), ctrl_thread_pool_(new ThreadPool(FLAGS_tera_tabletnode_ctrl_thread_num)), + lightweight_ctrl_thread_pool_( + new ThreadPool(FLAGS_tera_tabletnode_lightweight_ctrl_thread_num)), write_thread_pool_(new ThreadPool(FLAGS_tera_tabletnode_write_thread_num)), read_thread_pool_(new ThreadPool(FLAGS_tera_tabletnode_read_thread_num)), scan_thread_pool_(new ThreadPool(FLAGS_tera_tabletnode_scan_thread_num)), compact_thread_pool_(new ThreadPool(FLAGS_tera_tabletnode_manual_compact_thread_num)), read_rpc_schedule_(new RpcSchedule(new FairSchedulePolicy)), - scan_rpc_schedule_(new RpcSchedule(new FairSchedulePolicy)) {} + scan_rpc_schedule_(new RpcSchedule(new FairSchedulePolicy)), + quota_retry_rpc_schedule_(new RpcSchedule(new FairSchedulePolicy)), + access_entry_(new auth::AccessEntry(FLAGS_tera_auth_policy)), + quota_entry_(new quota::QuotaEntry) {} RemoteTabletNode::~RemoteTabletNode() {} void RemoteTabletNode::LoadTablet(google::protobuf::RpcController* controller, - const LoadTabletRequest* request, - LoadTabletResponse* response, + const LoadTabletRequest* request, LoadTabletResponse* response, google::protobuf::Closure* done) { - uint64_t id = request->sequence_id(); - LOG(INFO) << "accept RPC (LoadTablet) id: " << id << ", src: " << tera::utils::GetRemoteAddress(controller); - ThreadPool::Task callback = - std::bind(&RemoteTabletNode::DoLoadTablet, this, controller, - request, response, done); - ctrl_thread_pool_->AddTask(callback); + uint64_t id = request->sequence_id(); + response->set_sequence_id(id); + LOG(INFO) << "accept RPC (LoadTablet) id: " << id + << ", src: " << tera::utils::GetRemoteAddress(controller); + const std::string& tablet_path = request->path(); + std::lock_guard lock(tablets_ctrl_mutex_); + if (tablets_ctrl_status_.find(tablet_path) != tablets_ctrl_status_.end()) { + ThreadPool::Task query_task = std::bind(&RemoteTabletNode::DoQueryTabletLoadStatus, this, + controller, request, response, done); + lightweight_ctrl_thread_pool_->AddTask(query_task); + return; + } + if (ctrl_thread_pool_->PendingNum() > FLAGS_tera_tabletnode_ctrl_thread_num) { + response->set_status(kTabletNodeIsBusy); + done->Run(); + return; + } + + tablets_ctrl_status_[tablet_path] = TabletCtrlStatus::kCtrlWaitLoad; + ThreadPool::Task callback = + std::bind(&RemoteTabletNode::DoLoadTablet, this, controller, request, response, done); + ctrl_thread_pool_->AddTask(callback); } void RemoteTabletNode::UnloadTablet(google::protobuf::RpcController* controller, const UnloadTabletRequest* request, UnloadTabletResponse* response, google::protobuf::Closure* done) { - uint64_t id = request->sequence_id(); - LOG(INFO) << "accept RPC (UnloadTablet) id: " << id << ", src: " << tera::utils::GetRemoteAddress(controller); - ThreadPool::Task callback = - std::bind(&RemoteTabletNode::DoUnloadTablet, this, controller, - request, response, done); - ctrl_thread_pool_->AddTask(callback); + uint64_t id = request->sequence_id(); + response->set_sequence_id(id); + LOG(INFO) << "accept RPC (UnloadTablet) id: " << id + << ", src: " << tera::utils::GetRemoteAddress(controller); + if (request->has_path()) { + std::lock_guard lock(tablets_ctrl_mutex_); + const std::string& tablet_path = request->path(); + if (tablets_ctrl_status_.find(tablet_path) != tablets_ctrl_status_.end()) { + ThreadPool::Task query_task = std::bind(&RemoteTabletNode::DoQueryTabletUnloadStatus, this, + controller, request, response, done); + lightweight_ctrl_thread_pool_->AddTask(query_task); + return; + } + } + + if (ctrl_thread_pool_->PendingNum() > FLAGS_tera_tabletnode_ctrl_thread_num) { + response->set_status(kTabletNodeIsBusy); + done->Run(); + return; + } + if (request->has_path()) { + tablets_ctrl_status_[request->path()] = TabletCtrlStatus::kCtrlWaitUnload; + } + + ThreadPool::Task callback = + std::bind(&RemoteTabletNode::DoUnloadTablet, this, controller, request, response, done); + ctrl_thread_pool_->AddTask(callback); } void RemoteTabletNode::ReadTablet(google::protobuf::RpcController* controller, - const ReadTabletRequest* request, - ReadTabletResponse* response, + const ReadTabletRequest* request, ReadTabletResponse* response, google::protobuf::Closure* done) { - int64_t start_micros = get_micros(); - done = ReadDoneWrapper::NewInstance(start_micros, request, response, done); - VLOG(8) << "accept RPC (ReadTablet): [" << request->tablet_name() << "] " << tera::utils::GetRemoteAddress(controller); - static uint32_t last_print = time(NULL); - int32_t row_num = request->row_info_list_size(); - read_request_counter.Add(row_num); - if (read_pending_counter.Get() > FLAGS_tera_request_pending_limit) { + int64_t start_micros = get_micros(); + done = ReadDoneWrapper::NewInstance(start_micros, request, response, done, quota_entry_); + VLOG(8) << "accept RPC (ReadTablet): [" << request->tablet_name() << "] " + << tera::utils::GetRemoteAddress(controller); + static uint32_t last_print = time(NULL); + int32_t row_num = request->row_info_list_size(); + read_request_counter.Add(row_num); + if (read_pending_counter.Get() > FLAGS_tera_request_pending_limit) { + response->set_sequence_id(request->sequence_id()); + response->set_status(kTabletNodeIsBusy); + read_reject_counter.Add(row_num); + done->Run(); + uint32_t now_time = time(NULL); + if (now_time > last_print) { + LOG(WARNING) << "Too many pending read requests, return TabletNode Is Busy!"; + last_print = now_time; + } + VLOG(8) << "finish RPC (ReadTablet)"; + } else { + // check user identification & access + if (!access_entry_->VerifyAndAuthorize(request, response)) { + response->set_sequence_id(request->sequence_id()); + VLOG(20) << "Access VerifyAndAuthorize failed for ReadTablet"; + done->Run(); + return; + } + if (read_pending_counter.Get() >= + FLAGS_tera_request_pending_limit * FLAGS_tera_quota_unlimited_pending_ratio) { + if (!quota_entry_->CheckAndConsume( + request->tablet_name(), + quota::OpTypeAmountList{std::make_pair(kQuotaReadReqs, row_num)})) { response->set_sequence_id(request->sequence_id()); - response->set_status(kTabletNodeIsBusy); - read_reject_counter.Add(row_num); + response->set_status(kQuotaLimited); + read_quota_rejest_counter.Add(row_num); + VLOG(20) << "quota_entry check failed for ReadTablet"; done->Run(); - uint32_t now_time = time(NULL); - if (now_time > last_print) { - LOG(WARNING) << "Too many pending read requests, return TabletNode Is Busy!"; - last_print = now_time; - } - VLOG(8) << "finish RPC (ReadTablet)"; - } else { - read_pending_counter.Add(row_num); - ReadRpcTimer* timer = new ReadRpcTimer(request, response, done, start_micros); - RpcTimerList::Instance()->Push(timer); - - ReadRpc* rpc = new ReadRpc(controller, request, response, done, - timer, start_micros); - read_rpc_schedule_->EnqueueRpc(request->tablet_name(), rpc); - read_thread_pool_->AddTask(std::bind(&RemoteTabletNode::DoScheduleRpc, this, - read_rpc_schedule_.get())); + return; + } } + read_pending_counter.Add(row_num); + ReadRpcTimer* timer = new ReadRpcTimer(request, response, done, start_micros); + RpcTimerList::Instance()->Push(timer); + + ReadRpc* rpc = new ReadRpc(controller, request, response, done, timer, start_micros); + read_rpc_schedule_->EnqueueRpc(request->tablet_name(), rpc); + read_thread_pool_->AddTask( + std::bind(&RemoteTabletNode::DoScheduleRpc, this, read_rpc_schedule_.get())); + } } void RemoteTabletNode::WriteTablet(google::protobuf::RpcController* controller, - const WriteTabletRequest* request, - WriteTabletResponse* response, + const WriteTabletRequest* request, WriteTabletResponse* response, google::protobuf::Closure* done) { - int64_t start_micros = get_micros(); - done = WriteDoneWrapper::NewInstance(start_micros, request, response, done); - VLOG(8) << "accept RPC (WriteTablet): [" << request->tablet_name() << "] " << tera::utils::GetRemoteAddress(controller); - static uint32_t last_print = time(NULL); - int32_t row_num = request->row_list_size(); - write_request_counter.Add(row_num); - if (write_pending_counter.Get() > FLAGS_tera_request_pending_limit) { + int64_t start_micros = get_micros(); + done = WriteDoneWrapper::NewInstance(start_micros, request, response, done); + VLOG(8) << "accept RPC (WriteTablet): [" << request->tablet_name() << "] " + << tera::utils::GetRemoteAddress(controller); + static uint32_t last_print = time(NULL); + int32_t row_num = request->row_list_size(); + write_request_counter.Add(row_num); + if (write_pending_counter.Get() > FLAGS_tera_request_pending_limit) { + response->set_sequence_id(request->sequence_id()); + response->set_status(kTabletNodeIsBusy); + write_reject_counter.Add(row_num); + done->Run(); + uint32_t now_time = time(NULL); + if (now_time > last_print) { + LOG(WARNING) << "Too many pending write requests, return TabletNode Is Busy!"; + last_print = now_time; + } + VLOG(8) << "finish RPC (WriteTablet)"; + } else { + // check user identification & access + if (!access_entry_->VerifyAndAuthorize(request, response)) { + response->set_sequence_id(request->sequence_id()); + VLOG(20) << "Access VerifyAndAuthorize failed for WriteTablet"; + done->Run(); + return; + } + + // sum write bytes + int64_t sum_write_bytes = 0; + for (int32_t row_index = 0; row_index < row_num; ++row_index) { + sum_write_bytes += request->row_list(row_index).ByteSize(); + } + if (!TsWriteFlowController::Instance().TryWrite(sum_write_bytes)) { + response->set_sequence_id(request->sequence_id()); + response->set_status(kFlowControlLimited); + write_reject_counter.Add(row_num); + VLOG(20) << "Reject write request due to write flow controller"; + done->Run(); + return; + } + if (write_pending_counter.Get() >= + FLAGS_tera_request_pending_limit * FLAGS_tera_quota_unlimited_pending_ratio) { + if (!quota_entry_->CheckAndConsume( + request->tablet_name(), + quota::OpTypeAmountList{std::make_pair(kQuotaWriteReqs, row_num), + std::make_pair(kQuotaWriteBytes, sum_write_bytes)})) { response->set_sequence_id(request->sequence_id()); - response->set_status(kTabletNodeIsBusy); - write_reject_counter.Add(row_num); + response->set_status(kQuotaLimited); + write_quota_reject_counter.Add(row_num); + VLOG(20) << "quota_entry check failed for WriteTablet"; done->Run(); - uint32_t now_time = time(NULL); - if (now_time > last_print) { - LOG(WARNING) << "Too many pending write requests, return TabletNode Is Busy!"; - last_print = now_time; - } - VLOG(8) << "finish RPC (WriteTablet)"; + return; + } + } + write_pending_counter.Add(row_num); + WriteRpcTimer* timer = new WriteRpcTimer(request, response, done, start_micros); + RpcTimerList::Instance()->Push(timer); + ThreadPool::Task callback = std::bind(&RemoteTabletNode::DoWriteTablet, this, controller, + request, response, done, timer); + write_thread_pool_->AddTask(callback); + } +} + +bool RemoteTabletNode::DoQuotaScanRpcRetry(RpcTask* rpc) { + CHECK(rpc->rpc_type == RPC_SCAN); + ScanRpc* scan_rpc = (ScanRpc*)rpc; + std::string table_name = scan_rpc->request->table_name(); + if (!quota_entry_->CheckAndConsume(table_name, + quota::OpTypeAmountList{std::make_pair(kQuotaScanReqs, 1)})) { + if ((quota_retry_rpc_schedule_->GetPendingTaskCount() < + FLAGS_tera_quota_max_retry_queue_length) && + (++scan_rpc->retry_time < FLAGS_tera_quota_scan_max_retry_times)) { + quota_retry_rpc_schedule_->EnqueueRpc(table_name, rpc); + scan_thread_pool_->DelayTask(FLAGS_tera_quota_scan_retry_delay_interval, // default 100ms + std::bind(&RemoteTabletNode::DoQuotaRetryScheduleRpc, this, + quota_retry_rpc_schedule_.get())); } else { - write_pending_counter.Add(row_num); - WriteRpcTimer* timer = new WriteRpcTimer(request, response, done, start_micros); - RpcTimerList::Instance()->Push(timer); - ThreadPool::Task callback = - std::bind(&RemoteTabletNode::DoWriteTablet, this, - controller, request, response, done, timer); - write_thread_pool_->AddTask(callback); + scan_rpc->response->set_sequence_id(scan_rpc->request->sequence_id()); + scan_rpc->response->set_status(kQuotaLimited); + scan_quota_reject_counter.Inc(); + VLOG(20) << "quota_entry check failed for ScanTablet"; + scan_rpc->done->Run(); + delete rpc; } + return false; + } + return true; +} + +void RemoteTabletNode::DoQuotaRetryScheduleRpc(RpcSchedule* rpc_schedule) { + RpcTask* rpc = NULL; + bool status = rpc_schedule->DequeueRpc(&rpc); + CHECK(status); + if (!DoQuotaScanRpcRetry(rpc)) { + return; + } + CHECK(rpc->rpc_type == RPC_SCAN); + ScanRpc* scan_rpc = (ScanRpc*)rpc; + std::string table_name = scan_rpc->request->table_name(); + DoScanTablet(scan_rpc->controller, scan_rpc->request, scan_rpc->response, scan_rpc->done); + delete rpc; + status = rpc_schedule->FinishRpc(table_name); + CHECK(status); } void RemoteTabletNode::ScanTablet(google::protobuf::RpcController* controller, - const ScanTabletRequest* request, - ScanTabletResponse* response, + const ScanTabletRequest* request, ScanTabletResponse* response, google::protobuf::Closure* done) { - done = ScanDoneWrapper::NewInstance(get_micros(), request, response, done); - VLOG(8) << "accept RPC (ScanTablet): [" << request->table_name() << "] " << tera::utils::GetRemoteAddress(controller); - scan_request_counter.Inc(); - if (scan_pending_counter.Get() > FLAGS_tera_scan_request_pending_limit) { - response->set_sequence_id(request->sequence_id()); - response->set_status(kTabletNodeIsBusy); - scan_reject_counter.Inc(); - done->Run(); - VLOG(8) << "finish RPC (ScanTablet)"; - } else { - scan_pending_counter.Inc(); - ScanRpc* rpc = new ScanRpc(controller, request, response, done); - scan_rpc_schedule_->EnqueueRpc(request->table_name(), rpc); - scan_thread_pool_->AddTask(std::bind(&RemoteTabletNode::DoScheduleRpc, - this, scan_rpc_schedule_.get())); + done = ScanDoneWrapper::NewInstance(get_micros(), request, response, done, quota_entry_); + VLOG(8) << "accept RPC (ScanTablet): [" << request->table_name() << "] " + << tera::utils::GetRemoteAddress(controller); + scan_request_counter.Inc(); + if (scan_pending_counter.Get() > FLAGS_tera_scan_request_pending_limit) { + response->set_sequence_id(request->sequence_id()); + response->set_status(kTabletNodeIsBusy); + scan_reject_counter.Inc(); + done->Run(); + VLOG(8) << "finish RPC (ScanTablet)"; + } else { + // check user identification & access + if (!access_entry_->VerifyAndAuthorize(request, response)) { + response->set_sequence_id(request->sequence_id()); + VLOG(20) << "Access VerifyAndAuthorize failed for ScanTablet"; + done->Run(); + return; } + ScanRpc* rpc = new ScanRpc(controller, request, response, done); + if (scan_pending_counter.Get() >= + FLAGS_tera_request_pending_limit * FLAGS_tera_quota_unlimited_pending_ratio) { + if (!DoQuotaScanRpcRetry(rpc)) { + VLOG(8) << "ScanTablet Rpc push to QuotaRetry queue"; + return; + } + } + scan_pending_counter.Inc(); + scan_rpc_schedule_->EnqueueRpc(request->table_name(), rpc); + scan_thread_pool_->AddTask( + std::bind(&RemoteTabletNode::DoScheduleRpc, this, scan_rpc_schedule_.get())); + } } void RemoteTabletNode::Query(google::protobuf::RpcController* controller, - const QueryRequest* request, - QueryResponse* response, + const QueryRequest* request, QueryResponse* response, google::protobuf::Closure* done) { - uint64_t id = request->sequence_id(); - LOG(INFO) << "accept RPC (Query) id: " << id << ", src: " << tera::utils::GetRemoteAddress(controller); - ThreadPool::Task callback = - std::bind(&RemoteTabletNode::DoQuery, this, controller, - request, response, done); - ctrl_thread_pool_->AddPriorityTask(callback); + uint64_t id = request->sequence_id(); + LOG(INFO) << "accept RPC (Query) id: " << id + << ", src: " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task callback = + std::bind(&RemoteTabletNode::DoQuery, this, controller, request, response, done); + lightweight_ctrl_thread_pool_->AddPriorityTask(callback); } void RemoteTabletNode::CmdCtrl(google::protobuf::RpcController* controller, - const TsCmdCtrlRequest* request, - TsCmdCtrlResponse* response, + const TsCmdCtrlRequest* request, TsCmdCtrlResponse* response, google::protobuf::Closure* done) { - uint64_t id = request->sequence_id(); - LOG(INFO) << "accept RPC (CmdCtrl) id: " << id << ", [" << request->command() - << "] src: " << tera::utils::GetRemoteAddress(controller); - ThreadPool::Task callback = - std::bind(&RemoteTabletNode::DoCmdCtrl, this, controller, - request, response, done); - ctrl_thread_pool_->AddPriorityTask(callback); -} - -void RemoteTabletNode::SplitTablet(google::protobuf::RpcController* controller, - const SplitTabletRequest* request, - SplitTabletResponse* response, - google::protobuf::Closure* done) { - uint64_t id = request->sequence_id(); - LOG(INFO) << "accept RPC (SplitTablet) id: " << id << ", src: " << tera::utils::GetRemoteAddress(controller); - ThreadPool::Task callback = - std::bind(&RemoteTabletNode::DoSplitTablet, this, controller, - request, response, done); - ctrl_thread_pool_->AddTask(callback); + uint64_t id = request->sequence_id(); + LOG(INFO) << "accept RPC (CmdCtrl) id: " << id << ", [" << request->command() + << "] src: " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task callback = + std::bind(&RemoteTabletNode::DoCmdCtrl, this, controller, request, response, done); + lightweight_ctrl_thread_pool_->AddPriorityTask(callback); } void RemoteTabletNode::ComputeSplitKey(google::protobuf::RpcController* controller, - const SplitTabletRequest* request, - SplitTabletResponse* response, - google::protobuf::Closure* done) { - uint64_t id = request->sequence_id(); - LOG(INFO) << "accept RPC (ComputeSplitKey) id: " << id << ", src: " << tera::utils::GetRemoteAddress(controller); - ThreadPool::Task callback = - std::bind(&RemoteTabletNode::DoComputeSplitKey, this, controller, - request, response, done); - ctrl_thread_pool_->AddTask(callback); + const SplitTabletRequest* request, + SplitTabletResponse* response, + google::protobuf::Closure* done) { + uint64_t id = request->sequence_id(); + LOG(INFO) << "accept RPC (ComputeSplitKey) id: " << id + << ", src: " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task callback = + std::bind(&RemoteTabletNode::DoComputeSplitKey, this, controller, request, response, done); + lightweight_ctrl_thread_pool_->AddTask(callback); } void RemoteTabletNode::CompactTablet(google::protobuf::RpcController* controller, - const CompactTabletRequest* request, - CompactTabletResponse* response, - google::protobuf::Closure* done) { - uint64_t id = request->sequence_id(); - LOG(INFO) << "accept RPC (CompactTablet) id: " << id << ", src: " << tera::utils::GetRemoteAddress(controller); - compact_pending_counter.Inc(); - ThreadPool::Task callback = - std::bind(&RemoteTabletNode::DoCompactTablet, this, controller, - request, response, done); - compact_thread_pool_->AddTask(callback); + const CompactTabletRequest* request, + CompactTabletResponse* response, + google::protobuf::Closure* done) { + uint64_t id = request->sequence_id(); + LOG(INFO) << "accept RPC (CompactTablet) id: " << id + << ", src: " << tera::utils::GetRemoteAddress(controller); + compact_pending_counter.Inc(); + ThreadPool::Task callback = + std::bind(&RemoteTabletNode::DoCompactTablet, this, controller, request, response, done); + // Reject all manual compact request when slowdown mode triggered. + if (TsWriteFlowController::Instance().InSlowdownMode()) { + LOG(WARNING) << "compact fail: " << request->tablet_name() + << " due to slowdown write mode triggered."; + response->set_sequence_id(request->sequence_id()); + response->set_status(kFlowControlLimited); + + done->Run(); + return; + } + compact_thread_pool_->AddTask(callback); } void RemoteTabletNode::Update(google::protobuf::RpcController* controller, - const UpdateRequest* request, - UpdateResponse* response, + const UpdateRequest* request, UpdateResponse* response, google::protobuf::Closure* done) { - uint64_t id = request->sequence_id(); - LOG(INFO) << "accept RPC (Update) id: " << id << ", src: " << tera::utils::GetRemoteAddress(controller); - ThreadPool::Task callback = - std::bind(&RemoteTabletNode::DoUpdate, this, controller, - request, response, done); - ctrl_thread_pool_->AddTask(callback); + uint64_t id = request->sequence_id(); + LOG(INFO) << "accept RPC (Update) id: " << id + << ", src: " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task callback = + std::bind(&RemoteTabletNode::DoUpdate, this, controller, request, response, done); + lightweight_ctrl_thread_pool_->AddTask(callback); } std::string RemoteTabletNode::ProfilingLog() { - return "ctrl " + ctrl_thread_pool_->ProfilingLog() - + " read " + read_thread_pool_->ProfilingLog() - + " write " + write_thread_pool_->ProfilingLog() - + " scan " + scan_thread_pool_->ProfilingLog() - + " compact " + compact_thread_pool_->ProfilingLog(); + return "ctrl " + lightweight_ctrl_thread_pool_->ProfilingLog() + " read " + + read_thread_pool_->ProfilingLog() + " write " + write_thread_pool_->ProfilingLog() + + " scan " + scan_thread_pool_->ProfilingLog() + " compact " + + compact_thread_pool_->ProfilingLog(); +} + +void RemoteTabletNode::DoQueryTabletLoadStatus(google::protobuf::RpcController* controller, + const LoadTabletRequest* request, + LoadTabletResponse* response, + google::protobuf::Closure* done) { + const std::string& tablet_path = request->path(); + { + std::lock_guard lock(tablets_ctrl_mutex_); + if (tablets_ctrl_status_.find(tablet_path) != tablets_ctrl_status_.end()) { + response->set_status(static_cast(tablets_ctrl_status_[tablet_path])); + done->Run(); + return; + } + } + + const std::string& start_key = request->key_range().key_start(); + const std::string& end_key = request->key_range().key_end(); + StatusCode status = tabletnode_impl_->QueryTabletStatus(tablet_path, start_key, end_key); + response->set_status(status); + done->Run(); +} + +void RemoteTabletNode::DoQueryTabletUnloadStatus(google::protobuf::RpcController* controller, + const UnloadTabletRequest* request, + UnloadTabletResponse* response, + google::protobuf::Closure* done) { + const std::string& tablet_path = request->path(); + { + std::lock_guard lock(tablets_ctrl_mutex_); + if (tablets_ctrl_status_.find(tablet_path) != tablets_ctrl_status_.end()) { + response->set_status(static_cast(tablets_ctrl_status_[tablet_path])); + done->Run(); + return; + } + } + + const std::string& start_key = request->key_range().key_start(); + const std::string& end_key = request->key_range().key_end(); + StatusCode status = + tabletnode_impl_->QueryTabletStatus(request->tablet_name(), start_key, end_key); + response->set_status(status); + done->Run(); } void RemoteTabletNode::DoLoadTablet(google::protobuf::RpcController* controller, - const LoadTabletRequest* request, - LoadTabletResponse* response, + const LoadTabletRequest* request, LoadTabletResponse* response, google::protobuf::Closure* done) { - uint64_t id = request->sequence_id(); - LOG(INFO) << "run RPC (LoadTablet) id: " << id; - tabletnode_impl_->LoadTablet(request, response, done); - LOG(INFO) << "finish RPC (LoadTablet) id: " << id; + uint64_t id = request->sequence_id(); + LOG(INFO) << "run RPC (LoadTablet) id: " << id; + { + std::lock_guard lock(tablets_ctrl_mutex_); + tablets_ctrl_status_[request->path()] = TabletCtrlStatus::kCtrlOnLoad; + } + tabletnode_impl_->LoadTablet(request, response); + { + std::lock_guard lock(tablets_ctrl_mutex_); + tablets_ctrl_status_.erase(request->path()); + } + LOG(INFO) << "finish RPC (LoadTablet) id: " << id; + done->Run(); } void RemoteTabletNode::DoUnloadTablet(google::protobuf::RpcController* controller, const UnloadTabletRequest* request, UnloadTabletResponse* response, google::protobuf::Closure* done) { - uint64_t id = request->sequence_id(); - LOG(INFO) << "run RPC (UnloadTablet) id: " << id; - tabletnode_impl_->UnloadTablet(request, response, done); - LOG(INFO) << "finish RPC (UnloadTablet) id: " << id; + uint64_t id = request->sequence_id(); + LOG(INFO) << "run RPC (UnloadTablet) id: " << id; + std::string tablet_path; + if (request->has_path()) { + tablet_path = request->path(); + std::lock_guard lock(tablets_ctrl_mutex_); + tablets_ctrl_status_[tablet_path] = TabletCtrlStatus::kCtrlUnloading; + } + + tabletnode_impl_->UnloadTablet(request, response); + { + std::lock_guard lock(tablets_ctrl_mutex_); + tablets_ctrl_status_.erase(tablet_path); + } + LOG(INFO) << "finish RPC (UnloadTablet) id: " << id; + done->Run(); } void RemoteTabletNode::DoReadTablet(google::protobuf::RpcController* controller, - int64_t start_micros, - const ReadTabletRequest* request, - ReadTabletResponse* response, - google::protobuf::Closure* done, + int64_t start_micros, const ReadTabletRequest* request, + ReadTabletResponse* response, google::protobuf::Closure* done, ReadRpcTimer* timer) { - VLOG(8) << "run RPC (ReadTablet)"; - int32_t row_num = request->row_info_list_size(); - read_pending_counter.Sub(row_num); - - bool is_read_timeout = false; - if (request->has_client_timeout_ms()) { - int64_t read_timeout = request->client_timeout_ms() * 1000; // ms -> us - int64_t detal = get_micros() - start_micros; - if (detal > read_timeout) { - LOG(WARNING) << "timeout, drop read request for:" << request->tablet_name() - << ", detal(in us):" << detal - << ", read_timeout(in us):" << read_timeout; - is_read_timeout = true; - } + VLOG(8) << "run RPC (ReadTablet)"; + int32_t row_num = request->row_info_list_size(); + read_pending_counter.Sub(row_num); + + bool is_read_timeout = false; + if (request->has_client_timeout_ms()) { + int64_t read_timeout = request->client_timeout_ms() * 1000; // ms -> us + int64_t detal = get_micros() - start_micros; + if (detal > read_timeout) { + LOG(WARNING) << "timeout, drop read request for:" << request->tablet_name() + << ", detal(in us):" << detal << ", read_timeout(in us):" << read_timeout; + is_read_timeout = true; } - - if (!is_read_timeout) { - tabletnode_impl_->ReadTablet(start_micros, request, response, done); - } else { - response->set_sequence_id(request->sequence_id()); - response->set_success_num(0); - response->set_status(kTableIsBusy); - read_reject_counter.Inc(); - done->Run(); - } - - if (NULL != timer) { - RpcTimerList::Instance()->Erase(timer); - delete timer; - } - VLOG(8) << "finish RPC (ReadTablet)"; + } + + if (!is_read_timeout) { + tabletnode_impl_->ReadTablet(start_micros, request, response, done, read_thread_pool_.get()); + } else { + response->set_sequence_id(request->sequence_id()); + response->set_success_num(0); + response->set_status(kTableIsBusy); + read_reject_counter.Inc(); + done->Run(); + } + + if (NULL != timer) { + RpcTimerList::Instance()->Erase(timer); + delete timer; + } + VLOG(8) << "finish RPC (ReadTablet)"; } void RemoteTabletNode::DoWriteTablet(google::protobuf::RpcController* controller, const WriteTabletRequest* request, - WriteTabletResponse* response, - google::protobuf::Closure* done, + WriteTabletResponse* response, google::protobuf::Closure* done, WriteRpcTimer* timer) { - VLOG(8) << "run RPC (WriteTablet)"; - int32_t row_num = request->row_list_size(); - write_pending_counter.Sub(row_num); - tabletnode_impl_->WriteTablet(request, response, done, timer); - VLOG(8) << "finish RPC (WriteTablet)"; + VLOG(8) << "run RPC (WriteTablet)"; + int32_t row_num = request->row_list_size(); + write_pending_counter.Sub(row_num); + tabletnode_impl_->WriteTablet(request, response, done, timer); + VLOG(8) << "finish RPC (WriteTablet)"; } void RemoteTabletNode::DoScanTablet(google::protobuf::RpcController* controller, - const ScanTabletRequest* request, - ScanTabletResponse* response, + const ScanTabletRequest* request, ScanTabletResponse* response, google::protobuf::Closure* done) { - VLOG(8) << "run RPC (ScanTablet)"; - scan_pending_counter.Dec(); - tabletnode_impl_->ScanTablet(request, response, done); - VLOG(8) << "finish RPC (ScanTablet)"; + VLOG(8) << "run RPC (ScanTablet)"; + tabletnode_impl_->ScanTablet(request, response, done); + VLOG(8) << "finish RPC (ScanTablet)"; } void RemoteTabletNode::DoQuery(google::protobuf::RpcController* controller, - const QueryRequest* request, - QueryResponse* response, + const QueryRequest* request, QueryResponse* response, google::protobuf::Closure* done) { - uint64_t id = request->sequence_id(); - int64_t start_micros = get_micros(); - LOG(INFO) << "run RPC (Query) id: " << id; - tabletnode_impl_->Query(request, response, done); - LOG(INFO) << "finish RPC (Query) id: " << id - << ", cost " << (get_micros() - start_micros) / 1000 << "ms."; + uint64_t id = request->sequence_id(); + int64_t start_micros = get_micros(); + LOG(INFO) << "run RPC (Query) id: " << id; + access_entry_->GetAccessUpdater().UpdateTs(request, response); + + // Reset Quota iif version dismatch + quota_entry_->Update(request, response); + + tabletnode_impl_->Query(request, response, done); + + LOG(INFO) << "finish RPC (Query) id: " << id << ", cost " << (get_micros() - start_micros) / 1000 + << "ms."; } void RemoteTabletNode::DoCmdCtrl(google::protobuf::RpcController* controller, - const TsCmdCtrlRequest* request, - TsCmdCtrlResponse* response, + const TsCmdCtrlRequest* request, TsCmdCtrlResponse* response, google::protobuf::Closure* done) { - uint64_t id = request->sequence_id(); - int64_t start_micros = get_micros(); - LOG(INFO) << "run RPC (CmdCtrl) id: " << id; - tabletnode_impl_->CmdCtrl(request, response, done); - LOG(INFO) << "finish RPC (CmdCtrl) id: " << id - << ", cost " << (get_micros() - start_micros) / 1000 << "ms."; -} - -void RemoteTabletNode::DoSplitTablet(google::protobuf::RpcController* controller, - const SplitTabletRequest* request, - SplitTabletResponse* response, - google::protobuf::Closure* done) { - uint64_t id = request->sequence_id(); - LOG(INFO) << "run RPC (SplitTablet) id: " << id; - tabletnode_impl_->SplitTablet(request, response, done); - LOG(INFO) << "finish RPC (SplitTablet) id: " << id; + uint64_t id = request->sequence_id(); + int64_t start_micros = get_micros(); + LOG(INFO) << "run RPC (CmdCtrl) id: " << id; + tabletnode_impl_->CmdCtrl(request, response, done); + LOG(INFO) << "finish RPC (CmdCtrl) id: " << id << ", cost " + << (get_micros() - start_micros) / 1000 << "ms."; } void RemoteTabletNode::DoComputeSplitKey(google::protobuf::RpcController* controller, - const SplitTabletRequest* request, - SplitTabletResponse* response, - google::protobuf::Closure* done) { - uint64_t id = request->sequence_id(); - LOG(INFO) << "run RPC (ComputeSplitKey) id: " << id; - tabletnode_impl_->ComputeSplitKey(request, response, done); - LOG(INFO) << "finish RPC (ComputeSplitKey) id: " << id; + const SplitTabletRequest* request, + SplitTabletResponse* response, + google::protobuf::Closure* done) { + uint64_t id = request->sequence_id(); + LOG(INFO) << "run RPC (ComputeSplitKey) id: " << id; + tabletnode_impl_->ComputeSplitKey(request, response, done); + LOG(INFO) << "finish RPC (ComputeSplitKey) id: " << id; } void RemoteTabletNode::DoCompactTablet(google::protobuf::RpcController* controller, - const CompactTabletRequest* request, - CompactTabletResponse* response, - google::protobuf::Closure* done) { - uint64_t id = request->sequence_id(); - LOG(INFO) << "run RPC (CompactTablet) id: " << id; - compact_pending_counter.Dec(); - tabletnode_impl_->CompactTablet(request, response, done); - LOG(INFO) << "finish RPC (CompactTablet) id: " << id; + const CompactTabletRequest* request, + CompactTabletResponse* response, + google::protobuf::Closure* done) { + uint64_t id = request->sequence_id(); + LOG(INFO) << "run RPC (CompactTablet) id: " << id; + compact_pending_counter.Dec(); + tabletnode_impl_->CompactTablet(request, response, done); + LOG(INFO) << "finish RPC (CompactTablet) id: " << id; } void RemoteTabletNode::DoUpdate(google::protobuf::RpcController* controller, - const UpdateRequest* request, - UpdateResponse* response, + const UpdateRequest* request, UpdateResponse* response, google::protobuf::Closure* done) { - uint64_t id = request->sequence_id(); - LOG(INFO) << "accept RPC (Update) id: " << id; - tabletnode_impl_->Update(request, response, done); - LOG(INFO) << "finish RPC (Update) id: " << id; + uint64_t id = request->sequence_id(); + LOG(INFO) << "accept RPC (Update) id: " << id; + tabletnode_impl_->Update(request, response, done); + LOG(INFO) << "finish RPC (Update) id: " << id; } void RemoteTabletNode::DoScheduleRpc(RpcSchedule* rpc_schedule) { - RpcTask* rpc = NULL; - bool status = rpc_schedule->DequeueRpc(&rpc); - CHECK(status); - std::string table_name; + RpcTask* rpc = NULL; + bool status = rpc_schedule->DequeueRpc(&rpc); + CHECK(status); + std::string table_name; - switch (rpc->rpc_type) { + switch (rpc->rpc_type) { case RPC_READ: { - ReadRpc* read_rpc = (ReadRpc*)rpc; - table_name = read_rpc->request->tablet_name(); - DoReadTablet(read_rpc->controller, read_rpc->start_micros, - read_rpc->request, read_rpc->response, - read_rpc->done,read_rpc->timer); + ReadRpc* read_rpc = (ReadRpc*)rpc; + table_name = read_rpc->request->tablet_name(); + DoReadTablet(read_rpc->controller, read_rpc->start_micros, read_rpc->request, + read_rpc->response, read_rpc->done, read_rpc->timer); } break; case RPC_SCAN: { - ScanRpc* scan_rpc = (ScanRpc*)rpc; - table_name = scan_rpc->request->table_name(); - DoScanTablet(scan_rpc->controller, scan_rpc->request, - scan_rpc->response, scan_rpc->done); + ScanRpc* scan_rpc = (ScanRpc*)rpc; + table_name = scan_rpc->request->table_name(); + scan_pending_counter.Dec(); + DoScanTablet(scan_rpc->controller, scan_rpc->request, scan_rpc->response, scan_rpc->done); } break; default: - abort(); - } + abort(); + } - delete rpc; - status = rpc_schedule->FinishRpc(table_name); - CHECK(status); + delete rpc; + status = rpc_schedule->FinishRpc(table_name); + CHECK(status); } -} // namespace tabletnode -} // namespace tera +} // namespace tabletnode +} // namespace tera diff --git a/src/tabletnode/remote_tabletnode.h b/src/tabletnode/remote_tabletnode.h index aecadf9ff..5dff58784 100644 --- a/src/tabletnode/remote_tabletnode.h +++ b/src/tabletnode/remote_tabletnode.h @@ -5,6 +5,7 @@ #ifndef TERA_TABLETNODE_REMOTE_TABLETNODE_H_ #define TERA_TABLETNODE_REMOTE_TABLETNODE_H_ +#include #include "common/base/scoped_ptr.h" #include "common/thread_pool.h" #include "common/request_done_wrapper.h" @@ -12,236 +13,223 @@ #include "proto/tabletnode_rpc.pb.h" #include "tabletnode/rpc_schedule.h" #include "utils/rpc_timer_list.h" +#include "access/access_entry.h" +#include "quota/quota_entry.h" namespace tera { namespace tabletnode { class TabletNodeImpl; - class ReadDoneWrapper final : public RequestDoneWrapper { -public: - static google::protobuf::Closure* NewInstance(int64_t start_micros, - const ReadTabletRequest* request, - ReadTabletResponse* response, - google::protobuf::Closure* done) { - return new ReadDoneWrapper(start_micros, request, response, done); - } - - virtual void Run() override; - - virtual ~ReadDoneWrapper() {} - -protected: - //Just Can Create on Heap; - ReadDoneWrapper(int64_t start_micros, - const ReadTabletRequest* request, - ReadTabletResponse* response, - google::protobuf::Closure* done): - RequestDoneWrapper(done), + public: + static google::protobuf::Closure* NewInstance(int64_t start_micros, + const ReadTabletRequest* request, + ReadTabletResponse* response, + google::protobuf::Closure* done, + std::shared_ptr quota_entry) { + return new ReadDoneWrapper(start_micros, request, response, done, quota_entry); + } + + virtual void Run() override; + + virtual ~ReadDoneWrapper() {} + + protected: + // Just Can Create on Heap; + ReadDoneWrapper(int64_t start_micros, const ReadTabletRequest* request, + ReadTabletResponse* response, google::protobuf::Closure* done, + std::shared_ptr quota_entry) + : RequestDoneWrapper(done), start_micros_(start_micros), request_(request), - response_(response) {} + response_(response), + quota_entry_(quota_entry) {} - int64_t start_micros_; - const ReadTabletRequest* request_; - ReadTabletResponse* response_; + int64_t start_micros_; + const ReadTabletRequest* request_; + ReadTabletResponse* response_; + std::shared_ptr quota_entry_; }; class WriteDoneWrapper final : public RequestDoneWrapper { -public: - static google::protobuf::Closure* NewInstance(int64_t start_micros, - const WriteTabletRequest* request, - WriteTabletResponse* response, - google::protobuf::Closure* done) { - return new WriteDoneWrapper(start_micros, request, response, done); - } - - virtual void Run() override; - - virtual ~WriteDoneWrapper() {} - -protected: - //Just Can Create on Heap; - WriteDoneWrapper(int64_t start_micros, - const WriteTabletRequest* request, - WriteTabletResponse* response, - google::protobuf::Closure* done): - RequestDoneWrapper(done), + public: + static google::protobuf::Closure* NewInstance(int64_t start_micros, + const WriteTabletRequest* request, + WriteTabletResponse* response, + google::protobuf::Closure* done) { + return new WriteDoneWrapper(start_micros, request, response, done); + } + + virtual void Run() override; + + virtual ~WriteDoneWrapper() {} + + protected: + // Just Can Create on Heap; + WriteDoneWrapper(int64_t start_micros, const WriteTabletRequest* request, + WriteTabletResponse* response, google::protobuf::Closure* done) + : RequestDoneWrapper(done), start_micros_(start_micros), request_(request), response_(response) {} - int64_t start_micros_; - const WriteTabletRequest* request_; - WriteTabletResponse* response_; + int64_t start_micros_; + const WriteTabletRequest* request_; + WriteTabletResponse* response_; }; class ScanDoneWrapper final : public RequestDoneWrapper { -public: - static google::protobuf::Closure* NewInstance(int64_t start_micros, - const ScanTabletRequest* request, - ScanTabletResponse* response, - google::protobuf::Closure* done) { - return new ScanDoneWrapper(start_micros, request, response, done); - } - - virtual void Run() override; - - virtual ~ScanDoneWrapper() {} - -protected: - //Just Can Create on Heap; - ScanDoneWrapper(int64_t start_micros, - const ScanTabletRequest* request, - ScanTabletResponse* response, - google::protobuf::Closure* done): - RequestDoneWrapper(done), + public: + static google::protobuf::Closure* NewInstance(int64_t start_micros, + const ScanTabletRequest* request, + ScanTabletResponse* response, + google::protobuf::Closure* done, + std::shared_ptr quota_entry) { + return new ScanDoneWrapper(start_micros, request, response, done, quota_entry); + } + + virtual void Run() override; + + virtual ~ScanDoneWrapper() {} + + protected: + // Just Can Create on Heap; + ScanDoneWrapper(int64_t start_micros, const ScanTabletRequest* request, + ScanTabletResponse* response, google::protobuf::Closure* done, + std::shared_ptr quota_entry) + : RequestDoneWrapper(done), start_micros_(start_micros), request_(request), - response_(response) {} + response_(response), + quota_entry_(quota_entry) {} - int64_t start_micros_; - const ScanTabletRequest* request_; - ScanTabletResponse* response_; + int64_t start_micros_; + const ScanTabletRequest* request_; + ScanTabletResponse* response_; + std::shared_ptr quota_entry_; }; class RemoteTabletNode : public TabletNodeServer { -public: - explicit RemoteTabletNode(TabletNodeImpl* tabletnode_impl); - ~RemoteTabletNode(); - - void LoadTablet(google::protobuf::RpcController* controller, - const LoadTabletRequest* request, - LoadTabletResponse* response, - google::protobuf::Closure* done); - - void UnloadTablet(google::protobuf::RpcController* controller, - const UnloadTabletRequest* request, - UnloadTabletResponse* response, - google::protobuf::Closure* done); + public: + explicit RemoteTabletNode(TabletNodeImpl* tabletnode_impl); + ~RemoteTabletNode(); - void ReadTablet(google::protobuf::RpcController* controller, - const ReadTabletRequest* request, - ReadTabletResponse* response, - google::protobuf::Closure* done); + void LoadTablet(google::protobuf::RpcController* controller, const LoadTabletRequest* request, + LoadTabletResponse* response, google::protobuf::Closure* done); - void WriteTablet(google::protobuf::RpcController* controller, - const WriteTabletRequest* request, - WriteTabletResponse* response, - google::protobuf::Closure* done); + void UnloadTablet(google::protobuf::RpcController* controller, const UnloadTabletRequest* request, + UnloadTabletResponse* response, google::protobuf::Closure* done); - void ScanTablet(google::protobuf::RpcController* controller, - const ScanTabletRequest* request, - ScanTabletResponse* response, - google::protobuf::Closure* done); + void ReadTablet(google::protobuf::RpcController* controller, const ReadTabletRequest* request, + ReadTabletResponse* response, google::protobuf::Closure* done); - void Query(google::protobuf::RpcController* controller, - const QueryRequest* request, - QueryResponse* response, - google::protobuf::Closure* done); + void WriteTablet(google::protobuf::RpcController* controller, const WriteTabletRequest* request, + WriteTabletResponse* response, google::protobuf::Closure* done); - void SplitTablet(google::protobuf::RpcController* controller, - const SplitTabletRequest* request, - SplitTabletResponse* response, - google::protobuf::Closure* done); + void ScanTablet(google::protobuf::RpcController* controller, const ScanTabletRequest* request, + ScanTabletResponse* response, google::protobuf::Closure* done); - void ComputeSplitKey(google::protobuf::RpcController* controller, - const SplitTabletRequest* request, - SplitTabletResponse* response, - google::protobuf::Closure* done); + void Query(google::protobuf::RpcController* controller, const QueryRequest* request, + QueryResponse* response, google::protobuf::Closure* done); - void CompactTablet(google::protobuf::RpcController* controller, - const CompactTabletRequest* request, - CompactTabletResponse* response, + void ComputeSplitKey(google::protobuf::RpcController* controller, + const SplitTabletRequest* request, SplitTabletResponse* response, google::protobuf::Closure* done); - void CmdCtrl(google::protobuf::RpcController* controller, - const TsCmdCtrlRequest* request, - TsCmdCtrlResponse* response, - google::protobuf::Closure* done); - - void Update(google::protobuf::RpcController* controller, - const UpdateRequest* request, - UpdateResponse* response, - google::protobuf::Closure* done); - std::string ProfilingLog(); -private: - void DoLoadTablet(google::protobuf::RpcController* controller, - const LoadTabletRequest* request, - LoadTabletResponse* response, - google::protobuf::Closure* done); + void CompactTablet(google::protobuf::RpcController* controller, + const CompactTabletRequest* request, CompactTabletResponse* response, + google::protobuf::Closure* done); + + void CmdCtrl(google::protobuf::RpcController* controller, const TsCmdCtrlRequest* request, + TsCmdCtrlResponse* response, google::protobuf::Closure* done); + + void Update(google::protobuf::RpcController* controller, const UpdateRequest* request, + UpdateResponse* response, google::protobuf::Closure* done); + std::string ProfilingLog(); - void DoUnloadTablet(google::protobuf::RpcController* controller, - const UnloadTabletRequest* request, - UnloadTabletResponse* response, - google::protobuf::Closure* done); - - void DoReadTablet(google::protobuf::RpcController* controller, - int64_t start_micros, - const ReadTabletRequest* request, - ReadTabletResponse* response, - google::protobuf::Closure* done, - ReadRpcTimer* timer = NULL); - - void DoWriteTablet(google::protobuf::RpcController* controller, - const WriteTabletRequest* request, - WriteTabletResponse* response, - google::protobuf::Closure* done, - WriteRpcTimer* timer = NULL); - - void DoQuery(google::protobuf::RpcController* controller, - const QueryRequest* request, QueryResponse* response, - google::protobuf::Closure* done); - - void DoScanTablet(google::protobuf::RpcController* controller, - const ScanTabletRequest* request, - ScanTabletResponse* response, + private: + void DoLoadTablet(google::protobuf::RpcController* controller, const LoadTabletRequest* request, + LoadTabletResponse* response, google::protobuf::Closure* done); + + void DoUnloadTablet(google::protobuf::RpcController* controller, + const UnloadTabletRequest* request, UnloadTabletResponse* response, google::protobuf::Closure* done); - void DoSplitTablet(google::protobuf::RpcController* controller, - const SplitTabletRequest* request, - SplitTabletResponse* response, - google::protobuf::Closure* done); - - void DoComputeSplitKey(google::protobuf::RpcController* controller, - const SplitTabletRequest* request, - SplitTabletResponse* response, - google::protobuf::Closure* done); + void DoReadTablet(google::protobuf::RpcController* controller, int64_t start_micros, + const ReadTabletRequest* request, ReadTabletResponse* response, + google::protobuf::Closure* done, ReadRpcTimer* timer = NULL); - void DoMergeTablet(google::protobuf::RpcController* controller, - const MergeTabletRequest* request, - MergeTabletResponse* response, - google::protobuf::Closure* done); + void DoWriteTablet(google::protobuf::RpcController* controller, const WriteTabletRequest* request, + WriteTabletResponse* response, google::protobuf::Closure* done, + WriteRpcTimer* timer = NULL); + + void UpdateAuth(const QueryRequest* request, QueryResponse* response); + + void DoQuery(google::protobuf::RpcController* controller, const QueryRequest* request, + QueryResponse* response, google::protobuf::Closure* done); - void DoCompactTablet(google::protobuf::RpcController* controller, - const CompactTabletRequest* request, - CompactTabletResponse* response, + void DoQueryTabletLoadStatus(google::protobuf::RpcController* controller, + const LoadTabletRequest* request, LoadTabletResponse* response, + google::protobuf::Closure* done); + void DoQueryTabletUnloadStatus(google::protobuf::RpcController* controller, + const UnloadTabletRequest* request, UnloadTabletResponse* response, + google::protobuf::Closure* done); + + void DoScanTablet(google::protobuf::RpcController* controller, const ScanTabletRequest* request, + ScanTabletResponse* response, google::protobuf::Closure* done); + + void DoComputeSplitKey(google::protobuf::RpcController* controller, + const SplitTabletRequest* request, SplitTabletResponse* response, google::protobuf::Closure* done); - void DoCmdCtrl(google::protobuf::RpcController* controller, - const TsCmdCtrlRequest* request, - TsCmdCtrlResponse* response, - google::protobuf::Closure* done); - - void DoUpdate(google::protobuf::RpcController* controller, - const UpdateRequest* request, - UpdateResponse* response, - google::protobuf::Closure* done); - void DoScheduleRpc(RpcSchedule* rpc_schedule); - -private: - TabletNodeImpl* tabletnode_impl_; - scoped_ptr ctrl_thread_pool_; - scoped_ptr write_thread_pool_; - scoped_ptr read_thread_pool_; - scoped_ptr scan_thread_pool_; - scoped_ptr compact_thread_pool_; - scoped_ptr read_rpc_schedule_; - scoped_ptr scan_rpc_schedule_; + void DoMergeTablet(google::protobuf::RpcController* controller, const MergeTabletRequest* request, + MergeTabletResponse* response, google::protobuf::Closure* done); + + void DoCompactTablet(google::protobuf::RpcController* controller, + const CompactTabletRequest* request, CompactTabletResponse* response, + google::protobuf::Closure* done); + + void DoCmdCtrl(google::protobuf::RpcController* controller, const TsCmdCtrlRequest* request, + TsCmdCtrlResponse* response, google::protobuf::Closure* done); + + void DoUpdate(google::protobuf::RpcController* controller, const UpdateRequest* request, + UpdateResponse* response, google::protobuf::Closure* done); + void DoScheduleRpc(RpcSchedule* rpc_schedule); + + bool DoQuotaScanRpcRetry(RpcTask* rpc); + void DoQuotaRetryScheduleRpc(RpcSchedule* rpc_schedule); + + private: + TabletNodeImpl* tabletnode_impl_; + // do heavyweight kinds of ctrl tasks, such as tablet load/unload + scoped_ptr ctrl_thread_pool_; + // do some lightweight task, such as query, reload conf, update tablet schema, + // etc. + scoped_ptr lightweight_ctrl_thread_pool_; + scoped_ptr write_thread_pool_; + scoped_ptr read_thread_pool_; + scoped_ptr scan_thread_pool_; + scoped_ptr compact_thread_pool_; + scoped_ptr read_rpc_schedule_; + scoped_ptr scan_rpc_schedule_; + scoped_ptr quota_retry_rpc_schedule_; + + enum TabletCtrlStatus { + kCtrlWaitLoad = kTabletWaitLoad, + kCtrlOnLoad = kTabletOnLoad, + kCtrlWaitUnload = kTabletWaitUnload, + kCtrlUnloading = kTabletUnloading, + }; + + std::mutex tablets_ctrl_mutex_; + std::map tablets_ctrl_status_; + + std::unique_ptr access_entry_; + std::shared_ptr quota_entry_; }; -} // namespace tabletnode -} // namespace tera +} // namespace tabletnode +} // namespace tera -#endif // TERA_TABLETNODE_REMOTE_TABLETNODE_H_ +#endif // TERA_TABLETNODE_REMOTE_TABLETNODE_H_ diff --git a/src/tabletnode/rpc_compactor.h b/src/tabletnode/rpc_compactor.h index fb770da78..31e386579 100644 --- a/src/tabletnode/rpc_compactor.h +++ b/src/tabletnode/rpc_compactor.h @@ -15,74 +15,73 @@ namespace tera { namespace tabletnode { -template +template class RpcCompactor { -public: - RpcCompactor() - : last_success_sequence_id_(0), - curr_sequence_id_(0), - last_success_response_(new ResponseType()) {} - ~RpcCompactor() {} + public: + RpcCompactor() + : last_success_sequence_id_(0), + curr_sequence_id_(0), + last_success_response_(new ResponseType()) {} + ~RpcCompactor() {} - bool RpcExceptionHappened(uint64_t request_sequence_id, - ResponseType* response, - google::protobuf::Closure* done) { - MutexLock lock(&mutex_); - if (request_sequence_id < last_success_sequence_id_) { - LOG(ERROR) << "invalid sequence id: " << request_sequence_id - << ", last succeed sequence is: " << last_success_sequence_id_; - response->set_status(kInvalidSequenceId); - done->Run(); - return false; - } else if (request_sequence_id == last_success_sequence_id_) { - response->CopyFrom(*last_success_response_); - done->Run(); - return false; - } else if (request_sequence_id < curr_sequence_id_) { - LOG(ERROR) << "invalid sequence id: " << request_sequence_id - << ", current is: " << curr_sequence_id_; - response->set_status(kInvalidSequenceId); - done->Run(); - } else if (request_sequence_id == curr_sequence_id_) { - LOG(WARNING) << "same sequence id: " << request_sequence_id; - ResponseNode response_node(response, done); - done_list_.push_back(response_node); - return true; - } else { - LOG(WARNING) << "sequence id: " << request_sequence_id - << " should not larger than current: " << curr_sequence_id_; - } - CHECK(done_list_.size() == 0); - curr_sequence_id_ = request_sequence_id; - ResponseNode response_node(response, done); - done_list_.push_back(response_node); - - return true; + bool RpcExceptionHappened(uint64_t request_sequence_id, ResponseType* response, + google::protobuf::Closure* done) { + MutexLock lock(&mutex_); + if (request_sequence_id < last_success_sequence_id_) { + LOG(ERROR) << "invalid sequence id: " << request_sequence_id + << ", last succeed sequence is: " << last_success_sequence_id_; + response->set_status(kInvalidSequenceId); + done->Run(); + return false; + } else if (request_sequence_id == last_success_sequence_id_) { + response->CopyFrom(*last_success_response_); + done->Run(); + return false; + } else if (request_sequence_id < curr_sequence_id_) { + LOG(ERROR) << "invalid sequence id: " << request_sequence_id + << ", current is: " << curr_sequence_id_; + response->set_status(kInvalidSequenceId); + done->Run(); + } else if (request_sequence_id == curr_sequence_id_) { + LOG(WARNING) << "same sequence id: " << request_sequence_id; + ResponseNode response_node(response, done); + done_list_.push_back(response_node); + return true; + } else { + LOG(WARNING) << "sequence id: " << request_sequence_id + << " should not larger than current: " << curr_sequence_id_; } + CHECK(done_list_.size() == 0); + curr_sequence_id_ = request_sequence_id; + ResponseNode response_node(response, done); + done_list_.push_back(response_node); + + return true; + } - void FillResponseAndDone(ResponseType* response) { - MutexLock lock(&mutex_); - last_success_response_->CopyFrom(*response); - last_success_sequence_id_ = curr_sequence_id_; + void FillResponseAndDone(ResponseType* response) { + MutexLock lock(&mutex_); + last_success_response_->CopyFrom(*response); + last_success_sequence_id_ = curr_sequence_id_; - for (uint32_t i = 0; i < done_list_.size(); ++i) { - done_list_[i].first->CopyFrom(*response); - done_list_[i].second->Run(); - } - done_list_.clear(); + for (uint32_t i = 0; i < done_list_.size(); ++i) { + done_list_[i].first->CopyFrom(*response); + done_list_[i].second->Run(); } + done_list_.clear(); + } -private: - typedef std::pair ResponseNode; + private: + typedef std::pair ResponseNode; - mutable Mutex mutex_; - std::vector done_list_; - uint64_t last_success_sequence_id_; - uint64_t curr_sequence_id_; - scoped_ptr last_success_response_; + mutable Mutex mutex_; + std::vector done_list_; + uint64_t last_success_sequence_id_; + uint64_t curr_sequence_id_; + scoped_ptr last_success_response_; }; -} // namespace tabletnode -} // namespace tera +} // namespace tabletnode +} // namespace tera -#endif // TERA_TABLETNODE_RPC_COMPACTOR_H_ +#endif // TERA_TABLETNODE_RPC_COMPACTOR_H_ diff --git a/src/tabletnode/rpc_schedule.cc b/src/tabletnode/rpc_schedule.cc index ff4d7fc35..795a39cf4 100644 --- a/src/tabletnode/rpc_schedule.cc +++ b/src/tabletnode/rpc_schedule.cc @@ -12,86 +12,83 @@ namespace tabletnode { RpcSchedule::RpcSchedule(SchedulePolicy* policy) : policy_(policy), pending_task_count_(0), running_task_count_(0) {} -RpcSchedule::~RpcSchedule() { - delete policy_; -} +RpcSchedule::~RpcSchedule() { delete policy_; } void RpcSchedule::EnqueueRpc(const std::string& table_name, RpcTask* rpc) { - MutexLock lock(&mutex_); + MutexLock lock(&mutex_); - ScheduleEntity* entity = NULL; - TableList::iterator it = table_list_.find(table_name); - if (it != table_list_.end()) { - entity = it->second; - } else { - entity = table_list_[table_name] = policy_->NewScheEntity(new TaskQueue); - } + ScheduleEntity* entity = NULL; + TableList::iterator it = table_list_.find(table_name); + if (it != table_list_.end()) { + entity = it->second; + } else { + entity = table_list_[table_name] = policy_->NewScheEntity(new TaskQueue); + } - TaskQueue* task_queue = (TaskQueue*)entity->user_ptr; - task_queue->push(rpc); + TaskQueue* task_queue = (TaskQueue*)entity->user_ptr; + task_queue->push(rpc); - task_queue->pending_count++; - pending_task_count_++; + task_queue->pending_count++; + pending_task_count_++; - if (task_queue->pending_count == 1) { - policy_->Enable(entity); - } + if (task_queue->pending_count == 1) { + policy_->Enable(entity); + } } bool RpcSchedule::DequeueRpc(RpcTask** rpc) { - MutexLock lock(&mutex_); - if (pending_task_count_ == 0) { - return false; - } - - TableList::iterator it = policy_->Pick(&table_list_); - CHECK(it != table_list_.end()); - - ScheduleEntity* entity = (ScheduleEntity*)it->second; - TaskQueue* task_queue = (TaskQueue*)entity->user_ptr; - CHECK_GT(task_queue->size(), 0U); - - *rpc = task_queue->front(); - task_queue->pop(); - - task_queue->pending_count--; - task_queue->running_count++; - pending_task_count_--; - running_task_count_++; - - if (task_queue->pending_count == 0) { - policy_->Disable(entity); - } - return true; + MutexLock lock(&mutex_); + if (pending_task_count_ == 0) { + return false; + } + + TableList::iterator it = policy_->Pick(&table_list_); + CHECK(it != table_list_.end()); + + ScheduleEntity* entity = (ScheduleEntity*)it->second; + TaskQueue* task_queue = (TaskQueue*)entity->user_ptr; + CHECK_GT(task_queue->size(), 0U); + + *rpc = task_queue->front(); + task_queue->pop(); + + task_queue->pending_count--; + task_queue->running_count++; + pending_task_count_--; + running_task_count_++; + + if (task_queue->pending_count == 0) { + policy_->Disable(entity); + } + return true; } bool RpcSchedule::FinishRpc(const std::string& table_name) { - MutexLock lock(&mutex_); - if (running_task_count_ == 0) { - return false; - } - TableList::iterator it = table_list_.find(table_name); - if (it == table_list_.end()) { - return false; - } - - ScheduleEntity* entity = (ScheduleEntity*)it->second; - policy_->Done(entity); - - TaskQueue* task_queue = (TaskQueue*)entity->user_ptr; - task_queue->running_count--; - running_task_count_--; - - if (task_queue->running_count == 0 - && task_queue->pending_count == 0) { - delete task_queue; - delete entity; - table_list_.erase(it); - } - return true; + MutexLock lock(&mutex_); + if (running_task_count_ == 0) { + return false; + } + TableList::iterator it = table_list_.find(table_name); + if (it == table_list_.end()) { + return false; + } + + ScheduleEntity* entity = (ScheduleEntity*)it->second; + policy_->Done(entity); + + TaskQueue* task_queue = (TaskQueue*)entity->user_ptr; + task_queue->running_count--; + running_task_count_--; + + if (task_queue->running_count == 0 && task_queue->pending_count == 0) { + delete task_queue; + delete entity; + table_list_.erase(it); + } + return true; } -} // namespace tabletnode -} // namespace tera +} // namespace tabletnode +} // namespace tera /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/tabletnode/rpc_schedule.h b/src/tabletnode/rpc_schedule.h index 9e9c5d3ee..999fdd1d8 100644 --- a/src/tabletnode/rpc_schedule.h +++ b/src/tabletnode/rpc_schedule.h @@ -16,41 +16,43 @@ namespace tera { namespace tabletnode { struct RpcTask { - uint8_t rpc_type; - RpcTask(uint8_t type) : rpc_type(type) {} + uint8_t rpc_type; + RpcTask(uint8_t type) : rpc_type(type) {} }; class RpcSchedule { -public: - RpcSchedule(SchedulePolicy* policy); - ~RpcSchedule(); + public: + RpcSchedule(SchedulePolicy* policy); + ~RpcSchedule(); - void EnqueueRpc(const std::string& table_name, RpcTask* rpc); + void EnqueueRpc(const std::string& table_name, RpcTask* rpc); - bool DequeueRpc(RpcTask** rpc); + bool DequeueRpc(RpcTask** rpc); - bool FinishRpc(const std::string& table_name); + bool FinishRpc(const std::string& table_name); -private: - mutable Mutex mutex_; - SchedulePolicy* policy_; + uint64_t GetPendingTaskCount() { return pending_task_count_; } - typedef std::string TableName; - struct TaskQueue : public std::queue { - uint64_t pending_count; - uint64_t running_count; + private: + mutable Mutex mutex_; + SchedulePolicy* policy_; - TaskQueue() : pending_count(0), running_count(0) {} - }; + typedef std::string TableName; + struct TaskQueue : public std::queue { + uint64_t pending_count; + uint64_t running_count; - typedef std::map TableList; + TaskQueue() : pending_count(0), running_count(0) {} + }; - TableList table_list_; - uint64_t pending_task_count_; - uint64_t running_task_count_; + typedef std::map TableList; + + TableList table_list_; + uint64_t pending_task_count_; + uint64_t running_task_count_; }; -} // namespace tabletnode -} // namespace tera +} // namespace tabletnode +} // namespace tera #endif // TERA_TABLETNODE_RPC_SCHEDULE_H_ diff --git a/src/tabletnode/rpc_schedule_policy.cc b/src/tabletnode/rpc_schedule_policy.cc index 99a897dee..3cb0c9c53 100644 --- a/src/tabletnode/rpc_schedule_policy.cc +++ b/src/tabletnode/rpc_schedule_policy.cc @@ -13,70 +13,69 @@ namespace tera { namespace tabletnode { -FairSchedulePolicy::FairSchedulePolicy() - : min_elapse_time_(0) {} +FairSchedulePolicy::FairSchedulePolicy() : min_elapse_time_(0) {} FairSchedulePolicy::~FairSchedulePolicy() {} ScheduleEntity* FairSchedulePolicy::NewScheEntity(void* user_ptr) { - return new FairScheduleEntity(user_ptr); + return new FairScheduleEntity(user_ptr); } SchedulePolicy::ScheduleEntityList::iterator FairSchedulePolicy::Pick( - ScheduleEntityList* entity_list) { - ScheduleEntityList::iterator pick = entity_list->end(); - int64_t min_elapse_time = INT64_MAX; - - ScheduleEntityList::iterator it = entity_list->begin(); - for (; it != entity_list->end(); ++it) { - FairScheduleEntity* entity = (FairScheduleEntity*)it->second; - UpdateEntity(entity); - if (!entity->pickable) { - continue; - } - if (min_elapse_time > entity->elapse_time) { - min_elapse_time = entity->elapse_time; - pick = it; - } + ScheduleEntityList* entity_list) { + ScheduleEntityList::iterator pick = entity_list->end(); + int64_t min_elapse_time = INT64_MAX; + + ScheduleEntityList::iterator it = entity_list->begin(); + for (; it != entity_list->end(); ++it) { + FairScheduleEntity* entity = (FairScheduleEntity*)it->second; + UpdateEntity(entity); + if (!entity->pickable) { + continue; } - - if (pick != entity_list->end()) { - FairScheduleEntity* pick_entity = (FairScheduleEntity*)pick->second; - pick_entity->running_count++; - min_elapse_time_ = min_elapse_time; + if (min_elapse_time > entity->elapse_time) { + min_elapse_time = entity->elapse_time; + pick = it; } - return pick; + } + + if (pick != entity_list->end()) { + FairScheduleEntity* pick_entity = (FairScheduleEntity*)pick->second; + pick_entity->running_count++; + min_elapse_time_ = min_elapse_time; + } + return pick; } void FairSchedulePolicy::Done(ScheduleEntity* entity) { - FairScheduleEntity* fair_entity = (FairScheduleEntity*)entity; - UpdateEntity(fair_entity); - CHECK_GE(fair_entity->running_count, 1); - fair_entity->running_count--; + FairScheduleEntity* fair_entity = (FairScheduleEntity*)entity; + UpdateEntity(fair_entity); + CHECK_GE(fair_entity->running_count, 1); + fair_entity->running_count--; } void FairSchedulePolicy::Enable(ScheduleEntity* entity) { - FairScheduleEntity* fair_entity = (FairScheduleEntity*)entity; - CHECK(!fair_entity->pickable); - fair_entity->pickable = true; - fair_entity->elapse_time += min_elapse_time_; + FairScheduleEntity* fair_entity = (FairScheduleEntity*)entity; + CHECK(!fair_entity->pickable); + fair_entity->pickable = true; + fair_entity->elapse_time += min_elapse_time_; } void FairSchedulePolicy::Disable(ScheduleEntity* entity) { - FairScheduleEntity* fair_entity = (FairScheduleEntity*)entity; - CHECK(fair_entity->pickable); - fair_entity->pickable = false; - CHECK_GE(fair_entity->elapse_time, min_elapse_time_); - fair_entity->elapse_time -= min_elapse_time_; + FairScheduleEntity* fair_entity = (FairScheduleEntity*)entity; + CHECK(fair_entity->pickable); + fair_entity->pickable = false; + CHECK_GE(fair_entity->elapse_time, min_elapse_time_); + fair_entity->elapse_time -= min_elapse_time_; } void FairSchedulePolicy::UpdateEntity(FairScheduleEntity* entity) { - int64_t now = get_micros(); - entity->elapse_time += (now - entity->last_update_time) * entity->running_count; - entity->last_update_time = now; + int64_t now = get_micros(); + entity->elapse_time += (now - entity->last_update_time) * entity->running_count; + entity->last_update_time = now; } -} // namespace tabletnode -} // namespace tera +} // namespace tabletnode +} // namespace tera /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/tabletnode/rpc_schedule_policy.h b/src/tabletnode/rpc_schedule_policy.h index 665da7407..f99622d58 100644 --- a/src/tabletnode/rpc_schedule_policy.h +++ b/src/tabletnode/rpc_schedule_policy.h @@ -16,68 +16,68 @@ namespace tera { namespace tabletnode { struct ScheduleEntity { - void* user_ptr; + void* user_ptr; - ScheduleEntity(void* user_ptr) : user_ptr(user_ptr) {} - virtual ~ScheduleEntity() {} + ScheduleEntity(void* user_ptr) : user_ptr(user_ptr) {} + virtual ~ScheduleEntity() {} }; class SchedulePolicy { -public: - typedef std::string TableName; - typedef std::map ScheduleEntityList; + public: + typedef std::string TableName; + typedef std::map ScheduleEntityList; - SchedulePolicy() {} - virtual ~SchedulePolicy() {} + SchedulePolicy() {} + virtual ~SchedulePolicy() {} - virtual ScheduleEntity* NewScheEntity(void* user_ptr = NULL) = 0; + virtual ScheduleEntity* NewScheEntity(void* user_ptr = NULL) = 0; - virtual ScheduleEntityList::iterator Pick(ScheduleEntityList* entity_list) = 0; + virtual ScheduleEntityList::iterator Pick(ScheduleEntityList* entity_list) = 0; - virtual void Done(ScheduleEntity* entity) = 0; + virtual void Done(ScheduleEntity* entity) = 0; - virtual void Enable(ScheduleEntity* entity) = 0; + virtual void Enable(ScheduleEntity* entity) = 0; - virtual void Disable(ScheduleEntity* entity) = 0; + virtual void Disable(ScheduleEntity* entity) = 0; }; struct FairScheduleEntity : public ScheduleEntity { - bool pickable; - int64_t last_update_time; - int64_t elapse_time; - int64_t running_count; - - FairScheduleEntity(void* user_ptr) - : ScheduleEntity(user_ptr), - pickable(false), - last_update_time(0), - elapse_time(0), - running_count(0) {} + bool pickable; + int64_t last_update_time; + int64_t elapse_time; + int64_t running_count; + + FairScheduleEntity(void* user_ptr) + : ScheduleEntity(user_ptr), + pickable(false), + last_update_time(0), + elapse_time(0), + running_count(0) {} }; class FairSchedulePolicy : public SchedulePolicy { -public: - FairSchedulePolicy(); + public: + FairSchedulePolicy(); - ~FairSchedulePolicy(); + ~FairSchedulePolicy(); - ScheduleEntity* NewScheEntity(void* user_ptr = NULL); + ScheduleEntity* NewScheEntity(void* user_ptr = NULL); - ScheduleEntityList::iterator Pick(ScheduleEntityList* entity_list); + ScheduleEntityList::iterator Pick(ScheduleEntityList* entity_list); - void Done(ScheduleEntity* entity); + void Done(ScheduleEntity* entity); - void Enable(ScheduleEntity* entity); + void Enable(ScheduleEntity* entity); - void Disable(ScheduleEntity* entity); + void Disable(ScheduleEntity* entity); -private: - void UpdateEntity(FairScheduleEntity* entity); + private: + void UpdateEntity(FairScheduleEntity* entity); - int64_t min_elapse_time_; + int64_t min_elapse_time_; }; -} // namespace tabletnode -} // namespace tera +} // namespace tabletnode +} // namespace tera #endif // TERA_TABLETNODE_RPC_SCHEDULE_POLICY_H_ diff --git a/src/tabletnode/tablet_manager.cc b/src/tabletnode/tablet_manager.cc index ba5099a68..bb49f8964 100644 --- a/src/tabletnode/tablet_manager.cc +++ b/src/tabletnode/tablet_manager.cc @@ -21,157 +21,146 @@ TabletManager::TabletManager() {} TabletManager::~TabletManager() {} -bool TabletManager::AddTablet(const std::string& table_name, - const std::string& table_path, - const std::string& key_start, - const std::string& key_end, - io::TabletIO** tablet_io, +bool TabletManager::AddTablet(const std::string& table_name, const std::string& table_path, + const std::string& key_start, const std::string& key_end, + int64_t ctime, uint64_t version, io::TabletIO** tablet_io, StatusCode* status) { - MutexLock lock(&mutex_); + MutexLock lock(&mutex_); - TabletRange tablet_range(table_name, key_start, key_end); - std::map::iterator it = - tablet_list_.find(tablet_range); - if (it != tablet_list_.end()) { - LOG(ERROR) << "tablet exist: " << table_name << ", " << key_start; - *tablet_io = it->second; - (*tablet_io)->AddRef(); - SetStatusCode(kTableExist, status); - return false; - } - *tablet_io = tablet_list_[tablet_range] = new io::TabletIO(key_start, key_end, table_path); + TabletRange tablet_range(table_name, key_start, key_end); + std::map::iterator it = tablet_list_.find(tablet_range); + if (it != tablet_list_.end()) { + LOG(ERROR) << "tablet exist: " << table_name << ", " << key_start; + *tablet_io = it->second; (*tablet_io)->AddRef(); - return true; -} - -bool TabletManager::RemoveTablet(const std::string& table_name, - const std::string& key_start, - const std::string& key_end, - StatusCode* status) { - io::TabletIO* tablet_io = NULL; - { - MutexLock lock(&mutex_); - std::map::iterator it = - tablet_list_.lower_bound(TabletRange(table_name, key_start, key_end)); - if (it == tablet_list_.end() || - it->first.table_name != table_name || - it->first.key_start != key_start || - it->first.key_end != key_end) { - LOG(ERROR) << "tablet not exist: " << table_name << " [" - << key_start << ", " << key_end << "]"; - SetStatusCode(kKeyNotInRange, status); - return false; - } - tablet_io = it->second; - tablet_list_.erase(it); - } - tablet_io->DecRef(); - return true; + SetStatusCode(kTableExist, status); + return false; + } + *tablet_io = tablet_list_[tablet_range] = + new io::TabletIO(key_start, key_end, table_path, ctime, version); + (*tablet_io)->AddRef(); + return true; } -io::TabletIO* TabletManager::GetTablet(const std::string& table_name, - const std::string& key_start, - const std::string& key_end, - StatusCode* status) { +bool TabletManager::RemoveTablet(const std::string& table_name, const std::string& key_start, + const std::string& key_end, StatusCode* status) { + io::TabletIO* tablet_io = NULL; + { MutexLock lock(&mutex_); std::map::iterator it = tablet_list_.lower_bound(TabletRange(table_name, key_start, key_end)); - if (it == tablet_list_.end() || - it->first.table_name != table_name || - it->first.key_start != key_start || - it->first.key_end != key_end) { - SetStatusCode(kKeyNotInRange, status); - return NULL; + if (it == tablet_list_.end() || it->first.table_name != table_name || + it->first.key_start != key_start || it->first.key_end != key_end) { + LOG(ERROR) << "tablet not exist: " << table_name << " [" << key_start << ", " << key_end + << "]"; + SetStatusCode(kKeyNotInRange, status); + return false; } + tablet_io = it->second; + tablet_list_.erase(it); + } + tablet_io->DecRef(); + return true; +} - it->second->AddRef(); - return it->second; +io::TabletIO* TabletManager::GetTablet(const std::string& table_name, const std::string& key_start, + const std::string& key_end, StatusCode* status) { + MutexLock lock(&mutex_); + std::map::iterator it = + tablet_list_.lower_bound(TabletRange(table_name, key_start, key_end)); + if (it == tablet_list_.end() || it->first.table_name != table_name || + it->first.key_start != key_start || it->first.key_end != key_end) { + SetStatusCode(kKeyNotInRange, status); + return NULL; + } + + it->second->AddRef(); + return it->second; } -io::TabletIO* TabletManager::GetTablet(const std::string& table_name, - const std::string& key, +io::TabletIO* TabletManager::GetTablet(const std::string& table_name, const std::string& key, StatusCode* status) { - MutexLock lock(&mutex_); - std::map::iterator it = - tablet_list_.upper_bound(TabletRange(table_name, key, key)); - if (it == tablet_list_.begin()) { - SetStatusCode(kKeyNotInRange, status); - return NULL; - } else { - --it; - } - const TabletRange& tablet_range = it->first; - if (tablet_range.table_name != table_name || - (tablet_range.key_end != "" && tablet_range.key_end <= key)) { - SetStatusCode(kKeyNotInRange, status); - return NULL; - } - - it->second->AddRef(); - return it->second; + MutexLock lock(&mutex_); + std::map::iterator it = + tablet_list_.upper_bound(TabletRange(table_name, key, key)); + if (it == tablet_list_.begin()) { + SetStatusCode(kKeyNotInRange, status); + return NULL; + } else { + --it; + } + const TabletRange& tablet_range = it->first; + if (tablet_range.table_name != table_name || + (tablet_range.key_end != "" && tablet_range.key_end <= key)) { + SetStatusCode(kKeyNotInRange, status); + return NULL; + } + + it->second->AddRef(); + return it->second; } void TabletManager::GetAllTabletMeta(std::vector* tablet_meta_list) { - MutexLock lock(&mutex_); - std::map::iterator it; - for (it = tablet_list_.begin(); it != tablet_list_.end(); ++it) { - const TabletRange& range = it->first; - io::TabletIO*& tablet_io = it->second; - if (tablet_io->GetStatus() != io::TabletIO::kReady) { - continue; - } - TabletMeta* tablet_meta = new TabletMeta; - tablet_meta->set_table_name(range.table_name); - tablet_meta->set_path(tablet_io->GetTablePath()); - tablet_meta->mutable_key_range()->set_key_start(range.key_start); - tablet_meta->mutable_key_range()->set_key_end(range.key_end); - tablet_meta->set_status(TabletMeta::TabletStatus(tablet_io->GetStatus())); - uint64_t size; - tablet_io->GetDataSize(&size); - tablet_meta->set_size(size); - tablet_meta->set_compact_status(tablet_io->GetCompactStatus()); - tablet_meta_list->push_back(tablet_meta); - //std::vector snapshots; - //tablet_io->ListSnapshot(&snapshots); - //for (uint32_t i = 0; i < snapshots.size(); ++i) { - // tablet_meta->add_snapshot_list(snapshots[i]); - //} + MutexLock lock(&mutex_); + std::map::iterator it; + for (it = tablet_list_.begin(); it != tablet_list_.end(); ++it) { + const TabletRange& range = it->first; + io::TabletIO*& tablet_io = it->second; + if (tablet_io->GetStatus() != io::TabletIO::kReady) { + continue; } + TabletMeta* tablet_meta = new TabletMeta; + tablet_meta->set_table_name(range.table_name); + tablet_meta->set_path(tablet_io->GetTablePath()); + tablet_meta->mutable_key_range()->set_key_start(range.key_start); + tablet_meta->mutable_key_range()->set_key_end(range.key_end); + tablet_meta->set_status(TabletMeta::TabletStatus(tablet_io->GetStatus())); + uint64_t size; + tablet_io->GetDataSize(&size); + tablet_meta->set_size(size); + tablet_meta->set_compact_status(tablet_io->GetCompactStatus()); + tablet_meta_list->push_back(tablet_meta); + // std::vector snapshots; + // tablet_io->ListSnapshot(&snapshots); + // for (uint32_t i = 0; i < snapshots.size(); ++i) { + // tablet_meta->add_snapshot_list(snapshots[i]); + //} + } } void TabletManager::GetAllTablets(std::vector* tablet_list) { - MutexLock lock(&mutex_); - std::map::iterator it; - for (it = tablet_list_.begin(); it != tablet_list_.end(); ++it) { - it->second->AddRef(); - tablet_list->push_back(it->second); - } + MutexLock lock(&mutex_); + std::map::iterator it; + for (it = tablet_list_.begin(); it != tablet_list_.end(); ++it) { + it->second->AddRef(); + tablet_list->push_back(it->second); + } } bool TabletManager::RemoveAllTablets(bool force, StatusCode* status) { - bool all_success = true; - MutexLock lock(&mutex_); - std::map::iterator it; - for (it = tablet_list_.begin(); it != tablet_list_.end();) { - StatusCode code = kTabletNodeOk; - if (it->second->Unload(&code) || force) { - it->second->DecRef(); - tablet_list_.erase(it++); - } else { - if (all_success) { - SetStatusCode(code, status); - all_success = false; - } - ++it; - } + bool all_success = true; + MutexLock lock(&mutex_); + std::map::iterator it; + for (it = tablet_list_.begin(); it != tablet_list_.end();) { + StatusCode code = kTabletNodeOk; + if (it->second->Unload(&code) || force) { + it->second->DecRef(); + tablet_list_.erase(it++); + } else { + if (all_success) { + SetStatusCode(code, status); + all_success = false; + } + ++it; } - return all_success; + } + return all_success; } uint32_t TabletManager::Size() { - MutexLock lock(&mutex_); - return tablet_list_.size(); + MutexLock lock(&mutex_); + return tablet_list_.size(); } -} // namespace tabletnode -} // namespace tera +} // namespace tabletnode +} // namespace tera diff --git a/src/tabletnode/tablet_manager.h b/src/tabletnode/tablet_manager.h index c982b4fa8..e2f0a58a0 100644 --- a/src/tabletnode/tablet_manager.h +++ b/src/tabletnode/tablet_manager.h @@ -18,69 +18,62 @@ namespace tera { namespace tabletnode { struct TabletRange { - TabletRange(const std::string& name, - const std::string& start, - const std::string& end) - : table_name(name), key_start(start), key_end(end) {} - - bool operator<(const TabletRange& other) const { - int cmp_ret = table_name.compare(other.table_name); - if (cmp_ret < 0) { - return true; - } else if (cmp_ret == 0) { - return key_start < other.key_start; - } else { - return false; - } + TabletRange(const std::string& name, const std::string& start, const std::string& end) + : table_name(name), key_start(start), key_end(end) {} + + bool operator<(const TabletRange& other) const { + int cmp_ret = table_name.compare(other.table_name); + if (cmp_ret < 0) { + return true; + } else if (cmp_ret == 0) { + return key_start < other.key_start; + } else { + return false; } + } - bool operator==(const TabletRange& other) const { - return (table_name == other.table_name && key_start == other.key_start); - } + bool operator==(const TabletRange& other) const { + return (table_name == other.table_name && key_start == other.key_start); + } - std::string table_name; - std::string key_start; - std::string key_end; + std::string table_name; + std::string key_start; + std::string key_end; }; class TabletManager { -public: - TabletManager(); - virtual ~TabletManager(); + public: + TabletManager(); + virtual ~TabletManager(); - virtual bool AddTablet(const std::string& table_name, const std::string& table_path, - const std::string& key_start, const std::string& key_end, - io::TabletIO** tablet_io, StatusCode* status = NULL); + virtual bool AddTablet(const std::string& table_name, const std::string& table_path, + const std::string& key_start, const std::string& key_end, int64_t ctime, + uint64_t version, io::TabletIO** tablet_io, StatusCode* status = NULL); - virtual bool RemoveTablet(const std::string& table_name, - const std::string& key_start, - const std::string& key_end, - StatusCode* status = NULL); + virtual bool RemoveTablet(const std::string& table_name, const std::string& key_start, + const std::string& key_end, StatusCode* status = NULL); - virtual io::TabletIO* GetTablet(const std::string& table_name, - const std::string& key_start, - const std::string& key_end, - StatusCode* status = NULL); + virtual io::TabletIO* GetTablet(const std::string& table_name, const std::string& key_start, + const std::string& key_end, StatusCode* status = NULL); - virtual io::TabletIO* GetTablet(const std::string& table_name, - const std::string& key, - StatusCode* status = NULL); + virtual io::TabletIO* GetTablet(const std::string& table_name, const std::string& key, + StatusCode* status = NULL); - virtual void GetAllTabletMeta(std::vector* tablet_meta_list); + virtual void GetAllTabletMeta(std::vector* tablet_meta_list); - virtual void GetAllTablets(std::vector* taletio_list); + virtual void GetAllTablets(std::vector* taletio_list); - virtual bool RemoveAllTablets(bool force = false, StatusCode* status = NULL); + virtual bool RemoveAllTablets(bool force = false, StatusCode* status = NULL); - uint32_t Size(); + uint32_t Size(); -private: - mutable Mutex mutex_; + private: + mutable Mutex mutex_; - std::map tablet_list_; + std::map tablet_list_; }; -} // namespace tabletnode -} // namespace tera +} // namespace tabletnode +} // namespace tera -#endif // TERA_TABLETNODE_TABLET_MANAGER_H_ +#endif // TERA_TABLETNODE_TABLET_MANAGER_H_ diff --git a/src/tabletnode/tabletnode_entry.cc b/src/tabletnode/tabletnode_entry.cc index 231903f68..0833e76e9 100644 --- a/src/tabletnode/tabletnode_entry.cc +++ b/src/tabletnode/tabletnode_entry.cc @@ -41,13 +41,9 @@ DECLARE_int32(tera_metric_http_server_listen_port); DECLARE_bool(tera_tabletnode_dump_level_size_info_enabled); -std::string GetTeraEntryName() { - return "tabletnode"; -} +std::string GetTeraEntryName() { return "tabletnode"; } -tera::TeraEntry* GetTeraEntry() { - return new tera::tabletnode::TabletNodeEntry(); -} +tera::TeraEntry* GetTeraEntry() { return new tera::tabletnode::TabletNodeEntry(); } namespace tera { namespace tabletnode { @@ -56,124 +52,119 @@ TabletNodeEntry::TabletNodeEntry() : tabletnode_impl_(NULL), remote_tabletnode_(NULL), metric_http_server_(new tera::MetricHttpServer()) { - sofa::pbrpc::RpcServerOptions rpc_options; - rpc_options.max_throughput_in = FLAGS_tera_tabletnode_rpc_server_max_inflow; - rpc_options.max_throughput_out = FLAGS_tera_tabletnode_rpc_server_max_outflow; - rpc_options.keep_alive_time = 7200; - rpc_server_.reset(new sofa::pbrpc::RpcServer(rpc_options)); + sofa::pbrpc::RpcServerOptions rpc_options; + rpc_options.max_throughput_in = FLAGS_tera_tabletnode_rpc_server_max_inflow; + rpc_options.max_throughput_out = FLAGS_tera_tabletnode_rpc_server_max_outflow; + rpc_options.keep_alive_time = 7200; + rpc_server_.reset(new sofa::pbrpc::RpcServer(rpc_options)); } TabletNodeEntry::~TabletNodeEntry() {} bool TabletNodeEntry::StartServer() { - // set which core could work on this TS - SetProcessorAffinity(); - // - // start metric http server - if (FLAGS_tera_metric_http_server_enable) { - if(!metric_http_server_->Start(FLAGS_tera_metric_http_server_listen_port)) { - LOG(ERROR) << "Start metric http server failed."; - return false; - } - } else { - LOG(INFO) << "Metric http server is disabled."; - } - - IpAddress tabletnode_addr("0.0.0.0", FLAGS_tera_tabletnode_port); - LOG(INFO) << "Start RPC server at: " << tabletnode_addr.ToString(); - - tabletnode_impl_.reset(new TabletNodeImpl()); - remote_tabletnode_ = new RemoteTabletNode(tabletnode_impl_.get()); - - // 注册给rpcserver, rpcserver会负责delete - rpc_server_->RegisterService(remote_tabletnode_); - if (!rpc_server_->Start(tabletnode_addr.ToString())) { - LOG(ERROR) << "start RPC server error"; - return false; - } - - if (!tabletnode_impl_->Init()) { //register on ZK - LOG(ERROR) << "fail to init tabletnode_impl"; - return false; + // set which core could work on this TS + SetProcessorAffinity(); + // + // start metric http server + if (FLAGS_tera_metric_http_server_enable) { + if (!metric_http_server_->Start(FLAGS_tera_metric_http_server_listen_port)) { + LOG(ERROR) << "Start metric http server failed."; + return false; } - LOG(INFO) << "finish starting RPC server"; - return true; + } else { + LOG(INFO) << "Metric http server is disabled."; + } + + IpAddress tabletnode_addr("0.0.0.0", FLAGS_tera_tabletnode_port); + LOG(INFO) << "Start RPC server at: " << tabletnode_addr.ToString(); + + tabletnode_impl_.reset(new TabletNodeImpl()); + remote_tabletnode_ = new RemoteTabletNode(tabletnode_impl_.get()); + + // 注册给rpcserver, rpcserver会负责delete + rpc_server_->RegisterService(remote_tabletnode_); + if (!rpc_server_->Start(tabletnode_addr.ToString())) { + LOG(ERROR) << "start RPC server error"; + return false; + } + + if (!tabletnode_impl_->Init()) { // register on ZK + LOG(ERROR) << "fail to init tabletnode_impl"; + return false; + } + LOG(INFO) << "finish starting RPC server"; + return true; } void TabletNodeEntry::ShutdownServer() { - metric_http_server_->Stop(); - tabletnode_impl_->Exit(); - LOG(INFO) << "TabletNodeEntry stop done!"; - _exit(0); + tabletnode_impl_->Exit(); + LOG(INFO) << "TabletNodeEntry stop done!"; + _exit(0); } bool TabletNodeEntry::Run() { - static int64_t timer_ticks = 0; - ++timer_ticks; - - // Run garbage collect, in secondes. - const int garbage_collect_period = (FLAGS_tera_garbage_collect_period)? - FLAGS_tera_garbage_collect_period : 60; - if (timer_ticks % garbage_collect_period == 0) { - tabletnode_impl_->GarbageCollect(); - } - - if (FLAGS_tera_tabletnode_dump_level_size_info_enabled) { - tabletnode_impl_->RefreshLevelSize(); - } - - CollectorReportPublisher::GetInstance().Refresh(); - tabletnode_impl_->RefreshSysInfo(); - tabletnode_impl_->GetSysInfo().DumpLog(); - LOG(INFO) << "[ThreadPool schd/task/cnt] " - << remote_tabletnode_->ProfilingLog(); - - int64_t now_time = get_micros(); - int64_t earliest_rpc_time = now_time; - RpcTimerList::Instance()->TopTime(&earliest_rpc_time); - double max_delay = (now_time - earliest_rpc_time) / 1000.0; - LOG(INFO) << "pending rpc max delay: " - << std::fixed<< std::setprecision(2) << max_delay; - if (FLAGS_tera_tabletnode_hang_detect_enabled && - max_delay > FLAGS_tera_tabletnode_hang_detect_threshold) { - LOG(FATAL) << "hang detected: " - << std::fixed<< std::setprecision(2) << max_delay; - } - - ThisThread::Sleep(1000); - return true; + static int64_t timer_ticks = 0; + ++timer_ticks; + + // Run garbage collect, in secondes. + const int garbage_collect_period = + (FLAGS_tera_garbage_collect_period) ? FLAGS_tera_garbage_collect_period : 60; + if (timer_ticks % garbage_collect_period == 0) { + tabletnode_impl_->GarbageCollect(); + } + + if (FLAGS_tera_tabletnode_dump_level_size_info_enabled) { + tabletnode_impl_->RefreshLevelSize(); + } + + CollectorReportPublisher::GetInstance().Refresh(); + tabletnode_impl_->RefreshAndDumpSysInfo(); + + LOG(INFO) << "[ThreadPool schd/task/cnt] " << remote_tabletnode_->ProfilingLog(); + + int64_t now_time = get_micros(); + int64_t earliest_rpc_time = now_time; + RpcTimerList::Instance()->TopTime(&earliest_rpc_time); + double max_delay = (now_time - earliest_rpc_time) / 1000.0; + LOG(INFO) << "pending rpc max delay: " << std::fixed << std::setprecision(2) << max_delay; + if (FLAGS_tera_tabletnode_hang_detect_enabled && + max_delay > FLAGS_tera_tabletnode_hang_detect_threshold) { + LOG(FATAL) << "hang detected: " << std::fixed << std::setprecision(2) << max_delay; + } + + ThisThread::Sleep(1000); + return true; } void TabletNodeEntry::SetProcessorAffinity() { - if (!FLAGS_tera_tabletnode_cpu_affinity_enabled) { - return; - } - - ThreadAttributes thread_attr; - thread_attr.MarkCurMask(); - thread_attr.ResetCpuMask(); - std::vector cpu_set; - - SplitString(FLAGS_tera_tabletnode_cpu_affinity_set, ",", &cpu_set); - for (uint32_t i = 0; i < cpu_set.size(); ++i) { - int32_t cpu_id; - if (StringToNumber(cpu_set[i], &cpu_id)) { - thread_attr.SetCpuMask(cpu_id); - } else { - LOG(ERROR) << "invalid cpu affinity id: " << cpu_set[i]; - } + if (!FLAGS_tera_tabletnode_cpu_affinity_enabled) { + return; + } + + ThreadAttributes thread_attr; + thread_attr.MarkCurMask(); + thread_attr.ResetCpuMask(); + std::vector cpu_set; + + SplitString(FLAGS_tera_tabletnode_cpu_affinity_set, ",", &cpu_set); + for (uint32_t i = 0; i < cpu_set.size(); ++i) { + int32_t cpu_id; + if (StringToNumber(cpu_set[i], &cpu_id)) { + thread_attr.SetCpuMask(cpu_id); + } else { + LOG(ERROR) << "invalid cpu affinity id: " << cpu_set[i]; } + } - if (!thread_attr.SetCpuAffinity()) { - LOG(ERROR) << "fail to set affinity, revert back"; - if (!thread_attr.RevertCpuAffinity()) { - LOG(ERROR) << "fail to revert previous affinity"; - } - return; + if (!thread_attr.SetCpuAffinity()) { + LOG(ERROR) << "fail to set affinity, revert back"; + if (!thread_attr.RevertCpuAffinity()) { + LOG(ERROR) << "fail to revert previous affinity"; } + return; + } - LOG(INFO) << "Set processor affinity to CPU: " - << FLAGS_tera_tabletnode_cpu_affinity_set; + LOG(INFO) << "Set processor affinity to CPU: " << FLAGS_tera_tabletnode_cpu_affinity_set; } -}// namespace tabletnode -} // namespace tera +} // namespace tabletnode +} // namespace tera diff --git a/src/tabletnode/tabletnode_entry.h b/src/tabletnode/tabletnode_entry.h index ec87acc2b..7cbe328f1 100644 --- a/src/tabletnode/tabletnode_entry.h +++ b/src/tabletnode/tabletnode_entry.h @@ -11,7 +11,7 @@ #include "common/base/scoped_ptr.h" #include "common/metric/metric_http_server.h" -#include "tera_entry.h" +#include "tera/tera_entry.h" namespace tera { @@ -25,23 +25,24 @@ class TabletNodeImpl; class RemoteTabletNode; class TabletNodeEntry : public TeraEntry { -public: - TabletNodeEntry(); - ~TabletNodeEntry(); - - bool StartServer(); - bool Run(); - void ShutdownServer(); - - void SetProcessorAffinity(); -private: - scoped_ptr tabletnode_impl_; - RemoteTabletNode* remote_tabletnode_; - scoped_ptr rpc_server_; - scoped_ptr metric_http_server_; + public: + TabletNodeEntry(); + ~TabletNodeEntry(); + + bool StartServer(); + bool Run(); + void ShutdownServer(); + + void SetProcessorAffinity(); + + private: + scoped_ptr tabletnode_impl_; + RemoteTabletNode* remote_tabletnode_; + scoped_ptr rpc_server_; + scoped_ptr metric_http_server_; }; -} // namespace tabletnode -} // namespace tera +} // namespace tabletnode +} // namespace tera -#endif // TERA_TABLETNODE_TABLETNODE_ENTRY_H_ +#endif // TERA_TABLETNODE_TABLETNODE_ENTRY_H_ diff --git a/src/tabletnode/tabletnode_flags.cc b/src/tabletnode/tabletnode_flags.cc index 8c6ac0a95..0ea6f9aad 100644 --- a/src/tabletnode/tabletnode_flags.cc +++ b/src/tabletnode/tabletnode_flags.cc @@ -6,13 +6,21 @@ #include "gflags/gflags.h" DEFINE_string(tera_tabletnode_port, "20000", "the tablet node port of tera system"); -DEFINE_int32(tera_tabletnode_ctrl_thread_num, 20, "control thread number of tablet node (query/load/unload/split)"); +DEFINE_int32(tera_tabletnode_ctrl_thread_num, 20, + "control thread number of tablet node (load/unload)"); +DEFINE_int32(tera_tabletnode_lightweight_ctrl_thread_num, 10, + "control thread number of tablet node (query/split)"); +DEFINE_int32(tera_tabletnode_ctrl_query_thread_num, 10, + "control query thread num(query/load query/unload query)"); DEFINE_int32(tera_tabletnode_write_thread_num, 10, "write thread number of tablet node"); DEFINE_int32(tera_tabletnode_read_thread_num, 40, "read thread number of tablet node"); DEFINE_int32(tera_tabletnode_scan_thread_num, 30, "scan thread number of tablet node"); -DEFINE_int32(tera_tabletnode_manual_compact_thread_num, 2, "the manual compact thread number of tablet node server"); -DEFINE_int32(tera_tabletnode_impl_thread_max_num, 10, "the max thread number for tablet node impl operations"); -DEFINE_int32(tera_tabletnode_compact_thread_num, 30, "the max thread number for leveldb compaction"); +DEFINE_int32(tera_tabletnode_manual_compact_thread_num, 2, + "the manual compact thread number of tablet node server"); +DEFINE_int32(tera_tabletnode_impl_thread_max_num, 10, + "the max thread number for tablet node impl operations"); +DEFINE_int32(tera_tabletnode_compact_thread_num, 30, + "the max thread number for leveldb compaction"); DEFINE_int32(tera_tabletnode_block_cache_size, 2000, "the cache size of tablet (in MB)"); DEFINE_int32(tera_tabletnode_table_cache_size, 2000, "the table cache size (in MB)"); @@ -21,35 +29,53 @@ DEFINE_int32(tera_request_pending_limit, 100000, "the max read/write request pen DEFINE_int32(tera_scan_request_pending_limit, 1000, "the max scan request pending"); DEFINE_int32(tera_garbage_collect_period, 1800, "garbage collect period in s"); -DEFINE_int32(tera_tabletnode_retry_period, 100, "the retry interval period (in ms) when operate tablet"); +DEFINE_int32(tera_tabletnode_retry_period, 100, + "the retry interval period (in ms) when operate tablet"); -DEFINE_int32(tera_tabletnode_rpc_server_max_inflow, -1, "the max input flow (in MB/s) for tabletnode rpc-server, -1 means no limit"); -DEFINE_int32(tera_tabletnode_rpc_server_max_outflow, -1, "the max output flow (in MB/s) for tabletnode rpc-server, -1 means no limit"); +DEFINE_int32(tera_tabletnode_rpc_server_max_inflow, -1, + "the max input flow (in MB/s) for tabletnode rpc-server, -1 means " + "no limit"); +DEFINE_int32(tera_tabletnode_rpc_server_max_outflow, -1, + "the max output flow (in MB/s) for tabletnode rpc-server, -1 " + "means no limit"); DEFINE_bool(tera_tabletnode_cpu_affinity_enabled, false, "enable cpu affinity or not"); DEFINE_string(tera_tabletnode_cpu_affinity_set, "1,2", "the cpu set of cpu affinity setting"); DEFINE_bool(tera_tabletnode_hang_detect_enabled, false, "enable detect read/write hang"); -DEFINE_int32(tera_tabletnode_hang_detect_threshold, 60000, "read/write hang detect threshold (in ms)"); +DEFINE_int32(tera_tabletnode_hang_detect_threshold, 60000, + "read/write hang detect threshold (in ms)"); DEFINE_bool(tera_tabletnode_delete_old_flash_cache_enabled, true, "delete old flash cache"); -DEFINE_bool(flash_block_cache_force_update_conf_enabled, false, "force update conf from FLAG file"); -DEFINE_int64(flash_block_cache_size, 350UL << 30, "max capacity size can be use for each ssd, default 350GB"); -DEFINE_int64(flash_block_cache_blockset_size, 1UL << 30, "block set size, default 1GB"); -DEFINE_int64(flash_block_cache_block_size, 8192, "block size in each block set, default 8KB"); -DEFINE_int64(flash_block_cache_fid_batch_num, 100000, "fid batch write number"); DEFINE_int64(meta_block_cache_size, 2000, "(MB) mem block cache size for meta leveldb"); DEFINE_int64(meta_table_cache_size, 500, "(MB) mem table cache size for meta leveldb"); -DEFINE_int64(flash_block_cache_write_buffer_size, 1048576, "(B) write buffer size for meta leveldb"); -DEFINE_string(tera_tabletnode_cache_paths, "../data/cache/", "paths for cached data storage. Mutiple definition like: \"./path1/;./path2/\""); -DEFINE_int32(tera_tabletnode_cache_update_thread_num, 4, "thread num for update cache"); -DEFINE_bool(tera_tabletnode_cache_force_read_from_cache, true, "force update cache before any read"); DEFINE_int32(tera_tabletnode_gc_log_level, 15, "the vlog level [0 - 16] for cache gc."); -DEFINE_bool(tera_tabletnode_tcm_cache_release_enabled, true, "enable the timer to release tcmalloc cache"); -DEFINE_int32(tera_tabletnode_tcm_cache_release_period, 180, "the period (in sec) to try release tcmalloc cache"); +DEFINE_bool(tera_tabletnode_tcm_cache_release_enabled, true, + "enable the timer to release tcmalloc cache"); +DEFINE_int32(tera_tabletnode_tcm_cache_release_period, 180, + "the period (in sec) to try release tcmalloc cache"); DEFINE_int64(tera_tabletnode_tcm_cache_size, 838860800, "TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES"); DEFINE_bool(tera_tabletnode_dump_running_info, true, "dump tabletnode running info"); -DEFINE_string(tera_tabletnode_running_info_dump_file, "../monitor/ts.info.data", "file path for dump running info"); -DEFINE_int64(tera_refresh_tablets_status_interval_ms, 1800000, "background thread refresh tablets status interval in ms, default 0.5h"); +DEFINE_string(tera_tabletnode_running_info_dump_file, "../monitor/ts.info.data", + "file path for dump running info"); +DEFINE_int64(tera_refresh_tablets_status_interval_ms, 1800000, + "background thread refresh tablets status interval in ms, default 0.5h"); -DEFINE_bool(tera_tabletnode_dump_level_size_info_enabled, false, "enable dump level size or not, it's mainly used for performance-test"); +DEFINE_bool(tera_tabletnode_dump_level_size_info_enabled, false, + "enable dump level size or not, it's mainly used for performance-test"); + +DEFINE_int64(tera_tabletnode_parallel_read_task_num, 10, + "max tasks can be splited from a read request"); +DEFINE_int64(tera_tabletnode_parallel_read_rows_per_task, 30, + "min row num of a parallel read task"); +DEFINE_bool(tera_tabletnode_clean_persistent_cache_paths, false, + "Clean persistent cache paths when roll back to env flash"); + +DEFINE_double(tera_quota_unlimited_pending_ratio, 0.1, + "while pending queue less then ratio*pending_limit, quota limit doesn't need to use"); +DEFINE_int32(tera_quota_scan_max_retry_times, 100, + "quota limit maximum retry times for every scan slot rpc"); +DEFINE_int32(tera_quota_scan_retry_delay_interval, 100, + "quota limit retry task delay time for every scan slot rpc(ms)"); +DEFINE_uint64(tera_quota_max_retry_queue_length, 1000, + "max length of quota retry queue work after check quota failed"); \ No newline at end of file diff --git a/src/tabletnode/tabletnode_impl.cc b/src/tabletnode/tabletnode_impl.cc index 85e637f68..7e07383d3 100644 --- a/src/tabletnode/tabletnode_impl.cc +++ b/src/tabletnode/tabletnode_impl.cc @@ -4,8 +4,10 @@ #include "tabletnode/tabletnode_impl.h" +#include #include #include +#include #include #include @@ -16,13 +18,12 @@ #include "db/table_cache.h" #include "common/base/string_ext.h" #include "common/metric/cache_collector.h" +#include "common/metric/tcmalloc_collector.h" #include "common/metric/prometheus_subscriber.h" #include "common/metric/ratio_collector.h" #include "common/metric/metric_counter.h" -#include "common/thread.h" #include "io/io_utils.h" #include "io/utils_leveldb.h" -#include "leveldb/env_flash_block_cache.h" #include "leveldb/cache.h" #include "leveldb/env_dfs.h" #include "leveldb/env_flash.h" @@ -30,6 +31,8 @@ #include "leveldb/config.h" #include "leveldb/slog.h" #include "leveldb/table_utils.h" +#include "leveldb/util/stop_watch.h" +#include "leveldb/util/dfs_read_thread_limiter.h" #include "proto/kv_helper.h" #include "proto/proto_helper.h" #include "proto/tabletnode_client.h" @@ -58,6 +61,9 @@ DECLARE_bool(tera_mock_zk_enabled); DECLARE_string(tera_master_meta_table_name); DECLARE_int32(tera_tabletnode_retry_period); DECLARE_string(tera_leveldb_log_path); +DECLARE_int32(leveldb_max_log_size_MB); +DECLARE_int32(leveldb_log_flush_trigger_size_B); +DECLARE_int32(leveldb_log_flush_trigger_interval_ms); DECLARE_bool(tera_tabletnode_rpc_limit_enabled); DECLARE_int32(tera_tabletnode_rpc_limit_max_inflow); @@ -72,16 +78,9 @@ DECLARE_string(tera_tabletnode_path_prefix); // cache-related DECLARE_int32(tera_memenv_block_cache_size); -DECLARE_bool(tera_tabletnode_flash_block_cache_enabled); DECLARE_bool(tera_tabletnode_delete_old_flash_cache_enabled); -DECLARE_bool(flash_block_cache_force_update_conf_enabled); -DECLARE_int64(flash_block_cache_size); -DECLARE_int64(flash_block_cache_blockset_size); -DECLARE_int64(flash_block_cache_block_size); -DECLARE_int64(flash_block_cache_fid_batch_num); DECLARE_int64(meta_block_cache_size); DECLARE_int64(meta_table_cache_size); -DECLARE_int64(flash_block_cache_write_buffer_size); DECLARE_string(tera_tabletnode_cache_paths); DECLARE_int32(tera_tabletnode_cache_block_size); DECLARE_string(tera_tabletnode_cache_name); @@ -105,6 +104,14 @@ DECLARE_int64(tera_refresh_tablets_status_interval_ms); DECLARE_string(flagfile); +DECLARE_int64(tera_tabletnode_parallel_read_task_num); +DECLARE_int64(tera_tabletnode_parallel_read_rows_per_task); +DECLARE_bool(tera_enable_persistent_cache); +DECLARE_bool(tera_tabletnode_clean_persistent_cache_paths); + +DECLARE_int32(tera_tabletnode_read_thread_num); +DECLARE_double(dfs_read_thread_ratio); + using namespace std::placeholders; static const int GC_LOG_LEVEL = FLAGS_tera_tabletnode_gc_log_level; @@ -117,31 +124,43 @@ extern tera::Counter snappy_after_size_counter; namespace tera { namespace tabletnode { using tera::SubscriberType; +using std::make_shared; tera::MetricCounter read_error_counter(kErrorCountMetric, kApiLabelRead, {SubscriberType::QPS, SubscriberType::SUM}); tera::MetricCounter write_error_counter(kErrorCountMetric, kApiLabelWrite, {SubscriberType::QPS, SubscriberType::SUM}); tera::MetricCounter scan_error_counter(kErrorCountMetric, kApiLabelScan, - {SubscriberType::QPS, SubscriberType::SUM}); + {SubscriberType::QPS, SubscriberType::SUM}); + +tera::MetricCounter read_range_error_counter(kRangeErrorMetric, kApiLabelRead, + {SubscriberType::QPS}); +tera::MetricCounter write_range_error_counter(kRangeErrorMetric, kApiLabelWrite, + {SubscriberType::QPS}); +tera::MetricCounter scan_range_error_counter(kRangeErrorMetric, kApiLabelScan, + {SubscriberType::QPS}); -tera::MetricCounter read_range_error_counter(kRangeErrorMetric, kApiLabelRead, {SubscriberType::QPS}); -tera::MetricCounter write_range_error_counter(kRangeErrorMetric, kApiLabelWrite, {SubscriberType::QPS}); -tera::MetricCounter scan_range_error_counter(kRangeErrorMetric, kApiLabelScan, {SubscriberType::QPS}); +extern tera::MetricCounter read_reject_counter; -TabletNodeImpl::CacheMetrics::CacheMetrics(leveldb::Cache* block_cache, leveldb::TableCache* table_cache) +TabletNodeImpl::CacheMetrics::CacheMetrics(leveldb::Cache* block_cache, + leveldb::TableCache* table_cache) : block_cache_hitrate_(kBlockCacheHitRateMetric, - std::unique_ptr(new LRUCacheCollector(block_cache, CacheCollectType::kHitRate))), + std::unique_ptr( + new LRUCacheCollector(block_cache, CacheCollectType::kHitRate))), block_cache_entries_(kBlockCacheEntriesMetric, - std::unique_ptr(new LRUCacheCollector(block_cache, CacheCollectType::kEntries))), - block_cache_charge_(kBlockCacheChargeMetric, - std::unique_ptr(new LRUCacheCollector(block_cache, CacheCollectType::kCharge))), + std::unique_ptr( + new LRUCacheCollector(block_cache, CacheCollectType::kEntries))), + block_cache_charge_(kBlockCacheChargeMetric, std::unique_ptr(new LRUCacheCollector( + block_cache, CacheCollectType::kCharge))), table_cache_hitrate_(kTableCacheHitRateMetric, - std::unique_ptr(new TableCacheCollector(table_cache, CacheCollectType::kHitRate))), + std::unique_ptr( + new TableCacheCollector(table_cache, CacheCollectType::kHitRate))), table_cache_entries_(kTableCacheEntriesMetric, - std::unique_ptr(new TableCacheCollector(table_cache, CacheCollectType::kEntries))), + std::unique_ptr( + new TableCacheCollector(table_cache, CacheCollectType::kEntries))), table_cache_charge_(kTableCacheChargeMetric, - std::unique_ptr(new TableCacheCollector(table_cache, CacheCollectType::kCharge))) {} + std::unique_ptr( + new TableCacheCollector(table_cache, CacheCollectType::kCharge))) {} TabletNodeImpl::TabletNodeImpl() : status_(kNotInited), @@ -151,936 +170,758 @@ TabletNodeImpl::TabletNodeImpl() release_cache_timer_id_(kInvalidTimerId), thread_pool_(new ThreadPool(FLAGS_tera_tabletnode_impl_thread_max_num)), cache_metrics_(NULL) { - if (FLAGS_tera_local_addr == "") { - local_addr_ = utils::GetLocalHostName()+ ":" + FLAGS_tera_tabletnode_port; - } else { - local_addr_ = FLAGS_tera_local_addr + ":" + FLAGS_tera_tabletnode_port; - } - sysinfo_.SetServerAddr(local_addr_); - - leveldb::Env::Default()->SetBackgroundThreads(FLAGS_tera_tabletnode_compact_thread_num); - leveldb::Env::Default()->RenameFile(FLAGS_tera_leveldb_log_path, - FLAGS_tera_leveldb_log_path + ".bak"); - leveldb::Status s = - leveldb::Env::Default()->NewLogger(FLAGS_tera_leveldb_log_path, &ldb_logger_); - leveldb::Env::Default()->SetLogger(ldb_logger_); - - ldb_block_cache_ = - leveldb::NewLRUCache(FLAGS_tera_tabletnode_block_cache_size * 1024UL * 1024); - m_memory_cache = - leveldb::NewLRUCache(FLAGS_tera_memenv_block_cache_size * 1024UL * 1024); - ldb_table_cache_ = - new leveldb::TableCache(FLAGS_tera_tabletnode_table_cache_size * 1024UL * 1024); - if (!s.ok()) { - ldb_logger_ = NULL; - } - - if (FLAGS_tera_leveldb_env_type != "local") { - io::InitDfsEnv(); - } - - InitCacheSystem(); - - if (FLAGS_tera_tabletnode_tcm_cache_release_enabled) { - LOG(INFO) << "enable tcmalloc cache release timer"; - EnableReleaseMallocCacheTimer(); - } - const char* tcm_property = "tcmalloc.max_total_thread_cache_bytes"; - MallocExtension::instance()->SetNumericProperty( - tcm_property, FLAGS_tera_tabletnode_tcm_cache_size); - size_t tcm_t; - CHECK(MallocExtension::instance()->GetNumericProperty(tcm_property, &tcm_t)); - LOG(INFO) << tcm_property << "=" << tcm_t; - sysinfo_.SetProcessStartTime(get_micros()); - for (int level = 0; level != leveldb::config::kNumLevels; ++level) { - level_size_.push_back(tera::MetricCounter{kLevelSize, "level:" + std::to_string(level), - {tera::SubscriberType::LATEST}, false}); - level_size_.back().Set(0); - } + if (FLAGS_tera_local_addr == "") { + local_addr_ = utils::GetLocalHostName() + ":" + FLAGS_tera_tabletnode_port; + } else { + local_addr_ = FLAGS_tera_local_addr + ":" + FLAGS_tera_tabletnode_port; + } + sysinfo_.SetServerAddr(local_addr_); + + leveldb::Env::Default()->SetBackgroundThreads(FLAGS_tera_tabletnode_compact_thread_num); + + uint64_t max_log_size = static_cast(FLAGS_leveldb_max_log_size_MB) << 20; + leveldb::LogOption log_opt = + leveldb::LogOption::LogOptionBuilder() + .SetMaxLogSize(max_log_size) + .SetFlushTriggerSize(FLAGS_leveldb_log_flush_trigger_size_B) + .SetFlushTriggerIntervalMs(FLAGS_leveldb_log_flush_trigger_interval_ms) + .Build(); + leveldb::Status s = + leveldb::Env::Default()->NewLogger(FLAGS_tera_leveldb_log_path, log_opt, &ldb_logger_); + leveldb::Env::Default()->SetLogger(ldb_logger_); + LOG(INFO) << "leveldb logger inited, log_file:" << FLAGS_tera_leveldb_log_path + << ", options:" << log_opt.ToString(); + + ldb_block_cache_ = leveldb::NewLRUCache(FLAGS_tera_tabletnode_block_cache_size * 1024UL * 1024); + m_memory_cache = leveldb::NewLRUCache(FLAGS_tera_memenv_block_cache_size * 1024UL * 1024); + ldb_table_cache_ = + new leveldb::TableCache(FLAGS_tera_tabletnode_table_cache_size * 1024UL * 1024); + if (!s.ok()) { + ldb_logger_ = NULL; + } + + if (FLAGS_tera_leveldb_env_type != "local") { + io::InitDfsEnv(); + } + + if (FLAGS_tera_tabletnode_tcm_cache_release_enabled) { + LOG(INFO) << "enable tcmalloc cache release timer"; + EnableReleaseMallocCacheTimer(); + } + const char* tcm_property = "tcmalloc.max_total_thread_cache_bytes"; + MallocExtension::instance()->SetNumericProperty(tcm_property, + FLAGS_tera_tabletnode_tcm_cache_size); + size_t tcm_t; + CHECK(MallocExtension::instance()->GetNumericProperty(tcm_property, &tcm_t)); + LOG(INFO) << tcm_property << "=" << tcm_t; + sysinfo_.SetProcessStartTime(get_micros()); + for (int level = 0; level != leveldb::config::kNumLevels; ++level) { + level_size_.push_back(tera::MetricCounter{ + kLevelSize, "level:" + std::to_string(level), {tera::SubscriberType::LATEST}, false}); + level_size_.back().Set(0); + } } TabletNodeImpl::~TabletNodeImpl() {} bool TabletNodeImpl::Init() { - if (FLAGS_tera_coord_type.empty()) { - LOG(ERROR) << "Note: We don't recommend that use '" - << "--tera_[zk|ins|mock_zk|mock_ins]_enabled' flag for your cluster coord" - << " replace by '--tera_coord_type=[zk|ins|mock_zk|mock_ins|fake_zk]'" - << " flag is usually recommended."; - } - if (FLAGS_tera_coord_type == "zk" || - (FLAGS_tera_coord_type.empty() && FLAGS_tera_zk_enabled)) { - zk_adapter_.reset(new TabletNodeZkAdapter(this, local_addr_)); - } else if (FLAGS_tera_coord_type == "ins" || - (FLAGS_tera_coord_type.empty() && FLAGS_tera_ins_enabled)) { - LOG(INFO) << "ins mode!"; - zk_adapter_.reset(new InsTabletNodeZkAdapter(this, local_addr_)); - } else if (FLAGS_tera_coord_type == "mock_zk" || - (FLAGS_tera_coord_type.empty() && FLAGS_tera_mock_zk_enabled)) { - LOG(INFO) << "mock zk mode!"; - zk_adapter_.reset(new MockTabletNodeZkAdapter(this, local_addr_)); - } else if (FLAGS_tera_coord_type == "mock_ins" || - (FLAGS_tera_coord_type.empty() && FLAGS_tera_mock_ins_enabled)) { - LOG(INFO) << "mock ins mode!"; - zk_adapter_.reset(new MockInsTabletNodeZkAdapter(this, local_addr_)); - } else if (FLAGS_tera_coord_type == "fake_zk" || - FLAGS_tera_coord_type.empty()) { - LOG(INFO) << "fake zk mode!"; - zk_adapter_.reset(new FakeTabletNodeZkAdapter(this, local_addr_)); - } - - SetTabletNodeStatus(kIsIniting); - thread_pool_->AddTask(std::bind(&TabletNodeZkAdapterBase::Init, zk_adapter_.get())); - - // register cache metrics - cache_metrics_.reset(new CacheMetrics(ldb_block_cache_, ldb_table_cache_)); - // register snappy metrics - snappy_ratio_metric_.reset(new AutoCollectorRegister(kSnappyCompressionRatioMetric, std::unique_ptr( - new RatioCollector(&leveldb::snappy_before_size_counter, &leveldb::snappy_after_size_counter, true)))); - - // update tablets status at background - tablet_healthcheck_thread_.Start(std::bind(&TabletNodeImpl::RefreshTabletsStatus, this)); - return true; + if (!InitCacheSystem()) { + LOG(ERROR) << "Init cache system failed, exit."; + return false; + } + + InitDfsReadThreadLimiter(); + + if (FLAGS_tera_coord_type.empty()) { + LOG(ERROR) << "Note: We don't recommend that use '" + << "--tera_[zk|ins|mock_zk|mock_ins]_enabled' flag for your cluster " + "coord" + << " replace by '--tera_coord_type=[zk|ins|mock_zk|mock_ins|fake_zk]'" + << " flag is usually recommended."; + } + if (FLAGS_tera_coord_type == "zk" || (FLAGS_tera_coord_type.empty() && FLAGS_tera_zk_enabled)) { + zk_adapter_.reset(new TabletNodeZkAdapter(this, local_addr_)); + } else if (FLAGS_tera_coord_type == "ins" || + (FLAGS_tera_coord_type.empty() && FLAGS_tera_ins_enabled)) { + LOG(INFO) << "ins mode!"; + zk_adapter_.reset(new InsTabletNodeZkAdapter(this, local_addr_)); + } else if (FLAGS_tera_coord_type == "mock_zk" || + (FLAGS_tera_coord_type.empty() && FLAGS_tera_mock_zk_enabled)) { + LOG(INFO) << "mock zk mode!"; + zk_adapter_.reset(new MockTabletNodeZkAdapter(this, local_addr_)); + } else if (FLAGS_tera_coord_type == "mock_ins" || + (FLAGS_tera_coord_type.empty() && FLAGS_tera_mock_ins_enabled)) { + LOG(INFO) << "mock ins mode!"; + zk_adapter_.reset(new MockInsTabletNodeZkAdapter(this, local_addr_)); + } else if (FLAGS_tera_coord_type == "fake_zk" || FLAGS_tera_coord_type.empty()) { + LOG(INFO) << "fake zk mode!"; + zk_adapter_.reset(new FakeTabletNodeZkAdapter(this, local_addr_)); + } + + SetTabletNodeStatus(kIsIniting); + thread_pool_->AddTask(std::bind(&TabletNodeZkAdapterBase::Init, zk_adapter_.get())); + + // register cache metrics + cache_metrics_.reset(new CacheMetrics(ldb_block_cache_, ldb_table_cache_)); + RegisterTcmallocCollectors(); + // register snappy metrics + snappy_ratio_metric_.reset(new AutoCollectorRegister( + kSnappyCompressionRatioMetric, + std::unique_ptr(new RatioCollector(&leveldb::snappy_before_size_counter, + &leveldb::snappy_after_size_counter, true)))); + + // update tablets status at background + tablet_healthcheck_thread_ = std::thread{&TabletNodeImpl::RefreshTabletsStatus, this}; + return true; } -void TabletNodeImpl::InitCacheSystem() { - if (FLAGS_tera_tabletnode_flash_block_cache_enabled) { - LOG(INFO) << "flash block cache path: " << FLAGS_tera_tabletnode_cache_paths; - std::vector path_list; - SplitString(FLAGS_tera_tabletnode_cache_paths, ";", &path_list); - - leveldb::Env* posix_env = leveldb::Env::Default(); - for (uint32_t i = 0; i < path_list.size(); ++i) { - posix_env->CreateDir(path_list[i]); - } - - if (FLAGS_tera_tabletnode_delete_old_flash_cache_enabled) { - tera::io::DeleteOldFlashCache(path_list); - } +bool TabletNodeImpl::InitCacheSystem() { + sysinfo_.SetPersistentCacheSize(0); + if (!io::GetCachePaths().empty()) { + if (FLAGS_tera_enable_persistent_cache) { + LOG(INFO) << "Enable persistent cache."; + std::shared_ptr persistent_cache; + auto status = ::tera::io::GetPersistentCache(&persistent_cache); - LOG(INFO) << "activate flash block cache system"; - leveldb::Env* block_cache_env = io::DefaultFlashBlockCacheEnv(); - for (uint32_t i = 0; i < path_list.size(); ++i) { - leveldb::FlashBlockCacheOptions opts; - - //opts.force_update_conf_enabled = FLAGS_flash_block_cache_force_update_conf_enabled; - opts.force_update_conf_enabled = false; - opts.cache_size = FLAGS_flash_block_cache_size; - opts.blockset_size = FLAGS_flash_block_cache_blockset_size; - opts.block_size = FLAGS_flash_block_cache_block_size; - - opts.fid_batch_num = FLAGS_flash_block_cache_fid_batch_num; - opts.meta_block_cache_size = FLAGS_meta_block_cache_size; - opts.meta_table_cache_size = FLAGS_meta_table_cache_size; - opts.write_buffer_size = FLAGS_flash_block_cache_write_buffer_size; - LOG(INFO) << "load cache: " << path_list[i]; - reinterpret_cast(block_cache_env)->LoadCache(opts, path_list[i] + "/flash_block_cache"); + if (!status.ok() || !persistent_cache) { + LOG(ERROR) << "Init persistent cache failed: " << status.ToString() << ", exit on error."; + return false; + } + sysinfo_.SetPersistentCacheSize(persistent_cache->GetCapacity()); + } else { + LOG(INFO) << "Enable flash env."; + // compitable with legacy FlashEnv + leveldb::FlashEnv* flash_env = (leveldb::FlashEnv*)io::LeveldbFlashEnv(); + if (FLAGS_tera_tabletnode_clean_persistent_cache_paths) { + for (const auto& path : io::GetCachePaths()) { + auto s = leveldb::Env::Default()->DeleteDirRecursive(path); + if (!s.ok()) { + LOG(WARNING) << "Remove persistent cache paths " << path << " failed: " << s.ToString(); + } } - return; - } - // compitable with legacy FlashEnv - leveldb::FlashEnv* flash_env = (leveldb::FlashEnv*)io::LeveldbFlashEnv(); - flash_env->SetFlashPath(FLAGS_tera_tabletnode_cache_paths, - FLAGS_tera_io_cache_path_vanish_allowed); - flash_env->SetUpdateFlashThreadNumber(FLAGS_tera_tabletnode_cache_update_thread_num); - flash_env->SetIfForceReadFromCache(FLAGS_tera_tabletnode_cache_force_read_from_cache); - return; + } + assert(flash_env); + flash_env->SetFlashPath(FLAGS_tera_tabletnode_cache_paths, + FLAGS_tera_io_cache_path_vanish_allowed); + flash_env->SetUpdateFlashThreadNumber(FLAGS_tera_tabletnode_cache_update_thread_num); + flash_env->SetIfForceReadFromCache(FLAGS_tera_tabletnode_cache_force_read_from_cache); + flash_env->TryRollbackPersistentCacheFiles(); + } + } + return true; } bool TabletNodeImpl::Exit() { - running_ = false; - exit_event_.Set(); - - cache_metrics_.reset(NULL); - - std::vector tablet_ios; - tablet_manager_->GetAllTablets(&tablet_ios); - - std::vector unload_threads; - unload_threads.resize(tablet_ios.size()); - - Counter worker_count; - worker_count.Set(tablet_ios.size()); - - for (uint32_t i = 0; i < tablet_ios.size(); ++i) { - io::TabletIO* tablet_io = tablet_ios[i]; - common::Thread& thread = unload_threads[i]; - thread.Start(std::bind(&TabletNodeImpl::UnloadTabletProc, - this, tablet_io, &worker_count)); - } - int64_t print_ms_ = get_millis(); - int64_t left = 0; - while ((left = worker_count.Get()) > 0) { - if (get_millis() - print_ms_ > 1000) { - LOG(INFO) << "[Exit] " << left << " tablets are still unloading ..."; - print_ms_ = get_millis(); - } - ThisThread::Sleep(100); - } - for (uint32_t i = 0; i < tablet_ios.size(); ++i) { - unload_threads[i].Join(); - } - tablet_healthcheck_thread_.Join(); - - zk_adapter_->Exit(); - return true; + running_ = false; + exit_event_.Set(); + + cache_metrics_.reset(NULL); + + std::vector tablet_ios; + tablet_manager_->GetAllTablets(&tablet_ios); + + std::vector unload_threads; + unload_threads.reserve(tablet_ios.size()); + + Counter worker_count; + worker_count.Set(tablet_ios.size()); + + for (uint32_t i = 0; i < tablet_ios.size(); ++i) { + io::TabletIO* tablet_io = tablet_ios[i]; + unload_threads.emplace_back( + std::bind(&TabletNodeImpl::UnloadTabletProc, this, tablet_io, &worker_count)); + } + int64_t print_ms_ = get_millis(); + int64_t left = 0; + while ((left = worker_count.Get()) > 0) { + if (get_millis() - print_ms_ > 1000) { + LOG(INFO) << "[Exit] " << left << " tablets are still unloading ..."; + print_ms_ = get_millis(); + } + ThisThread::Sleep(100); + } + for (uint32_t i = 0; i < tablet_ios.size(); ++i) { + unload_threads[i].join(); + } + tablet_healthcheck_thread_.join(); + + std::unique_ptr logger(leveldb::Logger::DefaultLogger()); + if (logger) { + logger->Exit(); + } + + zk_adapter_->Exit(); + return true; } void TabletNodeImpl::RefreshTabletsStatus() { - while (running_) { - int64_t ts = get_millis(); - LOG(INFO) << "begin refresh tablets status..."; - sysinfo_.RefreshTabletsStatus(tablet_manager_.get()); - - LOG(INFO) << "finish refresh tablets status. cost: " - << get_millis() - ts << " ms, next round after " - << FLAGS_tera_refresh_tablets_status_interval_ms << " ms"; - exit_event_.TimeWait(FLAGS_tera_refresh_tablets_status_interval_ms); - } - LOG(INFO) << "exit refresh tablets status"; + while (running_) { + int64_t ts = get_millis(); + LOG(INFO) << "begin refresh tablets status..."; + sysinfo_.RefreshTabletsStatus(tablet_manager_.get()); + + LOG(INFO) << "finish refresh tablets status. cost: " << get_millis() - ts + << " ms, next round after " << FLAGS_tera_refresh_tablets_status_interval_ms << " ms"; + exit_event_.TimeWait(FLAGS_tera_refresh_tablets_status_interval_ms); + } + LOG(INFO) << "exit refresh tablets status"; } void TabletNodeImpl::UnloadTabletProc(io::TabletIO* tablet_io, Counter* worker_count) { - LOG(INFO) << "begin to unload tablet: " << *tablet_io; - StatusCode status; - if (!tablet_io->Unload(&status)) { - LOG(ERROR) << "fail to unload tablet: " << *tablet_io - << ", status: " << StatusCodeToString(status); - } else { - LOG(INFO) << "unload tablet success: " << *tablet_io; - } - tablet_io->DecRef(); - worker_count->Dec(); + LOG(INFO) << "begin to unload tablet: " << *tablet_io; + StatusCode status; + if (!tablet_io->Unload(&status)) { + LOG(ERROR) << "fail to unload tablet: " << *tablet_io + << ", status: " << StatusCodeToString(status); + } else { + LOG(INFO) << "unload tablet success: " << *tablet_io; + } + tablet_io->DecRef(); + worker_count->Dec(); } -void TabletNodeImpl::LoadTablet(const LoadTabletRequest* request, - LoadTabletResponse* response, - google::protobuf::Closure* done) { - response->set_sequence_id(request->sequence_id()); - std::string sid = GetSessionId(); - if (!request->has_session_id() || - (sid.size() == 0) || - request->session_id().compare(0, sid.size(), sid) != 0) { - LOG(WARNING) << "load session id not match: tablet " << request->path() - << ", session_id " << request->session_id() << ", ts_id " << sid; - response->set_status(kIllegalAccess); - done->Run(); - return; - } - if (request->schema().locality_groups_size() < 1) { - LOG(WARNING) << "No localitygroups in schema: " << request->tablet_name(); - response->set_status(kIllegalAccess); - done->Run(); - return; - } - - const std::string& key_start = request->key_range().key_start(); - const std::string& key_end = request->key_range().key_end(); - const TableSchema& schema = request->schema(); +StatusCode TabletNodeImpl::QueryTabletStatus(const std::string& table_name, + const std::string& key_start, + const std::string& key_end) { + StatusCode status; + io::TabletIO* tablet_io = tablet_manager_->GetTablet(table_name, key_start, key_end, &status); + if (tablet_io == NULL) { + VLOG(15) << "fail to get tablet: " << table_name << " [" << DebugString(key_start) << ", " + << DebugString(key_end) << "], status: " << StatusCodeToString(status); + return kKeyNotInRange; + } + return static_cast(tablet_io->GetStatus()); +} - std::vector parent_tablets; - for (int i = 0; i < request->parent_tablets_size(); ++i) { - CHECK(i < 2) << "parent_tablets should less than 2: " << i; - parent_tablets.push_back(request->parent_tablets(i)); - } - std::set ignore_err_lgs; - for (int i = 0; i < request->ignore_err_lgs_size(); ++i) { - VLOG(10) << "oops lg:" << request->ignore_err_lgs(i); - ignore_err_lgs.insert(request->ignore_err_lgs(i)); +void TabletNodeImpl::LoadTablet(const LoadTabletRequest* request, LoadTabletResponse* response) { + response->set_sequence_id(request->sequence_id()); + std::string sid = GetSessionId(); + if (!request->has_session_id() || (sid.size() == 0) || + request->session_id().compare(0, sid.size(), sid) != 0) { + LOG(WARNING) << "load session id not match: tablet " << request->path() << ", session_id " + << request->session_id() << ", ts_id " << sid; + response->set_status(kIllegalAccess); + return; + } + if (request->schema().locality_groups_size() < 1) { + LOG(WARNING) << "No localitygroups in schema: " << request->tablet_name(); + response->set_status(kIllegalAccess); + return; + } + + const std::string& key_start = request->key_range().key_start(); + const std::string& key_end = request->key_range().key_end(); + const TableSchema& schema = request->schema(); + int64_t create_time = request->create_time(); + uint64_t version = request->version(); + + std::vector parent_tablets; + for (int i = 0; i < request->parent_tablets_size(); ++i) { + CHECK(i < 2) << "parent_tablets should less than 2: " << i; + parent_tablets.push_back(request->parent_tablets(i)); + } + std::set ignore_err_lgs; + for (int i = 0; i < request->ignore_err_lgs_size(); ++i) { + VLOG(10) << "oops lg:" << request->ignore_err_lgs(i); + ignore_err_lgs.insert(request->ignore_err_lgs(i)); + } + + io::TabletIO* tablet_io = NULL; + StatusCode status = kTabletNodeOk; + if (!tablet_manager_->AddTablet(request->tablet_name(), request->path(), key_start, key_end, + create_time, version, &tablet_io, &status)) { + io::TabletIO::TabletStatus tablet_status = tablet_io->GetStatus(); + if (tablet_status == io::TabletIO::TabletStatus::kOnLoad || + tablet_status == io::TabletIO::TabletStatus::kReady) { + VLOG(6) << "ignore this load tablet request, tablet: " << request->path() << " [" + << DebugString(key_start) << ", " << DebugString(key_end) + << "], status: " << StatusCodeToString((StatusCode)tablet_status); + } else { + LOG(ERROR) << "fail to add tablet: " << request->path() << " [" << DebugString(key_start) + << ", " << DebugString(key_end) + << "], status: " << StatusCodeToString((StatusCode)tablet_status); } - - io::TabletIO* tablet_io = NULL; - StatusCode status = kTabletNodeOk; - if (!tablet_manager_->AddTablet(request->tablet_name(), request->path(), - key_start, key_end, &tablet_io, &status)) { - io::TabletIO::TabletStatus tablet_status = tablet_io->GetStatus(); - if (tablet_status == io::TabletIO::TabletStatus::kOnLoad || - tablet_status == io::TabletIO::TabletStatus::kReady) { - VLOG(6) << "ignore this load tablet request, tablet: " << request->path() - << " [" << DebugString(key_start) << ", " - << DebugString(key_end) << "], status: " - << StatusCodeToString((StatusCode)tablet_status); - } - else { - LOG(ERROR) << "fail to add tablet: " << request->path() - << " [" << DebugString(key_start) << ", " - << DebugString(key_end) << "], status: " - << StatusCodeToString((StatusCode)tablet_status); - } - response->set_status((StatusCode)tablet_status); - tablet_io->DecRef(); + response->set_status((StatusCode)tablet_status); + tablet_io->DecRef(); + } else { + LOG(INFO) << "start load tablet, id: " << request->sequence_id() << ", sessionid " + << request->session_id() << ", ts_id " << sid << ", table: " << request->tablet_name() + << ", range: [" << DebugString(key_start) << ", " << DebugString(key_end) + << "], path: " << request->path() << ", ctimestamp(us): " << create_time + << ", version: " << version << ", parent: " + << (request->parent_tablets_size() > 0 ? request->parent_tablets(0) : 0) + << ", schema: " << request->schema().ShortDebugString(); + /// TODO: User per user memery_cache according to user quota. + tablet_io->SetMemoryCache(m_memory_cache); + if (!tablet_io->Load(schema, request->path(), parent_tablets, ignore_err_lgs, ldb_logger_, + ldb_block_cache_, ldb_table_cache_, &status)) { + std::string err_msg = tablet_io->GetLastErrorMessage(); + tablet_io->DecRef(); + LOG(ERROR) << "fail to load tablet: " << request->path() << " [" << DebugString(key_start) + << ", " << DebugString(key_end) << "], status: " << StatusCodeToString(status) + << ",err_msg: " << err_msg; + if (!tablet_manager_->RemoveTablet(request->tablet_name(), key_start, key_end, &status)) { + LOG(ERROR) << "fail to remove tablet: " << request->path() << " [" << DebugString(key_start) + << ", " << DebugString(key_end) << "], status: " << StatusCodeToString(status); + } + response->set_status(kIOError); + std::string load_context = tera::sdk::StatTable::SerializeLoadContext(*request, sid); + std::string msg = tera::sdk::StatTable::SerializeCorrupt( + sdk::CorruptPhase::kLoading, local_addr_, request->path(), load_context, err_msg); + response->set_detail_fail_msg(msg); } else { - LOG(INFO) << "start load tablet, id: " << request->sequence_id() - << ", sessionid " << request->session_id() - << ", ts_id " << sid - << ", table: " << request->tablet_name() - << ", range: [" << DebugString(key_start) - << ", " << DebugString(key_end) - << "], path: " << request->path() - << ", parent: " << (request->parent_tablets_size() > 0 ? request->parent_tablets(0) : 0) - << ", schema: " << request->schema().ShortDebugString(); - ///TODO: User per user memery_cache according to user quota. - tablet_io->SetMemoryCache(m_memory_cache); - if (!tablet_io->Load(schema, request->path(), parent_tablets, - ignore_err_lgs, ldb_logger_, - ldb_block_cache_, ldb_table_cache_, &status)) { - std::string err_msg = tablet_io->GetLastErrorMessage(); - tablet_io->DecRef(); - LOG(ERROR) << "fail to load tablet: " << request->path() - << " [" << DebugString(key_start) << ", " - << DebugString(key_end) << "], status: " - << StatusCodeToString(status) << ",err_msg: " << err_msg; - if (!tablet_manager_->RemoveTablet(request->tablet_name(), key_start, - key_end, &status)) { - LOG(ERROR) << "fail to remove tablet: " << request->path() - << " [" << DebugString(key_start) << ", " - << DebugString(key_end) << "], status: " - << StatusCodeToString(status); - } - response->set_status(kIOError); - std::string load_context = - tera::sdk::StatTable::SerializeLoadContext(*request, sid); - std::string msg = - tera::sdk::StatTable::SerializeCorrupt(sdk::CorruptPhase::kLoading, - local_addr_, request->path(), - load_context, err_msg); - response->set_detail_fail_msg(msg); - } else { - tablet_io->DecRef(); - response->set_status(kTabletNodeOk); - } + tablet_io->DecRef(); + response->set_status(kTabletNodeOk); } + } - LOG(INFO) << "load tablet: " << request->path() << " [" - << DebugString(key_start) << ", " << DebugString(key_end) << "]"; - done->Run(); + LOG(INFO) << "load tablet: " << request->path() << " [" << DebugString(key_start) << ", " + << DebugString(key_end) << "]"; } -bool TabletNodeImpl::UnloadTablet(const std::string& tablet_name, - const std::string& start, - const std::string& end, - StatusCode* status) { - io::TabletIO* tablet_io = tablet_manager_->GetTablet( - tablet_name, start, end, status); - if (tablet_io == NULL) { - LOG(WARNING) << "unload fail to get tablet: " << tablet_name - << " [" << DebugString(start) << ", " << DebugString(end) - << "], status: " << StatusCodeToString(*status); - *status = kKeyNotInRange; - return false; - } - - if (!tablet_io->Unload(status)) { - io::TabletIO::TabletStatus tablet_status = tablet_io->GetStatus(); - if (tablet_status == io::TabletIO::TabletStatus::kUnLoading || - tablet_status == io::TabletIO::TabletStatus::kUnLoading2) { - VLOG(6) << "ignore this unload tablet request: " << tablet_io->GetTablePath() - << "[" << DebugString(start) << "," << DebugString(end) - << "], status: " << StatusCodeToString((StatusCode)tablet_status); - } - else { - LOG(ERROR) << "fail to unload tablet: " << tablet_io->GetTablePath() - << " [" << DebugString(start) << ", " << DebugString(end) - << "], status: " << StatusCodeToString(*status); - } - *status = (StatusCode)tablet_status; - tablet_io->DecRef(); - return false; +bool TabletNodeImpl::UnloadTablet(const std::string& tablet_name, const std::string& start, + const std::string& end, StatusCode* status) { + io::TabletIO* tablet_io = tablet_manager_->GetTablet(tablet_name, start, end, status); + if (tablet_io == NULL) { + LOG(WARNING) << "unload fail to get tablet: " << tablet_name << " [" << DebugString(start) + << ", " << DebugString(end) << "], status: " << StatusCodeToString(*status); + *status = kKeyNotInRange; + return false; + } + + if (!tablet_io->Unload(status)) { + io::TabletIO::TabletStatus tablet_status = tablet_io->GetStatus(); + if (tablet_status == io::TabletIO::TabletStatus::kUnloading || + tablet_status == io::TabletIO::TabletStatus::kUnloading2) { + VLOG(6) << "ignore this unload tablet request: " << tablet_io->GetTablePath() << "[" + << DebugString(start) << "," << DebugString(end) + << "], status: " << StatusCodeToString((StatusCode)tablet_status); + } else { + LOG(ERROR) << "fail to unload tablet: " << tablet_io->GetTablePath() << " [" + << DebugString(start) << ", " << DebugString(end) + << "], status: " << StatusCodeToString(*status); } - LOG(INFO) << "unload tablet: " << tablet_io->GetTablePath() - << " [" << DebugString(start) << ", " << DebugString(end) << "]"; + *status = (StatusCode)tablet_status; tablet_io->DecRef(); - - if (!tablet_manager_->RemoveTablet(tablet_name, start, end, status)) { - LOG(ERROR) << "fail to remove tablet: " << tablet_name - << " [" << DebugString(start) << ", " << DebugString(end) - << "], status: " << StatusCodeToString(*status); - } - *status = kTabletNodeOk; - return true; + return false; + } + LOG(INFO) << "unload tablet: " << tablet_io->GetTablePath() << " [" << DebugString(start) << ", " + << DebugString(end) << "]"; + tablet_io->DecRef(); + + if (!tablet_manager_->RemoveTablet(tablet_name, start, end, status)) { + LOG(ERROR) << "fail to remove tablet: " << tablet_name << " [" << DebugString(start) << ", " + << DebugString(end) << "], status: " << StatusCodeToString(*status); + } + *status = kTabletNodeOk; + return true; } void TabletNodeImpl::UnloadTablet(const UnloadTabletRequest* request, - UnloadTabletResponse* response, - google::protobuf::Closure* done) { - response->set_sequence_id(request->sequence_id()); - std::string sid = GetSessionId(); - // master vervison lower than 2.10 has not session_id field, so just - if (request->has_session_id() && - ((sid.size() == 0) || - request->session_id().compare(0, sid.size(), sid) != 0)) { - LOG(WARNING) << "unload session id not match, seq_id: " << request->sequence_id() << "tablet: " - << request->tablet_name() << ", [" << request->key_range().key_start() << ", " - << request->key_range().key_end() << "], session_id " << request->session_id() << ", ts_id " << sid; - response->set_status(kIllegalAccess); - done->Run(); - return; - } + UnloadTabletResponse* response) { + response->set_sequence_id(request->sequence_id()); + std::string sid = GetSessionId(); + // master vervison lower than 2.10 has not session_id field, so just + if (request->has_session_id() && + ((sid.size() == 0) || request->session_id().compare(0, sid.size(), sid) != 0)) { + LOG(WARNING) << "unload session id not match, seq_id: " << request->sequence_id() + << "tablet: " << request->tablet_name() << ", [" + << request->key_range().key_start() << ", " << request->key_range().key_end() + << "], session_id " << request->session_id() << ", ts_id " << sid; + response->set_status(kIllegalAccess); + return; + } - StatusCode status = kTabletNodeOk; - UnloadTablet(request->tablet_name(), request->key_range().key_start(), - request->key_range().key_end(), &status); - response->set_status(status); - done->Run(); + StatusCode status = kTabletNodeOk; + UnloadTablet(request->tablet_name(), request->key_range().key_start(), + request->key_range().key_end(), &status); + response->set_status(status); } void TabletNodeImpl::CompactTablet(const CompactTabletRequest* request, CompactTabletResponse* response, - google::protobuf::Closure* done) -{ - response->set_sequence_id(request->sequence_id()); - StatusCode status = kTabletNodeOk; - io::TabletIO* tablet_io = tablet_manager_->GetTablet( - request->tablet_name(), request->key_range().key_start(), - request->key_range().key_end(), &status); - if (tablet_io == NULL) { - LOG(WARNING) << "compact fail to get tablet: " << request->tablet_name() - << " [" << DebugString(request->key_range().key_start()) - << ", " << DebugString(request->key_range().key_end()) - << "], status: " << StatusCodeToString(status); - response->set_status(kKeyNotInRange); - done->Run(); - return; - } - LOG(INFO) << "start compact tablet: " << tablet_io->GetTablePath() - << " [" << DebugString(tablet_io->GetStartKey()) - << ", " << DebugString(tablet_io->GetEndKey()) << "]"; - - if (request->has_lg_no() && request->lg_no() >= 0) { - tablet_io->Compact(request->lg_no(), &status); - } else { - tablet_io->Compact(-1, &status); - } - CompactStatus compact_status = tablet_io->GetCompactStatus(); - response->set_status(status); - response->set_compact_status(compact_status); - uint64_t compact_size = 0; - tablet_io->GetDataSize(&compact_size); - response->set_compact_size(compact_size); - LOG(INFO) << "compact tablet: " << tablet_io->GetTablePath() - << " [" << DebugString(tablet_io->GetStartKey()) - << ", " << DebugString(tablet_io->GetEndKey()) - << "], status: " << StatusCodeToString(status) - << ", compacted size: " << compact_size; - tablet_io->DecRef(); + google::protobuf::Closure* done) { + response->set_sequence_id(request->sequence_id()); + StatusCode status = kTabletNodeOk; + io::TabletIO* tablet_io = + tablet_manager_->GetTablet(request->tablet_name(), request->key_range().key_start(), + request->key_range().key_end(), &status); + if (tablet_io == NULL) { + LOG(WARNING) << "compact fail to get tablet: " << request->tablet_name() << " [" + << DebugString(request->key_range().key_start()) << ", " + << DebugString(request->key_range().key_end()) + << "], status: " << StatusCodeToString(status); + response->set_status(kKeyNotInRange); done->Run(); + return; + } + LOG(INFO) << "start compact tablet: " << tablet_io->GetTablePath() << " [" + << DebugString(tablet_io->GetStartKey()) << ", " << DebugString(tablet_io->GetEndKey()) + << "]"; + + if (request->has_lg_no() && request->lg_no() >= 0) { + tablet_io->Compact(request->lg_no(), &status); + } else { + tablet_io->Compact(-1, &status); + } + CompactStatus compact_status = tablet_io->GetCompactStatus(); + response->set_status(status); + response->set_compact_status(compact_status); + uint64_t compact_size = 0; + tablet_io->GetDataSize(&compact_size); + response->set_compact_size(compact_size); + LOG(INFO) << "compact tablet: " << tablet_io->GetTablePath() << " [" + << DebugString(tablet_io->GetStartKey()) << ", " << DebugString(tablet_io->GetEndKey()) + << "], status: " << StatusCodeToString(status) << ", compacted size: " << compact_size; + tablet_io->DecRef(); + done->Run(); } -void TabletNodeImpl::Update(const UpdateRequest* request, - UpdateResponse* response, +void TabletNodeImpl::Update(const UpdateRequest* request, UpdateResponse* response, google::protobuf::Closure* done) { - response->set_sequence_id(request->sequence_id()); - switch (request->type()) { + response->set_sequence_id(request->sequence_id()); + switch (request->type()) { case kUpdateSchema: - LOG(INFO) << "[update] new schema:" << request->schema().DebugString(); - if(ApplySchema(request)) { - LOG(INFO) << "[update] ok"; - response->set_status(kTabletNodeOk); - } else { - LOG(INFO) << "[update] failed"; - response->set_status(kInvalidArgument); - } - done->Run(); - break; - default: - LOG(INFO) << "[update] unknown cmd"; - response->set_status(kInvalidArgument); - done->Run(); - break; - } -} - -void TabletNodeImpl::ReadTablet(int64_t start_micros, - const ReadTabletRequest* request, - ReadTabletResponse* response, - google::protobuf::Closure* done) { - bool is_timeout = false; - int32_t row_num = request->row_info_list_size(); - uint64_t snapshot_id = request->snapshot_id() == 0 ? 0 : request->snapshot_id(); - uint32_t read_success_num = 0; - - int64_t client_timeout_ms = std::numeric_limits::max() / 2; - if (request->has_client_timeout_ms()) { - client_timeout_ms = request->client_timeout_ms(); - } - int64_t end_time_ms = start_micros / 1000 + client_timeout_ms; - VLOG(20) << "start_ms: " << start_micros / 1000 << ", client_timeout_ms: " << client_timeout_ms - << " end_ms: " << end_time_ms; - - for (int32_t i = 0; i < row_num; i++) { - int64_t time_remain_ms = end_time_ms - GetTimeStampInMs(); - StatusCode row_status = kTabletNodeOk; - io::TabletIO* tablet_io = tablet_manager_->GetTablet( - request->tablet_name(), request->row_info_list(i).key(), &row_status); - if (tablet_io == NULL) { - read_error_counter.Inc(); - read_range_error_counter.Inc(); - response->mutable_detail()->add_status(kKeyNotInRange); - } else { - VLOG(20) << "time_remain_ms: " << time_remain_ms; - if (tablet_io->ReadCells(request->row_info_list(i), - response->mutable_detail()->add_row_result(), - snapshot_id, &row_status, time_remain_ms)) { - read_success_num++; - } else { - if (row_status != kKeyNotExist && row_status != kRPCTimeout) { - read_error_counter.Inc(); - } - response->mutable_detail()->mutable_row_result()->RemoveLast(); - } - tablet_io->DecRef(); - response->mutable_detail()->add_status(row_status); - } - - if (row_status == kRPCTimeout) { - is_timeout = true; - LOG(WARNING) << "seq_id: " << request->sequence_id() << " timeout," - << " clinet_timeout_ms: " << request->client_timeout_ms(); - break; - } - } - - VLOG(10) << "seq_id: " << request->sequence_id() - << ", req_row: " << row_num - << ", read_suc: " << read_success_num; - response->set_sequence_id(request->sequence_id()); - response->set_success_num(read_success_num); - - if (is_timeout) { - response->set_status(kRPCTimeout); - } else { + LOG(INFO) << "[update] new schema:" << request->schema().DebugString(); + if (ApplySchema(request)) { + LOG(INFO) << "[update] ok"; response->set_status(kTabletNodeOk); - } - - done->Run(); + } else { + LOG(INFO) << "[update] failed"; + response->set_status(kInvalidArgument); + } + done->Run(); + break; + default: + LOG(INFO) << "[update] unknown cmd"; + response->set_status(kInvalidArgument); + done->Run(); + break; + } } -void TabletNodeImpl::WriteTablet(const WriteTabletRequest* request, - WriteTabletResponse* response, - google::protobuf::Closure* done, - WriteRpcTimer* timer) { - response->set_sequence_id(request->sequence_id()); - StatusCode status = kTabletNodeOk; - - std::map tablet_task_map; - std::map::iterator it; +void TabletNodeImpl::WriteTablet(const WriteTabletRequest* request, WriteTabletResponse* response, + google::protobuf::Closure* done, WriteRpcTimer* timer) { + response->set_sequence_id(request->sequence_id()); + StatusCode status = kTabletNodeOk; - int32_t row_num = request->row_list_size(); - if (row_num == 0) { - response->set_status(kTabletNodeOk); - done->Run(); - if (NULL != timer) { - RpcTimerList::Instance()->Erase(timer); - delete timer; - } - return; - } + std::map tablet_task_map; + std::map::iterator it; - std::shared_ptr row_done_counter(new Counter); - for (int32_t i = 0; i < row_num; i++) { - io::TabletIO* tablet_io = tablet_manager_->GetTablet( - request->tablet_name(), request->row_list(i).row_key(), &status); - if (tablet_io == NULL) { - write_range_error_counter.Inc(); - } - it = tablet_task_map.find(tablet_io); - WriteTabletTask* tablet_task = NULL; - if (it == tablet_task_map.end()) { - // keep one ref to tablet_io - tablet_task = tablet_task_map[tablet_io] = - new WriteTabletTask(request, response, done, timer, row_done_counter); - } else { - if (tablet_io != NULL) { - tablet_io->DecRef(); - } - tablet_task = it->second; - } - tablet_task->row_mutation_vec.push_back(&request->row_list(i)); - tablet_task->row_status_vec.push_back(kTabletNodeOk); - tablet_task->row_index_vec.push_back(i); - } - - // reserve response status list space + int32_t row_num = request->row_list_size(); + if (row_num == 0) { response->set_status(kTabletNodeOk); - response->mutable_row_status_list()->Reserve(row_num); - for (int32_t i = 0; i < row_num; i++) { - response->mutable_row_status_list()->AddAlreadyReserved(); + done->Run(); + if (NULL != timer) { + RpcTimerList::Instance()->Erase(timer); + delete timer; } + return; + } - for (it = tablet_task_map.begin(); it != tablet_task_map.end(); ++it) { - io::TabletIO* tablet_io = it->first; - WriteTabletTask* tablet_task = it->second; - if (tablet_io == NULL) { - WriteTabletFail(tablet_task, kKeyNotInRange); - } else if (!tablet_io->Write(&tablet_task->row_mutation_vec, - &tablet_task->row_status_vec, - request->is_instant(), - std::bind(&TabletNodeImpl::WriteTabletCallback, this, - tablet_task, _1, _2), - &status)) { - tablet_io->DecRef(); - WriteTabletFail(tablet_task, status); - } else { - tablet_io->DecRef(); - } + std::shared_ptr row_done_counter(new Counter); + for (int32_t i = 0; i < row_num; i++) { + io::TabletIO* tablet_io = + tablet_manager_->GetTablet(request->tablet_name(), request->row_list(i).row_key(), &status); + if (tablet_io == NULL) { + write_range_error_counter.Inc(); + } + it = tablet_task_map.find(tablet_io); + WriteTabletTask* tablet_task = NULL; + if (it == tablet_task_map.end()) { + // keep one ref to tablet_io + tablet_task = tablet_task_map[tablet_io] = + new WriteTabletTask(request, response, done, timer, row_done_counter); + } else { + if (tablet_io != NULL) { + tablet_io->DecRef(); + } + tablet_task = it->second; + } + tablet_task->row_mutation_vec.push_back(&request->row_list(i)); + tablet_task->row_status_vec.push_back(kTabletNodeOk); + tablet_task->row_index_vec.push_back(i); + } + + // reserve response status list space + response->set_status(kTabletNodeOk); + response->mutable_row_status_list()->Reserve(row_num); + for (int32_t i = 0; i < row_num; i++) { + response->mutable_row_status_list()->AddAlreadyReserved(); + } + + for (it = tablet_task_map.begin(); it != tablet_task_map.end(); ++it) { + io::TabletIO* tablet_io = it->first; + WriteTabletTask* tablet_task = it->second; + if (tablet_io == NULL) { + WriteTabletFail(tablet_task, kKeyNotInRange); + } else if (!tablet_io->Write( + &tablet_task->row_mutation_vec, &tablet_task->row_status_vec, + request->is_instant(), + std::bind(&TabletNodeImpl::WriteTabletCallback, this, tablet_task, _1, _2), + &status)) { + tablet_io->DecRef(); + WriteTabletFail(tablet_task, status); + } else { + tablet_io->DecRef(); } + } } void TabletNodeImpl::WriteTabletFail(WriteTabletTask* tablet_task, StatusCode status) { - int32_t row_num = tablet_task->row_status_vec.size(); + int32_t row_num = tablet_task->row_status_vec.size(); + if (status != kKeyNotInRange) { write_error_counter.Add(row_num); - for (int32_t i = 0; i < row_num; i++) { - tablet_task->row_status_vec[i] = status; - } - WriteTabletCallback(tablet_task, &tablet_task->row_mutation_vec, &tablet_task->row_status_vec); + } + for (int32_t i = 0; i < row_num; i++) { + tablet_task->row_status_vec[i] = status; + } + WriteTabletCallback(tablet_task, &tablet_task->row_mutation_vec, &tablet_task->row_status_vec); } void TabletNodeImpl::WriteTabletCallback(WriteTabletTask* tablet_task, std::vector* row_mutation_vec, std::vector* status_vec) { - int32_t index_num = tablet_task->row_index_vec.size(); - for (int32_t i = 0; i < index_num; i++) { - int32_t index = tablet_task->row_index_vec[i]; - tablet_task->response->mutable_row_status_list()->Set(index, (*status_vec)[i]); - } + int32_t index_num = tablet_task->row_index_vec.size(); + for (int32_t i = 0; i < index_num; i++) { + int32_t index = tablet_task->row_index_vec[i]; + tablet_task->response->mutable_row_status_list()->Set(index, (*status_vec)[i]); + } - if (tablet_task->row_done_counter->Add(index_num) == tablet_task->request->row_list_size()) { - tablet_task->done->Run(); - if (NULL != tablet_task->timer) { - RpcTimerList::Instance()->Erase(tablet_task->timer); - delete tablet_task->timer; - } + if (tablet_task->row_done_counter->Add(index_num) == tablet_task->request->row_list_size()) { + tablet_task->done->Run(); + if (NULL != tablet_task->timer) { + RpcTimerList::Instance()->Erase(tablet_task->timer); + delete tablet_task->timer; } + } - delete tablet_task; + delete tablet_task; } -void TabletNodeImpl::CmdCtrl(const TsCmdCtrlRequest* request, - TsCmdCtrlResponse* response, +void TabletNodeImpl::CmdCtrl(const TsCmdCtrlRequest* request, TsCmdCtrlResponse* response, google::protobuf::Closure* done) { - response->set_sequence_id(request->sequence_id()); - if (request->command() == "reload config") { - if (utils::LoadFlagFile(FLAGS_flagfile)) { - LOG(INFO) << "[reload config] done"; - response->set_status(kTabletNodeOk); - } else { - LOG(ERROR) << "[reload config] config file not found"; - response->set_status(kInvalidArgument); - } + response->set_sequence_id(request->sequence_id()); + if (request->command() == "reload config") { + if (utils::LoadFlagFile(FLAGS_flagfile)) { + LOG(INFO) << "[reload config] done"; + response->set_status(kTabletNodeOk); } else { - response->set_status(kInvalidArgument); + LOG(ERROR) << "[reload config] config file not found"; + response->set_status(kInvalidArgument); } - done->Run(); + } else { + response->set_status(kInvalidArgument); + } + done->Run(); } bool TabletNodeImpl::ApplySchema(const UpdateRequest* request) { - StatusCode status; - io::TabletIO* tablet_io = tablet_manager_->GetTablet( - request->tablet_name(), request->key_range().key_start(), request->key_range().key_end(), &status); - if (tablet_io == NULL) { - LOG(INFO) << "[update] tablet not found"; - return false; - } - tablet_io->ApplySchema(request->schema()); - tablet_io->DecRef(); - return true; + StatusCode status; + io::TabletIO* tablet_io = + tablet_manager_->GetTablet(request->tablet_name(), request->key_range().key_start(), + request->key_range().key_end(), &status); + if (tablet_io == NULL) { + LOG(INFO) << "[update] tablet not found"; + return false; + } + tablet_io->ApplySchema(request->schema()); + tablet_io->DecRef(); + return true; } -void TabletNodeImpl::Query(const QueryRequest* request, - QueryResponse* response, +void TabletNodeImpl::Query(const QueryRequest* request, QueryResponse* response, google::protobuf::Closure* done) { - response->set_sequence_id(request->sequence_id()); - response->set_status(kTabletNodeOk); - - TabletNodeInfo* ts_info = response->mutable_tabletnode_info(); - sysinfo_.GetTabletNodeInfo(ts_info); - TabletMetaList* meta_list = response->mutable_tabletmeta_list(); - sysinfo_.GetTabletMetaList(meta_list); - - if (request->has_is_gc_query() && request->is_gc_query()) { - std::vector inh_infos; - GetInheritedLiveFiles(&inh_infos); - for (size_t i = 0; i < inh_infos.size(); i++) { - TabletInheritedFileInfo* inh_info = response->add_tablet_inh_file_infos(); - inh_info->CopyFrom(inh_infos[i]); - } - - // only for compatible with old master - std::vector inherited; - GetInheritedLiveFiles(inherited); - for (size_t i = 0; i < inherited.size(); ++i) { - InheritedLiveFiles* files = response->add_inh_live_files(); - *files = inherited[i]; - } - } - - // if have background errors, package into 'response' and return to 'master' - std::vector background_errors; - GetBackgroundErrors(&background_errors); - for (auto background_error : background_errors) { - TabletBackgroundErrorInfo* tablet_background_error = - response->add_tablet_background_errors(); - tablet_background_error->CopyFrom(background_error); - } - done->Run(); + response->set_sequence_id(request->sequence_id()); + response->set_status(kTabletNodeOk); + + TabletNodeInfo* ts_info = response->mutable_tabletnode_info(); + sysinfo_.GetTabletNodeInfo(ts_info); + TabletMetaList* meta_list = response->mutable_tabletmeta_list(); + sysinfo_.GetTabletMetaList(meta_list); + + if (request->has_is_gc_query() && request->is_gc_query()) { + std::vector inh_infos; + GetInheritedLiveFiles(&inh_infos); + for (size_t i = 0; i < inh_infos.size(); i++) { + TabletInheritedFileInfo* inh_info = response->add_tablet_inh_file_infos(); + inh_info->CopyFrom(inh_infos[i]); + } + + // only for compatible with old master + std::vector inherited; + GetInheritedLiveFiles(inherited); + for (size_t i = 0; i < inherited.size(); ++i) { + InheritedLiveFiles* files = response->add_inh_live_files(); + *files = inherited[i]; + } + } + + // if have background errors, package into 'response' and return to 'master' + std::vector background_errors; + GetBackgroundErrors(&background_errors); + for (auto background_error : background_errors) { + TabletBackgroundErrorInfo* tablet_background_error = response->add_tablet_background_errors(); + tablet_background_error->CopyFrom(background_error); + } + done->Run(); } -void TabletNodeImpl::RefreshSysInfo() { - int64_t cur_ts = get_micros(); +void TabletNodeImpl::RefreshAndDumpSysInfo() { + int64_t cur_ts = get_micros(); - sysinfo_.CollectTabletNodeInfo(tablet_manager_.get(), local_addr_); - sysinfo_.CollectHardwareInfo(); - sysinfo_.SetTimeStamp(cur_ts); + sysinfo_.CollectTabletNodeInfo(tablet_manager_.get(), local_addr_); + sysinfo_.CollectHardwareInfo(); + sysinfo_.SetTimeStamp(cur_ts); + sysinfo_.UpdateWriteFlowController(); + sysinfo_.DumpLog(); - VLOG(15) << "collect sysinfo finished, time used: " << get_micros() - cur_ts << " us."; + VLOG(15) << "collect sysinfo finished, time used: " << get_micros() - cur_ts << " us."; } -void TabletNodeImpl::ScanTablet(const ScanTabletRequest* request, - ScanTabletResponse* response, +void TabletNodeImpl::ScanTablet(const ScanTabletRequest* request, ScanTabletResponse* response, google::protobuf::Closure* done) { - const int64_t PACK_MAX_SIZE = - static_cast(FLAGS_tera_tabletnode_scan_pack_max_size)<<10; - //const std::string& start_key = request->key_range().key_start(); - //const std::string& end_key = request->key_range().key_end(); - int64_t buffer_limit = request->buffer_limit(); - if (buffer_limit > PACK_MAX_SIZE) { - buffer_limit = PACK_MAX_SIZE; - } - //VLOG(5) << "ScanTablet() start=[" << start_key - // << "], end=[" << end_key << "]"; - if (request->has_sequence_id()) { - response->set_sequence_id(request->sequence_id()); - } - StatusCode status = kTabletNodeOk; - io::TabletIO* tablet_io = NULL; - tablet_io = tablet_manager_->GetTablet(request->table_name(), - request->start(), &status); - - if (tablet_io == NULL) { - scan_range_error_counter.Inc(); - response->set_status(status); - done->Run(); - } else { - response->set_end(tablet_io->GetEndKey()); - if (!tablet_io->ScanRows(request, response, done)) { - scan_error_counter.Inc(); - } - tablet_io->DecRef(); - } -} - -void TabletNodeImpl::SplitTablet(const SplitTabletRequest* request, - SplitTabletResponse* response, - google::protobuf::Closure* done) { + const int64_t PACK_MAX_SIZE = static_cast(FLAGS_tera_tabletnode_scan_pack_max_size) + << 10; + // const std::string& start_key = request->key_range().key_start(); + // const std::string& end_key = request->key_range().key_end(); + int64_t buffer_limit = request->buffer_limit(); + if (buffer_limit > PACK_MAX_SIZE) { + buffer_limit = PACK_MAX_SIZE; + } + // VLOG(5) << "ScanTablet() start=[" << start_key + // << "], end=[" << end_key << "]"; + if (request->has_sequence_id()) { response->set_sequence_id(request->sequence_id()); + } - std::string split_key = request->split_key(); - std::string path; - StatusCode status = kTabletNodeOk; - io::TabletIO* tablet_io = tablet_manager_->GetTablet(request->tablet_name(), - request->key_range().key_start(), - request->key_range().key_end(), - &status); - if (tablet_io == NULL) { - LOG(WARNING) << "split fail to get tablet: " << request->tablet_name() - << " [" << DebugString(request->key_range().key_start()) - << ", " << DebugString(request->key_range().key_end()) - << "], status: " << StatusCodeToString(status); - response->set_status(kKeyNotInRange); - done->Run(); - return; - } - // Master is not responsible for update children tablets to meta table, refuse to split - if (!request->has_master_update_meta() || !request->master_update_meta()) { - LOG(ERROR) << kSms <<"SplitRequest without master_update_meta, maybe " - "request from old master, refuse split!" << *tablet_io; - response->set_status(kTableNotSupport); - done->Run(); - - } - - // Master is not responsible for update children tablets to meta table, refuse to split - if (!request->has_master_update_meta() || !request->master_update_meta()) { - LOG(WARNING) <<"SplitRequest without master_update_meta, maybe " - "request from old master, refuse split!" << *tablet_io; - response->set_status(kTableNotSupport); - done->Run(); - - } + StatusCode status = kTabletNodeOk; + io::TabletIO* tablet_io = NULL; + tablet_io = tablet_manager_->GetTablet(request->table_name(), request->start(), &status); - if (!tablet_io->Split(&split_key, &status)) { - LOG(ERROR) << "fail to split tablet: " << tablet_io->GetTablePath() - << " [" << DebugString(tablet_io->GetStartKey()) - << ", " << DebugString(tablet_io->GetEndKey()) - << "], split_key: " << DebugString(split_key) << ". status: " << StatusCodeToString(status); - if (status == kTableNotSupport) { - response->set_status(kTableNotSupport); - } else { - response->set_status((StatusCode)tablet_io->GetStatus()); - } - tablet_io->DecRef(); - done->Run(); - return; - } - LOG(INFO) << "split tablet: " << tablet_io->GetTablePath() - << " [" << DebugString(tablet_io->GetStartKey()) - << ", " << DebugString(tablet_io->GetEndKey()) - << "], split key: " << DebugString(split_key); - - if (!tablet_io->Unload(&status)) { - LOG(ERROR) << "fail to unload tablet: " << tablet_io->GetTablePath() - << " [" << DebugString(tablet_io->GetStartKey()) - << ", " << DebugString(tablet_io->GetEndKey()) - << "], status: " << StatusCodeToString(status); - response->set_status((StatusCode)tablet_io->GetStatus()); - tablet_io->DecRef(); - done->Run(); - return; + if (tablet_io == NULL) { + scan_range_error_counter.Inc(); + response->set_status(status); + done->Run(); + } else { + response->set_end(tablet_io->GetEndKey()); + if (!tablet_io->ScanRows(request, response, done)) { + scan_error_counter.Inc(); } - TableSchema schema; - schema.CopyFrom(tablet_io->GetSchema()); - path = tablet_io->GetTablePath(); - LOG(INFO) << "unload tablet: " << tablet_io->GetTablePath() - << " [" << DebugString(tablet_io->GetStartKey()) - << ", " << DebugString(tablet_io->GetEndKey()) << "]"; tablet_io->DecRef(); - - if (!tablet_manager_->RemoveTablet(request->tablet_name(), - request->key_range().key_start(), - request->key_range().key_end(), - &status)) { - LOG(ERROR) << "fail to remove tablet: " << request->tablet_name() - << " [" << DebugString(request->key_range().key_start()) - << ", " << DebugString(request->key_range().key_end()) - << "], status: " << StatusCodeToString(status); - } - response->set_status(kTabletNodeOk); - response->add_split_keys(split_key); - done->Run(); + } } void TabletNodeImpl::ComputeSplitKey(const SplitTabletRequest* request, - SplitTabletResponse* response, - google::protobuf::Closure* done) { - response->set_sequence_id(request->sequence_id()); - - std::string split_key; - StatusCode status = kTabletNodeOk; - io::TabletIO* tablet_io = tablet_manager_->GetTablet(request->tablet_name(), - request->key_range().key_start(), - request->key_range().key_end(), - &status); - if (tablet_io == NULL) { - LOG(WARNING) << "split fail to get tablet: " << request->tablet_name() - << " [" << DebugString(request->key_range().key_start()) - << ", " << DebugString(request->key_range().key_end()) - << "], status: " << StatusCodeToString(status); - response->set_status(kKeyNotInRange); - done->Run(); - return; - } - - if (!tablet_io->Split(&split_key, &status)) { - LOG(ERROR) << "fail to split tablet: " << tablet_io->GetTablePath() - << " [" << DebugString(tablet_io->GetStartKey()) - << ", " << DebugString(tablet_io->GetEndKey()) - << "], split_key: " << DebugString(split_key) << ". status: " << StatusCodeToString(status); - if (status == kTableNotSupport) { - response->set_status(kTableNotSupport); - } else { - response->set_status((StatusCode)tablet_io->GetStatus()); - } - tablet_io->DecRef(); - done->Run(); - return; + SplitTabletResponse* response, + google::protobuf::Closure* done) { + response->set_sequence_id(request->sequence_id()); + + std::string split_key; + StatusCode status = kTabletNodeOk; + io::TabletIO* tablet_io = + tablet_manager_->GetTablet(request->tablet_name(), request->key_range().key_start(), + request->key_range().key_end(), &status); + if (tablet_io == NULL) { + LOG(WARNING) << "split fail to get tablet: " << request->tablet_name() << " [" + << DebugString(request->key_range().key_start()) << ", " + << DebugString(request->key_range().key_end()) + << "], status: " << StatusCodeToString(status); + response->set_status(kKeyNotInRange); + done->Run(); + return; + } + + if (!tablet_io->Split(&split_key, &status)) { + LOG(ERROR) << "fail to split tablet: " << tablet_io->GetTablePath() << " [" + << DebugString(tablet_io->GetStartKey()) << ", " + << DebugString(tablet_io->GetEndKey()) << "], split_key: " << DebugString(split_key) + << ". status: " << StatusCodeToString(status); + if (status == kTableNotSupport) { + response->set_status(kTableNotSupport); + } else { + response->set_status((StatusCode)tablet_io->GetStatus()); } - LOG(INFO) << "split tablet: " << tablet_io->GetTablePath() - << " [" << DebugString(tablet_io->GetStartKey()) - << ", " << DebugString(tablet_io->GetEndKey()) - << "], split key: " << DebugString(split_key); - response->set_status(kTabletNodeOk); - response->add_split_keys(split_key); tablet_io->DecRef(); done->Run(); + return; + } + LOG(INFO) << "split tablet: " << tablet_io->GetTablePath() << " [" + << DebugString(tablet_io->GetStartKey()) << ", " << DebugString(tablet_io->GetEndKey()) + << "], split key: " << DebugString(split_key); + response->set_status(kTabletNodeOk); + response->add_split_keys(split_key); + tablet_io->DecRef(); + done->Run(); } - -bool TabletNodeImpl::CheckInKeyRange(const KeyList& key_list, - const std::string& key_start, +bool TabletNodeImpl::CheckInKeyRange(const KeyList& key_list, const std::string& key_start, const std::string& key_end) { - for (int32_t i = 0; i < key_list.size(); ++i) { - const std::string& key = key_list.Get(i); - if (key < key_start || (key_end != "" && key >= key_end)) { - return false; - } + for (int32_t i = 0; i < key_list.size(); ++i) { + const std::string& key = key_list.Get(i); + if (key < key_start || (key_end != "" && key >= key_end)) { + return false; } - return true; + } + return true; } -bool TabletNodeImpl::CheckInKeyRange(const KeyValueList& pair_list, - const std::string& key_start, +bool TabletNodeImpl::CheckInKeyRange(const KeyValueList& pair_list, const std::string& key_start, const std::string& key_end) { - for (int32_t i = 0; i < pair_list.size(); ++i) { - const std::string& key = pair_list.Get(i).key(); - if (key < key_start || (key_end != "" && key >= key_end)) { - return false; - } + for (int32_t i = 0; i < pair_list.size(); ++i) { + const std::string& key = pair_list.Get(i).key(); + if (key < key_start || (key_end != "" && key >= key_end)) { + return false; } - return true; + } + return true; } -bool TabletNodeImpl::CheckInKeyRange(const RowReaderList& reader_list, - const std::string& key_start, +bool TabletNodeImpl::CheckInKeyRange(const RowReaderList& reader_list, const std::string& key_start, const std::string& key_end) { - for (int32_t i = 0; i < reader_list.size(); ++i) { - const std::string& key = reader_list.Get(i).key(); - if (key < key_start || (key_end != "" && key >= key_end)) { - return false; - } + for (int32_t i = 0; i < reader_list.size(); ++i) { + const std::string& key = reader_list.Get(i).key(); + if (key < key_start || (key_end != "" && key >= key_end)) { + return false; } - return true; + } + return true; } -bool TabletNodeImpl::CheckInKeyRange(const RowMutationList& row_list, - const std::string& key_start, +bool TabletNodeImpl::CheckInKeyRange(const RowMutationList& row_list, const std::string& key_start, const std::string& key_end) { - for (int32_t i = 0; i < row_list.size(); ++i) { - const std::string& key = row_list.Get(i).row_key(); - if (key < key_start || (key_end != "" && key >= key_end)) { - return false; - } + for (int32_t i = 0; i < row_list.size(); ++i) { + const std::string& key = row_list.Get(i).row_key(); + if (key < key_start || (key_end != "" && key >= key_end)) { + return false; } - return true; + } + return true; } - /////////// common //////////// -void TabletNodeImpl::EnterSafeMode() { - SetTabletNodeStatus(kIsReadonly); -} +void TabletNodeImpl::EnterSafeMode() { SetTabletNodeStatus(kIsReadonly); } -void TabletNodeImpl::LeaveSafeMode() { - SetTabletNodeStatus(kIsRunning); -} +void TabletNodeImpl::LeaveSafeMode() { SetTabletNodeStatus(kIsRunning); } void TabletNodeImpl::ExitService() { - LOG(FATAL) << "master kick me!"; - _exit(1); + LOG(FATAL) << "master kick me!"; + _exit(1); } void TabletNodeImpl::SetTabletNodeStatus(const TabletNodeStatus& status) { - MutexLock lock(&status_mutex_); - status_ = status; + MutexLock lock(&status_mutex_); + status_ = status; } TabletNodeImpl::TabletNodeStatus TabletNodeImpl::GetTabletNodeStatus() { - MutexLock lock(&status_mutex_); - return status_; + MutexLock lock(&status_mutex_); + return status_; } void TabletNodeImpl::SetRootTabletAddr(const std::string& root_tablet_addr) { - root_tablet_addr_ = root_tablet_addr; + root_tablet_addr_ = root_tablet_addr; } /* @@ -1094,338 +935,561 @@ void TabletNodeImpl::SetRootTabletAddr(const std::string& root_tablet_addr) { * ------------------------------------------ */ void TabletNodeImpl::GarbageCollect() { - if (FLAGS_tera_tabletnode_flash_block_cache_enabled) { - return; - } - int64_t start_ms = get_micros(); - LOG(INFO) << "[gc] start..."; - - // get all inherited sst files - std::vector table_files; - GetInheritedLiveFiles(table_files); - std::set inherited_files; - for (size_t t = 0; t < table_files.size(); ++t) { - const InheritedLiveFiles& live = table_files[t]; - int lg_num = live.lg_live_files_size(); - for (int lg = 0; lg < lg_num; ++lg) { - const LgInheritedLiveFiles& lg_live_files = live.lg_live_files(lg); - for (int f = 0; f < lg_live_files.file_number_size(); ++f) { - std::string file_path = leveldb::BuildTableFilePath( - live.table_name(), lg, lg_live_files.file_number(f)); - inherited_files.insert(file_path); - // file_path : table-name/tablet-xxx/lg-num/xxx.sst - VLOG(GC_LOG_LEVEL) << "[gc] inherited live file: " << file_path; - } - } - } - - // get all active tablets - std::vector tablet_meta_list; - std::set active_tablets; - tablet_manager_->GetAllTabletMeta(&tablet_meta_list); - std::vector::iterator it = tablet_meta_list.begin(); - for (; it != tablet_meta_list.end(); ++it) { - VLOG(GC_LOG_LEVEL) << "[gc] Active Tablet: " << (*it)->path(); - active_tablets.insert((*it)->path()); - delete (*it); - } - - // collect flash directories - leveldb::FlashEnv* flash_env = (leveldb::FlashEnv*)io::LeveldbFlashEnv(); + int64_t start_ms = get_micros(); + LOG(INFO) << "[gc] start..."; + + // get all inherited sst files + std::vector table_files; + GetInheritedLiveFiles(table_files); + std::set inherited_files; + for (size_t t = 0; t < table_files.size(); ++t) { + const InheritedLiveFiles& live = table_files[t]; + int lg_num = live.lg_live_files_size(); + for (int lg = 0; lg < lg_num; ++lg) { + const LgInheritedLiveFiles& lg_live_files = live.lg_live_files(lg); + for (int f = 0; f < lg_live_files.file_number_size(); ++f) { + std::string file_path = + leveldb::BuildTableFilePath(live.table_name(), lg, lg_live_files.file_number(f)); + inherited_files.insert(file_path); + // file_path : table-name/tablet-xxx/lg-num/xxx.sst + VLOG(GC_LOG_LEVEL) << "[gc] inherited live file: " << file_path; + } + } + } + + // get all active tablets + std::vector tablet_meta_list; + std::set active_tablets; + tablet_manager_->GetAllTabletMeta(&tablet_meta_list); + std::vector::iterator it = tablet_meta_list.begin(); + for (; it != tablet_meta_list.end(); ++it) { + VLOG(GC_LOG_LEVEL) << "[gc] Active Tablet: " << (*it)->path(); + active_tablets.insert((*it)->path()); + delete (*it); + } + + // collect persistent cache garbage + PersistentCacheGarbageCollect(inherited_files, active_tablets); + + // collect flash directories + leveldb::FlashEnv* flash_env = (leveldb::FlashEnv*)io::LeveldbFlashEnv(); + if (flash_env) { const std::vector& flash_paths = flash_env->GetFlashPaths(); for (size_t d = 0; d < flash_paths.size(); ++d) { - std::string flash_dir = flash_paths[d] + FLAGS_tera_tabletnode_path_prefix; - GarbageCollectInPath(flash_dir, leveldb::Env::Default(), - inherited_files, active_tablets); + std::string flash_dir = flash_paths[d] + FLAGS_tera_tabletnode_path_prefix; + GarbageCollectInPath(flash_dir, leveldb::Env::Default(), inherited_files, active_tablets); } + } - // collect memory env - leveldb::Env* mem_env = io::LeveldbMemEnv()->CacheEnv(); - GarbageCollectInPath(FLAGS_tera_tabletnode_path_prefix, mem_env, - inherited_files, active_tablets); + // collect memory env + leveldb::Env* mem_env = io::LeveldbMemEnv()->CacheEnv(); + GarbageCollectInPath(FLAGS_tera_tabletnode_path_prefix, mem_env, inherited_files, active_tablets); - LOG(INFO) << "[gc] finished, time used: " << get_micros() - start_ms << " us."; + LOG(INFO) << "[gc] finished, time used: " << get_micros() - start_ms << " us."; } void TabletNodeImpl::GarbageCollectInPath(const std::string& path, leveldb::Env* env, const std::set& inherited_files, - const std::set active_tablets) { - std::vector table_dirs; - env->GetChildren(path, &table_dirs); - for (size_t i = 0; i < table_dirs.size(); ++i) { - std::vector cached_tablets; - env->GetChildren(path + "/" + table_dirs[i], &cached_tablets); - if (cached_tablets.size() == 0) { - VLOG(GC_LOG_LEVEL) << "[gc] this directory is empty, delete it: " - << path + "/" + table_dirs[i]; - env->DeleteDir(path + "/" + table_dirs[i]); - continue; + const std::set& active_tablets) { + std::vector table_dirs; + env->GetChildren(path, &table_dirs); + for (size_t i = 0; i < table_dirs.size(); ++i) { + std::vector cached_tablets; + env->GetChildren(path + "/" + table_dirs[i], &cached_tablets); + if (cached_tablets.size() == 0) { + VLOG(GC_LOG_LEVEL) << "[gc] this directory is empty, delete it: " + << path + "/" + table_dirs[i]; + env->DeleteDir(path + "/" + table_dirs[i]); + continue; + } + for (size_t j = 0; j < cached_tablets.size(); ++j) { + std::string tablet_dir = table_dirs[i] + "/" + cached_tablets[j]; + VLOG(GC_LOG_LEVEL) << "[gc] Cached Tablet: " << tablet_dir; + if (active_tablets.find(tablet_dir) != active_tablets.end()) { + // active tablets + continue; + } + std::string inactive_tablet_dir = path + "/" + tablet_dir; + VLOG(GC_LOG_LEVEL) << "[gc] inactive_tablet directory:" << inactive_tablet_dir; + std::vector lgs; + env->GetChildren(inactive_tablet_dir, &lgs); + if (lgs.size() == 0) { + VLOG(GC_LOG_LEVEL) << "[gc] this directory is empty, delete it: " << inactive_tablet_dir; + env->DeleteDir(inactive_tablet_dir); + continue; + } + for (size_t lg = 0; lg < lgs.size(); ++lg) { + std::vector files; + env->GetChildren(inactive_tablet_dir + "/" + lgs[lg], &files); + if (files.size() == 0) { + VLOG(GC_LOG_LEVEL) << "[gc] this directory is empty, delete it: " + << inactive_tablet_dir + "/" + lgs[lg]; + env->DeleteDir(inactive_tablet_dir + "/" + lgs[lg]); + continue; } - for (size_t j = 0; j < cached_tablets.size(); ++j) { - std::string tablet_dir = table_dirs[i] + "/" + cached_tablets[j]; - VLOG(GC_LOG_LEVEL) << "[gc] Cached Tablet: " << tablet_dir; - if (active_tablets.find(tablet_dir) != active_tablets.end()) { - // active tablets - continue; - } - std::string inactive_tablet_dir = path + "/" + tablet_dir; - VLOG(GC_LOG_LEVEL) << "[gc] inactive_tablet directory:" << inactive_tablet_dir; - std::vector lgs; - env->GetChildren(inactive_tablet_dir, &lgs); - if (lgs.size() == 0) { - VLOG(GC_LOG_LEVEL) << "[gc] this directory is empty, delete it: " << inactive_tablet_dir; - env->DeleteDir(inactive_tablet_dir); - continue; - } - for (size_t lg = 0; lg < lgs.size(); ++lg) { - std::vector files; - env->GetChildren(inactive_tablet_dir + "/" + lgs[lg], &files); - if (files.size() == 0) { - VLOG(GC_LOG_LEVEL) << "[gc] this directory is empty, delete it: " - << inactive_tablet_dir + "/" + lgs[lg]; - env->DeleteDir(inactive_tablet_dir + "/" + lgs[lg]); - continue; - } - for (size_t f = 0; f < files.size(); ++f) { - std::string file = files[f]; - std::string pathname = inactive_tablet_dir + "/" + lgs[lg] + "/" + file; - if (inherited_files.find(tablet_dir + "/" + lgs[lg] + "/" + file) == inherited_files.end()) { - VLOG(GC_LOG_LEVEL) << "[gc] delete sst file: " << pathname; - env->DeleteFile(pathname); - - } else { - VLOG(GC_LOG_LEVEL) << "[gc] skip inherited file: " << pathname; - } - } // sst file - } // lg - } // tablet - } // table - + for (size_t f = 0; f < files.size(); ++f) { + std::string file = files[f]; + std::string pathname = inactive_tablet_dir + "/" + lgs[lg] + "/" + file; + if (inherited_files.find(tablet_dir + "/" + lgs[lg] + "/" + file) == + inherited_files.end()) { + VLOG(GC_LOG_LEVEL) << "[gc] delete sst file: " << pathname; + env->DeleteFile(pathname); + + } else { + VLOG(GC_LOG_LEVEL) << "[gc] skip inherited file: " << pathname; + } + } // sst file + } // lg + } // tablet + } // table } void TabletNodeImpl::SetSessionId(const std::string& session_id) { - MutexLock lock(&status_mutex_); - session_id_ = session_id; + MutexLock lock(&status_mutex_); + session_id_ = session_id; } std::string TabletNodeImpl::GetSessionId() { - MutexLock lock(&status_mutex_); - return session_id_; + MutexLock lock(&status_mutex_); + return session_id_; } -TabletNodeSysInfo& TabletNodeImpl::GetSysInfo() { - return sysinfo_; -} +TabletNodeSysInfo& TabletNodeImpl::GetSysInfo() { return sysinfo_; } void TabletNodeImpl::TryReleaseMallocCache() { - LOG(INFO) << "TryReleaseMallocCache()"; - size_t free_heap_bytes = 0; - MallocExtension::instance()->GetNumericProperty("tcmalloc.pageheap_free_bytes", - &free_heap_bytes); - if (free_heap_bytes == 0) { - return; - } - - VLOG(5) << "tcmalloc cache size: " << free_heap_bytes; - - if (free_heap_bytes < 10 * 1024 * 1024) { - MallocExtension::instance()->ReleaseFreeMemory(); - VLOG(5) << "release tcmalloc cache size: " << free_heap_bytes; - } else { - // have workload - MallocExtension::instance()->ReleaseToSystem(free_heap_bytes / 2); - VLOG(5) << "release tcmalloc cache size: " << free_heap_bytes / 2; - } + LOG(INFO) << "TryReleaseMallocCache()"; + size_t free_heap_bytes = 0; + MallocExtension::instance()->GetNumericProperty("tcmalloc.pageheap_free_bytes", &free_heap_bytes); + if (free_heap_bytes == 0) { + return; + } + + VLOG(5) << "tcmalloc cache size: " << free_heap_bytes; + + if (free_heap_bytes < 10 * 1024 * 1024) { + MallocExtension::instance()->ReleaseFreeMemory(); + VLOG(5) << "release tcmalloc cache size: " << free_heap_bytes; + } else { + // have workload + MallocExtension::instance()->ReleaseToSystem(free_heap_bytes / 2); + VLOG(5) << "release tcmalloc cache size: " << free_heap_bytes / 2; + } } void TabletNodeImpl::ReleaseMallocCache() { - MutexLock locker(&mutex_); + MutexLock locker(&mutex_); - TryReleaseMallocCache(); + TryReleaseMallocCache(); - release_cache_timer_id_ = kInvalidTimerId; - EnableReleaseMallocCacheTimer(); + release_cache_timer_id_ = kInvalidTimerId; + EnableReleaseMallocCacheTimer(); } void TabletNodeImpl::EnableReleaseMallocCacheTimer(int32_t expand_factor) { - assert(release_cache_timer_id_ == kInvalidTimerId); - ThreadPool::Task task = - std::bind(&TabletNodeImpl::ReleaseMallocCache, this); - int64_t timeout_period = expand_factor * 1000LL * - FLAGS_tera_tabletnode_tcm_cache_release_period; - release_cache_timer_id_ = thread_pool_->DelayTask(timeout_period, task); + assert(release_cache_timer_id_ == kInvalidTimerId); + ThreadPool::Task task = std::bind(&TabletNodeImpl::ReleaseMallocCache, this); + int64_t timeout_period = expand_factor * 1000LL * FLAGS_tera_tabletnode_tcm_cache_release_period; + release_cache_timer_id_ = thread_pool_->DelayTask(timeout_period, task); } void TabletNodeImpl::DisableReleaseMallocCacheTimer() { - if (release_cache_timer_id_ != kInvalidTimerId) { - thread_pool_->CancelTask(release_cache_timer_id_); - release_cache_timer_id_ = kInvalidTimerId; - } + if (release_cache_timer_id_ != kInvalidTimerId) { + thread_pool_->CancelTask(release_cache_timer_id_); + release_cache_timer_id_ = kInvalidTimerId; + } } void TabletNodeImpl::GetInheritedLiveFiles(std::vector* inherited) { - std::vector tablet_ios; - tablet_manager_->GetAllTablets(&tablet_ios); - for (size_t tablet_id = 0; tablet_id < tablet_ios.size(); tablet_id++) { - io::TabletIO* tablet_io = tablet_ios[tablet_id]; - std::vector > tablet_files; - if (tablet_io->AddInheritedLiveFiles(&tablet_files)) { - TabletInheritedFileInfo inh_file_info; - inh_file_info.set_table_name(tablet_io->GetTableName()); - inh_file_info.set_key_start(tablet_io->GetStartKey()); - inh_file_info.set_key_end(tablet_io->GetEndKey()); - for (size_t lg_id = 0; lg_id < tablet_files.size(); lg_id++) { - VLOG(10) << "[gc] " << tablet_io->GetTablePath() - << " add inherited file, lg " << lg_id << ", " - << tablet_files[lg_id].size() << " files total"; - LgInheritedLiveFiles* lg_files = inh_file_info.add_lg_inh_files(); - lg_files->set_lg_no(lg_id); - std::set::iterator file_it = tablet_files[lg_id].begin(); - for (; file_it != tablet_files[lg_id].end(); ++file_it) { - lg_files->add_file_number(*file_it); - } - } - inherited->push_back(inh_file_info); + std::vector tablet_ios; + tablet_manager_->GetAllTablets(&tablet_ios); + for (size_t tablet_id = 0; tablet_id < tablet_ios.size(); tablet_id++) { + io::TabletIO* tablet_io = tablet_ios[tablet_id]; + std::vector > tablet_files; + if (tablet_io->AddInheritedLiveFiles(&tablet_files)) { + TabletInheritedFileInfo inh_file_info; + inh_file_info.set_table_name(tablet_io->GetTableName()); + inh_file_info.set_key_start(tablet_io->GetStartKey()); + inh_file_info.set_key_end(tablet_io->GetEndKey()); + for (size_t lg_id = 0; lg_id < tablet_files.size(); lg_id++) { + VLOG(10) << "[gc] " << tablet_io->GetTablePath() << " add inherited file, lg " << lg_id + << ", " << tablet_files[lg_id].size() << " files total"; + LgInheritedLiveFiles* lg_files = inh_file_info.add_lg_inh_files(); + lg_files->set_lg_no(lg_id); + std::set::iterator file_it = tablet_files[lg_id].begin(); + for (; file_it != tablet_files[lg_id].end(); ++file_it) { + lg_files->add_file_number(*file_it); } - tablet_io->DecRef(); + } + inherited->push_back(inh_file_info); } + tablet_io->DecRef(); + } } void TabletNodeImpl::GetInheritedLiveFiles(std::vector& inherited) { - std::set not_ready_tables; - typedef std::vector > TableSet; - std::map live; - - std::vector tablet_ios; - tablet_manager_->GetAllTablets(&tablet_ios); - std::vector::iterator it = tablet_ios.begin(); - for (; it != tablet_ios.end(); ++it) { - io::TabletIO* tablet_io = *it; - const std::string& tablename = tablet_io->GetTableName(); - if (not_ready_tables.find(tablename) == not_ready_tables.end() - && !tablet_io->AddInheritedLiveFiles(&live[tablename])) { - VLOG(10) << "[gc] " << tablet_io->GetTablePath() << " is not ready, skip it."; - not_ready_tables.insert(tablename); - live[tablename].clear(); - } - tablet_io->DecRef(); + std::set not_ready_tables; + typedef std::vector > TableSet; + std::map live; + + std::vector tablet_ios; + tablet_manager_->GetAllTablets(&tablet_ios); + std::vector::iterator it = tablet_ios.begin(); + for (; it != tablet_ios.end(); ++it) { + io::TabletIO* tablet_io = *it; + const std::string& tablename = tablet_io->GetTableName(); + if (not_ready_tables.find(tablename) == not_ready_tables.end() && + !tablet_io->AddInheritedLiveFiles(&live[tablename])) { + VLOG(10) << "[gc] " << tablet_io->GetTablePath() << " is not ready, skip it."; + not_ready_tables.insert(tablename); + live[tablename].clear(); } + tablet_io->DecRef(); + } + + int total = 0; + std::map::iterator live_it = live.begin(); + for (; live_it != live.end(); ++live_it) { + VLOG(10) << "[gc] add inherited file, table " << live_it->first; + if (not_ready_tables.find(live_it->first) != not_ready_tables.end()) { + VLOG(10) << "[gc] table: " << live_it->first << " is not ready, skip it."; + continue; + } + InheritedLiveFiles table; + table.set_table_name(live_it->first); + for (size_t i = 0; i < live_it->second.size(); ++i) { + VLOG(10) << "[gc] add inherited file, lg " << i << ", " << (live_it->second)[i].size() + << " files total"; + LgInheritedLiveFiles* lg_files = table.add_lg_live_files(); + lg_files->set_lg_no(i); + std::set::iterator file_it = (live_it->second)[i].begin(); + for (; file_it != (live_it->second)[i].end(); ++file_it) { + lg_files->add_file_number(*file_it); + total++; + } + } + inherited.push_back(table); + } + LOG(INFO) << "[gc] add inherited file " << total << " total"; +} - int total = 0; - std::map::iterator live_it = live.begin(); - for (; live_it != live.end(); ++live_it) { - VLOG(10) << "[gc] add inherited file, table " << live_it->first; - if (not_ready_tables.find(live_it->first) != not_ready_tables.end()) { - VLOG(10) << "[gc] table: " << live_it->first << " is not ready, skip it."; - continue; - } - InheritedLiveFiles table; - table.set_table_name(live_it->first); - for (size_t i = 0; i < live_it->second.size(); ++i) { - VLOG(10) << "[gc] add inherited file, lg " << i - << ", " << (live_it->second)[i].size() << " files total"; - LgInheritedLiveFiles* lg_files = table.add_lg_live_files(); - lg_files->set_lg_no(i); - std::set::iterator file_it = (live_it->second)[i].begin(); - for (; file_it != (live_it->second)[i].end(); ++file_it) { - lg_files->add_file_number(*file_it); - total++; - } - } - inherited.push_back(table); +void TabletNodeImpl::GetBackgroundErrors( + std::vector* background_errors) { + std::vector tablet_ios; + tablet_manager_->GetAllTablets(&tablet_ios); + std::vector::iterator it = tablet_ios.begin(); + uint64_t reported_error_msg_len = 0; + while (it != tablet_ios.end()) { + io::TabletIO* tablet_io = *it; + if (tablet_io->ShouldForceUnloadOnError()) { + LOG(WARNING) << *tablet_io << ", has internal error triggered unload"; + StatusCode status; + if (!tablet_io->Unload(&status)) { + LOG(ERROR) << *tablet_io + << ", Unload tablet failed, status: " << StatusCodeToString(status); + } + if (!tablet_manager_->RemoveTablet(tablet_io->GetTableName(), tablet_io->GetStartKey(), + tablet_io->GetEndKey(), &status)) { + LOG(ERROR) << *tablet_io + << ", remove from TabletManager failed, status: " << StatusCodeToString(status); + } + tablet_io->DecRef(); + it = tablet_ios.erase(it); + continue; + } + std::string background_error_msg = ""; + tablet_io->CheckBackgroundError(&background_error_msg); + if (!background_error_msg.empty()) { + std::string msg = tera::sdk::StatTable::SerializeCorrupt( + sdk::CorruptPhase::kCompacting, local_addr_, tablet_io->GetTablePath(), "", + background_error_msg); + + VLOG(15) << "background error @ " << tablet_io->GetTablePath() << ":" << background_error_msg; + reported_error_msg_len += msg.length(); + + // if the length of error message overrun the limit, + // only part of them would be reported + if (reported_error_msg_len < kReportErrorSize) { + tera::TabletBackgroundErrorInfo background_error; + background_error.set_tablet_name(tablet_io->GetTablePath()); + background_error.set_detail_info(msg); + background_errors->push_back(background_error); + } + } + ++it; + tablet_io->DecRef(); + } +} + +void TabletNodeImpl::RefreshLevelSize() { + std::vector tablet_ios; + tablet_manager_->GetAllTablets(&tablet_ios); + std::vector::iterator it = tablet_ios.begin(); + std::vector level_size_total(leveldb::config::kNumLevels, 0); + std::vector db_level_size; + while (it != tablet_ios.end()) { + io::TabletIO* tablet_io = *it; + if (tablet_io->ShouldForceUnloadOnError()) { + LOG(WARNING) << *tablet_io << ", has internal error triggered unload"; + StatusCode status; + if (!tablet_io->Unload(&status)) { + LOG(ERROR) << *tablet_io + << ", Unload tablet failed, status: " << StatusCodeToString(status); + } + if (!tablet_manager_->RemoveTablet(tablet_io->GetTableName(), tablet_io->GetStartKey(), + tablet_io->GetEndKey(), &status)) { + LOG(ERROR) << *tablet_io + << ", remove from TabletManager failed, status: " << StatusCodeToString(status); + } + tablet_io->DecRef(); + it = tablet_ios.erase(it); + continue; + } + if (tablet_io->GetDBLevelSize(&db_level_size)) { + assert(db_level_size.size() == level_size_total.size()); + for (int level = 0; level != leveldb::config::kNumLevels; ++level) { + level_size_total[level] += db_level_size[level]; + } } - LOG(INFO) << "[gc] add inherited file " << total << " total"; + tablet_io->DecRef(); + it = tablet_ios.erase(it); + } + for (int level = 0; level != leveldb::config::kNumLevels; ++level) { + level_size_[level].Set(level_size_total[level]); + } } -void TabletNodeImpl::GetBackgroundErrors(std::vector* background_errors) { - std::vector tablet_ios; - tablet_manager_->GetAllTablets(&tablet_ios); - std::vector::iterator it = tablet_ios.begin(); - uint64_t reported_error_msg_len = 0; - while (it != tablet_ios.end()) { - io::TabletIO* tablet_io = *it; - if (tablet_io->ShouldForceUnloadOnError()) { - LOG(WARNING) << *tablet_io << ", has internal error triggered unload"; - StatusCode status; - if (!tablet_io->Unload(&status)) { - LOG(ERROR) << *tablet_io << ", Unload tablet failed, status: " - << StatusCodeToString(status); - } - if (!tablet_manager_->RemoveTablet(tablet_io->GetTableName(), - tablet_io->GetStartKey(), tablet_io->GetEndKey(), &status)) { - LOG(ERROR) << *tablet_io << ", remove from TabletManager failed, status: " - << StatusCodeToString(status); - } - tablet_io->DecRef(); - it = tablet_ios.erase(it); - continue; - } - std::string background_error_msg = ""; - tablet_io->CheckBackgroundError(&background_error_msg); - if (!background_error_msg.empty()){ - std::string msg = - tera::sdk::StatTable::SerializeCorrupt(sdk::CorruptPhase::kCompacting, - local_addr_, - tablet_io->GetTablePath(), - "", - background_error_msg); - - VLOG(15) << "background error @ " << tablet_io->GetTablePath() - << ":" << background_error_msg; - reported_error_msg_len += msg.length(); - - // if the length of error message overrun the limit, - // only part of them would be reported - if (reported_error_msg_len < kReportErrorSize) { - tera::TabletBackgroundErrorInfo background_error; - background_error.set_tablet_name(tablet_io->GetTablePath()); - background_error.set_detail_info(msg); - background_errors->push_back(background_error); - } - } - ++it; - tablet_io->DecRef(); +ReadTabletTask::ReadTabletTask(int64_t start_micros, std::shared_ptr tablet_manager, + const ReadTabletRequest* request, ReadTabletResponse* response, + google::protobuf::Closure* done, ThreadPool* read_thread_pool) + : tablet_manager_(tablet_manager), + request_(request), + response_(response), + done_(done), + read_thread_pool_(read_thread_pool) { + total_row_num_ = request_->row_info_list_size(); + snapshot_id_ = request_->snapshot_id() == 0 ? 0 : request_->snapshot_id(); + response->set_sequence_id(request->sequence_id()); + + int64_t client_timeout_ms = std::numeric_limits::max() / 2; + if (request_->has_client_timeout_ms()) { + client_timeout_ms = request->client_timeout_ms(); + } + end_time_ms_ = start_micros / 1000 + client_timeout_ms; + VLOG(20) << "start_ms: " << start_micros / 1000 << ", client_timeout_ms: " << client_timeout_ms + << " end_ms: " << end_time_ms_; +} + +void ReadTabletTask::StartRead() { + if (total_row_num_ == 0) { + response_->set_status(kTabletNodeOk); + response_->set_success_num(read_success_num_.Get()); + done_->Run(); + return; + } + + response_->mutable_detail()->mutable_status()->Reserve(total_row_num_); + for (int i = 0; i != total_row_num_; ++i) { + response_->mutable_detail()->mutable_status()->AddAlreadyReserved(); + } + + int64_t max_task_num = FLAGS_tera_tabletnode_parallel_read_task_num; + int64_t min_rows_per_task = FLAGS_tera_tabletnode_parallel_read_rows_per_task; + int64_t max_size = max_task_num * min_rows_per_task; + int64_t rows_per_task; + + if (max_size >= total_row_num_) { + rows_per_task = min_rows_per_task; + } else { + if (max_task_num <= 1) { + rows_per_task = total_row_num_; + } else { + rows_per_task = total_row_num_ / max_task_num + 1; + } + } + int64_t shard_cnt = total_row_num_ / rows_per_task + 1; + + row_results_list_.reserve(shard_cnt); + + int64_t row_to_read = total_row_num_; + int64_t offset = 0; + while (row_to_read > 0) { + row_results_list_.emplace_back(); + auto shard_request = make_shared(offset, std::min(rows_per_task, row_to_read), + &row_results_list_.back()); + row_to_read -= rows_per_task; + offset += rows_per_task; + // We split one read request to several shard_request. + // row_to_read <= 0 means this is the last sharded request(No more rows need + // to read). + // So this sharded request is processed in current thread for reducing cost + // of switching thread. + // Otherwise, shard_request is added to read_thread_pool. + if (row_to_read <= 0) { + DoRead(shard_request); + } else { + read_thread_pool_->AddTask( + std::bind(&ReadTabletTask::DoRead, shared_from_this(), shard_request)); } + } } +void ReadTabletTask::DoRead(std::shared_ptr shard_req) { + bool is_timeout{false}; -void TabletNodeImpl::RefreshLevelSize() { - std::vector tablet_ios; - tablet_manager_->GetAllTablets(&tablet_ios); - std::vector::iterator it = tablet_ios.begin(); - std::vector level_size_total(leveldb::config::kNumLevels, 0); - std::vector db_level_size; - while (it != tablet_ios.end()) { - io::TabletIO* tablet_io = *it; - if (tablet_io->ShouldForceUnloadOnError()) { - LOG(WARNING) << *tablet_io << ", has internal error triggered unload"; - StatusCode status; - if (!tablet_io->Unload(&status)) { - LOG(ERROR) << *tablet_io << ", Unload tablet failed, status: " - << StatusCodeToString(status); - } - if (!tablet_manager_->RemoveTablet(tablet_io->GetTableName(), - tablet_io->GetStartKey(), tablet_io->GetEndKey(), &status)) { - LOG(ERROR) << *tablet_io << ", remove from TabletManager failed, status: " - << StatusCodeToString(status); - } - tablet_io->DecRef(); - it = tablet_ios.erase(it); - continue; - } - if (tablet_io->GetDBLevelSize(&db_level_size)) { - assert(db_level_size.size() == level_size_total.size()); - for (int level = 0; level != leveldb::config::kNumLevels; ++level) { - level_size_total[level] += db_level_size[level]; - } + auto& row_results = *shard_req->row_results; + int64_t index = shard_req->offset; + int64_t end_index = index + shard_req->row_num; + + while (index < end_index) { + int64_t time_remain_ms = end_time_ms_ - GetTimeStampInMs(); + StatusCode row_status = kTabletNodeOk; + + io::TabletIO* tablet_io = tablet_manager_->GetTablet( + request_->tablet_name(), request_->row_info_list(index).key(), &row_status); + if (tablet_io == NULL) { + response_->mutable_detail()->mutable_status()->Set(index, kKeyNotInRange); + read_range_error_counter.Inc(); + } else { + row_results.emplace_back(new RowResult{}); + VLOG(20) << "time_remain_ms: " << time_remain_ms; + if (tablet_io->ReadCells(request_->row_info_list(index), row_results.back().get(), + snapshot_id_, &row_status, time_remain_ms)) { + read_success_num_.Inc(); + } else { + if (row_status != kKeyNotExist && row_status != kRPCTimeout) { + if (row_status == kTabletNodeIsBusy) { + read_reject_counter.Inc(); + } else { + read_error_counter.Inc(); + } } - tablet_io->DecRef(); - it = tablet_ios.erase(it); + row_results.pop_back(); + } + tablet_io->DecRef(); + response_->mutable_detail()->mutable_status()->Set(index, row_status); + } + if (row_status == kRPCTimeout || has_timeout_.load()) { + is_timeout = true; + LOG(WARNING) << "seq_id: " << request_->sequence_id() << " timeout," + << " clinet_timeout_ms: " << request_->client_timeout_ms(); + break; + } + ++index; + } + + if (is_timeout) { + has_timeout_.store(true); + } + + FinishShardRequest(shard_req); +} + +void ReadTabletTask::FinishShardRequest(const std::shared_ptr& shard_req) { + if (finished_.Add(shard_req->row_num) == total_row_num_) { + if (has_timeout_.load()) { + response_->set_status(kRPCTimeout); + done_->Run(); + return; + } + + int64_t size = 0; + for (const auto& row_results : row_results_list_) { + size += row_results.size(); } - for (int level = 0; level != leveldb::config::kNumLevels; ++level) { - level_size_[level].Set(level_size_total[level]); + + response_->mutable_detail()->mutable_row_result()->Reserve(size); + for (auto& row_results : row_results_list_) { + for (auto result : row_results) { + response_->mutable_detail()->add_row_result()->Swap(result.get()); + } + } + response_->set_status(kTabletNodeOk); + response_->set_success_num(read_success_num_.Get()); + done_->Run(); + } + return; +} + +void TabletNodeImpl::ReadTablet(int64_t start_micros, const ReadTabletRequest* request, + ReadTabletResponse* response, google::protobuf::Closure* done, + ThreadPool* read_thread_pool) { + auto read_tablet_task = make_shared(start_micros, tablet_manager_, request, + response, done, read_thread_pool); + + read_tablet_task->StartRead(); +} + +void TabletNodeImpl::PersistentCacheGarbageCollect(const std::set& inherited_files, + const std::set& active_tablets) { + std::shared_ptr p_cache; + if (!io::GetPersistentCache(&p_cache).ok() || !p_cache) { + return; + } + leveldb::StopWatchMicro timer(leveldb::Env::Default(), true); + std::vector all_keys{p_cache->GetAllKeys()}; + /* + * all cached tablets/files: + * ------------------------------------------ + * | active tablets | inactive tablets | + * | | | + * | | all | to | + * | | inherited | *DELETE* | + * | | files | | + * ------------------------------------------ + * We need to save active tablets' files and inherited files. + * Try remove files of tablets not on this tabletserver. + * Here is the gc rule: + * + * Key format in persistent cache: |table_name/tablet_name/lg_num/xxxxxxxx.sst| + * | 1 | | + * string in active_tablets |table_name/tablet_name| | + * | 2 | + * string in inherited_files |table_name/tablet_name/lg_num/xxxxxxxx.sst| + * + * If part 1 of persistent cache key doesn't match any string in active tablets, + * and part 2 of it doesn't match any one in inherited_files, we'll remove it. + */ + std::unordered_set new_delayed_gc_files; + for (auto& key : all_keys) { + if (inherited_files.find(key) != inherited_files.end()) { + // 1. If file name in inherited_files, skip it. + continue; + } + std::vector splited_terms; + SplitString(key, "/", &splited_terms); + assert(splited_terms.size() > 2); + // 2. Extract table_name/tablet_name from persistent key. + std::string tablet_name = splited_terms[0] + "/" + splited_terms[1]; + if (active_tablets.find(tablet_name) != active_tablets.end()) { + // 3. Skip active tablets' file. + continue; + } + if (delayed_gc_files_.find(key) != delayed_gc_files_.end()) { + LOG(INFO) << "[Persistent Cache GC] Remove unused file: " << key << "."; + // 4. If this key has already be delayed for one gc period, remove it. + p_cache->ForceEvict(key); + } else { + LOG(INFO) << "[Persistent Cache GC] Add file: " << key << " to delayed gc files."; + // 5. Otherwise, it'll be add to delayed_gc_files, waiting for next gc process. + new_delayed_gc_files.emplace(key); } + } + + std::swap(delayed_gc_files_, new_delayed_gc_files); + p_cache->GarbageCollect(); + LOG(INFO) << "[Persistent Cache GC] Finished, cost: " << timer.ElapsedMicros() / 1000 << " ms."; +} + +void TabletNodeImpl::InitDfsReadThreadLimiter() { + auto thread_limit = + static_cast(FLAGS_dfs_read_thread_ratio * FLAGS_tera_tabletnode_read_thread_num); + thread_limit = + std::min(static_cast(FLAGS_tera_tabletnode_read_thread_num), thread_limit); + thread_limit = std::max(static_cast(1), thread_limit); + leveldb::DfsReadThreadLimiter::Instance().SetLimit(thread_limit); + LOG(INFO) << "Init dfs read thread limiter with " << thread_limit << ", total thread " + << FLAGS_tera_tabletnode_read_thread_num; } -} // namespace tabletnode -} // namespace tera +} // namespace tabletnode +} // namespace tera diff --git a/src/tabletnode/tabletnode_impl.h b/src/tabletnode/tabletnode_impl.h index 9b88e28b0..f33af375e 100644 --- a/src/tabletnode/tabletnode_impl.h +++ b/src/tabletnode/tabletnode_impl.h @@ -5,14 +5,16 @@ #ifndef TERA_TABLETNODE_TABLETNODE_IMPL_H_ #define TERA_TABLETNODE_TABLETNODE_IMPL_H_ -#include +#include #include +#include +#include +#include #include "common/base/scoped_ptr.h" #include "common/event.h" #include "common/metric/collector_report_publisher.h" #include "common/metric/metric_counter.h" -#include "common/thread.h" #include "common/thread_pool.h" #include "io/tablet_io.h" @@ -31,200 +33,225 @@ class TabletManager; class TabletNodeZkAdapterBase; class TabletNodeImpl { -public: - enum TabletNodeStatus { - kNotInited = kTabletNodeNotInited, - kIsIniting = kTabletNodeIsIniting, - kIsBusy = kTabletNodeIsBusy, - kIsReadonly = kTabletNodeIsReadonly, - kIsRunning = kTabletNodeIsRunning - }; - - struct WriteTabletTask { - std::vector row_mutation_vec; - std::vector row_status_vec; - std::vector row_index_vec; - std::shared_ptr row_done_counter; - - const WriteTabletRequest* request; - WriteTabletResponse* response; - google::protobuf::Closure* done; - WriteRpcTimer* timer; - - WriteTabletTask(const WriteTabletRequest* req, WriteTabletResponse* resp, - google::protobuf::Closure* d, WriteRpcTimer* t, std::shared_ptr c) - : row_done_counter(c), request(req), response(resp), done(d), timer(t) {} - }; - - TabletNodeImpl(); - ~TabletNodeImpl(); - - bool Init(); - - bool Exit(); - - void GarbageCollect(); - - void LoadTablet(const LoadTabletRequest* request, - LoadTabletResponse* response, - google::protobuf::Closure* done); - - bool UnloadTablet(const std::string& tablet_name, - const std::string& start, const std::string& end, - StatusCode* status); - - void UnloadTablet(const UnloadTabletRequest* request, - UnloadTabletResponse* response, - google::protobuf::Closure* done); - - void CompactTablet(const CompactTabletRequest* request, - CompactTabletResponse* response, - google::protobuf::Closure* done); + public: + enum TabletNodeStatus { + kNotInited = kTabletNodeNotInited, + kIsIniting = kTabletNodeIsIniting, + kIsBusy = kTabletNodeIsBusy, + kIsReadonly = kTabletNodeIsReadonly, + kIsRunning = kTabletNodeIsRunning + }; - void Update(const UpdateRequest* request, - UpdateResponse* response, - google::protobuf::Closure* done); + struct WriteTabletTask { + std::vector row_mutation_vec; + std::vector row_status_vec; + std::vector row_index_vec; + std::shared_ptr row_done_counter; - void ReadTablet(int64_t start_micros, - const ReadTabletRequest* request, - ReadTabletResponse* response, - google::protobuf::Closure* done); + const WriteTabletRequest* request; + WriteTabletResponse* response; + google::protobuf::Closure* done; + WriteRpcTimer* timer; - void WriteTablet(const WriteTabletRequest* request, - WriteTabletResponse* response, - google::protobuf::Closure* done, - WriteRpcTimer* timer = NULL); + WriteTabletTask(const WriteTabletRequest* req, WriteTabletResponse* resp, + google::protobuf::Closure* d, WriteRpcTimer* t, std::shared_ptr c) + : row_done_counter(c), request(req), response(resp), done(d), timer(t) {} + }; - void ScanTablet(const ScanTabletRequest* request, - ScanTabletResponse* response, - google::protobuf::Closure* done); + TabletNodeImpl(); + ~TabletNodeImpl(); - void CmdCtrl(const TsCmdCtrlRequest* request, TsCmdCtrlResponse* response, - google::protobuf::Closure* done); + bool Init(); - void Query(const QueryRequest* request, QueryResponse* response, - google::protobuf::Closure* done); + bool Exit(); - void SplitTablet(const SplitTabletRequest* request, - SplitTabletResponse* response, - google::protobuf::Closure* done); - - void ComputeSplitKey(const SplitTabletRequest* request, - SplitTabletResponse* response, + void GarbageCollect(); + void PersistentCacheGarbageCollect(const std::set& inherited_files, + const std::set& active_tablets); + + StatusCode QueryTabletStatus(const std::string& table_name, const std::string& key_start, + const std::string& key_end); + + void LoadTablet(const LoadTabletRequest* request, LoadTabletResponse* response); + + bool UnloadTablet(const std::string& tablet_name, const std::string& start, + const std::string& end, StatusCode* status); + + void UnloadTablet(const UnloadTabletRequest* request, UnloadTabletResponse* response); + + void CompactTablet(const CompactTabletRequest* request, CompactTabletResponse* response, google::protobuf::Closure* done); - void EnterSafeMode(); - void LeaveSafeMode(); - void ExitService(); - - void SetTabletNodeStatus(const TabletNodeStatus& status); - TabletNodeStatus GetTabletNodeStatus(); - - void SetRootTabletAddr(const std::string& root_tablet_addr); - - void SetSessionId(const std::string& session_id); - std::string GetSessionId(); - - TabletNodeSysInfo& GetSysInfo(); - - void RefreshSysInfo(); - - void GetBackgroundErrors(std::vector* background_errors); - - void TryReleaseMallocCache(); - - void RefreshLevelSize(); - -private: - // call this when fail to write TabletIO - void WriteTabletFail(WriteTabletTask* tablet_task, StatusCode status); - - // write callback for TabletIO::Write() - void WriteTabletCallback(WriteTabletTask* tablet_task, - std::vector* row_mutation_vec, - std::vector* status_vec); - - bool CheckInKeyRange(const KeyList& key_list, - const std::string& key_start, - const std::string& key_end); - bool CheckInKeyRange(const KeyValueList& pair_list, - const std::string& key_start, - const std::string& key_end); - bool CheckInKeyRange(const RowMutationList& row_list, - const std::string& key_start, - const std::string& key_end); - bool CheckInKeyRange(const RowReaderList& reader_list, - const std::string& key_start, - const std::string& key_end); - - - void InitCacheSystem(); - - void ReleaseMallocCache(); - void EnableReleaseMallocCacheTimer(int32_t expand_factor = 1); - void DisableReleaseMallocCacheTimer(); - - void RefreshTabletsStatus(); - - void GetInheritedLiveFiles(std::vector* inherited); - void GetInheritedLiveFiles(std::vector& inherited); - - void GarbageCollectInPath(const std::string& path, leveldb::Env* env, - const std::set& inherited_files, - const std::set active_tablets); - - bool ApplySchema(const UpdateRequest* request); - - void UnloadTabletProc(io::TabletIO* tablet_io, Counter* worker_count); - -private: - mutable Mutex status_mutex_; - TabletNodeStatus status_; - Mutex mutex_; - bool running_; - - scoped_ptr tablet_manager_; - scoped_ptr zk_adapter_; - - uint64_t this_sequence_id_; - std::string local_addr_; - std::string root_tablet_addr_; - std::string session_id_; - int64_t release_cache_timer_id_; - - TabletNodeSysInfo sysinfo_; - std::vector level_size_; - - // do some tablets health check with a timer - common::Thread tablet_healthcheck_thread_; - // Exit() called should set this event - common::AutoResetEvent exit_event_; - - scoped_ptr thread_pool_; - - leveldb::Logger* ldb_logger_; - leveldb::Cache* ldb_block_cache_; - leveldb::Cache* m_memory_cache; - leveldb::TableCache* ldb_table_cache_; - - // metric for caches - struct CacheMetrics { - tera::AutoCollectorRegister block_cache_hitrate_; - tera::AutoCollectorRegister block_cache_entries_; - tera::AutoCollectorRegister block_cache_charge_; - - tera::AutoCollectorRegister table_cache_hitrate_; - tera::AutoCollectorRegister table_cache_entries_; - tera::AutoCollectorRegister table_cache_charge_; - - CacheMetrics(leveldb::Cache* block_cache, leveldb::TableCache* table_cache); - }; - - scoped_ptr cache_metrics_; - scoped_ptr snappy_ratio_metric_; + void Update(const UpdateRequest* request, UpdateResponse* response, + google::protobuf::Closure* done); + + void ReadTablet(int64_t start_micros, const ReadTabletRequest* request, + ReadTabletResponse* response, google::protobuf::Closure* done, + ThreadPool* read_thread_pool); + + void WriteTablet(const WriteTabletRequest* request, WriteTabletResponse* response, + google::protobuf::Closure* done, WriteRpcTimer* timer = NULL); + + void ScanTablet(const ScanTabletRequest* request, ScanTabletResponse* response, + google::protobuf::Closure* done); + + void CmdCtrl(const TsCmdCtrlRequest* request, TsCmdCtrlResponse* response, + google::protobuf::Closure* done); + + void Query(const QueryRequest* request, QueryResponse* response, google::protobuf::Closure* done); + + void ComputeSplitKey(const SplitTabletRequest* request, SplitTabletResponse* response, + google::protobuf::Closure* done); + + void EnterSafeMode(); + void LeaveSafeMode(); + void ExitService(); + + void SetTabletNodeStatus(const TabletNodeStatus& status); + TabletNodeStatus GetTabletNodeStatus(); + + void SetRootTabletAddr(const std::string& root_tablet_addr); + + void SetSessionId(const std::string& session_id); + std::string GetSessionId(); + + TabletNodeSysInfo& GetSysInfo(); + + void RefreshAndDumpSysInfo(); + + void GetBackgroundErrors(std::vector* background_errors); + + void TryReleaseMallocCache(); + + void RefreshLevelSize(); + + private: + // call this when fail to write TabletIO + void WriteTabletFail(WriteTabletTask* tablet_task, StatusCode status); + + // write callback for TabletIO::Write() + void WriteTabletCallback(WriteTabletTask* tablet_task, + std::vector* row_mutation_vec, + std::vector* status_vec); + + bool CheckInKeyRange(const KeyList& key_list, const std::string& key_start, + const std::string& key_end); + bool CheckInKeyRange(const KeyValueList& pair_list, const std::string& key_start, + const std::string& key_end); + bool CheckInKeyRange(const RowMutationList& row_list, const std::string& key_start, + const std::string& key_end); + bool CheckInKeyRange(const RowReaderList& reader_list, const std::string& key_start, + const std::string& key_end); + + bool InitCacheSystem(); + void InitDfsReadThreadLimiter(); + + void ReleaseMallocCache(); + void EnableReleaseMallocCacheTimer(int32_t expand_factor = 1); + void DisableReleaseMallocCacheTimer(); + + void RefreshTabletsStatus(); + + void GetInheritedLiveFiles(std::vector* inherited); + void GetInheritedLiveFiles(std::vector& inherited); + + void GarbageCollectInPath(const std::string& path, leveldb::Env* env, + const std::set& inherited_files, + const std::set& active_tablets); + + bool ApplySchema(const UpdateRequest* request); + + void UnloadTabletProc(io::TabletIO* tablet_io, Counter* worker_count); + + private: + mutable Mutex status_mutex_; + TabletNodeStatus status_; + Mutex mutex_; + bool running_; + + std::shared_ptr tablet_manager_; + scoped_ptr zk_adapter_; + + uint64_t this_sequence_id_; + std::string local_addr_; + std::string root_tablet_addr_; + std::string session_id_; + int64_t release_cache_timer_id_; + + TabletNodeSysInfo sysinfo_; + std::vector level_size_; + + // do some tablets health check with a timer + std::thread tablet_healthcheck_thread_; + // Exit() called should set this event + common::AutoResetEvent exit_event_; + + scoped_ptr thread_pool_; + + leveldb::Logger* ldb_logger_; + leveldb::Cache* ldb_block_cache_; + leveldb::Cache* m_memory_cache; + leveldb::TableCache* ldb_table_cache_; + + // metric for caches + struct CacheMetrics { + tera::AutoCollectorRegister block_cache_hitrate_; + tera::AutoCollectorRegister block_cache_entries_; + tera::AutoCollectorRegister block_cache_charge_; + + tera::AutoCollectorRegister table_cache_hitrate_; + tera::AutoCollectorRegister table_cache_entries_; + tera::AutoCollectorRegister table_cache_charge_; + + CacheMetrics(leveldb::Cache* block_cache, leveldb::TableCache* table_cache); + }; + + scoped_ptr cache_metrics_; + scoped_ptr snappy_ratio_metric_; + // persistent cache's garbage files will be delayed for one gc period before remove. + std::unordered_set delayed_gc_files_; +}; + +class ReadTabletTask : public std::enable_shared_from_this { + public: + using RowResults = std::vector>; + ReadTabletTask(int64_t start_micros, std::shared_ptr tablet_manager, + const ReadTabletRequest* request, ReadTabletResponse* response, + google::protobuf::Closure* done, ThreadPool* read_thread_pool); + + void StartRead(); + + private: + struct ShardRequest { + int64_t offset; + int64_t row_num; + RowResults* row_results; + ShardRequest(int64_t off, int64_t r_num, RowResults* r_results) + : offset(off), row_num(r_num), row_results(r_results) {} + }; + + void DoRead(std::shared_ptr shard_req); + void FinishShardRequest(const std::shared_ptr& shard_req); + + private: + std::shared_ptr tablet_manager_; + const ReadTabletRequest* request_; + ReadTabletResponse* response_; + google::protobuf::Closure* done_; + ThreadPool* read_thread_pool_; + + Counter finished_; + Counter read_success_num_; + std::atomic has_timeout_{false}; + + int64_t end_time_ms_; + uint64_t snapshot_id_; + int32_t total_row_num_; + + std::vector row_results_list_; }; -} // namespace tabletnode -} // namespace tera +} // namespace tabletnode +} // namespace tera -#endif // TERA_TABLETNODE_TABLETNODE_IMPL_H_ +#endif // TERA_TABLETNODE_TABLETNODE_IMPL_H_ diff --git a/src/tabletnode/tabletnode_metric_name.h b/src/tabletnode/tabletnode_metric_name.h index 9aad5f661..d860bffcc 100644 --- a/src/tabletnode/tabletnode_metric_name.h +++ b/src/tabletnode/tabletnode_metric_name.h @@ -28,6 +28,7 @@ const char* const kEnvLabelOther = "env:other"; const char* const kRequestCountMetric = "tera_ts_request_count"; const char* const kPendingCountMetric = "tera_ts_pending_count"; const char* const kRejectCountMetric = "tera_ts_reject_count"; +const char* const kQuotaRejectCountMetric = "tera_ts_quota_reject_count"; const char* const kErrorCountMetric = "tera_ts_error_count"; const char* const kRangeErrorMetric = "tera_ts_range_error_count"; @@ -36,6 +37,7 @@ const char* const kRowCountMetric = "tera_ts_row_count"; const char* const kRowThroughPutMetric = "tera_ts_row_through_put"; const char* const kLowLevelReadMetric = "tera_ts_low_level_read"; const char* const kScanDropCountMetric = "tera_ts_scan_drop_count"; +const char* const kScanFilterCountMetric = "tera_ts_scan_filter_count"; const char* const kRequestDelayMetric = "tera_ts_request_delay_us_total"; const char* const kFinishedRequestCountMetric = "tera_ts_finished_request_count"; @@ -127,10 +129,10 @@ const char* const kSnappyCompressionRatioMetric = "tera_ts_snappy_compression_pe const char* const kNotReadyCountMetric = "tera_ts_not_ready_count"; const char* const kTabletSizeCounter = "tera_ts_tablet_size_count"; const char* const kTabletNumCounter = "tera_ts_tablet_num_count"; -} // end namespace tabletnode -} // end namespace tera +const char* const kMemTableSize = "tera_ts_mem_table_size"; +} // end namespace tabletnode +} // end namespace tera -#endif // TERA_TABLETNODE_TABLETNODE_METRIC_NAME_H_ +#endif // TERA_TABLETNODE_TABLETNODE_METRIC_NAME_H_ /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ - diff --git a/src/tabletnode/tabletnode_sysinfo.cc b/src/tabletnode/tabletnode_sysinfo.cc index 16519ae13..3b8fc0feb 100644 --- a/src/tabletnode/tabletnode_sysinfo.cc +++ b/src/tabletnode/tabletnode_sysinfo.cc @@ -22,14 +22,18 @@ #include "common/this_thread.h" #include "tabletnode/tabletnode_sysinfo.h" #include "proto/proto_helper.h" +#include "quota/ts_write_flow_controller.h" #include "tabletnode/tabletnode_metric_name.h" #include "utils/tprinter.h" #include "utils/utils_cmd.h" +#include "io/utils_leveldb.h" +#include "leveldb/persistent_cache.h" DECLARE_bool(tera_tabletnode_dump_running_info); DECLARE_bool(tera_tabletnode_dump_level_size_info_enabled); DECLARE_string(tera_tabletnode_running_info_dump_file); DECLARE_int64(tera_tabletnode_sysinfo_check_interval); +DECLARE_bool(tera_enable_persistent_cache); namespace leveldb { extern tera::Counter rawkey_compare_counter; @@ -104,914 +108,1180 @@ extern tera::Counter ssd_write_counter; extern tera::Counter ssd_write_size_counter; } - namespace tera { namespace tabletnode { - // dfs metrics -tera::AutoCollectorRegister dfs_read_size_metric(kDfsReadBytesThroughPut, - std::unique_ptr(new CounterCollector(&leveldb::dfs_read_size_counter, true)), {SubscriberType::THROUGHPUT}); -tera::AutoCollectorRegister dfs_write_size_metric(kDfsWriteBytesThroughPut, - std::unique_ptr(new CounterCollector(&leveldb::dfs_write_size_counter, true)), {SubscriberType::THROUGHPUT}); - -tera::AutoCollectorRegister dfs_read_metric(kDfsRequestMetric, kDfsReadLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_read_counter)), {SubscriberType::QPS}); -tera::AutoCollectorRegister dfs_write_metric(kDfsRequestMetric, kDfsWriteLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_write_counter)), {SubscriberType::QPS}); -tera::AutoCollectorRegister dfs_sync_metric(kDfsRequestMetric, kDfsSyncLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_sync_counter)), {SubscriberType::QPS}); -tera::AutoCollectorRegister dfs_flush_metric(kDfsRequestMetric, kDfsFlushLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_flush_counter)), {SubscriberType::QPS}); -tera::AutoCollectorRegister dfs_list_metric(kDfsRequestMetric, kDfsListLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_list_counter)), {SubscriberType::QPS}); -tera::AutoCollectorRegister dfs_exists_metric(kDfsRequestMetric, kDfsExistsLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_exists_counter)), {SubscriberType::QPS}); -tera::AutoCollectorRegister dfs_open_metric(kDfsRequestMetric, kDfsOpenLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_open_counter)), {SubscriberType::QPS}); -tera::AutoCollectorRegister dfs_close_metric(kDfsRequestMetric, kDfsCloseLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_close_counter)), {SubscriberType::QPS}); -tera::AutoCollectorRegister dfs_delete_metric(kDfsRequestMetric, kDfsDeleteLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_delete_counter)), {SubscriberType::QPS}); -tera::AutoCollectorRegister dfs_tell_metric(kDfsRequestMetric, kDfsTellLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_tell_counter)), {SubscriberType::QPS}); -tera::AutoCollectorRegister dfs_info_metric(kDfsRequestMetric, kDfsInfoLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_info_counter)), {SubscriberType::QPS}); -tera::AutoCollectorRegister dfs_other_metric(kDfsRequestMetric, kDfsOtherLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_other_counter)), {SubscriberType::QPS}); - -tera::AutoCollectorRegister dfs_read_error_counter(kDfsErrorMetric, kDfsReadLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_read_error_counter)), {SubscriberType::QPS}); -tera::AutoCollectorRegister dfs_write_error_counter(kDfsErrorMetric, kDfsWriteLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_write_error_counter)), {SubscriberType::QPS}); -tera::AutoCollectorRegister dfs_sync_error_counter(kDfsErrorMetric, kDfsSyncLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_sync_error_counter)), {SubscriberType::QPS}); -tera::AutoCollectorRegister dfs_flush_error_counter(kDfsErrorMetric, kDfsFlushLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_flush_error_counter)), {SubscriberType::QPS}); -tera::AutoCollectorRegister dfs_list_error_counter(kDfsErrorMetric, kDfsListLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_list_error_counter)), {SubscriberType::QPS}); -tera::AutoCollectorRegister dfs_exists_error_counter(kDfsErrorMetric, kDfsExistsLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_exists_error_counter)), {SubscriberType::QPS}); -tera::AutoCollectorRegister dfs_open_error_counter(kDfsErrorMetric, kDfsOpenLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_open_error_counter)), {SubscriberType::QPS}); -tera::AutoCollectorRegister dfs_close_error_counter(kDfsErrorMetric, kDfsCloseLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_close_error_counter)), {SubscriberType::QPS}); -tera::AutoCollectorRegister dfs_delete_error_counter(kDfsErrorMetric, kDfsDeleteLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_delete_error_counter)), {SubscriberType::QPS}); -tera::AutoCollectorRegister dfs_tell_error_counter(kDfsErrorMetric, kDfsTellLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_tell_error_counter)), {SubscriberType::QPS}); -tera::AutoCollectorRegister dfs_info_error_counter(kDfsErrorMetric, kDfsInfoLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_info_error_counter)), {SubscriberType::QPS}); -tera::AutoCollectorRegister dfs_other_error_counter(kDfsErrorMetric, kDfsOtherLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_other_error_counter)), {SubscriberType::QPS}); - -tera::AutoCollectorRegister dfs_read_delay_metric(kDfsReadDelayMetric, +tera::AutoCollectorRegister dfs_read_size_metric( + kDfsReadBytesThroughPut, + std::unique_ptr(new CounterCollector(&leveldb::dfs_read_size_counter, true)), + {SubscriberType::THROUGHPUT}); +tera::AutoCollectorRegister dfs_write_size_metric( + kDfsWriteBytesThroughPut, + std::unique_ptr(new CounterCollector(&leveldb::dfs_write_size_counter, true)), + {SubscriberType::THROUGHPUT}); + +tera::AutoCollectorRegister dfs_read_metric( + kDfsRequestMetric, kDfsReadLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_read_counter)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_write_metric( + kDfsRequestMetric, kDfsWriteLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_write_counter)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_sync_metric( + kDfsRequestMetric, kDfsSyncLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_sync_counter)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_flush_metric( + kDfsRequestMetric, kDfsFlushLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_flush_counter)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_list_metric( + kDfsRequestMetric, kDfsListLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_list_counter)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_exists_metric( + kDfsRequestMetric, kDfsExistsLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_exists_counter)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_open_metric( + kDfsRequestMetric, kDfsOpenLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_open_counter)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_close_metric( + kDfsRequestMetric, kDfsCloseLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_close_counter)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_delete_metric( + kDfsRequestMetric, kDfsDeleteLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_delete_counter)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_tell_metric( + kDfsRequestMetric, kDfsTellLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_tell_counter)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_info_metric( + kDfsRequestMetric, kDfsInfoLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_info_counter)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_other_metric( + kDfsRequestMetric, kDfsOtherLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_other_counter)), + {SubscriberType::QPS}); + +tera::AutoCollectorRegister dfs_read_error_counter( + kDfsErrorMetric, kDfsReadLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_read_error_counter)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_write_error_counter( + kDfsErrorMetric, kDfsWriteLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_write_error_counter)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_sync_error_counter( + kDfsErrorMetric, kDfsSyncLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_sync_error_counter)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_flush_error_counter( + kDfsErrorMetric, kDfsFlushLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_flush_error_counter)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_list_error_counter( + kDfsErrorMetric, kDfsListLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_list_error_counter)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_exists_error_counter( + kDfsErrorMetric, kDfsExistsLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_exists_error_counter)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_open_error_counter( + kDfsErrorMetric, kDfsOpenLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_open_error_counter)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_close_error_counter( + kDfsErrorMetric, kDfsCloseLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_close_error_counter)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_delete_error_counter( + kDfsErrorMetric, kDfsDeleteLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_delete_error_counter)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_tell_error_counter( + kDfsErrorMetric, kDfsTellLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_tell_error_counter)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_info_error_counter( + kDfsErrorMetric, kDfsInfoLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_info_error_counter)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_other_error_counter( + kDfsErrorMetric, kDfsOtherLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_other_error_counter)), + {SubscriberType::QPS}); + +tera::AutoCollectorRegister dfs_read_delay_metric( + kDfsReadDelayMetric, std::unique_ptr(new CounterCollector(&leveldb::dfs_read_delay_counter, true)), {}); -tera::AutoCollectorRegister dfs_write_delay_metric(kDfsWriteDelayMetric, +tera::AutoCollectorRegister dfs_write_delay_metric( + kDfsWriteDelayMetric, std::unique_ptr(new CounterCollector(&leveldb::dfs_write_delay_counter, true)), {}); -tera::AutoCollectorRegister dfs_sync_delay_metric(kDfsSyncDelayMetric, +tera::AutoCollectorRegister dfs_sync_delay_metric( + kDfsSyncDelayMetric, std::unique_ptr(new CounterCollector(&leveldb::dfs_sync_delay_counter, true)), {}); -tera::AutoSubscriberRegister dfs_read_delay_avg_subscriber (std::unique_ptr(new RatioSubscriber( - MetricId(kDfsReadDelayPerRequestMetric), - std::unique_ptr(new PrometheusSubscriber(MetricId(kDfsReadDelayMetric), SubscriberType::SUM)), - std::unique_ptr(new PrometheusSubscriber(MetricId(kDfsRequestMetric, kDfsReadLabel), SubscriberType::SUM))))); - -tera::AutoSubscriberRegister dfs_write_delay_avg_subscriber (std::unique_ptr(new RatioSubscriber( - MetricId(kDfsWriteDelayPerRequestMetric), - std::unique_ptr(new PrometheusSubscriber(MetricId(kDfsWriteDelayMetric), SubscriberType::SUM)), - std::unique_ptr(new PrometheusSubscriber(MetricId(kDfsRequestMetric, kDfsWriteLabel), SubscriberType::SUM))))); - -tera::AutoSubscriberRegister dfs_sync_delay_avg_subscriber (std::unique_ptr(new RatioSubscriber( - MetricId(kDfsSyncDelayPerRequestMetric), - std::unique_ptr(new PrometheusSubscriber(MetricId(kDfsSyncDelayMetric), SubscriberType::SUM)), - std::unique_ptr(new PrometheusSubscriber(MetricId(kDfsRequestMetric, kDfsSyncLabel), SubscriberType::SUM))))); - -tera::AutoCollectorRegister dfs_read_hang_metric(kDfsHangMetric, kDfsReadLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_read_hang_counter)), {SubscriberType::SUM}); -tera::AutoCollectorRegister dfs_write_hang_metric(kDfsHangMetric, kDfsWriteLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_write_hang_counter)), {SubscriberType::SUM}); -tera::AutoCollectorRegister dfs_sync_hang_metric(kDfsHangMetric, kDfsSyncLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_sync_hang_counter)), {SubscriberType::SUM}); -tera::AutoCollectorRegister dfs_flush_hang_metric(kDfsHangMetric, kDfsFlushLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_flush_hang_counter)), {SubscriberType::SUM}); -tera::AutoCollectorRegister dfs_list_hang_metric(kDfsHangMetric, kDfsListLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_list_hang_counter)), {SubscriberType::SUM}); -tera::AutoCollectorRegister dfs_exists_hang_metric(kDfsHangMetric, kDfsExistsLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_exists_hang_counter)), {SubscriberType::SUM}); -tera::AutoCollectorRegister dfs_open_hang_metric(kDfsHangMetric, kDfsOpenLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_open_hang_counter)), {SubscriberType::SUM}); -tera::AutoCollectorRegister dfs_close_hang_metric(kDfsHangMetric, kDfsCloseLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_close_hang_counter)), {SubscriberType::SUM}); -tera::AutoCollectorRegister dfs_delete_hang_metric(kDfsHangMetric, kDfsDeleteLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_delete_hang_counter)), {SubscriberType::SUM}); -tera::AutoCollectorRegister dfs_tell_hang_metric(kDfsHangMetric, kDfsTellLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_tell_hang_counter)), {SubscriberType::SUM}); -tera::AutoCollectorRegister dfs_info_hang_metric(kDfsHangMetric, kDfsInfoLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_info_hang_counter)), {SubscriberType::SUM}); -tera::AutoCollectorRegister dfs_other_hang_metric(kDfsHangMetric, kDfsOtherLabel, - std::unique_ptr(new CounterCollector(&leveldb::dfs_other_hang_counter)), {SubscriberType::SUM}); - -tera::AutoCollectorRegister dfs_opened_read_files_metric(kDfsOpenedReadFilesCountMetric, - std::unique_ptr(new CounterCollector(&leveldb::dfs_opened_read_files_counter, false))); -tera::AutoCollectorRegister dfs_opened_write_files_metric(kDfsOpenedWriteFilesCountMetric, - std::unique_ptr(new CounterCollector(&leveldb::dfs_opened_write_files_counter, false))); +tera::AutoSubscriberRegister dfs_read_delay_avg_subscriber(std::unique_ptr( + new RatioSubscriber(MetricId(kDfsReadDelayPerRequestMetric), + std::unique_ptr(new PrometheusSubscriber( + MetricId(kDfsReadDelayMetric), SubscriberType::SUM)), + std::unique_ptr(new PrometheusSubscriber( + MetricId(kDfsRequestMetric, kDfsReadLabel), SubscriberType::SUM))))); + +tera::AutoSubscriberRegister dfs_write_delay_avg_subscriber(std::unique_ptr( + new RatioSubscriber(MetricId(kDfsWriteDelayPerRequestMetric), + std::unique_ptr(new PrometheusSubscriber( + MetricId(kDfsWriteDelayMetric), SubscriberType::SUM)), + std::unique_ptr(new PrometheusSubscriber( + MetricId(kDfsRequestMetric, kDfsWriteLabel), SubscriberType::SUM))))); + +tera::AutoSubscriberRegister dfs_sync_delay_avg_subscriber(std::unique_ptr( + new RatioSubscriber(MetricId(kDfsSyncDelayPerRequestMetric), + std::unique_ptr(new PrometheusSubscriber( + MetricId(kDfsSyncDelayMetric), SubscriberType::SUM)), + std::unique_ptr(new PrometheusSubscriber( + MetricId(kDfsRequestMetric, kDfsSyncLabel), SubscriberType::SUM))))); + +tera::AutoCollectorRegister dfs_read_hang_metric( + kDfsHangMetric, kDfsReadLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_read_hang_counter)), + {SubscriberType::SUM}); +tera::AutoCollectorRegister dfs_write_hang_metric( + kDfsHangMetric, kDfsWriteLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_write_hang_counter)), + {SubscriberType::SUM}); +tera::AutoCollectorRegister dfs_sync_hang_metric( + kDfsHangMetric, kDfsSyncLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_sync_hang_counter)), + {SubscriberType::SUM}); +tera::AutoCollectorRegister dfs_flush_hang_metric( + kDfsHangMetric, kDfsFlushLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_flush_hang_counter)), + {SubscriberType::SUM}); +tera::AutoCollectorRegister dfs_list_hang_metric( + kDfsHangMetric, kDfsListLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_list_hang_counter)), + {SubscriberType::SUM}); +tera::AutoCollectorRegister dfs_exists_hang_metric( + kDfsHangMetric, kDfsExistsLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_exists_hang_counter)), + {SubscriberType::SUM}); +tera::AutoCollectorRegister dfs_open_hang_metric( + kDfsHangMetric, kDfsOpenLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_open_hang_counter)), + {SubscriberType::SUM}); +tera::AutoCollectorRegister dfs_close_hang_metric( + kDfsHangMetric, kDfsCloseLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_close_hang_counter)), + {SubscriberType::SUM}); +tera::AutoCollectorRegister dfs_delete_hang_metric( + kDfsHangMetric, kDfsDeleteLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_delete_hang_counter)), + {SubscriberType::SUM}); +tera::AutoCollectorRegister dfs_tell_hang_metric( + kDfsHangMetric, kDfsTellLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_tell_hang_counter)), + {SubscriberType::SUM}); +tera::AutoCollectorRegister dfs_info_hang_metric( + kDfsHangMetric, kDfsInfoLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_info_hang_counter)), + {SubscriberType::SUM}); +tera::AutoCollectorRegister dfs_other_hang_metric( + kDfsHangMetric, kDfsOtherLabel, + std::unique_ptr(new CounterCollector(&leveldb::dfs_other_hang_counter)), + {SubscriberType::SUM}); + +tera::AutoCollectorRegister dfs_opened_read_files_metric( + kDfsOpenedReadFilesCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_opened_read_files_counter, + false))); +tera::AutoCollectorRegister dfs_opened_write_files_metric( + kDfsOpenedWriteFilesCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_opened_write_files_counter, + false))); // ssd metrics -tera::AutoCollectorRegister ssd_read_through_put_metric(kSsdReadThroughPutMetric, - std::unique_ptr(new CounterCollector(&leveldb::ssd_read_size_counter, true)), {SubscriberType::THROUGHPUT}); -tera::AutoCollectorRegister ssd_write_through_put_metric(kSsdWriteThroughPutMetric, - std::unique_ptr(new CounterCollector(&leveldb::ssd_write_size_counter, true)), {SubscriberType::THROUGHPUT}); -tera::AutoCollectorRegister ssd_read_metric(kSsdReadCountMetric, - std::unique_ptr(new CounterCollector(&leveldb::ssd_read_counter, true)), {SubscriberType::QPS}); -tera::AutoCollectorRegister ssd_write_metric(kSsdWriteCountMetric, - std::unique_ptr(new CounterCollector(&leveldb::ssd_write_counter, true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister ssd_read_through_put_metric( + kSsdReadThroughPutMetric, + std::unique_ptr(new CounterCollector(&leveldb::ssd_read_size_counter, true)), + {SubscriberType::THROUGHPUT}); +tera::AutoCollectorRegister ssd_write_through_put_metric( + kSsdWriteThroughPutMetric, + std::unique_ptr(new CounterCollector(&leveldb::ssd_write_size_counter, true)), + {SubscriberType::THROUGHPUT}); +tera::AutoCollectorRegister ssd_read_metric( + kSsdReadCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::ssd_read_counter, true)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister ssd_write_metric( + kSsdWriteCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::ssd_write_counter, true)), + {SubscriberType::QPS}); // local metrics -tera::AutoCollectorRegister posix_read_size_metric(kPosixReadThroughPutMetric, - std::unique_ptr(new CounterCollector(&leveldb::posix_read_size_counter, true)), {SubscriberType::THROUGHPUT}); -tera::AutoCollectorRegister posix_write_size_metric(kPosixWriteThroughPutMetric, - std::unique_ptr(new CounterCollector(&leveldb::posix_write_size_counter, true)), {SubscriberType::THROUGHPUT}); -tera::AutoCollectorRegister posix_read_metric(kPosixReadCountMetric, - std::unique_ptr(new CounterCollector(&leveldb::posix_read_counter, true)), {SubscriberType::QPS}); -tera::AutoCollectorRegister posix_write_metric(kPosixWriteCountMetric, - std::unique_ptr(new CounterCollector(&leveldb::posix_write_counter, true)), {SubscriberType::QPS}); -tera::AutoCollectorRegister posix_sync_metric(kPosixSyncCountMetric, - std::unique_ptr(new CounterCollector(&leveldb::posix_sync_counter, true)), {SubscriberType::QPS}); -tera::AutoCollectorRegister posix_list_metric(kPosixListCountMetric, - std::unique_ptr(new CounterCollector(&leveldb::posix_list_counter, true)), {SubscriberType::QPS}); -tera::AutoCollectorRegister posix_exists_metric(kPosixExistsCountMetric, - std::unique_ptr(new CounterCollector(&leveldb::posix_exists_counter, true)), {SubscriberType::QPS}); -tera::AutoCollectorRegister posix_open_metric(kPosixOpenCountMetric, - std::unique_ptr(new CounterCollector(&leveldb::posix_open_counter, true)), {SubscriberType::QPS}); -tera::AutoCollectorRegister posix_close_metric(kPosixCloseCountMetric, - std::unique_ptr(new CounterCollector(&leveldb::posix_close_counter, true)), {SubscriberType::QPS}); -tera::AutoCollectorRegister posix_delete_metric(kPosixDeleteCountMetric, - std::unique_ptr(new CounterCollector(&leveldb::posix_delete_counter, true)), {SubscriberType::QPS}); -tera::AutoCollectorRegister posix_tell_metric(kPosixTellCountMetric, - std::unique_ptr(new CounterCollector(&leveldb::posix_tell_counter, true)), {SubscriberType::QPS}); -tera::AutoCollectorRegister posix_seek_metric(kPosixSeekCountMetric, - std::unique_ptr(new CounterCollector(&leveldb::posix_seek_counter, true)), {SubscriberType::QPS}); -tera::AutoCollectorRegister posix_info_metric(kPosixInfoCountMetric, - std::unique_ptr(new CounterCollector(&leveldb::posix_info_counter, true)), {SubscriberType::QPS}); -tera::AutoCollectorRegister posix_other_metric(kPosixOtherCountMetric, - std::unique_ptr(new CounterCollector(&leveldb::posix_other_counter, true)), {SubscriberType::QPS}); - -tera::AutoCollectorRegister rawkey_compare_metric(kRawkeyCompareCountMetric, - std::unique_ptr(new CounterCollector(&leveldb::rawkey_compare_counter, true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister posix_read_size_metric( + kPosixReadThroughPutMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_read_size_counter, true)), + {SubscriberType::THROUGHPUT}); +tera::AutoCollectorRegister posix_write_size_metric( + kPosixWriteThroughPutMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_write_size_counter, true)), + {SubscriberType::THROUGHPUT}); +tera::AutoCollectorRegister posix_read_metric( + kPosixReadCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_read_counter, true)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister posix_write_metric( + kPosixWriteCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_write_counter, true)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister posix_sync_metric( + kPosixSyncCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_sync_counter, true)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister posix_list_metric( + kPosixListCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_list_counter, true)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister posix_exists_metric( + kPosixExistsCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_exists_counter, true)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister posix_open_metric( + kPosixOpenCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_open_counter, true)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister posix_close_metric( + kPosixCloseCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_close_counter, true)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister posix_delete_metric( + kPosixDeleteCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_delete_counter, true)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister posix_tell_metric( + kPosixTellCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_tell_counter, true)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister posix_seek_metric( + kPosixSeekCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_seek_counter, true)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister posix_info_metric( + kPosixInfoCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_info_counter, true)), + {SubscriberType::QPS}); +tera::AutoCollectorRegister posix_other_metric( + kPosixOtherCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_other_counter, true)), + {SubscriberType::QPS}); + +tera::AutoCollectorRegister rawkey_compare_metric( + kRawkeyCompareCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::rawkey_compare_counter, true)), + {SubscriberType::QPS}); tera::MetricCounter not_ready_counter(kNotReadyCountMetric, {SubscriberType::LATEST}, false); tera::MetricCounter ts_tablet_size_counter(kTabletSizeCounter, {SubscriberType::LATEST}, false); tera::MetricCounter ts_tablet_num_counter(kTabletNumCounter, {SubscriberType::LATEST}, false); +tera::MetricCounter mem_table_size(kMemTableSize, {SubscriberType::LATEST}, false); class TabletNodeSysInfoDumper { -public: - TabletNodeSysInfoDumper(const std::string& filename) : - filename_(filename), fp_(NULL) { - - } - ~TabletNodeSysInfoDumper() { - if (fp_) { - fclose(fp_); - fp_ = NULL; - } + public: + explicit TabletNodeSysInfoDumper(const std::string& filename) + : filename_(filename), + fp_(NULL, [](FILE* f) { + if (f) { + fclose(f); + } + }) { + std::string dirname = filename_.substr(0, filename_.rfind('/')); + int ret = mkdir(dirname.c_str(), 0755); + if (ret != 0 && errno != EEXIST) { + LOG(ERROR) << "fail to make dump dir " << dirname; } - template - bool DumpData(const std::string& item_name, T data) { - if (!fp_) { - std::string dirname = filename_.substr(0, filename_.rfind('/')); - int ret = mkdir(dirname.c_str(), 0755); - if (ret != 0 && errno != EEXIST) { - LOG(ERROR) << "fail to make dump dir " << dirname; - return false; - } - fp_ = fopen(filename_.c_str(), "w"); - if (!fp_) { - LOG(ERROR) << "fail to open dump file " << filename_; - return false; - } - } - std::stringstream ss; - ss << item_name << " : " << data; - fprintf(fp_, "%s\r\n", ss.str().c_str()); - return true; + auto tmp_file = fopen(filename_.c_str(), "w"); + if (!tmp_file) { + LOG(ERROR) << "fail to open dump file " << filename_; } -private: - std::string filename_; - FILE* fp_; + fp_.reset(tmp_file); + } + ~TabletNodeSysInfoDumper() = default; + + template + bool DumpData(const std::string& item_name, T data) const { + std::stringstream ss; + ss << item_name << " : " << data; + fprintf(fp_.get(), "%s\r\n", ss.str().c_str()); + return true; + } + + private: + std::string filename_; + std::unique_ptr> fp_; }; -TabletNodeSysInfo::TabletNodeSysInfo() { -} - -TabletNodeSysInfo::TabletNodeSysInfo(const TabletNodeInfo& info) - : info_(info) { +TabletNodeSysInfo::TabletNodeSysInfo() + : info_{new TabletNodeInfo}, tablet_list_{new TabletMetaList} { + RegisterDumpInfoFunction(&TabletNodeSysInfo::DumpSysInfo); + RegisterDumpInfoFunction(&TabletNodeSysInfo::DumpHardWareInfo); + RegisterDumpInfoFunction(&TabletNodeSysInfo::DumpIoInfo); + RegisterDumpInfoFunction(&TabletNodeSysInfo::DumpCacheInfo); + RegisterDumpInfoFunction(&TabletNodeSysInfo::DumpRequestInfo); + RegisterDumpInfoFunction(&TabletNodeSysInfo::DumpDfsInfo); + RegisterDumpInfoFunction(&TabletNodeSysInfo::DumpPosixInfo); + RegisterDumpInfoFunction(&TabletNodeSysInfo::DumpLevelSizeInfo); + RegisterDumpInfoFunction(&TabletNodeSysInfo::DumpPersistentCacheInfo); + RegisterDumpInfoFunction(&TabletNodeSysInfo::DumpOtherInfo); } TabletNodeSysInfo::~TabletNodeSysInfo() {} void TabletNodeSysInfo::AddExtraInfo(const std::string& name, int64_t value) { - MutexLock lock(&mutex_); - ExtraTsInfo* e_info = info_.add_extra_info(); - e_info->set_name(name); - e_info->set_value(value); + MutexLock lock(&mutex_); + if (!info_.unique()) { + SwitchInfo(); + } + assert(info_.unique()); + ExtraTsInfo* e_info = info_->add_extra_info(); + e_info->set_name(name); + e_info->set_value(value); } void TabletNodeSysInfo::SetProcessStartTime(int64_t ts) { - MutexLock lock(&mutex_); - info_.set_process_start_time(ts); + MutexLock lock(&mutex_); + if (!info_.unique()) { + SwitchInfo(); + } + assert(info_.unique()); + info_->set_process_start_time(ts); } void TabletNodeSysInfo::SetTimeStamp(int64_t ts) { - MutexLock lock(&mutex_); - info_.set_timestamp(ts); + MutexLock lock(&mutex_); + if (!info_.unique()) { + SwitchInfo(); + } + assert(info_.unique()); + info_->set_timestamp(ts); } struct DBSize { - uint64_t size; - std::vector lg_size; + uint64_t size; + std::vector lg_size; }; void TabletNodeSysInfo::RefreshTabletsStatus(TabletManager* tablet_manager) { - std::vector tablet_ios; - tablet_manager->GetAllTablets(&tablet_ios); - std::vector::iterator it = tablet_ios.begin(); - while (it != tablet_ios.end()) { - io::TabletIO* tablet_io = *it; - if (tablet_io->ShouldForceUnloadOnError()) { - LOG(WARNING) << *tablet_io << ", has internal error triggered unload"; - StatusCode status; - if (!tablet_io->Unload(&status)) { - LOG(ERROR) << *tablet_io << ", Unload tablet failed, status: " - << StatusCodeToString(status); - } - if (!tablet_manager->RemoveTablet(tablet_io->GetTableName(), - tablet_io->GetStartKey(), tablet_io->GetEndKey(), &status)) { - LOG(ERROR) << *tablet_io << ", remove from TabletManager failed, status: " - << StatusCodeToString(status); - } - tablet_io->DecRef(); - it = tablet_ios.erase(it); - continue; - } - - // sham random sleep (0-999 ms) to relieve nfs pressure - ThisThread::Sleep(get_millis() % 1000); - // refresh db status whether is corruption - tablet_io->RefreshDBStatus(); - tablet_io->DecRef(); - ++it; + std::vector tablet_ios; + tablet_manager->GetAllTablets(&tablet_ios); + std::vector::iterator it = tablet_ios.begin(); + while (it != tablet_ios.end()) { + io::TabletIO* tablet_io = *it; + if (tablet_io->ShouldForceUnloadOnError()) { + LOG(WARNING) << *tablet_io << ", has internal error triggered unload"; + StatusCode status; + if (!tablet_io->Unload(&status)) { + LOG(ERROR) << *tablet_io + << ", Unload tablet failed, status: " << StatusCodeToString(status); + } + if (!tablet_manager->RemoveTablet(tablet_io->GetTableName(), tablet_io->GetStartKey(), + tablet_io->GetEndKey(), &status)) { + LOG(ERROR) << *tablet_io + << ", remove from TabletManager failed, status: " << StatusCodeToString(status); + } + tablet_io->DecRef(); + it = tablet_ios.erase(it); + continue; } + + // sham random sleep (0-999 ms) to relieve nfs pressure + ThisThread::Sleep(get_millis() % 1000); + // refresh db status whether is corruption + tablet_io->RefreshDBStatus(); + tablet_io->DecRef(); + ++it; + } } void TabletNodeSysInfo::CollectTabletNodeInfo(TabletManager* tablet_manager, const string& server_addr) { - std::vector tablet_ios; - std::vector db_status_vec; - std::vector db_size_vec; - - tablet_manager->GetAllTablets(&tablet_ios); - ts_tablet_num_counter.Set(tablet_ios.size()); - - std::vector::iterator it = tablet_ios.begin(); - while (it != tablet_ios.end()) { - io::TabletIO* tablet_io = *it; - if (tablet_io->ShouldForceUnloadOnError()) { - LOG(WARNING) << *tablet_io << ", has internal error triggered unload"; - StatusCode status; - if (!tablet_io->Unload(&status)) { - LOG(ERROR) << *tablet_io << ", Unload tablet failed, status: " - << StatusCodeToString(status); - } - if (!tablet_manager->RemoveTablet(tablet_io->GetTableName(), - tablet_io->GetStartKey(), tablet_io->GetEndKey(), &status)) { - LOG(ERROR) << *tablet_io << ", remove from TabletManager failed, status: " - << StatusCodeToString(status); - } - tablet_io->DecRef(); - it = tablet_ios.erase(it); - continue; - } - - // check db status whether is corruption - TabletMeta::TabletStatus tablet_status = static_cast(kTabletReady); - tablet_io->GetDBStatus(&tablet_status); - db_status_vec.push_back(tablet_status); - - DBSize db_size; - tablet_io->GetDataSize(&db_size.size, &db_size.lg_size); - db_size_vec.push_back(db_size); - - ++it; + std::vector tablet_ios; + std::vector db_status_vec; + std::vector db_size_vec; + + tablet_manager->GetAllTablets(&tablet_ios); + ts_tablet_num_counter.Set(tablet_ios.size()); + + std::vector::iterator it = tablet_ios.begin(); + while (it != tablet_ios.end()) { + io::TabletIO* tablet_io = *it; + if (tablet_io->ShouldForceUnloadOnError()) { + LOG(WARNING) << *tablet_io << ", has internal error triggered unload"; + StatusCode status; + if (!tablet_io->Unload(&status)) { + LOG(ERROR) << *tablet_io + << ", Unload tablet failed, status: " << StatusCodeToString(status); + } + if (!tablet_manager->RemoveTablet(tablet_io->GetTableName(), tablet_io->GetStartKey(), + tablet_io->GetEndKey(), &status)) { + LOG(ERROR) << *tablet_io + << ", remove from TabletManager failed, status: " << StatusCodeToString(status); + } + tablet_io->DecRef(); + it = tablet_ios.erase(it); + continue; } - MutexLock lock(&mutex_); - std::shared_ptr latest_report = CollectorReportPublisher::GetInstance().GetCollectorReport(); - int64_t interval = latest_report->interval_ms; - if (interval <= 0) { - // maybe happen at first report, the metric values must be 0 - // set to any non-zero value to avoid div 0 - VLOG(16) << "Metric Report interval is 0"; - interval = 1000; + // check db status whether is corruption + TabletMeta::TabletStatus tablet_status = static_cast(kTabletReady); + tablet_io->GetDBStatus(&tablet_status); + db_status_vec.push_back(tablet_status); + + DBSize db_size; + uint64_t tmp_mem_table_size{0}; + tablet_io->GetDataSize(&db_size.size, &db_size.lg_size, &tmp_mem_table_size); + mem_table_size.Set((int64_t)tmp_mem_table_size); + db_size_vec.push_back(db_size); + + ++it; + } + + MutexLock lock(&mutex_); + if (!info_.unique()) { + SwitchInfo(); + } + assert(info_.unique()); + std::shared_ptr latest_report = + CollectorReportPublisher::GetInstance().GetCollectorReport(); + int64_t interval = latest_report->interval_ms; + if (interval <= 0) { + // maybe happen at first report, the metric values must be 0 + // set to any non-zero value to avoid div 0 + VLOG(16) << "Metric Report interval is 0"; + interval = 1000; + } + + tablet_list_->Clear(); + int64_t total_size = 0; + int64_t scan_kvs = 0; + int64_t read_kvs = 0; + int64_t write_kvs = 0; + int64_t busy_cnt = 0; + int64_t not_ready = 0; + int64_t db_corruption_cnt = 0; + + for (uint32_t i = 0; i < tablet_ios.size(); i++) { + io::TabletIO* tablet_io = tablet_ios[i]; + TabletMeta::TabletStatus tablet_status = db_status_vec[i]; + DBSize db_size = db_size_vec[i]; + + TabletMeta* tablet_meta = tablet_list_->add_meta(); + tablet_meta->set_status(TabletMeta::TabletStatus(tablet_io->GetStatus())); + if (tablet_meta->status() != TabletMeta::kTabletReady) { + ++not_ready; } - - tablet_list_.Clear(); - int64_t total_size = 0; - int64_t scan_kvs = 0; - int64_t read_kvs = 0; - int64_t write_kvs = 0; - int64_t busy_cnt = 0; - int64_t not_ready = 0; - int64_t db_corruption_cnt = 0; - - for (uint32_t i = 0; i < tablet_ios.size(); i++) { - io::TabletIO* tablet_io = tablet_ios[i]; - TabletMeta::TabletStatus tablet_status = db_status_vec[i]; - DBSize db_size = db_size_vec[i]; - - TabletMeta* tablet_meta = tablet_list_.add_meta(); - tablet_meta->set_status(TabletMeta::TabletStatus(tablet_io->GetStatus())); - if (tablet_meta->status() != TabletMeta::kTabletReady) { - ++ not_ready; - } - tablet_meta->set_server_addr(server_addr); - tablet_meta->set_table_name(tablet_io->GetTableName()); - tablet_meta->set_path(tablet_io->GetTablePath()); - tablet_meta->mutable_key_range()->set_key_start(tablet_io->GetStartKey()); - tablet_meta->mutable_key_range()->set_key_end(tablet_io->GetEndKey()); - - tablet_meta->set_size(db_size.size); - for (size_t i = 0; i < db_size.lg_size.size(); ++i) { - tablet_meta->add_lg_size(db_size.lg_size[i]); - } - tablet_meta->set_compact_status(tablet_io->GetCompactStatus()); - total_size += tablet_meta->size(); - - TabletCounter* counter = tablet_list_.add_counter(); - const std::string& label_str = tablet_io->GetMetricLabel(); - counter->set_low_read_cell(latest_report->FindMetricValue(kLowReadCellMetricName, label_str)); - counter->set_scan_rows(latest_report->FindMetricValue(kScanRowsMetricName, label_str)); - counter->set_scan_kvs(latest_report->FindMetricValue(kScanKvsMetricName, label_str)); - counter->set_scan_size(latest_report->FindMetricValue(kScanThroughPutMetricName, label_str)); - counter->set_read_rows(latest_report->FindMetricValue(kReadRowsMetricName, label_str)); - counter->set_read_kvs(latest_report->FindMetricValue(kReadKvsMetricName, label_str)); - counter->set_read_size(latest_report->FindMetricValue(kReadThroughPutMetricName, label_str)); - counter->set_write_rows(latest_report->FindMetricValue(kWriteRowsMetricName, label_str)); - counter->set_write_kvs(latest_report->FindMetricValue(kWriteKvsMetricName, label_str)); - counter->set_write_size(latest_report->FindMetricValue(kWriteThroughPutMetricName, label_str)); - counter->set_is_on_busy(tablet_io->IsBusy()); - double write_workload = 0; - tablet_io->Workload(&write_workload); - counter->set_write_workload(write_workload); - counter->set_db_status(tablet_status); // set runtime counter - - scan_kvs += counter->scan_kvs(); - read_kvs += counter->read_kvs(); - write_kvs += counter->write_kvs(); - - if (counter->is_on_busy()) { - busy_cnt++; - } - if (counter->db_status() == TabletMeta::kTabletCorruption) { - db_corruption_cnt++; - } - tablet_io->DecRef(); + tablet_meta->set_server_addr(server_addr); + tablet_meta->set_table_name(tablet_io->GetTableName()); + tablet_meta->set_path(tablet_io->GetTablePath()); + tablet_meta->mutable_key_range()->set_key_start(tablet_io->GetStartKey()); + tablet_meta->mutable_key_range()->set_key_end(tablet_io->GetEndKey()); + tablet_meta->set_create_time(tablet_io->CreateTime()); + tablet_meta->set_version(tablet_io->Version()); + tablet_meta->set_size(db_size.size); + for (size_t i = 0; i < db_size.lg_size.size(); ++i) { + tablet_meta->add_lg_size(db_size.lg_size[i]); + } + tablet_meta->set_compact_status(tablet_io->GetCompactStatus()); + total_size += tablet_meta->size(); + + TabletCounter* counter = tablet_list_->add_counter(); + const std::string& label_str = tablet_io->GetMetricLabel(); + counter->set_low_read_cell(latest_report->FindMetricValue(kLowReadCellMetricName, label_str)); + counter->set_scan_rows(latest_report->FindMetricValue(kScanRowsMetricName, label_str)); + counter->set_scan_kvs(latest_report->FindMetricValue(kScanKvsMetricName, label_str)); + counter->set_scan_size(latest_report->FindMetricValue(kScanThroughPutMetricName, label_str)); + counter->set_read_rows(latest_report->FindMetricValue(kReadRowsMetricName, label_str)); + counter->set_read_kvs(latest_report->FindMetricValue(kReadKvsMetricName, label_str)); + counter->set_read_size(latest_report->FindMetricValue(kReadThroughPutMetricName, label_str)); + counter->set_write_rows(latest_report->FindMetricValue(kWriteRowsMetricName, label_str)); + counter->set_write_kvs(latest_report->FindMetricValue(kWriteKvsMetricName, label_str)); + counter->set_write_size(latest_report->FindMetricValue(kWriteThroughPutMetricName, label_str)); + counter->set_is_on_busy(tablet_io->IsBusy()); + double write_workload = 0; + tablet_io->Workload(&write_workload); + counter->set_write_workload(write_workload); + counter->set_db_status(tablet_status); // set runtime counter + + scan_kvs += counter->scan_kvs(); + read_kvs += counter->read_kvs(); + write_kvs += counter->write_kvs(); + + if (counter->is_on_busy()) { + busy_cnt++; + } + if (counter->db_status() == TabletMeta::kTabletCorruption) { + db_corruption_cnt++; } - not_ready_counter.Set(not_ready); - ts_tablet_size_counter.Set(total_size); - - int64_t low_read_cell = - latest_report->FindMetricValue(kLowLevelReadMetric); - int64_t read_rows = - latest_report->FindMetricValue(kRowCountMetric, kApiLabelRead); - int64_t read_size = - latest_report->FindMetricValue(kRowThroughPutMetric, kApiLabelRead); - int64_t write_rows = - latest_report->FindMetricValue(kRowCountMetric, kApiLabelWrite); - int64_t write_size = - latest_report->FindMetricValue(kRowThroughPutMetric, kApiLabelWrite); - int64_t scan_rows = - latest_report->FindMetricValue(kRowCountMetric, kApiLabelScan); - int64_t scan_size = - latest_report->FindMetricValue(kRowThroughPutMetric, kApiLabelScan); - - info_.set_low_read_cell(low_read_cell * 1000 / interval); - info_.set_scan_rows(scan_rows * 1000 / interval); - info_.set_scan_kvs(scan_kvs * 1000 / interval); - info_.set_scan_size(scan_size * 1000 / interval); - info_.set_read_rows(read_rows * 1000 / interval); - info_.set_read_kvs(read_kvs * 1000 / interval); - info_.set_read_size(read_size * 1000 / interval); - info_.set_write_rows(write_rows * 1000 / interval); - info_.set_write_kvs(write_kvs * 1000 / interval); - info_.set_write_size(write_size * 1000 / interval); - info_.set_tablet_onbusy(busy_cnt); - info_.set_tablet_corruption(db_corruption_cnt); - - // refresh tabletnodeinfo - info_.set_load(total_size); - info_.set_tablet_total(tablet_ios.size()); - - int64_t tmp; - tmp = latest_report->FindMetricValue(kDfsReadBytesThroughPut) * 1000 / interval; - info_.set_dfs_io_r(tmp); - tmp = latest_report->FindMetricValue(kDfsWriteBytesThroughPut) * 1000 / interval; - info_.set_dfs_io_w(tmp); - tmp = latest_report->FindMetricValue(kPosixReadThroughPutMetric) * 1000 / interval; - info_.set_local_io_r(tmp); - tmp = latest_report->FindMetricValue(kPosixWriteThroughPutMetric) * 1000 / interval; - info_.set_local_io_w(tmp); - - int64_t read_pending = latest_report->FindMetricValue(kPendingCountMetric, kApiLabelRead); - int64_t write_pending = latest_report->FindMetricValue(kPendingCountMetric, kApiLabelWrite); - int64_t scan_pending = latest_report->FindMetricValue(kPendingCountMetric, kApiLabelScan); - int64_t compact_pending = latest_report->FindMetricValue(kPendingCountMetric, kApiLabelCompact); - - info_.set_read_pending(read_pending); - info_.set_write_pending(write_pending); - info_.set_scan_pending(scan_pending); - - // collect extra infos - info_.clear_extra_info(); - ExtraTsInfo* einfo = info_.add_extra_info(); - - int64_t range_error_sum = - latest_report->FindMetricValue(kRangeErrorMetric, kApiLabelRead) + - latest_report->FindMetricValue(kRangeErrorMetric, kApiLabelWrite) + - latest_report->FindMetricValue(kRangeErrorMetric, kApiLabelScan); - - tmp = range_error_sum * 1000 / interval; - einfo->set_name("range_error"); - einfo->set_value(tmp); - - einfo = info_.add_extra_info(); - einfo->set_name("read_pending"); - einfo->set_value(read_pending); - - einfo = info_.add_extra_info(); - einfo->set_name("write_pending"); - einfo->set_value(write_pending); - - einfo = info_.add_extra_info(); - einfo->set_name("scan_pending"); - einfo->set_value(scan_pending); - - einfo = info_.add_extra_info(); - einfo->set_name("compact_pending"); - einfo->set_value(compact_pending); - - einfo = info_.add_extra_info(); - tmp = latest_report->FindMetricValue(kRejectCountMetric, kApiLabelRead) * 1000 / interval; - einfo->set_name("read_reject"); - einfo->set_value(tmp); - - einfo = info_.add_extra_info(); - tmp = latest_report->FindMetricValue(kRejectCountMetric, kApiLabelWrite) * 1000 / interval; - einfo->set_name("write_reject"); - einfo->set_value(tmp); - - einfo = info_.add_extra_info(); - tmp = latest_report->FindMetricValue(kRejectCountMetric, kApiLabelScan) * 1000 / interval; - einfo->set_name("scan_reject"); - einfo->set_value(tmp); - - einfo = info_.add_extra_info(); - tmp = latest_report->FindMetricValue(kRequestCountMetric, kApiLabelRead) * 1000 / interval; - einfo->set_name("read_request"); - einfo->set_value(tmp); - - einfo = info_.add_extra_info(); - tmp = latest_report->FindMetricValue(kRequestCountMetric, kApiLabelWrite) * 1000 / interval; - einfo->set_name("write_request"); - einfo->set_value(tmp); - - einfo = info_.add_extra_info(); - tmp = latest_report->FindMetricValue(kRequestCountMetric, kApiLabelScan) * 1000 / interval; - einfo->set_name("scan_request"); - einfo->set_value(tmp); - - einfo = info_.add_extra_info(); - tmp = latest_report->FindMetricValue(kErrorCountMetric, kApiLabelRead) * 1000 / interval; - einfo->set_name("read_error"); - einfo->set_value(tmp); - - einfo = info_.add_extra_info(); - tmp = latest_report->FindMetricValue(kErrorCountMetric, kApiLabelWrite) * 1000 / interval; - einfo->set_name("write_error"); - einfo->set_value(tmp); - - einfo = info_.add_extra_info(); - tmp = latest_report->FindMetricValue(kErrorCountMetric, kApiLabelScan) * 1000 / interval; - einfo->set_name("scan_error"); - einfo->set_value(tmp); + tablet_io->DecRef(); + } + not_ready_counter.Set(not_ready); + ts_tablet_size_counter.Set(total_size); + + int64_t low_read_cell = latest_report->FindMetricValue(kLowLevelReadMetric); + int64_t read_rows = latest_report->FindMetricValue(kRowCountMetric, kApiLabelRead); + int64_t read_size = latest_report->FindMetricValue(kRowThroughPutMetric, kApiLabelRead); + int64_t write_rows = latest_report->FindMetricValue(kRowCountMetric, kApiLabelWrite); + int64_t write_size = latest_report->FindMetricValue(kRowThroughPutMetric, kApiLabelWrite); + int64_t scan_rows = latest_report->FindMetricValue(kRowCountMetric, kApiLabelScan); + int64_t scan_size = latest_report->FindMetricValue(kRowThroughPutMetric, kApiLabelScan); + + info_->set_low_read_cell(low_read_cell * 1000 / interval); + info_->set_scan_rows(scan_rows * 1000 / interval); + info_->set_scan_kvs(scan_kvs * 1000 / interval); + info_->set_scan_size(scan_size * 1000 / interval); + info_->set_read_rows(read_rows * 1000 / interval); + info_->set_read_kvs(read_kvs * 1000 / interval); + info_->set_read_size(read_size * 1000 / interval); + info_->set_write_rows(write_rows * 1000 / interval); + info_->set_write_kvs(write_kvs * 1000 / interval); + info_->set_write_size(write_size * 1000 / interval); + info_->set_tablet_onbusy(busy_cnt); + info_->set_tablet_corruption(db_corruption_cnt); + // refresh tabletnodeinfo + info_->set_load(total_size); + info_->set_tablet_total(tablet_ios.size()); + + int64_t tmp; + tmp = latest_report->FindMetricValue(kDfsReadBytesThroughPut) * 1000 / interval; + info_->set_dfs_io_r(tmp); + tmp = latest_report->FindMetricValue(kDfsWriteBytesThroughPut) * 1000 / interval; + info_->set_dfs_io_w(tmp); + tmp = latest_report->FindMetricValue(kPosixReadThroughPutMetric) * 1000 / interval; + info_->set_local_io_r(tmp); + tmp = latest_report->FindMetricValue(kPosixWriteThroughPutMetric) * 1000 / interval; + info_->set_local_io_w(tmp); + // Requests need to go through dfs's master + tmp = latest_report->FindMetricValue(kDfsRequestMetric, kDfsOpenLabel) + + latest_report->FindMetricValue(kDfsRequestMetric, kDfsCloseLabel) + + latest_report->FindMetricValue(kDfsRequestMetric, kDfsDeleteLabel); + info_->set_dfs_master_qps(tmp * 1000 / interval); + + int64_t read_pending = latest_report->FindMetricValue(kPendingCountMetric, kApiLabelRead); + int64_t write_pending = latest_report->FindMetricValue(kPendingCountMetric, kApiLabelWrite); + int64_t scan_pending = latest_report->FindMetricValue(kPendingCountMetric, kApiLabelScan); + int64_t compact_pending = latest_report->FindMetricValue(kPendingCountMetric, kApiLabelCompact); + + info_->set_read_pending(read_pending); + info_->set_write_pending(write_pending); + info_->set_scan_pending(scan_pending); + + // collect extra infos + info_->clear_extra_info(); + ExtraTsInfo* einfo = info_->add_extra_info(); + + int64_t range_error_sum = latest_report->FindMetricValue(kRangeErrorMetric, kApiLabelRead) + + latest_report->FindMetricValue(kRangeErrorMetric, kApiLabelWrite) + + latest_report->FindMetricValue(kRangeErrorMetric, kApiLabelScan); + + tmp = range_error_sum * 1000 / interval; + einfo->set_name("range_error"); + einfo->set_value(tmp); + + einfo = info_->add_extra_info(); + einfo->set_name("read_pending"); + einfo->set_value(read_pending); + + einfo = info_->add_extra_info(); + einfo->set_name("write_pending"); + einfo->set_value(write_pending); + + einfo = info_->add_extra_info(); + einfo->set_name("scan_pending"); + einfo->set_value(scan_pending); + + einfo = info_->add_extra_info(); + einfo->set_name("compact_pending"); + einfo->set_value(compact_pending); + + einfo = info_->add_extra_info(); + tmp = latest_report->FindMetricValue(kRejectCountMetric, kApiLabelRead) * 1000 / interval; + einfo->set_name("read_reject"); + einfo->set_value(tmp); + + einfo = info_->add_extra_info(); + tmp = latest_report->FindMetricValue(kRejectCountMetric, kApiLabelWrite) * 1000 / interval; + einfo->set_name("write_reject"); + einfo->set_value(tmp); + + einfo = info_->add_extra_info(); + tmp = latest_report->FindMetricValue(kRejectCountMetric, kApiLabelScan) * 1000 / interval; + einfo->set_name("scan_reject"); + einfo->set_value(tmp); + + einfo = info_->add_extra_info(); + tmp = latest_report->FindMetricValue(kRequestCountMetric, kApiLabelRead) * 1000 / interval; + einfo->set_name("read_request"); + einfo->set_value(tmp); + + einfo = info_->add_extra_info(); + tmp = latest_report->FindMetricValue(kRequestCountMetric, kApiLabelWrite) * 1000 / interval; + einfo->set_name("write_request"); + einfo->set_value(tmp); + + einfo = info_->add_extra_info(); + tmp = latest_report->FindMetricValue(kRequestCountMetric, kApiLabelScan) * 1000 / interval; + einfo->set_name("scan_request"); + einfo->set_value(tmp); + + einfo = info_->add_extra_info(); + tmp = latest_report->FindMetricValue(kErrorCountMetric, kApiLabelRead) * 1000 / interval; + einfo->set_name("read_error"); + einfo->set_value(tmp); + + einfo = info_->add_extra_info(); + tmp = latest_report->FindMetricValue(kErrorCountMetric, kApiLabelWrite) * 1000 / interval; + einfo->set_name("write_error"); + einfo->set_value(tmp); + + einfo = info_->add_extra_info(); + tmp = latest_report->FindMetricValue(kErrorCountMetric, kApiLabelScan) * 1000 / interval; + einfo->set_name("scan_error"); + einfo->set_value(tmp); } void TabletNodeSysInfo::CollectHardwareInfo() { - MutexLock lock(&mutex_); - std::shared_ptr latest_report = CollectorReportPublisher::GetInstance().GetCollectorReport(); + MutexLock lock(&mutex_); + if (!info_.unique()) { + SwitchInfo(); + } + assert(info_.unique()); + std::shared_ptr latest_report = + CollectorReportPublisher::GetInstance().GetCollectorReport(); - int64_t cpu_usage = latest_report->FindMetricValue(kInstCpuMetricName); - info_.set_cpu_usage(static_cast(cpu_usage)); + int64_t cpu_usage = latest_report->FindMetricValue(kInstCpuMetricName); + info_->set_cpu_usage(static_cast(cpu_usage)); - int64_t mem_usage = latest_report->FindMetricValue(kInstMemMetricName); - info_.set_mem_used(mem_usage); + int64_t mem_usage = latest_report->FindMetricValue(kInstMemMetricName); + info_->set_mem_used(mem_usage); - int64_t net_rx_usage = latest_report->FindMetricValue(kInstNetRXMetricName); - info_.set_net_rx(net_rx_usage); + int64_t net_rx_usage = latest_report->FindMetricValue(kInstNetRXMetricName); + info_->set_net_rx(net_rx_usage); - int64_t net_tx_usage = latest_report->FindMetricValue(kInstNetTXMetricName); - info_.set_net_tx(net_tx_usage); + int64_t net_tx_usage = latest_report->FindMetricValue(kInstNetTXMetricName); + info_->set_net_tx(net_tx_usage); } void TabletNodeSysInfo::GetTabletNodeInfo(TabletNodeInfo* info) { - MutexLock lock(&mutex_); - info->CopyFrom(info_); + MutexLock lock(&mutex_); + info->CopyFrom(*info_); } void TabletNodeSysInfo::GetTabletMetaList(TabletMetaList* meta_list) { - MutexLock lock(&mutex_); - meta_list->CopyFrom(tablet_list_); + MutexLock lock(&mutex_); + meta_list->CopyFrom(*tablet_list_); } void TabletNodeSysInfo::SetServerAddr(const std::string& addr) { - MutexLock lock(&mutex_); - info_.set_addr(addr); + MutexLock lock(&mutex_); + if (!info_.unique()) { + SwitchInfo(); + } + assert(info_.unique()); + info_->set_addr(addr); +} + +void TabletNodeSysInfo::SetPersistentCacheSize(uint64_t size) { + MutexLock lock(&mutex_); + if (!info_.unique()) { + SwitchInfo(); + } + assert(info_.unique()); + info_->set_persistent_cache_size(size); } void TabletNodeSysInfo::SetStatus(StatusCode status) { - MutexLock lock(&mutex_); - info_.set_status_t(status); + MutexLock lock(&mutex_); + if (!info_.unique()) { + SwitchInfo(); + } + assert(info_.unique()); + info_->set_status_t(status); } -void TabletNodeSysInfo::DumpLog() { - MutexLock lock(&mutex_); - std::shared_ptr latest_report = CollectorReportPublisher::GetInstance().GetCollectorReport(); - int64_t interval = latest_report->interval_ms; +void TabletNodeSysInfo::DumpSysInfo(const std::shared_ptr& info_ptr, + const std::shared_ptr& latest_report, + const TabletNodeSysInfoDumper& dumper) { + double snappy_ratio = latest_report->FindMetricValue(kSnappyCompressionRatioMetric); + if (snappy_ratio > 0) { + snappy_ratio /= 100.0; + } + + int64_t rawkey_compare_count = latest_report->FindMetricValue(kRawkeyCompareCountMetric); + + if (FLAGS_tera_tabletnode_dump_running_info) { + dumper.DumpData("low_level", info_ptr->low_read_cell()); + dumper.DumpData("read", info_ptr->read_rows()); + dumper.DumpData("rspeed", info_ptr->read_size()); + dumper.DumpData("write", info_ptr->write_rows()); + dumper.DumpData("wspeed", info_ptr->write_size()); + dumper.DumpData("scan", info_ptr->scan_rows()); + dumper.DumpData("sspeed", info_ptr->scan_size()); + dumper.DumpData("snappy", snappy_ratio); + dumper.DumpData("rowcomp", rawkey_compare_count); + } + + LOG(INFO) << "[SysInfo]" + << " low_level " << info_ptr->low_read_cell() << " read " << info_ptr->read_rows() + << " rspeed " << utils::ConvertByteToString(info_ptr->read_size()) << " write " + << info_ptr->write_rows() << " wspeed " + << utils::ConvertByteToString(info_ptr->write_size()) << " scan " + << info_ptr->scan_rows() << " sspeed " + << utils::ConvertByteToString(info_ptr->scan_size()) << " snappy " << snappy_ratio + << " rawcomp " << rawkey_compare_count; +} - TabletNodeSysInfoDumper dumper(FLAGS_tera_tabletnode_running_info_dump_file); +void TabletNodeSysInfo::DumpHardWareInfo(const std::shared_ptr& info_ptr, + const std::shared_ptr& latest_report, + const TabletNodeSysInfoDumper& dumper) { + // hardware info + if (FLAGS_tera_tabletnode_dump_running_info) { + dumper.DumpData("mem_used", info_ptr->mem_used()); + dumper.DumpData("net_tx", info_ptr->net_tx()); + dumper.DumpData("net_rx", info_ptr->net_rx()); + dumper.DumpData("cpu_usage", info_ptr->cpu_usage()); + } + + LOG(INFO) << "[HardWare Info] " + << " mem_used " << info_ptr->mem_used() << " " + << utils::ConvertByteToString(info_ptr->mem_used()) << " net_tx " << info_ptr->net_tx() + << " " << utils::ConvertByteToString(info_ptr->net_tx()) << " net_rx " + << info_ptr->net_rx() << " " << utils::ConvertByteToString(info_ptr->net_rx()) + << " cpu_usage " << info_ptr->cpu_usage() << "%"; +} - double snappy_ratio = latest_report->FindMetricValue(kSnappyCompressionRatioMetric); - if (snappy_ratio > 0) { - snappy_ratio /= 100.0; - } +void TabletNodeSysInfo::DumpIoInfo(const std::shared_ptr& info_ptr, + const std::shared_ptr& latest_report, + const TabletNodeSysInfoDumper& dumper) { + int64_t ssd_read_count = latest_report->FindMetricValue(kSsdReadCountMetric); + int64_t ssd_read_size = latest_report->FindMetricValue(kSsdReadThroughPutMetric); + int64_t ssd_write_count = latest_report->FindMetricValue(kSsdWriteCountMetric); + int64_t ssd_write_size = latest_report->FindMetricValue(kSsdWriteThroughPutMetric); + + if (FLAGS_tera_tabletnode_dump_running_info) { + dumper.DumpData("dfs_r", info_ptr->dfs_io_r()); + dumper.DumpData("dfs_w", info_ptr->dfs_io_w()); + dumper.DumpData("local_r", info_ptr->local_io_r()); + dumper.DumpData("local_w", info_ptr->local_io_w()); + dumper.DumpData("ssd_r_counter", ssd_read_count); + dumper.DumpData("ssd_r_size", ssd_read_size); + dumper.DumpData("ssd_w_counter", ssd_write_count); + dumper.DumpData("ssd_w_size", ssd_write_size); + } + + LOG(INFO) << "[IO]" + << " dfs_r " << info_ptr->dfs_io_r() << " " + << utils::ConvertByteToString(info_ptr->dfs_io_r()) << " dfs_w " << info_ptr->dfs_io_w() + << " " << utils::ConvertByteToString(info_ptr->dfs_io_w()) << " local_r " + << info_ptr->local_io_r() << " " << utils::ConvertByteToString(info_ptr->local_io_r()) + << " local_w " << info_ptr->local_io_w() << " " + << utils::ConvertByteToString(info_ptr->local_io_w()) << " ssd_r " << ssd_read_count + << " " << utils::ConvertByteToString(ssd_read_size) << " ssd_w " << ssd_write_count + << " " << utils::ConvertByteToString(ssd_write_size); +} - int64_t rawkey_compare_count = latest_report->FindMetricValue(kRawkeyCompareCountMetric); +void TabletNodeSysInfo::DumpCacheInfo(const std::shared_ptr& info_ptr, + const std::shared_ptr& latest_report, + const TabletNodeSysInfoDumper& dumper) { + double block_cache_hitrate = + static_cast(latest_report->FindMetricValue(kBlockCacheHitRateMetric)) / 100.0; + if (block_cache_hitrate < 0.0) { + block_cache_hitrate = NAN; + } + int64_t block_cache_entries = latest_report->FindMetricValue(kBlockCacheEntriesMetric); + int64_t block_cache_charge = latest_report->FindMetricValue(kBlockCacheChargeMetric); + double table_cache_hitrate = + static_cast(latest_report->FindMetricValue(kTableCacheHitRateMetric)) / 100.0; + if (table_cache_hitrate < 0.0) { + table_cache_hitrate = NAN; + } + int64_t table_cache_entries = latest_report->FindMetricValue(kTableCacheEntriesMetric); + int64_t table_cache_charge = latest_report->FindMetricValue(kTableCacheChargeMetric); + if (FLAGS_tera_tabletnode_dump_running_info) { + dumper.DumpData("block_cache_hitrate", block_cache_hitrate); + dumper.DumpData("block_cache_entry", block_cache_entries); + dumper.DumpData("block_cache_bytes", block_cache_charge); + dumper.DumpData("table_cache_hitrate", table_cache_hitrate); + dumper.DumpData("table_cache_entry", table_cache_entries); + dumper.DumpData("table_cache_bytes", table_cache_charge); + } + LOG(INFO) << "[Cache HitRate/Cnt/Size] table_cache " << table_cache_hitrate << " " + << table_cache_entries << " " << table_cache_charge << ", block_cache " + << block_cache_hitrate << " " << block_cache_entries << " " << block_cache_charge; +} +void TabletNodeSysInfo::DumpRequestInfo(const std::shared_ptr& info_ptr, + const std::shared_ptr& latest_report, + const TabletNodeSysInfoDumper& dumper) { + auto interval = latest_report->interval_ms; + + int64_t finished_read_request = + latest_report->FindMetricValue(kFinishedRequestCountMetric, kApiLabelRead); + int64_t finished_write_request = + latest_report->FindMetricValue(kFinishedRequestCountMetric, kApiLabelWrite); + int64_t finished_scan_request = + latest_report->FindMetricValue(kFinishedRequestCountMetric, kApiLabelScan); + LOG(INFO) << "[Finished Requests] " + << "read: " << finished_read_request * 1000 / interval + << ", write: " << finished_write_request * 1000 / interval + << ", scan: " << finished_scan_request * 1000 / interval; + + int64_t read_request_delay_avg = + finished_read_request == 0 ? 0 : latest_report->FindMetricValue(kRequestDelayMetric, + kApiLabelRead) / + finished_read_request; + int64_t write_request_delay_avg = + finished_write_request == 0 ? 0 : latest_report->FindMetricValue(kRequestDelayMetric, + kApiLabelWrite) / + finished_write_request; + int64_t scan_request_delay_avg = + finished_scan_request == 0 ? 0 : latest_report->FindMetricValue(kRequestDelayMetric, + kApiLabelScan) / + finished_scan_request; + + int64_t read_delay_percentile_95 = + finished_read_request == 0 ? 0 : latest_report->FindMetricValue(kRequestDelayPercentileMetric, + kReadLabelPercentile95); + int64_t read_delay_percentile_99 = + finished_read_request == 0 ? 0 : latest_report->FindMetricValue(kRequestDelayPercentileMetric, + kReadLabelPercentile99); + int64_t write_delay_percentile_95 = + finished_write_request == 0 ? 0 : latest_report->FindMetricValue( + kRequestDelayPercentileMetric, kWriteLabelPercentile95); + int64_t write_delay_percentile_99 = + finished_write_request == 0 ? 0 : latest_report->FindMetricValue( + kRequestDelayPercentileMetric, kWriteLabelPercentile99); + int64_t scan_delay_percentile_95 = + finished_scan_request == 0 ? 0 : latest_report->FindMetricValue(kRequestDelayPercentileMetric, + kScanLabelPercentile95); + int64_t scan_delay_percentile_99 = + finished_scan_request == 0 ? 0 : latest_report->FindMetricValue(kRequestDelayPercentileMetric, + kScanLabelPercentile99); + + if (FLAGS_tera_tabletnode_dump_running_info) { + dumper.DumpData("read_delay_avg", read_request_delay_avg); + dumper.DumpData("read_delay_95", read_delay_percentile_95); + dumper.DumpData("read_delay_99", read_delay_percentile_99); + dumper.DumpData("write_delay_avg", write_request_delay_avg); + dumper.DumpData("write_delay_95", write_delay_percentile_95); + dumper.DumpData("write_delay_99", write_delay_percentile_99); + dumper.DumpData("scan_delay_avg", scan_request_delay_avg); + dumper.DumpData("scan_delay_95", scan_delay_percentile_95); + dumper.DumpData("scan_delay_99", scan_delay_percentile_99); + } + + LOG(INFO) << "[Requests Delay In Us] " + << "Read [Avg: " << read_request_delay_avg + << ", Percentile 95: " << read_delay_percentile_95 + << ", Percentile 99: " << read_delay_percentile_99 + << "]; Write [Avg: " << write_request_delay_avg + << ", Percentile 95: " << write_delay_percentile_95 + << ", Percentile 99: " << write_delay_percentile_99 + << "]; Scan [Avg: " << scan_request_delay_avg + << ", Percentile 95: " << scan_delay_percentile_95 + << ", Percentile 99: " << scan_delay_percentile_99 << "]"; + + int64_t read_rows = latest_report->FindMetricValue(kRowCountMetric, kApiLabelRead); + int64_t write_rows = latest_report->FindMetricValue(kRowCountMetric, kApiLabelWrite); + int64_t scan_rows = latest_report->FindMetricValue(kRowCountMetric, kApiLabelScan); + int64_t row_read_delay = + (read_rows == 0 ? 0 + : latest_report->FindMetricValue(kRowDelayMetric, kApiLabelRead) / read_rows); + int64_t row_write_delay = + (write_rows == 0 ? 0 : latest_report->FindMetricValue(kRowDelayMetric, kApiLabelWrite) / + write_rows); + int64_t row_scan_delay = + (scan_rows == 0 ? 0 + : latest_report->FindMetricValue(kRowDelayMetric, kApiLabelScan) / scan_rows); + LOG(INFO) << "[Row Delay In Ms] " + << "row_read_delay: " << row_read_delay / 1000.0 + << ", row_write_delay: " << row_write_delay / 1000.0 + << ", row_scan_delay: " << row_scan_delay / 1000.0; + + // extra info + std::ostringstream ss; + int cols = info_ptr->extra_info_size(); + ss << "[Pending] "; + for (int i = 0; i < cols; ++i) { + ss << info_ptr->extra_info(i).name() << " " << info_ptr->extra_info(i).value() << " "; if (FLAGS_tera_tabletnode_dump_running_info) { - dumper.DumpData("low_level", info_.low_read_cell()); - dumper.DumpData("read", info_.read_rows()); - dumper.DumpData("rspeed", info_.read_size()); - dumper.DumpData("write", info_.write_rows()); - dumper.DumpData("wspeed", info_.write_size()); - dumper.DumpData("scan", info_.scan_rows()); - dumper.DumpData("sspeed", info_.scan_size()); - dumper.DumpData("snappy", snappy_ratio); - dumper.DumpData("rowcomp", rawkey_compare_count); + dumper.DumpData(info_ptr->extra_info(i).name(), info_ptr->extra_info(i).value()); } + } + LOG(INFO) << ss.str(); +} - LOG(INFO) << "[SysInfo]" - << " low_level " << info_.low_read_cell() - << " read " << info_.read_rows() - << " rspeed " << utils::ConvertByteToString(info_.read_size()) - << " write " << info_.write_rows() - << " wspeed " << utils::ConvertByteToString(info_.write_size()) - << " scan " << info_.scan_rows() - << " sspeed " << utils::ConvertByteToString(info_.scan_size()) - << " snappy " << snappy_ratio - << " rawcomp " << rawkey_compare_count; - - // hardware info - if (FLAGS_tera_tabletnode_dump_running_info) { - dumper.DumpData("mem_used", info_.mem_used()); - dumper.DumpData("net_tx", info_.net_tx()); - dumper.DumpData("net_rx", info_.net_rx()); - dumper.DumpData("cpu_usage", info_.cpu_usage()); - } +void TabletNodeSysInfo::DumpDfsInfo(const std::shared_ptr& info_ptr, + const std::shared_ptr& latest_report, + const TabletNodeSysInfoDumper& dumper) { + int64_t dfs_read_delay = latest_report->FindMetricValue(kDfsReadDelayMetric); + int64_t dfs_write_delay = latest_report->FindMetricValue(kDfsWriteDelayMetric); + int64_t dfs_sync_delay = latest_report->FindMetricValue(kDfsSyncDelayMetric); + int64_t dfs_read_count = latest_report->FindMetricValue(kDfsRequestMetric, kDfsReadLabel); + int64_t dfs_write_count = latest_report->FindMetricValue(kDfsRequestMetric, kDfsWriteLabel); + int64_t dfs_sync_count = latest_report->FindMetricValue(kDfsRequestMetric, kDfsSyncLabel); + int64_t dfs_flush_count = latest_report->FindMetricValue(kDfsRequestMetric, kDfsFlushLabel); + int64_t dfs_list_count = latest_report->FindMetricValue(kDfsRequestMetric, kDfsListLabel); + int64_t dfs_other_count = latest_report->FindMetricValue(kDfsRequestMetric, kDfsOtherLabel); + int64_t dfs_exists_count = latest_report->FindMetricValue(kDfsRequestMetric, kDfsExistsLabel); + int64_t dfs_open_count = latest_report->FindMetricValue(kDfsRequestMetric, kDfsOpenLabel); + int64_t dfs_close_count = latest_report->FindMetricValue(kDfsRequestMetric, kDfsCloseLabel); + int64_t dfs_delete_count = latest_report->FindMetricValue(kDfsRequestMetric, kDfsDeleteLabel); + int64_t dfs_tell_count = latest_report->FindMetricValue(kDfsRequestMetric, kDfsTellLabel); + int64_t dfs_info_count = latest_report->FindMetricValue(kDfsRequestMetric, kDfsInfoLabel); + int64_t dfs_read_hang = latest_report->FindMetricValue(kDfsHangMetric, kDfsReadLabel); + int64_t dfs_write_hang = latest_report->FindMetricValue(kDfsHangMetric, kDfsWriteLabel); + int64_t dfs_sync_hang = latest_report->FindMetricValue(kDfsHangMetric, kDfsSyncLabel); + int64_t dfs_flush_hang = latest_report->FindMetricValue(kDfsHangMetric, kDfsFlushLabel); + int64_t dfs_list_hang = latest_report->FindMetricValue(kDfsHangMetric, kDfsListLabel); + int64_t dfs_other_hang = latest_report->FindMetricValue(kDfsHangMetric, kDfsOtherLabel); + int64_t dfs_exists_hang = latest_report->FindMetricValue(kDfsHangMetric, kDfsExistsLabel); + int64_t dfs_open_hang = latest_report->FindMetricValue(kDfsHangMetric, kDfsOpenLabel); + int64_t dfs_close_hang = latest_report->FindMetricValue(kDfsHangMetric, kDfsCloseLabel); + int64_t dfs_delete_hang = latest_report->FindMetricValue(kDfsHangMetric, kDfsDeleteLabel); + int64_t dfs_tell_hang = latest_report->FindMetricValue(kDfsHangMetric, kDfsTellLabel); + int64_t dfs_info_hang = latest_report->FindMetricValue(kDfsHangMetric, kDfsInfoLabel); + double rdelay = + dfs_read_count ? static_cast(dfs_read_delay) / 1000.0 / dfs_read_count : 0; + double wdelay = + dfs_write_count ? static_cast(dfs_write_delay) / 1000.0 / dfs_write_count : 0; + double sdelay = + dfs_sync_count ? static_cast(dfs_sync_delay) / 1000.0 / dfs_sync_count : 0; + + if (FLAGS_tera_tabletnode_dump_running_info) { + dumper.DumpData("dfs_read", dfs_read_count); + dumper.DumpData("dfs_read_hang", dfs_read_hang); + dumper.DumpData("dfs_rdealy", rdelay); + dumper.DumpData("dfs_write", dfs_write_count); + dumper.DumpData("dfs_write_hang", dfs_write_hang); + dumper.DumpData("dfs_wdelay", wdelay); + dumper.DumpData("dfs_sync", dfs_sync_count); + dumper.DumpData("dfs_sync_hang", dfs_sync_hang); + dumper.DumpData("dfs_sdelay", sdelay); + dumper.DumpData("dfs_flush", dfs_flush_count); + dumper.DumpData("dfs_flush_hang", dfs_flush_hang); + dumper.DumpData("dfs_list", dfs_list_count); + dumper.DumpData("dfs_list_hang", dfs_list_hang); + dumper.DumpData("dfs_info", dfs_info_count); + dumper.DumpData("dfs_info_hang", dfs_info_hang); + dumper.DumpData("dfs_exists", dfs_exists_count); + dumper.DumpData("dfs_exists_hang", dfs_exists_hang); + dumper.DumpData("dfs_open", dfs_open_count); + dumper.DumpData("dfs_open_hang", dfs_open_hang); + dumper.DumpData("dfs_close", dfs_close_count); + dumper.DumpData("dfs_close_hang", dfs_close_hang); + dumper.DumpData("dfs_delete", dfs_delete_count); + dumper.DumpData("dfs_delete_hang", dfs_delete_hang); + dumper.DumpData("dfs_tell", dfs_tell_count); + dumper.DumpData("dfs_tell_hang", dfs_tell_hang); + dumper.DumpData("dfs_other", dfs_other_count); + dumper.DumpData("dfs_other_hang", dfs_other_hang); + } + + int64_t dfs_opened_read_files = latest_report->FindMetricValue(kDfsOpenedReadFilesCountMetric); + int64_t dfs_opened_write_files = latest_report->FindMetricValue(kDfsOpenedWriteFilesCountMetric); + + if (FLAGS_tera_tabletnode_dump_running_info) { + dumper.DumpData("dfs_opened_read_files_count", dfs_opened_read_files); + dumper.DumpData("dfs_opened_write_files_count", dfs_opened_write_files); + } + + LOG(INFO) << "[Dfs] read " << dfs_read_count << " " << dfs_read_hang << " " + << "rdelay " << rdelay << " " + << "rdelay_total " << dfs_read_delay << " " + << "write " << dfs_write_count << " " << dfs_write_hang << " " + << "wdelay " << wdelay << " " + << "wdelay_total " << dfs_write_delay << " " + << "sync " << dfs_sync_count << " " << dfs_sync_hang << " " + << "sdelay " << sdelay << " " + << "sdelay_total " << dfs_sync_delay << " " + << "flush " << dfs_flush_count << " " << dfs_flush_hang << " " + << "list " << dfs_list_count << " " << dfs_list_hang << " " + << "info " << dfs_info_count << " " << dfs_info_hang << " " + << "exists " << dfs_exists_count << " " << dfs_exists_hang << " " + << "open " << dfs_open_count << " " << dfs_open_hang << " " + << "close " << dfs_close_count << " " << dfs_close_hang << " " + << "delete " << dfs_delete_count << " " << dfs_delete_hang << " " + << "tell " << dfs_tell_count << " " << dfs_tell_hang << " " + << "other " << dfs_other_count << " " << dfs_other_hang << " " + << "opened: read " << dfs_opened_read_files << " " + << "write " << dfs_opened_write_files; +} - LOG(INFO) << "[HardWare Info] " - << " mem_used " << info_.mem_used() << " " - << utils::ConvertByteToString(info_.mem_used()) - << " net_tx " << info_.net_tx() << " " - << utils::ConvertByteToString(info_.net_tx()) - << " net_rx " << info_.net_rx() << " " - << utils::ConvertByteToString(info_.net_rx()) - << " cpu_usage " << info_.cpu_usage() << "%"; - - // net and io info - int64_t ssd_read_count = latest_report->FindMetricValue(kSsdReadCountMetric); - int64_t ssd_read_size = latest_report->FindMetricValue(kSsdReadThroughPutMetric); - int64_t ssd_write_count = latest_report->FindMetricValue(kSsdWriteCountMetric); - int64_t ssd_write_size = latest_report->FindMetricValue(kSsdWriteThroughPutMetric); - if (FLAGS_tera_tabletnode_dump_running_info) { - dumper.DumpData("dfs_r", info_.dfs_io_r()); - dumper.DumpData("dfs_w", info_.dfs_io_w()); - dumper.DumpData("local_r", info_.local_io_r()); - dumper.DumpData("local_w", info_.local_io_w()); - dumper.DumpData("ssd_r_counter", ssd_read_count); - dumper.DumpData("ssd_r_size", ssd_read_size); - dumper.DumpData("ssd_w_counter", ssd_write_count); - dumper.DumpData("ssd_w_size", ssd_write_size); - } +void TabletNodeSysInfo::DumpPosixInfo(const std::shared_ptr& info_ptr, + const std::shared_ptr& latest_report, + const TabletNodeSysInfoDumper& dumper) { + int64_t posix_read_count = latest_report->FindMetricValue(kPosixReadCountMetric); + int64_t posix_write_count = latest_report->FindMetricValue(kPosixWriteCountMetric); + int64_t posix_sync_count = latest_report->FindMetricValue(kPosixSyncCountMetric); + int64_t posix_list_count = latest_report->FindMetricValue(kPosixListCountMetric); + int64_t posix_info_count = latest_report->FindMetricValue(kPosixInfoCountMetric); + int64_t posix_exists_count = latest_report->FindMetricValue(kPosixExistsCountMetric); + int64_t posix_open_count = latest_report->FindMetricValue(kPosixOpenCountMetric); + int64_t posix_close_count = latest_report->FindMetricValue(kPosixCloseCountMetric); + int64_t posix_delete_count = latest_report->FindMetricValue(kPosixDeleteCountMetric); + int64_t posix_tell_count = latest_report->FindMetricValue(kPosixTellCountMetric); + int64_t posix_seek_count = latest_report->FindMetricValue(kPosixSeekCountMetric); + int64_t posix_other_count = latest_report->FindMetricValue(kPosixOtherCountMetric); + + if (FLAGS_tera_tabletnode_dump_running_info) { + dumper.DumpData("local_read", posix_read_count); + dumper.DumpData("local_write", posix_write_count); + dumper.DumpData("local_sync", posix_sync_count); + dumper.DumpData("local_list", posix_list_count); + dumper.DumpData("local_info", posix_info_count); + dumper.DumpData("local_exists", posix_exists_count); + dumper.DumpData("local_open", posix_open_count); + dumper.DumpData("local_close", posix_close_count); + dumper.DumpData("local_delete", posix_delete_count); + dumper.DumpData("local_tell", posix_tell_count); + dumper.DumpData("local_seek", posix_seek_count); + dumper.DumpData("local_other", posix_other_count); + } + + LOG(INFO) << "[Local] read " << posix_read_count << " " + << "write " << posix_write_count << " " + << "sync " << posix_sync_count << " " + << "list " << posix_list_count << " " + << "info " << posix_info_count << " " + << "exists " << posix_exists_count << " " + << "open " << posix_open_count << " " + << "close " << posix_close_count << " " + << "delete " << posix_delete_count << " " + << "tell " << posix_tell_count << " " + << "seek " << posix_seek_count << " " + << "other " << posix_other_count; +} - LOG(INFO) << "[IO]" - << " dfs_r " << info_.dfs_io_r() << " " - << utils::ConvertByteToString(info_.dfs_io_r()) - << " dfs_w " << info_.dfs_io_w() << " " - << utils::ConvertByteToString(info_.dfs_io_w()) - << " local_r " << info_.local_io_r() << " " - << utils::ConvertByteToString(info_.local_io_r()) - << " local_w " << info_.local_io_w() << " " - << utils::ConvertByteToString(info_.local_io_w()) - << " ssd_r " << ssd_read_count << " " - << utils::ConvertByteToString(ssd_read_size) - << " ssd_w " << ssd_write_count << " " - << utils::ConvertByteToString(ssd_write_size); - - // cache info - double block_cache_hitrate = static_cast(latest_report->FindMetricValue(kBlockCacheHitRateMetric)) / 100.0; - if (block_cache_hitrate < 0.0) { - block_cache_hitrate = NAN; - } - int64_t block_cache_entries = latest_report->FindMetricValue(kBlockCacheEntriesMetric); - int64_t block_cache_charge = latest_report->FindMetricValue(kBlockCacheChargeMetric); - double table_cache_hitrate = static_cast(latest_report->FindMetricValue(kTableCacheHitRateMetric)) / 100.0; - if (table_cache_hitrate < 0.0) { - table_cache_hitrate = NAN; - } - int64_t table_cache_entries = latest_report->FindMetricValue(kTableCacheEntriesMetric); - int64_t table_cache_charge = latest_report->FindMetricValue(kTableCacheChargeMetric); - if (FLAGS_tera_tabletnode_dump_running_info) { - dumper.DumpData("block_cache_hitrate", block_cache_hitrate); - dumper.DumpData("block_cache_entry", block_cache_entries); - dumper.DumpData("block_cache_bytes", block_cache_charge); - dumper.DumpData("table_cache_hitrate", table_cache_hitrate); - dumper.DumpData("table_cache_entry", table_cache_entries); - dumper.DumpData("table_cache_bytes", table_cache_charge); - } - LOG(INFO) << "[Cache HitRate/Cnt/Size] table_cache " - << table_cache_hitrate << " " - << table_cache_entries << " " - << table_cache_charge - << ", block_cache " - << block_cache_hitrate << " " - << block_cache_entries << " " - << block_cache_charge; - - int64_t finished_read_request = - latest_report->FindMetricValue(kFinishedRequestCountMetric, kApiLabelRead); - int64_t finished_write_request = - latest_report->FindMetricValue(kFinishedRequestCountMetric, kApiLabelWrite); - int64_t finished_scan_request = - latest_report->FindMetricValue(kFinishedRequestCountMetric, kApiLabelScan); - LOG(INFO) << "[Finished Requests] " - << "read: " << finished_read_request * 1000 / interval - << ", write: " << finished_write_request * 1000 / interval - << ", scan: " << finished_scan_request * 1000 / interval; - - int64_t read_request_delay_avg = - finished_read_request == 0 ? 0 : latest_report->FindMetricValue(kRequestDelayMetric, kApiLabelRead) / finished_read_request; - int64_t write_request_delay_avg = - finished_write_request == 0 ? 0 : latest_report->FindMetricValue(kRequestDelayMetric, kApiLabelWrite) / finished_write_request; - int64_t scan_request_delay_avg = - finished_scan_request == 0 ? 0 : latest_report->FindMetricValue(kRequestDelayMetric, kApiLabelScan) / finished_scan_request; - - int64_t read_delay_percentile_95 = - finished_read_request == 0 ? 0 : latest_report->FindMetricValue(kRequestDelayPercentileMetric, kReadLabelPercentile95); - int64_t read_delay_percentile_99 = - finished_read_request == 0 ? 0 : latest_report->FindMetricValue(kRequestDelayPercentileMetric, kReadLabelPercentile99); - int64_t write_delay_percentile_95 = - finished_write_request == 0 ? 0 : latest_report->FindMetricValue(kRequestDelayPercentileMetric, kWriteLabelPercentile95); - int64_t write_delay_percentile_99 = - finished_write_request == 0 ? 0 : latest_report->FindMetricValue(kRequestDelayPercentileMetric, kWriteLabelPercentile99); - int64_t scan_delay_percentile_95 = - finished_scan_request == 0 ? 0 : latest_report->FindMetricValue(kRequestDelayPercentileMetric, kScanLabelPercentile95); - int64_t scan_delay_percentile_99 = - finished_scan_request == 0 ? 0 : latest_report->FindMetricValue(kRequestDelayPercentileMetric, kScanLabelPercentile99); +void TabletNodeSysInfo::DumpLevelSizeInfo(const std::shared_ptr& info_ptr, + const std::shared_ptr& latest_report, + const TabletNodeSysInfoDumper& dumper) { + if (FLAGS_tera_tabletnode_dump_level_size_info_enabled) { + int64_t level0_size = latest_report->FindMetricValue(kLevelSize, "level:0"); + int64_t level1_size = latest_report->FindMetricValue(kLevelSize, "level:1"); + int64_t level2_size = latest_report->FindMetricValue(kLevelSize, "level:2"); + int64_t level3_size = latest_report->FindMetricValue(kLevelSize, "level:3"); + int64_t level4_size = latest_report->FindMetricValue(kLevelSize, "level:4"); + int64_t level5_size = latest_report->FindMetricValue(kLevelSize, "level:5"); + int64_t level6_size = latest_report->FindMetricValue(kLevelSize, "level:6"); if (FLAGS_tera_tabletnode_dump_running_info) { - dumper.DumpData("read_delay_avg", read_request_delay_avg); - dumper.DumpData("read_delay_95", read_delay_percentile_95); - dumper.DumpData("read_delay_99", read_delay_percentile_99); - dumper.DumpData("write_delay_avg", write_request_delay_avg); - dumper.DumpData("write_delay_95", write_delay_percentile_95); - dumper.DumpData("write_delay_99", write_delay_percentile_99); - dumper.DumpData("scan_delay_avg", scan_request_delay_avg); - dumper.DumpData("scan_delay_95", scan_delay_percentile_95); - dumper.DumpData("scan_delay_99", scan_delay_percentile_99); + dumper.DumpData("level0_size", level0_size); + dumper.DumpData("level1_size", level1_size); + dumper.DumpData("level2_size", level2_size); + dumper.DumpData("level3_size", level3_size); + dumper.DumpData("level4_size", level4_size); + dumper.DumpData("level5_size", level5_size); + dumper.DumpData("level6_size", level6_size); } - LOG(INFO) << "[Requests Delay In Us] " - << "Read [Avg: " << read_request_delay_avg - << ", Percentile 95: " << read_delay_percentile_95 - << ", Percentile 99: " << read_delay_percentile_99 - << "]; Write [Avg: " << write_request_delay_avg - << ", Percentile 95: " << write_delay_percentile_95 - << ", Percentile 99: " << write_delay_percentile_99 - << "]; Scan [Avg: " << scan_request_delay_avg - << ", Percentile 95: " << scan_delay_percentile_95 - << ", Percentile 99: " << scan_delay_percentile_99 - << "]"; - - int64_t read_rows = - latest_report->FindMetricValue(kRowCountMetric, kApiLabelRead); - int64_t write_rows = - latest_report->FindMetricValue(kRowCountMetric, kApiLabelWrite); - int64_t scan_rows = - latest_report->FindMetricValue(kRowCountMetric, kApiLabelScan); - int64_t row_read_delay = - (read_rows == 0 ? 0 : latest_report->FindMetricValue(kRowDelayMetric, kApiLabelRead) / read_rows); - int64_t row_write_delay = - (write_rows == 0 ? 0 : latest_report->FindMetricValue(kRowDelayMetric, kApiLabelWrite) / write_rows); - int64_t row_scan_delay = - (scan_rows == 0 ? 0 : latest_report->FindMetricValue(kRowDelayMetric, kApiLabelScan) / scan_rows); - LOG(INFO) << "[Row Delay In Ms] " - << "row_read_delay: " << row_read_delay / 1000.0 - << ", row_write_delay: " << row_write_delay / 1000.0 - << ", row_scan_delay: " << row_scan_delay / 1000.0; - - // extra info - std::ostringstream ss; - int cols = info_.extra_info_size(); - ss << "[Pending] "; - for (int i = 0; i < cols; ++i) { - ss << info_.extra_info(i).name() << " " << info_.extra_info(i).value() << " "; - if (FLAGS_tera_tabletnode_dump_running_info) { - dumper.DumpData(info_.extra_info(i).name(), info_.extra_info(i).value()); - } - } - LOG(INFO) << ss.str(); - - // DFS info - int64_t dfs_read_delay = latest_report->FindMetricValue(kDfsReadDelayMetric); - int64_t dfs_write_delay = latest_report->FindMetricValue(kDfsWriteDelayMetric); - int64_t dfs_sync_delay = latest_report->FindMetricValue(kDfsSyncDelayMetric); - int64_t dfs_read_count = latest_report->FindMetricValue(kDfsRequestMetric, kDfsReadLabel); - int64_t dfs_write_count = latest_report->FindMetricValue(kDfsRequestMetric, kDfsWriteLabel); - int64_t dfs_sync_count = latest_report->FindMetricValue(kDfsRequestMetric, kDfsSyncLabel); - int64_t dfs_flush_count = latest_report->FindMetricValue(kDfsRequestMetric, kDfsFlushLabel); - int64_t dfs_list_count = latest_report->FindMetricValue(kDfsRequestMetric, kDfsListLabel); - int64_t dfs_other_count = latest_report->FindMetricValue(kDfsRequestMetric, kDfsOtherLabel); - int64_t dfs_exists_count = latest_report->FindMetricValue(kDfsRequestMetric, kDfsExistsLabel); - int64_t dfs_open_count = latest_report->FindMetricValue(kDfsRequestMetric, kDfsOpenLabel); - int64_t dfs_close_count = latest_report->FindMetricValue(kDfsRequestMetric, kDfsCloseLabel); - int64_t dfs_delete_count = latest_report->FindMetricValue(kDfsRequestMetric, kDfsDeleteLabel); - int64_t dfs_tell_count = latest_report->FindMetricValue(kDfsRequestMetric, kDfsTellLabel); - int64_t dfs_info_count = latest_report->FindMetricValue(kDfsRequestMetric, kDfsInfoLabel); - int64_t dfs_read_hang = latest_report->FindMetricValue(kDfsHangMetric, kDfsReadLabel); - int64_t dfs_write_hang = latest_report->FindMetricValue(kDfsHangMetric, kDfsWriteLabel); - int64_t dfs_sync_hang = latest_report->FindMetricValue(kDfsHangMetric, kDfsSyncLabel); - int64_t dfs_flush_hang = latest_report->FindMetricValue(kDfsHangMetric, kDfsFlushLabel); - int64_t dfs_list_hang = latest_report->FindMetricValue(kDfsHangMetric, kDfsListLabel); - int64_t dfs_other_hang = latest_report->FindMetricValue(kDfsHangMetric, kDfsOtherLabel); - int64_t dfs_exists_hang = latest_report->FindMetricValue(kDfsHangMetric, kDfsExistsLabel); - int64_t dfs_open_hang = latest_report->FindMetricValue(kDfsHangMetric, kDfsOpenLabel); - int64_t dfs_close_hang = latest_report->FindMetricValue(kDfsHangMetric, kDfsCloseLabel); - int64_t dfs_delete_hang = latest_report->FindMetricValue(kDfsHangMetric, kDfsDeleteLabel); - int64_t dfs_tell_hang = latest_report->FindMetricValue(kDfsHangMetric, kDfsTellLabel); - int64_t dfs_info_hang = latest_report->FindMetricValue(kDfsHangMetric, kDfsInfoLabel); - double rdelay = dfs_read_count ? static_cast(dfs_read_delay) / 1000.0 / dfs_read_count : 0; - double wdelay = dfs_write_count ? static_cast(dfs_write_delay) / 1000.0 / dfs_write_count : 0; - double sdelay = dfs_sync_count ? static_cast(dfs_sync_delay) / 1000.0 / dfs_sync_count : 0; + LOG(INFO) << "[Level Size] L0: " << utils::ConvertByteToString(level0_size) + << " , L1: " << utils::ConvertByteToString(level1_size) + << " , L2: " << utils::ConvertByteToString(level2_size) + << " , L3: " << utils::ConvertByteToString(level3_size) + << " , L4: " << utils::ConvertByteToString(level4_size) + << " , L5: " << utils::ConvertByteToString(level5_size) + << " , L6: " << utils::ConvertByteToString(level6_size); + } +} - if (FLAGS_tera_tabletnode_dump_running_info) { - dumper.DumpData("dfs_read", dfs_read_count); - dumper.DumpData("dfs_read_hang", dfs_read_hang); - dumper.DumpData("dfs_rdealy", rdelay); - dumper.DumpData("dfs_write", dfs_write_count); - dumper.DumpData("dfs_write_hang", dfs_write_hang); - dumper.DumpData("dfs_wdelay", wdelay); - dumper.DumpData("dfs_sync", dfs_sync_count); - dumper.DumpData("dfs_sync_hang", dfs_sync_hang); - dumper.DumpData("dfs_sdelay", sdelay); - dumper.DumpData("dfs_flush", dfs_flush_count); - dumper.DumpData("dfs_flush_hang", dfs_flush_hang); - dumper.DumpData("dfs_list", dfs_list_count); - dumper.DumpData("dfs_list_hang", dfs_list_hang); - dumper.DumpData("dfs_info", dfs_info_count); - dumper.DumpData("dfs_info_hang", dfs_info_hang); - dumper.DumpData("dfs_exists", dfs_exists_count); - dumper.DumpData("dfs_exists_hang", dfs_exists_hang); - dumper.DumpData("dfs_open", dfs_open_count); - dumper.DumpData("dfs_open_hang", dfs_open_hang); - dumper.DumpData("dfs_close", dfs_close_count); - dumper.DumpData("dfs_close_hang", dfs_close_hang); - dumper.DumpData("dfs_delete", dfs_delete_count); - dumper.DumpData("dfs_delete_hang", dfs_delete_hang); - dumper.DumpData("dfs_tell", dfs_tell_count); - dumper.DumpData("dfs_tell_hang", dfs_tell_hang); - dumper.DumpData("dfs_other", dfs_other_count); - dumper.DumpData("dfs_other_hang", dfs_other_hang); +void TabletNodeSysInfo::DumpPersistentCacheInfo( + const std::shared_ptr& info_ptr, + const std::shared_ptr& latest_report, const TabletNodeSysInfoDumper& dumper) { + if (FLAGS_tera_enable_persistent_cache && !tera::io::GetCachePaths().empty()) { + auto& persistent_cache_paths = tera::io::GetPersistentCachePaths(); + int64_t write_count = + latest_report->FindMetricValue(leveldb::PersistentCacheMetricNames::kWriteCount); + int64_t write_throughput = + latest_report->FindMetricValue(leveldb::PersistentCacheMetricNames::kWriteThroughput); + int64_t read_throughput = + latest_report->FindMetricValue(leveldb::PersistentCacheMetricNames::kReadThroughput); + int64_t cache_hits = + latest_report->FindMetricValue(leveldb::PersistentCacheMetricNames::kCacheHits); + int64_t cache_misses = + latest_report->FindMetricValue(leveldb::PersistentCacheMetricNames::kCacheMisses); + int64_t cache_errors = + latest_report->FindMetricValue(leveldb::PersistentCacheMetricNames::kCacheErrors); + int64_t file_entries = + latest_report->FindMetricValue(leveldb::PersistentCacheMetricNames::kFileEntries); + int64_t cache_capacity{0}; + int64_t cache_size{0}; + int64_t meta_size{0}; + for (const auto& path : persistent_cache_paths) { + cache_capacity += latest_report->FindMetricValue( + leveldb::PersistentCacheMetricNames::kCacheCapacity, "path:" + path); + cache_size += latest_report->FindMetricValue(leveldb::PersistentCacheMetricNames::kCacheSize, + "path:" + path); + meta_size += latest_report->FindMetricValue( + leveldb::PersistentCacheMetricNames::kMetaDataSize, "path:" + path); } - - int64_t dfs_opened_read_files = latest_report->FindMetricValue(kDfsOpenedReadFilesCountMetric); - int64_t dfs_opened_write_files = latest_report->FindMetricValue(kDfsOpenedWriteFilesCountMetric); - - if (FLAGS_tera_tabletnode_dump_running_info) { - dumper.DumpData("dfs_opened_read_files_count", dfs_opened_read_files); - dumper.DumpData("dfs_opened_write_files_count", dfs_opened_write_files); + double hit_pct = 0; + double miss_pct = 0; + if (cache_hits || cache_misses) { + hit_pct = (double)cache_hits / (cache_hits + cache_misses); + miss_pct = (double)cache_misses / (cache_hits + cache_misses); } + LOG(INFO) << "[Persistent Cache] write_size: " << utils::ConvertByteToString(write_throughput) + << ", write_count: " << write_count + << ", read_size: " << utils::ConvertByteToString(read_throughput) + << ", cache_hits: " << cache_hits << ", cache_misses: " << cache_misses + << ", hit_percent: " << hit_pct * 100 << ", miss_percent: " << miss_pct * 100 + << ", cache_errors: " << cache_errors << ", file_entries: " << file_entries + << ", cache_capacity: " << utils::ConvertByteToString(cache_capacity) + << ", cache_size: " << utils::ConvertByteToString(cache_size) + << ", metadata_size: " << utils::ConvertByteToString(meta_size); + } +} - LOG(INFO) << "[Dfs] read " << dfs_read_count << " " - << dfs_read_hang << " " - << "rdelay " << rdelay << " " - << "rdelay_total " << dfs_read_delay << " " - << "write " << dfs_write_count << " " - << dfs_write_hang << " " - << "wdelay " << wdelay << " " - << "wdelay_total " << dfs_write_delay << " " - << "sync " << dfs_sync_count << " " - << dfs_sync_hang << " " - << "sdelay " << sdelay << " " - << "sdelay_total " << dfs_sync_delay << " " - << "flush " << dfs_flush_count << " " - << dfs_flush_hang << " " - << "list " << dfs_list_count << " " - << dfs_list_hang << " " - << "info " << dfs_info_count << " " - << dfs_info_hang << " " - << "exists " << dfs_exists_count << " " - << dfs_exists_hang << " " - << "open " << dfs_open_count << " " - << dfs_open_hang << " " - << "close " << dfs_close_count << " " - << dfs_close_hang << " " - << "delete " << dfs_delete_count << " " - << dfs_delete_hang << " " - << "tell " << dfs_tell_count << " " - << dfs_tell_hang << " " - << "other " << dfs_other_count << " " - << dfs_other_hang << " " - << "opened: read " << dfs_opened_read_files << " " - << "write " << dfs_opened_write_files; - - // local info - int64_t posix_read_count = latest_report->FindMetricValue(kPosixReadCountMetric); - int64_t posix_write_count = latest_report->FindMetricValue(kPosixWriteCountMetric); - int64_t posix_sync_count = latest_report->FindMetricValue(kPosixSyncCountMetric); - int64_t posix_list_count = latest_report->FindMetricValue(kPosixListCountMetric); - int64_t posix_info_count = latest_report->FindMetricValue(kPosixInfoCountMetric); - int64_t posix_exists_count = latest_report->FindMetricValue(kPosixExistsCountMetric); - int64_t posix_open_count = latest_report->FindMetricValue(kPosixOpenCountMetric); - int64_t posix_close_count = latest_report->FindMetricValue(kPosixCloseCountMetric); - int64_t posix_delete_count = latest_report->FindMetricValue(kPosixDeleteCountMetric); - int64_t posix_tell_count = latest_report->FindMetricValue(kPosixTellCountMetric); - int64_t posix_seek_count = latest_report->FindMetricValue(kPosixSeekCountMetric); - int64_t posix_other_count = latest_report->FindMetricValue(kPosixOtherCountMetric); +void TabletNodeSysInfo::DumpOtherInfo(const std::shared_ptr& info_ptr, + const std::shared_ptr& latest_report, + const TabletNodeSysInfoDumper& dumper) { + auto mem_table_size = latest_report->FindMetricValue(kMemTableSize); + auto bloom_filter_size = latest_report->FindMetricValue("tera_filter_block_size"); - if (FLAGS_tera_tabletnode_dump_running_info) { - dumper.DumpData("local_read", posix_read_count); - dumper.DumpData("local_write", posix_write_count); - dumper.DumpData("local_sync", posix_sync_count); - dumper.DumpData("local_list", posix_list_count); - dumper.DumpData("local_info", posix_info_count); - dumper.DumpData("local_exists", posix_exists_count); - dumper.DumpData("local_open", posix_open_count); - dumper.DumpData("local_close", posix_close_count); - dumper.DumpData("local_delete", posix_delete_count); - dumper.DumpData("local_tell", posix_tell_count); - dumper.DumpData("local_seek", posix_seek_count); - dumper.DumpData("local_other", posix_other_count); - } + LOG(INFO) << "[Others] mem_table_size: " << utils::ConvertByteToString(mem_table_size) + << ", bloom_filter_size: " << utils::ConvertByteToString(bloom_filter_size); - LOG(INFO) << "[Local] read " << posix_read_count << " " - << "write " << posix_write_count << " " - << "sync " << posix_sync_count << " " - << "list " << posix_list_count << " " - << "info " << posix_info_count << " " - << "exists " << posix_exists_count << " " - << "open " << posix_open_count << " " - << "close " << posix_close_count << " " - << "delete " << posix_delete_count << " " - << "tell " << posix_tell_count << " " - << "seek " << posix_seek_count << " " - << "other " << posix_other_count; - - if (FLAGS_tera_tabletnode_dump_level_size_info_enabled) { - int64_t level0_size = latest_report->FindMetricValue(kLevelSize, "level:0"); - int64_t level1_size = latest_report->FindMetricValue(kLevelSize, "level:1"); - int64_t level2_size = latest_report->FindMetricValue(kLevelSize, "level:2"); - int64_t level3_size = latest_report->FindMetricValue(kLevelSize, "level:3"); - int64_t level4_size = latest_report->FindMetricValue(kLevelSize, "level:4"); - int64_t level5_size = latest_report->FindMetricValue(kLevelSize, "level:5"); - int64_t level6_size = latest_report->FindMetricValue(kLevelSize, "level:6"); - - if (FLAGS_tera_tabletnode_dump_running_info) { - dumper.DumpData("level0_size", level0_size); - dumper.DumpData("level1_size", level1_size); - dumper.DumpData("level2_size", level2_size); - dumper.DumpData("level3_size", level3_size); - dumper.DumpData("level4_size", level4_size); - dumper.DumpData("level5_size", level5_size); - dumper.DumpData("level6_size", level6_size); - } - - LOG(INFO) << "[Level Size] L0: " << utils::ConvertByteToString(level0_size) - << " , L1: " << utils::ConvertByteToString(level1_size) - << " , L2: " << utils::ConvertByteToString(level2_size) - << " , L3: " << utils::ConvertByteToString(level3_size) - << " , L4: " << utils::ConvertByteToString(level4_size) - << " , L5: " << utils::ConvertByteToString(level5_size) - << " , L6: " << utils::ConvertByteToString(level6_size); - } + if (FLAGS_tera_tabletnode_dump_running_info) { + dumper.DumpData("mem_table_size", mem_table_size); + dumper.DumpData("bloom_filter_size", bloom_filter_size); + } +} + +void TabletNodeSysInfo::DumpLog() { + decltype(info_) info_ptr; + { + MutexLock lock(&mutex_); + info_ptr = info_; + } + std::shared_ptr latest_report = + CollectorReportPublisher::GetInstance().GetCollectorReport(); + TabletNodeSysInfoDumper dumper(FLAGS_tera_tabletnode_running_info_dump_file); + + std::for_each(std::begin(dump_info_functions_), std::end(dump_info_functions_), + [&latest_report, &dumper, &info_ptr](std::function& f) { + f(info_ptr, latest_report, dumper); + }); +} +void TabletNodeSysInfo::UpdateWriteFlowController() { + MutexLock lock(&mutex_); + TsWriteFlowController::Instance().Append(info_->timestamp() / 1000, info_->write_size()); } -} // namespace tabletnode -} // namespace tera +} // namespace tabletnode +} // namespace tera diff --git a/src/tabletnode/tabletnode_sysinfo.h b/src/tabletnode/tabletnode_sysinfo.h index e0f12de15..5112e94fa 100644 --- a/src/tabletnode/tabletnode_sysinfo.h +++ b/src/tabletnode/tabletnode_sysinfo.h @@ -8,6 +8,7 @@ #define TERA_TABLETNODE_TABLETNODE_SYSINFO_H_ #include +#include #include #include "common/mutex.h" @@ -17,45 +18,74 @@ namespace tera { namespace tabletnode { +class TabletNodeSysInfoDumper; + class TabletNodeSysInfo { -public: - TabletNodeSysInfo(); - TabletNodeSysInfo(const TabletNodeInfo& info); + public: + TabletNodeSysInfo(); + + ~TabletNodeSysInfo(); + + void CollectTabletNodeInfo(TabletManager* tablet_manager, const std::string& server_addr); + + void CollectHardwareInfo(); + + void AddExtraInfo(const std::string& name, int64_t value); + + void SetTimeStamp(int64_t ts); - ~TabletNodeSysInfo(); + void SetServerAddr(const std::string& addr); - void CollectTabletNodeInfo(TabletManager* tablet_manager, - const std::string& server_addr); + void SetPersistentCacheSize(uint64_t size); - void CollectHardwareInfo(); + void SetStatus(StatusCode status); - void AddExtraInfo(const std::string& name, int64_t value); + void GetTabletNodeInfo(TabletNodeInfo* info); - void Reset(); + void GetTabletMetaList(TabletMetaList* meta_list); - void SetTimeStamp(int64_t ts); + void DumpLog(); - void SetServerAddr(const std::string& addr); + void SetProcessStartTime(int64_t ts); - void SetStatus(StatusCode status); + void RefreshTabletsStatus(TabletManager* tablet_manager); - void GetTabletNodeInfo(TabletNodeInfo* info); + void UpdateWriteFlowController(); - void GetTabletMetaList(TabletMetaList* meta_list); + private: + void SwitchInfo() { + auto new_info = new TabletNodeInfo{*info_}; + info_.reset(new_info); + } - void DumpLog(); + using DumpInfoFunction = void(const std::shared_ptr& info_ptr, + const std::shared_ptr& latest_report, + const TabletNodeSysInfoDumper& dumper); + DumpInfoFunction DumpSysInfo; + DumpInfoFunction DumpHardWareInfo; + DumpInfoFunction DumpIoInfo; + DumpInfoFunction DumpCacheInfo; + DumpInfoFunction DumpRequestInfo; + DumpInfoFunction DumpDfsInfo; + DumpInfoFunction DumpPosixInfo; + DumpInfoFunction DumpLevelSizeInfo; + DumpInfoFunction DumpPersistentCacheInfo; + DumpInfoFunction DumpOtherInfo; - void SetProcessStartTime(int64_t ts); + void RegisterDumpInfoFunction(DumpInfoFunction TabletNodeSysInfo::*f) { + dump_info_functions_.emplace_back( + std::bind(f, this, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3)); + } - void RefreshTabletsStatus(TabletManager* tablet_manager); + std::vector> dump_info_functions_; -private: - TabletNodeInfo info_; - TabletMetaList tablet_list_; + private: + std::shared_ptr info_; + std::unique_ptr tablet_list_; - mutable Mutex mutex_; + mutable Mutex mutex_; }; -} // namespace tabletnode -} // namespace tera +} // namespace tabletnode +} // namespace tera -#endif // TERA_TABLETNODE_TABLETNODE_SYSINFO_H_ +#endif // TERA_TABLETNODE_TABLETNODE_SYSINFO_H_ diff --git a/src/tabletnode/tabletnode_zk_adapter.cc b/src/tabletnode/tabletnode_zk_adapter.cc index d3e3d7322..0864615ce 100644 --- a/src/tabletnode/tabletnode_zk_adapter.cc +++ b/src/tabletnode/tabletnode_zk_adapter.cc @@ -24,144 +24,137 @@ namespace tabletnode { TabletNodeZkAdapter::TabletNodeZkAdapter(TabletNodeImpl* tabletnode_impl, const std::string& server_addr) - : tabletnode_impl_(tabletnode_impl), server_addr_(server_addr) { -} + : tabletnode_impl_(tabletnode_impl), server_addr_(server_addr) {} -TabletNodeZkAdapter::~TabletNodeZkAdapter() { -} +TabletNodeZkAdapter::~TabletNodeZkAdapter() {} void TabletNodeZkAdapter::Init() { - int zk_errno; - - // init zk client - while (!ZooKeeperAdapter::Init(FLAGS_tera_zk_addr_list, - FLAGS_tera_zk_root_path, FLAGS_tera_zk_timeout, - server_addr_, &zk_errno)) { - LOG(ERROR) << "fail to init zk : " << zk::ZkErrnoToString(zk_errno); - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - } - LOG(INFO) << "init zk success"; - - // enter running state - int64_t session_id_int = 0; - if (!GetSessionId(&session_id_int, &zk_errno)) { - LOG(ERROR) << "get session id fail : " << zk::ZkErrnoToString(zk_errno); - return; - } - char session_id_str[32]; - sprintf(session_id_str, "%016lx", session_id_int); - tabletnode_impl_->SetSessionId(session_id_str); - tabletnode_impl_->SetTabletNodeStatus(TabletNodeImpl::kIsRunning); - - // create my node - while (!Register(tabletnode_impl_->GetSessionId(), &zk_errno)) { - LOG(ERROR) << "fail to create serve-node : " << zk::ZkErrnoToString(zk_errno); - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - } - LOG(INFO) << "create serve-node success"; - - bool is_exist = false; - - // watch my node - while (!WatchSelfNode(&is_exist, &zk_errno)) { - LOG(ERROR) << "fail to watch serve-node : " << zk::ZkErrnoToString(zk_errno); - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - } - LOG(INFO) << "watch serve-node success"; - if (!is_exist) { - OnSelfNodeDeleted(); - } - - // watch kick node - while (!WatchKickMark(&is_exist, &zk_errno)) { - LOG(ERROR) << "fail to watch kick mark : " << zk::ZkErrnoToString(zk_errno); - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - } - LOG(INFO) << "watch kick mark success"; - if (is_exist) { - OnKickMarkCreated(); - } - - // watch safemode node - while (!WatchSafeModeMark(&is_exist, &zk_errno)) { - LOG(ERROR) << "fail to watch safemode mark : " << zk::ZkErrnoToString(zk_errno); - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - } - LOG(INFO) << "watch safemode mark success"; - if (is_exist) { - OnSafeModeMarkCreated(); - } - - // watch root node - std::string root_tablet_addr; - while (!WatchRootNode(&is_exist, &root_tablet_addr, &zk_errno)) { - LOG(ERROR) << "fail to watch root node : " << zk::ZkErrnoToString(zk_errno); - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - } - LOG(INFO) << "watch root node success"; - if (!root_tablet_addr.empty()) { - tabletnode_impl_->SetRootTabletAddr(root_tablet_addr); - } -} - -bool TabletNodeZkAdapter::GetRootTableAddr(std::string* root_table_addr) { - return true; + int zk_errno; + + // init zk client + while (!ZooKeeperAdapter::Init(FLAGS_tera_zk_addr_list, FLAGS_tera_zk_root_path, + FLAGS_tera_zk_timeout, server_addr_, &zk_errno)) { + LOG(ERROR) << "fail to init zk : " << zk::ZkErrnoToString(zk_errno); + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + LOG(INFO) << "init zk success"; + + // enter running state + int64_t session_id_int = 0; + if (!GetSessionId(&session_id_int, &zk_errno)) { + LOG(ERROR) << "get session id fail : " << zk::ZkErrnoToString(zk_errno); + return; + } + char session_id_str[32]; + sprintf(session_id_str, "%016lx", session_id_int); + tabletnode_impl_->SetSessionId(session_id_str); + tabletnode_impl_->SetTabletNodeStatus(TabletNodeImpl::kIsRunning); + + // create my node + while (!Register(tabletnode_impl_->GetSessionId(), &zk_errno)) { + LOG(ERROR) << "fail to create serve-node : " << zk::ZkErrnoToString(zk_errno); + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + LOG(INFO) << "create serve-node success"; + + bool is_exist = false; + + // watch my node + while (!WatchSelfNode(&is_exist, &zk_errno)) { + LOG(ERROR) << "fail to watch serve-node : " << zk::ZkErrnoToString(zk_errno); + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + LOG(INFO) << "watch serve-node success"; + if (!is_exist) { + OnSelfNodeDeleted(); + } + + // watch kick node + while (!WatchKickMark(&is_exist, &zk_errno)) { + LOG(ERROR) << "fail to watch kick mark : " << zk::ZkErrnoToString(zk_errno); + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + LOG(INFO) << "watch kick mark success"; + if (is_exist) { + OnKickMarkCreated(); + } + + // watch safemode node + while (!WatchSafeModeMark(&is_exist, &zk_errno)) { + LOG(ERROR) << "fail to watch safemode mark : " << zk::ZkErrnoToString(zk_errno); + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + LOG(INFO) << "watch safemode mark success"; + if (is_exist) { + OnSafeModeMarkCreated(); + } + + // watch root node + std::string root_tablet_addr; + while (!WatchRootNode(&is_exist, &root_tablet_addr, &zk_errno)) { + LOG(ERROR) << "fail to watch root node : " << zk::ZkErrnoToString(zk_errno); + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + LOG(INFO) << "watch root node success"; + if (!root_tablet_addr.empty()) { + tabletnode_impl_->SetRootTabletAddr(root_tablet_addr); + } } +bool TabletNodeZkAdapter::GetRootTableAddr(std::string* root_table_addr) { return true; } + bool TabletNodeZkAdapter::Register(const std::string& session_id, int* zk_errno) { - // create serve node - std::string node_path = kTsListPath + "/" + session_id + "#"; - std::string node_value = server_addr_; - std::string ret_node_path; - if (!CreateSequentialEphemeralNode(node_path, node_value, &ret_node_path, - zk_errno)) { - LOG(ERROR) << "create serve node fail"; - return false; - } - serve_node_path_ = ret_node_path; - kick_node_path_ = kKickPath + "/" + zk::ZooKeeperUtil::GetNodeName(serve_node_path_.c_str()); - LOG(INFO) << "create serve node success, node_path " << node_path - << ", " << serve_node_path_ << ", " << kick_node_path_; - SetZkAdapterCode(zk::ZE_OK, zk_errno); - return true; + // create serve node + std::string node_path = kTsListPath + "/" + session_id + "#"; + std::string node_value = server_addr_; + std::string ret_node_path; + if (!CreateSequentialEphemeralNode(node_path, node_value, &ret_node_path, zk_errno)) { + LOG(ERROR) << "create serve node fail"; + return false; + } + server_node_path_ = ret_node_path; + kick_node_path_ = kKickPath + "/" + zk::ZooKeeperUtil::GetNodeName(server_node_path_.c_str()); + LOG(INFO) << "create serve node success, node_path " << node_path << ", " << server_node_path_ + << ", " << kick_node_path_; + SetZkAdapterCode(zk::ZE_OK, zk_errno); + return true; } bool TabletNodeZkAdapter::Unregister(int* zk_errno) { - if (!DeleteNode(serve_node_path_, zk_errno)) { - LOG(ERROR) << "delete serve node fail"; - return false; - } - LOG(INFO) << "delete serve node success"; - SetZkAdapterCode(zk::ZE_OK, zk_errno); - return true; + if (!DeleteNode(server_node_path_, zk_errno)) { + LOG(ERROR) << "delete serve node fail"; + return false; + } + LOG(INFO) << "delete serve node success"; + SetZkAdapterCode(zk::ZE_OK, zk_errno); + return true; } bool TabletNodeZkAdapter::WatchMaster(std::string* master, int* zk_errno) { - return ReadAndWatchNode(kMasterNodePath, master, zk_errno); + return ReadAndWatchNode(kMasterNodePath, master, zk_errno); } bool TabletNodeZkAdapter::WatchSafeModeMark(bool* is_exist, int* zk_errno) { - return CheckAndWatchExist(kSafeModeNodePath, is_exist, zk_errno); + return CheckAndWatchExist(kSafeModeNodePath, is_exist, zk_errno); } bool TabletNodeZkAdapter::WatchKickMark(bool* is_exist, int* zk_errno) { - return CheckAndWatchExist(kick_node_path_, is_exist, zk_errno); + return CheckAndWatchExist(kick_node_path_, is_exist, zk_errno); } bool TabletNodeZkAdapter::WatchSelfNode(bool* is_exist, int* zk_errno) { - return CheckAndWatchExist(serve_node_path_, is_exist, zk_errno); + return CheckAndWatchExist(server_node_path_, is_exist, zk_errno); } -bool TabletNodeZkAdapter::WatchRootNode(bool* is_exist, - std::string* root_tablet_addr, +bool TabletNodeZkAdapter::WatchRootNode(bool* is_exist, std::string* root_tablet_addr, int* zk_errno) { - if (!CheckAndWatchExist(kRootTabletNodePath, is_exist, zk_errno)) { - return false; - } - if (!*is_exist) { - return true; - } - return ReadAndWatchNode(kRootTabletNodePath, root_tablet_addr, zk_errno); + if (!CheckAndWatchExist(kRootTabletNodePath, is_exist, zk_errno)) { + return false; + } + if (!*is_exist) { + return true; + } + return ReadAndWatchNode(kRootTabletNodePath, root_tablet_addr, zk_errno); } /* void TabletNodeZkAdapter::OnMasterNodeCreated(const std::string& master) { @@ -181,266 +174,264 @@ void TabletNodeZkAdapter::OnMasterNodeChanged(const std::string& master) { */ void TabletNodeZkAdapter::OnRootNodeCreated() { - LOG(INFO) << "root node is created"; - // watch root node - int zk_errno = zk::ZE_OK; - bool is_exist = false; - std::string root_tablet_addr; - while (!WatchRootNode(&is_exist, &root_tablet_addr, &zk_errno)) { - LOG(ERROR) << "fail to root node : " << zk::ZkErrnoToString(zk_errno); - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - } - LOG(INFO) << "watch root node success"; - if (!root_tablet_addr.empty()) { - tabletnode_impl_->SetRootTabletAddr(root_tablet_addr); - } + LOG(INFO) << "root node is created"; + // watch root node + int zk_errno = zk::ZE_OK; + bool is_exist = false; + std::string root_tablet_addr; + while (!WatchRootNode(&is_exist, &root_tablet_addr, &zk_errno)) { + LOG(ERROR) << "fail to root node : " << zk::ZkErrnoToString(zk_errno); + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + LOG(INFO) << "watch root node success"; + if (!root_tablet_addr.empty()) { + tabletnode_impl_->SetRootTabletAddr(root_tablet_addr); + } } void TabletNodeZkAdapter::OnRootNodeDeleted() { - LOG(INFO) << "root node is deleted"; - // watch root node - int zk_errno = zk::ZE_OK; - bool is_exist = false; - std::string root_tablet_addr; - while (!WatchRootNode(&is_exist, &root_tablet_addr, &zk_errno)) { - LOG(ERROR) << "fail to root node : " << zk::ZkErrnoToString(zk_errno); - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - } - LOG(INFO) << "watch root node success"; - if (!root_tablet_addr.empty()) { - tabletnode_impl_->SetRootTabletAddr(root_tablet_addr); - } + LOG(INFO) << "root node is deleted"; + // watch root node + int zk_errno = zk::ZE_OK; + bool is_exist = false; + std::string root_tablet_addr; + while (!WatchRootNode(&is_exist, &root_tablet_addr, &zk_errno)) { + LOG(ERROR) << "fail to root node : " << zk::ZkErrnoToString(zk_errno); + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + LOG(INFO) << "watch root node success"; + if (!root_tablet_addr.empty()) { + tabletnode_impl_->SetRootTabletAddr(root_tablet_addr); + } } void TabletNodeZkAdapter::OnRootNodeChanged(const std::string& root_tablet_addr) { - LOG(INFO) << "root node is changed"; - tabletnode_impl_->SetRootTabletAddr(root_tablet_addr); + LOG(INFO) << "root node is changed"; + tabletnode_impl_->SetRootTabletAddr(root_tablet_addr); } void TabletNodeZkAdapter::OnSafeModeMarkCreated() { - LOG(INFO) << "safemode mark node is created"; - tabletnode_impl_->EnterSafeMode(); + LOG(INFO) << "safemode mark node is created"; + tabletnode_impl_->EnterSafeMode(); } void TabletNodeZkAdapter::OnSafeModeMarkDeleted() { - LOG(INFO) << "safemode mark node is deleted"; - tabletnode_impl_->LeaveSafeMode(); + LOG(INFO) << "safemode mark node is deleted"; + tabletnode_impl_->LeaveSafeMode(); } void TabletNodeZkAdapter::OnKickMarkCreated() { - LOG(ERROR) << "kick mark node is created"; - _Exit(EXIT_FAILURE); -// Finalize(); -// tabletnode_impl_->ExitService(); + LOG(ERROR) << "kick mark node is created"; + int zk_errno = zk::ZE_OK; + // try delete kicknode, despite retcode + DeleteNode(kick_node_path_, &zk_errno); + _Exit(EXIT_FAILURE); } void TabletNodeZkAdapter::OnSelfNodeDeleted() { - LOG(ERROR) << "self node is deleted"; - _Exit(EXIT_FAILURE); -// tabletnode_impl_->ExitService(); + LOG(ERROR) << "self node is deleted! try reconnect..."; + Finalize(); + Init(); } void TabletNodeZkAdapter::OnChildrenChanged(const std::string& path, const std::vector& name_list, const std::vector& data_list) { - LOG(ERROR) << "unexpected children event on path : " << path; + LOG(ERROR) << "unexpected children event on path : " << path; } -void TabletNodeZkAdapter::OnNodeValueChanged(const std::string& path, - const std::string& value) { - if (path.compare(kRootTabletNodePath) == 0) { - OnRootNodeChanged(value); - } else { - LOG(ERROR) << "unexpected value event on path : " << path; - } +void TabletNodeZkAdapter::OnNodeValueChanged(const std::string& path, const std::string& value) { + if (path.compare(kRootTabletNodePath) == 0) { + OnRootNodeChanged(value); + } else { + LOG(ERROR) << "unexpected value event on path : " << path; + } } void TabletNodeZkAdapter::OnNodeCreated(const std::string& path) { - if (path.compare(kSafeModeNodePath) == 0) { - OnSafeModeMarkCreated(); - } else if (path.compare(kRootTabletNodePath) == 0) { - OnRootNodeCreated(); - } else if (path.compare(kick_node_path_) == 0) { - OnKickMarkCreated(); - } else { - LOG(ERROR) << "unexcepted node create event on path : " << path; - } + if (path.compare(kSafeModeNodePath) == 0) { + OnSafeModeMarkCreated(); + } else if (path.compare(kRootTabletNodePath) == 0) { + OnRootNodeCreated(); + } else if (path.compare(kick_node_path_) == 0) { + OnKickMarkCreated(); + } else { + LOG(ERROR) << "unexcepted node create event on path : " << path; + } } void TabletNodeZkAdapter::OnNodeDeleted(const std::string& path) { - if (path.compare(kSafeModeNodePath) == 0) { - OnSafeModeMarkDeleted(); - } else if (path.compare(kRootTabletNodePath) == 0) { - OnRootNodeDeleted(); - } else if (path.compare(serve_node_path_) == 0) { - OnSelfNodeDeleted(); - } else { - LOG(ERROR) << "unexcepted node delete event on path : " << path; - } -} - -void TabletNodeZkAdapter::OnWatchFailed(const std::string& path, - int watch_type, int err) { - LOG(ERROR) << "watch " << path << " fail!"; - _Exit(EXIT_FAILURE); + if (path.compare(kSafeModeNodePath) == 0) { + OnSafeModeMarkDeleted(); + } else if (path.compare(kRootTabletNodePath) == 0) { + OnRootNodeDeleted(); + } else if (path.compare(server_node_path_) == 0) { + OnSelfNodeDeleted(); + } else { + LOG(ERROR) << "unexcepted node delete event on path : " << path; + } +} + +void TabletNodeZkAdapter::OnWatchFailed(const std::string& path, int watch_type, int err) { + if (path.compare(server_node_path_) != 0 || path.compare(kick_node_path_) != 0) { + LOG(WARNING) << "unknown/stale node path: " << path; + return; + } + LOG(ERROR) << "watch " << path << " fail!"; + _Exit(EXIT_FAILURE); } void TabletNodeZkAdapter::OnSessionTimeout() { - LOG(ERROR) << "zk session timeout!"; - _Exit(EXIT_FAILURE); + LOG(ERROR) << "zk session timeout! try reconnect..."; + Finalize(); + Init(); } FakeTabletNodeZkAdapter::FakeTabletNodeZkAdapter(TabletNodeImpl* tabletnode_impl, const std::string& server_addr) : tabletnode_impl_(tabletnode_impl), server_addr_(server_addr) { - fake_path_ = FLAGS_tera_fake_zk_path_prefix + "/"; + fake_path_ = FLAGS_tera_fake_zk_path_prefix + "/"; } void FakeTabletNodeZkAdapter::Init() { - // get session - tabletnode_impl_->SetSessionId(FLAGS_tera_tabletnode_port); - tabletnode_impl_->SetTabletNodeStatus(TabletNodeImpl::kIsRunning); + // get session + tabletnode_impl_->SetSessionId(FLAGS_tera_tabletnode_port); + tabletnode_impl_->SetTabletNodeStatus(TabletNodeImpl::kIsRunning); - if (!Register(tabletnode_impl_->GetSessionId())) { - LOG(ERROR) << "fail to create fake serve-node."; - _Exit(EXIT_FAILURE); - } - LOG(INFO) << "create fake serve-node success: " << tabletnode_impl_->GetSessionId(); + if (!Register(tabletnode_impl_->GetSessionId())) { + LOG(ERROR) << "fail to create fake serve-node."; + _Exit(EXIT_FAILURE); + } + LOG(INFO) << "create fake serve-node success: " << tabletnode_impl_->GetSessionId(); } bool FakeTabletNodeZkAdapter::Register(const std::string& session_id, int* zk_code) { - MutexLock locker(&mutex_); - std::string node_name = fake_path_ + kTsListPath + "/" + session_id; + MutexLock locker(&mutex_); + std::string node_name = fake_path_ + kTsListPath + "/" + session_id; - if (!zk::FakeZkUtil::WriteNode(node_name, server_addr_)) { - LOG(ERROR) << "fake zk error: " << node_name - << ", " << server_addr_; - _Exit(EXIT_FAILURE); - } - return true; + if (!zk::FakeZkUtil::WriteNode(node_name, server_addr_)) { + LOG(ERROR) << "fake zk error: " << node_name << ", " << server_addr_; + _Exit(EXIT_FAILURE); + } + return true; } bool FakeTabletNodeZkAdapter::GetRootTableAddr(std::string* root_table_addr) { - MutexLock locker(&mutex_); - std::string root_table = fake_path_ + kRootTabletNodePath; - if (!zk::FakeZkUtil::ReadNode(root_table, root_table_addr)) { - LOG(ERROR) << "fake zk error: " << root_table - << ", " << *root_table_addr; - _Exit(EXIT_FAILURE); - } - return true; + MutexLock locker(&mutex_); + std::string root_table = fake_path_ + kRootTabletNodePath; + if (!zk::FakeZkUtil::ReadNode(root_table, root_table_addr)) { + LOG(ERROR) << "fake zk error: " << root_table << ", " << *root_table_addr; + _Exit(EXIT_FAILURE); + } + return true; } InsTabletNodeZkAdapter::InsTabletNodeZkAdapter(TabletNodeImpl* tabletnode_impl, const std::string& server_addr) - : tabletnode_impl_(tabletnode_impl), server_addr_(server_addr), ins_sdk_(NULL) { - -} + : tabletnode_impl_(tabletnode_impl), server_addr_(server_addr), ins_sdk_(NULL) {} -static void InsOnKick(const galaxy::ins::sdk::WatchParam& param, - galaxy::ins::sdk::SDKError error) { - LOG(INFO) << "recv kick event" ; - InsTabletNodeZkAdapter* ins_adp = static_cast(param.context); - ins_adp->OnKickMarkCreated(); +static void InsOnKick(const galaxy::ins::sdk::WatchParam& param, galaxy::ins::sdk::SDKError error) { + LOG(INFO) << "recv kick event"; + InsTabletNodeZkAdapter* ins_adp = static_cast(param.context); + ins_adp->OnKickMarkCreated(); } static void InsOnLockChange(const galaxy::ins::sdk::WatchParam& param, - galaxy::ins::sdk::SDKError error) { - LOG(INFO) << "recv lock change event" ; - InsTabletNodeZkAdapter* ins_adp = static_cast(param.context); - ins_adp->OnLockChange(param.value, param.deleted); + galaxy::ins::sdk::SDKError error) { + LOG(INFO) << "recv lock change event"; + InsTabletNodeZkAdapter* ins_adp = static_cast(param.context); + ins_adp->OnLockChange(param.value, param.deleted); } static void InsOnMetaChange(const galaxy::ins::sdk::WatchParam& param, galaxy::ins::sdk::SDKError error) { - LOG(INFO) << "recv meta change event" ; - InsTabletNodeZkAdapter* ins_adp = static_cast(param.context); - ins_adp->OnMetaChange(param.value, param.deleted); + LOG(INFO) << "recv meta change event"; + InsTabletNodeZkAdapter* ins_adp = static_cast(param.context); + ins_adp->OnMetaChange(param.value, param.deleted); } void InsTabletNodeZkAdapter::Init() { - std::string root_path = FLAGS_tera_ins_root_path; - galaxy::ins::sdk::SDKError err; - // create session - ins_sdk_ = new galaxy::ins::sdk::InsSDK(FLAGS_tera_ins_addr_list); - ins_sdk_->SetTimeoutTime(FLAGS_tera_ins_session_timeout); - - // create node - std::string lock_key = root_path + kTsListPath + "/" + server_addr_; - ins_sdk_->Delete(lock_key, &err); - CHECK(ins_sdk_->Lock(lock_key, &err)) << "register fail"; - - // get session id - // session-id may be changed during Lock(), so we must be call Lock() first, and then get the session-id. - std::string session_id = ins_sdk_->GetSessionID(); - tabletnode_impl_->SetSessionId(session_id); - tabletnode_impl_->SetTabletNodeStatus(TabletNodeImpl::kIsRunning); - LOG(INFO) << "create ts-node success: " << session_id; - - // create watch node - std::string kick_key = root_path + kKickPath + "/" + session_id; - CHECK(ins_sdk_->Watch(kick_key, &InsOnKick, this, &err)) << "watch kick fail"; - CHECK(ins_sdk_->Watch(lock_key, &InsOnLockChange, this, &err)) - << "watch lock fail"; - std::string meta_table = root_path + kRootTabletNodePath; - CHECK(ins_sdk_->Watch(meta_table, &InsOnMetaChange, this, &err)) - << "watch meta table fail"; -} - -InsTabletNodeZkAdapter::~InsTabletNodeZkAdapter() { -} + std::string root_path = FLAGS_tera_ins_root_path; + galaxy::ins::sdk::SDKError err; + // create session + ins_sdk_ = new galaxy::ins::sdk::InsSDK(FLAGS_tera_ins_addr_list); + ins_sdk_->SetTimeoutTime(FLAGS_tera_ins_session_timeout); + + // create node + std::string lock_key = root_path + kTsListPath + "/" + server_addr_; + ins_sdk_->Delete(lock_key, &err); + CHECK(ins_sdk_->Lock(lock_key, &err)) << "register fail"; + + // get session id + // session-id may be changed during Lock(), so we must be call Lock() first, + // and then get the session-id. + std::string session_id = ins_sdk_->GetSessionID(); + tabletnode_impl_->SetSessionId(session_id); + tabletnode_impl_->SetTabletNodeStatus(TabletNodeImpl::kIsRunning); + LOG(INFO) << "create ts-node success: " << session_id; + + // create watch node + kick_node_path_ = root_path + kKickPath + "/" + session_id; + CHECK(ins_sdk_->Watch(kick_node_path_, &InsOnKick, this, &err)) << "watch kick fail"; + CHECK(ins_sdk_->Watch(lock_key, &InsOnLockChange, this, &err)) << "watch lock fail"; + std::string meta_table = root_path + kRootTabletNodePath; + CHECK(ins_sdk_->Watch(meta_table, &InsOnMetaChange, this, &err)) << "watch meta table fail"; +} + +InsTabletNodeZkAdapter::~InsTabletNodeZkAdapter() {} void InsTabletNodeZkAdapter::Exit() { - std::string root_path = FLAGS_tera_ins_root_path; - galaxy::ins::sdk::SDKError err; - std::string lock_key = root_path + kTsListPath + "/" + server_addr_; - LOG(INFO) << "tabletserver exit, unlock " << lock_key; - ins_sdk_->UnLock(lock_key, &err); - return; + std::string root_path = FLAGS_tera_ins_root_path; + galaxy::ins::sdk::SDKError err; + std::string lock_key = root_path + kTsListPath + "/" + server_addr_; + LOG(INFO) << "tabletserver exit, unlock " << lock_key; + ins_sdk_->UnLock(lock_key, &err); + return; } void InsTabletNodeZkAdapter::OnMetaChange(std::string meta_addr, bool deleted) { - (void) meta_addr; - (void) deleted; - std::string cur_meta; - std::string root_path = FLAGS_tera_ins_root_path; - std::string meta_table = root_path + kRootTabletNodePath; - galaxy::ins::sdk::SDKError err; - GetRootTableAddr(&cur_meta); - CHECK(ins_sdk_->Watch(meta_table, &InsOnMetaChange, this, &err)) - << "watch meta table fail"; - if (!cur_meta.empty()) { - MutexLock locker(&mutex_); - tabletnode_impl_->SetRootTabletAddr(cur_meta); - } + (void)meta_addr; + (void)deleted; + std::string cur_meta; + std::string root_path = FLAGS_tera_ins_root_path; + std::string meta_table = root_path + kRootTabletNodePath; + galaxy::ins::sdk::SDKError err; + GetRootTableAddr(&cur_meta); + CHECK(ins_sdk_->Watch(meta_table, &InsOnMetaChange, this, &err)) << "watch meta table fail"; + if (!cur_meta.empty()) { + MutexLock locker(&mutex_); + tabletnode_impl_->SetRootTabletAddr(cur_meta); + } } void InsTabletNodeZkAdapter::OnKickMarkCreated() { - LOG(ERROR) << "I am kicked by master"; - this->Exit(); - _Exit(EXIT_FAILURE); + LOG(ERROR) << "I am kicked by master"; + // try delete kicknode, despite retcode + galaxy::ins::sdk::SDKError error; + ins_sdk_->Delete(kick_node_path_, &error); + this->Exit(); + _Exit(EXIT_FAILURE); } void InsTabletNodeZkAdapter::OnLockChange(std::string session_id, bool deleted) { - LOG(INFO) << "[OnLockChange] session_id = " << session_id - << " deleted = " << deleted - << " now_session_id = " << ins_sdk_->GetSessionID(); - if (deleted || session_id != ins_sdk_->GetSessionID()) { - LOG(ERROR) << "I lost my lock , so quit"; - _Exit(EXIT_FAILURE); - } + LOG(INFO) << "[OnLockChange] session_id = " << session_id << " deleted = " << deleted + << " now_session_id = " << ins_sdk_->GetSessionID(); + if (deleted || session_id != ins_sdk_->GetSessionID()) { + LOG(ERROR) << "I lost my lock , try reconnect..."; + Init(); + } } bool InsTabletNodeZkAdapter::GetRootTableAddr(std::string* root_table_addr) { - MutexLock locker(&mutex_); - std::string root_path = FLAGS_tera_ins_root_path; - std::string meta_table = root_path + kRootTabletNodePath; - galaxy::ins::sdk::SDKError err; - std::string value; - CHECK(ins_sdk_->Get(meta_table, &value, &err)); - *root_table_addr = value; - return true; -} - -} // namespace tabletnode -} // namespace tera + MutexLock locker(&mutex_); + std::string root_path = FLAGS_tera_ins_root_path; + std::string meta_table = root_path + kRootTabletNodePath; + galaxy::ins::sdk::SDKError err; + std::string value; + CHECK(ins_sdk_->Get(meta_table, &value, &err)); + *root_table_addr = value; + return true; +} + +} // namespace tabletnode +} // namespace tera diff --git a/src/tabletnode/tabletnode_zk_adapter.h b/src/tabletnode/tabletnode_zk_adapter.h index f11b73384..00b9905fd 100644 --- a/src/tabletnode/tabletnode_zk_adapter.h +++ b/src/tabletnode/tabletnode_zk_adapter.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_TABLETNODE_TABLETNODE_ZK_ADAPTER_H_ -#define TERA_TABLETNODE_TABLETNODE_ZK_ADAPTER_H_ +#ifndef TERA_TABLETNODE_TABLETNODE_ZK_ADAPTER_H_ +#define TERA_TABLETNODE_TABLETNODE_ZK_ADAPTER_H_ #include #include @@ -11,10 +11,10 @@ #include "tabletnode/tabletnode_impl.h" #include "zk/zk_adapter.h" -namespace galaxy{ -namespace ins{ -namespace sdk{ - class InsSDK; +namespace galaxy { +namespace ins { +namespace sdk { +class InsSDK; } } } @@ -23,141 +23,131 @@ namespace tera { namespace tabletnode { class TabletNodeZkAdapterBase : public zk::ZooKeeperAdapter { -public: - virtual ~TabletNodeZkAdapterBase() {}; - virtual void Init() = 0; - virtual bool GetRootTableAddr(std::string* root_table_addr) = 0; - virtual void Exit() {}; + public: + virtual ~TabletNodeZkAdapterBase(){}; + virtual void Init() = 0; + virtual bool GetRootTableAddr(std::string* root_table_addr) = 0; + virtual void Exit(){}; }; class TabletNodeZkAdapter : public TabletNodeZkAdapterBase { -public: - TabletNodeZkAdapter(TabletNodeImpl* tabletnode_impl, - const std::string & server_addr); - virtual ~TabletNodeZkAdapter(); - virtual void Init(); - virtual bool GetRootTableAddr(std::string* root_table_addr); - -private: - virtual bool Register(const std::string& session_id, int* zk_code); - virtual bool Unregister(int* zk_code); - - virtual bool WatchMaster(std::string* master, int* zk_code); - virtual bool WatchSafeModeMark(bool* is_exist, int* zk_code); - virtual bool WatchKickMark(bool* is_exist, int* zk_code); - virtual bool WatchSelfNode(bool* is_exist, int* zk_code); - virtual bool WatchRootNode(bool* is_exist, std::string* root_tablet_addr, int* zk_errno); - - virtual void OnSafeModeMarkCreated(); - virtual void OnSafeModeMarkDeleted(); - virtual void OnKickMarkCreated(); - virtual void OnSelfNodeDeleted(); - virtual void OnRootNodeCreated(); - virtual void OnRootNodeDeleted(); - virtual void OnRootNodeChanged(const std::string& root_tablet_addr); - - virtual void OnChildrenChanged(const std::string& path, - const std::vector& name_list, - const std::vector& data_list); - virtual void OnNodeValueChanged(const std::string& path, - const std::string& value); - virtual void OnNodeCreated(const std::string& path); - virtual void OnNodeDeleted(const std::string& path); - virtual void OnWatchFailed(const std::string& path, int watch_type, int err); - virtual void OnSessionTimeout(); - -private: - TabletNodeImpl * tabletnode_impl_; - std::string server_addr_; - std::string serve_node_path_; - std::string kick_node_path_; + public: + TabletNodeZkAdapter(TabletNodeImpl* tabletnode_impl, const std::string& server_addr); + virtual ~TabletNodeZkAdapter(); + virtual void Init(); + virtual bool GetRootTableAddr(std::string* root_table_addr); + + private: + virtual bool Register(const std::string& session_id, int* zk_code); + virtual bool Unregister(int* zk_code); + + virtual bool WatchMaster(std::string* master, int* zk_code); + virtual bool WatchSafeModeMark(bool* is_exist, int* zk_code); + virtual bool WatchKickMark(bool* is_exist, int* zk_code); + virtual bool WatchSelfNode(bool* is_exist, int* zk_code); + virtual bool WatchRootNode(bool* is_exist, std::string* root_tablet_addr, int* zk_errno); + + virtual void OnSafeModeMarkCreated(); + virtual void OnSafeModeMarkDeleted(); + virtual void OnKickMarkCreated(); + virtual void OnSelfNodeDeleted(); + virtual void OnRootNodeCreated(); + virtual void OnRootNodeDeleted(); + virtual void OnRootNodeChanged(const std::string& root_tablet_addr); + + virtual void OnChildrenChanged(const std::string& path, const std::vector& name_list, + const std::vector& data_list); + virtual void OnNodeValueChanged(const std::string& path, const std::string& value); + virtual void OnNodeCreated(const std::string& path); + virtual void OnNodeDeleted(const std::string& path); + virtual void OnWatchFailed(const std::string& path, int watch_type, int err); + virtual void OnSessionTimeout(); + + private: + TabletNodeImpl* tabletnode_impl_; + std::string server_addr_; + std::string server_node_path_; + std::string kick_node_path_; }; class MockTabletNodeZkAdapter : public TabletNodeZkAdapter { -public: - MockTabletNodeZkAdapter(TabletNodeImpl* tabletnode_impl, - const std::string & server_addr) : - TabletNodeZkAdapter(tabletnode_impl, server_addr) {} - virtual ~MockTabletNodeZkAdapter() {} -private: - virtual void OnKickMarkCreated() {} - virtual void OnSelfNodeDeleted() {} - virtual void OnWatchFailed(const std::string& /*path*/, int /*watch_type*/, int /*err*/) {} - virtual void OnSessionTimeout() {} + public: + MockTabletNodeZkAdapter(TabletNodeImpl* tabletnode_impl, const std::string& server_addr) + : TabletNodeZkAdapter(tabletnode_impl, server_addr) {} + virtual ~MockTabletNodeZkAdapter() {} + + private: + virtual void OnKickMarkCreated() {} + virtual void OnSelfNodeDeleted() {} + virtual void OnWatchFailed(const std::string& /*path*/, int /*watch_type*/, int /*err*/) {} + virtual void OnSessionTimeout() {} }; class FakeTabletNodeZkAdapter : public TabletNodeZkAdapterBase { -public: - FakeTabletNodeZkAdapter(TabletNodeImpl* tabletnode_impl, - const std::string& server_addr); - virtual ~FakeTabletNodeZkAdapter() {} - virtual void Init(); - virtual bool GetRootTableAddr(std::string* root_table_addr); - -private: - bool Register(const std::string& session_id, int* zk_code = NULL); - virtual void OnChildrenChanged(const std::string& path, - const std::vector& name_list, - const std::vector& data_list) {} - virtual void OnNodeValueChanged(const std::string& path, - const std::string& value) {} - virtual void OnNodeCreated(const std::string& path) {} - virtual void OnNodeDeleted(const std::string& path) {} - virtual void OnWatchFailed(const std::string& path, int watch_type, int err) {} - virtual void OnSessionTimeout() {} - -private: - mutable Mutex mutex_; - TabletNodeImpl * tabletnode_impl_; - std::string server_addr_; - std::string serve_node_path_; - std::string kick_node_path_; - std::string fake_path_; + public: + FakeTabletNodeZkAdapter(TabletNodeImpl* tabletnode_impl, const std::string& server_addr); + virtual ~FakeTabletNodeZkAdapter() {} + virtual void Init(); + virtual bool GetRootTableAddr(std::string* root_table_addr); + + private: + bool Register(const std::string& session_id, int* zk_code = NULL); + virtual void OnChildrenChanged(const std::string& path, const std::vector& name_list, + const std::vector& data_list) {} + virtual void OnNodeValueChanged(const std::string& path, const std::string& value) {} + virtual void OnNodeCreated(const std::string& path) {} + virtual void OnNodeDeleted(const std::string& path) {} + virtual void OnWatchFailed(const std::string& path, int watch_type, int err) {} + virtual void OnSessionTimeout() {} + + private: + mutable Mutex mutex_; + TabletNodeImpl* tabletnode_impl_; + std::string server_addr_; + std::string server_node_path_; + std::string kick_node_path_; + std::string fake_path_; }; - class InsTabletNodeZkAdapter : public TabletNodeZkAdapterBase { -public: - InsTabletNodeZkAdapter(TabletNodeImpl* tabletnode_impl, - const std::string& server_addr); - virtual ~InsTabletNodeZkAdapter(); - virtual void Init(); - virtual bool GetRootTableAddr(std::string* root_table_addr); - virtual void Exit(); - virtual void OnKickMarkCreated(); - virtual void OnLockChange(std::string session_id, bool deleted); - void OnMetaChange(std::string meta_addr, bool deleted); -private: - virtual void OnChildrenChanged(const std::string& path, - const std::vector& name_list, - const std::vector& data_list) {} - virtual void OnNodeValueChanged(const std::string& path, - const std::string& value) {} - virtual void OnNodeCreated(const std::string& path) {} - virtual void OnNodeDeleted(const std::string& path) {} - virtual void OnWatchFailed(const std::string& path, int watch_type, int err) {} - virtual void OnSessionTimeout() {} - -private: - mutable Mutex mutex_; - TabletNodeImpl * tabletnode_impl_; - std::string server_addr_; - std::string serve_node_path_; - std::string kick_node_path_; - galaxy::ins::sdk::InsSDK* ins_sdk_; + public: + InsTabletNodeZkAdapter(TabletNodeImpl* tabletnode_impl, const std::string& server_addr); + virtual ~InsTabletNodeZkAdapter(); + virtual void Init(); + virtual bool GetRootTableAddr(std::string* root_table_addr); + virtual void Exit(); + virtual void OnKickMarkCreated(); + virtual void OnLockChange(std::string session_id, bool deleted); + void OnMetaChange(std::string meta_addr, bool deleted); + + private: + virtual void OnChildrenChanged(const std::string& path, const std::vector& name_list, + const std::vector& data_list) {} + virtual void OnNodeValueChanged(const std::string& path, const std::string& value) {} + virtual void OnNodeCreated(const std::string& path) {} + virtual void OnNodeDeleted(const std::string& path) {} + virtual void OnWatchFailed(const std::string& path, int watch_type, int err) {} + virtual void OnSessionTimeout() {} + + private: + mutable Mutex mutex_; + TabletNodeImpl* tabletnode_impl_; + std::string server_addr_; + std::string server_node_path_; + std::string kick_node_path_; + galaxy::ins::sdk::InsSDK* ins_sdk_; }; class MockInsTabletNodeZkAdapter : public InsTabletNodeZkAdapter { -public: - MockInsTabletNodeZkAdapter(TabletNodeImpl* tabletnode_impl, - const std::string& server_addr) : - InsTabletNodeZkAdapter(tabletnode_impl, server_addr) {} - virtual ~MockInsTabletNodeZkAdapter() {} - virtual void OnKickMarkCreated() {} - virtual void OnLockChange(std::string /*session_id*/, bool /*deleted*/) {} + public: + MockInsTabletNodeZkAdapter(TabletNodeImpl* tabletnode_impl, const std::string& server_addr) + : InsTabletNodeZkAdapter(tabletnode_impl, server_addr) {} + virtual ~MockInsTabletNodeZkAdapter() {} + virtual void OnKickMarkCreated() {} + virtual void OnLockChange(std::string /*session_id*/, bool /*deleted*/) {} }; -} // namespace tabletnode -} // namespace tera +} // namespace tabletnode +} // namespace tera -#endif // TERA_TABLETNODE_TABLETNODE_ZK_ADAPTER_H_ +#endif // TERA_TABLETNODE_TABLETNODE_ZK_ADAPTER_H_ diff --git a/src/tabletnode/test/mock_tablet_manager.h b/src/tabletnode/test/mock_tablet_manager.h index c6b8f323f..516e1d700 100644 --- a/src/tabletnode/test/mock_tablet_manager.h +++ b/src/tabletnode/test/mock_tablet_manager.h @@ -13,37 +13,22 @@ namespace tera { namespace tabletnode { class MockTabletManager : public TabletManager { -public: - MOCK_METHOD6(AddTablet, - bool(const std::string& table_name, - const std::string& table_path, - const std::string& key_start, - const std::string& key_end, - io::TabletIO** tablet_io, - StatusCode* status)); - MOCK_METHOD4(RemoveTablet, - bool(const std::string& table_name, - const std::string& key_start, - const std::string& key_end, - StatusCode* status)); - MOCK_METHOD4(GetTablet, - io::TabletIO*(const std::string& table_name, - const std::string& key_start, - const std::string& key_end, - StatusCode* status)); - MOCK_METHOD3(GetTablet, - io::TabletIO*(const std::string& table_name, - const std::string& key, - StatusCode* status)); - MOCK_METHOD1(GetAllTabletMeta, - void(std::vector* tablet_meta_list)); - MOCK_METHOD1(GetAllTablets, - void(std::vector* taletio_list)); - MOCK_METHOD2(RemoveAllTablets, - bool(bool force, StatusCode* status)); + public: + MOCK_METHOD6(AddTablet, bool(const std::string& table_name, const std::string& table_path, + const std::string& key_start, const std::string& key_end, + io::TabletIO** tablet_io, StatusCode* status)); + MOCK_METHOD4(RemoveTablet, bool(const std::string& table_name, const std::string& key_start, + const std::string& key_end, StatusCode* status)); + MOCK_METHOD4(GetTablet, io::TabletIO*(const std::string& table_name, const std::string& key_start, + const std::string& key_end, StatusCode* status)); + MOCK_METHOD3(GetTablet, io::TabletIO*(const std::string& table_name, const std::string& key, + StatusCode* status)); + MOCK_METHOD1(GetAllTabletMeta, void(std::vector* tablet_meta_list)); + MOCK_METHOD1(GetAllTablets, void(std::vector* taletio_list)); + MOCK_METHOD2(RemoveAllTablets, bool(bool force, StatusCode* status)); }; } // namespace tabletnode } // namespace tera -#endif // TERA_TABLETNODE_MOCK_TABLET_MANAGER_H_ +#endif // TERA_TABLETNODE_MOCK_TABLET_MANAGER_H_ diff --git a/src/tabletnode/test/mock_zk_adapter.h b/src/tabletnode/test/mock_zk_adapter.h index 701581c89..d42c7ff7d 100644 --- a/src/tabletnode/test/mock_zk_adapter.h +++ b/src/tabletnode/test/mock_zk_adapter.h @@ -11,27 +11,18 @@ namespace tera { namespace tabletnode { class MockDummyNodeZkAdapter : public zk::DummyNodeZkAdapter { -public: - MOCK_METHOD3(OnChildrenChanged, - void(const std::string& path, - const std::vector& name_list, - const std::vector& data_list)); - MOCK_METHOD2(OnNodeValueChanged, - void(const std::string& path, - const std::string& value)); - MOCK_METHOD1(OnNodeCreated, - void(const std::string& path)); - MOCK_METHOD1(OnNodeDeleted, - void(const std::string& path)); - MOCK_METHOD3(OnWatchFailed, - void(const std::string& path, - int watch_type, - int err)); - MOCK_METHOD1(OnSessionTimeout, - void()); + public: + MOCK_METHOD3(OnChildrenChanged, + void(const std::string& path, const std::vector& name_list, + const std::vector& data_list)); + MOCK_METHOD2(OnNodeValueChanged, void(const std::string& path, const std::string& value)); + MOCK_METHOD1(OnNodeCreated, void(const std::string& path)); + MOCK_METHOD1(OnNodeDeleted, void(const std::string& path)); + MOCK_METHOD3(OnWatchFailed, void(const std::string& path, int watch_type, int err)); + MOCK_METHOD1(OnSessionTimeout, void()); }; -} // namespace tabletnode -} // namespace tera +} // namespace tabletnode +} // namespace tera -#endif // TERA_TABLETNODE_MOCK_ZK_ADAPTER_H_ +#endif // TERA_TABLETNODE_MOCK_ZK_ADAPTER_H_ diff --git a/src/tabletnode/test/readtablet_test.cc b/src/tabletnode/test/readtablet_test.cc new file mode 100644 index 000000000..b8f9ebed2 --- /dev/null +++ b/src/tabletnode/test/readtablet_test.cc @@ -0,0 +1,224 @@ +// Copyright (c) 2018, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#include +#include +#include "gmock/gmock.h" + +#include "io/mock_tablet_io.h" +#include "mock_tablet_manager.h" +#include "tabletnode/tabletnode_impl.h" +#include "common/thread_pool.h" + +using ::testing::Invoke; +using ::testing::Return; +using ::testing::_; + +namespace tera { +namespace tabletnode { +class ReadTabletTest : public ::testing::Test { + public: + ReadTabletTest() + : mock_tablet_manager_(std::make_shared()), thread_pool_(new ThreadPool) {} + + private: + std::shared_ptr mock_tablet_manager_; + io::MockTabletIO mock_io_; + ThreadPool* thread_pool_; + ReadTabletResponse res_; + ReadTabletRequest req_; + AutoResetEvent done_event_; + std::atomic done_time{0}; + + public: + io::TabletIO* GetTablet(const std::string& table_name, const std::string& key, + StatusCode* status) { + *status = StatusCode::kTabletNodeOk; + mock_io_.AddRef(); + return &mock_io_; + } + + bool ReadCells(const RowReaderInfo& row_reader, RowResult* value_list, uint64_t snapshot_id, + StatusCode* status, int64_t timeout_ms) { + return true; + } + + bool ReadCellsInc(const RowReaderInfo& row_reader, RowResult* value_list, uint64_t snapshot_id, + StatusCode* status, int64_t timeout_ms) { + auto kv = value_list->add_key_values(); + kv->set_key(row_reader.key()); + kv->set_value(std::to_string(std::stoi(row_reader.key()) + 1)); + return true; + } + + bool ReadCellsWithNull(const RowReaderInfo& row_reader, RowResult* value_list, + uint64_t snapshot_id, StatusCode* status, int64_t timeout_ms) { + int key = std::stoi(row_reader.key()); + if (key % 3 == 0) { + *status = kKeyNotExist; + return false; + } else { + auto kv = value_list->add_key_values(); + kv->set_key(row_reader.key()); + kv->set_value(row_reader.key()); + return true; + } + } + + bool ReadCellsTimeout(const RowReaderInfo& row_reader, RowResult* value_list, + uint64_t snapshot_id, StatusCode* status, int64_t timeout_ms) { + SetStatusCode(kRPCTimeout, status); + } + + void BaseDone() { + EXPECT_EQ(res_.sequence_id(), req_.sequence_id()); + EXPECT_EQ(res_.status(), kTabletNodeOk); + EXPECT_EQ(res_.detail().status_size(), req_.row_info_list_size()); + EXPECT_EQ(done_time.fetch_add(1), 0); + done_event_.Set(); + } + + void CheckIncDone() { + EXPECT_EQ(res_.sequence_id(), req_.sequence_id()); + EXPECT_EQ(res_.status(), kTabletNodeOk); + EXPECT_EQ(res_.detail().status_size(), req_.row_info_list_size()); + EXPECT_EQ(done_time.fetch_add(1), 0); + EXPECT_GT(res_.detail().row_result_size(), 0); + int base_key = std::stoi(res_.detail().row_result(0).key_values(0).key()); + int base_value = std::stoi(res_.detail().row_result(0).key_values(0).value()); + EXPECT_EQ(base_key + 1, base_value); + for (int i = 1; i < res_.detail().row_result_size(); ++i) { + int key = std::stoi(res_.detail().row_result(i).key_values(0).key()); + int value = std::stoi(res_.detail().row_result(i).key_values(0).value()); + EXPECT_EQ(key, base_key + 1); + EXPECT_EQ(value, base_value + 1); + base_key = key; + base_value = value; + } + done_event_.Set(); + } + + void CheckNullDone() { + EXPECT_EQ(res_.sequence_id(), req_.sequence_id()); + EXPECT_EQ(res_.status(), kTabletNodeOk); + EXPECT_EQ(res_.detail().status_size(), req_.row_info_list_size()); + EXPECT_EQ(done_time.fetch_add(1), 0); + EXPECT_GT(res_.detail().row_result_size(), 0); + int row_result_index = 0; + for (int i = 0; i < res_.detail().status_size(); ++i) { + if (i % 3 == 0) { + EXPECT_EQ(res_.detail().status(i), kKeyNotExist); + } else { + EXPECT_EQ(res_.detail().status(i), kTabletNodeOk); + EXPECT_EQ(res_.detail().row_result(row_result_index).key_values(0).key(), + res_.detail().row_result(row_result_index).key_values(0).value()); + ++row_result_index; + } + } + done_event_.Set(); + } + + void CheckTimeoutDone() { + EXPECT_EQ(res_.sequence_id(), req_.sequence_id()); + EXPECT_EQ(res_.status(), kRPCTimeout); + EXPECT_EQ(res_.detail().status_size(), req_.row_info_list_size()); + EXPECT_EQ(done_time.fetch_add(1), 0); + EXPECT_EQ(res_.detail().row_result_size(), 0); + done_event_.Set(); + } + + google::protobuf::Closure* CreateDone(void (ReadTabletTest::*p)()) { + return google::protobuf::NewCallback(this, p); + } +}; + +TEST_F(ReadTabletTest, ParallelReadTablet) { + EXPECT_CALL(*mock_tablet_manager_, GetTablet(_, _, _)) + .WillRepeatedly(Invoke(this, &ReadTabletTest::GetTablet)); + EXPECT_CALL(mock_io_, ReadCells(_, _, _, _, _)) + .WillRepeatedly(Invoke(this, &ReadTabletTest::ReadCellsInc)); + + for (int i = 0; i != 280; ++i) { + auto row_info = req_.add_row_info_list(); + row_info->set_key(std::to_string(i)); + } + req_.set_sequence_id(2); + req_.set_tablet_name("read_table"); + + std::shared_ptr task = + std::make_shared(0, mock_tablet_manager_, &req_, &res_, + CreateDone(&ReadTabletTest::CheckIncDone), thread_pool_); + task->StartRead(); + done_event_.Wait(); + EXPECT_EQ(task->row_results_list_.size(), 10); +} + +TEST_F(ReadTabletTest, ParallelReadTabletWithNullValue) { + EXPECT_CALL(*mock_tablet_manager_, GetTablet(_, _, _)) + .WillRepeatedly(Invoke(this, &ReadTabletTest::GetTablet)); + EXPECT_CALL(mock_io_, ReadCells(_, _, _, _, _)) + .WillRepeatedly(Invoke(this, &ReadTabletTest::ReadCellsWithNull)); + + for (int i = 0; i != 180; ++i) { + auto row_info = req_.add_row_info_list(); + row_info->set_key(std::to_string(i)); + } + req_.set_sequence_id(2); + req_.set_tablet_name("read_table"); + + std::shared_ptr task = + std::make_shared(0, mock_tablet_manager_, &req_, &res_, + CreateDone(&ReadTabletTest::CheckNullDone), thread_pool_); + task->StartRead(); + done_event_.Wait(); + EXPECT_EQ(task->row_results_list_.size(), 6); +} + +TEST_F(ReadTabletTest, ReadTabletInOneThread) { + EXPECT_CALL(*mock_tablet_manager_, GetTablet(_, _, _)) + .WillRepeatedly(Invoke(this, &ReadTabletTest::GetTablet)); + EXPECT_CALL(mock_io_, ReadCells(_, _, _, _, _)) + .WillRepeatedly(Invoke(this, &ReadTabletTest::ReadCells)); + + req_.set_sequence_id(1); + req_.set_tablet_name("read_table"); + RowReaderInfo* row_info = req_.add_row_info_list(); + row_info->set_key("key"); + + std::shared_ptr task = std::make_shared( + 0, mock_tablet_manager_, &req_, &res_, CreateDone(&ReadTabletTest::BaseDone), thread_pool_); + task->StartRead(); + done_event_.Wait(); + EXPECT_EQ(task->row_results_list_.size(), 1); +} + +TEST_F(ReadTabletTest, ParallelReadTimeout) { + EXPECT_CALL(*mock_tablet_manager_, GetTablet(_, _, _)) + .WillRepeatedly(Invoke(this, &ReadTabletTest::GetTablet)); + EXPECT_CALL(mock_io_, ReadCells(_, _, _, _, _)) + .WillRepeatedly(Invoke(this, &ReadTabletTest::ReadCellsTimeout)); + + req_.set_sequence_id(1); + req_.set_tablet_name("read_table"); + for (int i = 0; i != 360; ++i) { + auto row_info = req_.add_row_info_list(); + row_info->set_key(std::to_string(i)); + } + + std::shared_ptr task = + std::make_shared(0, mock_tablet_manager_, &req_, &res_, + CreateDone(&ReadTabletTest::CheckTimeoutDone), thread_pool_); + task->StartRead(); + done_event_.Wait(); + EXPECT_EQ(task->row_results_list_.size(), 10); +} + +} // namespace tabletnode +} // namespace tera + +int main(int argc, char** argv) { + ::google::InitGoogleLogging(argv[0]); + ::google::ParseCommandLineFlags(&argc, &argv, true); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/tabletnode/test/remote_tabletnode_test.cc b/src/tabletnode/test/remote_tabletnode_test.cc new file mode 100644 index 000000000..365f4f57b --- /dev/null +++ b/src/tabletnode/test/remote_tabletnode_test.cc @@ -0,0 +1,227 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include "gflags/gflags.h" +#include "gtest/gtest.h" +#include "tabletnode/remote_tabletnode.h" +#include "tabletnode/tabletnode_impl.h" + +DECLARE_int32(tera_tabletnode_ctrl_thread_num); +DECLARE_string(tera_leveldb_env_type); +DECLARE_string(tera_tabletnode_path_prefix); + +namespace tera { +namespace tabletnode { +namespace test { + +class MockClosure : public google::protobuf::Closure { + public: + MockClosure() {} + virtual ~MockClosure() {} + void Run() {} +}; + +class RemoteTabletNodeTest : public ::testing::Test { + public: + RemoteTabletNodeTest() { + FLAGS_tera_leveldb_env_type = "local"; + FLAGS_tera_tabletnode_path_prefix = "./remote_tabletnode_test"; + tabletnode_impl_.reset(new TabletNodeImpl); + FLAGS_tera_tabletnode_ctrl_thread_num = 1; + remote_ts_.reset(new RemoteTabletNode(tabletnode_impl_.get())); + } + + virtual ~RemoteTabletNodeTest() {} + virtual void SetUp() { + remote_ts_->ctrl_thread_pool_->Stop(true); + remote_ts_->lightweight_ctrl_thread_pool_->Stop(true); + } + virtual void TearDown() {} + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + private: + std::unique_ptr tabletnode_impl_; + std::unique_ptr remote_ts_; +}; + +int main(int argc, char* argv[]) { + ::google::InitGoogleLogging(argv[0]); + ::google::ParseCommandLineFlags(&argc, &argv, true); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + +TEST_F(RemoteTabletNodeTest, LoadTablet) { + std::unique_ptr request(new LoadTabletRequest); + request->set_path("test/tablet00000001"); + std::unique_ptr response(new LoadTabletResponse); + std::unique_ptr done(new MockClosure); + std::unique_ptr controller(new sofa::pbrpc::RpcController); + remote_ts_->LoadTablet(controller.get(), request.get(), response.get(), done.get()); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->queue_.size(), 1); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->PendingNum(), 1); + EXPECT_EQ(remote_ts_->tablets_ctrl_status_.size(), 1); + EXPECT_EQ(remote_ts_->tablets_ctrl_status_[request->path()], + RemoteTabletNode::TabletCtrlStatus::kCtrlWaitLoad); + + std::unique_ptr request4(new LoadTabletRequest); + request4->set_path("test/tablet00000001"); + std::unique_ptr response4(new LoadTabletResponse); + std::unique_ptr done4(new MockClosure); + std::unique_ptr controller4(new sofa::pbrpc::RpcController); + remote_ts_->LoadTablet(controller4.get(), request4.get(), response4.get(), done4.get()); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->queue_.size(), 1); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->PendingNum(), 1); + EXPECT_EQ(remote_ts_->tablets_ctrl_status_.size(), 1); + EXPECT_EQ(remote_ts_->tablets_ctrl_status_[request4->path()], + RemoteTabletNode::TabletCtrlStatus::kCtrlWaitLoad); + EXPECT_EQ(remote_ts_->lightweight_ctrl_thread_pool_->queue_.size(), 1); + + std::unique_ptr request2(new LoadTabletRequest); + request2->set_path("test/tablet00000002"); + std::unique_ptr response2(new LoadTabletResponse); + std::unique_ptr done2(new MockClosure); + std::unique_ptr controller2(new sofa::pbrpc::RpcController); + remote_ts_->LoadTablet(controller2.get(), request2.get(), response2.get(), done2.get()); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->queue_.size(), 2); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->PendingNum(), 2); + EXPECT_EQ(remote_ts_->tablets_ctrl_status_.size(), 2); + EXPECT_EQ(remote_ts_->tablets_ctrl_status_[request2->path()], + RemoteTabletNode::TabletCtrlStatus::kCtrlWaitLoad); + + std::unique_ptr request3(new LoadTabletRequest); + request3->set_path("test/tablet00000003"); + std::unique_ptr response3(new LoadTabletResponse); + std::unique_ptr done3(new MockClosure); + std::unique_ptr controller3(new sofa::pbrpc::RpcController); + remote_ts_->LoadTablet(controller3.get(), request3.get(), response3.get(), done3.get()); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->queue_.size(), 2); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->PendingNum(), 2); + EXPECT_EQ(remote_ts_->tablets_ctrl_status_.size(), 2); + EXPECT_TRUE(remote_ts_->tablets_ctrl_status_.find(request3->path()) == + remote_ts_->tablets_ctrl_status_.end()); + EXPECT_EQ(response3->status(), kTabletNodeIsBusy); + + remote_ts_->ctrl_thread_pool_->Start(); + remote_ts_->ctrl_thread_pool_->Stop(true); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->PendingNum(), 0); + EXPECT_EQ(remote_ts_->tablets_ctrl_status_.size(), 0); +} + +TEST_F(RemoteTabletNodeTest, UnloadTablet) { + std::unique_ptr request(new UnloadTabletRequest); + std::unique_ptr response(new UnloadTabletResponse); + std::unique_ptr done(new MockClosure); + std::unique_ptr controller(new sofa::pbrpc::RpcController); + request->set_path("test/tablet00000001"); + remote_ts_->UnloadTablet(controller.get(), request.get(), response.get(), done.get()); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->queue_.size(), 1); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->PendingNum(), 1); + EXPECT_EQ(remote_ts_->tablets_ctrl_status_.size(), 1); + EXPECT_EQ(remote_ts_->tablets_ctrl_status_[request->path()], + RemoteTabletNode::TabletCtrlStatus::kCtrlWaitUnload); + + std::unique_ptr request_2(new UnloadTabletRequest); + std::unique_ptr response_2(new UnloadTabletResponse); + std::unique_ptr done_2(new MockClosure); + std::unique_ptr controller_2(new sofa::pbrpc::RpcController); + request_2->set_path("test/tablet00000001"); + remote_ts_->UnloadTablet(controller_2.get(), request_2.get(), response_2.get(), done_2.get()); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->queue_.size(), 1); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->PendingNum(), 1); + EXPECT_EQ(remote_ts_->tablets_ctrl_status_.size(), 1); + EXPECT_EQ(remote_ts_->tablets_ctrl_status_[request_2->path()], + RemoteTabletNode::TabletCtrlStatus::kCtrlWaitUnload); + EXPECT_EQ(remote_ts_->lightweight_ctrl_thread_pool_->queue_.size(), 1); + + std::unique_ptr request_3(new UnloadTabletRequest); + std::unique_ptr response_3(new UnloadTabletResponse); + std::unique_ptr done_3(new MockClosure); + std::unique_ptr controller_3(new sofa::pbrpc::RpcController); + request_3->set_path("test/tablet00000002"); + remote_ts_->UnloadTablet(controller_3.get(), request_3.get(), response_3.get(), done_3.get()); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->queue_.size(), 2); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->PendingNum(), 2); + EXPECT_EQ(remote_ts_->tablets_ctrl_status_.size(), 2); + EXPECT_EQ(remote_ts_->tablets_ctrl_status_[request_3->path()], + RemoteTabletNode::TabletCtrlStatus::kCtrlWaitUnload); + + std::unique_ptr request_4(new UnloadTabletRequest); + std::unique_ptr response_4(new UnloadTabletResponse); + std::unique_ptr done_4(new MockClosure); + std::unique_ptr controller_4(new sofa::pbrpc::RpcController); + request_4->set_path("test/tablet00000003"); + remote_ts_->UnloadTablet(controller_4.get(), request_4.get(), response_4.get(), done_4.get()); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->queue_.size(), 2); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->PendingNum(), 2); + EXPECT_EQ(remote_ts_->tablets_ctrl_status_.size(), 2); + EXPECT_TRUE(remote_ts_->tablets_ctrl_status_.find(request_4->path()) == + remote_ts_->tablets_ctrl_status_.end()); + EXPECT_EQ(response_4->status(), kTabletNodeIsBusy); + + remote_ts_->ctrl_thread_pool_->Start(); + remote_ts_->ctrl_thread_pool_->Stop(true); + EXPECT_EQ(remote_ts_->tablets_ctrl_status_.size(), 0); +} + +TEST_F(RemoteTabletNodeTest, LoadAndUnloadTablet) { + std::unique_ptr load_req(new LoadTabletRequest); + std::unique_ptr load_resp(new LoadTabletResponse); + std::unique_ptr load_done(new MockClosure); + std::unique_ptr load_ctrl(new sofa::pbrpc::RpcController); + load_req->set_path("test/tablet00000001"); + remote_ts_->LoadTablet(load_ctrl.get(), load_req.get(), load_resp.get(), load_done.get()); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->queue_.size(), 1); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->PendingNum(), 1); + EXPECT_EQ(remote_ts_->tablets_ctrl_status_.size(), 1); + EXPECT_EQ(remote_ts_->tablets_ctrl_status_[load_req->path()], + RemoteTabletNode::TabletCtrlStatus::kCtrlWaitLoad); + + std::unique_ptr unload_req(new UnloadTabletRequest); + std::unique_ptr unload_resp(new UnloadTabletResponse); + std::unique_ptr unload_done(new MockClosure); + std::unique_ptr unload_ctrl(new sofa::pbrpc::RpcController); + unload_req->set_path("test/tablet00000002"); + remote_ts_->UnloadTablet(unload_ctrl.get(), unload_req.get(), unload_resp.get(), + unload_done.get()); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->queue_.size(), 2); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->PendingNum(), 2); + EXPECT_EQ(remote_ts_->tablets_ctrl_status_.size(), 2); + EXPECT_EQ(remote_ts_->tablets_ctrl_status_[unload_req->path()], + RemoteTabletNode::TabletCtrlStatus::kCtrlWaitUnload); + + std::unique_ptr load_req_2(new LoadTabletRequest); + std::unique_ptr load_resp_2(new LoadTabletResponse); + std::unique_ptr load_done_2(new MockClosure); + std::unique_ptr load_ctrl_2(new sofa::pbrpc::RpcController); + load_req_2->set_path("test/tablet00000002"); + remote_ts_->LoadTablet(load_ctrl_2.get(), load_req_2.get(), load_resp_2.get(), load_done_2.get()); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->queue_.size(), 2); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->PendingNum(), 2); + EXPECT_EQ(remote_ts_->tablets_ctrl_status_.size(), 2); + EXPECT_EQ(remote_ts_->lightweight_ctrl_thread_pool_->queue_.size(), 1); + + std::unique_ptr unload_req_2(new UnloadTabletRequest); + std::unique_ptr unload_resp_2(new UnloadTabletResponse); + std::unique_ptr unload_done_2(new MockClosure); + std::unique_ptr unload_ctrl_2(new sofa::pbrpc::RpcController); + unload_req_2->set_path("test/tablet00000001"); + remote_ts_->UnloadTablet(unload_ctrl_2.get(), unload_req_2.get(), unload_resp_2.get(), + unload_done_2.get()); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->queue_.size(), 2); + EXPECT_EQ(remote_ts_->ctrl_thread_pool_->PendingNum(), 2); + EXPECT_EQ(remote_ts_->tablets_ctrl_status_.size(), 2); + + EXPECT_EQ(remote_ts_->lightweight_ctrl_thread_pool_->queue_.size(), 2); + remote_ts_->lightweight_ctrl_thread_pool_->Start(); + remote_ts_->lightweight_ctrl_thread_pool_->Stop(true); + EXPECT_EQ(remote_ts_->lightweight_ctrl_thread_pool_->queue_.size(), 0); + EXPECT_EQ(load_resp_2->status(), kTabletWaitUnload); + EXPECT_EQ(unload_resp_2->status(), kTabletWaitLoad); +} +} +} +} diff --git a/src/tabletnode/test/tablet_manager_test.cc b/src/tabletnode/test/tablet_manager_test.cc index d5d5ec906..ad466d54f 100644 --- a/src/tabletnode/test/tablet_manager_test.cc +++ b/src/tabletnode/test/tablet_manager_test.cc @@ -16,320 +16,293 @@ namespace tera { namespace tabletnode { class TabletManagerTest : public ::testing::Test { -public: - TabletManagerTest() {} - ~TabletManagerTest() {} + public: + TabletManagerTest() {} + ~TabletManagerTest() {} -protected: - TabletManager m_tablet_manager; + protected: + TabletManager m_tablet_manager; }; TEST_F(TabletManagerTest, TabletRange_General) { - TabletRange tr1("t1", "start1", "end1"); - TabletRange tr2("t2", "start2", "end2"); - TabletRange tr3("t1", "start2", "end2"); - TabletRange tr4("t1", "start2", "end2"); - - EXPECT_TRUE(tr1 < tr2); - EXPECT_TRUE(tr1 < tr3); - EXPECT_TRUE(tr3 == tr4); + TabletRange tr1("t1", "start1", "end1"); + TabletRange tr2("t2", "start2", "end2"); + TabletRange tr3("t1", "start2", "end2"); + TabletRange tr4("t1", "start2", "end2"); + + EXPECT_TRUE(tr1 < tr2); + EXPECT_TRUE(tr1 < tr3); + EXPECT_TRUE(tr3 == tr4); } TEST_F(TabletManagerTest, AddTabletSuccess) { - std::string table_name = "add_tablet"; - std::string table_path = "add_tablet"; - std::string start_key = "start_key"; - std::string end_key = "end_key"; - io::TabletIO* tablet_io = NULL; - StatusCode err_code = kTabletNodeOk; - - EXPECT_TRUE(m_tablet_manager.AddTablet( - table_name, table_path, start_key, end_key, - &tablet_io, &err_code)); - EXPECT_TRUE(tablet_io != NULL); - EXPECT_EQ(err_code, kTabletNodeOk); - - tablet_io->DecRef(); - EXPECT_TRUE(m_tablet_manager.RemoveTablet( - table_name, start_key, end_key)); + std::string table_name = "add_tablet"; + std::string table_path = "add_tablet"; + std::string start_key = "start_key"; + std::string end_key = "end_key"; + io::TabletIO* tablet_io = NULL; + StatusCode err_code = kTabletNodeOk; + + EXPECT_TRUE(m_tablet_manager.AddTablet(table_name, table_path, start_key, end_key, &tablet_io, + &err_code)); + EXPECT_TRUE(tablet_io != NULL); + EXPECT_EQ(err_code, kTabletNodeOk); + + tablet_io->DecRef(); + EXPECT_TRUE(m_tablet_manager.RemoveTablet(table_name, start_key, end_key)); } TEST_F(TabletManagerTest, AddTabletFailureForExist) { - std::string table_name = "add_tablet"; - std::string table_path = "add_tablet"; - std::string start_key = "start_key"; - std::string end_key = "end_key"; - io::TabletIO* tablet_io1 = NULL; - io::TabletIO* tablet_io2 = NULL; - StatusCode err_code = kTabletNodeOk; - - EXPECT_TRUE(m_tablet_manager.AddTablet( - table_name, table_path, start_key, end_key, - &tablet_io1, &err_code)); - EXPECT_TRUE(tablet_io1 != NULL); - EXPECT_EQ(err_code, kTabletNodeOk); - - EXPECT_FALSE(m_tablet_manager.AddTablet( - table_name, table_path, start_key, end_key, - &tablet_io2, &err_code)); - EXPECT_TRUE(tablet_io2 != NULL); - EXPECT_TRUE(tablet_io2 == tablet_io1); - EXPECT_EQ(err_code, kTableExist); - - tablet_io1->DecRef(); - tablet_io2->DecRef(); - EXPECT_TRUE(m_tablet_manager.RemoveTablet( - table_name, start_key, end_key)); + std::string table_name = "add_tablet"; + std::string table_path = "add_tablet"; + std::string start_key = "start_key"; + std::string end_key = "end_key"; + io::TabletIO* tablet_io1 = NULL; + io::TabletIO* tablet_io2 = NULL; + StatusCode err_code = kTabletNodeOk; + + EXPECT_TRUE(m_tablet_manager.AddTablet(table_name, table_path, start_key, end_key, &tablet_io1, + &err_code)); + EXPECT_TRUE(tablet_io1 != NULL); + EXPECT_EQ(err_code, kTabletNodeOk); + + EXPECT_FALSE(m_tablet_manager.AddTablet(table_name, table_path, start_key, end_key, &tablet_io2, + &err_code)); + EXPECT_TRUE(tablet_io2 != NULL); + EXPECT_TRUE(tablet_io2 == tablet_io1); + EXPECT_EQ(err_code, kTableExist); + + tablet_io1->DecRef(); + tablet_io2->DecRef(); + EXPECT_TRUE(m_tablet_manager.RemoveTablet(table_name, start_key, end_key)); } TEST_F(TabletManagerTest, RemoveTabletFailureNotExist) { - std::string table_name = "add_tablet"; - std::string table_path = "add_tablet"; - std::string start_key = "start_key"; - std::string end_key = "end_key"; - io::TabletIO* tablet_io = NULL; - StatusCode err_code = kTabletNodeOk; - - EXPECT_TRUE(m_tablet_manager.AddTablet( - table_name, table_path, start_key, end_key, - &tablet_io, &err_code)); - EXPECT_TRUE(tablet_io != NULL); - EXPECT_EQ(err_code, kTabletNodeOk); - - EXPECT_FALSE(m_tablet_manager.RemoveTablet( - "not_exist_table", start_key, end_key)); - EXPECT_FALSE(m_tablet_manager.RemoveTablet( - table_name, "incorrect_start_key", end_key)); - EXPECT_FALSE(m_tablet_manager.RemoveTablet( - table_name, start_key, "incorrect_end_key")); - - tablet_io->DecRef(); - EXPECT_TRUE(m_tablet_manager.RemoveTablet( - table_name, start_key, end_key)); + std::string table_name = "add_tablet"; + std::string table_path = "add_tablet"; + std::string start_key = "start_key"; + std::string end_key = "end_key"; + io::TabletIO* tablet_io = NULL; + StatusCode err_code = kTabletNodeOk; + + EXPECT_TRUE(m_tablet_manager.AddTablet(table_name, table_path, start_key, end_key, &tablet_io, + &err_code)); + EXPECT_TRUE(tablet_io != NULL); + EXPECT_EQ(err_code, kTabletNodeOk); + + EXPECT_FALSE(m_tablet_manager.RemoveTablet("not_exist_table", start_key, end_key)); + EXPECT_FALSE(m_tablet_manager.RemoveTablet(table_name, "incorrect_start_key", end_key)); + EXPECT_FALSE(m_tablet_manager.RemoveTablet(table_name, start_key, "incorrect_end_key")); + + tablet_io->DecRef(); + EXPECT_TRUE(m_tablet_manager.RemoveTablet(table_name, start_key, end_key)); } TEST_F(TabletManagerTest, GetTabletSuccess) { - std::string table_name = "get_tablet"; - std::string table_path = "get_tablet"; - std::string start_key = "start_key"; - std::string end_key = "end_key"; - io::TabletIO* tablet_io = NULL; - StatusCode err_code = kTabletNodeOk; - - EXPECT_TRUE(m_tablet_manager.AddTablet( - table_name, table_path, start_key, end_key, - &tablet_io, &err_code)); - EXPECT_TRUE(tablet_io != NULL); - EXPECT_EQ(err_code, kTabletNodeOk); - - io::TabletIO* get_tablet_io = m_tablet_manager.GetTablet( - table_name, start_key, end_key, &err_code); - EXPECT_TRUE(get_tablet_io != NULL); - EXPECT_EQ(err_code, kTabletNodeOk); - EXPECT_EQ(get_tablet_io, tablet_io); - - tablet_io->DecRef(); - get_tablet_io->DecRef(); - EXPECT_TRUE(m_tablet_manager.RemoveTablet( - table_name, start_key, end_key)); + std::string table_name = "get_tablet"; + std::string table_path = "get_tablet"; + std::string start_key = "start_key"; + std::string end_key = "end_key"; + io::TabletIO* tablet_io = NULL; + StatusCode err_code = kTabletNodeOk; + + EXPECT_TRUE(m_tablet_manager.AddTablet(table_name, table_path, start_key, end_key, &tablet_io, + &err_code)); + EXPECT_TRUE(tablet_io != NULL); + EXPECT_EQ(err_code, kTabletNodeOk); + + io::TabletIO* get_tablet_io = + m_tablet_manager.GetTablet(table_name, start_key, end_key, &err_code); + EXPECT_TRUE(get_tablet_io != NULL); + EXPECT_EQ(err_code, kTabletNodeOk); + EXPECT_EQ(get_tablet_io, tablet_io); + + tablet_io->DecRef(); + get_tablet_io->DecRef(); + EXPECT_TRUE(m_tablet_manager.RemoveTablet(table_name, start_key, end_key)); } TEST_F(TabletManagerTest, GetTabletSuccessForNoEndKey) { - std::string table_name = "get_tablet"; - std::string table_path = "get_tablet"; - std::string start_key = "start_key"; - std::string end_key = ""; - io::TabletIO* tablet_io = NULL; - StatusCode err_code = kTabletNodeOk; - - EXPECT_TRUE(m_tablet_manager.AddTablet( - table_name, table_path, start_key, end_key, - &tablet_io, &err_code)); - EXPECT_TRUE(tablet_io != NULL); - EXPECT_EQ(err_code, kTabletNodeOk); - - io::TabletIO* get_tablet_io = m_tablet_manager.GetTablet( - table_name, start_key, &err_code); - EXPECT_TRUE(get_tablet_io != NULL); - EXPECT_EQ(err_code, kTabletNodeOk); - EXPECT_EQ(get_tablet_io, tablet_io); - - tablet_io->DecRef(); - get_tablet_io->DecRef(); - EXPECT_TRUE(m_tablet_manager.RemoveTablet( - table_name, start_key, end_key)); + std::string table_name = "get_tablet"; + std::string table_path = "get_tablet"; + std::string start_key = "start_key"; + std::string end_key = ""; + io::TabletIO* tablet_io = NULL; + StatusCode err_code = kTabletNodeOk; + + EXPECT_TRUE(m_tablet_manager.AddTablet(table_name, table_path, start_key, end_key, &tablet_io, + &err_code)); + EXPECT_TRUE(tablet_io != NULL); + EXPECT_EQ(err_code, kTabletNodeOk); + + io::TabletIO* get_tablet_io = m_tablet_manager.GetTablet(table_name, start_key, &err_code); + EXPECT_TRUE(get_tablet_io != NULL); + EXPECT_EQ(err_code, kTabletNodeOk); + EXPECT_EQ(get_tablet_io, tablet_io); + + tablet_io->DecRef(); + get_tablet_io->DecRef(); + EXPECT_TRUE(m_tablet_manager.RemoveTablet(table_name, start_key, end_key)); } TEST_F(TabletManagerTest, GetTabletFailure) { - std::string table_name = "get_tablet"; - std::string table_path = "get_tablet"; - std::string start_key = "start_key"; - std::string end_key = "end_key"; - io::TabletIO* tablet_io = NULL; - StatusCode err_code = kTabletNodeOk; - - EXPECT_TRUE(m_tablet_manager.AddTablet( - table_name, table_path, start_key, end_key, - &tablet_io, &err_code)); - EXPECT_TRUE(tablet_io != NULL); - EXPECT_EQ(err_code, kTabletNodeOk); - - io::TabletIO* get_tablet_io = NULL; - get_tablet_io = m_tablet_manager.GetTablet( - "not_exist_tablet", start_key, end_key, &err_code); - EXPECT_TRUE(get_tablet_io == NULL); - EXPECT_EQ(err_code, kKeyNotInRange); - - get_tablet_io = m_tablet_manager.GetTablet( - table_name, "incorrect_start_key", end_key, &err_code); - EXPECT_TRUE(get_tablet_io == NULL); - EXPECT_EQ(err_code, kKeyNotInRange); - - get_tablet_io = m_tablet_manager.GetTablet( - table_name, start_key, "incorrect_end_key", &err_code); - EXPECT_TRUE(get_tablet_io == NULL); - EXPECT_EQ(err_code, kKeyNotInRange); - - get_tablet_io = m_tablet_manager.GetTablet( - "not_exist_tablet", start_key, &err_code); - EXPECT_TRUE(get_tablet_io == NULL); - EXPECT_EQ(err_code, kKeyNotInRange); - - get_tablet_io = m_tablet_manager.GetTablet( - table_name, "incorrect_start_key", &err_code); - EXPECT_TRUE(get_tablet_io == NULL); - EXPECT_EQ(err_code, kKeyNotInRange); - - tablet_io->DecRef(); - EXPECT_TRUE(m_tablet_manager.RemoveTablet( - table_name, start_key, end_key)); + std::string table_name = "get_tablet"; + std::string table_path = "get_tablet"; + std::string start_key = "start_key"; + std::string end_key = "end_key"; + io::TabletIO* tablet_io = NULL; + StatusCode err_code = kTabletNodeOk; + + EXPECT_TRUE(m_tablet_manager.AddTablet(table_name, table_path, start_key, end_key, &tablet_io, + &err_code)); + EXPECT_TRUE(tablet_io != NULL); + EXPECT_EQ(err_code, kTabletNodeOk); + + io::TabletIO* get_tablet_io = NULL; + get_tablet_io = m_tablet_manager.GetTablet("not_exist_tablet", start_key, end_key, &err_code); + EXPECT_TRUE(get_tablet_io == NULL); + EXPECT_EQ(err_code, kKeyNotInRange); + + get_tablet_io = m_tablet_manager.GetTablet(table_name, "incorrect_start_key", end_key, &err_code); + EXPECT_TRUE(get_tablet_io == NULL); + EXPECT_EQ(err_code, kKeyNotInRange); + + get_tablet_io = m_tablet_manager.GetTablet(table_name, start_key, "incorrect_end_key", &err_code); + EXPECT_TRUE(get_tablet_io == NULL); + EXPECT_EQ(err_code, kKeyNotInRange); + + get_tablet_io = m_tablet_manager.GetTablet("not_exist_tablet", start_key, &err_code); + EXPECT_TRUE(get_tablet_io == NULL); + EXPECT_EQ(err_code, kKeyNotInRange); + + get_tablet_io = m_tablet_manager.GetTablet(table_name, "incorrect_start_key", &err_code); + EXPECT_TRUE(get_tablet_io == NULL); + EXPECT_EQ(err_code, kKeyNotInRange); + + tablet_io->DecRef(); + EXPECT_TRUE(m_tablet_manager.RemoveTablet(table_name, start_key, end_key)); } TEST_F(TabletManagerTest, GetAllTabletSuccess) { - std::vector meta_list; - m_tablet_manager.GetAllTabletMeta(&meta_list); - EXPECT_TRUE(meta_list.size() == 0U); - - std::string table_name = "get_all_tablet"; - std::string table_path = "get_all_tablet"; - std::string start_key = "start_key"; - std::string end_key = "end_key"; - io::TabletIO* tablet_io = NULL; - StatusCode err_code = kTabletNodeOk; - - EXPECT_TRUE(m_tablet_manager.AddTablet( - table_name, table_path, start_key, end_key, - &tablet_io, &err_code)); - EXPECT_TRUE(tablet_io != NULL); - EXPECT_EQ(err_code, kTabletNodeOk); - - tablet_io->Load(TableSchema(), start_key, end_key, table_path, std::vector(), std::map()); - m_tablet_manager.GetAllTabletMeta(&meta_list); - EXPECT_TRUE(meta_list.size() == 1U); - - tablet_io->DecRef(); - EXPECT_TRUE(m_tablet_manager.RemoveTablet( - table_name, start_key, end_key)); + std::vector meta_list; + m_tablet_manager.GetAllTabletMeta(&meta_list); + EXPECT_TRUE(meta_list.size() == 0U); + + std::string table_name = "get_all_tablet"; + std::string table_path = "get_all_tablet"; + std::string start_key = "start_key"; + std::string end_key = "end_key"; + io::TabletIO* tablet_io = NULL; + StatusCode err_code = kTabletNodeOk; + + EXPECT_TRUE(m_tablet_manager.AddTablet(table_name, table_path, start_key, end_key, &tablet_io, + &err_code)); + EXPECT_TRUE(tablet_io != NULL); + EXPECT_EQ(err_code, kTabletNodeOk); + + tablet_io->Load(TableSchema(), start_key, end_key, table_path, std::vector(), + std::map()); + m_tablet_manager.GetAllTabletMeta(&meta_list); + EXPECT_TRUE(meta_list.size() == 1U); + + tablet_io->DecRef(); + EXPECT_TRUE(m_tablet_manager.RemoveTablet(table_name, start_key, end_key)); } TEST_F(TabletManagerTest, GetAllTabletIOSuccess) { - std::vector io_list; - m_tablet_manager.GetAllTablets(&io_list); - EXPECT_TRUE(io_list.size() == 0U); - - std::string table_name = "get_all_tablet"; - std::string table_path = "get_all_tablet"; - std::string start_key = "start_key"; - std::string end_key = "end_key"; - io::TabletIO* tablet_io = NULL; - StatusCode err_code = kTabletNodeOk; - - EXPECT_TRUE(m_tablet_manager.AddTablet( - table_name, table_path, start_key, end_key, - &tablet_io, &err_code)); - EXPECT_TRUE(tablet_io != NULL); - EXPECT_EQ(err_code, kTabletNodeOk); - - tablet_io->Load(TableSchema(), start_key, end_key, table_path, std::vector(), std::map()); - m_tablet_manager.GetAllTablets(&io_list); - EXPECT_TRUE(io_list.size() == 1U); - - tablet_io->DecRef(); - io_list[0]->DecRef(); - EXPECT_TRUE(m_tablet_manager.RemoveTablet( - table_name, start_key, end_key)); + std::vector io_list; + m_tablet_manager.GetAllTablets(&io_list); + EXPECT_TRUE(io_list.size() == 0U); + + std::string table_name = "get_all_tablet"; + std::string table_path = "get_all_tablet"; + std::string start_key = "start_key"; + std::string end_key = "end_key"; + io::TabletIO* tablet_io = NULL; + StatusCode err_code = kTabletNodeOk; + + EXPECT_TRUE(m_tablet_manager.AddTablet(table_name, table_path, start_key, end_key, &tablet_io, + &err_code)); + EXPECT_TRUE(tablet_io != NULL); + EXPECT_EQ(err_code, kTabletNodeOk); + + tablet_io->Load(TableSchema(), start_key, end_key, table_path, std::vector(), + std::map()); + m_tablet_manager.GetAllTablets(&io_list); + EXPECT_TRUE(io_list.size() == 1U); + + tablet_io->DecRef(); + io_list[0]->DecRef(); + EXPECT_TRUE(m_tablet_manager.RemoveTablet(table_name, start_key, end_key)); } TEST_F(TabletManagerTest, RemoveAllTabletSuccess) { - EXPECT_TRUE(m_tablet_manager.RemoveAllTablets()); - - std::string table_name = "remove_all_tablet"; - std::string table_path = "remove_all_tablet"; - std::string start_key = "start_key"; - std::string end_key = "end_key"; - io::TabletIO* tablet_io = NULL; - StatusCode err_code = kTabletNodeOk; - - EXPECT_TRUE(m_tablet_manager.AddTablet( - table_name, table_path, start_key, end_key, - &tablet_io, &err_code)); - EXPECT_TRUE(tablet_io != NULL); - EXPECT_EQ(err_code, kTabletNodeOk); - - tablet_io->Load(TableSchema(), start_key, end_key, table_path, std::vector(), std::map()); - tablet_io->DecRef(); - EXPECT_TRUE(m_tablet_manager.RemoveAllTablets()); + EXPECT_TRUE(m_tablet_manager.RemoveAllTablets()); + + std::string table_name = "remove_all_tablet"; + std::string table_path = "remove_all_tablet"; + std::string start_key = "start_key"; + std::string end_key = "end_key"; + io::TabletIO* tablet_io = NULL; + StatusCode err_code = kTabletNodeOk; + + EXPECT_TRUE(m_tablet_manager.AddTablet(table_name, table_path, start_key, end_key, &tablet_io, + &err_code)); + EXPECT_TRUE(tablet_io != NULL); + EXPECT_EQ(err_code, kTabletNodeOk); + + tablet_io->Load(TableSchema(), start_key, end_key, table_path, std::vector(), + std::map()); + tablet_io->DecRef(); + EXPECT_TRUE(m_tablet_manager.RemoveAllTablets()); } TEST_F(TabletManagerTest, ForceRemoveAllTabletSuccess) { - EXPECT_TRUE(m_tablet_manager.RemoveAllTablets()); - - std::string table_name = "remove_all_tablet"; - std::string table_path = "remove_all_tablet"; - std::string start_key = "start_key"; - std::string end_key = "end_key"; - io::TabletIO* tablet_io = NULL; - StatusCode err_code = kTabletNodeOk; - - EXPECT_TRUE(m_tablet_manager.AddTablet( - table_name, table_path, start_key, end_key, - &tablet_io, &err_code)); - EXPECT_TRUE(tablet_io != NULL); - EXPECT_EQ(err_code, kTabletNodeOk); - - tablet_io->DecRef(); - EXPECT_TRUE(m_tablet_manager.RemoveAllTablets(true)); + EXPECT_TRUE(m_tablet_manager.RemoveAllTablets()); + + std::string table_name = "remove_all_tablet"; + std::string table_path = "remove_all_tablet"; + std::string start_key = "start_key"; + std::string end_key = "end_key"; + io::TabletIO* tablet_io = NULL; + StatusCode err_code = kTabletNodeOk; + + EXPECT_TRUE(m_tablet_manager.AddTablet(table_name, table_path, start_key, end_key, &tablet_io, + &err_code)); + EXPECT_TRUE(tablet_io != NULL); + EXPECT_EQ(err_code, kTabletNodeOk); + + tablet_io->DecRef(); + EXPECT_TRUE(m_tablet_manager.RemoveAllTablets(true)); } TEST_F(TabletManagerTest, Size) { - EXPECT_EQ(m_tablet_manager.Size(), 0U); - - std::string table_name = "get_all_tablet"; - std::string table_path = "get_all_tablet"; - std::string start_key = "start_key"; - std::string end_key = "end_key"; - io::TabletIO* tablet_io = NULL; - StatusCode err_code = kTabletNodeOk; - - EXPECT_TRUE(m_tablet_manager.AddTablet( - table_name, table_path, start_key, end_key, - &tablet_io, &err_code)); - EXPECT_TRUE(tablet_io != NULL); - EXPECT_EQ(err_code, kTabletNodeOk); - EXPECT_EQ(m_tablet_manager.Size(), 1U); - - tablet_io->DecRef(); - EXPECT_TRUE(m_tablet_manager.RemoveTablet( - table_name, start_key, end_key)); + EXPECT_EQ(m_tablet_manager.Size(), 0U); + + std::string table_name = "get_all_tablet"; + std::string table_path = "get_all_tablet"; + std::string start_key = "start_key"; + std::string end_key = "end_key"; + io::TabletIO* tablet_io = NULL; + StatusCode err_code = kTabletNodeOk; + + EXPECT_TRUE(m_tablet_manager.AddTablet(table_name, table_path, start_key, end_key, &tablet_io, + &err_code)); + EXPECT_TRUE(tablet_io != NULL); + EXPECT_EQ(err_code, kTabletNodeOk); + EXPECT_EQ(m_tablet_manager.Size(), 1U); + + tablet_io->DecRef(); + EXPECT_TRUE(m_tablet_manager.RemoveTablet(table_name, start_key, end_key)); } -} // namespace tabletnode -} // namespace tera +} // namespace tabletnode +} // namespace tera int main(int argc, char** argv) { - FLAGS_tera_leveldb_env_type = "local"; - ::google::InitGoogleLogging(argv[0]); - ::google::ParseCommandLineFlags(&argc, &argv, true); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + FLAGS_tera_leveldb_env_type = "local"; + ::google::InitGoogleLogging(argv[0]); + ::google::ParseCommandLineFlags(&argc, &argv, true); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); } diff --git a/src/tabletnode/test/tabletnode_entry_test.cc b/src/tabletnode/test/tabletnode_entry_test.cc index a56f33ade..60fa92d44 100644 --- a/src/tabletnode/test/tabletnode_entry_test.cc +++ b/src/tabletnode/test/tabletnode_entry_test.cc @@ -16,78 +16,82 @@ namespace tera { namespace tabletnode { class TabletNodeEntryTest : public ::testing::Test { -public: - static void *CpuBusyCallback(void*) { - while (1); - return NULL; + public: + static void* CpuBusyCallback(void*) { + while (1) + ; + return NULL; + } + + void MakeCpuBusy() { + int thread_num = 50; + pthread_t thread[thread_num]; + + for (int i = 0; i < thread_num; ++i) { + if (pthread_create(&thread[i], NULL, &CpuBusyCallback, NULL) != 0) { + LOG(ERROR) << "fail to create thread"; + return; + } } - - void MakeCpuBusy() { - int thread_num = 50; - pthread_t thread[thread_num]; - - for (int i = 0; i < thread_num; ++i) { - if (pthread_create(&thread[i], NULL, &CpuBusyCallback, NULL) != 0) { - LOG(ERROR) << "fail to create thread"; - return; - } - } - } - - int GetCoreNum() { - int core_num = 0; - FILE* pf; - pf = popen("more /proc/cpuinfo | " - "grep \"physical id\" | " - "uniq | wc -l", "r"); - fscanf(pf, "%d", &core_num); - return core_num; - } - - void GetCpuInfo(std::vector* cpu_info) { - int core_num; - core_num = GetCoreNum(); - char s[100]; - - FILE* pf; - pf = popen("mpstat -A 1 2 | " - "grep \"Average\" |" - "awk '{print $3}'", "r"); - // skip useless 2 line from start - fscanf(pf, "%s", s); - fscanf(pf, "%s", s); - for (int i = 0; i < core_num; ++i) { - double info; - fscanf(pf, "%s", s); - info = strtod(s, NULL); - (*cpu_info).push_back(info); - } + } + + int GetCoreNum() { + int core_num = 0; + FILE* pf; + pf = popen( + "more /proc/cpuinfo | " + "grep \"physical id\" | " + "uniq | wc -l", + "r"); + fscanf(pf, "%d", &core_num); + return core_num; + } + + void GetCpuInfo(std::vector* cpu_info) { + int core_num; + core_num = GetCoreNum(); + char s[100]; + + FILE* pf; + pf = popen( + "mpstat -A 1 2 | " + "grep \"Average\" |" + "awk '{print $3}'", + "r"); + // skip useless 2 line from start + fscanf(pf, "%s", s); + fscanf(pf, "%s", s); + for (int i = 0; i < core_num; ++i) { + double info; + fscanf(pf, "%s", s); + info = strtod(s, NULL); + (*cpu_info).push_back(info); } + } - void TestBody() {} + void TestBody() {} - TabletNodeEntry ts_entry; + TabletNodeEntry ts_entry; }; TEST_F(TabletNodeEntryTest, ProcessorAffinity) { - TabletNodeEntryTest test_instance; - - FLAGS_tera_tabletnode_cpu_affinity_set = "4,5"; - FLAGS_tera_tabletnode_cpu_affinity_enabled = true; - test_instance.ts_entry.SetProcessorAffinity(); - test_instance.MakeCpuBusy(); - - std::vector cpu_info; - test_instance.GetCpuInfo(&cpu_info); - - for (int i = 0; i < cpu_info.size(); ++i) { - if (i == 4 || i == 5) { - EXPECT_GT(cpu_info[i], 50.0) << "core num: " << i; - } else { - EXPECT_LT(cpu_info[i], 50.0) << "core num: " << i; - } + TabletNodeEntryTest test_instance; + + FLAGS_tera_tabletnode_cpu_affinity_set = "4,5"; + FLAGS_tera_tabletnode_cpu_affinity_enabled = true; + test_instance.ts_entry.SetProcessorAffinity(); + test_instance.MakeCpuBusy(); + + std::vector cpu_info; + test_instance.GetCpuInfo(&cpu_info); + + for (int i = 0; i < cpu_info.size(); ++i) { + if (i == 4 || i == 5) { + EXPECT_GT(cpu_info[i], 50.0) << "core num: " << i; + } else { + EXPECT_LT(cpu_info[i], 50.0) << "core num: " << i; } + } } -} // namespace tabletnode -} // namespace tera - +} // namespace tabletnode +} // namespace tera diff --git a/src/tabletnode/test/tabletnode_impl_test.cc b/src/tabletnode/test/tabletnode_impl_test.cc index efc1d61b7..b987cb442 100644 --- a/src/tabletnode/test/tabletnode_impl_test.cc +++ b/src/tabletnode/test/tabletnode_impl_test.cc @@ -28,489 +28,458 @@ namespace tera { namespace tabletnode { class TabletNodeImplTest : public ::testing::Test { -public: - TabletNodeImplTest() - : m_tablet_manager(new MockTabletManager()), - m_tabletnode_impl(m_tabletnode_info, &m_master_client, - m_tablet_manager), - m_ret_status(kTabletNodeOk), m_ret_tm_add(false), - m_ret_tm_remove(false), m_ret_io_load(false), m_ret_io_unload(false), - m_ret_io_compact(false), m_ret_io_readcell(false), - m_ret_io_write(false), m_ret_io_scan(false), m_ret_io_scanrow(false), - m_ret_io_split(false), - m_start_key("start_key"), m_end_key("end_key"), - m_schema(DefaultTableSchema()) { - FLAGS_tera_coord_type = "fake_zk"; - - m_tablet_meta.set_table_name("name"); - m_tablet_meta.set_path("path"); - CreateKeyRange("", "", m_tablet_meta.mutable_key_range()); - } - ~TabletNodeImplTest() {} - - void Done() {} - - void CreateCallback() { - m_done = google::protobuf::NewCallback(this, &TabletNodeImplTest::Done); - } - - void CreateKeyRange(const std::string& start_key, - const std::string& end_key, - KeyRange* key_range) { - key_range->set_key_start(""); - key_range->set_key_end(""); - } - - // mock tablet manager - - bool AddTablet(const std::string& table_name, const std::string& table_path, - const std::string& key_start, const std::string& key_end, - io::TabletIO** tablet_io, StatusCode* status) { - m_tablet_io.AddRef(); - *tablet_io = &m_tablet_io; - *status = m_ret_status; - return m_ret_tm_add; - } - bool RemoveTablet(const std::string& table_name, - const std::string& key_start, - const std::string& key_end, - StatusCode* status) { - return m_ret_tm_remove; - } - io::TabletIO* GetTablet(const std::string& table_name, - const std::string& key_start, - const std::string& key_end, - StatusCode* status) { - m_tablet_io.AddRef(); - return &m_tablet_io; - } - io::TabletIO* GetTablet2(const std::string& table_name, - const std::string& key, - StatusCode* status) { - m_tablet_io.AddRef(); - return &m_tablet_io; - } - void GetAllTabletMeta(std::vector* tablet_meta_list) { - TabletMeta* meta = new TabletMeta(m_tablet_meta); - tablet_meta_list->push_back(meta); - } - - // mock tablet io - - bool IO_Load(const TableSchema& schema, - const std::string& key_start, const std::string& key_end, - const std::string& path, - const std::vector& parent_tablets, - std::map snapshots, - leveldb::Logger* logger, - leveldb::Cache* block_cache, - leveldb::TableCache* table_cache, - StatusCode* status) { - return m_ret_io_load; - } - bool IO_Unload(StatusCode* status) { - return m_ret_io_unload; - } - bool IO_Compact(StatusCode* status) { - return m_ret_io_compact; - } - CompactStatus IO_GetCompactStatus() { - return kTableCompacted; - } - int64_t IO_GetDataSize(StatusCode* status) { - return 0; - } - bool IO_Read(const KeyList& key_list, BytesList* value_list, - uint32_t* success_num, uint64_t snapshot_id, + public: + TabletNodeImplTest() + : m_tablet_manager(new MockTabletManager()), + m_tabletnode_impl(m_tabletnode_info, &m_master_client, m_tablet_manager), + m_ret_status(kTabletNodeOk), + m_ret_tm_add(false), + m_ret_tm_remove(false), + m_ret_io_load(false), + m_ret_io_unload(false), + m_ret_io_compact(false), + m_ret_io_readcell(false), + m_ret_io_write(false), + m_ret_io_scan(false), + m_ret_io_scanrow(false), + m_ret_io_split(false), + m_start_key("start_key"), + m_end_key("end_key"), + m_schema(DefaultTableSchema()) { + FLAGS_tera_coord_type = "fake_zk"; + + m_tablet_meta.set_table_name("name"); + m_tablet_meta.set_path("path"); + CreateKeyRange("", "", m_tablet_meta.mutable_key_range()); + } + ~TabletNodeImplTest() {} + + void Done() {} + + void CreateCallback() { m_done = google::protobuf::NewCallback(this, &TabletNodeImplTest::Done); } + + void CreateKeyRange(const std::string& start_key, const std::string& end_key, + KeyRange* key_range) { + key_range->set_key_start(""); + key_range->set_key_end(""); + } + + // mock tablet manager + + bool AddTablet(const std::string& table_name, const std::string& table_path, + const std::string& key_start, const std::string& key_end, io::TabletIO** tablet_io, StatusCode* status) { - return true; - } - bool IO_ReadCells(const RowReaderInfo& row_reader, RowResult* value_list, - StatusCode* status) { - return m_ret_io_readcell; - } - bool IO_Write(const WriteTabletRequest* request, - WriteTabletResponse* response, - google::protobuf::Closure* done, - const std::vector* index_list, - Counter* done_counter, WriteRpcTimer* timer = NULL, - StatusCode* status = NULL) { - return m_ret_io_write; - } - bool IO_Scan(const ScanOption& option, KeyValueList* kv_list, - bool* complete, StatusCode* status) { - return m_ret_io_scan; - } - bool IO_ScanRows(const ScanTabletRequest* request, - ScanTabletResponse* response, - google::protobuf::Closure* done) { - response->set_status(kTabletNodeOk); - return m_ret_io_scanrow; - } - bool IO_Split(std::string* split_key, StatusCode* status) { - return m_ret_io_split; - } - -protected: - TabletNodeInfo m_tabletnode_info; - master::MockMasterClient m_master_client; - MockTabletManager* m_tablet_manager; - google::protobuf::Closure* m_done; - TabletNodeImpl m_tabletnode_impl; - - io::MockTabletIO m_tablet_io; - StatusCode m_ret_status; - bool m_ret_tm_add; - bool m_ret_tm_remove; - bool m_ret_io_load; - bool m_ret_io_unload; - bool m_ret_io_compact; - bool m_ret_io_readcell; - bool m_ret_io_write; - bool m_ret_io_scan; - bool m_ret_io_scanrow; - bool m_ret_io_split; - - std::string m_table_name; - std::string m_start_key; - std::string m_end_key; - TableSchema m_schema; - TabletMeta m_tablet_meta; + m_tablet_io.AddRef(); + *tablet_io = &m_tablet_io; + *status = m_ret_status; + return m_ret_tm_add; + } + bool RemoveTablet(const std::string& table_name, const std::string& key_start, + const std::string& key_end, StatusCode* status) { + return m_ret_tm_remove; + } + io::TabletIO* GetTablet(const std::string& table_name, const std::string& key_start, + const std::string& key_end, StatusCode* status) { + m_tablet_io.AddRef(); + return &m_tablet_io; + } + io::TabletIO* GetTablet2(const std::string& table_name, const std::string& key, + StatusCode* status) { + m_tablet_io.AddRef(); + return &m_tablet_io; + } + void GetAllTabletMeta(std::vector* tablet_meta_list) { + TabletMeta* meta = new TabletMeta(m_tablet_meta); + tablet_meta_list->push_back(meta); + } + + // mock tablet io + + bool IO_Load(const TableSchema& schema, const std::string& key_start, const std::string& key_end, + const std::string& path, const std::vector& parent_tablets, + std::map snapshots, leveldb::Logger* logger, + leveldb::Cache* block_cache, leveldb::TableCache* table_cache, StatusCode* status) { + return m_ret_io_load; + } + bool IO_Unload(StatusCode* status) { return m_ret_io_unload; } + bool IO_Compact(StatusCode* status) { return m_ret_io_compact; } + CompactStatus IO_GetCompactStatus() { return kTableCompacted; } + int64_t IO_GetDataSize(StatusCode* status) { return 0; } + bool IO_Read(const KeyList& key_list, BytesList* value_list, uint32_t* success_num, + uint64_t snapshot_id, StatusCode* status) { + return true; + } + bool IO_ReadCells(const RowReaderInfo& row_reader, RowResult* value_list, StatusCode* status) { + return m_ret_io_readcell; + } + bool IO_Write(const WriteTabletRequest* request, WriteTabletResponse* response, + google::protobuf::Closure* done, const std::vector* index_list, + Counter* done_counter, WriteRpcTimer* timer = NULL, StatusCode* status = NULL) { + return m_ret_io_write; + } + bool IO_Scan(const ScanOption& option, KeyValueList* kv_list, bool* complete, + StatusCode* status) { + return m_ret_io_scan; + } + bool IO_ScanRows(const ScanTabletRequest* request, ScanTabletResponse* response, + google::protobuf::Closure* done) { + response->set_status(kTabletNodeOk); + return m_ret_io_scanrow; + } + bool IO_Split(std::string* split_key, StatusCode* status) { return m_ret_io_split; } + + protected: + TabletNodeInfo m_tabletnode_info; + master::MockMasterClient m_master_client; + MockTabletManager* m_tablet_manager; + google::protobuf::Closure* m_done; + TabletNodeImpl m_tabletnode_impl; + + io::MockTabletIO m_tablet_io; + StatusCode m_ret_status; + bool m_ret_tm_add; + bool m_ret_tm_remove; + bool m_ret_io_load; + bool m_ret_io_unload; + bool m_ret_io_compact; + bool m_ret_io_readcell; + bool m_ret_io_write; + bool m_ret_io_scan; + bool m_ret_io_scanrow; + bool m_ret_io_split; + + std::string m_table_name; + std::string m_start_key; + std::string m_end_key; + TableSchema m_schema; + TabletMeta m_tablet_meta; }; TEST_F(TabletNodeImplTest, Init_Exit) { - EXPECT_TRUE(m_tabletnode_impl.Init()); - EXPECT_TRUE(m_tabletnode_impl.Exit()); + EXPECT_TRUE(m_tabletnode_impl.Init()); + EXPECT_TRUE(m_tabletnode_impl.Exit()); } TEST_F(TabletNodeImplTest, LoadTabletSuccess) { - EXPECT_CALL(*m_tablet_manager, AddTablet(_, _, _, _, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::AddTablet)); - EXPECT_CALL(m_tablet_io, Load(_, _, _, _, _, _, _, _, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_Load)); - - LoadTabletRequest request; - LoadTabletResponse response; - request.set_sequence_id(1); - request.set_session_id("1"); - request.mutable_schema()->CopyFrom(m_schema); - - m_tabletnode_impl.SetSessionId("1"); - m_ret_tm_add = true; - m_ret_io_load = true; - CreateCallback(); - m_tabletnode_impl.LoadTablet(&request, &response, m_done); - EXPECT_EQ(response.status(), kTabletNodeOk) - << ": " << StatusCodeToString(response.status()) - << " vs. " << StatusCodeToString(kTabletNodeOk); + EXPECT_CALL(*m_tablet_manager, AddTablet(_, _, _, _, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::AddTablet)); + EXPECT_CALL(m_tablet_io, Load(_, _, _, _, _, _, _, _, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_Load)); + + LoadTabletRequest request; + LoadTabletResponse response; + request.set_sequence_id(1); + request.set_session_id("1"); + request.mutable_schema()->CopyFrom(m_schema); + + m_tabletnode_impl.SetSessionId("1"); + m_ret_tm_add = true; + m_ret_io_load = true; + CreateCallback(); + m_tabletnode_impl.LoadTablet(&request, &response, m_done); + EXPECT_EQ(response.status(), kTabletNodeOk) << ": " << StatusCodeToString(response.status()) + << " vs. " << StatusCodeToString(kTabletNodeOk); } TEST_F(TabletNodeImplTest, LoadTabletFailureForSessionId) { - EXPECT_CALL(*m_tablet_manager, AddTablet(_, _, _, _, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::AddTablet)); - - LoadTabletRequest request; - LoadTabletResponse response; - request.set_sequence_id(1); - request.set_session_id("1"); - request.mutable_schema()->CopyFrom(m_schema); - - m_tabletnode_impl.SetSessionId("2"); - m_ret_tm_add = true; - CreateCallback(); - m_tabletnode_impl.LoadTablet(&request, &response, m_done); - EXPECT_EQ(response.status(), kIllegalAccess); + EXPECT_CALL(*m_tablet_manager, AddTablet(_, _, _, _, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::AddTablet)); + + LoadTabletRequest request; + LoadTabletResponse response; + request.set_sequence_id(1); + request.set_session_id("1"); + request.mutable_schema()->CopyFrom(m_schema); + + m_tabletnode_impl.SetSessionId("2"); + m_ret_tm_add = true; + CreateCallback(); + m_tabletnode_impl.LoadTablet(&request, &response, m_done); + EXPECT_EQ(response.status(), kIllegalAccess); } TEST_F(TabletNodeImplTest, LoadTabletFailureForInvalidSchema) { - EXPECT_CALL(*m_tablet_manager, AddTablet(_, _, _, _, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::AddTablet)); - - LoadTabletRequest request; - LoadTabletResponse response; - request.set_sequence_id(1); - request.set_session_id("1"); - - m_tabletnode_impl.SetSessionId("1"); - m_ret_tm_add = true; - CreateCallback(); - m_tabletnode_impl.LoadTablet(&request, &response, m_done); - EXPECT_EQ(response.status(), kIllegalAccess); + EXPECT_CALL(*m_tablet_manager, AddTablet(_, _, _, _, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::AddTablet)); + + LoadTabletRequest request; + LoadTabletResponse response; + request.set_sequence_id(1); + request.set_session_id("1"); + + m_tabletnode_impl.SetSessionId("1"); + m_ret_tm_add = true; + CreateCallback(); + m_tabletnode_impl.LoadTablet(&request, &response, m_done); + EXPECT_EQ(response.status(), kIllegalAccess); } TEST_F(TabletNodeImplTest, LoadTabletFailureForAddTablet) { - EXPECT_CALL(*m_tablet_manager, AddTablet(_, _, _, _, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::AddTablet)); - - LoadTabletRequest request; - LoadTabletResponse response; - request.set_sequence_id(1); - request.set_session_id("1"); - request.mutable_schema()->CopyFrom(m_schema); - - m_tabletnode_impl.SetSessionId("1"); - m_ret_tm_add = false; - CreateCallback(); - m_tabletnode_impl.LoadTablet(&request, &response, m_done); - EXPECT_NE(response.status(), kTabletNodeOk); + EXPECT_CALL(*m_tablet_manager, AddTablet(_, _, _, _, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::AddTablet)); + + LoadTabletRequest request; + LoadTabletResponse response; + request.set_sequence_id(1); + request.set_session_id("1"); + request.mutable_schema()->CopyFrom(m_schema); + + m_tabletnode_impl.SetSessionId("1"); + m_ret_tm_add = false; + CreateCallback(); + m_tabletnode_impl.LoadTablet(&request, &response, m_done); + EXPECT_NE(response.status(), kTabletNodeOk); } TEST_F(TabletNodeImplTest, LoadTabletFailureForIOLoad) { - EXPECT_CALL(*m_tablet_manager, AddTablet(_, _, _, _, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::AddTablet)); - EXPECT_CALL(*m_tablet_manager, RemoveTablet(_, _, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::RemoveTablet)); - EXPECT_CALL(m_tablet_io, Load(_, _, _, _, _, _, _, _, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_Load)); - - LoadTabletRequest request; - LoadTabletResponse response; - request.set_sequence_id(1); - request.set_session_id("1"); - request.mutable_schema()->CopyFrom(m_schema); - - m_tabletnode_impl.SetSessionId("1"); - m_ret_tm_add = true; - m_ret_tm_remove = true; - m_ret_io_load = false; - CreateCallback(); - m_tabletnode_impl.LoadTablet(&request, &response, m_done); - EXPECT_NE(response.status(), kTabletNodeOk); + EXPECT_CALL(*m_tablet_manager, AddTablet(_, _, _, _, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::AddTablet)); + EXPECT_CALL(*m_tablet_manager, RemoveTablet(_, _, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::RemoveTablet)); + EXPECT_CALL(m_tablet_io, Load(_, _, _, _, _, _, _, _, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_Load)); + + LoadTabletRequest request; + LoadTabletResponse response; + request.set_sequence_id(1); + request.set_session_id("1"); + request.mutable_schema()->CopyFrom(m_schema); + + m_tabletnode_impl.SetSessionId("1"); + m_ret_tm_add = true; + m_ret_tm_remove = true; + m_ret_io_load = false; + CreateCallback(); + m_tabletnode_impl.LoadTablet(&request, &response, m_done); + EXPECT_NE(response.status(), kTabletNodeOk); } TEST_F(TabletNodeImplTest, UnloadTabletSuccess) { - EXPECT_CALL(*m_tablet_manager, GetTablet(_, _, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::GetTablet)); - EXPECT_CALL(*m_tablet_manager, RemoveTablet(_, _, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::RemoveTablet)); - EXPECT_CALL(m_tablet_io, Unload(_)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_Unload)); - - UnloadTabletRequest request; - UnloadTabletResponse response; - request.set_sequence_id(2); - request.set_tablet_name("unload_table"); - KeyRange* key_range = request.mutable_key_range(); - key_range->set_key_start(""); - key_range->set_key_end(""); - - m_ret_tm_remove = true; - m_ret_io_unload = true; - CreateCallback(); - m_tabletnode_impl.UnloadTablet(&request, &response, m_done); - EXPECT_EQ(response.status(), kTabletNodeOk); + EXPECT_CALL(*m_tablet_manager, GetTablet(_, _, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::GetTablet)); + EXPECT_CALL(*m_tablet_manager, RemoveTablet(_, _, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::RemoveTablet)); + EXPECT_CALL(m_tablet_io, Unload(_)).WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_Unload)); + + UnloadTabletRequest request; + UnloadTabletResponse response; + request.set_sequence_id(2); + request.set_tablet_name("unload_table"); + KeyRange* key_range = request.mutable_key_range(); + key_range->set_key_start(""); + key_range->set_key_end(""); + + m_ret_tm_remove = true; + m_ret_io_unload = true; + CreateCallback(); + m_tabletnode_impl.UnloadTablet(&request, &response, m_done); + EXPECT_EQ(response.status(), kTabletNodeOk); } TEST_F(TabletNodeImplTest, UnloadTabletFailureForIOError) { - FLAGS_tera_tabletnode_retry_period = 0; - EXPECT_CALL(*m_tablet_manager, GetTablet(_, _, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::GetTablet)); - EXPECT_CALL(*m_tablet_manager, RemoveTablet(_, _, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::RemoveTablet)); - EXPECT_CALL(m_tablet_io, Unload(_)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_Unload)); - - UnloadTabletRequest request; - UnloadTabletResponse response; - request.set_sequence_id(2); - request.set_tablet_name("unload_table"); - KeyRange* key_range = request.mutable_key_range(); - key_range->set_key_start(""); - key_range->set_key_end(""); - - m_ret_tm_remove = true; - m_ret_io_unload = false; - CreateCallback(); - m_tabletnode_impl.UnloadTablet(&request, &response, m_done); - EXPECT_NE(response.status(), kTabletNodeOk); + FLAGS_tera_tabletnode_retry_period = 0; + EXPECT_CALL(*m_tablet_manager, GetTablet(_, _, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::GetTablet)); + EXPECT_CALL(*m_tablet_manager, RemoveTablet(_, _, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::RemoveTablet)); + EXPECT_CALL(m_tablet_io, Unload(_)).WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_Unload)); + + UnloadTabletRequest request; + UnloadTabletResponse response; + request.set_sequence_id(2); + request.set_tablet_name("unload_table"); + KeyRange* key_range = request.mutable_key_range(); + key_range->set_key_start(""); + key_range->set_key_end(""); + + m_ret_tm_remove = true; + m_ret_io_unload = false; + CreateCallback(); + m_tabletnode_impl.UnloadTablet(&request, &response, m_done); + EXPECT_NE(response.status(), kTabletNodeOk); } TEST_F(TabletNodeImplTest, UnloadTabletButRemoveFailure) { - FLAGS_tera_tabletnode_retry_period = 0; - EXPECT_CALL(*m_tablet_manager, GetTablet(_, _, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::GetTablet)); - EXPECT_CALL(*m_tablet_manager, RemoveTablet(_, _, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::RemoveTablet)); - EXPECT_CALL(m_tablet_io, Unload(_)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_Unload)); - - UnloadTabletRequest request; - UnloadTabletResponse response; - request.set_sequence_id(2); - request.set_tablet_name("unload_table"); - CreateKeyRange("", "", request.mutable_key_range()); - - m_ret_tm_remove = false; - m_ret_io_unload = true; - CreateCallback(); - m_tabletnode_impl.UnloadTablet(&request, &response, m_done); - EXPECT_EQ(response.status(), kTabletNodeOk); + FLAGS_tera_tabletnode_retry_period = 0; + EXPECT_CALL(*m_tablet_manager, GetTablet(_, _, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::GetTablet)); + EXPECT_CALL(*m_tablet_manager, RemoveTablet(_, _, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::RemoveTablet)); + EXPECT_CALL(m_tablet_io, Unload(_)).WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_Unload)); + + UnloadTabletRequest request; + UnloadTabletResponse response; + request.set_sequence_id(2); + request.set_tablet_name("unload_table"); + CreateKeyRange("", "", request.mutable_key_range()); + + m_ret_tm_remove = false; + m_ret_io_unload = true; + CreateCallback(); + m_tabletnode_impl.UnloadTablet(&request, &response, m_done); + EXPECT_EQ(response.status(), kTabletNodeOk); } TEST_F(TabletNodeImplTest, CompactTabletSuccess) { - EXPECT_CALL(*m_tablet_manager, GetTablet(_, _, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::GetTablet)); - EXPECT_CALL(m_tablet_io, Compact(_)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_Compact)); - EXPECT_CALL(m_tablet_io, GetCompactStatus()) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_GetCompactStatus)); - EXPECT_CALL(m_tablet_io, GetDataSize(_)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_GetDataSize)); - - CompactTabletRequest request; - CompactTabletResponse response; - request.set_sequence_id(1); - request.set_tablet_name("compact_table"); - CreateKeyRange("", "", request.mutable_key_range()); - - m_ret_io_compact = true; - CreateCallback(); - m_tabletnode_impl.CompactTablet(&request, &response, m_done); - EXPECT_EQ(response.status(), kTabletNodeOk); + EXPECT_CALL(*m_tablet_manager, GetTablet(_, _, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::GetTablet)); + EXPECT_CALL(m_tablet_io, Compact(_)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_Compact)); + EXPECT_CALL(m_tablet_io, GetCompactStatus()) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_GetCompactStatus)); + EXPECT_CALL(m_tablet_io, GetDataSize(_)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_GetDataSize)); + + CompactTabletRequest request; + CompactTabletResponse response; + request.set_sequence_id(1); + request.set_tablet_name("compact_table"); + CreateKeyRange("", "", request.mutable_key_range()); + + m_ret_io_compact = true; + CreateCallback(); + m_tabletnode_impl.CompactTablet(&request, &response, m_done); + EXPECT_EQ(response.status(), kTabletNodeOk); } TEST_F(TabletNodeImplTest, ReadTabletSuccessOfKeyList) { - EXPECT_CALL(*m_tablet_manager, GetTablet(_, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::GetTablet2)); - EXPECT_CALL(m_tablet_io, Read(_, _, _, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_Read)); - - ReadTabletRequest request; - ReadTabletResponse response; - request.set_sequence_id(1); - request.set_tablet_name("read_table"); - RowReaderInfo* row = request.add_row_info_list(); - row->set_key("key"); - CreateCallback(); - m_tabletnode_impl.ReadTablet(1111, &request, &response, m_done); - EXPECT_EQ(response.status(), kTabletNodeOk); + EXPECT_CALL(*m_tablet_manager, GetTablet(_, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::GetTablet2)); + EXPECT_CALL(m_tablet_io, Read(_, _, _, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_Read)); + + ReadTabletRequest request; + ReadTabletResponse response; + request.set_sequence_id(1); + request.set_tablet_name("read_table"); + RowReaderInfo* row = request.add_row_info_list(); + row->set_key("key"); + CreateCallback(); + m_tabletnode_impl.ReadTablet(1111, &request, &response, m_done); + EXPECT_EQ(response.status(), kTabletNodeOk); } TEST_F(TabletNodeImplTest, ReadTabletSuccessOfRowList) { - EXPECT_CALL(*m_tablet_manager, GetTablet(_, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::GetTablet2)); - EXPECT_CALL(m_tablet_io, ReadCells(_, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_ReadCells)); - - ReadTabletRequest request; - ReadTabletResponse response; - request.set_sequence_id(1); - request.set_tablet_name("read_table"); - RowReaderInfo* row_info = request.add_row_info_list(); - row_info->set_key("key"); - - - m_ret_io_readcell = true; - CreateCallback(); - m_tabletnode_impl.ReadTablet(1111, &request, &response, m_done); - EXPECT_EQ(response.status(), kTabletNodeOk); + EXPECT_CALL(*m_tablet_manager, GetTablet(_, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::GetTablet2)); + EXPECT_CALL(m_tablet_io, ReadCells(_, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_ReadCells)); + + ReadTabletRequest request; + ReadTabletResponse response; + request.set_sequence_id(1); + request.set_tablet_name("read_table"); + RowReaderInfo* row_info = request.add_row_info_list(); + row_info->set_key("key"); + + m_ret_io_readcell = true; + CreateCallback(); + m_tabletnode_impl.ReadTablet(1111, &request, &response, m_done); + EXPECT_EQ(response.status(), kTabletNodeOk); } TEST_F(TabletNodeImplTest, ReadTabletSuccessOfNullData) { - EXPECT_CALL(*m_tablet_manager, GetTablet(_, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::GetTablet2)); + EXPECT_CALL(*m_tablet_manager, GetTablet(_, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::GetTablet2)); - ReadTabletRequest request; - ReadTabletResponse response; - request.set_sequence_id(1); - request.set_tablet_name("read_table"); + ReadTabletRequest request; + ReadTabletResponse response; + request.set_sequence_id(1); + request.set_tablet_name("read_table"); - CreateCallback(); - m_tabletnode_impl.ReadTablet(1111, &request, &response, m_done); - EXPECT_EQ(response.status(), kTabletNodeOk); + CreateCallback(); + m_tabletnode_impl.ReadTablet(1111, &request, &response, m_done); + EXPECT_EQ(response.status(), kTabletNodeOk); } TEST_F(TabletNodeImplTest, WriteTabletSuccessOfKeyValue) { - EXPECT_CALL(*m_tablet_manager, GetTablet(_, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::GetTablet2)); - EXPECT_CALL(m_tablet_io, Write(_, _, _, _, _, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_Write)); - - WriteTabletRequest request; - WriteTabletResponse response; - request.set_sequence_id(1); - request.set_tablet_name("write_table"); - RowMutationSequence* mu_seq = request.add_row_list(); - mu_seq->set_row_key("key"); - Mutation* mutation = mu_seq->add_mutation_sequence(); - mutation->set_type(kPut); - mutation->set_value("value"); - - m_ret_io_write = true; - CreateCallback(); - m_tabletnode_impl.WriteTablet(&request, &response, m_done); - EXPECT_EQ(response.status(), kTabletNodeOk); + EXPECT_CALL(*m_tablet_manager, GetTablet(_, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::GetTablet2)); + EXPECT_CALL(m_tablet_io, Write(_, _, _, _, _, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_Write)); + + WriteTabletRequest request; + WriteTabletResponse response; + request.set_sequence_id(1); + request.set_tablet_name("write_table"); + RowMutationSequence* mu_seq = request.add_row_list(); + mu_seq->set_row_key("key"); + Mutation* mutation = mu_seq->add_mutation_sequence(); + mutation->set_type(kPut); + mutation->set_value("value"); + + m_ret_io_write = true; + CreateCallback(); + m_tabletnode_impl.WriteTablet(&request, &response, m_done); + EXPECT_EQ(response.status(), kTabletNodeOk); } TEST_F(TabletNodeImplTest, WriteTabletSuccessOfTable) { - EXPECT_CALL(*m_tablet_manager, GetTablet(_, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::GetTablet2)); - EXPECT_CALL(m_tablet_io, Write(_, _, _, _, _, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_Write)); - - WriteTabletRequest request; - WriteTabletResponse response; - request.set_sequence_id(1); - request.set_tablet_name("write_table"); - RowMutationSequence* row_list = request.add_row_list(); - row_list->set_row_key("row_key"); - Mutation* mutation = row_list->add_mutation_sequence(); - mutation->set_type(kDeleteRow); - mutation->set_ts_start(1111); - mutation->set_ts_end(2222); - - m_ret_io_write = true; - CreateCallback(); - m_tabletnode_impl.WriteTablet(&request, &response, m_done); - EXPECT_EQ(response.status(), kTabletNodeOk); + EXPECT_CALL(*m_tablet_manager, GetTablet(_, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::GetTablet2)); + EXPECT_CALL(m_tablet_io, Write(_, _, _, _, _, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_Write)); + + WriteTabletRequest request; + WriteTabletResponse response; + request.set_sequence_id(1); + request.set_tablet_name("write_table"); + RowMutationSequence* row_list = request.add_row_list(); + row_list->set_row_key("row_key"); + Mutation* mutation = row_list->add_mutation_sequence(); + mutation->set_type(kDeleteRow); + mutation->set_ts_start(1111); + mutation->set_ts_end(2222); + + m_ret_io_write = true; + CreateCallback(); + m_tabletnode_impl.WriteTablet(&request, &response, m_done); + EXPECT_EQ(response.status(), kTabletNodeOk); } TEST_F(TabletNodeImplTest, QuerySuccess) { - QueryRequest request; - QueryResponse response; - request.set_sequence_id(1); + QueryRequest request; + QueryResponse response; + request.set_sequence_id(1); - CreateCallback(); - m_tabletnode_impl.Query(&request, &response, m_done); - EXPECT_EQ(response.status(), kTabletNodeOk); + CreateCallback(); + m_tabletnode_impl.Query(&request, &response, m_done); + EXPECT_EQ(response.status(), kTabletNodeOk); } TEST_F(TabletNodeImplTest, ScanTabletSuccessOfTable) { - EXPECT_CALL(*m_tablet_manager, GetTablet(_, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::GetTablet2)); - EXPECT_CALL(m_tablet_io, ScanRows(_, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_ScanRows)); - - ScanTabletRequest request; - ScanTabletResponse response; - request.set_sequence_id(1); - request.set_table_name("scan_table"); - - m_ret_io_scanrow = true; - CreateCallback(); - m_tabletnode_impl.ScanTablet(&request, &response, m_done); - EXPECT_EQ(response.status(), kTabletNodeOk); + EXPECT_CALL(*m_tablet_manager, GetTablet(_, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::GetTablet2)); + EXPECT_CALL(m_tablet_io, ScanRows(_, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_ScanRows)); + + ScanTabletRequest request; + ScanTabletResponse response; + request.set_sequence_id(1); + request.set_table_name("scan_table"); + + m_ret_io_scanrow = true; + CreateCallback(); + m_tabletnode_impl.ScanTablet(&request, &response, m_done); + EXPECT_EQ(response.status(), kTabletNodeOk); } - TEST_F(TabletNodeImplTest, SplitTabletSuccess) { - FLAGS_tera_tabletnode_retry_period = 0; - EXPECT_CALL(*m_tablet_manager, GetTablet(_, _, _, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::GetTablet)); - EXPECT_CALL(m_tablet_io, Split(_, _)) - .WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_Split)); + FLAGS_tera_tabletnode_retry_period = 0; + EXPECT_CALL(*m_tablet_manager, GetTablet(_, _, _, _)) + .WillRepeatedly(Invoke(this, &TabletNodeImplTest::GetTablet)); + EXPECT_CALL(m_tablet_io, Split(_, _)).WillRepeatedly(Invoke(this, &TabletNodeImplTest::IO_Split)); - // not finished yet + // not finished yet } -} // namespace tabletnode -} // namespace tera +} // namespace tabletnode +} // namespace tera int main(int argc, char** argv) { - ::google::InitGoogleLogging(argv[0]); - ::google::ParseCommandLineFlags(&argc, &argv, true); - ::testing::InitGoogleTest(&argc, argv); - FLAGS_tera_leveldb_env_type = "local"; - return RUN_ALL_TESTS(); + ::google::InitGoogleLogging(argv[0]); + ::google::ParseCommandLineFlags(&argc, &argv, true); + ::testing::InitGoogleTest(&argc, argv); + FLAGS_tera_leveldb_env_type = "local"; + return RUN_ALL_TESTS(); } - diff --git a/src/tabletnode/test/tabletnode_sysinfo_test.cc b/src/tabletnode/test/tabletnode_sysinfo_test.cc index e15c83a7c..6aa7e75ac 100644 --- a/src/tabletnode/test/tabletnode_sysinfo_test.cc +++ b/src/tabletnode/test/tabletnode_sysinfo_test.cc @@ -12,21 +12,21 @@ namespace tera { namespace tabletnode { class TabletNodeSysInfoTest : public ::testing::Test, public TabletNodeSysInfo { -public: - TabletNodeSysInfoTest() {} - ~TabletNodeSysInfoTest() {} + public: + TabletNodeSysInfoTest() {} + ~TabletNodeSysInfoTest() {} }; TEST_F(TabletNodeSysInfoTest, CollectHardwareInfo) { - int64_t ts = get_micros(); - CollectHardwareInfo(); - ts = get_micros() - ts; - LOG(ERROR) << "cost: " << ts << " ms."; + int64_t ts = get_micros(); + CollectHardwareInfo(); + ts = get_micros() - ts; + LOG(ERROR) << "cost: " << ts << " ms."; } TEST_F(TabletNodeSysInfoTest, ToString) { - SetCurrentTime(); - AddExtraInfo("read", 100); + SetCurrentTime(); + AddExtraInfo("read", 100); } -} // namespace tabletnode -} // namespace tera +} // namespace tabletnode +} // namespace tera diff --git a/src/tera_c.cc b/src/tera_c.cc index 7b4480bb1..c40bddb90 100644 --- a/src/tera_c.cc +++ b/src/tera_c.cc @@ -26,36 +26,47 @@ using tera::Table; extern "C" { -struct tera_client_t { Client* rep; }; -struct tera_result_stream_t { ResultStream* rep; }; -struct tera_row_mutation_t { RowMutation* rep; }; -struct tera_row_reader_t { RowReader* rep; }; -struct tera_scan_descriptor_t { ScanDescriptor* rep; }; -struct tera_table_t { Table* rep; }; +struct tera_client_t { + Client* rep; +}; +struct tera_result_stream_t { + ResultStream* rep; +}; +struct tera_row_mutation_t { + RowMutation* rep; +}; +struct tera_row_reader_t { + RowReader* rep; +}; +struct tera_scan_descriptor_t { + ScanDescriptor* rep; +}; +struct tera_table_t { + Table* rep; +}; static bool SaveError(char** errptr, const ErrorCode& s) { - if (s.GetType() == ErrorCode::kOK) { - return false; - } - if (errptr == NULL) { - fprintf(stderr, "%s tera error: %s.\n", - tera::get_curtime_str().c_str(), s.GetReason().c_str()); - return true; - } - - if (*errptr == NULL) { - *errptr = strdup(s.GetReason().c_str()); - } else { - free(*errptr); - *errptr = strdup(s.GetReason().c_str()); - } + if (s.GetType() == ErrorCode::kOK) { + return false; + } + if (errptr == NULL) { + fprintf(stderr, "%s tera error: %s.\n", tera::get_curtime_str().c_str(), s.GetReason().c_str()); return true; + } + + if (*errptr == NULL) { + *errptr = strdup(s.GetReason().c_str()); + } else { + free(*errptr); + *errptr = strdup(s.GetReason().c_str()); + } + return true; } static char* CopyString(const std::string& str) { - char* result = reinterpret_cast(malloc(sizeof(char) * str.size())); - memcpy(result, str.data(), sizeof(char) * str.size()); - return result; + char* result = reinterpret_cast(malloc(sizeof(char) * str.size())); + memcpy(result, str.data(), sizeof(char) * str.size()); + return result; } // > @@ -69,504 +80,465 @@ static reader_callback_map_t g_reader_callback_map; static Mutex g_reader_mutex; tera_client_t* tera_client_open(const char* conf_path, const char* log_prefix, char** errptr) { - ErrorCode err; - tera_client_t* result = new tera_client_t; - result->rep = Client::NewClient(conf_path, log_prefix, &err); - if (SaveError(errptr, err) || !result->rep) { - delete result; - return NULL; - } - return result; + ErrorCode err; + tera_client_t* result = new tera_client_t; + result->rep = Client::NewClient(conf_path, log_prefix, &err); + if (SaveError(errptr, err) || !result->rep) { + delete result; + return NULL; + } + return result; } void tera_client_close(tera_client_t* client) { - delete client->rep; - delete client; + delete client->rep; + delete client; } tera_table_t* tera_table_open(tera_client_t* client, const char* table_name, char** errptr) { - ErrorCode err; - tera_table_t* result = new tera_table_t; - result->rep = client->rep->OpenTable(table_name, &err); - if (SaveError(errptr, err) || !result->rep) { - delete result; - return NULL; - } - return result; + ErrorCode err; + tera_table_t* result = new tera_table_t; + result->rep = client->rep->OpenTable(table_name, &err); + if (SaveError(errptr, err) || !result->rep) { + delete result; + return NULL; + } + return result; } void tera_table_close(tera_table_t* table) { - delete table->rep; - delete table; + delete table->rep; + delete table; } -bool tera_table_get(tera_table_t* table, - const char* row_key, uint64_t keylen, - const char* family, const char* qualifier, - uint64_t qulen, char** value, uint64_t* vallen, +bool tera_table_get(tera_table_t* table, const char* row_key, uint64_t keylen, const char* family, + const char* qualifier, uint64_t qulen, char** value, uint64_t* vallen, char** errptr, uint64_t snapshot_id) { - ErrorCode err; - std::string key_str(row_key, keylen); - std::string qu_str(qualifier, qulen); - std::string value_str; - bool result = table->rep->Get(key_str, family, qu_str, &value_str, &err, snapshot_id); - if (result) { - *value = CopyString(value_str); - *vallen = value_str.size(); - } - if (SaveError(errptr, err)) { - *vallen = 0; - } - return result; -} - -bool tera_table_getint64(tera_table_t* table, - const char* row_key, uint64_t keylen, - const char* family, const char* qualifier, - uint64_t qulen, int64_t* value, + ErrorCode err; + std::string key_str(row_key, keylen); + std::string qu_str(qualifier, qulen); + std::string value_str; + bool result = table->rep->Get(key_str, family, qu_str, &value_str, &err, snapshot_id); + if (result) { + *value = CopyString(value_str); + *vallen = value_str.size(); + } + if (SaveError(errptr, err)) { + *vallen = 0; + } + return result; +} + +bool tera_table_getint64(tera_table_t* table, const char* row_key, uint64_t keylen, + const char* family, const char* qualifier, uint64_t qulen, int64_t* value, char** errptr, uint64_t snapshot_id) { - ErrorCode err; - std::string key_str(row_key, keylen); - std::string qu_str(qualifier, qulen); - bool result = table->rep->Get(key_str, family, qu_str, value, &err, snapshot_id); - if (SaveError(errptr, err)) { - return false; - } - return result; -} - -bool tera_table_put(tera_table_t* table, - const char* row_key, uint64_t keylen, - const char* family, const char* qualifier, - uint64_t qulen, const char* value, uint64_t vallen, + ErrorCode err; + std::string key_str(row_key, keylen); + std::string qu_str(qualifier, qulen); + bool result = table->rep->Get(key_str, family, qu_str, value, &err, snapshot_id); + if (SaveError(errptr, err)) { + return false; + } + return result; +} + +bool tera_table_put(tera_table_t* table, const char* row_key, uint64_t keylen, const char* family, + const char* qualifier, uint64_t qulen, const char* value, uint64_t vallen, char** errptr) { - ErrorCode err; - std::string key_str(row_key, keylen); - std::string qu_str(qualifier, qulen); - std::string value_str(value, vallen); - bool result = table->rep->Put(key_str, family, qu_str, value_str, &err); - if (SaveError(errptr, err)) { - return false; - } - return result; -} - -bool tera_table_put_kv(tera_table_t* table, const char* key, uint64_t keylen, - const char* value, uint64_t vallen, int32_t ttl, - char** errptr) { - ErrorCode err; - std::string key_str(key, keylen); - std::string val_str(value, vallen); - RowMutation* mutation = table->rep->NewRowMutation(key); - mutation->Put(val_str, ttl); - table->rep->ApplyMutation(mutation); - err = mutation->GetError(); - delete mutation; - if (SaveError(errptr, err)) { - fprintf(stderr, "%s tera error: %s.\n", - tera::get_curtime_str().c_str(), err.GetReason().c_str()); - return false; - } - return true; -} - -bool tera_table_putint64(tera_table_t* table, - const char* row_key, uint64_t keylen, - const char* family, const char* qualifier, - uint64_t qulen, int64_t value, + ErrorCode err; + std::string key_str(row_key, keylen); + std::string qu_str(qualifier, qulen); + std::string value_str(value, vallen); + bool result = table->rep->Put(key_str, family, qu_str, value_str, &err); + if (SaveError(errptr, err)) { + return false; + } + return result; +} + +bool tera_table_put_kv(tera_table_t* table, const char* key, uint64_t keylen, const char* value, + uint64_t vallen, int32_t ttl, char** errptr) { + ErrorCode err; + std::string key_str(key, keylen); + std::string val_str(value, vallen); + RowMutation* mutation = table->rep->NewRowMutation(key); + mutation->Put(val_str, ttl); + table->rep->ApplyMutation(mutation); + err = mutation->GetError(); + delete mutation; + if (SaveError(errptr, err)) { + fprintf(stderr, "%s tera error: %s.\n", tera::get_curtime_str().c_str(), + err.GetReason().c_str()); + return false; + } + return true; +} + +bool tera_table_putint64(tera_table_t* table, const char* row_key, uint64_t keylen, + const char* family, const char* qualifier, uint64_t qulen, int64_t value, char** errptr) { - ErrorCode err; - std::string key_str(row_key, keylen); - std::string qu_str(qualifier, qulen); - bool result = table->rep->Put(key_str, family, qu_str, value, &err); - if (SaveError(errptr, err)) { - return false; - } - return result; + ErrorCode err; + std::string key_str(row_key, keylen); + std::string qu_str(qualifier, qulen); + bool result = table->rep->Put(key_str, family, qu_str, value, &err); + if (SaveError(errptr, err)) { + return false; + } + return result; } bool tera_table_delete(tera_table_t* table, const char* row_key, uint64_t keylen, const char* family, const char* qualifier, uint64_t qulen) { - ErrorCode err; - std::string key_str(row_key, keylen); - std::string qu_str(qualifier, qulen); - RowMutation* mutation = table->rep->NewRowMutation(key_str); - mutation->DeleteColumn(family, qu_str); - table->rep->ApplyMutation(mutation); - err = mutation->GetError(); - delete mutation; - if (SaveError(NULL, err)) { - fprintf(stderr, "%s tera delete error: %s.\n", - tera::get_curtime_str().c_str(), err.GetReason().c_str()); - return false; - } - return true; + ErrorCode err; + std::string key_str(row_key, keylen); + std::string qu_str(qualifier, qulen); + RowMutation* mutation = table->rep->NewRowMutation(key_str); + mutation->DeleteColumn(family, qu_str); + table->rep->ApplyMutation(mutation); + err = mutation->GetError(); + delete mutation; + if (SaveError(NULL, err)) { + fprintf(stderr, "%s tera delete error: %s.\n", tera::get_curtime_str().c_str(), + err.GetReason().c_str()); + return false; + } + return true; } tera_row_mutation_t* tera_row_mutation(tera_table_t* table, const char* row_key, uint64_t keylen) { - tera_row_mutation_t* result = new tera_row_mutation_t; - result->rep = table->rep->NewRowMutation(std::string(row_key, keylen)); - return result; + tera_row_mutation_t* result = new tera_row_mutation_t; + result->rep = table->rep->NewRowMutation(std::string(row_key, keylen)); + return result; } int64_t tera_row_mutation_get_status_code(tera_row_mutation_t* mu) { - return mu->rep->GetError().GetType(); + return mu->rep->GetError().GetType(); } void tera_row_mutation_destroy(tera_row_mutation_t* mu) { - delete mu->rep; - delete mu; + delete mu->rep; + delete mu; } void tera_table_apply_mutation(tera_table_t* table, tera_row_mutation_t* mutation) { - table->rep->ApplyMutation(mutation->rep); + table->rep->ApplyMutation(mutation->rep); } -void tera_table_apply_mutation_batch(tera_table_t* table, tera_row_mutation_t** mutation_batch, int64_t num) { - std::vector mutation_list; - for (int64_t i = 0; i < num; i++) { - mutation_list.push_back((*(mutation_batch + i))->rep); - } - table->rep->ApplyMutation(mutation_list); +void tera_table_apply_mutation_batch(tera_table_t* table, tera_row_mutation_t** mutation_batch, + int64_t num) { + std::vector mutation_list; + for (int64_t i = 0; i < num; i++) { + mutation_list.push_back((*(mutation_batch + i))->rep); + } + table->rep->ApplyMutation(mutation_list); } tera_row_reader_t* tera_row_reader(tera_table_t* table, const char* row_key, uint64_t keylen) { - tera_row_reader_t* result = new tera_row_reader_t; - result->rep = table->rep->NewRowReader(std::string(row_key, keylen)); - return result; + tera_row_reader_t* result = new tera_row_reader_t; + result->rep = table->rep->NewRowReader(std::string(row_key, keylen)); + return result; } void tera_row_reader_rowkey(tera_row_reader_t* reader, char** str, uint64_t* strlen) { - std::string val = reader->rep->RowName(); - *str = CopyString(val); - *strlen = val.size(); + std::string val = reader->rep->RowName(); + *str = CopyString(val); + *strlen = val.size(); } void tera_row_reader_add_column_family(tera_row_reader_t* reader, const char* family) { - reader->rep->AddColumnFamily(family); + reader->rep->AddColumnFamily(family); } -bool tera_row_reader_done(tera_row_reader_t* reader) { - return reader->rep->Done(); -} +bool tera_row_reader_done(tera_row_reader_t* reader) { return reader->rep->Done(); } -void tera_row_reader_next(tera_row_reader_t* reader) { - reader->rep->Next(); -} +void tera_row_reader_next(tera_row_reader_t* reader) { reader->rep->Next(); } -int64_t tera_row_reader_value_int64(tera_row_reader_t* reader) { - return reader->rep->ValueInt64(); -} +int64_t tera_row_reader_value_int64(tera_row_reader_t* reader) { return reader->rep->ValueInt64(); } void tera_row_reader_value(tera_row_reader_t* reader, char** str, uint64_t* strlen) { - std::string val = reader->rep->Value(); - *str = CopyString(val); - *strlen = val.size(); + std::string val = reader->rep->Value(); + *str = CopyString(val); + *strlen = val.size(); } void tera_row_reader_callback_stub(RowReader* reader) { - MutexLock locker(&g_reader_mutex); - void* sdk_reader = reader; // C++ sdk RowReader* - reader_callback_map_t::iterator it = g_reader_callback_map.find(sdk_reader); - assert (it != g_reader_callback_map.end()); + MutexLock locker(&g_reader_mutex); + void* sdk_reader = reader; // C++ sdk RowReader* + reader_callback_map_t::iterator it = g_reader_callback_map.find(sdk_reader); + assert(it != g_reader_callback_map.end()); - std::pair apair = it->second; - void* c_reader = apair.first; // C tera_row_reader_t* - ReaderCallbackType callback = (ReaderCallbackType)apair.second; + std::pair apair = it->second; + void* c_reader = apair.first; // C tera_row_reader_t* + ReaderCallbackType callback = (ReaderCallbackType)apair.second; - g_reader_callback_map.erase(it); - g_reader_mutex.Unlock(); - // users use C tera_row_reader_t* to construct it's own object - callback(c_reader); - g_reader_mutex.Lock(); + g_reader_callback_map.erase(it); + g_reader_mutex.Unlock(); + // users use C tera_row_reader_t* to construct it's own object + callback(c_reader); + g_reader_mutex.Lock(); } void tera_row_reader_set_callback(tera_row_reader_t* reader, ReaderCallbackType callback) { - MutexLock locker(&g_reader_mutex); - g_reader_callback_map.insert( std::pair >( - reader->rep, - std::pair(reader, (void*)callback)) - ); - reader->rep->SetCallBack(tera_row_reader_callback_stub); + MutexLock locker(&g_reader_mutex); + g_reader_callback_map.insert(std::pair >( + reader->rep, std::pair(reader, (void*)callback))); + reader->rep->SetCallBack(tera_row_reader_callback_stub); } -void tera_row_reader_add_column(tera_row_reader_t* reader, const char* cf, const char* qu, uint64_t len) { - reader->rep->AddColumn(cf, std::string(qu, len)); +void tera_row_reader_add_column(tera_row_reader_t* reader, const char* cf, const char* qu, + uint64_t len) { + reader->rep->AddColumn(cf, std::string(qu, len)); } void tera_row_reader_set_timestamp(tera_row_reader_t* reader, int64_t ts) { - reader->rep->SetTimestamp(ts); + reader->rep->SetTimestamp(ts); } void tera_row_reader_set_time_range(tera_row_reader_t* reader, int64_t start, int64_t end) { - reader->rep->SetTimeRange(start, end); + reader->rep->SetTimeRange(start, end); } void tera_row_reader_set_snapshot(tera_row_reader_t* reader, uint64_t snapshot) { - reader->rep->SetSnapshot(snapshot); + reader->rep->SetSnapshot(snapshot); } void tera_row_reader_set_max_versions(tera_row_reader_t* reader, uint32_t maxversions) { - reader->rep->SetMaxVersions(maxversions); + reader->rep->SetMaxVersions(maxversions); } void tera_row_reader_set_timeout(tera_row_reader_t* reader, int64_t timeout) { - reader->rep->SetTimeOut(timeout); + reader->rep->SetTimeOut(timeout); } void tera_row_reader_family(tera_row_reader_t* reader, char** str, uint64_t* strlen) { - std::string val = reader->rep->Family(); - *str = CopyString(val); - *strlen = val.size(); + std::string val = reader->rep->Family(); + *str = CopyString(val); + *strlen = val.size(); } void tera_row_reader_qualifier(tera_row_reader_t* reader, char** str, uint64_t* strlen) { - std::string val = reader->rep->Qualifier(); - *str = CopyString(val); - *strlen = val.size(); + std::string val = reader->rep->Qualifier(); + *str = CopyString(val); + *strlen = val.size(); } -int64_t tera_row_reader_timestamp(tera_row_reader_t* reader) { - return reader->rep->Timestamp(); -} +int64_t tera_row_reader_timestamp(tera_row_reader_t* reader) { return reader->rep->Timestamp(); } int64_t tera_row_reader_get_status_code(tera_row_reader_t* reader) { - return reader->rep->GetError().GetType(); + return reader->rep->GetError().GetType(); } void tera_row_reader_destroy(tera_row_reader_t* reader) { - delete reader->rep; - delete reader; + delete reader->rep; + delete reader; } void tera_table_apply_reader(tera_table_t* table, tera_row_reader_t* reader) { - table->rep->Get(reader->rep); + table->rep->Get(reader->rep); } -void tera_table_apply_reader_batch(tera_table_t* table, tera_row_reader_t** reader_batch, int64_t num) { - std::vector reader_list; - for (int64_t i = 0; i < num; i++) { - reader_list.push_back((*(reader_batch + i))->rep); - } - table->rep->Get(reader_list); +void tera_table_apply_reader_batch(tera_table_t* table, tera_row_reader_t** reader_batch, + int64_t num) { + std::vector reader_list; + for (int64_t i = 0; i < num; i++) { + reader_list.push_back((*(reader_batch + i))->rep); + } + table->rep->Get(reader_list); } -bool tera_table_is_put_finished(tera_table_t* table) { - return table->rep->IsPutFinished(); -} +bool tera_table_is_put_finished(tera_table_t* table) { return table->rep->IsPutFinished(); } -bool tera_table_is_get_finished(tera_table_t* table) { - return table->rep->IsGetFinished(); -} +bool tera_table_is_get_finished(tera_table_t* table) { return table->rep->IsGetFinished(); } -void tera_row_mutation_put_kv(tera_row_mutation_t* mu, - const char* val, uint64_t vallen, int32_t ttl) { - mu->rep->Put(std::string(val, vallen), ttl); +void tera_row_mutation_put_kv(tera_row_mutation_t* mu, const char* val, uint64_t vallen, + int32_t ttl) { + mu->rep->Put(std::string(val, vallen), ttl); } -void tera_row_mutation_put_int64(tera_row_mutation_t* mu, const char* cf, - const char* qu, uint64_t qulen, - int64_t val) { - mu->rep->Put(cf, std::string(qu, qulen), val); +void tera_row_mutation_put_int64(tera_row_mutation_t* mu, const char* cf, const char* qu, + uint64_t qulen, int64_t val) { + mu->rep->Put(cf, std::string(qu, qulen), val); } -void tera_row_mutation_put(tera_row_mutation_t* mu, const char* cf, - const char* qu, uint64_t qulen, +void tera_row_mutation_put(tera_row_mutation_t* mu, const char* cf, const char* qu, uint64_t qulen, const char* val, uint64_t vallen) { - mu->rep->Put(cf, std::string(qu, qulen), std::string(val, vallen)); + mu->rep->Put(cf, std::string(qu, qulen), std::string(val, vallen)); } -void tera_row_mutation_put_with_timestamp(tera_row_mutation_t* mu, const char* cf, - const char* qu, uint64_t qulen, - int64_t timestamp, - const char* val, uint64_t vallen) { - mu->rep->Put(cf, std::string(qu, qulen), std::string(val, vallen), (int64_t)timestamp); +void tera_row_mutation_put_with_timestamp(tera_row_mutation_t* mu, const char* cf, const char* qu, + uint64_t qulen, int64_t timestamp, const char* val, + uint64_t vallen) { + mu->rep->Put(cf, std::string(qu, qulen), std::string(val, vallen), (int64_t)timestamp); } -void tera_row_mutation_delete_column(tera_row_mutation_t* mu, const char* cf, - const char* qu, uint64_t qulen) { - mu->rep->DeleteColumn(cf, std::string(qu, qulen)); +void tera_row_mutation_delete_column(tera_row_mutation_t* mu, const char* cf, const char* qu, + uint64_t qulen) { + mu->rep->DeleteColumn(cf, std::string(qu, qulen)); } void tera_row_mutation_delete_column_all_versions(tera_row_mutation_t* mu, const char* cf, const char* qu, uint64_t qulen) { - mu->rep->DeleteColumns(cf, std::string(qu, qulen)); + mu->rep->DeleteColumns(cf, std::string(qu, qulen)); } void tera_row_mutation_delete_column_with_version(tera_row_mutation_t* mu, const char* cf, - const char* qu, uint64_t qulen, int64_t timestamp) { - mu->rep->DeleteColumn(cf, std::string(qu, qulen), timestamp); + const char* qu, uint64_t qulen, + int64_t timestamp) { + mu->rep->DeleteColumn(cf, std::string(qu, qulen), timestamp); } -void tera_row_mutation_delete_row(tera_row_mutation_t* mu) { - mu->rep->DeleteRow(); -} +void tera_row_mutation_delete_row(tera_row_mutation_t* mu) { mu->rep->DeleteRow(); } void tera_row_mutation_delete_family(tera_row_mutation_t* mu, const char* cf) { - mu->rep->DeleteFamily(std::string(cf)); + mu->rep->DeleteFamily(std::string(cf)); } void tera_row_mutation_callback_stub(RowMutation* mu) { - MutexLock locker(&g_mutation_mutex); - void* sdk_mu = mu; // C++ sdk RowMutation* - mutation_callback_map_t::iterator it = g_mutation_callback_map.find(sdk_mu); - assert (it != g_mutation_callback_map.end()); + MutexLock locker(&g_mutation_mutex); + void* sdk_mu = mu; // C++ sdk RowMutation* + mutation_callback_map_t::iterator it = g_mutation_callback_map.find(sdk_mu); + assert(it != g_mutation_callback_map.end()); - std::pair apair = it->second; - void* c_mu = apair.first; // C tera_row_mutation_t* - MutationCallbackType callback = (MutationCallbackType)apair.second; + std::pair apair = it->second; + void* c_mu = apair.first; // C tera_row_mutation_t* + MutationCallbackType callback = (MutationCallbackType)apair.second; - g_mutation_callback_map.erase(it); - g_mutation_mutex.Unlock(); - // users use C tera_row_mutation_t* to construct it's own object - callback(c_mu); - g_mutation_mutex.Lock(); + g_mutation_callback_map.erase(it); + g_mutation_mutex.Unlock(); + // users use C tera_row_mutation_t* to construct it's own object + callback(c_mu); + g_mutation_mutex.Lock(); } void tera_row_mutation_set_callback(tera_row_mutation_t* mu, MutationCallbackType callback) { - MutexLock locker(&g_mutation_mutex); - g_mutation_callback_map.insert( std::pair >( - mu->rep, - std::pair(mu, (void*)callback)) - ); - mu->rep->SetCallBack(tera_row_mutation_callback_stub); + MutexLock locker(&g_mutation_mutex); + g_mutation_callback_map.insert(std::pair >( + mu->rep, std::pair(mu, (void*)callback))); + mu->rep->SetCallBack(tera_row_mutation_callback_stub); } void tera_row_mutation_rowkey(tera_row_mutation_t* mu, char** val, uint64_t* vallen) { - std::string row = mu->rep->RowKey(); - *val = CopyString(row); - *vallen = row.size(); + std::string row = mu->rep->RowKey(); + *val = CopyString(row); + *vallen = row.size(); } -tera_result_stream_t* tera_table_scan(tera_table_t* table, - const tera_scan_descriptor_t* desc, +tera_result_stream_t* tera_table_scan(tera_table_t* table, const tera_scan_descriptor_t* desc, char** errptr) { - ErrorCode err; - tera_result_stream_t* result = new tera_result_stream_t; - result->rep = table->rep->Scan(*desc->rep, &err); - if (SaveError(errptr, err)) { - delete result; - return NULL; - } - return result; + ErrorCode err; + tera_result_stream_t* result = new tera_result_stream_t; + result->rep = table->rep->Scan(*desc->rep, &err); + if (SaveError(errptr, err)) { + delete result; + return NULL; + } + return result; } void tera_result_stream_destroy(tera_result_stream_t* desc) { - delete desc->rep; - delete desc; + delete desc->rep; + delete desc; } tera_scan_descriptor_t* tera_scan_descriptor(const char* start_key, uint64_t keylen) { - std::string key(start_key, keylen); - tera_scan_descriptor_t* result = new tera_scan_descriptor_t; - result->rep = new ScanDescriptor(key); - return result; + std::string key(start_key, keylen); + tera_scan_descriptor_t* result = new tera_scan_descriptor_t; + result->rep = new ScanDescriptor(key); + return result; } void tera_scan_descriptor_destroy(tera_scan_descriptor_t* desc) { - delete desc->rep; - delete desc; + delete desc->rep; + delete desc; } void tera_scan_descriptor_add_column(tera_scan_descriptor_t* desc, const char* cf, const char* qualifier, uint64_t qulen) { - std::string qu(qualifier, qulen); - desc->rep->AddColumn(cf, qu); + std::string qu(qualifier, qulen); + desc->rep->AddColumn(cf, qu); } void tera_scan_descriptor_add_column_family(tera_scan_descriptor_t* desc, const char* cf) { - desc->rep->AddColumnFamily(cf); -} - -bool tera_scan_descriptor_is_async(tera_scan_descriptor_t* desc) { - return desc->rep->IsAsync(); -} - -void tera_scan_descriptor_set_is_async(tera_scan_descriptor_t* desc, bool is_async) { - desc->rep->SetAsync(is_async); + desc->rep->AddColumnFamily(cf); } void tera_scan_descriptor_set_buffer_size(tera_scan_descriptor_t* desc, int64_t size) { - desc->rep->SetBufferSize(size); + desc->rep->SetBufferSize(size); } -void tera_scan_descriptor_set_end(tera_scan_descriptor_t* desc, const char* end_key, uint64_t keylen) { - std::string key(end_key, keylen); - desc->rep->SetEnd(key); +void tera_scan_descriptor_set_end(tera_scan_descriptor_t* desc, const char* end_key, + uint64_t keylen) { + std::string key(end_key, keylen); + desc->rep->SetEnd(key); } void tera_scan_descriptor_set_pack_interval(tera_scan_descriptor_t* desc, int64_t interval) { - desc->rep->SetPackInterval(interval); + desc->rep->SetPackInterval(interval); } void tera_scan_descriptor_set_max_versions(tera_scan_descriptor_t* desc, int32_t versions) { - desc->rep->SetMaxVersions(versions); + desc->rep->SetMaxVersions(versions); } void tera_scan_descriptor_set_snapshot(tera_scan_descriptor_t* desc, uint64_t snapshot_id) { - desc->rep->SetSnapshot(snapshot_id); + desc->rep->SetSnapshot(snapshot_id); } // NOTE: arguments order is different from C++ sdk(tera.h) -void tera_scan_descriptor_set_time_range(tera_scan_descriptor_t* desc, int64_t ts_start, int64_t ts_end) { - desc->rep->SetTimeRange(ts_end, ts_start); -} - -bool tera_scan_descriptor_set_filter(tera_scan_descriptor_t* desc, char* filter_str) { - return desc->rep->SetFilter(filter_str); +void tera_scan_descriptor_set_time_range(tera_scan_descriptor_t* desc, int64_t ts_start, + int64_t ts_end) { + desc->rep->SetTimeRange(ts_end, ts_start); } bool tera_result_stream_done(tera_result_stream_t* stream, char** errptr) { - ErrorCode err; - if (!stream->rep->Done(&err)) { - SaveError(errptr, err); - return false; - } - return true; + ErrorCode err; + if (!stream->rep->Done(&err)) { + SaveError(errptr, err); + return false; + } + return true; } int64_t tera_result_stream_timestamp(tera_result_stream_t* stream) { - int64_t ts = stream->rep->Timestamp(); - //fprintf(stderr, "%lld\n", ts); - return ts; + int64_t ts = stream->rep->Timestamp(); + // fprintf(stderr, "%lld\n", ts); + return ts; } void tera_result_stream_qualifier(tera_result_stream_t* stream, char** str, uint64_t* strlen) { - std::string val = stream->rep->Qualifier(); - *str = CopyString(val); - *strlen = val.size(); + std::string val = stream->rep->Qualifier(); + *str = CopyString(val); + *strlen = val.size(); } void tera_result_stream_column_name(tera_result_stream_t* stream, char** str, uint64_t* strlen) { - std::string val = stream->rep->ColumnName(); - *str = CopyString(val); - *strlen = val.size(); + std::string val = stream->rep->ColumnName(); + *str = CopyString(val); + *strlen = val.size(); } void tera_result_stream_family(tera_result_stream_t* stream, char** str, uint64_t* strlen) { - std::string val = stream->rep->Family(); - *str = CopyString(val); - *strlen = val.size(); + std::string val = stream->rep->Family(); + *str = CopyString(val); + *strlen = val.size(); } -void tera_result_stream_next(tera_result_stream_t* stream) { - stream->rep->Next(); -} +void tera_result_stream_next(tera_result_stream_t* stream) { stream->rep->Next(); } void tera_result_stream_row_name(tera_result_stream_t* stream, char** str, uint64_t* strlen) { - std::string val = stream->rep->RowName(); - *str = CopyString(val); - *strlen = val.size(); + std::string val = stream->rep->RowName(); + *str = CopyString(val); + *strlen = val.size(); } void tera_result_stream_value(tera_result_stream_t* stream, char** str, uint64_t* strlen) { - std::string val = stream->rep->Value(); - *str = CopyString(val); - *strlen = val.size(); + std::string val = stream->rep->Value(); + *str = CopyString(val); + *strlen = val.size(); } int64_t tera_result_stream_value_int64(tera_result_stream_t* stream) { - return stream->rep->ValueInt64(); + return stream->rep->ValueInt64(); } } // end extern "C" diff --git a/src/tera_entry.cc b/src/tera_entry.cc deleted file mode 100644 index 51fcd88ac..000000000 --- a/src/tera_entry.cc +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "tera_entry.h" -#include "common/this_thread.h" - -namespace tera { - -TeraEntry::TeraEntry() - : started_(false) {} - -TeraEntry::~TeraEntry() {} - -bool TeraEntry::Start() { - if (ShouldStart()) { - return StartServer(); - } - return false; -} - -bool TeraEntry::Run() { - ThisThread::Sleep(2000); - return true; -} - -bool TeraEntry::Shutdown() { - if (ShouldShutdown()) { - ShutdownServer(); - return true; - } - return false; -} - -bool TeraEntry::ShouldStart() { - MutexLock lock(&mutex_); - if (started_) { - return false; - } - started_ = true; - return true; -} - -bool TeraEntry::ShouldShutdown() { - MutexLock lock(&mutex_); - if (!started_) { - return false; - } - started_ = false; - return true; -} - -} // namespace tera diff --git a/src/tera_flags.cc b/src/tera_flags.cc index d9ee6ecbc..c8ced65fe 100644 --- a/src/tera_flags.cc +++ b/src/tera_flags.cc @@ -11,7 +11,8 @@ DEFINE_int64(tera_stat_table_ttl, 8000000, "default ttl for stat table (s / 100d DEFINE_int64(tera_stat_table_splitsize, 100, "(MB) default split size of stat table"); ///////// common ///////// -DEFINE_string(tera_role, "", "the role of tera running binary, should be one of (master | tabletnode)"); +DEFINE_string(tera_role, "", + "the role of tera running binary, should be one of (master | tabletnode)"); DEFINE_string(tera_user_identity, "", "the identity of tera user"); DEFINE_string(tera_user_passcode, "", "the passcode of tera user"); DEFINE_bool(tera_acl_enabled, false, "enable access control"); @@ -19,10 +20,15 @@ DEFINE_bool(tera_only_root_create_table, false, "only the root user can create t DEFINE_string(tera_working_dir, "./", "the base dir for system data"); -DEFINE_string(tera_coord_type, "", "the coordinator service type for tera cluster [zk,ins,mock_zk,mock_ins,fake_zk]"); +DEFINE_string(tera_coord_type, "", + "the coordinator service type for tera cluster " + "[zk,ins,mock_zk,mock_ins,fake_zk]"); -DEFINE_bool(tera_zk_enabled, true, "[obsoleted replace by --tera_coord_type=zk] enable zk adapter to coord"); -DEFINE_bool(tera_mock_zk_enabled, false, "[obsoleted replace by --tera_coord_type=mock_zk] enable mock zk adapter to coord"); +DEFINE_bool(tera_zk_enabled, true, + "[obsoleted replace by --tera_coord_type=zk] enable zk adapter to coord"); +DEFINE_bool(tera_mock_zk_enabled, false, + "[obsoleted replace by --tera_coord_type=mock_zk] enable mock zk " + "adapter to coord"); DEFINE_string(tera_zk_addr_list, "localhost:2180", "zookeeper server list"); DEFINE_string(tera_zk_root_path, "/tera", "zookeeper root path"); DEFINE_string(tera_fake_zk_path_prefix, "../fakezk", "fake zk path prefix in onebox tera"); @@ -34,48 +40,84 @@ DEFINE_string(tera_log_prefix, "", "prefix of log file (INFO, WARNING)"); DEFINE_string(tera_local_addr, "", "local host's ip address"); DEFINE_bool(tera_online_schema_update_enabled, false, "enable online-schema-update"); DEFINE_bool(tera_info_log_clean_enable, true, "enable log cleaner task, enable as default"); -DEFINE_int64(tera_info_log_clean_period_second, 2592000, "time period (in second) for log cleaner task, 30 days as default"); -DEFINE_int64(tera_info_log_expire_second, 2592000, "expire time (in second) of log file, 30 days as default"); +DEFINE_int64(tera_info_log_clean_period_second, 2592000, + "time period (in second) for log cleaner task, 30 days as default"); +DEFINE_int64(tera_info_log_expire_second, 2592000, + "expire time (in second) of log file, 30 days as default"); DEFINE_string(tera_tabletnode_path_prefix, "../data/", "the path prefix for table storage"); -DEFINE_int32(tera_tabletnode_scan_pack_max_size, 10240, "the max size(KB) of the package for scan rpc"); -DEFINE_bool(tera_tabletnode_flash_block_cache_enabled, false, "enable flash block cache mechasism"); +DEFINE_int32(tera_tabletnode_scan_pack_max_size, 10240, + "the max size(KB) of the package for scan rpc"); + +DEFINE_string(tera_auth_policy, "none", "none/ugi/giano"); +DEFINE_string(tera_auth_name, "", + "if tera_auth_policy == default, name should be user_name; " + "otherwise group_name"); +DEFINE_string(tera_auth_token, "", + "if tera_auth_policy == default, token should be passwd; " + "otherwise credential"); + +DEFINE_bool(tera_quota_enabled, false, "quota enable or not"); +DEFINE_string(tera_quota_limiter_type, "general_quota_limiter", + "quota_limiter for generic purpose"); +DEFINE_int64(tera_quota_normal_estimate_value, 1024, + "default estimate value per read/scan request is 1KB"); +DEFINE_double(tera_quota_adjust_estimate_ratio, 0.9, + "quota adjust estimate ratio for read and scan"); ///////// io ///////// DEFINE_int64(tera_tablet_write_block_size, 4, "the block size (in KB) for teblet write block"); -DEFINE_int64(tera_tablet_memtable_ldb_block_size, 4, "the block size (in KB) for memtable on leveldb"); +DEFINE_int64(tera_tablet_memtable_ldb_block_size, 4, + "the block size (in KB) for memtable on leveldb"); DEFINE_int64(tera_tablet_ldb_sst_size, 8, "the sstable file size (in MB) on leveldb"); -DEFINE_string(tera_leveldb_env_type, "dfs", "the default type for leveldb IO environment, should be [local | dfs]"); +DEFINE_string(tera_leveldb_env_type, "dfs", + "the default type for leveldb IO environment, should be [local | dfs]"); DEFINE_string(tera_leveldb_log_path, "../log/leveldb.log", "the default path for leveldb logger"); - -DEFINE_int32(tera_rpc_client_max_inflow, -1, "the max input flow (in MB/s) for rpc-client, -1 means no limit"); -DEFINE_int32(tera_rpc_client_max_outflow, -1, "the max input flow (in MB/s) for rpc-client, -1 means no limit"); +DEFINE_int32(leveldb_max_log_size_MB, 1024, + "create a new log file if the file size is larger than this value "); +DEFINE_int32(leveldb_log_flush_trigger_size_B, 1048576, + "trigger force flush log to disk by either leveldb_log_flush_trigger_size_B or " + "leveldb_log_flush_trigger_interval_ms"); +DEFINE_int32(leveldb_log_flush_trigger_interval_ms, 1000, + "trigger force flush log to disk by either leveldb_log_flush_trigger_size_B or " + "leveldb_log_flush_trigger_interval_ms"); + +DEFINE_int32(tera_rpc_client_max_inflow, -1, + "the max input flow (in MB/s) for rpc-client, -1 means no limit"); +DEFINE_int32(tera_rpc_client_max_outflow, -1, + "the max input flow (in MB/s) for rpc-client, -1 means no limit"); DEFINE_int32(tera_rpc_timeout_period, 60000, "the timeout period (in ms) for rpc"); -// those flags prefixed with "tera_master" are shared by several modules which cannot be move to -// master/master_flags, so they are kept here for compatibility until all flags are moved to their own dir +// those flags prefixed with "tera_master" are shared by several modules which +// cannot be move to +// master/master_flags, so they are kept here for compatibility until all flags +// are moved to their own dir DEFINE_string(tera_master_meta_table_name, "meta_table", "the meta table name"); DEFINE_string(tera_master_meta_table_path, "meta", "the path of meta table"); DEFINE_int64(tera_master_split_tablet_size, 512, "the size (in MB) of tablet to trigger split"); DEFINE_int64(tera_master_merge_tablet_size, 0, "the size (in MB) of tablet to trigger merge"); -DEFINE_int64(tera_master_gc_trash_expire_time_s, 86400, "time (in second) for gc file keeped in trash"); +DEFINE_int64(tera_master_gc_trash_expire_time_s, 86400, + "time (in second) for gc file keeped in trash"); DEFINE_int64(tera_master_ins_session_timeout, 10000000, "ins session timeout(us), default 10sec"); ///////// http ///////// DEFINE_string(tera_http_port, "8657", "the http proxy port of tera"); -DEFINE_int32(tera_http_request_thread_num, 30, "the http proxy thread num for handle client request"); +DEFINE_int32(tera_http_request_thread_num, 30, + "the http proxy thread num for handle client request"); DEFINE_int32(tera_http_ctrl_thread_num, 10, "the http proxy thread num for it self"); ///////// timeoracle ///////// DEFINE_string(tera_timeoracle_port, "30000", "the timeoracle port of tera"); DEFINE_int32(tera_timeoracle_max_lease_second, 30, "(s) timeoracle work this seconds for a lease"); -DEFINE_int32(tera_timeoracle_refresh_lease_second, 10, "(s) timeoracle refresh lease before this seconds"); +DEFINE_int32(tera_timeoracle_refresh_lease_second, 10, + "(s) timeoracle refresh lease before this seconds"); // only used by timeoracle DEFINE_bool(tera_timeoracle_mock_enabled, false, "used local filesystem replace zk and ins."); DEFINE_string(tera_timeoracle_mock_root_path, "/tmp/", "the root path of local filesystem."); DEFINE_int32(tera_timeoracle_work_thread_num, 16, "timeoracle sofarpc server work_thread_number"); -DEFINE_int32(tera_timeoracle_io_service_pool_size, 4, "timeoracle sofarpc server io_service_pool_size"); +DEFINE_int32(tera_timeoracle_io_service_pool_size, 4, + "timeoracle sofarpc server io_service_pool_size"); //////// observer /////// DEFINE_int32(observer_proc_thread_num, 20, ""); @@ -85,12 +127,18 @@ DEFINE_int32(observer_read_thread_num, 20, "observer read thread num"); DEFINE_int32(observer_ack_conflict_timeout, 3600, "(ms) timeout for ack column conflict check"); DEFINE_int32(observer_rowlock_client_thread_num, 20, "rowlock client thread number"); DEFINE_int32(observer_random_access_thread_num, 20, "async read and write thread number"); +DEFINE_int64(observer_update_table_info_period_s, 60, + "the period of update table info for select key to observe"); //////// rowlock server //////// DEFINE_bool(rowlock_rpc_limit_enabled, false, "enable the rpc traffic limit in sdk"); -DEFINE_int32(rowlock_rpc_limit_max_inflow, 10, "the max bandwidth (in MB/s) for sdk rpc traffic limitation on input flow"); -DEFINE_int32(rowlock_rpc_limit_max_outflow, 10, "the max bandwidth (in MB/s) for sdk rpc traffic limitation on output flow"); -DEFINE_int32(rowlock_rpc_max_pending_buffer_size, 200, "max pending buffer size (in MB) for sdk rpc"); +DEFINE_int32(rowlock_rpc_limit_max_inflow, 10, + "the max bandwidth (in MB/s) for sdk rpc traffic limitation on input flow"); +DEFINE_int32(rowlock_rpc_limit_max_outflow, 10, + "the max bandwidth (in MB/s) for sdk rpc traffic limitation on " + "output flow"); +DEFINE_int32(rowlock_rpc_max_pending_buffer_size, 200, + "max pending buffer size (in MB) for sdk rpc"); DEFINE_int32(rowlock_rpc_work_thread_num, 2, "thread num of sdk rpc client"); DEFINE_string(rowlock_server_ip, "0.0.0.0", "rowlock server ip"); @@ -101,7 +149,9 @@ DEFINE_string(rowlock_ins_root_path, "/rowlock", "ins rowlock root path"); DEFINE_int32(rowlock_server_node_num, 1, "number of rowlock servers in cluster"); DEFINE_int32(rowlock_db_ttl, 600000, "(ms) timeout for an unlocked lock, 10min"); -DEFINE_int32(rowlock_timing_wheel_patch_num, 600, "the number of timing wheel, every patch_num step the oldest data will be cleared"); +DEFINE_int32(rowlock_timing_wheel_patch_num, 600, + "the number of timing wheel, every patch_num step the oldest data " + "will be cleared"); DEFINE_int32(rowlock_db_sharding_number, 1024, "sharding number, enhance concurrency"); DEFINE_string(rowlock_fake_root_path, "../fakezk/rowlock", "one box fake zk root path"); DEFINE_int32(rowlock_thread_max_num, 20, "the max thread number of rowlock server"); diff --git a/src/tera_main.cc b/src/tera_main.cc index d5e0e2723..12c4460f7 100644 --- a/src/tera_main.cc +++ b/src/tera_main.cc @@ -3,15 +3,18 @@ // found in the LICENSE file. #include +#include #include #include +#include +#include #include "common/base/scoped_ptr.h" #include "common/log/log_cleaner.h" #include "common/heap_profiler.h" #include "common/cpu_profiler.h" -#include "tera_entry.h" +#include "tera/tera_entry.h" #include "utils/utils_cmd.h" #include "version.h" @@ -29,74 +32,105 @@ extern tera::TeraEntry* GetTeraEntry(); volatile sig_atomic_t g_quit = 0; -static void SignalIntHandler(int sig) { - g_quit = 1; +static void DumpStringToFile(const std::string& s, const std::string& filename) { + std::fstream file; + file.open(filename, std::ios::out); + if (!file.is_open()) { + LOG(ERROR) << "Open file " << filename << " failed"; + return; + } + file << s; + file.close(); } -int main(int argc, char** argv) { - ::google::ParseCommandLineFlags(&argc, &argv, true); - ::google::InitGoogleLogging(argv[0]); +static void SignalIntHandler(int sig) { g_quit = 1; } + +// Dump a memory profile after receive SIRUSR1. +// NOTE: by default, tcmalloc does not do any heap sampling, and this +// function will always return an empty sample. To get useful +// data from it, you must also set the environment +// variable TCMALLOC_SAMPLE_PARAMETER to a value such as 524288(bytes). +static void SignalUsr1Handler(int sig) { + static std::atomic idx{0}; + std::string str; + MallocExtension::instance()->GetHeapSample(&str); + DumpStringToFile(str, std::to_string(getpid()) + "." + + (std::to_string(idx.fetch_add(1)) + ".sample.heap").c_str()); +} + +// Dump current detail memory usage to file. +static void SignalUsr2Handler(int sig) { + static std::atomic idx{0}; + char buffer[1024000]; + MallocExtension::instance()->GetStats(buffer, sizeof(buffer)); + DumpStringToFile(buffer, std::to_string(getpid()) + "." + + (std::to_string(idx.fetch_add(1)) + ".mem.detail").c_str()); +} +int main(int argc, char** argv) { + ::google::ParseCommandLineFlags(&argc, &argv, true); + ::google::InitGoogleLogging(argv[0]); + if (FLAGS_tera_log_prefix.empty()) { + FLAGS_tera_log_prefix = GetTeraEntryName(); if (FLAGS_tera_log_prefix.empty()) { - FLAGS_tera_log_prefix = GetTeraEntryName(); - if (FLAGS_tera_log_prefix.empty()) { - FLAGS_tera_log_prefix = "tera"; - } - } - tera::utils::SetupLog(FLAGS_tera_log_prefix); - - tera::CpuProfiler cpu_profiler; - cpu_profiler.SetEnable(FLAGS_cpu_profiler_enabled) - .SetInterval(FLAGS_cpu_profiler_dump_interval); - - tera::HeapProfiler heap_profiler; - heap_profiler.SetEnable(FLAGS_heap_profiler_enabled) - .SetInterval(FLAGS_heap_profiler_dump_interval); - - if (argc > 1) { - std::string ext_cmd = argv[1]; - if (ext_cmd == "version") { - PrintSystemVersion(); - return 0; - } + FLAGS_tera_log_prefix = "tera"; } + } + tera::utils::SetupLog(FLAGS_tera_log_prefix); - signal(SIGINT, SignalIntHandler); - signal(SIGTERM, SignalIntHandler); + tera::CpuProfiler cpu_profiler; + cpu_profiler.SetEnable(FLAGS_cpu_profiler_enabled).SetInterval(FLAGS_cpu_profiler_dump_interval); - scoped_ptr entry(GetTeraEntry()); - if (entry.get() == NULL) { - return -1; - } + tera::HeapProfiler heap_profiler; + heap_profiler.SetEnable(FLAGS_heap_profiler_enabled) + .SetInterval(FLAGS_heap_profiler_dump_interval); - if (!entry->Start()) { - return -1; + if (argc > 1) { + std::string ext_cmd = argv[1]; + if (ext_cmd == "version") { + PrintSystemVersion(); + return 0; } - - // start log cleaner - if (FLAGS_tera_info_log_clean_enable) { - common::LogCleaner::StartCleaner(); - LOG(INFO) << "start log cleaner"; - } else { - LOG(INFO) << "log cleaner is disable"; + } + + signal(SIGINT, SignalIntHandler); + signal(SIGTERM, SignalIntHandler); + signal(SIGUSR1, SignalUsr1Handler); + signal(SIGUSR2, SignalUsr2Handler); + + scoped_ptr entry(GetTeraEntry()); + if (entry.get() == NULL) { + return -1; + } + + if (!entry->Start()) { + return -1; + } + + // start log cleaner + if (FLAGS_tera_info_log_clean_enable) { + common::LogCleaner::StartCleaner(); + LOG(INFO) << "start log cleaner"; + } else { + LOG(INFO) << "log cleaner is disable"; + } + + while (!g_quit) { + if (!entry->Run()) { + LOG(ERROR) << "Server run error ,and then exit now "; + break; } + } + if (g_quit) { + LOG(INFO) << "received interrupt signal from user, will stop"; + } - while (!g_quit) { - if (!entry->Run()) { - LOG(ERROR) << "Server run error ,and then exit now "; - break; - } - } - if (g_quit) { - LOG(INFO) << "received interrupt signal from user, will stop"; - } + common::LogCleaner::StopCleaner(); - common::LogCleaner::StopCleaner(); - - if (!entry->Shutdown()) { - return -1; - } + if (!entry->Shutdown()) { + return -1; + } - return 0; + return 0; } diff --git a/src/tera_main_wrapper.cc b/src/tera_main_wrapper.cc index 8ce87b54a..134a20284 100644 --- a/src/tera_main_wrapper.cc +++ b/src/tera_main_wrapper.cc @@ -15,34 +15,34 @@ DECLARE_string(tera_role); int main(int argc, char** argv) { - if (argc > 1 && strcmp(argv[1], "version") == 0) { - PrintSystemVersion(); - return 0; - } - - google::AllowCommandLineReparsing(); - google::ParseCommandLineFlags(&argc, &argv, false); - - const char* program = NULL; - if (FLAGS_tera_role == "master") { - program = "./tera_master"; - } else if (FLAGS_tera_role == "tabletnode") { - program = "./tabletserver"; - } else { - std::cerr << "FLAGS_tera_role should be one of (master | tabletnode)" << std::endl; - return -1; - } - - std::vector myargv; - myargv.resize(argc + 1); - myargv[0] = (char*)"tera_main"; - for (int i = 1; i < argc; i++) { - myargv[i] = argv[i]; - } - myargv[argc] = NULL; - if (-1 == execv(program, &myargv[0])) { - std::cerr << "execv " << program << " error: " << errno << std::endl; - return -1; - } + if (argc > 1 && strcmp(argv[1], "version") == 0) { + PrintSystemVersion(); return 0; + } + + google::AllowCommandLineReparsing(); + google::ParseCommandLineFlags(&argc, &argv, false); + + const char* program = NULL; + if (FLAGS_tera_role == "master") { + program = "./tera_master"; + } else if (FLAGS_tera_role == "tabletnode") { + program = "./tabletserver"; + } else { + std::cerr << "FLAGS_tera_role should be one of (master | tabletnode)" << std::endl; + return -1; + } + + std::vector myargv; + myargv.resize(argc + 1); + myargv[0] = (char*)"tera_main"; + for (int i = 1; i < argc; i++) { + myargv[i] = argv[i]; + } + myargv[argc] = NULL; + if (-1 == execv(program, &myargv[0])) { + std::cerr << "execv " << program << " error: " << errno << std::endl; + return -1; + } + return 0; } diff --git a/src/tera_test_main.cc b/src/tera_test_main.cc index 915c172d9..8ab965a1d 100644 --- a/src/tera_test_main.cc +++ b/src/tera_test_main.cc @@ -36,7 +36,7 @@ DECLARE_string(flagfile); using namespace tera; void Usage(const std::string& prg_name) { - std::cout << "DESCRIPTION \n\ + std::cout << "DESCRIPTION \n\ rw-consistency-test \n\ shared-tableimpl-test\n\ version \n"; @@ -51,286 +51,280 @@ static Counter r_total; static Counter launch_time; void PrintStat() { - LOG(INFO) << "Write total " << w_total.Get() - << " succ " << w_succ.Get() << " pending " << w_pending.Get() - << ", Read total " << r_total.Get() - << " succ " << r_succ.Get() << " pending: " << r_pending.Get(); + LOG(INFO) << "Write total " << w_total.Get() << " succ " << w_succ.Get() << " pending " + << w_pending.Get() << ", Read total " << r_total.Get() << " succ " << r_succ.Get() + << " pending: " << r_pending.Get(); } class KeySet { -public: - KeySet() : key_num_(0) {} - void Init(const uint32_t key_num) { - key_num_ = key_num; - - // gen row keys - while (keys_.size() < key_num) { - std::stringstream ss; - ss << ((uint64_t)rand()) * ((uint64_t)rand()) << "abcdefghijklmnopqrstuvwxyz"; - std::string key = ss.str(); - keys_[key] = 0; - keys_stat_[key] = false; - } - CHECK(keys_.size() == key_num); - srand(get_micros() % 1000000); - - // fill key index_ - std::map::iterator it = keys_.begin(); - index_.clear(); - for (; it != keys_.end(); ++it) { - index_.push_back(&(it->first)); - } - - // fill column families - SplitString(FLAGS_column_families, ",", &cfs_); - CHECK(cfs_.size() > 0); + public: + KeySet() : key_num_(0) {} + void Init(const uint32_t key_num) { + key_num_ = key_num; + + // gen row keys + while (keys_.size() < key_num) { + std::stringstream ss; + ss << ((uint64_t)rand()) * ((uint64_t)rand()) << "abcdefghijklmnopqrstuvwxyz"; + std::string key = ss.str(); + keys_[key] = 0; + keys_stat_[key] = false; } - - std::string RandKey() { - MutexLock l(&mu_); - return *(index_[rand() % key_num_]); + CHECK(keys_.size() == key_num); + srand(get_micros() % 1000000); + + // fill key index_ + std::map::iterator it = keys_.begin(); + index_.clear(); + for (; it != keys_.end(); ++it) { + index_.push_back(&(it->first)); } - std::string RandCF() { - return cfs_[rand() % cfs_.size()]; - } + // fill column families + SplitString(FLAGS_column_families, ",", &cfs_); + CHECK(cfs_.size() > 0); + } - std::string RandIdleKey() { - MutexLock l(&mu_); - std::string key = *(index_[rand() % key_num_]); - if (keys_stat_[key] == false) { - return key; - } else { - // key is busy - return ""; - } - } + std::string RandKey() { + MutexLock l(&mu_); + return *(index_[rand() % key_num_]); + } + + std::string RandCF() { return cfs_[rand() % cfs_.size()]; } - void SetKeyStatus(const std::string& key, bool busy_or_not) { - MutexLock l(&mu_); - if (busy_or_not) { - CHECK(!keys_stat_[key]); - keys_stat_[key] = true; - } else { - keys_stat_[key] = false; - } + std::string RandIdleKey() { + MutexLock l(&mu_); + std::string key = *(index_[rand() % key_num_]); + if (keys_stat_[key] == false) { + return key; + } else { + // key is busy + return ""; } + } - void UpdateTime(const std::string& key, uint64_t ts) { - MutexLock l(&mu_); - uint64_t ts_t = keys_[key]; - if (ts_t <= ts) { - keys_[key] = ts; - } else { - uint64_t diff = ts_t - ts; - LOG(ERROR) << "CONSISTENCY ERROR: " << key << " " << ts_t << " > " << ts - << ", diff " << diff << "us " << diff / 1000000 << "s."; - PrintStat(); - _Exit(-10); - } - CHECK(keys_.size() == key_num_); - keys_stat_[key] = false; + void SetKeyStatus(const std::string& key, bool busy_or_not) { + MutexLock l(&mu_); + if (busy_or_not) { + CHECK(!keys_stat_[key]); + keys_stat_[key] = true; + } else { + keys_stat_[key] = false; } + } -private: - Mutex mu_; - uint32_t key_num_; - std::map keys_; // key, update time - std::map keys_stat_; // key, reading or not - std::vector index_; - std::vector cfs_; + void UpdateTime(const std::string& key, uint64_t ts) { + MutexLock l(&mu_); + uint64_t ts_t = keys_[key]; + if (ts_t <= ts) { + keys_[key] = ts; + } else { + uint64_t diff = ts_t - ts; + LOG(ERROR) << "CONSISTENCY ERROR: " << key << " " << ts_t << " > " << ts << ", diff " << diff + << "us " << diff / 1000000 << "s."; + PrintStat(); + _Exit(-10); + } + CHECK(keys_.size() == key_num_); + keys_stat_[key] = false; + } + + private: + Mutex mu_; + uint32_t key_num_; + std::map keys_; // key, update time + std::map keys_stat_; // key, reading or not + std::vector index_; + std::vector cfs_; }; KeySet g_key_set; void ReaderCallBack(RowReader* reader) { - const ErrorCode& error_code = reader->GetError(); - if (error_code.GetType() == ErrorCode::kOK) { - //std::cout << reader->RowName() << "\t" - // << reader->Timestamp() << std::endl; - if (reader->Qualifier() != "" && reader->RowName() != reader->Qualifier()) { - LOG(ERROR) << "CONSISTENCY ERROR: rowkey[" << reader->RowName() - << "] vs qualifier[" << reader->Qualifier() << "]"; - _Exit(-11); - } - g_key_set.UpdateTime(reader->RowName(), reader->Timestamp()); - r_succ.Inc(); - } else if (error_code.GetType() != ErrorCode::kNotFound) { - //LOG(ERROR) << "exception occured, reason:" << error_code.GetReason() - // << ", key: " << reader->RowName(); - } else { - r_succ.Inc(); + const ErrorCode& error_code = reader->GetError(); + if (error_code.GetType() == ErrorCode::kOK) { + // std::cout << reader->RowName() << "\t" + // << reader->Timestamp() << std::endl; + if (reader->Qualifier() != "" && reader->RowName() != reader->Qualifier()) { + LOG(ERROR) << "CONSISTENCY ERROR: rowkey[" << reader->RowName() << "] vs qualifier[" + << reader->Qualifier() << "]"; + _Exit(-11); } - g_key_set.SetKeyStatus(reader->RowName(), false); - r_total.Inc(); - r_pending.Dec(); - delete reader; + g_key_set.UpdateTime(reader->RowName(), reader->Timestamp()); + r_succ.Inc(); + } else if (error_code.GetType() != ErrorCode::kNotFound) { + // LOG(ERROR) << "exception occured, reason:" << error_code.GetReason() + // << ", key: " << reader->RowName(); + } else { + r_succ.Inc(); + } + g_key_set.SetKeyStatus(reader->RowName(), false); + r_total.Inc(); + r_pending.Dec(); + delete reader; } void MutationCallBack(RowMutation* mutation) { - const ErrorCode& error_code = mutation->GetError(); - if (error_code.GetType() != ErrorCode::kOK) { - //LOG(ERROR) << "exception occured, reason:" << error_code.GetReason() - // << ", key: " << mutation->RowKey(); - } else { - w_succ.Inc(); - } - w_total.Inc(); - w_pending.Dec(); - delete mutation; + const ErrorCode& error_code = mutation->GetError(); + if (error_code.GetType() != ErrorCode::kOK) { + // LOG(ERROR) << "exception occured, reason:" << error_code.GetReason() + // << ", key: " << mutation->RowKey(); + } else { + w_succ.Inc(); + } + w_total.Inc(); + w_pending.Dec(); + delete mutation; } -std::string RandomNumString(int32_t size){ - std::stringstream ss; - for(int i = 0; i != size; ++i) { - ss << rand() % 10; - } - return ss.str(); +std::string RandomNumString(int32_t size) { + std::stringstream ss; + for (int i = 0; i != size; ++i) { + ss << rand() % 10; + } + return ss.str(); } int32_t RWConsistencyTest(int32_t argc, char** argv, ErrorCode* err) { - if (FLAGS_table.empty()) { - Usage(argv[0]); - return -1; + if (FLAGS_table.empty()) { + Usage(argv[0]); + return -1; + } + + Client* client = Client::NewClient(FLAGS_flagfile, NULL); + if (client == NULL) { + LOG(ERROR) << "client instance not exist"; + return -2; + } + + std::string tablename = FLAGS_table; + Table* table = client->OpenTable(tablename, err); + if (table == NULL) { + LOG(ERROR) << "fail to open table: " << tablename; + return -3; + } + + LOG(INFO) << "Write " << FLAGS_key_set_size << " keys to " << FLAGS_table << " total."; + + g_key_set.Init(FLAGS_key_set_size); + + uint64_t last_print_time = 0; + launch_time.Set(get_micros()); + for (int64_t cnt = 0; cnt < FLAGS_row_num; ++cnt) { + uint64_t cur_ts = get_micros(); + std::string rowkey; + std::string cf; + std::string qu; + std::string value; + + // write + if (w_pending.Get() < FLAGS_pending_num) { + rowkey = g_key_set.RandKey(); + cf = g_key_set.RandCF(); + qu = rowkey; + value = RandomNumString(FLAGS_value_size); + RowMutation* mutation = table->NewRowMutation(rowkey); + mutation->Put(cf, qu, value); + mutation->SetCallBack(MutationCallBack); + table->ApplyMutation(mutation); + w_pending.Inc(); + } else { + usleep(FLAGS_pending_sleep_interval * 1000); } - Client* client = Client::NewClient(FLAGS_flagfile, NULL); - if (client == NULL) { - LOG(ERROR) << "client instance not exist"; - return -2; + // read + rowkey = g_key_set.RandIdleKey(); + if (!rowkey.empty() && r_pending.Get() < FLAGS_pending_num) { + RowReader* reader = table->NewRowReader(rowkey); + reader->SetCallBack(ReaderCallBack); + table->Get(reader); + g_key_set.SetKeyStatus(rowkey, true); + r_pending.Inc(); } - - std::string tablename = FLAGS_table; - Table* table = client->OpenTable(tablename, err); - if (table == NULL) { - LOG(ERROR) << "fail to open table: " << tablename; - return -3; + // while (r_pending.Get() > 10000) { + // usleep(100000); + // } + + // print + if (cur_ts > last_print_time + 1000000) { + PrintStat(); + last_print_time = cur_ts; } + } - LOG(INFO) << "Write " << FLAGS_key_set_size << " keys to " << FLAGS_table - << " total."; - - g_key_set.Init(FLAGS_key_set_size); - - uint64_t last_print_time = 0; - launch_time.Set(get_micros()); - for (int64_t cnt = 0; cnt < FLAGS_row_num; ++cnt) { - uint64_t cur_ts = get_micros(); - std::string rowkey; - std::string cf; - std::string qu; - std::string value; - - // write - if (w_pending.Get() < FLAGS_pending_num) { - rowkey = g_key_set.RandKey(); - cf = g_key_set.RandCF(); - qu = rowkey; - value = RandomNumString(FLAGS_value_size); - RowMutation* mutation = table->NewRowMutation(rowkey); - mutation->Put(cf, qu, value); - mutation->SetCallBack(MutationCallBack); - table->ApplyMutation(mutation); - w_pending.Inc(); - } else { - usleep(FLAGS_pending_sleep_interval * 1000); - } - - // read - rowkey = g_key_set.RandIdleKey(); - if (!rowkey.empty() && r_pending.Get() < FLAGS_pending_num) { - RowReader* reader = table->NewRowReader(rowkey); - reader->SetCallBack(ReaderCallBack); - table->Get(reader); - g_key_set.SetKeyStatus(rowkey, true); - r_pending.Inc(); - } - // while (r_pending.Get() > 10000) { - // usleep(100000); - // } - - // print - if (cur_ts > last_print_time + 1000000) { - PrintStat(); - last_print_time = cur_ts; - } - } + while (w_pending.Get() > 0 || r_pending.Get() > 0) { + usleep(1000000); + LOG(INFO) << "wait r_pending: " << r_pending.Get() << ", w_pending: " << w_pending.Get(); + } - while (w_pending.Get() > 0 || r_pending.Get() > 0) { - usleep(1000000); - LOG(INFO) << "wait r_pending: " << r_pending.Get() - << ", w_pending: " << w_pending.Get(); - } - - delete table; - delete client; - return 0; + delete table; + delete client; + return 0; } int32_t SharedTableImplTask(Client* client, ErrorCode* err) { - std::string tablename = FLAGS_table; - Table* table = client->OpenTable(tablename, err); - if (table == NULL) { - LOG(ERROR) << "fail to open table: " << tablename; - return -1; - } - delete table; - return 0; + std::string tablename = FLAGS_table; + Table* table = client->OpenTable(tablename, err); + if (table == NULL) { + LOG(ERROR) << "fail to open table: " << tablename; + return -1; + } + delete table; + return 0; } int32_t SharedTableImplTest(int32_t argc, char** argv, ErrorCode* err) { - if (FLAGS_table.empty()) { - Usage(argv[0]); - return -1; - } - - Client* client = Client::NewClient(FLAGS_flagfile, NULL); - if (client == NULL) { - LOG(ERROR) << "client instance not exist"; - return -2; - } - - ThreadPool thread_pool(100); - for (int i = 0; i < 1000000; ++i) { - ThreadPool::Task task = - std::bind(&SharedTableImplTask, client, err); - thread_pool.AddTask(task); - } - while (thread_pool.PendingNum() > 0) { - std::cerr << get_time_str(time(NULL)) << " " - << "waiting for test finish, pending " << thread_pool.PendingNum() - << " tasks ..." << std::endl; - sleep(1); - } - thread_pool.Stop(true); - delete client; - return 0; + if (FLAGS_table.empty()) { + Usage(argv[0]); + return -1; + } + + Client* client = Client::NewClient(FLAGS_flagfile, NULL); + if (client == NULL) { + LOG(ERROR) << "client instance not exist"; + return -2; + } + + ThreadPool thread_pool(100); + for (int i = 0; i < 1000000; ++i) { + ThreadPool::Task task = std::bind(&SharedTableImplTask, client, err); + thread_pool.AddTask(task); + } + while (thread_pool.PendingNum() > 0) { + std::cerr << get_time_str(time(NULL)) << " " + << "waiting for test finish, pending " << thread_pool.PendingNum() << " tasks ..." + << std::endl; + sleep(1); + } + thread_pool.Stop(true); + delete client; + return 0; } int ExecuteCommand(int argc, char* argv[]) { - int ret = 0; - ErrorCode error_code; - if (argc <= 1) { - Usage(argv[0]); - return 0; - } - std::string cmd = argv[1]; - if (cmd == "rw-consistency-test") { - ret = RWConsistencyTest(argc, argv, &error_code); - } else if (cmd == "shared-tableimpl-test") { - ret = SharedTableImplTest(argc, argv, &error_code); - } else if (cmd == "version") { - PrintSystemVersion(); - ret = 0; - } else { - Usage(argv[0]); - return -1; - } - return ret; + int ret = 0; + ErrorCode error_code; + if (argc <= 1) { + Usage(argv[0]); + return 0; + } + std::string cmd = argv[1]; + if (cmd == "rw-consistency-test") { + ret = RWConsistencyTest(argc, argv, &error_code); + } else if (cmd == "shared-tableimpl-test") { + ret = SharedTableImplTest(argc, argv, &error_code); + } else if (cmd == "version") { + PrintSystemVersion(); + ret = 0; + } else { + Usage(argv[0]); + return -1; + } + return ret; } int main(int argc, char* argv[]) { - ::google::ParseCommandLineFlags(&argc, &argv, true); - int ret = ExecuteCommand(argc, argv); - return ret; + ::google::ParseCommandLineFlags(&argc, &argv, true); + int ret = ExecuteCommand(argc, argv); + return ret; } diff --git a/src/teracli_main.cc b/src/teracli_main.cc index 0e6ac4241..c7c58e997 100644 --- a/src/teracli_main.cc +++ b/src/teracli_main.cc @@ -1,7 +1,7 @@ // Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -// + #include #include #include @@ -53,7 +53,6 @@ DECLARE_string(log_dir); DECLARE_string(tera_master_meta_table_name); DECLARE_string(tera_zk_addr_list); DECLARE_string(tera_zk_root_path); -DECLARE_bool(tera_sdk_batch_scan_enabled); DECLARE_int64(tera_sdk_status_timeout); DECLARE_string(tera_leveldb_env_type); @@ -79,6 +78,7 @@ DEFINE_string(tablets_file, "", "tablet set file"); DEFINE_bool(readable, true, "readable input"); DEFINE_bool(printable, true, "printable output"); DEFINE_bool(print_data, true, "is print data when scan"); +DEFINE_bool(print_count, false, "is print count data when scan"); DEFINE_bool(rowkey_count, false, "is print rowkey count when scan"); // using FLAGS instead of isatty() for compatibility @@ -94,10 +94,20 @@ DEFINE_bool(override, false, "dfs put file override the existing one"); DEFINE_bool(attribute, false, "dfs list file detail attribute"); DEFINE_bool(recursive, false, "dfs remove file recursively"); +// tablet num when create hash table +DEFINE_int64(hash_num, 1, + "tablet num when create hash table, " + "divide key space [0x0000000000000000, 0xFFFFFFFFFFFFFFF] into " + "'hash_num' equal parts"); +DEFINE_bool(convert_hash_key, true, "convert hash key when find tablet"); + +// batch mutation op max limit +DEFINE_int64(max_mutation_limit, 16 << 10, "max mutation operators for one batch"); + volatile int32_t g_start_time = 0; volatile int32_t g_end_time = 0; volatile int32_t g_used_time = 0; -volatile int32_t g_last_time = 0; +volatile int32_t g_last_time = 0; volatile int64_t g_total_size = 0; volatile int32_t g_key_num = 0; Mutex g_stat_lock; @@ -110,4091 +120,3995 @@ using namespace tera; typedef std::shared_ptr
TablePtr; typedef std::shared_ptr TableImplPtr; -typedef std::map CommandTable; +typedef std::map CommandTable; // FileSystem command table -typedef std::map FSCommandTable; -//typedef std::map > FSCommandTable; +typedef std::map FSCommandTable; +// typedef std::map > FSCommandTable; /// global variables of single-row-txn used in interactive mode tera::Transaction* g_row_txn = NULL; Table* g_row_txn_table = NULL; leveldb::Dfs* g_dfs = NULL; -static CommandTable& GetCommandTable(){ - static CommandTable command_table; - return command_table; +static CommandTable& GetCommandTable() { + static CommandTable command_table; + return command_table; } static FSCommandTable& GetFSCommandTable() { - static FSCommandTable fs_command_table; - return fs_command_table; + static FSCommandTable fs_command_table; + return fs_command_table; } const char* builtin_cmd_list[] = { "create", - "create [] \n\ - - schema syntax (all properties are optional): \n\ - tablename { \n\ - lg1 { \n\ - cf1 , \n\ - cf2...}, \n\ - lg2... \n\ - } \n\ - - kv mode schema: \n\ - tablename \n\ - - simple mode schema: \n\ - tablename{cf1, cf2, cf3, ...}", - - "createbyfile", - "createbyfile []", + "create [] [<--hash_num=n>] \n\ + - schema syntax (all properties are optional): \n\ + tablename { \n\ + lg1 { \n\ + cf1 , \n\ + cf2...}, \n\ + lg2... \n\ + } \n\ + - kv mode schema: \n\ + tablename \n\ + - simple mode schema: \n\ + tablename{cf1, cf2, cf3, ...} \n\ + - hash mode schema: \n\ + tablename [--hash_num=n]", + + "createbyfile", "createbyfile []", "update", - "update \n\ - - kv schema: \n\ - e.g. tablename \n\ - - table schema: \n\ - - update properties \n\ - e.g. tablename \n\ - e.g. tablename{lg0{cf0}} \n\ - e.g. tablename{lg0{cf0}} \n\ - - add new cf \n\ - e.g. tablename{lg0{cf0,new_cf}}\n\ - - delete cf \n\ + "update \n\ + - kv schema: \n\ + e.g. tablename \n\ + - table schema: \n\ + - update properties \n\ + e.g. tablename \n\ + e.g. tablename{lg0{cf0}} \n\ + e.g. tablename{lg0{cf0}} \n\ + - add new cf \n\ + e.g. tablename{lg0{cf0,new_cf}} \n\ + - delete cf \n\ e.g. tablename{lg0{cf0}}", "update-check", - "update-check \n\ + "update-check \n\ check status of last online-schema-update", - "enable", - "enable ", + "enable", "enable ", - "disable", - "disable ", + "disable", "disable ", - "drop", - "drop ", + "drop", "drop ", - "put", - "put [] ", + "put", "put [] ", "put-ttl", - "put-ttl [] ", + "put-ttl [] " + "", - "putif", - "putif [] ", + "putif", "putif [] ", - "get", - "get []", + "get", "get []", "scan", - "scan[allv] [<\"cf1|cf2\">] \n\ - scan table from startkey to endkey. \n\ + "scan[allv] [<\"cf1|cf2\">] \n\ + scan table from startkey to endkey. \n\ (return all qulifier version when using suffix \"allv\")", "delete", - "delete[1v] [] \n\ - delete row/columnfamily/qualifiers. \n\ + "delete[1v] [] \n\ + delete row/columnfamily/qualifiers. \n\ (only delete latest version when using suffix \"1v\")", "put_counter", - "put_counter [] ", + "put_counter [] " + "", - "get_counter", - "get_counter []", + "get_counter", "get_counter []", "add", - "add delta \n\ + "add delta \n\ add 'delta'(int64_t) to specified cell", "putint64", - "putint64 [] ", + "putint64 [] " + "", - "getint64", - "getint64 []", + "getint64", "getint64 []", "addint64", - "addint64 delta \n\ + "addint64 delta \n\ add 'delta'(int64_t) to specified cell", - "append", - "append [] ", + "append", "append [] ", "batchput", - "batchput ", + "batchput [use_batch_mutation=true|false], \n\ + when use_batch_mutation=true, flag --max_mutation_limit can \n\ + limit one batch_mutation opterators. \n\ + \n\ + one row format: rowkey cf_name:qu_name value", - "batchget", - "batchget ", + "batchget", "batchget ", "show", - "show[x] [] \n\ - show table list or tablets info. \n\ + "show[x] [] \n\ + show table list or tablets info. \n\ (show more detail when using suffix \"x\")", "showschema", - "showschema[x] \n\ + "showschema[x] \n\ show table schema (show more detail when using suffix \"x\")", "showts", - "showts[x] [] \n\ - show all tabletnodes or single tabletnode info. \n\ + "showts[x] [] \n\ + show all tabletnodes or single tabletnode info. \n\ (show more detail when using suffix \"x\")", "range", - "range[x] \n\ - get all tablets range. \n\ - --reorder_tablets=true ordered tablets by ts addr \n\ + "range[x] \n\ + get all tablets range. \n\ + --reorder_tablets=true ordered tablets by ts addr \n\ (show more detail when using suffix \"x\")", "txn", - "txn \n\ - start \n\ - commit \n\ + "txn \n\ + start \n\ + commit \n\ (only support single row transaction)", "cas", - "cas \n\ - Compare and set a value atomically. (The txn value of table schema must be 'on') \n\ - This command will compare the value at rowkey:columnfamily:qualifier with : \n\ - -> equal : put to this location. \n\ + "cas \n\ + Compare and set a value atomically. (The txn value of table schema must be 'on') \n\ + This command will compare the value at rowkey:columnfamily:qualifier with : \n\ + -> equal : put to this location. \n\ -> not equal: do nothing.", "user", - "user \n\ - create \n\ - changepwd \n\ - show \n\ - delete \n\ - addtogroup \n\ + "user \n\ + create \n\ + changepwd \n\ + show \n\ + delete \n\ + addtogroup \n\ deletefromgroup ", "tablet", - "tablet \n\ - move \n\ - movex \n\ - * only for force move tablet ignore error \n\ - reload \n\ - force to unload and load on the same ts \n\ - reloadx \n\ - force to unload and load on the same ts \n\ - * only for force reload tablet ignore error \n\ - lg_list : lg1:lg2:lg3 \n\ - compact \n\ - split \n\ - merge \n\ + "tablet \n\ + move \n\ + movex \n\ + * only for force move tablet ignore error \n\ + reload \n\ + force to unload and load on the same ts \n\ + reloadx \n\ + force to unload and load on the same ts \n\ + * only for force reload tablet ignore error \n\ + lg_list : lg1:lg2:lg3 \n\ + compact \n\ + split \n\ + merge \n\ scan ", "compact", - "compact [--lg=] [--concurrency=] [--tablets_file=] \n\ - run manual compaction on a table. \n\ - --lg: only run compact on specified lg number. \n\ - --concurrency: compacting tablets number at the same time. \n\ + "compact [--lg=] [--concurrency=] [--tablets_file=] \n\ + run manual compaction on a table. \n\ + --lg: only run compact on specified lg number. \n\ + --concurrency: compacting tablets number at the same time. \n\ --tablets_file: specify tablet set, one tablet_path each line.", "safemode", - "safemode [get|enter|leave]", + "safemode [get|enter|leave|leave keep_time] \n\ + leave [keep_time]: leave safemode and keep in running status at \n\ + least this period(minutes), default 0 \n\ + enter [keep_time]: enter safemode and keep in safemode at most this period(minutes), \n\ + or renew safemode lease for another keep_time minutes if already in safemode,\n\ + default 2880 minutes(2days)", - "meta", - "meta[2] [backup|check|repair|show] \n\ - meta for master memory, meta2 for meta table.", + "meta", "meta[2] has moved to metacli.", "findmaster", - "findmaster \n\ + "findmaster \n\ find the address of master", "findts", - "findts [rowkey] \n\ - find the specify tabletnode serving 'rowkey'. \n\ + "findts [rowkey] \n\ + find the specify tabletnode serving 'rowkey'. \n\ if 'rowkey' is omited, read from stdin with one rowkey per line.", "reload", - "reload config hostname:port \n\ - notify master | ts reload flag file \n\ + "reload config hostname:port \n\ + notify master | ts reload flag file \n\ *** at your own risk ***", "kick", - "kick hostname:port \n\ - ask master to kick a tabletserver \n\ + "kick hostname:port \n\ + ask master to kick a tabletserver, \n\ + will kick fail if master in safemode \n\ *** at your own risk ***", + "forcekick", + "forcekick hostname:port \n\ + ask master to kick a tabletserver, \n\ + always kick despite master in safemode \n\ + *** at your own risk ***", "findtablet", - "findtablet \n\ - ", - + "findtablet [--convert_hash_key=] \n\ + [--convert_hash_key=]", + "stat", - "stat \n\ - corruption [tabletnode] [tabletpath] \n\ - -- show tablet load or compaction corruption detail message \n\ - \n\ - example: \n\ - ./teracli stat corruption hostname::port t1/tablet00000001", + "stat corruption [filters] \n\ + -- show tablet load or compaction corruption messages \n\ + \n\ + filters: \n\ + [type=Load|Comp] \n\ + * 'Load' means corruption during loading \n\ + * 'Comp' means corrutions during compaction \n\ + [ts=target_ts:ts_port] \n\ + [table=target_tablename] \n\ + [tablet=target_tablet] \n\ + [time_range=start_time,end_time] \n\ + * time format: %4d%2d%2d-%d:%d:%d \n\ + [detail=true|false] \n\ + * default true \n\ + example: \n\ + ./teracli stat corruption \n\ + ./teracli stat corruption type=Load detail=false \n\ + ./teracli stat corruption type=Comp ts=host1:port \n\ + ./teracli stat corruption ts=host1:port table=table1 \n\ + ./teracli stat corruption time_range=20180530-13:58:58,20180630-15:8:57", "cookie", - "cookie \n\ - dump cookie-file -- dump contents of specified files \n\ + "cookie \n\ + dump cookie-file -- dump contents of specified files \n\ findkey cookie-file key -- find a key's info", "help", - "help [cmd] \n\ + "help [cmd] \n\ show manual for a or all cmd(s)", "dfs", - "dfs [cmd] args \n\ - mkdir $NFS_PATH \n\ - touchz $NFS_PATH \n\ - test [-e|-z|-d] $NFS_PATH \n\ - get $NFS_PATH $LOCAL_PATH \n\ - put [--override] $LOCAL_PATH $NFS_PATH \n\ - ls [--attribute] $NFS_PATH \n\ - lsr [--attribute] $NFS_PATH \n\ - dus $NFS_PATH \n\ - rm [--recursive] $NFS_PATH \n\ - stat $NFS_PATH \n\ - rename $NFS_PATH_SRC $NFS_PATH_DEST \n\ - unlockdir $NFS_PATH \n\ - checksum $NFS_PATH $OFFSET $LENGTH \n\ + "dfs [cmd] args \n\ + mkdir $NFS_PATH \n\ + touchz $NFS_PATH \n\ + test [-e|-z|-d] $NFS_PATH \n\ + get $NFS_PATH $LOCAL_PATH \n\ + put [--override] $LOCAL_PATH $NFS_PATH \n\ + ls [--attribute] $NFS_PATH \n\ + lsr [--attribute] $NFS_PATH \n\ + dus $NFS_PATH \n\ + rm [--recursive] $NFS_PATH \n\ + stat $NFS_PATH \n\ + rename $NFS_PATH_SRC $NFS_PATH_DEST \n\ + unlockdir $NFS_PATH \n\ + checksum $NFS_PATH $OFFSET $LENGTH \n\ forcerelease $NFS_PATH", "gtxn", - "gtxn \n\ - complete a global transaction with input from std input or file. \n\ - pay attention to set flags '--tera_sdk_client_for_gtxn' and \n\ - '--tera_sdk_tso_client_enabled' equal to true. \n\ - \n\ - input file format: \n\ - BEGIN \n\ - statement [\\n statement] ... \n\ - COMMIT \n\ - \n\ - statement: \n\ - PUT|GET|DEL [put_value] \n\ - \n\ - example: cat input_file | ./teracli gtxn \n\ - \n\ - input_file example: \n\ - BEGIN \n\ - PUT table1 key1 cf1:qu1 value1 \n\ - DEL table2 key2 cf2:qu2 \n\ - GET table3 key3 cf3:qu3 \n\ + "gtxn \n\ + complete a global transaction with input from std input or file. \n\ + pay attention to set flags '--tera_sdk_client_for_gtxn' and \n\ + '--tera_sdk_tso_client_enabled' equal to true. \n\ + \n\ + input file format: \n\ + BEGIN \n\ + statement [\\n statement] ... \n\ + COMMIT \n\ + \n\ + statement: \n\ + PUT|GET|DEL [put_value] \n\ + \n\ + example: cat input_file | ./teracli gtxn \n\ + \n\ + input_file example: \n\ + BEGIN \n\ + PUT table1 key1 cf1:qu1 value1 \n\ + DEL table2 key2 cf2:qu2 \n\ + GET table3 key3 cf3:qu3 \n\ COMMIT", - + "quota", + "quota [args] \n\ + show show brief quotas info \n\ + showx \n\ + show detailed ts all tables' quotas info \n\ + set \n\ + limit_args: WRITEREQS|WRITEBYTES|READREQS| \n\ + READBYTES|SCANREQS|SCANBYTES=[limit]/[period] \n\ + limit unit is bytes and period unit is seconds, \n\ + limit = -1 means unlimited, doesn't set period means 1s \n\ + e.g. quota set test WRITEREQS=1000/2 READBYTES=4000/3 SCANREQS=100 SCANBYTES=-1", "version", - "version \n\ + "version \n\ show version info", + "hash", + "hash key \n\ + caculate hashed key with default murmurhash method", }; static void PrintCmdHelpInfo(const char* msg) { - if (msg == NULL) { - return; - } - int count = sizeof(builtin_cmd_list)/sizeof(char*); - for (int i = 0; i < count; i+=2) { - if(strncmp(msg, builtin_cmd_list[i], 32) == 0) { - std::cout << builtin_cmd_list[i + 1] << std::endl; - return; - } - } + if (msg == NULL) { + return; + } + int count = sizeof(builtin_cmd_list) / sizeof(char*); + for (int i = 0; i < count; i += 2) { + if (strncmp(msg, builtin_cmd_list[i], 32) == 0) { + std::cout << builtin_cmd_list[i + 1] << std::endl; + return; + } + } } -static void PrintCmdHelpInfo(const std::string& msg) { - PrintCmdHelpInfo(msg.c_str()); -} +static void PrintCmdHelpInfo(const std::string& msg) { PrintCmdHelpInfo(msg.c_str()); } static void PrintAllCmd() { - std::cout << "there is cmd list:" << std::endl; - int count = sizeof(builtin_cmd_list)/sizeof(char*); - bool newline = false; - for (int i = 0; i < count; i+=2) { - std::cout << std::setiosflags(std::ios::left) << std::setw(20) << builtin_cmd_list[i]; - if (newline) { - std::cout << std::endl; - newline = false; - } else { - newline = true; - } + std::cout << "there is cmd list:" << std::endl; + int count = sizeof(builtin_cmd_list) / sizeof(char*); + bool newline = false; + for (int i = 0; i < count; i += 2) { + std::cout << std::setiosflags(std::ios::left) << std::setw(20) << builtin_cmd_list[i]; + if (newline) { + std::cout << std::endl; + newline = false; + } else { + newline = true; } + } - std::cout << std::endl << "help [cmd] for details." << std::endl; + std::cout << std::endl + << "help [cmd] for details." << std::endl; } // return false if similar command(s) not found static bool PromptSimilarCmd(const char* msg) { - if (msg == NULL) { - return false; - } - bool found = false; - int64_t len = strlen(msg); - int64_t threshold = int64_t((len * 0.3 < 3) ? 3 : len * 0.3); - int count = sizeof(builtin_cmd_list)/sizeof(char*); - for (int i = 0; i < count; i+=2) { - if (EditDistance(msg, builtin_cmd_list[i]) <= threshold) { - if (!found) { - std::cout << "Did you mean:" << std::endl; - found = true; - } - std::cout << " " << builtin_cmd_list[i] << std::endl; - } - } - return found; + if (msg == NULL) { + return false; + } + bool found = false; + int64_t len = strlen(msg); + int64_t threshold = int64_t((len * 0.3 < 3) ? 3 : len * 0.3); + int count = sizeof(builtin_cmd_list) / sizeof(char*); + for (int i = 0; i < count; i += 2) { + if (EditDistance(msg, builtin_cmd_list[i]) <= threshold) { + if (!found) { + std::cout << "Did you mean:" << std::endl; + found = true; + } + std::cout << " " << builtin_cmd_list[i] << std::endl; + } + } + return found; } static void PrintUnknownCmdHelpInfo(const char* msg) { - if (msg != NULL) { - std::cout << "'" << msg << "' is not a valid command." << std::endl << std::endl; - } - if ((msg != NULL) - && PromptSimilarCmd(msg)) { - return; - } - PrintAllCmd(); + if (msg != NULL) { + std::cout << "'" << msg << "' is not a valid command." << std::endl + << std::endl; + } + if ((msg != NULL) && PromptSimilarCmd(msg)) { + return; + } + PrintAllCmd(); } int32_t CreateOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc < 3) { - PrintCmdHelpInfo(argv[1]); - return -1; - } + if (argc < 3) { + PrintCmdHelpInfo(argv[1]); + return -1; + } - TableDescriptor table_desc; - std::vector delimiters; - std::string schema = argv[2]; - if (!ParseTableSchema(schema, &table_desc, err)) { - LOG(ERROR) << "fail to parse input table schema."; - return -1; - } - if (argc == 4) { - // have tablet delimiters - if (!ParseDelimiterFile(argv[3], &delimiters)) { - LOG(ERROR) << "fail to parse delimiter file."; - return -1; - } - } else if (argc > 4) { - PrintCmdHelpInfo("create"); - return -1; - } - if (!client->CreateTable(table_desc, delimiters, err)) { - LOG(ERROR) << "fail to create table, " << err->ToString(); - return -1; + TableDescriptor table_desc; + std::vector delimiters; + std::string schema = argv[2]; + if (!ParseTableSchema(schema, &table_desc, err)) { + LOG(ERROR) << "fail to parse input table schema."; + return -1; + } + if (argc == 4) { + // have tablet delimiters + if (!ParseDelimiterFile(argv[3], &delimiters)) { + LOG(ERROR) << "fail to parse delimiter file."; + return -1; + } + } else if (argc > 4) { + PrintCmdHelpInfo("create"); + return -1; + } else { + if (table_desc.IsHashEnabled() && FLAGS_hash_num > 1) { + GenerateHashDelimiters(FLAGS_hash_num, &delimiters); } - ShowTableDescriptor(table_desc); - return 0; + } + if (!client->CreateTable(table_desc, delimiters, err)) { + LOG(ERROR) << "fail to create table, " << err->ToString(); + return -1; + } + ShowTableDescriptor(table_desc); + return 0; } int32_t CreateByFileOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc < 3) { - PrintCmdHelpInfo(argv[1]); - return -1; - } + if (argc < 3) { + PrintCmdHelpInfo(argv[1]); + return -1; + } - TableDescriptor table_desc; - if (!ParseTableSchemaFile(argv[2], &table_desc, err)) { - LOG(ERROR) << "fail to parse input table schema."; - return -1; - } + TableDescriptor table_desc; + if (!ParseTableSchemaFile(argv[2], &table_desc, err)) { + LOG(ERROR) << "fail to parse input table schema."; + return -1; + } - std::vector delimiters; - if (argc == 4) { - // have tablet delimiters - if (!ParseDelimiterFile(argv[3], &delimiters)) { - LOG(ERROR) << "fail to parse delimiter file."; - return -1; - } - } else if (argc > 4) { - LOG(ERROR) << "too many args: " << argc; - return -1; + std::vector delimiters; + if (argc == 4) { + // have tablet delimiters + if (!ParseDelimiterFile(argv[3], &delimiters)) { + LOG(ERROR) << "fail to parse delimiter file."; + return -1; } - if (!client->CreateTable(table_desc, delimiters, err)) { - LOG(ERROR) << "fail to create table, " << err->ToString(); - return -1; - } - ShowTableDescriptor(table_desc); - return 0; + } else if (argc > 4) { + LOG(ERROR) << "too many args: " << argc; + return -1; + } + if (!client->CreateTable(table_desc, delimiters, err)) { + LOG(ERROR) << "fail to create table, " << err->ToString(); + return -1; + } + ShowTableDescriptor(table_desc); + return 0; } int32_t UpdateCheckOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 3) { - PrintCmdHelpInfo(argv[1]); - return -1; - } - bool done = false; - if (!client->UpdateCheck(argv[2], &done, err)) { - std::cerr << err->ToString() << std::endl; - return -1; - } - std::cout << "update " << (done ? "successed" : "is running...") << std::endl; - return 0; + if (argc != 3) { + PrintCmdHelpInfo(argv[1]); + return -1; + } + bool done = false; + if (!client->UpdateCheck(argv[2], &done, err)) { + std::cerr << err->ToString() << std::endl; + return -1; + } + std::cout << "update " << (done ? "successed" : "is running...") << std::endl; + return 0; } int32_t UpdateOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 3) { - PrintCmdHelpInfo(argv[1]); - return -1; - } - std::string schema = argv[2]; - PropTree schema_tree; - if (!schema_tree.ParseFromString(schema)) { - LOG(ERROR) << "[update] invalid schema: " << schema; - LOG(ERROR) << "[update] state: " << schema_tree.State(); - return -1; - } - std::string tablename = schema_tree.GetRootNode()->name_; - TableDescriptor* table_desc = client->GetTableDescriptor(tablename, err); - if (table_desc == NULL) { - LOG(ERROR) << "[update] can't get the TableDescriptor of table: " << tablename; - return -1; - } + if (argc != 3) { + PrintCmdHelpInfo(argv[1]); + return -1; + } + std::string schema = argv[2]; + PropTree schema_tree; + if (!schema_tree.ParseFromString(schema)) { + LOG(ERROR) << "[update] invalid schema: " << schema; + LOG(ERROR) << "[update] state: " << schema_tree.State(); + return -1; + } + std::string tablename = schema_tree.GetRootNode()->name_; + TableDescriptor* table_desc = client->GetTableDescriptor(tablename, err); + if (table_desc == NULL) { + LOG(ERROR) << "[update] can't get the TableDescriptor of table: " << tablename; + return -1; + } - if (!UpdateTableDescriptor(schema_tree, table_desc, err)) { - LOG(ERROR) << "[update] update failed"; - return -1; - } + if (!UpdateTableDescriptor(schema_tree, table_desc, err)) { + LOG(ERROR) << "[update] update failed"; + return -1; + } - if (!client->UpdateTable(*table_desc, err)) { - LOG(ERROR) << "[update] fail to update table, " << err->ToString(); - return -1; - } - ShowTableDescriptor(*table_desc); - delete table_desc; - return 0; + if (!client->UpdateTable(*table_desc, err)) { + LOG(ERROR) << "[update] fail to update table, " << err->ToString(); + return -1; + } + ShowTableDescriptor(*table_desc); + delete table_desc; + return 0; } int32_t DropOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc < 3) { - PrintCmdHelpInfo(argv[1]); - return -1; - } + if (argc < 3) { + PrintCmdHelpInfo(argv[1]); + return -1; + } - std::string tablename = argv[2]; - if (!client->DeleteTable(tablename, err)) { - LOG(ERROR) << "fail to delete table, " << err->ToString(); - return -1; - } - return 0; + std::string tablename = argv[2]; + if (!client->DeleteTable(tablename, err)) { + LOG(ERROR) << "fail to delete table, " << err->ToString(); + return -1; + } + return 0; } int32_t EnableOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc < 3) { - PrintCmdHelpInfo(argv[1]); - return -1; - } + if (argc < 3) { + PrintCmdHelpInfo(argv[1]); + return -1; + } - std::string tablename = argv[2]; - if (!client->EnableTable(tablename, err)) { - LOG(ERROR) << "fail to enable table"; - return -1; - } - return 0; + std::string tablename = argv[2]; + if (!client->EnableTable(tablename, err)) { + LOG(ERROR) << "fail to enable table"; + return -1; + } + return 0; } int32_t DisableOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc < 3) { - PrintCmdHelpInfo(argv[1]); - return -1; - } + if (argc < 3) { + PrintCmdHelpInfo(argv[1]); + return -1; + } - std::string tablename = argv[2]; - if (!client->DisableTable(tablename, err)) { - LOG(ERROR) << "fail to disable table"; - return -1; - } - TableMeta table_meta; - TabletMetaList tablet_list; - std::shared_ptr client_impl((static_cast(client))->GetClientImpl()); - if (!client_impl->ShowTablesInfo(tablename, &table_meta, &tablet_list, err)) { - LOG(ERROR) << "table not exist: " << tablename; - return -1; - } + std::string tablename = argv[2]; + if (!client->DisableTable(tablename, err)) { + LOG(ERROR) << "fail to disable table"; + return -1; + } - uint64_t tablet_num = tablet_list.meta_size(); - common::ProgressBar progress_bar(common::ProgressBar::ENHANCED, tablet_num, 100); - while (true) { - if (!client_impl->ShowTablesInfo(tablename, &table_meta, &tablet_list, err)) { - LOG(ERROR) << "table not exist: " << tablename; - return -1; - } - uint64_t tablet_cnt = 0; - for (int32_t i = 0; i < tablet_list.meta_size(); ++i) { - const TabletMeta& tablet = tablet_list.meta(i); - VLOG(10) << "tablet status: " << StatusCodeToString(tablet.status()); - if (tablet.status() == TabletMeta::kTabletDisable || - tablet.status() == TabletMeta::kTabletOffline) { - tablet_cnt++; - } - } - progress_bar.Refresh(tablet_cnt); - if (tablet_cnt == tablet_num) { - // disable finish - progress_bar.Done(); - break; - } - sleep(1); - } - return 0; + return 0; } -void ParseCfQualifier(const std::string& input, std::string* columnfamily, - std::string* qualifier, bool *has_qualifier = NULL) { - std::string::size_type pos = input.find(":", 0); - if (pos != std::string::npos) { - *columnfamily = input.substr(0, pos); - *qualifier = input.substr(pos + 1); - if (has_qualifier) { - *has_qualifier = true; - } - } else { - *columnfamily = input; - if (has_qualifier) { - *has_qualifier = false; - } - } +void ParseCfQualifier(const std::string& input, std::string* columnfamily, std::string* qualifier, + bool* has_qualifier = NULL) { + std::string::size_type pos = input.find(":", 0); + if (pos != std::string::npos) { + *columnfamily = input.substr(0, pos); + *qualifier = input.substr(pos + 1); + if (has_qualifier) { + *has_qualifier = true; + } + } else { + *columnfamily = input; + if (has_qualifier) { + *has_qualifier = false; + } + } } int32_t PutInt64Op(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 5 && argc != 6) { - LOG(ERROR) << "args number error: " << argc << ", need 5 | 6."; - PrintCmdHelpInfo(argv[1]); - return -1; - } - - std::string tablename = argv[2]; - TablePtr table(client->OpenTable(tablename, err)); - if (table == NULL) { - LOG(ERROR) << "fail to open table"; - return -1; - } + if (argc != 5 && argc != 6) { + LOG(ERROR) << "args number error: " << argc << ", need 5 | 6."; + PrintCmdHelpInfo(argv[1]); + return -1; + } - std::string rowkey = argv[3]; - std::string columnfamily = ""; - std::string qualifier = ""; - std::string value; - if (argc == 5) { - // use table as kv - value = argv[4]; - } else if (argc == 6) { - ParseCfQualifier(argv[4], &columnfamily, &qualifier); - value = argv[5]; - } - int64_t value_int; - if (!StringToNumber(value.c_str(), &value_int)) { - LOG(ERROR) << "invalid Integer number Got: " << value; - return -1; - } - if (!table->Put(rowkey, columnfamily, qualifier, value_int, err)) { - LOG(ERROR) << "fail to put record to table: " << tablename; - return -1; - } - return 0; + std::string tablename = argv[2]; + TablePtr table(client->OpenTable(tablename, err)); + if (table == NULL) { + LOG(ERROR) << "fail to open table"; + return -1; + } + + std::string rowkey = argv[3]; + std::string columnfamily = ""; + std::string qualifier = ""; + std::string value; + if (argc == 5) { + // use table as kv + value = argv[4]; + } else if (argc == 6) { + ParseCfQualifier(argv[4], &columnfamily, &qualifier); + value = argv[5]; + } + int64_t value_int; + if (!StringToNumber(value.c_str(), &value_int)) { + LOG(ERROR) << "invalid Integer number Got: " << value; + return -1; + } + if (!table->Put(rowkey, columnfamily, qualifier, value_int, err)) { + LOG(ERROR) << "fail to put record to table: " << tablename; + return -1; + } + return 0; } int32_t PutCounterOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 5 && argc != 6) { - LOG(ERROR) << "args number error: " << argc << ", need 5 | 6."; - PrintCmdHelpInfo(argv[1]); - return -1; - } - - std::string tablename = argv[2]; - TablePtr table(client->OpenTable(tablename, err)); - if (table == NULL) { - LOG(ERROR) << "fail to open table"; - return -1; - } + if (argc != 5 && argc != 6) { + LOG(ERROR) << "args number error: " << argc << ", need 5 | 6."; + PrintCmdHelpInfo(argv[1]); + return -1; + } - std::string rowkey = argv[3]; - std::string columnfamily = ""; - std::string qualifier = ""; - std::string value; - if (argc == 5) { - // use table as kv - value = argv[4]; - } else if (argc == 6) { - ParseCfQualifier(argv[4], &columnfamily, &qualifier); - value = argv[5]; - } - int64_t counter; - if (!StringToNumber(value.c_str(), &counter)) { - LOG(ERROR) << "invalid Integer number Got: " << value; - return -1; - } + std::string tablename = argv[2]; + TablePtr table(client->OpenTable(tablename, err)); + if (table == NULL) { + LOG(ERROR) << "fail to open table"; + return -1; + } + + std::string rowkey = argv[3]; + std::string columnfamily = ""; + std::string qualifier = ""; + std::string value; + if (argc == 5) { + // use table as kv + value = argv[4]; + } else if (argc == 6) { + ParseCfQualifier(argv[4], &columnfamily, &qualifier); + value = argv[5]; + } + int64_t counter; + if (!StringToNumber(value.c_str(), &counter)) { + LOG(ERROR) << "invalid Integer number Got: " << value; + return -1; + } - std::string s_counter = tera::CounterCoding::EncodeCounter(counter); - if (!table->Put(rowkey, columnfamily, qualifier, s_counter, err)) { - LOG(ERROR) << "fail to put record to table: " << tablename; - return -1; - } - return 0; + std::string s_counter = tera::CounterCoding::EncodeCounter(counter); + if (!table->Put(rowkey, columnfamily, qualifier, s_counter, err)) { + LOG(ERROR) << "fail to put record to table: " << tablename; + return -1; + } + return 0; } int32_t PutOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 5 && argc != 6) { - LOG(ERROR) << "args number error: " << argc << ", need 5 | 6."; - PrintCmdHelpInfo(argv[1]); - return -1; - } - - std::string tablename = argv[2]; - TablePtr table(client->OpenTable(tablename, err)); - if (table == NULL) { - LOG(ERROR) << "fail to open table"; - return -1; - } - - std::string rowkey = argv[3]; - std::string columnfamily = ""; - std::string qualifier = ""; - std::string value; - if (argc == 5) { - // use table as kv - value = argv[4]; - } else if (argc == 6) { - ParseCfQualifier(argv[4], &columnfamily, &qualifier); - value = argv[5]; - } + if (argc != 5 && argc != 6) { + LOG(ERROR) << "args number error: " << argc << ", need 5 | 6."; + PrintCmdHelpInfo(argv[1]); + return -1; + } - std::unique_ptr mutation(table->NewRowMutation(rowkey)); - if (FLAGS_timestamp == -1) { - mutation->Put(columnfamily, qualifier, value); - } else { - mutation->Put(columnfamily, qualifier, FLAGS_timestamp, value); - } - if (g_row_txn != NULL) { - g_row_txn->ApplyMutation(mutation.get()); - } else { - table->ApplyMutation(mutation.get()); - } - if (mutation->GetError().GetType() != tera::ErrorCode::kOK) { - std::cout << mutation->GetError().ToString() << std::endl; - return -1; - } - return 0; + std::string tablename = argv[2]; + TablePtr table(client->OpenTable(tablename, err)); + if (table == NULL) { + LOG(ERROR) << "fail to open table"; + return -1; + } + + std::string rowkey = argv[3]; + std::string columnfamily = ""; + std::string qualifier = ""; + std::string value; + if (argc == 5) { + // use table as kv + value = argv[4]; + } else if (argc == 6) { + ParseCfQualifier(argv[4], &columnfamily, &qualifier); + value = argv[5]; + } + + std::unique_ptr mutation(table->NewRowMutation(rowkey)); + if (FLAGS_timestamp == -1) { + mutation->Put(columnfamily, qualifier, value); + } else { + mutation->Put(columnfamily, qualifier, FLAGS_timestamp, value); + } + if (g_row_txn != NULL) { + g_row_txn->ApplyMutation(mutation.get()); + } else { + table->ApplyMutation(mutation.get()); + } + if (mutation->GetError().GetType() != tera::ErrorCode::kOK) { + std::cout << mutation->GetError().ToString() << std::endl; + return -1; + } + return 0; } int32_t PutTTLOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 6 && argc != 7) { - LOG(ERROR) << "args number error: " << argc << ", need 5 | 6."; - PrintCmdHelpInfo(argv[1]); - return -1; - } - - std::string tablename = argv[2]; - TablePtr table(client->OpenTable(tablename, err)); - if (table == NULL) { - LOG(ERROR) << "fail to open table"; - return -1; - } + if (argc != 6 && argc != 7) { + LOG(ERROR) << "args number error: " << argc << ", need 5 | 6."; + PrintCmdHelpInfo(argv[1]); + return -1; + } - std::string rowkey = argv[3]; - std::string columnfamily = ""; - std::string qualifier = ""; - std::string value; - int32_t ttl = -1; - if (argc == 6) { - // use table as kv - value = argv[4]; - ttl = atoi(argv[5].c_str()); - } else if (argc == 7) { - ParseCfQualifier(argv[4], &columnfamily, &qualifier); - value = argv[5]; - ttl = atoi(argv[6].c_str()); - } - if (!table->Put(rowkey, columnfamily, qualifier, value, ttl, err)) { - LOG(ERROR) << "fail to put record to table: " << tablename; - return -1; - } - return 0; + std::string tablename = argv[2]; + TablePtr table(client->OpenTable(tablename, err)); + if (table == NULL) { + LOG(ERROR) << "fail to open table"; + return -1; + } + + std::string rowkey = argv[3]; + std::string columnfamily = ""; + std::string qualifier = ""; + std::string value; + int32_t ttl = -1; + if (argc == 6) { + // use table as kv + value = argv[4]; + ttl = atoi(argv[5].c_str()); + } else if (argc == 7) { + ParseCfQualifier(argv[4], &columnfamily, &qualifier); + value = argv[5]; + ttl = atoi(argv[6].c_str()); + } + if (!table->Put(rowkey, columnfamily, qualifier, value, ttl, err)) { + LOG(ERROR) << "fail to put record to table: " << tablename; + return -1; + } + return 0; } int32_t AppendOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 5 && argc != 6) { - LOG(ERROR) << "args number error: " << argc << ", need 5 | 6."; - PrintCmdHelpInfo(argv[1]); - return -1; - } - - std::string tablename = argv[2]; - TablePtr table(client->OpenTable(tablename, err)); - if (table == NULL) { - LOG(ERROR) << "fail to open table"; - return -1; - } + if (argc != 5 && argc != 6) { + LOG(ERROR) << "args number error: " << argc << ", need 5 | 6."; + PrintCmdHelpInfo(argv[1]); + return -1; + } - std::string rowkey = argv[3]; - std::string columnfamily = ""; - std::string qualifier = ""; - std::string value; - if (argc == 5) { - // use table as kv - value = argv[4]; - } else if (argc == 6) { - ParseCfQualifier(argv[4], &columnfamily, &qualifier); - value = argv[5]; - } - if (!table->Append(rowkey, columnfamily, qualifier, value, err)) { - LOG(ERROR) << "fail to append record to table: " << tablename; - return -1; - } - return 0; + std::string tablename = argv[2]; + TablePtr table(client->OpenTable(tablename, err)); + if (table == NULL) { + LOG(ERROR) << "fail to open table"; + return -1; + } + + std::string rowkey = argv[3]; + std::string columnfamily = ""; + std::string qualifier = ""; + std::string value; + if (argc == 5) { + // use table as kv + value = argv[4]; + } else if (argc == 6) { + ParseCfQualifier(argv[4], &columnfamily, &qualifier); + value = argv[5]; + } + if (!table->Append(rowkey, columnfamily, qualifier, value, err)) { + LOG(ERROR) << "fail to append record to table: " << tablename; + return -1; + } + return 0; } int32_t PutIfAbsentOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 5 && argc != 6) { - LOG(ERROR) << "args number error: " << argc << ", need 5 | 6."; - PrintCmdHelpInfo(argv[1]); - return -1; - } - - std::string tablename = argv[2]; - TablePtr table(client->OpenTable(tablename, err)); - if (table == NULL) { - LOG(ERROR) << "fail to open table"; - return -1; - } + if (argc != 5 && argc != 6) { + LOG(ERROR) << "args number error: " << argc << ", need 5 | 6."; + PrintCmdHelpInfo(argv[1]); + return -1; + } - std::string rowkey = argv[3]; - std::string columnfamily = ""; - std::string qualifier = ""; - std::string value; - if (argc == 5) { - // use table as kv - value = argv[4]; - } else if (argc == 6) { - ParseCfQualifier(argv[4], &columnfamily, &qualifier); - value = argv[5]; - } - if (!table->PutIfAbsent(rowkey, columnfamily, qualifier, value, err)) { - LOG(ERROR) << "fail to put record to table: " << tablename; - return -1; - } - return 0; + std::string tablename = argv[2]; + TablePtr table(client->OpenTable(tablename, err)); + if (table == NULL) { + LOG(ERROR) << "fail to open table"; + return -1; + } + + std::string rowkey = argv[3]; + std::string columnfamily = ""; + std::string qualifier = ""; + std::string value; + if (argc == 5) { + // use table as kv + value = argv[4]; + } else if (argc == 6) { + ParseCfQualifier(argv[4], &columnfamily, &qualifier); + value = argv[5]; + } + if (!table->PutIfAbsent(rowkey, columnfamily, qualifier, value, err)) { + LOG(ERROR) << "fail to put record to table: " << tablename; + return -1; + } + return 0; } int32_t AddOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 5 && argc != 6) { - LOG(ERROR)<< "args number error: " << argc << ", need 5 | 6."; - PrintCmdHelpInfo(argv[1]); - return -1; - } - - std::string tablename = argv[2]; - TablePtr table(client->OpenTable(tablename, err)); - if (table == NULL) { - LOG(ERROR) << "fail to open table"; - return -1; - } + if (argc != 5 && argc != 6) { + LOG(ERROR) << "args number error: " << argc << ", need 5 | 6."; + PrintCmdHelpInfo(argv[1]); + return -1; + } - std::string rowkey = argv[3]; - std::string columnfamily = ""; - std::string qualifier = ""; - std::string value; - if (argc == 5) { - // use table as kv - value = argv[4]; - } else if (argc == 6) { - ParseCfQualifier(argv[4], &columnfamily, &qualifier); - value = argv[5]; - } - int64_t delta; - if (!StringToNumber(value.c_str(), &delta)) { - LOG(ERROR) << "invalid Integer number Got: " << value; - return -1; - } - if (!table->Add(rowkey, columnfamily, qualifier, delta, err)) { - LOG(ERROR) << "fail to add record to table: " << tablename; - return -1; - } - return 0; + std::string tablename = argv[2]; + TablePtr table(client->OpenTable(tablename, err)); + if (table == NULL) { + LOG(ERROR) << "fail to open table"; + return -1; + } + + std::string rowkey = argv[3]; + std::string columnfamily = ""; + std::string qualifier = ""; + std::string value; + if (argc == 5) { + // use table as kv + value = argv[4]; + } else if (argc == 6) { + ParseCfQualifier(argv[4], &columnfamily, &qualifier); + value = argv[5]; + } + int64_t delta; + if (!StringToNumber(value.c_str(), &delta)) { + LOG(ERROR) << "invalid Integer number Got: " << value; + return -1; + } + if (!table->Add(rowkey, columnfamily, qualifier, delta, err)) { + LOG(ERROR) << "fail to add record to table: " << tablename; + return -1; + } + return 0; } int32_t AddInt64Op(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 5 && argc != 6) { - LOG(ERROR)<< "args number error: " << argc << ", need 5 | 6."; - PrintCmdHelpInfo(argv[1]); - return -1; - } - - std::string tablename = argv[2]; - TablePtr table(client->OpenTable(tablename, err)); - if (table == NULL) { - LOG(ERROR) << "fail to open table"; - return -1; - } + if (argc != 5 && argc != 6) { + LOG(ERROR) << "args number error: " << argc << ", need 5 | 6."; + PrintCmdHelpInfo(argv[1]); + return -1; + } - std::string rowkey = argv[3]; - std::string columnfamily = ""; - std::string qualifier = ""; - std::string value; - if (argc == 5) { - // use table as kv - value = argv[4]; - } else if (argc == 6) { - ParseCfQualifier(argv[4], &columnfamily, &qualifier); - value = argv[5]; - } - int64_t delta; - if (!StringToNumber(value.c_str(), &delta)) { - LOG(ERROR) << "invalid Integer number Got: " << value; - return -1; - } - if (!table->AddInt64(rowkey, columnfamily, qualifier, delta, err)) { - LOG(ERROR) << "fail to add record to table: " << tablename; - return -1; - } - return 0; + std::string tablename = argv[2]; + TablePtr table(client->OpenTable(tablename, err)); + if (table == NULL) { + LOG(ERROR) << "fail to open table"; + return -1; + } + + std::string rowkey = argv[3]; + std::string columnfamily = ""; + std::string qualifier = ""; + std::string value; + if (argc == 5) { + // use table as kv + value = argv[4]; + } else if (argc == 6) { + ParseCfQualifier(argv[4], &columnfamily, &qualifier); + value = argv[5]; + } + int64_t delta; + if (!StringToNumber(value.c_str(), &delta)) { + LOG(ERROR) << "invalid Integer number Got: " << value; + return -1; + } + if (!table->AddInt64(rowkey, columnfamily, qualifier, delta, err)) { + LOG(ERROR) << "fail to add record to table: " << tablename; + return -1; + } + return 0; } int32_t GetInt64Op(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 4 && argc != 5) { - LOG(ERROR) << "args number error: " << argc << ", need 5 | 6."; - PrintCmdHelpInfo(argv[1]); - return -1; - } + if (argc != 4 && argc != 5) { + LOG(ERROR) << "args number error: " << argc << ", need 5 | 6."; + PrintCmdHelpInfo(argv[1]); + return -1; + } - std::string tablename = argv[2]; - TablePtr table(client->OpenTable(tablename, err)); - if (table == NULL) { - LOG(ERROR) << "fail to open table"; - return -1; - } - - std::string rowkey = argv[3]; - std::string columnfamily = ""; - std::string qualifier = ""; - int64_t value; - if (argc == 4) { - // use table as kv - } else if (argc == 5) { - ParseCfQualifier(argv[4], &columnfamily, &qualifier); - } + std::string tablename = argv[2]; + TablePtr table(client->OpenTable(tablename, err)); + if (table == NULL) { + LOG(ERROR) << "fail to open table"; + return -1; + } + + std::string rowkey = argv[3]; + std::string columnfamily = ""; + std::string qualifier = ""; + int64_t value; + if (argc == 4) { + // use table as kv + } else if (argc == 5) { + ParseCfQualifier(argv[4], &columnfamily, &qualifier); + } - if (!table->Get(rowkey, columnfamily, qualifier, &value, err, FLAGS_snapshot)) { - LOG(ERROR) << "fail to get record from table: " << tablename; - return -1; - } + if (!table->Get(rowkey, columnfamily, qualifier, &value, err, FLAGS_snapshot)) { + LOG(ERROR) << "fail to get record from table: " << tablename; + return -1; + } - std::cout << value << std::endl; - return 0; + std::cout << value << std::endl; + return 0; } std::string PrintableFormatter(const std::string& value) { - if (FLAGS_printable) { - return DebugString(value); - } else { - return value; - } + if (FLAGS_printable) { + return DebugString(value); + } else { + return value; + } } int32_t GetOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 4 && argc != 5) { - LOG(ERROR) << "args number error: " << argc << ", need 5 | 6."; - PrintCmdHelpInfo(argv[1]); - return -1; - } - - std::string tablename = argv[2]; - TablePtr table(client->OpenTable(tablename, err)); - if (table == NULL) { - LOG(ERROR) << "fail to open table"; - return -1; - } + if (argc != 4 && argc != 5) { + LOG(ERROR) << "args number error: " << argc << ", need 5 | 6."; + PrintCmdHelpInfo(argv[1]); + return -1; + } - std::string rowkey = argv[3]; - std::string columnfamily = ""; - std::string qualifier = ""; - std::string value; - std::unique_ptr reader(table->NewRowReader(rowkey)); - if (argc == 4) { - // use table as kv or get row - } else if (argc == 5) { - bool has_qu; - ParseCfQualifier(argv[4], &columnfamily, &qualifier, &has_qu); - if (has_qu) { - reader->AddColumn(columnfamily, qualifier); - } else { - reader->AddColumnFamily(columnfamily); - } - } - reader->SetMaxQualifiers(FLAGS_tera_sdk_read_max_qualifiers); - if (g_row_txn != NULL) { - g_row_txn->Get(reader.get()); + std::string tablename = argv[2]; + TablePtr table(client->OpenTable(tablename, err)); + if (table == NULL) { + LOG(ERROR) << "fail to open table"; + return -1; + } + std::string rowkey = argv[3]; + std::string columnfamily = ""; + std::string qualifier = ""; + std::string value; + std::unique_ptr reader(table->NewRowReader(rowkey)); + if (argc == 4) { + // use table as kv or get row + } else if (argc == 5) { + bool has_qu; + ParseCfQualifier(argv[4], &columnfamily, &qualifier, &has_qu); + if (has_qu) { + reader->AddColumn(columnfamily, qualifier); } else { - table->Get(reader.get()); - } - while (!reader->Done()) { - std::cout << PrintableFormatter(reader->RowName()) << ":" - << PrintableFormatter(reader->ColumnName()) << ":" - << reader->Timestamp() << ":" - << PrintableFormatter(reader->Value()) << std::endl; - reader->Next(); - } - if (reader->GetError().GetType() != tera::ErrorCode::kOK - && reader->GetError().GetType() != tera::ErrorCode::kNotFound) { - std::cout << reader->GetError().ToString() << std::endl; - return -1; - } - return 0; + reader->AddColumnFamily(columnfamily); + } + } + reader->SetMaxQualifiers(FLAGS_tera_sdk_read_max_qualifiers); + if (g_row_txn != NULL) { + g_row_txn->Get(reader.get()); + } else { + table->Get(reader.get()); + } + while (!reader->Done()) { + std::cout << PrintableFormatter(reader->RowName()) << ":" + << PrintableFormatter(reader->ColumnName()) << ":" << reader->Timestamp() << ":" + << PrintableFormatter(reader->Value()) << std::endl; + reader->Next(); + } + if (reader->GetError().GetType() != tera::ErrorCode::kOK && + reader->GetError().GetType() != tera::ErrorCode::kNotFound) { + std::cout << reader->GetError().ToString() << std::endl; + return -1; + } + return 0; } int32_t GetCounterOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 4 && argc != 5) { - LOG(ERROR) << "args number error: " << argc << ", need 5 | 6."; - PrintCmdHelpInfo(argv[1]); - return -1; - } - - std::string tablename = argv[2]; - TablePtr table(client->OpenTable(tablename, err)); - if (table == NULL) { - LOG(ERROR) << "fail to open table"; - return -1; - } - - std::string rowkey = argv[3]; - std::string columnfamily = ""; - std::string qualifier = ""; - std::string value; - if (argc == 4) { - // use table as kv - } else if (argc == 5) { - ParseCfQualifier(argv[4], &columnfamily, &qualifier); - } + if (argc != 4 && argc != 5) { + LOG(ERROR) << "args number error: " << argc << ", need 5 | 6."; + PrintCmdHelpInfo(argv[1]); + return -1; + } - if (!table->Get(rowkey, columnfamily, qualifier, &value, err)) { - LOG(ERROR) << "fail to get record from table: " << tablename; - return -1; - } + std::string tablename = argv[2]; + TablePtr table(client->OpenTable(tablename, err)); + if (table == NULL) { + LOG(ERROR) << "fail to open table"; + return -1; + } + + std::string rowkey = argv[3]; + std::string columnfamily = ""; + std::string qualifier = ""; + std::string value; + if (argc == 4) { + // use table as kv + } else if (argc == 5) { + ParseCfQualifier(argv[4], &columnfamily, &qualifier); + } - int64_t counter = 0; - bool ret = tera::CounterCoding::DecodeCounter(value, &counter); - if (!ret) { - LOG(ERROR) << "invalid counter read, fail to parse"; - } else { - std::cout << counter << std::endl; - } - return 0; + if (!table->Get(rowkey, columnfamily, qualifier, &value, err)) { + LOG(ERROR) << "fail to get record from table: " << tablename; + return -1; + } + + int64_t counter = 0; + bool ret = tera::CounterCoding::DecodeCounter(value, &counter); + if (!ret) { + LOG(ERROR) << "invalid counter read, fail to parse"; + } else { + std::cout << counter << std::endl; + } + return 0; } - int32_t DeleteOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 4 && argc != 5) { - PrintCmdHelpInfo("delete"); - return -1; - } - - std::string tablename = argv[2]; - std::string rowkey = argv[3]; - TablePtr table(client->OpenTable(tablename, err)); - if (table == NULL) { - LOG(ERROR) << "fail to open table"; - return -1; - } + if (argc != 4 && argc != 5) { + PrintCmdHelpInfo("delete"); + return -1; + } - std::string op = argv[1]; - RowMutation* mutation = table->NewRowMutation(rowkey); - if (argc == 4) { - // delete a row - mutation->DeleteRow(); - } else if (argc == 5) { - // delete a family or column - std::string input(argv[4]); - if (input.find(":", 0) == std::string::npos) { - // delete a family - if (FLAGS_timestamp == -1) { - mutation->DeleteFamily(input); - } else { - mutation->DeleteFamily(input, FLAGS_timestamp); - } + std::string tablename = argv[2]; + std::string rowkey = argv[3]; + TablePtr table(client->OpenTable(tablename, err)); + if (table == NULL) { + LOG(ERROR) << "fail to open table"; + return -1; + } + + std::string op = argv[1]; + RowMutation* mutation = table->NewRowMutation(rowkey); + if (argc == 4) { + // delete a row + mutation->DeleteRow(); + } else if (argc == 5) { + // delete a family or column + std::string input(argv[4]); + if (input.find(":", 0) == std::string::npos) { + // delete a family + if (FLAGS_timestamp == -1) { + mutation->DeleteFamily(input); + } else { + mutation->DeleteFamily(input, FLAGS_timestamp); + } + } else { + std::string family; + std::string qualifier; + ParseCfQualifier(input, &family, &qualifier); + if (op == "delete") { + // delete a column (all versions) + if (FLAGS_timestamp == -1) { + mutation->DeleteColumns(family, qualifier); } else { - std::string family; - std::string qualifier; - ParseCfQualifier(input, &family, &qualifier); - if (op == "delete") { - // delete a column (all versions) - if (FLAGS_timestamp == -1) { - mutation->DeleteColumns(family, qualifier); - } else { - mutation->DeleteColumns(family, qualifier, FLAGS_timestamp); - } - } else if (op == "delete1v") { - // delete the newest version - if (FLAGS_timestamp == -1) { - mutation->DeleteColumn(family, qualifier); - } else { - mutation->DeleteColumn(family, qualifier, FLAGS_timestamp); - } - } + mutation->DeleteColumns(family, qualifier, FLAGS_timestamp); } - } else { - LOG(FATAL) << "should not run here."; - } - if (g_row_txn != NULL) { - g_row_txn->ApplyMutation(mutation); - } else { - table->ApplyMutation(mutation); - } - if (mutation->GetError().GetType() != tera::ErrorCode::kOK) { - std::cout << mutation->GetError().ToString() << std::endl; - } - delete mutation; - return 0; + } else if (op == "delete1v") { + // delete the newest version + if (FLAGS_timestamp == -1) { + mutation->DeleteColumn(family, qualifier); + } else { + mutation->DeleteColumn(family, qualifier, FLAGS_timestamp); + } + } + } + } else { + LOG(FATAL) << "should not run here."; + } + if (g_row_txn != NULL) { + g_row_txn->ApplyMutation(mutation); + } else { + table->ApplyMutation(mutation); + } + if (mutation->GetError().GetType() != tera::ErrorCode::kOK) { + std::cout << mutation->GetError().ToString() << std::endl; + } + delete mutation; + return 0; } int32_t ScanRange(TablePtr& table, ScanDescriptor& desc, ErrorCode* err) { - desc.SetBufferSize(FLAGS_tera_client_scan_package_size << 10); - desc.SetAsync(FLAGS_tera_sdk_batch_scan_enabled); - desc.SetSnapshot(FLAGS_snapshot); - desc.SetMaxQualifiers(FLAGS_tera_sdk_read_max_qualifiers); - - ResultStream* result_stream; - if ((result_stream = table->Scan(desc, err)) == NULL) { - LOG(ERROR) << "fail to scan records from table: " << table->GetName(); - return -7; - } - g_start_time = time(NULL); - - std::string last_key = ""; - int64_t found_num = 0; - while (!result_stream->Done(err)) { - if (result_stream->RowName() != last_key) { - found_num++; - } - last_key = result_stream->RowName(); - - int32_t len = result_stream->RowName().size() - + result_stream->ColumnName().size() - + sizeof(result_stream->Timestamp()) - + result_stream->Value().size(); - g_total_size += len; - g_key_num ++; - g_cur_batch_num ++; - if (FLAGS_print_data) { - std::cout << PrintableFormatter(result_stream->RowName()) << ":" + desc.SetBufferSize(FLAGS_tera_client_scan_package_size << 10); + desc.SetSnapshot(FLAGS_snapshot); + desc.SetMaxQualifiers(FLAGS_tera_sdk_read_max_qualifiers); + + ResultStream* result_stream; + if ((result_stream = table->Scan(desc, err)) == NULL) { + LOG(ERROR) << "fail to scan records from table: " << table->GetName(); + return -7; + } + g_start_time = time(NULL); + + std::string last_key = ""; + int64_t found_num = 0; + uint64_t last_data = 0; + while (!result_stream->Done(err)) { + if (result_stream->RowName() != last_key) { + found_num++; + } + last_key = result_stream->RowName(); + + int32_t len = result_stream->RowName().size() + result_stream->ColumnName().size() + + sizeof(result_stream->Timestamp()) + result_stream->Value().size(); + g_total_size += len; + g_key_num++; + g_cur_batch_num++; + if (FLAGS_print_data) { + std::cout << PrintableFormatter(result_stream->RowName()) << ":" << PrintableFormatter(result_stream->ColumnName()) << ":" - << result_stream->Timestamp() << ":" - << PrintableFormatter(result_stream->Value()) << std::endl; - } - - result_stream->Next(); - if (g_cur_batch_num >= FLAGS_tera_client_batch_put_num) { - int32_t time_cur=time(NULL); - uint32_t time_used = time_cur - g_start_time; - LOG(INFO) << "Scaning " << g_key_num << " keys " << g_key_num/(time_used?time_used:1) - << " keys/S " << g_total_size/1024.0/1024/(time_used?time_used:1) << " MB/S "; - g_cur_batch_num = 0; - g_last_time = time_cur; - } - } - if (FLAGS_rowkey_count) { - std::cout << found_num << std::endl; - } - delete result_stream; - if (err->GetType() != ErrorCode::kOK) { - LOG(ERROR) << "fail to finish scan: " << err->ToString(); - return -1; - } - g_end_time = time(NULL); - g_used_time = g_end_time - g_start_time; - LOG(INFO) << "Scan done " << g_key_num << " keys " << g_key_num/(g_used_time?g_used_time:1) - <<" keys/S " << g_total_size/1024.0/1024/(g_used_time?g_used_time:1) << " MB/S "; - return 0; + << result_stream->Timestamp() << ":" << PrintableFormatter(result_stream->Value()) + << std::endl; + } + if (FLAGS_print_count) { + if (result_stream->GetDataSize() != last_data) { // print iff data changed + std::cout << "### scan count info: size_bytes=" << result_stream->GetDataSize() + << " row_number=" << result_stream->GetRowCount() + << " last_key=" << result_stream->GetLastKey() << " ###" << std::endl; + } + last_data = result_stream->GetDataSize(); + } + + result_stream->Next(); + if (g_cur_batch_num >= FLAGS_tera_client_batch_put_num) { + int32_t time_cur = time(NULL); + uint32_t time_used = time_cur - g_start_time; + LOG(INFO) << "Scaning " << g_key_num << " keys " << g_key_num / (time_used ? time_used : 1) + << " keys/S " << g_total_size / 1024.0 / 1024 / (time_used ? time_used : 1) + << " MB/S "; + g_cur_batch_num = 0; + g_last_time = time_cur; + } + } + if (FLAGS_rowkey_count) { + std::cout << found_num << std::endl; + } + delete result_stream; + if (err->GetType() != ErrorCode::kOK) { + LOG(ERROR) << "fail to finish scan: " << err->ToString(); + return -1; + } + g_end_time = time(NULL); + g_used_time = g_end_time - g_start_time; + LOG(INFO) << "Scan done " << g_key_num << " keys " << g_key_num / (g_used_time ? g_used_time : 1) + << " keys/S " << g_total_size / 1024.0 / 1024 / (g_used_time ? g_used_time : 1) + << " MB/S "; + return 0; } int32_t ScanOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 5 && argc != 6) { - PrintCmdHelpInfo("scan"); - return -1; - } - - std::string op = argv[1]; - std::string tablename = argv[2]; - TablePtr table(client->OpenTable(tablename, err)); - if (table == NULL) { - LOG(ERROR) << "fail to open table"; - return -1; - } + if (argc != 5) { + PrintCmdHelpInfo("scan"); + return -1; + } - std::string start_rowkey = argv[3]; - std::string end_rowkey = argv[4]; - if (start_rowkey == "\"\"") { - start_rowkey = ""; - } - if (end_rowkey == "\"\"") { - end_rowkey = ""; - } - LOG(INFO) << "start_key=" << start_rowkey << " end_key=" << end_rowkey; - ScanDescriptor desc(start_rowkey); - desc.SetEnd(end_rowkey); - if (op == "scanallv") { - desc.SetMaxVersions(std::numeric_limits::max()); - } - if (argc == 6 && !desc.SetFilter(argv[5])) { - LOG(ERROR) << "fail to parse scan schema: " << argv[5]; - return -1; - } - return ScanRange(table, desc, err); + std::string op = argv[1]; + std::string tablename = argv[2]; + TablePtr table(client->OpenTable(tablename, err)); + if (table == NULL) { + LOG(ERROR) << "fail to open table"; + return -1; + } + + std::string start_rowkey = argv[3]; + std::string end_rowkey = argv[4]; + if (start_rowkey == "\"\"") { + start_rowkey = ""; + } + if (end_rowkey == "\"\"") { + end_rowkey = ""; + } + LOG(INFO) << "start_key=" << start_rowkey << " end_key=" << end_rowkey; + ScanDescriptor desc(start_rowkey); + desc.SetEnd(end_rowkey); + if (op == "scanallv") { + desc.SetMaxVersions(std::numeric_limits::max()); + } + return ScanRange(table, desc, err); } -static std::string DoubleToStr(double value) -{ - const int len_max = 32; - char buffer[len_max]; - int len = snprintf(buffer, len_max, "%.2g", value); - return std::string(buffer, len); +static std::string DoubleToStr(double value) { + const int len_max = 32; + char buffer[len_max]; + int len = snprintf(buffer, len_max, "%.2g", value); + return std::string(buffer, len); } std::string BytesNumberToString(const uint64_t size) { - if (FLAGS_stdout_is_tty) { - // 1024 -> 1K - // 1024*1024 -> 1M - return utils::ConvertByteToString(size); - } - return NumberToString(size); + if (FLAGS_stdout_is_tty) { + // 1024 -> 1K + // 1024*1024 -> 1M + return utils::ConvertByteToString(size); + } + return NumberToString(size); } std::string DateNumberToString(int64_t ts) { - if (FLAGS_stdout_is_tty) { - return get_time_str(ts); - } - return NumberToString(ts); + if (FLAGS_stdout_is_tty) { + return get_time_str(ts); + } + return NumberToString(ts); } -static std::string GetTabletStatusString(const TabletMetaList& tablet_list, int64_t now, int32_t i) { - // old tera master will not return timestamp #963 - if ((tablet_list.timestamp_size() > 0)) { - // new tera master - int64_t delta = now - tablet_list.timestamp(i); - TabletMeta::TabletStatus status = tablet_list.meta(i).status(); - TabletMeta::TabletStatus db_status = tablet_list.counter(i).db_status(); - if (db_status == TabletMeta::kTabletCorruption) { - return StatusCodeToString(db_status); - } - if ((status == TabletMeta::kTabletReady) && (delta > FLAGS_tera_sdk_status_timeout * 1000000)) { - return "kUnknown"; - } else { - return StatusCodeToString(tablet_list.meta(i).status()); - } +static std::string GetTabletStatusString(const TabletMetaList& tablet_list, int64_t now, + int32_t i) { + // old tera master will not return timestamp #963 + if ((tablet_list.timestamp_size() > 0)) { + // new tera master + int64_t delta = now - tablet_list.timestamp(i); + TabletMeta::TabletStatus status = tablet_list.meta(i).status(); + TabletMeta::TabletStatus db_status = tablet_list.counter(i).db_status(); + if (db_status == TabletMeta::kTabletCorruption) { + return StatusCodeToString(db_status); + } + if (db_status == TabletMeta::kTabletManifestError) { + return StatusCodeToString(db_status); + } + if ((status == TabletMeta::kTabletReady) && (delta > FLAGS_tera_sdk_status_timeout * 1000000)) { + return "kUnknown"; } else { - // old tera master - return StatusCodeToString(tablet_list.meta(i).status()); + return StatusCodeToString(tablet_list.meta(i).status()); } + } else { + // old tera master + return StatusCodeToString(tablet_list.meta(i).status()); + } } int32_t ShowTabletList(const TabletMetaList& tablet_list, bool is_server_addr, bool is_x) { - TPrinter printer; - int cols; - std::vector row; - int64_t now = get_micros(); - if (is_x) { - if (is_server_addr) { - cols = 14; - printer.Reset(cols, - " ", "server_addr", "path", "status", "size", - "lread", "read", "rspeed", "write", "wspeed", - "scan", "sspeed", "wwl", "startkey"); - } else { - cols = 13; - printer.Reset(cols, - " ", "path", "status", "size", "lread", - "read", "rspeed", "write", "wspeed", - "scan", "sspeed", "wwl", "startkey"); - } - - for (int32_t i = 0; i < tablet_list.meta_size(); ++i) { - const TabletMeta& meta = tablet_list.meta(i); - row.clear(); - row.push_back(NumberToString(i)); - if (is_server_addr) { - row.push_back(meta.server_addr()); - } - row.push_back(meta.path()); - row.push_back(GetTabletStatusString(tablet_list, now, i)); - - uint64_t size = meta.size(); - std::string size_str = - BytesNumberToString(size) + - "["; - for (int l = 0; l < meta.lg_size_size(); ++l) { - size_str += BytesNumberToString(meta.lg_size(l)); - if (l < meta.lg_size_size() - 1) { - size_str += ","; - } - } - size_str += "]"; - row.push_back(size_str); - - if (tablet_list.counter_size() > 0) { - const TabletCounter& counter = tablet_list.counter(i); - row.push_back(NumberToString(counter.low_read_cell())); - row.push_back(NumberToString(counter.read_rows())); - row.push_back(BytesNumberToString(counter.read_size()) + "B/s"); - row.push_back(NumberToString(counter.write_rows())); - row.push_back(BytesNumberToString(counter.write_size()) + "B/s"); - row.push_back(NumberToString(counter.scan_rows())); - row.push_back(BytesNumberToString(counter.scan_size()) + "B/s"); - row.push_back(DoubleToStr(counter.write_workload())); - } - row.push_back(DebugString(meta.key_range().key_start().substr(0, 20))); - printer.AddRow(row); - } + TPrinter printer; + int cols; + std::vector row; + int64_t now = get_micros(); + if (is_x) { + if (is_server_addr) { + cols = 14; + printer.Reset(cols, " ", "server_addr", "path", "status", "size", "lread", "read", "rspeed", + "write", "wspeed", "scan", "sspeed", "wwl", "startkey"); } else { - cols = 7; - printer.Reset(cols, - " ", "server_addr", "path", "status", - "size", "startkey", "endkey"); - for (int32_t i = 0; i < tablet_list.meta_size(); ++i) { - const TabletMeta& meta = tablet_list.meta(i); - row.clear(); - row.push_back(NumberToString(i)); - row.push_back(meta.server_addr()); - row.push_back(meta.path()); - row.push_back(GetTabletStatusString(tablet_list, now, i)); - - uint64_t size = meta.size(); - row.push_back(BytesNumberToString(size)); - row.push_back(DebugString(meta.key_range().key_start())); - row.push_back(DebugString(meta.key_range().key_end())); - printer.AddRow(row); - } + cols = 13; + printer.Reset(cols, " ", "path", "status", "size", "lread", "read", "rspeed", "write", + "wspeed", "scan", "sspeed", "wwl", "startkey"); } - printer.Print(g_printer_opt); - return 0; -} -void SetTableCounter(const std::string& table_name, - const TabletMetaList& tablet_list, - TableCounter* counter) { - int64_t size = 0; - int64_t tablet = 0; - int64_t notready = 0; - int64_t lread = 0; - int64_t read = 0; - int64_t rmax = 0; - int64_t rspeed = 0; - int64_t write = 0; - int64_t wmax = 0; - int64_t wspeed = 0; - int64_t scan = 0; - int64_t smax = 0; - int64_t sspeed = 0; - int64_t lg_num = 0; - std::vector lg_size; for (int32_t i = 0; i < tablet_list.meta_size(); ++i) { - if (tablet_list.meta(i).table_name() != table_name) { - continue; - } - size += tablet_list.meta(i).size(); - tablet++; - if (tablet_list.meta(i).status() != TabletMeta::kTabletReady) { - notready++; - } - lread += tablet_list.counter(i).low_read_cell(); - read += tablet_list.counter(i).read_rows(); - if (tablet_list.counter(i).read_rows() > rmax) { - rmax = tablet_list.counter(i).read_rows(); - } - rspeed += tablet_list.counter(i).read_size(); - write += tablet_list.counter(i).write_rows(); - if (tablet_list.counter(i).write_rows() > wmax) { - wmax = tablet_list.counter(i).write_rows(); + const TabletMeta& meta = tablet_list.meta(i); + row.clear(); + row.push_back(NumberToString(i)); + if (is_server_addr) { + row.push_back(meta.server_addr()); + } + row.push_back(meta.path()); + row.push_back(GetTabletStatusString(tablet_list, now, i)); + + uint64_t size = meta.size(); + std::string size_str = BytesNumberToString(size) + "["; + for (int l = 0; l < meta.lg_size_size(); ++l) { + size_str += BytesNumberToString(meta.lg_size(l)); + if (l < meta.lg_size_size() - 1) { + size_str += ","; } - wspeed += tablet_list.counter(i).write_size(); - scan += tablet_list.counter(i).scan_rows(); - if (tablet_list.counter(i).scan_rows() > smax) { - smax = tablet_list.counter(i).scan_rows(); - } - sspeed += tablet_list.counter(i).scan_size(); + } + size_str += "]"; + row.push_back(size_str); + + if (tablet_list.counter_size() > 0) { + const TabletCounter& counter = tablet_list.counter(i); + row.push_back(NumberToString(counter.low_read_cell())); + row.push_back(NumberToString(counter.read_rows())); + row.push_back(BytesNumberToString(counter.read_size()) + "B/s"); + row.push_back(NumberToString(counter.write_rows())); + row.push_back(BytesNumberToString(counter.write_size()) + "B/s"); + row.push_back(NumberToString(counter.scan_rows())); + row.push_back(BytesNumberToString(counter.scan_size()) + "B/s"); + row.push_back(DoubleToStr(counter.write_workload())); + } + row.push_back(DebugString(meta.key_range().key_start().substr(0, 20))); + printer.AddRow(row); + } + } else { + cols = 7; + printer.Reset(cols, " ", "server_addr", "path", "status", "size", "startkey", "endkey"); + for (int32_t i = 0; i < tablet_list.meta_size(); ++i) { + const TabletMeta& meta = tablet_list.meta(i); + row.clear(); + row.push_back(NumberToString(i)); + row.push_back(meta.server_addr()); + row.push_back(meta.path()); + row.push_back(GetTabletStatusString(tablet_list, now, i)); + + uint64_t size = meta.size(); + row.push_back(BytesNumberToString(size)); + row.push_back(DebugString(meta.key_range().key_start())); + row.push_back(DebugString(meta.key_range().key_end())); + printer.AddRow(row); + } + } + printer.Print(g_printer_opt); + return 0; +} - if (lg_num == 0) { - lg_num = tablet_list.meta(i).lg_size_size(); - lg_size.resize(lg_num, 0); - } - for (int l = 0; l < lg_num; ++l) { - if (tablet_list.meta(i).lg_size_size() > l) { - lg_size[l] += tablet_list.meta(i).lg_size(l); - } - } +void SetTableCounter(const std::string& table_name, const TabletMetaList& tablet_list, + TableCounter* counter) { + int64_t size = 0; + int64_t tablet = 0; + int64_t notready = 0; + int64_t lread = 0; + int64_t read = 0; + int64_t rmax = 0; + int64_t rspeed = 0; + int64_t write = 0; + int64_t wmax = 0; + int64_t wspeed = 0; + int64_t scan = 0; + int64_t smax = 0; + int64_t sspeed = 0; + int64_t lg_num = 0; + std::vector lg_size; + for (int32_t i = 0; i < tablet_list.meta_size(); ++i) { + if (tablet_list.meta(i).table_name() != table_name) { + continue; + } + size += tablet_list.meta(i).size(); + tablet++; + if (tablet_list.meta(i).status() != TabletMeta::kTabletReady) { + notready++; + } + lread += tablet_list.counter(i).low_read_cell(); + read += tablet_list.counter(i).read_rows(); + if (tablet_list.counter(i).read_rows() > rmax) { + rmax = tablet_list.counter(i).read_rows(); + } + rspeed += tablet_list.counter(i).read_size(); + write += tablet_list.counter(i).write_rows(); + if (tablet_list.counter(i).write_rows() > wmax) { + wmax = tablet_list.counter(i).write_rows(); + } + wspeed += tablet_list.counter(i).write_size(); + scan += tablet_list.counter(i).scan_rows(); + if (tablet_list.counter(i).scan_rows() > smax) { + smax = tablet_list.counter(i).scan_rows(); + } + sspeed += tablet_list.counter(i).scan_size(); + + if (lg_num == 0) { + lg_num = tablet_list.meta(i).lg_size_size(); + lg_size.resize(lg_num, 0); } - counter->set_size(size); - counter->set_tablet_num(tablet); - counter->set_notready_num(notready); - counter->set_lread(lread); - counter->set_read_rows(read); - counter->set_read_max(rmax); - counter->set_read_size(rspeed); - counter->set_write_rows(write); - counter->set_write_max(wmax); - counter->set_write_size(wspeed); - counter->set_scan_rows(scan); - counter->set_scan_max(smax); - counter->set_scan_size(sspeed); for (int l = 0; l < lg_num; ++l) { - counter->add_lg_size(lg_size[l]); - } + if (tablet_list.meta(i).lg_size_size() > l) { + lg_size[l] += tablet_list.meta(i).lg_size(l); + } + } + } + counter->set_size(size); + counter->set_tablet_num(tablet); + counter->set_notready_num(notready); + counter->set_lread(lread); + counter->set_read_rows(read); + counter->set_read_max(rmax); + counter->set_read_size(rspeed); + counter->set_write_rows(write); + counter->set_write_max(wmax); + counter->set_write_size(wspeed); + counter->set_scan_rows(scan); + counter->set_scan_max(smax); + counter->set_scan_size(sspeed); + for (int l = 0; l < lg_num; ++l) { + counter->add_lg_size(lg_size[l]); + } } int32_t ShowAllTables(Client* client, bool is_x, bool show_all, ErrorCode* err) { - TableMetaList table_list; - TabletMetaList tablet_list; - std::shared_ptr client_impl((static_cast(client))->GetClientImpl()); - if (!client_impl->ShowTablesInfo(&table_list, &tablet_list, !show_all, err)) { - LOG(ERROR) << "fail to get meta data from tera."; - return -1; - } - - TPrinter printer; - int64_t sum_size = 0; - int64_t sum_tablet = 0; - int64_t sum_notready = 0; - int64_t sum_lread = 0; - int64_t sum_read = 0; - int64_t sum_rspeed = 0; - int64_t sum_write = 0; - int64_t sum_wspeed = 0; - int64_t sum_scan = 0; - int64_t sum_sspeed = 0; - int cols; - if (is_x) { - cols = 17; - printer.Reset(cols, - " ", "tablename", "status", "size", "lg_size", - "tablet", "notready", "lread", "read", - "rmax", "rspeed", "write", "wmax", "wspeed", - "scan", "smax", "sspeed"); + TableMetaList table_list; + TabletMetaList tablet_list; + std::shared_ptr client_impl( + (static_cast(client))->GetClientImpl()); + if (!client_impl->ShowTablesInfo(&table_list, &tablet_list, !show_all, err)) { + LOG(ERROR) << "fail to get meta data from tera."; + return -1; + } + + TPrinter printer; + int64_t sum_size = 0; + int64_t sum_tablet = 0; + int64_t sum_notready = 0; + int64_t sum_lread = 0; + int64_t sum_read = 0; + int64_t sum_rspeed = 0; + int64_t sum_write = 0; + int64_t sum_wspeed = 0; + int64_t sum_scan = 0; + int64_t sum_sspeed = 0; + int cols; + if (is_x) { + cols = 17; + printer.Reset(cols, " ", "tablename", "status", "size", "lg_size", "tablet", "notready", + "lread", "read", "rmax", "rspeed", "write", "wmax", "wspeed", "scan", "smax", + "sspeed"); + } else { + cols = 7; + printer.Reset(cols, " ", "tablename", "status", "size", "lg_size", "tablet", "notready"); + } + for (int32_t table_no = 0; table_no < table_list.meta_size(); ++table_no) { + std::string tablename = table_list.meta(table_no).table_name(); + TableCounter counter; + if (table_list.counter_size() > 0) { + counter = table_list.counter(table_no); } else { - cols = 7; - printer.Reset(cols, - " ", "tablename", "status", "size", "lg_size", - "tablet", "notready"); - } - for (int32_t table_no = 0; table_no < table_list.meta_size(); ++table_no) { - std::string tablename = table_list.meta(table_no).table_name(); - TableCounter counter; - if (table_list.counter_size() > 0) { - counter = table_list.counter(table_no); - } else { - SetTableCounter(tablename, tablet_list, &counter); - } - TableStatus status = table_list.meta(table_no).status(); - std::string lg_size_str = ""; - for (int l = 0; l < counter.lg_size_size(); ++l) { - lg_size_str += BytesNumberToString(counter.lg_size(l)); - if (l < counter.lg_size_size() - 1) { - lg_size_str += ","; - } - } - if (lg_size_str.empty()) { - lg_size_str = "-"; - } - int64_t notready; - std::string print_size; - if (status == kTableDisable) { - notready = 0; - print_size = "-"; - lg_size_str = "-"; - } else { - notready = counter.notready_num(); - print_size = BytesNumberToString(counter.size()); - } - sum_size += counter.size(); - sum_tablet += counter.tablet_num(); - sum_notready += notready; - sum_lread += counter.lread(); - sum_read += counter.read_rows(); - sum_rspeed += counter.read_size(); - sum_write += counter.write_rows(); - sum_wspeed += counter.write_size(); - sum_scan += counter.scan_rows(); - sum_sspeed += counter.scan_size(); - if (is_x) { - printer.AddRow(cols, - NumberToString(table_no).data(), - tablename.data(), - StatusCodeToString(status).data(), - print_size.data(), - lg_size_str.data(), - NumberToString(counter.tablet_num()).data(), - NumberToString(notready).data(), - BytesNumberToString(counter.lread()).data(), - BytesNumberToString(counter.read_rows()).data(), - BytesNumberToString(counter.read_max()).data(), - (BytesNumberToString(counter.read_size()) + "B/s").data(), - BytesNumberToString(counter.write_rows()).data(), - BytesNumberToString(counter.write_max()).data(), - (BytesNumberToString(counter.write_size()) + "B/s").data(), - BytesNumberToString(counter.scan_rows()).data(), - BytesNumberToString(counter.scan_max()).data(), - (BytesNumberToString(counter.scan_size()) + "B/s").data()); - } else { - printer.AddRow(cols, - NumberToString(table_no).data(), - tablename.data(), - StatusCodeToString(status).data(), - print_size.data(), - lg_size_str.data(), - NumberToString(counter.tablet_num()).data(), - NumberToString(notready).data()); - } - } - if (!FLAGS_stdout_is_tty) { - // we don't need total infos - } else if (is_x) { - printer.AddRow(cols, - "-", - "total", - "-", - BytesNumberToString(sum_size).data(), - "-", - NumberToString(sum_tablet).data(), - NumberToString(sum_notready).data(), - BytesNumberToString(sum_lread).data(), - BytesNumberToString(sum_read).data(), - "-", - (BytesNumberToString(sum_rspeed) + "B/s").data(), - BytesNumberToString(sum_write).data(), - "-", - (BytesNumberToString(sum_wspeed) + "B/s").data(), - BytesNumberToString(sum_scan).data(), - "-", - (BytesNumberToString(sum_sspeed) + "B/s").data()); + SetTableCounter(tablename, tablet_list, &counter); + } + TableStatus status = table_list.meta(table_no).status(); + std::string lg_size_str = ""; + for (int l = 0; l < counter.lg_size_size(); ++l) { + lg_size_str += BytesNumberToString(counter.lg_size(l)); + if (l < counter.lg_size_size() - 1) { + lg_size_str += ","; + } + } + if (lg_size_str.empty()) { + lg_size_str = "-"; + } + int64_t notready; + std::string print_size; + if (status == kTableDisable) { + notready = 0; + print_size = "-"; + lg_size_str = "-"; } else { - printer.AddRow(cols, - "-", - "total", - "-", - BytesNumberToString(sum_size).data(), - "-", - NumberToString(sum_tablet).data(), - NumberToString(sum_notready).data()); - } - printer.Print(g_printer_opt); - std::cout << std::endl; - if (show_all) { - ShowTabletList(tablet_list, true, true); - } - return 0; + notready = counter.notready_num(); + print_size = BytesNumberToString(counter.size()); + } + sum_size += counter.size(); + sum_tablet += counter.tablet_num(); + sum_notready += notready; + sum_lread += counter.lread(); + sum_read += counter.read_rows(); + sum_rspeed += counter.read_size(); + sum_write += counter.write_rows(); + sum_wspeed += counter.write_size(); + sum_scan += counter.scan_rows(); + sum_sspeed += counter.scan_size(); + if (is_x) { + printer.AddRow(cols, NumberToString(table_no).data(), tablename.data(), + StatusCodeToString(status).data(), print_size.data(), lg_size_str.data(), + NumberToString(counter.tablet_num()).data(), NumberToString(notready).data(), + BytesNumberToString(counter.lread()).data(), + BytesNumberToString(counter.read_rows()).data(), + BytesNumberToString(counter.read_max()).data(), + (BytesNumberToString(counter.read_size()) + "B/s").data(), + BytesNumberToString(counter.write_rows()).data(), + BytesNumberToString(counter.write_max()).data(), + (BytesNumberToString(counter.write_size()) + "B/s").data(), + BytesNumberToString(counter.scan_rows()).data(), + BytesNumberToString(counter.scan_max()).data(), + (BytesNumberToString(counter.scan_size()) + "B/s").data()); + } else { + printer.AddRow(cols, NumberToString(table_no).data(), tablename.data(), + StatusCodeToString(status).data(), print_size.data(), lg_size_str.data(), + NumberToString(counter.tablet_num()).data(), NumberToString(notready).data()); + } + } + if (!FLAGS_stdout_is_tty) { + // we don't need total infos + } else if (is_x) { + printer.AddRow( + cols, "-", "total", "-", BytesNumberToString(sum_size).data(), "-", + NumberToString(sum_tablet).data(), NumberToString(sum_notready).data(), + BytesNumberToString(sum_lread).data(), BytesNumberToString(sum_read).data(), "-", + (BytesNumberToString(sum_rspeed) + "B/s").data(), BytesNumberToString(sum_write).data(), + "-", (BytesNumberToString(sum_wspeed) + "B/s").data(), BytesNumberToString(sum_scan).data(), + "-", (BytesNumberToString(sum_sspeed) + "B/s").data()); + } else { + printer.AddRow(cols, "-", "total", "-", BytesNumberToString(sum_size).data(), "-", + NumberToString(sum_tablet).data(), NumberToString(sum_notready).data()); + } + printer.Print(g_printer_opt); + std::cout << std::endl; + if (show_all) { + ShowTabletList(tablet_list, true, true); + } + return 0; } -int32_t ShowSingleTable(Client* client, const string& table_name, - bool is_x, ErrorCode* err) { - TableMeta table_meta; - TabletMetaList tablet_list; +int32_t ShowSingleTable(Client* client, const string& table_name, bool is_x, ErrorCode* err) { + TableMeta table_meta; + TabletMetaList tablet_list; - std::shared_ptr client_impl((static_cast(client))->GetClientImpl()); - if (!client_impl->ShowTablesInfo(table_name, &table_meta, &tablet_list, err)) { - LOG(ERROR) << "table not exist: " << table_name; - return -1; - } + std::shared_ptr client_impl( + (static_cast(client))->GetClientImpl()); + if (!client_impl->ShowTablesInfo(table_name, &table_meta, &tablet_list, err)) { + LOG(ERROR) << "table not exist: " << table_name; + return -1; + } - if (FLAGS_stdout_is_tty) { - std::cout << std::endl; - std::cout << "create time: " - << get_time_str(table_meta.create_time()) << std::endl; - std::cout << std::endl; - } - ShowTabletList(tablet_list, true, is_x); + if (FLAGS_stdout_is_tty) { std::cout << std::endl; - return 0; -} - -int32_t ShowSingleTabletNodeInfo(Client* client, const string& addr, - bool is_x, ErrorCode* err) { - TabletNodeInfo info; - TabletMetaList tablet_list; - std::shared_ptr client_impl((static_cast(client))->GetClientImpl()); - if (!client_impl->ShowTabletNodesInfo(addr, &info, &tablet_list, err)) { - LOG(ERROR) << "fail to show tabletnode: " << addr; - return -1; - } - - std::cout << "\nTabletNode Info:\n"; - std::cout << " address: " << info.addr() << std::endl; - std::cout << " status: " << info.status_m() << std::endl; - std::cout << " update time: " - << get_time_str(info.timestamp() / 1000000) << "\n\n"; - - int cols = 5; - TPrinter printer(cols, "workload", "tablets", "load", "split", "unload"); - std::vector row; - row.push_back(BytesNumberToString(info.load())); - row.push_back(NumberToString(info.tablet_total())); - row.push_back(NumberToString(info.tablet_onload())); - row.push_back(NumberToString(info.tablet_onsplit())); - row.push_back(NumberToString(info.tablet_unloading())); - printer.AddRow(row); - printer.Print(g_printer_opt); - + std::cout << "create time: " << get_time_str(table_meta.create_time()) << std::endl; std::cout << std::endl; - cols = 7; - printer.Reset(cols, "lread", "read", "rspeed", "write", "wspeed", "scan", "sspeed"); - row.clear(); - row.push_back(NumberToString(info.low_read_cell())); - row.push_back(NumberToString(info.read_rows())); - row.push_back(BytesNumberToString(info.read_size()) + "B/s"); - row.push_back(NumberToString(info.write_rows())); - row.push_back(BytesNumberToString(info.write_size()) + "B/s"); - row.push_back(NumberToString(info.scan_rows())); - row.push_back(BytesNumberToString(info.scan_size()) + "B/s"); - printer.AddRow(row); - printer.Print(g_printer_opt); - - std::cout << "\nHardware Info:\n"; - cols = 8; - printer.Reset(cols, "cpu", "mem_used", "net_tx", "net_rx", "dfs_r", "dfs_w", "local_r", "local_w"); - row.clear(); - row.push_back(NumberToString(info.cpu_usage())); - row.push_back(BytesNumberToString(info.mem_used())); - row.push_back(BytesNumberToString(info.net_tx()) + "B/s"); - row.push_back(BytesNumberToString(info.net_rx()) + "B/s"); - row.push_back(BytesNumberToString(info.dfs_io_r()) + "B/s"); - row.push_back(BytesNumberToString(info.dfs_io_w()) + "B/s"); - row.push_back(BytesNumberToString(info.local_io_r()) + "B/s"); - row.push_back(BytesNumberToString(info.local_io_w()) + "B/s"); - printer.AddRow(row); - printer.Print(g_printer_opt); - - std::cout << "\nOther Infos:\n"; - cols = info.extra_info_size(); - row.clear(); - for (int i = 0; i < cols; ++i) { - row.push_back(info.extra_info(i).name()); - } - printer.Reset(row); - std::vector row_int; - for (int i = 0; i < cols; ++i) { - row_int.push_back(info.extra_info(i).value()); - } - printer.AddRow(row_int); - printer.Print(g_printer_opt); + } + ShowTabletList(tablet_list, true, is_x); + std::cout << std::endl; + return 0; +} - std::cout << "\nTablets In this TabletNode:\n"; - ShowTabletList(tablet_list, false, is_x); - return 0; +int32_t ShowSingleTabletNodeInfo(Client* client, const string& addr, bool is_x, ErrorCode* err) { + TabletNodeInfo info; + TabletMetaList tablet_list; + std::shared_ptr client_impl( + (static_cast(client))->GetClientImpl()); + if (!client_impl->ShowTabletNodesInfo(addr, &info, &tablet_list, err)) { + LOG(ERROR) << "fail to show tabletnode: " << addr; + return -1; + } + + std::cout << "\nTabletNode Info:\n"; + std::cout << " address: " << info.addr() << std::endl; + std::cout << " status: " << info.status_m() << std::endl; + std::cout << " update time: " << get_time_str(info.timestamp() / 1000000) << "\n\n"; + + int cols = 5; + TPrinter printer(cols, "workload", "tablets", "load", "split", "unload"); + std::vector row; + row.push_back(BytesNumberToString(info.load())); + row.push_back(NumberToString(info.tablet_total())); + row.push_back(NumberToString(info.tablet_onload())); + row.push_back(NumberToString(info.tablet_onsplit())); + row.push_back(NumberToString(info.tablet_unloading())); + printer.AddRow(row); + printer.Print(g_printer_opt); + + std::cout << std::endl; + cols = 7; + printer.Reset(cols, "lread", "read", "rspeed", "write", "wspeed", "scan", "sspeed"); + row.clear(); + row.push_back(NumberToString(info.low_read_cell())); + row.push_back(NumberToString(info.read_rows())); + row.push_back(BytesNumberToString(info.read_size()) + "B/s"); + row.push_back(NumberToString(info.write_rows())); + row.push_back(BytesNumberToString(info.write_size()) + "B/s"); + row.push_back(NumberToString(info.scan_rows())); + row.push_back(BytesNumberToString(info.scan_size()) + "B/s"); + printer.AddRow(row); + printer.Print(g_printer_opt); + + std::cout << "\nHardware Info:\n"; + cols = 8; + printer.Reset(cols, "cpu", "mem_used", "net_tx", "net_rx", "dfs_r", "dfs_w", "local_r", + "local_w"); + row.clear(); + row.push_back(NumberToString(info.cpu_usage())); + row.push_back(BytesNumberToString(info.mem_used())); + row.push_back(BytesNumberToString(info.net_tx()) + "B/s"); + row.push_back(BytesNumberToString(info.net_rx()) + "B/s"); + row.push_back(BytesNumberToString(info.dfs_io_r()) + "B/s"); + row.push_back(BytesNumberToString(info.dfs_io_w()) + "B/s"); + row.push_back(BytesNumberToString(info.local_io_r()) + "B/s"); + row.push_back(BytesNumberToString(info.local_io_w()) + "B/s"); + printer.AddRow(row); + printer.Print(g_printer_opt); + + std::cout << "\nOther Infos:\n"; + cols = info.extra_info_size(); + row.clear(); + for (int i = 0; i < cols; ++i) { + row.push_back(info.extra_info(i).name()); + } + printer.Reset(row); + std::vector row_int; + for (int i = 0; i < cols; ++i) { + row_int.push_back(info.extra_info(i).value()); + } + printer.AddRow(row_int); + printer.Print(g_printer_opt); + + std::cout << "\nTablets In this TabletNode:\n"; + ShowTabletList(tablet_list, false, is_x); + return 0; } int32_t ShowTabletNodesInfo(Client* client, bool is_x, ErrorCode* err) { - std::vector infos; - std::shared_ptr client_impl((static_cast(client))->GetClientImpl()); - if (!client_impl->ShowTabletNodesInfo(&infos, err)) { - LOG(ERROR) << "fail to get meta data from tera."; - return -1; - } - - int64_t now = get_micros(); - int cols; - TPrinter printer; - if (is_x) { - cols = 25; - printer.Reset(cols, - " ", "address", "status", "size", "num", - "lread", "r", "rspd", "w", "wspd", - "s", "sspd", "rdly", "rp", "wp", - "sp", "ld", "bs", "mem", "cpu", - "net_tx", "net_rx", "dfs_r", "dfs_w", "start_time"); - std::vector row; - for (size_t i = 0; i < infos.size(); ++i) { - std::map extra; - for (int j = 0; j < infos[i].extra_info_size(); ++j) { - extra[infos[i].extra_info(j).name()] = - NumberToString(infos[i].extra_info(j).value()); - } - - row.clear(); - row.push_back(NumberToString(i)); - row.push_back(infos[i].addr()); - if (now - (int64_t)infos[i].timestamp() > FLAGS_tera_sdk_status_timeout * 1000000) { - // tabletnode status timeout - row.push_back("kZombie"); - } else { - row.push_back(infos[i].status_m()); - } - row.push_back(BytesNumberToString(infos[i].load())); - row.push_back(NumberToString(infos[i].tablet_total())); - row.push_back(NumberToString(infos[i].low_read_cell())); - row.push_back(NumberToString(infos[i].read_rows())); - row.push_back(BytesNumberToString(infos[i].read_size()) + "B"); - row.push_back(NumberToString(infos[i].write_rows())); - row.push_back(BytesNumberToString(infos[i].write_size()) + "B"); - row.push_back(NumberToString(infos[i].scan_rows())); - row.push_back(BytesNumberToString(infos[i].scan_size()) + "B"); - row.push_back(extra["rand_read_delay"] + "ms"); - row.push_back(extra["read_pending"]); - row.push_back(extra["write_pending"]); - row.push_back(extra["scan_pending"]); - row.push_back(NumberToString(infos[i].tablet_onload())); - row.push_back(NumberToString(infos[i].tablet_onbusy())); - row.push_back(BytesNumberToString(infos[i].mem_used())); - row.push_back(NumberToString(infos[i].cpu_usage())); - row.push_back(BytesNumberToString(infos[i].net_tx())); - row.push_back(BytesNumberToString(infos[i].net_rx())); - row.push_back(BytesNumberToString(infos[i].dfs_io_r())); - row.push_back(BytesNumberToString(infos[i].dfs_io_w())); - row.push_back(DateNumberToString(infos[i].process_start_time() / 1000 / 1000)); - printer.AddRow(row); - } - } else { - cols = 8; - printer.Reset(cols, - " ", "address", "status", "workload", - "tablet", "load", "busy", "unload"); - std::vector row; - for (size_t i = 0; i < infos.size(); ++i) { - row.clear(); - row.push_back(NumberToString(i)); - row.push_back(infos[i].addr()); - if (now - (int64_t)infos[i].timestamp() > FLAGS_tera_sdk_status_timeout * 1000000) { - row.push_back("kZombie"); - } else { - row.push_back(infos[i].status_m()); - } - row.push_back(BytesNumberToString(infos[i].load())); - row.push_back(NumberToString(infos[i].tablet_total())); - row.push_back(NumberToString(infos[i].tablet_onload())); - row.push_back(NumberToString(infos[i].tablet_onbusy())); - row.push_back(NumberToString(infos[i].tablet_unloading())); - printer.AddRow(row); - } - } - printer.Print(g_printer_opt); - std::cout << std::endl; - return 0; + std::vector infos; + std::shared_ptr client_impl( + (static_cast(client))->GetClientImpl()); + if (!client_impl->ShowTabletNodesInfo(&infos, err)) { + LOG(ERROR) << "fail to get meta data from tera."; + return -1; + } + + int64_t now = get_micros(); + int cols; + TPrinter printer; + if (is_x) { + cols = 25; + printer.Reset(cols, " ", "address", "status", "size", "num", "lread", "r", "rspd", "w", "wspd", + "s", "sspd", "rdly", "rp", "wp", "sp", "ld", "bs", "mem", "cpu", "net_tx", + "net_rx", "dfs_r", "dfs_w", "start_time"); + std::vector row; + for (size_t i = 0; i < infos.size(); ++i) { + std::map extra; + for (int j = 0; j < infos[i].extra_info_size(); ++j) { + extra[infos[i].extra_info(j).name()] = NumberToString(infos[i].extra_info(j).value()); + } + + row.clear(); + row.push_back(NumberToString(i)); + row.push_back(infos[i].addr()); + if (now - (int64_t)infos[i].timestamp() > FLAGS_tera_sdk_status_timeout * 1000000) { + // tabletnode status timeout + row.push_back("kZombie"); + } else { + row.push_back(infos[i].status_m()); + } + row.push_back(BytesNumberToString(infos[i].load())); + row.push_back(NumberToString(infos[i].tablet_total())); + row.push_back(NumberToString(infos[i].low_read_cell())); + row.push_back(NumberToString(infos[i].read_rows())); + row.push_back(BytesNumberToString(infos[i].read_size()) + "B"); + row.push_back(NumberToString(infos[i].write_rows())); + row.push_back(BytesNumberToString(infos[i].write_size()) + "B"); + row.push_back(NumberToString(infos[i].scan_rows())); + row.push_back(BytesNumberToString(infos[i].scan_size()) + "B"); + row.push_back(extra["rand_read_delay"] + "ms"); + row.push_back(extra["read_pending"]); + row.push_back(extra["write_pending"]); + row.push_back(extra["scan_pending"]); + row.push_back(NumberToString(infos[i].tablet_onload())); + row.push_back(NumberToString(infos[i].tablet_onbusy())); + row.push_back(BytesNumberToString(infos[i].mem_used())); + row.push_back(NumberToString(infos[i].cpu_usage())); + row.push_back(BytesNumberToString(infos[i].net_tx())); + row.push_back(BytesNumberToString(infos[i].net_rx())); + row.push_back(BytesNumberToString(infos[i].dfs_io_r())); + row.push_back(BytesNumberToString(infos[i].dfs_io_w())); + row.push_back(DateNumberToString(infos[i].process_start_time() / 1000 / 1000)); + printer.AddRow(row); + } + } else { + cols = 8; + printer.Reset(cols, " ", "address", "status", "workload", "tablet", "load", "busy", "unload"); + std::vector row; + for (size_t i = 0; i < infos.size(); ++i) { + row.clear(); + row.push_back(NumberToString(i)); + row.push_back(infos[i].addr()); + if (now - (int64_t)infos[i].timestamp() > FLAGS_tera_sdk_status_timeout * 1000000) { + row.push_back("kZombie"); + } else { + row.push_back(infos[i].status_m()); + } + row.push_back(BytesNumberToString(infos[i].load())); + row.push_back(NumberToString(infos[i].tablet_total())); + row.push_back(NumberToString(infos[i].tablet_onload())); + row.push_back(NumberToString(infos[i].tablet_onbusy())); + row.push_back(NumberToString(infos[i].tablet_unloading())); + printer.AddRow(row); + } + } + printer.Print(g_printer_opt); + std::cout << std::endl; + return 0; } int32_t ShowTabletNodesOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc < 2) { - LOG(ERROR) << "args number error: " << argc << ", need >2."; - PrintCmdHelpInfo(argv[1]); - return -1; - } + if (argc < 2) { + LOG(ERROR) << "args number error: " << argc << ", need >2."; + PrintCmdHelpInfo(argv[1]); + return -1; + } - int32_t ret_val; - string cmd = argv[1]; + int32_t ret_val; + string cmd = argv[1]; - if (argc == 3) { - ret_val = ShowSingleTabletNodeInfo(client, argv[2], cmd == "showtsx", err); - } else { - ret_val = ShowTabletNodesInfo(client, cmd == "showtsx", err); - } - return ret_val; + if (argc == 3) { + ret_val = ShowSingleTabletNodeInfo(client, argv[2], cmd == "showtsx", err); + } else { + ret_val = ShowTabletNodesInfo(client, cmd == "showtsx", err); + } + return ret_val; } int32_t ShowOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc < 2) { - LOG(ERROR) << "args number error: " << argc << ", need >2."; - PrintCmdHelpInfo(argv[1]); - return -1; - } - - int32_t ret_val; - std::string cmd = argv[1]; - if (argc == 3) { - ret_val = ShowSingleTable(client, argv[2], cmd == "showx", err); - } else if (argc == 2 && (cmd == "show" || cmd == "showx" || cmd == "showall")) { - ret_val = ShowAllTables(client, cmd == "showx", cmd == "showall", err); - } else { - ret_val = -1; - LOG(ERROR) << "error: arg num: " << argc; - } - return ret_val; + if (argc < 2) { + LOG(ERROR) << "args number error: " << argc << ", need >2."; + PrintCmdHelpInfo(argv[1]); + return -1; + } + + int32_t ret_val; + std::string cmd = argv[1]; + if (argc == 3) { + ret_val = ShowSingleTable(client, argv[2], cmd == "showx", err); + } else if (argc == 2 && (cmd == "show" || cmd == "showx" || cmd == "showall")) { + ret_val = ShowAllTables(client, cmd == "showx", cmd == "showall", err); + } else { + ret_val = -1; + LOG(ERROR) << "error: arg num: " << argc; + } + return ret_val; } int32_t ShowSchemaOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc < 3) { - PrintCmdHelpInfo("showschema"); - return -1; - } + if (argc < 3) { + PrintCmdHelpInfo("showschema"); + return -1; + } - std::string cmd = argv[1]; - std::string table_name = argv[2]; - TableSchema table_schema; + std::string cmd = argv[1]; + std::string table_name = argv[2]; + TableSchema table_schema; - std::shared_ptr client_impl((static_cast(client))->GetClientImpl()); - if (!client_impl->ShowTableSchema(table_name, &table_schema, err)) { - LOG(ERROR) << "table not exist: " << table_name; - return -1; - } - ShowTableSchema(table_schema, cmd == "showschemax"); - return 0; + std::shared_ptr client_impl( + (static_cast(client))->GetClientImpl()); + if (!client_impl->ShowTableSchema(table_name, &table_schema, err)) { + LOG(ERROR) << "table not exist: " << table_name; + return -1; + } + ShowTableSchema(table_schema, cmd == "showschemax"); + return 0; } -void BatchPutCallBack(RowMutation* mutation) { - const ErrorCode& error_code = mutation->GetError(); - if (error_code.GetType() != ErrorCode::kOK) { - LOG(ERROR) << "exception occured, reason:" << error_code.ToString(); - } - int32_t mut_num = mutation->MutationNum(); - - { - // for performance testing - MutexLock locker(&g_stat_lock); - g_total_size += mutation->Size(); - g_key_num += mut_num; - int32_t time_cur = time(NULL); - int32_t time_used = time_cur - g_start_time; - if (time_cur > g_last_time) { - g_last_time = time_cur; - LOG(INFO) << "Write file "<GetError(); + if (error_code.GetType() != ErrorCode::kOK) { + LOG(ERROR) << "exception occured, reason:" << error_code.ToString(); + } + { + // for performance testing + MutexLock locker(&g_stat_lock); + g_total_size += batch_mu->Size(); + int32_t time_cur = time(NULL); + int32_t time_used = time_cur - g_start_time; + if (time_cur > g_last_time) { + g_last_time = time_cur; + LOG(INFO) << g_total_size / 1024.0 / 1024 / (time_used ? time_used : 1) << " MB/S "; + } + } + delete batch_mu; +} - delete mutation; +void BatchPutCallBack(RowMutation* mutation) { + const ErrorCode& error_code = mutation->GetError(); + if (error_code.GetType() != ErrorCode::kOK) { + LOG(ERROR) << "exception occured, reason:" << error_code.ToString(); + } + int32_t mut_num = mutation->MutationNum(); + + { + // for performance testing + MutexLock locker(&g_stat_lock); + g_total_size += mutation->Size(); + g_key_num += mut_num; + int32_t time_cur = time(NULL); + int32_t time_used = time_cur - g_start_time; + if (time_cur > g_last_time) { + g_last_time = time_cur; + LOG(INFO) << "Write file " << g_key_num << " keys " + << g_key_num / (time_used ? time_used : 1) << " keys/S " + << g_total_size / 1024.0 / 1024 / (time_used ? time_used : 1) << " MB/S "; + } + } + + delete mutation; } int32_t BatchPutOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 4) { - LOG(ERROR) << "args number error: " << argc << ", need 4."; - PrintCmdHelpInfo(argv[1]); - return -1; - } - - std::string tablename = argv[2]; - std::string record_file = argv[3]; - TablePtr table(client->OpenTable(tablename, err)); - if (table == NULL) { - LOG(ERROR) << "fail to open table"; - return -1; - } - const int32_t buf_size = 1024 * 1024; - char buf[buf_size]; - std::ifstream stream(record_file.c_str()); - - // input record format: rowkey columnfamily:qualifier value - // or: key:value - std::vector input_v; - g_start_time = time(NULL); - while (stream.getline(buf, buf_size)) { - SplitString(buf, " ", &input_v); - if (input_v.size() != 3 && input_v.size() != 2) { - LOG(ERROR) << "input file format error, skip it: " << buf; - continue; - } - std::string rowkey = input_v[0]; - if (FLAGS_readable && !ParseDebugString(input_v[0], &rowkey)) { - LOG(ERROR) << "input file format error, skip it: " << buf; - continue; - } - std::string family; - std::string qualifier; - std::string value = input_v[input_v.size() - 1]; - if (FLAGS_readable && !ParseDebugString(input_v[input_v.size() - 1], &value)) { - LOG(ERROR) << "input file format error, skip it: " << buf; - continue; - } - RowMutation* mutation = table->NewRowMutation(rowkey); - if (input_v.size() == 2) { - // for kv mode - mutation->Put(value); - } else { - // for table mode, put(family, qulifier, value) - ParseCfQualifier(input_v[1], &family, &qualifier); - mutation->Put(family, qualifier, value); - } - mutation->SetCallBack(BatchPutCallBack); - table->ApplyMutation(mutation); - } - while (!table->IsPutFinished()) { - usleep(100000); - } - - g_end_time = time(NULL); - g_used_time = g_end_time-g_start_time; - LOG(INFO) << "Write done,write_key_num=" << g_key_num << " used_time=" << g_used_time < 5) { + LOG(ERROR) << "args number error: " << argc << ", need 4 or 5."; + PrintCmdHelpInfo(argv[1]); + return -1; + } + + std::string tablename = argv[2]; + std::string record_file = argv[3]; + bool use_batch_mutation = false; + if (argc == 5 && argv[4] == "use_batch_mutation=true") { + use_batch_mutation = true; + } + TablePtr table(client->OpenTable(tablename, err)); + if (table == NULL) { + LOG(ERROR) << "fail to open table"; + return -1; + } + const int32_t buf_size = 1024 * 1024; + char buf[buf_size]; + std::ifstream stream(record_file.c_str()); + + // input record format: rowkey columnfamily:qualifier value + // or: key:value + std::vector input_v; + g_start_time = time(NULL); + int mutation_num = 0; + BatchMutation* batch_mu = nullptr; + while (stream.getline(buf, buf_size)) { + SplitString(buf, " ", &input_v); + if (input_v.size() != 3 && input_v.size() != 2) { + LOG(ERROR) << "input file format error, skip it: " << buf; + continue; + } + std::string rowkey = input_v[0]; + if (FLAGS_readable && !ParseDebugString(input_v[0], &rowkey)) { + LOG(ERROR) << "input file format error, skip it: " << buf; + continue; + } + std::string family; + std::string qualifier; + std::string value = input_v[input_v.size() - 1]; + if (FLAGS_readable && !ParseDebugString(input_v[input_v.size() - 1], &value)) { + LOG(ERROR) << "input file format error, skip it: " << buf; + continue; + } + if (use_batch_mutation) { + if (batch_mu != nullptr && + (mutation_num >= FLAGS_max_mutation_limit || batch_mu->Size() >= kMaxRpcSize)) { + // commit this and reset mutation_num + batch_mu->SetCallBack(BatchMutationCallBack); + table->ApplyMutation(batch_mu); + mutation_num = 0; + } + if (mutation_num == 0) { + batch_mu = table->NewBatchMutation(); + } + ++mutation_num; + if (input_v.size() == 2) { + // for kv mode + batch_mu->Put(rowkey, value); + } else { + // for table mode, put(family, qulifier, value) + ParseCfQualifier(input_v[1], &family, &qualifier); + batch_mu->Put(rowkey, family, qualifier, value); + } + } else { + RowMutation* mutation = table->NewRowMutation(rowkey); + if (input_v.size() == 2) { + // for kv mode + mutation->Put(value); + } else { + // for table mode, put(family, qulifier, value) + ParseCfQualifier(input_v[1], &family, &qualifier); + mutation->Put(family, qualifier, value); + } + mutation->SetCallBack(BatchPutCallBack); + table->ApplyMutation(mutation); + } + } + if (use_batch_mutation && mutation_num > 0) { + // the last and not full batch + batch_mu->SetCallBack(BatchMutationCallBack); + table->ApplyMutation(batch_mu); + } + while (!table->IsPutFinished()) { + usleep(100000); + } + + g_end_time = time(NULL); + g_used_time = g_end_time - g_start_time; + LOG(INFO) << "Write done,write_key_num=" << g_key_num << " used_time=" << g_used_time + << std::endl; + return 0; } int32_t BatchPutInt64Op(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 4) { - LOG(ERROR) << "args number error: " << argc << ", need 4."; - PrintCmdHelpInfo(argv[1]); - return -1; - } + if (argc != 4) { + LOG(ERROR) << "args number error: " << argc << ", need 4."; + PrintCmdHelpInfo(argv[1]); + return -1; + } - std::string tablename = argv[2]; - std::string record_file = argv[3]; - TablePtr table(client->OpenTable(tablename, err)); - if (table == NULL) { - LOG(ERROR) << "fail to open table"; - return -1; - } - const int32_t buf_size = 1024 * 1024; - char buf[buf_size]; - std::ifstream stream(record_file.c_str()); - - // input record format: rowkey columnfamily:qualifier value - // or: key:value - std::vector input_v; - g_start_time = time(NULL); - while (stream.getline(buf, buf_size)) { - SplitString(buf, " ", &input_v); - if (input_v.size() != 3 && input_v.size() != 2) { - LOG(ERROR) << "input file format error, skip it: " << buf; - continue; - } - std::string rowkey = input_v[0]; - if (FLAGS_readable && !ParseDebugString(input_v[0], &rowkey)) { - LOG(ERROR) << "input file format error, skip it: " << buf; - continue; - } - std::string family; - std::string qualifier; - std::string value = input_v[input_v.size() - 1]; - if (FLAGS_readable && !ParseDebugString(input_v[input_v.size() - 1], &value)) { - LOG(ERROR) << "input file format error, skip it: " << buf; - continue; - } - RowMutation* mutation = table->NewRowMutation(rowkey); - int64_t value_int; - if (!StringToNumber(value.c_str(), &value_int)) { - LOG(ERROR) << "invalid Integer number Got: " << value; - return -1; - } - if (input_v.size() == 2) { - // for kv mode - mutation->Put(value_int); - } else { - // for table mode, put(family, qulifier, value) - ParseCfQualifier(input_v[1], &family, &qualifier); - mutation->Put(family, qualifier, value_int); - } - mutation->SetCallBack(BatchPutCallBack); - table->ApplyMutation(mutation); + std::string tablename = argv[2]; + std::string record_file = argv[3]; + TablePtr table(client->OpenTable(tablename, err)); + if (table == NULL) { + LOG(ERROR) << "fail to open table"; + return -1; + } + const int32_t buf_size = 1024 * 1024; + char buf[buf_size]; + std::ifstream stream(record_file.c_str()); + + // input record format: rowkey columnfamily:qualifier value + // or: key:value + std::vector input_v; + g_start_time = time(NULL); + while (stream.getline(buf, buf_size)) { + SplitString(buf, " ", &input_v); + if (input_v.size() != 3 && input_v.size() != 2) { + LOG(ERROR) << "input file format error, skip it: " << buf; + continue; + } + std::string rowkey = input_v[0]; + if (FLAGS_readable && !ParseDebugString(input_v[0], &rowkey)) { + LOG(ERROR) << "input file format error, skip it: " << buf; + continue; + } + std::string family; + std::string qualifier; + std::string value = input_v[input_v.size() - 1]; + if (FLAGS_readable && !ParseDebugString(input_v[input_v.size() - 1], &value)) { + LOG(ERROR) << "input file format error, skip it: " << buf; + continue; } - while (!table->IsPutFinished()) { - usleep(100000); + RowMutation* mutation = table->NewRowMutation(rowkey); + int64_t value_int; + if (!StringToNumber(value.c_str(), &value_int)) { + LOG(ERROR) << "invalid Integer number Got: " << value; + return -1; } - - g_end_time = time(NULL); - g_used_time = g_end_time-g_start_time; - LOG(INFO) << "Write done,write_key_num=" << g_key_num << " used_time=" << g_used_time <Put(value_int); + } else { + // for table mode, put(family, qulifier, value) + ParseCfQualifier(input_v[1], &family, &qualifier); + mutation->Put(family, qualifier, value_int); + } + mutation->SetCallBack(BatchPutCallBack); + table->ApplyMutation(mutation); + } + while (!table->IsPutFinished()) { + usleep(100000); + } + + g_end_time = time(NULL); + g_used_time = g_end_time - g_start_time; + LOG(INFO) << "Write done,write_key_num=" << g_key_num << " used_time=" << g_used_time + << std::endl; + return 0; } void BatchGetCallBack(RowReader* reader) { - while (!reader->Done()) { - { - // for performance testing - MutexLock locker(&g_stat_lock); - g_key_num ++; - g_total_size += reader->RowName().size() - + reader->ColumnName().size() - + sizeof(reader->Timestamp()) - + reader->Value().size(); - int32_t time_cur = time(NULL); - int32_t time_used = time_cur - g_start_time; - if (time_cur > g_last_time) { - g_last_time = time_cur; - LOG(INFO) << "Read file "<RowName() << ":" - << reader->ColumnName() << ":" - << reader->Timestamp() << ":" - << reader->Value() << std::endl; - reader->Next(); - } - delete reader; + while (!reader->Done()) { + { + // for performance testing + MutexLock locker(&g_stat_lock); + g_key_num++; + g_total_size += reader->RowName().size() + reader->ColumnName().size() + + sizeof(reader->Timestamp()) + reader->Value().size(); + int32_t time_cur = time(NULL); + int32_t time_used = time_cur - g_start_time; + if (time_cur > g_last_time) { + g_last_time = time_cur; + LOG(INFO) << "Read file " << g_key_num << " keys " + << g_key_num / (time_used ? time_used : 1) << " keys/S " + << g_total_size / 1024.0 / 1024 / (time_used ? time_used : 1) << " MB/S "; + } + } + std::cout << reader->RowName() << ":" << reader->ColumnName() << ":" << reader->Timestamp() + << ":" << reader->Value() << std::endl; + reader->Next(); + } + delete reader; } int32_t BatchGetOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 4 && argc != 5) { - LOG(ERROR) << "args number error: " << argc << ", need 4 | 5."; - PrintCmdHelpInfo(argv[1]); - return -1; - } - - uint64_t snapshot = 0; - if (argc == 5) { - std::stringstream is; - is << std::string(argv[4]); - is >> snapshot; - } - - std::string tablename = argv[2]; - std::string input_file = argv[3]; - TablePtr table(client->OpenTable(tablename, err)); - if (table == NULL) { - LOG(ERROR) << "fail to open table"; - return -1; - } - const int32_t buf_size = 1024 * 1024; - char buf[buf_size]; - std::ifstream stream(input_file.c_str()); - - // input file format: rowkey [columnfamily|cf:qualifier]... - // std::cout << "rowkey:columnfamily:qualifier:timestamp:value" << std::endl; - std::vector input_v; - while (stream.getline(buf, buf_size)) { - SplitString(buf, " ", &input_v); - if (input_v.size() <= 0) { - LOG(ERROR) << "input file format error: " << buf; - continue; - } - std::string rowkey = input_v[0]; - if (FLAGS_readable && !ParseDebugString(input_v[0], &rowkey)) { - LOG(ERROR) << "input file format error, skip it: " << buf; - continue; - } - RowReader* reader = table->NewRowReader(rowkey); - for (size_t i = 1; i < input_v.size(); ++i) { - std::string& cfqu = input_v[i]; - std::string::size_type pos = cfqu.find(":", 0); - if (pos != std::string::npos) { - // add column - reader->AddColumn(cfqu.substr(0, pos), cfqu.substr(pos + 1)); - } else { - // add columnfamily - reader->AddColumnFamily(cfqu); - } - reader->SetSnapshot(snapshot); - } - reader->SetCallBack(BatchGetCallBack); - table->Get(reader); - } - while (!table->IsGetFinished()) { - // waiting async get finishing - usleep(100000); - } - g_end_time = time(NULL); - g_used_time = g_end_time-g_start_time; - LOG(INFO) << "Read done,write_key_num=" << g_key_num << " used_time=" << g_used_time <> snapshot; + } + + std::string tablename = argv[2]; + std::string input_file = argv[3]; + TablePtr table(client->OpenTable(tablename, err)); + if (table == NULL) { + LOG(ERROR) << "fail to open table"; + return -1; + } + const int32_t buf_size = 1024 * 1024; + char buf[buf_size]; + std::ifstream stream(input_file.c_str()); + + // input file format: rowkey [columnfamily|cf:qualifier]... + // std::cout << "rowkey:columnfamily:qualifier:timestamp:value" << std::endl; + std::vector input_v; + while (stream.getline(buf, buf_size)) { + SplitString(buf, " ", &input_v); + if (input_v.size() <= 0) { + LOG(ERROR) << "input file format error: " << buf; + continue; + } + std::string rowkey = input_v[0]; + if (FLAGS_readable && !ParseDebugString(input_v[0], &rowkey)) { + LOG(ERROR) << "input file format error, skip it: " << buf; + continue; + } + RowReader* reader = table->NewRowReader(rowkey); + for (size_t i = 1; i < input_v.size(); ++i) { + std::string& cfqu = input_v[i]; + std::string::size_type pos = cfqu.find(":", 0); + if (pos != std::string::npos) { + // add column + reader->AddColumn(cfqu.substr(0, pos), cfqu.substr(pos + 1)); + } else { + // add columnfamily + reader->AddColumnFamily(cfqu); + } + reader->SetSnapshot(snapshot); + } + reader->SetCallBack(BatchGetCallBack); + table->Get(reader); + } + while (!table->IsGetFinished()) { + // waiting async get finishing + usleep(100000); + } + g_end_time = time(NULL); + g_used_time = g_end_time - g_start_time; + LOG(INFO) << "Read done,write_key_num=" << g_key_num << " used_time=" << g_used_time << std::endl; + return 0; } void BatchGetInt64CallBack(RowReader* reader) { - while (!reader->Done()) { - { - // for performance testing - MutexLock locker(&g_stat_lock); - g_key_num ++; - g_total_size += reader->RowName().size() - + reader->ColumnName().size() - + sizeof(reader->Timestamp()) - + reader->Value().size(); - int32_t time_cur = time(NULL); - int32_t time_used = time_cur - g_start_time; - if (time_cur > g_last_time) { - g_last_time = time_cur; - LOG(INFO) << "Read file "<Value().c_str()); - int64_t value_int = tmp_data - std::numeric_limits::max(); - std::cout << reader->RowName() << ":" - << reader->ColumnName() << ":" - << reader->Timestamp() << ":" - << value_int << std::endl; - reader->Next(); - } - delete reader; + while (!reader->Done()) { + { + // for performance testing + MutexLock locker(&g_stat_lock); + g_key_num++; + g_total_size += reader->RowName().size() + reader->ColumnName().size() + + sizeof(reader->Timestamp()) + reader->Value().size(); + int32_t time_cur = time(NULL); + int32_t time_used = time_cur - g_start_time; + if (time_cur > g_last_time) { + g_last_time = time_cur; + LOG(INFO) << "Read file " << g_key_num << " keys " + << g_key_num / (time_used ? time_used : 1) << " keys/S " + << g_total_size / 1024.0 / 1024 / (time_used ? time_used : 1) << " MB/S "; + } + } + uint64_t tmp_data = io::DecodeBigEndain(reader->Value().c_str()); + int64_t value_int = tmp_data - std::numeric_limits::max(); + std::cout << reader->RowName() << ":" << reader->ColumnName() << ":" << reader->Timestamp() + << ":" << value_int << std::endl; + reader->Next(); + } + delete reader; } int32_t BatchGetInt64Op(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 4 && argc != 5) { - LOG(ERROR) << "args number error: " << argc << ", need 4 | 5."; - PrintCmdHelpInfo(argv[1]); - return -1; - } - - uint64_t snapshot = 0; - if (argc == 5) { - std::stringstream is; - is << std::string(argv[4]); - is >> snapshot; - } - - std::string tablename = argv[2]; - std::string input_file = argv[3]; - TablePtr table(client->OpenTable(tablename, err)); - if (table == NULL) { - LOG(ERROR) << "fail to open table"; - return -1; - } - const int32_t buf_size = 1024 * 1024; - char buf[buf_size]; - std::ifstream stream(input_file.c_str()); - - // input file format: rowkey [columnfamily|cf:qualifier]... - // std::cout << "rowkey:columnfamily:qualifier:timestamp:value" << std::endl; - std::vector input_v; - while (stream.getline(buf, buf_size)) { - SplitString(buf, " ", &input_v); - if (input_v.size() <= 0) { - LOG(ERROR) << "input file format error: " << buf; - continue; - } - std::string rowkey = input_v[0]; - if (FLAGS_readable && !ParseDebugString(input_v[0], &rowkey)) { - LOG(ERROR) << "input file format error, skip it: " << buf; - continue; + if (argc != 4 && argc != 5) { + LOG(ERROR) << "args number error: " << argc << ", need 4 | 5."; + PrintCmdHelpInfo(argv[1]); + return -1; + } + + uint64_t snapshot = 0; + if (argc == 5) { + std::stringstream is; + is << std::string(argv[4]); + is >> snapshot; + } + + std::string tablename = argv[2]; + std::string input_file = argv[3]; + TablePtr table(client->OpenTable(tablename, err)); + if (table == NULL) { + LOG(ERROR) << "fail to open table"; + return -1; + } + const int32_t buf_size = 1024 * 1024; + char buf[buf_size]; + std::ifstream stream(input_file.c_str()); + + // input file format: rowkey [columnfamily|cf:qualifier]... + // std::cout << "rowkey:columnfamily:qualifier:timestamp:value" << std::endl; + std::vector input_v; + while (stream.getline(buf, buf_size)) { + SplitString(buf, " ", &input_v); + if (input_v.size() <= 0) { + LOG(ERROR) << "input file format error: " << buf; + continue; + } + std::string rowkey = input_v[0]; + if (FLAGS_readable && !ParseDebugString(input_v[0], &rowkey)) { + LOG(ERROR) << "input file format error, skip it: " << buf; + continue; + } + if (input_v.size() == 1) { + // only rowkey explicit, scan all records out + ScanDescriptor desc(rowkey); + ResultStream* result_stream; + desc.SetEnd(rowkey); + if ((result_stream = table->Scan(desc, err)) == NULL) { + LOG(ERROR) << "fail to get records from table: " << tablename; + return -1; + } + + while (!result_stream->Done()) { + { + // for performance testing + MutexLock locker(&g_stat_lock); + g_key_num++; + g_total_size += result_stream->RowName().size() + result_stream->ColumnName().size() + + sizeof(result_stream->Timestamp()) + result_stream->Value().size(); + int32_t time_cur = time(NULL); + int32_t time_used = time_cur - g_start_time; + if (time_cur > g_last_time) { + g_last_time = time_cur; + LOG(INFO) << "Read file " << g_key_num << " keys " + << g_key_num / (time_used ? time_used : 1) << " keys/S " + << g_total_size / 1024.0 / 1024 / (time_used ? time_used : 1) << " MB/S "; + } } - if (input_v.size() == 1) { - // only rowkey explicit, scan all records out - ScanDescriptor desc(rowkey); - ResultStream* result_stream; - desc.SetEnd(rowkey); - if ((result_stream = table->Scan(desc, err)) == NULL) { - LOG(ERROR) << "fail to get records from table: " << tablename; - return -1; - } - while (!result_stream->Done()) { - { - // for performance testing - MutexLock locker(&g_stat_lock); - g_key_num ++; - g_total_size += result_stream->RowName().size() - + result_stream->ColumnName().size() - + sizeof(result_stream->Timestamp()) - + result_stream->Value().size(); - int32_t time_cur = time(NULL); - int32_t time_used = time_cur - g_start_time; - if (time_cur > g_last_time) { - g_last_time = time_cur; - LOG(INFO) << "Read file "<Value().c_str()); - int value_int = tmp_data - std::numeric_limits::max(); - std::cout << result_stream->RowName() << ":" - << result_stream->ColumnName() << ":" - << result_stream->Timestamp() << ":" - << value_int << std::endl; - result_stream->Next(); - } + uint64_t tmp_data = io::DecodeBigEndain(result_stream->Value().c_str()); + int value_int = tmp_data - std::numeric_limits::max(); + std::cout << result_stream->RowName() << ":" << result_stream->ColumnName() << ":" + << result_stream->Timestamp() << ":" << value_int << std::endl; + result_stream->Next(); + } + } else { + // get specific records with RowReader + RowReader* reader = table->NewRowReader(rowkey); + for (size_t i = 1; i < input_v.size(); ++i) { + std::string& cfqu = input_v[i]; + std::string::size_type pos = cfqu.find(":", 0); + if (pos != std::string::npos) { + // add column + reader->AddColumn(cfqu.substr(0, pos), cfqu.substr(pos + 1)); } else { - // get specific records with RowReader - RowReader* reader = table->NewRowReader(rowkey); - for (size_t i = 1; i < input_v.size(); ++i) { - std::string& cfqu = input_v[i]; - std::string::size_type pos = cfqu.find(":", 0); - if (pos != std::string::npos) { - // add column - reader->AddColumn(cfqu.substr(0, pos), cfqu.substr(pos + 1)); - } else { - // add columnfamily - reader->AddColumnFamily(cfqu); - } - reader->SetSnapshot(snapshot); - } - reader->SetCallBack(BatchGetInt64CallBack); - table->Get(reader); + // add columnfamily + reader->AddColumnFamily(cfqu); } - } - while (!table->IsGetFinished()) { - // waiting async get finishing - usleep(100000); - } - g_end_time = time(NULL); - g_used_time = g_end_time-g_start_time; - LOG(INFO) << "Read done,write_key_num=" << g_key_num << " used_time=" << g_used_time <SetSnapshot(snapshot); + } + reader->SetCallBack(BatchGetInt64CallBack); + table->Get(reader); + } + } + while (!table->IsGetFinished()) { + // waiting async get finishing + usleep(100000); + } + g_end_time = time(NULL); + g_used_time = g_end_time - g_start_time; + LOG(INFO) << "Read done,write_key_num=" << g_key_num << " used_time=" << g_used_time << std::endl; + return 0; } -int32_t GetRandomNumKey(int32_t key_size,std::string *p_key){ - std::stringstream ss; - std::string temp_str; - int32_t temp_num; - *p_key = ""; - for(int i=0;i!=key_size;++i) { - temp_num=rand()%10; - ss << temp_num; - ss >> temp_str; - *p_key += temp_str; - ss.clear(); - } - return 0; +int32_t GetRandomNumKey(int32_t key_size, std::string* p_key) { + std::stringstream ss; + std::string temp_str; + int32_t temp_num; + *p_key = ""; + for (int i = 0; i != key_size; ++i) { + temp_num = rand() % 10; + ss << temp_num; + ss >> temp_str; + *p_key += temp_str; + ss.clear(); + } + return 0; } int32_t SafeModeOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc < 3) { - PrintCmdHelpInfo(argv[1]); - return -1; - } - - std::string op = argv[2]; - if (op != "get" && op != "leave" && op != "enter") { - PrintCmdHelpInfo(argv[1]); - return -1; - } + if (argc < 3) { + PrintCmdHelpInfo(argv[1]); + return -1; + } - bool is_safemode = false; - std::vector arg_list; - arg_list.push_back(op); - if (!client->CmdCtrl("safemode", arg_list, &is_safemode, NULL, err)) { - std::cout << "fail to " << op << " safemode" << std::endl; - return -1; - } - if (op == "get") { - if (is_safemode) { - std::cout << "master is in safemode" << std::endl; - } else { - std::cout << "master is not in safemode" << std::endl; - } + std::string op = argv[2]; + if (op != "get" && op != "leave" && op != "enter") { + PrintCmdHelpInfo(argv[1]); + return -1; + } + bool is_safemode = false; + std::vector arg_list; + arg_list.push_back(op); + if ((op == "leave" || op == "enter") && argc == 4) { + arg_list.push_back(argv[3]); + } + std::string status; + if (!client->CmdCtrl("safemode", arg_list, &is_safemode, &status, err)) { + std::cout << "fail to " << op << " safemode" << std::endl; + return -1; + } + if (op == "get") { + if (is_safemode) { + std::cout << "master is in safemode: " << status << std::endl; } else { - std::cout << "master " << op << " safemode success" << std::endl; + std::cout << "master is not in safemode" << std::endl; } + } else { + std::cout << "master " << op << " safemode success" << std::endl; + } - return 0; + return 0; } int32_t CookieOp(Client*, int32_t argc, std::string* argv, ErrorCode*) { - std::string command; - if (argc == 4) { - command = argv[2]; - if (command == "dump") { - return ::tera::sdk::DumpCookieFile(argv[3]); - } - } else if (argc == 5) { - command = argv[2]; - if (command == "findkey") { - return ::tera::sdk::FindKeyInCookieFile(argv[3], argv[4]); - } - } - PrintCmdHelpInfo(argv[1]); - return -1; + std::string command; + if (argc == 4) { + command = argv[2]; + if (command == "dump") { + return ::tera::sdk::DumpCookieFile(argv[3]); + } + } else if (argc == 5) { + command = argv[2]; + if (command == "findkey") { + return ::tera::sdk::FindKeyInCookieFile(argv[3], argv[4]); + } + } + PrintCmdHelpInfo(argv[1]); + return -1; } // e.g. ./teracli kick : int32_t KickTabletServerOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if ((argc != 3)) { - PrintCmdHelpInfo(argv[1]); - return -1; - } - std::string addr(argv[2]); - std::vector arg_list; - arg_list.push_back(addr); - if (!client->CmdCtrl("kick", arg_list, NULL, NULL, err)) { - LOG(ERROR) << "fail to kick tabletserver: " << addr; - return -1; - } - std::cout << "master will kick: " << addr << std::endl; - return 0; + if ((argc != 3)) { + PrintCmdHelpInfo(argv[1]); + return -1; + } + std::string operation = argv[1]; + + std::string addr(argv[2]); + std::vector arg_list; + arg_list.push_back(addr); + if (!client->CmdCtrl(operation, arg_list, NULL, NULL, err)) { + LOG(ERROR) << "fail to kick tabletserver: " << addr; + return -1; + } + std::cout << "master will kick: " << addr << std::endl; + return 0; } int32_t ReloadConfigOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if ((argc != 4) || (std::string(argv[2]) != "config")) { - PrintCmdHelpInfo(argv[1]); - return -1; - } - std::string addr(argv[3]); - - scoped_ptr finder(tera::sdk::NewClusterFinder()); - if (finder->MasterAddr() == addr) { - // master - std::vector arg_list; - if (!client->CmdCtrl("reload config", arg_list, NULL, NULL, err)) { - LOG(ERROR) << "fail to reload config: " << addr; - return -1; - } - } else { - // tabletnode - TsCmdCtrlRequest request; - TsCmdCtrlResponse response; - request.set_sequence_id(0); - request.set_command("reload config"); - common::ThreadPool thread_pool(FLAGS_concurrency); - tabletnode::TabletNodeClient tabletnode_client(&thread_pool, addr, 3600000); - if (!tabletnode_client.CmdCtrl(&request, &response) - || (response.status() != kTabletNodeOk)) { - LOG(ERROR) << "fail to reload config: " << addr; - return -1; - } - } - std::cout << "reload config success" << std::endl; - return 0; -} + if ((argc != 4) || (std::string(argv[2]) != "config")) { + PrintCmdHelpInfo(argv[1]); + return -1; + } + std::string addr(argv[3]); -int32_t CompactTablet(TabletInfo& tablet, int lg) { - CompactTabletRequest request; - CompactTabletResponse response; + scoped_ptr finder(tera::sdk::NewClusterFinder()); + if (finder->MasterAddr() == addr) { + // master + std::vector arg_list; + if (!client->CmdCtrl("reload config", arg_list, NULL, NULL, err)) { + LOG(ERROR) << "fail to reload config: " << addr; + return -1; + } + } else { + // tabletnode + TsCmdCtrlRequest request; + TsCmdCtrlResponse response; request.set_sequence_id(0); - request.set_tablet_name(tablet.table_name); - request.mutable_key_range()->set_key_start(tablet.start_key); - request.mutable_key_range()->set_key_end(tablet.end_key); + request.set_command("reload config"); common::ThreadPool thread_pool(FLAGS_concurrency); - tabletnode::TabletNodeClient tabletnode_client(&thread_pool, tablet.server_addr, FLAGS_compact_timeout); - - std::string path; - if (lg >= 0) { - request.set_lg_no(lg); - path = tablet.path + "/" + NumberToString(lg); - } else { - path = tablet.path; - } - - std::cout << "try compact tablet: " << path - << " on " << tabletnode_client.GetConnectAddr() << std::endl; - - if (!tabletnode_client.CompactTablet(&request, &response)) { - LOG(ERROR) << "no response from [" - << tabletnode_client.GetConnectAddr() << "]"; - return -7; - } + tabletnode::TabletNodeClient tabletnode_client(&thread_pool, addr, 3600000); + if (!tabletnode_client.CmdCtrl(&request, &response) || (response.status() != kTabletNodeOk)) { + LOG(ERROR) << "fail to reload config: " << addr; + return -1; + } + } + std::cout << "reload config success" << std::endl; + return 0; +} - if (response.status() != kTabletNodeOk) { - LOG(ERROR) << "fail to compact tablet: " << path - << ", status: " << StatusCodeToString(response.status()); - return -1; - } +int32_t CompactTablet(TabletInfo& tablet, int lg) { + CompactTabletRequest request; + CompactTabletResponse response; + request.set_sequence_id(0); + request.set_tablet_name(tablet.table_name); + request.mutable_key_range()->set_key_start(tablet.start_key); + request.mutable_key_range()->set_key_end(tablet.end_key); + common::ThreadPool thread_pool(FLAGS_concurrency); + tabletnode::TabletNodeClient tabletnode_client(&thread_pool, tablet.server_addr, + FLAGS_compact_timeout); + + std::string path; + if (lg >= 0) { + request.set_lg_no(lg); + path = tablet.path + "/" + NumberToString(lg); + } else { + path = tablet.path; + } + + std::cout << "try compact tablet: " << path << " on " << tabletnode_client.GetConnectAddr() + << std::endl; + + if (!tabletnode_client.CompactTablet(&request, &response)) { + LOG(ERROR) << "no response from [" << tabletnode_client.GetConnectAddr() << "]"; + return -7; + } + + if (response.status() != kTabletNodeOk) { + LOG(ERROR) << "fail to compact tablet: " << path + << ", status: " << StatusCodeToString(response.status()); + return -1; + } - if (response.compact_status() != kTableCompacted) { - LOG(ERROR) << "fail to compact tablet: " << path - << ", status: " << StatusCodeToString(response.compact_status()); - return -1; - } + if (response.compact_status() != kTableCompacted) { + LOG(ERROR) << "fail to compact tablet: " << path + << ", status: " << StatusCodeToString(response.compact_status()); + return -1; + } - std::cout << "compact tablet success: " << path << ", data size: " - << BytesNumberToString(response.compact_size()) << std::endl; - return 0; + std::cout << "compact tablet success: " << path + << ", data size: " << BytesNumberToString(response.compact_size()) << std::endl; + return 0; } static bool ComputeCompactInsertKeys(RawKey rawkey, std::string* start_key, std::string* end_key) { - static std::string x0("\x0", 1); - static std::string x1("\x1", 1); - *start_key = (rawkey == Readable ? *start_key + x1 : *start_key + x0); - - // pop all '\x0' charcters at the tailing of end_key. Note that Readable should not contain any - // '\x0' characters but here we do not - while (end_key->size() > 0) { - unsigned char last = end_key->at(end_key->size() - 1); - if (last == '\x0') { - end_key->pop_back(); - } - // for Readable key, if the last nonzero character of end_key is '\x1', the wanted key that - // is barely smaller than end_key is computed as: end_key.substr(0, end_key.rfind('\x1')); - // eg: end_key: abcde'\x1' -> wanted key: abcde - else if (rawkey == Readable && last == '\x1'){ - end_key->pop_back(); - return true; - } - else { - break; - } - } - // for other case, the wanted key that is barely smaller than end_key is computed as: minus the - // last char of end_key with 1 and append '\x255' to end key until it reaches the max keysize - // allowed. Notice that the last char of end_key will not be '\x0' for Binary key and not be - // '\x0' nor '\x1' for Readable key here - if (end_key->size() > 0) { - (*end_key)[end_key->size() - 1] = char((*end_key)[end_key->size() - 1] - 1); - } - end_key->resize(kRowkeySize - 1, char(255)); - return true; + static std::string x0("\x0", 1); + static std::string x1("\x1", 1); + *start_key = (rawkey == Readable ? *start_key + x1 : *start_key + x0); + + // pop all '\x0' charcters at the tailing of end_key. Note that Readable + // should not contain any + // '\x0' characters but here we do not + while (end_key->size() > 0) { + unsigned char last = end_key->at(end_key->size() - 1); + if (last == '\x0') { + end_key->pop_back(); + } + // for Readable key, if the last nonzero character of end_key is '\x1', the + // wanted key that + // is barely smaller than end_key is computed as: end_key.substr(0, + // end_key.rfind('\x1')); + // eg: end_key: abcde'\x1' -> wanted key: abcde + else if (rawkey == Readable && last == '\x1') { + end_key->pop_back(); + return true; + } else { + break; + } + } + // for other case, the wanted key that is barely smaller than end_key is + // computed as: minus the + // last char of end_key with 1 and append '\x255' to end key until it reaches + // the max keysize + // allowed. Notice that the last char of end_key will not be '\x0' for Binary + // key and not be + // '\x0' nor '\x1' for Readable key here + if (end_key->size() > 0) { + (*end_key)[end_key->size() - 1] = char((*end_key)[end_key->size() - 1] - 1); + } + end_key->resize(kRowkeySize - 1, char(255)); + return true; } void CompactPreprocess(TableImplPtr table, const std::vector& tablet_infos) { + std::vector readers; + for (std::size_t i = 0; i < tablet_infos.size(); ++i) { + const TabletInfo& tablet_info = tablet_infos[i]; + std::string start_key(tablet_info.start_key); + std::string end_key(tablet_info.end_key); + ComputeCompactInsertKeys(table->GetTableSchema().raw_key(), &start_key, &end_key); std::vector readers; - for (std::size_t i = 0; i < tablet_infos.size(); ++i) { - const TabletInfo& tablet_info = tablet_infos[i]; - std::string start_key(tablet_info.start_key); - std::string end_key(tablet_info.end_key); - ComputeCompactInsertKeys(table->GetTableSchema().raw_key(), &start_key, &end_key); - std::vector readers; - RowReader* start_reader = table->NewRowReader(start_key); - RowReader* end_reader = table->NewRowReader(end_key); - readers.push_back(start_reader); - readers.push_back(end_reader); - } - if (readers.size() > 0) { - table->Get(readers); - } - std::vector mutations; - for (std::size_t i = 0; i < readers.size(); ++i) { - if (readers[i]->GetError().GetType() == tera::ErrorCode::kNotFound) { - RowMutation* mutation = table->NewRowMutation(readers[i]->RowKey()); - mutation->DeleteRow(); - mutations.push_back(mutation); - } - delete readers[i]; - } - if (mutations.size() > 0) { - table->ApplyMutation(mutations); - for (std::size_t i = 0; i < mutations.size(); ++i) { - if (mutations[i]->GetError().GetType() != tera::ErrorCode::kOK) { - LOG(WARNING) <<"write key " << DebugString(mutations[i]->RowKey()) - << " failed, error: " << mutations[i]->GetError().ToString(); - } - delete mutations[i]; - } - } + RowReader* start_reader = table->NewRowReader(start_key); + RowReader* end_reader = table->NewRowReader(end_key); + readers.push_back(start_reader); + readers.push_back(end_reader); + } + if (readers.size() > 0) { + table->Get(readers); + } + std::vector mutations; + for (std::size_t i = 0; i < readers.size(); ++i) { + if (readers[i]->GetError().GetType() == tera::ErrorCode::kNotFound) { + RowMutation* mutation = table->NewRowMutation(readers[i]->RowKey()); + mutation->DeleteRow(); + mutations.push_back(mutation); + } + delete readers[i]; + } + if (mutations.size() > 0) { + table->ApplyMutation(mutations); + for (std::size_t i = 0; i < mutations.size(); ++i) { + if (mutations[i]->GetError().GetType() != tera::ErrorCode::kOK) { + LOG(WARNING) << "write key " << DebugString(mutations[i]->RowKey()) + << " failed, error: " << mutations[i]->GetError().ToString(); + } + delete mutations[i]; + } + } } int32_t CompactTabletOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 4) { - PrintCmdHelpInfo(argv[1]); - return -1; - } - - std::vector subs; - std::string table, tablet, tablet_path; - int lg = -1; - SplitString(argv[3], "/", &subs); - if (subs.size() == 2) { - table = subs[0]; - tablet = subs[1]; - tablet_path = table + "/" + tablet; - } else if (subs.size() == 3) { - table = subs[0]; - tablet = subs[1]; - tablet_path = table + "/" + tablet; - if (!StringToNumber(subs[2], &lg)) { - LOG(ERROR) << "lg no error: " << subs[2]; - return -5; - } - } else if (subs.size() != 2 && subs.size() != 3) { - LOG(ERROR) << "tablet path error, format [table/tablet] " - << "or [table/tablet/lg]: " << tablet_path; - return -2; - } - - std::vector tablet_list; - if (!client->GetTabletLocation(table, &tablet_list, err)) { - LOG(ERROR) << "fail to list tablet info"; - return -3; - } - - std::vector::iterator tablet_it = tablet_list.begin(); - for (; tablet_it != tablet_list.end(); ++tablet_it) { - if (tablet_it->path == tablet_path) { - break; - } - } - if (tablet_it == tablet_list.end()) { - LOG(ERROR) << "fail to find tablet: " << tablet_path - << ", total tablets: " << tablet_list.size(); - return -4; - } - std::string command = argv[1]; - if (command == "compactx") - { - TablePtr table_ptr; - table_ptr.reset(client->OpenTable(table, err)); - TableImplPtr table_impl(static_cast(table_ptr.get())->GetTableImpl()); - if (table_impl == NULL) { - LOG(ERROR) << "fail to open table: " << table; - return -5; - } - std::vector tablet_infos(1, *tablet_it); - CompactPreprocess(table_impl, tablet_infos); + if (argc != 4) { + PrintCmdHelpInfo(argv[1]); + return -1; + } + + std::vector subs; + std::string table, tablet, tablet_path; + int lg = -1; + SplitString(argv[3], "/", &subs); + if (subs.size() == 2) { + table = subs[0]; + tablet = subs[1]; + tablet_path = table + "/" + tablet; + } else if (subs.size() == 3) { + table = subs[0]; + tablet = subs[1]; + tablet_path = table + "/" + tablet; + if (!StringToNumber(subs[2], &lg)) { + LOG(ERROR) << "lg no error: " << subs[2]; + return -5; + } + } else if (subs.size() != 2 && subs.size() != 3) { + LOG(ERROR) << "tablet path error, format [table/tablet] " + << "or [table/tablet/lg]: " << tablet_path; + return -2; + } + + std::vector tablet_list; + if (!client->GetTabletLocation(table, &tablet_list, err)) { + LOG(ERROR) << "fail to list tablet info"; + return -3; + } + + std::vector::iterator tablet_it = tablet_list.begin(); + for (; tablet_it != tablet_list.end(); ++tablet_it) { + if (tablet_it->path == tablet_path) { + break; + } + } + if (tablet_it == tablet_list.end()) { + LOG(ERROR) << "fail to find tablet: " << tablet_path + << ", total tablets: " << tablet_list.size(); + return -4; + } + std::string command = argv[1]; + if (command == "compactx") { + TablePtr table_ptr; + table_ptr.reset(client->OpenTable(table, err)); + TableImplPtr table_impl(static_cast(table_ptr.get())->GetTableImpl()); + if (table_impl == NULL) { + LOG(ERROR) << "fail to open table: " << table; + return -5; } + std::vector tablet_infos(1, *tablet_it); + CompactPreprocess(table_impl, tablet_infos); + } - return CompactTablet(*tablet_it, lg); + return CompactTablet(*tablet_it, lg); } -bool GetTabletInfo(Client* client, const std::string& tablename, - const std::string& tablet_path, TabletInfo* tablet, - ErrorCode* err) { - std::vector tablet_list; - if (!client->GetTabletLocation(tablename, &tablet_list, err)) { - LOG(ERROR) << "fail to list tablet info"; - return false; - } - - std::vector::iterator tablet_it = tablet_list.begin(); - for (; tablet_it != tablet_list.end(); ++tablet_it) { - if (tablet_it->path == tablet_path) { - *tablet = *tablet_it; - break; - } - } - if (tablet_it == tablet_list.end()) { - LOG(ERROR) << "fail to find tablet: " << tablet_path - << ", total tablets: " << tablet_list.size(); - return false; - } - return true; +bool GetTabletInfo(Client* client, const std::string& tablename, const std::string& tablet_path, + TabletInfo* tablet, ErrorCode* err) { + std::vector tablet_list; + if (!client->GetTabletLocation(tablename, &tablet_list, err)) { + LOG(ERROR) << "fail to list tablet info"; + return false; + } + + std::vector::iterator tablet_it = tablet_list.begin(); + for (; tablet_it != tablet_list.end(); ++tablet_it) { + if (tablet_it->path == tablet_path) { + *tablet = *tablet_it; + break; + } + } + if (tablet_it == tablet_list.end()) { + LOG(ERROR) << "fail to find tablet: " << tablet_path + << ", total tablets: " << tablet_list.size(); + return false; + } + return true; } int32_t ScanTabletOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc < 4) { - PrintCmdHelpInfo(argv[1]); - return -1; - } - - std::vector subs; - std::string op = argv[2]; - std::string tablet_path = argv[3]; - SplitString(tablet_path, "/", &subs); - if (subs.size() != 2) { - LOG(ERROR) << "tablet path error, format [table/tablet]: " << tablet_path; - return -2; - } - - TablePtr table(client->OpenTable(subs[0], err)); - if (table == NULL) { - LOG(ERROR) << "fail to open table: " << subs[0]; - return -3; - } - - TabletInfo tablet; - if (!GetTabletInfo(client, subs[0], tablet_path, &tablet, err)) { - LOG(ERROR) << "fail to parse tablet: " << tablet_path; - return -4; - } - - ScanDescriptor desc(tablet.start_key); - desc.SetEnd(tablet.end_key); - - if (op == "scanallv") { - desc.SetMaxVersions(std::numeric_limits::max()); - } - - if (argc > 4 && !desc.SetFilter(argv[4])) { - LOG(ERROR) << "fail to parse scan schema: " << argv[4]; - return -5; - } - - int32_t ret = ScanRange(table, desc, err); - return ret; + if (argc != 4) { + PrintCmdHelpInfo(argv[1]); + return -1; + } + + std::vector subs; + std::string op = argv[2]; + std::string tablet_path = argv[3]; + SplitString(tablet_path, "/", &subs); + if (subs.size() != 2) { + LOG(ERROR) << "tablet path error, format [table/tablet]: " << tablet_path; + return -2; + } + + TablePtr table(client->OpenTable(subs[0], err)); + if (table == NULL) { + LOG(ERROR) << "fail to open table: " << subs[0]; + return -3; + } + + TabletInfo tablet; + if (!GetTabletInfo(client, subs[0], tablet_path, &tablet, err)) { + LOG(ERROR) << "fail to parse tablet: " << tablet_path; + return -4; + } + + ScanDescriptor desc(tablet.start_key); + desc.SetEnd(tablet.end_key); + + if (op == "scanallv") { + desc.SetMaxVersions(std::numeric_limits::max()); + } + + int32_t ret = ScanRange(table, desc, err); + return ret; } int32_t TabletOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if ((argc != 4) && (argc != 5) && (argc != 6)) { - PrintCmdHelpInfo(argv[1]); - return -1; - } - - std::string op = argv[2]; - std::string tablet_id = argv[3]; - std::string server_addr; - - std::vector arg_list; - arg_list.push_back(op); - arg_list.push_back(tablet_id); - if (op == "compact" || op == "compactx") { - return CompactTabletOp(client, argc, argv, err); - } else if (op == "scan" || op == "scanallv") { - return ScanTabletOp(client, argc, argv, err); - } else if (argc == 4 && (op == "reload" || op == "merge" || op == "split")) { - // nothing to do - } else if (argc == 5 && (op == "reloadx" || op == "move" || op == "split")) { - // reloadx->lg_list move->server_addr split->split_key - arg_list.push_back(argv[4]); - } else if (argc == 6 && op == "movex") { - arg_list.push_back(argv[4]); // server_addr - arg_list.push_back(argv[5]); // lg_list - } else { - PrintCmdHelpInfo(argv[1]); - return -1; - } - if (!client->CmdCtrl("tablet", arg_list, NULL, NULL, err)) { - LOG(ERROR) << "fail to " << op << " tablet " << tablet_id; - return -1; - } - std::cout << op << " tablet " << tablet_id << " success" << std::endl; + if ((argc != 4) && (argc != 5) && (argc != 6)) { + PrintCmdHelpInfo(argv[1]); + return -1; + } + + std::string op = argv[2]; + std::string tablet_id = argv[3]; + std::string server_addr; + + std::vector arg_list; + arg_list.push_back(op); + arg_list.push_back(tablet_id); + if (op == "compact" || op == "compactx") { + return CompactTabletOp(client, argc, argv, err); + } else if (op == "scan" || op == "scanallv") { + return ScanTabletOp(client, argc, argv, err); + } else if (argc == 4 && (op == "reload" || op == "merge" || op == "split")) { + // nothing to do + } else if (argc == 5 && (op == "reloadx" || op == "move" || op == "split")) { + // reloadx->lg_list move->server_addr split->split_key + arg_list.push_back(argv[4]); + } else if (argc == 6 && op == "movex") { + arg_list.push_back(argv[4]); // server_addr + arg_list.push_back(argv[5]); // lg_list + } else { + PrintCmdHelpInfo(argv[1]); + return -1; + } + if (!client->CmdCtrl("tablet", arg_list, NULL, NULL, err)) { + LOG(ERROR) << "fail to " << op << " tablet " << tablet_id; + return -1; + } + std::cout << op << " tablet " << tablet_id << " success" << std::endl; - return 0; + return 0; } void ReorderTabletList(std::vector* tablets) { - if (tablets->size() <= 1) { - return; - } - - // ordered all tablets by ts - std::map > tablet_map; - size_t max_tablet = 0; - for (size_t i = 0; i < tablets->size(); ++i) { - std::vector& v = tablet_map[tablets->at(i).server_addr]; - v.push_back(tablets->at(i)); - if (v.size() > max_tablet) { - max_tablet = v.size(); - } - } - - size_t ts_num = tablet_map.size(); - std::vector > tablet_vector; - tablet_vector.resize(ts_num); - std::map >::iterator it = - tablet_map.begin(); - for (size_t i = 0; it != tablet_map.end(); ++it, ++i) { - tablet_vector[i].swap(it->second); - } - - // recover tablet list - std::vector tablets_t; - for (size_t y = 0; y < max_tablet; y++) { - for (size_t x = 0; x < ts_num; x++) { - if (y < tablet_vector[x].size()) { - tablets_t.push_back(tablet_vector[x][y]); - } - } - } - CHECK(tablets_t.size() == tablets->size()); - tablets->swap(tablets_t); + if (tablets->size() <= 1) { + return; + } + + // ordered all tablets by ts + std::map> tablet_map; + size_t max_tablet = 0; + for (size_t i = 0; i < tablets->size(); ++i) { + std::vector& v = tablet_map[tablets->at(i).server_addr]; + v.push_back(tablets->at(i)); + if (v.size() > max_tablet) { + max_tablet = v.size(); + } + } + + size_t ts_num = tablet_map.size(); + std::vector> tablet_vector; + tablet_vector.resize(ts_num); + std::map>::iterator it = tablet_map.begin(); + for (size_t i = 0; it != tablet_map.end(); ++it, ++i) { + tablet_vector[i].swap(it->second); + } + + // recover tablet list + std::vector tablets_t; + for (size_t y = 0; y < max_tablet; y++) { + for (size_t x = 0; x < ts_num; x++) { + if (y < tablet_vector[x].size()) { + tablets_t.push_back(tablet_vector[x][y]); + } + } + } + CHECK(tablets_t.size() == tablets->size()); + tablets->swap(tablets_t); } bool FiltrateTabletsByFile(std::vector& tablet_list) { - if (FLAGS_tablets_file.empty()) { - return true; - } - std::ifstream fin(FLAGS_tablets_file.c_str()); - if (fin.fail()) { - LOG(ERROR) << "fail to read tablets file: " << FLAGS_tablets_file; - return false; - } - std::set tablets_filter; - string str; - while (fin >> str) { - tablets_filter.insert(str); - } - - std::vector tablets; - std::vector::iterator tablet_it = tablet_list.begin(); - for (; tablet_it != tablet_list.end(); ++tablet_it) { - if (tablets_filter.find(tablet_it->path) != tablets_filter.end()) { - tablets.push_back(*tablet_it); - } - } - tablet_list.swap(tablets); + if (FLAGS_tablets_file.empty()) { return true; + } + std::ifstream fin(FLAGS_tablets_file.c_str()); + if (fin.fail()) { + LOG(ERROR) << "fail to read tablets file: " << FLAGS_tablets_file; + return false; + } + std::set tablets_filter; + string str; + while (fin >> str) { + tablets_filter.insert(str); + } + + std::vector tablets; + std::vector::iterator tablet_it = tablet_list.begin(); + for (; tablet_it != tablet_list.end(); ++tablet_it) { + if (tablets_filter.find(tablet_it->path) != tablets_filter.end()) { + tablets.push_back(*tablet_it); + } + } + tablet_list.swap(tablets); + return true; } int32_t CompactOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 3) { - PrintCmdHelpInfo(argv[1]); - return -1; - } - - std::string tablename = argv[2]; - std::vector tablet_list; - if (!client->GetTabletLocation(tablename, &tablet_list, err)) { - LOG(ERROR) << "fail to list tablets info: " << tablename; - return -3; - } - if (!FiltrateTabletsByFile(tablet_list)) { - return -4; - } - ReorderTabletList(&tablet_list); - - std::string command = argv[1]; - if (command == "compactx") - { - TablePtr table; - table.reset(client->OpenTable(tablename, err)); - TableImplPtr table_impl(static_cast(table.get())->GetTableImpl()); - if (table_impl == NULL) { - LOG(ERROR) << "fail to open table: " << tablename; - return -5; - } - std::cout << "begin compact preprocess tablet: " << tablename << std::endl; - CompactPreprocess(table_impl, tablet_list); - } - - int conc = FLAGS_concurrency; - if (conc <= 0 || conc > 1000) { - LOG(ERROR) << "compact concurrency illegal: " << conc; - } - - ThreadPool thread_pool(conc); - std::vector::iterator tablet_it = tablet_list.begin(); - for (; tablet_it != tablet_list.end(); ++tablet_it) { - ThreadPool::Task task = - std::bind(&CompactTablet, *tablet_it, FLAGS_lg); - thread_pool.AddTask(task); - } - while (thread_pool.PendingNum() > 0) { - std::cerr << get_time_str(time(NULL)) << " " - << thread_pool.PendingNum() - << " tablets waiting for compact ..." << std::endl; - sleep(5); - } - thread_pool.Stop(true); - return 0; -} - -int32_t FindMasterOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 2) { - PrintCmdHelpInfo(argv[1]); - return -1; - } - scoped_ptr finder(tera::sdk::NewClusterFinder()); - std::cout << "master addr:< " << finder->MasterAddr() << " >\n"; - return 0; -} - -int32_t FindTsOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 3 && argc != 4) { - PrintCmdHelpInfo(argv[1]); - return -1; - } - - std::string tablename = argv[2]; + if (argc != 3) { + PrintCmdHelpInfo(argv[1]); + return -1; + } + + std::string tablename = argv[2]; + std::vector tablet_list; + if (!client->GetTabletLocation(tablename, &tablet_list, err)) { + LOG(ERROR) << "fail to list tablets info: " << tablename; + return -3; + } + if (!FiltrateTabletsByFile(tablet_list)) { + return -4; + } + ReorderTabletList(&tablet_list); + + std::string command = argv[1]; + if (command == "compactx") { TablePtr table; table.reset(client->OpenTable(tablename, err)); TableImplPtr table_impl(static_cast(table.get())->GetTableImpl()); if (table_impl == NULL) { - LOG(ERROR) << "fail to open table"; - return -1; - } - - if (argc == 4) { - std::string rowkey = argv[3]; - table_impl->ScanMetaTable(rowkey, rowkey + '\0'); - - TabletMeta meta; - if (!table_impl->GetTabletMetaForKey(rowkey, &meta)) { - LOG(ERROR) << "fail to get tablet meta for " << rowkey; - return -1; - } - std::cout << meta.server_addr() << "\t" << meta.path() << std::endl; - return 0; - } - - table_impl->ScanMetaTable("", ""); - const int32_t buf_size = 1024 * 1024; - char rowkey[buf_size]; - while (std::cin.getline(rowkey, buf_size)) { - TabletMeta meta; - if (!table_impl->GetTabletMetaForKey(rowkey, &meta)) { - LOG(ERROR) << "fail to get tablet meta for " << rowkey; - continue; - } - std::cout << rowkey << "\t" << meta.server_addr() << "\t" << meta.path() << std::endl; - } - - return 0; -} - -void WriteToStream(std::ofstream& ofs, - const std::string& key, - const std::string& value) { - uint32_t key_size = key.size(); - uint32_t value_size = value.size(); - ofs.write((char*)&key_size, sizeof(key_size)); - ofs.write(key.data(), key_size); - ofs.write((char*)&value_size, sizeof(value_size)); - ofs.write(value.data(), value_size); + LOG(ERROR) << "fail to open table: " << tablename; + return -5; + } + std::cout << "begin compact preprocess tablet: " << tablename << std::endl; + CompactPreprocess(table_impl, tablet_list); + } + + int conc = FLAGS_concurrency; + if (conc <= 0 || conc > 1000) { + LOG(ERROR) << "compact concurrency illegal: " << conc; + } + + ThreadPool thread_pool(conc); + std::vector::iterator tablet_it = tablet_list.begin(); + for (; tablet_it != tablet_list.end(); ++tablet_it) { + ThreadPool::Task task = std::bind(&CompactTablet, *tablet_it, FLAGS_lg); + thread_pool.AddTask(task); + } + while (thread_pool.PendingNum() > 0) { + std::cerr << get_time_str(time(NULL)) << " " << thread_pool.PendingNum() + << " tablets waiting for compact ..." << std::endl; + sleep(5); + } + thread_pool.Stop(true); + return 0; } -void WriteTable(const TableMeta& meta, std::ofstream& ofs) { - std::string key, value; - MakeMetaTableKeyValue(meta, &key, &value); - WriteToStream(ofs, key, value); -} - -void WriteTablet(const TabletMeta& meta, std::ofstream& ofs) { - std::string key, value; - MakeMetaTableKeyValue(meta, &key, &value); - WriteToStream(ofs, key, value); +int32_t FindMasterOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { + if (argc != 2) { + PrintCmdHelpInfo(argv[1]); + return -1; + } + scoped_ptr finder(tera::sdk::NewClusterFinder()); + std::cout << "master addr:< " << finder->MasterAddr() << " >\n"; + return 0; } -int32_t ProcessMeta(const std::string& op, const TableMetaList& table_list, - const TabletMetaList& tablet_list) { - int32_t table_num = table_list.meta_size(); - int32_t tablet_num = tablet_list.meta_size(); - if (table_num == 0 && tablet_num == 0) { - std::cout << "meta table is empty" << std::endl; - return 0; - } - - std::ofstream bak; - if (op == "backup" || op == "repair") { - bak.open("meta.bak", std::ofstream::trunc|std::ofstream::binary); - } - - for (int32_t i = 0; i < table_num; ++i) { - const tera::TableMeta& meta = table_list.meta(i); - if (op == "show") { - std::cout << "table: " << meta.table_name() << std::endl; - int32_t lg_size = meta.schema().locality_groups_size(); - for (int32_t lg_id = 0; lg_id < lg_size; lg_id++) { - const tera::LocalityGroupSchema& lg = - meta.schema().locality_groups(lg_id); - std::cout << " lg" << lg_id << ": " << lg.name() << " (" - << lg.store_type() << ", " - << lg.compress_type() << ", " - << lg.block_size() << ")" << std::endl; - } - int32_t cf_size = meta.schema().column_families_size(); - for (int32_t cf_id = 0; cf_id < cf_size; cf_id++) { - const tera::ColumnFamilySchema& cf = - meta.schema().column_families(cf_id); - std::cout << " cf" << cf_id << ": " << cf.name() << " (" - << cf.locality_group() << ", " - << cf.type() << ", " - << cf.max_versions() << ", " - << cf.time_to_live() << ")" << std::endl; - } - } - if (op == "backup" || op == "repair") { - WriteTable(meta, bak); - } - } - - tera::TabletMeta last; - bool table_start = true; - for (int32_t i = 0; i < tablet_num; ++i) { - const tera::TabletMeta& meta = tablet_list.meta(i); - if (op == "show") { - std::cout << "tablet: " << meta.table_name() << " [" - << meta.key_range().key_start() << "," - << meta.key_range().key_end() << "], " - << meta.path() << ", " << meta.server_addr() << ", " - << meta.size() << ", " - << StatusCodeToString(meta.status()) << ", " - << StatusCodeToString(meta.compact_status()) << std::endl; - } - if (op == "backup") { - WriteTablet(meta, bak); - } - // check self range - if (!meta.key_range().key_end().empty() && - meta.key_range().key_start() >= meta.key_range().key_end()) { - std::cerr << "invalid tablet " << meta.table_name() << " [" - << meta.key_range().key_start() << "," - << meta.key_range().key_end() << "], " - << meta.path() << ", " << meta.server_addr() << ", " - << meta.size() << ", " - << StatusCodeToString(meta.status()) << ", " - << StatusCodeToString(meta.compact_status()) << std::endl; - // ignore invalid tablet - continue; - } +int32_t FindTsOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { + if (argc != 3 && argc != 4) { + PrintCmdHelpInfo(argv[1]); + return -1; + } + + std::string tablename = argv[2]; + TablePtr table; + table.reset(client->OpenTable(tablename, err)); + TableImplPtr table_impl(static_cast(table.get())->GetTableImpl()); + if (table_impl == NULL) { + LOG(ERROR) << "fail to open table"; + return -1; + } - tera::TabletMeta repair_meta; - bool covered = false; - // check miss/cover/overlap with previous tablet - if (!table_start) { - assert(!last.key_range().key_end().empty()); - if (meta.table_name() != last.table_name()) { - std::cerr << "miss tablet: " << last.table_name() << ", [" - << last.key_range().key_end() << ",-]" << std::endl; - if (op == "repair") { - tera::TabletMeta miss_meta; - miss_meta.set_table_name(last.table_name()); - miss_meta.mutable_key_range()->set_key_start(last.key_range().key_end()); - miss_meta.mutable_key_range()->set_key_end(""); - WriteTablet(miss_meta, bak); - } - table_start = true; - } else if (meta.key_range().key_start() > last.key_range().key_end()) { - std::cerr << "miss tablet " << last.table_name() << " [" - << last.key_range().key_end() << "," - << meta.key_range().key_start() << "]" << std::endl; - if (op == "repair") { - tera::TabletMeta miss_meta; - miss_meta.set_table_name(last.table_name()); - miss_meta.mutable_key_range()->set_key_start(last.key_range().key_end()); - miss_meta.mutable_key_range()->set_key_end(meta.key_range().key_start()); - WriteTablet(miss_meta, bak); - WriteTablet(meta, bak); - } - } else if (meta.key_range().key_start() == last.key_range().key_end()) { - if (op == "repair") { - WriteTablet(meta, bak); - } - } else if (!meta.key_range().key_end().empty() - && meta.key_range().key_end() <= last.key_range().key_end()) { - std::cerr << "tablet " << meta.table_name() << " [" - << meta.key_range().key_start() << "," - << meta.key_range().key_end() << "] is coverd by tablet " - << last.table_name() << " [" - << last.key_range().key_start() << "," - << last.key_range().key_end() << "]" << std::endl; - covered = true; - } else { - std::cerr << "tablet " << meta.table_name() << " [" - << meta.key_range().key_start() << "," - << meta.key_range().key_end() << "] overlap with tablet " - << last.table_name() << " [" - << last.key_range().key_start() << "," - << last.key_range().key_end() << "]" << std::endl; - if (op == "repair") { - tera::TabletMeta repair_meta = meta; - repair_meta.mutable_key_range()->set_key_start(last.key_range().key_end()); - WriteTablet(repair_meta, bak); - } - } - } - if (table_start) { - if (meta.table_name() == last.table_name()) { - std::cerr << "tablet " << meta.table_name() << " [" - << meta.key_range().key_start() << "," - << meta.key_range().key_end() << "] is coverd by tablet " - << last.table_name() << " [" - << last.key_range().key_start() << "," - << last.key_range().key_end() << "]" << std::endl; - covered = true; - } else { - if (!meta.key_range().key_start().empty()) { - std::cerr << "miss tablet " << meta.table_name() << " [-," - << meta.key_range().key_start() << "]" << std::endl; - if (op == "repair") { - tera::TabletMeta miss_meta; - miss_meta.set_table_name(meta.table_name()); - miss_meta.mutable_key_range()->set_key_start(""); - miss_meta.mutable_key_range()->set_key_end(meta.key_range().key_start()); - WriteTablet(miss_meta, bak); - } - } - if (op == "repair") { - WriteTablet(meta, bak); - } - } - } + if (argc == 4) { + std::string rowkey = argv[3]; + table_impl->ScanMetaTable(rowkey, rowkey + '\0'); - // ignore covered tablet - if (!covered) { - last.CopyFrom(meta); - table_start = meta.key_range().key_end().empty(); - } - } - if (op == "backup" || op == "repair") { - bak.close(); + TabletMeta meta; + if (!table_impl->GetTabletMetaForKey(rowkey, &meta)) { + LOG(ERROR) << "fail to get tablet meta for " << rowkey; + return -1; } + std::cout << meta.server_addr() << "\t" << meta.path() << std::endl; return 0; -} + } -int32_t MetaOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 4 && argc != 3) { - PrintCmdHelpInfo(argv[1]); - return -1; + table_impl->ScanMetaTable("", ""); + const int32_t buf_size = 1024 * 1024; + char rowkey[buf_size]; + while (std::cin.getline(rowkey, buf_size)) { + TabletMeta meta; + if (!table_impl->GetTabletMetaForKey(rowkey, &meta)) { + LOG(ERROR) << "fail to get tablet meta for " << rowkey; + continue; } + std::cout << rowkey << "\t" << meta.server_addr() << "\t" << meta.path() << std::endl; + } - std::string op = argv[2]; - if (op == "backup") { - if (argc < 4) { - LOG(ERROR) << "need backup file name."; - return -1; - } - std::string filename = argv[3]; - std::vector arg_list; - arg_list.push_back(op); - arg_list.push_back(filename); - if (!client->CmdCtrl("meta", arg_list, NULL, NULL, err)) { - LOG(ERROR) << "fail to " << op << " meta"; - return -5; - } - } else if (op == "check" || op == "repair" || op == "show") { - TableMetaList table_list; - TabletMetaList tablet_list; - std::shared_ptr client_impl((static_cast(client))->GetClientImpl()); - if (!client_impl->ShowTablesInfo(&table_list, &tablet_list, false, err)) { - LOG(ERROR) << "fail to get meta data from tera."; - return -3; - } - ProcessMeta(op, table_list, tablet_list); - } else { - PrintCmdHelpInfo(argv[1]); - return -2; - } - - std::cout << op << " tablet success" << std::endl; - return 0; + return 0; } -int32_t FindTabletOp(Client*, int32_t argc, std::string* argv, ErrorCode* err) { - if ((argc != 4) && (argc != 5)) { - PrintCmdHelpInfo(argv[1]); - return -1; - } - if (argc == 5) { - std::string s = argv[3]; - std::string e = argv[4]; - if ((e != "") && (s.compare(e) >= 0)) { - if (err != NULL) { - err->SetFailed(ErrorCode::kBadParam, "note: start_key <= end_key"); - } - return -1; - } - } - // get meta address - scoped_ptr finder(tera::sdk::NewClusterFinder()); - std::string meta_tablet_addr = finder->RootTableAddr(); - if (meta_tablet_addr.empty()) { - if (err != NULL) { - err->SetFailed(ErrorCode::kBadParam, "read root addr from zk fail"); - } - return -1; +int32_t FindTabletOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { + if ((argc != 4) && (argc != 5)) { + PrintCmdHelpInfo(argv[1]); + return -1; + } + if (argc == 5) { + std::string s = argv[3]; + std::string e = argv[4]; + if ((e != "") && (s.compare(e) >= 0)) { + if (err != NULL) { + err->SetFailed(ErrorCode::kBadParam, "note: start_key <= end_key"); + } + return -1; + } + } + // get meta address + scoped_ptr finder(tera::sdk::NewClusterFinder()); + std::string meta_tablet_addr = finder->RootTableAddr(); + if (meta_tablet_addr.empty()) { + if (err != NULL) { + err->SetFailed(ErrorCode::kBadParam, "read root addr from zk fail"); } - uint64_t seq_id = 0; - tera::TabletMetaList tablet_list; - tera::ScanTabletRequest request; - tera::ScanTabletResponse response; - request.set_sequence_id(seq_id++); - request.set_table_name(FLAGS_tera_master_meta_table_name); - request.set_start(std::string(argv[2]) + '#'); + return -1; + } + + std::string table_name = argv[2]; + TableSchema table_schema; + std::shared_ptr client_impl( + (static_cast(client))->GetClientImpl()); + if (!client_impl->ShowTableSchema(table_name, &table_schema, err)) { + LOG(ERROR) << "table not exist: " << table_name; + return -1; + } + + uint64_t seq_id = 0; + tera::TabletMetaList tablet_list; + tera::ScanTabletRequest request; + tera::ScanTabletResponse response; + request.set_sequence_id(seq_id++); + request.set_table_name(FLAGS_tera_master_meta_table_name); + request.set_start(argv[2] + '#'); + request.set_end(argv[2] + '$'); + common::ThreadPool thread_pool(FLAGS_concurrency); + tera::tabletnode::TabletNodeClient meta_node_client(&thread_pool, meta_tablet_addr); + while (meta_node_client.ScanTablet(&request, &response)) { + if (response.status() != tera::kTabletNodeOk) { + std::stringstream ss; + ss << "fail to load meta table: " << StatusCodeToString(response.status()) << std::endl; + if (err != NULL) { + err->SetFailed(ErrorCode::kBadParam, ss.str()); + } + return -1; + } + int32_t record_size = response.results().key_values_size(); + if (record_size <= 0) { + break; + } + + std::string last_record_key; + for (int32_t i = 0; i < record_size; i++) { + const tera::KeyValuePair& record = response.results().key_values(i); + last_record_key = record.key(); + ParseMetaTableKeyValue(record.key(), record.value(), tablet_list.add_meta()); + } + std::string next_record_key = tera::NextKey(last_record_key); + request.set_start(next_record_key); request.set_end(std::string(argv[2]) + '$'); - common::ThreadPool thread_pool(FLAGS_concurrency); - tera::tabletnode::TabletNodeClient meta_node_client(&thread_pool, meta_tablet_addr); - while (meta_node_client.ScanTablet(&request, &response)) { - if (response.status() != tera::kTabletNodeOk) { - std::stringstream ss; - ss << "fail to load meta table: " - << StatusCodeToString(response.status()) << std::endl; - if (err != NULL) { - err->SetFailed(ErrorCode::kBadParam, ss.str()); - } - return -1; - } - int32_t record_size = response.results().key_values_size(); - if (record_size <= 0) { - break; - } + request.set_sequence_id(seq_id++); + response.Clear(); + } - std::string last_record_key; - for (int32_t i = 0; i < record_size; i++) { - const tera::KeyValuePair& record = response.results().key_values(i); - last_record_key = record.key(); - ParseMetaTableKeyValue(record.key(), record.value(), tablet_list.add_meta()); - } - std::string next_record_key = tera::NextKey(last_record_key); - request.set_start(next_record_key); - request.set_end(std::string(argv[2]) + '$'); - request.set_sequence_id(seq_id++); - response.Clear(); - } - std::string start_key; - std::string end_key; + std::string start_key; + std::string end_key; + + if (table_schema.has_enable_hash() && table_schema.enable_hash() && FLAGS_convert_hash_key) { + auto hash_method = [](const string& key) { return MurmurHash(key) + key; }; if (argc == 4) { - start_key = std::string(argv[3]); - end_key = tera::NextKey(start_key); + start_key = hash_method(argv[3]); + end_key = tera::NextKey(start_key); } else { - start_key = std::string(argv[3]); - end_key = std::string(argv[4]); - } - int32_t tablet_num = tablet_list.meta_size(); - for (int32_t i = 0; i < tablet_num; ++i) { - const tera::TabletMeta& meta = tablet_list.meta(i); - if ((meta.key_range().key_end() != "") - && (start_key.compare(meta.key_range().key_end()) >= 0)) { - continue; - } - if ((end_key != "") - && (end_key.compare(meta.key_range().key_start()) < 0)) { - break; - } - std::cout << meta.path() << " " << meta.server_addr() << std::endl; - } - return 0; + start_key = hash_method(argv[3]); + end_key = hash_method(argv[4]); + } + } else if (argc == 4) { + start_key = argv[3]; + end_key = tera::NextKey(start_key); + } else { + start_key = argv[3]; + end_key = argv[4]; + } + + int32_t tablet_num = tablet_list.meta_size(); + for (int32_t i = 0; i < tablet_num; ++i) { + const tera::TabletMeta& meta = tablet_list.meta(i); + if ((meta.key_range().key_end() != "") && + (start_key.compare(meta.key_range().key_end()) >= 0)) { + continue; + } + if ((end_key != "") && (end_key.compare(meta.key_range().key_start()) < 0)) { + break; + } + std::cout << meta.path() << " " << meta.server_addr() << std::endl; + } + return 0; } int32_t StatOp(Client*, int32_t argc, std::string* argv, ErrorCode*) { - if (argc < 3) { - PrintCmdHelpInfo("stat"); - return -1; - } - std::string op = argv[2]; - if (op != "corruption") { - PrintCmdHelpInfo(argv[1]); - return -1; - } - std::string tabletpath = ""; - if (argc == 4) { - tabletpath = argv[3]; - } - std::string tabletnode = ""; - if (argc == 5) { - tabletnode = argv[4]; - } - int conc = FLAGS_concurrency; - if (conc <= 0 || conc > 1000) { - LOG(ERROR) << "compact concurrency illegal: " << conc; - return -1; - } - ThreadPool thread_pool(conc); - std::unique_ptr stat_table( - new tera::sdk::StatTable(&thread_pool, - tera::sdk::StatTableCustomer::kClient)); - stat_table->OpenStatTable(); - stat_table->SelectTabletsFailMessages(tabletpath, tabletnode); - return 0; -} - -int32_t Meta2Op(Client*, int32_t argc, std::string* argv, ErrorCode*) { - if (argc < 3) { - PrintCmdHelpInfo("meta"); - return -1; - } - - std::string op = argv[2]; - if (op != "check" && op != "show" && op != "backup" && op != "repair") { - PrintCmdHelpInfo(argv[1]); - return -1; - } - - // get meta address - scoped_ptr finder(tera::sdk::NewClusterFinder()); - std::string meta_tablet_addr = finder->RootTableAddr(); - if (meta_tablet_addr.empty()) { - std::cerr << "read root addr from zk fail"; - return -1; - } + if (argc < 3) { + PrintCmdHelpInfo("stat"); + return -1; + } - // scan meta - uint64_t seq_id = 0; - tera::TableMetaList table_list; - tera::TabletMetaList tablet_list; - tera::ScanTabletRequest request; - tera::ScanTabletResponse response; - request.set_sequence_id(seq_id++); - request.set_table_name(FLAGS_tera_master_meta_table_name); - request.set_start(""); - request.set_end(""); - common::ThreadPool thread_pool(FLAGS_concurrency); - tera::tabletnode::TabletNodeClient meta_node_client(&thread_pool, meta_tablet_addr); - while (meta_node_client.ScanTablet(&request, &response)) { - if (response.status() != tera::kTabletNodeOk) { - std::cerr << "fail to load meta table: " - << StatusCodeToString(response.status()) << std::endl; - return -1; - } - int32_t record_size = response.results().key_values_size(); - if (record_size <= 0) { - std::cout << "scan meta table success" << std::endl; - break; - } - std::cerr << "scan meta table: " << record_size << " records" << std::endl; - - std::string last_record_key; - for (int32_t i = 0; i < record_size; i++) { - const tera::KeyValuePair& record = response.results().key_values(i); - last_record_key = record.key(); - char first_key_char = record.key()[0]; - if (first_key_char == '~') { - std::cout << "(user: " << record.key().substr(1) << ")" << std::endl; - } else if (first_key_char == '@') { - ParseMetaTableKeyValue(record.key(), record.value(), table_list.add_meta()); - } else if (first_key_char > '@') { - ParseMetaTableKeyValue(record.key(), record.value(), tablet_list.add_meta()); - } else { - std::cerr << "invalid record: " << record.key(); - } + int conc = FLAGS_concurrency; + if (conc <= 0 || conc > 1000) { + LOG(ERROR) << "concurrency illegal: " << conc; + return -1; + } + ThreadPool thread_pool(conc); + std::unique_ptr stat_table(new tera::sdk::StatTable( + &thread_pool, std::shared_ptr(), tera::sdk::StatTableCustomer::kClient)); + stat_table->OpenStatTable(); + + std::string op = argv[2]; + if (op == "corruption" && argc >= 3) { + std::vector filters = {"type=", "ts=", "table=", + "tablet=", "time_range=", "detail="}; + + for (auto& filter : filters) { + std::string tmp_filter = filter; + for (int32_t i = 3; i < argc; ++i) { + std::size_t found = argv[i].find(filter); + if (found != std::string::npos) { + filter = (argv[i]).replace(0, filter.size(), ""); + break; } - std::string next_record_key = tera::NextKey(last_record_key); - request.set_start(next_record_key); - request.set_end(""); - request.set_sequence_id(seq_id++); - response.Clear(); + } + if (tmp_filter == filter) { + filter = ""; + } } - - return ProcessMeta(op, table_list, tablet_list); + bool is_detail = true; + if (filters[5] == "false") { + is_detail = false; + } + stat_table->SelectTabletsFailMessages(filters, is_detail); + } else { + PrintCmdHelpInfo(argv[1]); + return -1; + } + return 0; } -static int32_t CreateUser(Client* client, const std::string& user, - const std::string& password, ErrorCode* err) { - if (!client->CreateUser(user, password, err)) { - LOG(ERROR) << "fail to create user: " << user << ", " << err->ToString(); - return -1; - } - return 0; +static int32_t CreateUser(Client* client, const std::string& user, const std::string& password, + ErrorCode* err) { + if (!client->CreateUser(user, password, err)) { + LOG(ERROR) << "fail to create user: " << user << ", " << err->ToString(); + return -1; + } + return 0; } static int32_t DeleteUser(Client* client, const std::string& user, ErrorCode* err) { - if (!client->DeleteUser(user, err)) { - LOG(ERROR) << "fail to delete user: " << user << ", " << err->ToString(); - return -1; - } - return 0; + if (!client->DeleteUser(user, err)) { + LOG(ERROR) << "fail to delete user: " << user << ", " << err->ToString(); + return -1; + } + return 0; } -static int32_t ChangePwd(Client* client, const std::string& user, - const std::string& password, ErrorCode* err) { - if (!client->ChangePwd(user, password, err)) { - LOG(ERROR) << "fail to update user: " << user << ", " << err->ToString(); - return -1; - } - return 0; +static int32_t ChangePwd(Client* client, const std::string& user, const std::string& password, + ErrorCode* err) { + if (!client->ChangePwd(user, password, err)) { + LOG(ERROR) << "fail to update user: " << user << ", " << err->ToString(); + return -1; + } + return 0; } static int32_t ShowUser(Client* client, const std::string& user, ErrorCode* err) { - std::vector user_infos; - if (!client->ShowUser(user, user_infos, err)) { - LOG(ERROR) << "fail to show user: " << user << ", " << err->ToString(); - return -1; - } - if (user_infos.size() < 1) { - return -1; - } - std::cout << "user:" << user_infos[0] - << "\ngroups (" << user_infos.size() - 1 << "):"; - for (size_t i = 1; i < user_infos.size(); ++i) { - std::cout << user_infos[i] << " "; - } - std::cout << std::endl; - return 0; + std::vector user_infos; + if (!client->ShowUser(user, user_infos, err)) { + LOG(ERROR) << "fail to show user: " << user << ", " << err->ToString(); + return -1; + } + if (user_infos.size() < 1) { + return -1; + } + std::cout << "user:" << user_infos[0] << "\ngroups (" << user_infos.size() - 1 << "):"; + for (size_t i = 1; i < user_infos.size(); ++i) { + std::cout << user_infos[i] << " "; + } + std::cout << std::endl; + return 0; } -static int32_t AddUserToGroup(Client* client, const std::string& user, - const std::string& group, ErrorCode* err) { - if (!client->AddUserToGroup(user, group, err)) { - LOG(ERROR) << "fail to add user: " << user - << " to group:" << group << err->ToString(); - return -1; - } - return 0; +static int32_t AddUserToGroup(Client* client, const std::string& user, const std::string& group, + ErrorCode* err) { + if (!client->AddUserToGroup(user, group, err)) { + LOG(ERROR) << "fail to add user: " << user << " to group:" << group << err->ToString(); + return -1; + } + return 0; } static int32_t DeleteUserFromGroup(Client* client, const std::string& user, - const std::string& group, ErrorCode* err) { - if (!client->DeleteUserFromGroup(user, group, err)) { - LOG(ERROR) << "fail to delete user: " << user - << " from group: " << group << err->ToString(); - return -1; - } - return 0; + const std::string& group, ErrorCode* err) { + if (!client->DeleteUserFromGroup(user, group, err)) { + LOG(ERROR) << "fail to delete user: " << user << " from group: " << group << err->ToString(); + return -1; + } + return 0; } int32_t UserOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc < 4) { - PrintCmdHelpInfo(argv[1]); - return -1; - } - std::string op = argv[2]; - if ((argc == 5) && (op == "create")) { - return CreateUser(client, argv[3], argv[4], err); - } else if ((argc == 5) && (op == "changepwd")) { - return ChangePwd(client, argv[3], argv[4], err); - } else if ((argc == 4) && (op == "show")) { - return ShowUser(client, argv[3], err); - } else if ((argc == 4) && (op == "delete")) { - return DeleteUser(client, argv[3], err); - } else if ((argc == 5) && (op == "addtogroup")) { - return AddUserToGroup(client, argv[3], argv[4], err); - } else if ((argc == 5) && (op == "deletefromgroup")) { - return DeleteUserFromGroup(client, argv[3], argv[4], err); - } + if (argc < 4) { PrintCmdHelpInfo(argv[1]); return -1; + } + std::string op = argv[2]; + if ((argc == 5) && (op == "create")) { + return CreateUser(client, argv[3], argv[4], err); + } else if ((argc == 5) && (op == "changepwd")) { + return ChangePwd(client, argv[3], argv[4], err); + } else if ((argc == 4) && (op == "show")) { + return ShowUser(client, argv[3], err); + } else if ((argc == 4) && (op == "delete")) { + return DeleteUser(client, argv[3], err); + } else if ((argc == 5) && (op == "addtogroup")) { + return AddUserToGroup(client, argv[3], argv[4], err); + } else if ((argc == 5) && (op == "deletefromgroup")) { + return DeleteUserFromGroup(client, argv[3], argv[4], err); + } + PrintCmdHelpInfo(argv[1]); + return -1; } int32_t RangeOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 3) { - PrintCmdHelpInfo(argv[1]); - return -1; - } + if (argc != 3) { + PrintCmdHelpInfo(argv[1]); + return -1; + } - bool is_x = (std::string(argv[1]) == "rangex"); - std::string tablename = argv[2]; + bool is_x = (std::string(argv[1]) == "rangex"); + std::string tablename = argv[2]; - std::vector tablet_list; - if (!client->GetTabletLocation(tablename, &tablet_list, err)) { - LOG(ERROR) << "fail to list tablet info: " << tablename; - return -2; - } + std::vector tablet_list; + if (!client->GetTabletLocation(tablename, &tablet_list, err)) { + LOG(ERROR) << "fail to list tablet info: " << tablename; + return -2; + } - if (FLAGS_reorder_tablets) { - ReorderTabletList(&tablet_list); - } + if (FLAGS_reorder_tablets) { + ReorderTabletList(&tablet_list); + } - std::vector::iterator it = tablet_list.begin(); - for (; it != tablet_list.end(); ++it) { - if (it->start_key.empty()) { - it->start_key = "-"; - } - if (it->end_key.empty()) { - it->end_key = "-"; - } - if (is_x) { - std::cout << it->server_addr << "\t" << it->path << "\t" - << it->start_key << "\t" << it->end_key << std::endl; - } else { - std::cout << it->start_key << "\t" << it->end_key << std::endl; - } + std::vector::iterator it = tablet_list.begin(); + for (; it != tablet_list.end(); ++it) { + if (it->start_key.empty()) { + it->start_key = "-"; } - return 0; + if (it->end_key.empty()) { + it->end_key = "-"; + } + if (is_x) { + std::cout << it->server_addr << "\t" << it->path << "\t" << it->start_key << "\t" + << it->end_key << std::endl; + } else { + std::cout << it->start_key << "\t" << it->end_key << std::endl; + } + } + return 0; } int StartRowTxnOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 5) { - PrintCmdHelpInfo(argv[1]); - return -1; - } - if (g_row_txn != NULL) { - LOG(ERROR) << "txn has started"; - return -1; - } + if (argc != 5) { + PrintCmdHelpInfo(argv[1]); + return -1; + } + if (g_row_txn != NULL) { + LOG(ERROR) << "txn has started"; + return -1; + } - CHECK(g_row_txn_table == NULL); - std::string tablename = argv[3]; - g_row_txn_table = client->OpenTable(tablename, err); - if (g_row_txn_table == NULL) { - LOG(ERROR) << "fail to open table"; - return -1; - } + CHECK(g_row_txn_table == NULL); + std::string tablename = argv[3]; + g_row_txn_table = client->OpenTable(tablename, err); + if (g_row_txn_table == NULL) { + LOG(ERROR) << "fail to open table"; + return -1; + } - std::string row_key = argv[4]; - g_row_txn = g_row_txn_table->StartRowTransaction(row_key); - if (g_row_txn == NULL) { - LOG(ERROR) << "fail to start row txn"; - delete g_row_txn_table; - g_row_txn_table = NULL; - return -1; - } - return 0; + std::string row_key = argv[4]; + g_row_txn = g_row_txn_table->StartRowTransaction(row_key); + if (g_row_txn == NULL) { + LOG(ERROR) << "fail to start row txn"; + delete g_row_txn_table; + g_row_txn_table = NULL; + return -1; + } + return 0; } int CommitRowTxnOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 3) { - PrintCmdHelpInfo(argv[1]); - return -1; - } - if (g_row_txn == NULL) { - LOG(ERROR) << "txn has not started"; - return -1; - } - g_row_txn_table->CommitRowTransaction(g_row_txn); - std::cout << g_row_txn->GetError().ToString() << std::endl; - - delete g_row_txn; - g_row_txn = NULL; - delete g_row_txn_table; - g_row_txn_table = NULL; - return 0; + if (argc != 3) { + PrintCmdHelpInfo(argv[1]); + return -1; + } + if (g_row_txn == NULL) { + LOG(ERROR) << "txn has not started"; + return -1; + } + g_row_txn_table->CommitRowTransaction(g_row_txn); + std::cout << g_row_txn->GetError().ToString() << std::endl; + + delete g_row_txn; + g_row_txn = NULL; + delete g_row_txn_table; + g_row_txn_table = NULL; + return 0; } int TxnOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc < 3) { - LOG(ERROR) << "args number error: " << argc << ", need > 2"; - PrintCmdHelpInfo(argv[1]); - return -1; - } - - std::string txn_op = argv[2]; - if (txn_op == "start") { - return StartRowTxnOp(client, argc, argv, err); - } else if (txn_op == "commit") { - return CommitRowTxnOp(client, argc, argv, err); - } else { - PrintCmdHelpInfo(argv[1]); - return -1; - } + if (argc < 3) { + LOG(ERROR) << "args number error: " << argc << ", need > 2"; + PrintCmdHelpInfo(argv[1]); + return -1; + } + + std::string txn_op = argv[2]; + if (txn_op == "start") { + return StartRowTxnOp(client, argc, argv, err); + } else if (txn_op == "commit") { + return CommitRowTxnOp(client, argc, argv, err); + } else { + PrintCmdHelpInfo(argv[1]); + return -1; + } } int GTxnOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - std::vector> statements; - std::string line; - while (getline(std::cin, line)) { - bool on_skipping = false; - uint32_t pos = 0; - std::vector terms; - std::string term; - while (pos < line.length()) { - char c = line[pos]; - if (!on_skipping) { - if (c == ' ' || c == '\t') { - on_skipping = true; - if (term.length() > 0) { - terms.push_back(term); - term.clear(); - } - } else { - term.push_back(c); - } - } else { - if (c != ' ' && c != '\t') { - term.push_back(c); - on_skipping = false; - } - } - ++pos; - } - if (term.length() > 0) { + std::vector> statements; + std::string line; + while (getline(std::cin, line)) { + bool on_skipping = false; + uint32_t pos = 0; + std::vector terms; + std::string term; + while (pos < line.length()) { + char c = line[pos]; + if (!on_skipping) { + if (c == ' ' || c == '\t') { + on_skipping = true; + if (term.length() > 0) { terms.push_back(term); term.clear(); + } + } else { + term.push_back(c); } - if (terms.size() > 0) { - statements.push_back(terms); + } else { + if (c != ' ' && c != '\t') { + term.push_back(c); + on_skipping = false; } - } - bool begin_txn = false; - std::unique_ptr txn; - std::vector tables; - for (auto statement : statements) { - if (!begin_txn) { - if (statement.size() == 1 && (statement[0] == "BEGIN")) { - txn.reset(client->NewGlobalTransaction()); - if (txn != NULL) { - begin_txn = true; - } else { - std::cerr << "new global txn failed" << std::endl; - return -1; - } - } else { - std::cerr << "intput syntax error @ " << statement[0] << std::endl; - return -1; - } + } + ++pos; + } + if (term.length() > 0) { + terms.push_back(term); + term.clear(); + } + if (terms.size() > 0) { + statements.push_back(terms); + } + } + bool begin_txn = false; + std::unique_ptr txn; + std::vector tables; + for (auto statement : statements) { + if (!begin_txn) { + if (statement.size() == 1 && (statement[0] == "BEGIN")) { + txn.reset(client->NewGlobalTransaction()); + if (txn != NULL) { + begin_txn = true; } else { - std::string family; - std::string qualifier; - switch (statement.size()) { - case 1: { - // COMMIT - if (statement[0] != "COMMIT") { - std::cerr << "input syntax error @ " << statement[0] << std::endl; - return -1; - } - txn->Commit(); - begin_txn = false; - if (txn->GetError().GetType() != ErrorCode::kOK) { - std::cerr << "txn commit failed" - << txn->GetError().ToString() << std::endl; - return -1; - } else { - std::cout << "ok" << std::endl; - } - break; - } - case 5: { - // PUT tablename rowkey cf:qu value - if (statement[0] != "PUT") { - std::cerr << "input syntax error @ " << statement[0] << std::endl; - return -1; - } - TablePtr mu_table(client->OpenTable(statement[1], err)); - if (!mu_table) { - LOG(ERROR) << "fail to open table"; - return -1; - } - tables.push_back(mu_table); - std::unique_ptr mu(mu_table->NewRowMutation(statement[2])); - ParseCfQualifier(statement[3], &family, &qualifier); - mu->Put(family, qualifier, statement[4]); - txn->ApplyMutation(mu.get()); - break; - } - case 4: { - // DEL tablename rowkey cf:qu - if (statement[0] == "DEL") { - TablePtr del_table(client->OpenTable(statement[1], err)); - if (!del_table) { - LOG(ERROR) << "fail to open table"; - return -1; - } - tables.push_back(del_table); - std::unique_ptr mu(del_table->NewRowMutation(statement[2])); - ParseCfQualifier(statement[3], &family, &qualifier); - mu->DeleteColumn(family, qualifier); - txn->ApplyMutation(mu.get()); - } else if (statement[0] == "GET") { - // GET tablename rowkey cf:qu - TablePtr r_table(client->OpenTable(statement[1], err)); - if (!r_table) { - LOG(ERROR) << "fail to open table"; - return -1; - } - tables.push_back(r_table); - std::unique_ptr reader(r_table->NewRowReader(statement[2])); - ParseCfQualifier(statement[3], &family, &qualifier); - reader->AddColumn(family, qualifier); - txn->Get(reader.get()); - if (reader->GetError().GetType() != ErrorCode::kOK) { - std::cerr << "txn get failed" - << reader->GetError().ToString() << std::endl; - return -1; - } - while (!reader->Done()) { - std::cout << reader->Value() << std::endl; - reader->Next(); - } - } else { - std::cerr << "input syntax error @ " << statement[0] << std::endl; - return -1; - } - break; - } - default: - std::cerr << "input syntax error : statement size should 1,4,5 " << std::endl; - return -1; + std::cerr << "new global txn failed" << std::endl; + return -1; + } + } else { + std::cerr << "intput syntax error @ " << statement[0] << std::endl; + return -1; + } + } else { + std::string family; + std::string qualifier; + switch (statement.size()) { + case 1: { + // COMMIT + if (statement[0] != "COMMIT") { + std::cerr << "input syntax error @ " << statement[0] << std::endl; + return -1; + } + txn->Commit(); + begin_txn = false; + if (txn->GetError().GetType() != ErrorCode::kOK) { + std::cerr << "txn commit failed" << txn->GetError().ToString() << std::endl; + return -1; + } else { + std::cout << "ok" << std::endl; + } + break; + } + case 5: { + // PUT tablename rowkey cf:qu value + if (statement[0] != "PUT") { + std::cerr << "input syntax error @ " << statement[0] << std::endl; + return -1; + } + TablePtr mu_table(client->OpenTable(statement[1], err)); + if (!mu_table) { + LOG(ERROR) << "fail to open table"; + return -1; + } + tables.push_back(mu_table); + std::unique_ptr mu(mu_table->NewRowMutation(statement[2])); + ParseCfQualifier(statement[3], &family, &qualifier); + mu->Put(family, qualifier, statement[4]); + txn->ApplyMutation(mu.get()); + break; + } + case 4: { + // DEL tablename rowkey cf:qu + if (statement[0] == "DEL") { + TablePtr del_table(client->OpenTable(statement[1], err)); + if (!del_table) { + LOG(ERROR) << "fail to open table"; + return -1; + } + tables.push_back(del_table); + std::unique_ptr mu(del_table->NewRowMutation(statement[2])); + ParseCfQualifier(statement[3], &family, &qualifier); + mu->DeleteColumn(family, qualifier); + txn->ApplyMutation(mu.get()); + } else if (statement[0] == "GET") { + // GET tablename rowkey cf:qu + TablePtr r_table(client->OpenTable(statement[1], err)); + if (!r_table) { + LOG(ERROR) << "fail to open table"; + return -1; + } + tables.push_back(r_table); + std::unique_ptr reader(r_table->NewRowReader(statement[2])); + ParseCfQualifier(statement[3], &family, &qualifier); + reader->AddColumn(family, qualifier); + txn->Get(reader.get()); + if (reader->GetError().GetType() != ErrorCode::kOK) { + std::cerr << "txn get failed" << reader->GetError().ToString() << std::endl; + return -1; } + while (!reader->Done()) { + std::cout << reader->Value() << std::endl; + reader->Next(); + } + } else { + std::cerr << "input syntax error @ " << statement[0] << std::endl; + return -1; + } + break; } + default: + std::cerr << "input syntax error : statement size should 1,4,5 " << std::endl; + return -1; + } } - return 0; + } + return 0; } int32_t CasOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 7) { - LOG(ERROR) << "args number error: " << argc << ", need 7"; - PrintCmdHelpInfo(argv[1]); - return -1; - } - - const std::string& tablename = argv[2]; - TablePtr table(client->OpenTable(tablename, err)); - if (!table) { - LOG(ERROR) << "fail to open table"; - return -1; - } - - const std::string& rowkey = argv[3]; - const std::string& old_val = argv[5]; - const std::string& new_val = argv[6]; - std::string columnfamily = ""; - std::string qualifier = ""; - ParseCfQualifier(argv[4], &columnfamily, &qualifier); + if (argc != 7) { + LOG(ERROR) << "args number error: " << argc << ", need 7"; + PrintCmdHelpInfo(argv[1]); + return -1; + } - std::unique_ptr txn(table->StartRowTransaction(rowkey)); - if (!txn) { - LOG(ERROR) << "fail to start row txn"; - return -1; - } + const std::string& tablename = argv[2]; + TablePtr table(client->OpenTable(tablename, err)); + if (!table) { + LOG(ERROR) << "fail to open table"; + return -1; + } + + const std::string& rowkey = argv[3]; + const std::string& old_val = argv[5]; + const std::string& new_val = argv[6]; + std::string columnfamily = ""; + std::string qualifier = ""; + ParseCfQualifier(argv[4], &columnfamily, &qualifier); + + std::unique_ptr txn(table->StartRowTransaction(rowkey)); + if (!txn) { + LOG(ERROR) << "fail to start row txn"; + return -1; + } - std::unique_ptr reader(table->NewRowReader(rowkey)); - reader->AddColumn(columnfamily, qualifier); - txn->Get(reader.get()); - if (reader->Done()) { - std::cout << "cas failed: NotFound" << std::endl; - return -1; - } - std::string cur_val = reader->Value(); - if (cur_val != old_val) { - std::cout << "cas failed: NotEqual" << std::endl; - return -1; - } + std::unique_ptr reader(table->NewRowReader(rowkey)); + reader->AddColumn(columnfamily, qualifier); + txn->Get(reader.get()); + if (reader->Done()) { + std::cout << "cas failed: NotFound" << std::endl; + return -1; + } + std::string cur_val = reader->Value(); + if (cur_val != old_val) { + std::cout << "cas failed: NotEqual" << std::endl; + return -1; + } - std::unique_ptr mutation(table->NewRowMutation(rowkey)); - mutation->Put(columnfamily, qualifier, new_val); - txn->ApplyMutation(mutation.get()); - if (mutation->GetError().GetType() != tera::ErrorCode::kOK) { - std::cout << "cas failed: " << tera::strerr(mutation->GetError()) << std::endl; - return -1; - } + std::unique_ptr mutation(table->NewRowMutation(rowkey)); + mutation->Put(columnfamily, qualifier, new_val); + txn->ApplyMutation(mutation.get()); + if (mutation->GetError().GetType() != tera::ErrorCode::kOK) { + std::cout << "cas failed: " << tera::strerr(mutation->GetError()) << std::endl; + return -1; + } - auto error_code = txn->Commit(); - if (error_code.GetType() != tera::ErrorCode::kOK) { - std::cout << "cas failed: " << tera::strerr(error_code) << std::endl; - return -1; - } else { - std::cout << "cas success" << std::endl; - } + auto error_code = txn->Commit(); + if (error_code.GetType() != tera::ErrorCode::kOK) { + std::cout << "cas failed: " << tera::strerr(error_code) << std::endl; + return -1; + } else { + std::cout << "cas success" << std::endl; + } - return 0; + return 0; } int32_t HelpOp(Client*, int32_t argc, std::string* argv, ErrorCode*) { - if (argc == 2) { - PrintAllCmd(); - } else if (argc == 3) { - PrintCmdHelpInfo(argv[2]); - } else { - PrintCmdHelpInfo("help"); - } - return 0; + if (argc == 2) { + PrintAllCmd(); + } else if (argc == 3) { + PrintCmdHelpInfo(argv[2]); + } else { + PrintCmdHelpInfo("help"); + } + return 0; } int32_t HelpOp(int32_t argc, char** argv) { - std::vector argv_svec(argv, argv + argc); - return HelpOp(NULL, argc, &argv_svec[0], NULL); + std::vector argv_svec(argv, argv + argc); + return HelpOp(NULL, argc, &argv_svec[0], NULL); } bool ParseCommand(int argc, char** arg_list, std::vector* parsed_arg_list) { - for (int i = 0; i < argc; i++) { - std::string parsed_arg = arg_list[i]; - if (FLAGS_readable && !ParseDebugString(arg_list[i], &parsed_arg)) { - std::cout << "invalid debug format of argument: " << arg_list[i] << std::endl; - return false; - } - parsed_arg_list->push_back(parsed_arg); - } - return true; + for (int i = 0; i < argc; i++) { + std::string parsed_arg = arg_list[i]; + if (FLAGS_readable && !ParseDebugString(arg_list[i], &parsed_arg)) { + std::cout << "invalid debug format of argument: " << arg_list[i] << std::endl; + return false; + } + parsed_arg_list->push_back(parsed_arg); + } + return true; } - int32_t InitDfsClient() { - if (g_dfs != NULL) { - return 0; - } - if (FLAGS_tera_leveldb_env_dfs_type == "nfs") { - if (access(FLAGS_tera_leveldb_env_nfs_conf_path.c_str(), R_OK) == 0) { - LOG(INFO) << "init nfs system: use configure file" << FLAGS_tera_leveldb_env_nfs_conf_path; - leveldb::Nfs::Init(FLAGS_tera_leveldb_env_nfs_mountpoint, FLAGS_tera_leveldb_env_nfs_conf_path); - g_dfs = leveldb::Nfs::GetInstance(); - } - else { - LOG(FATAL) << "init nfs system: no configure file found"; - return -1; - } - } else if (FLAGS_tera_leveldb_env_dfs_type == "hdfs2") { - LOG(INFO) << "hdfs2 system support currently, please use hadoop-client"; - g_dfs = new leveldb::Hdfs2(FLAGS_tera_leveldb_env_hdfs2_nameservice_list); - } else if (FLAGS_tera_leveldb_env_dfs_type == "hdfs") { - g_dfs = new leveldb::Hdfs(); - } - else { - LOG(INFO) << "init dfs system: " << FLAGS_tera_dfs_so_path << "(" << FLAGS_tera_dfs_conf << ")"; - g_dfs = leveldb::Dfs::NewDfs(FLAGS_tera_dfs_so_path, FLAGS_tera_dfs_conf); - } + if (g_dfs != NULL) { return 0; + } + if (FLAGS_tera_leveldb_env_dfs_type == "nfs") { + if (access(FLAGS_tera_leveldb_env_nfs_conf_path.c_str(), R_OK) == 0) { + LOG(INFO) << "init nfs system: use configure file" << FLAGS_tera_leveldb_env_nfs_conf_path; + leveldb::Nfs::Init(FLAGS_tera_leveldb_env_nfs_mountpoint, + FLAGS_tera_leveldb_env_nfs_conf_path); + g_dfs = leveldb::Nfs::GetInstance(); + } else { + LOG(FATAL) << "init nfs system: no configure file found"; + return -1; + } + } else if (FLAGS_tera_leveldb_env_dfs_type == "hdfs2") { + LOG(INFO) << "hdfs2 system support currently, please use hadoop-client"; + g_dfs = new leveldb::Hdfs2(FLAGS_tera_leveldb_env_hdfs2_nameservice_list); + } else if (FLAGS_tera_leveldb_env_dfs_type == "hdfs") { + g_dfs = new leveldb::Hdfs(); + } else { + LOG(INFO) << "init dfs system: " << FLAGS_tera_dfs_so_path << "(" << FLAGS_tera_dfs_conf << ")"; + g_dfs = leveldb::Dfs::NewDfs(FLAGS_tera_dfs_so_path, FLAGS_tera_dfs_conf); + } + return 0; } int32_t FileSystemOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if (argc < 4) { - PrintCmdHelpInfo(argv[1]); - return -1; - } - if (0 != InitDfsClient()) { - LOG(FATAL) << "InitDfsClient failed"; - return -1; - } - std::string operation = argv[2]; - if (GetFSCommandTable().find(operation) == GetFSCommandTable().end()) { - std::cerr << "unsupported dfs command: " << operation << std::endl; - return -1; - } - int ret = (GetFSCommandTable().find(operation)->second)(argc, argv, err); - return ret; + if (argc < 4) { + PrintCmdHelpInfo(argv[1]); + return -1; + } + if (0 != InitDfsClient()) { + LOG(FATAL) << "InitDfsClient failed"; + return -1; + } + std::string operation = argv[2]; + if (GetFSCommandTable().find(operation) == GetFSCommandTable().end()) { + std::cerr << "unsupported dfs command: " << operation << std::endl; + return -1; + } + int ret = (GetFSCommandTable().find(operation)->second)(argc, argv, err); + return ret; } int DfsPrintAttr(const char* pathname, struct stat* st, void* arg = NULL) { - char mode_str[10]; - memset(mode_str, '-', sizeof(mode_str)); - char time_str[64]; - strftime(time_str, sizeof(time_str), "%b %d %H:%M %Y", localtime(&st->st_mtime)); - printf("%c%c%c%c%c%c%c%c%c%c %16lx %16ld %s %s", - (S_IFDIR & st->st_mode) ? 'd' : '-', - (S_IRUSR & st->st_mode) ? 'r' : '-', - (S_IWUSR & st->st_mode) ? 'w' : '-', - (S_IXUSR & st->st_mode) ? 'x' : '-', - (S_IRGRP & st->st_mode) ? 'r' : '-', - (S_IWGRP & st->st_mode) ? 'w' : '-', - (S_IXGRP & st->st_mode) ? 'x' : '-', - (S_IROTH & st->st_mode) ? 'r' : '-', - (S_IWOTH & st->st_mode) ? 'w' : '-', - (S_IXOTH & st->st_mode) ? 'x' : '-', - st->st_ino, - st->st_size, time_str, pathname); - if (S_IFDIR & st->st_mode) { - printf("/"); - } - printf("\n"); - return 0; + char mode_str[10]; + memset(mode_str, '-', sizeof(mode_str)); + char time_str[64]; + strftime(time_str, sizeof(time_str), "%b %d %H:%M %Y", localtime(&st->st_mtime)); + printf("%c%c%c%c%c%c%c%c%c%c %16lx %16ld %s %s", (S_IFDIR & st->st_mode) ? 'd' : '-', + (S_IRUSR & st->st_mode) ? 'r' : '-', (S_IWUSR & st->st_mode) ? 'w' : '-', + (S_IXUSR & st->st_mode) ? 'x' : '-', (S_IRGRP & st->st_mode) ? 'r' : '-', + (S_IWGRP & st->st_mode) ? 'w' : '-', (S_IXGRP & st->st_mode) ? 'x' : '-', + (S_IROTH & st->st_mode) ? 'r' : '-', (S_IWOTH & st->st_mode) ? 'w' : '-', + (S_IXOTH & st->st_mode) ? 'x' : '-', st->st_ino, st->st_size, time_str, pathname); + if (S_IFDIR & st->st_mode) { + printf("/"); + } + printf("\n"); + return 0; } -static std::string FormatPath(const std::string pathname) { - std::string result; - bool need_strip = false; - for (std::string::size_type i = 0; i < pathname.length(); ++i) { - if (pathname.at(i) == '/') { - if (need_strip) { - continue; - } - else { - result.push_back(pathname.at(i)); - need_strip = true; - } - } else { - need_strip = false; - result.push_back(pathname.at(i)); - } - } - if (result.at(result.length() - 1) == '/') { - result.pop_back(); - } - return result; +static std::string FormatPath(const std::string& pathname) { + std::string result; + bool need_strip = false; + for (std::string::size_type i = 0; i < pathname.length(); ++i) { + if (pathname.at(i) == '/') { + if (need_strip) { + continue; + } else { + result.push_back(pathname.at(i)); + need_strip = true; + } + } else { + need_strip = false; + result.push_back(pathname.at(i)); + } + } + if (result.at(result.length() - 1) == '/') { + result.pop_back(); + } + return result; } int32_t DfsPrintPath(const char* pathname, struct stat* st, void* arg = NULL) { - printf("%s", FormatPath(pathname).c_str()); - if (S_IFDIR & st->st_mode) { - printf("/"); - } - printf("\n"); - return 0; + printf("%s", FormatPath(pathname).c_str()); + if (S_IFDIR & st->st_mode) { + printf("/"); + } + printf("\n"); + return 0; } int32_t DfsSizeSum(const char* pathname, struct stat* st, void* arg) { - uint64_t* sum = reinterpret_cast(arg); - if (!(S_IFDIR & st->st_mode)) { - *sum += st->st_size; - } - return 0; + uint64_t* sum = reinterpret_cast(arg); + if (!(S_IFDIR & st->st_mode)) { + *sum += st->st_size; + } + return 0; } -int32_t DfsTryLockParentPath(const std::string path) { - std::string parent_path = path; - if (parent_path.at(parent_path.length() - 1) == '/') { - parent_path.pop_back(); - } - std::string::size_type pos = parent_path.rfind("/"); - if (pos == std::string::npos) { - fprintf(stderr, "invalid path: %s\n", path.c_str()); - return -1; - } - if (pos == 0) { - parent_path = "/"; - } - parent_path = parent_path.substr(0, pos); - return g_dfs->LockDirectory(parent_path); +int32_t DfsTryLockParentPath(const std::string& path) { + std::string parent_path = path; + if (parent_path.at(parent_path.length() - 1) == '/') { + parent_path.pop_back(); + } + std::string::size_type pos = parent_path.rfind("/"); + if (pos == std::string::npos) { + fprintf(stderr, "invalid path: %s\n", path.c_str()); + return -1; + } + if (pos == 0) { + parent_path = "/"; + } + parent_path = parent_path.substr(0, pos); + return g_dfs->LockDirectory(parent_path); } int32_t DfsRmPath(const char* pathname, struct stat* st, void*) { - int ret = 0; - if (S_IFDIR & st->st_mode) { - ret = g_dfs->DeleteDirectory(pathname); - if (0 != ret) { - perror("RmDir fail"); - return ret; - } - } else { - ret = g_dfs->Delete(pathname); - if (0 != ret) { - perror("unlink fail"); - } + int ret = 0; + if (S_IFDIR & st->st_mode) { + ret = g_dfs->DeleteDirectory(pathname); + if (0 != ret) { + perror("RmDir fail"); + return ret; + } + } else { + ret = g_dfs->Delete(pathname); + if (0 != ret) { + perror("unlink fail"); } - return ret; + } + return ret; } -typedef int(*WalkFunc)(const char*, struct stat*, void* arg); +typedef int (*WalkFunc)(const char*, struct stat*, void* arg); int32_t DfsDirWalk(const char* dir_name, WalkFunc func, bool is_recursive, void* arg = NULL) { - struct stat st; + struct stat st; + memset(&st, 0, sizeof(struct stat)); + char fullpath[4096] = {0}; + // not a directory, end of recursive call + if (0 == g_dfs->Stat(dir_name, &st) && !(S_IFDIR & st.st_mode)) { + return 0; + } + std::vector sub_paths; + if (0 != g_dfs->ListDirectory(dir_name, &sub_paths)) { + return -1; + } + if (func == DfsRmPath && FLAGS_asowner) { + if (0 != g_dfs->LockDirectory(dir_name)) { + fprintf(stderr, "Lock Directory %s failed", dir_name); + return -1; + } + } + for (std::size_t i = 0; i < sub_paths.size(); ++i) { + snprintf(fullpath, sizeof(fullpath), "%s/%s", dir_name, sub_paths[i].c_str()); memset(&st, 0, sizeof(struct stat)); - char fullpath[4096] = {0}; - // not a directory, end of recursive call - if (0 == g_dfs->Stat(dir_name, &st) && !(S_IFDIR & st.st_mode)) { - return 0; - } - std::vector sub_paths; - if (0 != g_dfs->ListDirectory(dir_name, &sub_paths)) { - return -1; + if (g_dfs->Stat(fullpath, &st) < 0) { + perror("Stat failed"); + continue; } - if (func == DfsRmPath && FLAGS_asowner) { - if (0 != g_dfs->LockDirectory(dir_name)) { - fprintf(stderr, "Lock Directory %s failed", dir_name); - return -1; - } - } - for (std::size_t i = 0; i < sub_paths.size(); ++i) { - snprintf(fullpath, sizeof(fullpath), "%s/%s", dir_name, sub_paths[i].c_str()); - memset(&st, 0, sizeof(struct stat)); - if (g_dfs->Stat(fullpath, &st) < 0) { - perror("Stat failed"); - continue; - } - if (is_recursive && (S_IFDIR & st.st_mode)) { - DfsDirWalk(fullpath, func, true, arg); - } - func(fullpath, &st, arg); + if (is_recursive && (S_IFDIR & st.st_mode)) { + DfsDirWalk(fullpath, func, true, arg); } - return 0; + func(fullpath, &st, arg); + } + return 0; } - int32_t DfsGetOp(int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 5) { - fprintf(stderr, "Invalid arguments"); - return -1; - } - int ret = 0; - const std::string& src_path = argv[3]; - const std::string& local_path = argv[4]; - std::string local_file_path = local_path; - int local_fd = 0; - if (local_path != "-") { - struct stat st; - if (stat(local_path.c_str(), &st) == 0 && (S_IFDIR & st.st_mode)) { - char* tmp_src_path = strdup(src_path.c_str()); - char* filename = basename(tmp_src_path); - local_file_path.append("/").append(filename); - free(tmp_src_path); - } - local_fd = open(local_file_path.c_str(), O_CREAT | O_WRONLY | O_TRUNC, 0644); - if (local_fd < 0) { - fprintf(stderr, "local file open fail, path=%s, errno=%d", local_file_path.c_str(), errno); - return errno; - } - } - leveldb::DfsFile* file = g_dfs->OpenFile(src_path, leveldb::RDONLY); - if (NULL == file) { - fprintf(stderr, "open dfs file fail, path=%s, errno=%d", src_path.c_str(), errno); - return errno; - } - char buf[128 * 1024]; - ssize_t ret_size = 0; - while ((ret_size = file->Read(buf, sizeof(buf))) > 0) { - ssize_t writelen = write(local_fd, buf, ret_size); - if (writelen < 0) { - fprintf(stderr, "write local file fail, path=%s, errno=%d", local_file_path.c_str(), errno); - ret = errno; - break; - } - } - if (local_fd > 0) { - close(local_fd); - } - file->CloseFile(); - - return ret; + if (argc != 5) { + fprintf(stderr, "Invalid arguments"); + return -1; + } + int ret = 0; + const std::string& src_path = argv[3]; + const std::string& local_path = argv[4]; + std::string local_file_path = local_path; + int local_fd = 0; + if (local_path != "-") { + struct stat st; + if (stat(local_path.c_str(), &st) == 0 && (S_IFDIR & st.st_mode)) { + char* tmp_src_path = strdup(src_path.c_str()); + char* filename = basename(tmp_src_path); + local_file_path.append("/").append(filename); + free(tmp_src_path); + } + local_fd = open(local_file_path.c_str(), O_CREAT | O_WRONLY | O_TRUNC, 0644); + if (local_fd < 0) { + fprintf(stderr, "local file open fail, path=%s, errno=%d", local_file_path.c_str(), errno); + return errno; + } + } + leveldb::DfsFile* file = g_dfs->OpenFile(src_path, leveldb::RDONLY); + if (NULL == file) { + fprintf(stderr, "open dfs file fail, path=%s, errno=%d", src_path.c_str(), errno); + return errno; + } + char buf[128 * 1024]; + ssize_t ret_size = 0; + while ((ret_size = file->Read(buf, sizeof(buf))) > 0) { + ssize_t writelen = write(local_fd, buf, ret_size); + if (writelen < 0) { + fprintf(stderr, "write local file fail, path=%s, errno=%d", local_file_path.c_str(), errno); + ret = errno; + break; + } + } + if (local_fd > 0) { + close(local_fd); + } + file->CloseFile(); + + return ret; } int32_t DfsPutOp(int32_t argc, std::string* argv, ErrorCode* err) { - fprintf(stderr, "not implemented"); - return -1; + fprintf(stderr, "not implemented"); + return -1; } - int32_t DfsLsOp(int32_t argc, std::string* argv, ErrorCode* err) { - const std::string& filename = argv[3]; - struct stat fstat; - int ret = 0; - if (0 == g_dfs->Stat(filename.c_str(), &fstat)) { - if (S_IFDIR & fstat.st_mode) { - if (FLAGS_attribute) { - DfsPrintAttr(filename.c_str(), &fstat); - ret = DfsDirWalk(filename.c_str(), DfsPrintAttr, FLAGS_recursive); - } else { - DfsPrintPath(filename.c_str(), &fstat); - ret = DfsDirWalk(filename.c_str(), DfsPrintPath, FLAGS_recursive); - } - } - else { - if (FLAGS_attribute) { - DfsPrintAttr(filename.c_str(), &fstat); - } - else { - DfsPrintPath(filename.c_str(), &fstat); - } - } - } - return ret; + const std::string& filename = argv[3]; + struct stat fstat; + int ret = 0; + if (0 == g_dfs->Stat(filename.c_str(), &fstat)) { + if (S_IFDIR & fstat.st_mode) { + if (FLAGS_attribute) { + DfsPrintAttr(filename.c_str(), &fstat); + ret = DfsDirWalk(filename.c_str(), DfsPrintAttr, FLAGS_recursive); + } else { + DfsPrintPath(filename.c_str(), &fstat); + ret = DfsDirWalk(filename.c_str(), DfsPrintPath, FLAGS_recursive); + } + } else { + if (FLAGS_attribute) { + DfsPrintAttr(filename.c_str(), &fstat); + } else { + DfsPrintPath(filename.c_str(), &fstat); + } + } + } + return ret; } int32_t DfsLsrOp(int32_t argc, std::string* argv, ErrorCode* err) { - - bool old_recursive_flag = FLAGS_recursive; - FLAGS_recursive = true; - DfsLsOp(argc, argv, err); - FLAGS_recursive = old_recursive_flag; - return errno; + bool old_recursive_flag = FLAGS_recursive; + FLAGS_recursive = true; + DfsLsOp(argc, argv, err); + FLAGS_recursive = old_recursive_flag; + return errno; } int32_t DfsDusOp(int32_t argc, std::string* argv, ErrorCode* err) { - struct stat st; - const std::string& path = argv[3]; - uint64_t size = 0; - if (g_dfs->Stat(path, &st) != 0) { - perror("Stat failed"); - return errno; - } - if (S_IFDIR & st.st_mode) { - DfsDirWalk(path.c_str(), DfsSizeSum, true, &size); - } else { - DfsSizeSum(path.c_str(), &st, &size); - } - fprintf(stdout, "%s:\t%lu\n", path.c_str(), size); - return 0; + struct stat st; + const std::string& path = argv[3]; + uint64_t size = 0; + if (g_dfs->Stat(path, &st) != 0) { + perror("Stat failed"); + return errno; + } + if (S_IFDIR & st.st_mode) { + DfsDirWalk(path.c_str(), DfsSizeSum, true, &size); + } else { + DfsSizeSum(path.c_str(), &st, &size); + } + fprintf(stdout, "%s:\t%lu\n", path.c_str(), size); + return 0; } int32_t DfsTouchzOp(int32_t argc, std::string* argv, ErrorCode* err) { - const std::string& path = argv[3]; - struct stat st; - std::string::size_type pos = path.rfind("/"); - if (pos == std::string::npos || pos == path.length() - 1) { - fprintf(stderr, "invalid filepath: %s", path.c_str()); - return -1; - } + const std::string& path = argv[3]; + struct stat st; + std::string::size_type pos = path.rfind("/"); + if (pos == std::string::npos || pos == path.length() - 1) { + fprintf(stderr, "invalid filepath: %s", path.c_str()); + return -1; + } - int ret = g_dfs->Stat(path, &st); + int ret = g_dfs->Stat(path, &st); + if (0 != ret) { + if (errno != ENOENT) { + perror("Stat failed"); + return errno; + } + std::string parent_path = path.substr(0, pos); + ret = g_dfs->CreateDirectory(parent_path); if (0 != ret) { - if (errno != ENOENT) { - perror("Stat failed"); - return errno; - } - std::string parent_path = path.substr(0, pos); - ret = g_dfs->CreateDirectory(parent_path); - if (0 != ret) { - perror("create parent path failed"); - return errno; - } - if (FLAGS_asowner) { - DfsTryLockParentPath(path); - } - leveldb::DfsFile* file = g_dfs->OpenFile(path, leveldb::WRONLY); - if (NULL == file) { - perror("create or open file fail"); - return errno; - } + perror("create parent path failed"); + return errno; + } + if (FLAGS_asowner) { + DfsTryLockParentPath(path); + } + leveldb::DfsFile* file = g_dfs->OpenFile(path, leveldb::WRONLY); + if (NULL == file) { + perror("create or open file fail"); + return errno; + } + } else { + if (S_IFDIR & st.st_mode) { + fprintf(stderr, "Touchz fail: %s not Regular file", path.c_str()); + ret = EISDIR; } else { - if (S_IFDIR & st.st_mode) { - fprintf(stderr, "Touchz fail: %s not Regular file", path.c_str()); - ret = EISDIR; - } else { - fprintf(stdout, "%s already exists", path.c_str()); - ret = EEXIST; - } + fprintf(stdout, "%s already exists", path.c_str()); + ret = EEXIST; } - return ret; + } + return ret; } int32_t DfsMkdirOp(int32_t argc, std::string* argv, ErrorCode* err) { - const std::string& path = argv[3]; - if (FLAGS_asowner) { - if (0 != DfsTryLockParentPath(path)) { - fprintf(stderr, "Try lock parent path failed"); - return -1; - } - } - int ret = g_dfs->CreateDirectory(path); - if (0 != ret) { - fprintf(stderr, "Create Path: %s failed, errno=%d\n", path.c_str(), errno); - ret = errno; - } - return ret; + const std::string& path = argv[3]; + if (FLAGS_asowner) { + if (0 != DfsTryLockParentPath(path)) { + fprintf(stderr, "Try lock parent path failed"); + return -1; + } + } + int ret = g_dfs->CreateDirectory(path); + if (0 != ret) { + fprintf(stderr, "Create Path: %s failed, errno=%d\n", path.c_str(), errno); + ret = errno; + } + return ret; } int32_t DfsRmOp(int32_t argc, std::string* argv, ErrorCode* err) { - const std::string& path = argv[3]; - struct stat st; - if (0 != g_dfs->Stat(path.c_str(), &st)) { - perror("Stat fail: "); - return -1; - } - int ret = 0; - if (FLAGS_asowner) { - DfsTryLockParentPath(path); - } - if (st.st_mode & S_IFDIR) { - if (FLAGS_recursive) { - DfsDirWalk(path.c_str(), DfsRmPath, true, NULL); - ret = g_dfs->DeleteDirectory(path); - } else { - ret = g_dfs->DeleteDirectory(path); - } + const std::string& path = argv[3]; + struct stat st; + if (0 != g_dfs->Stat(path.c_str(), &st)) { + perror("Stat fail: "); + return -1; + } + int ret = 0; + if (FLAGS_asowner) { + DfsTryLockParentPath(path); + } + if (st.st_mode & S_IFDIR) { + if (FLAGS_recursive) { + DfsDirWalk(path.c_str(), DfsRmPath, true, NULL); + ret = g_dfs->DeleteDirectory(path); } else { - ret = g_dfs->Delete(path); - } - if (0 != ret) { - perror("delete failed: "); + ret = g_dfs->DeleteDirectory(path); } + } else { + ret = g_dfs->Delete(path); + } + if (0 != ret) { + perror("delete failed: "); + } - return errno; + return errno; } int32_t DfsTestOp(int32_t argc, std::string* argv, ErrorCode* err) { - fprintf(stderr, "not implemented\n"); - return -1; + fprintf(stderr, "not implemented\n"); + return -1; } int32_t DfsStatOp(int32_t argc, std::string* argv, ErrorCode* err) { - struct stat st; - const std::string& filename = argv[3]; - if (0 != g_dfs->Stat(filename, &st)) { - return errno; - } - const char* file_type; - if (S_IFREG & st.st_mode) { - file_type = "Regular"; - } else if (S_IFDIR & st.st_mode) { - file_type = "Directory"; - } else { - file_type = "Symlink"; - } - fprintf(stdout, "File:\t%s\n", filename.c_str()); - fprintf(stdout, "Inode:\t0x%lx\n", st.st_ino); - fprintf(stdout, "Type:\t%s\n", file_type); - fprintf(stdout, "Size:\t%lu\n", st.st_size); - fprintf(stdout, "Mode:\t%o\n", st.st_mode & 0777); - fprintf(stdout, "Link:\t%lu\n", st.st_nlink); - fprintf(stdout, "Atime:\t%lu\t%s", st.st_atime, ctime(&st.st_atime)); - fprintf(stdout, "Mtime:\t%lu\t%s", st.st_mtime, ctime(&st.st_mtime)); - fprintf(stdout, "Ctime:\t%lu\t%s", st.st_ctime, ctime(&st.st_ctime)); - - return 0; + struct stat st; + const std::string& filename = argv[3]; + if (0 != g_dfs->Stat(filename, &st)) { + return errno; + } + const char* file_type; + if (S_IFREG & st.st_mode) { + file_type = "Regular"; + } else if (S_IFDIR & st.st_mode) { + file_type = "Directory"; + } else { + file_type = "Symlink"; + } + fprintf(stdout, "File:\t%s\n", filename.c_str()); + fprintf(stdout, "Inode:\t0x%lx\n", st.st_ino); + fprintf(stdout, "Type:\t%s\n", file_type); + fprintf(stdout, "Size:\t%lu\n", st.st_size); + fprintf(stdout, "Mode:\t%o\n", st.st_mode & 0777); + fprintf(stdout, "Link:\t%lu\n", st.st_nlink); + fprintf(stdout, "Atime:\t%lu\t%s", st.st_atime, ctime(&st.st_atime)); + fprintf(stdout, "Mtime:\t%lu\t%s", st.st_mtime, ctime(&st.st_mtime)); + fprintf(stdout, "Ctime:\t%lu\t%s", st.st_ctime, ctime(&st.st_ctime)); + + return 0; } int32_t DfsRenameOp(int32_t argc, std::string* argv, ErrorCode* err) { - if (argc != 5) { - fprintf(stderr, "invalid arguments\n"); - return -1; - } - std::string& src_path = argv[3]; - std::string& dest_path = argv[4]; - if (FLAGS_asowner) { - if (0 != DfsTryLockParentPath(dest_path)) { - fprintf(stderr, "Lock ParentPath failed"); - return -1; - } - } - - int ret = g_dfs->Rename(src_path, dest_path); - if (0 != ret) { - perror("Rename fail"); - ret = errno; - } - return ret; + if (argc != 5) { + fprintf(stderr, "invalid arguments\n"); + return -1; + } + std::string& src_path = argv[3]; + std::string& dest_path = argv[4]; + if (FLAGS_asowner) { + if (0 != DfsTryLockParentPath(dest_path)) { + fprintf(stderr, "Lock ParentPath failed"); + return -1; + } + } + + int ret = g_dfs->Rename(src_path, dest_path); + if (0 != ret) { + perror("Rename fail"); + ret = errno; + } + return ret; } int32_t DfsUnlockDirOp(int32_t argc, std::string* argv, ErrorCode* err) { - const std::string& path = argv[3]; - return g_dfs->ClearDirOwner(path); + const std::string& path = argv[3]; + return g_dfs->ClearDirOwner(path); } int32_t DfsChecksumOp(int32_t argc, std::string* argv, ErrorCode* err) { - fprintf(stderr, "Not Implemented"); - return -1; + fprintf(stderr, "Not Implemented"); + return -1; } int32_t DfsLChecksumOp(int32_t argc, std::string* argv, ErrorCode* err) { - fprintf(stderr, "Not Implemented"); - return -1; + fprintf(stderr, "Not Implemented"); + return -1; } int32_t DfsForceReleaseOp(int32_t argc, std::string* argv, ErrorCode* err) { - fprintf(stderr, "Not Implemented"); + fprintf(stderr, "Not Implemented"); + return -1; +} + +int32_t HashOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { + if (argc < 3) { + PrintCmdHelpInfo(argv[1]); return -1; + } + auto hash_method = [](const string& key) { return MurmurHash(key) + key; }; + std::cout << hash_method(argv[2]) << std::endl; + return 0; } -static void InitializeFileSystemCommandTable() { - FSCommandTable& fs_command_table = GetFSCommandTable(); - fs_command_table["get"] = DfsGetOp; - fs_command_table["put"] = DfsPutOp; - fs_command_table["lsr"] = DfsLsrOp; - fs_command_table["ls"] = DfsLsOp; - fs_command_table["dus"] = DfsDusOp; - fs_command_table["touchz"] = DfsTouchzOp; - fs_command_table["mkdir"] = DfsMkdirOp; - fs_command_table["rm"] = DfsRmOp; - fs_command_table["test"] = DfsTestOp; - fs_command_table["stat"] = DfsStatOp; - fs_command_table["rename"] = DfsRenameOp; - fs_command_table["unlockdir"] = DfsUnlockDirOp; - fs_command_table["checksum"] = DfsChecksumOp; - fs_command_table["lchecksum"] = DfsLChecksumOp; - fs_command_table["forcerelease"] = DfsForceReleaseOp; - return; +bool SplitKV(const std::string& str, std::string* key, std::string* value, char separator) { + std::string::size_type pos = str.find(separator); + if (std::string::npos == pos || str.length() < pos + 2) { + return false; + } + *key = str.substr(0, pos); + *value = str.substr(pos + 1, str.length() - pos - 1); + return true; } -static void InitializeCommandTable(){ - CommandTable& command_table = GetCommandTable(); - command_table["create"] = CreateOp; - command_table["createbyfile"] = CreateByFileOp; - command_table["update"] = UpdateOp; - command_table["update-check"] = UpdateCheckOp; - command_table["drop"] = DropOp; - command_table["enable"] = EnableOp; - command_table["disable"] = DisableOp; - command_table["show"] = ShowOp; - command_table["showx"] = ShowOp; - command_table["showall"] = ShowOp; - command_table["showschema"] = ShowSchemaOp; - command_table["showschemax"] = ShowSchemaOp; - command_table["showts"] = ShowTabletNodesOp; - command_table["showtsx"] = ShowTabletNodesOp; - command_table["put"] = PutOp; - command_table["putint64"] = PutInt64Op; - command_table["put-ttl"] = PutTTLOp; - command_table["put_counter"] = PutCounterOp; - command_table["add"] = AddOp; - command_table["addint64"] = AddInt64Op; - command_table["putif"] = PutIfAbsentOp; - command_table["append"] = AppendOp; - command_table["get"] = GetOp; - command_table["getint64"] = GetInt64Op; - command_table["get_counter"] = GetCounterOp; - command_table["delete"] = DeleteOp; - command_table["delete1v"] = DeleteOp; - command_table["batchput"] = BatchPutOp; - command_table["batchputint64"] = BatchPutInt64Op; - command_table["batchget"] = BatchGetOp; - command_table["batchgetint64"] = BatchGetInt64Op; - command_table["scan"] = ScanOp; - command_table["scanallv"] = ScanOp; - command_table["safemode"] = SafeModeOp; - command_table["tablet"] = TabletOp; - command_table["meta"] = MetaOp; - command_table["compact"] = CompactOp; - command_table["compactx"] = CompactOp; - command_table["findmaster"] = FindMasterOp; - command_table["findts"] = FindTsOp; - command_table["findtablet"] = FindTabletOp; - command_table["stat"] = StatOp; - command_table["meta2"] = Meta2Op; - command_table["user"] = UserOp; - command_table["reload"] = ReloadConfigOp; - command_table["kick"] = KickTabletServerOp; - command_table["cookie"] = CookieOp; - command_table["range"] = RangeOp; - command_table["rangex"] = RangeOp; - command_table["txn"] = TxnOp; - command_table["gtxn"] = GTxnOp; - command_table["help"] = HelpOp; - command_table["cas"] = CasOp; - command_table["dfs"] = FileSystemOp; - InitializeFileSystemCommandTable(); +bool ParseQuotaInfo(int32_t argc, std::string* argv, TableQuota* table_quota) { + // parse table/user name + std::string key; + std::string value; + std::string table_name; + int offset = 3; + table_quota->set_table_name(argv[offset++]); + table_quota->set_type(TableQuota::kSetQuota); + + // parse quota limit + std::map type_list; + type_list["WRITEREQS"] = kQuotaWriteReqs; + type_list["WRITEBYTES"] = kQuotaWriteBytes; + type_list["READREQS"] = kQuotaReadReqs; + type_list["READBYTES"] = kQuotaReadBytes; + type_list["SCANREQS"] = kQuotaScanReqs; + type_list["SCANBYTES"] = kQuotaScanBytes; + std::string type; + std::string number; + std::string limit; + for (int i = offset; i < argc; ++i) { + std::string period = "1"; + if (!SplitKV(argv[i], &type, &number, '=') || type_list.find(type) == type_list.end()) { + LOG(ERROR) << "error quota limit: " << argv[i]; + return false; + } + SplitKV(number, &limit, &period, '/'); + QuotaInfo* quota_infos = table_quota->add_quota_infos(); + quota_infos->set_type(type_list[type]); + quota_infos->set_limit(std::stol(limit)); + quota_infos->set_period(std::stoi(period)); + } + return true; } -int ExecuteCommand(Client* client, int argc, char** arg_list) { - int ret = 0; - ErrorCode error_code; +template +void BuildQuotaInfoToRow(const Quota& quota, std::vector* row) { + std::map limitstr_list; + limitstr_list[kQuotaWriteReqs] = "-"; + limitstr_list[kQuotaWriteBytes] = "-"; + limitstr_list[kQuotaReadReqs] = "-"; + limitstr_list[kQuotaReadBytes] = "-"; + limitstr_list[kQuotaScanReqs] = "-"; + limitstr_list[kQuotaScanBytes] = "-"; + for (int j = 0; j < quota.quota_infos_size(); ++j) { + std::stringstream ss; + ss << quota.quota_infos(j).limit() << "/" << quota.quota_infos(j).period(); + limitstr_list[quota.quota_infos(j).type()] = ss.str(); + } + for (const auto& iter : limitstr_list) { + row->push_back(iter.second); + } +} - std::vector parsed_arg_list; - if (!ParseCommand(argc, arg_list, &parsed_arg_list)) { - return 1; - } - std::string* argv = &parsed_arg_list[0]; - - CommandTable& command_table = GetCommandTable(); - std::string cmd = argv[1]; - if (cmd == "version") { - PrintSystemVersion(); - } else if (command_table.find(cmd) != command_table.end()) { - ret = command_table[cmd](client, argc, argv, &error_code); - } else { - PrintUnknownCmdHelpInfo(argv[1].c_str()); - ret = 1; - } +void AppendTableQuotaInfoToRow(const TableQuota& table_quota, std::vector* row) { + row->push_back(table_quota.table_name()); + BuildQuotaInfoToRow(table_quota, row); +} - if (error_code.GetType() != ErrorCode::kOK) { - LOG(ERROR) << "fail reason: " << error_code.ToString(); - } - return ret; +void PrintTableQuota(const std::vector& table_quota_list) { + TPrinter printer; + std::vector row; + printer.Reset(8, "SEQ", "TABLENAME", "WRITEREQS(w/s)", "WRITEBYTES(B/s)", "READREQS(r/s)", + "READBYTES(B/s)", "SCANREQS(s/s)", "SCANBYTES(B/s)"); + for (uint32_t i = 0; i < table_quota_list.size(); ++i) { + row.clear(); + row.push_back(NumberToString(i)); + AppendTableQuotaInfoToRow(table_quota_list[i], &row); + if (!printer.AddRow(row)) { + LOG(ERROR) << "add row error"; + } + } + printer.Print(g_printer_opt); + std::cout << std::endl; +} + +void PrintTsQuota(const std::vector& ts_quota_list) { + TPrinter printer; + std::vector row; + printer.Reset(9, "SEQ", "TSADDR", "TABLENAME", "WRITEREQS(w/s)", "WRITEBYTES(B/s)", + "READREQS(r/s)", "READBYTES(B/s)", "SCANREQS(s/s)", "SCANBYTES(B/s)"); + for (uint32_t i = 0; i < ts_quota_list.size(); ++i) { + const TsQuota& ts_quota = ts_quota_list[i]; + int table_quotas_size = ts_quota.table_quotas_size(); + for (int table_quotas_index = 0; table_quotas_index < table_quotas_size; ++table_quotas_index) { + row.clear(); + row.push_back(NumberToString(i)); + row.push_back(ts_quota.ts_addr()); + AppendTableQuotaInfoToRow(ts_quota.table_quotas(table_quotas_index), &row); + if (!printer.AddRow(row)) { + LOG(ERROR) << "add row error"; + } + } + } + printer.Print(g_printer_opt); + std::cout << std::endl; +} + +static int32_t ShowQuotaBrief(Client* client, ErrorCode* err) { + std::shared_ptr client_impl( + (static_cast(client))->GetClientImpl()); + std::vector table_quota_list; + if (!client_impl->ShowQuotaBrief(&table_quota_list, err)) { + LOG(ERROR) << "fail to show table_quota," << err->ToString(); + return -1; + } + + // Parse table quota + PrintTableQuota(table_quota_list); + return 0; +} + +static int32_t ShowQuotaDetail(Client* client, ErrorCode* err) { + std::shared_ptr client_impl( + (static_cast(client))->GetClientImpl()); + std::vector ts_quota_list; + if (!client_impl->ShowQuotaDetail(&ts_quota_list, err)) { + LOG(ERROR) << "fail to show ts quota," << err->ToString(); + return -1; + } + + // Parse table&tablet quota + PrintTsQuota(ts_quota_list); + return 0; +} + +static int32_t SetQuota(Client* client, const TableQuota& table_quota, ErrorCode* err) { + std::shared_ptr client_impl( + (static_cast(client))->GetClientImpl()); + if (!client_impl->SetQuota(table_quota, err)) { + LOG(ERROR) << "fail to set table_quota," << err->ToString(); + return -1; + } + return 0; +} + +int32_t QuotaOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { + if (argc < 3) { + PrintCmdHelpInfo(argv[1]); + return -1; + } + TableQuota table_quota; + std::string op = argv[2]; + if (argc == 3) { + if (op == "show") { + return ShowQuotaBrief(client, err); + } else if (op == "showx") { + return ShowQuotaDetail(client, err); + } + } else if (argc > 4 && op == "set" && ParseQuotaInfo(argc, argv, &table_quota)) { + return SetQuota(client, table_quota, err); + } + PrintCmdHelpInfo(argv[1]); + return -1; +} + +static void InitializeFileSystemCommandTable() { + FSCommandTable& fs_command_table = GetFSCommandTable(); + fs_command_table["get"] = DfsGetOp; + fs_command_table["put"] = DfsPutOp; + fs_command_table["lsr"] = DfsLsrOp; + fs_command_table["ls"] = DfsLsOp; + fs_command_table["dus"] = DfsDusOp; + fs_command_table["touchz"] = DfsTouchzOp; + fs_command_table["mkdir"] = DfsMkdirOp; + fs_command_table["rm"] = DfsRmOp; + fs_command_table["test"] = DfsTestOp; + fs_command_table["stat"] = DfsStatOp; + fs_command_table["rename"] = DfsRenameOp; + fs_command_table["unlockdir"] = DfsUnlockDirOp; + fs_command_table["checksum"] = DfsChecksumOp; + fs_command_table["lchecksum"] = DfsLChecksumOp; + fs_command_table["forcerelease"] = DfsForceReleaseOp; + return; +} + +static void InitializeCommandTable() { + CommandTable& command_table = GetCommandTable(); + command_table["create"] = CreateOp; + command_table["createbyfile"] = CreateByFileOp; + command_table["update"] = UpdateOp; + command_table["update-check"] = UpdateCheckOp; + command_table["drop"] = DropOp; + command_table["enable"] = EnableOp; + command_table["disable"] = DisableOp; + command_table["show"] = ShowOp; + command_table["showx"] = ShowOp; + command_table["showall"] = ShowOp; + command_table["showschema"] = ShowSchemaOp; + command_table["showschemax"] = ShowSchemaOp; + command_table["showts"] = ShowTabletNodesOp; + command_table["showtsx"] = ShowTabletNodesOp; + command_table["put"] = PutOp; + command_table["putint64"] = PutInt64Op; + command_table["put-ttl"] = PutTTLOp; + command_table["put_counter"] = PutCounterOp; + command_table["add"] = AddOp; + command_table["addint64"] = AddInt64Op; + command_table["putif"] = PutIfAbsentOp; + command_table["append"] = AppendOp; + command_table["get"] = GetOp; + command_table["getint64"] = GetInt64Op; + command_table["get_counter"] = GetCounterOp; + command_table["delete"] = DeleteOp; + command_table["delete1v"] = DeleteOp; + command_table["batchput"] = BatchPutOp; + command_table["batchputint64"] = BatchPutInt64Op; + command_table["batchget"] = BatchGetOp; + command_table["batchgetint64"] = BatchGetInt64Op; + command_table["scan"] = ScanOp; + command_table["scanallv"] = ScanOp; + command_table["safemode"] = SafeModeOp; + command_table["tablet"] = TabletOp; + command_table["compact"] = CompactOp; + command_table["compactx"] = CompactOp; + command_table["findmaster"] = FindMasterOp; + command_table["findts"] = FindTsOp; + command_table["findtablet"] = FindTabletOp; + command_table["stat"] = StatOp; + command_table["user"] = UserOp; + command_table["reload"] = ReloadConfigOp; + command_table["kick"] = KickTabletServerOp; + command_table["forcekick"] = KickTabletServerOp; + command_table["cookie"] = CookieOp; + command_table["range"] = RangeOp; + command_table["rangex"] = RangeOp; + command_table["txn"] = TxnOp; + command_table["gtxn"] = GTxnOp; + command_table["help"] = HelpOp; + command_table["cas"] = CasOp; + command_table["dfs"] = FileSystemOp; + command_table["hash"] = HashOp; + command_table["quota"] = QuotaOp; + InitializeFileSystemCommandTable(); +} + +int ExecuteCommand(Client* client, int argc, char** arg_list) { + int ret = 0; + ErrorCode error_code; + + std::vector parsed_arg_list; + if (!ParseCommand(argc, arg_list, &parsed_arg_list)) { + return 1; + } + std::string* argv = &parsed_arg_list[0]; + + CommandTable& command_table = GetCommandTable(); + std::string cmd = argv[1]; + if (cmd == "version") { + PrintSystemVersion(); + } else if (command_table.find(cmd) != command_table.end()) { + ret = command_table[cmd](client, argc, argv, &error_code); + } else { + PrintUnknownCmdHelpInfo(argv[1].c_str()); + ret = 1; + } + + if (error_code.GetType() != ErrorCode::kOK) { + LOG(ERROR) << "fail reason: " << error_code.ToString(); + } + return ret; } int main(int argc, char* argv[]) { - FLAGS_minloglevel = 2; - ::google::ParseCommandLineFlags(&argc, &argv, true); - - if (argc > 1 && std::string(argv[1]) == "version") { - PrintSystemVersion(); - return 0; - } else if (argc > 1 && std::string(argv[1]) == "help") { - HelpOp(argc, argv); - return 0; - } + FLAGS_minloglevel = 2; + ::google::ParseCommandLineFlags(&argc, &argv, true); - Client* client = Client::NewClient(FLAGS_flagfile, NULL); - if (client == NULL) { - LOG(ERROR) << "client instance not exist"; - return -1; - } - g_printer_opt.print_head = FLAGS_stdout_is_tty; - - InitializeCommandTable(); - - int ret = 0; - if (argc == 1) { - char* line = NULL; - while ((line = readline("tera> ")) != NULL) { - char* line_copy = strdup(line); - std::vector arg_list; - arg_list.push_back(argv[0]); - char* tmp = NULL; - char* token = strtok_r(line, " \t", &tmp); - while (token != NULL) { - arg_list.push_back(token); - token = strtok_r(NULL, " \t", &tmp); - } - if (arg_list.size() == 2 && - (strcmp(arg_list[1], "quit") == 0 || strcmp(arg_list[1], "exit") == 0)) { - free(line_copy); - free(line); - break; - } - if (arg_list.size() > 1) { - add_history(line_copy); - ret = ExecuteCommand(client, arg_list.size(), &arg_list[0]); - } - free(line_copy); - free(line); - } - } else { - ret = ExecuteCommand(client, argc, argv); - } + if (argc > 1 && std::string(argv[1]) == "version") { + PrintSystemVersion(); + return 0; + } else if (argc > 1 && std::string(argv[1]) == "help") { + HelpOp(argc, argv); + return 0; + } - delete client; - return ret; + Client* client = Client::NewClient(FLAGS_flagfile, NULL); + if (client == NULL) { + LOG(ERROR) << "client instance not exist"; + return -1; + } + g_printer_opt.print_head = FLAGS_stdout_is_tty; + + InitializeCommandTable(); + + int ret = 0; + if (argc == 1) { + char* line = NULL; + while ((line = readline("tera> ")) != NULL) { + char* line_copy = strdup(line); + std::vector arg_list; + arg_list.push_back(argv[0]); + char* tmp = NULL; + char* token = strtok_r(line, " \t", &tmp); + while (token != NULL) { + arg_list.push_back(token); + token = strtok_r(NULL, " \t", &tmp); + } + if (arg_list.size() == 2 && + (strcmp(arg_list[1], "quit") == 0 || strcmp(arg_list[1], "exit") == 0)) { + free(line_copy); + free(line); + break; + } + if (arg_list.size() > 1) { + add_history(line_copy); + ret = ExecuteCommand(client, arg_list.size(), &arg_list[0]); + } + free(line_copy); + free(line); + } + } else { + ret = ExecuteCommand(client, argc, argv); + } + + delete client; + return ret; } diff --git a/src/terautil.cc b/src/terautil.cc index b0dd2b9b3..9bc913db4 100644 --- a/src/terautil.cc +++ b/src/terautil.cc @@ -14,9 +14,12 @@ #include #include #include +#include #include #include +#include +#include #include "ins_sdk.h" @@ -24,13 +27,22 @@ #include "common/base/string_number.h" #include "common/console/progress_bar.h" #include "common/file/file_path.h" +#include "common/log/log_cleaner.h" +#include "common/semaphore.h" +#include "common/func_scope_guard.h" +#include "common/thread_pool.h" +#include "leveldb/dfs.h" +#include "util/nfs.h" +#include "util/hdfs.h" #include "io/coding.h" #include "proto/kv_helper.h" #include "proto/proto_helper.h" +#include "proto/table_meta.pb.h" #include "proto/tabletnode.pb.h" #include "proto/tabletnode_client.h" #include "sdk/client_impl.h" #include "sdk/cookie.h" +#include "sdk/mutate_impl.h" #include "sdk/sdk_utils.h" #include "sdk/sdk_zk.h" #include "sdk/table_impl.h" @@ -47,687 +59,2979 @@ DECLARE_string(flagfile); DECLARE_string(log_dir); DECLARE_string(tera_master_meta_table_name); +DECLARE_bool(tera_info_log_clean_enable); + +// dfs +DECLARE_string(tera_leveldb_env_type); +DECLARE_string(tera_leveldb_env_dfs_type); +DECLARE_string(tera_leveldb_env_nfs_mountpoint); +DECLARE_string(tera_leveldb_env_nfs_conf_path); +DECLARE_string(tera_leveldb_env_hdfs2_nameservice_list); +DECLARE_string(tera_dfs_so_path); +DECLARE_string(tera_dfs_conf); DEFINE_string(dump_tera_src_conf, "../conf/src_tera.flag", "src cluster for tera"); DEFINE_string(dump_tera_dest_conf, "../conf/dest_tera.flag", "dest cluster for tera"); DEFINE_string(dump_tera_src_root_path, "/xxx_", "src tera root path"); DEFINE_string(dump_tera_dest_root_path, "/xxx_", "dest tera root path"); DEFINE_string(ins_cluster_addr, "terautil_ins", "terautil dump ins cluster conf"); -DEFINE_string(ins_cluster_root_path, "/terautil/dump/xxxx", "dump meta ins"); +DEFINE_string(ins_cluster_dump_root_path, "/terautil/dump/xxxx", "dump meta ins"); +DEFINE_string(ins_cluster_diff_root_path, "/terautil/diff", "diff meta ins"); DEFINE_string(dump_tera_src_meta_addr, "", "src addr for meta_table"); DEFINE_string(dump_tera_dest_meta_addr, "", "dest addr for meta_table"); DEFINE_int64(dump_manual_split_interval, 1000, "manual split interval in ms"); DEFINE_bool(dump_enable_manual_split, false, "manual split may take a long time, so disable it"); +DEFINE_int64(dump_concurrent_limit, 5000, "the qps limit of unit job to dump"); +DEFINE_int64(dump_startime, 0, "the start time of dump"); +DEFINE_int64(dump_endtime, 0, "the end time of dump"); +DEFINE_string(tables_map_file, "", + "tables_map_file to store the src table_name to dest table_name"); +DEFINE_string(diff_tables_map_file, "", + "tables_map_file for diff to store the src table_name to dest table_name"); + +DEFINE_string(dump_failed_kv_afs_path, "/user/tera/terautil", "afs path for dumping failed kv"); +DEFINE_bool(enable_dump_failed_kv, false, "enable dump failed kv to afs"); +DEFINE_string(dump_ut_kv_afs_path, "/user/tera/terautil_dump_ut", "dfs dir for dump ut"); +DEFINE_string(diff_data_afs_path, "/user/tera/diff", + "path for storing diff data for checking diff"); +DEFINE_string(diff_bin_data_afs_path, "/user/tera/diffbin", + "path for storing diff bin data for rewriting to dest claster"); +DEFINE_string(dump_tables_map_path, "tables_map", "tables_map path"); +DEFINE_string(dump_tables_lg_map_path, "tables_lg_map", "tables_lg_map path"); +DEFINE_string(dump_tables_cf_map_path, "tables_cf_map", "tables_cf_map path"); +DEFINE_string(dump_tables_cf_version_map_path, "tables_cf_version_map", "tables_cf_map path"); +DEFINE_string(lg_and_cf_delimiter, "|", "lg & cf delimiter"); +DEFINE_int64(pb_total_bytes_limit_MB, 1024, "pb_total_bytes_limit_MB"); +DEFINE_int64(pb_warning_threshold_MB, 256, "pb_warning_threshold_MB"); +DEFINE_int64(rewrite_retry_times, 5, "rewrite retry times"); +DEFINE_int64(diff_scan_interval_ns, 1, "scan interval(ns)"); +DEFINE_int64(diff_scan_count_per_interval, 1000, "scan count per interval"); +DEFINE_bool(readable, true, "readable input"); +DEFINE_bool(enable_copy_schema, true, "enable copy schema from src cluster to dest cluster"); +DEFINE_bool(enable_write_dfs_diff_only_in_src, true, "enable write dfs file"); +DEFINE_bool(enable_write_dfs_diff_only_in_dest, true, "enable write dfs file"); +DEFINE_bool(enable_write_dfs_diff_both_have_but_diff, true, "enable write dfs file"); +DEFINE_bool(enable_write_dfs_diffbin_only_in_src, true, "enable write dfs file"); +DEFINE_bool(enable_write_diff_only_in_src_to_dest, false, + "enable write diff only_in_src data to dest"); +DEFINE_int64(write_only_in_src_to_dest_concurrent_limit, 500, "the qps limit of unit job to write"); using namespace tera; +typedef std::pair TablePair; +typedef std::pair TabletPair; + +namespace { + +common::Semaphore* g_sem; +leveldb::Dfs* g_dfs = NULL; +} + +struct Progress { + int finish_range_num; + int total_range_num; +}; + +struct DstTableCf { + std::string dst_table_name; + std::vector cf_list; +}; + +struct DiffStatData { + unsigned long only_in_src; + unsigned long only_in_dest; + unsigned long both_have_but_diff; + unsigned long both_have_and_same; // 交集 + unsigned long in_src_or_in_dest; // 并集 + + void reset() { + only_in_src = 0; + only_in_dest = 0; + both_have_but_diff = 0; + both_have_and_same = 0; + in_src_or_in_dest = 0; + }; +}; + const char* terautil_builtin_cmds[] = { "dump", - "dump \n\ - prepare_safe \n\ - prepare \n\ - run \n\ - show \n\ - check", - + "dump \n\ + prepare_safe \n\ + prepare \n\ + load \n\ + load a --tables_map_file to nexus \n\ + prepare_tables \n\ + dump src_tables to dest_tables according tables_map \n\ + run \n\ + running dump specify by tables_map in nexus \n\ + rewrite \n\ + rewrite failed kv_pairs stored in afs to dest tera \n\ + progress \n\ + show the dump job completing status \n\ + clean \n\ + clean nexus & afs useless data in dump root dir", + "diff", + "diff \n\ + prepare \n\ + generate some dicts and put one to ins for diff \n\ + run \n\ + run the diff job \n\ + progress \n\ + show the diff job completing status \n\ + result \n\ + stat the diff result and show the result \n\ + clean \n\ + clean nexus & afs useless data in diff root dir", "help", - "help [cmd] \n\ + "help [cmd] \n\ show manual for a or all cmd(s)", "version", - "version \n\ + "version \n\ show version info", }; static void ShowCmdHelpInfo(const char* msg) { - if (msg == NULL) { - return; - } - int count = sizeof(terautil_builtin_cmds)/sizeof(char*); - for (int i = 0; i < count; i+=2) { - if(strncmp(msg, terautil_builtin_cmds[i], 32) == 0) { - std::cout << terautil_builtin_cmds[i + 1] << std::endl; - return; - } + if (msg == NULL) { + return; + } + int count = sizeof(terautil_builtin_cmds) / sizeof(char*); + for (int i = 0; i < count; i += 2) { + if (strncmp(msg, terautil_builtin_cmds[i], 32) == 0) { + std::cout << terautil_builtin_cmds[i + 1] << std::endl; + return; } + } } static void ShowAllCmd() { - std::cout << "there is cmd list:" << std::endl; - int count = sizeof(terautil_builtin_cmds)/sizeof(char*); - bool newline = false; - for (int i = 0; i < count; i+=2) { - std::cout << std::setiosflags(std::ios::left) << std::setw(20) << terautil_builtin_cmds[i]; - if (newline) { - std::cout << std::endl; - newline = false; - } else { - newline = true; - } + std::cout << "there is cmd list:" << std::endl; + int count = sizeof(terautil_builtin_cmds) / sizeof(char*); + bool newline = false; + for (int i = 0; i < count; i += 2) { + std::cout << std::setiosflags(std::ios::left) << std::setw(20) << terautil_builtin_cmds[i]; + if (newline) { + std::cout << std::endl; + newline = false; + } else { + newline = true; } - std::cout << std::endl << "help [cmd] for details." << std::endl; + } + std::cout << std::endl + << "help [cmd] for details." << std::endl; } int32_t HelpOp(int32_t argc, char** argv) { - if (argc == 2) { - ShowAllCmd(); - } else if (argc == 3) { - ShowCmdHelpInfo(argv[2]); + if (argc == 2) { + ShowAllCmd(); + } else if (argc == 3) { + ShowCmdHelpInfo(argv[2]); + } else { + ShowCmdHelpInfo("help"); + } + return 0; +} + +int32_t InitDfsClient() { + if (g_dfs != NULL) { + return 0; + } + if (FLAGS_tera_leveldb_env_dfs_type == "nfs") { + if (access(FLAGS_tera_leveldb_env_nfs_conf_path.c_str(), R_OK) == 0) { + LOG(INFO) << "init nfs system: use configure file" << FLAGS_tera_leveldb_env_nfs_conf_path; + leveldb::Nfs::Init(FLAGS_tera_leveldb_env_nfs_mountpoint, + FLAGS_tera_leveldb_env_nfs_conf_path); + g_dfs = leveldb::Nfs::GetInstance(); } else { - ShowCmdHelpInfo("help"); + LOG(INFO) << "init nfs system: no configure file found"; + return -1; } - return 0; + } else if (FLAGS_tera_leveldb_env_dfs_type == "hdfs2") { + LOG(INFO) << "hdfs2 system support currently, please use hadoop-client"; + g_dfs = new leveldb::Hdfs2(FLAGS_tera_leveldb_env_hdfs2_nameservice_list); + } else if (FLAGS_tera_leveldb_env_dfs_type == "hdfs") { + g_dfs = new leveldb::Hdfs(); + } else { + LOG(INFO) << "init dfs system: " << FLAGS_tera_dfs_so_path << "(" << FLAGS_tera_dfs_conf << ")"; + g_dfs = leveldb::Dfs::NewDfs(FLAGS_tera_dfs_so_path, FLAGS_tera_dfs_conf); + } + return 0; } -int DumpRange(const std::string& ins_cluster_addr, - const std::string& ins_cluster_root_path, - const tera::TableMetaList& table_list, - const tera::TabletMetaList& tablet_list) { - int res = 0; - galaxy::ins::sdk::SDKError ins_err; - galaxy::ins::sdk::InsSDK ins_sdk(ins_cluster_addr); - std::string table_path = ins_cluster_root_path + "/table"; - std::string tablet_path = ins_cluster_root_path + "/tablet"; - //std::string lock_path = ins_cluster_root_path + "/lock"; - - for (int32_t i = 0; i < table_list.meta_size(); i++) { - const tera::TableMeta& meta = table_list.meta(i); - if (meta.table_name() == FLAGS_tera_master_meta_table_name) { - continue; - } - std::string key = table_path + "/" + meta.table_name(); - if(!ins_sdk.Put(key, meta.table_name(), &ins_err)) { - LOG(WARNING) << "ins put: " << key << ", error " << ins_err; - return -1; - } +int PutMapInNexus(const std::string& path, galaxy::ins::sdk::InsSDK* ins_sdk, + const std::map& tables_map) { + galaxy::ins::sdk::SDKError ins_err; + for (auto it = tables_map.cbegin(); it != tables_map.cend(); ++it) { + std::string key = path + "/" + it->first; + std::string value = it->second; + if (!ins_sdk->Put(key, value, &ins_err)) { + LOG(WARNING) << "ins put: key[" << key << "], value[" << value << "], error " << ins_err; + return -1; } + } + return 0; +} - for (int32_t i = 0; i < tablet_list.meta_size(); i++) { - const tera::TabletMeta& meta = tablet_list.meta(i); - if (meta.table_name() == FLAGS_tera_master_meta_table_name) { - continue; - } - std::string table_name = meta.table_name(); - std::string key = tablet_path + "/" + meta.table_name() + "/" + meta.key_range().key_start(); - std::string val = "0"; - val.append(meta.key_range().key_end()); - if(!ins_sdk.Put(key, val, &ins_err)) { - LOG(WARNING) << "ins put: " << key << ", error " << ins_err; - return -1; - } - //std::string lock_key = lock_path + "/" + meta.table_name() + "/" + meta.key_range().key_start(); +std::string InitInsValueFirstPartForDump(const std::string& tablet_id) { + return "0," + tablet_id + ":"; +} + +std::string InitInsValueFirstPartForDiff(const std::string& tablet_id) { + // 一共6个数字: + // 位置0: 是否已经比较完了,0:没比较完,1:比较完了 + // 位置1: 只在原表中 + // 位置2: 只在新表中 + // 位置3: 原表新表都在,但是不同 + // 位置4: 相同的(交集) + // 位置5: 并集 + // 位置6:tablet_id字符串 + std::string value_prefix = "0,0,0,0,0,0," + tablet_id + ":"; + return value_prefix; +} + +int DumpRange(const std::string& ins_cluster_root_path, galaxy::ins::sdk::InsSDK* ins_sdk, + const tera::TableMetaList& table_list, const tera::TabletMetaList& tablet_list, + std::function init_ins_value_first_part) { + int res = 0; + galaxy::ins::sdk::SDKError ins_err; + std::string table_path = ins_cluster_root_path + "/table"; + std::string tablet_path = ins_cluster_root_path + "/tablet"; + + for (int32_t i = 0; i < table_list.meta_size(); i++) { + const tera::TableMeta& meta = table_list.meta(i); + if (meta.table_name() == FLAGS_tera_master_meta_table_name) { + continue; } - return res; + std::string key = table_path + "/" + meta.table_name(); + if (!ins_sdk->Put(key, meta.table_name(), &ins_err)) { + LOG(WARNING) << "ins put: " << key << ", error " << ins_err; + return -1; + } + } + + for (int32_t i = 0; i < tablet_list.meta_size(); i++) { + const tera::TabletMeta& meta = tablet_list.meta(i); + if (meta.table_name() == FLAGS_tera_master_meta_table_name) { + continue; + } + std::string key_start = meta.key_range().key_start(); + std::string key_end = meta.key_range().key_end(); + + std::string debug_key_start; + std::string debug_key_end; + if (FLAGS_readable) { + debug_key_start = DebugString(key_start); + debug_key_end = DebugString(key_end); + if (debug_key_start != key_start || debug_key_end != key_end) { + LOG(INFO) << "debug_key_start[" << debug_key_start << "] <=> key_start[" << key_start + << "], debug_key_end[" << debug_key_end << "] <=> key_end[" << key_end << "]"; + } + } else { + debug_key_start = key_start; + debug_key_end = key_end; + } + + std::string table_name = meta.table_name(); + + const std::string& tablet_id_path = meta.path(); + std::size_t pos = tablet_id_path.find('/'); + CHECK(pos != std::string::npos); + std::string tablet_id = tablet_id_path.substr(pos + 1); + CHECK(tablet_id.length() > 0); + + std::string key = tablet_path + "/" + table_name + "/" + debug_key_start; + std::string val = init_ins_value_first_part(tablet_id); + val.append(debug_key_end); + if (!ins_sdk->Put(key, val, &ins_err)) { + LOG(WARNING) << "ins put: " << key << ", error " << ins_err; + return -1; + } + } + return res; } int ScanAndDumpMeta(const std::string& src_meta_tablet_addr, - const std::string& dest_meta_tablet_addr, - tera::TableMetaList* table_list, + const std::string& dest_meta_tablet_addr, tera::TableMetaList* table_list, tera::TabletMetaList* tablet_list) { - uint64_t seq_id = 0; - tera::ScanTabletRequest request; - tera::ScanTabletResponse response; - tera::WriteTabletRequest write_request; - tera::WriteTabletResponse write_response; - uint64_t request_size = 0; - write_request.set_sequence_id(seq_id++); - write_request.set_tablet_name(FLAGS_tera_master_meta_table_name); - write_request.set_is_sync(true); - write_request.set_is_instant(true); - - request.set_sequence_id(seq_id++); - request.set_table_name(FLAGS_tera_master_meta_table_name); - request.set_start(""); - request.set_end(""); - common::ThreadPool thread_pool(2); - tera::tabletnode::TabletNodeClient src_meta_node_client(&thread_pool, src_meta_tablet_addr); - bool success = true; - while ((success = src_meta_node_client.ScanTablet(&request, &response))) { - if (response.status() != tera::kTabletNodeOk) { - LOG(WARNING) << "dump: fail to load meta table: " - << StatusCodeToString(response.status()); - return -1; - } - int32_t record_size = response.results().key_values_size(); - LOG(INFO) << "scan meta table: " << record_size << " records"; - - bool need_dump = false; - std::string last_record_key; - for (int32_t i = 0; i < record_size; i++) { - const tera::KeyValuePair& record = response.results().key_values(i); - last_record_key = record.key(); - char first_key_char = record.key()[0]; - - TableMeta table_meta; - TabletMeta tablet_meta; - if (first_key_char == '~') { - LOG(INFO) << "(user: " << record.key().substr(1) << ")"; - } else if (first_key_char == '@') { - //ParseMetaTableKeyValue(record.key(), record.value(), table_list->add_meta()); - table_meta.Clear(); - ParseMetaTableKeyValue(record.key(), record.value(), &table_meta); - - std::string key, val; - //table_meta.set_status(kTableDisable); - table_meta.mutable_schema()->set_merge_size(0); // never merge during dump - table_meta.mutable_schema()->set_split_size(10000000); // never split during dump - MakeMetaTableKeyValue(table_meta, &key, &val); - - RowMutationSequence* mu_seq = write_request.add_row_list(); - mu_seq->set_row_key(record.key()); - Mutation* mutation = mu_seq->add_mutation_sequence(); - mutation->set_type(tera::kPut); - mutation->set_value(val); - request_size += mu_seq->ByteSize(); - if (request_size >= kMaxRpcSize) { // write req too large, dump into new tera cluster - need_dump = true; - } - - TableMeta* table_meta2 = table_list->add_meta(); - table_meta2->CopyFrom(table_meta); - } else if (first_key_char > '@') { - //ParseMetaTableKeyValue(record.key(), record.value(), tablet_list->add_meta()); - tablet_meta.Clear(); - ParseMetaTableKeyValue(record.key(), record.value(), &tablet_meta); - - std::string key, val; - tablet_meta.clear_parent_tablets(); - //tablet_meta.set_status(kTabletDisable); - MakeMetaTableKeyValue(tablet_meta, &key, &val); - - RowMutationSequence* mu_seq = write_request.add_row_list(); - mu_seq->set_row_key(record.key()); - Mutation* mutation = mu_seq->add_mutation_sequence(); - mutation->set_type(tera::kPut); - mutation->set_value(val); - request_size += mu_seq->ByteSize(); - if (request_size >= kMaxRpcSize) { // write req too large, dump into new tera cluster - need_dump = true; - } - - TabletMeta* tablet_meta2 = tablet_list->add_meta(); - tablet_meta2->CopyFrom(tablet_meta); - } else { - LOG(WARNING) << "dump: invalid meta record: " << record.key(); - } + uint64_t seq_id = 0; + tera::ScanTabletRequest request; + tera::ScanTabletResponse response; + tera::WriteTabletRequest write_request; + tera::WriteTabletResponse write_response; + uint64_t request_size = 0; + write_request.set_sequence_id(seq_id++); + write_request.set_tablet_name(FLAGS_tera_master_meta_table_name); + write_request.set_is_sync(true); + write_request.set_is_instant(true); + + request.set_sequence_id(seq_id++); + request.set_table_name(FLAGS_tera_master_meta_table_name); + request.set_start(""); + request.set_end(""); + common::ThreadPool thread_pool(2); + tera::tabletnode::TabletNodeClient src_meta_node_client(&thread_pool, src_meta_tablet_addr); + bool success = true; + while ((success = src_meta_node_client.ScanTablet(&request, &response))) { + if (response.status() != tera::kTabletNodeOk) { + LOG(WARNING) << "dump: fail to load meta table: " << StatusCodeToString(response.status()); + return -1; + } + int32_t record_size = response.results().key_values_size(); + LOG(INFO) << "scan meta table: " << record_size << " records"; + + bool need_dump = false; + std::string last_record_key; + for (int32_t i = 0; i < record_size; i++) { + const tera::KeyValuePair& record = response.results().key_values(i); + last_record_key = record.key(); + char first_key_char = record.key()[0]; + + TableMeta table_meta; + TabletMeta tablet_meta; + if (first_key_char == '~') { + LOG(INFO) << "(user: " << record.key().substr(1) << ")"; + } else if (first_key_char == '|') { + // user&passwd&role&permission + } else if (first_key_char == '@') { + // ParseMetaTableKeyValue(record.key(), record.value(), + // table_list->add_meta()); + table_meta.Clear(); + ParseMetaTableKeyValue(record.key(), record.value(), &table_meta); + + std::string key, val; + // table_meta.set_status(kTableDisable); + table_meta.mutable_schema()->set_merge_size(0); // never merge during dump + table_meta.mutable_schema()->set_split_size(10000000); // never split during dump + MakeMetaTableKeyValue(table_meta, &key, &val); + + RowMutationSequence* mu_seq = write_request.add_row_list(); + mu_seq->set_row_key(record.key()); + Mutation* mutation = mu_seq->add_mutation_sequence(); + mutation->set_type(tera::kPut); + mutation->set_value(val); + request_size += mu_seq->ByteSize(); + if (request_size >= kMaxRpcSize) { // write req too large,dump into new tera cluster + need_dump = true; } - if ((need_dump || record_size <= 0) && - write_request.row_list_size() > 0) { - tabletnode::TabletNodeClient dest_meta_node_client(&thread_pool, dest_meta_tablet_addr); - if (!dest_meta_node_client.WriteTablet(&write_request, &write_response)) { - LOG(WARNING) << "dump: fail to dump meta tablet: " - << StatusCodeToString(kRPCError); - return -1; - } - tera::StatusCode status = write_response.status(); - if (status == tera::kTabletNodeOk && write_response.row_status_list_size() > 0) { - status = write_response.row_status_list(0); - } - if (status != kTabletNodeOk) { - LOG(WARNING) << "dump: fail to dump meta tablet: " - << StatusCodeToString(status); - return -1; - } - write_request.clear_row_list(); - write_response.Clear(); - request_size = 0; - } - if (record_size <= 0) { - response.Clear(); - LOG(INFO) << "dump: scan meta table success"; - break; + TableMeta* table_meta2 = table_list->add_meta(); + table_meta2->CopyFrom(table_meta); + } else if (first_key_char > '@') { + // ParseMetaTableKeyValue(record.key(), record.value(), + // tablet_list->add_meta()); + tablet_meta.Clear(); + ParseMetaTableKeyValue(record.key(), record.value(), &tablet_meta); + + std::string key, val; + tablet_meta.clear_parent_tablets(); + // tablet_meta.set_status(kTabletDisable); + MakeMetaTableKeyValue(tablet_meta, &key, &val); + + RowMutationSequence* mu_seq = write_request.add_row_list(); + mu_seq->set_row_key(record.key()); + Mutation* mutation = mu_seq->add_mutation_sequence(); + mutation->set_type(tera::kPut); + mutation->set_value(val); + request_size += mu_seq->ByteSize(); + if (request_size >= kMaxRpcSize) { // write req too large,dump into new tera cluster + need_dump = true; } - std::string next_record_key = tera::NextKey(last_record_key); - request.set_start(next_record_key); - request.set_end(""); - request.set_sequence_id(seq_id++); - response.Clear(); + TabletMeta* tablet_meta2 = tablet_list->add_meta(); + tablet_meta2->CopyFrom(tablet_meta); + } else { + LOG(WARNING) << "dump: invalid meta record: " << record.key(); + } } - return success? 0: -1; + + if ((need_dump || record_size <= 0) && write_request.row_list_size() > 0) { + tabletnode::TabletNodeClient dest_meta_node_client(&thread_pool, dest_meta_tablet_addr); + if (!dest_meta_node_client.WriteTablet(&write_request, &write_response)) { + LOG(WARNING) << "dump: fail to dump meta tablet: " << StatusCodeToString(kRPCError); + return -1; + } + tera::StatusCode status = write_response.status(); + if (status == tera::kTabletNodeOk && write_response.row_status_list_size() > 0) { + status = write_response.row_status_list(0); + } + if (status != kTabletNodeOk) { + LOG(WARNING) << "dump: fail to dump meta tablet: " << StatusCodeToString(status); + return -1; + } + write_request.clear_row_list(); + write_response.Clear(); + request_size = 0; + } + if (record_size <= 0) { + response.Clear(); + LOG(INFO) << "dump: scan meta table success"; + break; + } + + std::string next_record_key = tera::NextKey(last_record_key); + request.set_start(next_record_key); + request.set_end(""); + request.set_sequence_id(seq_id++); + response.Clear(); + } + return success ? 0 : -1; } int DumpPrepareOp() { - int res = 0; - std::string tera_src_conf = FLAGS_dump_tera_src_conf; - std::string tera_src_root = FLAGS_dump_tera_src_root_path; - std::string tera_dest_conf = FLAGS_dump_tera_dest_conf; - std::string tera_dest_root = FLAGS_dump_tera_dest_root_path; - - // read src meta ts addr and dest meta ts addr - std::string src_meta_addr, dest_meta_addr; - src_meta_addr = FLAGS_dump_tera_src_meta_addr; - dest_meta_addr = FLAGS_dump_tera_dest_meta_addr; - - // scan and dump meta - tera::TableMetaList table_list; - tera::TabletMetaList tablet_list; - if ((res = ScanAndDumpMeta(src_meta_addr, dest_meta_addr, &table_list, &tablet_list)) >= 0) { - // create key range in nexus - std::string ins_cluster_addr = FLAGS_ins_cluster_addr; - std::string ins_cluster_root_path = FLAGS_ins_cluster_root_path; - res = DumpRange(ins_cluster_addr, ins_cluster_root_path, table_list, tablet_list); + int res = 0; + std::string tera_src_conf = FLAGS_dump_tera_src_conf; + std::string tera_src_root = FLAGS_dump_tera_src_root_path; + std::string tera_dest_conf = FLAGS_dump_tera_dest_conf; + std::string tera_dest_root = FLAGS_dump_tera_dest_root_path; + + // read src meta ts addr and dest meta ts addr + std::string src_meta_addr, dest_meta_addr; + src_meta_addr = FLAGS_dump_tera_src_meta_addr; + dest_meta_addr = FLAGS_dump_tera_dest_meta_addr; + + // scan and dump meta + tera::TableMetaList table_list; + tera::TabletMetaList tablet_list; + + if ((res = ScanAndDumpMeta(src_meta_addr, dest_meta_addr, &table_list, &tablet_list)) >= 0) { + // create key range in nexus + std::string ins_cluster_root_path = FLAGS_ins_cluster_dump_root_path; + std::string ins_cluster_addr = FLAGS_ins_cluster_addr; + galaxy::ins::sdk::InsSDK ins_sdk(ins_cluster_addr); + auto init_ins_value_first_part = std::bind(InitInsValueFirstPartForDump, std::placeholders::_1); + res = DumpRange(ins_cluster_root_path, &ins_sdk, table_list, tablet_list, + init_ins_value_first_part); + } + return res; +} + +int GetMapFromNexus(const std::string& path, galaxy::ins::sdk::InsSDK* ins_sdk, + std::map* nexus_map) { + int res = 0; + std::string nexus_start_key = path + "/"; + std::string nexus_end_key = path + "/"; + nexus_start_key.append(1, '\0'); + nexus_end_key.append(1, '\255'); + std::string delimiter("/"); + galaxy::ins::sdk::ScanResult* result = ins_sdk->Scan(nexus_start_key, nexus_end_key); + while (!result->Done()) { + if (result->Error() != galaxy::ins::sdk::kOK) { + LOG(WARNING) << "scan fail: start " << nexus_start_key << ", end " << nexus_end_key + << ", err " << result->Error(); + res = -1; + break; } - return res; + std::string key = result->Key(); + std::vector nexus_path; + SplitString(key, delimiter, &nexus_path); + std::string nexus_key = nexus_path[nexus_path.size() - 1]; + std::string nexus_value = result->Value(); + + (*nexus_map)[nexus_key] = nexus_value; + result->Next(); + } + delete result; + return res; } -int GetAndLockDumpRange(const std::string& ins_cluster_root_path, - std::string* table_name, - std::string* start_key, - std::string* end_key, - galaxy::ins::sdk::InsSDK* ins_sdk) { - int res = -1; - galaxy::ins::sdk::SDKError ins_err; - //std::string table_path = ins_cluster_root_path + "/table"; - std::string tablet_path = ins_cluster_root_path + "/tablet"; - std::string lock_path = ins_cluster_root_path + "/lock"; - - std::string start = tablet_path + "/"; - std::string end = tablet_path + "/"; - if (table_name->size()) { - start.append(*table_name); - start.append("/"); - start.append(*start_key); - if (*start_key == "") { - start.append(1, '\0'); - } +std::string GetEndKey(const std::string& val) { + std::size_t pos = val.find(':'); + CHECK(pos != std::string::npos); + return val.substr(pos + 1); +} + +std::string GetTabletIdForDiff(const std::string& val) { + std::size_t pos = val.find(':'); + CHECK(pos != std::string::npos); + std::string prefix = val.substr(0, pos); + std::vector prefix_datas; + SplitString(prefix, ",", &prefix_datas); + return prefix_datas[6]; +} + +std::string GetTabletIdForDump(const std::string& val) { + std::size_t pos = val.find(':'); + CHECK(pos != std::string::npos); + std::string prefix = val.substr(0, pos); + std::vector prefix_datas; + SplitString(prefix, ",", &prefix_datas); + return prefix_datas[1]; +} +int GetAndLockDumpRange(const std::string& ins_cluster_root_path, std::string* table_name, + std::string* tablet_id, std::string* start_key, std::string* end_key, + galaxy::ins::sdk::InsSDK* ins_sdk, + std::function get_tablet_id_func) { + int res = -1; + galaxy::ins::sdk::SDKError ins_err; + + std::string tablet_path = ins_cluster_root_path + "/tablet"; + std::string lock_path = ins_cluster_root_path + "/lock"; + + std::string start = tablet_path + "/"; + std::string end = tablet_path + "/"; + if (table_name->size()) { + start.append(*table_name); + start.append("/"); + start.append(*start_key); + // the start_key is the last end_key + if (*start_key == "") { + // when we finish scaning all ins tablet range, the last end_key is "", + // and here we restart a new scan loop + // and going on, untill all tablet range's result is 1 (finish dump or diff) + start.append(1, '\0'); + } + // if the last end_key is not "", it means we have not finished one scan loop, + // so we continue scaning ins tablet range, + // for speeding up scaning ins, here we set start by last end_key + } + end.append(1, '\255'); + galaxy::ins::sdk::ScanResult* result = ins_sdk->Scan(start, end); + while (!result->Done()) { + if (result->Error() != galaxy::ins::sdk::kOK) { + LOG(WARNING) << "scan fail: start " << start << ", end " << end << ", err " + << result->Error(); + res = -1; + break; + } + std::string key = result->Key(); + std::string val = result->Value(); + std::string has_done = val.substr(0, 1); + if (has_done == "1") { // someone has copy it + result->Next(); + VLOG(1) << "key = " << key << ", value = " << val << ", has done"; + continue; } - end.append(1, '\255'); - galaxy::ins::sdk::ScanResult* result = ins_sdk->Scan(start, end); - while (!result->Done()) { - if (result->Error() != galaxy::ins::sdk::kOK) { - LOG(INFO) << "scan fail: start " << start << ", end " << end << ", err " << result->Error(); - res = -1; - break; - } - std::string key = result->Key(); - std::string val = result->Value(); - std::string has_done = val.substr(0, 1); - if (has_done == "1") { // someone has copy it - result->Next(); - continue; - } - //std::string key = tablet_path + "/" + meta.table_name() + "/" + meta.key_range().key_start(); - std::string str = key.substr(tablet_path.length() + 1); - std::size_t pos = str.find('/'); - *table_name = str.substr(0, pos); - *start_key = str.substr(pos + 1); - *end_key = val.substr(1); - - std::string lock_key = lock_path + "/" + *table_name + "/" + *start_key + "/"; - if (!ins_sdk->TryLock(lock_key, &ins_err)) { - LOG(INFO) << "ins: TryLock fail: " << lock_key << ", err " << ins_err; - result->Next(); - continue; - } + std::string str = key.substr(tablet_path.length() + 1); + std::size_t pos = str.find('/'); + *table_name = str.substr(0, pos); + *start_key = str.substr(pos + 1); + *end_key = GetEndKey(val); + *tablet_id = get_tablet_id_func(val); - std::string val1; - if (ins_sdk->Get(key, &val1, &ins_err)) { - has_done = val1.substr(0, 1); - } else { - LOG(INFO) << "ins: get fail: " << key << ", err " << ins_err; - } - if (has_done == "1") { // someone has copy it - if (!ins_sdk->UnLock(lock_key, &ins_err)) { - LOG(INFO) << "ins: unlock fail: " << lock_key << ", err " << ins_err; - } - result->Next(); - continue; - } + VLOG(1) << "start_key = " << *start_key << ", end_key = " << *end_key << ", try lock"; - res = 0; - break; // begin to scan + std::string lock_key = lock_path + "/" + *table_name + "/" + *start_key + "/"; + if (!ins_sdk->TryLock(lock_key, &ins_err)) { + LOG(WARNING) << "ins: TryLock fail: " << lock_key << ", err " << ins_err; + result->Next(); + continue; } - delete result; - return res; + + std::string val1; + if (ins_sdk->Get(key, &val1, &ins_err)) { + has_done = val1.substr(0, 1); + } else { + LOG(WARNING) << "ins: get fail: " << key << ", err " << ins_err; + if (!ins_sdk->UnLock(lock_key, &ins_err)) { + LOG(WARNING) << "ins: unlock fail: " << lock_key << ", err " << ins_err; + } + abort(); + } + if (has_done == "1") { // someone has copy it + if (!ins_sdk->UnLock(lock_key, &ins_err)) { + LOG(WARNING) << "ins: unlock fail: " << lock_key << ", err " << ins_err; + } + result->Next(); + continue; + } + + res = 0; + break; // begin to scan + } + delete result; + return res; } int ReleaseAndUnlockDumpRange(const std::string& ins_cluster_root_path, - const std::string& table_name, - const std::string& start_key, - const std::string& end_key, + const std::string& src_table_name, const std::string& tablet_id, + const std::string& start_key, const std::string& end_key, galaxy::ins::sdk::InsSDK* ins_sdk) { - int res = 0; - galaxy::ins::sdk::SDKError ins_err; - //std::string table_path = ins_cluster_root_path + "/table"; - std::string tablet_path = ins_cluster_root_path + "/tablet"; - std::string lock_path = ins_cluster_root_path + "/lock"; + int res = 0; + galaxy::ins::sdk::SDKError ins_err; + // std::string table_path = ins_cluster_root_path + "/table"; + std::string tablet_path = ins_cluster_root_path + "/tablet"; + std::string lock_path = ins_cluster_root_path + "/lock"; + + std::string key = tablet_path + "/" + src_table_name + "/" + start_key; + std::string val = "1," + tablet_id + ":"; + val.append(end_key); + + if (!ins_sdk->Put(key, val, &ins_err)) { + LOG(WARNING) << "ins put: " << key << ", error " << ins_err; + } + + std::string lock_key = lock_path + "/" + src_table_name + "/" + start_key + "/"; + if (!ins_sdk->UnLock(lock_key, &ins_err)) { + LOG(WARNING) << "ins unlock fail: " << lock_key << ", error " << ins_err; + } + return res; +} - std::string key = tablet_path + "/" + table_name + "/" + start_key; - std::string val = "1"; - val.append(end_key); +struct ScanDumpContext { + virtual ~ScanDumpContext() { row_result.clear_key_values(); } + Counter counter; + RowResult row_result; + Mutex mutex; + volatile bool fail; + std::string reason; +}; - if(!ins_sdk->Put(key, val, &ins_err)) { - LOG(WARNING) << "ins put: " << key << ", error " << ins_err; +void ScanAndDumpCallBack(RowMutation* mu) { + g_sem->Release(); + ScanDumpContext* ctx = (ScanDumpContext*)mu->GetContext(); + if (mu->GetError().GetType() != tera::ErrorCode::kOK) { + if (ctx->fail == false) { + ctx->fail = true; + ctx->reason = mu->GetError().ToString(); } - - std::string lock_key = lock_path + "/" + table_name + "/" + start_key + "/"; - if (!ins_sdk->UnLock(lock_key, &ins_err)) { - LOG(WARNING) << "ins unlock fail: " << lock_key << ", error " << ins_err; + if (FLAGS_enable_dump_failed_kv) { + MutexLock l(&ctx->mutex); + RowMutationImpl* mu_impl = dynamic_cast(mu); + for (uint32_t index = 0; index < mu_impl->MutationNum(); ++index) { + KeyValuePair* kv_pair = ctx->row_result.add_key_values(); + kv_pair->set_key(mu_impl->RowKey()); + kv_pair->set_column_family(mu_impl->GetMutation(index).family); + kv_pair->set_qualifier(mu_impl->GetMutation(index).qualifier); + kv_pair->set_value(mu_impl->GetMutation(index).value); + kv_pair->set_timestamp(mu_impl->GetMutation(index).timestamp); + } } - return res; + } + delete mu; + + ctx->counter.Dec(); + return; } -struct ScanDumpContext { - Counter counter; - volatile bool fail; - std::string reason; +struct RewriteContext { + Table* target_table; + KeyValuePair* kv_pair; + bool hold_kv_pair; + Counter* counter; + Counter failed_times; + + RewriteContext() : target_table(NULL), kv_pair(NULL), hold_kv_pair(false), counter(NULL) {} }; -void ScanAndDumpCallBack(RowMutation* mu) { - ScanDumpContext* ctx = (ScanDumpContext*)mu->GetContext(); - if (mu->GetError().GetType() != tera::ErrorCode::kOK) { - if (ctx->fail == false) { - ctx->fail = true; - ctx->reason = mu->GetError().ToString(); +void RewriteCallBack(RowMutation* mu) { + g_sem->Release(); + RewriteContext* ctx = (RewriteContext*)mu->GetContext(); + if (mu->GetError().GetType() != tera::ErrorCode::kOK) { + ctx->failed_times.Inc(); + if (ctx->failed_times.Get() <= FLAGS_rewrite_retry_times) { + // Retry write this mu + g_sem->Acquire(); + mu->Reset(ctx->kv_pair->key()); + mu->Put(ctx->kv_pair->column_family(), ctx->kv_pair->qualifier(), ctx->kv_pair->value(), + ctx->kv_pair->timestamp()); + mu->SetContext(ctx); + mu->SetCallBack(RewriteCallBack); + ctx->target_table->ApplyMutation(mu); + return; + } + LOG(WARNING) << "failed write key[" << ctx->kv_pair->key() << "], cf[" + << ctx->kv_pair->column_family() << "], qu[" << ctx->kv_pair->qualifier() + << "], value[" << ctx->kv_pair->value() << "], timestamp[" + << ctx->kv_pair->timestamp() << "], error : " << mu->GetError().ToString() + << std::endl; + } + delete mu; + if (ctx->hold_kv_pair) { + delete ctx->kv_pair; + } + ctx->counter->Dec(); + delete ctx; + return; +} + +bool DeserializationRowResult(const char* data, ssize_t data_len, RowResult* row_result) { + ::google::protobuf::io::ArrayInputStream input(data, data_len); + ::google::protobuf::io::CodedInputStream decoder(&input); + decoder.SetTotalBytesLimit(FLAGS_pb_total_bytes_limit_MB * 1024 * 1024, + FLAGS_pb_warning_threshold_MB * 1024 * 1024); + return (row_result->ParseFromCodedStream(&decoder) && decoder.ConsumedEntireMessage()); +} + +void WriteToDfs(const std::string& file_path, const std::string& write_string) { + int32_t to_write_len = write_string.length(); + if (to_write_len > 0) { + leveldb::DfsFile* file = g_dfs->OpenFile(file_path, leveldb::WRONLY); + int32_t len = file->Write(write_string.c_str(), to_write_len); + if (len == -1) { + LOG(WARNING) << "Write afs failed [" << file_path << "]"; + } else if (len != to_write_len) { + LOG(WARNING) << "Write afs miss some data [" << file_path << "]"; + } + file->CloseFile(); + } else { + LOG(WARNING) << "string len for Writing afs is 0, file_path: " << file_path; + } + return; +} + +bool SerializationRowResult(const RowResult& row_result, std::string* data) { + ::google::protobuf::io::StringOutputStream output(data); + ::google::protobuf::io::CodedOutputStream coder(&output); + return row_result.SerializeToCodedStream(&coder); +} + +int ScanAndDumpData(Table* src, Table* dest, const std::string& table_name, + const std::string& tablet_id, const std::string& start_key, + const std::string& end_key, + const std::map& tables_cf_map) { + int res = 0; + ErrorCode err; + + std::string raw_start_str; + std::string raw_end_str; + if (FLAGS_readable) { + if (!ParseDebugString(start_key, &raw_start_str) || !ParseDebugString(end_key, &raw_end_str)) { + LOG(WARNING) << "Parse debug string failed!"; + return -1; + } + } else { + raw_start_str = start_key; + raw_end_str = end_key; + } + VLOG(1) << "Start scan start_key[" << start_key << "], end_key[" << end_key << "]"; + + ScanDescriptor desc(raw_start_str); + desc.SetEnd(raw_end_str); + desc.SetMaxVersions(std::numeric_limits::max()); + if (FLAGS_dump_endtime != 0) { + desc.SetTimeRange(FLAGS_dump_endtime, FLAGS_dump_startime); + } + + ScanDumpContext* ctx = new ScanDumpContext; + ctx->counter.Set(1); + ctx->fail = false; + + // Deal with specifing cfs + std::vector cfs; + auto it = tables_cf_map.find(table_name); + if (it != tables_cf_map.cend()) { + std::string delimiter(FLAGS_lg_and_cf_delimiter); + SplitString(it->second, delimiter, &cfs); + } + std::for_each(cfs.cbegin(), cfs.cend(), + [&desc](const std::string& cf) { desc.AddColumnFamily(cf); }); + + ResultStream* result_stream; + if ((result_stream = src->Scan(desc, &err)) == NULL) { + LOG(WARNING) << "scan dump fail(new scan): " << table_name << ", start " << start_key + << ", end " << end_key; + delete ctx; + return -1; + } + while (!result_stream->Done(&err)) { + g_sem->Acquire(); + RowMutation* mu = dest->NewRowMutation(result_stream->RowName()); + mu->Put(result_stream->Family(), result_stream->Qualifier(), result_stream->Value(), + result_stream->Timestamp()); + ctx->counter.Inc(); + mu->SetContext(ctx); + mu->SetCallBack(ScanAndDumpCallBack); + dest->ApplyMutation(mu); + + result_stream->Next(); + } + delete result_stream; + ctx->counter.Dec(); + + while (ctx->counter.Get() > 0) { + sleep(3); + } + + VLOG(1) << "Finish scan start_key[" << start_key << "], end_key[" << end_key << "]"; + + if (err.GetType() != tera::ErrorCode::kOK) { + LOG(WARNING) << "scan dump fail: " << table_name << ", start " << start_key << ", end " + << end_key << ", reason " << err.GetReason(); + res = -1; + } + + if (FLAGS_enable_dump_failed_kv && ctx->fail == true) { + LOG(WARNING) << "scan dump fail: " << table_name << ", start " << start_key << ", end " + << end_key << ", reason " << ctx->reason; + // Write the RowResult to afs file + if (ctx->row_result.key_values_size() > 0) { + std::string row_result_str; + // ctx->row_result.SerializeToString(&row_result_str); + if (!SerializationRowResult(ctx->row_result, &row_result_str)) { + LOG(WARNING) << "row_result serilize failed!"; + } else { + std::string file_path = + FLAGS_dump_failed_kv_afs_path + "/" + table_name + "/" + tablet_id + ".pbtxt"; + WriteToDfs(file_path, row_result_str); + } + } + } + delete ctx; + return res; +} + +int DumpRunOp() { + int res = 0; + std::string ins_cluster_addr = FLAGS_ins_cluster_addr; + std::string ins_cluster_root_path = FLAGS_ins_cluster_dump_root_path; + std::string tera_src_conf = FLAGS_dump_tera_src_conf; + std::string tera_dest_conf = FLAGS_dump_tera_dest_conf; + + // get and lock range + ErrorCode err; + std::unique_ptr src_client(Client::NewClient(tera_src_conf, &err)); + if (src_client == nullptr) { + LOG(WARNING) << "open src client fail: " << tera_src_conf << ", err " << err.ToString(); + return -1; + } + std::unique_ptr dest_client(Client::NewClient(tera_dest_conf, &err)); + if (dest_client == nullptr) { + LOG(WARNING) << "open dest client fail: " << tera_dest_conf << ", err " << err.ToString(); + return -1; + } + std::unique_ptr
src_table; + std::unique_ptr
dest_table; + + galaxy::ins::sdk::InsSDK ins_sdk(ins_cluster_addr); + std::string src_table_name, start_key, end_key, last_table_name, tablet_id; + + std::map tables_map; + std::string path = ins_cluster_root_path + "/" + FLAGS_dump_tables_map_path; + if (-1 == GetMapFromNexus(path, &ins_sdk, &tables_map)) { + LOG(WARNING) << "GetMapFromNexus failed in DumpRun"; + return -1; + } + for (auto it = tables_map.cbegin(); it != tables_map.cend(); ++it) { + LOG(INFO) << "src table[" << it->first << "] => dest table[" << it->second << "]"; + } + + std::map tables_cf_map; + path = ins_cluster_root_path + "/" + FLAGS_dump_tables_cf_map_path; + if (-1 == GetMapFromNexus(path, &ins_sdk, &tables_cf_map)) { + LOG(WARNING) << "GetMapFromNexus failed in DumpRun"; + return -1; + } + for (auto it = tables_cf_map.cbegin(); it != tables_cf_map.cend(); ++it) { + LOG(INFO) << "src table[" << it->first << "] => src cf[" << it->second << "]"; + } + + if (-1 == InitDfsClient()) { + LOG(WARNING) << "init dfs client failed"; + return -1; + } + auto get_tablet_id_func = std::bind(GetTabletIdForDump, std::placeholders::_1); + while (GetAndLockDumpRange(ins_cluster_root_path, &src_table_name, &tablet_id, &start_key, + &end_key, &ins_sdk, get_tablet_id_func) == 0) { + if (last_table_name != src_table_name) { // table change + src_table.reset(); + dest_table.reset(); + src_table.reset(src_client->OpenTable(src_table_name, &err)); + if (src_table == nullptr) { + LOG(WARNING) << "open src table fail: " << src_table_name << ", err " << err.ToString(); + continue; + } + std::string dest_table_name = src_table_name; + if (tables_map.size() != 0) { + if (tables_map.find(src_table_name) != tables_map.cend()) { + dest_table_name = tables_map[src_table_name]; + } else { + LOG(WARNING) << "Couldn't find src_table_name[" << src_table_name << "] in tables_map"; + return -1; } + } + dest_table.reset(dest_client->OpenTable(dest_table_name, &err)); + if (dest_table == nullptr) { + src_table.reset(); + LOG(WARNING) << "open dest table fail: " << dest_table_name << ", err " << err.ToString(); + continue; + } } - delete mu; + last_table_name = src_table_name; - ctx->counter.Dec(); - return; + if ((res = ScanAndDumpData(src_table.get(), dest_table.get(), src_table_name, tablet_id, + start_key, end_key, tables_cf_map)) < 0) { + LOG(WARNING) << "scan dump data fail: " << src_table_name << ", start " << start_key + << ", end " << end_key; + } else { + VLOG(1) << "Set has_done for start_key[" << start_key << "], end_key[" << end_key << "]"; + ReleaseAndUnlockDumpRange(ins_cluster_root_path, src_table_name, tablet_id, start_key, + end_key, &ins_sdk); + } + start_key = end_key; + } + LOG(INFO) << "Finish DumpRunOp"; + return res; +} + +void GetTableKeyRange(const std::string& table_name, const TabletMetaList& tablet_list, + std::vector* delimiters) { + for (int32_t i = 0; i < tablet_list.meta_size(); i++) { + const tera::TabletMeta& meta = tablet_list.meta(i); + if (table_name == meta.table_name() && meta.key_range().key_start().size() > 0) { + delimiters->push_back(meta.key_range().key_start()); + } + } } -int ScanAndDumpData(Table* src, Table* dest, - const std::string& table_name, - const std::string& start_key, - const std::string& end_key) { - int res = 0; - ErrorCode err; - - ScanDescriptor desc(start_key); - desc.SetEnd(end_key); - desc.SetMaxVersions(std::numeric_limits::max()); - ResultStream* result_stream; - if ((result_stream = src->Scan(desc, &err)) == NULL) { - LOG(INFO) << "scan dump fail(new scan): " << table_name << ", start " << start_key - << ", end " << end_key; +int ManualCreateTable(std::shared_ptr client, const std::string& table_name, + const TableSchema& schema, const std::vector& delimiters) { + ErrorCode err; + TableDescriptor table_desc; + table_desc.SetTableName(table_name); + TableSchemaToDesc(schema, &table_desc); + table_desc.SetSplitSize(10000000); + table_desc.SetMergeSize(0); + if (!client->CreateTable(table_desc, delimiters, &err)) { + LOG(WARNING) << "manual create error: " << table_name << ", err: " << err.ToString(); + return -1; + } + return 0; +} + +int ManualSplitTable(std::shared_ptr client, const std::string& table_name, + const std::vector& delimiters) { + ErrorCode err; + std::vector arg_list; + arg_list.push_back("split"); + arg_list.push_back(table_name); + for (uint32_t i = 0; i < delimiters.size(); i++) { + arg_list.push_back(delimiters[i]); + if (!client->CmdCtrl("table", arg_list, NULL, NULL, &err)) { + LOG(WARNING) << "manual split table fail(ignore old master): " << table_name + << ", delimiters_size: " << delimiters.size() << ", err: " << err.ToString(); + } + usleep(FLAGS_dump_manual_split_interval); + arg_list.pop_back(); + } + return 0; +} + +bool SchemaCompare(const TableSchema& src, const TableSchema& dest) { + return ((src.raw_key() == dest.raw_key()) && (src.kv_only() == dest.kv_only()) && + (src.name() == dest.name()) && (!IsSchemaCfDiff(src, dest)) && + (!IsSchemaLgDiff(src, dest))); +} + +int GetOrSetTabletLocationSafe(Client* src_client, Client* dest_client, TableMetaList* table_list, + TabletMetaList* tablet_list) { + // get src and dest tablet location + ErrorCode err; + TableMetaList src_table_list; + TabletMetaList src_tablet_list; + std::shared_ptr src_client_impl( + (static_cast(src_client))->GetClientImpl()); + if (!src_client_impl->ShowTablesInfo(&src_table_list, &src_tablet_list, false, &err)) { + LOG(WARNING) << "tera_master show src cluster fail: " << err.ToString(); + return -1; + } + + TableMetaList dest_table_list; + TabletMetaList dest_tablet_list; + std::shared_ptr dest_client_impl( + (static_cast(dest_client))->GetClientImpl()); + if (!dest_client_impl->ShowTablesInfo(&dest_table_list, &dest_tablet_list, false, &err)) { + LOG(WARNING) << "tera_master show dest cluster fail: " << err.ToString(); + return -1; + } + + // get table meta set + std::map src_table_set; + for (int32_t i = 0; i < src_table_list.meta_size(); i++) { + const tera::TableMeta& meta = src_table_list.meta(i); + TableSchema& schema = src_table_set[meta.table_name()]; + schema.CopyFrom(meta.schema()); + } + std::map dest_table_set; + for (int32_t i = 0; i < dest_table_list.meta_size(); i++) { + const tera::TableMeta& meta = dest_table_list.meta(i); + TableSchema& schema = dest_table_set[meta.table_name()]; + schema.CopyFrom(meta.schema()); + } + + // create or split table, and filter schema not match meta + for (int32_t i = 0; i < src_table_list.meta_size(); i++) { + const tera::TableMeta& meta = src_table_list.meta(i); + if (meta.table_name() == FLAGS_tera_master_meta_table_name) { + continue; + } + std::vector delimiters; + GetTableKeyRange(meta.table_name(), src_tablet_list, &delimiters); + if (dest_table_set.find(meta.table_name()) == dest_table_set.end()) { + if (ManualCreateTable(dest_client_impl, meta.table_name(), meta.schema(), delimiters) < 0) { + return -1; + } + } else if (SchemaCompare(dest_table_set[meta.table_name()], meta.schema())) { + if (FLAGS_dump_enable_manual_split && + ManualSplitTable(dest_client_impl, meta.table_name(), delimiters) < 0) { return -1; + } + } else { + LOG(WARNING) << "table schema not match: " << meta.table_name() + << ", src schema: " << meta.schema().ShortDebugString() + << ", dest schema: " << dest_table_set[meta.table_name()].ShortDebugString(); + src_table_set.erase(meta.table_name()); + continue; } - ScanDumpContext* ctx = new ScanDumpContext; - ctx->counter.Set(1); - ctx->fail = false; - while (!result_stream->Done(&err)) { - RowMutation* mu = dest->NewRowMutation(result_stream->RowName()); - mu->Put(result_stream->Family(), result_stream->Qualifier(), - result_stream->Value(), result_stream->Timestamp()); - ctx->counter.Inc(); - mu->SetContext(ctx); - mu->SetCallBack(ScanAndDumpCallBack); - dest->ApplyMutation(mu); + tera::TableMeta* meta2 = table_list->add_meta(); + meta2->CopyFrom(meta); + } + + // filter key range + for (int32_t i = 0; i < src_tablet_list.meta_size(); i++) { + const tera::TabletMeta& meta = src_tablet_list.meta(i); + if (src_table_set.find(meta.table_name()) == src_table_set.end()) { + continue; + } + tera::TabletMeta* meta2 = tablet_list->add_meta(); + meta2->CopyFrom(meta); + } + return 0; +} - result_stream->Next(); +int LoadTablesMapFile(const std::string& file_name, std::map* tables_map, + std::map* tables_lg_map) { + std::fstream fin(file_name.c_str()); + std::string line; + while (getline(fin, line)) { + // line format : + // test1,test2 + // test3:lg1|lg2|lg3,test5:lg1|lg2|lg3 + std::vector tables; + std::string delimiter(","); + SplitString(line, delimiter, &tables); + if (tables.size() != 2) { + return -1; + } + if (tables_map->find(tables[0]) != tables_map->end()) { + LOG(WARNING) << "Reduplicative table name"; + return -1; + } + std::string src_table = tables[0]; + std::string dest_table = tables[1]; + std::string lg_delimiter(":"); + std::size_t pos = src_table.find(lg_delimiter); + if (pos != std::string::npos) { + std::string src_table_no_lg = src_table.substr(0, pos); + std::string lg = src_table.substr(pos + 1); + + pos = dest_table.find(lg_delimiter); + if (pos == std::string::npos) { + LOG(WARNING) << "Wrong arguement in specifing lg"; + return -1; + } + std::string dest_table_no_lg = dest_table.substr(0, pos); + std::string dest_lg = dest_table.substr(pos + 1); + if (lg != dest_lg) { + LOG(WARNING) << "Mismatch lg in src_table & dest_table is forbidden"; + return -1; + } + (*tables_map)[src_table_no_lg] = dest_table_no_lg; + (*tables_lg_map)[src_table_no_lg] = lg; + } else { + (*tables_map)[src_table] = dest_table; } - delete result_stream; - ctx->counter.Dec(); + } + return 0; +} - while (ctx->counter.Get() > 0) { - sleep(3); +int CheckTablesMapSensible(const TableMetaList& src_table_list, + const TableMetaList& dest_table_list, + const std::map& tables_map) { + // get table meta set + std::set src_table_set; + + LOG(INFO) << "print src tables : "; + for (int32_t i = 0; i < src_table_list.meta_size(); i++) { + const tera::TableMeta& meta = src_table_list.meta(i); + src_table_set.insert(meta.table_name()); + LOG(INFO) << "table = " << meta.table_name(); + } + + std::set dest_table_set; + LOG(INFO) << "print dest tables : "; + for (int32_t i = 0; i < dest_table_list.meta_size(); i++) { + const tera::TableMeta& meta = dest_table_list.meta(i); + dest_table_set.insert(meta.table_name()); + LOG(INFO) << "table = " << meta.table_name(); + } + + // make sure src_table in src_table_set + // dest_table not in dest_table_set + for (auto it = tables_map.cbegin(); it != tables_map.cend(); ++it) { + // Not work for specify lg + if (src_table_set.find(it->first) == src_table_set.cend()) { + LOG(WARNING) << "The src_table " << it->first << " not in src_table_set"; + return -1; } - if (ctx->fail == true) { - LOG(INFO) << "scan dump fail: " << table_name << ", start " << start_key - << ", end " << end_key << ", reason " << ctx->reason; - res = -1; + if (dest_table_set.find(it->second) != dest_table_set.cend()) { + LOG(WARNING) << "The dest_table " << it->second << " in dest_table_set"; + return -1; } - delete ctx; + } + return 0; +} - if (err.GetType() != tera::ErrorCode::kOK) { - LOG(INFO) << "scan dump fail: " << table_name << ", start " << start_key - << ", end " << end_key << ", reason " << err.GetReason(); - res = -1; +void MayBeAddCfMapByLgMap(const TableMeta& src_meta, + const std::map& tables_lg_map, + std::map* tables_cf_map) { + std::string src_table_name = src_meta.table_name(); + + auto it = tables_lg_map.find(src_table_name); + if (it != tables_lg_map.cend()) { + const TableSchema& src_schema = src_meta.schema(); + + // lg0|lg1|lg2 + std::string lg_str = it->second; + std::vector lgs; + std::string delimiter(FLAGS_lg_and_cf_delimiter); + SplitString(lg_str, delimiter, &lgs); + + std::vector cfs; + for (int cf_index = 0; cf_index < src_schema.column_families_size(); ++cf_index) { + auto result = std::find(lgs.cbegin(), lgs.cend(), + src_schema.column_families(cf_index).locality_group()); + if (result != lgs.cend()) { + cfs.emplace_back(src_schema.column_families(cf_index).name()); + } } - return res; + if (cfs.size() > 0) { + (*tables_cf_map)[src_table_name] = + std::accumulate(cfs.cbegin(), cfs.cend(), std::string(), + [&delimiter](const std::string& a, const std::string& b) + -> std::string { return a + (a.length() > 0 ? delimiter : "") + b; }); + } + } + + return; } -int DumpRunOp() { - int res = 0; - std::string ins_cluster_addr = FLAGS_ins_cluster_addr; - std::string ins_cluster_root_path = FLAGS_ins_cluster_root_path; - std::string tera_src_conf = FLAGS_dump_tera_src_conf; - std::string tera_dest_conf = FLAGS_dump_tera_dest_conf; - - // get and lock range - ErrorCode err; - Client* src_client = Client::NewClient(tera_src_conf, &err); - if (src_client == NULL) { - LOG(INFO) << "open src client fail: " << tera_src_conf << ", err " << err.ToString(); - return -1; +void MayBeChangeSchemaByLgMap(TableMeta& src_meta, + const std::map& tables_lg_map) { + std::string src_table_name = src_meta.table_name(); + + // Remove lg not in tables_lg_map if this tabls' exist + auto it = tables_lg_map.find(src_table_name); + if (it != tables_lg_map.cend()) { + TableSchema* src_schema = src_meta.mutable_schema(); + + TableSchema* src_schema_tmp = new TableSchema; + src_schema_tmp->CopyFrom(*src_schema); + src_schema->clear_locality_groups(); + + // lg0|lg1|lg2 + std::string lg_str = it->second; + std::vector lgs; + std::string delimiter(FLAGS_lg_and_cf_delimiter); + SplitString(lg_str, delimiter, &lgs); + + for (int lg_index = 0; lg_index < src_schema_tmp->locality_groups_size(); ++lg_index) { + auto result = + std::find(lgs.cbegin(), lgs.cend(), src_schema_tmp->locality_groups(lg_index).name()); + if (result != lgs.cend()) { + LocalityGroupSchema* lg_schema = src_schema->add_locality_groups(); + lg_schema->CopyFrom(src_schema_tmp->locality_groups(lg_index)); + } + } + + src_schema->clear_column_families(); + std::vector cfs; + for (int cf_index = 0; cf_index < src_schema_tmp->column_families_size(); ++cf_index) { + auto result = std::find(lgs.cbegin(), lgs.cend(), + src_schema_tmp->column_families(cf_index).locality_group()); + if (result != lgs.cend()) { + ColumnFamilySchema* cf_schema = src_schema->add_column_families(); + cf_schema->CopyFrom(src_schema_tmp->column_families(cf_index)); + } + } + delete src_schema_tmp; + } + + return; +} + +int GetAndSetTableSchema(Client* src_client, Client* dest_client, TableMetaList* table_list, + TabletMetaList* tablet_list, + const std::map& tables_map, + const std::map& tables_lg_map, + std::map* tables_cf_map) { + ErrorCode err; + TableMetaList src_table_list; + TabletMetaList src_tablet_list; + std::shared_ptr src_client_impl( + (static_cast(src_client))->GetClientImpl()); + if (!src_client_impl->ShowTablesInfo(&src_table_list, &src_tablet_list, false, &err)) { + LOG(WARNING) << "tera_master show src cluster fail: " << err.ToString(); + return -1; + } + + TableMetaList dest_table_list; + TabletMetaList dest_tablet_list; + std::shared_ptr dest_client_impl( + (static_cast(dest_client))->GetClientImpl()); + if (!dest_client_impl->ShowTablesInfo(&dest_table_list, &dest_tablet_list, false, &err)) { + LOG(WARNING) << "tera_master show dest cluster fail: " << err.ToString(); + return -1; + } + if (FLAGS_enable_copy_schema && + (-1 == CheckTablesMapSensible(src_table_list, dest_table_list, tables_map))) { + LOG(WARNING) << "TablesMap not sensible!"; + return -1; + } + for (int32_t src_list_index = 0; src_list_index < src_table_list.meta_size(); ++src_list_index) { + TableMeta src_meta = src_table_list.meta(src_list_index); + std::string src_table_name = src_meta.table_name(); + auto it = tables_map.find(src_table_name); + if (it == tables_map.cend()) { + continue; + } + + MayBeChangeSchemaByLgMap(src_meta, tables_lg_map); + MayBeAddCfMapByLgMap(src_meta, tables_lg_map, tables_cf_map); + + std::vector delimiters; + GetTableKeyRange(src_table_name, src_tablet_list, &delimiters); + TableMeta dest_meta(src_meta); + dest_meta.set_table_name(it->second); + TableSchema* dest_table_schema = new TableSchema; + dest_table_schema->CopyFrom(dest_meta.schema()); + dest_table_schema->set_name(it->second); + dest_table_schema->set_alias(it->second); + dest_meta.release_schema(); + dest_meta.set_allocated_schema(dest_table_schema); + if (FLAGS_enable_copy_schema && + ManualCreateTable(dest_client_impl, dest_meta.table_name(), dest_meta.schema(), + delimiters) < 0) { + LOG(WARNING) << "Create table[" << dest_meta.table_name() << "] in dest cluster failed!"; + return -1; + } + TableMeta* meta2 = table_list->add_meta(); + meta2->CopyFrom(src_meta); + } + for (int32_t src_list_index = 0; src_list_index < src_tablet_list.meta_size(); ++src_list_index) { + const TabletMeta& meta = src_tablet_list.meta(src_list_index); + if (tables_map.find(meta.table_name()) == tables_map.end()) { + continue; + } + TabletMeta* meta2 = tablet_list->add_meta(); + meta2->CopyFrom(meta); + } + return 0; +} + +int LoadTablesMapOp() { + std::string ins_cluster_addr = FLAGS_ins_cluster_addr; + std::string ins_cluster_root_path = FLAGS_ins_cluster_dump_root_path; + + if (FLAGS_tables_map_file == "") { + LOG(WARNING) << "Should set --tables_map_file before use prepare_tables!"; + return -1; + } + std::map tables_map; + std::map tables_lg_map; + if (-1 == LoadTablesMapFile(FLAGS_tables_map_file, &tables_map, &tables_lg_map)) { + LOG(WARNING) << "Load tables_map_file failed!"; + return -1; + } + + // Put t1=>t2 in nexus + galaxy::ins::sdk::InsSDK ins_sdk(ins_cluster_addr); + std::string path = ins_cluster_root_path + "/" + FLAGS_dump_tables_map_path; + if (-1 == PutMapInNexus(path, &ins_sdk, tables_map)) { + LOG(WARNING) << "PutMapInNexus failed"; + return -1; + } + + // Put src_t1=>lg in nexus + path = ins_cluster_root_path + "/" + FLAGS_dump_tables_lg_map_path; + if (-1 == PutMapInNexus(path, &ins_sdk, tables_lg_map)) { + LOG(WARNING) << "DumpTables lg Map failed"; + return -1; + } + return 0; +} + +int CreateDfsPath(const std::map& tables_map, + const std::string& dfs_parent_path) { + int ret = -1; + if (g_dfs == NULL) { + LOG(WARNING) << "Init afs client before create afs path"; + return ret; + } + + // Make sure dfs parent path exist + ret = g_dfs->CreateDirectory(dfs_parent_path); + if (0 != ret) { + LOG(WARNING) << "create parent path failed, errno = " << errno; + return ret; + } + + for (auto it = tables_map.cbegin(); it != tables_map.cend(); ++it) { + std::string src_table_path = dfs_parent_path + "/" + it->first; + LOG(INFO) << "create path[" << src_table_path << "] in dfs"; + ret = g_dfs->CreateDirectory(src_table_path); + if (0 != ret) { + LOG(WARNING) << "create dir[" << src_table_path << "] in dfs failed!"; + break; } - Client* dest_client = Client::NewClient(tera_dest_conf, &err); - if (dest_client == NULL) { - delete src_client; - src_client = NULL; - LOG(INFO) << "open dest client fail: " << tera_dest_conf << ", err " << err.ToString(); + } + return ret; +} + +int64_t ReadAfsFile(const std::string& file_path, char* data, ssize_t max_size) { + leveldb::DfsFile* file = g_dfs->OpenFile(file_path, leveldb::RDONLY); + if (file == NULL) { + LOG(WARNING) << "Open file[" << file_path << "] failed! errno : " << errno; + return -1; + } + + ssize_t buf_len = 128 * 1024; + char buf[buf_len]; + memset(buf, 0, buf_len); + ssize_t ret_size = 0; + int64_t sum = 0; + while ((ret_size = file->Read(buf, sizeof(buf))) > 0) { + memcpy(data, buf, ret_size); + sum += ret_size; + data += ret_size; + memset(buf, 0, buf_len); + } + file->CloseFile(); + //*data = '\0'; + return sum; +} + +int GetRowResultFromAfsFile(const std::string& file_path, RowResult* row_result) { + ssize_t max_size = FLAGS_pb_total_bytes_limit_MB * 1024 * 1024; + char* data = new char[max_size]; + FuncScopeGuard on_exit([&data] { delete[] data; }); + int64_t read_len = ReadAfsFile(file_path, data, max_size); + if (read_len <= 0) { + LOG(WARNING) << "Read afs file failed!"; + return -1; + } + LOG(INFO) << "file[" << file_path << "] data size = " << read_len; + if (!DeserializationRowResult(data, read_len, row_result)) { + LOG(WARNING) << "Parse afs kv file[" << file_path << "] failed!"; + return -1; + } + return 0; +} + +// Re-write failed kv_pairs +int DumpRewriteOp() { + std::string ins_cluster_addr = FLAGS_ins_cluster_addr; + std::string ins_cluster_root_path = FLAGS_ins_cluster_dump_root_path; + std::string tera_dest_conf = FLAGS_dump_tera_dest_conf; + + // Get tables_map from nexus + galaxy::ins::sdk::InsSDK ins_sdk(ins_cluster_addr); + std::map tables_map; + std::string path = ins_cluster_root_path + "/" + FLAGS_dump_tables_map_path; + if (-1 == GetMapFromNexus(path, &ins_sdk, &tables_map)) { + LOG(WARNING) << "GetMapFromNexus failed"; + return -1; + } + for (auto it = tables_map.cbegin(); it != tables_map.cend(); ++it) { + LOG(INFO) << it->first << " => " << it->second; + } + + // target cluster op + ErrorCode err; + std::unique_ptr target_client(Client::NewClient(tera_dest_conf, &err)); + if (target_client == nullptr) { + LOG(WARNING) << "open dest client fail: " << tera_dest_conf << ", err " << err.ToString(); + return -1; + } + + // Afs op + if (-1 == InitDfsClient()) { + LOG(WARNING) << "init dfs client failed!"; + return -1; + } + std::string dfs_parent_path = FLAGS_dump_failed_kv_afs_path; + struct stat fstat; + memset(&fstat, 0, sizeof(struct stat)); + std::map> src_table_files_map; + if (0 == g_dfs->Stat(dfs_parent_path, &fstat)) { + std::vector sub_paths; + if (0 != g_dfs->ListDirectory(dfs_parent_path, &sub_paths)) { + LOG(WARNING) << "dfs list dir[" << dfs_parent_path << "] failed!"; + return -1; + } + std::ostringstream fullpath; + for (const auto& sub_path : sub_paths) { + auto it = tables_map.find(sub_path); + if (it == tables_map.cend()) { + LOG(WARNING) << "Wrong nexus record!!!"; + return -1; + } + std::vector& file_paths = src_table_files_map[sub_path]; + + fullpath << dfs_parent_path << "/" << sub_path; + + std::vector files; + if (0 != g_dfs->ListDirectory(fullpath.str(), &files)) { + LOG(WARNING) << "dfs list dir[" << fullpath.str() << "] failed!"; return -1; + } + for_each(files.cbegin(), files.cend(), [&](const std::string& file) { + file_paths.emplace_back(dfs_parent_path + "/" + sub_path + "/" + file); + }); + fullpath.str(""); + fullpath.clear(); } - Table* src_table = NULL; - Table* dest_table = NULL; + } + + std::unique_ptr row_result(new RowResult); + for (auto it = src_table_files_map.cbegin(); it != src_table_files_map.cend(); ++it) { + const std::string& target_table_name = it->first; + Table* target_table = target_client->OpenTable(tables_map[target_table_name], &err); + if (target_table == nullptr) { + LOG(WARNING) << "Open table[" << target_table_name << "] failed!"; + continue; + } + for (const auto& file_path : it->second) { + row_result->clear_key_values(); + if (-1 == GetRowResultFromAfsFile(file_path, row_result.get())) { + LOG(WARNING) << "GetRowResultFromAfsFile[" << file_path << "] failed!"; + continue; + } + + // Write Rowresult to target table + Counter counter; + counter.Inc(); + for (int kv_index = 0; kv_index < row_result->key_values_size(); ++kv_index) { + g_sem->Acquire(); + RewriteContext* ctx = new RewriteContext; + KeyValuePair* kv_pair = row_result->mutable_key_values(kv_index); + RowMutation* mu = target_table->NewRowMutation(kv_pair->key()); + mu->Put(kv_pair->column_family(), kv_pair->qualifier(), kv_pair->value(), + kv_pair->timestamp()); + counter.Inc(); + ctx->counter = &counter; + ctx->target_table = target_table; + ctx->kv_pair = kv_pair; + mu->SetContext(ctx); + mu->SetCallBack(RewriteCallBack); + target_table->ApplyMutation(mu); + } + counter.Dec(); + while (counter.Get() > 0) { + sleep(3); + } + LOG(INFO) << "finish write diff file: " << file_path; + } + delete target_table; + } + return 0; +} - galaxy::ins::sdk::InsSDK ins_sdk(ins_cluster_addr); - std::string table_name, start_key, end_key, last_table_name; - while (GetAndLockDumpRange(ins_cluster_root_path, &table_name, &start_key, &end_key, &ins_sdk) == 0) { - if (last_table_name != table_name) { // table change - delete src_table; - delete dest_table; - src_table = NULL; - dest_table = NULL; - src_table = src_client->OpenTable(table_name, &err); - if (src_table == NULL) { - LOG(INFO) << "open src table fail: " << table_name << ", err " << err.ToString(); - continue; - } - dest_table = dest_client->OpenTable(table_name, &err); - if (dest_table == NULL) { - delete src_table; - src_table = NULL; - LOG(INFO) << "open dest table fail: " << table_name << ", err " << err.ToString(); - continue; - } - } - last_table_name = table_name; - if ((res = ScanAndDumpData(src_table, dest_table, table_name, start_key, end_key)) < 0) { - LOG(INFO) << "scan dump data fail: " << table_name << ", start " << start_key - << ", end " << end_key; - } else { - ReleaseAndUnlockDumpRange(ins_cluster_root_path, table_name, start_key, end_key, &ins_sdk); +// Read write failed kv_pairs +int DumpReadOp(const std::string& afs_file_path) { + if (-1 == InitDfsClient()) { + LOG(WARNING) << "init dfs client failed!"; + return -1; + } + std::unique_ptr row_result(new RowResult); + if (-1 == GetRowResultFromAfsFile(afs_file_path, row_result.get())) { + LOG(WARNING) << "GetRowResultFromAfsFile[" << afs_file_path << "] failed!"; + return -1; + } + std::cout << "afs file[" << afs_file_path << "] kv size = " << row_result->key_values_size() + << std::endl; + for (int kv_index = 0; kv_index < row_result->key_values_size(); ++kv_index) { + const KeyValuePair& kv_pair = row_result->key_values(kv_index); + std::cout << kv_pair.key() << ":" << kv_pair.column_family() << ":" << kv_pair.qualifier() + << ":" << kv_pair.timestamp() << "=>" << kv_pair.value() << std::endl; + } + return 0; +} + +int DeleteKeyRangeInNexus(const std::string& start_key, const std::string& end_key, + galaxy::ins::sdk::InsSDK* ins_sdk) { + int res = 0; + galaxy::ins::sdk::ScanResult* result = ins_sdk->Scan(start_key, end_key); + + while (!result->Done()) { + if (result->Error() != galaxy::ins::sdk::kOK) { + LOG(WARNING) << "scan fail: start " << start_key << ", end " << end_key << ", err " + << result->Error(); + res = -1; + break; + } + galaxy::ins::sdk::SDKError err; + ins_sdk->Delete(result->Key(), &err); + if (err != galaxy::ins::sdk::kOK) { + LOG(WARNING) << "Delete failed[key = " << result->Key() << ", value = " << result->Value() + << "]"; + res = -1; + } + result->Next(); + } + delete result; + return res; +} + +std::string FormatPath(const std::string& pathname) { + std::string result; + bool need_strip = false; + for (std::string::size_type i = 0; i < pathname.length(); ++i) { + if (pathname.at(i) == '/') { + if (need_strip) { + continue; + } else { + result.push_back(pathname.at(i)); + need_strip = true; + } + } else { + need_strip = false; + result.push_back(pathname.at(i)); + } + } + if (result.at(result.length() - 1) == '/') { + result.pop_back(); + } + return result; +} + +int32_t DfsPrintPath(const char* pathname, struct stat* st) { + printf("%s", FormatPath(pathname).c_str()); + if (S_IFDIR & st->st_mode) { + printf("/"); + } + printf("\n"); + return 0; +} + +int ShowAfsDir(const std::string& dir_name) { + std::vector sub_paths; + if (0 != g_dfs->ListDirectory(dir_name, &sub_paths)) { + return -1; + } + + struct stat st; + std::ostringstream fullpath; + for (std::size_t i = 0; i < sub_paths.size(); ++i) { + fullpath.str(""); + fullpath.clear(); + fullpath << dir_name << "/" << sub_paths[i]; + memset(&st, 0, sizeof(struct stat)); + if (g_dfs->Stat(fullpath.str(), &st) < 0) { + perror("Stat failed"); + continue; + } + DfsPrintPath(fullpath.str().c_str(), &st); + } + return 0; +} + +bool Confirm() { + std::cout << "[Y/N] "; + std::string ensure; + if (!std::getline(std::cin, ensure)) { + std::cout << "Get input error" << std::endl; + return false; + } + if (ensure != "Y") { + return false; + } + return true; +} + +int CleanAfsData(const std::string& dfs_parent_path) { + int res = 0; + struct stat fstat; + if (0 == g_dfs->Stat(dfs_parent_path, &fstat)) { + if (S_IFDIR & fstat.st_mode) { + if (-1 == ShowAfsDir(dfs_parent_path)) { + LOG(WARNING) << "List afs dir failed!"; + res = -1; + } + std::cout << "Are you sure DELETE AFS PATH[" << dfs_parent_path << "]?" << std::endl; + if (Confirm()) { + // Delete afs path + res = g_dfs->DeleteDirectory(dfs_parent_path); + if (0 != res) { + LOG(WARNING) << "RmDir[" << dfs_parent_path << "] fail"; } - start_key = end_key; + } + } else { + LOG(WARNING) << "Please make sure input the right parent path for delete"; + res = -1; } - delete src_client; - delete dest_client; + } else { + LOG(WARNING) << "dfs stat failed!"; + res = -1; + } + return res; +} + +// Clean nexus&afs path data +int CleanUpDumpData(const std::string& ins_cluster_root_path, const std::string& dfs_parent_path) { + int res = 0; + // Clean nexus data + std::string ins_cluster_addr = FLAGS_ins_cluster_addr; + std::string start = ins_cluster_root_path + "/"; + std::string end = ins_cluster_root_path + "/"; + + start.append(1, '\0'); + end.append(1, '\255'); + + galaxy::ins::sdk::InsSDK ins_sdk(ins_cluster_addr); + res = DeleteKeyRangeInNexus(start, end, &ins_sdk); + + // Clean afs data + if (-1 == InitDfsClient()) { + LOG(WARNING) << "init dfs client failed!"; + return -1; + } + res = CleanAfsData(dfs_parent_path); + return res; +} + +// Clean nexus&afs path data +int CleanUpDiffData(const std::string& ins_cluster_root_path, const std::string& dfs_diff_path, + const std::string& dfs_diffbin_path) { + int res = 0; + // Clean nexus data + std::string ins_cluster_addr = FLAGS_ins_cluster_addr; + std::string start = ins_cluster_root_path + "/"; + std::string end = ins_cluster_root_path + "/"; + + start.append(1, '\0'); + end.append(1, '\255'); + + galaxy::ins::sdk::InsSDK ins_sdk(ins_cluster_addr); + res = DeleteKeyRangeInNexus(start, end, &ins_sdk); + + // Clean afs data + if (-1 == InitDfsClient()) { + LOG(WARNING) << "init dfs client failed!"; + return -1; + } + res = CleanAfsData(dfs_diff_path); + if (res != 0) { return res; + } + res = CleanAfsData(dfs_diffbin_path); + return res; +} + +int DumpCleanOp() { + return CleanUpDumpData(FLAGS_ins_cluster_dump_root_path, FLAGS_dump_failed_kv_afs_path); } -void GetTableKeyRange(const std::string& table_name, - const TabletMetaList& tablet_list, - std::vector* delimiters) { - for (int32_t i = 0; i < tablet_list.meta_size(); i++) { - const tera::TabletMeta& meta = tablet_list.meta(i); - if (table_name == meta.table_name() && - meta.key_range().key_start().size() > 0) { - delimiters->push_back(meta.key_range().key_start()); +int DiffCleanOp() { + return CleanUpDiffData(FLAGS_ins_cluster_diff_root_path, FLAGS_diff_data_afs_path, + FLAGS_diff_bin_data_afs_path); +} + +int DumpPrepareTablesOp() { + std::string ins_cluster_addr = FLAGS_ins_cluster_addr; + std::string ins_cluster_root_path = FLAGS_ins_cluster_dump_root_path; + std::string tera_src_conf = FLAGS_dump_tera_src_conf; + std::string tera_dest_conf = FLAGS_dump_tera_dest_conf; + + galaxy::ins::sdk::InsSDK ins_sdk(ins_cluster_addr); + + // Get tables_map from nexus + std::map tables_map; + std::string path = ins_cluster_root_path + "/" + FLAGS_dump_tables_map_path; + if (-1 == GetMapFromNexus(path, &ins_sdk, &tables_map)) { + LOG(WARNING) << "GetMapFromNexus failed"; + return -1; + } + for (auto it = tables_map.cbegin(); it != tables_map.cend(); ++it) { + LOG(INFO) << it->first << " => " << it->second; + } + + // Get tables_lg_map from nexus + std::map tables_lg_map; + path = ins_cluster_root_path + "/" + FLAGS_dump_tables_lg_map_path; + if (-1 == GetMapFromNexus(path, &ins_sdk, &tables_lg_map)) { + LOG(WARNING) << "GetTablesLgMap failed"; + return -1; + } + for (auto it = tables_lg_map.cbegin(); it != tables_lg_map.cend(); ++it) { + LOG(INFO) << it->first << " => " << it->second; + } + + ErrorCode err; + std::unique_ptr src_client(Client::NewClient(tera_src_conf, &err)); + if (src_client == nullptr) { + LOG(WARNING) << "open src client fail: " << tera_src_conf << ", err " << err.ToString(); + return -1; + } + LOG(INFO) << "Open src client " << tera_src_conf << " success"; + std::unique_ptr dest_client(Client::NewClient(tera_dest_conf, &err)); + if (dest_client == nullptr) { + LOG(WARNING) << "open dest client fail: " << tera_dest_conf << ", err " << err.ToString(); + return -1; + } + LOG(INFO) << "Open dest client " << tera_dest_conf << " success"; + // dump src cluster range into ins + TableMetaList table_list; + TabletMetaList tablet_list; + std::map tables_cf_map; + if (GetAndSetTableSchema(src_client.get(), dest_client.get(), &table_list, &tablet_list, + tables_map, tables_lg_map, &tables_cf_map) < 0) { + LOG(WARNING) << "GetAndSetTableSchema faield"; + return -1; + } + + // Put src_t1=>cf1|cf2 in nexus + path = ins_cluster_root_path + "/" + FLAGS_dump_tables_cf_map_path; + if (-1 == PutMapInNexus(path, &ins_sdk, tables_cf_map)) { + LOG(WARNING) << "Dump Tables cf Map failed"; + return -1; + } + + auto init_ins_value_first_part = std::bind(InitInsValueFirstPartForDump, std::placeholders::_1); + if (-1 == DumpRange(ins_cluster_root_path, &ins_sdk, table_list, tablet_list, + init_ins_value_first_part)) { + LOG(WARNING) << "Dump range faield"; + return -1; + } + + if (-1 == InitDfsClient()) { + LOG(WARNING) << "init dfs client failed!"; + return -1; + } + + if (-1 == CreateDfsPath(tables_map, FLAGS_dump_failed_kv_afs_path)) { + LOG(WARNING) << "init dfs path failed for storing failed kv"; + return -1; + } + + return 0; +} + +int DumpPrepareSafeOp() { + int res = 0; + std::string ins_cluster_addr = FLAGS_ins_cluster_addr; + std::string ins_cluster_root_path = FLAGS_ins_cluster_dump_root_path; + std::string tera_src_conf = FLAGS_dump_tera_src_conf; + std::string tera_dest_conf = FLAGS_dump_tera_dest_conf; + + ErrorCode err; + std::unique_ptr src_client(Client::NewClient(tera_src_conf, &err)); + if (src_client == nullptr) { + LOG(WARNING) << "open src client fail: " << tera_src_conf << ", err " << err.ToString(); + return -1; + } + std::unique_ptr dest_client(Client::NewClient(tera_dest_conf, &err)); + if (dest_client == nullptr) { + src_client = nullptr; + LOG(WARNING) << "open dest client fail: " << tera_dest_conf << ", err " << err.ToString(); + return -1; + } + + // dump src cluster range into ins + TableMetaList table_list; + TabletMetaList tablet_list; + if (GetOrSetTabletLocationSafe(src_client.get(), dest_client.get(), &table_list, &tablet_list) < + 0) { + return -1; + } + galaxy::ins::sdk::InsSDK ins_sdk(ins_cluster_addr); + auto init_ins_value_first_part = std::bind(InitInsValueFirstPartForDump, std::placeholders::_1); + res = DumpRange(ins_cluster_root_path, &ins_sdk, table_list, tablet_list, + init_ins_value_first_part); + return res; +} + +int DumpUtOp() { + // Ut for pb convert + RowResult row_result; + int kv_num = 1000 * 10000; + for (int i = 0; i < kv_num; i++) { + KeyValuePair* kv_pair = row_result.add_key_values(); + kv_pair->set_key("aaa"); + kv_pair->set_column_family("bbb"); + kv_pair->set_qualifier("ccc"); + kv_pair->set_value("ddd"); + kv_pair->set_timestamp(111); + } + + std::string seri_str; + if (!SerializationRowResult(row_result, &seri_str)) { + std::cout << "SerializationRowResult failed" << std::endl; + return -1; + } + + if (-1 == InitDfsClient()) { + LOG(WARNING) << "InitDfsClient FAILED"; + return -1; + } + int ret = CleanAfsData(FLAGS_dump_ut_kv_afs_path); + if (0 != ret) { + LOG(WARNING) << "CleanAfsData failed"; + return ret; + } + ret = g_dfs->CreateDirectory(FLAGS_dump_ut_kv_afs_path); + if (0 != ret) { + LOG(WARNING) << "create parent path failed, errno = " << errno; + return ret; + } + std::string file_path = FLAGS_dump_ut_kv_afs_path + "/test.pbtxt"; + WriteToDfs(file_path, seri_str); + + ssize_t max_size = FLAGS_pb_total_bytes_limit_MB * 1024 * 1024; + char* data = new char[max_size]; + FuncScopeGuard on_exit([&data] { delete[] data; }); + int64_t read_len = ReadAfsFile(file_path, data, max_size); + if (read_len <= 0) { + LOG(WARNING) << "Read afs file failed!"; + return -1; + } + + std::unique_ptr other_row_result(new RowResult); + if (!DeserializationRowResult(data, read_len, other_row_result.get())) { + std::cout << "DeserializationRowResult failed!" << std::endl; + return -1; + } + + if (other_row_result->key_values_size() != kv_num) { + std::cout << "DisMatch in convert pb, kv_num not match" << std::endl; + return -1; + } else { + std::cout << "kv_num Match" << std::endl; + } + + int dismatch_num = 0; + int match_num = 0; + for (int i = 0; i < kv_num; i++) { + const KeyValuePair& kv_pair = row_result.key_values(i); + const KeyValuePair& other_kv_pair = other_row_result->key_values(i); + if ((other_kv_pair.key() != kv_pair.key()) || + (other_kv_pair.column_family() != kv_pair.column_family()) || + (other_kv_pair.qualifier() != kv_pair.qualifier()) || + (other_kv_pair.value() != kv_pair.value()) || + (other_kv_pair.timestamp() != kv_pair.timestamp())) { + dismatch_num++; + } else { + match_num++; + } + } + if (match_num != kv_num) { + std::cout << "DisMatch in convert pb" << std::endl; + return -1; + } else { + std::cout << "Match, correct" << std::endl; + } + return 0; +} + +std::string GetIsMultiVersionFlag(const ColumnFamilySchema& cf_schema) { + if (cf_schema.max_versions() > 1) { + return "1"; + } else { + return "0"; + } +} + +void GetTablesCfVersionMap(const TableMetaList& table_list, + const std::map& tables_cf_map, + std::map* tables_cf_version_map) { + for (int32_t i = 0; i < table_list.meta_size(); i++) { + const TableMeta& meta = table_list.meta(i); + std::string table_name = meta.table_name(); + + const TableSchema& schema = meta.schema(); + auto it = tables_cf_map.find(table_name); + if (it == tables_cf_map.cend()) { + if (schema.column_families_size() == 0) { + (*tables_cf_version_map)[table_name + "::"] = "0"; + } else { + for (int j = 0; j < schema.column_families_size(); ++j) { + (*tables_cf_version_map)[table_name + "::" + schema.column_families(j).name()] = + GetIsMultiVersionFlag(schema.column_families(j)); } + } + } else { + std::vector cfs; + std::string delimiter(FLAGS_lg_and_cf_delimiter); + SplitString(it->second, delimiter, &cfs); + for (int j = 0; j < schema.column_families_size(); ++j) { + auto result = std::find(cfs.cbegin(), cfs.cend(), schema.column_families(j).name()); + if (result != cfs.cend()) { + (*tables_cf_version_map)[table_name + "::" + schema.column_families(j).name()] = + GetIsMultiVersionFlag(schema.column_families(j)); + } + } } + } + + return; } -int ManualCreateTable(tera::ClientImpl* client, - const std::string& table_name, - const TableSchema& schema, - const std::vector& delimiters) { - ErrorCode err; - TableDescriptor table_desc; - table_desc.SetTableName(table_name); - TableSchemaToDesc(schema, &table_desc); - table_desc.SetSplitSize(10000000); - table_desc.SetMergeSize(0); - if (!client->CreateTable(table_desc, delimiters, &err)) { - LOG(INFO) << "manual create error: " << table_name << ", err: " << err.ToString(); - return -1; +int ShowTablesInfo(const std::string& tera_src_conf, TableMetaList* table_list, + TabletMetaList* tablet_list) { + ErrorCode err; + + std::unique_ptr src_client(Client::NewClient(tera_src_conf, &err)); + if (src_client == nullptr) { + LOG(WARNING) << "open src client fail: " << tera_src_conf << ", err " << err.ToString(); + return -1; + } + + std::shared_ptr src_client_impl( + (static_cast(src_client.get()))->GetClientImpl()); + if (!src_client_impl->ShowTablesInfo(table_list, tablet_list, false, &err)) { + LOG(WARNING) << "ShowTablesInfo fail: " << err.ToString(); + return -1; + } + + return 0; +} + +void FilterTables(const TableMetaList& all_tables, + const std::map& tables_map, TableMetaList* table_list) { + for (int32_t i = 0; i < all_tables.meta_size(); i++) { + const tera::TableMeta& meta = all_tables.meta(i); + if (tables_map.find(meta.table_name()) == tables_map.cend()) { + continue; } - return 0; + tera::TableMeta* meta2 = table_list->add_meta(); + meta2->CopyFrom(meta); + } + + return; } -int ManualSplitTable(tera::ClientImpl* client, - const std::string& table_name, - const std::vector& delimiters) { - ErrorCode err; - std::vector arg_list; - arg_list.push_back("split"); - arg_list.push_back(table_name); - for (uint32_t i = 0; i < delimiters.size(); i++) { - arg_list.push_back(delimiters[i]); - if (!client->CmdCtrl("table", arg_list, NULL, NULL, &err)) { - LOG(INFO) << "manual split table fail(ignore old master): " << table_name - << ", delimiters_size: " << delimiters.size() - << ", err: " << err.ToString(); - } - usleep(FLAGS_dump_manual_split_interval); - arg_list.pop_back(); +void FilterTablets(const TabletMetaList& all_tablets, + const std::map& tables_map, + TabletMetaList* tablet_list) { + for (int32_t i = 0; i < all_tablets.meta_size(); i++) { + const tera::TabletMeta& meta = all_tablets.meta(i); + if (tables_map.find(meta.table_name()) == tables_map.cend()) { + continue; } - return 0; + tera::TabletMeta* meta2 = tablet_list->add_meta(); + meta2->CopyFrom(meta); + } + + return; } -bool SchemaCompare(const TableSchema& src, const TableSchema& dest) { - return ((src.raw_key() == dest.raw_key()) && - (src.kv_only() == dest.kv_only()) && - (src.name() == dest.name()) && - (!IsSchemaCfDiff(src, dest)) && - (!IsSchemaLgDiff(src, dest))); +void GetTablesCfMap(const TableMetaList& table_list, + const std::map& tables_lg_map, + std::map* tables_cf_map) { + for (int32_t i = 0; i < table_list.meta_size(); ++i) { + const TableMeta& meta = table_list.meta(i); + MayBeAddCfMapByLgMap(meta, tables_lg_map, tables_cf_map); + } + return; } -int GetOrSetTabletLocationSafe(Client* src_client, - Client* dest_client, - TableMetaList* table_list, - TabletMetaList* tablet_list) { - // get src and dest tablet location - ErrorCode err; - TableMetaList src_table_list; - TabletMetaList src_tablet_list; - tera::ClientImpl* src_client_impl = static_cast(src_client); - if (!src_client_impl->ShowTablesInfo(&src_table_list, &src_tablet_list, false, &err)) { - LOG(INFO) << "tera_master show src cluster fail: " << err.ToString(); - return -1; +int DiffPrepareOp() { + std::string ins_cluster_addr = FLAGS_ins_cluster_addr; + std::string ins_cluster_root_path = FLAGS_ins_cluster_diff_root_path; + std::string tera_src_conf = FLAGS_dump_tera_src_conf; + + if (FLAGS_diff_tables_map_file == "") { + LOG(WARNING) << "Should set --diff_tables_map_file before use diff prepare!"; + return -1; + } + std::map tables_map; + std::map tables_lg_map; + if (-1 == LoadTablesMapFile(FLAGS_diff_tables_map_file, &tables_map, &tables_lg_map)) { + LOG(WARNING) << "LoadTablesMapFile FAILED"; + return -1; + } + LOG(INFO) << "[STEP_1] LoadTablesMapFile OK"; + + TableMetaList src_all_tables; + TabletMetaList src_all_tablets; + if (ShowTablesInfo(tera_src_conf, &src_all_tables, &src_all_tablets) < 0) { + return -1; + } + + TableMetaList table_list; + FilterTables(src_all_tables, tables_map, &table_list); + LOG(INFO) << "[STEP_2] FilterTables OK, table_list.meta_size(): " << table_list.meta_size(); + + TabletMetaList tablet_list; + FilterTablets(src_all_tablets, tables_map, &tablet_list); + CHECK(tablet_list.meta_size() > 0); + LOG(INFO) << "[STEP_3] FilterTablets OK, tablet_list.meta_size(): " << tablet_list.meta_size(); + + std::map tables_cf_map; + GetTablesCfMap(table_list, tables_lg_map, &tables_cf_map); + LOG(INFO) << "[STEP_4] GetTablesCfMap OK, tables_cf_map.size(): " << tables_cf_map.size(); + + std::map tables_cf_version_map; + GetTablesCfVersionMap(table_list, tables_cf_map, &tables_cf_version_map); + LOG(INFO) << "[STEP_5] GetTablesCfVersionMap OK, tables_cf_version_map.size(): " + << tables_cf_version_map.size(); + + galaxy::ins::sdk::InsSDK ins_sdk(ins_cluster_addr); + + std::string path = ins_cluster_root_path + "/" + FLAGS_dump_tables_map_path; + if (-1 == PutMapInNexus(path, &ins_sdk, tables_map)) { + LOG(WARNING) << "PutMapInNexus tables_map FAILED"; + return -1; + } + LOG(INFO) << "[STEP_6] PutMapInNexus tables_map OK"; + + path = ins_cluster_root_path + "/" + FLAGS_dump_tables_lg_map_path; + if (-1 == PutMapInNexus(path, &ins_sdk, tables_lg_map)) { + LOG(WARNING) << "PutMapInNexus tables_lg_map FAILED"; + return -1; + } + LOG(INFO) << "[STEP_7] PutMapInNexus tables_lg_map OK"; + + path = ins_cluster_root_path + "/" + FLAGS_dump_tables_cf_map_path; + if (-1 == PutMapInNexus(path, &ins_sdk, tables_cf_map)) { + LOG(WARNING) << "PutMapInNexus tables_cf_map FAILED"; + return -1; + } + LOG(INFO) << "[STEP_8] PutMapInNexus tables_cf_map OK"; + + path = ins_cluster_root_path + "/" + FLAGS_dump_tables_cf_version_map_path; + if (-1 == PutMapInNexus(path, &ins_sdk, tables_cf_version_map)) { + LOG(WARNING) << "PutMapInNexus tables_cf_version_map FAILED"; + return -1; + } + LOG(INFO) << "[STEP_9] PutMapInNexus tables_cf_version_map OK"; + + auto init_ins_value_first_part = std::bind(InitInsValueFirstPartForDiff, std::placeholders::_1); + if (DumpRange(ins_cluster_root_path, &ins_sdk, table_list, tablet_list, + init_ins_value_first_part) < 0) { + LOG(WARNING) << "DumpDiffRange FAILED"; + return -1; + } + LOG(INFO) << "[STEP_10] DumpDiffRange OK"; + + if (-1 == InitDfsClient()) { + LOG(WARNING) << "InitDfsClient FAILED"; + return -1; + } + LOG(INFO) << "[STEP_11] InitDfsClient OK"; + + if (-1 == CreateDfsPath(tables_map, FLAGS_diff_data_afs_path)) { + LOG(WARNING) << "CreateDfsPath FAILED"; + return -1; + } + LOG(INFO) << "[STEP_12] CreateDfsPath OK"; + + if (-1 == CreateDfsPath(tables_map, FLAGS_diff_bin_data_afs_path)) { + LOG(WARNING) << "CreateDfsBinDataPath FAILED"; + return -1; + } + LOG(INFO) << "[STEP_13] CreateDfsBinDataPath OK"; + + LOG(INFO) << "diff prepare finish"; + + return 0; +} + +bool IsMultiVersion(const std::string& src_table_name, const std::string& cf, + const std::map& tables_cf_version_map) { + std::string table_cf_key = src_table_name + "::" + cf; + auto it = tables_cf_version_map.find(table_cf_key); + CHECK(it != tables_cf_version_map.cend()); + if (it->second == "1") { + return true; + } else { + return false; + } +} + +void WriteDiffDataToStream(std::ostringstream& diffdata_stream, ResultStream* result_stream) { + diffdata_stream << "--------------------" << std::endl; + diffdata_stream << "[rk] " << DebugString(result_stream->RowName()) << std::endl; + diffdata_stream << "[cf] " << result_stream->Family() << std::endl; + diffdata_stream << "[qu] " << DebugString(result_stream->Qualifier()) << std::endl; + diffdata_stream << "[ts] " << result_stream->Timestamp() << std::endl; +} + +void AddDiffRowResult(RowResult& row_result, ResultStream* result_stream) { + KeyValuePair* kv_pair = row_result.add_key_values(); + kv_pair->set_key(result_stream->RowName()); + kv_pair->set_column_family(result_stream->Family()); + kv_pair->set_qualifier(result_stream->Qualifier()); + kv_pair->set_timestamp(result_stream->Timestamp()); + kv_pair->set_value(result_stream->Value()); +} + +struct CompareContext { + ResultStream* src_result_stream; + ResultStream* dest_result_stream; + std::ostringstream& diffdata_only_in_src; + std::ostringstream& diffdata_only_in_dest; + std::ostringstream& diffdata_both_have_but_diff; + DiffStatData* diff_stat_data; + const std::string& src_table_name; + const std::map& tables_cf_version_map; + RowResult row_result_only_in_src; + Table* dest_table; + Counter counter; + + CompareContext(ResultStream* src_result_stream, ResultStream* dest_result_stream, + std::ostringstream& diffdata_only_in_src, + std::ostringstream& diffdata_only_in_dest, + std::ostringstream& diffdata_both_have_but_diff, DiffStatData* diff_stat_data, + const std::string& src_table_name, + const std::map& tables_cf_version_map, Table* dest_table) + : src_result_stream(src_result_stream), + dest_result_stream(dest_result_stream), + diffdata_only_in_src(diffdata_only_in_src), + diffdata_only_in_dest(diffdata_only_in_dest), + diffdata_both_have_but_diff(diffdata_both_have_but_diff), + diff_stat_data(diff_stat_data), + src_table_name(src_table_name), + tables_cf_version_map(tables_cf_version_map), + dest_table(dest_table) {} + virtual ~CompareContext() { row_result_only_in_src.clear_key_values(); } +}; + +void WriteDiffToDest(CompareContext& ctx) { + g_sem->Acquire(); + + RewriteContext* wctx = new RewriteContext; + + wctx->hold_kv_pair = true; + KeyValuePair* kv_pair = new KeyValuePair(); + + kv_pair->set_key(ctx.src_result_stream->RowName()); + kv_pair->set_column_family(ctx.src_result_stream->Family()); + kv_pair->set_qualifier(ctx.src_result_stream->Qualifier()); + kv_pair->set_timestamp(ctx.src_result_stream->Timestamp()); + kv_pair->set_value(ctx.src_result_stream->Value()); + + RowMutation* mu = ctx.dest_table->NewRowMutation(kv_pair->key()); + mu->Put(kv_pair->column_family(), kv_pair->qualifier(), kv_pair->value(), kv_pair->timestamp()); + ctx.counter.Inc(); + wctx->counter = &(ctx.counter); + wctx->target_table = ctx.dest_table; + wctx->kv_pair = kv_pair; + mu->SetContext(wctx); + mu->SetCallBack(RewriteCallBack); + ctx.dest_table->ApplyMutation(mu); +} + +void SrcRecordAndNext(CompareContext& ctx) { + if (FLAGS_enable_write_dfs_diff_only_in_src) { + WriteDiffDataToStream(ctx.diffdata_only_in_src, ctx.src_result_stream); + } + if (FLAGS_enable_write_dfs_diffbin_only_in_src) { + AddDiffRowResult(ctx.row_result_only_in_src, ctx.src_result_stream); + } + if (FLAGS_enable_write_diff_only_in_src_to_dest) { + WriteDiffToDest(ctx); + } + ctx.diff_stat_data->only_in_src++; + ctx.src_result_stream->Next(); +} + +void DestRecordAndNext(CompareContext& ctx) { + if (FLAGS_enable_write_dfs_diff_only_in_dest) { + WriteDiffDataToStream(ctx.diffdata_only_in_dest, ctx.dest_result_stream); + } + ctx.diff_stat_data->only_in_dest++; + ctx.dest_result_stream->Next(); +} + +void HandleNotEqual(int comp_res, CompareContext& ctx) { + if (comp_res < 0) { + SrcRecordAndNext(ctx); + } else if (comp_res > 0) { + DestRecordAndNext(ctx); + } +} + +void CompareValue(CompareContext& ctx) { + if (ctx.src_result_stream->Value().compare(ctx.dest_result_stream->Value()) != 0) { + if (FLAGS_enable_write_dfs_diff_both_have_but_diff) { + WriteDiffDataToStream(ctx.diffdata_both_have_but_diff, ctx.src_result_stream); } + ctx.diff_stat_data->both_have_but_diff++; + } else { + ctx.diff_stat_data->both_have_and_same++; + } + ctx.src_result_stream->Next(); + ctx.dest_result_stream->Next(); +} - TableMetaList dest_table_list; - TabletMetaList dest_tablet_list; - tera::ClientImpl* dest_client_impl = static_cast(dest_client); - if (!dest_client_impl->ShowTablesInfo(&dest_table_list, &dest_tablet_list, false, &err)) { - LOG(INFO) << "tera_master show dest cluster fail: " << err.ToString(); - return -1; +void CompareTimestamp(CompareContext& ctx) { + int comp_res = 0; + if (ctx.src_result_stream->Timestamp() > ctx.dest_result_stream->Timestamp()) { + comp_res = -1; + } else if (ctx.src_result_stream->Timestamp() < ctx.dest_result_stream->Timestamp()) { + comp_res = 1; + } + if (comp_res != 0) { + HandleNotEqual(comp_res, ctx); + VLOG(1) << "[diff] timestamp is diff"; + } else { + CompareValue(ctx); + } +} + +void CompareQualifier(CompareContext& ctx) { + int comp_res = ctx.src_result_stream->Qualifier().compare(ctx.dest_result_stream->Qualifier()); + if (comp_res != 0) { + HandleNotEqual(comp_res, ctx); + VLOG(1) << "[diff] qualifier is diff"; + } else { + if (IsMultiVersion(ctx.src_table_name, ctx.src_result_stream->Family(), + ctx.tables_cf_version_map)) { + CompareTimestamp(ctx); + } else { + CompareValue(ctx); } + } +} - // get table meta set - std::map src_table_set; - for (int32_t i = 0; i < src_table_list.meta_size(); i++) { - const tera::TableMeta& meta = src_table_list.meta(i); - TableSchema& schema = src_table_set[meta.table_name()]; - schema.CopyFrom(meta.schema()); +void CompareFamily(CompareContext& ctx) { + int comp_res = ctx.src_result_stream->Family().compare(ctx.dest_result_stream->Family()); + if (comp_res != 0) { + HandleNotEqual(comp_res, ctx); + VLOG(1) << "[diff] family is diff"; + } else { + CompareQualifier(ctx); + } +} + +void CompareRowName(CompareContext& ctx) { + int comp_res = ctx.src_result_stream->RowName().compare(ctx.dest_result_stream->RowName()); + if (comp_res != 0) { + HandleNotEqual(comp_res, ctx); + VLOG(1) << "[diff] rowkey is diff"; + } else { + CompareFamily(ctx); + } +} + +int StartScan(Table* table, const std::string& table_name, const std::string& start_key, + const std::string& end_key, const std::vector& cfs, + const std::string& cluster, ResultStream** result_stream) { + ErrorCode err; + + std::string raw_start_str; + std::string raw_end_str; + if (FLAGS_readable) { + if (!ParseDebugString(start_key, &raw_start_str) || !ParseDebugString(end_key, &raw_end_str)) { + LOG(WARNING) << "Parse debug string failed!"; + return -1; } - std::map dest_table_set; - for (int32_t i = 0; i < dest_table_list.meta_size(); i++) { - const tera::TableMeta& meta = dest_table_list.meta(i); - TableSchema& schema = dest_table_set[meta.table_name()]; - schema.CopyFrom(meta.schema()); + } else { + raw_start_str = start_key; + raw_end_str = end_key; + } + + ScanDescriptor desc(raw_start_str); + desc.SetEnd(raw_end_str); + desc.SetMaxVersions(std::numeric_limits::max()); + if (FLAGS_dump_endtime != 0) { + desc.SetTimeRange(FLAGS_dump_endtime, FLAGS_dump_startime); + } + std::for_each(cfs.cbegin(), cfs.cend(), + [&desc](const std::string& cf) { desc.AddColumnFamily(cf); }); + if ((*result_stream = table->Scan(desc, &err)) == NULL) { + LOG(WARNING) << cluster << " start scan fail: " << table_name << ", start " << start_key + << ", end " << end_key << ", reason " << err.GetReason(); + return -1; + } + return 0; +} + +bool IsScanFinish(const std::string& src_table_name, const std::string& dest_table_name, + const std::string& start_key, const std::string& end_key, + ResultStream* src_result_stream, ResultStream* dest_result_stream, bool* src_done, + bool* dest_done, int* res) { + ErrorCode src_err; + ErrorCode dest_err; + + *src_done = src_result_stream->Done(&src_err); + *dest_done = dest_result_stream->Done(&dest_err); + + if (src_err.GetType() != tera::ErrorCode::kOK) { + LOG(WARNING) << "src scan fail: " << src_table_name << ", start " << start_key << ", end " + << end_key << ", reason " << src_err.GetReason(); + *res = -1; + return true; + } + if (dest_err.GetType() != tera::ErrorCode::kOK) { + LOG(WARNING) << "dest scan fail: " << dest_table_name << ", start " << start_key << ", end " + << end_key << ", reason " << dest_err.GetReason(); + *res = -1; + return true; + } + + if (*src_done && *dest_done) { + return true; + } + + return false; +} + +int ScanAndDiffData(Table* src, Table* dest, const std::string& src_table_name, + const std::string& dest_table_name, const std::string& tablet_id, + const std::string& start_key, const std::string& end_key, + const std::map& tables_cf_map, + const std::map& tables_cf_version_map, + DiffStatData* diff_stat_data) { + int res = 0; + + diff_stat_data->reset(); + + std::vector cfs; + auto it = tables_cf_map.find(src_table_name); + if (it != tables_cf_map.cend()) { + std::string delimiter(FLAGS_lg_and_cf_delimiter); + SplitString(it->second, delimiter, &cfs); + } + + ResultStream* src_result_stream = NULL; + ResultStream* dest_result_stream = NULL; + if (StartScan(src, src_table_name, start_key, end_key, cfs, "src", &src_result_stream) < 0) { + return -1; + } + if (StartScan(dest, dest_table_name, start_key, end_key, cfs, "dest", &dest_result_stream) < 0) { + delete src_result_stream; + return -1; + } + + std::ostringstream diffdata_only_in_src, diffdata_only_in_dest, diffdata_both_have_but_diff; + + CompareContext ctx(src_result_stream, dest_result_stream, diffdata_only_in_src, + diffdata_only_in_dest, diffdata_both_have_but_diff, diff_stat_data, + src_table_name, tables_cf_version_map, dest); + ctx.counter.Inc(); + uint64_t cnt = 0; + while (true) { + bool src_done = false; + bool dest_done = false; + + if (FLAGS_diff_scan_count_per_interval > 0) { + cnt++; + if (cnt % FLAGS_diff_scan_count_per_interval == 0) { + ThisThread::Sleep(FLAGS_diff_scan_interval_ns); + } } - // create or split table, and filter schema not match meta - for (int32_t i = 0; i < src_table_list.meta_size(); i++) { - const tera::TableMeta& meta = src_table_list.meta(i); - if (meta.table_name() == FLAGS_tera_master_meta_table_name) { - continue; - } - std::vector delimiters; - GetTableKeyRange(meta.table_name(), src_tablet_list, &delimiters); - if (dest_table_set.find(meta.table_name()) == dest_table_set.end()) { - if (ManualCreateTable(dest_client_impl, meta.table_name(), meta.schema(), delimiters) < 0) { - return -1; - } - } else if (SchemaCompare(dest_table_set[meta.table_name()], meta.schema())) { - if (FLAGS_dump_enable_manual_split && - ManualSplitTable(dest_client_impl, meta.table_name(), delimiters) < 0) { - return -1; - } - } else { - LOG(INFO) << "table schema not match: " << meta.table_name() << ", src schema: " << meta.schema().ShortDebugString() - << ", dest schema: " << dest_table_set[meta.table_name()].ShortDebugString(); - src_table_set.erase(meta.table_name()); - continue; - } - tera::TableMeta* meta2 = table_list->add_meta(); - meta2->CopyFrom(meta); + if (IsScanFinish(src_table_name, dest_table_name, start_key, end_key, src_result_stream, + dest_result_stream, &src_done, &dest_done, &res)) { + break; } - // filter key range - for (int32_t i = 0; i < src_tablet_list.meta_size(); i++) { - const tera::TabletMeta& meta = src_tablet_list.meta(i); - if (src_table_set.find(meta.table_name()) == src_table_set.end()) { - continue; - } - tera::TabletMeta* meta2 = tablet_list->add_meta(); - meta2->CopyFrom(meta); + diff_stat_data->in_src_or_in_dest++; + if (src_done && !dest_done) { + VLOG(1) << "dest: [" << dest_result_stream->RowName() << "] [" << dest_result_stream->Family() + << "] [" << dest_result_stream->Qualifier() << "] [" + << dest_result_stream->Timestamp() << "]"; + DestRecordAndNext(ctx); + VLOG(1) << "[diff] src_done"; + } else if (!src_done && dest_done) { + VLOG(1) << "src: [" << src_result_stream->RowName() << "] [" << src_result_stream->Family() + << "] [" << src_result_stream->Qualifier() << "] [" << src_result_stream->Timestamp() + << "]"; + SrcRecordAndNext(ctx); + VLOG(1) << "[diff] dest_done"; + } else { // !src_done && !dest_done + VLOG(1) << "src: [" << src_result_stream->RowName() << "] [" << src_result_stream->Family() + << "] [" << src_result_stream->Qualifier() << "] [" << src_result_stream->Timestamp() + << "], dest: [" << dest_result_stream->RowName() << "] [" + << dest_result_stream->Family() << "] [" << dest_result_stream->Qualifier() << "] [" + << dest_result_stream->Timestamp() << "]"; + CompareRowName(ctx); } - return 0; + } + delete src_result_stream; + delete dest_result_stream; + ctx.counter.Dec(); + while (ctx.counter.Get() > 0) { + sleep(3); + } + + if (res == 0) { + std::string file_path; + if (FLAGS_enable_write_dfs_diff_only_in_src) { + file_path = + FLAGS_diff_data_afs_path + "/" + src_table_name + "/" + tablet_id + ".only_in_src"; + WriteToDfs(file_path, diffdata_only_in_src.str()); + } + if (FLAGS_enable_write_dfs_diff_only_in_dest) { + file_path = + FLAGS_diff_data_afs_path + "/" + src_table_name + "/" + tablet_id + ".only_in_dest"; + WriteToDfs(file_path, diffdata_only_in_dest.str()); + } + if (FLAGS_enable_write_dfs_diff_both_have_but_diff) { + file_path = + FLAGS_diff_data_afs_path + "/" + src_table_name + "/" + tablet_id + ".both_have_but_diff"; + WriteToDfs(file_path, diffdata_both_have_but_diff.str()); + } + + if (FLAGS_enable_write_dfs_diffbin_only_in_src) { + std::string row_result_str; + if (!SerializationRowResult(ctx.row_result_only_in_src, &row_result_str)) { + LOG(WARNING) << "row_result_only_in_src serilize failed!"; + } else { + file_path = FLAGS_diff_bin_data_afs_path + "/" + src_table_name + "/" + tablet_id + + ".only_in_src.pbtxt"; + WriteToDfs(file_path, row_result_str); + } + } + } + + return res; } -int DumpPrepareSafeOp() { - int res = 0; - std::string ins_cluster_addr = FLAGS_ins_cluster_addr; - std::string ins_cluster_root_path = FLAGS_ins_cluster_root_path; - std::string tera_src_conf = FLAGS_dump_tera_src_conf; - std::string tera_dest_conf = FLAGS_dump_tera_dest_conf; - - ErrorCode err; - std::unique_ptr src_client(Client::NewClient(tera_src_conf, &err)); - if (src_client == nullptr) { - LOG(INFO) << "open src client fail: " << tera_src_conf << ", err " << err.ToString(); - return -1; +int ScanDest(Table* dest, const std::string& src_table_name, const std::string& dest_table_name, + const std::string& start_key, const std::string& end_key, + const std::map& tables_cf_map) { + int res = 0; + + std::vector cfs; + auto it = tables_cf_map.find(src_table_name); + if (it != tables_cf_map.cend()) { + std::string delimiter(FLAGS_lg_and_cf_delimiter); + SplitString(it->second, delimiter, &cfs); + } + + ResultStream* dest_result_stream = NULL; + if (StartScan(dest, dest_table_name, start_key, end_key, cfs, "dest", &dest_result_stream) < 0) { + return -1; + } + + uint64_t cnt = 0; + while (true) { + bool dest_done = false; + + if (FLAGS_diff_scan_count_per_interval > 0) { + cnt++; + if (cnt % FLAGS_diff_scan_count_per_interval == 0) { + ThisThread::Sleep(FLAGS_diff_scan_interval_ns); + } + } + + ErrorCode dest_err; + dest_done = dest_result_stream->Done(&dest_err); + if (dest_err.GetType() != tera::ErrorCode::kOK) { + LOG(WARNING) << "dest scan fail: " << dest_table_name << ", start " << start_key << ", end " + << end_key << ", reason " << dest_err.GetReason(); + res = -1; + break; } - std::unique_ptr dest_client(Client::NewClient(tera_dest_conf, &err)); - if (dest_client == nullptr) { - src_client = nullptr; - LOG(INFO) << "open dest client fail: " << tera_dest_conf << ", err " << err.ToString(); + if (dest_done) { + break; + } + dest_result_stream->Next(); + } + delete dest_result_stream; + return res; +} + +int ReleaseAndUnlockDiffRange(const std::string& ins_cluster_root_path, + const std::string& src_table_name, const std::string& tablet_id, + const std::string& start_key, const std::string& end_key, + galaxy::ins::sdk::InsSDK* ins_sdk, + const DiffStatData& diff_stat_data) { + int res = 0; + galaxy::ins::sdk::SDKError ins_err; + std::string range_path = ins_cluster_root_path + "/tablet"; + std::string lock_path = ins_cluster_root_path + "/lock"; + + std::string key = range_path + "/" + src_table_name + "/" + start_key; + char stat_str[1024]; + snprintf(stat_str, 1024, "1,%lu,%lu,%lu,%lu,%lu,%s:", diff_stat_data.only_in_src, + diff_stat_data.only_in_dest, diff_stat_data.both_have_but_diff, + diff_stat_data.both_have_and_same, diff_stat_data.in_src_or_in_dest, tablet_id.c_str()); + LOG(INFO) << "range diff stat: " << stat_str; + std::string val = stat_str; + val.append(end_key); + + if (!ins_sdk->Put(key, val, &ins_err)) { + LOG(WARNING) << "ins put FAILED: " << key << ", error " << ins_err; + } else { + LOG(INFO) << "ins put OK: " << key; + } + + std::string lock_key = lock_path + "/" + src_table_name + "/" + start_key + "/"; + if (!ins_sdk->UnLock(lock_key, &ins_err)) { + LOG(WARNING) << "ins unlock FAILED: " << lock_key << ", error " << ins_err; + } else { + LOG(INFO) << "ins unlock OK: " << lock_key; + } + return res; +} + +int DiffRunOp() { + int res = 0; + std::string ins_cluster_addr = FLAGS_ins_cluster_addr; + std::string ins_cluster_root_path = FLAGS_ins_cluster_diff_root_path; + std::string tera_src_conf = FLAGS_dump_tera_src_conf; + std::string tera_dest_conf = FLAGS_dump_tera_dest_conf; + + ErrorCode err; + std::unique_ptr src_client(Client::NewClient(tera_src_conf, &err)); + if (src_client == nullptr) { + LOG(WARNING) << "DiffRun open src client fail: " << tera_src_conf << ", err " << err.ToString(); + return -1; + } + std::unique_ptr dest_client(Client::NewClient(tera_dest_conf, &err)); + if (dest_client == nullptr) { + LOG(WARNING) << "DiffRun open dest client fail: " << tera_dest_conf << ", err " + << err.ToString(); + return -1; + } + + LOG(INFO) << "[DiffRun_1] NewClient src and dest OK"; + + std::unique_ptr
src_table; + std::unique_ptr
dest_table; + + galaxy::ins::sdk::InsSDK ins_sdk(ins_cluster_addr); + std::string src_table_name, dest_table_name, start_key, end_key, last_table_name, tablet_id; + + std::map tables_map; + std::string path = ins_cluster_root_path + "/" + FLAGS_dump_tables_map_path; + if (-1 == GetMapFromNexus(path, &ins_sdk, &tables_map)) { + LOG(WARNING) << "DiffRun GetMapFromNexus tables_map FAILED"; + return -1; + } + + CHECK(tables_map.size() > 0); + + LOG(INFO) << "[DiffRun_2] GetMapFromNexus tables_map OK"; + LOG(INFO) << "DiffRun tables_map.size(): " << tables_map.size(); + for (auto it = tables_map.cbegin(); it != tables_map.cend(); ++it) { + LOG(INFO) << "DiffRun src table[" << it->first << "] => dest table[" << it->second << "]"; + } + + std::map tables_cf_map; + path = ins_cluster_root_path + "/" + FLAGS_dump_tables_cf_map_path; + if (-1 == GetMapFromNexus(path, &ins_sdk, &tables_cf_map)) { + LOG(WARNING) << "DiffRun GetMapFromNexus tables_cf_map FAILED"; + return -1; + } + LOG(INFO) << "[DiffRun_3] GetMapFromNexus tables_cf_map OK"; + LOG(INFO) << "DiffRun tables_cf_map.size(): " << tables_cf_map.size(); + for (auto it = tables_cf_map.cbegin(); it != tables_cf_map.cend(); ++it) { + LOG(INFO) << "DiffRun src table[" << it->first << "] => src cf[" << it->second << "]"; + } + + std::map tables_cf_version_map; + path = ins_cluster_root_path + "/" + FLAGS_dump_tables_cf_version_map_path; + if (-1 == GetMapFromNexus(path, &ins_sdk, &tables_cf_version_map)) { + LOG(WARNING) << "DiffRun GetMapFromNexus tables_cf_version_map FAILED"; + return -1; + } + CHECK(tables_cf_version_map.size() > 0); + LOG(INFO) << "[DiffRun_4] GetMapFromNexus tables_cf_version_map OK"; + LOG(INFO) << "DiffRun tables_cf_version_map.size(): " << tables_cf_version_map.size(); + for (auto it = tables_cf_version_map.cbegin(); it != tables_cf_version_map.cend(); ++it) { + LOG(INFO) << "DiffRun table_cf[" << it->first << "] => is_multi_version[" << it->second << "]"; + } + + if (-1 == InitDfsClient()) { + LOG(WARNING) << "[DiffRun_5] InitDfsClient FAILED"; + return -1; + } + + LOG(INFO) << "[DiffRun_5] InitDfsClient OK"; + + auto get_tablet_id_func = std::bind(GetTabletIdForDiff, std::placeholders::_1); + LOG(INFO) << "[DiffRun_6] start diff range by range"; + while (GetAndLockDumpRange(ins_cluster_root_path, &src_table_name, &tablet_id, &start_key, + &end_key, &ins_sdk, get_tablet_id_func) == 0) { + if (last_table_name != src_table_name) { // table change + src_table.reset(); + dest_table.reset(); + src_table.reset(src_client->OpenTable(src_table_name, &err)); + if (src_table == nullptr) { + LOG(WARNING) << "open src table fail: " << src_table_name << ", err " << err.ToString(); + continue; + } + if (tables_map.find(src_table_name) != tables_map.cend()) { + dest_table_name = tables_map[src_table_name]; + } else { + LOG(WARNING) << "Couldn't find src_table_name[" << src_table_name << "] in tables_map"; return -1; + } + dest_table.reset(dest_client->OpenTable(dest_table_name, &err)); + if (dest_table == nullptr) { + src_table.reset(); + LOG(WARNING) << "open dest table fail: " << dest_table_name << ", err " << err.ToString(); + continue; + } + LOG(INFO) << "start diff new table: " << src_table_name << " vs " << dest_table_name; } + last_table_name = src_table_name; + DiffStatData diff_stat_data; + LOG(INFO) << "start diff table " << src_table_name << " vs " << dest_table_name + << ", new range: start " << start_key << ", end " << end_key; + if ((res = ScanAndDiffData(src_table.get(), dest_table.get(), src_table_name, dest_table_name, + tablet_id, start_key, end_key, tables_cf_map, tables_cf_version_map, + &diff_stat_data)) < 0) { + LOG(WARNING) << "scan and diff data fail: " << src_table_name << " vs " << dest_table_name + << ", start " << start_key << ", end " << end_key; + } else { + LOG(INFO) << "Set has_done for start_key[" << start_key << "], end_key[" << end_key << "]"; + ReleaseAndUnlockDiffRange(ins_cluster_root_path, src_table_name, tablet_id, start_key, + end_key, &ins_sdk, diff_stat_data); + } + start_key = end_key; + } + LOG(INFO) << "Finish DiffRunOp"; + return res; +} - // dump src cluster range into ins - TableMetaList table_list; - TabletMetaList tablet_list; - if (GetOrSetTabletLocationSafe(src_client.get(), dest_client.get(), &table_list, &tablet_list) < 0) { +int DiffScanDestOp() { + int res = 0; + std::string ins_cluster_addr = FLAGS_ins_cluster_addr; + std::string ins_cluster_root_path = FLAGS_ins_cluster_diff_root_path; + std::string tera_dest_conf = FLAGS_dump_tera_dest_conf; + + ErrorCode err; + std::unique_ptr dest_client(Client::NewClient(tera_dest_conf, &err)); + if (dest_client == nullptr) { + LOG(WARNING) << "DiffScanDest open dest client fail: " << tera_dest_conf << ", err " + << err.ToString(); + return -1; + } + + LOG(INFO) << "[DiffScanDest_1] NewClient OK"; + + std::unique_ptr
dest_table; + + galaxy::ins::sdk::InsSDK ins_sdk(ins_cluster_addr); + std::string src_table_name, dest_table_name, start_key, end_key, last_table_name, tablet_id; + + std::map tables_map; + std::string path = ins_cluster_root_path + "/" + FLAGS_dump_tables_map_path; + if (-1 == GetMapFromNexus(path, &ins_sdk, &tables_map)) { + LOG(WARNING) << "DiffScanDest GetMapFromNexus tables_map FAILED"; + return -1; + } + + CHECK(tables_map.size() > 0); + + LOG(INFO) << "[DiffScanDest_2] GetMapFromNexus tables_map OK"; + LOG(INFO) << "DiffScanDest tables_map.size(): " << tables_map.size(); + for (auto it = tables_map.cbegin(); it != tables_map.cend(); ++it) { + LOG(INFO) << "DiffScanDest src table[" << it->first << "] => dest table[" << it->second << "]"; + } + + std::map tables_cf_map; + path = ins_cluster_root_path + "/" + FLAGS_dump_tables_cf_map_path; + if (-1 == GetMapFromNexus(path, &ins_sdk, &tables_cf_map)) { + LOG(WARNING) << "DiffScanDest GetMapFromNexus tables_cf_map FAILED"; + return -1; + } + LOG(INFO) << "[DiffScanDest_3] GetMapFromNexus tables_cf_map OK"; + LOG(INFO) << "DiffScanDest tables_cf_map.size(): " << tables_cf_map.size(); + for (auto it = tables_cf_map.cbegin(); it != tables_cf_map.cend(); ++it) { + LOG(INFO) << "DiffScanDest src table[" << it->first << "] => src cf[" << it->second << "]"; + } + + if (-1 == InitDfsClient()) { + LOG(WARNING) << "[DiffScanDest_4] InitDfsClient FAILED"; + return -1; + } + + LOG(INFO) << "[DiffScanDest_4] InitDfsClient OK"; + + auto get_tablet_id_func = std::bind(GetTabletIdForDiff, std::placeholders::_1); + LOG(INFO) << "[DiffScanDest_5] start diff range by range"; + while (GetAndLockDumpRange(ins_cluster_root_path, &src_table_name, &tablet_id, &start_key, + &end_key, &ins_sdk, get_tablet_id_func) == 0) { + if (last_table_name != src_table_name) { // table change + dest_table.reset(); + if (tables_map.find(src_table_name) != tables_map.cend()) { + dest_table_name = tables_map[src_table_name]; + } else { + LOG(WARNING) << "Couldn't find src_table_name[" << src_table_name << "] in tables_map"; return -1; + } + dest_table.reset(dest_client->OpenTable(dest_table_name, &err)); + if (dest_table == nullptr) { + LOG(WARNING) << "open dest table fail: " << dest_table_name << ", err " << err.ToString(); + continue; + } + LOG(INFO) << "start scan new table: " << dest_table_name; } - res = DumpRange(ins_cluster_addr, ins_cluster_root_path, table_list, tablet_list); - return res; + last_table_name = src_table_name; + DiffStatData diff_stat_data; + diff_stat_data.reset(); + LOG(INFO) << "start scan table " << dest_table_name << ", new range: start " << start_key + << ", end " << end_key; + if ((res = ScanDest(dest_table.get(), src_table_name, dest_table_name, start_key, end_key, + tables_cf_map)) < 0) { + LOG(WARNING) << "scan fail: " << dest_table_name << ", start " << start_key << ", end " + << end_key; + } else { + LOG(INFO) << "Set has_done for start_key[" << start_key << "], end_key[" << end_key << "]"; + ReleaseAndUnlockDiffRange(ins_cluster_root_path, src_table_name, tablet_id, start_key, + end_key, &ins_sdk, diff_stat_data); + } + start_key = end_key; + } + LOG(INFO) << "Finish DiffScanDestOp"; + return res; } -int main(int argc, char* argv[]) { - ::google::ParseCommandLineFlags(&argc, &argv, true); - if (FLAGS_flagfile == "") { - FLAGS_flagfile = "../conf/tera.flag"; - if (access(FLAGS_flagfile.c_str(), R_OK) != 0) { - FLAGS_flagfile = "./tera.flag"; - } - utils::LoadFlagFile(FLAGS_flagfile); - } - - if (argc > 1 && std::string(argv[1]) == "version") { - PrintSystemVersion(); - } else if (argc > 2 && std::string(argv[1]) == "dump" && std::string(argv[2]) == "prepare") { - return DumpPrepareOp(); - } else if (argc > 2 && std::string(argv[1]) == "dump" && std::string(argv[2]) == "prepare_safe") { - return DumpPrepareSafeOp(); - } else if (argc > 2 && std::string(argv[1]) == "dump" && std::string(argv[2]) == "run") { - return DumpRunOp(); - //} else if (argc > 2 && std::string(argv[1]) == "dump" && std::string(argv[2]) == "show") { - // return DumpShowOp(); - //} else if (argc > 2 && std::string(argv[1]) == "dump" && std::string(argv[2]) == "check") { - // return DumpCheckOp(): +int StatProgress(const std::string& ins_cluster_root_path, galaxy::ins::sdk::InsSDK* ins_sdk, + const std::map& tables_map, + std::map* stat_res) { + int res = 0; + stat_res->clear(); + + std::string range_path = ins_cluster_root_path + "/tablet"; + + std::string start = range_path + "/"; + std::string end = range_path + "/"; + end.append(1, '\255'); + + galaxy::ins::sdk::ScanResult* result = ins_sdk->Scan(start, end); + while (!result->Done()) { + if (result->Error() != galaxy::ins::sdk::kOK) { + LOG(INFO) << "scan fail: start " << start << ", end " << end << ", err " << result->Error(); + res = -1; + break; + } + + std::string key = result->Key(); + std::string val = result->Value(); + + std::string str = key.substr(range_path.length() + 1); + std::size_t pos = str.find('/'); + std::string src_table_name = str.substr(0, pos); + + if (tables_map.find(src_table_name) == tables_map.cend()) { + result->Next(); + continue; + } + + std::string has_done = val.substr(0, 1); + if (stat_res->find(src_table_name) == stat_res->cend()) { + (*stat_res)[src_table_name].finish_range_num = 0; + (*stat_res)[src_table_name].total_range_num = 1; } else { - HelpOp(argc, argv); - return -1; + (*stat_res)[src_table_name].total_range_num += 1; } - return 0; + + if (has_done == "1") { + (*stat_res)[src_table_name].finish_range_num += 1; + } + result->Next(); + } + delete result; + return res; +} + +int PrintStatProgrssInfo(const std::string& ins_cluster_root_path) { + std::string ins_cluster_addr = FLAGS_ins_cluster_addr; + galaxy::ins::sdk::InsSDK ins_sdk(ins_cluster_addr); + std::map tables_map; + std::string path = ins_cluster_root_path + "/" + FLAGS_dump_tables_map_path; + if (-1 == GetMapFromNexus(path, &ins_sdk, &tables_map)) { + LOG(WARNING) << "GetTablesMap failed in ShowProgress"; + return -1; + } + + CHECK(tables_map.size() > 0); + + std::map stat_res; + if (StatProgress(ins_cluster_root_path, &ins_sdk, tables_map, &stat_res) < 0) { + return -1; + } + for (auto it = stat_res.cbegin(); it != stat_res.cend(); ++it) { + printf("%-20s%6d / %-6d\n", it->first.c_str(), it->second.finish_range_num, + it->second.total_range_num); + } + + return 0; +} + +int DumpProgressOp() { + std::string ins_cluster_root_path = FLAGS_ins_cluster_dump_root_path; + if (-1 == PrintStatProgrssInfo(ins_cluster_root_path)) { + LOG(WARNING) << "Print stat progress failed"; + return -1; + } + return 0; +} + +int DiffProgressOp() { + std::string ins_cluster_root_path = FLAGS_ins_cluster_diff_root_path; + if (-1 == PrintStatProgrssInfo(ins_cluster_root_path)) { + LOG(WARNING) << "Print stat progress failed"; + return -1; + } + return 0; +} + +int StatDiffResult(const std::string& ins_cluster_root_path, galaxy::ins::sdk::InsSDK* ins_sdk, + const std::map& tables_map, + std::map* stat_res) { + int res = 0; + stat_res->clear(); + + std::string range_path = ins_cluster_root_path + "/tablet"; + + std::string start = range_path + "/"; + std::string end = range_path + "/"; + end.append(1, '\255'); + + galaxy::ins::sdk::ScanResult* result = ins_sdk->Scan(start, end); + while (!result->Done()) { + if (result->Error() != galaxy::ins::sdk::kOK) { + LOG(INFO) << "scan fail: start " << start << ", end " << end << ", err " << result->Error(); + res = -1; + break; + } + + std::string key = result->Key(); + std::string val = result->Value(); + + std::string str = key.substr(range_path.length() + 1); + std::size_t pos = str.find('/'); + std::string src_table_name = str.substr(0, pos); + + if (tables_map.find(src_table_name) == tables_map.cend()) { + result->Next(); + continue; + } + + std::string has_done = val.substr(0, 1); + if (has_done != "1") { + LOG(WARNING) << "still have range not finish diff running"; + res = -1; + break; + } + + if (stat_res->find(src_table_name) == stat_res->cend()) { + (*stat_res)[src_table_name].reset(); + } + + pos = val.find(':'); + std::string stat_res_str = val.substr(0, pos); + std::vector res_datas; + SplitString(stat_res_str, ",", &res_datas); + (*stat_res)[src_table_name].only_in_src += strtoul(res_datas[1].c_str(), NULL, 10); + (*stat_res)[src_table_name].only_in_dest += strtoul(res_datas[2].c_str(), NULL, 10); + (*stat_res)[src_table_name].both_have_but_diff += strtoul(res_datas[3].c_str(), NULL, 10); + (*stat_res)[src_table_name].both_have_and_same += strtoul(res_datas[4].c_str(), NULL, 10); + (*stat_res)[src_table_name].in_src_or_in_dest += strtoul(res_datas[5].c_str(), NULL, 10); + + result->Next(); + } + delete result; + return res; } +int DiffResultOp() { + int res = 0; + std::string ins_cluster_addr = FLAGS_ins_cluster_addr; + std::string ins_cluster_root_path = FLAGS_ins_cluster_diff_root_path; + + galaxy::ins::sdk::InsSDK ins_sdk(ins_cluster_addr); + std::map tables_map; + std::string path = ins_cluster_root_path + "/" + FLAGS_dump_tables_map_path; + if (-1 == GetMapFromNexus(path, &ins_sdk, &tables_map)) { + LOG(WARNING) << "GetTablesMap failed in ShowProgress"; + return -1; + } + + CHECK(tables_map.size() > 0); + + std::map stat_res; + if (StatDiffResult(ins_cluster_root_path, &ins_sdk, tables_map, &stat_res) < 0) { + return -1; + } + printf("%-20s%20s%20s%20s%20s%20s%20s\n", "table_name", "only_in_src", "only_in_dest", + "both_have_but_diff", "both_have_and_same", "in_src_or_in_dest", "diff_rate"); + for (auto it = stat_res.cbegin(); it != stat_res.cend(); ++it) { + double diff_rate = 0.0; + if (it->second.in_src_or_in_dest) { + diff_rate = 1.0 - 1.0 * it->second.both_have_and_same / it->second.in_src_or_in_dest; + } + printf("%-20s%20lu%20lu%20lu%20lu%20lu%19.6f\n", it->first.c_str(), it->second.only_in_src, + it->second.only_in_dest, it->second.both_have_but_diff, it->second.both_have_and_same, + it->second.in_src_or_in_dest, diff_rate); + } + + return res; +} + +int main(int argc, char* argv[]) { + FLAGS_minloglevel = 2; + ::google::ParseCommandLineFlags(&argc, &argv, true); + if (FLAGS_flagfile == "") { + FLAGS_flagfile = "../conf/terautil.flag"; + if (access(FLAGS_flagfile.c_str(), R_OK) != 0) { + FLAGS_flagfile = "./terautil.flag"; + } + utils::LoadFlagFile(FLAGS_flagfile); + } + + std::string log_prefix = "terautil"; + ::google::InitGoogleLogging(log_prefix.c_str()); + utils::SetupLog(log_prefix); + if (FLAGS_tera_info_log_clean_enable) { + common::LogCleaner::StartCleaner(); + LOG(INFO) << "start log cleaner"; + } else { + LOG(INFO) << "log cleaner is disable"; + } + Client::SetGlogIsInitialized(); + + if (argc == 2 && std::string(argv[1]) == "version") { + PrintSystemVersion(); + } else if (argc > 2) { + std::string op(argv[1]); + std::string cmd(argv[2]); + if (op == "dump" && cmd == "prepare") { + return DumpPrepareOp(); + } else if (op == "dump" && cmd == "prepare_safe") { + return DumpPrepareSafeOp(); + } else if (op == "dump" && cmd == "load") { + return LoadTablesMapOp(); + } else if (op == "dump" && cmd == "prepare_tables") { + return DumpPrepareTablesOp(); + } else if (op == "dump" && cmd == "run") { + g_sem = new common::Semaphore(FLAGS_dump_concurrent_limit); + return DumpRunOp(); + } else if (op == "dump" && cmd == "rewrite") { + g_sem = new common::Semaphore(FLAGS_dump_concurrent_limit); + return DumpRewriteOp(); + } else if (op == "dump" && cmd == "read") { + if (argc != 4) { + std::cout << "leak argument" << std::endl; + return -1; + } + std::string afs_file_path(argv[3]); + return DumpReadOp(afs_file_path); + } else if (op == "dump" && cmd == "clean") { + return DumpCleanOp(); + } else if (op == "dump" && cmd == "progress") { + return DumpProgressOp(); + } else if (op == "dump" && cmd == "ut") { + return DumpUtOp(); + } else if (op == "diff" && cmd == "prepare") { + return DiffPrepareOp(); + } else if (op == "diff" && cmd == "run") { + g_sem = new common::Semaphore(FLAGS_write_only_in_src_to_dest_concurrent_limit); + return DiffRunOp(); + } else if (op == "diff" && cmd == "scan_dest") { + return DiffScanDestOp(); + } else if (op == "diff" && cmd == "progress") { + return DiffProgressOp(); + } else if (op == "diff" && cmd == "result") { + return DiffResultOp(); + } else if (op == "diff" && cmd == "clean") { + return DiffCleanOp(); + } else { + HelpOp(argc, argv); + return -1; + } + } else { + HelpOp(argc, argv); + return -1; + } + return 0; +} diff --git a/src/timeoracle/bench/timeoracle_bench.cc b/src/timeoracle/bench/timeoracle_bench.cc index 9d388375f..326a34779 100644 --- a/src/timeoracle/bench/timeoracle_bench.cc +++ b/src/timeoracle/bench/timeoracle_bench.cc @@ -14,41 +14,41 @@ using namespace tera::timeoracle; std::shared_ptr g_thread_pool; - void worker() { - tera::sdk::ClusterFinder* cluster_finder = sdk::NewTimeoracleClusterFinder(); - if (!cluster_finder) { - std::cerr << "Create cluster failed, use -h to see configuer items\n"; - exit(0); - } - tera::timeoracle::TimeoracleClientImpl client(g_thread_pool.get(), cluster_finder); - - while (true) { - int64_t st = client.GetTimestamp(1); - if (st <= 0) { - std::cout << "rpc failed" << std::endl; - ThisThread::Sleep(200); - } + tera::sdk::ClusterFinder* cluster_finder = sdk::NewTimeoracleClusterFinder(); + if (!cluster_finder) { + std::cerr << "Create cluster failed, use -h to see configuer items\n"; + exit(0); + } + tera::timeoracle::TimeoracleClientImpl client(g_thread_pool.get(), cluster_finder); + + while (true) { + int64_t st = client.GetTimestamp(1); + if (st <= 0) { + std::cout << "rpc failed" << std::endl; + ThisThread::Sleep(200); } + } } int main(int argc, char** argv) { - if (argc > 1 && (std::string(argv[1]) == "-h" || std::string(argv[1]) == "help")) { - std::cout << argv[0] << " --client_thread_num= \n" - << " and zk/ins configures should be set in tera.flag or via command line" << std::endl; - return 0; - } - ::google::ParseCommandLineFlags(&argc, &argv, true); - g_thread_pool.reset(new common::ThreadPool(FLAGS_client_thread_num + 1)); + if (argc > 1 && (std::string(argv[1]) == "-h" || std::string(argv[1]) == "help")) { + std::cout << argv[0] << " --client_thread_num= \n" + << " and zk/ins configures should be set in tera.flag or via " + "command line" << std::endl; + return 0; + } + ::google::ParseCommandLineFlags(&argc, &argv, true); + g_thread_pool.reset(new common::ThreadPool(FLAGS_client_thread_num + 1)); - std::vector thread_list; - for (int64_t i = 0; i < FLAGS_client_thread_num; ++i) { - thread_list.push_back(std::thread(&worker)); - } + std::vector thread_list; + for (int64_t i = 0; i < FLAGS_client_thread_num; ++i) { + thread_list.push_back(std::thread(&worker)); + } - for (auto& th : thread_list) { - th.join(); - } + for (auto& th : thread_list) { + th.join(); + } - return 0; + return 0; } diff --git a/src/timeoracle/remote_timeoracle.h b/src/timeoracle/remote_timeoracle.h index 588bd0547..20e3206d9 100644 --- a/src/timeoracle/remote_timeoracle.h +++ b/src/timeoracle/remote_timeoracle.h @@ -14,60 +14,57 @@ namespace tera { namespace timeoracle { class ClosureGuard { -public: - ClosureGuard(::google::protobuf::Closure* done) : done_(done) { - } + public: + ClosureGuard(::google::protobuf::Closure* done) : done_(done) {} - ~ClosureGuard() { - if (done_) { - done_->Run(); - } + ~ClosureGuard() { + if (done_) { + done_->Run(); } + } - ::google::protobuf::Closure* release() { - auto done = done_; - done_ = nullptr; - return done; - } + ::google::protobuf::Closure* release() { + auto done = done_; + done_ = nullptr; + return done; + } -private: - ClosureGuard(const ClosureGuard&) = delete; -private: - ::google::protobuf::Closure* done_; + private: + ClosureGuard(const ClosureGuard&) = delete; + + private: + ::google::protobuf::Closure* done_; }; class RemoteTimeoracle : public TimeoracleServer { -public: - RemoteTimeoracle(int64_t start_timestamp) : timeoracle_(start_timestamp) { - } + public: + RemoteTimeoracle(int64_t start_timestamp) : timeoracle_(start_timestamp) {} - virtual void GetTimestamp(::google::protobuf::RpcController* controller, - const ::tera::GetTimestampRequest* request, - ::tera::GetTimestampResponse* response, - ::google::protobuf::Closure* done) { - ClosureGuard closure_guard(done); - - int64_t count = request->count(); - int64_t start_timestamp = timeoracle_.GetTimestamp(count); - - if (start_timestamp) { - response->set_start_timestamp(start_timestamp); - response->set_count(count); - response->set_status(kTimeoracleOk); - } else { - response->set_status(kTimeoracleBusy); - } - } + virtual void GetTimestamp(::google::protobuf::RpcController* controller, + const ::tera::GetTimestampRequest* request, + ::tera::GetTimestampResponse* response, + ::google::protobuf::Closure* done) { + ClosureGuard closure_guard(done); + + int64_t count = request->count(); + int64_t start_timestamp = timeoracle_.GetTimestamp(count); - Timeoracle* GetTimeoracle() { - return &timeoracle_; + if (start_timestamp) { + response->set_start_timestamp(start_timestamp); + response->set_count(count); + response->set_status(kTimeoracleOk); + } else { + response->set_status(kTimeoracleBusy); } + } + + Timeoracle* GetTimeoracle() { return &timeoracle_; } -private: - Timeoracle timeoracle_; + private: + Timeoracle timeoracle_; }; -} // namespace timeoracle -} // namespace tera +} // namespace timeoracle +} // namespace tera -#endif // TERA_TIMEORACLE_REMOTE_TIMEORACLE_H +#endif // TERA_TIMEORACLE_REMOTE_TIMEORACLE_H diff --git a/src/timeoracle/test/timeoracle_test.cc b/src/timeoracle/test/timeoracle_test.cc index e7b6f4472..f2adde26a 100644 --- a/src/timeoracle/test/timeoracle_test.cc +++ b/src/timeoracle/test/timeoracle_test.cc @@ -18,61 +18,60 @@ DECLARE_string(tera_fake_zk_path_prefix); namespace tera { namespace timeoracle { -class TimeoracleTest: public ::testing::Test { -public: +class TimeoracleTest : public ::testing::Test { + public: }; TEST_F(TimeoracleTest, UniqueTimestampMsTest) { - int64_t ts0 = Timeoracle::UniqueTimestampMs(); - for (int i = 0; i < 10000; ++i) { - int64_t ts = Timeoracle::UniqueTimestampMs(); - EXPECT_LT(ts0, ts); - ts0 = ts; - } + int64_t ts0 = Timeoracle::UniqueTimestampMs(); + for (int i = 0; i < 10000; ++i) { + int64_t ts = Timeoracle::UniqueTimestampMs(); + EXPECT_LT(ts0, ts); + ts0 = ts; + } } TEST_F(TimeoracleTest, TimeoracleFunc) { - Timeoracle to(1024LL); + Timeoracle to(1024LL); - auto tmp = to.GetTimestamp(10LL); - EXPECT_EQ(tmp, 0); + auto tmp = to.GetTimestamp(10LL); + EXPECT_EQ(tmp, 0); - tmp = to.UpdateLimitTimestamp(10LL); - EXPECT_EQ(tmp, 10); + tmp = to.UpdateLimitTimestamp(10LL); + EXPECT_EQ(tmp, 10); - tmp = to.GetTimestamp(10LL); - EXPECT_EQ(tmp, 0); + tmp = to.GetTimestamp(10LL); + EXPECT_EQ(tmp, 0); - tmp = to.UpdateLimitTimestamp(2000LL); - EXPECT_EQ(tmp, 2000); + tmp = to.UpdateLimitTimestamp(2000LL); + EXPECT_EQ(tmp, 2000); - tmp = to.GetTimestamp(10LL); - EXPECT_EQ(tmp, 1044); + tmp = to.GetTimestamp(10LL); + EXPECT_EQ(tmp, 1044); - tmp = to.GetTimestamp(10LL); - EXPECT_EQ(tmp, 1054); + tmp = to.GetTimestamp(10LL); + EXPECT_EQ(tmp, 1054); - EXPECT_EQ(to.GetStartTimestamp(), 1064); + EXPECT_EQ(to.GetStartTimestamp(), 1064); - tmp = to.UpdateStartTimestamp(); + tmp = to.UpdateStartTimestamp(); - EXPECT_GT(tmp, 1064); + EXPECT_GT(tmp, 1064); - auto new_ts = to.GetTimestamp(10LL); - EXPECT_EQ(new_ts, 0); + auto new_ts = to.GetTimestamp(10LL); + EXPECT_EQ(new_ts, 0); } -} // namespace timeoracle -} // namespace tera +} // namespace timeoracle +} // namespace tera int main(int argc, char** argv) { - ::google::ParseCommandLineFlags(&argc, &argv, true); - ::google::InitGoogleLogging(argv[0]); - FLAGS_tera_coord_type = "fake_zk"; - FLAGS_tera_leveldb_env_type = "local"; - - tera::utils::SetupLog("timeorcale_test"); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + ::google::ParseCommandLineFlags(&argc, &argv, true); + ::google::InitGoogleLogging(argv[0]); + FLAGS_tera_coord_type = "fake_zk"; + FLAGS_tera_leveldb_env_type = "local"; + + tera::utils::SetupLog("timeorcale_test"); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); } - diff --git a/src/timeoracle/timeoracle.cc b/src/timeoracle/timeoracle.cc index 9d755445b..1300cd362 100644 --- a/src/timeoracle/timeoracle.cc +++ b/src/timeoracle/timeoracle.cc @@ -7,7 +7,7 @@ namespace tera { namespace timeoracle { -std::atomic Timeoracle::s_last_timestamp_ms; +std::atomic Timeoracle::s_last_timestamp_ms; -} // namespace timeoracle -} // namespace tera +} // namespace timeoracle +} // namespace tera diff --git a/src/timeoracle/timeoracle.h b/src/timeoracle/timeoracle.h index eb690de56..d6febe527 100644 --- a/src/timeoracle/timeoracle.h +++ b/src/timeoracle/timeoracle.h @@ -16,109 +16,101 @@ namespace timeoracle { constexpr int64_t kTimestampPerMilliSecond = 10000ULL; constexpr int64_t kTimestampPerSecond = kTimestampPerMilliSecond * 1000ULL; -constexpr int64_t kBaseTimestampMilliSecond = 1483200000000ULL; // 20170101 00:00 +constexpr int64_t kBaseTimestampMilliSecond = 1483200000000ULL; // 20170101 00:00 inline int64_t clock_realtime_ms() { - struct timespec tp; - ::clock_gettime(CLOCK_REALTIME, &tp); - return tp.tv_sec * 1000ULL + tp.tv_nsec / 1000000ULL - kBaseTimestampMilliSecond; + struct timespec tp; + ::clock_gettime(CLOCK_REALTIME, &tp); + return tp.tv_sec * 1000ULL + tp.tv_nsec / 1000000ULL - kBaseTimestampMilliSecond; } class Timeoracle { -public: - Timeoracle(int64_t start_timestamp) : start_timestamp_(start_timestamp), - limit_timestamp_(0) { - } - - // if num == 0, see next timstamp - // if return 0, allocate timestamp failed - int64_t GetTimestamp(int64_t num) { - int64_t start_timestamp = start_timestamp_.fetch_add(num); + public: + Timeoracle(int64_t start_timestamp) : start_timestamp_(start_timestamp), limit_timestamp_(0) {} - if ((start_timestamp + num) >= limit_timestamp_) { - return 0; - } + // if num == 0, see next timstamp + // if return 0, allocate timestamp failed + int64_t GetTimestamp(int64_t num) { + int64_t start_timestamp = start_timestamp_.fetch_add(num); - return start_timestamp; + if ((start_timestamp + num) >= limit_timestamp_) { + return 0; } - int64_t UpdateLimitTimestamp(int64_t limit_timestamp) { - if (limit_timestamp > limit_timestamp_) { - limit_timestamp_ = limit_timestamp; - } else { - LOG(ERROR) << "update limit timestamp failed, limit_timestamp_=" << limit_timestamp_ - << ",update to " << limit_timestamp; - return 0; - } - return limit_timestamp; - } + return start_timestamp; + } - int64_t UpdateStartTimestamp() { - const int64_t cur_timestamp = CurrentTimestamp(); - - int64_t start_timestamp = 0; - while (1) { - start_timestamp = start_timestamp_; - if (start_timestamp < cur_timestamp) { - if (start_timestamp_.compare_exchange_strong(start_timestamp, cur_timestamp)) { - return cur_timestamp; - } - continue; - } - - int64_t limit_timestamp = limit_timestamp_; - if (start_timestamp > limit_timestamp) { - if (start_timestamp_.compare_exchange_strong(start_timestamp, limit_timestamp)) { - LOG(WARNING) << "adjust start timestamp to limit timestamp " << limit_timestamp; - return limit_timestamp; - } - continue; - } - - break; + int64_t UpdateLimitTimestamp(int64_t limit_timestamp) { + if (limit_timestamp > limit_timestamp_) { + limit_timestamp_ = limit_timestamp; + } else { + LOG(ERROR) << "update limit timestamp failed, limit_timestamp_=" << limit_timestamp_ + << ",update to " << limit_timestamp; + return 0; + } + return limit_timestamp; + } + + int64_t UpdateStartTimestamp() { + const int64_t cur_timestamp = CurrentTimestamp(); + + int64_t start_timestamp = 0; + while (1) { + start_timestamp = start_timestamp_; + if (start_timestamp < cur_timestamp) { + if (start_timestamp_.compare_exchange_strong(start_timestamp, cur_timestamp)) { + return cur_timestamp; } + continue; + } + + int64_t limit_timestamp = limit_timestamp_; + if (start_timestamp > limit_timestamp) { + if (start_timestamp_.compare_exchange_strong(start_timestamp, limit_timestamp)) { + LOG(WARNING) << "adjust start timestamp to limit timestamp " << limit_timestamp; + return limit_timestamp; + } + continue; + } - LOG(INFO) << "ignore to adjust start timestamp, current timestamp is " << cur_timestamp; - return start_timestamp; + break; } - int64_t GetStartTimestamp() const { - return start_timestamp_; - } + LOG(INFO) << "ignore to adjust start timestamp, current timestamp is " << cur_timestamp; + return start_timestamp; + } - int64_t GetLimitTimestamp() const { - return limit_timestamp_; - } + int64_t GetStartTimestamp() const { return start_timestamp_; } -private: - std::atomic start_timestamp_; - std::atomic limit_timestamp_; + int64_t GetLimitTimestamp() const { return limit_timestamp_; } -public: - static int64_t UniqueTimestampMs() { - while (true) { - int64_t ts = clock_realtime_ms(); - int64_t last_timestamp_ms = s_last_timestamp_ms; + private: + std::atomic start_timestamp_; + std::atomic limit_timestamp_; - if (ts <= last_timestamp_ms) { - return s_last_timestamp_ms.fetch_add(1) + 1; - } + public: + static int64_t UniqueTimestampMs() { + while (true) { + int64_t ts = clock_realtime_ms(); + int64_t last_timestamp_ms = s_last_timestamp_ms; - if (s_last_timestamp_ms.compare_exchange_strong(last_timestamp_ms, ts)) { - return ts; - } - } - } + if (ts <= last_timestamp_ms) { + return s_last_timestamp_ms.fetch_add(1) + 1; + } - static int64_t CurrentTimestamp() { - return UniqueTimestampMs() * kTimestampPerMilliSecond; + if (s_last_timestamp_ms.compare_exchange_strong(last_timestamp_ms, ts)) { + return ts; + } } + } + + static int64_t CurrentTimestamp() { return UniqueTimestampMs() * kTimestampPerMilliSecond; } -private: - static std::atomic s_last_timestamp_ms; + private: + static std::atomic s_last_timestamp_ms; }; -} // namespace timeoracle -} // namespace tera +} // namespace timeoracle +} // namespace tera -#endif // TERA_TIMEORACLE_TIMEORACLE_H_ +#endif // TERA_TIMEORACLE_TIMEORACLE_H_ diff --git a/src/timeoracle/timeoracle_entry.cc b/src/timeoracle/timeoracle_entry.cc index 8bff587ad..e0eda719c 100644 --- a/src/timeoracle/timeoracle_entry.cc +++ b/src/timeoracle/timeoracle_entry.cc @@ -26,149 +26,147 @@ DECLARE_string(tera_coord_type); namespace tera { namespace timeoracle { -TimeoracleEntry::TimeoracleEntry() : - remote_timeoracle_(nullptr), - startup_timestamp_(0), - need_quit_(false) { - sofa::pbrpc::RpcServerOptions rpc_options; - rpc_options.work_thread_num = FLAGS_tera_timeoracle_work_thread_num; - rpc_options.io_service_pool_size = FLAGS_tera_timeoracle_io_service_pool_size; - rpc_options.no_delay = false; //use Nagle's Algorithm - rpc_options.write_buffer_base_block_factor = 0; //64Bytes per malloc - rpc_options.read_buffer_base_block_factor = 7; //8kBytes per malloc - sofa_pbrpc_server_.reset(new sofa::pbrpc::RpcServer(rpc_options)); - - if (FLAGS_tera_local_addr.empty()) { - local_addr_ = utils::GetLocalHostName()+ ":" + FLAGS_tera_timeoracle_port; - } else { - local_addr_ = FLAGS_tera_local_addr + ":" + FLAGS_tera_timeoracle_port; - } +TimeoracleEntry::TimeoracleEntry() + : remote_timeoracle_(nullptr), startup_timestamp_(0), need_quit_(false) { + sofa::pbrpc::RpcServerOptions rpc_options; + rpc_options.work_thread_num = FLAGS_tera_timeoracle_work_thread_num; + rpc_options.io_service_pool_size = FLAGS_tera_timeoracle_io_service_pool_size; + rpc_options.no_delay = false; // use Nagle's Algorithm + rpc_options.write_buffer_base_block_factor = 0; // 64Bytes per malloc + rpc_options.read_buffer_base_block_factor = 7; // 8kBytes per malloc + sofa_pbrpc_server_.reset(new sofa::pbrpc::RpcServer(rpc_options)); + + if (FLAGS_tera_local_addr.empty()) { + local_addr_ = utils::GetLocalHostName() + ":" + FLAGS_tera_timeoracle_port; + } else { + local_addr_ = FLAGS_tera_local_addr + ":" + FLAGS_tera_timeoracle_port; + } } bool TimeoracleEntry::Start() { - if (!InitZKAdaptor()) { - return false; - } - - int64_t current_timestamp = Timeoracle::CurrentTimestamp(); - if (startup_timestamp_ < current_timestamp) { - startup_timestamp_ = current_timestamp; - } else { - LOG(WARNING) << "startup timestamp big than current timestamp," - << "startup timestamp is " << startup_timestamp_ - << "current timestamp is " << current_timestamp; - } - - LOG(INFO) << "set startup timestamp to " << startup_timestamp_; - - if (!StartServer()) { - return false; - } - - return true; + if (!InitZKAdaptor()) { + return false; + } + + int64_t current_timestamp = Timeoracle::CurrentTimestamp(); + if (startup_timestamp_ < current_timestamp) { + startup_timestamp_ = current_timestamp; + } else { + LOG(WARNING) << "startup timestamp big than current timestamp," + << "startup timestamp is " << startup_timestamp_ << "current timestamp is " + << current_timestamp; + } + + LOG(INFO) << "set startup timestamp to " << startup_timestamp_; + + if (!StartServer()) { + return false; + } + + return true; } TimeoracleEntry::~TimeoracleEntry() { - need_quit_ = true; - if (lease_thread_.joinable()) { - lease_thread_.join(); - } + need_quit_ = true; + if (lease_thread_.joinable()) { + lease_thread_.join(); + } } bool TimeoracleEntry::InitZKAdaptor() { - if (FLAGS_tera_timeoracle_mock_enabled) { - LOG(INFO) << "mock mode" ; - zk_adapter_.reset(new TimeoracleMockAdapter(local_addr_)); - } else if (FLAGS_tera_coord_type == "zk") { - LOG(INFO) << "zk mode" ; - zk_adapter_.reset(new TimeoracleZkAdapter(local_addr_)); - } else if (FLAGS_tera_coord_type == "ins") { - LOG(INFO) << "ins mode" ; - zk_adapter_.reset(new TimeoracleInsAdapter(local_addr_)); - } else { - LOG(FATAL) << "invalid configure for coord service, please check " - << "--tera_timeoracle_mock_enabled=true or " - << "--tera_coord_type=zk|ins"; - assert(0); - } - - return zk_adapter_->Init(&startup_timestamp_); + if (FLAGS_tera_timeoracle_mock_enabled) { + LOG(INFO) << "mock mode"; + zk_adapter_.reset(new TimeoracleMockAdapter(local_addr_)); + } else if (FLAGS_tera_coord_type == "zk") { + LOG(INFO) << "zk mode"; + zk_adapter_.reset(new TimeoracleZkAdapter(local_addr_)); + } else if (FLAGS_tera_coord_type == "ins") { + LOG(INFO) << "ins mode"; + zk_adapter_.reset(new TimeoracleInsAdapter(local_addr_)); + } else { + LOG(FATAL) << "invalid configure for coord service, please check " + << "--tera_timeoracle_mock_enabled=true or " + << "--tera_coord_type=zk|ins"; + assert(0); + } + + return zk_adapter_->Init(&startup_timestamp_); } bool TimeoracleEntry::StartServer() { - IpAddress timeoracle_addr("0.0.0.0", FLAGS_tera_timeoracle_port); - LOG(INFO) << "Start timeoracle RPC server at: " << timeoracle_addr.ToString(); + IpAddress timeoracle_addr("0.0.0.0", FLAGS_tera_timeoracle_port); + LOG(INFO) << "Start timeoracle RPC server at: " << timeoracle_addr.ToString(); - remote_timeoracle_ = new RemoteTimeoracle(startup_timestamp_); - std::thread lease_thread(&TimeoracleEntry::LeaseThread, this); - lease_thread_ = std::move(lease_thread); + remote_timeoracle_ = new RemoteTimeoracle(startup_timestamp_); + std::thread lease_thread(&TimeoracleEntry::LeaseThread, this); + lease_thread_ = std::move(lease_thread); - auto timeoracle = remote_timeoracle_->GetTimeoracle(); + auto timeoracle = remote_timeoracle_->GetTimeoracle(); - while (startup_timestamp_ < timeoracle->GetLimitTimestamp()) { - if (need_quit_) { - return false; - } - ThisThread::Sleep(100); + while (startup_timestamp_ < timeoracle->GetLimitTimestamp()) { + if (need_quit_) { + return false; } + ThisThread::Sleep(100); + } - sofa_pbrpc_server_->RegisterService(remote_timeoracle_); - if (!sofa_pbrpc_server_->Start(timeoracle_addr.ToString())) { - LOG(ERROR) << "start timeoracle RPC server error"; - return false; - } + sofa_pbrpc_server_->RegisterService(remote_timeoracle_); + if (!sofa_pbrpc_server_->Start(timeoracle_addr.ToString())) { + LOG(ERROR) << "start timeoracle RPC server error"; + return false; + } - LOG(INFO) << "finish start timeoracle RPC server"; - return true; + LOG(INFO) << "finish start timeoracle RPC server"; + return true; } bool TimeoracleEntry::Run() { - if (need_quit_) { - return false; - } + if (need_quit_) { + return false; + } - int64_t start_timestamp = remote_timeoracle_->GetTimeoracle()->UpdateStartTimestamp(); + int64_t start_timestamp = remote_timeoracle_->GetTimeoracle()->UpdateStartTimestamp(); - VLOG(100) << "adjust start timestamp finished, start timestmap is " << start_timestamp; + VLOG(100) << "adjust start timestamp finished, start timestmap is " << start_timestamp; - ThisThread::Sleep(1000); - return true; + ThisThread::Sleep(1000); + return true; } void TimeoracleEntry::ShutdownServer() { - need_quit_ = true; - sofa_pbrpc_server_->Stop(); + need_quit_ = true; + sofa_pbrpc_server_->Stop(); } void TimeoracleEntry::LeaseThread() { - auto timeoracle = remote_timeoracle_->GetTimeoracle(); - - while (!need_quit_) { - int64_t start_timestamp = timeoracle->GetStartTimestamp(); - int64_t limit_timestamp = timeoracle->GetLimitTimestamp(); - int64_t refresh_lease_timestamp = - FLAGS_tera_timeoracle_refresh_lease_second * kTimestampPerSecond; + auto timeoracle = remote_timeoracle_->GetTimeoracle(); - if (start_timestamp + refresh_lease_timestamp >= limit_timestamp) { - // need to require lease - if (limit_timestamp < start_timestamp) { - limit_timestamp = start_timestamp; - } + while (!need_quit_) { + int64_t start_timestamp = timeoracle->GetStartTimestamp(); + int64_t limit_timestamp = timeoracle->GetLimitTimestamp(); + int64_t refresh_lease_timestamp = + FLAGS_tera_timeoracle_refresh_lease_second * kTimestampPerSecond; - int64_t next_limit_timestamp = - limit_timestamp + FLAGS_tera_timeoracle_max_lease_second * kTimestampPerSecond; + if (start_timestamp + refresh_lease_timestamp >= limit_timestamp) { + // need to require lease + if (limit_timestamp < start_timestamp) { + limit_timestamp = start_timestamp; + } - if (!zk_adapter_->UpdateTimestamp(next_limit_timestamp)) { - need_quit_ = true; - return; - } + int64_t next_limit_timestamp = + limit_timestamp + FLAGS_tera_timeoracle_max_lease_second * kTimestampPerSecond; - timeoracle->UpdateLimitTimestamp(next_limit_timestamp); - } + if (!zk_adapter_->UpdateTimestamp(next_limit_timestamp)) { + need_quit_ = true; + return; + } - ThisThread::Sleep(1000); + timeoracle->UpdateLimitTimestamp(next_limit_timestamp); } + + ThisThread::Sleep(1000); + } } -} // namespace timeoracle -} // namespace tera +} // namespace timeoracle +} // namespace tera diff --git a/src/timeoracle/timeoracle_entry.h b/src/timeoracle/timeoracle_entry.h index 356ae452a..9f78c2f5c 100644 --- a/src/timeoracle/timeoracle_entry.h +++ b/src/timeoracle/timeoracle_entry.h @@ -7,7 +7,7 @@ #include -#include "tera_entry.h" +#include "tera/tera_entry.h" #include #include #include @@ -19,31 +19,30 @@ class RemoteTimeoracle; class TimeoracleZkAdapterBase; class TimeoracleEntry : public TeraEntry { -public: - TimeoracleEntry(); - ~TimeoracleEntry(); - - - virtual bool Start() override; - virtual bool Run() override; - virtual void ShutdownServer() override; - -private: - bool InitZKAdaptor(); - bool StartServer(); - void LeaseThread(); - -private: - std::string local_addr_; - RemoteTimeoracle* remote_timeoracle_; - std::unique_ptr sofa_pbrpc_server_; - int64_t startup_timestamp_; - std::unique_ptr zk_adapter_; - std::thread lease_thread_; - std::atomic need_quit_; + public: + TimeoracleEntry(); + ~TimeoracleEntry(); + + virtual bool Start() override; + virtual bool Run() override; + virtual void ShutdownServer() override; + + private: + bool InitZKAdaptor(); + bool StartServer(); + void LeaseThread(); + + private: + std::string local_addr_; + RemoteTimeoracle* remote_timeoracle_; + std::unique_ptr sofa_pbrpc_server_; + int64_t startup_timestamp_; + std::unique_ptr zk_adapter_; + std::thread lease_thread_; + std::atomic need_quit_; }; -} // namespace timeoracle -} // namespace tera +} // namespace timeoracle +} // namespace tera -#endif // TERA_TIMEORACLE_TIMEORACLE_ENTRY_H_ +#endif // TERA_TIMEORACLE_TIMEORACLE_ENTRY_H_ diff --git a/src/timeoracle/timeoracle_zk_adapter.cc b/src/timeoracle/timeoracle_zk_adapter.cc index 58dd4a554..87ddc86b0 100644 --- a/src/timeoracle/timeoracle_zk_adapter.cc +++ b/src/timeoracle/timeoracle_zk_adapter.cc @@ -27,451 +27,443 @@ namespace tera { namespace timeoracle { void TimeoracleZkAdapterBase::OnNodeValueChanged(const std::string& path, - const std::string& value) { - LOG(INFO) << "zk OnNodeValueChanged, path=" << path; + const std::string& value) { + LOG(INFO) << "zk OnNodeValueChanged, path=" << path; } void TimeoracleZkAdapterBase::OnChildrenChanged(const std::string& path, - const std::vector& name_list, - const std::vector& data_list) { - LOG(INFO) << "zk OnChildrenChanged, path=" << path; + const std::vector& name_list, + const std::vector& data_list) { + LOG(INFO) << "zk OnChildrenChanged, path=" << path; } void TimeoracleZkAdapterBase::OnNodeCreated(const std::string& path) { - LOG(INFO) << "zk OnNodeCreated, path=" << path; + LOG(INFO) << "zk OnNodeCreated, path=" << path; } void TimeoracleZkAdapterBase::OnNodeDeleted(const std::string& path) { - LOG(INFO) << "zk OnNodeDeleted, path=" << path; - Finalize(); - _Exit(EXIT_FAILURE); + LOG(INFO) << "zk OnNodeDeleted, path=" << path; + Finalize(); + _Exit(EXIT_FAILURE); } -void TimeoracleZkAdapterBase::OnWatchFailed(const std::string& path, int watch_type, - int err) { - LOG(INFO) << "zk OnWatchFailed, path=" << path; - Finalize(); - _Exit(EXIT_FAILURE); +void TimeoracleZkAdapterBase::OnWatchFailed(const std::string& path, int watch_type, int err) { + LOG(INFO) << "zk OnWatchFailed, path=" << path; + Finalize(); + _Exit(EXIT_FAILURE); } void TimeoracleZkAdapterBase::OnSessionTimeout() { - LOG(ERROR) << "zk session timeout!"; - _Exit(EXIT_FAILURE); + LOG(ERROR) << "zk session timeout!"; + _Exit(EXIT_FAILURE); } -TimeoracleZkAdapter::~TimeoracleZkAdapter() { -} +TimeoracleZkAdapter::~TimeoracleZkAdapter() {} bool TimeoracleZkAdapter::Init(int64_t* last_timestamp) { - if (!InitZk()) { - return false; - } + if (!InitZk()) { + return false; + } - if (!LockTimeoracleLock()) { - return false; - } + if (!LockTimeoracleLock()) { + return false; + } - if (ReadTimestamp(last_timestamp)) { - LOG(INFO) << "read timestamp sucess,get start_timestamp=" << *last_timestamp; - return CreateTimeoracleNode(); - } + if (ReadTimestamp(last_timestamp)) { + LOG(INFO) << "read timestamp sucess,get start_timestamp=" << *last_timestamp; + return CreateTimeoracleNode(); + } - return false; + return false; } bool TimeoracleZkAdapter::CreateTimeoracleNode() { - LOG(INFO) << "try create timeoracle nod,path=" << kTimeoracleNodePath; - int32_t retry_count = 0; - int zk_errno = zk::ZE_OK; - while (!CreateEphemeralNode(kTimeoracleNodePath, server_addr_, &zk_errno)) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to create timeoracle node"; - return false; - } - LOG(ERROR) << "retry create timeoracle node in " - << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; - } - LOG(INFO) << "create timeoracle node success"; - return true; + LOG(INFO) << "try create timeoracle nod,path=" << kTimeoracleNodePath; + int32_t retry_count = 0; + int zk_errno = zk::ZE_OK; + while (!CreateEphemeralNode(kTimeoracleNodePath, server_addr_, &zk_errno)) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to create timeoracle node"; + return false; + } + LOG(ERROR) << "retry create timeoracle node in " << FLAGS_tera_zk_retry_period + << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + LOG(INFO) << "create timeoracle node success"; + return true; } bool TimeoracleZkAdapter::InitZk() { - LOG(INFO) << "try to init zk,zk_addr_list=" << FLAGS_tera_zk_addr_list - << ",zk_root_path=" << FLAGS_tera_zk_root_path; - int zk_errno = zk::ZE_OK; - int32_t retry_count = 0; - while (!ZooKeeperAdapter::Init(FLAGS_tera_zk_addr_list, - FLAGS_tera_zk_root_path, - FLAGS_tera_zk_timeout, - server_addr_, &zk_errno)) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to init zk: " << zk::ZkErrnoToString(zk_errno); - return false; - } - LOG(ERROR) << "init zk fail: " << zk::ZkErrnoToString(zk_errno) - << ". retry in " << FLAGS_tera_zk_retry_period << " ms, retry: " - << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; - } - LOG(INFO) << "init zk success"; - return true; + LOG(INFO) << "try to init zk,zk_addr_list=" << FLAGS_tera_zk_addr_list + << ",zk_root_path=" << FLAGS_tera_zk_root_path; + int zk_errno = zk::ZE_OK; + int32_t retry_count = 0; + while (!ZooKeeperAdapter::Init(FLAGS_tera_zk_addr_list, FLAGS_tera_zk_root_path, + FLAGS_tera_zk_timeout, server_addr_, &zk_errno)) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to init zk: " << zk::ZkErrnoToString(zk_errno); + return false; + } + LOG(ERROR) << "init zk fail: " << zk::ZkErrnoToString(zk_errno) << ". retry in " + << FLAGS_tera_zk_retry_period << " ms, retry: " << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + LOG(INFO) << "init zk success"; + return true; } bool TimeoracleZkAdapter::LockTimeoracleLock() { - LOG(INFO) << "try to lock timeoracle lock,path=" << kTimeoracleLockPath; - int32_t retry_count = 0; - int zk_errno = zk::ZE_OK; - while (!SyncLock(kTimeoracleLockPath, &zk_errno, -1)) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to acquire timeoracle lock"; - return false; - } - LOG(ERROR) << "retry lock timeoracle lock in " - << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; - } - LOG(INFO) << "acquire timeoracle lock success"; - return true; + LOG(INFO) << "try to lock timeoracle lock,path=" << kTimeoracleLockPath; + int32_t retry_count = 0; + int zk_errno = zk::ZE_OK; + while (!SyncLock(kTimeoracleLockPath, &zk_errno, -1)) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to acquire timeoracle lock"; + return false; + } + LOG(ERROR) << "retry lock timeoracle lock in " << FLAGS_tera_zk_retry_period + << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + LOG(INFO) << "acquire timeoracle lock success"; + return true; } bool TimeoracleZkAdapter::ReadTimestamp(int64_t* timestamp) { - LOG(INFO) << "try to read timestamp, path=" << kTimeoracleTimestampPath; - - std::string timestamp_str; - int32_t retry_count = 0; - int zk_errno = zk::ZE_OK; - while (!ReadNode(kTimeoracleTimestampPath, ×tamp_str, &zk_errno) - && zk_errno != zk::ZE_NOT_EXIST) { - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(ERROR) << "fail to read timestamp node"; - return false; - } - LOG(ERROR) << "retry read timestamp node in " - << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; - } - if (zk_errno == zk::ZE_NOT_EXIST) { - *timestamp = 0; - return true; - } + LOG(INFO) << "try to read timestamp, path=" << kTimeoracleTimestampPath; + + std::string timestamp_str; + int32_t retry_count = 0; + int zk_errno = zk::ZE_OK; + while (!ReadNode(kTimeoracleTimestampPath, ×tamp_str, &zk_errno) && + zk_errno != zk::ZE_NOT_EXIST) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to read timestamp node"; + return false; + } + LOG(ERROR) << "retry read timestamp node in " << FLAGS_tera_zk_retry_period + << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + if (zk_errno == zk::ZE_NOT_EXIST) { + *timestamp = 0; + return true; + } - char * pEnd = nullptr; - *timestamp = ::strtoull(timestamp_str.c_str(), &pEnd, 10); - if (*pEnd != '\0') { - // TODO (chenzongjia) - LOG(WARNING) << "read invalid timestamp value=" << timestamp_str; - return false; - } + char* pEnd = nullptr; + *timestamp = ::strtoull(timestamp_str.c_str(), &pEnd, 10); + if (*pEnd != '\0') { + // TODO (chenzongjia) + LOG(WARNING) << "read invalid timestamp value=" << timestamp_str; + return false; + } - LOG(INFO) << "read timestamp value=" << timestamp_str; + LOG(INFO) << "read timestamp value=" << timestamp_str; - return true; + return true; } bool TimeoracleZkAdapter::UpdateTimestamp(int64_t timestamp) { - char timestamp_str[64]; - snprintf(timestamp_str, sizeof(timestamp_str), "%lu", timestamp); - LOG(INFO) << "try to update timestamp to " << timestamp; - int zk_errno = zk::ZE_OK; - while (!WriteNode(kTimeoracleTimestampPath, timestamp_str, &zk_errno) - && zk_errno != zk::ZE_NOT_EXIST) { + char timestamp_str[64]; + snprintf(timestamp_str, sizeof(timestamp_str), "%lu", timestamp); + LOG(INFO) << "try to update timestamp to " << timestamp; + int zk_errno = zk::ZE_OK; + while (!WriteNode(kTimeoracleTimestampPath, timestamp_str, &zk_errno) && + zk_errno != zk::ZE_NOT_EXIST) { + return false; + /* + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(INFO) << "fail to update timestamp"; return false; - /* - if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { - LOG(INFO) << "fail to update timestamp"; - return false; - } - LOG(ERROR) << "retry update timestamp in " - << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; - ThisThread::Sleep(FLAGS_tera_zk_retry_period); - zk_errno = zk::ZE_OK; - */ - } - if (zk_errno == zk::ZE_OK) { - LOG(INFO) << "update zk path=" << kTimeoracleTimestampPath << " to " - << timestamp_str << " success."; - return true; } - - LOG(INFO) << "timestamp node not exist, try create timestamp node"; + LOG(ERROR) << "retry update timestamp in " + << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); zk_errno = zk::ZE_OK; - while (!CreatePersistentNode(kTimeoracleTimestampPath, timestamp_str, &zk_errno)) { - return false; - } - LOG(INFO) << "create timestamp node success"; + */ + } + if (zk_errno == zk::ZE_OK) { + LOG(INFO) << "update zk path=" << kTimeoracleTimestampPath << " to " << timestamp_str + << " success."; return true; + } + LOG(INFO) << "timestamp node not exist, try create timestamp node"; + zk_errno = zk::ZE_OK; + while (!CreatePersistentNode(kTimeoracleTimestampPath, timestamp_str, &zk_errno)) { + return false; + } + LOG(INFO) << "create timestamp node success"; + return true; } TimeoracleInsAdapter::~TimeoracleInsAdapter() { - if (ins_sdk_) { - std::string lock_path = FLAGS_tera_ins_root_path + kTimeoracleLockPath; - galaxy::ins::sdk::SDKError err; - ins_sdk_->UnLock(lock_path, &err); - } + if (ins_sdk_) { + std::string lock_path = FLAGS_tera_ins_root_path + kTimeoracleLockPath; + galaxy::ins::sdk::SDKError err; + ins_sdk_->UnLock(lock_path, &err); + } } bool TimeoracleInsAdapter::Init(int64_t* last_timestamp) { - if (!InitInsAndLock()) { - return false; - } + if (!InitInsAndLock()) { + return false; + } - if (ReadTimestamp(last_timestamp)) { - LOG(INFO) << "read timestamp sucess,get start_timestamp=" << *last_timestamp; - return CreateTimeoracleNode(); - } + if (ReadTimestamp(last_timestamp)) { + LOG(INFO) << "read timestamp sucess,get start_timestamp=" << *last_timestamp; + return CreateTimeoracleNode(); + } - return false; + return false; } bool TimeoracleInsAdapter::CreateTimeoracleNode() { - std::string put_path = FLAGS_tera_ins_root_path + kTimeoracleNodePath; + std::string put_path = FLAGS_tera_ins_root_path + kTimeoracleNodePath; - LOG(INFO) << "try write timeoracle nod,path=" << put_path; + LOG(INFO) << "try write timeoracle nod,path=" << put_path; - galaxy::ins::sdk::SDKError err; + galaxy::ins::sdk::SDKError err; - if (!ins_sdk_->Put(put_path, server_addr_, &err)) { - LOG(ERROR) << "update timestamp node, path=" << put_path << ",failed " - << ins_sdk_->ErrorToString(err); - return false; - } + if (!ins_sdk_->Put(put_path, server_addr_, &err)) { + LOG(ERROR) << "update timestamp node, path=" << put_path << ",failed " + << ins_sdk_->ErrorToString(err); + return false; + } - LOG(INFO) << "update timeoracle node success"; - return true; + LOG(INFO) << "update timeoracle node success"; + return true; } -static void InsOnSessionTimeout(void * context) { - TimeoracleInsAdapter* ins_adp = static_cast(context); - ins_adp->OnSessionTimeout(); +static void InsOnSessionTimeout(void* context) { + TimeoracleInsAdapter* ins_adp = static_cast(context); + ins_adp->OnSessionTimeout(); } static void InsOnLockChange(const galaxy::ins::sdk::WatchParam& param, galaxy::ins::sdk::SDKError error) { - TimeoracleInsAdapter* ins_adp = static_cast(param.context); - ins_adp->OnLockChange(param.value, param.deleted); + TimeoracleInsAdapter* ins_adp = static_cast(param.context); + ins_adp->OnLockChange(param.value, param.deleted); } bool TimeoracleInsAdapter::InitInsAndLock() { - MutexLock lock(&mutex_); - LOG(INFO) << "try to init ins,ins_addr_list=" << FLAGS_tera_ins_addr_list - << ",ins_root_path=" << FLAGS_tera_ins_root_path; - ins_sdk_ = new galaxy::ins::sdk::InsSDK(FLAGS_tera_ins_addr_list); - ins_sdk_->SetTimeoutTime(FLAGS_tera_master_ins_session_timeout); + MutexLock lock(&mutex_); + LOG(INFO) << "try to init ins,ins_addr_list=" << FLAGS_tera_ins_addr_list + << ",ins_root_path=" << FLAGS_tera_ins_root_path; + ins_sdk_ = new galaxy::ins::sdk::InsSDK(FLAGS_tera_ins_addr_list); + ins_sdk_->SetTimeoutTime(FLAGS_tera_master_ins_session_timeout); - std::string lock_path = FLAGS_tera_ins_root_path + kTimeoracleLockPath; + std::string lock_path = FLAGS_tera_ins_root_path + kTimeoracleLockPath; - galaxy::ins::sdk::SDKError err; + galaxy::ins::sdk::SDKError err; - ins_sdk_->RegisterSessionTimeout(InsOnSessionTimeout, this); + ins_sdk_->RegisterSessionTimeout(InsOnSessionTimeout, this); - if (!ins_sdk_->Lock(lock_path, &err)) { - LOG(ERROR) << "try to lock timeoracle lock,path=" << kTimeoracleLockPath << " failed," - << ins_sdk_->ErrorToString(err); - return false; - } + if (!ins_sdk_->Lock(lock_path, &err)) { + LOG(ERROR) << "try to lock timeoracle lock,path=" << kTimeoracleLockPath << " failed," + << ins_sdk_->ErrorToString(err); + return false; + } - LOG(INFO) << "try to lock timeoracle lock,path=" << kTimeoracleLockPath << " success"; + LOG(INFO) << "try to lock timeoracle lock,path=" << kTimeoracleLockPath << " success"; - if (!ins_sdk_->Watch(lock_path, InsOnLockChange, this, &err)) { - LOG(ERROR) << "try to watch timeoracle lock,path=" << kTimeoracleLockPath << " failed," - << ins_sdk_->ErrorToString(err); - return false; - } + if (!ins_sdk_->Watch(lock_path, InsOnLockChange, this, &err)) { + LOG(ERROR) << "try to watch timeoracle lock,path=" << kTimeoracleLockPath << " failed," + << ins_sdk_->ErrorToString(err); + return false; + } - LOG(INFO) << "try to watch timeoracle lock,path=" << kTimeoracleLockPath << " success"; + LOG(INFO) << "try to watch timeoracle lock,path=" << kTimeoracleLockPath << " success"; - return true; + return true; } bool TimeoracleInsAdapter::ReadTimestamp(int64_t* timestamp) { - std::string read_path = FLAGS_tera_ins_root_path + kTimeoracleTimestampPath; + std::string read_path = FLAGS_tera_ins_root_path + kTimeoracleTimestampPath; - LOG(INFO) << "try to read timestamp, path=" << read_path; - - std::string timestamp_str; - galaxy::ins::sdk::SDKError err; + LOG(INFO) << "try to read timestamp, path=" << read_path; - if (!ins_sdk_->Get(read_path, ×tamp_str, &err)) { - if (err == galaxy::ins::sdk::SDKError::kNoSuchKey) { - *timestamp = 0; - return true; - } + std::string timestamp_str; + galaxy::ins::sdk::SDKError err; - LOG(ERROR) << "try to read timestamp, path=" << read_path << ",failed " - << ins_sdk_->ErrorToString(err); - return false; + if (!ins_sdk_->Get(read_path, ×tamp_str, &err)) { + if (err == galaxy::ins::sdk::SDKError::kNoSuchKey) { + *timestamp = 0; + return true; } - char * pEnd = nullptr; - *timestamp = ::strtoull(timestamp_str.c_str(), &pEnd, 10); - if (*pEnd != '\0') { - // TODO (chenzongjia) - LOG(WARNING) << "read invalid timestamp value=" << timestamp_str; - return false; - } + LOG(ERROR) << "try to read timestamp, path=" << read_path << ",failed " + << ins_sdk_->ErrorToString(err); + return false; + } - LOG(INFO) << "read timestamp value=" << timestamp_str; - return true; + char* pEnd = nullptr; + *timestamp = ::strtoull(timestamp_str.c_str(), &pEnd, 10); + if (*pEnd != '\0') { + // TODO (chenzongjia) + LOG(WARNING) << "read invalid timestamp value=" << timestamp_str; + return false; + } + + LOG(INFO) << "read timestamp value=" << timestamp_str; + return true; } bool TimeoracleInsAdapter::UpdateTimestamp(int64_t timestamp) { - char buf[64]; - snprintf(buf, sizeof(buf), "%lu", timestamp); - LOG(INFO) << "try to update timestamp to " << timestamp; + char buf[64]; + snprintf(buf, sizeof(buf), "%lu", timestamp); + LOG(INFO) << "try to update timestamp to " << timestamp; - std::string timestamp_str(buf); - galaxy::ins::sdk::SDKError err; - std::string put_path = FLAGS_tera_ins_root_path + kTimeoracleTimestampPath; + std::string timestamp_str(buf); + galaxy::ins::sdk::SDKError err; + std::string put_path = FLAGS_tera_ins_root_path + kTimeoracleTimestampPath; - if (!ins_sdk_->Put(put_path, timestamp_str, &err)) { - LOG(ERROR) << "update timestamp, path=" << put_path << ",failed " - << ins_sdk_->ErrorToString(err); - return false; - } + if (!ins_sdk_->Put(put_path, timestamp_str, &err)) { + LOG(ERROR) << "update timestamp, path=" << put_path << ",failed " + << ins_sdk_->ErrorToString(err); + return false; + } - return true; + return true; } void TimeoracleInsAdapter::OnLockChange(std::string session_id, bool deleted) { - if (deleted || session_id != ins_sdk_->GetSessionID()) { - LOG(ERROR) << "timeoracle lock losted"; - exit(1); - } + if (deleted || session_id != ins_sdk_->GetSessionID()) { + LOG(ERROR) << "timeoracle lock losted"; + exit(1); + } } class FdGuard { -public: - explicit FdGuard(int fd) : fd_(fd) {} + public: + explicit FdGuard(int fd) : fd_(fd) {} - FdGuard() : fd_(-1) {} + FdGuard() : fd_(-1) {} - ~FdGuard() { - if (fd_ >= 0) { - ::close(fd_); - } + ~FdGuard() { + if (fd_ >= 0) { + ::close(fd_); } + } - operator int() const { - return fd_; - } + operator int() const { return fd_; } - void reset(int fd) { - if (fd_ >= 0) { - ::close(fd_); - } - fd_ = fd; + void reset(int fd) { + if (fd_ >= 0) { + ::close(fd_); } + fd_ = fd; + } - int relese() { - const int ret = fd_; - fd_ = -1; - return ret; - } + int relese() { + const int ret = fd_; + fd_ = -1; + return ret; + } -private: - FdGuard(const FdGuard&) = delete; - void operator=(const FdGuard&) = delete; - int fd_; + private: + FdGuard(const FdGuard&) = delete; + void operator=(const FdGuard&) = delete; + int fd_; }; // not thread safe bool TimeoracleMockAdapter::Init(int64_t* last_timestamp) { - std::string lock_path = FLAGS_tera_timeoracle_mock_root_path + kTimeoracleLockPath; - static FdGuard lock_fd(::open(lock_path.c_str(), O_CREAT | O_RDWR, 0666)); + std::string lock_path = FLAGS_tera_timeoracle_mock_root_path + kTimeoracleLockPath; + static FdGuard lock_fd(::open(lock_path.c_str(), O_CREAT | O_RDWR, 0666)); - if (lock_fd < 0) { - return false; - } + if (lock_fd < 0) { + return false; + } - LOG(INFO) << "TimeoracleMockAdapter try to get lock for file=" << lock_path; + LOG(INFO) << "TimeoracleMockAdapter try to get lock for file=" << lock_path; - if (::flock(lock_fd, LOCK_EX) < 0) { - LOG(WARNING) << "lock file failed for path=" << lock_path; - return false; - } + if (::flock(lock_fd, LOCK_EX) < 0) { + LOG(WARNING) << "lock file failed for path=" << lock_path; + return false; + } - LOG(INFO) << "TimeoracleMockAdapter got the lock for file=" << lock_path; + LOG(INFO) << "TimeoracleMockAdapter got the lock for file=" << lock_path; - std::string get_path = FLAGS_tera_timeoracle_mock_root_path + kTimeoracleTimestampPath; + std::string get_path = FLAGS_tera_timeoracle_mock_root_path + kTimeoracleTimestampPath; - FdGuard tmp_fd(::open(get_path.c_str(), O_CREAT | O_RDWR, 0666)); + FdGuard tmp_fd(::open(get_path.c_str(), O_CREAT | O_RDWR, 0666)); - if (tmp_fd < 0) { - LOG(WARNING) << "open file failed for file=" << get_path; - return false; - } + if (tmp_fd < 0) { + LOG(WARNING) << "open file failed for file=" << get_path; + return false; + } - char buf[64]; + char buf[64]; - ssize_t len = pread(tmp_fd, buf, sizeof(buf), 0); - if (len < 0) { - LOG(WARNING) << "read file failed for file=" << get_path; - return false; - } - - if (len == 0) { - *last_timestamp = 0; - return true; - } + ssize_t len = pread(tmp_fd, buf, sizeof(buf), 0); + if (len < 0) { + LOG(WARNING) << "read file failed for file=" << get_path; + return false; + } - buf[len] = '\0'; - char * pEnd = nullptr; - *last_timestamp = ::strtoull(buf, &pEnd, 10); - if (*pEnd != '\0') { - // TODO (chenzongjia) - LOG(WARNING) << "read invalid timestamp value=" << buf; - return false; - } + if (len == 0) { + *last_timestamp = 0; + return true; + } + + buf[len] = '\0'; + char* pEnd = nullptr; + *last_timestamp = ::strtoull(buf, &pEnd, 10); + if (*pEnd != '\0') { + // TODO (chenzongjia) + LOG(WARNING) << "read invalid timestamp value=" << buf; + return false; + } - LOG(INFO) << "read timestamp value=" << *last_timestamp; + LOG(INFO) << "read timestamp value=" << *last_timestamp; - std::string put_path = FLAGS_tera_timeoracle_mock_root_path + kTimeoracleNodePath; + std::string put_path = FLAGS_tera_timeoracle_mock_root_path + kTimeoracleNodePath; - tmp_fd.reset(::open(put_path.c_str(), O_CREAT | O_RDWR, 0666)); + tmp_fd.reset(::open(put_path.c_str(), O_CREAT | O_RDWR, 0666)); - if (tmp_fd < 0) { - LOG(WARNING) << "open file failed for file=" << put_path; - return false; - } + if (tmp_fd < 0) { + LOG(WARNING) << "open file failed for file=" << put_path; + return false; + } - if (::pwrite(tmp_fd, server_addr_.data(), server_addr_.size(), 0) - != (ssize_t)server_addr_.size()) { - LOG(WARNING) << "write file failed for file=" << put_path; - return false; - } + if (::pwrite(tmp_fd, server_addr_.data(), server_addr_.size(), 0) != + (ssize_t)server_addr_.size()) { + LOG(WARNING) << "write file failed for file=" << put_path; + return false; + } - return true; + return true; } // not thread safe bool TimeoracleMockAdapter::UpdateTimestamp(int64_t new_timestamp) { - std::string put_path = FLAGS_tera_timeoracle_mock_root_path + kTimeoracleTimestampPath; - FdGuard tmp_fd(::open(put_path.c_str(), O_CREAT | O_RDWR, 0666)); + std::string put_path = FLAGS_tera_timeoracle_mock_root_path + kTimeoracleTimestampPath; + FdGuard tmp_fd(::open(put_path.c_str(), O_CREAT | O_RDWR, 0666)); - if (tmp_fd < 0) { - LOG(WARNING) << "open file failed for file=" << put_path; - return false; - } + if (tmp_fd < 0) { + LOG(WARNING) << "open file failed for file=" << put_path; + return false; + } - char buf[64]; - snprintf(buf, sizeof(buf), "%lu", new_timestamp); - std::string timestamp_str(buf); - LOG(INFO) << "try to update timestamp to " << put_path; + char buf[64]; + snprintf(buf, sizeof(buf), "%lu", new_timestamp); + std::string timestamp_str(buf); + LOG(INFO) << "try to update timestamp to " << put_path; - if (::pwrite(tmp_fd, timestamp_str.data(), timestamp_str.size(), 0) - != (ssize_t)timestamp_str.size()) { - LOG(WARNING) << "write file failed for file=" << put_path; - return false; - } + if (::pwrite(tmp_fd, timestamp_str.data(), timestamp_str.size(), 0) != + (ssize_t)timestamp_str.size()) { + LOG(WARNING) << "write file failed for file=" << put_path; + return false; + } - return true; + return true; } -} // namespace timeoracle -} // namespace tera +} // namespace timeoracle +} // namespace tera diff --git a/src/timeoracle/timeoracle_zk_adapter.h b/src/timeoracle/timeoracle_zk_adapter.h index b0f6a970c..6db259f12 100644 --- a/src/timeoracle/timeoracle_zk_adapter.h +++ b/src/timeoracle/timeoracle_zk_adapter.h @@ -2,18 +2,18 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_TIMEORACLE_TIMEORACLE_ZK_ADAPTER_H -#define TERA_TIMEORACLE_TIMEORACLE_ZK_ADAPTER_H +#ifndef TERA_TIMEORACLE_TIMEORACLE_ZK_ADAPTER_H +#define TERA_TIMEORACLE_TIMEORACLE_ZK_ADAPTER_H #include #include #include "zk/zk_adapter.h" // forward declare -namespace galaxy{ -namespace ins{ +namespace galaxy { +namespace ins { namespace sdk { - class InsSDK; +class InsSDK; } } } @@ -22,103 +22,98 @@ namespace tera { namespace timeoracle { class TimeoracleZkAdapterBase : public zk::ZooKeeperAdapter { -public: - virtual ~TimeoracleZkAdapterBase() {}; + public: + virtual ~TimeoracleZkAdapterBase(){}; - // not thread safe - virtual bool Init(int64_t* last_timestamp) = 0; + // not thread safe + virtual bool Init(int64_t* last_timestamp) = 0; - // not thread safe - virtual bool UpdateTimestamp(int64_t new_timestamp) = 0; + // not thread safe + virtual bool UpdateTimestamp(int64_t new_timestamp) = 0; - virtual void OnChildrenChanged(const std::string& path, - const std::vector& name_list, - const std::vector& data_list) override; + virtual void OnChildrenChanged(const std::string& path, const std::vector& name_list, + const std::vector& data_list) override; - virtual void OnNodeValueChanged(const std::string& path, - const std::string& value) override; + virtual void OnNodeValueChanged(const std::string& path, const std::string& value) override; - virtual void OnNodeCreated(const std::string& path) override; + virtual void OnNodeCreated(const std::string& path) override; - virtual void OnNodeDeleted(const std::string& path) override; + virtual void OnNodeDeleted(const std::string& path) override; - virtual void OnWatchFailed(const std::string& path, int watch_type, - int err) override; + virtual void OnWatchFailed(const std::string& path, int watch_type, int err) override; - virtual void OnSessionTimeout() final; + virtual void OnSessionTimeout() final; }; class TimeoracleZkAdapter : public TimeoracleZkAdapterBase { -public: - TimeoracleZkAdapter(const std::string& server_addr) : server_addr_(server_addr) {} + public: + TimeoracleZkAdapter(const std::string& server_addr) : server_addr_(server_addr) {} - virtual ~TimeoracleZkAdapter(); + virtual ~TimeoracleZkAdapter(); - virtual bool Init(int64_t* last_timestamp) override; + virtual bool Init(int64_t* last_timestamp) override; - virtual bool UpdateTimestamp(int64_t new_timestamp) override; + virtual bool UpdateTimestamp(int64_t new_timestamp) override; -private: - bool InitZk(); + private: + bool InitZk(); - bool LockTimeoracleLock(); + bool LockTimeoracleLock(); - bool ReadTimestamp(int64_t* timestamp); + bool ReadTimestamp(int64_t* timestamp); - bool CreateTimeoracleNode(); + bool CreateTimeoracleNode(); -private: - std::string server_addr_; + private: + std::string server_addr_; }; class TimeoracleInsAdapter : public TimeoracleZkAdapterBase { -public: - TimeoracleInsAdapter(const std::string & server_addr) : server_addr_(server_addr) {} + public: + TimeoracleInsAdapter(const std::string& server_addr) : server_addr_(server_addr) {} - virtual ~TimeoracleInsAdapter(); + virtual ~TimeoracleInsAdapter(); - virtual bool Init(int64_t* last_timestamp) override; + virtual bool Init(int64_t* last_timestamp) override; - virtual bool UpdateTimestamp(int64_t new_timestamp) override; + virtual bool UpdateTimestamp(int64_t new_timestamp) override; - void OnLockChange(std::string session_id, bool deleted); + void OnLockChange(std::string session_id, bool deleted); -private: - bool InitInsAndLock(); + private: + bool InitInsAndLock(); - bool ReadTimestamp(int64_t* timestamp); + bool ReadTimestamp(int64_t* timestamp); - bool CreateTimeoracleNode(); + bool CreateTimeoracleNode(); -private: - mutable Mutex mutex_; - std::string server_addr_; - galaxy::ins::sdk::InsSDK* ins_sdk_{NULL}; + private: + mutable Mutex mutex_; + std::string server_addr_; + galaxy::ins::sdk::InsSDK* ins_sdk_{NULL}; }; - /* * This is not zookeeper! * Just used on onebox for tasting tera briefly. * This is implemented through local file system. * Not support watching. */ -class TimeoracleMockAdapter: public TimeoracleZkAdapterBase { -public: - TimeoracleMockAdapter(const std::string& server_addr) : server_addr_(server_addr) { - } +class TimeoracleMockAdapter : public TimeoracleZkAdapterBase { + public: + TimeoracleMockAdapter(const std::string& server_addr) : server_addr_(server_addr) {} - // not thread safe - virtual bool Init(int64_t* last_timestamp) override; + // not thread safe + virtual bool Init(int64_t* last_timestamp) override; - // not thread safe - virtual bool UpdateTimestamp(int64_t new_timestamp) override; + // not thread safe + virtual bool UpdateTimestamp(int64_t new_timestamp) override; -private: - std::string server_addr_; + private: + std::string server_addr_; }; -} // namespace timeoracle -} // namespace tera +} // namespace timeoracle +} // namespace tera -#endif // TERA_TIMEORACLE_TIMEORACLE_ZK_ADAPTER_H +#endif // TERA_TIMEORACLE_TIMEORACLE_ZK_ADAPTER_H diff --git a/src/timeoracle_main.cc b/src/timeoracle_main.cc index 3c7f713be..6f1af94b0 100644 --- a/src/timeoracle_main.cc +++ b/src/timeoracle_main.cc @@ -9,7 +9,6 @@ #include #include "common/base/scoped_ptr.h" -#include "tera_entry.h" #include "utils/utils_cmd.h" #include "version.h" #include "timeoracle/timeoracle_entry.h" @@ -18,52 +17,50 @@ DECLARE_string(tera_log_prefix); volatile sig_atomic_t g_quit = 0; -static void SignalIntHandler(int sig) { - g_quit = 1; -} +static void SignalIntHandler(int sig) { g_quit = 1; } int main(int argc, char* argv[]) { - ::google::SetUsageMessage("./timeoracle --flagfile=xxx.flag"); - ::google::ParseCommandLineFlags(&argc, &argv, true); - ::google::InitGoogleLogging(argv[0]); - if (!FLAGS_tera_log_prefix.empty()) { - tera::utils::SetupLog(FLAGS_tera_log_prefix); - } else { - tera::utils::SetupLog("timeoracle"); - } + ::google::SetUsageMessage("./timeoracle --flagfile=xxx.flag"); + ::google::ParseCommandLineFlags(&argc, &argv, true); + ::google::InitGoogleLogging(argv[0]); + if (!FLAGS_tera_log_prefix.empty()) { + tera::utils::SetupLog(FLAGS_tera_log_prefix); + } else { + tera::utils::SetupLog("timeoracle"); + } - if (argc > 1) { - std::string ext_cmd = argv[1]; - if (ext_cmd == "version") { - PrintSystemVersion(); - return 0; - } + if (argc > 1) { + std::string ext_cmd = argv[1]; + if (ext_cmd == "version") { + PrintSystemVersion(); + return 0; } + } - signal(SIGINT, SignalIntHandler); - signal(SIGTERM, SignalIntHandler); + signal(SIGINT, SignalIntHandler); + signal(SIGTERM, SignalIntHandler); - scoped_ptr entry(new tera::timeoracle::TimeoracleEntry()); + scoped_ptr entry(new tera::timeoracle::TimeoracleEntry()); - if (!entry->Start()) { - return -1; - } + if (!entry->Start()) { + return -1; + } - while (!g_quit) { - if (!entry->Run()) { - LOG(ERROR) << "Server run error ,and then exit now "; - break; - } - } - if (g_quit) { - LOG(INFO) << "received interrupt signal from user, will stop"; + while (!g_quit) { + if (!entry->Run()) { + LOG(ERROR) << "Server run error ,and then exit now "; + break; } + } + if (g_quit) { + LOG(INFO) << "received interrupt signal from user, will stop"; + } - if (!entry->Shutdown()) { - return -1; - } + if (!entry->Shutdown()) { + return -1; + } - return 0; + return 0; } /* vim: set ts=4 sw=4 sts=4 tw=100 */ diff --git a/src/types.h b/src/types.h index 4724d2083..2ea41b905 100644 --- a/src/types.h +++ b/src/types.h @@ -35,6 +35,7 @@ const std::string kSms = "[SMS] "; const std::string kMail = "[MAIL] "; const int64_t kLatestTs = INT64_MAX; const int64_t kOldestTs = INT64_MIN; +const uint64_t kMaxMetaWriteSize = (4 << 20); // 4MB const uint64_t kMaxRpcSize = (16 << 20); // 16MB const uint64_t kRowkeySize = (64 << 10); // 64KB const uint64_t kQualifierSize = (64 << 10); // 64KB @@ -47,7 +48,6 @@ const std::string kRowlockNodeIdListPath = "/id_lock"; const std::string kRowlockNodeHostListPath = "/host_lock"; const std::string kRowlockNodeNumPath = "/node_num"; const std::string kRowlockProxyPath = "/proxy"; -const uint64_t kObserverWaitTimeMs = 10; // global transaction const char* const kNotifyColumnFamily = "_N_"; @@ -55,6 +55,6 @@ const char* const kNotifyColumnFamily = "_N_"; // stat table const char* const kStatTableName = "stat_table"; -} // namespace tera +} // namespace tera -#endif // TERA_TYPES_H_ +#endif // TERA_TYPES_H_ diff --git a/src/utils/config_utils.cc b/src/utils/config_utils.cc index fe74f2572..f01682358 100644 --- a/src/utils/config_utils.cc +++ b/src/utils/config_utils.cc @@ -11,20 +11,20 @@ namespace tera { namespace utils { bool LoadFlagFile(const std::string& file) { - if (!IsExist(file)) { - return false; - } - std::string flag = "--flagfile=" + file; - int argc = 2; - char** argv = new char*[3]; - argv[0] = const_cast("dummy"); - argv[1] = const_cast(flag.c_str()); - argv[2] = NULL; - ::google::ParseCommandLineFlags(&argc, &argv, false); - argv[1] = NULL; - delete[] argv; - return true; + if (!IsExist(file)) { + return false; + } + std::string flag = "--flagfile=" + file; + int argc = 2; + char** argv = new char* [3]; + argv[0] = const_cast("dummy"); + argv[1] = const_cast(flag.c_str()); + argv[2] = NULL; + ::google::ParseCommandLineFlags(&argc, &argv, false); + argv[1] = NULL; + delete[] argv; + return true; } -} // namespace utils -} // namespace tera +} // namespace utils +} // namespace tera diff --git a/src/utils/config_utils.h b/src/utils/config_utils.h index ee51284d6..b5be910d6 100644 --- a/src/utils/config_utils.h +++ b/src/utils/config_utils.h @@ -13,7 +13,7 @@ namespace utils { // `file' should be path/to/file, like "../conf/tera.flag" bool LoadFlagFile(const std::string& file); -} // namespace utils -} // namespace tera +} // namespace utils +} // namespace tera -#endif // TERA_UTILS_CONFIG_UTILS_H_ +#endif // TERA_UTILS_CONFIG_UTILS_H_ diff --git a/src/utils/crypt.cc b/src/utils/crypt.cc index 43bce37b7..0efd67ebd 100644 --- a/src/utils/crypt.cc +++ b/src/utils/crypt.cc @@ -10,55 +10,55 @@ namespace tera { int32_t GetHashString(const std::string& str, uint32_t seed, std::string* result) { - if (result == NULL) { - return -1; - } - uint32_t hash = 0; - if (GetHashNumber(str, seed, &hash) != 0) { - return -1; - } - char hash_str[9]; - sprintf(hash_str, "%08x", hash); + if (result == NULL) { + return -1; + } + uint32_t hash = 0; + if (GetHashNumber(str, seed, &hash) != 0) { + return -1; + } + char hash_str[9]; + sprintf(hash_str, "%08x", hash); - result->assign(hash_str, 8); - return 0; + result->assign(hash_str, 8); + return 0; } int32_t GetHashNumber(const std::string& str, uint32_t seed, uint32_t* result) { - const char* data = str.c_str(); - size_t n = str.length(); - if (result == NULL) { - return -1; - } - // Similar to murmur hash - const uint32_t m = 0xc6a4a793; - const uint32_t r = 24; - const char* limit = data + n; - uint32_t h = seed ^ (n * m); + const char* data = str.c_str(); + size_t n = str.length(); + if (result == NULL) { + return -1; + } + // Similar to murmur hash + const uint32_t m = 0xc6a4a793; + const uint32_t r = 24; + const char* limit = data + n; + uint32_t h = seed ^ (n * m); - // Pick up four bytes at a time - while (data + 4 <= limit) { - uint32_t w = *(uint32_t*)data; - data += 4; - h += w; - h *= m; - h ^= (h >> 16); - } + // Pick up four bytes at a time + while (data + 4 <= limit) { + uint32_t w = *(uint32_t*)data; + data += 4; + h += w; + h *= m; + h ^= (h >> 16); + } - // Pick up remaining bytes - switch (limit - data) { + // Pick up remaining bytes + switch (limit - data) { case 3: - h += data[2] << 16; + h += data[2] << 16; case 2: - h += data[1] << 8; + h += data[1] << 8; case 1: - h += data[0]; - h *= m; - h ^= (h >> r); - break; - } - *result = h; - return 0; + h += data[0]; + h *= m; + h ^= (h >> r); + break; + } + *result = h; + return 0; } -} // namespace tera +} // namespace tera diff --git a/src/utils/crypt.h b/src/utils/crypt.h index 0249f8ad0..9cbe2cada 100644 --- a/src/utils/crypt.h +++ b/src/utils/crypt.h @@ -14,7 +14,8 @@ namespace tera { -// return 0: all is ok, result(hash number) stored at the location given by @result; +// return 0: all is ok, result(hash number) stored at the location given by +// @result; // otherwise: invalid arguments. int32_t GetHashNumber(const std::string& str, uint32_t seed, uint32_t* result); @@ -22,4 +23,4 @@ int32_t GetHashString(const std::string& str, uint32_t seed, std::string* result } // namespace tera -#endif // TERA_UTILS_CRYPT_H +#endif // TERA_UTILS_CRYPT_H diff --git a/src/utils/fragment.cc b/src/utils/fragment.cc index ec4af1457..c965c3322 100644 --- a/src/utils/fragment.cc +++ b/src/utils/fragment.cc @@ -10,99 +10,93 @@ namespace tera { static int CompareTwoEndKey(const std::string& a, const std::string& b) { - if (a == "" && b == "") { - return 0; - } - if (a == "") { - return 1; - } - if (b == "") { - return -1; - } - return a.compare(b); + if (a == "" && b == "") { + return 0; + } + if (a == "") { + return 1; + } + if (b == "") { + return -1; + } + return a.compare(b); } bool RangeFragment::IsCoverRange(const std::string& start, const std::string& end) const { - std::list >::const_iterator it=range_.begin(); - for ( ; it != range_.end(); ++it ) { - if (it->second != "" - && start.compare(it->second) > 0) { - continue; - } - break; + std::list >::const_iterator it = range_.begin(); + for (; it != range_.end(); ++it) { + if (it->second != "" && start.compare(it->second) > 0) { + continue; } + break; + } - if (it == range_.end()) { - return false; - } - return (start.compare(it->first) >= 0) - && (CompareTwoEndKey(end, it->second) <= 0); + if (it == range_.end()) { + return false; + } + return (start.compare(it->first) >= 0) && (CompareTwoEndKey(end, it->second) <= 0); } bool RangeFragment::AddToRange(const std::string& start, const std::string& end) { - if (end != "" && start.compare(end) > 0) { - return false; - } - std::list >::iterator it=range_.begin(); - for ( ; it != range_.end(); ++it ) { - if (it->second != "" - && start.compare(it->second) > 0) { - continue; - } - break; + if (end != "" && start.compare(end) > 0) { + return false; + } + std::list >::iterator it = range_.begin(); + for (; it != range_.end(); ++it) { + if (it->second != "" && start.compare(it->second) > 0) { + continue; } - if (it == range_.end()) { - range_.push_back(std::pair(start, end)); - return true; - } - /* - * [ ) - * [-------) [------) - * - * - * [ ) - * [-------) [------) ..... [----) - * - * - * [ ) - * [-------) [------) ..... [----) [----) - * - * - * [ ) - * [-------) [------) ..... [----) - * - * - * [ ) - * [-------) [------) ..... [----) [-----) - */ - std::string new_start = start.compare(it->first) < 0 ? start : it->first; - std::string new_end = end; - while (it != range_.end()) { - if (end == "" - || end.compare(it->first) >= 0 ) { - new_end = CompareTwoEndKey(end, it->second) > 0 ? end : it->second; - it = range_.erase(it); - continue; - } - break; - } - range_.insert(it, std::pair(new_start, new_end)); + break; + } + if (it == range_.end()) { + range_.push_back(std::pair(start, end)); return true; + } + /* + * [ ) + * [-------) [------) + * + * + * [ ) + * [-------) [------) ..... [----) + * + * + * [ ) + * [-------) [------) ..... [----) [----) + * + * + * [ ) + * [-------) [------) ..... [----) + * + * + * [ ) + * [-------) [------) ..... [----) [-----) + */ + std::string new_start = start.compare(it->first) < 0 ? start : it->first; + std::string new_end = end; + while (it != range_.end()) { + if (end == "" || end.compare(it->first) >= 0) { + new_end = CompareTwoEndKey(end, it->second) > 0 ? end : it->second; + it = range_.erase(it); + continue; + } + break; + } + range_.insert(it, std::pair(new_start, new_end)); + return true; } std::string RangeFragment::DebugString() const { - std::list >::const_iterator it = range_.begin(); - std::stringstream ss; - for (; it != range_.end(); ++it) { - ss << it->first << ":" << it->second << " "; - } - return ss.str(); + std::list >::const_iterator it = range_.begin(); + std::stringstream ss; + for (; it != range_.end(); ++it) { + ss << it->first << ":" << it->second << " "; + } + return ss.str(); } bool RangeFragment::IsCompleteRange() const { - return (range_.size() == 1) - && (range_.begin()->first == "") - && (range_.begin()->second == ""); + return (range_.size() == 1) && (range_.begin()->first == "") && (range_.begin()->second == ""); } -} // namespace tera +} // namespace tera diff --git a/src/utils/fragment.h b/src/utils/fragment.h index 6ab7027e8..1ed2bbeb7 100644 --- a/src/utils/fragment.h +++ b/src/utils/fragment.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_FRAGMENT_UTIL_H_ -#define TERA_FRAGMENT_UTIL_H_ +#ifndef TERA_FRAGMENT_UTIL_H_ +#define TERA_FRAGMENT_UTIL_H_ #include #include @@ -11,21 +11,21 @@ namespace tera { class RangeFragment { -public: - // caller should use Lock to avoid data races - // On success, return true. Otherwise, return false due to invalid argumetns - bool AddToRange(const std::string& start, const std::string& end); + public: + // caller should use Lock to avoid data races + // On success, return true. Otherwise, return false due to invalid argumetns + bool AddToRange(const std::string& start, const std::string& end); - bool IsCompleteRange() const; + bool IsCompleteRange() const; - bool IsCoverRange(const std::string& start, const std::string& end) const; + bool IsCoverRange(const std::string& start, const std::string& end) const; - std::string DebugString() const; + std::string DebugString() const; -private: - std::list > range_; + private: + std::list > range_; }; -} // namespace tera +} // namespace tera -#endif // TERA_FRAGMENT_UTIL_H_ +#endif // TERA_FRAGMENT_UTIL_H_ diff --git a/src/utils/network_utils.cc b/src/utils/network_utils.cc index 6410262fc..0df110272 100644 --- a/src/utils/network_utils.cc +++ b/src/utils/network_utils.cc @@ -8,10 +8,10 @@ namespace tera { namespace utils { std::string GetRemoteAddress(google::protobuf::RpcController* controller) { - assert(controller != NULL); - sofa::pbrpc::RpcController* cntl = static_cast(controller); - return cntl->RemoteAddress(); + assert(controller != NULL); + sofa::pbrpc::RpcController* cntl = static_cast(controller); + return cntl->RemoteAddress(); } -} // namespace utils -} // namepsace tera +} // namespace utils +} // namepsace tera diff --git a/src/utils/network_utils.h b/src/utils/network_utils.h index 4022d0f96..f5e940e63 100644 --- a/src/utils/network_utils.h +++ b/src/utils/network_utils.h @@ -14,8 +14,7 @@ namespace utils { std::string GetRemoteAddress(google::protobuf::RpcController* controller); -} // namespace utils -} // namespace tera +} // namespace utils +} // namespace tera - -#endif // TERA_UTILS_NETWORK_UTILS_H_ +#endif // TERA_UTILS_NETWORK_UTILS_H_ diff --git a/src/utils/prop_tree.cc b/src/utils/prop_tree.cc index 0c94f6b8e..36169c2ac 100644 --- a/src/utils/prop_tree.cc +++ b/src/utils/prop_tree.cc @@ -15,330 +15,311 @@ namespace tera { // these chars are not belong to symbols bool IsIdentifierChar(const char c) { - return ((c <= '9' && c >= '0') || - (c <= 'z' && c >= 'a') || - (c <= 'Z' && c >= 'A') || - c == '.' || - c == '_' || - c == '-'); - // \# is also a valid identifier char + return ((c <= '9' && c >= '0') || (c <= 'z' && c >= 'a') || (c <= 'Z' && c >= 'A') || c == '.' || + c == '_' || c == '-'); + // \# is also a valid identifier char } -Tokenizer::Tokenizer(const std::string& input) - : origin_(input), cur_pos_(0) {} +Tokenizer::Tokenizer(const std::string& input) : origin_(input), cur_pos_(0) {} Tokenizer::~Tokenizer() {} void Tokenizer::ConsumeUselessChars() { - while (cur_pos_ < origin_.size()) { - char c_next = (cur_pos_ + 1 < origin_.size() ? origin_[cur_pos_ + 1] : 0); - switch (origin_[cur_pos_]) { - case '\n': - case '\t': - case '\v': - case ' ': - cur_pos_++; - continue; - case '\\': - if (c_next == '#') { - return; - } - case '#': - // reach a line-comment, discard all chars in this line - while (++cur_pos_ < origin_.size()) { - if (origin_[cur_pos_] == '\n') { - break; - } - } - continue; - default: - return; + while (cur_pos_ < origin_.size()) { + char c_next = (cur_pos_ + 1 < origin_.size() ? origin_[cur_pos_ + 1] : 0); + switch (origin_[cur_pos_]) { + case '\n': + case '\t': + case '\v': + case ' ': + cur_pos_++; + continue; + case '\\': + if (c_next == '#') { + return; } + case '#': + // reach a line-comment, discard all chars in this line + while (++cur_pos_ < origin_.size()) { + if (origin_[cur_pos_] == '\n') { + break; + } + } + continue; + default: + return; } + } } void Tokenizer::ConsumeIdentifier() { - current_.clear(); - current_.type = IDENTIFIER; - while (cur_pos_ < origin_.size()) { - char c = origin_[cur_pos_]; - char c_next = (cur_pos_ + 1 < origin_.size() ? origin_[cur_pos_ + 1] : 0); - if (c == '\\' && c_next == '#') { - current_.push_back(c_next); - cur_pos_ += 2; - } else if (IsIdentifierChar(c)) { - current_.push_back(c); - cur_pos_++; - } else { - break; - } + current_.clear(); + current_.type = IDENTIFIER; + while (cur_pos_ < origin_.size()) { + char c = origin_[cur_pos_]; + char c_next = (cur_pos_ + 1 < origin_.size() ? origin_[cur_pos_ + 1] : 0); + if (c == '\\' && c_next == '#') { + current_.push_back(c_next); + cur_pos_ += 2; + } else if (IsIdentifierChar(c)) { + current_.push_back(c); + cur_pos_++; + } else { + break; } + } } void Tokenizer::ConsumeSymbol() { - current_.clear(); - current_.type = SYMBOL; - current_.push_back(origin_[cur_pos_]); - cur_pos_++; + current_.clear(); + current_.type = SYMBOL; + current_.push_back(origin_[cur_pos_]); + cur_pos_++; } bool Tokenizer::Next() { - ConsumeUselessChars(); - if (cur_pos_ >= origin_.size()) { - return false; - } - if (IsIdentifierChar(origin_[cur_pos_])) { - ConsumeIdentifier(); - } else { - ConsumeSymbol(); - } - return true; + ConsumeUselessChars(); + if (cur_pos_ >= origin_.size()) { + return false; + } + if (IsIdentifierChar(origin_[cur_pos_])) { + ConsumeIdentifier(); + } else { + ConsumeSymbol(); + } + return true; } -PropTree::PropTree() - : root_(NULL), - max_depth_(0), - min_depth_(std::numeric_limits::max()) { -} +PropTree::PropTree() : root_(NULL), max_depth_(0), min_depth_(std::numeric_limits::max()) {} -PropTree::~PropTree() { - delete root_; -} +PropTree::~PropTree() { delete root_; } void PropTree::Reset() { - delete root_; - max_depth_ = 0; - min_depth_ = std::numeric_limits::max(); - state_.clear(); + delete root_; + max_depth_ = 0; + min_depth_ = std::numeric_limits::max(); + state_.clear(); } bool PropTree::ParseFromString(const std::string& input) { - Reset(); - int angle_braket_diff = 0; - int brace_diff = 0; - Tokenizer tr(input); - std::deque tokens; - while (tr.Next()) { - const Tokenizer::Token& token = tr.current(); - if (token.type == Tokenizer::SYMBOL) { - if (token.text == "<") { - angle_braket_diff++; - } else if (token.text == ">") { - if (angle_braket_diff <= 0) { - AddError("syntax error: \">\" should be after \"<\"."); - return false; - } - angle_braket_diff--; - } else if (token.text == "{") { - brace_diff++; - } else if (token.text == "}") { - if (brace_diff <= 0) { - AddError("syntax error: \"}\" should be after \"{\"."); - return false; - } - brace_diff--; - } + Reset(); + int angle_braket_diff = 0; + int brace_diff = 0; + Tokenizer tr(input); + std::deque tokens; + while (tr.Next()) { + const Tokenizer::Token& token = tr.current(); + if (token.type == Tokenizer::SYMBOL) { + if (token.text == "<") { + angle_braket_diff++; + } else if (token.text == ">") { + if (angle_braket_diff <= 0) { + AddError("syntax error: \">\" should be after \"<\"."); + return false; } - tokens.push_back(token); - } - if (tokens.size() == 0) { - AddError("syntax error: input string empty."); - return false; - } - if (angle_braket_diff != 0) { - AddError("syntax error: \"<\" and \">\" are not matching."); - return false; - } - if (brace_diff != 0) { - AddError("syntax error: \"{\" and \"}\" are not matching."); - return false; + angle_braket_diff--; + } else if (token.text == "{") { + brace_diff++; + } else if (token.text == "}") { + if (brace_diff <= 0) { + AddError("syntax error: \"}\" should be after \"{\"."); + return false; + } + brace_diff--; + } } + tokens.push_back(token); + } + if (tokens.size() == 0) { + AddError("syntax error: input string empty."); + return false; + } + if (angle_braket_diff != 0) { + AddError("syntax error: \"<\" and \">\" are not matching."); + return false; + } + if (brace_diff != 0) { + AddError("syntax error: \"{\" and \"}\" are not matching."); + return false; + } - return ParseNodeFromTokens(tokens, 1, &root_); + return ParseNodeFromTokens(tokens, 1, &root_); } bool PropTree::ParseFromFile(const std::string& file) { - std::ifstream fin(file.c_str()); - std::string input; - if (fin.good()) { - std::string str; - while (std::getline(fin, str)) { - input.append(str + "\n"); - } - } else { - AddError("syntax error: input file error."); - return false; + std::ifstream fin(file.c_str()); + std::string input; + if (fin.good()) { + std::string str; + while (std::getline(fin, str)) { + input.append(str + "\n"); } - return ParseFromString(input); + } else { + AddError("syntax error: input file error."); + return false; + } + return ParseFromString(input); } -bool PropTree::ParseNodeFromTokens(std::deque& tokens, - int depth, Node** node) { - if (tokens.size() == 0) { - return true; - } - if (tokens.front().type != Tokenizer::IDENTIFIER) { - AddError("syntax error: node name error: " + tokens.front().text); - return false; - } - *node = new Node(); - Node*& node_t = *node; +bool PropTree::ParseNodeFromTokens(std::deque& tokens, int depth, Node** node) { + if (tokens.size() == 0) { + return true; + } + if (tokens.front().type != Tokenizer::IDENTIFIER) { + AddError("syntax error: node name error: " + tokens.front().text); + return false; + } + *node = new Node(); + Node*& node_t = *node; - // get node name and pop it out from token queue - node_t->name_ = tokens.front().text; - tokens.pop_front(); + // get node name and pop it out from token queue + node_t->name_ = tokens.front().text; + tokens.pop_front(); - // get all props and pop them out from token queue - if (!ParsePropsFromTokens(tokens, node_t)) { - return false; - } + // get all props and pop them out from token queue + if (!ParsePropsFromTokens(tokens, node_t)) { + return false; + } - // get all children from token queue - if (!ParseChildrenFromTokens(tokens, depth, node_t)) { - return false; - } + // get all children from token queue + if (!ParseChildrenFromTokens(tokens, depth, node_t)) { + return false; + } - if (node_t->children_.size() == 0) { - // this is a leaf node - if (depth > max_depth_) { - max_depth_ = depth; - } - if (depth < min_depth_) { - min_depth_ = depth; - } + if (node_t->children_.size() == 0) { + // this is a leaf node + if (depth > max_depth_) { + max_depth_ = depth; } - - // check rest tokens - if (tokens.size() != 0) { - AddError("syntax error: \"" + tokens.front().text + "\""); - return false; + if (depth < min_depth_) { + min_depth_ = depth; } - return true; + } + + // check rest tokens + if (tokens.size() != 0) { + AddError("syntax error: \"" + tokens.front().text + "\""); + return false; + } + return true; } -bool PropTree::ParsePropsFromTokens(std::deque& tokens, - Node* node) { - if (tokens.size() <= 2 || tokens.front().text != "<") { - // have none properties - return true; - } +bool PropTree::ParsePropsFromTokens(std::deque& tokens, Node* node) { + if (tokens.size() <= 2 || tokens.front().text != "<") { + // have none properties + return true; + } - tokens.pop_front(); // pop "<" - while (tokens.size() > 3 && tokens.front().text != ">") { - if (tokens.front().text == ",") { - // reach a comma, discard it - tokens.pop_front(); - } - std::string prop_name = tokens.front().text; - tokens.pop_front(); - std::string eq = tokens.front().text; - tokens.pop_front(); - std::string prop_value = tokens.front().text; - tokens.pop_front(); - if (eq != "=" || prop_name == ">" || prop_value == ">") { - AddError("syntax error: property format error: " - + prop_name + eq + prop_value); - return false; - } - node->properties_[prop_name] = prop_value; + tokens.pop_front(); // pop "<" + while (tokens.size() > 3 && tokens.front().text != ">") { + if (tokens.front().text == ",") { + // reach a comma, discard it + tokens.pop_front(); } - if (tokens.front().text != ">") { - AddError("syntax error: property format error: " + tokens.front().text); - return false; + std::string prop_name = tokens.front().text; + tokens.pop_front(); + std::string eq = tokens.front().text; + tokens.pop_front(); + std::string prop_value = tokens.front().text; + tokens.pop_front(); + if (eq != "=" || prop_name == ">" || prop_value == ">") { + AddError("syntax error: property format error: " + prop_name + eq + prop_value); + return false; } - tokens.pop_front(); // pop ">" - return true; + node->properties_[prop_name] = prop_value; + } + if (tokens.front().text != ">") { + AddError("syntax error: property format error: " + tokens.front().text); + return false; + } + tokens.pop_front(); // pop ">" + return true; } -bool PropTree::ParseChildrenFromTokens(std::deque& tokens, - int depth, Node* node) { - if (tokens.size() <= 2) { - // have none child - return true; - } - if (tokens.front().text != "{" || tokens.back().text != "}") { - AddError("syntax error: child node format error: " + node->name_ - + tokens.front().text + " " + tokens.back().text); - return false; - } - tokens.pop_front(); // pop "{" - tokens.pop_back(); // pop "}" - if (tokens.back().text == ",") { - // discard the last "," - tokens.pop_back(); - } +bool PropTree::ParseChildrenFromTokens(std::deque& tokens, int depth, + Node* node) { + if (tokens.size() <= 2) { + // have none child + return true; + } + if (tokens.front().text != "{" || tokens.back().text != "}") { + AddError("syntax error: child node format error: " + node->name_ + tokens.front().text + " " + + tokens.back().text); + return false; + } + tokens.pop_front(); // pop "{" + tokens.pop_back(); // pop "}" + if (tokens.back().text == ",") { + // discard the last "," + tokens.pop_back(); + } - std::deque child_tokens; - int is_inner_comma = 0; - while (tokens.size() > 0) { - child_tokens.push_back(tokens.front()); - std::string tokentext = tokens.front().text; - tokens.pop_front(); + std::deque child_tokens; + int is_inner_comma = 0; + while (tokens.size() > 0) { + child_tokens.push_back(tokens.front()); + std::string tokentext = tokens.front().text; + tokens.pop_front(); - if (tokentext == "<" || tokentext == "{") { - is_inner_comma++; - } else if (tokentext == ">" || tokentext == "}") { - is_inner_comma--; - } else if (tokentext == "," && is_inner_comma == 0) { - child_tokens.pop_back(); // pop the last "," - node->children_.push_back(new Node); - node->children_.back()->mother_ = node; - if (!ParseNodeFromTokens(child_tokens, depth + 1, - &node->children_.back())) { - return false; - } - child_tokens.clear(); - } - } - // parse the last child - node->children_.push_back(new Node); - node->children_.back()->mother_ = node; - if (!ParseNodeFromTokens(child_tokens, depth + 1, - &node->children_.back())) { + if (tokentext == "<" || tokentext == "{") { + is_inner_comma++; + } else if (tokentext == ">" || tokentext == "}") { + is_inner_comma--; + } else if (tokentext == "," && is_inner_comma == 0) { + child_tokens.pop_back(); // pop the last "," + node->children_.push_back(new Node); + node->children_.back()->mother_ = node; + if (!ParseNodeFromTokens(child_tokens, depth + 1, &node->children_.back())) { return false; + } + child_tokens.clear(); } - return true; + } + // parse the last child + node->children_.push_back(new Node); + node->children_.back()->mother_ = node; + if (!ParseNodeFromTokens(child_tokens, depth + 1, &node->children_.back())) { + return false; + } + return true; } std::string PropTree::FormatString() { - std::stringstream ss; - ss << "\n"; - FormatNode(" ", root_, &ss); - return ss.str(); + std::stringstream ss; + ss << "\n"; + FormatNode(" ", root_, &ss); + return ss.str(); } -void PropTree::FormatNode(const std::string& line_prefix, Node* node, - std::stringstream* ss) { - *ss << line_prefix << node->name_; - size_t prop_num = node->properties_.size(); - if (prop_num > 0) { - size_t prop_cnt = 0; - *ss << "<"; - std::map::iterator it = node->properties_.begin(); - for (;it != node->properties_.end(); ++it) { - *ss << it->first << "=" << it->second; - if (++prop_cnt < prop_num) { - *ss << ", "; - } - } - *ss << ">"; +void PropTree::FormatNode(const std::string& line_prefix, Node* node, std::stringstream* ss) { + *ss << line_prefix << node->name_; + size_t prop_num = node->properties_.size(); + if (prop_num > 0) { + size_t prop_cnt = 0; + *ss << "<"; + std::map::iterator it = node->properties_.begin(); + for (; it != node->properties_.end(); ++it) { + *ss << it->first << "=" << it->second; + if (++prop_cnt < prop_num) { + *ss << ", "; + } } - size_t children_num = node->children_.size(); - if (children_num > 0) { - std::string prefix = line_prefix + " "; - *ss << " {\n"; - for (size_t i = 0; i < children_num; ++i) { - FormatNode(prefix, node->children_[i], ss); - if (i < children_num - 1) { - *ss << ","; - } - *ss << "\n"; - } - *ss << line_prefix << "}"; + *ss << ">"; + } + size_t children_num = node->children_.size(); + if (children_num > 0) { + std::string prefix = line_prefix + " "; + *ss << " {\n"; + for (size_t i = 0; i < children_num; ++i) { + FormatNode(prefix, node->children_[i], ss); + if (i < children_num - 1) { + *ss << ","; + } + *ss << "\n"; } + *ss << line_prefix << "}"; + } } -void PropTree::AddError(const std::string& error_str) { - state_.append(error_str + "\n"); -} +void PropTree::AddError(const std::string& error_str) { state_.append(error_str + "\n"); } } // namespace tera diff --git a/src/utils/prop_tree.h b/src/utils/prop_tree.h index c40f5c7b3..0aa6e9f6a 100644 --- a/src/utils/prop_tree.h +++ b/src/utils/prop_tree.h @@ -27,101 +27,97 @@ namespace tera { */ class Tokenizer { -public: - Tokenizer(const std::string& input); - ~Tokenizer(); + public: + Tokenizer(const std::string& input); + ~Tokenizer(); - enum TokenType { - INIT, - IDENTIFIER, - SYMBOL - }; + enum TokenType { INIT, IDENTIFIER, SYMBOL }; - struct Token { - TokenType type; - std::string text; + struct Token { + TokenType type; + std::string text; - size_t size() { return text.size(); } - void clear() { text.clear(); type = INIT; } - void push_back(char c) { text.push_back(c); } - }; + size_t size() { return text.size(); } + void clear() { + text.clear(); + type = INIT; + } + void push_back(char c) { text.push_back(c); } + }; - bool Next(); + bool Next(); - const Token& current() { return current_; } + const Token& current() { return current_; } - void Reset(const std::string& input) { - origin_ = input; - cur_pos_ = 0; - current_.clear(); - } + void Reset(const std::string& input) { + origin_ = input; + cur_pos_ = 0; + current_.clear(); + } -private: - void ConsumeUselessChars(); - void ConsumeIdentifier(); - void ConsumeSymbol(); + private: + void ConsumeUselessChars(); + void ConsumeIdentifier(); + void ConsumeSymbol(); -private: - Token current_; - std::string origin_; - std::string::size_type cur_pos_; + private: + Token current_; + std::string origin_; + std::string::size_type cur_pos_; }; class PropTree { -public: - PropTree(); - ~PropTree(); - - struct Node { - std::string name_; - std::map properties_; - std::vector children_; - Node* mother_; - - Node() : mother_(NULL) {} - ~Node() { - for (size_t i = 0; i < children_.size(); ++i) { - delete children_[i]; - } - } - }; + public: + PropTree(); + ~PropTree(); + + struct Node { + std::string name_; + std::map properties_; + std::vector children_; + Node* mother_; + + Node() : mother_(NULL) {} + ~Node() { + for (size_t i = 0; i < children_.size(); ++i) { + delete children_[i]; + } + } + }; - void Reset(); + void Reset(); - bool ParseFromString(const std::string& input); + bool ParseFromString(const std::string& input); - bool ParseFromFile(const std::string& file); + bool ParseFromFile(const std::string& file); - Node* GetRootNode() { return root_; } + Node* GetRootNode() { return root_; } - std::string FormatString(); + std::string FormatString(); - int MaxDepth() { return max_depth_; } + int MaxDepth() { return max_depth_; } - int MinDepth() { return min_depth_; } + int MinDepth() { return min_depth_; } - const std::string& State() { return state_; } + const std::string& State() { return state_; } -private: - bool ParseNodeFromTokens(std::deque& tokens, - int depth, Node** node); + private: + bool ParseNodeFromTokens(std::deque& tokens, int depth, Node** node); - bool ParsePropsFromTokens(std::deque& tokens, Node* node); + bool ParsePropsFromTokens(std::deque& tokens, Node* node); - bool ParseChildrenFromTokens(std::deque& tokens, - int depth, Node* node); + bool ParseChildrenFromTokens(std::deque& tokens, int depth, Node* node); - void FormatNode(const std::string& line_prefix, Node* node, - std::stringstream* ss); + void FormatNode(const std::string& line_prefix, Node* node, std::stringstream* ss); - void AddError(const std::string& error_str); + void AddError(const std::string& error_str); -private: - Node* root_; - int max_depth_; - int min_depth_; - std::string state_; + private: + Node* root_; + int max_depth_; + int min_depth_; + std::string state_; }; } // namespace tera -#endif // TERA_UTILS_PROP_TREE_H +#endif // TERA_UTILS_PROP_TREE_H diff --git a/src/utils/rpc_timer_list.cc b/src/utils/rpc_timer_list.cc index 008198386..d0ba4cf97 100644 --- a/src/utils/rpc_timer_list.cc +++ b/src/utils/rpc_timer_list.cc @@ -6,64 +6,61 @@ namespace tera { -RpcTimerList::RpcTimerList() - : head_(NULL), tail_(NULL), size_(0) {} +RpcTimerList::RpcTimerList() : head_(NULL), tail_(NULL), size_(0) {} RpcTimerList::~RpcTimerList() {} bool RpcTimerList::TopTime(int64_t* time) { - MutexLock lock(&mutex_); - if (NULL == head_) { - return false; - } - *time = head_->time; - return true; + MutexLock lock(&mutex_); + if (NULL == head_) { + return false; + } + *time = head_->time; + return true; } void RpcTimerList::Push(RpcTimer* item) { - MutexLock lock(&mutex_); - item->prev = tail_; - item->next = NULL; - if (NULL != tail_) { - tail_->next = item; - } - tail_ = item; - if (NULL == head_) { - head_ = item; - } - size_++; + MutexLock lock(&mutex_); + item->prev = tail_; + item->next = NULL; + if (NULL != tail_) { + tail_->next = item; + } + tail_ = item; + if (NULL == head_) { + head_ = item; + } + size_++; } void RpcTimerList::Erase(RpcTimer* item) { - MutexLock lock(&mutex_); - if (NULL != item->prev) { - item->prev->next = item->next; - } - if (NULL != item->next) { - item->next->prev = item->prev; - } - if (head_ == item) { - head_ = item->next; - } - if (tail_ == item) { - tail_ = item->prev; - } - item->prev = NULL; - item->next = NULL; - size_--; + MutexLock lock(&mutex_); + if (NULL != item->prev) { + item->prev->next = item->next; + } + if (NULL != item->next) { + item->next->prev = item->prev; + } + if (head_ == item) { + head_ = item->next; + } + if (tail_ == item) { + tail_ = item->prev; + } + item->prev = NULL; + item->next = NULL; + size_--; } size_t RpcTimerList::Size() { - MutexLock lock(&mutex_); - return size_; + MutexLock lock(&mutex_); + return size_; } -RpcTimerList* RpcTimerList::Instance() { - return s_instance; -} +RpcTimerList* RpcTimerList::Instance() { return s_instance; } RpcTimerList* RpcTimerList::s_instance = new RpcTimerList; -} // namespace tera +} // namespace tera /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/utils/rpc_timer_list.h b/src/utils/rpc_timer_list.h index c874e8393..0513ba7bd 100644 --- a/src/utils/rpc_timer_list.h +++ b/src/utils/rpc_timer_list.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_UTILS_RPC_TIMER_LIST_H_ -#define TERA_UTILS_RPC_TIMER_LIST_H_ +#ifndef TERA_UTILS_RPC_TIMER_LIST_H_ +#define TERA_UTILS_RPC_TIMER_LIST_H_ #include "common/mutex.h" #include "types.h" @@ -22,51 +22,49 @@ class ReadTabletRequest; class ReadTabletResponse; struct RpcTimer { - RpcTimer* prev; - RpcTimer* next; - int64_t time; + RpcTimer* prev; + RpcTimer* next; + int64_t time; - RpcTimer(int64_t t) - : prev(NULL), next(NULL), time(t) {} - virtual ~RpcTimer() {} + RpcTimer(int64_t t) : prev(NULL), next(NULL), time(t) {} + virtual ~RpcTimer() {} }; -template +template struct SpecRpcTimer : public RpcTimer { - const REQ* request; - RESP* response; - google::protobuf::Closure* done; - - SpecRpcTimer(const REQ* req, RESP* resp, - google::protobuf::Closure* d, int64_t t) - : RpcTimer(t), request(req), response(resp), done(d) {} - virtual ~SpecRpcTimer() {} + const REQ* request; + RESP* response; + google::protobuf::Closure* done; + + SpecRpcTimer(const REQ* req, RESP* resp, google::protobuf::Closure* d, int64_t t) + : RpcTimer(t), request(req), response(resp), done(d) {} + virtual ~SpecRpcTimer() {} }; typedef SpecRpcTimer WriteRpcTimer; typedef SpecRpcTimer ReadRpcTimer; class RpcTimerList { -public: - RpcTimerList(); - ~RpcTimerList(); + public: + RpcTimerList(); + ~RpcTimerList(); - static RpcTimerList* Instance(); + static RpcTimerList* Instance(); - bool TopTime(int64_t* time); + bool TopTime(int64_t* time); - void Push(RpcTimer* item); + void Push(RpcTimer* item); - void Erase(RpcTimer* item); + void Erase(RpcTimer* item); - size_t Size(); + size_t Size(); -private: - mutable Mutex mutex_; - RpcTimer* head_; - RpcTimer* tail_; - size_t size_; - static RpcTimerList* s_instance; + private: + mutable Mutex mutex_; + RpcTimer* head_; + RpcTimer* tail_; + size_t size_; + static RpcTimerList* s_instance; }; } // namespace tera diff --git a/src/utils/scan_filter.cc b/src/utils/scan_filter.cc deleted file mode 100644 index 3c0053911..000000000 --- a/src/utils/scan_filter.cc +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "scan_filter.h" - -#include - -namespace tera { - -static bool CheckValue(const KeyValuePair& kv, const Filter& filter) { - int64_t v1 = *(int64_t*)kv.value().c_str(); - int64_t v2 = *(int64_t*)filter.ref_value().c_str(); - BinCompOp op = filter.bin_comp_op(); - switch (op) { - case EQ: - return v1 == v2; - break; - case NE: - return v1 != v2; - break; - case LT: - return v1 < v2; - break; - case LE: - return v1 <= v2; - break; - case GT: - return v1 > v2; - break; - case GE: - return v1 >= v2; - break; - default: - LOG(ERROR) << "illegal compare operator: " << op; - } - return false; -} - -bool CheckCell(const KeyValuePair& kv, const Filter& filter) { - switch (filter.type()) { - case BinComp: { - if (filter.field() == ValueFilter) { - if (!CheckValue(kv, filter)) { - return false; - } - } else { - LOG(ERROR) << "only support value-compare."; - } - break; - } - default: { - LOG(ERROR) << "only support compare."; - break; - }} - return true; -} - - -ScanFilter::ScanFilter(const FilterList& filter_list) - : _filter_list(filter_list), - _suc_num(0), - _filter_num(filter_list.filter_size()) { -} - -ScanFilter::~ScanFilter() {} - -bool ScanFilter::Check(const KeyValuePair& kv) { - for (int i = 0; i < _filter_num; ++i) { - const Filter& filter = _filter_list.filter(i); - switch (filter.type()) { - case BinComp: { - int res = BinCompCheck(kv, filter); - if (res > 0) { - _suc_num++; - return true; - } else if (res == 0) { - continue; - } else { - return false; - } - } break; - default: { - LOG(ERROR) << "not support."; - return false; - }} - } - return true; -} - -bool ScanFilter::IsSuccess() { - if (_suc_num == _filter_num) { - return true; - } - return false; -} - -void ScanFilter::GetAllCfs(std::set* cf_set) { - CHECK(cf_set != NULL); - - for (int i = 0; i < _filter_num; ++i) { - const Filter& filter = _filter_list.filter(i); - switch (filter.type()) { - case BinComp: - if (filter.field() == ValueFilter) { - cf_set->insert(filter.content()); - } - break; - default: - LOG(ERROR) << "not support."; - } - } -} - -int ScanFilter::BinCompCheck(const KeyValuePair& kv, const Filter& filter) { - if (filter.field() == ValueFilter) { - if (kv.column_family() == filter.content() && kv.qualifier().size() == 0) { - if (DoBinCompCheck(filter.bin_comp_op(), kv.value(), filter.ref_value())) { - return 1; - } else { - return -1; - } - } else { - // not the proper column family - // only support filter on qualifier-empty cf - return 0; - } - - } else { - LOG(ERROR) << "not support"; - return -1; - } -} - -bool ScanFilter::DoBinCompCheck(BinCompOp op, const string& l_value, const string& r_value) { - int res = l_value.compare(r_value); - switch (op) { - case EQ: - if (res == 0) { return true; } - break; - case NE: - if (res != 0) { return true; } - break; - case LT: - if (res < 0) { return true; } - break; - case LE: - if (res <= 0) { return true; } - break; - case GT: - if (res > 0) { return true; } - break; - case GE: - if (res >= 0) { return true; } - break; - default: - LOG(ERROR) << "illegal compare operator: " << op; - } - return false; -} -} // namespace tera diff --git a/src/utils/scan_filter.h b/src/utils/scan_filter.h deleted file mode 100644 index f57a557c8..000000000 --- a/src/utils/scan_filter.h +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef TERA_UTILS_SCAN_FILTER_H_ -#define TERA_UTILS_SCAN_FILTER_H_ - -#include -#include "proto/tabletnode_rpc.pb.h" - -using std::string; - -namespace tera { - -bool CheckCell(const KeyValuePair& kv, const Filter& filter); - -class ScanFilter { -public: - ScanFilter(const FilterList& filter_list); - ~ScanFilter(); - - bool Check(const KeyValuePair& kv); - - bool IsSuccess(); - - void GetAllCfs(std::set* cf_set); - -private: - int BinCompCheck(const KeyValuePair& kv, const Filter& filter); - bool DoBinCompCheck(BinCompOp op, const string& l_value, const string& r_value); - -private: - ScanFilter(); - FilterList _filter_list; - int _suc_num; - int _filter_num; -}; - -} // namespace tera -#endif // TERA_UTILS_SCAN_FILTER_H_ diff --git a/src/utils/schema_utils.cc b/src/utils/schema_utils.cc index 214553d9c..4fc47be3b 100644 --- a/src/utils/schema_utils.cc +++ b/src/utils/schema_utils.cc @@ -11,31 +11,31 @@ namespace tera { bool IsSchemaCfDiff(const TableSchema& a, const TableSchema& b) { - std::stringstream s0; - std::stringstream s1; - for (int i = 0; i < a.column_families_size(); ++i) { - s0 << a.column_families(i).ShortDebugString(); - } - LOG(INFO) << "[utils] " << s0.str(); - for (int i = 0; i < b.column_families_size(); ++i) { - s1 << b.column_families(i).ShortDebugString(); - } - LOG(INFO) << "[utils] " << s1.str(); - return (s0.str().compare(s1.str()) != 0); + std::stringstream s0; + std::stringstream s1; + for (int i = 0; i < a.column_families_size(); ++i) { + s0 << a.column_families(i).ShortDebugString(); + } + LOG(INFO) << "[utils] " << s0.str(); + for (int i = 0; i < b.column_families_size(); ++i) { + s1 << b.column_families(i).ShortDebugString(); + } + LOG(INFO) << "[utils] " << s1.str(); + return (s0.str().compare(s1.str()) != 0); } bool IsSchemaLgDiff(const TableSchema& a, const TableSchema& b) { - std::stringstream s0; - std::stringstream s1; - for (int i = 0; i < a.locality_groups_size(); ++i) { - s0 << a.locality_groups(i).ShortDebugString(); - } - LOG(INFO) << "[utils] " << s0.str(); - for (int i = 0; i < b.locality_groups_size(); ++i) { - s1 << b.locality_groups(i).ShortDebugString(); - } - LOG(INFO) << "[utils] " << s1.str(); - return (s0.str().compare(s1.str()) != 0); + std::stringstream s0; + std::stringstream s1; + for (int i = 0; i < a.locality_groups_size(); ++i) { + s0 << a.locality_groups(i).ShortDebugString(); + } + LOG(INFO) << "[utils] " << s0.str(); + for (int i = 0; i < b.locality_groups_size(); ++i) { + s1 << b.locality_groups(i).ShortDebugString(); + } + LOG(INFO) << "[utils] " << s1.str(); + return (s0.str().compare(s1.str()) != 0); } -} // namespace tera +} // namespace tera diff --git a/src/utils/schema_utils.h b/src/utils/schema_utils.h index 3a3045ab8..42cee6fa1 100644 --- a/src/utils/schema_utils.h +++ b/src/utils/schema_utils.h @@ -13,6 +13,6 @@ bool IsSchemaCfDiff(const TableSchema& a, const TableSchema& b); bool IsSchemaLgDiff(const TableSchema& a, const TableSchema& b); -} // namespace tera +} // namespace tera -#endif // TERA_SCHEMA_UTILS_H_ +#endif // TERA_SCHEMA_UTILS_H_ diff --git a/src/utils/string_util.cc b/src/utils/string_util.cc index 4e0cf1865..4e56ab6fb 100644 --- a/src/utils/string_util.cc +++ b/src/utils/string_util.cc @@ -13,195 +13,193 @@ namespace tera { bool IsVisible(char c) { - return (c >= 0x21 && c <= 0x7E); // exclude space (0x20) + return (c >= 0x21 && c <= 0x7E); // exclude space (0x20) } char IsHex(uint8_t i) { - return ((i >= '0' && i <= '9') || (i >= 'a' && i <= 'f') || (i >= 'A' && i <= 'F')); + return ((i >= '0' && i <= '9') || (i >= 'a' && i <= 'f') || (i >= 'A' && i <= 'F')); } char ToHex(uint8_t i) { - char j = 0; - if (i < 10) { - j = i + '0'; - } else { - j = i - 10 + 'a'; - } - return j; + char j = 0; + if (i < 10) { + j = i + '0'; + } else { + j = i - 10 + 'a'; + } + return j; } char ToBinary(uint8_t i) { - char j = 0; - if (i >= '0' && i <= '9') { - j = i - '0'; - } else if (i >= 'a' && i <= 'f') { - j = i - 'a' + 10; - } else { - j = i - 'A' + 10; - } - return j; + char j = 0; + if (i >= '0' && i <= '9') { + j = i - '0'; + } else if (i >= 'a' && i <= 'f') { + j = i - 'a' + 10; + } else { + j = i - 'A' + 10; + } + return j; } std::string DebugString(const std::string& src) { - size_t src_len = src.size(); - std::string dst; - dst.resize(src_len << 2); - - size_t j = 0; - for (size_t i = 0; i < src_len; i++) { - uint8_t c = src[i]; - if (IsVisible(c)) { - dst[j++] = c; - } else { - dst[j++] = '\\'; - dst[j++] = 'x'; - dst[j++] = ToHex(c >> 4); - dst[j++] = ToHex(c & 0xF); - } + size_t src_len = src.size(); + std::string dst; + dst.resize(src_len << 2); + + size_t j = 0; + for (size_t i = 0; i < src_len; i++) { + uint8_t c = src[i]; + if (IsVisible(c)) { + if ('\\' == c) { + dst[j++] = '\\'; + dst[j++] = '\\'; + } else { + dst[j++] = c; + } + } else { + dst[j++] = '\\'; + dst[j++] = 'x'; + dst[j++] = ToHex(c >> 4); + dst[j++] = ToHex(c & 0xF); } + } - return dst.substr(0, j); + return dst.substr(0, j); } bool ParseDebugString(const std::string& src, std::string* dst) { - size_t src_len = src.size(); - std::string tmp; - tmp.resize(src_len); - - int state = 0; // 0: normal, 1: \, 2: \x, 3: \x[0-9a-fAZ-F] - char bin_char = 0; - size_t j = 0; - for (size_t i = 0; i < src_len; i++) { - uint8_t c = src[i]; - if (!IsVisible(c) && !isspace(c)) { - return false; + size_t src_len = src.size(); + std::string tmp; + tmp.resize(src_len); + + int state = 0; // 0: normal, 1: \, 2: \x, 3: \x[0-9a-fAZ-F] + char bin_char = 0; + size_t j = 0; + for (size_t i = 0; i < src_len; i++) { + uint8_t c = src[i]; + if (!IsVisible(c) && !isspace(c)) { + return false; + } + switch (state) { + case 0: + if (c == '\\') { + state = 1; + } else { + tmp[j++] = c; } - switch (state) { - case 0: - if (c == '\\') { - state = 1; - } else { - tmp[j++] = c; - } - break; - case 1: - if (c == 'x') { - state = 2; - } else if (c == '\\') { - tmp[j++] = '\\'; - state = 0; - } else { - return false; - } - break; - case 2: - if (!IsHex(c)) { - return false; - } else { - bin_char |= (ToBinary(c) << 4); - state = 3; - } - break; - case 3: - if (!IsHex(c)) { - return false; - } else { - bin_char |= ToBinary(c) & 0xF; - tmp[j++] = bin_char; - bin_char = 0; - state = 0; - } - break; - default: - abort(); - break; + break; + case 1: + if (c == 'x') { + state = 2; + } else if (c == '\\') { + tmp[j++] = '\\'; + state = 0; + } else { + return false; + } + break; + case 2: + if (!IsHex(c)) { + return false; + } else { + bin_char |= (ToBinary(c) << 4); + state = 3; + } + break; + case 3: + if (!IsHex(c)) { + return false; + } else { + bin_char |= ToBinary(c) & 0xF; + tmp[j++] = bin_char; + bin_char = 0; + state = 0; } + break; + default: + abort(); + break; } + } - if (state != 0) { - return false; - } + if (state != 0) { + return false; + } - dst->assign(tmp.substr(0, j)); - return true; + dst->assign(tmp.substr(0, j)); + return true; } -bool IsValidTableName(const std::string& str) { - return IsValidName(str); -} +bool IsValidTableName(const std::string& str) { return IsValidName(str); } -bool IsValidGroupName(const std::string& str) { - return IsValidName(str); -} +bool IsValidGroupName(const std::string& str) { return IsValidName(str); } -bool IsValidUserName(const std::string& str) { - return IsValidName(str); -} +bool IsValidUserName(const std::string& str) { return IsValidName(str); } const size_t kNameLenMin = 1; const size_t kNameLenMax = 512; bool IsValidName(const std::string& str) { - if (str.size() < kNameLenMin || kNameLenMax < str.size()) { - return false; - } - if (!(isupper(str[0]) || islower(str[0]))) { - return false; - } - for (size_t i = 0; i < str.size(); ++i) { - char c = str[i]; - if (!(isdigit(c) || isupper(c) || islower(c) - || (c == '_') || (c == '.') || (c == '-') || (c == '#'))) { - return false; - } - } - return true; + if (str.size() < kNameLenMin || kNameLenMax < str.size()) { + return false; + } + if (!(isupper(str[0]) || islower(str[0]))) { + return false; + } + for (size_t i = 0; i < str.size(); ++i) { + char c = str[i]; + if (!(isdigit(c) || isupper(c) || islower(c) || (c == '_') || (c == '.') || (c == '-') || + (c == '#'))) { + return false; + } + } + return true; } bool IsValidColumnFamilyName(const std::string& str) { - if ((64 * 1024 - 1) < str.size()) { // [0, 64KB) - return false; - } - for (size_t i = 0; i < str.size(); ++i) { - char c = str[i]; - if (!isprint(c)) { - return false; - } - } - return true; + if ((64 * 1024 - 1) < str.size()) { // [0, 64KB) + return false; + } + for (size_t i = 0; i < str.size(); ++i) { + char c = str[i]; + if (!isprint(c)) { + return false; + } + } + return true; } std::string RoundNumberToNDecimalPlaces(double n, int d) { - if (d < 0 || 9 < d) { - return "(null)"; - } - std::stringstream ss; - ss << std::fixed; - ss.precision(d); - ss << n; - return ss.str(); + if (d < 0 || 9 < d) { + return "(null)"; + } + std::stringstream ss; + ss << std::fixed; + ss.precision(d); + ss << n; + return ss.str(); } struct EditDistanceMatrix { - EditDistanceMatrix(int row, int col) - : matrix_((int*)malloc(sizeof(int) * row * col)), - n_(col) {} - int& At(int row, int col) {return matrix_[row * n_ + col];} - ~EditDistanceMatrix() { - free(matrix_); - matrix_ = NULL; - } - int* matrix_; -private: - int n_; // columns(row size) - EditDistanceMatrix(const EditDistanceMatrix& m); - EditDistanceMatrix& operator=(const EditDistanceMatrix& m); + EditDistanceMatrix(int row, int col) : matrix_((int*)malloc(sizeof(int) * row * col)), n_(col) {} + int& At(int row, int col) { return matrix_[row * n_ + col]; } + ~EditDistanceMatrix() { + free(matrix_); + matrix_ = NULL; + } + int* matrix_; + + private: + int n_; // columns(row size) + EditDistanceMatrix(const EditDistanceMatrix& m); + EditDistanceMatrix& operator=(const EditDistanceMatrix& m); }; static int MinOfThreeNum(int a, int b, int c) { - int min = (a < b) ? a : b; - min = (min < c) ? min : c; - return min; + int min = (a < b) ? a : b; + min = (min < c) ? min : c; + return min; } /* @@ -219,32 +217,30 @@ static int MinOfThreeNum(int a, int b, int c) { // https://en.wikipedia.org/wiki/Edit_distance // https://en.wikipedia.org/wiki/Levenshtein_distance int EditDistance(const std::string& a, const std::string& b) { - int n = a.size(); - int m = b.size(); - if ((n == 0) || (m == 0)) { - return (n == 0) ? m : n; - } - EditDistanceMatrix matrix(m, n); - matrix.At(0, 0) = (a[0] == b[0]) ? 0 : 1; + int n = a.size(); + int m = b.size(); + if ((n == 0) || (m == 0)) { + return (n == 0) ? m : n; + } + EditDistanceMatrix matrix(m, n); + matrix.At(0, 0) = (a[0] == b[0]) ? 0 : 1; + for (size_t i = 1; i < a.size(); i++) { + matrix.At(0, i) = matrix.At(0, i - 1) + 1; + } + for (size_t j = 1; j < b.size(); j++) { + matrix.At(j, 0) = matrix.At(j - 1, 0) + 1; + } + for (size_t j = 1; j < b.size(); j++) { for (size_t i = 1; i < a.size(); i++) { - matrix.At(0, i) = matrix.At(0, i-1) + 1; - } - for (size_t j = 1; j < b.size(); j++) { - matrix.At(j, 0) = matrix.At(j-1, 0) + 1; - } - for (size_t j = 1; j < b.size(); j++) { - for (size_t i = 1; i < a.size(); i++) { - int min = MinOfThreeNum(matrix.At(j-1, i-1), - matrix.At(j, i-1), - matrix.At(j-1, i)); - if (a[i] == b[j]) { - matrix.At(j, i) = min; - } else { - matrix.At(j, i) = min + 1; - } - } - } - return matrix.At(m-1, n-1); + int min = MinOfThreeNum(matrix.At(j - 1, i - 1), matrix.At(j, i - 1), matrix.At(j - 1, i)); + if (a[i] == b[j]) { + matrix.At(j, i) = min; + } else { + matrix.At(j, i) = min + 1; + } + } + } + return matrix.At(m - 1, n - 1); } -} // namespace tera +} // namespace tera diff --git a/src/utils/string_util.h b/src/utils/string_util.h index bc1f409d4..8d1c0e93d 100644 --- a/src/utils/string_util.h +++ b/src/utils/string_util.h @@ -2,29 +2,29 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_UTIL_STRING_UTIL_H_ -#define TERA_UTIL_STRING_UTIL_H_ +#ifndef TERA_UTIL_STRING_UTIL_H_ +#define TERA_UTIL_STRING_UTIL_H_ #include namespace tera { - extern const size_t kNameLenMin; - extern const size_t kNameLenMax; +extern const size_t kNameLenMin; +extern const size_t kNameLenMax; - // binary string to debug string - std::string DebugString(const std::string& src); - // debug string to binary string - bool ParseDebugString(const std::string& src, std::string* dst); +// binary string to debug string +std::string DebugString(const std::string& src); +// debug string to binary string +bool ParseDebugString(const std::string& src, std::string* dst); - bool IsValidName(const std::string& str); - bool IsValidTableName(const std::string& str); - bool IsValidGroupName(const std::string& name); - bool IsValidUserName(const std::string& name); +bool IsValidName(const std::string& str); +bool IsValidTableName(const std::string& str); +bool IsValidGroupName(const std::string& name); +bool IsValidUserName(const std::string& name); - bool IsValidColumnFamilyName(const std::string& str); - std::string RoundNumberToNDecimalPlaces(double n, int d); - int EditDistance(const std::string& a, const std::string& b); -} // namespace tera +bool IsValidColumnFamilyName(const std::string& str); +std::string RoundNumberToNDecimalPlaces(double n, int d); +int EditDistance(const std::string& a, const std::string& b); +} // namespace tera #endif // TERA_UTIL_STRING_UTIL_H_ diff --git a/src/utils/test/fragment_test.cc b/src/utils/test/fragment_test.cc index 81121962d..b3517ea50 100644 --- a/src/utils/test/fragment_test.cc +++ b/src/utils/test/fragment_test.cc @@ -2,7 +2,6 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. - #include "utils/fragment.h" #include @@ -10,383 +9,382 @@ namespace tera { TEST(FragmentTest, Head) { - RangeFragment all; - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// (null) -// ----------->g - all.AddToRange("", "g"); - ASSERT_EQ(all.DebugString(), ":g "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// ----------->g -// k<--------p - all.AddToRange("k", "p"); - ASSERT_EQ(all.DebugString(), ":g k:p "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// ----------->g k<--------p -// t-------x - all.AddToRange("t", "x"); - ASSERT_EQ(all.DebugString(), ":g k:p t:x "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// ----------->g k<--------p t-------x -// q---s - all.AddToRange("q", "s"); - ASSERT_EQ(all.DebugString(), ":g k:p q:s t:x "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// ----------->g k<--------p q---s t-------x -// y-- - all.AddToRange("y", ""); - ASSERT_EQ(all.DebugString(), ":g k:p q:s t:x y: "); - - all.AddToRange("p", "q"); - ASSERT_EQ(all.DebugString(), ":g k:s t:x y: "); - - all.AddToRange("s", "t"); - ASSERT_EQ(all.DebugString(), ":g k:x y: "); - - all.AddToRange("g", "k"); - ASSERT_EQ(all.DebugString(), ":x y: "); - - all.AddToRange("x", "y"); - ASSERT_EQ(all.DebugString(), ": "); - - ASSERT_TRUE(all.IsCompleteRange()); + RangeFragment all; + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // (null) + // ----------->g + all.AddToRange("", "g"); + ASSERT_EQ(all.DebugString(), ":g "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // ----------->g + // k<--------p + all.AddToRange("k", "p"); + ASSERT_EQ(all.DebugString(), ":g k:p "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // ----------->g k<--------p + // t-------x + all.AddToRange("t", "x"); + ASSERT_EQ(all.DebugString(), ":g k:p t:x "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // ----------->g k<--------p t-------x + // q---s + all.AddToRange("q", "s"); + ASSERT_EQ(all.DebugString(), ":g k:p q:s t:x "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // ----------->g k<--------p q---s t-------x + // y-- + all.AddToRange("y", ""); + ASSERT_EQ(all.DebugString(), ":g k:p q:s t:x y: "); + + all.AddToRange("p", "q"); + ASSERT_EQ(all.DebugString(), ":g k:s t:x y: "); + + all.AddToRange("s", "t"); + ASSERT_EQ(all.DebugString(), ":g k:x y: "); + + all.AddToRange("g", "k"); + ASSERT_EQ(all.DebugString(), ":x y: "); + + all.AddToRange("x", "y"); + ASSERT_EQ(all.DebugString(), ": "); + + ASSERT_TRUE(all.IsCompleteRange()); } TEST(FragmentTest, Tail) { - RangeFragment all; -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// (null) -// t------------ - all.AddToRange("t", ""); - ASSERT_EQ(all.DebugString(), "t: "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// t------------ -// --------e - all.AddToRange("", "e"); - ASSERT_EQ(all.DebugString(), ":e t: "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// --------e t------------ -// h-----------n - all.AddToRange("h", "n"); - ASSERT_EQ(all.DebugString(), ":e h:n t: "); + RangeFragment all; + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // (null) + // t------------ + all.AddToRange("t", ""); + ASSERT_EQ(all.DebugString(), "t: "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // t------------ + // --------e + all.AddToRange("", "e"); + ASSERT_EQ(all.DebugString(), ":e t: "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // --------e t------------ + // h-----------n + all.AddToRange("h", "n"); + ASSERT_EQ(all.DebugString(), ":e h:n t: "); } TEST(FragmentTest, OverlapFormer) { - RangeFragment all; - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// (null) -// ------d - all.AddToRange("", "d"); - ASSERT_EQ(all.DebugString(), ":d "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// ------d -// p-------t - all.AddToRange("p", "t"); - ASSERT_EQ(all.DebugString(), ":d p:t "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// ------d p-------t -// n-o - all.AddToRange("n", "o"); - ASSERT_EQ(all.DebugString(), ":d n:o p:t "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// ------d n-o p-------t -// o-p - all.AddToRange("o", "p"); - ASSERT_EQ(all.DebugString(), ":d n:t "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// ------d n-----------t -// m---o - all.AddToRange("m", "o"); - ASSERT_EQ(all.DebugString(), ":d m:t "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// ------d m-------------t -// l-------------s - all.AddToRange("l", "s"); - ASSERT_EQ(all.DebugString(), ":d l:t "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// ------d l---------------t -// k-----------------t - all.AddToRange("k", "t"); - ASSERT_EQ(all.DebugString(), ":d k:t "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// ------d k-----------------t -// j---------------------u - all.AddToRange("j", "u"); - ASSERT_EQ(all.DebugString(), ":d j:u "); + RangeFragment all; + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // (null) + // ------d + all.AddToRange("", "d"); + ASSERT_EQ(all.DebugString(), ":d "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // ------d + // p-------t + all.AddToRange("p", "t"); + ASSERT_EQ(all.DebugString(), ":d p:t "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // ------d p-------t + // n-o + all.AddToRange("n", "o"); + ASSERT_EQ(all.DebugString(), ":d n:o p:t "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // ------d n-o p-------t + // o-p + all.AddToRange("o", "p"); + ASSERT_EQ(all.DebugString(), ":d n:t "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // ------d n-----------t + // m---o + all.AddToRange("m", "o"); + ASSERT_EQ(all.DebugString(), ":d m:t "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // ------d m-------------t + // l-------------s + all.AddToRange("l", "s"); + ASSERT_EQ(all.DebugString(), ":d l:t "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // ------d l---------------t + // k-----------------t + all.AddToRange("k", "t"); + ASSERT_EQ(all.DebugString(), ":d k:t "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // ------d k-----------------t + // j---------------------u + all.AddToRange("j", "u"); + ASSERT_EQ(all.DebugString(), ":d j:u "); } TEST(FragmentTest, OverlapLater) { - RangeFragment all; -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// (null) -// m-------q - all.AddToRange("m", "q"); - ASSERT_EQ(all.DebugString(), "m:q "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// m-------q -// y-- - all.AddToRange("y", ""); - ASSERT_EQ(all.DebugString(), "m:q y: "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// m-------q y-- -// m-n - all.AddToRange("m", "n"); - ASSERT_EQ(all.DebugString(), "m:q y: "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// m-------q y-- -// m-------q - all.AddToRange("m", "q"); - ASSERT_EQ(all.DebugString(), "m:q y: "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// m-------q y-- -// m---------r - all.AddToRange("m", "r"); - ASSERT_EQ(all.DebugString(), "m:r y: "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// m---------r y-- -// o-p - all.AddToRange("o", "p"); - ASSERT_EQ(all.DebugString(), "m:r y: "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// m---------r y-- -// o-----r - all.AddToRange("o", "r"); - ASSERT_EQ(all.DebugString(), "m:r y: "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// m---------r y-- -// o-------s - all.AddToRange("o", "s"); - ASSERT_EQ(all.DebugString(), "m:s y: "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// m-----------s y-- -// s-t - all.AddToRange("s", "t"); - ASSERT_EQ(all.DebugString(), "m:t y: "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// m-------------t y-- -// u-v - all.AddToRange("u", "v"); - ASSERT_EQ(all.DebugString(), "m:t u:v y: "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// m-------------t u-v y-- -// t------------ - all.AddToRange("t", ""); - ASSERT_EQ(all.DebugString(), "m: "); + RangeFragment all; + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // (null) + // m-------q + all.AddToRange("m", "q"); + ASSERT_EQ(all.DebugString(), "m:q "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // m-------q + // y-- + all.AddToRange("y", ""); + ASSERT_EQ(all.DebugString(), "m:q y: "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // m-------q y-- + // m-n + all.AddToRange("m", "n"); + ASSERT_EQ(all.DebugString(), "m:q y: "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // m-------q y-- + // m-------q + all.AddToRange("m", "q"); + ASSERT_EQ(all.DebugString(), "m:q y: "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // m-------q y-- + // m---------r + all.AddToRange("m", "r"); + ASSERT_EQ(all.DebugString(), "m:r y: "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // m---------r y-- + // o-p + all.AddToRange("o", "p"); + ASSERT_EQ(all.DebugString(), "m:r y: "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // m---------r y-- + // o-----r + all.AddToRange("o", "r"); + ASSERT_EQ(all.DebugString(), "m:r y: "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // m---------r y-- + // o-------s + all.AddToRange("o", "s"); + ASSERT_EQ(all.DebugString(), "m:s y: "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // m-----------s y-- + // s-t + all.AddToRange("s", "t"); + ASSERT_EQ(all.DebugString(), "m:t y: "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // m-------------t y-- + // u-v + all.AddToRange("u", "v"); + ASSERT_EQ(all.DebugString(), "m:t u:v y: "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // m-------------t u-v y-- + // t------------ + all.AddToRange("t", ""); + ASSERT_EQ(all.DebugString(), "m: "); } TEST(FragmentTest, CommonMutilFragment) { - RangeFragment all; -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// (null) -// j-------------q - all.AddToRange("j", "q"); - ASSERT_EQ(all.DebugString(), "j:q "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// j-------------q -// ------------------j - all.AddToRange("", "j"); - ASSERT_EQ(all.DebugString(), ":q "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// --------------------------------q -// q------------------ - all.AddToRange("q", ""); - ASSERT_EQ(all.DebugString(), ": "); - - ASSERT_TRUE(all.IsCompleteRange()); + RangeFragment all; + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // (null) + // j-------------q + all.AddToRange("j", "q"); + ASSERT_EQ(all.DebugString(), "j:q "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // j-------------q + // ------------------j + all.AddToRange("", "j"); + ASSERT_EQ(all.DebugString(), ":q "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // --------------------------------q + // q------------------ + all.AddToRange("q", ""); + ASSERT_EQ(all.DebugString(), ": "); + + ASSERT_TRUE(all.IsCompleteRange()); } TEST(FragmentTest, CommonOneFragment) { - RangeFragment all; - all.AddToRange("", ""); - ASSERT_EQ(all.DebugString(), ": "); + RangeFragment all; + all.AddToRange("", ""); + ASSERT_EQ(all.DebugString(), ": "); - ASSERT_TRUE(all.IsCompleteRange()); + ASSERT_TRUE(all.IsCompleteRange()); - all.AddToRange("", ""); - ASSERT_EQ(all.DebugString(), ": "); + all.AddToRange("", ""); + ASSERT_EQ(all.DebugString(), ": "); - all.AddToRange("a", "b"); - ASSERT_EQ(all.DebugString(), ": "); + all.AddToRange("a", "b"); + ASSERT_EQ(all.DebugString(), ": "); - all.AddToRange("a", ""); - ASSERT_EQ(all.DebugString(), ": "); + all.AddToRange("a", ""); + ASSERT_EQ(all.DebugString(), ": "); - all.AddToRange("", "b"); - ASSERT_EQ(all.DebugString(), ": "); + all.AddToRange("", "b"); + ASSERT_EQ(all.DebugString(), ": "); - ASSERT_TRUE(all.IsCompleteRange()); + ASSERT_TRUE(all.IsCompleteRange()); } TEST(FragmentTest, Endkey) { - RangeFragment all; - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// (null) -// m-------------------------- - all.AddToRange("m", ""); - ASSERT_EQ(all.DebugString(), "m: "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// m-------------------------- -// t------------ - all.AddToRange("t", ""); - ASSERT_EQ(all.DebugString(), "m: "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// m-------------------------- -// t-----------z - all.AddToRange("t", "z"); - ASSERT_EQ(all.DebugString(), "m: "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// m-------------------------- -// m-------------------------- - all.AddToRange("m", ""); - ASSERT_EQ(all.DebugString(), "m: "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// m-------------------------- -// m-------------------------z - all.AddToRange("m", "z"); - ASSERT_EQ(all.DebugString(), "m: "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// m-------------------------- -// l---------------------------- - all.AddToRange("l", ""); - ASSERT_EQ(all.DebugString(), "l: "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// l---------------------------- -// l-----------r - all.AddToRange("l", "r"); - ASSERT_EQ(all.DebugString(), "l: "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// l---------------------------- -// k---m - all.AddToRange("k", "m"); - ASSERT_EQ(all.DebugString(), "k: "); - -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// k------------------------------ -// --------------------------------------------------- - all.AddToRange("", ""); - ASSERT_EQ(all.DebugString(), ": "); - - ASSERT_TRUE(all.IsCompleteRange()); + RangeFragment all; + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // (null) + // m-------------------------- + all.AddToRange("m", ""); + ASSERT_EQ(all.DebugString(), "m: "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // m-------------------------- + // t------------ + all.AddToRange("t", ""); + ASSERT_EQ(all.DebugString(), "m: "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // m-------------------------- + // t-----------z + all.AddToRange("t", "z"); + ASSERT_EQ(all.DebugString(), "m: "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // m-------------------------- + // m-------------------------- + all.AddToRange("m", ""); + ASSERT_EQ(all.DebugString(), "m: "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // m-------------------------- + // m-------------------------z + all.AddToRange("m", "z"); + ASSERT_EQ(all.DebugString(), "m: "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // m-------------------------- + // l---------------------------- + all.AddToRange("l", ""); + ASSERT_EQ(all.DebugString(), "l: "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // l---------------------------- + // l-----------r + all.AddToRange("l", "r"); + ASSERT_EQ(all.DebugString(), "l: "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // l---------------------------- + // k---m + all.AddToRange("k", "m"); + ASSERT_EQ(all.DebugString(), "k: "); + + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // k------------------------------ + // --------------------------------------------------- + all.AddToRange("", ""); + ASSERT_EQ(all.DebugString(), ": "); + + ASSERT_TRUE(all.IsCompleteRange()); } TEST(CoverTest, CompleteRange) { - RangeFragment all; - all.AddToRange("", ""); - ASSERT_TRUE(all.IsCoverRange("", "")); - ASSERT_TRUE(all.IsCoverRange("", "a")); - ASSERT_TRUE(all.IsCoverRange("a", "")); - ASSERT_TRUE(all.IsCoverRange("a", "b")); + RangeFragment all; + all.AddToRange("", ""); + ASSERT_TRUE(all.IsCoverRange("", "")); + ASSERT_TRUE(all.IsCoverRange("", "a")); + ASSERT_TRUE(all.IsCoverRange("a", "")); + ASSERT_TRUE(all.IsCoverRange("a", "b")); } TEST(CoverTest, Start) { -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// * - RangeFragment all; - all.AddToRange("", "h"); - - ASSERT_TRUE(all.IsCoverRange("", "g")); - ASSERT_TRUE(all.IsCoverRange("", "h")); - ASSERT_FALSE(all.IsCoverRange("", "i")); - ASSERT_FALSE(all.IsCoverRange("", "")); - - ASSERT_TRUE(all.IsCoverRange("a", "g")); - ASSERT_TRUE(all.IsCoverRange("a", "h")); - ASSERT_FALSE(all.IsCoverRange("a", "i")); - ASSERT_FALSE(all.IsCoverRange("a", "")); - - ASSERT_FALSE(all.IsCoverRange("h", "i")); - ASSERT_FALSE(all.IsCoverRange("h", "")); + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // * + RangeFragment all; + all.AddToRange("", "h"); + + ASSERT_TRUE(all.IsCoverRange("", "g")); + ASSERT_TRUE(all.IsCoverRange("", "h")); + ASSERT_FALSE(all.IsCoverRange("", "i")); + ASSERT_FALSE(all.IsCoverRange("", "")); + + ASSERT_TRUE(all.IsCoverRange("a", "g")); + ASSERT_TRUE(all.IsCoverRange("a", "h")); + ASSERT_FALSE(all.IsCoverRange("a", "i")); + ASSERT_FALSE(all.IsCoverRange("a", "")); + + ASSERT_FALSE(all.IsCoverRange("h", "i")); + ASSERT_FALSE(all.IsCoverRange("h", "")); } TEST(CoverTest, End) { -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// * - RangeFragment all; - all.AddToRange("h", ""); - ASSERT_FALSE(all.IsCoverRange("", "g")); - ASSERT_FALSE(all.IsCoverRange("", "h")); - ASSERT_FALSE(all.IsCoverRange("", "i")); - ASSERT_FALSE(all.IsCoverRange("", "")); - - ASSERT_FALSE(all.IsCoverRange("a", "g")); - ASSERT_FALSE(all.IsCoverRange("a", "h")); - ASSERT_FALSE(all.IsCoverRange("a", "i")); - ASSERT_FALSE(all.IsCoverRange("a", "")); - - ASSERT_TRUE(all.IsCoverRange("h", "i")); - ASSERT_TRUE(all.IsCoverRange("h", "")); + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // * + RangeFragment all; + all.AddToRange("h", ""); + ASSERT_FALSE(all.IsCoverRange("", "g")); + ASSERT_FALSE(all.IsCoverRange("", "h")); + ASSERT_FALSE(all.IsCoverRange("", "i")); + ASSERT_FALSE(all.IsCoverRange("", "")); + + ASSERT_FALSE(all.IsCoverRange("a", "g")); + ASSERT_FALSE(all.IsCoverRange("a", "h")); + ASSERT_FALSE(all.IsCoverRange("a", "i")); + ASSERT_FALSE(all.IsCoverRange("a", "")); + + ASSERT_TRUE(all.IsCoverRange("h", "i")); + ASSERT_TRUE(all.IsCoverRange("h", "")); } TEST(CoverTest, Common) { -// a b c d e f g h i j k l m n o p q r s t u v w x y z -// * * - RangeFragment all; - all.AddToRange("h", "o"); - - ASSERT_FALSE(all.IsCoverRange("a", "g")); - ASSERT_FALSE(all.IsCoverRange("a", "h")); - ASSERT_FALSE(all.IsCoverRange("a", "i")); - ASSERT_FALSE(all.IsCoverRange("a", "n")); - ASSERT_FALSE(all.IsCoverRange("a", "o")); - ASSERT_FALSE(all.IsCoverRange("a", "p")); - ASSERT_FALSE(all.IsCoverRange("a", "")); - - ASSERT_TRUE(all.IsCoverRange("h", "i")); - ASSERT_TRUE(all.IsCoverRange("h", "o")); - ASSERT_FALSE(all.IsCoverRange("h", "p")); - ASSERT_FALSE(all.IsCoverRange("h", "")); - - ASSERT_TRUE(all.IsCoverRange("i", "n")); - ASSERT_TRUE(all.IsCoverRange("i", "o")); - ASSERT_FALSE(all.IsCoverRange("i", "p")); - ASSERT_FALSE(all.IsCoverRange("i", "")); - - ASSERT_FALSE(all.IsCoverRange("o", "p")); - ASSERT_FALSE(all.IsCoverRange("o", "")); - - ASSERT_FALSE(all.IsCoverRange("p", "q")); - ASSERT_FALSE(all.IsCoverRange("p", "")); + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // * * + RangeFragment all; + all.AddToRange("h", "o"); + + ASSERT_FALSE(all.IsCoverRange("a", "g")); + ASSERT_FALSE(all.IsCoverRange("a", "h")); + ASSERT_FALSE(all.IsCoverRange("a", "i")); + ASSERT_FALSE(all.IsCoverRange("a", "n")); + ASSERT_FALSE(all.IsCoverRange("a", "o")); + ASSERT_FALSE(all.IsCoverRange("a", "p")); + ASSERT_FALSE(all.IsCoverRange("a", "")); + + ASSERT_TRUE(all.IsCoverRange("h", "i")); + ASSERT_TRUE(all.IsCoverRange("h", "o")); + ASSERT_FALSE(all.IsCoverRange("h", "p")); + ASSERT_FALSE(all.IsCoverRange("h", "")); + + ASSERT_TRUE(all.IsCoverRange("i", "n")); + ASSERT_TRUE(all.IsCoverRange("i", "o")); + ASSERT_FALSE(all.IsCoverRange("i", "p")); + ASSERT_FALSE(all.IsCoverRange("i", "")); + + ASSERT_FALSE(all.IsCoverRange("o", "p")); + ASSERT_FALSE(all.IsCoverRange("o", "")); + + ASSERT_FALSE(all.IsCoverRange("p", "q")); + ASSERT_FALSE(all.IsCoverRange("p", "")); } -} // namespace tera - +} // namespace tera int main(int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); } diff --git a/src/utils/test/prop_tree_test.cc b/src/utils/test/prop_tree_test.cc index 72a7afb7d..1500d57c1 100644 --- a/src/utils/test/prop_tree_test.cc +++ b/src/utils/test/prop_tree_test.cc @@ -10,146 +10,147 @@ namespace tera { -//class TokenizerTest : public ::testing::Test, public Tokenizer { -//public: +// class TokenizerTest : public ::testing::Test, public Tokenizer { +// public: // TokenizerTest() : TPrinter(3) {} // ~TokenizerTest() {} //}; TEST(TokenizerTest, ConsumeUselessChars) { - std::string input; - input = "hello"; - Tokenizer t(input); - t.ConsumeUselessChars(); - ASSERT_EQ(t.cur_pos_, 0u); - - input = " hello"; - t.Reset(input); - t.ConsumeUselessChars(); - ASSERT_EQ(t.origin_[t.cur_pos_], 'h'); - - input = "\thello"; - t.Reset(input); - t.ConsumeUselessChars(); - ASSERT_EQ(t.origin_[t.cur_pos_], 'h'); - - input = " # this is a comment;\n hello"; - t.Reset(input); - t.ConsumeUselessChars(); - ASSERT_EQ(t.origin_[t.cur_pos_], 'h'); + std::string input; + input = "hello"; + Tokenizer t(input); + t.ConsumeUselessChars(); + ASSERT_EQ(t.cur_pos_, 0u); + + input = " hello"; + t.Reset(input); + t.ConsumeUselessChars(); + ASSERT_EQ(t.origin_[t.cur_pos_], 'h'); + + input = "\thello"; + t.Reset(input); + t.ConsumeUselessChars(); + ASSERT_EQ(t.origin_[t.cur_pos_], 'h'); + + input = " # this is a comment;\n hello"; + t.Reset(input); + t.ConsumeUselessChars(); + ASSERT_EQ(t.origin_[t.cur_pos_], 'h'); } TEST(TokenizerTest, Next) { - std::string input; - input = "hello"; - Tokenizer t(input); - ASSERT_TRUE(t.Next()); - ASSERT_EQ(t.current().text, "hello"); - ASSERT_FALSE(t.Next()); - - input = "hello world"; - t.Reset(input); - ASSERT_TRUE(t.Next()); - ASSERT_EQ(t.current().text, "hello"); - ASSERT_TRUE(t.Next()); - ASSERT_EQ(t.current().text, "world"); - ASSERT_FALSE(t.Next()); - - input = "int main (int64_t ar.gc, char* arg-v[])"; - t.Reset(input); - ASSERT_TRUE(t.Next()); - ASSERT_EQ(t.current().type, Tokenizer::IDENTIFIER); - ASSERT_EQ(t.current().text, "int"); - ASSERT_TRUE(t.Next()); - ASSERT_EQ(t.current().text, "main"); - ASSERT_TRUE(t.Next()); - ASSERT_EQ(t.current().type, Tokenizer::SYMBOL); - ASSERT_EQ(t.current().text, "("); - ASSERT_TRUE(t.Next()); - ASSERT_EQ(t.current().text, "int64_t"); - ASSERT_TRUE(t.Next()); - ASSERT_EQ(t.current().text, "ar.gc"); - ASSERT_TRUE(t.Next()); - ASSERT_EQ(t.current().text, ","); - ASSERT_TRUE(t.Next()); - ASSERT_EQ(t.current().text, "char"); - ASSERT_TRUE(t.Next()); - ASSERT_EQ(t.current().text, "*"); - ASSERT_TRUE(t.Next()); - ASSERT_EQ(t.current().text, "arg-v"); - ASSERT_TRUE(t.Next()); - ASSERT_EQ(t.current().text, "["); - ASSERT_TRUE(t.Next()); - ASSERT_EQ(t.current().text, "]"); - ASSERT_TRUE(t.Next()); - ASSERT_EQ(t.current().text, ")"); - ASSERT_FALSE(t.Next()); + std::string input; + input = "hello"; + Tokenizer t(input); + ASSERT_TRUE(t.Next()); + ASSERT_EQ(t.current().text, "hello"); + ASSERT_FALSE(t.Next()); + + input = "hello world"; + t.Reset(input); + ASSERT_TRUE(t.Next()); + ASSERT_EQ(t.current().text, "hello"); + ASSERT_TRUE(t.Next()); + ASSERT_EQ(t.current().text, "world"); + ASSERT_FALSE(t.Next()); + + input = "int main (int64_t ar.gc, char* arg-v[])"; + t.Reset(input); + ASSERT_TRUE(t.Next()); + ASSERT_EQ(t.current().type, Tokenizer::IDENTIFIER); + ASSERT_EQ(t.current().text, "int"); + ASSERT_TRUE(t.Next()); + ASSERT_EQ(t.current().text, "main"); + ASSERT_TRUE(t.Next()); + ASSERT_EQ(t.current().type, Tokenizer::SYMBOL); + ASSERT_EQ(t.current().text, "("); + ASSERT_TRUE(t.Next()); + ASSERT_EQ(t.current().text, "int64_t"); + ASSERT_TRUE(t.Next()); + ASSERT_EQ(t.current().text, "ar.gc"); + ASSERT_TRUE(t.Next()); + ASSERT_EQ(t.current().text, ","); + ASSERT_TRUE(t.Next()); + ASSERT_EQ(t.current().text, "char"); + ASSERT_TRUE(t.Next()); + ASSERT_EQ(t.current().text, "*"); + ASSERT_TRUE(t.Next()); + ASSERT_EQ(t.current().text, "arg-v"); + ASSERT_TRUE(t.Next()); + ASSERT_EQ(t.current().text, "["); + ASSERT_TRUE(t.Next()); + ASSERT_EQ(t.current().text, "]"); + ASSERT_TRUE(t.Next()); + ASSERT_EQ(t.current().text, ")"); + ASSERT_FALSE(t.Next()); } TEST(PropTreeTest, ParseFromString) { - std::string input; - PropTree pt; - PropTree::Node* proot; + std::string input; + PropTree pt; + PropTree::Node* proot; - input = "rootchildren_.size(), 3u); + input = "root{child1, child2, child3,}"; + EXPECT_TRUE(pt.ParseFromString(input)); + proot = pt.GetRootNode(); + EXPECT_EQ(proot->children_.size(), 3u); - input = "root{child1}"; - EXPECT_TRUE(pt.ParseFromString(input)); + input = "root{child1}"; + EXPECT_TRUE(pt.ParseFromString(input)); - input = "root{child1{child11, child12}, child2{child21},}"; - EXPECT_TRUE(pt.ParseFromString(input)); - EXPECT_EQ(pt.GetRootNode()->children_.size(), 2u); - EXPECT_EQ(pt.MaxDepth(), 3); - EXPECT_EQ(pt.MinDepth(), 3); + input = "root{child1{child11, child12}, child2{child21},}"; + EXPECT_TRUE(pt.ParseFromString(input)); + EXPECT_EQ(pt.GetRootNode()->children_.size(), 2u); + EXPECT_EQ(pt.MaxDepth(), 3); + EXPECT_EQ(pt.MinDepth(), 3); - input = "root:hahh{child1{child11, child12}, child2{child21},}"; - EXPECT_FALSE(pt.ParseFromString(input)); - // LOG(ERROR) << pt.FormatString(); + input = "root:hahh{child1{child11, child12}, child2{child21},}"; + EXPECT_FALSE(pt.ParseFromString(input)); + // LOG(ERROR) << pt.FormatString(); - input = "root{child1, child2,}"; - EXPECT_TRUE(pt.ParseFromString(input)); + input = "root{child1, child2,}"; + EXPECT_TRUE(pt.ParseFromString(input)); - input = "root { \ + input = + "root { \ child1 { \ child11, \ child12, \ }, \ child2, \ }"; - EXPECT_TRUE(pt.ParseFromString(input)); - EXPECT_EQ(pt.GetRootNode()->children_.size(), 2u); - EXPECT_EQ(pt.MaxDepth(), 3); - EXPECT_EQ(pt.MinDepth(), 2); - // LOG(ERROR) << pt.FormatString(); + EXPECT_TRUE(pt.ParseFromString(input)); + EXPECT_EQ(pt.GetRootNode()->children_.size(), 2u); + EXPECT_EQ(pt.MaxDepth(), 3); + EXPECT_EQ(pt.MinDepth(), 2); + // LOG(ERROR) << pt.FormatString(); } -} // namespace tera +} // namespace tera diff --git a/src/utils/test/scan_filter_test.cc b/src/utils/test/scan_filter_test.cc deleted file mode 100644 index fa3d4ec64..000000000 --- a/src/utils/test/scan_filter_test.cc +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#define private public - -#include "scan_filter.h" - -#include "gtest/gtest.h" - -namespace tera { - -ScanFilter::ScanFilter() {} - -class ScanFilterTest : public ::testing::Test, public ScanFilter { -public: - ScanFilterTest() { - Init(); - } - - void Init() { - Filter filter; - filter.set_type(BinComp); - filter.set_bin_comp_op(EQ); - filter.set_field(ValueFilter); - filter.set_content("cf1"); - filter.set_ref_value("10"); - _filter_list.add_filter()->CopyFrom(filter); - - filter.set_content("cf2"); - filter.set_bin_comp_op(GT); - _filter_list.add_filter()->CopyFrom(filter); - - _filter_num = 2; - } -}; - -TEST_F(ScanFilterTest, Check) { - KeyValuePair kv; - kv.set_column_family("cf1"); - kv.set_value("10"); - - EXPECT_TRUE(Check(kv)); - EXPECT_FALSE(IsSuccess()); - - kv.set_value("20"); - EXPECT_FALSE(Check(kv)); - EXPECT_FALSE(IsSuccess()); - - kv.set_column_family("cf2"); - EXPECT_TRUE(Check(kv)); - EXPECT_TRUE(IsSuccess()); -} - -TEST_F(ScanFilterTest, GetAllCfs) { - std::set cf_set; - GetAllCfs(&cf_set); - EXPECT_EQ(cf_set.size(), 2); - std::set::iterator it = cf_set.begin(); - EXPECT_EQ(*it, "cf1"); - it++; - EXPECT_EQ(*it, "cf2"); -} - -TEST_F(ScanFilterTest, BinCompCheck) { - KeyValuePair kv; - kv.set_column_family("cf1"); - kv.set_value("10"); - - Filter filter; - filter.set_type(BinComp); - filter.set_bin_comp_op(EQ); - filter.set_field(ValueFilter); - filter.set_content("cf1"); - filter.set_ref_value("10"); - EXPECT_TRUE(BinCompCheck(kv, filter) > 0); - - kv.set_value("20"); - EXPECT_TRUE(BinCompCheck(kv, filter) < 0); - - kv.set_column_family("cf2"); - EXPECT_TRUE(BinCompCheck(kv, filter) == 0); -} - -TEST_F(ScanFilterTest, DoBinCompCheck) { - EXPECT_TRUE(DoBinCompCheck(EQ, "10", "10")); - EXPECT_TRUE(DoBinCompCheck(NE, "10", "20")); - EXPECT_TRUE(DoBinCompCheck(GT, "10", "00")); - EXPECT_TRUE(DoBinCompCheck(GE, "10", "10")); - EXPECT_TRUE(DoBinCompCheck(GE, "20", "10")); - EXPECT_TRUE(DoBinCompCheck(LT, "00", "10")); - EXPECT_TRUE(DoBinCompCheck(LE, "10", "10")); - EXPECT_TRUE(DoBinCompCheck(LE, "00", "10")); - - EXPECT_FALSE(DoBinCompCheck(EQ, "00", "10")); - EXPECT_FALSE(DoBinCompCheck(NE, "00", "00")); - EXPECT_FALSE(DoBinCompCheck(GT, "00", "10")); - EXPECT_FALSE(DoBinCompCheck(GT, "00", "00")); - EXPECT_FALSE(DoBinCompCheck(GE, "00", "10")); - EXPECT_FALSE(DoBinCompCheck(LT, "20", "10")); - EXPECT_FALSE(DoBinCompCheck(LT, "10", "10")); - EXPECT_FALSE(DoBinCompCheck(LE, "20", "10")); -} -} // namespace tera diff --git a/src/utils/test/string_util_test.cc b/src/utils/test/string_util_test.cc index 8a378e736..ace0e1a9e 100644 --- a/src/utils/test/string_util_test.cc +++ b/src/utils/test/string_util_test.cc @@ -9,92 +9,91 @@ namespace tera { TEST(StringUtilTest, IsValidName) { - ASSERT_FALSE(IsValidName("")); - ASSERT_FALSE(IsValidName(std::string("\0", 1))); - ASSERT_FALSE(IsValidName("\1")); + ASSERT_FALSE(IsValidName("")); + ASSERT_FALSE(IsValidName(std::string("\0", 1))); + ASSERT_FALSE(IsValidName("\1")); - ASSERT_FALSE(IsValidName(std::string(kNameLenMin - 1, 'a'))); - ASSERT_TRUE(IsValidName(std::string(kNameLenMin, 'a'))); - ASSERT_TRUE(IsValidName(std::string(kNameLenMin + 1, 'a'))); + ASSERT_FALSE(IsValidName(std::string(kNameLenMin - 1, 'a'))); + ASSERT_TRUE(IsValidName(std::string(kNameLenMin, 'a'))); + ASSERT_TRUE(IsValidName(std::string(kNameLenMin + 1, 'a'))); - ASSERT_TRUE(IsValidName(std::string(kNameLenMax - 1, 'a'))); - ASSERT_TRUE(IsValidName(std::string(kNameLenMax, 'a'))); - ASSERT_FALSE(IsValidName(std::string(kNameLenMax + 1, 'a'))); + ASSERT_TRUE(IsValidName(std::string(kNameLenMax - 1, 'a'))); + ASSERT_TRUE(IsValidName(std::string(kNameLenMax, 'a'))); + ASSERT_FALSE(IsValidName(std::string(kNameLenMax + 1, 'a'))); - ASSERT_FALSE(IsValidName("1abc")); - ASSERT_FALSE(IsValidName("_1abc")); + ASSERT_FALSE(IsValidName("1abc")); + ASSERT_FALSE(IsValidName("_1abc")); - ASSERT_TRUE(IsValidName("a")); - ASSERT_TRUE(IsValidName("A")); - ASSERT_TRUE(IsValidName("abcDEFGz123_233000_")); + ASSERT_TRUE(IsValidName("a")); + ASSERT_TRUE(IsValidName("A")); + ASSERT_TRUE(IsValidName("abcDEFGz123_233000_")); - ASSERT_FALSE(IsValidName("abcDEFGz123_233\1bac")); - ASSERT_FALSE(IsValidName("a~`!@#$%^&*()_=+")); - ASSERT_FALSE(IsValidName("a[{;:'\",<>/?\"'}]")); + ASSERT_FALSE(IsValidName("abcDEFGz123_233\1bac")); + ASSERT_FALSE(IsValidName("a~`!@#$%^&*()_=+")); + ASSERT_FALSE(IsValidName("a[{;:'\",<>/?\"'}]")); } TEST(StringUtilTest, IsValidCfName) { - ASSERT_TRUE(IsValidColumnFamilyName("")); - ASSERT_TRUE(IsValidColumnFamilyName(std::string(64 * 1024 - 1, 'a'))); - ASSERT_FALSE(IsValidColumnFamilyName(std::string(64 * 1024, 'a'))); + ASSERT_TRUE(IsValidColumnFamilyName("")); + ASSERT_TRUE(IsValidColumnFamilyName(std::string(64 * 1024 - 1, 'a'))); + ASSERT_FALSE(IsValidColumnFamilyName(std::string(64 * 1024, 'a'))); - ASSERT_TRUE(IsValidColumnFamilyName("1")); - ASSERT_TRUE(IsValidColumnFamilyName("cf0")); - ASSERT_TRUE(IsValidColumnFamilyName("_1234567890-abcdefghijklmnopqrstuvwxyz:.")); + ASSERT_TRUE(IsValidColumnFamilyName("1")); + ASSERT_TRUE(IsValidColumnFamilyName("cf0")); + ASSERT_TRUE(IsValidColumnFamilyName("_1234567890-abcdefghijklmnopqrstuvwxyz:.")); - ASSERT_FALSE(IsValidColumnFamilyName("cf0\1")); - ASSERT_FALSE(IsValidColumnFamilyName("cf0\2")); + ASSERT_FALSE(IsValidColumnFamilyName("cf0\1")); + ASSERT_FALSE(IsValidColumnFamilyName("cf0\2")); } TEST(StringUtilTest, RoundNumberToNDecimalPlaces) { - ASSERT_EQ(RoundNumberToNDecimalPlaces(33, -1), "(null)"); - ASSERT_EQ(RoundNumberToNDecimalPlaces(33, 10), "(null)"); - - ASSERT_EQ(RoundNumberToNDecimalPlaces(33, 0), "33"); - ASSERT_EQ(RoundNumberToNDecimalPlaces(33, 1), "33.0"); - ASSERT_EQ(RoundNumberToNDecimalPlaces(33, 2), "33.00"); - ASSERT_EQ(RoundNumberToNDecimalPlaces(33, 9), "33.000000000"); - - ASSERT_EQ(RoundNumberToNDecimalPlaces(123456789.987654321, 0), "123456790"); - ASSERT_EQ(RoundNumberToNDecimalPlaces(123456789.987654321, 1), "123456790.0"); - ASSERT_EQ(RoundNumberToNDecimalPlaces(123456789.987654321, 2), "123456789.99"); - ASSERT_EQ(RoundNumberToNDecimalPlaces(123456789.987654321, 6), "123456789.987654"); - - ASSERT_EQ(RoundNumberToNDecimalPlaces(0, 6), "0.000000"); - ASSERT_EQ(RoundNumberToNDecimalPlaces(0.1, 6), "0.100000"); - ASSERT_EQ(RoundNumberToNDecimalPlaces(0.01, 6), "0.010000"); - ASSERT_EQ(RoundNumberToNDecimalPlaces(0.000012345678, 6), "0.000012"); + ASSERT_EQ(RoundNumberToNDecimalPlaces(33, -1), "(null)"); + ASSERT_EQ(RoundNumberToNDecimalPlaces(33, 10), "(null)"); + + ASSERT_EQ(RoundNumberToNDecimalPlaces(33, 0), "33"); + ASSERT_EQ(RoundNumberToNDecimalPlaces(33, 1), "33.0"); + ASSERT_EQ(RoundNumberToNDecimalPlaces(33, 2), "33.00"); + ASSERT_EQ(RoundNumberToNDecimalPlaces(33, 9), "33.000000000"); + + ASSERT_EQ(RoundNumberToNDecimalPlaces(123456789.987654321, 0), "123456790"); + ASSERT_EQ(RoundNumberToNDecimalPlaces(123456789.987654321, 1), "123456790.0"); + ASSERT_EQ(RoundNumberToNDecimalPlaces(123456789.987654321, 2), "123456789.99"); + ASSERT_EQ(RoundNumberToNDecimalPlaces(123456789.987654321, 6), "123456789.987654"); + + ASSERT_EQ(RoundNumberToNDecimalPlaces(0, 6), "0.000000"); + ASSERT_EQ(RoundNumberToNDecimalPlaces(0.1, 6), "0.100000"); + ASSERT_EQ(RoundNumberToNDecimalPlaces(0.01, 6), "0.010000"); + ASSERT_EQ(RoundNumberToNDecimalPlaces(0.000012345678, 6), "0.000012"); } TEST(EditDistance, AllCase) { - ASSERT_EQ(EditDistance("", ""), 0); - ASSERT_EQ(EditDistance("", "a"), 1); - ASSERT_EQ(EditDistance("a", ""), 1); - ASSERT_EQ(EditDistance("ab", ""), 2); - ASSERT_EQ(EditDistance("", "ab"), 2); - - ASSERT_EQ(EditDistance("a", "a"), 0); - ASSERT_EQ(EditDistance("a", "b"), 1); - - ASSERT_EQ(EditDistance("ax", "axy"), 1); // insertion - ASSERT_EQ(EditDistance("ax", "a"), 1); // removal - ASSERT_EQ(EditDistance("ax", "ay"), 1); // substitution - - ASSERT_EQ(EditDistance("showschema", "show_schema"), 1); - ASSERT_EQ(EditDistance("showschema", "showscheama"), 1); - ASSERT_EQ(EditDistance("branch", "branc"), 1); - ASSERT_EQ(EditDistance("update", "udpate"), 2); - - ASSERT_EQ(EditDistance("aaa", "bbb"), 3); - ASSERT_EQ(EditDistance("aaa", "baa"), 1); - ASSERT_EQ(EditDistance("abb", "acc"), 2); - ASSERT_EQ(EditDistance("abc", "op"), 3); - ASSERT_EQ(EditDistance("abc", "rstuvw"), 6); + ASSERT_EQ(EditDistance("", ""), 0); + ASSERT_EQ(EditDistance("", "a"), 1); + ASSERT_EQ(EditDistance("a", ""), 1); + ASSERT_EQ(EditDistance("ab", ""), 2); + ASSERT_EQ(EditDistance("", "ab"), 2); + + ASSERT_EQ(EditDistance("a", "a"), 0); + ASSERT_EQ(EditDistance("a", "b"), 1); + + ASSERT_EQ(EditDistance("ax", "axy"), 1); // insertion + ASSERT_EQ(EditDistance("ax", "a"), 1); // removal + ASSERT_EQ(EditDistance("ax", "ay"), 1); // substitution + + ASSERT_EQ(EditDistance("showschema", "show_schema"), 1); + ASSERT_EQ(EditDistance("showschema", "showscheama"), 1); + ASSERT_EQ(EditDistance("branch", "branc"), 1); + ASSERT_EQ(EditDistance("update", "udpate"), 2); + + ASSERT_EQ(EditDistance("aaa", "bbb"), 3); + ASSERT_EQ(EditDistance("aaa", "baa"), 1); + ASSERT_EQ(EditDistance("abb", "acc"), 2); + ASSERT_EQ(EditDistance("abc", "op"), 3); + ASSERT_EQ(EditDistance("abc", "rstuvw"), 6); } - } int main(int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); } diff --git a/src/utils/test/tprinter_test.cc b/src/utils/test/tprinter_test.cc index d67d2deee..5f61aa4ea 100644 --- a/src/utils/test/tprinter_test.cc +++ b/src/utils/test/tprinter_test.cc @@ -11,98 +11,97 @@ namespace tera { class TPrinterTest : public ::testing::Test, public TPrinter { -public: - TPrinterTest() - : TPrinter(3, "No.", "year", "avg") { - } - ~TPrinterTest() {} + public: + TPrinterTest() : TPrinter(3, "No.", "year", "avg") {} + ~TPrinterTest() {} }; TEST_F(TPrinterTest, ParseColType) { - string item, name; - CellType type; - item = "hello"; + string item, name; + CellType type; + item = "hello"; - EXPECT_TRUE(TPrinter::ParseColType(item, &name, &type)); - VLOG(5) << name << " " << type; - EXPECT_EQ(name, "hello"); - EXPECT_EQ(type, INT); + EXPECT_TRUE(TPrinter::ParseColType(item, &name, &type)); + VLOG(5) << name << " " << type; + EXPECT_EQ(name, "hello"); + EXPECT_EQ(type, INT); - item = "hello"; - EXPECT_FALSE(TPrinter::ParseColType(item, &name, &type)); + item = "hello"; + EXPECT_FALSE(TPrinter::ParseColType(item, &name, &type)); } TEST_F(TPrinterTest, NumToStr) { - int64_t i = 100; - ASSERT_EQ("100", NumToStr(i)); - ASSERT_EQ("0", NumToStr(0)); - ASSERT_EQ("10", NumToStr(10)); - ASSERT_EQ("10K", NumToStr(10000)); - ASSERT_EQ("10P", NumToStr(10000000000000000ll)); - - ASSERT_EQ("12.34K", NumToStr(12344)); - ASSERT_EQ("10.11P", NumToStr(10110000000000000ll)); - - ASSERT_EQ("1", NumToStr(1.0)); - ASSERT_EQ("1.23", NumToStr(1.23)); - ASSERT_EQ("1.20", NumToStr(1.2)); + int64_t i = 100; + ASSERT_EQ("100", NumToStr(i)); + ASSERT_EQ("0", NumToStr(0)); + ASSERT_EQ("10", NumToStr(10)); + ASSERT_EQ("10K", NumToStr(10000)); + ASSERT_EQ("10P", NumToStr(10000000000000000ll)); + + ASSERT_EQ("12.34K", NumToStr(12344)); + ASSERT_EQ("10.11P", NumToStr(10110000000000000ll)); + + ASSERT_EQ("1", NumToStr(1.0)); + ASSERT_EQ("1.23", NumToStr(1.23)); + ASSERT_EQ("1.20", NumToStr(1.2)); } TEST_F(TPrinterTest, AddRow) { - // test varargs row - ASSERT_TRUE(AddRow(3, "1", 2013, 1.234)); - ASSERT_TRUE(AddRow(3, "2", 2014, 500.0)); - ASSERT_EQ(2, (int)body_.size()); - ASSERT_EQ(3, (int)body_[0].size()); - ASSERT_EQ(body_[0][0].type, STRING); - ASSERT_EQ(body_[0][1].type, INT); - ASSERT_EQ(body_[0][1].value.i, 2013); - ASSERT_EQ(3, (int)body_[1].size()); - ASSERT_EQ(body_[1][2].type, DOUBLE); - ASSERT_EQ(body_[1][2].value.d, 500); - - ASSERT_FALSE(AddRow(4, "2", 2014, 500.0)); - ASSERT_FALSE(AddRow(1, "2", 2014, 500.0)); - - // test int vector row - std::vector vi(3, 9); - ASSERT_TRUE(AddRow(vi)); - ASSERT_EQ(body_[2][0].type, INT); - ASSERT_EQ(body_[2][1].type, INT); - ASSERT_EQ(body_[2][2].value.i, 9); - vi.resize(1); - ASSERT_FALSE(AddRow(vi)); - - // test string vector row - std::vector vs(3, "hello"); - ASSERT_TRUE(AddRow(vs)); - ASSERT_EQ(body_[3][0].type, STRING); - ASSERT_EQ(body_[3][1].type, STRING); - ASSERT_EQ(*body_[3][2].value.s, "hello"); - vs.resize(5); - ASSERT_FALSE(AddRow(vs)); + // test varargs row + ASSERT_TRUE(AddRow(3, "1", 2013, 1.234)); + ASSERT_TRUE(AddRow(3, "2", 2014, 500.0)); + ASSERT_EQ(2, (int)body_.size()); + ASSERT_EQ(3, (int)body_[0].size()); + ASSERT_EQ(body_[0][0].type, STRING); + ASSERT_EQ(body_[0][1].type, INT); + ASSERT_EQ(body_[0][1].value.i, 2013); + ASSERT_EQ(3, (int)body_[1].size()); + ASSERT_EQ(body_[1][2].type, DOUBLE); + ASSERT_EQ(body_[1][2].value.d, 500); + + ASSERT_FALSE(AddRow(4, "2", 2014, 500.0)); + ASSERT_FALSE(AddRow(1, "2", 2014, 500.0)); + + // test int vector row + std::vector vi(3, 9); + ASSERT_TRUE(AddRow(vi)); + ASSERT_EQ(body_[2][0].type, INT); + ASSERT_EQ(body_[2][1].type, INT); + ASSERT_EQ(body_[2][2].value.i, 9); + vi.resize(1); + ASSERT_FALSE(AddRow(vi)); + + // test string vector row + std::vector vs(3, "hello"); + ASSERT_TRUE(AddRow(vs)); + ASSERT_EQ(body_[3][0].type, STRING); + ASSERT_EQ(body_[3][1].type, STRING); + ASSERT_EQ(*body_[3][2].value.s, "hello"); + vs.resize(5); + ASSERT_FALSE(AddRow(vs)); } TEST_F(TPrinterTest, New) { - ASSERT_EQ(3, (int)head_.size()); - ASSERT_EQ(STRING, head_[0].second); - ASSERT_EQ(INT, head_[1].second); - ASSERT_EQ(DOUBLE, head_[2].second); + ASSERT_EQ(3, (int)head_.size()); + ASSERT_EQ(STRING, head_[0].second); + ASSERT_EQ(INT, head_[1].second); + ASSERT_EQ(DOUBLE, head_[2].second); } TEST_F(TPrinterTest, ToString) { - ASSERT_TRUE(AddRow(3, "1", 2013, 1.234)); - ASSERT_TRUE(AddRow(3, "2", 2014, 500)); + ASSERT_TRUE(AddRow(3, "1", 2013, 1.234)); + ASSERT_TRUE(AddRow(3, "2", 2014, 500)); - string outstr = ToString(); - LOG(ERROR) << outstr.size() << std::endl << outstr; + string outstr = ToString(); + LOG(ERROR) << outstr.size() << std::endl + << outstr; } -} // namespace tera +} // namespace tera int main(int argc, char** argv) { - ::google::ParseCommandLineFlags(&argc, &argv, true); - ::google::InitGoogleLogging(argv[0]); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + ::google::ParseCommandLineFlags(&argc, &argv, true); + ::google::InitGoogleLogging(argv[0]); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); } diff --git a/src/utils/tprinter.cc b/src/utils/tprinter.cc index e3e3467f8..678989cd5 100644 --- a/src/utils/tprinter.cc +++ b/src/utils/tprinter.cc @@ -16,249 +16,241 @@ namespace tera { -TPrinter::TPrinter() : cols_(0), col_width_(cols_) { -} +TPrinter::TPrinter() : cols_(0), col_width_(cols_) {} TPrinter::TPrinter(int cols, ...) : cols_(cols), col_width_(cols_) { - assert (cols > 0); - va_list args; - va_start(args, cols); - for (int i = 0; i < cols; ++i) { - string item = va_arg(args, char*); - string name; - CellType type; - if (!ParseColType(item, &name, &type)) { - name = item; - type = STRING; - } - head_.push_back(std::make_pair(name, type)); - col_width_[i] = name.size(); + assert(cols > 0); + va_list args; + va_start(args, cols); + for (int i = 0; i < cols; ++i) { + string item = va_arg(args, char*); + string name; + CellType type; + if (!ParseColType(item, &name, &type)) { + name = item; + type = STRING; } - va_end(args); + head_.push_back(std::make_pair(name, type)); + col_width_[i] = name.size(); + } + va_end(args); } -TPrinter::~TPrinter() { -} +TPrinter::~TPrinter() {} bool TPrinter::AddRow(int cols, ...) { - if (cols != cols_) { - return false; - } - Line line; - va_list args; - va_start(args, cols); - for (int i = 0; i < cols; ++i) { - switch (head_[i].second) { - case INT: - line.push_back(Cell((int64_t)va_arg(args, int64_t), INT)); - break; - case DOUBLE: - line.push_back(Cell((double)va_arg(args, double), DOUBLE)); - break; - case STRING: - line.push_back(Cell((char*)va_arg(args, char*), STRING)); - break; - default: - abort(); - } + if (cols != cols_) { + return false; + } + Line line; + va_list args; + va_start(args, cols); + for (int i = 0; i < cols; ++i) { + switch (head_[i].second) { + case INT: + line.push_back(Cell((int64_t)va_arg(args, int64_t), INT)); + break; + case DOUBLE: + line.push_back(Cell((double)va_arg(args, double), DOUBLE)); + break; + case STRING: + line.push_back(Cell((char*)va_arg(args, char*), STRING)); + break; + default: + abort(); } - va_end(args); - FormatOneLine(line, NULL); // modify column width - body_.push_back(line); - return true; + } + va_end(args); + FormatOneLine(line, NULL); // modify column width + body_.push_back(line); + return true; } bool TPrinter::AddRow(const std::vector& row) { - if ((int)row.size() != cols_) { - return false; - } - Line line; - for (int i = 0; i < cols_; ++i) { - line.push_back(Cell(row[i], STRING)); - } - FormatOneLine(line, NULL); // modify column width only - body_.push_back(line); - return true; + if ((int)row.size() != cols_) { + return false; + } + Line line; + for (int i = 0; i < cols_; ++i) { + line.push_back(Cell(row[i], STRING)); + } + FormatOneLine(line, NULL); // modify column width only + body_.push_back(line); + return true; } bool TPrinter::AddRow(const std::vector& row) { - if ((int)row.size() != cols_) { - return false; - } - Line line; - for (int i = 0; i < cols_; ++i) { - line.push_back(Cell(row[i], INT)); - } - FormatOneLine(line, NULL); // modify column width only - body_.push_back(line); - return true; + if ((int)row.size() != cols_) { + return false; + } + Line line; + for (int i = 0; i < cols_; ++i) { + line.push_back(Cell(row[i], INT)); + } + FormatOneLine(line, NULL); // modify column width only + body_.push_back(line); + return true; } -void TPrinter::Print(const PrintOpt& opt) { - std::cout << ToString(opt); -} +void TPrinter::Print(const PrintOpt& opt) { std::cout << ToString(opt); } string TPrinter::ToString(const PrintOpt& opt) { - std::ostringstream ostr; - if (head_.size() < 1) { - return ""; + std::ostringstream ostr; + if (head_.size() < 1) { + return ""; + } + if (opt.print_head) { + int line_len = 0; + for (int i = 0; i < cols_; ++i) { + line_len += 2 + col_width_[i]; + ostr << " " << std::setfill(' ') << std::setw(col_width_[i]) + << std::setiosflags(std::ios::left) << head_[i].first; + } + ostr << std::endl; + for (int i = 0; i < line_len + 2; ++i) { + ostr << "-"; } - if (opt.print_head) { - int line_len = 0; - for (int i = 0; i < cols_; ++i) { - line_len += 2 + col_width_[i]; - ostr << " " << std::setfill(' ') - << std::setw(col_width_[i]) - << std::setiosflags(std::ios::left) - << head_[i].first; - } - ostr << std::endl; - for (int i = 0; i < line_len + 2; ++i) { - ostr << "-"; - } - ostr << std::endl; + ostr << std::endl; + } + bool first_line = true; + for (size_t i = 0; i < body_.size(); ++i) { + std::vector line; + FormatOneLine(body_[i], &line); + if (first_line) { + first_line = false; + } else { + ostr << std::endl; } - bool first_line = true; - for (size_t i = 0; i < body_.size(); ++i) { - std::vector line; - FormatOneLine(body_[i], &line); - if (first_line) { - first_line = false; - } else { - ostr << std::endl; - } - for (int j = 0; j < cols_; ++j) { - ostr << " " << std::setfill(' ') - << std::setw(col_width_[j]) - << std::setiosflags(std::ios::left) - << line[j]; - } + for (int j = 0; j < cols_; ++j) { + ostr << " " << std::setfill(' ') << std::setw(col_width_[j]) + << std::setiosflags(std::ios::left) << line[j]; } - return ostr.str(); + } + return ostr.str(); } void TPrinter::Reset(int cols, ...) { - assert (cols > 0); - cols_ = cols; - col_width_.resize(cols_, 0); - head_.clear(); - body_.clear(); + assert(cols > 0); + cols_ = cols; + col_width_.resize(cols_, 0); + head_.clear(); + body_.clear(); - va_list args; - va_start(args, cols); - for (int i = 0; i < cols; ++i) { - string item = va_arg(args, char*); - string name; - CellType type; - if (!ParseColType(item, &name, &type)) { - name = item; - type = STRING; - } - head_.push_back(std::make_pair(name, type)); - col_width_[i] = name.size(); + va_list args; + va_start(args, cols); + for (int i = 0; i < cols; ++i) { + string item = va_arg(args, char*); + string name; + CellType type; + if (!ParseColType(item, &name, &type)) { + name = item; + type = STRING; } - va_end(args); + head_.push_back(std::make_pair(name, type)); + col_width_[i] = name.size(); + } + va_end(args); } void TPrinter::Reset(const std::vector& row) { - assert (row.size() > 0); - cols_ = row.size(); - col_width_.resize(cols_, 0); - head_.clear(); - body_.clear(); + assert(row.size() > 0); + cols_ = row.size(); + col_width_.resize(cols_, 0); + head_.clear(); + body_.clear(); - for (int i = 0; i < cols_; ++i) { - head_.push_back(std::make_pair(row[i], STRING)); - col_width_[i] = row[i].size(); - } + for (int i = 0; i < cols_; ++i) { + head_.push_back(std::make_pair(row[i], STRING)); + col_width_[i] = row[i].size(); + } } bool TPrinter::ParseColType(const string& item, string* name, CellType* type) { - string::size_type pos1; - pos1 = item.find('<'); - if (pos1 == string::npos) { - return false; - } - if (item[item.size() - 1] != '>') { - return false; - } - string type_str = item.substr(pos1 + 1, item.size() - pos1 - 2); - if (type_str == "int") { - *type = INT; - } else if (type_str == "double") { - *type = DOUBLE; - } else if (type_str == "string") { - *type = STRING; - } else { - return false; - } - *name = item.substr(0, pos1); - return true; + string::size_type pos1; + pos1 = item.find('<'); + if (pos1 == string::npos) { + return false; + } + if (item[item.size() - 1] != '>') { + return false; + } + string type_str = item.substr(pos1 + 1, item.size() - pos1 - 2); + if (type_str == "int") { + *type = INT; + } else if (type_str == "double") { + *type = DOUBLE; + } else if (type_str == "string") { + *type = STRING; + } else { + return false; + } + *name = item.substr(0, pos1); + return true; } void TPrinter::FormatOneLine(Line& ori, std::vector* dst) { - if (dst) { - dst->clear(); + if (dst) { + dst->clear(); + } + for (size_t i = 0; i < ori.size(); ++i) { + string str = ori[i].ToString(); + if (col_width_[i] < str.size()) { + col_width_[i] = str.size(); } - for (size_t i = 0; i < ori.size(); ++i) { - string str = ori[i].ToString(); - if (col_width_[i] < str.size()) { - col_width_[i] = str.size(); - } - if (dst) { - dst->push_back(str); - } + if (dst) { + dst->push_back(str); } + } } string TPrinter::NumToStr(const double num) { - const int64_t kKB = 1000; - const int64_t kMB = kKB * 1000; - const int64_t kGB = kMB * 1000; - const int64_t kTB = kGB * 1000; - const int64_t kPB = kTB * 1000; + const int64_t kKB = 1000; + const int64_t kMB = kKB * 1000; + const int64_t kGB = kMB * 1000; + const int64_t kTB = kGB * 1000; + const int64_t kPB = kTB * 1000; - string unit; - double res; - if (num > kPB) { - res = (1.0 * num) / kPB; - unit = "P"; - } else if (num > kTB) { - res = (1.0 * num) / kTB; - unit = "T"; - } else if (num > kGB) { - res = (1.0 * num) / kGB; - unit = "G"; - } else if (num > kMB) { - res = (1.0 * num) / kMB; - unit = "M"; - } else if (num > kKB) { - res = (1.0 * num) / kKB; - unit = "K"; - } else { - res = num; - unit = ""; - } - const int buflen = 16; - char buf[buflen]; - if ((int)res - res == 0) { - snprintf(buf, buflen, "%d%s", (int)res, unit.c_str()); - } else { - snprintf(buf, buflen, "%.2f%s", res, unit.c_str()); - } - return string(buf); + string unit; + double res; + if (num > kPB) { + res = (1.0 * num) / kPB; + unit = "P"; + } else if (num > kTB) { + res = (1.0 * num) / kTB; + unit = "T"; + } else if (num > kGB) { + res = (1.0 * num) / kGB; + unit = "G"; + } else if (num > kMB) { + res = (1.0 * num) / kMB; + unit = "M"; + } else if (num > kKB) { + res = (1.0 * num) / kKB; + unit = "K"; + } else { + res = num; + unit = ""; + } + const int buflen = 16; + char buf[buflen]; + if ((int)res - res == 0) { + snprintf(buf, buflen, "%d%s", (int)res, unit.c_str()); + } else { + snprintf(buf, buflen, "%.2f%s", res, unit.c_str()); + } + return string(buf); } string TPrinter::Cell::ToString() { - switch (type) { + switch (type) { case INT: - return NumToStr(value.i); + return NumToStr(value.i); case DOUBLE: - return NumToStr(value.d); + return NumToStr(value.d); case STRING: - return *value.s; + return *value.s; default: - abort(); - } + abort(); + } } -} // namespace tera +} // namespace tera diff --git a/src/utils/tprinter.h b/src/utils/tprinter.h index b98792b1f..9c554379e 100644 --- a/src/utils/tprinter.h +++ b/src/utils/tprinter.h @@ -4,8 +4,8 @@ // // Author: Xu Peilin (xupeilin@baidu.com) -#ifndef TERA_UTILS_T_PRINTER_H_ -#define TERA_UTILS_T_PRINTER_H_ +#ifndef TERA_UTILS_T_PRINTER_H_ +#define TERA_UTILS_T_PRINTER_H_ #include @@ -19,78 +19,85 @@ using std::string; namespace tera { class TPrinter { -public: - struct PrintOpt { - public: - bool print_head; // if print table header - - // >0 for positive order, <0 for reverse order, 0 for not sort - int sort_dir; - int sort_col; // select column num for sorting - - PrintOpt() : print_head(true), sort_dir(0), sort_col(0) {} - }; - - TPrinter(); - TPrinter(int cols, ...); - ~TPrinter(); - - bool AddRow(int cols, ...); - bool AddRow(const std::vector& row); - bool AddRow(const std::vector& row); - - void Print(const PrintOpt& opt = PrintOpt()); - - string ToString(const PrintOpt& opt = PrintOpt()); - - void Reset(int cols, ...); - void Reset(const std::vector& head); - -private: - enum CellType { - INT, - DOUBLE, - STRING - }; - struct Cell { - CellType type; - union { - int64_t i; - double d; - string* s; - } value; - - string ToString(); - - Cell (int64_t v, CellType t) { value.i = v; type = t; } - Cell (double v, CellType t) { value.d = v; type = t; } - Cell (const string& v, CellType t) { value.s = new string(v); type = t; } - Cell (const Cell& ref) { *this = ref; } - ~Cell () { if (type == STRING) delete value.s; } - Cell& operator=(const Cell& ref) { - type = ref.type; - if (type == STRING && this != &ref) { - value.s = new string(*ref.value.s); - } else { - value = ref.value; - } - return *this; - } - }; - typedef std::vector Line; - - // column format: "name" - // e.g. "name", "money", "speed" - bool ParseColType(const string& item, string* name, CellType* type); - void FormatOneLine(Line& ori, std::vector* dst); - static string NumToStr(const double num); - -private: - int cols_; - std::vector > head_; - std::vector body_; - std::vector col_width_; + public: + struct PrintOpt { + public: + bool print_head; // if print table header + + // >0 for positive order, <0 for reverse order, 0 for not sort + int sort_dir; + int sort_col; // select column num for sorting + + PrintOpt() : print_head(true), sort_dir(0), sort_col(0) {} + }; + + TPrinter(); + TPrinter(int cols, ...); + ~TPrinter(); + + bool AddRow(int cols, ...); + bool AddRow(const std::vector& row); + bool AddRow(const std::vector& row); + + void Print(const PrintOpt& opt = PrintOpt()); + + string ToString(const PrintOpt& opt = PrintOpt()); + + void Reset(int cols, ...); + void Reset(const std::vector& head); + + private: + enum CellType { INT, DOUBLE, STRING }; + struct Cell { + CellType type; + union { + int64_t i; + double d; + string* s; + } value; + + string ToString(); + + Cell(int64_t v, CellType t) { + value.i = v; + type = t; + } + Cell(double v, CellType t) { + value.d = v; + type = t; + } + Cell(const string& v, CellType t) { + value.s = new string(v); + type = t; + } + Cell(const Cell& ref) { *this = ref; } + ~Cell() { + if (type == STRING) delete value.s; + } + Cell& operator=(const Cell& ref) { + type = ref.type; + if (type == STRING && this != &ref) { + value.s = new string(*ref.value.s); + } else { + value = ref.value; + } + return *this; + } + }; + typedef std::vector Line; + + // column format: "name" + // e.g. "name", "money", "speed" + bool ParseColType(const string& item, string* name, CellType* type); + void FormatOneLine(Line& ori, std::vector* dst); + static string NumToStr(const double num); + + private: + int cols_; + std::vector > head_; + std::vector body_; + std::vector col_width_; }; -} // namespace tera -#endif // TERA_UTILS_T_PRINTER_H_ +} // namespace tera +#endif // TERA_UTILS_T_PRINTER_H_ diff --git a/src/utils/utils_cmd.cc b/src/utils/utils_cmd.cc index 7c25bd47c..f66a25d6a 100644 --- a/src/utils/utils_cmd.cc +++ b/src/utils/utils_cmd.cc @@ -24,138 +24,139 @@ namespace tera { namespace utils { std::string GetBinaryLocationDir() { - char exec_full_path[1024]; - int32_t path_size = readlink("/proc/self/exe", exec_full_path, 1024 - 1); - CHECK(path_size > 0); - exec_full_path[path_size] = '\0'; - VLOG(5) << "current binary location: " << exec_full_path; - - std::string full_dir; - SplitStringPath(exec_full_path, &full_dir, NULL); - return full_dir; + char exec_full_path[1024]; + int32_t path_size = readlink("/proc/self/exe", exec_full_path, 1024 - 1); + CHECK(path_size > 0); + exec_full_path[path_size] = '\0'; + VLOG(5) << "current binary location: " << exec_full_path; + + std::string full_dir; + SplitStringPath(exec_full_path, &full_dir, NULL); + return full_dir; } std::string GetCurrentLocationDir() { - char current_path[1024] = {'\0'}; - std::string current_dir; + char current_path[1024] = {'\0'}; + std::string current_dir; - if (getcwd(current_path, 1024)) { - current_dir = current_path; - } - return current_dir; + if (getcwd(current_path, 1024)) { + current_dir = current_path; + } + return current_dir; } std::string GetValueFromEnv(const std::string& env_name) { - if (env_name.empty()) { - return ""; - } - - const char* env = getenv(env_name.c_str()); - if (!env) { - VLOG(5) << "fail to fetch from env: " << env_name; - return ""; - } - return env; + if (env_name.empty()) { + return ""; + } + + const char* env = getenv(env_name.c_str()); + if (!env) { + VLOG(5) << "fail to fetch from env: " << env_name; + return ""; + } + return env; } std::string ConvertByteToString(const uint64_t size) { - std::string hight_unit; - double min_size; - const uint64_t kKB = 1 << 10; - const uint64_t kMB = kKB << 10; - const uint64_t kGB = kMB << 10; - const uint64_t kTB = kGB << 10; - const uint64_t kPB = kTB << 10; - - if (size == 0) { - return "0"; - } - - if (size > kPB) { - min_size = (1.0 * size) / kPB; - hight_unit = "P"; - } else if (size > kTB) { - min_size = (1.0 * size) / kTB; - hight_unit = "T"; - } else if (size > kGB) { - min_size = (1.0 * size) / kGB; - hight_unit = "G"; - } else if (size > kMB) { - min_size = (1.0 * size) / kMB; - hight_unit = "M"; - } else if (size > kKB) { - min_size = (1.0 * size) / kKB; - hight_unit = "K"; - } else { - min_size = size; - hight_unit = ""; - } - - if ((int)min_size - min_size == 0) { - return StringFormat("%d%s", (int)min_size, hight_unit.c_str()); - } else { - return StringFormat("%.2f%s", min_size, hight_unit.c_str()); - } + std::string hight_unit; + double min_size; + const uint64_t kKB = 1 << 10; + const uint64_t kMB = kKB << 10; + const uint64_t kGB = kMB << 10; + const uint64_t kTB = kGB << 10; + const uint64_t kPB = kTB << 10; + + if (size == 0) { + return "0"; + } + + if (size > kPB) { + min_size = (1.0 * size) / kPB; + hight_unit = "P"; + } else if (size > kTB) { + min_size = (1.0 * size) / kTB; + hight_unit = "T"; + } else if (size > kGB) { + min_size = (1.0 * size) / kGB; + hight_unit = "G"; + } else if (size > kMB) { + min_size = (1.0 * size) / kMB; + hight_unit = "M"; + } else if (size > kKB) { + min_size = (1.0 * size) / kKB; + hight_unit = "K"; + } else { + min_size = size; + hight_unit = ""; + } + + if ((int)min_size - min_size == 0) { + return StringFormat("%d%s", (int)min_size, hight_unit.c_str()); + } else { + return StringFormat("%.2f%s", min_size, hight_unit.c_str()); + } } -bool ExecuteShellCmd(const std::string cmd, std::string* ret_str) { - char output_buffer[80]; - FILE *fp = popen(cmd.c_str(), "r"); - if (!fp) { - LOG(ERROR) << "fail to execute cmd: " << cmd; - return false; - } - fgets(output_buffer, sizeof(output_buffer), fp); - pclose(fp); - if (ret_str) { - *ret_str = std::string(output_buffer); - } - return true; +bool ExecuteShellCmd(const std::string& cmd, std::string* ret_str) { + char output_buffer[80]; + FILE* fp = popen(cmd.c_str(), "r"); + if (!fp) { + LOG(ERROR) << "fail to execute cmd: " << cmd; + return false; + } + fgets(output_buffer, sizeof(output_buffer), fp); + pclose(fp); + if (ret_str) { + *ret_str = std::string(output_buffer); + } + return true; } std::string GetLocalHostAddr() { - std::string cmd = - "/sbin/ifconfig | grep 'inet addr:'| grep -v '127.0.0.1' | cut -d: -f2 | awk '{ print $1}'"; - std::string addr; - if (!ExecuteShellCmd(cmd, &addr)) { - LOG(ERROR) << "fail to fetch local host addr"; - } else if (addr.length() > 1) { - addr.erase(addr.length() - 1, 1); - } - return addr; + std::string cmd = + "/sbin/ifconfig | grep 'inet addr:'| grep -v '127.0.0.1' | cut -d: -f2 | " + "awk '{ print $1}'"; + std::string addr; + if (!ExecuteShellCmd(cmd, &addr)) { + LOG(ERROR) << "fail to fetch local host addr"; + } else if (addr.length() > 1) { + addr.erase(addr.length() - 1, 1); + } + return addr; } std::string GetLocalHostName() { - char str[kMaxHostNameSize + 1]; - if (0 != gethostname(str, kMaxHostNameSize + 1)) { - LOG(FATAL) << "gethostname fail"; - exit(1); - } - std::string hostname(str); - return hostname; + char str[kMaxHostNameSize + 1]; + if (0 != gethostname(str, kMaxHostNameSize + 1)) { + LOG(FATAL) << "gethostname fail"; + exit(1); + } + std::string hostname(str); + return hostname; } void SetupLog(const std::string& name) { - // log info/warning/error/fatal to tera.log - // log warning/error/fatal to tera.wf - - std::string program_name = "tera"; - if (!name.empty()) { - program_name = name; - } - - std::string log_filename = FLAGS_log_dir + "/" + program_name + ".INFO."; - std::string wf_filename = FLAGS_log_dir + "/" + program_name + ".WARNING."; - google::SetLogDestination(google::INFO, log_filename.c_str()); - google::SetLogDestination(google::WARNING, wf_filename.c_str()); - google::SetLogDestination(google::ERROR, ""); - google::SetLogDestination(google::FATAL, ""); - - google::SetLogSymlink(google::INFO, program_name.c_str()); - google::SetLogSymlink(google::WARNING, program_name.c_str()); - google::SetLogSymlink(google::ERROR, ""); - google::SetLogSymlink(google::FATAL, ""); + // log info/warning/error/fatal to tera.log + // log warning/error/fatal to tera.wf + + std::string program_name = "tera"; + if (!name.empty()) { + program_name = name; + } + + std::string log_filename = FLAGS_log_dir + "/" + program_name + ".INFO."; + std::string wf_filename = FLAGS_log_dir + "/" + program_name + ".WARNING."; + google::SetLogDestination(google::INFO, log_filename.c_str()); + google::SetLogDestination(google::WARNING, wf_filename.c_str()); + google::SetLogDestination(google::ERROR, ""); + google::SetLogDestination(google::FATAL, ""); + + google::SetLogSymlink(google::INFO, program_name.c_str()); + google::SetLogSymlink(google::WARNING, program_name.c_str()); + google::SetLogSymlink(google::ERROR, ""); + google::SetLogSymlink(google::FATAL, ""); } -} // namespace utils -} // namespace tera +} // namespace utils +} // namespace tera diff --git a/src/utils/utils_cmd.h b/src/utils/utils_cmd.h index 9fa85e766..522bfc995 100644 --- a/src/utils/utils_cmd.h +++ b/src/utils/utils_cmd.h @@ -25,12 +25,11 @@ std::string GetLocalHostAddr(); std::string GetLocalHostName(); -bool ExecuteShellCmd(const std::string cmd, - std::string* ret_str = NULL); +bool ExecuteShellCmd(const std::string& cmd, std::string* ret_str = NULL); void SetupLog(const std::string& program_name); -} // namespace utils -} // namespace tera +} // namespace utils +} // namespace tera -#endif // TERA_UTILS_UTILS_CMD_H_ +#endif // TERA_UTILS_UTILS_CMD_H_ diff --git a/src/version.h b/src/version.h index 789f02611..6daebbc66 100644 --- a/src/version.h +++ b/src/version.h @@ -14,4 +14,4 @@ extern const char kCompiler[]; void PrintSystemVersion(); std::string SystemVersionInfo(); -#endif // TERA_VERSION_H_ +#endif // TERA_VERSION_H_ diff --git a/src/zk/dummy_zk_adapter.h b/src/zk/dummy_zk_adapter.h index fea520651..1979f69b5 100644 --- a/src/zk/dummy_zk_adapter.h +++ b/src/zk/dummy_zk_adapter.h @@ -10,23 +10,21 @@ namespace tera { namespace zk { -class DummyNodeZkAdapter: public ZooKeeperAdapter { -public: - virtual ~DummyNodeZkAdapter() {} +class DummyNodeZkAdapter : public ZooKeeperAdapter { + public: + virtual ~DummyNodeZkAdapter() {} -protected: - virtual void OnChildrenChanged(const std::string& path, - const std::vector& name_list, - const std::vector& data_list) {} - virtual void OnNodeValueChanged(const std::string& path, - const std::string& value) {} - virtual void OnNodeCreated(const std::string& path) {} - virtual void OnNodeDeleted(const std::string& path) {} - virtual void OnWatchFailed(const std::string& path, int watch_type, int err) {} - virtual void OnSessionTimeout() {} + protected: + virtual void OnChildrenChanged(const std::string& path, const std::vector& name_list, + const std::vector& data_list) {} + virtual void OnNodeValueChanged(const std::string& path, const std::string& value) {} + virtual void OnNodeCreated(const std::string& path) {} + virtual void OnNodeDeleted(const std::string& path) {} + virtual void OnWatchFailed(const std::string& path, int watch_type, int err) {} + virtual void OnSessionTimeout() {} }; -} // namespace zk -} // namespace tera +} // namespace zk +} // namespace tera -#endif // TERA_ZK_DUMMY_ZK_ADAPTER_H_ +#endif // TERA_ZK_DUMMY_ZK_ADAPTER_H_ diff --git a/src/zk/zk_adapter.cc b/src/zk/zk_adapter.cc old mode 100755 new mode 100644 index d1398f253..7dd1f4d9f --- a/src/zk/zk_adapter.cc +++ b/src/zk/zk_adapter.cc @@ -22,1336 +22,1307 @@ FILE* ZooKeeperAdapter::lib_log_output_ = NULL; Mutex ZooKeeperAdapter::lib_log_mutex_; struct ZooKeeperWatch { - pthread_mutex_t mutex; - bool watch_value; - bool watch_exist; - bool watch_child; - - ZooKeeperWatch() - : watch_value(false), watch_exist(false), watch_child(false) { - pthread_mutex_init(&mutex, NULL); - } + pthread_mutex_t mutex; + bool watch_value; + bool watch_exist; + bool watch_child; - ~ZooKeeperWatch() { - pthread_mutex_destroy(&mutex); - } + ZooKeeperWatch() : watch_value(false), watch_exist(false), watch_child(false) { + pthread_mutex_init(&mutex, NULL); + } + + ~ZooKeeperWatch() { pthread_mutex_destroy(&mutex); } }; ZooKeeperAdapter::ZooKeeperAdapter() - : handle_(NULL), state_(ZS_DISCONN), session_id_(-1), - state_cond_(&state_mutex_), session_timeout_(0), session_timer_id_(0), - thread_pool_(1) { -} - -ZooKeeperAdapter::~ZooKeeperAdapter() { - Finalize(); -} - -bool ZooKeeperAdapter::Init(const std::string& server_list, - const std::string& root_path, - uint32_t session_timeout, - const std::string& id, - int* zk_errno, + : handle_(NULL), + state_(ZS_DISCONN), + session_id_(-1), + state_cond_(&state_mutex_), + session_timeout_(0), + session_timer_id_(0), + thread_pool_(1) {} + +ZooKeeperAdapter::~ZooKeeperAdapter() { Finalize(); } + +bool ZooKeeperAdapter::Init(const std::string& server_list, const std::string& root_path, + uint32_t session_timeout, const std::string& id, int* zk_errno, int wait_timeout) { - MutexLock mutex(&state_mutex_); - - if (NULL != handle_) { - SetZkAdapterCode(ZE_INITED, zk_errno); - return false; - } + MutexLock mutex(&state_mutex_); - server_list_ = server_list; - root_path_ = root_path; - if (root_path_.size() < 1) { - LOG(ERROR) << "zookeeper_init fail : invalid "; - SetZkAdapterCode(ZE_ARG, zk_errno); - return false; - } - if (root_path_[root_path_.size() - 1] == '/') { - root_path_.resize(root_path_.size() - 1); - } - id_ = id; - handle_ = zookeeper_init((server_list_ + root_path_).c_str(), - EventCallBack, session_timeout, NULL, this, 0); - if (NULL == handle_) { - LOG(ERROR) << "zookeeper_init fail : " << zerror(errno); - SetZkAdapterCode(ZE_SESSION, zk_errno); - return false; - } - - while (state_ == ZS_DISCONN || state_ == ZS_CONNECTING) { - if (wait_timeout > 0) { - state_cond_.TimeWait(wait_timeout); - break; - } else { - state_cond_.Wait(); - } - } - - int code = ZE_OK; - // succe - if (state_ == ZS_CONNECTED) { - pthread_rwlock_init(&watcher_lock_, NULL); - pthread_rwlock_init(&locks_lock_, NULL); + if (NULL != handle_) { + SetZkAdapterCode(ZE_INITED, zk_errno); + return false; + } - LOG(INFO) << "zookeeper_init success"; - SetZkAdapterCode(code, zk_errno); - return true; - } + server_list_ = server_list; + root_path_ = root_path; + if (root_path_.size() < 1) { + LOG(ERROR) << "zookeeper_init fail : invalid "; + SetZkAdapterCode(ZE_ARG, zk_errno); + return false; + } + if (root_path_[root_path_.size() - 1] == '/') { + root_path_.resize(root_path_.size() - 1); + } + id_ = id; + handle_ = zookeeper_init((server_list_ + root_path_).c_str(), EventCallBack, session_timeout, + NULL, this, 0); + if (NULL == handle_) { + LOG(ERROR) << "zookeeper_init fail : " << zerror(errno); + SetZkAdapterCode(ZE_SESSION, zk_errno); + return false; + } - // fail - if (state_ == ZS_TIMEOUT) { - code = ZE_SESSION; - } else if (state_ == ZS_AUTH) { - code = ZE_AUTH; + while (state_ == ZS_DISCONN || state_ == ZS_CONNECTING) { + if (wait_timeout > 0) { + state_cond_.TimeWait(wait_timeout); + break; } else { - code = ZE_UNKNOWN; + state_cond_.Wait(); } - zookeeper_close(handle_); - handle_ = NULL; - state_ = ZS_DISCONN; + } + + int code = ZE_OK; + // succe + if (state_ == ZS_CONNECTED) { + pthread_rwlock_init(&watcher_lock_, NULL); + pthread_rwlock_init(&locks_lock_, NULL); - LOG(ERROR) << "zookeeper_init fail : " << ZkErrnoToString(code); + LOG(INFO) << "zookeeper_init success"; SetZkAdapterCode(code, zk_errno); - return false; + return true; + } + + // fail + if (state_ == ZS_TIMEOUT) { + code = ZE_SESSION; + } else if (state_ == ZS_AUTH) { + code = ZE_AUTH; + } else if (state_ == ZS_DISCONN || state_ == ZS_CONNECTING) { + code = ZE_NOT_INIT; + } else { + code = ZE_UNKNOWN; + } + zookeeper_close(handle_); + handle_ = NULL; + state_ = ZS_DISCONN; + + LOG(ERROR) << "zookeeper_init fail : " << ZkErrnoToString(code); + SetZkAdapterCode(code, zk_errno); + return false; } void ZooKeeperAdapter::Finalize() { - zhandle_t* old_handle; - { - MutexLock mutex(&state_mutex_); - if (NULL == handle_) { - return; - } - old_handle = handle_; - handle_ = NULL; - } - int ret = zookeeper_close(old_handle); - if (ret == ZOK) { - LOG(INFO) << "zookeeper_close success"; - } else { - LOG(ERROR) << "zookeeper_close fail : " << zerror(ret); + zhandle_t* old_handle; + { + MutexLock mutex(&state_mutex_); + if (NULL == handle_) { + return; } - { - MutexLock mutex(&state_mutex_); - pthread_rwlock_destroy(&locks_lock_); - pthread_rwlock_destroy(&watcher_lock_); - locks_.clear(); - watchers_.clear(); - state_ = ZS_DISCONN; - if (!thread_pool_.CancelTask(session_timer_id_)) { - LOG(WARNING) << "session timeout timer is triggered"; - return; - } - session_timer_id_ = 0; - LOG(INFO) << "zookeeper_session_timeout_timer has gone, safe to finalize."; + old_handle = handle_; + handle_ = NULL; + } + int ret = zookeeper_close(old_handle); + if (ret == ZOK) { + LOG(INFO) << "zookeeper_close success"; + } else { + LOG(ERROR) << "zookeeper_close fail : " << zerror(ret); + } + { + MutexLock mutex(&state_mutex_); + pthread_rwlock_destroy(&locks_lock_); + pthread_rwlock_destroy(&watcher_lock_); + locks_.clear(); + watchers_.clear(); + state_ = ZS_DISCONN; + if (session_timer_id_ != 0 && !thread_pool_.CancelTask(session_timer_id_)) { + LOG(WARNING) << "session timeout timer is triggered"; + return; } + session_timer_id_ = 0; + LOG(INFO) << "zookeeper_session_timeout_timer has gone, safe to finalize."; + } } -bool ZooKeeperAdapter::CreatePersistentNode(const std::string& path, - const std::string& value, +bool ZooKeeperAdapter::CreatePersistentNode(const std::string& path, const std::string& value, int* zk_errno) { - MutexLock mutex(&state_mutex_); - return Create(path, value, 0, NULL, zk_errno); + MutexLock mutex(&state_mutex_); + return Create(path, value, 0, NULL, zk_errno); } -bool ZooKeeperAdapter::CreateEphemeralNode(const std::string& path, - const std::string& value, +bool ZooKeeperAdapter::CreateEphemeralNode(const std::string& path, const std::string& value, int* zk_errno) { - MutexLock mutex(&state_mutex_); - return Create(path, value, ZOO_EPHEMERAL, NULL, zk_errno); + MutexLock mutex(&state_mutex_); + return Create(path, value, ZOO_EPHEMERAL, NULL, zk_errno); } bool ZooKeeperAdapter::CreateSequentialEphemeralNode(const std::string& path, const std::string& value, - std::string* ret_path, - int* zk_errno) { - MutexLock mutex(&state_mutex_); - return Create(path, value, ZOO_EPHEMERAL | ZOO_SEQUENCE, ret_path, zk_errno); + std::string* ret_path, int* zk_errno) { + MutexLock mutex(&state_mutex_); + return Create(path, value, ZOO_EPHEMERAL | ZOO_SEQUENCE, ret_path, zk_errno); } -bool ZooKeeperAdapter::Create(const std::string& path, const std::string& value, - int flag, std::string* ret_path, int* zk_errno) { - state_mutex_.AssertHeld(); - if (!ZooKeeperUtil::IsValidPath(path)) { - SetZkAdapterCode(ZE_ARG, zk_errno); - return false; - } - if (NULL == handle_) { - SetZkAdapterCode(ZE_NOT_INIT, zk_errno); - return false; - } - - int value_len = value.size(); - if (value_len == 0) { - value_len = -1; - } - - size_t root_path_len = root_path_.size(); - size_t path_len = path.size(); - char * ret_path_buf = NULL; - size_t ret_path_size = 0; - if (ret_path != NULL) { - ret_path_size = root_path_len + path_len + 11; - ret_path_buf = new char[ret_path_size]; - } - - int ret = zoo_create(handle_, path.c_str(), value.c_str(), value_len, - &ZOO_OPEN_ACL_UNSAFE, flag, ret_path_buf, - ret_path_size); - if (ZOK == ret) { - if (NULL != ret_path) { - size_t ret_path_len = strlen(ret_path_buf); - if (((flag & ZOO_SEQUENCE) == ZOO_SEQUENCE && - ret_path_len == root_path_len + path_len + 10) || - ((flag & ZOO_SEQUENCE) != ZOO_SEQUENCE && - ret_path_len == root_path_len + path_len)) { - // compatible to zk 3.3.x - *ret_path = ret_path_buf + root_path_len; - } else { - *ret_path = ret_path_buf; - } - } - LOG(INFO) << "zoo_create success"; - } else { - LOG(WARNING) << "zoo_create fail : " << zerror(ret); - } - - if (NULL != ret_path_buf) { - delete[] ret_path_buf; - } - - switch (ret) { - case ZOK: - SetZkAdapterCode(ZE_OK, zk_errno); - return true; - case ZNONODE: - SetZkAdapterCode(ZE_NO_PARENT, zk_errno); - return false; - case ZNODEEXISTS: - SetZkAdapterCode(ZE_EXIST, zk_errno); - return false; - case ZNOAUTH: - SetZkAdapterCode(ZE_AUTH, zk_errno); - return false; - case ZNOCHILDRENFOREPHEMERALS: - SetZkAdapterCode(ZE_ENTITY_PARENT, zk_errno); - return false; - case ZBADARGUMENTS: - SetZkAdapterCode(ZE_ARG, zk_errno); - return false; - case ZINVALIDSTATE: - SetZkAdapterCode(ZE_SESSION, zk_errno); - return false; - case ZMARSHALLINGERROR: - SetZkAdapterCode(ZE_SYSTEM, zk_errno); - return false; - default: - SetZkAdapterCode(ZE_UNKNOWN, zk_errno); - return false; - } +bool ZooKeeperAdapter::Create(const std::string& path, const std::string& value, int flag, + std::string* ret_path, int* zk_errno) { + state_mutex_.AssertHeld(); + if (!ZooKeeperUtil::IsValidPath(path)) { + SetZkAdapterCode(ZE_ARG, zk_errno); + return false; + } + if (NULL == handle_) { + SetZkAdapterCode(ZE_NOT_INIT, zk_errno); + return false; + } + + int value_len = value.size(); + if (value_len == 0) { + value_len = -1; + } + + size_t root_path_len = root_path_.size(); + size_t path_len = path.size(); + char* ret_path_buf = NULL; + size_t ret_path_size = 0; + if (ret_path != NULL) { + ret_path_size = root_path_len + path_len + 11; + ret_path_buf = new char[ret_path_size]; + } + + int ret = zoo_create(handle_, path.c_str(), value.c_str(), value_len, &ZOO_OPEN_ACL_UNSAFE, flag, + ret_path_buf, ret_path_size); + if (ZOK == ret) { + if (NULL != ret_path) { + size_t ret_path_len = strlen(ret_path_buf); + if (((flag & ZOO_SEQUENCE) == ZOO_SEQUENCE && + ret_path_len == root_path_len + path_len + 10) || + ((flag & ZOO_SEQUENCE) != ZOO_SEQUENCE && ret_path_len == root_path_len + path_len)) { + // compatible to zk 3.3.x + *ret_path = ret_path_buf + root_path_len; + } else { + *ret_path = ret_path_buf; + } + } + LOG(INFO) << "zoo_create success"; + } else { + LOG(WARNING) << "zoo_create fail : " << zerror(ret); + } + + if (NULL != ret_path_buf) { + delete[] ret_path_buf; + } + + switch (ret) { + case ZOK: + SetZkAdapterCode(ZE_OK, zk_errno); + return true; + case ZNONODE: + SetZkAdapterCode(ZE_NO_PARENT, zk_errno); + return false; + case ZNODEEXISTS: + SetZkAdapterCode(ZE_EXIST, zk_errno); + return false; + case ZNOAUTH: + SetZkAdapterCode(ZE_AUTH, zk_errno); + return false; + case ZNOCHILDRENFOREPHEMERALS: + SetZkAdapterCode(ZE_ENTITY_PARENT, zk_errno); + return false; + case ZBADARGUMENTS: + SetZkAdapterCode(ZE_ARG, zk_errno); + return false; + case ZINVALIDSTATE: + SetZkAdapterCode(ZE_SESSION, zk_errno); + return false; + case ZMARSHALLINGERROR: + SetZkAdapterCode(ZE_SYSTEM, zk_errno); + return false; + default: + SetZkAdapterCode(ZE_UNKNOWN, zk_errno); + return false; + } } bool ZooKeeperAdapter::DeleteNode(const std::string& path, int* zk_errno) { - MutexLock mutex(&state_mutex_); - if (!ZooKeeperUtil::IsValidPath(path)) { - SetZkAdapterCode(ZE_ARG, zk_errno); - return false; - } - if (NULL == handle_) { - SetZkAdapterCode(ZE_NOT_INIT, zk_errno); - return false; - } - - int ret = zoo_delete(handle_, path.c_str(), -1); - if (ZOK == ret) { - LOG(INFO) << "zoo_delete success"; - } else { - LOG(WARNING) << "zoo_delete fail : " << zerror(ret); - } - - switch (ret) { - case ZOK: - SetZkAdapterCode(ZE_OK, zk_errno); - return true; - case ZNONODE: - SetZkAdapterCode(ZE_NOT_EXIST, zk_errno); - return false; - case ZNOAUTH: - SetZkAdapterCode(ZE_AUTH, zk_errno); - return false; - case ZBADVERSION: // impossible - SetZkAdapterCode(ZE_UNKNOWN, zk_errno); - return false; - case ZNOTEMPTY: - SetZkAdapterCode(ZE_HAS_CHILD, zk_errno); - return false; - case ZBADARGUMENTS: - SetZkAdapterCode(ZE_ARG, zk_errno); - return false; - case ZINVALIDSTATE: - SetZkAdapterCode(ZE_SESSION, zk_errno); - return false; - case ZMARSHALLINGERROR: - SetZkAdapterCode(ZE_SYSTEM, zk_errno); - return false; - default: - SetZkAdapterCode(ZE_UNKNOWN, zk_errno); - return false; - } + MutexLock mutex(&state_mutex_); + if (!ZooKeeperUtil::IsValidPath(path)) { + SetZkAdapterCode(ZE_ARG, zk_errno); + return false; + } + if (NULL == handle_) { + SetZkAdapterCode(ZE_NOT_INIT, zk_errno); + return false; + } + + int ret = zoo_delete(handle_, path.c_str(), -1); + if (ZOK == ret) { + LOG(INFO) << "zoo_delete success"; + } else { + LOG(WARNING) << "zoo_delete fail : " << zerror(ret); + } + + switch (ret) { + case ZOK: + SetZkAdapterCode(ZE_OK, zk_errno); + return true; + case ZNONODE: + SetZkAdapterCode(ZE_NOT_EXIST, zk_errno); + return false; + case ZNOAUTH: + SetZkAdapterCode(ZE_AUTH, zk_errno); + return false; + case ZBADVERSION: // impossible + SetZkAdapterCode(ZE_UNKNOWN, zk_errno); + return false; + case ZNOTEMPTY: + SetZkAdapterCode(ZE_HAS_CHILD, zk_errno); + return false; + case ZBADARGUMENTS: + SetZkAdapterCode(ZE_ARG, zk_errno); + return false; + case ZINVALIDSTATE: + SetZkAdapterCode(ZE_SESSION, zk_errno); + return false; + case ZMARSHALLINGERROR: + SetZkAdapterCode(ZE_SYSTEM, zk_errno); + return false; + default: + SetZkAdapterCode(ZE_UNKNOWN, zk_errno); + return false; + } } -bool ZooKeeperAdapter::ReadNode(const std::string& path, std::string* value, - int* zk_errno) { - MutexLock mutex(&state_mutex_); - if (!ZooKeeperUtil::IsValidPath(path)) { - SetZkAdapterCode(ZE_ARG, zk_errno); - return false; - } - if (NULL == handle_) { - SetZkAdapterCode(ZE_NOT_INIT, zk_errno); - return false; - } +bool ZooKeeperAdapter::ReadNode(const std::string& path, std::string* value, int* zk_errno) { + MutexLock mutex(&state_mutex_); + if (!ZooKeeperUtil::IsValidPath(path)) { + SetZkAdapterCode(ZE_ARG, zk_errno); + return false; + } + if (NULL == handle_) { + SetZkAdapterCode(ZE_NOT_INIT, zk_errno); + return false; + } - int ret = GetWrapper(path, false, value); - SetZkAdapterCode(ret, zk_errno); - return (ZE_OK == ret); + int ret = GetWrapper(path, false, value); + SetZkAdapterCode(ret, zk_errno); + return (ZE_OK == ret); } -bool ZooKeeperAdapter::ReadAndWatchNode(const std::string& path, - std::string* value, int* zk_errno) { - MutexLock mutex(&state_mutex_); - if (!ZooKeeperUtil::IsValidPath(path)) { - SetZkAdapterCode(ZE_ARG, zk_errno); - return false; - } - if (NULL == handle_) { - SetZkAdapterCode(ZE_NOT_INIT, zk_errno); - return false; - } - - pthread_rwlock_wrlock(&watcher_lock_); - std::pair insert_ret = watchers_.insert( - std::pair(path, NULL)); - struct ZooKeeperWatch*& watch = insert_ret.first->second; - if (NULL == watch) { - watch = new ZooKeeperWatch; - } - pthread_mutex_lock(&watch->mutex); - pthread_rwlock_unlock(&watcher_lock_); - - bool is_watch = false; - if (!watch->watch_value) { - is_watch = true; - } else { - pthread_mutex_unlock(&watch->mutex); - LOG(INFO) << "watch has been set before"; - } - - int ret = GetWrapper(path, is_watch, value); - if (ZE_OK == ret) { - if (is_watch) { - watch->watch_value = true; - pthread_mutex_unlock(&watch->mutex); - } - SetZkAdapterCode(ZE_OK, zk_errno); - return true; - } else { - if (is_watch) { - pthread_mutex_unlock(&watch->mutex); - } - SetZkAdapterCode(ret, zk_errno); - return false; +bool ZooKeeperAdapter::ReadAndWatchNode(const std::string& path, std::string* value, + int* zk_errno) { + MutexLock mutex(&state_mutex_); + if (!ZooKeeperUtil::IsValidPath(path)) { + SetZkAdapterCode(ZE_ARG, zk_errno); + return false; + } + if (NULL == handle_) { + SetZkAdapterCode(ZE_NOT_INIT, zk_errno); + return false; + } + + pthread_rwlock_wrlock(&watcher_lock_); + std::pair insert_ret = + watchers_.insert(std::pair(path, NULL)); + struct ZooKeeperWatch*& watch = insert_ret.first->second; + if (NULL == watch) { + watch = new ZooKeeperWatch; + } + pthread_mutex_lock(&watch->mutex); + pthread_rwlock_unlock(&watcher_lock_); + + bool is_watch = false; + if (!watch->watch_value) { + is_watch = true; + } else { + pthread_mutex_unlock(&watch->mutex); + LOG(INFO) << "watch has been set before"; + } + + int ret = GetWrapper(path, is_watch, value); + if (ZE_OK == ret) { + if (is_watch) { + watch->watch_value = true; + pthread_mutex_unlock(&watch->mutex); + } + SetZkAdapterCode(ZE_OK, zk_errno); + return true; + } else { + if (is_watch) { + pthread_mutex_unlock(&watch->mutex); } + SetZkAdapterCode(ret, zk_errno); + return false; + } } -bool ZooKeeperAdapter::ListChildren(const std::string& path, - std::vector* child_list, - std::vector* value_list, - int* zk_errno) { - MutexLock mutex(&state_mutex_); - if (!ZooKeeperUtil::IsValidPath(path)) { - SetZkAdapterCode(ZE_ARG, zk_errno); - return false; - } - if (NULL == handle_) { - SetZkAdapterCode(ZE_NOT_INIT, zk_errno); - return false; - } +bool ZooKeeperAdapter::ListChildren(const std::string& path, std::vector* child_list, + std::vector* value_list, int* zk_errno) { + MutexLock mutex(&state_mutex_); + if (!ZooKeeperUtil::IsValidPath(path)) { + SetZkAdapterCode(ZE_ARG, zk_errno); + return false; + } + if (NULL == handle_) { + SetZkAdapterCode(ZE_NOT_INIT, zk_errno); + return false; + } - int ret = GetChildrenWrapper(path, false, child_list, value_list); - SetZkAdapterCode(ret, zk_errno); - return (ZE_OK == ret); + int ret = GetChildrenWrapper(path, false, child_list, value_list); + SetZkAdapterCode(ret, zk_errno); + return (ZE_OK == ret); } bool ZooKeeperAdapter::ListAndWatchChildren(const std::string& path, std::vector* child_list, - std::vector* value_list, - int* zk_errno) { - MutexLock mutex(&state_mutex_); - if (!ZooKeeperUtil::IsValidPath(path)) { - SetZkAdapterCode(ZE_ARG, zk_errno); - return false; - } - if (NULL == handle_) { - SetZkAdapterCode(ZE_NOT_INIT, zk_errno); - return false; - } - - pthread_rwlock_wrlock(&watcher_lock_); - std::pair insert_ret = watchers_.insert( - std::pair(path, NULL)); - struct ZooKeeperWatch*& watch = insert_ret.first->second; - if (NULL == watch) { - watch = new ZooKeeperWatch; - } - pthread_mutex_lock(&watch->mutex); - pthread_rwlock_unlock(&watcher_lock_); - - bool is_watch = false; - if (!watch->watch_child) { - is_watch = true; - } else { - pthread_mutex_unlock(&watch->mutex); - LOG(INFO)<< "is_watch has been set before"; - } - - int ret = GetChildrenWrapper(path, is_watch, child_list, value_list); - if (ZE_OK == ret) { - if (is_watch) { - watch->watch_child = true; - pthread_mutex_unlock(&watch->mutex); - } - SetZkAdapterCode(ret, zk_errno); - return true; - } else { - if (is_watch) { - pthread_mutex_unlock(&watch->mutex); - } - SetZkAdapterCode(ret, zk_errno); - return false; - } -} - -bool ZooKeeperAdapter::CheckExist(const std::string& path, bool* is_exist, - int* zk_errno) { - MutexLock mutex(&state_mutex_); - if (!ZooKeeperUtil::IsValidPath(path)) { - SetZkAdapterCode(ZE_ARG, zk_errno); - return false; - } - if (NULL == handle_) { - SetZkAdapterCode(ZE_NOT_INIT, zk_errno); - return false; + std::vector* value_list, int* zk_errno) { + MutexLock mutex(&state_mutex_); + if (!ZooKeeperUtil::IsValidPath(path)) { + SetZkAdapterCode(ZE_ARG, zk_errno); + return false; + } + if (NULL == handle_) { + SetZkAdapterCode(ZE_NOT_INIT, zk_errno); + return false; + } + + pthread_rwlock_wrlock(&watcher_lock_); + std::pair insert_ret = + watchers_.insert(std::pair(path, NULL)); + struct ZooKeeperWatch*& watch = insert_ret.first->second; + if (NULL == watch) { + watch = new ZooKeeperWatch; + } + pthread_mutex_lock(&watch->mutex); + pthread_rwlock_unlock(&watcher_lock_); + + bool is_watch = false; + if (!watch->watch_child) { + is_watch = true; + } else { + pthread_mutex_unlock(&watch->mutex); + LOG(INFO) << "is_watch has been set before"; + } + + int ret = GetChildrenWrapper(path, is_watch, child_list, value_list); + if (ZE_OK == ret) { + if (is_watch) { + watch->watch_child = true; + pthread_mutex_unlock(&watch->mutex); } - - int ret = ExistsWrapper(path, false, is_exist); SetZkAdapterCode(ret, zk_errno); - return (ZE_OK == ret); -} - -bool ZooKeeperAdapter::CheckAndWatchExist(const std::string& path, bool* is_exist, - int* zk_errno) { - MutexLock mutex(&state_mutex_); - if (!ZooKeeperUtil::IsValidPath(path)) { - SetZkAdapterCode(ZE_ARG, zk_errno); - return false; - } - if (NULL == handle_) { - SetZkAdapterCode(ZE_NOT_INIT, zk_errno); - return false; - } - - bool is_watch = false; - pthread_rwlock_wrlock(&watcher_lock_); - std::pair insert_ret = watchers_.insert( - std::pair(path, NULL)); - struct ZooKeeperWatch*& watch = insert_ret.first->second; - if (NULL == watch) { - watch = new ZooKeeperWatch; - } - pthread_mutex_lock(&watch->mutex); - pthread_rwlock_unlock(&watcher_lock_); - if (!watch->watch_exist) { - is_watch = true; - } else { - pthread_mutex_unlock(&watch->mutex); - LOG(INFO) << "is_watch has been set before"; - } - int ret = ExistsWrapper(path, is_watch, is_exist); - if (ZE_OK == ret) { - if (is_watch) { - watch->watch_exist = true; - pthread_mutex_unlock(&watch->mutex); - } - } else { - if (is_watch) { - pthread_mutex_unlock(&watch->mutex); - } + return true; + } else { + if (is_watch) { + pthread_mutex_unlock(&watch->mutex); } SetZkAdapterCode(ret, zk_errno); - return (ZE_OK == ret); + return false; + } } +bool ZooKeeperAdapter::CheckExist(const std::string& path, bool* is_exist, int* zk_errno) { + MutexLock mutex(&state_mutex_); + if (!ZooKeeperUtil::IsValidPath(path)) { + SetZkAdapterCode(ZE_ARG, zk_errno); + return false; + } + if (NULL == handle_) { + SetZkAdapterCode(ZE_NOT_INIT, zk_errno); + return false; + } -bool ZooKeeperAdapter::CheckAndWatchExistForLock(const std::string& path, - bool* is_exist, int* zk_errno) { - MutexLock mutex(&state_mutex_); - if (!ZooKeeperUtil::IsValidPath(path)) { - SetZkAdapterCode(ZE_ARG, zk_errno); - return false; - } - if (NULL == handle_) { - SetZkAdapterCode(ZE_NOT_INIT, zk_errno); - return false; - } - - int ret = ExistsWrapperForLock(path, is_exist); - SetZkAdapterCode(ret, zk_errno); - return (ZE_OK == ret); + int ret = ExistsWrapper(path, false, is_exist); + SetZkAdapterCode(ret, zk_errno); + return (ZE_OK == ret); } -bool ZooKeeperAdapter::WriteNode(const std::string& path, - const std::string& value, int* zk_errno) { - MutexLock mutex(&state_mutex_); - if (!ZooKeeperUtil::IsValidPath(path)) { - SetZkAdapterCode(ZE_ARG, zk_errno); - return false; - } - if (NULL == handle_) { - SetZkAdapterCode(ZE_NOT_INIT, zk_errno); - return false; - } - - int ret = zoo_set(handle_, path.c_str(), value.c_str(), value.size(), -1); - if (ZOK == ret) { - LOG(INFO) << "zoo_set success"; - } else { - LOG(WARNING) << "zoo_set fail : " << zerror(ret); - } - - switch (ret) { - case ZOK: - SetZkAdapterCode(ZE_OK, zk_errno); - return true; - case ZNONODE: - SetZkAdapterCode(ZE_NOT_EXIST, zk_errno); - return false; - case ZNOAUTH: - SetZkAdapterCode(ZE_AUTH, zk_errno); - return false; - case ZBADVERSION: // impossible - SetZkAdapterCode(ZE_UNKNOWN, zk_errno); - return false; - case ZBADARGUMENTS: - SetZkAdapterCode(ZE_ARG, zk_errno); - return false; - case ZINVALIDSTATE: - SetZkAdapterCode(ZE_SESSION, zk_errno); - return false; - case ZMARSHALLINGERROR: - SetZkAdapterCode(ZE_SYSTEM, zk_errno); - return false; - default: - SetZkAdapterCode(ZE_UNKNOWN, zk_errno); - return false; - } +bool ZooKeeperAdapter::CheckAndWatchExist(const std::string& path, bool* is_exist, int* zk_errno) { + MutexLock mutex(&state_mutex_); + if (!ZooKeeperUtil::IsValidPath(path)) { + SetZkAdapterCode(ZE_ARG, zk_errno); + return false; + } + if (NULL == handle_) { + SetZkAdapterCode(ZE_NOT_INIT, zk_errno); + return false; + } + + bool is_watch = false; + pthread_rwlock_wrlock(&watcher_lock_); + std::pair insert_ret = + watchers_.insert(std::pair(path, NULL)); + struct ZooKeeperWatch*& watch = insert_ret.first->second; + if (NULL == watch) { + watch = new ZooKeeperWatch; + } + pthread_mutex_lock(&watch->mutex); + pthread_rwlock_unlock(&watcher_lock_); + if (!watch->watch_exist) { + is_watch = true; + } else { + pthread_mutex_unlock(&watch->mutex); + LOG(INFO) << "is_watch has been set before"; + } + int ret = ExistsWrapper(path, is_watch, is_exist); + if (ZE_OK == ret) { + if (is_watch) { + watch->watch_exist = true; + pthread_mutex_unlock(&watch->mutex); + } + } else { + if (is_watch) { + pthread_mutex_unlock(&watch->mutex); + } + } + SetZkAdapterCode(ret, zk_errno); + return (ZE_OK == ret); } -void ZooKeeperAdapter::EventCallBack(zhandle_t * zh, int type, int state, - const char * node_path, void * watch_ctx) { - VLOG(5) << "recv event: type=" << ZooTypeToString(type) << ", state=" - << ZooStateToString(state) << ", path=[" << node_path << "]"; - - if (NULL == watch_ctx) { - return; - } - ZooKeeperAdapter* zk_adapter = (ZooKeeperAdapter*)watch_ctx; - - MutexLock mutex(&zk_adapter->state_mutex_); - if (zh != zk_adapter->handle_) { - LOG(WARNING)<< "zhandle not match"; - return; - } - // handle_ is guaranteed (by zk lib) to be valid within callback func. - // no need to check it. - - if (ZOO_SESSION_EVENT == type) { - zk_adapter->SessionEventCallBack(state); - return; - } +bool ZooKeeperAdapter::CheckAndWatchExistForLock(const std::string& path, bool* is_exist, + int* zk_errno) { + MutexLock mutex(&state_mutex_); + if (!ZooKeeperUtil::IsValidPath(path)) { + SetZkAdapterCode(ZE_ARG, zk_errno); + return false; + } + if (NULL == handle_) { + SetZkAdapterCode(ZE_NOT_INIT, zk_errno); + return false; + } - if (NULL == node_path) { - LOG(WARNING) << "path is missing"; - return; - } + int ret = ExistsWrapperForLock(path, is_exist); + SetZkAdapterCode(ret, zk_errno); + return (ZE_OK == ret); +} - std::string path = node_path; - if (!ZooKeeperUtil::IsValidPath(path)) { - LOG(WARNING) << "path is invalid"; - return; - } +bool ZooKeeperAdapter::WriteNode(const std::string& path, const std::string& value, int* zk_errno) { + MutexLock mutex(&state_mutex_); + if (!ZooKeeperUtil::IsValidPath(path)) { + SetZkAdapterCode(ZE_ARG, zk_errno); + return false; + } + if (NULL == handle_) { + SetZkAdapterCode(ZE_NOT_INIT, zk_errno); + return false; + } + + int ret = zoo_set(handle_, path.c_str(), value.c_str(), value.size(), -1); + if (ZOK == ret) { + LOG(INFO) << "zoo_set success"; + } else { + LOG(WARNING) << "zoo_set fail : " << zerror(ret); + } + + switch (ret) { + case ZOK: + SetZkAdapterCode(ZE_OK, zk_errno); + return true; + case ZNONODE: + SetZkAdapterCode(ZE_NOT_EXIST, zk_errno); + return false; + case ZNOAUTH: + SetZkAdapterCode(ZE_AUTH, zk_errno); + return false; + case ZBADVERSION: // impossible + SetZkAdapterCode(ZE_UNKNOWN, zk_errno); + return false; + case ZBADARGUMENTS: + SetZkAdapterCode(ZE_ARG, zk_errno); + return false; + case ZINVALIDSTATE: + SetZkAdapterCode(ZE_SESSION, zk_errno); + return false; + case ZMARSHALLINGERROR: + SetZkAdapterCode(ZE_SYSTEM, zk_errno); + return false; + default: + SetZkAdapterCode(ZE_UNKNOWN, zk_errno); + return false; + } +} - if (ZOO_CREATED_EVENT == type) { - zk_adapter->CreateEventCallBack(path); - } else if (ZOO_DELETED_EVENT == type) { - zk_adapter->DeleteEventCallBack(path); - } else if (ZOO_CHANGED_EVENT == type) { - zk_adapter->ChangeEventCallBack(path); - } else if (ZOO_CHILD_EVENT == type) { - zk_adapter->ChildEventCallBack(path); - } else if (ZOO_NOTWATCHING_EVENT == type) { - zk_adapter->WatchLostEventCallBack(state, path); - } else { - LOG(WARNING) << "unknown event type : " << type; - } +void ZooKeeperAdapter::EventCallBack(zhandle_t* zh, int type, int state, const char* node_path, + void* watch_ctx) { + VLOG(5) << "recv event: type=" << ZooTypeToString(type) << ", state=" << ZooStateToString(state) + << ", path=[" << node_path << "]"; + + if (NULL == watch_ctx) { + return; + } + ZooKeeperAdapter* zk_adapter = (ZooKeeperAdapter*)watch_ctx; + + MutexLock mutex(&zk_adapter->state_mutex_); + if (zh != zk_adapter->handle_) { + LOG(WARNING) << "zhandle not match"; + return; + } + // handle_ is guaranteed (by zk lib) to be valid within callback func. + // no need to check it. + + if (ZOO_SESSION_EVENT == type) { + zk_adapter->SessionEventCallBack(state); + return; + } + + if (NULL == node_path) { + LOG(WARNING) << "path is missing"; + return; + } + + std::string path = node_path; + if (!ZooKeeperUtil::IsValidPath(path)) { + LOG(WARNING) << "path is invalid"; + return; + } + + if (ZOO_CREATED_EVENT == type) { + zk_adapter->CreateEventCallBack(path); + } else if (ZOO_DELETED_EVENT == type) { + zk_adapter->DeleteEventCallBack(path); + } else if (ZOO_CHANGED_EVENT == type) { + zk_adapter->ChangeEventCallBack(path); + } else if (ZOO_CHILD_EVENT == type) { + zk_adapter->ChildEventCallBack(path); + } else if (ZOO_NOTWATCHING_EVENT == type) { + zk_adapter->WatchLostEventCallBack(state, path); + } else { + LOG(WARNING) << "unknown event type : " << type; + } } void ZooKeeperAdapter::CreateEventCallBack(std::string path) { - VLOG(5) << "CreateEventCallBack: path=[" << path << "]"; - - pthread_rwlock_wrlock(&watcher_lock_); - WatcherMap::iterator itor = watchers_.find(path); - if (itor == watchers_.end()) { - pthread_rwlock_unlock(&watcher_lock_); - LOG(INFO) << "watch not match"; - return; - } + VLOG(5) << "CreateEventCallBack: path=[" << path << "]"; - ZooKeeperWatch * watch = itor->second; - pthread_mutex_lock(&watch->mutex); + pthread_rwlock_wrlock(&watcher_lock_); + WatcherMap::iterator itor = watchers_.find(path); + if (itor == watchers_.end()) { pthread_rwlock_unlock(&watcher_lock_); - if (!watch->watch_exist) { - pthread_mutex_unlock(&watch->mutex); - LOG(WARNING) << "watch not match"; - return; - } - - bool is_exist; - int ret = ExistsWrapper(path, true, &is_exist); - if (ZE_OK == ret) { - pthread_mutex_unlock(&watch->mutex); - state_mutex_.Unlock(); - OnNodeCreated(path); - if (!is_exist) { - OnNodeDeleted(path); - } - state_mutex_.Lock(); - } else { - watch->watch_exist = false; - pthread_mutex_unlock(&watch->mutex); - TryCleanWatch(path); - state_mutex_.Unlock(); - OnWatchFailed(path, ZT_WATCH_EXIST, ret); - state_mutex_.Lock(); + LOG(INFO) << "watch not match"; + return; + } + + ZooKeeperWatch* watch = itor->second; + pthread_mutex_lock(&watch->mutex); + pthread_rwlock_unlock(&watcher_lock_); + if (!watch->watch_exist) { + pthread_mutex_unlock(&watch->mutex); + LOG(WARNING) << "watch not match"; + return; + } + + bool is_exist; + int ret = ExistsWrapper(path, true, &is_exist); + if (ZE_OK == ret) { + pthread_mutex_unlock(&watch->mutex); + state_mutex_.Unlock(); + OnNodeCreated(path); + if (!is_exist) { + OnNodeDeleted(path); } + state_mutex_.Lock(); + } else { + watch->watch_exist = false; + pthread_mutex_unlock(&watch->mutex); + TryCleanWatch(path); + state_mutex_.Unlock(); + OnWatchFailed(path, ZT_WATCH_EXIST, ret); + state_mutex_.Lock(); + } } void ZooKeeperAdapter::DeleteEventCallBack(std::string path) { - VLOG(5) << "DeleteEventCallBack: path=[" << path << "]"; - - pthread_rwlock_wrlock(&watcher_lock_); - WatcherMap::iterator itor = watchers_.find(path); - if (itor == watchers_.end()) { - pthread_rwlock_unlock(&watcher_lock_); - LOG(INFO) << "watch not match"; - return; - } + VLOG(5) << "DeleteEventCallBack: path=[" << path << "]"; - ZooKeeperWatch * watch = itor->second; - pthread_mutex_lock(&watch->mutex); + pthread_rwlock_wrlock(&watcher_lock_); + WatcherMap::iterator itor = watchers_.find(path); + if (itor == watchers_.end()) { pthread_rwlock_unlock(&watcher_lock_); - - if (!watch->watch_exist && !watch->watch_value && !watch->watch_child) { - pthread_mutex_unlock(&watch->mutex); - LOG(WARNING) << "watch not match"; - return; + LOG(INFO) << "watch not match"; + return; + } + + ZooKeeperWatch* watch = itor->second; + pthread_mutex_lock(&watch->mutex); + pthread_rwlock_unlock(&watcher_lock_); + + if (!watch->watch_exist && !watch->watch_value && !watch->watch_child) { + pthread_mutex_unlock(&watch->mutex); + LOG(WARNING) << "watch not match"; + return; + } + + bool is_watch_exist = watch->watch_exist; + bool is_exist; + int ret = ExistsWrapper(path, true, &is_exist); + if (ZE_OK == ret) { + watch->watch_value = false; + watch->watch_child = false; + pthread_mutex_unlock(&watch->mutex); + if (!is_watch_exist) { + TryCleanWatch(path); } - - bool is_watch_exist = watch->watch_exist; - bool is_exist; - int ret = ExistsWrapper(path, true, &is_exist); - if (ZE_OK == ret) { - watch->watch_value = false; - watch->watch_child = false; - pthread_mutex_unlock(&watch->mutex); - if (!is_watch_exist) { - TryCleanWatch(path); - } - state_mutex_.Unlock(); - OnNodeDeleted(path); - if (is_exist && is_watch_exist) { - OnNodeCreated(path); - } - state_mutex_.Lock(); - } else { - watch->watch_exist = false; - watch->watch_value = false; - watch->watch_child = false; - pthread_mutex_unlock(&watch->mutex); - TryCleanWatch(path); - state_mutex_.Unlock(); - OnNodeDeleted(path); - if (is_watch_exist) { - OnWatchFailed(path, ZT_WATCH_EXIST, ret); - } - state_mutex_.Lock(); + state_mutex_.Unlock(); + OnNodeDeleted(path); + if (is_exist && is_watch_exist) { + OnNodeCreated(path); } + state_mutex_.Lock(); + } else { + watch->watch_exist = false; + watch->watch_value = false; + watch->watch_child = false; + pthread_mutex_unlock(&watch->mutex); + TryCleanWatch(path); + state_mutex_.Unlock(); + OnNodeDeleted(path); + if (is_watch_exist) { + OnWatchFailed(path, ZT_WATCH_EXIST, ret); + } + state_mutex_.Lock(); + } } void ZooKeeperAdapter::ChangeEventCallBack(std::string path) { - VLOG(5) << "ChangeEventCallBack: path=[" << path << "]"; - - pthread_rwlock_wrlock(&watcher_lock_); - WatcherMap::iterator itor = watchers_.find(path); - if (itor == watchers_.end()) { - pthread_rwlock_unlock(&watcher_lock_); - LOG(INFO) << "watch not match"; - return; - } + VLOG(5) << "ChangeEventCallBack: path=[" << path << "]"; - ZooKeeperWatch * watch = itor->second; - pthread_mutex_lock(&watch->mutex); + pthread_rwlock_wrlock(&watcher_lock_); + WatcherMap::iterator itor = watchers_.find(path); + if (itor == watchers_.end()) { pthread_rwlock_unlock(&watcher_lock_); - - if (!watch->watch_value) { - pthread_mutex_unlock(&watch->mutex); - LOG(WARNING) << "watch not match"; - return; - } - - std::string value; - int ret = GetWrapper(path, true, &value); - if (ZE_OK == ret) { - pthread_mutex_unlock(&watch->mutex); - state_mutex_.Unlock(); - OnNodeValueChanged(path, value); - state_mutex_.Lock(); - } else if (ZE_NOT_EXIST == ret) { - watch->watch_value = false; - watch->watch_child = false; - pthread_mutex_unlock(&watch->mutex); - TryCleanWatch(path); - state_mutex_.Unlock(); - OnNodeDeleted(path); - state_mutex_.Lock(); - } else { - watch->watch_value = false; - pthread_mutex_unlock(&watch->mutex); - TryCleanWatch(path); - state_mutex_.Unlock(); - OnWatchFailed(path, ZT_WATCH_VALUE, ret); - state_mutex_.Lock(); - } + LOG(INFO) << "watch not match"; + return; + } + + ZooKeeperWatch* watch = itor->second; + pthread_mutex_lock(&watch->mutex); + pthread_rwlock_unlock(&watcher_lock_); + + if (!watch->watch_value) { + pthread_mutex_unlock(&watch->mutex); + LOG(WARNING) << "watch not match"; + return; + } + + std::string value; + int ret = GetWrapper(path, true, &value); + if (ZE_OK == ret) { + pthread_mutex_unlock(&watch->mutex); + state_mutex_.Unlock(); + OnNodeValueChanged(path, value); + state_mutex_.Lock(); + } else if (ZE_NOT_EXIST == ret) { + watch->watch_value = false; + watch->watch_child = false; + pthread_mutex_unlock(&watch->mutex); + TryCleanWatch(path); + state_mutex_.Unlock(); + OnNodeDeleted(path); + state_mutex_.Lock(); + } else { + watch->watch_value = false; + pthread_mutex_unlock(&watch->mutex); + TryCleanWatch(path); + state_mutex_.Unlock(); + OnWatchFailed(path, ZT_WATCH_VALUE, ret); + state_mutex_.Lock(); + } } void ZooKeeperAdapter::ChildEventCallBack(std::string path) { - VLOG(5) << "ChildEventCallBack: path=[" << path << "]"; - - pthread_rwlock_wrlock(&watcher_lock_); - WatcherMap::iterator itor = watchers_.find(path); - if (itor == watchers_.end()) { - pthread_rwlock_unlock(&watcher_lock_); - LOG(INFO) << "watch not match"; - return; - } + VLOG(5) << "ChildEventCallBack: path=[" << path << "]"; - ZooKeeperWatch * watch = itor->second; - pthread_mutex_lock(&watch->mutex); + pthread_rwlock_wrlock(&watcher_lock_); + WatcherMap::iterator itor = watchers_.find(path); + if (itor == watchers_.end()) { pthread_rwlock_unlock(&watcher_lock_); - - if (!watch->watch_child) { - pthread_mutex_unlock(&watch->mutex); - LOG(WARNING) << "watch not match"; - return; - } - - std::vector child_list; - std::vector value_list; - int ret = GetChildrenWrapper(path, true, &child_list, &value_list); - if (ZE_OK == ret) { - pthread_mutex_unlock(&watch->mutex); - state_mutex_.Unlock(); - OnChildrenChanged(path, child_list, value_list); - state_mutex_.Lock(); - } else if (ZE_NOT_EXIST == ret) { - watch->watch_child = false; - watch->watch_value = false; - pthread_mutex_unlock(&watch->mutex); - TryCleanWatch(path); - state_mutex_.Unlock(); - OnNodeDeleted(path); - state_mutex_.Lock(); - } else { - watch->watch_child = false; - pthread_mutex_unlock(&watch->mutex); - TryCleanWatch(path); - state_mutex_.Unlock(); - OnWatchFailed(path, ZT_WATCH_CHILD, ret); - state_mutex_.Lock(); - } + LOG(INFO) << "watch not match"; + return; + } + + ZooKeeperWatch* watch = itor->second; + pthread_mutex_lock(&watch->mutex); + pthread_rwlock_unlock(&watcher_lock_); + + if (!watch->watch_child) { + pthread_mutex_unlock(&watch->mutex); + LOG(WARNING) << "watch not match"; + return; + } + + std::vector child_list; + std::vector value_list; + int ret = GetChildrenWrapper(path, true, &child_list, &value_list); + if (ZE_OK == ret) { + pthread_mutex_unlock(&watch->mutex); + state_mutex_.Unlock(); + OnChildrenChanged(path, child_list, value_list); + state_mutex_.Lock(); + } else if (ZE_NOT_EXIST == ret) { + watch->watch_child = false; + watch->watch_value = false; + pthread_mutex_unlock(&watch->mutex); + TryCleanWatch(path); + state_mutex_.Unlock(); + OnNodeDeleted(path); + state_mutex_.Lock(); + } else { + watch->watch_child = false; + pthread_mutex_unlock(&watch->mutex); + TryCleanWatch(path); + state_mutex_.Unlock(); + OnWatchFailed(path, ZT_WATCH_CHILD, ret); + state_mutex_.Lock(); + } } void ZooKeeperAdapter::SessionTimeoutWrapper() { - this->OnSessionTimeout(); - MutexLock mutex(&state_mutex_); - session_timer_id_ = 0; + session_timer_id_ = 0; + this->OnSessionTimeout(); + MutexLock mutex(&state_mutex_); } void ZooKeeperAdapter::SessionEventCallBack(int state) { - if (ZOO_CONNECTED_STATE == state) { - if (ZS_CONNECTING == state_) { - if (!thread_pool_.CancelTask(session_timer_id_)) { - LOG(WARNING) << "session timeout timer is triggered"; - return; - } - session_timer_id_ = 0; - } - const clientid_t *cid = zoo_client_id(handle_); - if (cid == NULL) { - LOG(WARNING) << "zoo_client_id fail"; - return; - } - session_id_ = cid->client_id; - state_ = ZS_CONNECTED; - state_cond_.Signal(); - session_timeout_ = zoo_recv_timeout(handle_); - LOG(INFO) << "connected to zk server, session timeout: " - << session_timeout_ << " ms"; - } else if (ZOO_CONNECTING_STATE == state || ZOO_ASSOCIATING_STATE == state) { - if (ZS_CONNECTED == state_) { - LOG(INFO) << "disconnect from zk server, enable timer: " - << session_timeout_ << " ms"; - ThreadPool::Task task = - std::bind(&ZooKeeperAdapter::SessionTimeoutWrapper, this); - session_timer_id_ = thread_pool_.DelayTask(session_timeout_, task); - } - session_id_ = -1; - state_ = ZS_CONNECTING; - state_cond_.Signal(); - } else if (ZOO_AUTH_FAILED_STATE == state) { - session_id_ = -1; - state_ = ZS_AUTH; - state_cond_.Signal(); - } else if (ZOO_EXPIRED_SESSION_STATE == state) { - session_id_ = -1; - state_ = ZS_TIMEOUT; - state_cond_.Signal(); - state_mutex_.Unlock(); - OnSessionTimeout(); - state_mutex_.Lock(); - } + if (ZOO_CONNECTED_STATE == state) { + if (ZS_CONNECTING == state_) { + if (!thread_pool_.CancelTask(session_timer_id_)) { + LOG(WARNING) << "session timeout timer is triggered"; + return; + } + session_timer_id_ = 0; + } + const clientid_t* cid = zoo_client_id(handle_); + if (cid == NULL) { + LOG(WARNING) << "zoo_client_id fail"; + return; + } + session_id_ = cid->client_id; + state_ = ZS_CONNECTED; + state_cond_.Signal(); + session_timeout_ = zoo_recv_timeout(handle_); + LOG(INFO) << "connected to zk server, session timeout: " << session_timeout_ << " ms"; + } else if (ZOO_CONNECTING_STATE == state || ZOO_ASSOCIATING_STATE == state) { + if (ZS_CONNECTED == state_) { + LOG(INFO) << "disconnect from zk server, enable timer: " << session_timeout_ << " ms"; + ThreadPool::Task task = std::bind(&ZooKeeperAdapter::SessionTimeoutWrapper, this); + session_timer_id_ = thread_pool_.DelayTask(session_timeout_, task); + } + session_id_ = -1; + state_ = ZS_CONNECTING; + state_cond_.Signal(); + } else if (ZOO_AUTH_FAILED_STATE == state) { + session_id_ = -1; + state_ = ZS_AUTH; + state_cond_.Signal(); + } else if (ZOO_EXPIRED_SESSION_STATE == state) { + session_id_ = -1; + state_ = ZS_TIMEOUT; + state_cond_.Signal(); + state_mutex_.Unlock(); + OnSessionTimeout(); + state_mutex_.Lock(); + } } void ZooKeeperAdapter::WatchLostEventCallBack(int state, std::string path) { - // shit... + // shit... } -bool ZooKeeperAdapter::WatchZkLock(const std::string &path, int* zk_errno) { - LOG(INFO) << "watch zk lock, path = " << path; - MutexLock mutex(&state_mutex_); - if (!ZooKeeperUtil::IsValidPath(path)) { - SetZkAdapterCode(ZE_ARG, zk_errno); - return false; - } - if (NULL == handle_) { - SetZkAdapterCode(ZE_NOT_INIT, zk_errno); - return false; - } +bool ZooKeeperAdapter::WatchZkLock(const std::string& path, int* zk_errno) { + LOG(INFO) << "watch zk lock, path = " << path; + MutexLock mutex(&state_mutex_); + if (!ZooKeeperUtil::IsValidPath(path)) { + SetZkAdapterCode(ZE_ARG, zk_errno); + return false; + } + if (NULL == handle_) { + SetZkAdapterCode(ZE_NOT_INIT, zk_errno); + return false; + } - pthread_rwlock_wrlock(&locks_lock_); - LockMap::iterator itor = locks_.find(path); - if (itor == locks_.end()) { - pthread_rwlock_unlock(&locks_lock_); - LOG(WARNING) << "lock not exist"; - SetZkAdapterCode(ZE_LOCK_NOT_EXIST, zk_errno); - return false; - } + pthread_rwlock_wrlock(&locks_lock_); + LockMap::iterator itor = locks_.find(path); + if (itor == locks_.end()) { + pthread_rwlock_unlock(&locks_lock_); + LOG(WARNING) << "lock not exist"; + SetZkAdapterCode(ZE_LOCK_NOT_EXIST, zk_errno); + return false; + } - ZooKeeperLock * lock = itor->second; - state_mutex_.Unlock(); - if (!lock->CheckAndWatchNodeForLock(zk_errno)) { - LOG(WARNING) << "watch master lock failed"; - state_mutex_.Lock(); - delete lock; - locks_.erase(itor); - pthread_rwlock_unlock(&locks_lock_); - return false; - } else { - state_mutex_.Lock(); - pthread_rwlock_unlock(&locks_lock_); - return true; - } + ZooKeeperLock* lock = itor->second; + state_mutex_.Unlock(); + if (!lock->CheckAndWatchNodeForLock(zk_errno)) { + LOG(WARNING) << "watch master lock failed"; + state_mutex_.Lock(); + delete lock; + locks_.erase(itor); + pthread_rwlock_unlock(&locks_lock_); + return false; + } else { + state_mutex_.Lock(); + pthread_rwlock_unlock(&locks_lock_); + return true; + } } -bool ZooKeeperAdapter::SyncLock(const std::string& path, int* zk_errno, - int32_t timeout) { - MutexLock mutex(&state_mutex_); - if (!ZooKeeperUtil::IsValidPath(path)) { - SetZkAdapterCode(ZE_ARG, zk_errno); - return false; - } - if (NULL == handle_) { - SetZkAdapterCode(ZE_NOT_INIT, zk_errno); - return false; - } - bool ret_val; - - pthread_rwlock_wrlock(&locks_lock_); - std::pair insert_ret = locks_.insert( - std::pair(path, NULL)); - if (!insert_ret.second) { - ZooKeeperLock * lock = insert_ret.first->second; - if (lock == NULL || !lock->IsAcquired()) { - LOG(INFO) << "lock exists but is not acquired"; - } else { - LOG(INFO) << "lock has been acquired"; - } - pthread_rwlock_unlock(&locks_lock_); - SetZkAdapterCode(ZE_LOCK_EXIST, zk_errno); - return false; +bool ZooKeeperAdapter::SyncLock(const std::string& path, int* zk_errno, int32_t timeout) { + MutexLock mutex(&state_mutex_); + if (!ZooKeeperUtil::IsValidPath(path)) { + SetZkAdapterCode(ZE_ARG, zk_errno); + return false; + } + if (NULL == handle_) { + SetZkAdapterCode(ZE_NOT_INIT, zk_errno); + return false; + } + bool ret_val; + + pthread_rwlock_wrlock(&locks_lock_); + std::pair insert_ret = + locks_.insert(std::pair(path, NULL)); + if (!insert_ret.second) { + ZooKeeperLock* lock = insert_ret.first->second; + if (lock == NULL || !lock->IsAcquired()) { + LOG(INFO) << "lock exists but is not acquired"; + } else { + LOG(INFO) << "lock has been acquired"; } pthread_rwlock_unlock(&locks_lock_); + SetZkAdapterCode(ZE_LOCK_EXIST, zk_errno); + return false; + } + pthread_rwlock_unlock(&locks_lock_); - timeval start_time, end_time; - gettimeofday(&start_time, NULL); - end_time.tv_sec = start_time.tv_sec + timeout; - end_time.tv_usec = start_time.tv_usec; + timeval start_time, end_time; + gettimeofday(&start_time, NULL); + end_time.tv_sec = start_time.tv_sec + timeout; + end_time.tv_usec = start_time.tv_usec; - LockCompletion * callback_param = new LockCompletion(); - ZooKeeperLock * lock = new ZooKeeperLock(this, path, SyncLockCallback, - callback_param); - callback_param->SetLock(lock); + LockCompletion* callback_param = new LockCompletion(); + ZooKeeperLock* lock = new ZooKeeperLock(this, path, SyncLockCallback, callback_param); + callback_param->SetLock(lock); - state_mutex_.Unlock(); - if (!lock->BeginLock(zk_errno)) { - state_mutex_.Lock(); - delete callback_param; - delete lock; - pthread_rwlock_wrlock(&locks_lock_); - locks_.erase(path); - pthread_rwlock_unlock(&locks_lock_); - return false; - } + state_mutex_.Unlock(); + if (!lock->BeginLock(zk_errno)) { state_mutex_.Lock(); - + delete callback_param; + delete lock; pthread_rwlock_wrlock(&locks_lock_); - locks_[path] = lock; + locks_.erase(path); pthread_rwlock_unlock(&locks_lock_); - - timeval now_time; - gettimeofday(&now_time, NULL); - if (timeout > 0 && (now_time.tv_sec > end_time.tv_sec - || (now_time.tv_sec == end_time.tv_sec && now_time.tv_usec - > end_time.tv_usec))) { - if (lock->IsAcquired()) { - SetZkAdapterCode(ZE_OK, zk_errno); - return true; - } else { - SetZkAdapterCode(ZE_LOCK_TIMEOUT, zk_errno); - return false; - } - } - - state_mutex_.Unlock(); - if (timeout > 0) { - ret_val = callback_param->Wait(zk_errno, &end_time); + return false; + } + state_mutex_.Lock(); + + pthread_rwlock_wrlock(&locks_lock_); + locks_[path] = lock; + pthread_rwlock_unlock(&locks_lock_); + + timeval now_time; + gettimeofday(&now_time, NULL); + if (timeout > 0 && (now_time.tv_sec > end_time.tv_sec || (now_time.tv_sec == end_time.tv_sec && + now_time.tv_usec > end_time.tv_usec))) { + if (lock->IsAcquired()) { + SetZkAdapterCode(ZE_OK, zk_errno); + return true; } else { - ret_val = callback_param->Wait(zk_errno); - } - state_mutex_.Lock(); - return ret_val; + SetZkAdapterCode(ZE_LOCK_TIMEOUT, zk_errno); + return false; + } + } + + state_mutex_.Unlock(); + if (timeout > 0) { + ret_val = callback_param->Wait(zk_errno, &end_time); + } else { + ret_val = callback_param->Wait(zk_errno); + } + state_mutex_.Lock(); + return ret_val; } -bool ZooKeeperAdapter::AsyncLock(const std::string& path, - LOCK_CALLBACK callback_func, - void * callback_param, int* zk_errno) { - MutexLock mutex(&state_mutex_); - if (!ZooKeeperUtil::IsValidPath(path)) { - SetZkAdapterCode(ZE_ARG, zk_errno); - return false; - } - if (NULL == handle_) { - SetZkAdapterCode(ZE_NOT_INIT, zk_errno); - return false; +bool ZooKeeperAdapter::AsyncLock(const std::string& path, LOCK_CALLBACK callback_func, + void* callback_param, int* zk_errno) { + MutexLock mutex(&state_mutex_); + if (!ZooKeeperUtil::IsValidPath(path)) { + SetZkAdapterCode(ZE_ARG, zk_errno); + return false; + } + if (NULL == handle_) { + SetZkAdapterCode(ZE_NOT_INIT, zk_errno); + return false; + } + + pthread_rwlock_wrlock(&locks_lock_); + std::pair insert_ret = + locks_.insert(std::pair(path, NULL)); + if (!insert_ret.second) { + ZooKeeperLock* lock = insert_ret.first->second; + if (lock == NULL || !lock->IsAcquired()) { + LOG(INFO) << "lock exists but is not acquired"; + } else { + LOG(INFO) << "lock has been acquired"; } - + pthread_rwlock_unlock(&locks_lock_); + SetZkAdapterCode(ZE_LOCK_EXIST, zk_errno); + return false; + } + pthread_rwlock_unlock(&locks_lock_); + ZooKeeperLock* lock = new ZooKeeperLock(this, path, callback_func, callback_param); + state_mutex_.Unlock(); + if (!lock->BeginLock(zk_errno)) { + state_mutex_.Lock(); pthread_rwlock_wrlock(&locks_lock_); - std::pair insert_ret = locks_.insert( - std::pair(path, NULL)); - if (!insert_ret.second) { - ZooKeeperLock * lock = insert_ret.first->second; - if (lock == NULL || !lock->IsAcquired()) { - LOG(INFO) << "lock exists but is not acquired"; - } else { - LOG(INFO) << "lock has been acquired"; - } - pthread_rwlock_unlock(&locks_lock_); - SetZkAdapterCode(ZE_LOCK_EXIST, zk_errno); - return false; - } + locks_.erase(path); pthread_rwlock_unlock(&locks_lock_); - ZooKeeperLock * lock = new ZooKeeperLock(this, path, callback_func, - callback_param); - state_mutex_.Unlock(); - if (!lock->BeginLock(zk_errno)) { - state_mutex_.Lock(); - pthread_rwlock_wrlock(&locks_lock_); - locks_.erase(path); - pthread_rwlock_unlock(&locks_lock_); - delete lock; - return false; - } else { - state_mutex_.Lock(); - pthread_rwlock_wrlock(&locks_lock_); - locks_[path] = lock; - pthread_rwlock_unlock(&locks_lock_); - return true; - } + delete lock; + return false; + } else { + state_mutex_.Lock(); + pthread_rwlock_wrlock(&locks_lock_); + locks_[path] = lock; + pthread_rwlock_unlock(&locks_lock_); + return true; + } } -void ZooKeeperAdapter::SyncLockCallback(const std::string& path, int err, - void * param) { - LockCompletion * comp = (LockCompletion *) param; - comp->Signal(err); +void ZooKeeperAdapter::SyncLockCallback(const std::string& path, int err, void* param) { + LockCompletion* comp = (LockCompletion*)param; + comp->Signal(err); } bool ZooKeeperAdapter::CancelLock(const std::string& path, int* zk_errno) { - MutexLock mutex(&state_mutex_); - if (!ZooKeeperUtil::IsValidPath(path)) { - SetZkAdapterCode(ZE_ARG, zk_errno); - return false; - } - if (NULL == handle_) { - SetZkAdapterCode(ZE_NOT_INIT, zk_errno); - return false; - } + MutexLock mutex(&state_mutex_); + if (!ZooKeeperUtil::IsValidPath(path)) { + SetZkAdapterCode(ZE_ARG, zk_errno); + return false; + } + if (NULL == handle_) { + SetZkAdapterCode(ZE_NOT_INIT, zk_errno); + return false; + } - pthread_rwlock_wrlock(&locks_lock_); - LockMap::iterator itor = locks_.find(path); - if (itor == locks_.end()) { - pthread_rwlock_unlock(&locks_lock_); - LOG(WARNING) << "lock not exist"; - SetZkAdapterCode(ZE_LOCK_NOT_EXIST, zk_errno); - return false; - } + pthread_rwlock_wrlock(&locks_lock_); + LockMap::iterator itor = locks_.find(path); + if (itor == locks_.end()) { + pthread_rwlock_unlock(&locks_lock_); + LOG(WARNING) << "lock not exist"; + SetZkAdapterCode(ZE_LOCK_NOT_EXIST, zk_errno); + return false; + } - ZooKeeperLock * lock = itor->second; - state_mutex_.Unlock(); - if (!lock->CancelLock(zk_errno)) { - state_mutex_.Lock(); - delete lock; - locks_.erase(itor); - pthread_rwlock_unlock(&locks_lock_); - return false; - } else { - state_mutex_.Lock(); - pthread_rwlock_unlock(&locks_lock_); - return true; - } + ZooKeeperLock* lock = itor->second; + state_mutex_.Unlock(); + if (!lock->CancelLock(zk_errno)) { + state_mutex_.Lock(); + delete lock; + locks_.erase(itor); + pthread_rwlock_unlock(&locks_lock_); + return false; + } else { + state_mutex_.Lock(); + pthread_rwlock_unlock(&locks_lock_); + return true; + } } bool ZooKeeperAdapter::Unlock(const std::string& path, int* zk_errno) { - MutexLock mutex(&state_mutex_); - if (!ZooKeeperUtil::IsValidPath(path)) { - SetZkAdapterCode(ZE_ARG, zk_errno); - return false; - } - if (NULL == handle_) { - SetZkAdapterCode(ZE_NOT_INIT, zk_errno); - return false; - } + MutexLock mutex(&state_mutex_); + if (!ZooKeeperUtil::IsValidPath(path)) { + SetZkAdapterCode(ZE_ARG, zk_errno); + return false; + } + if (NULL == handle_) { + SetZkAdapterCode(ZE_NOT_INIT, zk_errno); + return false; + } - pthread_rwlock_wrlock(&locks_lock_); - LockMap::iterator itor = locks_.find(path); - if (itor == locks_.end() || itor->second == NULL) { - pthread_rwlock_unlock(&locks_lock_); - LOG(WARNING) << "lock not exist"; - SetZkAdapterCode(ZE_LOCK_NOT_EXIST, zk_errno); - return false; - } + pthread_rwlock_wrlock(&locks_lock_); + LockMap::iterator itor = locks_.find(path); + if (itor == locks_.end() || itor->second == NULL) { + pthread_rwlock_unlock(&locks_lock_); + LOG(WARNING) << "lock not exist"; + SetZkAdapterCode(ZE_LOCK_NOT_EXIST, zk_errno); + return false; + } - ZooKeeperLock * lock = itor->second; - state_mutex_.Unlock(); - if (lock->Unlock(zk_errno)) { - state_mutex_.Lock(); - delete lock; - locks_.erase(itor); - pthread_rwlock_unlock(&locks_lock_); - return true; - } else { - state_mutex_.Lock(); - pthread_rwlock_unlock(&locks_lock_); - return false; - } + ZooKeeperLock* lock = itor->second; + state_mutex_.Unlock(); + if (lock->Unlock(zk_errno)) { + state_mutex_.Lock(); + delete lock; + locks_.erase(itor); + pthread_rwlock_unlock(&locks_lock_); + return true; + } else { + state_mutex_.Lock(); + pthread_rwlock_unlock(&locks_lock_); + return false; + } } void ZooKeeperAdapter::GetId(std::string* id) { - MutexLock mutex(&state_mutex_); - *id = id_; + MutexLock mutex(&state_mutex_); + *id = id_; } void ZooKeeperAdapter::TryCleanWatch(const std::string& path) { - state_mutex_.AssertHeld(); - pthread_rwlock_wrlock(&watcher_lock_); - WatcherMap::iterator itor = watchers_.find(path); - if (itor == watchers_.end()) { - pthread_rwlock_unlock(&watcher_lock_); - return; - } - - ZooKeeperWatch * watch = itor->second; - pthread_mutex_lock(&watch->mutex); - if (!watch->watch_child && !watch->watch_exist && !watch->watch_value) { - pthread_mutex_unlock(&watch->mutex); - delete watch; - watchers_.erase(itor); - } else { - pthread_mutex_unlock(&watch->mutex); - } + state_mutex_.AssertHeld(); + pthread_rwlock_wrlock(&watcher_lock_); + WatcherMap::iterator itor = watchers_.find(path); + if (itor == watchers_.end()) { pthread_rwlock_unlock(&watcher_lock_); + return; + } + + ZooKeeperWatch* watch = itor->second; + pthread_mutex_lock(&watch->mutex); + if (!watch->watch_child && !watch->watch_exist && !watch->watch_value) { + pthread_mutex_unlock(&watch->mutex); + delete watch; + watchers_.erase(itor); + } else { + pthread_mutex_unlock(&watch->mutex); + } + pthread_rwlock_unlock(&watcher_lock_); } -void ZooKeeperAdapter::LockEventCallBack(zhandle_t * zh, int type, int state, - const char * node_path, void * watch_ctx) { - VLOG(5) << "recv lock event: type=" << ZooTypeToString(type) << ", state=" - << ZooStateToString(state) << ", path=[" << node_path << "]"; +void ZooKeeperAdapter::LockEventCallBack(zhandle_t* zh, int type, int state, const char* node_path, + void* watch_ctx) { + VLOG(5) << "recv lock event: type=" << ZooTypeToString(type) + << ", state=" << ZooStateToString(state) << ", path=[" << node_path << "]"; - if (ZOO_DELETED_EVENT != type) { - LOG(WARNING) << "only allow DELETE_EVENT for lock"; - return; - } + if (ZOO_DELETED_EVENT != type) { + LOG(WARNING) << "only allow DELETE_EVENT for lock"; + return; + } - if (NULL == watch_ctx) { - return; - } - ZooKeeperAdapter* zk_adapter = (ZooKeeperAdapter*)watch_ctx; - - { - MutexLock mutex(&zk_adapter->state_mutex_); - if (zh != zk_adapter->handle_) { - LOG(WARNING) << "zhandle not match"; - return; - } - } + if (NULL == watch_ctx) { + return; + } + ZooKeeperAdapter* zk_adapter = (ZooKeeperAdapter*)watch_ctx; - if (NULL == node_path) { - LOG(WARNING) << "path is missing"; - return; + { + MutexLock mutex(&zk_adapter->state_mutex_); + if (zh != zk_adapter->handle_) { + LOG(WARNING) << "zhandle not match"; + return; } + } - std::string path = node_path; - if (!ZooKeeperUtil::IsValidPath(path)) { - LOG(WARNING) << "path is invalid"; - return; - } + if (NULL == node_path) { + LOG(WARNING) << "path is missing"; + return; + } + + std::string path = node_path; + if (!ZooKeeperUtil::IsValidPath(path)) { + LOG(WARNING) << "path is invalid"; + return; + } - zk_adapter->LockEventCallBack(path); + zk_adapter->LockEventCallBack(path); } void ZooKeeperAdapter::LockEventCallBack(std::string path) { - VLOG(5) << "LockEventCallBack: path=[" << path << "]"; - MutexLock mutex(&state_mutex_); - - std::string lock_path; - ZooKeeperUtil::GetParentPath(path, &lock_path); + VLOG(5) << "LockEventCallBack: path=[" << path << "]"; + MutexLock mutex(&state_mutex_); - pthread_rwlock_wrlock(&locks_lock_); - LockMap::iterator itor = locks_.find(lock_path); - if (itor == locks_.end()) { - pthread_rwlock_unlock(&locks_lock_); - LOG(WARNING) << "lock [" << lock_path << "] not exist"; - return; - } - ZooKeeperLock* lock = itor->second; - if (lock == NULL) { - pthread_rwlock_unlock(&locks_lock_); - return; - } - state_mutex_.Unlock(); - if (lock->CheckSelfNodePath(path)) { - OnZkLockDeleted(); - } else { - lock->OnWatchNodeDeleted(path); - } + std::string lock_path; + ZooKeeperUtil::GetParentPath(path, &lock_path); - state_mutex_.Lock(); + pthread_rwlock_wrlock(&locks_lock_); + LockMap::iterator itor = locks_.find(lock_path); + if (itor == locks_.end()) { + pthread_rwlock_unlock(&locks_lock_); + LOG(WARNING) << "lock [" << lock_path << "] not exist"; + return; + } + ZooKeeperLock* lock = itor->second; + if (lock == NULL) { pthread_rwlock_unlock(&locks_lock_); + return; + } + state_mutex_.Unlock(); + if (lock->CheckSelfNodePath(path)) { + OnZkLockDeleted(); + } else { + lock->OnWatchNodeDeleted(path); + } + + state_mutex_.Lock(); + pthread_rwlock_unlock(&locks_lock_); } bool ZooKeeperAdapter::GetSessionId(int64_t* session_id, int* zk_errno) { - MutexLock mutex(&state_mutex_); - if (ZS_CONNECTED == state_) { - *session_id = session_id_; - SetZkAdapterCode(ZE_OK, zk_errno); - return true; - } - SetZkAdapterCode(ZE_SESSION, zk_errno); - return false; + MutexLock mutex(&state_mutex_); + if (ZS_CONNECTED == state_) { + *session_id = session_id_; + SetZkAdapterCode(ZE_OK, zk_errno); + return true; + } + SetZkAdapterCode(ZE_SESSION, zk_errno); + return false; } bool ZooKeeperAdapter::SetLibraryLogOutput(const std::string& file) { - MutexLock mutex(&lib_log_mutex_); - FILE* new_log = fopen(file.c_str(), "a"); - if (NULL == new_log) { - LOG(WARNING) << "fail to open file ["<< file << "]: " << strerror(errno); - return false; - } - zoo_set_log_stream(new_log); - if (NULL != lib_log_output_) { - fclose(lib_log_output_); - } - lib_log_output_ = new_log; - return true; + MutexLock mutex(&lib_log_mutex_); + FILE* new_log = fopen(file.c_str(), "a"); + if (NULL == new_log) { + LOG(WARNING) << "fail to open file [" << file << "]: " << strerror(errno); + return false; + } + zoo_set_log_stream(new_log); + if (NULL != lib_log_output_) { + fclose(lib_log_output_); + } + lib_log_output_ = new_log; + return true; } -int ZooKeeperAdapter::ExistsWrapper(const std::string& path, bool is_watch, - bool* is_exist) { - state_mutex_.AssertHeld(); - struct Stat stat; - int ret = zoo_exists(handle_, path.c_str(), is_watch, &stat); - if (ZOK == ret) { - *is_exist = true; - LOG(INFO) << "zoo_exists node [" << path << "] success"; - } else if (ZNONODE == ret) { - *is_exist = false; - LOG(INFO) << "zoo_exists node [" << path << "] not exist"; - } else { - LOG(WARNING) << "zoo_exists node [" << path << "] fail : " << zerror(ret); - } - - switch (ret) { - case ZOK: - case ZNONODE: - return ZE_OK; - case ZNOAUTH: - return ZE_AUTH; - case ZBADARGUMENTS: - return ZE_ARG; - case ZINVALIDSTATE: - return ZE_SESSION; - case ZMARSHALLINGERROR: - return ZE_SYSTEM; - default: - return ZE_UNKNOWN; - } +int ZooKeeperAdapter::ExistsWrapper(const std::string& path, bool is_watch, bool* is_exist) { + state_mutex_.AssertHeld(); + struct Stat stat; + int ret = zoo_exists(handle_, path.c_str(), is_watch, &stat); + if (ZOK == ret) { + *is_exist = true; + LOG(INFO) << "zoo_exists node [" << path << "] success"; + } else if (ZNONODE == ret) { + *is_exist = false; + LOG(INFO) << "zoo_exists node [" << path << "] not exist"; + } else { + LOG(WARNING) << "zoo_exists node [" << path << "] fail : " << zerror(ret); + } + + switch (ret) { + case ZOK: + case ZNONODE: + return ZE_OK; + case ZNOAUTH: + return ZE_AUTH; + case ZBADARGUMENTS: + return ZE_ARG; + case ZINVALIDSTATE: + return ZE_SESSION; + case ZMARSHALLINGERROR: + return ZE_SYSTEM; + default: + return ZE_UNKNOWN; + } } -int ZooKeeperAdapter::ExistsWrapperForLock(const std::string& path, - bool* is_exist) { - state_mutex_.AssertHeld(); - struct Stat stat; - int ret = zoo_wexists(handle_, path.c_str(), LockEventCallBack, this, &stat); - if (ZOK == ret) { - *is_exist = true; - LOG(INFO) << "zoo_exists node [" << path << "] success"; - } else if (ZNONODE == ret) { - *is_exist = false; - LOG(INFO) << "zoo_exists node [" << path << "] not exist"; - } else { - LOG(WARNING) << "zoo_exists node [" << path << "] fail : " << zerror(ret); - } - - switch (ret) { - case ZOK: - case ZNONODE: - return ZE_OK; - case ZNOAUTH: - return ZE_AUTH; - case ZBADARGUMENTS: - return ZE_ARG; - case ZINVALIDSTATE: - return ZE_SESSION; - case ZMARSHALLINGERROR: - return ZE_SYSTEM; - default: - return ZE_UNKNOWN; - } +int ZooKeeperAdapter::ExistsWrapperForLock(const std::string& path, bool* is_exist) { + state_mutex_.AssertHeld(); + struct Stat stat; + int ret = zoo_wexists(handle_, path.c_str(), LockEventCallBack, this, &stat); + if (ZOK == ret) { + *is_exist = true; + LOG(INFO) << "zoo_exists node [" << path << "] success"; + } else if (ZNONODE == ret) { + *is_exist = false; + LOG(INFO) << "zoo_exists node [" << path << "] not exist"; + } else { + LOG(WARNING) << "zoo_exists node [" << path << "] fail : " << zerror(ret); + } + + switch (ret) { + case ZOK: + case ZNONODE: + return ZE_OK; + case ZNOAUTH: + return ZE_AUTH; + case ZBADARGUMENTS: + return ZE_ARG; + case ZINVALIDSTATE: + return ZE_SESSION; + case ZMARSHALLINGERROR: + return ZE_SYSTEM; + default: + return ZE_UNKNOWN; + } } -int ZooKeeperAdapter::GetWrapper(const std::string& path, bool is_watch, - std::string* value) { - state_mutex_.AssertHeld(); - char* buffer = new char[kMaxNodeDataLen]; - int buffer_len = kMaxNodeDataLen; - int ret = zoo_get(handle_, path.c_str(), is_watch, buffer, &buffer_len, - NULL); - if (ZOK == ret) { - if (buffer_len < 0) { - buffer_len = 0; - } else if (buffer_len >= kMaxNodeDataLen) { - buffer_len = kMaxNodeDataLen - 1; - } - buffer[buffer_len] = '\0'; - *value = buffer; - VLOG(10) << "zoo_get success"; - } else { - LOG(WARNING) << "zoo_get fail : " << zerror(ret); - } - delete[] buffer; - - switch (ret) { - case ZOK: - return ZE_OK; - case ZNONODE: - return ZE_NOT_EXIST; - case ZNOAUTH: - return ZE_AUTH; - case ZBADARGUMENTS: - return ZE_ARG; - case ZINVALIDSTATE: - return ZE_SESSION; - case ZMARSHALLINGERROR: - return ZE_SYSTEM; - default: - return ZE_UNKNOWN; - } +int ZooKeeperAdapter::GetWrapper(const std::string& path, bool is_watch, std::string* value) { + state_mutex_.AssertHeld(); + char* buffer = new char[kMaxNodeDataLen]; + int buffer_len = kMaxNodeDataLen; + int ret = zoo_get(handle_, path.c_str(), is_watch, buffer, &buffer_len, NULL); + if (ZOK == ret) { + if (buffer_len < 0) { + buffer_len = 0; + } else if (buffer_len >= kMaxNodeDataLen) { + buffer_len = kMaxNodeDataLen - 1; + } + buffer[buffer_len] = '\0'; + *value = buffer; + VLOG(10) << "zoo_get success"; + } else { + LOG(WARNING) << "zoo_get fail : " << zerror(ret); + } + delete[] buffer; + + switch (ret) { + case ZOK: + return ZE_OK; + case ZNONODE: + return ZE_NOT_EXIST; + case ZNOAUTH: + return ZE_AUTH; + case ZBADARGUMENTS: + return ZE_ARG; + case ZINVALIDSTATE: + return ZE_SESSION; + case ZMARSHALLINGERROR: + return ZE_SYSTEM; + default: + return ZE_UNKNOWN; + } } int ZooKeeperAdapter::GetChildrenWrapper(const std::string& path, bool is_watch, std::vector* child_list, std::vector* value_list) { - state_mutex_.AssertHeld(); - struct String_vector str_vec; - allocate_String_vector(&str_vec, 0); - int ret = zoo_get_children(handle_, path.c_str(), is_watch, &str_vec); - if (ZOK == ret) { - child_list->clear(); - value_list->clear(); - for (int i = 0; i < str_vec.count; i++) { - child_list->push_back(str_vec.data[i]); - std::string child_path = path + '/' + str_vec.data[i]; - std::string value; - int ret2 = GetWrapper(child_path, false, &value); - if (ZE_OK != ret2) { - value = ""; - LOG(WARNING) << "read node fail : " << ret2; - } - value_list->push_back(value); - } - LOG(INFO) << "zoo_get_children success"; - } else { - LOG(WARNING) << "zoo_get_children fail : " << zerror(ret); - } - deallocate_String_vector(&str_vec); - - switch (ret) { - case ZOK: - return ZE_OK; - case ZNONODE: - return ZE_NOT_EXIST; - case ZNOAUTH: - return ZE_AUTH; - case ZBADARGUMENTS: - return ZE_ARG; - case ZINVALIDSTATE: - return ZE_SESSION; - case ZMARSHALLINGERROR: - return ZE_SYSTEM; - default: - return ZE_UNKNOWN; - } + state_mutex_.AssertHeld(); + struct String_vector str_vec; + allocate_String_vector(&str_vec, 0); + int ret = zoo_get_children(handle_, path.c_str(), is_watch, &str_vec); + if (ZOK == ret) { + child_list->clear(); + value_list->clear(); + for (int i = 0; i < str_vec.count; i++) { + child_list->push_back(str_vec.data[i]); + std::string child_path = path + '/' + str_vec.data[i]; + std::string value; + int ret2 = GetWrapper(child_path, false, &value); + if (ZE_OK != ret2) { + value = ""; + LOG(WARNING) << "read node fail : " << ret2; + } + value_list->push_back(value); + } + LOG(INFO) << "zoo_get_children success"; + } else { + LOG(WARNING) << "zoo_get_children fail : " << zerror(ret); + } + deallocate_String_vector(&str_vec); + + switch (ret) { + case ZOK: + return ZE_OK; + case ZNONODE: + return ZE_NOT_EXIST; + case ZNOAUTH: + return ZE_AUTH; + case ZBADARGUMENTS: + return ZE_ARG; + case ZINVALIDSTATE: + return ZE_SESSION; + case ZMARSHALLINGERROR: + return ZE_SYSTEM; + default: + return ZE_UNKNOWN; + } } -} // namespace zk -} // namespace tera +} // namespace zk +} // namespace tera diff --git a/src/zk/zk_adapter.h b/src/zk/zk_adapter.h index 87003d764..dd5bb50f6 100644 --- a/src/zk/zk_adapter.h +++ b/src/zk/zk_adapter.h @@ -4,8 +4,8 @@ // // Author: likang01(com@baidu.com) -#ifndef TERA_ZK_ZK_ADAPTER_H_ -#define TERA_ZK_ZK_ADAPTER_H_ +#ifndef TERA_ZK_ZK_ADAPTER_H_ +#define TERA_ZK_ZK_ADAPTER_H_ #include #include @@ -17,7 +17,6 @@ #include "zk/zk_lock.h" #include "zk/zk_util.h" - namespace tera { namespace zk { @@ -35,161 +34,146 @@ namespace zk { // typedef void (*LOCK_CALLBACK)(const char * path, int err, void * param); struct ZooKeeperWatch; class ZooKeeperAdapter { -public: - ZooKeeperAdapter(); - virtual ~ZooKeeperAdapter(); - - bool Init(const std::string& server_list, const std::string& root_path, - uint32_t session_timeout, const std::string& id, int* zk_errno, - int wait_timeout = -1); // default wait until zk server ready - void Finalize(); - bool GetSessionId(int64_t* session_id, int* zk_errno); - - // create - bool CreatePersistentNode(const std::string& path, const std::string& value, - int* zk_errno); - bool CreateEphemeralNode(const std::string& path, const std::string& value, - int* zk_errno); - bool CreateSequentialEphemeralNode(const std::string& path, - const std::string& value, - std::string* ret_path, int* zk_errno); - - // delete - bool DeleteNode(const std::string& path, int* zk_errno); - - // write - bool WriteNode(const std::string& path, const std::string& value, - int* zk_errno); - - // read - bool ReadNode(const std::string& path, std::string* value, int* zk_errno); - bool ReadAndWatchNode(const std::string& path, std::string* value, - int* zk_errno); - - // exist - bool CheckExist(const std::string&path, bool* is_exist, int* zk_errno); - bool CheckAndWatchExist(const std::string& path, bool* is_exist, - int* zk_errno); - bool CheckAndWatchExistForLock(const std::string& path, bool* is_exist, - int* zk_errno); - - // list - bool ListChildren(const std::string& path, - std::vector* child_list, - std::vector* value_list, - int* zk_errno); - bool ListAndWatchChildren(const std::string& path, - std::vector* child_list, - std::vector* value_list, - int* zk_errno); - - // callback - static void EventCallBack(zhandle_t* zh, int type, int state, - const char* path, void* watch_ctx); - static void LockEventCallBack(zhandle_t* zh, int type, int state, - const char* path, void* watch_ctx); - - // lock - bool AsyncLock(const std::string& path, LOCK_CALLBACK func, void* param, - int* zk_errno); - bool SyncLock(const std::string& path, int* zk_errno, int32_t timeout = -1); - bool CancelLock(const std::string& path, int* zk_errno); - bool Unlock(const std::string& path, int* zk_errno); - static void SyncLockCallback(const std::string& path, int err, void* param); - - void GetId(std::string* id); - static bool SetLibraryLogOutput(const std::string& file); - -protected: - bool Create(const std::string& path, const std::string& value, int flag, - std::string* ret_path, int* zk_errno); - - void CreateEventCallBack(std::string path); - void DeleteEventCallBack(std::string path); - void ChangeEventCallBack(std::string path); - void ChildEventCallBack(std::string path); - void SessionEventCallBack(int state); - void WatchLostEventCallBack(int state, std::string path); - void LockEventCallBack(std::string path); - bool WatchZkLock(const std::string &path, int* zk_errno); - virtual void OnZkLockDeleted() {} - - void TryCleanWatch(const std::string& path); - - int Lock(const std::string& path, bool async, int32_t timeout = -1); - - virtual void OnChildrenChanged(const std::string& path, - const std::vector& name_list, - const std::vector& data_list) = 0; - virtual void OnNodeValueChanged(const std::string& path, - const std::string& value) = 0; - virtual void OnNodeCreated(const std::string& path) = 0; - virtual void OnNodeDeleted(const std::string& path) = 0; - virtual void OnWatchFailed(const std::string& path, int watch_type, - int err) = 0; - virtual void OnSessionTimeout() = 0; - - int ExistsWrapper(const std::string& path, bool is_watch, bool* is_exist); - int ExistsWrapperForLock(const std::string& path, bool* is_exist); - int GetChildrenWrapper(const std::string& path, bool is_watch, - std::vector* child_list, - std::vector* value_list); - int GetWrapper(const std::string& path, bool is_watch, std::string* value); - void SessionTimeoutWrapper(); - -private: - static FILE* lib_log_output_; - static Mutex lib_log_mutex_; - - // protected by state_mutex_ - Mutex state_mutex_; - std::string id_; - std::string server_list_; - std::string root_path_; - zhandle_t * handle_; - volatile int state_; - volatile int64_t session_id_; - common::CondVar state_cond_; - uint32_t session_timeout_; - int64_t session_timer_id_; - ThreadPool thread_pool_; - - // protected by watcher_lock_ - typedef std::map WatcherMap; - WatcherMap watchers_; - pthread_rwlock_t watcher_lock_; - - // protected by locks_lock_ - typedef std::map LockMap; - LockMap locks_; - pthread_rwlock_t locks_lock_; + public: + ZooKeeperAdapter(); + virtual ~ZooKeeperAdapter(); + + bool Init(const std::string& server_list, const std::string& root_path, uint32_t session_timeout, + const std::string& id, int* zk_errno, + int wait_timeout = -1); // default wait until zk server ready + void Finalize(); + bool GetSessionId(int64_t* session_id, int* zk_errno); + + // create + bool CreatePersistentNode(const std::string& path, const std::string& value, int* zk_errno); + bool CreateEphemeralNode(const std::string& path, const std::string& value, int* zk_errno); + bool CreateSequentialEphemeralNode(const std::string& path, const std::string& value, + std::string* ret_path, int* zk_errno); + + // delete + bool DeleteNode(const std::string& path, int* zk_errno); + + // write + bool WriteNode(const std::string& path, const std::string& value, int* zk_errno); + + // read + bool ReadNode(const std::string& path, std::string* value, int* zk_errno); + bool ReadAndWatchNode(const std::string& path, std::string* value, int* zk_errno); + + // exist + bool CheckExist(const std::string& path, bool* is_exist, int* zk_errno); + bool CheckAndWatchExist(const std::string& path, bool* is_exist, int* zk_errno); + bool CheckAndWatchExistForLock(const std::string& path, bool* is_exist, int* zk_errno); + + // list + bool ListChildren(const std::string& path, std::vector* child_list, + std::vector* value_list, int* zk_errno); + bool ListAndWatchChildren(const std::string& path, std::vector* child_list, + std::vector* value_list, int* zk_errno); + + // callback + static void EventCallBack(zhandle_t* zh, int type, int state, const char* path, void* watch_ctx); + static void LockEventCallBack(zhandle_t* zh, int type, int state, const char* path, + void* watch_ctx); + + // lock + bool AsyncLock(const std::string& path, LOCK_CALLBACK func, void* param, int* zk_errno); + bool SyncLock(const std::string& path, int* zk_errno, int32_t timeout = -1); + bool CancelLock(const std::string& path, int* zk_errno); + bool Unlock(const std::string& path, int* zk_errno); + static void SyncLockCallback(const std::string& path, int err, void* param); + + void GetId(std::string* id); + static bool SetLibraryLogOutput(const std::string& file); + + protected: + bool Create(const std::string& path, const std::string& value, int flag, std::string* ret_path, + int* zk_errno); + + void CreateEventCallBack(std::string path); + void DeleteEventCallBack(std::string path); + void ChangeEventCallBack(std::string path); + void ChildEventCallBack(std::string path); + void SessionEventCallBack(int state); + void WatchLostEventCallBack(int state, std::string path); + void LockEventCallBack(std::string path); + bool WatchZkLock(const std::string& path, int* zk_errno); + virtual void OnZkLockDeleted() {} + + void TryCleanWatch(const std::string& path); + + int Lock(const std::string& path, bool async, int32_t timeout = -1); + + virtual void OnChildrenChanged(const std::string& path, const std::vector& name_list, + const std::vector& data_list) = 0; + virtual void OnNodeValueChanged(const std::string& path, const std::string& value) = 0; + virtual void OnNodeCreated(const std::string& path) = 0; + virtual void OnNodeDeleted(const std::string& path) = 0; + virtual void OnWatchFailed(const std::string& path, int watch_type, int err) = 0; + virtual void OnSessionTimeout() = 0; + + int ExistsWrapper(const std::string& path, bool is_watch, bool* is_exist); + int ExistsWrapperForLock(const std::string& path, bool* is_exist); + int GetChildrenWrapper(const std::string& path, bool is_watch, + std::vector* child_list, + std::vector* value_list); + int GetWrapper(const std::string& path, bool is_watch, std::string* value); + void SessionTimeoutWrapper(); + + private: + static FILE* lib_log_output_; + static Mutex lib_log_mutex_; + + // protected by state_mutex_ + Mutex state_mutex_; + std::string id_; + std::string server_list_; + std::string root_path_; + zhandle_t* handle_; + volatile int state_; + volatile int64_t session_id_; + common::CondVar state_cond_; + uint32_t session_timeout_; + int64_t session_timer_id_; + ThreadPool thread_pool_; + + // protected by watcher_lock_ + typedef std::map WatcherMap; + WatcherMap watchers_; + pthread_rwlock_t watcher_lock_; + + // protected by locks_lock_ + typedef std::map LockMap; + LockMap locks_; + pthread_rwlock_t locks_lock_; }; class ZooKeeperLightAdapter : public ZooKeeperAdapter { -private: - bool ReadAndWatchNode(const std::string&, std::string*, int*) {return false;} - bool CheckAndWatchExist(const std::string&, bool*, int*) {return false;} - bool CheckAndWatchExistForLock(const std::string&, bool*, int*) {return false;} - bool ListAndWatchChildren(const std::string&, std::vector*, - std::vector*, int*) {return false;} - - bool AsyncLock(const std::string&, LOCK_CALLBACK, void*, int*) {return false;} - bool SyncLock(const std::string&, int*, int32_t = -1) {return false;} - bool CancelLock(const std::string&, int*) {return false;} - bool Unlock(const std::string&, int*) {return false;} - -private: - virtual void OnChildrenChanged(const std::string&, - const std::vector&, - const std::vector&) {} - virtual void OnNodeValueChanged(const std::string&, const std::string&) {} - virtual void OnNodeCreated(const std::string&) {} - virtual void OnNodeDeleted(const std::string&) {} - virtual void OnWatchFailed(const std::string&, int, int) {} - virtual void OnSessionTimeout() {} + private: + bool ReadAndWatchNode(const std::string&, std::string*, int*) { return false; } + bool CheckAndWatchExist(const std::string&, bool*, int*) { return false; } + bool CheckAndWatchExistForLock(const std::string&, bool*, int*) { return false; } + bool ListAndWatchChildren(const std::string&, std::vector*, + std::vector*, int*) { + return false; + } + + bool AsyncLock(const std::string&, LOCK_CALLBACK, void*, int*) { return false; } + bool SyncLock(const std::string&, int*, int32_t = -1) { return false; } + bool CancelLock(const std::string&, int*) { return false; } + bool Unlock(const std::string&, int*) { return false; } + + private: + virtual void OnChildrenChanged(const std::string&, const std::vector&, + const std::vector&) {} + virtual void OnNodeValueChanged(const std::string&, const std::string&) {} + virtual void OnNodeCreated(const std::string&) {} + virtual void OnNodeDeleted(const std::string&) {} + virtual void OnWatchFailed(const std::string&, int, int) {} + virtual void OnSessionTimeout() {} }; -} // namespace zk -} // namespace tera +} // namespace zk +} // namespace tera #endif // TERA_ZK_ZK_ADAPTER_H_ diff --git a/src/zk/zk_lock.cc b/src/zk/zk_lock.cc index dc40071b3..86b436e19 100644 --- a/src/zk/zk_lock.cc +++ b/src/zk/zk_lock.cc @@ -14,298 +14,288 @@ namespace tera { namespace zk { -ZooKeeperLock::ZooKeeperLock(ZooKeeperAdapter * adapter, - const std::string& lock_path, LOCK_CALLBACK func, - void * param) - : adapter_(adapter), lock_path_(lock_path), is_acquired_(false), - callback_func_(func), callback_param_(param) { - pthread_mutex_init(&mutex_, NULL); +ZooKeeperLock::ZooKeeperLock(ZooKeeperAdapter* adapter, const std::string& lock_path, + LOCK_CALLBACK func, void* param) + : adapter_(adapter), + lock_path_(lock_path), + is_acquired_(false), + callback_func_(func), + callback_param_(param) { + pthread_mutex_init(&mutex_, NULL); } -ZooKeeperLock::~ZooKeeperLock() { - pthread_mutex_destroy(&mutex_); -} +ZooKeeperLock::~ZooKeeperLock() { pthread_mutex_destroy(&mutex_); } bool ZooKeeperLock::BeginLock(int* zk_errno) { - // use session id as GUID - // get session id - int64_t session_id = -1; - if (!adapter_->GetSessionId(&session_id, zk_errno)) { - SetZkAdapterCode(ZE_SESSION, zk_errno); - return false; - } - char guid[17]; - sprintf(guid, "%016llx", static_cast(session_id)); - LOG(INFO) << "lock GUID = " << guid; - - // get all lock nodes - std::vector child_list; - std::vector value_list; - if (!adapter_->ListChildren(lock_path_, &child_list, &value_list, - zk_errno)) { - LOG(WARNING) << "list lock path fail : " << ZkErrnoToString(*zk_errno); - return false; - } - - // delete lock nodes with same GUID to avoid conflict - *zk_errno = ZE_OK; - std::vector::iterator itor; - for (itor = child_list.begin(); itor != child_list.end(); ++itor) { - const std::string& name = *itor; - if (name.size() > 16 && 0 == strncmp(name.c_str(), guid, 16) - && name[16] == '#') { - std::string child_path = lock_path_ + "/" + name; - int zk_ret; - if (!adapter_->DeleteNode(child_path, &zk_ret)) { - LOG(WARNING)<< "delete same GUID lock node fail : " - << ZkErrnoToString(*zk_errno); - SetZkAdapterCode(zk_ret, zk_errno); - } - } - } - if (ZE_OK != *zk_errno) { - return false; - } - - // create lock node - std::string lock_node_path = lock_path_ + "/" + guid + "#"; - std::string lock_node_data; - adapter_->GetId(&lock_node_data); - std::string ret_path; - if (!adapter_->CreateSequentialEphemeralNode(lock_node_path, lock_node_data, - &ret_path, zk_errno)) { - LOG(WARNING) << "create my lock node fail : " << ZkErrnoToString(*zk_errno); - return false; - } - - child_list.clear(); - value_list.clear(); - if (!adapter_->ListChildren(lock_path_, &child_list, &value_list, - zk_errno)) { - LOG(WARNING) << "list lock path fail : " << ZkErrnoToString(*zk_errno); - return false; - } - if (child_list.size() == 0) { - LOG(WARNING)<< "lock path is empty. where is my node?"; - SetZkAdapterCode(ZE_SYSTEM, zk_errno); - return false; + // use session id as GUID + // get session id + int64_t session_id = -1; + if (!adapter_->GetSessionId(&session_id, zk_errno)) { + SetZkAdapterCode(ZE_SESSION, zk_errno); + return false; + } + char guid[17]; + sprintf(guid, "%016llx", static_cast(session_id)); + LOG(INFO) << "lock GUID = " << guid; + + // get all lock nodes + std::vector child_list; + std::vector value_list; + if (!adapter_->ListChildren(lock_path_, &child_list, &value_list, zk_errno)) { + LOG(WARNING) << "list lock path fail : " << ZkErrnoToString(*zk_errno); + return false; + } + + // delete lock nodes with same GUID to avoid conflict + *zk_errno = ZE_OK; + std::vector::iterator itor; + for (itor = child_list.begin(); itor != child_list.end(); ++itor) { + const std::string& name = *itor; + if (name.size() > 16 && 0 == strncmp(name.c_str(), guid, 16) && name[16] == '#') { + std::string child_path = lock_path_ + "/" + name; + int zk_ret; + if (!adapter_->DeleteNode(child_path, &zk_ret)) { + LOG(WARNING) << "delete same GUID lock node fail : " << ZkErrnoToString(*zk_errno); + SetZkAdapterCode(zk_ret, zk_errno); + } } + } + if (ZE_OK != *zk_errno) { + return false; + } + + // create lock node + std::string lock_node_path = lock_path_ + "/" + guid + "#"; + std::string lock_node_data; + adapter_->GetId(&lock_node_data); + std::string ret_path; + if (!adapter_->CreateSequentialEphemeralNode(lock_node_path, lock_node_data, &ret_path, + zk_errno)) { + LOG(WARNING) << "create my lock node fail : " << ZkErrnoToString(*zk_errno); + return false; + } - const std::string& self_name = ret_path; - int32_t self_seq_no = ZooKeeperUtil::GetSequenceNo(self_name); - if (self_seq_no < 0) { - LOG(WARNING) << "sequence node name is invalid"; - SetZkAdapterCode(ZE_SYSTEM, zk_errno); - return false; - } - self_node_.name = ZooKeeperUtil::GetNodeName(ret_path.c_str()); - self_node_.seq = self_seq_no; - - for (itor = child_list.begin(); itor != child_list.end(); ++itor) { - const std::string& name = *itor; - int32_t seq_no = ZooKeeperUtil::GetSequenceNo(name); - if (seq_no >= 0 && seq_no < self_seq_no) { - struct SeqNode child = {name, seq_no}; - node_list_.push(child); - } - } + child_list.clear(); + value_list.clear(); + if (!adapter_->ListChildren(lock_path_, &child_list, &value_list, zk_errno)) { + LOG(WARNING) << "list lock path fail : " << ZkErrnoToString(*zk_errno); + return false; + } + if (child_list.size() == 0) { + LOG(WARNING) << "lock path is empty. where is my node?"; + SetZkAdapterCode(ZE_SYSTEM, zk_errno); + return false; + } - if (node_list_.empty()) { - LOG(INFO)<< "get lock success"; - is_acquired_ = true; - callback_func_(lock_path_, ZE_OK, callback_param_); - SetZkAdapterCode(ZE_OK, zk_errno); - return true; + const std::string& self_name = ret_path; + int32_t self_seq_no = ZooKeeperUtil::GetSequenceNo(self_name); + if (self_seq_no < 0) { + LOG(WARNING) << "sequence node name is invalid"; + SetZkAdapterCode(ZE_SYSTEM, zk_errno); + return false; + } + self_node_.name = ZooKeeperUtil::GetNodeName(ret_path.c_str()); + self_node_.seq = self_seq_no; + + for (itor = child_list.begin(); itor != child_list.end(); ++itor) { + const std::string& name = *itor; + int32_t seq_no = ZooKeeperUtil::GetSequenceNo(name); + if (seq_no >= 0 && seq_no < self_seq_no) { + struct SeqNode child = {name, seq_no}; + node_list_.push(child); } + } - // std::sort(node_list_.begin(), node_list_.end()); - - do { - watch_path_ = lock_path_ + "/" + node_list_.top().name; - bool is_exist; - if (!adapter_->CheckAndWatchExistForLock(watch_path_, &is_exist, - zk_errno)) { - return false; - } - if (is_exist) { - SetZkAdapterCode(ZE_OK, zk_errno); - return true; - } else { - node_list_.pop(); - } - } while (!node_list_.empty()); - + if (node_list_.empty()) { LOG(INFO) << "get lock success"; is_acquired_ = true; callback_func_(lock_path_, ZE_OK, callback_param_); SetZkAdapterCode(ZE_OK, zk_errno); return true; -} + } -bool ZooKeeperLock::CancelLock(int* zk_errno) { - pthread_mutex_lock(&mutex_); - if (IsAcquired()) { - pthread_mutex_unlock(&mutex_); - LOG(WARNING)<< "lock is acquired"; - SetZkAdapterCode(ZE_LOCK_ACQUIRED, zk_errno); - return false; + // std::sort(node_list_.begin(), node_list_.end()); + + do { + watch_path_ = lock_path_ + "/" + node_list_.top().name; + bool is_exist; + if (!adapter_->CheckAndWatchExistForLock(watch_path_, &is_exist, zk_errno)) { + return false; + } + if (is_exist) { + SetZkAdapterCode(ZE_OK, zk_errno); + return true; + } else { + node_list_.pop(); } + } while (!node_list_.empty()); + + LOG(INFO) << "get lock success"; + is_acquired_ = true; + callback_func_(lock_path_, ZE_OK, callback_param_); + SetZkAdapterCode(ZE_OK, zk_errno); + return true; +} +bool ZooKeeperLock::CancelLock(int* zk_errno) { + pthread_mutex_lock(&mutex_); + if (IsAcquired()) { pthread_mutex_unlock(&mutex_); - callback_func_(lock_path_, ZE_LOCK_CANCELED, callback_param_); - LOG(INFO)<< "unlock success"; - SetZkAdapterCode(ZE_OK, zk_errno); - return true; + LOG(WARNING) << "lock is acquired"; + SetZkAdapterCode(ZE_LOCK_ACQUIRED, zk_errno); + return false; + } + + pthread_mutex_unlock(&mutex_); + callback_func_(lock_path_, ZE_LOCK_CANCELED, callback_param_); + LOG(INFO) << "unlock success"; + SetZkAdapterCode(ZE_OK, zk_errno); + return true; } bool ZooKeeperLock::Unlock(int* zk_errno) { - pthread_mutex_lock(&mutex_); - if (!IsAcquired()) { - pthread_mutex_unlock(&mutex_); - LOG(WARNING) << "lock is not acquired"; - SetZkAdapterCode(ZE_LOCK_NOT_ACQUIRED, zk_errno); - return false; - } - - if (!adapter_->DeleteNode(lock_path_ + "/" + self_node_.name, zk_errno)) { - pthread_mutex_unlock(&mutex_); - LOG(WARNING) << "unlock fail : " << ZkErrnoToString(*zk_errno); - return false; - } + pthread_mutex_lock(&mutex_); + if (!IsAcquired()) { + pthread_mutex_unlock(&mutex_); + LOG(WARNING) << "lock is not acquired"; + SetZkAdapterCode(ZE_LOCK_NOT_ACQUIRED, zk_errno); + return false; + } + if (!adapter_->DeleteNode(lock_path_ + "/" + self_node_.name, zk_errno)) { pthread_mutex_unlock(&mutex_); - LOG(INFO)<< "unlock success"; - SetZkAdapterCode(ZE_OK, zk_errno); - return true; + LOG(WARNING) << "unlock fail : " << ZkErrnoToString(*zk_errno); + return false; + } + + pthread_mutex_unlock(&mutex_); + LOG(INFO) << "unlock success"; + SetZkAdapterCode(ZE_OK, zk_errno); + return true; } bool ZooKeeperLock::CheckAndWatchNodeForLock(int* zk_errno) { - pthread_mutex_lock(&mutex_); - std::string path = lock_path_ + "/" + self_node_.name; - LOG(INFO) << "check and watch lock node [" << path << "]"; - if (!IsAcquired()) { - pthread_mutex_unlock(&mutex_); - SetZkAdapterCode(ZE_LOCK_NOT_ACQUIRED, zk_errno); - return false; - } - *zk_errno = ZE_OK; - bool is_exist; - if (!adapter_->CheckAndWatchExistForLock(path, &is_exist, - zk_errno)) { - pthread_mutex_unlock(&mutex_); - LOG(WARNING) << "check and watch exist for lock failed"; - SetZkAdapterCode(ZE_UNKNOWN, zk_errno); - return false; - } - if (is_exist) { - pthread_mutex_unlock(&mutex_); - SetZkAdapterCode(ZE_OK, zk_errno); - return true; - } else { - pthread_mutex_unlock(&mutex_); - LOG(WARNING) << "lock node not exist, watch failed"; - SetZkAdapterCode(ZE_LOCK_NOT_EXIST, zk_errno); - return false; - } + pthread_mutex_lock(&mutex_); + std::string path = lock_path_ + "/" + self_node_.name; + LOG(INFO) << "check and watch lock node [" << path << "]"; + if (!IsAcquired()) { + pthread_mutex_unlock(&mutex_); + SetZkAdapterCode(ZE_LOCK_NOT_ACQUIRED, zk_errno); + return false; + } + *zk_errno = ZE_OK; + bool is_exist; + if (!adapter_->CheckAndWatchExistForLock(path, &is_exist, zk_errno)) { + pthread_mutex_unlock(&mutex_); + LOG(WARNING) << "check and watch exist for lock failed"; + SetZkAdapterCode(ZE_UNKNOWN, zk_errno); + return false; + } + if (is_exist) { + pthread_mutex_unlock(&mutex_); + SetZkAdapterCode(ZE_OK, zk_errno); + return true; + } else { + pthread_mutex_unlock(&mutex_); + LOG(WARNING) << "lock node not exist, watch failed"; + SetZkAdapterCode(ZE_LOCK_NOT_EXIST, zk_errno); + return false; + } } bool ZooKeeperLock::CheckSelfNodePath(const std::string& path) { - pthread_mutex_lock(&mutex_); - const std::string self_lock_path = lock_path_ + "/" + self_node_.name; - pthread_mutex_unlock(&mutex_); - if (path.compare(self_lock_path) == 0) { - return true; - } - return false; + pthread_mutex_lock(&mutex_); + const std::string self_lock_path = lock_path_ + "/" + self_node_.name; + pthread_mutex_unlock(&mutex_); + if (path.compare(self_lock_path) == 0) { + return true; + } + return false; } void ZooKeeperLock::OnWatchNodeDeleted(const std::string& path) { - pthread_mutex_lock(&mutex_); - if (IsAcquired()) { - pthread_mutex_unlock(&mutex_); - return; - } - if (watch_path_.compare(path) != 0) { - pthread_mutex_unlock(&mutex_); - return; + pthread_mutex_lock(&mutex_); + if (IsAcquired()) { + pthread_mutex_unlock(&mutex_); + return; + } + if (watch_path_.compare(path) != 0) { + pthread_mutex_unlock(&mutex_); + return; + } + LOG(INFO) << "node [" << path << "] is deleted"; + + int zk_ret = ZE_OK; + node_list_.pop(); + while (!node_list_.empty()) { + watch_path_ = lock_path_ + "/" + node_list_.top().name; + bool is_exist; + if (!adapter_->CheckAndWatchExistForLock(watch_path_, &is_exist, &zk_ret)) { + pthread_mutex_unlock(&mutex_); + callback_func_(lock_path_, zk_ret, callback_param_); + return; } - LOG(INFO) << "node [" << path << "] is deleted"; - - int zk_ret = ZE_OK; - node_list_.pop(); - while (!node_list_.empty()) { - watch_path_ = lock_path_ + "/" + node_list_.top().name; - bool is_exist; - if (!adapter_->CheckAndWatchExistForLock(watch_path_, &is_exist, - &zk_ret)) { - pthread_mutex_unlock(&mutex_); - callback_func_(lock_path_, zk_ret, callback_param_); - return; - } - if (is_exist) { - pthread_mutex_unlock(&mutex_); - LOG(INFO) << "watch next node [" << watch_path_ << "]"; - return; - } else { - LOG(INFO) << "next node [" << watch_path_ << "] dead, skip"; - node_list_.pop(); - } + if (is_exist) { + pthread_mutex_unlock(&mutex_); + LOG(INFO) << "watch next node [" << watch_path_ << "]"; + return; + } else { + LOG(INFO) << "next node [" << watch_path_ << "] dead, skip"; + node_list_.pop(); } + } - is_acquired_ = true; - pthread_mutex_unlock(&mutex_); - LOG(INFO) << "get lock success"; - callback_func_(lock_path_, zk_ret, callback_param_); + is_acquired_ = true; + pthread_mutex_unlock(&mutex_); + LOG(INFO) << "get lock success"; + callback_func_(lock_path_, zk_ret, callback_param_); } -LockCompletion::LockCompletion() - : lock_(NULL), errno_(ZE_OK) { - pthread_mutex_init(&mutex_, NULL); - pthread_cond_init(&cond_, NULL); +LockCompletion::LockCompletion() : lock_(NULL), errno_(ZE_OK) { + pthread_mutex_init(&mutex_, NULL); + pthread_cond_init(&cond_, NULL); } LockCompletion::~LockCompletion() { - pthread_mutex_destroy(&mutex_); - pthread_cond_destroy(&cond_); + pthread_mutex_destroy(&mutex_); + pthread_cond_destroy(&cond_); } -void LockCompletion::SetLock(ZooKeeperLock * lock) { - lock_ = lock; -} - -bool LockCompletion::Wait(int* zk_errno, const timeval * end_time) { - pthread_mutex_lock(&mutex_); - while (1) { - if (lock_->IsAcquired()) { - pthread_mutex_unlock(&mutex_); - SetZkAdapterCode(ZE_OK, zk_errno); - return true; - } else if (errno_ != ZE_OK) { - pthread_mutex_unlock(&mutex_); - SetZkAdapterCode(errno_, zk_errno); - return false; - } else if (end_time != NULL) { - struct timespec abs_time; - abs_time.tv_sec = end_time->tv_sec; - abs_time.tv_nsec = end_time->tv_usec * 1000; - int err = pthread_cond_timedwait(&cond_, &mutex_, &abs_time); - if (err == ETIMEDOUT && !lock_->IsAcquired() && errno_ == ZE_OK) { - pthread_mutex_unlock(&mutex_); - SetZkAdapterCode(ZE_LOCK_TIMEOUT, zk_errno); - return false; - } - } else { - pthread_cond_wait(&cond_, &mutex_); - } +void LockCompletion::SetLock(ZooKeeperLock* lock) { lock_ = lock; } + +bool LockCompletion::Wait(int* zk_errno, const timeval* end_time) { + pthread_mutex_lock(&mutex_); + while (1) { + if (lock_->IsAcquired()) { + pthread_mutex_unlock(&mutex_); + SetZkAdapterCode(ZE_OK, zk_errno); + return true; + } else if (errno_ != ZE_OK) { + pthread_mutex_unlock(&mutex_); + SetZkAdapterCode(errno_, zk_errno); + return false; + } else if (end_time != NULL) { + struct timespec abs_time; + abs_time.tv_sec = end_time->tv_sec; + abs_time.tv_nsec = end_time->tv_usec * 1000; + int err = pthread_cond_timedwait(&cond_, &mutex_, &abs_time); + if (err == ETIMEDOUT && !lock_->IsAcquired() && errno_ == ZE_OK) { + pthread_mutex_unlock(&mutex_); + SetZkAdapterCode(ZE_LOCK_TIMEOUT, zk_errno); + return false; + } + } else { + pthread_cond_wait(&cond_, &mutex_); } + } } void LockCompletion::Signal(int err) { - pthread_mutex_lock(&mutex_); - errno_ = err; - pthread_cond_signal(&cond_); - pthread_mutex_unlock(&mutex_); + pthread_mutex_lock(&mutex_); + errno_ = err; + pthread_cond_signal(&cond_); + pthread_mutex_unlock(&mutex_); } -} // namespace zk -} // namespace tera +} // namespace zk +} // namespace tera diff --git a/src/zk/zk_lock.h b/src/zk/zk_lock.h index a5862c978..81dedc399 100644 --- a/src/zk/zk_lock.h +++ b/src/zk/zk_lock.h @@ -4,8 +4,8 @@ // // Author: likang01(com@baidu.com) -#ifndef TERA_ZK_ZK_LOCK_H_ -#define TERA_ZK_ZK_LOCK_H_ +#ifndef TERA_ZK_ZK_LOCK_H_ +#define TERA_ZK_ZK_LOCK_H_ #include #include @@ -15,69 +15,62 @@ namespace tera { namespace zk { -struct SeqNode -{ - std::string name; - int32_t seq; +struct SeqNode { + std::string name; + int32_t seq; }; -class SeqNodeComp -{ -public: - bool operator() (const SeqNode & i, const SeqNode & j) - { - return i.seq >= j.seq; - } +class SeqNodeComp { + public: + bool operator()(const SeqNode& i, const SeqNode& j) { return i.seq >= j.seq; } }; -typedef void (*LOCK_CALLBACK)(const std::string& path, int err, void * param); +typedef void (*LOCK_CALLBACK)(const std::string& path, int err, void* param); class ZooKeeperAdapter; -class ZooKeeperLock -{ -public: - ZooKeeperLock(ZooKeeperAdapter * adapter, const std::string& lock_path, - LOCK_CALLBACK func, void * param); - ~ZooKeeperLock(); - bool BeginLock(int* zk_errno); - bool CancelLock(int* zk_errno); - bool Unlock(int* zk_errno); - bool IsAcquired() {return is_acquired_;} - void OnWatchNodeDeleted(const std::string& path); - bool CheckAndWatchNodeForLock(int* zk_errno); - bool CheckSelfNodePath(const std::string& path); +class ZooKeeperLock { + public: + ZooKeeperLock(ZooKeeperAdapter* adapter, const std::string& lock_path, LOCK_CALLBACK func, + void* param); + ~ZooKeeperLock(); + bool BeginLock(int* zk_errno); + bool CancelLock(int* zk_errno); + bool Unlock(int* zk_errno); + bool IsAcquired() { return is_acquired_; } + void OnWatchNodeDeleted(const std::string& path); + bool CheckAndWatchNodeForLock(int* zk_errno); + bool CheckSelfNodePath(const std::string& path); -private: - ZooKeeperAdapter * adapter_; - std::string lock_path_; - struct SeqNode self_node_; - std::priority_queue, SeqNodeComp> node_list_; - std::string watch_path_; - pthread_mutex_t mutex_; + private: + ZooKeeperAdapter* adapter_; + std::string lock_path_; + struct SeqNode self_node_; + std::priority_queue, SeqNodeComp> node_list_; + std::string watch_path_; + pthread_mutex_t mutex_; - volatile bool is_acquired_; - LOCK_CALLBACK callback_func_; - void * callback_param_; + volatile bool is_acquired_; + LOCK_CALLBACK callback_func_; + void* callback_param_; }; -struct LockCompletion -{ -public: - LockCompletion(); - ~LockCompletion(); - void SetLock(ZooKeeperLock * lock); - bool Wait(int* zk_errno, const timeval * abs_time = NULL); - void Signal(int err); +struct LockCompletion { + public: + LockCompletion(); + ~LockCompletion(); + void SetLock(ZooKeeperLock* lock); + bool Wait(int* zk_errno, const timeval* abs_time = NULL); + void Signal(int err); -private: - ZooKeeperLock * lock_; - int errno_; - pthread_mutex_t mutex_; - pthread_cond_t cond_; + private: + ZooKeeperLock* lock_; + int errno_; + pthread_mutex_t mutex_; + pthread_cond_t cond_; }; -} // namespace zk -} // namespace tera +} // namespace zk +} // namespace tera #endif // TERA_ZK_ZK_LOCK_H_ diff --git a/src/zk/zk_util.cc b/src/zk/zk_util.cc index 579a59f0d..afcefb139 100644 --- a/src/zk/zk_util.cc +++ b/src/zk/zk_util.cc @@ -18,200 +18,196 @@ namespace tera { namespace zk { std::string ZkErrnoToString(int err) { - switch (err) { + switch (err) { case ZE_OK: - return "OK"; + return "OK"; case ZE_ARG: - return "Bad argument"; + return "Bad argument"; case ZE_SESSION: - return "Broken session"; + return "Broken session"; case ZE_SYSTEM: - return "System error"; + return "System error"; case ZE_INITED: - return "Has init"; + return "Has init"; case ZE_NOT_INIT: - return "Not init"; + return "Not init"; case ZE_EXIST: - return "Node exist"; + return "Node exist"; case ZE_NOT_EXIST: - return "Node not exist"; + return "Node not exist"; case ZE_NO_PARENT: - return "No parent node"; + return "No parent node"; case ZE_ENTITY_PARENT: - return "Ephemeral parent node"; + return "Ephemeral parent node"; case ZE_AUTH: - return "Authorization error"; + return "Authorization error"; case ZE_HAS_CHILD: - return "Has child"; + return "Has child"; case ZE_LOCK_TIMEOUT: - return "Lock timeout"; + return "Lock timeout"; case ZE_LOCK_EXIST: - return "Lock exist"; + return "Lock exist"; case ZE_LOCK_NOT_EXIST: - return "Lock not exist"; + return "Lock not exist"; case ZE_LOCK_CANCELED: - return "Lock is canceled"; + return "Lock is canceled"; case ZE_LOCK_ACQUIRED: - return "Lock is acquired"; + return "Lock is acquired"; case ZE_LOCK_NOT_ACQUIRED: - return "Lock not acquired"; + return "Lock not acquired"; case ZE_UNKNOWN: - return "Unknown error"; + return "Unknown error"; default: - ; - } - return ""; + ; + } + return ""; } -void SetZkAdapterCode(int code, int* ret_code) { - *ret_code = code; -} +void SetZkAdapterCode(int code, int* ret_code) { *ret_code = code; } std::string ZooStateToString(int state) { - if (ZOO_EXPIRED_SESSION_STATE == state) { - return "ZOO_EXPIRED_SESSION_STATE"; - } else if (ZOO_AUTH_FAILED_STATE == state) { - return "ZOO_AUTH_FAILED_STATE"; - } else if (ZOO_CONNECTING_STATE == state) { - return "ZOO_CONNECTING_STATE"; - } else if (ZOO_ASSOCIATING_STATE == state) { - return "ZOO_ASSOCIATING_STATE"; - } else if (ZOO_CONNECTED_STATE == state) { - return "ZOO_CONNECTED_STATE"; - } else { - return "ZOO_UNKNOWN_STATE"; - } + if (ZOO_EXPIRED_SESSION_STATE == state) { + return "ZOO_EXPIRED_SESSION_STATE"; + } else if (ZOO_AUTH_FAILED_STATE == state) { + return "ZOO_AUTH_FAILED_STATE"; + } else if (ZOO_CONNECTING_STATE == state) { + return "ZOO_CONNECTING_STATE"; + } else if (ZOO_ASSOCIATING_STATE == state) { + return "ZOO_ASSOCIATING_STATE"; + } else if (ZOO_CONNECTED_STATE == state) { + return "ZOO_CONNECTED_STATE"; + } else { + return "ZOO_UNKNOWN_STATE"; + } } std::string ZooTypeToString(int type) { - if (ZOO_CREATED_EVENT == type) { - return "ZOO_CREATED_EVENT"; - } else if (ZOO_DELETED_EVENT == type) { - return "ZOO_DELETED_EVENT"; - } else if (ZOO_CHANGED_EVENT == type) { - return "ZOO_CHANGED_EVENT"; - } else if (ZOO_CHILD_EVENT == type) { - return "ZOO_CHILD_EVENT"; - } else if (ZOO_SESSION_EVENT == type) { - return "ZOO_SESSION_EVENT"; - } else if (ZOO_NOTWATCHING_EVENT == type) { - return "ZOO_NOTWATCHING_EVENT"; - } else { - return "ZOO_UNKNOWN_EVENT"; - } + if (ZOO_CREATED_EVENT == type) { + return "ZOO_CREATED_EVENT"; + } else if (ZOO_DELETED_EVENT == type) { + return "ZOO_DELETED_EVENT"; + } else if (ZOO_CHANGED_EVENT == type) { + return "ZOO_CHANGED_EVENT"; + } else if (ZOO_CHILD_EVENT == type) { + return "ZOO_CHILD_EVENT"; + } else if (ZOO_SESSION_EVENT == type) { + return "ZOO_SESSION_EVENT"; + } else if (ZOO_NOTWATCHING_EVENT == type) { + return "ZOO_NOTWATCHING_EVENT"; + } else { + return "ZOO_UNKNOWN_EVENT"; + } } -bool ZooKeeperUtil::IsChild(const char * child, const char * parent) { - size_t child_len = strlen(child); - size_t parent_len = strlen(parent); - if (child[child_len - 1] == '/') { - child_len--; - } - if (parent[parent_len - 1] == '/') { - parent_len--; - } - if (child_len <= parent_len || 0 != strncmp(parent, child, parent_len) - || child[parent_len] != '/' || child[parent_len + 1] == '\0') { - return false; - } - - const char * slash_ptr = strchr(child + parent_len + 1, '/'); - if (slash_ptr == NULL || slash_ptr == child + child_len) { - return false; - } - - return true; +bool ZooKeeperUtil::IsChild(const char* child, const char* parent) { + size_t child_len = strlen(child); + size_t parent_len = strlen(parent); + if (child[child_len - 1] == '/') { + child_len--; + } + if (parent[parent_len - 1] == '/') { + parent_len--; + } + if (child_len <= parent_len || 0 != strncmp(parent, child, parent_len) || + child[parent_len] != '/' || child[parent_len + 1] == '\0') { + return false; + } + + const char* slash_ptr = strchr(child + parent_len + 1, '/'); + if (slash_ptr == NULL || slash_ptr == child + child_len) { + return false; + } + + return true; } bool ZooKeeperUtil::GetParentPath(const std::string& path, std::string* parent) { - if (path[0] != '/') { - return false; - } - size_t last_slash_pos = path.find_last_of('/'); - if (last_slash_pos > 0) { - parent->assign(path, 0, last_slash_pos); - } else { - parent->assign("/"); - } - return true; + if (path[0] != '/') { + return false; + } + size_t last_slash_pos = path.find_last_of('/'); + if (last_slash_pos > 0) { + parent->assign(path, 0, last_slash_pos); + } else { + parent->assign("/"); + } + return true; } -const char * ZooKeeperUtil::GetNodeName(const char * path) { - if (path[0] != '/') { - return NULL; - } - const char * last_slash_ptr = rindex(path, '/'); - return last_slash_ptr + 1; +const char* ZooKeeperUtil::GetNodeName(const char* path) { + if (path[0] != '/') { + return NULL; + } + const char* last_slash_ptr = rindex(path, '/'); + return last_slash_ptr + 1; } int32_t ZooKeeperUtil::GetSequenceNo(const std::string& name) { - size_t name_len = name.size(); - if (name_len < 10) { - LOG(ERROR) << "name [" << name << "] too short"; - return -1; - } - - const char * seq_str = name.c_str() + name_len - 10; - while (*seq_str == '0') { // skip '0' - seq_str++; - } - - int32_t seq_no; - char * seq_end_ptr; - if (*seq_str != '\0') { - seq_no = strtol(seq_str, &seq_end_ptr, 10); - if (*seq_end_ptr == '\0' && seq_no > 0) { - return seq_no; - } else { - LOG(ERROR) << "name [" << name << "] not end in 10 digit"; - return -1; - } + size_t name_len = name.size(); + if (name_len < 10) { + LOG(ERROR) << "name [" << name << "] too short"; + return -1; + } + + const char* seq_str = name.c_str() + name_len - 10; + while (*seq_str == '0') { // skip '0' + seq_str++; + } + + int32_t seq_no; + char* seq_end_ptr; + if (*seq_str != '\0') { + seq_no = strtol(seq_str, &seq_end_ptr, 10); + if (*seq_end_ptr == '\0' && seq_no > 0) { + return seq_no; } else { - return 0; + LOG(ERROR) << "name [" << name << "] not end in 10 digit"; + return -1; } + } else { + return 0; + } } bool ZooKeeperUtil::IsValidPath(const std::string& path) { - if (path.empty() || path[0] != '/' - || (path.size() > 1 && *path.rbegin() == '/')) { - return false; - } - return true; + if (path.empty() || path[0] != '/' || (path.size() > 1 && *path.rbegin() == '/')) { + return false; + } + return true; } bool FakeZkUtil::WriteNode(const std::string& name, const std::string& value) { - FileStream node_file; - if (!node_file.Open(name, FILE_WRITE)) { - return false; - } - if (node_file.Write(value.c_str(), value.size()) != (int32_t)value.size()) { - return false; - } - node_file.Close(); - return true; + FileStream node_file; + if (!node_file.Open(name, FILE_WRITE)) { + return false; + } + if (node_file.Write(value.c_str(), value.size()) != (int32_t)value.size()) { + return false; + } + node_file.Close(); + return true; } bool FakeZkUtil::ReadNode(const std::string& name, std::string* value) { - FileStream node_file; - if (!node_file.Open(name, FILE_READ)) { - LOG(ERROR) << "fail to open node file: " << name; - return false; - } - if (node_file.ReadLine(value) < 0) { - LOG(ERROR) << "fail to read node file: " << name; - return false; - } - node_file.Close(); - return true; + FileStream node_file; + if (!node_file.Open(name, FILE_READ)) { + LOG(ERROR) << "fail to open node file: " << name; + return false; + } + if (node_file.ReadLine(value) < 0) { + LOG(ERROR) << "fail to read node file: " << name; + return false; + } + node_file.Close(); + return true; } -bool FakeZkUtil::ListNodes(const std::string& path, - std::vector* values) { - if (!ListCurrentDir(path, values)) { - return false; - } - return true; +bool FakeZkUtil::ListNodes(const std::string& path, std::vector* values) { + if (!ListCurrentDir(path, values)) { + return false; + } + return true; } -} // namespace zk -} // namespace tera +} // namespace zk +} // namespace tera diff --git a/src/zk/zk_util.h b/src/zk/zk_util.h index f3618e2d6..bdf57741d 100644 --- a/src/zk/zk_util.h +++ b/src/zk/zk_util.h @@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef TERA_ZK_ZK_UTIL_H_ -#define TERA_ZK_ZK_UTIL_H_ +#ifndef TERA_ZK_ZK_UTIL_H_ +#define TERA_ZK_ZK_UTIL_H_ #include @@ -19,58 +19,48 @@ std::string ZooStateToString(int state); std::string ZooTypeToString(int type); enum ZooKeeperErrno { - ZE_OK = 0, - ZE_ARG, - ZE_SESSION, - ZE_SYSTEM, - ZE_INITED, - ZE_NOT_INIT, - ZE_EXIST, - ZE_NOT_EXIST, - ZE_NO_PARENT, - ZE_ENTITY_PARENT, - ZE_AUTH, - ZE_HAS_CHILD, - ZE_LOCK_TIMEOUT, - ZE_LOCK_EXIST, - ZE_LOCK_NOT_EXIST, - ZE_LOCK_CANCELED, - ZE_LOCK_ACQUIRED, - ZE_LOCK_NOT_ACQUIRED, - ZE_UNKNOWN + ZE_OK = 0, + ZE_ARG, + ZE_SESSION, + ZE_SYSTEM, + ZE_INITED, + ZE_NOT_INIT, + ZE_EXIST, + ZE_NOT_EXIST, + ZE_NO_PARENT, + ZE_ENTITY_PARENT, + ZE_AUTH, + ZE_HAS_CHILD, + ZE_LOCK_TIMEOUT, + ZE_LOCK_EXIST, + ZE_LOCK_NOT_EXIST, + ZE_LOCK_CANCELED, + ZE_LOCK_ACQUIRED, + ZE_LOCK_NOT_ACQUIRED, + ZE_UNKNOWN }; -enum ZooKeeperState { - ZS_DISCONN, - ZS_CONNECTING, - ZS_CONNECTED, - ZS_AUTH, - ZS_TIMEOUT -}; +enum ZooKeeperState { ZS_DISCONN, ZS_CONNECTING, ZS_CONNECTED, ZS_AUTH, ZS_TIMEOUT }; -enum ZooKeeperWatchType { - ZT_WATCH_VALUE = 1, - ZT_WATCH_EXIST = 2, - ZT_WATCH_CHILD = 4 -}; +enum ZooKeeperWatchType { ZT_WATCH_VALUE = 1, ZT_WATCH_EXIST = 2, ZT_WATCH_CHILD = 4 }; class ZooKeeperUtil { -public: - static bool IsChild(const char * child, const char * parent); - static bool GetParentPath(const std::string& path, std::string* parent); - static const char * GetNodeName(const char * path); - static int32_t GetSequenceNo(const std::string& name); - static bool IsValidPath(const std::string& path); + public: + static bool IsChild(const char* child, const char* parent); + static bool GetParentPath(const std::string& path, std::string* parent); + static const char* GetNodeName(const char* path); + static int32_t GetSequenceNo(const std::string& name); + static bool IsValidPath(const std::string& path); }; class FakeZkUtil { -public: - static bool WriteNode(const std::string& name, const std::string& value); - static bool ReadNode(const std::string& name, std::string* value); - static bool ListNodes(const std::string& path, std::vector* values); + public: + static bool WriteNode(const std::string& name, const std::string& value); + static bool ReadNode(const std::string& name, std::string* value); + static bool ListNodes(const std::string& path, std::vector* values); }; -} // namespace zk -} // namespace tera +} // namespace zk +} // namespace tera #endif // TERA_ZK_ZK_UTIL_H_ diff --git a/test/README.md b/test/README.md deleted file mode 100644 index 9993376ea..000000000 --- a/test/README.md +++ /dev/null @@ -1,17 +0,0 @@ -#function test caseз - -##׼ -װpythonnose,һҪִ - -## -* нű - -sh ft_test.sh - -* ֤ - -cd tmp/log/test.log鿴 - -* ջ - -rm -rf tmp diff --git a/test/testcase/__init__.py b/test/testcase/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/test/testcase/common.py b/test/testcase/common.py deleted file mode 100644 index 56fa734f7..000000000 --- a/test/testcase/common.py +++ /dev/null @@ -1,433 +0,0 @@ -""" -Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved -Use of this source code is governed by a BSD-style license that can be -found in the LICENSE file. -""" - -import subprocess -import filecmp -import os -import time -import nose.tools -import json - -from conf import const - -def check_core(): - """ - if system core path is not current directory, this function can not catch the core. - """ - ret = runcmd("cd %s && ls|grep core" % (const.teracli_dir), ignore_status=True) - assert( ret == 1 ) - -def runcmd(cmd, ignore_status=False): - """ - run cmd and return code - """ - print time.strftime("%Y%m%d-%H%M%S") + " command: "+cmd - p = subprocess.Popen(cmd,stdout=subprocess.PIPE,stderr=subprocess.PIPE,shell=True) - (out,err) = p.communicate() - print "stdout: " - print out - print "stderr: " - print err - print "returncode: %d" % p.returncode - p.wait() - ret = p.returncode - if not ignore_status: - assert( ret == 0 ) - return ret - -def runcmd_output(cmd, ignore_status=False): - """ - run cmd and return code - """ - print time.strftime("%Y%m%d-%H%M%S") + " command: "+cmd - p = subprocess.Popen(cmd,stdout=subprocess.PIPE,stderr=subprocess.PIPE,shell=True) - (out,err) = p.communicate() - print "stdout: " - print out - print "stderr: " - print err - print "returncode: %d" % p.returncode - ret = p.returncode - if not ignore_status: - assert( ret == 0 ) - return out.strip() - -def wait_table_disabled(tablename): - retry_times = 10 - while( retry_times > 0 ): - time.sleep(2) - retry_times = retry_times - 1 - disable_count = runcmd_output('cd %s && ./teracli show %s|grep kTabletDisable|wc -l' % (const.teracli_dir, tablename), ignore_status=True) - tablet_count = runcmd_output('cd %s && ./teracli show %s|grep %s|wc -l' % (const.teracli_dir, tablename, tablename), ignore_status=True) - if ( disable_count == tablet_count ): - return - assert( retry_times > 0 ) - -def drop_table(tablename): - ret = runcmd('cd %s && ./teracli show %s' % (const.teracli_dir, tablename), ignore_status=True) - if(ret == 0): - runcmd('cd %s && ./teracli disable %s' % (const.teracli_dir, tablename) ) - wait_table_disabled(tablename) - runcmd('cd %s && ./teracli show %s' % (const.teracli_dir, tablename) ) - runcmd('cd %s && ./teracli drop %s' % (const.teracli_dir, tablename) ) - time.sleep(5) - -def cleanup(): - """ - cleanup - """ - drop_table("test") - files = os.listdir('.') - for f in files: - if f.endswith('.out'): - os.remove(f) - -def print_debug_msg(sid=0, msg=""): - """ - provide general print interface - """ - print "@%d======================%s" % (sid, msg) - -def execute_and_check_returncode(cmd, code): - print(cmd) - ret = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - ret.communicate() - nose.tools.assert_equal(ret.returncode, code) - -def exe_and_check_res(cmd): - """ - execute cmd and check result - """ - - print cmd - ret = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - nose.tools.assert_equal(ret.stderr.readlines(), []) - - -def clear_env(): - """ - clear env - """ - - print_debug_msg(4, "delete table_test001 and table_test002, clear env") - drop_table("table_test001") - drop_table("table_test002") - - -def cluster_op(op): - if op == 'kill': - print 'kill cluster' - ret = subprocess.Popen(const.kill_script, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - print ''.join(ret.stdout.readlines()) - print ''.join(ret.stderr.readlines()) - elif op == 'launch': - print 'launch cluster' - ret = subprocess.Popen(const.launch_script, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - print ''.join(ret.stdout.readlines()) - print ''.join(ret.stderr.readlines()) - elif op == 'launch_ts_first': - print 'launch cluster' - ret = subprocess.Popen(const.launch_ts_first_script, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - print ''.join(ret.stdout.readlines()) - else: - print 'unknown argument' - nose.tools.assert_true(False) - - -def create_kv_table(): - print 'create kv table' - cleanup() - ret = subprocess.Popen(const.teracli_binary + ' create test', stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - print ''.join(ret.stdout.readlines()) - print ''.join(ret.stderr.readlines()) - - -def create_singleversion_table(): - print 'create single version table' - cleanup() - ret = subprocess.Popen(const.teracli_binary + ' create "test{cf0, cf1}"', - stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - print ''.join(ret.stdout.readlines()) - print ''.join(ret.stderr.readlines()) - - -def create_multiversion_table(): - print 'create multi version table' - cleanup() - ret = subprocess.Popen(const.teracli_binary + ' create "test{cf0, cf1}"', - stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - print ''.join(ret.stdout.readlines()) - print ''.join(ret.stderr.readlines()) - - -def createbyfile(schema, deli=''): - """ - This function creates a table according to a specified schema - :param schema: schema file path - :param deli: deli file path - :return: None - """ - - cleanup() - create_cmd = '{teracli} createbyfile {schema} {deli}'.format(teracli=const.teracli_binary, schema=schema, deli=deli) - print create_cmd - ret = subprocess.Popen(create_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - print ''.join(ret.stdout.readlines()) - print ''.join(ret.stderr.readlines()) - - -def rowread_table(table_name, file_path): - allv = 'scan' - flags = '--printable=false' - - tmpfile = 'tmp.file' - scan_cmd = '{teracli} {flags} {op} {table_name} "" "" > {out}'.format( - teracli=const.teracli_binary, flags=flags, op=allv, table_name=table_name, out=tmpfile) - print scan_cmd - ret = subprocess.Popen(scan_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - ret.communicate() - - tmpfile2 = 'tmp.file2' - awk_args = '' - awk_args += """-F ':' '{print $1}'""" - awk_cmd = 'awk {args} {out} |sort -u > {out1}'.format( - args=awk_args, out=tmpfile, out1=tmpfile2) - print awk_cmd - ret = subprocess.Popen(awk_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - ret.communicate() - - rowread_cmd = 'while read line; do {teracli} {flags} get {table_name} $line; done < {out1} > {output}'.format( - teracli=const.teracli_binary, flags=flags, table_name=table_name, out1=tmpfile2, output=file_path) - ret = subprocess.Popen(rowread_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - ret.communicate() - - #ret = subprocess.Popen('rm -rf tmp.file tmp.file2', stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - #ret.communicate() - - -def run_tera_mark(file_path, op, table_name, random, value_size, num, key_size, cf='', key_seed=1, value_seed=1): - """ - This function provide means to write data into Tera and dump a copy into a specified file at the same time. - :param file_path: a copy of data will be dumped into file_path for future use - :param op: ['w' | 'd'], 'w' indicates write and 'd' indicates delete - :param table_name: table name - :param random: ['random' | 'seq'] - :param value_size: value size in Bytes - :param num: entry number - :param key_size: key size in Bytes - :param cf: cf list, e.g. 'cf0:qual,cf1:flag'. Empty cf list for kv mode. Notice: no space in between - :param key_seed: seed for random key generator - :param value_seed: seed for random value generator - :return: None - """ - - # write data into Tera - tera_bench_args = "" - awk_args = "" - - if cf == '': # kv mode - tera_bench_args += """--compression_ratio=1 --key_seed={kseed} --value_seed={vseed} """\ - """ --value_size={vsize} --num={num} --benchmarks={random} """\ - """ --key_size={ksize} """.format(kseed=key_seed, vseed=value_seed, - vsize=value_size, num=num, random=random, ksize=key_size) - if op == 'd': # delete - awk_args += """-F '\t' '{print $1}'""" - else: # write - awk_args += """-F '\t' '{print $1"\t"$2}'""" - else: # table - tera_bench_args += """--cf={cf} --compression_ratio=1 --key_seed={kseed} --value_seed={vseed} """\ - """ --value_size={vsize} --num={num} --benchmarks={random} """\ - """ --key_size={ksize} """.format(cf=cf, kseed=key_seed, vseed=value_seed, - vsize=value_size, num=num, random=random, ksize=key_size) - if op == 'd': # delete - awk_args += """-F '\t' '{print $1"\t"$3"\t"$4}'""" - else: # write - awk_args += """-F '\t' '{print $1"\t"$2"\t"$3"\t"$4}'""" - - tera_mark_args = """--mode={op} --tablename={table_name} --type=async """\ - """ --verify=false""".format(op=op, table_name=table_name) - - cmd = '{tera_bench} {bench_args} | awk {awk_args} | {tera_mark} {mark_args}'.format( - tera_bench=const.tera_bench_binary, bench_args=tera_bench_args, awk_args=awk_args, - tera_mark=const.tera_mark_binary, mark_args=tera_mark_args) - - runcmd(cmd) - - # write/append data to a file for comparison - for path, is_append in file_path: - if cf == '': - awk_args = """-F '\t' '{print $1"::0:"$2}'""" - else: - awk_args = """-F '\t' '{print $1":"$3":"$4":"$2}'""" - - redirect_op = '' - if is_append is True: - redirect_op += '>>' - else: - redirect_op += '>' - - dump_cmd = '{tera_bench} {tera_bench_args} | awk {awk_args} {redirect_op} {out}'.format( - tera_bench=const.tera_bench_binary, tera_bench_args=tera_bench_args, - redirect_op=redirect_op, awk_args=awk_args, out=path) - runcmd(dump_cmd) - - -def scan_table(table_name, file_path, allversion, snapshot=0, is_async=False): - """ - This function scans the table and write the output into file_path - :param table_name: table name - :param file_path: write scan output into file_path - :param allversion: [True | False] - :param is_async: True for batch scan - """ - - allv = '' - if allversion is True: - allv += 'scanallv' - else: - allv += 'scan' - - if is_async is True: - async_flag = '--tera_sdk_batch_scan_enabled=true --v=30 --printable=false' - else: - async_flag = '--tera_sdk_batch_scan_enabled=false --printable=false' - - snapshot_args = '' - if snapshot != 0: - snapshot_args += '--snapshot={snapshot}'.format(snapshot=snapshot) - - scan_cmd = '{teracli} {flags} {op} {table_name} "" "" {snapshot} > {out}'.format( - teracli=const.teracli_binary, flags=async_flag, op=allv, table_name=table_name, snapshot=snapshot_args, out=file_path) - runcmd(scan_cmd) - - -def get_tablet_list(table_name): - # TODO: need a more elegant & general way to obtain tablet info - show_cmd = '{teracli} show {table}'.format(teracli=const.teracli_binary, table=table_name) - print show_cmd - ret = subprocess.Popen(show_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - tablet_info = ret.stdout.readlines()[5:] # tablet info starts from the 6th line - tablet_info = filter(lambda x: x != '\n', tablet_info) - tablet_paths = [] - for tablet in tablet_info: - comp = filter(None, tablet.split(' ')) - tablet_paths.append(comp[2]) - return tablet_paths - - -def parse_showinfo(): - ''' - if you want to get show info, you can call this function to return with a dict - ''' - show_cmd = '{teracli} show'.format(teracli=const.teracli_binary) - print show_cmd - ret = subprocess.Popen(show_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - table_info = ret.stdout.readlines()[2:-1] - retinfo = {} - for line in table_info: - line = line.strip("\n") - line_list = line.split(" ") - list_ret = [line_list[i] for i in range(len(line_list)) if line_list[i] != ""] - - retinfo[list_ret[1]] = {} - retinfo[list_ret[1]]["status"] = list_ret[2] - retinfo[list_ret[1]]["size"] = list_ret[3] - retinfo[list_ret[1]]["lg_size"] = [list_ret[j] for j in range(4, len(list_ret) - 2)] - retinfo[list_ret[1]]["tablet"] = list_ret[len(list_ret) - 2] - retinfo[list_ret[1]]["busy"] = list_ret[len(list_ret) - 1] - - print json.dumps(retinfo) - return retinfo - - -def compact_tablets(tablet_list): - # TODO: compact may timeout - for tablet in tablet_list: - compact_cmd = '{teracli} tablet compact {tablet}'.format(teracli=const.teracli_binary, tablet=tablet) - print compact_cmd - ret = subprocess.Popen(compact_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - print ''.join(ret.stdout.readlines()) - print ''.join(ret.stderr.readlines()) - - -def snapshot_op(table_name): - """ - This function creates | deletes a snapshot - :param table_name: table name - :return: snapshot id on success, None otherwise - """ - # TODO: delete snapshot - snapshot_cmd = '{teracli} snapshot {table_name} create'.format(teracli=const.teracli_binary, table_name=table_name) - print snapshot_cmd - ret = subprocess.Popen(snapshot_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - out = ret.stdout.readlines() - ret = '' - try: - ret += out[1] - except IndexError: - return None - - if ret.startswith('new snapshot: '): - snapshot_id = ret[len('new snapshot: '):-1] - if snapshot_id.isdigit(): - return int(snapshot_id) - return None - - -def rollback_op(table_name, snapshot, rollback_name): - """ - Invoke rollback action - :param table_name: table name - :param snapshot: rollback to a specific snapshot - :return: None - """ - rollback_cmd = '{teracli} snapshot {table_name} rollback --snapshot={snapshot} --rollback_name={rname}'.\ - format(teracli=const.teracli_binary, table_name=table_name, snapshot=snapshot, rname=rollback_name) - print rollback_cmd - ret = subprocess.Popen(rollback_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - print ''.join(ret.stdout.readlines()) - - -def compare_files(file1, file2, need_sort): - """ - This function compares two files. - :param file1: file path to the first file - :param file2: file path to the second file - :param need_sort: whether the files need to be sorted - :return: True if the files are the same, False on the other hand - """ - if need_sort is True: - sort_cmd = 'sort {f1} > {f1}.sort; sort {f2} > {f2}.sort'.format(f1=file1, f2=file2) - print sort_cmd - ret = subprocess.Popen(sort_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - print ''.join(ret.stdout.readlines()) - print ''.join(ret.stderr.readlines()) - os.rename(file1+'.sort', file1) - os.rename(file2+'.sort', file2) - return filecmp.cmp(file1, file2, shallow=False) - - -def file_is_empty(file_path): - """ - This function test whether a file is empty - :param file_path: file path - :return: True if the file is empty, False on the other hand - """ - return not os.path.getsize(file_path) - - -def cleanup_files(file_list): - for file_path in file_list: - os.remove(file_path) - -def check_show_user_result(cmd, should_contain, substr): - print(cmd) - ret = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) - stdoutdata = ''.join(ret.stdout.readlines()) - if should_contain: - nose.tools.assert_true(substr in stdoutdata) - else: - nose.tools.assert_true(substr not in stdoutdata) diff --git a/test/testcase/conf.py b/test/testcase/conf.py deleted file mode 100644 index 47ae34abc..000000000 --- a/test/testcase/conf.py +++ /dev/null @@ -1,20 +0,0 @@ -""" -Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved -Use of this source code is governed by a BSD-style license that can be -found in the LICENSE file. -""" - - -class Const: - def __init__(self): - self.tera_bench_binary = './tera_bench' - self.tera_mark_binary = './tera_mark' - self.teracli_binary = './teracli' - self.kill_script = './kill_tera.sh' - self.launch_script = './launch_tera.sh' - self.launch_ts_first_script = './launch_ts_first.sh' - self.data_path = 'testcase/data/' - self.user_root_flag_path = './testcase/data/tera.flag.root' - self.teracli_dir = '.' - -const = Const() diff --git a/test/testcase/data/create_table_schema b/test/testcase/data/create_table_schema deleted file mode 100644 index 666d2d418..000000000 --- a/test/testcase/data/create_table_schema +++ /dev/null @@ -1,12 +0,0 @@ -table_test001 { - lg_index { - update_flag - }, - lg_props { - level, - weight - }, - lg_raw { - data - } -} diff --git a/test/testcase/data/deli.10 b/test/testcase/data/deli.10 deleted file mode 100644 index de87b38dc..000000000 --- a/test/testcase/data/deli.10 +++ /dev/null @@ -1,9 +0,0 @@ -00000000000214773994 -00000000000429670287 -00000000000644585408 -00000000000859991147 -00000000001074648851 -00000000001288272903 -00000000001502155677 -00000000001717101851 -00000000001932156209 diff --git a/test/testcase/data/kv.schema b/test/testcase/data/kv.schema deleted file mode 100644 index 76e579ae4..000000000 --- a/test/testcase/data/kv.schema +++ /dev/null @@ -1,2 +0,0 @@ -test - diff --git a/test/testcase/data/table.schema b/test/testcase/data/table.schema deleted file mode 100644 index 2eaba3f44..000000000 --- a/test/testcase/data/table.schema +++ /dev/null @@ -1,6 +0,0 @@ -test { - lg0 { - cf0, - cf1 - } -} diff --git a/test/testcase/data/tera.flag.root b/test/testcase/data/tera.flag.root deleted file mode 100644 index ece8b6cd6..000000000 --- a/test/testcase/data/tera.flag.root +++ /dev/null @@ -1,19 +0,0 @@ -# 指定各种路径,onebox中无需修改 ---log_dir=../log ---tera_tabletnode_path_prefix=../data/ ---tera_zk_lib_log_path=../log/zk.log - -# 指定使用本地文件系统 ---tera_leveldb_env_type=local - -## 是否使用zk -# 指定使用非zk模式, 但只能本机访问tera ---tera_zk_enabled=false -# 指定使用zk, 可以跨服务使用, 配置相应地址和路径即可 -#--tera_zk_enabled=true -#--tera_zk_addr_list=localhost:2181 -#--tera_zk_root_path=/tera - ---tera_acl_enabled=true ---tera_user_identity=root ---tera_user_passcode=helloroot diff --git a/test/testcase/shell_script/launch_ts_first.sh b/test/testcase/shell_script/launch_ts_first.sh deleted file mode 100755 index 02ceb6502..000000000 --- a/test/testcase/shell_script/launch_ts_first.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -CURRENT_DIR=`dirname $0` -source ${CURRENT_DIR}/config - -# make sure tera is killed -./kill_tera.sh - -FAKE_ZK_PATH_PREFIX="${CURRENT_DIR}/../fakezk" -TIME=`date +%Y-%m-%d-%H:%M:%S` - -# init all fake zk node -rm -rf ${FAKE_ZK_PATH_PREFIX} -mkdir -p ${FAKE_ZK_PATH_PREFIX}/master-lock -mkdir -p ${FAKE_ZK_PATH_PREFIX}/ts -mkdir -p ${FAKE_ZK_PATH_PREFIX}/kick - -# backup tabletnode log & launch tera tabletnodes -if [ ! -x ${CURRENT_DIR}/../log ];then - mkdir ${CURRENT_DIR}/../log -fi - -# backup master log & launch tera master -echo "launching master..." -MASTER_LOG_FILE=${CURRENT_DIR}/../log/master.stderr -if [ -f ${MASTER_LOG_FILE} ];then - mv ${MASTER_LOG_FILE} ${MASTER_LOG_FILE}.${TIME} -fi -${CURRENT_DIR}/tera_master \ - --flagfile=${CURRENT_DIR}/../conf/tera.flag \ - --tera_master_port=${PORT} \ - --tera_fake_zk_path_prefix=${FAKE_ZK_PATH_PREFIX} \ - --tera_log_prefix=master &> ${MASTER_LOG_FILE} ${TABLETNODE_LOG_FILE} = 774 * 0.9 and size <= 774 * 1.1: - nose.tools.assert_true(True) - else: - nose.tools.assert_true(False) - - def test_showsize_table(self): - common.create_singleversion_table() - table_name = "test" - scan_file = 'scan.out' - common.run_tera_mark([], op='w', table_name='test', cf='cf0:q,cf1:q', random='random', - key_seed=1, value_seed=10, value_size=100, num=5000, key_size=20) - show_ret = common.parse_showinfo() - time.sleep(3) - show_ret = common.parse_showinfo() - size = float(show_ret[table_name]["size"][:-1]) - if size >= 1.65 * 0.95 and size <= 1.65 * 1.05: - nose.tools.assert_true(True) - else: - nose.tools.assert_true(False) diff --git a/test/testcase/test_single_row_txn.py b/test/testcase/test_single_row_txn.py deleted file mode 100644 index 26e584f14..000000000 --- a/test/testcase/test_single_row_txn.py +++ /dev/null @@ -1,62 +0,0 @@ -""" -Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved -Use of this source code is governed by a BSD-style license that can be -found in the LICENSE file. -""" - -import common -import nose -import re -import subprocess - - -def doSingleRowTxnBaseTest(value): - ''' - there is no concurrency in this case, - so if we insert a value with single-row-txn, - the commit should success and should found the value when read - ''' - input_str = 'txn start single_row_txn_test row\n' \ - + 'get single_row_txn_test row cf0:qu0\n' \ - + 'put single_row_txn_test row cf0:qu0 ' + value + '\n' \ - + 'txn commit\nquit\n' - output_str = 'row:cf0:qu0:.*:' + value - p = subprocess.Popen('./teracli', - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - shell=True) - p.stdin.write(input_str) - print p.communicate('') - - p = subprocess.Popen('./teracli get single_row_txn_test row cf0:qu0', - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - shell=True) - ret = p.communicate('')[0] - print ret - res_re = re.compile(output_str) - nose.tools.assert_true(res_re.match(ret) is not None) - - -def setUp(): - ''' - setup - ''' - common.drop_table("single_row_txn_test") - - cmd = "./teracli create 'single_row_txn_test{lg0{cf0}}'" - common.exe_and_check_res(cmd) - - -def testSingleRowTxnBase(): - # do test - doSingleRowTxnBaseTest('v1') - doSingleRowTxnBaseTest('v2') - - -def tearDown(): - # clear env - common.drop_table("single_row_txn_test") - pass diff --git a/test/testcase/test_write_read_update_delete.py b/test/testcase/test_write_read_update_delete.py deleted file mode 100644 index 4c7b2af97..000000000 --- a/test/testcase/test_write_read_update_delete.py +++ /dev/null @@ -1,54 +0,0 @@ -''' -Copyright (c) 2016, Baidu.com, Inc. All Rights Reserved -Use of this source code is governed by a BSD-style license that can be -found in the LICENSE file. -''' - -import common -import nose.tools -from TeraSdk import Client, TeraSdkException - - -table = None - -def setUp(): - #clear env - common.drop_table("crud_table") - - #set env - cmd = "./teracli create 'crud_table{lg0{cf0}}'" - common.exe_and_check_res(cmd) - global table - try: - client = Client("", "pysdk") - table = client.OpenTable("crud_table") - except TeraSdkException as e: - print(e.reason) - nose.tools.assert_true(False) - -def tearDown(): - pass - -''' -0. put -1. read -2. delete row -3. read -4. put -5. read -''' -def test_case_0(): - table.Put("row", "cf0", "qu0", "value") - nose.tools.assert_equal(table.Get("row", "cf0", "qu0", 0), "value") - - mu = table.NewRowMutation("row") - mu.DeleteRow() - table.ApplyMutation(mu) - - try: - table.Get("row", "cf0", "qu0", 0) - except TeraSdkException as e: - nose.tools.assert_true("not found" in e.reason) - - table.Put("row", "cf0", "qu0", "value") - nose.tools.assert_equal(table.Get("row", "cf0", "qu0", 0), "value")