diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c5f146a
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+# auto-cpp-rewriter
diff --git a/convert/CMakeLists.txt b/convert/CMakeLists.txt
new file mode 100644
index 0000000..8bddaf6
--- /dev/null
+++ b/convert/CMakeLists.txt
@@ -0,0 +1,111 @@
+set(LLVM_LINK_COMPONENTS support)
+add_compile_options(-Werror=return-type)  # a missing return statement is a hard error
+
+set(CMAKE_CXX_STANDARD 17)
+
+find_package(gflags)  # NOTE(review): not REQUIRED, unlike glog/absl below - confirm this is intended
+find_package (glog 0.6.0 REQUIRED)
+find_package (absl REQUIRED)
+
+include(FetchContent)
+
+FetchContent_Declare(json  # provides the nlohmann_json::nlohmann_json target linked below
+  GIT_REPOSITORY https://github.com/ArthurSonzogni/nlohmann_json_cmake_fetchcontent
+  GIT_TAG v3.10.2)
+
+FetchContent_GetProperties(json)
+if(NOT json_POPULATED)  # populate once, then add it without building its default targets
+  FetchContent_Populate(json)
+  add_subdirectory(${json_SOURCE_DIR} ${json_BINARY_DIR} EXCLUDE_FROM_ALL)
+endif()
+
+add_clang_executable(convert  # the `convert` tool and every translation unit it is built from
+  Env.cpp
+  Tool.cpp
+  Config.cpp
+  Convert.cpp
+  Deleter.cpp
+  ExprInfo.cpp
+  ExprParser.cpp
+  ExprParserDetail.cpp
+  ExprParserBSField.cpp
+  ConvertAction.cpp
+  LogicParser.cpp
+  info/Info.cpp
+  info/IfInfo.cpp
+  info/LoopInfo.cpp
+  info/InfoBase.cpp
+  info/NewVarDef.cpp
+  info/MethodInfo.cpp
+  info/ActionMethodInfo.cpp
+  info/NewActionParam.cpp
+  info/PrefixPair.cpp
+  info/CommonInfo.cpp
+  info/CommonInfoCore.cpp
+  info/CommonInfoLeaf.cpp
+  info/CommonInfoFixed.cpp
+  info/CommonInfoFixedList.cpp
+  info/CommonInfoBodyText.cpp
+  info/CommonInfoDetail.cpp
+  info/CommonInfoPrepare.cpp
+  info/CommonInfoNormal.cpp
+  info/CommonInfoMultiIntList.cpp
+  info/CommonInfoMiddleNode.cpp
+  info/VarDeclInfo.cpp
+  info/FeatureInfo.cpp
+  info/BinaryOpInfo.cpp
+  info/ConstructorInfo.cpp
+  info/MiddleNodeInfo.cpp
+  info/SeqListInfo.cpp
+  info/ActionDetailInfo.cpp
+  info/ActionDetailFixedInfo.cpp
+  info/BSFieldInfo.cpp
+  rule/RuleBase.cpp
+  rule/PreRule.cpp
+  rule/GeneralRule.cpp
+  rule/MiddleNodeRule.cpp
+  rule/CommonInfoRule.cpp
+  rule/ActionDetailRule.cpp
+  rule/DoubleListRule.cpp
+  rule/ProtoListRule.cpp
+  rule/SeqListRule.cpp
+  rule/AddFeatureMethodRule.cpp
+  rule/HashFnRule.cpp
+  rule/QueryTokenRule.cpp
+  rule/StrRule.cpp
+  rule/BSFieldOrderRule.cpp
+  rule/proto_list/ProtoListExprInfo.cpp
+  handler/StrictRewriter.cpp
+  handler/OverviewHandler.cpp
+  handler/AdlogFieldHandler.cpp
+  handler/FieldDeclHandler.cpp
+  handler/LogicHandler.cpp
+  handler/BSFieldHandler.cpp
+  expr_parser/ExprParserQueryToken.cpp
+  visitor/CtorVisitor.cpp
+  visitor/BSCtorVisitor.cpp
+  visitor/FieldDeclVisitor.cpp
+  visitor/ExtractMethodVisitor.cpp
+  visitor/BSExtractMethodVisitor.cpp
+  visitor/MiddleNodeJson.cpp
+  matcher_callback/FeatureDeclCallback.cpp
+  matcher_callback/InferFilterCallback.cpp
+  matcher_callback/TypeAliasCallback.cpp
+  matcher_callback/BSFeatureDeclCallback.cpp
+  matcher_callback/BSTypeAliasCallback.cpp
+  )
+
+target_link_libraries(convert
+  PUBLIC
+  clangTooling
+  clangBasic
+  clangASTMatchers
+  clangFrontend
+  clangSerialization
+  clangTooling  # NOTE(review): already listed above; likely redundant - confirm before removing
+  glog
+  gflags
+  nlohmann_json::nlohmann_json
+  absl::strings
+  absl::optional
+)
diff --git a/convert/Config.cpp b/convert/Config.cpp
new file mode 100644
index 0000000..7fb1779
--- /dev/null
+++ b/convert/Config.cpp
@@ -0,0 +1,20 @@
+#include "Config.h"
+
+namespace ks {
+namespace ad_algorithm {
+namespace convert {
+
+FeatureInfo* GlobalConfig::feature_info_ptr(const std::string& feature_name) {  // Get-or-create the FeatureInfo stored under feature_name.
+  std::lock_guard lock(mu);  // hold the config mutex for the whole lookup/insert
+  auto it = feature_info.find(feature_name);
+  if (it != feature_info.end()) {
+    return &(it->second);  // already registered: hand back the existing entry
+  }
+
+  it = feature_info.insert({feature_name, FeatureInfo(feature_name)}).first;  // first request: register a new FeatureInfo
+  return &(it->second);  // never null: points at the entry stored in the map
+}
+
+}  // namespace convert
+}  // namespace ad_algorithm
+}  // namespace ks
diff --git a/convert/Config.h b/convert/Config.h
new file mode 100644
index 0000000..d6c3d91
--- /dev/null
+++ b/convert/Config.h
@@ -0,0 +1,84 @@
+#pragma once
+
+#include  // NOTE(review): header names inside <...> are missing throughout this copy of the patch - restore from the original file
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "clang/AST/AST.h"
+#include "clang/AST/ASTConsumer.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Rewrite/Core/Rewriter.h"
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Tooling.h"
+#include "info/FeatureInfo.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace ks {
+namespace ad_algorithm {
+namespace convert {
+
+using nlohmann::json;
+
+class FeatureInfo;
+
+class GlobalConfig final {  // Process-wide settings and shared conversion state, reached through Instance().
+ public:
+  static GlobalConfig* Instance() {  // returns the address of a single function-local static instance
+    static GlobalConfig instance;
+    return &instance;
+  }
+
+  FeatureInfo* feature_info_ptr(const std::string& feature_name);  // get-or-create the entry in feature_info (defined in Config.cpp)
+
+  std::mutex mu;  // taken by feature_info_ptr() and by the ConvertAction handlers
+  clang::FileID file_id;
+  clang::SourceManager* source_manager = nullptr;
+
+  std::string cmd = "";  // the fields below are copied from the command-line options in main()
+  bool remove_comment = false;
+  bool dump_ast = false;
+  bool overwrite = false;
+  std::string filename;
+  std::string message_def_filename;
+  std::string field_detail_filename;
+  bool use_reco_user_info = false;
+  bool rewrite_reco_user_info = false;
+
+  std::string middle_node_json_file = "data/middle_node.json";
+
+  json all_adlog_fields = json::object();
+  json feature_def = json::object();
+  json fast_feature_def = json::object();
+  std::map enum_map;  // NOTE(review): template arguments are missing on the containers below in this copy - restore from the original header
+
+  std::vector filenames;
+  std::string feature_list_filename =
+      "teams/ad/ad_algorithm/feature/fast/impl/feature_list_complete_adlog.cc";
+  std::map feature_filename_map;
+  std::map feature_content_map;
+  std::unordered_map feature_info;  // keyed by feature name; values are handed out by feature_info_ptr()
+
+  /// Infer-filter function implementations; kept in a plain map for now, to be refactored later.
+  std::unordered_map infer_filter_funcs;
+};
+
+}  // namespace convert
+}  // namespace ad_algorithm
+}  // namespace ks
diff --git a/convert/Convert.cpp b/convert/Convert.cpp
new file mode 100644
index 0000000..a9af3e7
--- /dev/null
+++ b/convert/Convert.cpp
@@ -0,0 +1,108 @@
+#include
+#include
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/Support/CommandLine.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+
+#include "clang/Rewrite/Core/Rewriter.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/AST/AST.h"
+#include "clang/AST/ASTConsumer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "llvm/ADT/StringRef.h"
+#include
+#include
+#include
+#include
+#include
+
+#include "ConvertAction.h"
+#include "LogicParser.h"
+
+using namespace llvm;
+using namespace clang;
+using namespace clang::tooling;
+
+static llvm::cl::OptionCategory MatcherCategory("Matcher");
+
+cl::opt Cmd("cmd",  // NOTE(review): the cl::opt template arguments are missing in this copy of the patch - restore from the original file
+    cl::desc("cmd (default = 'hello') "),
+    cl::value_desc("cmd"),
+    cl::init("hello"));
+
+cl::opt RemoveComment("remove-comment",
+    cl::desc("remove comment"),
+    cl::init(false));
+
+cl::opt DumpAst("dump-ast",
+    cl::desc("dump ast"),
+    cl::init(false));
+
+cl::opt Overwrite("overwrite",
+    cl::desc("overwrite exists bs file, default false"),
+    cl::init(false));
+
+cl::opt Filename("filename",
+    cl::desc("filename"),
+    cl::init(""));
+
+cl::opt FieldDetailFilename("field-detail-filename",
+    cl::desc("field detail filename for parse result"),
+    cl::init(""));
+
+cl::opt MessageDefFilename("message-def-filename",
+    cl::desc("json filename for message def"),
+    cl::init(""));
+
+cl::opt UseRecoUserInfo("use_reco_user_info",
+    cl::desc("use reco user info"),
+    cl::init(false));
+
+DECLARE_bool(logtostderr);
+
+using ks::ad_algorithm::convert::GlobalConfig;
+using ks::ad_algorithm::convert::ConvertAction;
+using ks::ad_algorithm::convert::LogicParser;
+
+int main(int argc, const char **argv) {  // Entry point: parse options, copy them into GlobalConfig, dispatch on the "cmd" option.
+  google::InitGoogleLogging(argv[0]);
+  FLAGS_logtostderr = 1;
+
+  auto ExpectedParser = CommonOptionsParser::create(argc, argv, MatcherCategory);
+  if (!ExpectedParser) {
+    LOG(ERROR) << "ExpectedParser error: " << llvm::toString(ExpectedParser.takeError());  // consume the llvm::Error: dropping it unhandled aborts in checked LLVM builds
+    return 1;
+  }
+
+  auto config = GlobalConfig::Instance();  // publish the parsed options to the rest of the tool
+  config->cmd = Cmd;
+  config->remove_comment = RemoveComment;
+  config->dump_ast = DumpAst;
+  config->overwrite = Overwrite;
+  config->filename = Filename;
+  config->field_detail_filename = FieldDetailFilename;
+  config->message_def_filename = MessageDefFilename;
+  config->use_reco_user_info = UseRecoUserInfo;
+
+  LOG(INFO) << "Cmd: " << config->cmd;
+
+  CommonOptionsParser& op = ExpectedParser.get();
+  ClangTool Tool(op.getCompilations(), op.getSourcePathList());
+
+  if (config->cmd == "hello") {
+    LOG(INFO) << "hello";
+  } else if (config->cmd == "convert") {
+    return Tool.run(newFrontendActionFactory().get());  // NOTE(review): template argument lost in this copy - presumably ConvertAction; confirm
+  } else if (config->cmd == "parse_logic") {
+    return Tool.run(newFrontendActionFactory().get());  // NOTE(review): template argument lost in this copy - presumably LogicParser; confirm
+  } else {
+    LOG(ERROR) << "unsupported cmd: " << config->cmd;
+    return 1;  // an unknown command is a failure, so report it through the exit status
+  }
+  return 0;
+}
+
diff --git a/convert/ConvertAction.cpp b/convert/ConvertAction.cpp
new file mode 100644
index 0000000..daf5850
--- /dev/null
+++ b/convert/ConvertAction.cpp
@@ -0,0 +1,441 @@
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+// Declares clang::SyntaxOnlyAction.
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Tooling.h"
+// Declares llvm::cl::extrahelp.
+#include "llvm/Support/CommandLine.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Lex/Lexer.h"
+
+#include "clang/Rewrite/Core/Rewriter.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/AST/AST.h"
+#include "clang/AST/ASTConsumer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "llvm/ADT/StringRef.h"
+
+#include "Tool.h"
+#include "info/FeatureInfo.h"
+#include "ConvertAction.h"
+#include "matcher_callback/InferFilterCallback.h"
+
+namespace ks {
+namespace ad_algorithm {
+namespace convert {
+
+using clang::TK_IgnoreUnlessSpelledInSource;
+using clang::ast_matchers::decl;
+using clang::ast_matchers::namedDecl;
+using clang::ast_matchers::matchesName;
+using clang::ast_matchers::typeAliasDecl;
+using clang::ast_matchers::traverse;
+using clang::ast_matchers::cxxRecordDecl;
+using clang::ast_matchers::isDerivedFrom;
+using clang::ast_matchers::unless;
+using clang::ast_matchers::hasName;
+using clang::ast_matchers::isExpandedFromMacro;
+using clang::ast_matchers::isDerivedFrom;
+
+bool RmComment::HandleComment(clang::Preprocessor& preprocessor,  // Blanks out the comment's source range in the rewriter.
+                              clang::SourceRange source_range) {
+  rewriter.ReplaceText(source_range, "");
+  return false;
+}
+
+ConvertASTConsumer::ConvertASTConsumer(clang::Rewriter &R) :  // Registers one matcher/callback pair on each of the three finders.
+  rewriter_(R),
+  feature_decl_callback_(R),
+  type_alias_callback_(R),
+  infer_filter_callback_(R) {
+  // Currently only typeAliasDecl() can be matched here; a better matcher may exist.
+  auto TypeAliasMatcher = decl(typeAliasDecl(),
+                               namedDecl(matchesName("Extract.*"))).bind("TypeAlias");
+  type_alias_finder_.addMatcher(TypeAliasMatcher, &type_alias_callback_);
+
+  // TK_IgnoreUnlessSpelledInSource is used to ignore templates
+  auto FeatureDeclMatcher = traverse(TK_IgnoreUnlessSpelledInSource,
+    cxxRecordDecl(isDerivedFrom(hasName("FastFeature")),
+    unless(isExpandedFromMacro("DISALLOW_COPY_AND_ASSIGN")),
+    unless(isExpandedFromMacro("REGISTER_EXTRACTOR")))).bind("FeatureDecl");
+
+  match_finder_.addMatcher(FeatureDeclMatcher, &feature_decl_callback_);
+
+  // infer filter
+  auto InferFilterMatcher = cxxRecordDecl(hasName("ItemFilter")).bind("InferFilter");
+  infer_filter_finder_.addMatcher(InferFilterMatcher, &infer_filter_callback_);
+}
+
+void ConvertASTConsumer::HandleTranslationUnit(clang::ASTContext &Context) {  // Runs the three finders over the AST in a fixed order.
+  // Parse template parameters first: this must run before parsing Extract, which uses them.
+  type_alias_finder_.matchAST(Context);
+
+  // Parse the Extract logic and rewrite it.
+  match_finder_.matchAST(Context);
+
+  // Handle the infer filter.
+  infer_filter_finder_.matchAST(Context);
+}
+
+void ConvertAction::EndSourceFileAction() {
+  // Run each handler in order.
+  handle_features();
+  handle_infer_filters();
+  handle_item_filters();
+  handle_label_extractor();
+}
+
+void ConvertAction::handle_features() {  // Writes the BS .h/.cc pair for each collected extractor, then dumps the per-feature field JSON.
+  auto config = GlobalConfig::Instance();
+  {
+    std::lock_guard lock(config->mu);
+
+    // Process the feature extractor classes one by one.
+    std::vector paths;  // origin files handled so far; used below to rewrite paths inside the header text
+    for (auto it = config->feature_info.begin(); it != config->feature_info.end(); it++) {
+      const std::string& extractor_name = it->first;
+
+      // Skip the `ItemFilter` class.
+      if (extractor_name == "ItemFilter") {
+        continue;
+      }
+
+      std::string bs_extractor_name = std::string("BS") + extractor_name;
+      const FeatureInfo& feature_info = it->second;
+
+      const std::string& origin_file = feature_info.origin_file();
+      if (origin_file.size() == 0) {
+        LOG(INFO) << "origin_file is empty! feature_name: " << extractor_name;
+        continue;
+      }
+
+      // Skip files that have already been rewritten.
+      if (tool::is_bs_already_rewritten(origin_file)) {
+        if (!config->overwrite) {
+          LOG(INFO) << tool::get_bs_correspond_path(origin_file) << " already exists, skip";  // fix: separating space was missing
+          continue;
+        }
+      }
+
+      paths.push_back(feature_info.origin_file());
+
+      // Read the original file content.
+      const clang::FileID& file_id = feature_info.file_id();
+      std::string header_content;
+      llvm::raw_string_ostream raw_string(header_content);
+
+      // Fetch the rewritten content.
+      rewriter_.getEditBuffer(file_id).write(raw_string);
+
+      // Apply the simple string replacements.
+      header_content = replace_simple(header_content, extractor_name);
+
+      std::string new_h_filename = tool::get_bs_correspond_path(origin_file);
+
+      for (auto& path : paths) {
+        auto pos = header_content.find(path);
+        if (pos != std::string::npos) {
+          std::string new_path = tool::get_bs_correspond_path(path);  // fix: map each path to its own BS path, not to the current file's
+          header_content.replace(pos, path.size(), new_path);
+        }
+      }
+
+      // format
+      std::string cmd_format(
+        "clang-format "
+        "--style=\"{BasedOnStyle: Google, ColumnLimit: 110, IndentCaseLabels: true}\" -i "); // NOLINT
+
+      // Write the .h file
+      std::error_code ec;
+      std::ofstream wfile(new_h_filename.c_str());
+      if (wfile.is_open()) {
+        wfile << header_content;
+      }
+      wfile.close();
+      std::system((cmd_format + new_h_filename).c_str());
+      LOG(INFO) << "convert done, .h: " << new_h_filename;
+
+      // Write the .cc file
+      if (!feature_info.is_template()) {
+        std::string new_cc_filename = std::regex_replace(new_h_filename, std::regex("\\.h"), ".cc");
+        write_cc_file(feature_info, new_h_filename, new_cc_filename, bs_extractor_name);
+        std::system((cmd_format + new_cc_filename).c_str());
+        LOG(INFO) << "convert done, .cc: " << new_cc_filename;
+      }
+    }
+
+    LOG(INFO) << "start write bs field";
+    json field_output = json::object();
+    for (auto it_feature = config->feature_info.begin(); it_feature != config->feature_info.end();
+         it_feature++) {
+      const std::string& extractor_name = it_feature->first;
+      FeatureInfo& feature_info = it_feature->second;
+
+      feature_info.gen_output();
+      field_output[extractor_name] = feature_info.output();
+    }
+
+    if (config->field_detail_filename.size() > 0) {
+      std::ofstream out_bs_fields(std::string("../data/") + config->field_detail_filename);
+      out_bs_fields << field_output.dump(4);
+      out_bs_fields.close();
+      LOG(INFO) << "write field to file: ../data/" << config->field_detail_filename;  // fix: log the path that is actually written
+    } else {
+      LOG(INFO) << "field_detail_filename is empty!";
+    }
+
+    LOG(INFO) << "done";
+  }
+}
+
+void ConvertAction::handle_infer_filters() {  // Writes the rewritten `ItemFilter` class to the fixed bs_item_filter_auto.h/.cc paths.
+  auto config = GlobalConfig::Instance();
+  {
+    std::lock_guard lock(config->mu);
+    std::vector paths;
+    for (auto it = config->feature_info.begin(); it != config->feature_info.end(); it++) {
+      const std::string& extractor_name = it->first;
+      if (extractor_name != "ItemFilter") {  // only the `ItemFilter` entry is handled here
+        continue;
+      }
+
+      const FeatureInfo& feature_info = it->second;
+
+      const std::string& origin_file = feature_info.origin_file();
+      if (origin_file.size() == 0) {
+        LOG(INFO) << "origin_file is empty! class_name: " << extractor_name;
+        continue;
+      }
+
+      const clang::FileID& file_id = feature_info.file_id();
+      std::string header_content;
+      llvm::raw_string_ostream raw_string(header_content);
+      rewriter_.getEditBuffer(file_id).write(raw_string);
+      header_content = replace_simple_infer_filter(header_content);
+
+      std::string new_h_filename = "teams/ad/ad_nn/utils/bs_item_filter_auto.h";
+      std::string new_cc_filename = "teams/ad/ad_nn/utils/bs_item_filter_auto.cc";
+
+      // format
+      std::string cmd_format(
+        "clang-format "
+        "--style=\"{BasedOnStyle: Google, ColumnLimit: 110, IndentCaseLabels: true}\" -i ");
+
+      // Write the .h file
+      std::error_code ec;
+      std::ofstream wfile(new_h_filename.c_str());
+      if (wfile.is_open()) {
+        wfile << header_content;
+      }
+      wfile.close();
+      std::system((cmd_format + new_h_filename).c_str());
+      LOG(INFO) << "convert done, .h: " << new_h_filename;
+
+      // Write the .cc file
+      std::ofstream wfile_cc(new_cc_filename.c_str());
+      if (wfile_cc.is_open()) {
+        wfile_cc << "#include \"teams/ad/ad_nn/utils/bs_item_filter.h\"\n\n";
+        wfile_cc << "namespace ks {\nnamespace ad_nn {\n\n";
+
+        auto& infer_filter_funcs = config->infer_filter_funcs;
+        for (auto it_filter = infer_filter_funcs.begin(); it_filter != infer_filter_funcs.end();
+             it_filter++) {
+          wfile_cc << "bool BSItemFilter::" << it_filter->first
+                   << "(const SampleInterface& bs, const FilterCondition& filter_condition, size_t pos) "
+                   << replace_simple_infer_filter(it_filter->second)
+                   << "\n\n";
+        }
+
+        wfile_cc << "} // namespace ad_nn\n} // namespace ks";
+      }
+      wfile_cc.close();
+      std::system((cmd_format + new_cc_filename).c_str());
+      LOG(INFO) << "convert done, .cc: " << new_cc_filename;
+    }
+    LOG(INFO) << "done";
+  }
+}
+
+void ConvertAction::handle_item_filters() {}
+
+void ConvertAction::handle_label_extractor() {}
+
+void ConvertAction::write_cc_file(const FeatureInfo& feature_info,  // Emits the .cc for one BS extractor: includes, constructor, Extract and the other methods.
+                                  const std::string& new_h_filename,
+                                  const std::string& new_cc_filename,
+                                  const std::string& bs_extractor_name) {
+  std::ofstream wfile_cc(new_cc_filename.c_str());
+
+  const std::string &extract_method_content = feature_info.extract_method_content();
+
+  if (wfile_cc.is_open()) {
+    // Emit the commonly needed headers.
+    if (feature_info.has_hash_fn_str()) {
+      wfile_cc << "#include \"teams/ad/ad_nn/bs_field_helper/bs_field_helper.h\"\n";
+    }
+
+    if (feature_info.has_query_token()) {
+      wfile_cc << "#include \"teams/ad/ad_algorithm/bs_feature/fast/frame/bs_action_util.h\"\n";
+    }
+
+    if (extract_method_content.find("std::move") != std::string::npos) {
+      wfile_cc << "#include \n";  // NOTE(review): header name inside <...> lost in this copy - presumably <utility>; confirm
+    }
+
+    if (extract_method_content.find("unordered_map") != std::string::npos) {
+      wfile_cc << "#include \n";  // NOTE(review): header name lost in this copy - presumably <unordered_map>; confirm
+    }
+
+    if (extract_method_content.find("unordered_set") != std::string::npos) {
+      wfile_cc << "#include \n";  // NOTE(review): header name lost in this copy - presumably <unordered_set>; confirm
+    }
+
+    wfile_cc << "#include \"" << new_h_filename << "\"\n\n";
+    wfile_cc << "namespace ks {\nnamespace ad_algorithm {\n"
+             << bs_extractor_name << "::" << bs_extractor_name << "(): BS"
+             << feature_info.constructor_info().init_list() << "\n"
+             << tool::rm_empty_line(feature_info.constructor_info().body_content())
+             << "\n\n";
+
+    // Fields related to `reco_user_info` are special-cased; the logic is selected through a `gflags` parameter.
+    if (const auto& reco_extract_body = feature_info.reco_extract_body()) {
+      wfile_cc << "void " << bs_extractor_name << "::ExtractWithBSRecoUserInfo("
+               << "const BSLog& bslog, size_t pos, std::vector* result) \n"
+               << tool::rm_empty_line(reco_extract_body.value()) << "\n";
+    }
+
+    // Emit the main `Extract` method.
+    wfile_cc << "void " << bs_extractor_name << "::Extract("
+             << "const BSLog& bslog, size_t pos, std::vector* "
+                "result) \n"
+             << tool::rm_empty_line(extract_method_content) << "\n";
+
+    // Emit the other methods.
+    const auto &other_methods = feature_info.other_methods();
+    for (auto it_method = other_methods.begin(); it_method != other_methods.end(); it_method++) {
+      wfile_cc << it_method->second.bs_return_type() << " "
+               << "BS" << feature_info.feature_name()
+               << "::" << it_method->second.decl() << it_method->second.body()
+               << "\n";
+    }
+
+    wfile_cc << "} // namespace ad_algorithm\n} // namespace ks\n";
+  }
+
+  wfile_cc.close();
+}
+
+std::string ConvertAction::replace_simple(const std::string& content,
+                                          const std::string& class_name) {
+  std::regex p_class_name(class_name);
+
+  static std::regex p_more_class("class (Extract.*) ?: ?public");
+  static std::regex p_empty_line("\n *;");
+  static std::regex p_colon(": *;");
+  static std::regex p_adlog("const AdLog ?\\& ?(adlog|ad_log)");
+  static std::regex p_fast_feature("FastFeature");
+  static std::regex p_register("REGISTER_EXTRACTOR");
+  static std::regex p_register_seq("REGISTER_SEQUENCE_EXTRACTOR");
+  static std::regex p_using("using Extract");
+  static std::regex p_extract_bs("EXTRACTOR\\(Extract");
+  static std::regex p_ad_callback_log("(::)?auto_cpp_rewriter::AdCallbackLog");
+  static std::regex p_name_extend_two("(::)?auto_cpp_rewriter::CommonInfoAttr");
+  // Must be applied after the FastFeature replacement
+  static std::regex p_extract_multi("ExtractMultiAttrBSFastFeatureNoPrefix");
+  static std::regex p_tempalte_item_type("template ?& new_field_defs = feature_info.new_field_defs();
+  for (auto it = new_field_defs.begin(); it != new_field_defs.end(); it++) {  // NOTE(review): the line above is damaged in this copy - the rest of replace_simple, replace_simple_infer_filter and the start of insert_new_field_def are missing; restore from the original file
+    if (it->second.var_def().size() > 0) {
+      oss << it->second.var_def() << ";\n";
+    }
+    if (it->second.exists_var_def().size() > 0) {
+      oss << it->second.exists_var_def() << ";\n";
+    }
+  }
+
+  std::string s = std::regex_replace(header_content,
+                                     p_private,
+                                     oss.str(),
+                                     std::regex_constants::format_first_only);
+
+  return s;
+}
+
+std::unique_ptr ConvertAction::CreateASTConsumer(clang::CompilerInstance &CI,  // Binds the rewriter to this compiler instance and optionally installs the comment remover.
+                                                 llvm::StringRef file) {
+  rewriter_.setSourceMgr(CI.getSourceManager(), CI.getLangOpts());
+
+  LOG(INFO) << "remove_comment: " << GlobalConfig::Instance()->remove_comment;
+  if (GlobalConfig::Instance()->remove_comment) {
+    rm_comment_ = new RmComment(rewriter_);  // owned by this action; released in ~ConvertAction()
+    LOG(INFO) << "add comment handler";
+    CI.getPreprocessor().addCommentHandler(rm_comment_);
+  }
+
+  return std::make_unique(rewriter_);  // NOTE(review): template argument lost in this copy - presumably ConvertASTConsumer; confirm
+}
+
+}  // namespace convert
+}  // namespace ad_algorithm
+}  // namespace ks
diff --git a/convert/ConvertAction.h b/convert/ConvertAction.h
new file mode 100644
index 0000000..4254604
--- /dev/null
+++ b/convert/ConvertAction.h
@@ -0,0 +1,161 @@
+#pragma once
+
+#include
+
+#include
+#include
+#include
+#include
+
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/Support/CommandLine.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Lex/Lexer.h"
+
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Rewrite/Core/Rewriter.h"
+#include "clang/AST/AST.h"
+#include "clang/AST/ASTConsumer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "llvm/ADT/StringRef.h"
+
+#include "Tool.h"
+#include "info/FeatureInfo.h"
+#include "matcher_callback/FeatureDeclCallback.h"
+#include "matcher_callback/TypeAliasCallback.h"
+#include "matcher_callback/InferFilterCallback.h"
+
+namespace ks {
+namespace ad_algorithm {
+namespace convert {
+
+/// Removes comments from the code.
+class RmComment: public clang::CommentHandler {
+ public:
+  explicit RmComment(clang::Rewriter& rewriter): rewriter(rewriter) {}
+
+  bool HandleComment(clang::Preprocessor& preprocessor,
+                     clang::SourceRange source_range) override;
+
+ private:
+  clang::Rewriter &rewriter;
+};
+
+/// Processes AST nodes.
+class ConvertASTConsumer : public clang::ASTConsumer {
+ public:
+  ConvertASTConsumer(clang::Rewriter &R);
+  void HandleTranslationUnit(clang::ASTContext &Context) override;
+
+ private:
+  clang::Rewriter& rewriter_;
+
+  /// `MatchFinder` used to match `FeatureDecl`.
+  ///
+  /// All features derive from the same base class `FastFeature`, so a single `MatchFinder` can match them all.
+  ///
+  /// Example:
+  /// ```cpp
+  /// class ExtractUserId: public FastFeature {
+  ///   ...
+  /// };
+  /// ```
+  clang::ast_matchers::MatchFinder match_finder_;
+
+  /// `MatchFinder` used to match `TypeAliasDecl`.
+  ///
+  /// Used to match template types.
+  clang::ast_matchers::MatchFinder type_alias_finder_;
+
+  /// `MatchFinder` used to match `InferFilterDecl`.
+  ///
+  /// Used to match the `filter` class.
+  clang::ast_matchers::MatchFinder infer_filter_finder_;
+
+  /// Callback run after a `FeatureDecl` match. This is the main logic-handling class.
+  FeatureDeclCallback feature_decl_callback_;
+
+  /// Callback run after a `TypeAliasDecl` match.
+  TypeAliasCallback type_alias_callback_;
+
+  /// Callback run after an `InferFilterDecl` match.
+  InferFilterCallback infer_filter_callback_;
+};
+
+/// Frontend action that drives the processing logic.
+class ConvertAction : public clang::ASTFrontendAction {
+ public:
+  ConvertAction() {}
+  ~ConvertAction() { delete rm_comment_; }
+
+  /// Action run after a source file has been processed; saves the rewritten files.
+  void EndSourceFileAction() override;
+
+  /// Handles the feature extractor classes.
+  void handle_features();
+
+  /// Handles the `filter` class.
+  void handle_infer_filters();
+
+  /// Handles the `item_filter` class.
+  void handle_item_filters();
+
+  /// Handles the `label_extractor` class.
+  void handle_label_extractor();
+
+  /// Writes the rewritten result to a new `c++` file.
+  void write_cc_file(const FeatureInfo &feature_info,
+                     const std::string &new_h_filename,
+                     const std::string &new_cc_filename,
+                     const std::string &bs_extractor_name);
+
+  /// Applies simple string replacements.
+  ///
+  /// Replaces fixed pieces of code text; no complex `ast` nodes are involved.
+  ///
+  /// Example:
+  /// ```cpp
+  /// // Original code
+  /// #include "teams/ad/ad_algorithm/feature/fast/frame/fast_feature.h";
+  /// class ExtractUserId: public FastFeature {
+  ///   ...
+  /// };
+  ///
+  /// // Rewritten code
+  /// #include "teams/ad/ad_algorithm/bs_feature/fast/frame/bs_fast_feature.h";
+  /// class BSExtractUserId: public BSFastFeature {
+  ///   ...
+  /// };
+  /// ```
+  std::string replace_simple(const std::string& content,
+                             const std::string& class_name);
+
+  /// Applies the replacements for the `filter` class.
+  std::string replace_simple_infer_filter(const std::string& content);
+
+  /// Inserts new field definitions.
+  ///
+  /// Used when handling complex middle nodes, to insert the new node definitions.
+  std::string insert_new_field_def(const std::string& header_content,
+                                   const FeatureInfo& feature_info);
+
+  /// Creates the `ASTConsumer`.
+  std::unique_ptr CreateASTConsumer(clang::CompilerInstance& CI,
+                                    llvm::StringRef file) override;
+
+ private:
+  /// `Rewriter` used to rewrite the `c++` code.
+  clang::Rewriter rewriter_;
+
+  /// Handler used to remove comments.
+  RmComment* rm_comment_ = nullptr;
+};
+
+}  // namespace convert
+}  // namespace ad_algorithm
+}  // namespace ks
diff --git a/convert/Deleter.cpp b/convert/Deleter.cpp
new file mode 100644
index 0000000..d54031c
--- /dev/null
+++ b/convert/Deleter.cpp
@@ -0,0 +1,120 @@
+#include
+
+#include "ExprInfo.h"
+#include "Env.h"
+#include "Tool.h"
+#include "Deleter.h"
+
+namespace ks {
+namespace ad_algorithm {
+namespace convert {
+
+// Most variable definitions that come from adlog must be deleted, because the enum and definition are added separately in Env.
+bool Deleter::need_delete(ExprInfo* expr_info_ptr, Env* env_ptr) {  // Returns true when the expression must be deleted; the checks below run in order and the first match wins.
+  if (expr_info_ptr == nullptr) {
+    return false;
+  }
+
+  // Plain leaf variables of a middle node are not deleted; their value is replaced directly with the middle node's bs expression.
+  // Other node definitions must be deleted.
+  // For example
+  // const auto &author_attr = live_info->author_info().attribute();
+  // uint32 fans_count = author_attr.fans_count();
+  // is replaced with
+  // uint32 fans_count = bs_util.BSGetLiveInfoAuthorInfoAttributeFansCount(bs, pos);
+  //
+  // author_attr is deleted while fans_count is kept.
+  if (expr_info_ptr->is_basic() && expr_info_ptr->is_from_middle_node()) {
+    return false;
+  }
+
+  // The size method of a leaf node is not deleted.
+  if (expr_info_ptr->callee_name() == "size" && expr_info_ptr->is_in_decl_stmt()) {
+    return false;
+  }
+
+  // Assignments that reference another variable are not deleted.
+  if (expr_info_ptr->is_basic() &&
+      expr_info_ptr->is_decl_init_expr() &&
+      expr_info_ptr->is_decl_ref_expr()) {
+    return false;
+  }
+
+  // Example:
+  // // const auto& ad_action = adlog.user_info().explore_long_term_ad_action();
+  // // auto iter = ad_action.find(no);
+  // // const auto& action_base_infos = iter->second.list();
+  // if (expr_info_ptr->is_from_adlog() && !expr_info_ptr->is_basic()) {
+  //   return true;
+  // }
+
+  if (expr_info_ptr->is_reco_proto_type()) {
+    return false;
+  }
+
+  // References to the `seq_list` type must be deleted.
+  //
+  // Example:
+  // const auto& seq_list = *seq_list_ptr;
+  if (expr_info_ptr->is_seq_list_root_deref()) {
+    return true;
+  }
+
+  // String concatenation is not deleted.
+  //
+  // Example:
+  // std::string const &province = loc.province();
+  // std::string const &city = loc.city();
+  // std::string res = province + city + community_type;
+  if (expr_info_ptr->is_str_concat()) {
+    return false;
+  }
+
+  // photo token methods are not deleted.
+  if (expr_info_ptr->is_query_token_call() || expr_info_ptr->is_photo_text_call()) {
+    return false;
+  }
+
+  // The int_list_value method in CommonInfoMultiMap is not deleted.
+  // Example:
+  // filename: ad/ad_algorithm/feature/fast/impl/extract_combine_realtime_action_match_cnt_v2.h
+  //
+  // auto id = action_list[key_idx].int_list_value(i);
+  if (expr_info_ptr->is_basic()) {
+    if (const auto& decl_info = env_ptr->cur_decl_info()) {
+      if (expr_info_ptr->is_common_info_list_method()) {
+        if (auto feature_info = env_ptr->get_feature_info()) {
+          if (feature_info->has_common_info_multi_map()) {
+            return false;
+          }
+        }
+      }
+    }
+  }
+
+  if (expr_info_ptr->is_from_seq_list()) {
+    return false;
+  }
+
+  if (expr_info_ptr->is_from_seq_list_reco()) {
+    return false;
+  }
+
+  if (expr_info_ptr->is_from_reco_user_info()) {
+    return false;
+  }
+
+  if (expr_info_ptr->is_common_info_multi_map_attr()) {
+    return false;
+  }
+
+  if (expr_info_ptr->is_from_adlog()) {  // everything else that comes from adlog is deleted
+    return true;
+  }
+
+  return false;
+}
+
+}  // namespace convert
+}  // namespace ad_algorithm
+}  // namespace ks
diff --git a/convert/Deleter.h b/convert/Deleter.h
new file mode 100644
index 0000000..f7eb137
--- /dev/null
+++ b/convert/Deleter.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+#include "clang/AST/Expr.h"
+
+namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; +class ExprInfo; + +/// 处理最终需要删掉的表达式, 如 +/// auto live_info = GetLiveInfo(adlog.item(pos)); +/// auto it = user_attr_map_.find(user_attr.name_value()); +class Deleter { + public: + static bool need_delete(ExprInfo* expr_info_ptr, Env* env_ptr); +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/Env.cpp b/convert/Env.cpp new file mode 100644 index 0000000..126a370 --- /dev/null +++ b/convert/Env.cpp @@ -0,0 +1,1863 @@ +#include + +#include +#include +#include +#include + +#include "Env.h" +#include "Tool.h" +#include "info/CommonInfoMultiIntList.h" +#include "info/IfInfo.h" +#include "info/LoopInfo.h" +#include "info/NewVarDef.h" +#include "info/AdlogFieldInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +const Env* Env::get_loop_parent() const { + if (is_loop_) { + return parent_; + } + + if (parent_ != nullptr) { + return parent_->get_loop_parent(); + } + + return nullptr; +} + +Env* Env::mutable_loop_parent() { + const Env* loop_parent = get_loop_parent(); + return const_cast(loop_parent); +} + +const Env* Env::get_loop_env() const { + if (is_loop_) { + return this; + } + + if (parent_ != nullptr) { + return parent_->get_loop_env(); + } + + return nullptr; +} + +const Env *Env::get_outer_loop() const { + if (is_loop_) { + if (parent_ != nullptr) { + if (parent_->is_loop()) { + return parent_->get_outer_loop(); + } else { + return this; + } + } else { + return this; + } + } else { + if (parent_ != nullptr) { + return parent_->get_outer_loop(); + } else { + return nullptr; + } + } +} + +Env *Env::mutable_outer_loop() { + const Env* loop_env = get_outer_loop(); + return const_cast(loop_env); +} + +const Env *Env::get_outer_loop_parent() const { + const Env* loop_env = get_outer_loop(); + + if (loop_env != nullptr) { + return loop_env->parent(); + } else { + return nullptr; + } +} + +Env 
*Env::mutable_outer_loop_parent() { + const Env* env_ptr = get_outer_loop_parent(); + return const_cast(env_ptr); +} + +const Env *Env::get_outer_if() const { + if (is_if_) { + if (parent_ != nullptr) { + if (parent_->is_if()) { + return parent_->get_outer_if(); + } else { + return this; + } + } else { + return this; + } + } else { + if (parent_ != nullptr) { + return parent_->get_outer_if(); + } else { + return nullptr; + } + } +} + +Env *Env::mutable_outer_if() { + const Env *if_env = get_outer_if(); + return const_cast(if_env); +} + +const Env *Env::get_outer_if_parent() const { + const Env *if_env = get_outer_if(); + + if (if_env != nullptr) { + return if_env->parent(); + } else { + return nullptr; + } +} + +Env *Env::mutable_outer_if_parent() { + const Env *env_ptr = get_outer_if_parent(); + return const_cast(env_ptr); +} + +Env* Env::mutable_loop_env() { + const Env* loop_env = get_loop_env(); + return const_cast(loop_env); +} + +const Env* Env::get_root() const { + if (parent_ == nullptr) { + return const_cast(this); + } + + return parent_->get_root(); +} + +Env* Env::get_mutable_root() { + if (parent_ == nullptr) { + return this; + } + + return parent_->get_mutable_root(); +} + +const Env* Env::get_common_info_prepare_env() const { + if (common_info_prepare_) { + return this; + } + + if (parent_ != nullptr) { + return parent_->get_common_info_prepare_env(); + } + + return nullptr; +} + +Env* Env::mutable_common_info_prepare_env() { + const Env* prepare_env = get_common_info_prepare_env(); + return const_cast(prepare_env); +} + +const Env* Env::get_common_info_parent_env() const { + const Env* prepare_env = get_common_info_prepare_env(); + if (prepare_env == nullptr) { + return nullptr; + } + + return prepare_env->parent(); +} + +Env* Env::mutable_common_info_parent_env() { + Env* prepare_env = mutable_common_info_prepare_env(); + if (prepare_env == nullptr) { + return nullptr; + } + + return prepare_env->parent(); +} + +bool Env::is_parent_loop() const { + 
return parent_ != nullptr && parent_->is_loop(); +} + +bool Env::is_parent_if() const { + return parent_ != nullptr && parent_->is_if(); +} + +void Env::add_used_var_name(const std::string& name) { + used_var_names_.insert(name); +} + +bool Env::is_var_name_used(const std::string& name) const { + return used_var_names_.find(name) != used_var_names_.end(); +} + +void Env::add_template_var_names(const std::vector& var_names) { + for (size_t i = 0; i < var_names.size(); i++) { + LOG(INFO) << "add_template_var_name: " << var_names[i]; + used_var_names_.insert(var_names[i]); + } +} + +void Env::add_child(Env* child) { + if (child != nullptr) { + children_.push_back(child); + } +} + +void Env::add(const std::string& key, clang::Expr* expr) { + if (var_decls_.find(key) != var_decls_.end()) { + LOG(INFO) << "override key: " << key << ", expr: " << stmt_to_string(expr); + } + get_mutable_root()->add_used_var_name(key); + var_decls_[key] = expr; +} + +void Env::set_feature_name(const std::string& feature_name) { + feature_name_ = feature_name; +} + +const std::string& Env::feature_name() const { + const FeatureInfo* feature_info = get_feature_info(); + if (feature_info != nullptr) { + return feature_info->feature_name(); + } + + static std::string empty; + return empty; +} + +void Env::set_feature_type(const std::string& feature_type) { + feature_type_ = feature_type; +} + +const std::string& Env::feature_type() const { + const FeatureInfo* feature_info = get_feature_info(); + if (feature_info != nullptr) { + return feature_info->feature_type(); + } + + static std::string empty; + return empty; +} + +clang::Expr* Env::find(const std::string& key) const { + auto it = var_decls_.find(key); + if (it != var_decls_.end()) { + return it->second; + } + + if (parent_ != nullptr) { + return parent_->find(key); + } + + return nullptr; +} + +void Env::erase(const std::string& key) { + if (var_decls_.find(key) != var_decls_.end()) { + var_decls_.erase(key); + } +} + +void 
Env::add_loop_var(const std::string& key) { + loop_var_names_.push_back(key); + if (loop_info_) { + loop_info_->set_loop_var(key); + } +} + +void Env::pop_loop_var() { + if (loop_var_names_.size() > 0) { + loop_var_names_.pop_back(); + } +} + +const std::string& Env::get_last_loop_var() const { + if (is_loop_) { + if (loop_var_names_.size() > 0) { + return loop_var_names_.back(); + } + } + + if (parent_ != nullptr) { + return parent_->get_last_loop_var(); + } + + static std::string empty; + return empty; +} + +bool Env::is_loop_var(const std::string& key) const { + for (size_t i = 0; i < loop_var_names_.size(); i++) { + if (loop_var_names_[i] == key) { + return true; + } + } + + if (parent_ != nullptr) { + return parent_->is_loop_var(key); + } + + return false; +} + +bool Env::is_in_loop() const { + if (loop_info_) { + return true; + } + + if (parent_ != nullptr) { + return parent_->is_in_loop(); + } + + return false; +} + +void Env::add_loop_expr(clang::Expr* expr) { + if (expr == nullptr) { + return; + } + + loop_exprs_.push_back(expr); +} + +size_t Env::get_loop_expr_size() { + return loop_exprs_.size(); +} + +clang::Expr* Env::get_loop_expr(size_t index) { + if (index >= loop_exprs_.size()) { + return nullptr; + } + + return loop_exprs_[index]; +} + +clang::Expr* Env::get_first_loop_expr() { + if (loop_exprs_.size() == 0) { + return nullptr; + } + + return loop_exprs_[0]; +} + +clang::Expr* Env::find_parent_loop() { + if (parent_ == nullptr) { + return nullptr; + } + + return parent_->get_first_loop_expr(); +} + +bool Env::is_in_if() const { + if (is_if_) { + return true; + } + + if (parent_ != nullptr) { + return parent_->is_in_if(); + } + + return false; +} + +bool Env::is_in_if_cond() const { + if (is_if_) { + if (if_info_.has_value() && if_info_->if_stage() == IfStage::COND) { + return true; + } + } + + if (parent_ != nullptr) { + return parent_->is_in_if_cond(); + } + + return false; +} + +bool Env::is_in_if_body() const { + if (is_if_) { + if 
(if_info_.has_value() && if_info_->if_stage() == IfStage::THEN) { + return true; + } + } + + if (parent_ != nullptr) { + return parent_->is_in_if_body(); + } + + return false; +} + +void Env::set_is_if(bool is_if) { + is_if_ = is_if; + if (parent_ != nullptr) { + parent_->set_has_if_in_children(true); + } +} + +bool Env::has_if_in_children() const { + return has_if_in_children_; +} + +void Env::set_has_if_in_children(bool v) { + has_if_in_children_ = v; +} + +bool Env::is_check_middle_node_root_cond() const { + return if_info_ && if_info_->is_check_middle_node_root_cond(); +} + +bool Env::is_combine_feature() const { + if (const auto& feature_info = get_feature_info()) { + return feature_info->is_combine(); + } + + return false; +} + +bool Env::is_user_feature() const { + return tool::is_user_feature(feature_type()); +} + +bool Env::is_item_feature() const { + return tool::is_item_feature(feature_type()); +} + +bool Env::is_sparse_feature() const { + return tool::is_sparse_feature(feature_type()); +} + +bool Env::is_dense_feature() const { + return tool::is_dense_feature(feature_type()); +} + +bool Env::is_common_info_loop() const { + if (loop_info_) { + return loop_info_->is_common_info_list_map(); + } + + return false; +} + +bool Env::is_parent_common_info_loop() const { + if (parent_ != nullptr) { + return parent_->is_common_info_loop(); + } + + return false; +} + +bool Env::is_in_common_info_loop_body() const { + if (const Env* prepare_env = get_common_info_prepare_env()) { + if (const auto& loop_info = prepare_env->cur_loop_info()) { + if (loop_info->loop_stage() == LoopStage::BODY) { + return true; + } + } + } + + return false; +} + +bool Env::is_in_common_info_if_body() const { + if (const auto& if_info = cur_if_info()) { + if (if_info->is_check_common_info_cond()) { + if (if_info->if_stage() == IfStage::THEN || if_info->if_stage() == IfStage::ELSE) { + return true; + } + } + } + + if (parent_ != nullptr) { + return parent_->is_in_common_info_if_body(); + } 
+ + return false; +} + +void Env::set_is_child_common_attr_cond(bool v) { + is_child_common_attr_cond_ = v; +} + +bool Env::is_child_common_attr_cond() const { + return is_child_common_attr_cond_; +} + +absl::optional Env::get_common_attr_int_value() const { + if (const auto& common_info_normal = get_common_info_normal()) { + if (if_info_) { + if (const absl::optional& index = if_info_->common_info_index()) { + if (*index < common_info_normal->common_info_details_size()) { + return absl::optional(common_info_normal->get_common_info_detail(*index)->common_info_value()); + } else { + LOG(INFO) << "index out of range for common info details, index: " << *index + << ", common_info_details.size: " << common_info_normal->common_info_details_size(); + return absl::nullopt; + } + } + } else { + // 通过 != 来判断, if_info 已不存在, 只能通过 last_common_info_detail 获取 + if (common_info_normal->common_info_details_size() > 0) { + const auto& last_common_info_detail = common_info_normal->last_common_info_detail(); + return absl::optional(last_common_info_detail->common_info_value()); + } + } + } + + if (parent_ != nullptr) { + return parent_->get_common_attr_int_value(); + } + + return absl::nullopt; +} + +absl::optional Env::get_common_attr_int_name() const { + if (const auto& common_info_fixed_list = get_common_info_fixed_list()) { + if (if_info_) { + if (const auto last_detail = common_info_fixed_list->last_common_info_detail()) { + return absl::optional(last_detail->int_name()); + } + } + } + + if (parent_ != nullptr) { + return parent_->get_common_attr_int_name(); + } + + return absl::nullopt; +} + +bool Env::is_check_item_pos_cond() const { + return is_if_ && if_info_->is_check_item_pos_cond(); +} + +bool Env::is_action_detail_cond() const { + return is_if_ && action_detail_info_.has_value(); +} + +void Env::set_cxx_for_range_stmt(clang::CXXForRangeStmt* cxx_for_range_stmt) { + cxx_for_range_stmt_ = cxx_for_range_stmt; +} +clang::CXXForRangeStmt* Env::cxx_for_range_stmt() const { + 
return cxx_for_range_stmt_; +} + +void Env::set_for_stmt(clang::ForStmt* for_stmt) { + for_stmt_ = for_stmt; +} + +clang::ForStmt* Env::for_stmt() const { + return for_stmt_; +} + +void Env::set_if_stmt(clang::IfStmt* if_stmt) { + if_stmt_ = if_stmt; +} + +clang::IfStmt* Env::if_stmt() const { + if (if_stmt_ != nullptr) { + return if_stmt_; + } + + if (parent_ != nullptr) { + return parent_->if_stmt(); + } + + return nullptr; +} + +// 如果 if 里的语句都是变量定义,并且这些变量最终都被删掉,那么整条 if 语句可以被删掉。 +bool Env::is_all_if_stmt_deleted() const { + if (if_stmt_ == nullptr) { + return false; + } + + int total_deleted = deleted_vars_.size(); + if (total_deleted == 0) { + return false; + } + + int total_if_stmt = 0; + if (clang::CompoundStmt* compound_stmt = dyn_cast(if_stmt_->getThen())) { + total_if_stmt = compound_stmt->size(); + } + + return total_deleted == total_if_stmt; +} + +void Env::add_decl_stmt(const std::string& name, clang::DeclStmt* decl_stmt) { + decl_stmts_[name] = decl_stmt; + get_mutable_root()->add_used_var_name(name); +} + +clang::DeclStmt* Env::get_decl_stmt(const std::string& name) const { + auto it = decl_stmts_.find(name); + if (it != decl_stmts_.end()) { + return it->second; + } + + if (parent_ != nullptr) { + return parent_->get_decl_stmt(name); + } + + return nullptr; +} + +clang::DeclStmt* Env::get_decl_stmt_in_cur_env(const std::string& name) const { + auto it = decl_stmts_.find(name); + if (it != decl_stmts_.end()) { + return it->second; + } + + return nullptr; +} + +Env* Env::find_decl_env(const std::string& name) { + if (decl_stmts_.find(name) != decl_stmts_.end()) { + return this; + } + + if (parent_ != nullptr) { + return parent_->find_decl_env(name); + } + + return nullptr; +} + +void Env::add_deleted_var(const std::string& name) { + LOG(INFO) << "add deleted var: " << name; + deleted_vars_.insert(name); +} + +// 可能有隐式转换, 统一通过 str 来判断 +bool Env::add_deleted_var_by_expr(clang::Expr* expr) { + for (auto it = var_decls_.begin(); it != var_decls_.end(); it++) 
{ + if (stmt_to_string(expr) == stmt_to_string(it->second)) { + deleted_vars_.insert(it->first); + return true; + } + } + + return false; +} + +// 可能有隐式转换, 统一通过 str 来判断 +bool Env::add_deleted_var_by_expr_str(const std::string& expr_str) { + for (auto it = var_decls_.begin(); it != var_decls_.end(); it++) { + if (expr_str == stmt_to_string(it->second)) { + deleted_vars_.insert(it->first); + return true; + } + } + + return false; +} + +void Env::pop_deleted_var(const std::string& name) { + deleted_vars_.erase(name); +} + +void Env::clear_deleted_var() { + deleted_vars_.clear(); +} + +Env* Env::mutable_new_def_target_env(bool is_from_reco) { + if (is_from_reco) { + if (parent_ != nullptr) { + return parent_->mutable_new_def_target_env(is_from_reco); + } else { + return this; + } + } + + // common info 比较特殊,整个 for 循环会被替换调,必须定义在 commno info prepare parent 中,否则会找不到。 + if (is_in_common_info_loop_body()) { + return mutable_common_info_parent_env(); + } + + if (is_in_loop()) { + return mutable_loop_parent(); + } + + if (const auto& if_info = cur_if_info()) { + if (if_info->is_check_action_detail_cond()) { + return parent_; + } + if (if_info->if_stage() == IfStage::COND) { + return parent_; + } + } + + return this; +} + +void Env::add_new_def(const std::string& bs_enum_str, + const std::string& var_def, + NewVarType new_var_type) { + bool is_from_reco = tool::is_str_from_reco_user_info(bs_enum_str); + Env* target_env = mutable_new_def_target_env(is_from_reco); + if (target_env == nullptr) { + return; + } + + target_env->add_new_def_helper(bs_enum_str, var_def, new_var_type); +} + +void Env::add_new_def_helper(const std::string &bs_enum_str, + const std::string &var_def, + NewVarType new_var_type) { + absl::optional &var = find_mutable_new_def(bs_enum_str); + if (var) { + if (var->var_def().size() == 0) { + var->set_var_def(var_def, new_var_type); + } + + return; + } + + std::string var_name = find_valid_new_name(bs_enum_str); + 
get_mutable_root()->add_used_var_name(var_name); + absl::optional new_def = absl::make_optional( + bs_enum_str, var_name, var_def, new_var_type); + new_defs_[bs_enum_str] = std::move(new_def); +} + +void Env::add_new_def_meta(const std::string& bs_enum_str, + const std::string& var_def, + NewVarType new_var_type) { + add_new_def(bs_enum_str, var_def, new_var_type); + add_attr_meta(bs_enum_str); +} + +void Env::add_attr_meta(const std::string& bs_enum_str) { + if (starts_with(bs_enum_str, "adlog")) { + if (auto constructor_info = mutable_constructor_info()) { + constructor_info->add_bs_field_enum(bs_enum_str); + } + } else { + LOG(INFO) << "bs_enum_str is not starts_with adlog, skip! bs_enum_str: " << bs_enum_str; + } +} + +void Env::set_normal_adlog_field_info(const std::string& bs_enum_str, + const std::string& adlog_field) { + if (auto construct_info = mutable_constructor_info()) { + construct_info->set_normal_adlog_field_info(bs_enum_str, adlog_field); + } +} + +void Env::set_common_info_field_info(const std::string& bs_enum_str, + const std::string& adlog_field, + const std::string& common_info_enum_name, + int common_info_value) { + if (auto construct_info = mutable_constructor_info()) { + construct_info->set_common_info_field_info(bs_enum_str, + adlog_field, + common_info_enum_name, + common_info_value); + } +} + +// 可能会出现在添加的 exists var 后面,因此也需要先查找是否存在。 +// var_name 会用第一次出现时候查找的新名字。即如果 decl_stmt 后出现,并且名字和添加 def 时候的不一样, +// 则需要替换成添加 def 时的名字。如下 +// auto iter = ad_action.find(no); +// if (iter != ad_action.end()) { +// const auto& action_base_infos = iter->second.list(); +// int64_t imp_num = action_base_infos.size(); +// AddFeature(0, imp_num, result); +// } else { +// AddFeature(0, 0, result); +// } +// 在遇到 ad_action.find(no) 时,就知道要判断 action_detail 是否存在, 现在是根据 +// action_detail_key_${no}_list_size 是否存在来判断的, 因此现在要添加 action_detail_key_${no}_list_size +// 对应的 exists_def, 因为是第一次添加,因此会找到一个新的 var_name: list_size。但是后面又遇到 +// action_base_infos.size() 时, bs_enum_str 
是和刚才的一样的, 但是已经有了一个 decl_stmt 的 var_name: +// imp_num。此时就要决定用哪个变量名了。如果用 decl_stmt 中的 decl name, 那么当 decl name 已经被用过时, +// 直接将 decl name 设置为 var_name, 则之后的引用都没问题。但是之前已经替换的引用会有问题,需要想办法解决。 +// 不过这种情况出现的比较少, 在 action_detail 判断中可能会遇到。action.find(no) 之后会需要立即判断是否存在, +// 需要用到 key_${no}_list_size 对应的 exists_var, 之后的处理逻辑中可能直接用到 list_size 这个变量, 如 +// int64_t imp_num = action_base_infos.size(), 对应的 var_name 是 imp_num。因此只需要不将 var_name 和 +// exists_name 耦合即可, 这两个属性都可以手动设置即可满足需求。 +// +// loop body 内添加的 adlog var 统一定义到 loop parent env 中,因为 loop 可能需要整体被替换, 防止 +// 出现重复定义。 +// +// reco_user_info 字段比较特殊,需要替换两次,必须保存在 root env 中。 +void Env::add_new_def(const std::string& bs_enum_str, + const std::string& var_name, + const std::string& var_def, + NewVarType new_var_type) { + bool is_from_reco = tool::is_str_from_reco_user_info(bs_enum_str); + Env *target_env = mutable_new_def_target_env(is_from_reco); + if (target_env == nullptr) { + return; + } + + target_env->add_new_def_helper(bs_enum_str, var_name, var_def, new_var_type); +} + +void Env::add_new_def_helper(const std::string &bs_enum_str, + const std::string &var_name, + const std::string &var_def, + NewVarType new_var_type) { + absl::optional &var = find_mutable_new_def(bs_enum_str); + if (var) { + LOG(INFO) << "var_def already exists, bs_enum_str: " << bs_enum_str + << ", old var_name: " << var->name() + << ", set var_name: " << var_name; + var->set_name(var_name); + + if (var->var_def().size() == 0) { + var->set_var_def(var_def, new_var_type); + } + + return; + } + + absl::optional new_def = absl::make_optional( + bs_enum_str, var_name, var_def, new_var_type); + new_defs_[bs_enum_str] = std::move(new_def); + get_mutable_root()->add_used_var_name(var_name); +} + +void Env::add_new_def_meta(const std::string& bs_enum_str, + const std::string& var_name, + const std::string& var_def, + NewVarType new_var_type) { + add_new_def(bs_enum_str, var_name, var_def, new_var_type); + add_attr_meta(bs_enum_str); +} + +void 
Env::add_new_exists_def(const std::string &bs_enum_str, + const std::string &exists_var_def) { + bool is_from_reco = tool::is_str_from_reco_user_info(bs_enum_str); + if (auto env = mutable_new_def_target_env(is_from_reco)) { + env->add_new_exists_def_helper(bs_enum_str, exists_var_def); + } +} + +void Env::add_new_exists_def_helper(const std::string& bs_enum_str, + const std::string& exists_var_def) { + if (absl::optional& var_def = find_mutable_new_def(bs_enum_str)) { + var_def->set_exists_var_def(tool::get_exists_name(var_def->name()), exists_var_def); + } else { + std::string var_name = find_valid_new_name(bs_enum_str); + get_mutable_root()->add_used_var_name(var_name); + + absl::optional new_def = absl::make_optional(bs_enum_str, var_name); + std::string exists_name = tool::get_exists_name(var_name); + new_def->set_exists_var_def(exists_name, exists_var_def); + new_defs_.emplace(bs_enum_str, new_def); + } +} + +void Env::add_new_exists_def_meta(const std::string& bs_enum_str, + const std::string& exists_var_def) { + add_new_exists_def(bs_enum_str, exists_var_def); + add_attr_meta(bs_enum_str); +} + +const absl::optional& Env::find_new_def(const std::string& bs_enum_str) const { + auto it = new_defs_.find(bs_enum_str); + if (it != new_defs_.end() && it->second.has_value()) { + return it->second; + } + + if (parent_ != nullptr) { + return parent_->find_new_def(bs_enum_str); + } + + static absl::optional empty = absl::nullopt; + return empty; +} + +void Env::add_new_def_overwrite( + const std::string &bs_var_name, + const std::string &var_def, + NewVarType new_var_type +) { + absl::optional &var = find_mutable_new_def(bs_var_name); + if (var) { + if (var->var_def().size() == 0) { + var->set_var_def(var_def, new_var_type); + } + + return; + } + + get_mutable_root()->add_used_var_name(bs_var_name); + absl::optional new_def = absl::make_optional( + bs_var_name, bs_var_name, var_def, new_var_type); + new_defs_[bs_var_name] = std::move(new_def); +} + +absl::optional& 
Env::find_mutable_new_def(const std::string& bs_enum_str) { + const absl::optional& var_def = find_new_def(bs_enum_str); + return const_cast&>(var_def); +} + +std::string Env::find_valid_new_name(const std::string& bs_enum_str) const { + const absl::optional& var_def = find_new_def(bs_enum_str); + if (var_def) { + // var_def 第一次添加的时候一定有合法的 name。 + return var_def->name(); + } + + std::vector tokens = absl::StrSplit(bs_enum_str, "_"); + + if (tokens.size() == 1) { + if (is_new_name_valid(tokens[0])) { + return tokens[0]; + } + } + + for (int i = tokens.size() - 2; i >= 0; i--) { + if (tokens[i].size() > 0 && std::isdigit(tokens[i][0])) { + continue; + } + std::string new_name = absl::StrJoin(tokens.begin() + i, tokens.end(), "_"); + if (is_new_name_valid(new_name)) { + return new_name; + } + } + + LOG(INFO) << "cannot find valid name, bs_enum_str is too short: " << bs_enum_str; + return ""; +} + +bool Env::is_new_name_valid(const std::string& name) const { + if (get_root()->is_var_name_used(name)) { + return false; + } + + return true; +} + +bool Env::is_new_var_exists(const std::string& bs_enum_str) const { + const absl::optional& var_def = find_new_def(bs_enum_str); + if (var_def && var_def->var_def().size() > 0) { + return true; + } + + return false; +} + +bool Env::is_new_var_not_exists(const std::string& bs_enum_str) const { + return !is_new_var_exists(bs_enum_str); +} + +const std::string& Env::find_new_var_name(const std::string& bs_enum_str) const { + if (const absl::optional& var_def = find_new_def(bs_enum_str)) { + return var_def->name(); + } + + static std::string empty; + return empty; +} + +const std::string& Env::find_new_exists_var_name(const std::string& bs_enum_str) const { + if (const absl::optional& var_def = find_new_def(bs_enum_str)) { + return var_def->exists_name(); + } + + static std::string empty; + return empty; +} + +std::vector Env::get_all_new_def_var_names() const { + std::vector res; + + for (auto it = new_defs_.begin(); it != 
new_defs_.end(); it++) { + if (it->second.has_value()) { + res.emplace_back(it->second->name()); + } + } + + return res; +} + +std::string Env::get_all_new_defs() const { + std::ostringstream oss; + for (auto it = new_defs_.begin(); it != new_defs_.end(); it++) { + if (it->second.has_value()) { + LOG(INFO) << "new var, var_name: " << it->second->name() << ", def: " << it->second->var_def(); + if (it->second->var_def().size() > 0) { + oss << it->second->var_def() << ";\n\n"; + } + if (it->second->exists_var_def().size() > 0) { + oss << it->second->exists_var_def() << ";\n\n"; + } + } + } + + return oss.str(); +} + +int Env::get_action() { + if (action_ != -1) { + return action_; + } + + if (parent_ != nullptr) { + return parent_->get_action(); + } + + return -1; +} + +void Env::set_action_expr(clang::Expr* expr, std::string bs_action_expr) { + action_expr_ = expr; + bs_action_expr_ = bs_action_expr; +} + +std::string Env::get_bs_action_expr() { + if (bs_action_expr_.size() > 0) { + return bs_action_expr_; + } + + if (parent_ != nullptr) { + return parent_->get_bs_action_expr(); + } + + return ""; +} + +std::string Env::find_one_action_detail_leaf_name(const std::string& bs_enum_str) const { + for (auto it = new_defs_.begin(); it != new_defs_.end(); it++) { + if (starts_with(it->first, bs_enum_str)) { + const absl::optional& new_var_def = it->second; + if (new_var_def && new_var_def->is_list()) { + return new_var_def->name(); + } + } + } + + if (parent_ != nullptr) { + return parent_->find_one_action_detail_leaf_name(bs_enum_str); + } + + return ""; +} + +void Env::add_if_stmt(clang::IfStmt* if_stmt) { + if_info_.emplace(if_stmt); +} + +void Env::add_loop_stmt(clang::ForStmt* for_stmt) { + loop_info_.emplace(for_stmt); +} + +void Env::add_loop_stmt(clang::CXXForRangeStmt* cxx_for_range_stmt) { + loop_info_.emplace(cxx_for_range_stmt); +} + +void Env::add_action_detail_prefix_adlog(const std::string& prefix_adlog) { + action_detail_prefix_adlog_.emplace(prefix_adlog); 
+} + +// 多个 common info +// 一定要找到 prefix 所在 loop Env 来创建,才能保证唯一。 +absl::optional& Env::touch_common_info_normal() { + static absl::optional empty; + + if (common_info_prepare_ && common_info_prepare_->prefix()) { + if (!common_info_prepare_->is_common_info_normal()) { + return empty; + } + + if (!common_info_normal_) { + const std::string& prefix_adlog = *(common_info_prepare_->prefix_adlog()); + if (starts_with(prefix_adlog, "adlog") || (feature_name() == "ItemFilter" && starts_with(prefix_adlog, "item"))) { + LOG(INFO) << "touch common_info_normal, prefix_adlog: " + << *(common_info_prepare_->prefix_adlog()); + common_info_normal_.emplace(*(common_info_prepare_->prefix_adlog())); + } else if (const auto &middle_node_info = get_middle_node_info()) { + LOG(INFO) << "touch common_info_normal with middle_node, middle_node: " + << middle_node_info->name() + << ", prefix_adlog: " << *(common_info_prepare_->prefix_adlog()); + common_info_normal_.emplace(*(common_info_prepare_->prefix_adlog()), middle_node_info->name()); + } else { + LOG(INFO) << "prefix_adlog is not starts_with adlog! 
prefix_adlog: " << prefix_adlog; + return empty; + } + + common_info_normal_->set_env_ptr(this); + if (common_info_prepare_->name_value_alias()) { + common_info_normal_->set_name_value_alias(*(common_info_prepare_->name_value_alias())); + } + common_info_prepare_->set_is_confirmed(); + } + + return common_info_normal_; + } + + if (parent_ != nullptr) { + return parent_->touch_common_info_normal(); + } + + LOG(INFO) << "canot find common info prefix!"; + return empty; +} + +// common info enum 变量通过模板参数传递 +absl::optional& Env::touch_common_info_fixed_list() { + static absl::optional empty; + + if (common_info_prepare_ && common_info_prepare_->prefix_adlog()) { + // if (common_info_prepare_->is_common_info_normal()) { + // return (empty); + // } + if (!common_info_fixed_list_) { + const std::string& prefix_adlog = *(common_info_prepare_->prefix_adlog()); + if (starts_with(prefix_adlog, "adlog") + || (feature_name() == "ItemFilter" + && starts_with(prefix_adlog, "item"))) { + common_info_fixed_list_.emplace(*(common_info_prepare_->prefix_adlog())); + LOG(INFO) << "touch common_info_fixed_list, prefix_adlog: " + << *(common_info_prepare_->prefix_adlog()); + } else if (const auto& middle_node_info = get_middle_node_info()) { + common_info_fixed_list_.emplace(*(common_info_prepare_->prefix_adlog()), + middle_node_info); + LOG(INFO) << "touch common_info_fixed_list with middle_node, prefix_adlog: " + << *(common_info_prepare_->prefix_adlog()) + << ", middle_node root: " << middle_node_info->name(); + } else { + LOG(INFO) << "prefix_adlog is not starts_with adlog! 
prefix_adlog: " + << prefix_adlog; + return empty; + } + + common_info_fixed_list_->set_env_ptr(this); + common_info_prepare_->set_is_confirmed(); + } + return common_info_fixed_list_; + } + + if (parent_ != nullptr) { + return parent_->touch_common_info_fixed_list(); + } + + LOG(INFO) << "canot find common info prefix!"; + return empty; +} + +// 多个 common info +// 一定要找到 prefix 所在 loop Env 来创建,才能保证唯一。 +absl::optional& Env::touch_common_info_multi_map(const std::string& map_name, + const std::string& attr_name) { + if (common_info_prepare_ && common_info_prepare_->prefix()) { + if (!common_info_multi_map_) { + common_info_multi_map_.emplace(*(common_info_prepare_->prefix_adlog()), map_name, attr_name); + common_info_multi_map_->set_env_ptr(this); + common_info_prepare_->set_is_confirmed(); + LOG(INFO) << "touch common_info_multi_map, prefix_adlog: " + << *(common_info_prepare_->prefix_adlog()); + } + return (common_info_multi_map_); + } + + if (parent_ != nullptr) { + return parent_->touch_common_info_multi_map(map_name, attr_name); + } + + LOG(INFO) << "cannot get common info prefix for multi map!"; + static absl::optional empty; + return empty; +} + +absl::optional& Env::touch_common_info_multi_int_list() { + if (common_info_prepare_ && common_info_prepare_->prefix()) { + if (!common_info_multi_int_list_) { + common_info_multi_int_list_.emplace(*(common_info_prepare_->prefix_adlog())); + common_info_multi_int_list_->set_env_ptr(this); + common_info_prepare_->set_is_confirmed(); + LOG(INFO) << "touch common_info_multi_int_list, prefix_adlog: " + << *(common_info_prepare_->prefix_adlog()); + + // 从 feature info 中复制 map_vec_connections + if (auto feature_info = mutable_feature_info()) { + if (const auto& int_list_info = feature_info->common_info_multi_int_list()) { + LOG(INFO) << "int_list_info address: " << &(*int_list_info); + const auto& map_vec_connections = int_list_info->map_vec_connections(); + for (auto it = map_vec_connections.begin(); it != 
map_vec_connections.end(); it++) { + common_info_multi_int_list_->add_map_vec_connection(it->first, it->second); + LOG(INFO) << "copy from feature_info, map_name: " << it->first + << ", vec_name: " << it->second; + } + } + } + } + + // 目前还区分不了 + if (common_info_fixed_list_) { + common_info_fixed_list_ = absl::nullopt; + } + + return (common_info_multi_int_list_); + } + + if (parent_ != nullptr) { + return parent_->touch_common_info_multi_int_list(); + } + + LOG(INFO) << "cannot get common info prefix for multi_int_list!"; + static absl::optional empty; + return empty; +} + +absl::optional& Env::touch_action_detail_info(int action) { + if (action_detail_prefix_adlog_) { + if (!action_detail_info_) { + action_detail_info_.emplace(*action_detail_prefix_adlog_, action); + action_detail_info_->set_env_ptr(this); + } + + return (action_detail_info_); + } + + if (parent_ != nullptr) { + return parent_->touch_action_detail_info(action); + } + + LOG(INFO) << "cannot find action detail prefix!"; + static absl::optional empty; + return empty; +} + +absl::optional& Env::update_action_detail_info(int action) { + if (auto& action_detail_info = touch_action_detail_info(action)) { + action_detail_info->add_action(action); + return action_detail_info; + } + + LOG(INFO) << "update_action_detail_info faield! 
cannot find action detail prefix!"; + static absl::optional empty; + return empty; +} + +absl::optional& Env::touch_action_detail_fixed_info(const std::string& action) { + if (action_detail_prefix_adlog_) { + if (!action_detail_fixed_info_) { + action_detail_fixed_info_.emplace(*action_detail_prefix_adlog_, action); + action_detail_fixed_info_->set_env_ptr(this); + } + + return action_detail_fixed_info_; + } + + if (parent_ != nullptr) { + return parent_->touch_action_detail_fixed_info(action); + } + + LOG(INFO) << "cannot find action detail prefix!"; + static absl::optional empty; + return empty; +} + +absl::optional& Env::touch_seq_list_info(const std::string& root_name) { + if (!seq_list_info_) { + seq_list_info_.emplace(root_name); + } + + return seq_list_info_; +} + +absl::optional & Env::touch_proto_list_info(const std::string &prefix_adlog) { + if (!proto_list_info_) { + proto_list_info_.emplace(prefix_adlog); + } + + return proto_list_info_; +} + +absl::optional& Env::touch_bs_field_info(const std::string& bs_var_name) { + if (decl_stmts_.find(bs_var_name) != decl_stmts_.end()) { + if (!bs_field_info_) { + bs_field_info_.emplace(); + } + + return bs_field_info_; + } + + if (parent_ != nullptr) { + return parent_->touch_bs_field_info(bs_var_name); + } + + return bs_field_info_; +} + +void Env::add_middle_node_name(const std::string& name) { + middle_node_info_.emplace(name); +} + +void Env::set_feature_info(FeatureInfo* feature_info) { + feature_info_ = feature_info; +} + +void Env::set_constructor_info(ConstructorInfo* constructor_info) { + constructor_info_ = constructor_info; +} + +void Env::set_common_info_prefix_adlog(const std::string& prefix_adlog) { + common_info_prepare_.emplace(prefix_adlog); + if (auto feature_info = mutable_feature_info()) { + if (const auto& info_prepare = feature_info->common_info_prepare()) { + common_info_prepare_->set_template_int_names(info_prepare->template_int_names()); + 
common_info_prepare_->set_common_info_values(info_prepare->common_info_values()); + } + } +} + +const absl::optional& Env::common_info_prefix() const { + if (common_info_prepare_) { + return common_info_prepare_->prefix(); + } + + static absl::optional empty; + return empty; +} + +const absl::optional& Env::get_common_info_prefix() const { + if (common_info_prepare_) { + return common_info_prepare_->prefix(); + } + + if (parent_ != nullptr) { + return parent_->get_common_info_prefix(); + } + + static absl::optional empty; + return empty; +} + +void Env::update_template_common_info_values() { + if (common_info_normal_) { + if (const auto& feature_info = get_feature_info()) { + if (Env* parent_env_ptr = common_info_normal_->parent_env_ptr()) { + if (feature_info->is_template()) { + const std::set& values = feature_info->template_common_info_values(); + for (int value: values) { + if (common_info_normal_->is_already_exists(value)) { + continue; + } + common_info_normal_->add_common_info_value(value); + const auto& common_info_detail = common_info_normal_->last_common_info_detail(); + parent_env_ptr->add_common_info_detail_def(*common_info_detail); + } + } + } + } + } +} + +void Env::add_common_info_detail_def(const CommonInfoLeaf& common_info_detail) { + std::string bs_enum_str = common_info_detail.get_bs_enum_str(); + if (bs_enum_str.size() == 0) { + LOG(INFO) << "cannot get bs_enum_str from common_info_detail!"; + return; + } + + if (is_new_var_exists(bs_enum_str)) { + return; + } + + if (common_info_detail.is_ready()) { + if (common_info_detail.is_scalar()) { + // 处理单值, 如 attr.int_value() + if (is_new_var_not_exists(bs_enum_str)) { + // 此处只添加单值的 exists 定义, 取值的定义在 basic_scalar 中添加 + LOG(INFO) << "add scalar exists, bs_enum_str: " << bs_enum_str + << ", def: " << common_info_detail.get_bs_scalar_exists_def(this); + add_new_exists_def_meta(bs_enum_str, + common_info_detail.get_bs_scalar_exists_def(this)); + add_new_def_meta(bs_enum_str, + 
common_info_detail.get_bs_scalar_def(this), + NewVarType::SCALAR); + if (const auto& enum_name = common_info_detail.common_info_enum_name()) { + set_common_info_field_info(bs_enum_str, + common_info_detail.get_adlog_field_str(), + *enum_name, + common_info_detail.common_info_value()); + } + } + } else { + // 处理 list 或者 map, 如 attr.int_list_value(i) + if (common_info_detail.is_list()) { + LOG(INFO) << "add list def, bs_enum_str: " << bs_enum_str + << ", list_def: " << common_info_detail.get_bs_list_def(this); + add_new_def_meta(bs_enum_str, + common_info_detail.get_bs_list_def(this), + NewVarType::LIST); + if (const auto& enum_name = common_info_detail.common_info_enum_name()) { + LOG(INFO) << "set common_info_field_info, bs_enum_str: " << bs_enum_str + << ", enum_name: " << *enum_name; + set_common_info_field_info(bs_enum_str, + common_info_detail.get_adlog_field_str(), + *enum_name, + common_info_detail.common_info_value()); + } + } else if (common_info_detail.is_map()) { + LOG(INFO) << "add map def, bs_enum_str: " << bs_enum_str + << ", map_def: " << common_info_detail.get_bs_map_def(this); + add_new_def(bs_enum_str, + common_info_detail.get_bs_map_def(this), + NewVarType::MAP); + add_attr_meta(bs_enum_str + "_key"); + add_attr_meta(bs_enum_str + "_value"); + + if (const auto& enum_name = common_info_detail.common_info_enum_name()) { + set_common_info_field_info(bs_enum_str + std::string("_key"), + common_info_detail.get_adlog_field_str() + std::string(".key"), + *enum_name, + common_info_detail.common_info_value()); + + set_common_info_field_info(bs_enum_str + std::string("_value"), + common_info_detail.get_adlog_field_str() + std::string(".value"), + *enum_name, + common_info_detail.common_info_value()); + } + } + } + + if (common_info_detail.common_info_type() == CommonInfoType::MIDDLE_NODE) { + if (auto feature_info = mutable_feature_info()) { + if (common_info_detail.is_scalar()) { + feature_info->add_field_def(common_info_detail.get_bs_enum_str(), + 
common_info_detail.get_functor_name(), + common_info_detail.get_bs_scalar_field_def(this), + common_info_detail.get_exists_functor_name(), + common_info_detail.get_bs_scalar_exists_field_def(this), + NewVarType::SCALAR, + AdlogVarType::COMMON_INFO_MIDDLE_NODE); + } else if (common_info_detail.is_list() || common_info_detail.is_list_size()) { + feature_info->add_field_def(common_info_detail.get_bs_enum_str(), + common_info_detail.get_functor_name(), + common_info_detail.get_bs_list_field_def(this), + NewVarType::LIST, + AdlogVarType::COMMON_INFO_MIDDLE_NODE); + } else if (common_info_detail.is_map() || common_info_detail.is_map_size()) { + feature_info->add_field_def(common_info_detail.get_bs_enum_str(), + common_info_detail.get_functor_name(), + common_info_detail.get_bs_map_field_def(this), + NewVarType::MAP, + AdlogVarType::COMMON_INFO_MIDDLE_NODE); + } + + if (const auto& name_value_alias = common_info_detail.name_value_alias()) { + feature_info->set_common_info_prefix_name_value(common_info_detail.get_bs_enum_str(), + common_info_detail.prefix_adlog(), + *name_value_alias); + } + } + } + } +} + +void Env::clear_common_info_fixed_list() { + if (auto& common_info_fixed_list = mutable_common_info_fixed_list()) { + common_info_fixed_list = absl::nullopt; + } +} + +void Env::add_ctor_decls(const VarDeclInfo& var_decl_info) { + const std::unordered_map& var_decls = var_decl_info.var_decls(); + for (auto it = var_decls.begin(); it != var_decls.end(); it++) { + LOG(INFO) << "add ctor decl, name: " << it->first << ", v: " << stmt_to_string(it->second.init_expr()); + var_decls_.emplace(it->first, it->second.init_expr()); + } +} + +void Env::update(clang::DeclStmt* decl_stmt) { + clang::VarDecl* var_decl = dyn_cast(decl_stmt->getSingleDecl()); + if (var_decl == nullptr) { + return; + } + LOG(INFO) << "update decl_stmt: " << stmt_to_string(decl_stmt); + + std::string var_name = var_decl->getNameAsString(); + if (!var_decl->hasInit()) { + add(var_name, nullptr); + 
decl_info_.emplace(var_name); + if (!tool::is_basic_array(var_decl->getType()) && !tool::is_builtin_simple_type(var_decl->getType())) { + LOG(INFO) << "add_deleted_var: " << var_name; + add_deleted_var(var_name); + } + add_decl_stmt(var_name, decl_stmt); + return; + } + + clang::Expr* expr = var_decl->getInit(); + expr = expr->IgnoreCasts(); + + if (find(var_name) != nullptr) { + LOG(INFO) << "overwrite var_name: " << var_name << ", stmt: " << stmt_to_string(expr); + } + + if (starts_with(var_name, "__range") && var_name.find(".") == std::string::npos) { + std::string expr_str = stmt_to_string(expr); + if (ends_with(expr_str, "begin()") || ends_with(expr_str, "end()")) { + std::vector str_arr = absl::StrSplit(expr_str, "."); + if (str_arr[0] != "adlog") { + add_deleted_var(str_arr[0]); + LOG(INFO) << "add_deleted_var: " << str_arr[0]; + } + } + } + + add(var_name, expr); + decl_info_.emplace(var_name, expr, decl_stmt); + + if (stmt_to_string(decl_stmt).find("static") == std::string::npos) { + add_decl_stmt(var_name, decl_stmt); + } +} + +void Env::update(clang::IfStmt* if_stmt) { + set_is_if(true); + set_if_stmt(if_stmt); + if_info_.emplace(if_stmt); + + if (parent_ != nullptr) { + parent_->increase_if_index(); + } +} + +void Env::update(clang::ForStmt* for_stmt) { + set_for_stmt(for_stmt); + set_is_loop(true); + loop_info_.emplace(for_stmt); + loop_info_->set_env_ptr(this); +} + +void Env::update(clang::CXXForRangeStmt* cxx_for_range_stmt) { + set_cxx_for_range_stmt(cxx_for_range_stmt); + set_is_loop(true); + loop_info_.emplace(cxx_for_range_stmt); + loop_info_->set_env_ptr(this); +} + +void Env::update(clang::BinaryOperator* binary_operator) { + std::string op = binary_operator->getOpcodeStr().str(); + + binary_op_info_.emplace(op, binary_operator->getLHS(), binary_operator->getRHS()); + + if (if_info_ && if_info_->if_stage() == IfStage::COND) { + if_info_->update_check_equal(op); + } + + if (op == "=") { + if (binary_op_info_->left_expr_str() != 
binary_op_info_->right_expr_str()) { + var_decls_[binary_op_info_->left_expr_str()] = binary_operator->getRHS(); + } + } +} + +void Env::update(clang::CXXOperatorCallExpr* cxx_operator_call_expr) { + std::string op = stmt_to_string(cxx_operator_call_expr->getCallee()); + + if (cxx_operator_call_expr->getNumArgs() == 2) { + binary_op_info_.emplace(op, cxx_operator_call_expr->getArg(0), cxx_operator_call_expr->getArg(1)); + } + + // 在上一级声明, 在当前 Env 赋值。 + if (op == "operator=") { + std::string var_name = stmt_to_string(cxx_operator_call_expr->getArg(0)); + if (parent_ != nullptr) { + clang::Expr* value_expr = cxx_operator_call_expr->getArg(1); + parent_->add(var_name, value_expr); + + std::vector str_arr = absl::StrSplit(stmt_to_string(value_expr), "."); + std::string callee_name = str_arr[0]; + if (find(callee_name) != nullptr && parent_ != nullptr) { + LOG(INFO) << "add to parent, callee_name: " << callee_name + << ", expr: " << stmt_to_string(find(callee_name)); + parent_->add(callee_name, find(callee_name)); + } + } + } + + if (if_info_ && if_info_->if_stage() == IfStage::COND) { + if_info_->update_check_equal(op); + } +} + +void Env::update(clang::CaseStmt* case_stmt) { + switch_case_info_.emplace(case_stmt); +} + +void Env::update_assign_info(clang::BinaryOperator* binary_operator) { + std::string name = stmt_to_string(binary_operator->getLHS()); + assign_info_.emplace(name, binary_operator->getLHS(), binary_operator->getRHS()); +} + +absl::optional Env::get_template_int_name(clang::Expr* init_expr) const { + if (init_expr == nullptr) { + return absl::nullopt; + } + + if (clang::DeclRefExpr* decl_ref_expr = dyn_cast(init_expr)) { + if (decl_ref_expr->getDecl()->isTemplateParameter() && + decl_ref_expr->getType().getTypePtr()->isIntegerType()) { + return absl::optional(stmt_to_string(decl_ref_expr)); + } + } + + return get_template_int_name(find(stmt_to_string(init_expr))); +} + +bool Env::is_template_int_ref(clang::Expr* expr) const { + absl::optional int_name = 
get_template_int_name(expr); + return int_name.has_value(); +} + +bool Env::is_reco_user_info() const { + if (is_reco_user_info_) { + return true; + } + + if (parent_ != nullptr) { + return parent_->is_reco_user_info(); + } + + return false; +} + +void Env::set_is_reco_user_info(bool v) { + if (parent_ == nullptr) { + is_reco_user_info_ = v; + return; + } + + parent_->set_is_reco_user_info(v); +} + +const std::string& Env::get_method_name() const { + const Env* root = get_root(); + if (root != nullptr) { + return root->method_name(); + } + + return method_name_; +} + +bool Env::is_feature_other_method(const std::string& method_name) const { + if (const auto& feature_info = get_feature_info()) { + return feature_info->is_feature_other_method(method_name); + } + + return false; +} + +void Env::add_action_param_new_def(const std::string& prefix, const NewActionParam& new_action_param) { + const auto new_params = new_action_param.new_params(); + for (size_t i = 0; i < new_params.size(); i++) { + if (new_params[i].field() == "size") { + add_new_def_meta(new_params[i].get_bs_enum_str(prefix), + new_params[i].get_new_def(prefix, this), + NewVarType::SCALAR); + } else { + add_new_def_meta(new_params[i].get_bs_enum_str(prefix), + new_params[i].get_new_def(prefix, this), + NewVarType::LIST); + } + } +} + +const ConstructorInfo* Env::get_constructor_info() const { + if (parent_ == nullptr) { + return constructor_info_; + } + + return parent_->get_constructor_info(); +} + +const FeatureInfo* Env::get_feature_info() const { + if (parent_ == nullptr) { + return feature_info_; + } + + return parent_->get_feature_info(); +} + +ConstructorInfo* Env::mutable_constructor_info() { + if (parent_ == nullptr) { + return constructor_info_; + } + + return parent_->mutable_constructor_info(); +} + +FeatureInfo* Env::mutable_feature_info() { + if (parent_ == nullptr) { + return feature_info_; + } + + return parent_->mutable_feature_info(); +} + +std::vector 
Env::find_new_action_param_var_name(const std::string& prefix, + const NewActionParam& new_action_param) const { + std::vector res; + + if (new_action_param.origin_name().size() > 0) { + const auto& new_params = new_action_param.new_params(); + for (size_t i = 0; i < new_params.size(); i++) { + std::string bs_enum_str = prefix + "_" + new_params[i].field(); + if (const auto& new_var = find_new_def(bs_enum_str)) { + res.push_back(new_var->name()); + } else { + LOG(INFO) << "cannot find action param new_var name in env, bs_enum_str: " << bs_enum_str + << ", prefix: " << prefix; + } + } + } + + return res; +} + +bool Env::is_in_loop_init() const { + if (const auto& loop_info = cur_loop_info()) { + if (loop_info->loop_stage() == LoopStage::INIT) { + return true; + } + } + + return false; +} + +bool Env::is_in_loop_body() const { + if (const auto &loop_info = get_loop_info()) { + if (loop_info->loop_stage() == LoopStage::BODY) { + return true; + } + } + + return false; +} + +bool Env::is_in_for_range_init() const { + if (const auto &loop_info = cur_loop_info()) { + if (loop_info->loop_stage() == LoopStage::INIT && !loop_info->is_for_stmt()) { + return true; + } + } + + return false; +} + +// 可能还有更多情况需要遍历。 +bool Env::is_decl_ref_contains_self(clang::DeclRefExpr* decl_ref_expr, + clang::Expr* value_expr) const { + if (decl_ref_expr == nullptr || value_expr == nullptr) { + return false; + } + + if (stmt_to_string(decl_ref_expr) == stmt_to_string(value_expr)) { + return true; + } + + if (clang::ImplicitCastExpr* implicit_cast_expr = dyn_cast(value_expr)) { + if (is_decl_ref_contains_self(decl_ref_expr, implicit_cast_expr->getSubExpr())) { + return true; + } + } + + if (clang::ParenExpr* paren_expr = dyn_cast(value_expr)) { + if (is_decl_ref_contains_self(decl_ref_expr, paren_expr->getSubExpr())) { + return true; + } + } + + if (clang::BinaryOperator* binary_operator = dyn_cast(value_expr)) { + if (is_decl_ref_contains_self(decl_ref_expr, binary_operator->getLHS())) { + 
return true; + } + if (is_decl_ref_contains_self(decl_ref_expr, binary_operator->getRHS())) { + return true; + } + } + + if (clang::CXXOperatorCallExpr* cxx_operator_call_expr = dyn_cast(value_expr)) { + for (size_t i = 0; i < cxx_operator_call_expr->getNumArgs(); i++) { + if (is_decl_ref_contains_self(decl_ref_expr, cxx_operator_call_expr->getArg(i))) { + return true; + } + } + } + + return false; +} + +bool Env::is_decl_in_parent_env(const std::string &var_name) const { + if (parent_ != nullptr) { + if (clang::DeclStmt* decl_stmt = parent_->get_decl_stmt_in_cur_env(var_name)) { + return true; + } + } + + return false; +} + +bool Env::is_decl_in_cur_env(const std::string &var_name) const { + if (clang::DeclStmt* decl_stmt = get_decl_stmt_in_cur_env(var_name)) { + return true; + } + + return false; +} + +bool Env::is_bslog_field_var_decl(const std::string& var_name) const { + if (clang::DeclStmt *decl_stmt = get_decl_stmt(var_name)) { + std::string decl_stmt_str = stmt_to_string(decl_stmt); + + if (decl_stmt_str.find("GetSingular") != std::string::npos || + decl_stmt_str.find("BSRepeatedField") != std::string::npos || + decl_stmt_str.find("BSMapField") != std::string::npos) { + return true; + } + } + + return false; +} + +std::unordered_map * Env::find_bs_field_detail_ptr_by_var_name( + const std::string &var_name +) { + if (bs_field_info_) { + auto& map_field_detail = bs_field_info_->mutable_map_bs_field_detail(); + if (map_field_detail.find(var_name) != map_field_detail.end()) { + return &map_field_detail; + } + } + + if (parent_ != nullptr) { + return parent_->find_bs_field_detail_ptr_by_var_name(var_name); + } + + return nullptr; +} + +bool Env::is_in_parent_else() const { + if (parent_ != nullptr && parent_->is_if()) { + if (const auto& if_info = parent_->cur_if_info()) { + if (if_info->if_stage() == IfStage::ELSE) { + return true; + } + } + } + + return false; +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git 
a/convert/Env.h b/convert/Env.h new file mode 100644 index 0000000..8cb6ad5 --- /dev/null +++ b/convert/Env.h @@ -0,0 +1,1185 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "clang/Tooling/Tooling.h" +#include "llvm/Support/CommandLine.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/AST/AST.h" +#include "clang/AST/StmtCXX.h" +#include "clang/AST/ASTConsumer.h" + +#include "Traits.h" +#include "./info/Info.h" +#include "./info/IfInfo.h" +#include "./info/DeclInfo.h" +#include "./info/NewActionParam.h" +#include "./info/NewVarDef.h" +#include "./info/ConstructorInfo.h" +#include "./info/ActionDetailInfo.h" +#include "./info/ActionDetailFixedInfo.h" +#include "./info/FeatureInfo.h" +#include "./info/SeqListInfo.h" +#include "./info/AssignInfo.h" +#include "./info/BSFieldInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +/// 保存解析 `ast` 节点时获取的各种信息,用于之后的改写。 +/// +/// 由于不同的改写规则需要不同的信息,而解析 `ast` 时并不知道此信息是用于哪个改写规则,因此 +/// 采用一个类似全局的变量 `Env` 来将这些信息保存到不同的属性字段中,改写规则从 `Env` 获取需要的信息 +/// 来进行处理。实际 `Env` 是保存在某个 `ast` 节点中,使得所有子节点都可以通过 `parent` 指针来访问。 +/// +/// 只有 `if` 和 `for`, `body` 需要 Env。 +/// +/// 需要遍历完所有的叶子节点才能拿到所有的信息,如 common info enum 是在 if 判断语句里, action detail no +/// 是在 find(no) 中。不同的信息格式不一样,每种信息对应了一个 struct。详细的 info 定义可参考 `info/Info.h`。 +class Env { + private: + /// 父节点 `Env`,用于保存代码的递归结构信息。 + Env *parent_ = nullptr; + + /// 子节点。 + std::vector children_; + + /// 当前 `Env` 直到 `root Env` 使用过的变量名,用于在插入新增变量时检查变量名是否重复。 + std::unordered_set used_var_names_; + + /// 特征名。 + std::string feature_name_; + + /// 特征类型。 + std::string feature_type_; + + /// 是否是循环。 + bool is_loop_ = false; + + /// 是否是 `if` 语句。 + bool is_if_ = false; + + /// 是第几个 `if` 语句。 + int if_index_ = 0; + + /// 当前 `Env` 的 action。用于 `action_detail_info` 相关的逻辑。 + /// + /// 示例: + /// ```cpp + /// int no = 1; + /// auto action_detail = adlog.user_info().action_detail.find(no); + /// ``` + int action_ = -1; + + /// 当前 `Env` 的 
action 表达式。 + /// + /// 示例: + /// ```cpp + /// int no = 1; + /// ``` + clang::Expr *action_expr_ = nullptr; + + /// action 表达式对应的 `bs` 表达式。 + std::string bs_action_expr_; + + /// action 表达式对应的叶子字段。 + /// + /// `action_detail_info` 中 `map` 保存的是 `message` 结构,取叶子节点需要通过嵌套结构进行获取。 + /// + /// 示例: + /// ```cpp + /// int no = 1; + /// const auto& ad_action = adlog.user_info().explore_long_term_ad_action(); + /// auto action_no_iter = ad_action.find(no); + /// if (action_no_iter != ad_action.end()) { + /// const auto& action_no_list = action_no_iter->second.list(); + /// for (int k = 0; k < action_no_list.size() && k < 100; ++k) { + /// uint64_t photo_id = action_no_list[k].photo_id(); + /// ... + /// } + /// } + /// ``` + std::vector action_leaf_fields_; + + /// 出现过的变量声明。`key` 是变量名,`value` 是解析的 `clang::Expr` 表达式。 + std::map var_decls_; + + /// 循环变量名。 + std::vector loop_var_names_; + + /// 循环表达式。 + std::vector loop_exprs_; + + /// common attr 枚举 int 值。 + int common_attr_int_value_ = 0; + + /// 是否是 common attr 条件语句的子节点。 + bool is_child_common_attr_cond_ = false; + + /// 是否有 `if` 语句的子节点。 + bool has_if_in_children_ = false; + + /// 出现过的变量声明。`key` 是变量名,`value` 是解析的 `clang::DeclStmt` 表达式。 + std::map decl_stmts_; + + /// 删除过的变量名。 + std::set deleted_vars_; + + /// `c++` `for range` 循环表达式。 + clang::CXXForRangeStmt *cxx_for_range_stmt_ = nullptr; + + /// `c++` `for` 循环表达式。 + clang::ForStmt *for_stmt_ = nullptr; + + /// `c++` `if` 语句。 + clang::IfStmt *if_stmt_ = nullptr; + + /// 第一个 `if` 语句。 + clang::IfStmt *first_if_stmt_ = nullptr; + + /// 第一个 `if` 语句是否是检查 item pos 条件的语句。 + /// + /// 示例: + /// ```cpp + /// if (pos >= adlog.item_size())) { + /// ... + /// } + /// ``` + bool is_first_if_check_item_pos_cond_ = true; + + /// 第一个 `if` 语句是检查 item pos 条件语句,并且是包含处理逻辑。 + /// + /// 示例: + /// ```cpp + /// if (adlog.item_size() >= pos) { ... 
} + /// ``` + bool is_first_if_check_item_pos_include_cond_ = false; + + /// 字段是否来自 `reco` 字段。 + /// + /// 示例: + /// ```cpp + /// auto id = adlog.user_info().reco_user_info().id(); + /// ``` + bool is_reco_user_info_ = false; + + /// 方法名。 + std::string method_name_; + + /// 新增变量定义。 + /// + /// 用于中间节点逻辑插入新的 `field` 定义。 + std::map> new_defs_; + + /// adlog.user_info.xxx 这种格式, 创建 ActionDetailInfo 时候再转换。 + absl::optional action_detail_prefix_adlog_; + + /// `if` 语句信息。 + absl::optional if_info_; + + /// 循环信息。 + absl::optional loop_info_; + + /// 变量声明信息。 + absl::optional decl_info_; + + /// 二元操作信息。 + absl::optional binary_op_info_; + + /// switch case 信息。 + absl::optional switch_case_info_; + + /// 赋值信息。 + absl::optional assign_info_; + + /// `action_detail` 字段相关信息。 + absl::optional action_detail_info_; + + /// 通过模板参数传递的 `action number` 对应的 `action_detail` 字段固定信息。 + absl::optional action_detail_fixed_info_; + + /// 普通 `common info` 信息。如 `common info` 枚举等。 + absl::optional common_info_normal_; + + /// 多个 `common info` 保存到 `vector` 中对应的信息。 + absl::optional common_info_multi_map_; + + /// 多个 `common info` `int list` 对应的信息。 + absl::optional common_info_multi_int_list_; + + /// `common info` 对应的枚举通过模板参数传递对应的信息。 + absl::optional common_info_fixed_; + + /// 多个 `common info` 对应的枚举通过模板参数传递。 + absl::optional common_info_fixed_list_; + + /// 用于枚举可能通过参数传递而出现在后面的情况,需要先创建 `common info` 部分信息,之后再更新枚举等。 + absl::optional common_info_prepare_; + + /// `PhotoInfo` 等中间节点信息。 + absl::optional middle_node_info_; + + /// `protobuf` `repeated` 字段对应的信息。 + absl::optional seq_list_info_; + + /// `photo_list` 固定字段对应的信息。 + absl::optional proto_list_info_; + + /// `proto` 字段对应的 `bs` 字段信息。 + absl::optional bs_field_info_; + + /// 特征构造函数相关信息。只保留其指针,用于修改其内容。 + ConstructorInfo *constructor_info_ = nullptr; + + /// 当前 `Env` 所在的特征。 + FeatureInfo *feature_info_ = nullptr; + + public: + Env() = default; + + /// 创建 `Env` 节点。传递其父节点作为参数。 + explicit Env(Env* parent): parent_(parent) { + parent_->add_child(this); + 
} + + /// 获取当前 `Env` 中所有变量声明。 + const std::map& var_decls() const { return var_decls_; } + + /// 获取当前 `Env` 的父节点。 + Env* parent() { return parent_; } + + /// 获取当前 `Env` 的 `const` 父节点。 + const Env* parent() const { return parent_; } + + /// 当前 `Env` 是否是根节点。 + bool is_root() { return parent_ == nullptr; } + + /// 获取当前 `Env` 的 `const` 根节点。 + const Env* get_root() const; + + /// 获取当前 `Env` 的可变根节点。 + Env* get_mutable_root(); + + /// 获取 `common info` 预备信息所在的 `const Env`。 + const Env* get_common_info_prepare_env() const; + + /// 获取 `common info` 预备信息所在的 `Env`。 + Env* mutable_common_info_prepare_env(); + + /// 获取 `common info` 父节点所在的 `const Env`。 + const Env* get_common_info_parent_env() const; + + /// 获取 `common info` 父节点所在的可变 `Env`。 + Env* mutable_common_info_parent_env(); + + /// 当前 `Env` 父节点是否是循环。 + bool is_parent_loop() const; + + /// 当前 `Env` 父节点是否是 `if` 语句。 + bool is_parent_if() const; + + /// 添加使用过的变量名。 + /// Env 可能是局部变量, 因此用过的变量名必须保存在 root 里。 + void add_used_var_name(const std::string& name); + + /// 变量名是否被使用过,需要判断直到根节点的所有节点。 + bool is_var_name_used(const std::string& name) const; + + /// 添加模板变量名。 + void add_template_var_names(const std::vector& var_names); + + /// 设置特征名。 + void set_feature_name(const std::string& feature_name); + + /// 获取特征名。 + const std::string& feature_name() const; + + /// 设置特征类型。 + void set_feature_type(const std::string& feature_type); + + /// 获取特征类型。 + const std::string& feature_type() const; + + /// 当前 `Env` 是否是循环。 + bool is_loop() const { return is_loop_; } + + /// 设置当前 `Env` 是否是循环。 + void set_is_loop(bool is_loop) { is_loop_ = is_loop; } + + /// 获取循环 `Env` 的 `const` 父节点。 + /// + /// 如果不是循环节点,则返回 `nullptr`。 + const Env* get_loop_parent() const; + + /// 获取循环 `Env` 的可变父节点。 + Env* mutable_loop_parent(); + + // 获取最外层的 `const loop Env`, 可能有多层 loop 嵌套。 + const Env* get_outer_loop() const; + + // 最外层的 mutable loop, 可能有多层 loop 嵌套。 + Env* mutable_outer_loop(); + + /// 获取最外层的 `loop` 的父节点。 + const Env* get_outer_loop_parent() const; + + /// 获取最外层的 
`loop` 的可变父节点。 + Env* mutable_outer_loop_parent(); + + /// 获取外层 `if` `Env`。 + const Env *get_outer_if() const; + + /// 获取外层 `if` 的可变 `Env`。 + Env *mutable_outer_if(); + + /// 获取外层 `if` 对应的父节点。 + const Env *get_outer_if_parent() const; + + /// 获取外层 `if` 对应的 `mutable` 父节点。 + Env *mutable_outer_if_parent(); + + /// 获取循环 `Env`。 + const Env* get_loop_env() const; + + /// 获取循环 `Env` 的可变版本。 + Env* mutable_loop_env(); + + /// 添加子节点。 + void add_child(Env* child); + + /// 将解析得到的 `Expr` 添加到 `map` 中。 + void add(const std::string& key, clang::Expr* expr); + + /// 根据变量名查找代码中的变量。 + clang::Expr* find(const std::string& key) const; + + /// 从 `map` 中删除变量。 + void erase(const std::string& key); + + /// 添加循环变量。 + void add_loop_var(const std::string& key); + + /// 获取循环变量。 + void pop_loop_var(); + + /// 获取最后一个循环变量。用于有多个循环变量的情况。 + const std::string& get_last_loop_var() const; + + /// 变量是否是循环变量。 + bool is_loop_var(const std::string& key) const; + + /// 当前 `Env` 是否是在循环中。 + bool is_in_loop() const; + + /// 获取循环变量名。 + const std::vector& loop_var_names() const { return loop_var_names_; } + + /// 当前 `Env` 是否是 `if` 语句。 + bool is_if() const { return is_if_; } + + /// 设置当前 `Env` 是否是 `if` 语句。 + void set_is_if(bool is_if); + + /// 当前 `Env` 是否是第一个 `if` 语句。 + bool is_first_if() const { return is_if_ && parent_ != nullptr && parent_->if_index() == 1; } + + /// 获取 `if` 语句索引。用于有多个 `if` 语句的情况。 + int if_index() const { return if_index_; } + + /// 增加 `if` 语句索引。 + void increase_if_index() { if_index_++; } + + /// 当前 `Env` 是否是在 `if` 语句中。 + bool is_in_if() const; + + /// 当前 `Env` 是否是在 `if` 语句条件中。 + bool is_in_if_cond() const; + + /// 当前 `Env` 是否是在 `if` 语句体中。 + bool is_in_if_body() const; + + /// 当前 `Env` 是否有 `if` 语句的子节点。 + bool has_if_in_children() const; + + /// 设置 `has_if_in_children`。 + void set_has_if_in_children(bool v); + + /// 当前 `Env` 是否是在检查中间节点的条件。 + bool is_check_middle_node_root_cond() const; + + /// 添加循环 `Expr`。 + void add_loop_expr(clang::Expr* expr); + + /// 获取循环 `Expr` 数量。 + size_t 
get_loop_expr_size(); + + /// 获取循环 `Expr`。 + clang::Expr* get_loop_expr(size_t index); + + /// 获取第一个循环 `Expr`。 + clang::Expr* get_first_loop_expr(); + + /// 获取父循环 `Expr`。 + clang::Expr* find_parent_loop(); + + /// 当前 `Env` 是否是组合特征。 + bool is_combine_feature() const; + + /// 当前 `Env` 是否是用户特征。 + bool is_user_feature() const; + + /// 当前 `Env` 是否是 item 特征。 + bool is_item_feature() const; + + /// 当前 `Env` 是否是 `sparse` 特征。 + bool is_sparse_feature() const; + + /// 当前 `Env` 是否是 `dense` 特征。 + bool is_dense_feature() const; + + /// 是否是 common info list 或者 map 的遍历 loop + bool is_common_info_loop() const; + + /// 父节点是否是 common info list 或者 map 的遍历 loop + bool is_parent_common_info_loop() const; + + /// 当前 `Env` 是否是在 common info list 或者 map 的遍历 loop 体中。 + bool is_in_common_info_loop_body() const; + + /// 当前 `Env` 是否是在 common info 的 `if` 语句体中。 + bool is_in_common_info_if_body() const; + + /// 获取 common info 的 枚举 int 值。 + absl::optional get_common_attr_int_value() const; + + /// 获取 common info 的 枚举名字符串。 + absl::optional get_common_attr_int_name() const; + + /// 是否是在检查 item 位置条件。 + bool is_check_item_pos_cond() const; + + /// 是否是在检查 action detail 条件。 + bool is_action_detail_cond() const; + + /// 设置是否是 common attr 判断条件。 + void set_is_child_common_attr_cond(bool v); + + /// 当前 `Env` 是否是 common attr 判断条件。 + bool is_child_common_attr_cond() const; + + /// 设置 `CXXForRangeStmt`。 + void set_cxx_for_range_stmt(clang::CXXForRangeStmt* cxx_for_range_stmt); + + /// 获取 `CXXForRangeStmt`。 + clang::CXXForRangeStmt* cxx_for_range_stmt() const; + + /// 设置 `ForStmt`。 + void set_for_stmt(clang::ForStmt* for_stmt); + + /// 获取 `ForStmt`。 + clang::ForStmt* for_stmt() const; + + /// 设置 `IfStmt`。 + void set_if_stmt(clang::IfStmt* if_stmt); + + /// 获取 `IfStmt`。 + clang::IfStmt* if_stmt() const; + + /// 是否所有 `if` 语句都被删除。 + bool is_all_if_stmt_deleted() const; + + /// 添加 `DeclStmt`。 + void add_decl_stmt(const std::string& name, clang::DeclStmt* decl_stmt); + + /// 获取原始的 `DeclStmt`。 + clang::DeclStmt* 
get_decl_stmt(const std::string& name) const; + + /// 获取当前 `Env` 中的 `DeclStmt`。 + clang::DeclStmt* get_decl_stmt_in_cur_env(const std::string& name) const; + + /// 查找 `DeclStmt` 的 `Env`。 + Env* find_decl_env(const std::string& name); + + /// 变量是否在 `DeclStmt`。 + bool is_var_in_decl_stmt(const std::string& name) const; + + /// 添加删除的变量。 + void add_deleted_var(const std::string& name); + + /// 通过 `Expr` 添加删除的变量。 + bool add_deleted_var_by_expr(clang::Expr* expr); + + /// 通过 `Expr` 字符串添加删除的变量。 + bool add_deleted_var_by_expr_str(const std::string& expr_str); + + /// 获取删除的变量。 + const std::set& deleted_vars() const { return deleted_vars_; } + + /// 删除变量。用于删除改写后不再需要的 `proto` 变量相关逻辑。 + void pop_deleted_var(const std::string& name); + + /// 清楚需要删除的列表。 + void clear_deleted_var(); + + /// 一些语句中的变量定义会在之后的语句中继续使用,因此不能定义在当前 Env 中,必须往上寻找合适的 Env。 + /// 比如 if 语句, common info body, loop body 等。 + /// if 只有 cond 中的语句可能会有这种现象,因此目前对于 if 只考虑 if conf 中的语句。 + Env* mutable_new_def_target_env(bool is_from_reco); + + /// 添加新的变量定义。 + /// + /// 如果是已经在代码里定义的变量, 则直接用其变量名,不再重新新增定义, 并且将其初始化直接替换为 bs 表达式。 + void add_new_def(const std::string& bs_enum_str, + const std::string& var_def, + NewVarType new_var_type); + + /// 添加新的定义辅助函数。 + void add_new_def_helper(const std::string &bs_enum_str, + const std::string &var_def, + NewVarType new_var_type); + + /// 添加新的变量 meta 信息。如变量类型,字段路径等。 + void add_new_def_meta(const std::string& bs_enum_str, + const std::string& var_def, + NewVarType new_var_type); + + /// 添加 decl_stmt 的 adlog 变量 + void add_new_def(const std::string& bs_enum_str, + const std::string& var_name, + const std::string& var_def, + NewVarType new_var_type); + + /// 添加新的定义辅助函数。 + void add_new_def_helper(const std::string &bs_enum_str, + const std::string &var_name, + const std::string &var_def, + NewVarType new_var_type); + + /// 添加新的变量 meta 信息。如变量类型,字段路径等。 + void add_new_def_meta(const std::string& bs_enum_str, + const std::string& var_name, + const std::string& var_def, + NewVarType new_var_type); 
+ + /// Add an already existing variable definition. Used for variables that are already declared. + void add_new_exists_def(const std::string& bs_enum_str, + const std::string& exists_var_def); + + /// Helper for adding an already existing variable definition. + void add_new_exists_def_helper(const std::string& bs_enum_str, + const std::string& exists_var_def); + + /// Add meta information of an already existing variable, such as variable type and field path. + void add_new_exists_def_meta(const std::string& bs_enum_str, + const std::string& exists_var_def); + + /// Used to fix a bs field; uses bs_var_name directly as the variable name. + void add_new_def_overwrite(const std::string &bs_var_name, + const std::string &var_def, + NewVarType new_var_type); + + /// Add `bs_enum_str` as `meta` information. + void add_attr_meta(const std::string& bs_enum_str); + + /// Add detailed field information, including: + /// 1. adlog_field, e.g. adlog.user_info.id + /// 2. bs_field_enum, e.g. adlog_user_info_id + void set_normal_adlog_field_info(const std::string& bs_enum_str, + const std::string& adlog_field); + + /// Add detailed field information, including: + /// 1. adlog_field, e.g. adlog.user_info.id + /// 2. bs_field_enum, e.g. adlog_user_info_id + /// 3. enum_value, if it is CommonInfo + /// 4. 
enum_str, 如果是 CommonInfo + void set_common_info_field_info(const std::string& bs_enum_str, + const std::string& adlog_field, + const std::string& common_info_enum_name, + int common_info_value); + + /// Find a new variable definition by `bs_enum_str`. + const absl::optional& find_new_def(const std::string& bs_enum_str) const; + + /// Find a new `mutable` variable definition by `bs_enum_str`. Used to update its information. + absl::optional& find_mutable_new_def(const std::string& bs_enum_str); + + /// Get all new variable definitions. Used when the code is added at the end. + const std::map>& new_defs() const { return new_defs_; } + + /// Find a valid new variable name according to the string concatenation rule. + std::string find_valid_new_name(const std::string& bs_enum_str) const; + + /// Whether the variable name is valid. + bool is_new_name_valid(const std::string& name) const; + + /// Whether the new variable exists. + bool is_new_var_exists(const std::string& bs_enum_str) const; + + /// Whether the new variable does not exist. + bool is_new_var_not_exists(const std::string& bs_enum_str) const; + + /// Get the new variable name by `bs_enum_str`. + const std::string& find_new_var_name(const std::string& bs_enum_str) const; + + /// Get the already existing variable name by `bs_enum_str`. + const std::string& find_new_exists_var_name(const std::string& bs_enum_str) const; + + /// Get all new variable names. + std::vector get_all_new_def_var_names() const; + + /// Get all new variable definitions. + std::string get_all_new_defs() const; + + /// Set the first `if` statement. + void set_first_if_stmt(clang::IfStmt* if_stmt) { first_if_stmt_ = if_stmt; } + + /// Get the first `if` statement. + clang::IfStmt* first_if_stmt() const { return first_if_stmt_; } + + /// Set whether the first `if` statement is the item position condition check. + void set_is_first_if_check_item_pos_cond(bool v) { is_first_if_check_item_pos_cond_ = v; } + + /// Whether the first `if` statement is the item position condition check. + bool is_first_if_check_item_pos_cond() const { return is_first_if_check_item_pos_cond_; } + + /// Set whether the item position condition check of the first `if` statement includes other logic. + void set_is_first_if_check_item_pos_include_cond(bool v) { is_first_if_check_item_pos_include_cond_ = v; } + + /// Whether the item position condition check of the first `if` statement includes other logic. + bool is_first_if_check_item_pos_include_cond() const { return is_first_if_check_item_pos_include_cond_; } + + /// 设置 action 
number。 + void set_action(int action) { action_ = action; } + + /// Get the action number. + int get_action(); + + /// Set the action expression. The action may be a variable. + void set_action_expr(clang::Expr* expr, std::string bs_action_expr); + + /// Get the action expression. + std::string get_bs_action_expr(); + + /// Find one action detail leaf variable name by `bs_enum_str`. + std::string find_one_action_detail_leaf_name(const std::string& bs_enum_str) const; + + /// Add an `IfStmt`. + void add_if_stmt(clang::IfStmt* if_stmt); + + /// Add a `ForStmt`. + void add_loop_stmt(clang::ForStmt* for_stmt); + + /// Add a `CXXForRangeStmt`. + void add_loop_stmt(clang::CXXForRangeStmt* cxx_for_range_stmt); + + /// Add an action detail field prefix, e.g. `adlog.user_info.longterm_action_detail`. + void add_action_detail_prefix_adlog(const std::string& prefix_adlog); + + /// Add a middle node name, e.g. `PhotoInfo`. + void add_middle_node_name(const std::string& name); + + /// Set the pointer to the feature info. + void set_feature_info(FeatureInfo* feature_info); + + /// Set the pointer to the constructor info; used to modify its content. + void set_constructor_info(ConstructorInfo* constructor_info); + + /// Create the `CommonInfoNormal` info where `common info` first appears. + /// + /// The first appearance of common info is always the access to the repeated common info field, so the prefix is known there; the prefix always + /// appears first and name_value appears later. + absl::optional& touch_common_info_normal(); + + /// Create the `CommonInfoFixedList` info where a `common info` passed via template parameter first appears. + absl::optional& touch_common_info_fixed_list(); + + /// Create the `CommonInfoMultiMap` info where multiple `common info` first appear. + absl::optional& touch_common_info_multi_map(const std::string& map_name, + const std::string& attr_name); + + /// Create the `CommonInfoMultiIntList` info where multiple `common info` first appear. + absl::optional & touch_common_info_multi_int_list(); + + /// Create the `ActionDetailInfo` info where `action detail` first appears. + absl::optional& touch_action_detail_info(int action); + + /// Update the `action detail` info. + absl::optional& update_action_detail_info(int action); + + /// Create the `ActionDetailFixedInfo` info where `action detail` first appears. + absl::optional& touch_action_detail_fixed_info(const 
std::string& action); + + /// Create the `SeqListInfo` info where `seq list` first appears. + absl::optional& touch_seq_list_info(const std::string& root_name); + + /// Create the `ProtoListInfo` info where `photo list` first appears. + absl::optional& touch_proto_list_info(const std::string& prefix_adlog); + + /// Create the `BSFieldInfo` info where `bs field` first appears. + absl::optional& touch_bs_field_info(const std::string& bs_var_name); + + /// Set the `common_info` prefix. + /// + /// Example: adlog.user_info.common_info_attr. + void set_common_info_prefix_adlog(const std::string& prefix_adlog); + + /// Get the `common_info` prefix. + const absl::optional& common_info_prefix() const; + + /// Get the `common_info` prefix. + const absl::optional& get_common_info_prefix() const; + + /// Update the values of the template `common_info`. + void update_template_common_info_values(); + + /// Add a detailed `common_info` definition. + void add_common_info_detail_def(const CommonInfoLeaf& common_info_detail); + + /// Add constructor declarations. + void add_ctor_decls(const VarDeclInfo& var_decl_info); + + /// Get the template parameter name. + absl::optional get_template_int_name(clang::Expr* init_expr) const; + + /// Whether the expression is a template parameter reference. + bool is_template_int_ref(clang::Expr* expr) const; + + /// Whether this is reco user info. + bool is_reco_user_info() const; + + /// Set whether this is reco user info. + void set_is_reco_user_info(bool v); + + /// Get the name of the method the current `Env` is in. + const std::string& get_method_name() const; + + /// Method name. + const std::string& method_name() const { return method_name_; } + + /// Set the method name. + void set_method_name(const std::string& method_name) { method_name_ = method_name; } + + /// Whether it is another method of the feature. + bool is_feature_other_method(const std::string& method_name) const; + + /// Add a new definition for an action parameter. + void add_action_param_new_def(const std::string& prefix, + const NewActionParam& new_action_param); + + /// Find the variable names of the new definition for an action parameter. + std::vector find_new_action_param_var_name(const std::string& prefix, + const NewActionParam& new_action_param) const; + + /// Clear `common_info_fixed_list`. + void clear_common_info_fixed_list(); + + /// Whether inside a loop init statement. + bool 
is_in_loop_init() const; + + /// Whether inside a loop body. + bool is_in_loop_body() const; + + /// Whether inside a loop init statement (presumably of a range-based for, judging by the name; confirm against the implementation). + bool is_in_for_range_init() const; + + /// Whether the `Decl` expression contains a reference to itself. + /// + /// A computed expression may contain a variable that refers to itself. + bool is_decl_ref_contains_self(clang::DeclRefExpr* decl_ref_expr, + clang::Expr* value_expr) const; + + + /// The `const` `if` info of the current `Env`. + const absl::optional& cur_if_info() const { return if_info_; } + + /// The `const` `loop` info of the current `Env`. + const absl::optional& cur_loop_info() const { return loop_info_; } + + /// The `const` `switch case` info of the current `Env`. + const absl::optional& cur_switch_case_info() const { return switch_case_info_; } + + /// The `const` `Decl` info of the current `Env`. + const absl::optional& cur_decl_info() const { return decl_info_; } + + /// The `const` `BinaryOperator` info of the current `Env`. + const absl::optional& cur_binary_op_info() const { return binary_op_info_; } + + /// The `const` `AssignInfo` info of the current `Env`. + const absl::optional& cur_assign_info() const { return assign_info_; } + + /// The `const` `ActionDetailInfo` info of the current `Env`. + const absl::optional& cur_action_detail_info() const { return action_detail_info_; } + + /// The `const` `ActionDetailFixedInfo` info of the current `Env`. + const absl::optional& cur_action_detail_fixed_info() const { + return action_detail_fixed_info_; + } + + /// The `const` `CommonInfoNormal` info of the current `Env`. + const absl::optional& cur_common_info_normal() const { return common_info_normal_; } + + /// The `const` `CommonInfoMultiMap` info of the current `Env`. + const absl::optional& cur_common_info_multi_map() const { + return common_info_multi_map_; + } + + /// The `const` `CommonInfoMultiIntList` info of the current `Env`. + const absl::optional& cur_common_info_multi_int_list() const { + return common_info_multi_int_list_; + } + + /// The `const` `CommonInfoFixed` info of the current `Env`. + const absl::optional& cur_common_info_fixed() const { return common_info_fixed_; } + + /// The `const` `CommonInfoFixedList` info of the current `Env`. + const absl::optional& cur_common_info_fixed_list() const { + return 
common_info_fixed_list_; + } + + /// The `const` `CommonInfoPrepare` info of the current `Env`. + const absl::optional& cur_common_info_prepare() const { return common_info_prepare_; } + + /// The `const` `MiddleNodeInfo` info of the current `Env`. + const absl::optional& cur_middle_node_info() const { return middle_node_info_; } + + /// The `const` `SeqListInfo` info of the current `Env`. + const absl::optional& cur_seq_list_info() const { return seq_list_info_; } + + /// The `const` `ProtoListInfo` info of the current `Env`. + const absl::optional& cur_proto_list_info() const { return proto_list_info_; } + + /// The `const` `BSFieldInfo` info of the current `Env`. + const absl::optional& cur_bs_field_info() const { return bs_field_info_; } + + + /// The `mutable` `if` info of the current `Env`. + absl::optional& cur_mutable_if_info() { return if_info_; } + + /// The `mutable` `loop` info of the current `Env`. + absl::optional& cur_mutable_loop_info() { return loop_info_; } + + /// The `mutable` `switch case` info of the current `Env`. + absl::optional& cur_mutable_switch_case_info() { return switch_case_info_; } + + /// The `mutable` `Decl` info of the current `Env`. + absl::optional& cur_mutable_decl_info() { return decl_info_; } + + /// The `mutable` `BinaryOperator` info of the current `Env`. + absl::optional& cur_mutable_binary_op_info() { return binary_op_info_; } + + /// The `mutable` `AssignInfo` info of the current `Env`. + absl::optional& cur_mutable_assign_info() { return assign_info_; } + + /// The `mutable` `ActionDetailInfo` info of the current `Env`. + absl::optional& cur_mutable_action_detail_info() { return action_detail_info_; } + + /// The `mutable` `ActionDetailFixedInfo` info of the current `Env`. + absl::optional& cur_mutable_action_detail_fixed_info() { + return action_detail_fixed_info_; + } + + /// The `mutable` `CommonInfoNormal` info of the current `Env`. + absl::optional& cur_mutable_common_info_normal() { return common_info_normal_; } + + /// The `mutable` `CommonInfoMultiMap` info of the current `Env`. + absl::optional& cur_mutable_common_info_multi_map() { return common_info_multi_map_; } + + /// The `mutable` `CommonInfoMultiIntList` info of the current `Env`. + absl::optional& 
cur_mutable_common_info_multi_int_list() { + return common_info_multi_int_list_; + } + + /// The `mutable` `CommonInfoFixed` info of the current `Env`. + absl::optional& cur_mutable_common_info_fixed() { return common_info_fixed_; } + + /// The `mutable` `CommonInfoFixedList` info of the current `Env`. + absl::optional& cur_mutable_common_info_fixed_list() { + return common_info_fixed_list_; + } + + /// The `mutable` `CommonInfoPrepare` info of the current `Env`. + absl::optional& cur_mutable_common_info_prepare() { return common_info_prepare_; } + + /// The `mutable` `MiddleNodeInfo` info of the current `Env`. + absl::optional& cur_mutable_middle_node_info() { return middle_node_info_; } + + /// The `mutable` `SeqListInfo` info of the current `Env`. + absl::optional& cur_mutable_seq_list_info() { return seq_list_info_; } + + /// The `mutable` `ProtoListInfo` info of the current `Env`. + absl::optional& cur_mutable_proto_list_info() { return proto_list_info_; } + + /// The `mutable` `BSFieldInfo` info of the current `Env`. + absl::optional& cur_mutable_bs_field_info() { return bs_field_info_; } + + /// The `mutable` `if` info of the current `Env`. Overload selected by the type of `v`; `v` itself is not read. + absl::optional& cur_info(const IfInfo& v) { return if_info_; } + + /// The `mutable` `loop` info of the current `Env`. Overload selected by the type of `v`; `v` itself is not read. + absl::optional& cur_info(const LoopInfo& v) { return loop_info_; } + + /// The `mutable` `switch case` info of the current `Env`. Overload selected by the type of `v`; `v` itself is not read. + absl::optional& cur_info(const SwitchCaseInfo& v) { return switch_case_info_; } + + /// The `mutable` `Decl` info of the current `Env`. Overload selected by the type of `v`; `v` itself is not read. + absl::optional& cur_info(const DeclInfo& v) { return decl_info_; } + + /// The `mutable` `BinaryOperator` info of the current `Env`. Overload selected by the type of `v`; `v` itself is not read. + absl::optional& cur_info(const BinaryOpInfo& v) { return binary_op_info_; } + + /// The `mutable` `AssignInfo` info of the current `Env`. Overload selected by the type of `v`; `v` itself is not read. + absl::optional& cur_info(const AssignInfo& v) { return assign_info_; } + + /// The `mutable` `ActionDetailInfo` info of the current `Env`. Overload selected by the type of `v`; `v` itself is not read. + absl::optional& cur_info(const ActionDetailInfo& v) { return action_detail_info_; } + + /// The `mutable` `ActionDetailFixedInfo` info of the current `Env`. Overload selected by the type of `v`; `v` itself is not read. + absl::optional& cur_info(const ActionDetailFixedInfo& v) { + return action_detail_fixed_info_; + } + + /// 当前 `Env` 的 
`mutable` `CommonInfoNormal` 信息。 + absl::optional& cur_info(const CommonInfoNormal& v) { return common_info_normal_; } + + /// The `mutable` `CommonInfoMultiMap` info of the current `Env`. + absl::optional& cur_info(const CommonInfoMultiMap& v) { + return common_info_multi_map_; + } + + /// The `mutable` `CommonInfoMultiIntList` info of the current `Env`. + absl::optional& cur_info(const CommonInfoMultiIntList& v) { + return common_info_multi_int_list_; + } + + /// The `mutable` `CommonInfoFixed` info of the current `Env`. + absl::optional& cur_info(const CommonInfoFixed& v) { return common_info_fixed_; } + + /// The `mutable` `CommonInfoFixedList` info of the current `Env`. + absl::optional& cur_info(const CommonInfoFixedList& v) { + return common_info_fixed_list_; + } + + /// The `mutable` `CommonInfoPrepare` info of the current `Env`. + absl::optional& cur_info(const CommonInfoPrepare& v) { return common_info_prepare_; } + + /// The `mutable` `MiddleNodeInfo` info of the current `Env`. + absl::optional& cur_info(const MiddleNodeInfo& v) { return middle_node_info_; } + + /// The `mutable` `SeqListInfo` info of the current `Env`. + absl::optional& cur_info(const SeqListInfo& v) { return seq_list_info_; } + + /// The `mutable` `ProtoListInfo` info of the current `Env`. + absl::optional& cur_info(const ProtoListInfo& v) { return proto_list_info_; } + + /// The `mutable` `BSFieldInfo` info of the current `Env`. The overloads that follow are the `const` counterparts and return the same members by const reference. + absl::optional& cur_info(const BSFieldInfo& v) { return bs_field_info_; } + + const absl::optional& cur_info(const IfInfo& v) const { return if_info_; } + + const absl::optional& cur_info(const LoopInfo& v) const { return loop_info_; } + + const absl::optional& cur_info(const SwitchCaseInfo& v) const { + return switch_case_info_; + } + + const absl::optional& cur_info(const DeclInfo& v) const { return decl_info_; } + const absl::optional& cur_info(const BinaryOpInfo& v) const { return binary_op_info_; } + const absl::optional& cur_info(const AssignInfo& v) const { return assign_info_; } + const absl::optional& cur_info(const ActionDetailInfo& v) const { + return action_detail_info_; + } + const 
absl::optional& cur_info(const ActionDetailFixedInfo& v) const { + return action_detail_fixed_info_; + } + const absl::optional& cur_info(const CommonInfoNormal& v) const { + return common_info_normal_; + } + const absl::optional& cur_info(const CommonInfoMultiMap& v) const { + return common_info_multi_map_; + } + const absl::optional& cur_info(const CommonInfoMultiIntList& v) const { + return common_info_multi_int_list_; + } + const absl::optional& cur_info(const CommonInfoFixed& v) const { + return common_info_fixed_; + } + const absl::optional& cur_info(const CommonInfoFixedList& v) const { + return common_info_fixed_list_; + } + const absl::optional& cur_info(const CommonInfoPrepare& v) const { + return common_info_prepare_; + } + const absl::optional& cur_info(const MiddleNodeInfo& v) const { + return middle_node_info_; + } + const absl::optional& cur_info(const SeqListInfo& v) const { + return seq_list_info_; + } + const absl::optional& cur_info(const ProtoListInfo& v) const { + return proto_list_info_; + } + + const absl::optional &cur_info(const BSFieldInfo &v) const {return bs_field_info_;} + + /// Generic template function for getting an `info`. The lookup starts at the current `Env`; if the current `Env` has no value, the parent `Env` is searched, up to the root. When no `Env` in the chain has a value, a reference to a function-local static empty optional is returned. + template + const absl::optional& get_info() const { + const absl::optional& info = cur_info(InfoTraits::v); + if (info.has_value()) { + return info; + } + + if (parent_ != nullptr) { + return parent_->get_info(); + } + + static const absl::optional empty; + return empty; + } + + /* Non-const overload: forwards to the const lookup and casts constness away from the result. NOTE(review): when nothing is found the result refers to the `static const` empty optional above, so writing through it would modify a const object; confirm callers only read in that case. */ template + absl::optional& get_info() { + const absl::optional& info = const_cast(this)->get_info(); + return const_cast&>(info); + } + + const absl::optional& get_if_info() const { return get_info(); } + const absl::optional& get_loop_info() const { return get_info(); } + const absl::optional& get_switch_case_info() const { return get_info(); } + const absl::optional& get_decl_info() const { return get_info(); } + const absl::optional& get_binary_op_info() const { return get_info(); } + const absl::optional& 
get_assign_info() const { return get_info(); } + /* Read-only getters: each one simply forwards to get_info(). */ const absl::optional& get_action_detail_info() const { + return get_info(); + } + const absl::optional& get_action_detail_fixed_info() const { + return get_info(); + } + const absl::optional& get_common_info_normal() const { + return get_info(); + } + const absl::optional& get_common_info_multi_map() const { + return get_info(); + } + const absl::optional& get_common_info_multi_int_list() const { + return get_info(); + } + const absl::optional& get_common_info_fixed() const { + return get_info(); + } + const absl::optional& get_common_info_fixed_list() const { + return get_info(); + } + const absl::optional& get_common_info_prepare() const { + return get_info(); + } + const absl::optional& get_middle_node_info() const { + return get_info(); + } + const absl::optional& get_seq_list_info() const { return get_info(); } + const absl::optional& get_proto_list_info() const { return get_info(); } + const absl::optional& get_bs_field_info() const { return get_info(); } + + /* Mutable getters: non-const members that forward to get_info() and return a non-const reference. */ absl::optional& mutable_if_info() { return get_info(); } + absl::optional& mutable_loop_info() { return get_info(); } + absl::optional& mutable_switch_case_info() { return get_info(); } + absl::optional& mutable_decl_info() { return get_info(); } + absl::optional& mutable_binary_op_info() { return get_info(); } + absl::optional& mutable_assign_info() { return get_info(); } + absl::optional& mutable_action_detail_info() { return get_info(); } + absl::optional& mutable_action_detail_fixed_info() { + return get_info(); + } + absl::optional& mutable_common_info_normal() { + return get_info(); + } + absl::optional& mutable_common_info_multi_map() { + return get_info(); + } + absl::optional& mutable_common_info_multi_int_list() { + return get_info(); + } + absl::optional& mutable_common_info_fixed() { + return get_info(); + } + absl::optional& mutable_common_info_fixed_list() { + return get_info(); + } + absl::optional& mutable_common_info_prepare() { + return 
get_info(); + } + absl::optional& mutable_middle_node_info() { + return get_info(); + } + absl::optional& mutable_seq_list_info() { return get_info(); } + absl::optional& mutable_proto_list_info() { return get_info(); } + absl::optional& mutable_bs_field_info() { return get_info(); } + + /* Accessors returning the `constructor_info_` / `feature_info_` pointers stored in this Env; the `cur_info` overloads pick the member by the type of `v` and do not read `v`. */ const ConstructorInfo* cur_constructor_info() const { return constructor_info_; } + const FeatureInfo* cur_feature_info() const { return feature_info_; } + ConstructorInfo* cur_mutable_constructor_info() { return constructor_info_; } + FeatureInfo* cur_mutable_feature_info() { return feature_info_; } + ConstructorInfo* cur_info(const ConstructorInfo& v) { return constructor_info_; } + FeatureInfo* cur_info(const FeatureInfo& v) { return feature_info_; } + const ConstructorInfo* cur_info(const ConstructorInfo& v) const { return constructor_info_; } + const FeatureInfo* cur_info(const FeatureInfo& v) const { return feature_info_; } + + /// Get the `ConstructorInfo` pointer. + const ConstructorInfo* get_constructor_info() const; + + /// Get the `FeatureInfo` pointer. + const FeatureInfo* get_feature_info() const; + + /// Get the mutable `ConstructorInfo` pointer. + ConstructorInfo* mutable_constructor_info(); + + /// Get the mutable `FeatureInfo` pointer. + FeatureInfo* mutable_feature_info(); + + /// Update variable related information. + void update(clang::DeclStmt* decl_stmt); + void update(clang::IfStmt* if_stmt); + void update(clang::ForStmt* for_stmt); + void update(clang::CXXForRangeStmt* cxx_for_range_stmt); + void update(clang::BinaryOperator* binary_operator); + void update(clang::CXXOperatorCallExpr* cxx_operator_call_expr); + void update(clang::CaseStmt* case_stmt); + + void update_assign_info(clang::BinaryOperator* binary_operator); + + /// Reset the `Decl` info to `absl::nullopt`. + void clear_decl_info() { decl_info_ = absl::nullopt; } + + /// Reset the `BinaryOperator` info to `absl::nullopt`. + void clear_binary_op_info() {binary_op_info_ = absl::nullopt;} + + /// Reset the `switch case` info to `absl::nullopt`. + void clear_switch_case_info() { switch_case_info_ = absl::nullopt; } + + /// Reset the `AssignInfo` info to `absl::nullopt`. + void 
clear_assign_info() { assign_info_ = absl::nullopt; } + + bool is_decl_in_parent_env(const std::string& var_name) const; + bool is_decl_in_cur_env(const std::string& var_name) const; + + bool is_bslog_field_var_decl(const std::string& var_name) const; + + /// Find the map_bs_field_detail that contains the given var_name. + std::unordered_map* + + find_bs_field_detail_ptr_by_var_name(const std::string& var_name); + + bool is_in_parent_else() const; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/ExprInfo.cpp b/convert/ExprInfo.cpp new file mode 100644 index 0000000..456728e --- /dev/null +++ b/convert/ExprInfo.cpp @@ -0,0 +1,2853 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "./Config.h" +#include "./Env.h" +#include "./Tool.h" +#include "./ExprInfo.h" +#include "./info/CommonInfo.h" +#include "./handler/StrictRewriter.h" +#include "./info/LoopInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +std::unordered_set ExprInfo::ignore_callee_ = { + "int_value", + "float_value", + "string_value", + "int_list_value", + "string_list_value", + "map_int64_int64_value", + "map_int64_float_value", + "map_int64_string_value", + "first", + "find", + "at", + "second", + "operator[]", + "operator->", + "Get" +}; + +/* Returns true when the expression comes from adlog (for reco user info only if GlobalConfig::rewrite_reco_user_info is set), or when it has a parent and its callee is a common info leaf method; otherwise false. */ +bool ExprInfo::need_replace() const { + // product_name.find + if (is_from_adlog() && is_caller_str_ref()) { + return true; + } + + if (is_from_reco_user_info_real()) { + if (GlobalConfig::Instance()->rewrite_reco_user_info) { + return true; + } else { + return false; + } + } + + if (is_from_adlog()) { + return true; + } + + if (parent_ != nullptr && CommonAttrInfo::is_common_info_leaf_method(callee_name_)) { + return true; + } + + return false; +} + +// A variable that needs rewriting is ultimately a single-valued variable of a basic type. Normal expressions, middle nodes and +// user-defined variables must be distinguished; the cases are: +// +// 1. Normal expression: mainly normal adlog fields and common_info fields; the bs fields used can all be determined at compile time. +// a. Already a single value: return BSFieldHelper::GetSingular(*bs, enum_str, pos). +// b. 
来自 list, 返回 list_var_name.Get(i), list_var_name 是从 Env 中获取的 list 变量,i 是循环变量。 +// c. From a map: kv.first is the key, return map_var_name.GetKey(i); kv.second is the value, return +// map_var_name.GetValue(i); map_var_name is the map variable obtained from Env. +// +// 2. Middle node: mainly fields from middle nodes such as PhotoInfo and LiveInfo. The bs field used cannot be determined at compile time; +// it is chosen at run time from the value of fields such as item_type. Template functions are provided uniformly for these nodes, so the +// variable is simply replaced by the matching bs_util function, e.g. bs_util.GetPhotoInfoXXX(); list and map are handled similarly. +// +// 3. User-defined variable that may contain common info, e.g. std::vector. +// It is still accessed as CommonAttrInfo in the end, e.g.: +// action_list[key_idx].int_list_value_size(); +// action_list[key_idx].int_list_value(i) +// In this case is_from_adlog() is always false, so no replacement by a bs expression is needed; only the final method name is changed, e.g.: +// action_list[key_idx].size(); +// action_list[key_idx].Get(i); +std::string ExprInfo::get_bs_field_value() const { + if (is_from_adlog()) { + if (is_from_reco_user_info()) { + return get_bs_field_value_reco_user_info(); + } else if (is_from_query_token() || is_from_photo_text()) { + return get_bs_field_value_query_token(); + } else if (is_common_info_map_iter_second()) { + if (parent_ != nullptr && !parent_->is_loop_var_ref()) { + return parent_->origin_expr_str() + ".first"; + } else { + return get_bs_field_value_normal(); + } + } else if (is_loop_var_size_method()) { + return get_bs_field_value_loop_var_size(); + } else if (is_general_proto_list_size_method()) { + return get_bs_field_value_general_proto_list_size_method(); + } else if (is_from_middle_node()) { + if (is_from_repeated_common_info()) { + return get_bs_field_value_normal(); + } else if (is_middle_node_leaf_list_size_method()) { + return get_bs_field_value_middle_node_leaf_list_size_method(); + } else { + return get_bs_field_value_middle_node(); + } + } else if (is_action_detail_list_size_expr()) { + return get_bs_field_value_action_detail_list_size(); + } else if (is_from_seq_list() || is_from_seq_list_reco()) { + return origin_expr_str(); + } else { + return get_bs_field_value_normal(); + } + } else if (parent_ != 
nullptr && CommonAttrInfo::is_common_info_leaf_method(callee_name_)) { + if (CommonAttrInfo::is_common_info_size_method(callee_name_)) { + return parent_->get_bs_field_value() + ".size()"; + } else { + return parent_->get_bs_field_value() + ".Get(" + env_ptr_->get_last_loop_var() + ")"; + } + } else if (is_decl_ref_expr()) { + std::string origin_expr_str = tool::fix_ad_enum(stmt_to_string(origin_expr_)); + if (is_item_type_enum()) { + return std::string("bs::ItemType::") + get_ad_enum_name(); + } else if (is_ad_enum()) { + if (env_ptr_->find(origin_expr_str) != nullptr) { + return origin_expr_str; + } else { + return std::string("::bs::") + origin_expr_str; + } + } else { + return origin_expr_str; + } + } else { + return stmt_to_string(expr_); + } +} + +/* Builds the bs-side expression text for a normal adlog or common info field, keyed by get_bs_enum_str(). */ std::string ExprInfo::get_bs_field_value_normal() const { + std::string bs_enum_str = get_bs_enum_str(); + std::ostringstream oss; + + // Special handling for adlog.item_size() + if (bs_enum_str == "adlog_item_size") { + return "bslog.item_size()"; + } + + /* NOTE(review): parent_ is dereferenced below without a null check, unlike the guarded uses in get_bs_field_value(); confirm it cannot be null when the callee is size or length. */ if ((callee_name_ == "size" || callee_name_ == "length") && + is_from_repeated_common_info()) { + return parent_->get_bs_field_value_normal() + "." 
+ callee_name_ + "()"; + } + + bool is_expr_from_list = is_from_list(); + bool is_expr_from_map = is_from_map(); + + // common_info is handled differently: by type they all contain repeated, but the actual kind is told apart by the called function. + // xxx_value means a single value, xxx_list_value means a list, map_xxx_yyy_value means a map. + if (is_from_repeated_common_info()) { + if (callee_name_.find("list") != std::string::npos) { + is_expr_from_list = true; + } else if (callee_name_.find("map") != std::string::npos) { + is_expr_from_map = true; + } + } + + const absl::optional& var_def = env_ptr_->find_new_def(bs_enum_str); + if (!var_def) { + LOG(INFO) << "cannot find var def in env, return empty str, expr: " << stmt_to_string(expr_) + << ", bs_enum_str: " << bs_enum_str; + return ""; + } + + LOG(INFO) << "expr: " << origin_expr_str() + << ", is_from_repeated_common_info: " << is_from_repeated_common_info() + << ", is_loop_var_ref: " << is_loop_var_ref(); + if (is_from_repeated_common_info()) { + // The index for common info is currently hard-coded as idx + // loop var + if (is_loop_var_ref()) { + if (const auto& loop_info = env_ptr_->get_loop_info()) { + if (loop_info->loop_stage() == LoopStage::BODY) { + if (!loop_info->is_for_stmt()) { + return origin_expr_str(); + } + } + } + } + + // Referenced variable + if (is_decl_ref_expr() && is_str_type()) { + return origin_expr_str(); + } + + if (is_common_info_method()) { + if (callee_name_ == "first") { + oss << var_def->name() + << ".GetKey(idx)"; + } else if (callee_name_ == "second") { + oss << var_def->name() + << ".GetValue(idx)"; + } else if (is_common_info_list_method()) { + if (contains_loop_var()) { + if (const auto& loop_info = env_ptr_->get_loop_info()) { + if (loop_info->is_for_stmt()) { + oss << var_def->name() << ".Get(" << loop_info->loop_var() << ")"; + } else { + oss << var_def->name() << ".Get(idx)"; + } + } else { + LOG(INFO) << "cannot get loop_info!"; + } + } else if (call_expr_params_.size() > 0 && call_expr_params_[0] != nullptr) { + // The parameter comes from a template parameter or an attribute + oss << var_def->name() + << ".Get(" << 
tool::trim_this(call_expr_params_[0]->origin_expr_str()) << ")"; + } else { + oss << var_def->name(); + } + } else { + LOG(INFO) << "expr: " << origin_expr_str() + << ", name: " << var_def->name(); + oss << var_def->name(); + } + } else if (is_common_info_size_method()) { + oss << var_def->name() << ".size()"; + } else { + if (callee_name_ == "first") { + oss << var_def->name() << ".GetKey(idx)"; + } else if (callee_name_ == "second") { + oss << var_def->name() << ".GetValue(idx)"; + } else { + oss << var_def->name(); + } + } + } else if (is_expr_from_list) { + if (contains_loop_var()) { + oss << var_def->name() << ".Get(" << env_ptr_->get_last_loop_var() << ")"; + } else if (absl::optional int_param = find_int_param()) { + oss << var_def->name() << ".Get(" << *int_param << ")"; + } else if (env_ptr_->is_in_loop_body()) { + // Not very accurate; needs to be made more precise. + oss << var_def->name() << ".Get(idx)"; + } else { + LOG(INFO) << "cannot find int_param, return var name"; + oss << var_def->name(); + } + } else if (is_expr_from_map && !is_from_action_detail_map() && is_basic()) { + if (const auto& loop_info = env_ptr_->cur_loop_info()) { + if (callee_name_ == "first") { + if (loop_info->is_for_stmt()) { + oss << var_def->name() << ".GetKey(" << env_ptr_->get_last_loop_var() << ")"; + } else { + oss << var_def->name() << ".GetKey(idx)"; + } + } else if (callee_name_ == "second") { + if (loop_info->is_for_stmt()) { + oss << var_def->name() << ".GetValue(" << env_ptr_->get_last_loop_var() << ")"; + } else { + oss << var_def->name() << ".GetValue(idx)"; + } + } else { + LOG(INFO) << "unsupported map expr: " << stmt_to_string(expr_) + << ", must be xxx.first or xxx.second"; + } + } else { + LOG(INFO) << "cannot find cur loop_info, expr: " << to_string(); + } + } else { + // To keep lines from getting too long after direct replacement, each scalar gets a variable defined in advance, named after the tail of bs_enum_str, + // and the definition is stored in env. + oss << var_def->name(); + } + + LOG(INFO) << "expr: " << origin_expr_str() + << ", str: " << oss.str(); + + return oss.str(); +} + +std::string 
ExprInfo::get_bs_field_value_loop_var_size() const { /* Returns `<var>.Get(<loop var or idx>).size()` built from the parent's new variable definition; returns an empty string when this is not a loop-var size method, there is no parent, no definition is found, or no loop info is available. */ + if (!is_loop_var_size_method()) { + return ""; + } + + if (parent_ == nullptr) { + return ""; + } + + std::string bs_enum_str = parent_->get_bs_enum_str(); + + const absl::optional &var_def = env_ptr_->find_new_def(bs_enum_str); + if (!var_def) { + LOG(INFO) << "cannot find var def in env, return empty str, expr: " + << stmt_to_string(expr_) << ", bs_enum_str: " << bs_enum_str; + return ""; + } + + if (const auto& loop_info = env_ptr_->get_loop_info()) { + std::ostringstream oss; + oss << var_def->name() << ".Get("; + if (loop_info->is_for_stmt()) { + oss << loop_info->loop_var(); + } else { + oss << "idx"; + } + oss << ").size()"; + + return oss.str(); + } + + return ""; +} + +/* Builds the bs expression text for query token / photo text expressions by recursing into the parent and mapping the callee name (first, second, operator->, size, c_str, empty, or any other non-empty name) onto the parent's text. */ std::string ExprInfo::get_bs_field_value_query_token() const { + LOG(INFO) << "expr: " << origin_expr_str() + << ", callee_name: " << callee_name_ + << ", is_cxx_member_call_expr: " << is_cxx_member_call_expr(); + if (parent_ == nullptr) { + return origin_expr_str(); + } + + if (is_proto_map_string_float_ref()) { + return origin_expr_str(); + } + + if (is_query_token_call()) { + return "std::move(BSGetQueryToken(bs))"; + } + + if (is_photo_text_call()) { + return "std::move(BSGetPhotoText(bs, pos))"; + } + + std::string parent_str = parent_->get_bs_field_value_query_token(); + LOG(INFO) << "expr: " << origin_expr_str() + << ", parent: " << parent_str + << ", callee_name: " << callee_name_; + std::ostringstream oss; + if (callee_name_ == "first") { + // Special case: the variable `auto query_key = query_token.GetKey(idx);` is added at the start of the for loop + oss << "query_key"; + } else if (callee_name_ == "second") { + oss << parent_str << ".GetValue(idx)"; + } else if (callee_name_ == "operator->") { + oss << parent_str; + } else if (callee_name_ == "size") { + oss << parent_str << ".size()"; + } else if (callee_name_ == "c_str") { + oss << parent_str << ".data()"; + } else if (callee_name_ == "empty") { + oss << parent_str << ".is_empty()"; + } else if (callee_name_.size() > 
0) { + oss << parent_str << "." << callee_name_ << "()"; + } else { + oss << parent_str; + } + + return oss.str(); +} + +std::string ExprInfo::get_bs_field_value_reco_user_info() const { + std::string text = origin_expr_str(); + static std::regex p("^(adlog|ad_log)"); + return std::regex_replace(text, p, "bslog"); +} + +std::string ExprInfo::get_bs_middle_node_leaf() const { + std::string bs_enum_str = get_bs_enum_str(); + std::vector arr = absl::StrSplit(bs_enum_str, "_"); + + std::ostringstream oss; + + bool is_has = bs_enum_str.find("exists") != std::string::npos; + if (is_has) { + oss << "BSHas"; + } else { + oss << "BSGet"; + } + + oss << get_middle_node_root_name(); + for (const std::string& s: arr) { + if (starts_with(s, "Get") || s == "exists" || s.size() == 0) { + continue; + } + + oss << char(toupper(s[0])) << s.substr(1); + } + + return oss.str(); +} + +std::string ExprInfo::get_bs_middle_node_leaf_trim_size() const { + std::string leaf = get_bs_middle_node_leaf(); + if (ends_with(leaf, "Size")) { + return leaf.substr(0, leaf.size() - 4); + } + + return leaf; +} + +std::string ExprInfo::get_middle_node_field() const { + return get_adlog_field_str(); +} + +std::string ExprInfo::get_bs_field_value_middle_node() const { + std::string bs_enum_str = get_bs_enum_str(); + + if (const auto& loop_info = env_ptr_->get_loop_info()) { + if (const auto& var = env_ptr_->find_new_def(bs_enum_str)) { + if (var->name().size() > 0) { + if (loop_info->is_for_stmt()) { + return var->name() + ".Get(" + env_ptr_->get_last_loop_var() + ")"; + } else if (loop_info->is_middle_node_proto_list_loop()) { + return var->name() + ".Get(idx)"; + } else { + // Reached when the loop is neither a for statement nor a middle node proto list loop. + LOG(INFO) << "unsupported loop type, bs_enum_str: " << bs_enum_str + << ", expr: " << to_string(); + return ""; + } + } else { + // Reached when the definition was found but its name is empty. + LOG(INFO) << "new var name is empty, bs_enum_str: " << bs_enum_str + << ", expr: " << to_string(); + return ""; + } + } + } + + if (is_string() && !is_from_list()) { + if (const auto& var = 
env_ptr_->find_new_def(bs_enum_str)) { + if (var->name().size() > 0) { + return var->name(); + } else { + LOG(INFO) << "var->name() is empty, bs_enum_str: " << bs_enum_str + << ", expr: " << origin_expr_str(); + } + } else { + LOG(INFO) << "cannot find middle node str def in env, bs_enum_str: " << bs_enum_str + << ", expr: " << origin_expr_str(); + } + + return ""; + } + + return get_bs_middle_node_leaf() + std::string("(bs, pos)"); +} + +std::string ExprInfo::get_bs_field_value_action_detail_leaf(const std::string& param) const { + if (!is_action_detail_leaf()) { + return ""; + } + + std::string bs_enum_str = get_bs_enum_str(); + std::ostringstream oss; + + const absl::optional &var_def = env_ptr_->find_new_def(bs_enum_str); + if (!var_def) { + LOG(INFO) << "cannot find var def in env, return empty str, expr: " + << stmt_to_string(expr_) << ", bs_enum_str: " << bs_enum_str; + return ""; + } + + oss << var_def->name() << ".Get(" << param << ")"; + + return oss.str(); +} + +bool ExprInfo::is_action_detail_map_size_method() const { + return callee_name_ == "explore_long_term_ad_action_size"; +} + +std::string ExprInfo::get_bs_field_value_general_proto_list_size_method() const { + std::string bs_enum_str = get_bs_enum_str(); + if (ends_with(bs_enum_str, "_size")) { + std::string new_bs_enum_str = + bs_enum_str.substr(0, bs_enum_str.size() - std::string("_size").size()); + if (const auto& var_def = env_ptr_->find_new_def(new_bs_enum_str)) { + return var_def->name() + ".size()"; + } + } + + return ""; +} + +std::string ExprInfo::get_bs_field_value_middle_node_leaf_list_size_method() const { + return get_bs_field_value_general_proto_list_size_method(); +} + +clang::Expr* ExprInfo::find_action_detail_index_param() const { + if (!is_action_detail_leaf()) { + return nullptr; + } + + if (parent_ != nullptr && parent_->call_expr_params_size() > 1) { + return parent_->call_expr_param(1)->expr(); + } + + return nullptr; +} + +// bs 单值定义 +// 如: auto key_item_type = 
BSFieldEnum::adlog_item_type; +// int64_t item_type = BSFieldHelper::GetSingular(*bs, key_item_type, pos); +std::string ExprInfo::get_bs_scalar_def() const { + std::string bs_enum_str = get_bs_enum_str(); + std::string new_name = env_ptr_->find_valid_new_name(bs_enum_str); + return get_bs_scalar_def_helper(false, new_name); +} + +std::string ExprInfo::get_bs_scalar_def(const std::string& name) const { + return get_bs_scalar_def_helper(false, name); +} + +std::string ExprInfo::get_bs_scalar_exists_def() const { + std::string bs_enum_str = get_bs_enum_str(); + std::string new_name = env_ptr_->find_valid_new_name(bs_enum_str); + return get_bs_scalar_def_helper(true, new_name); +} + +std::string ExprInfo::get_bs_scalar_exists_def(const std::string& name) const { + return get_bs_scalar_def_helper(true, name); +} + +std::string ExprInfo::get_bs_scalar_def_helper(bool is_exists_expr, + const std::string& name) const { + std::ostringstream oss; + + std::string bs_enum_str = get_bs_enum_str(); + const auto& qual_type = expr_->getType(); + std::string type_str = tool::get_builtin_type_str(qual_type); + std::string new_name = name; + std::string enum_new_name = std::string("enum_") + new_name; + + if (is_exists_expr) { + type_str = "bool"; + new_name = tool::get_exists_name(new_name); + enum_new_name = tool::get_exists_name(enum_new_name); + } + + oss << " auto " << enum_new_name << " = BSFieldEnum::" << bs_enum_str << ";\n "; + oss << type_str << " " << new_name << " = BSFieldHelper::"; + + if (is_exists_expr) { + oss << "HasSingular"; + } else { + oss << "GetSingular"; + } + + oss << "<" << type_str; + + if (env_ptr_->is_combine_feature() && !tool::is_item_field(bs_enum_str)) { + oss << ", true"; + } + oss << ">" + << "(*bs, " << enum_new_name; + + if (is_exists_expr ||!env_ptr_->is_user_feature()) { + oss << ", pos"; + } + oss << ")"; + + return oss.str(); +} + +// bs list 定义。 +// 如: BSReaptedField adlog_item_xxx(*bs, enum_str, pos); +std::string 
ExprInfo::get_bs_list_def() const { + std::ostringstream oss; + + const auto& qual_type = expr_->getType(); + const std::string& feature_type = env_ptr_->feature_type(); + std::string bs_enum_str = get_bs_enum_str();; + std::string new_name = env_ptr_->find_valid_new_name(bs_enum_str); + std::string enum_new_name = std::string("enum_") + new_name; + + std::string type_str; + if (is_repeated_proto_type()) { + if (absl::optional type_str_opt = tool::get_repeated_proto_inner_type(qual_type)) { + type_str = *type_str_opt; + } else { + LOG(INFO) << "cannot find list inner type str, expr: " << origin_expr_str() + << ", type_str: " << qual_type.getAsString(); + } + } else { + type_str = tool::get_builtin_type_str(qual_type); + } + + if (type_str.size() > 0) { + std::string tmpl_args = type_str; + if (env_ptr_->is_combine_feature() && !tool::is_item_field(bs_enum_str)) { + tmpl_args = type_str + ", true"; + } + + oss << " auto " << enum_new_name << " = BSFieldEnum::" << bs_enum_str + << ";\n"; + oss << "BSRepeatedField<" << tmpl_args << "> "; + oss << new_name << "(*bs, " << enum_new_name; + if (!env_ptr_->is_user_feature()) { + oss << ", pos"; + } + oss << ")"; + } else { + LOG(INFO) << "cannot find list inner type str, expr: " << origin_expr_str() + << ", type_str: " << qual_type.getAsString(); + } + + return oss.str(); +} + +std::pair ExprInfo::get_map_kv_type() const { + std::pair res; + + if (clang::CXXMemberCallExpr* cxx_member_call_expr = dyn_cast(expr_)) { + const clang::TemplateArgumentList* map_args = + get_proto_map_args_list(cxx_member_call_expr->getType()); + if (map_args != nullptr && map_args->size() == 2) { + res.first = map_args->get(0).getAsType().getAsString(); + res.second = map_args->get(1).getAsType().getAsString(); + } + } + + return res; +} + +// bs map 定义。 +// 如: BSMapField adlog_item_xxx(*bs, key_enum_str, value_enum_str, pos); +std::string ExprInfo::get_bs_map_def() const { + std::ostringstream oss; + + // const auto& qual_type = 
expr_->getType(); + const std::string& feature_type = env_ptr_->feature_type(); + std::string bs_enum_str = get_bs_enum_str(); + std::string new_name = env_ptr_->find_valid_new_name(bs_enum_str); + std::string enum_new_name_key = std::string("enum_") + new_name + "_key"; + std::string enum_new_name_value = std::string("enum_") + new_name + "_value"; + + std::pair kv_type = get_map_kv_type(); + bool is_not_item_field = !tool::is_item_field(bs_enum_str); + + oss << " auto " << enum_new_name_key << " = BSFieldEnum::" << bs_enum_str << "_key;\n"; + oss << " auto " << enum_new_name_value << " = BSFieldEnum::" << bs_enum_str << "_value;\n"; + oss << "BSMapField<" << kv_type.first + << ", " << kv_type.second; + if (env_ptr_->is_combine_feature() && is_not_item_field) { + oss << ", true"; + } + oss << "> "; + oss << new_name << "(*bs, " << enum_new_name_key << ", " << enum_new_name_value; + if (!env_ptr_->is_user_feature()) { + oss << ", pos"; + } + oss << ")"; + + return oss.str(); +} + +bool ExprInfo::is_ignore_callee_name() const { + return ignore_callee_.find(callee_name_) != ignore_callee_.end(); +} + +bool ExprInfo::is_keep_callee_name() const { + auto it = ignore_callee_.find(callee_name_); + if (it == ignore_callee_.end()) { + return true; + } + + return false; +} + +std::string ExprInfo::get_bs_enum_str() const { + // 忽略中间节点根节点 + if (is_middle_node_root()) { + return ""; + } + + if (parent_ == nullptr) { + std::string root_str = stmt_to_string(expr_); + if (root_str == "ad_log") { + return "adlog"; + } else { + return root_str; + } + } + + // common_info 最后访问的节点,不处理,直接返回 common_info enum 对应的节点。 + if (tool::is_common_info_enum(parent_->expr()->getType())) { + return parent_->get_bs_enum_str(); + } + + // middle node loop 遍历 + if ((callee_name_ == "begin" || callee_name_ == "end")) { + if (is_from_middle_node() || is_from_implicit_loop_var()) { + return parent_->get_bs_enum_str(); + } + } + + // 固定写法 + // adlog.Get().time() + if (callee_name_ == "Get" && parent_ != 
nullptr && parent_->is_adlog_root()) { + return "adlog"; + } + + std::ostringstream oss; + oss << parent_->get_bs_enum_str(); + if (cur_expr_str_.size() > 0) { + if (callee_name_ == "item") { + // 去掉 pos + oss << "_" << "item"; + } else if (starts_with(callee_name_, "has_")) { + // has_xxx + if (is_proto2_has_method(callee_name_)) { + oss << "_" << callee_name_; + } else { + oss << "_" << callee_name_.substr(4) << "_exists"; + } + } else if (tool::is_common_info_enum(expr_->getType())) { + // common_info + oss << "_" << callee_name_; + if (const auto& enum_value = env_ptr_->get_common_attr_int_value()) { + oss << "_key_" << *enum_value; + // } else if (const auto& enum_name = env_ptr_->get_common_attr_int_name()) { + // oss << "_key_" << *enum_name; + } + } else if (is_action_detail_find_expr()) { + // 确定 action 后的其他 expr + if (const auto& action_detail_info = env_ptr_->get_action_detail_info()) { + if (absl::optional action = get_action()) { + LOG(INFO) << "find action value: " << *action; + oss << "_" << "key_" << *action; + } else { + LOG(INFO) << "cannot find action, expr: " << origin_expr_str(); + } + } else if (const auto& action_detail_fixed_info = env_ptr_->get_action_detail_fixed_info()) { + oss << "_" << "key_" << action_detail_fixed_info->action(); + } else { + LOG(INFO) << "cannot get action_detail info in env, expr: " << stmt_to_string(expr_); + } + } else if (is_keep_callee_name()) { + oss << "_" << absl::StrJoin(cur_expr_str_, "_"); + } + } + + return oss.str(); +} + +std::string ExprInfo::get_adlog_field_str() const { + // 忽略中间节点根节点 + if (is_middle_node_root()) { + return ""; + } + + if (parent_ == nullptr) { + std::string root_str = stmt_to_string(expr_); + if (root_str == "ad_log") { + return "adlog"; + } else { + return root_str; + } + } + + // common_info 最后访问的节点,不处理,直接返回 common_info enum 对应的节点。 + if (tool::is_common_info_enum(parent_->expr()->getType())) { + return parent_->get_adlog_field_str(); + } + + // middle node loop 遍历 + if 
(callee_name_ == "begin" || callee_name_ == "end") { + return parent_->get_adlog_field_str(); + } + + // 固定写法 + // adlog.Get().time() + if (callee_name_ == "Get" && parent_ != nullptr && parent_->is_adlog_root()) { + return "adlog"; + } + + std::ostringstream oss; + oss << parent_->get_adlog_field_str(); + if (cur_expr_str_.size() > 0) { + if (oss.str().size() > 0) { + oss << "."; + } + + if (callee_name_ == "item") { + // 去掉 pos + oss << "item"; + } else if (starts_with(callee_name_, "has_")) { + // has_xxx + if (is_proto2_has_method(callee_name_)) { + oss << callee_name_; + } else { + oss << callee_name_.substr(4) << ".exists"; + } + } else if (tool::is_common_info_enum(expr_->getType())) { + // common_info + oss << callee_name_; + if (const auto& enum_value = env_ptr_->get_common_attr_int_value()) { + oss << ".key:" << *enum_value; + } + } else if (is_repeated_common_info_size()) { + oss << std::regex_replace(callee_name_, std::regex("_size$"), ".size"); + } else if (is_action_detail_find_expr()) { + // 确定 action 后的其他 expr + if (const auto& action_detail_info = env_ptr_->get_action_detail_info()) { + if (absl::optional action = get_action()) { + oss << "key:" << *action; + } else { + LOG(INFO) << "cannot find action!"; + } + } else if (const auto& action_detail_fixed_info = env_ptr_->get_action_detail_fixed_info()) { + oss << "key:" << action_detail_fixed_info->action(); + } + } else if (is_keep_callee_name()) { + oss << absl::StrJoin(cur_expr_str_, "."); + } + } + + return oss.str(); +} + +std::string ExprInfo::origin_expr_str() const { + if (origin_expr_ == nullptr) { + return stmt_to_string(expr_); + } + + return stmt_to_string(origin_expr_); +} + +std::string ExprInfo::get_bs_exists_enum_str() const { + std::string bs_enum_str = get_bs_enum_str(); + if (bs_enum_str.size() == 0) { + LOG(INFO) << "cannot find bs_enum_str!"; + return ""; + } + + return bs_enum_str + std::string("_exists"); +} + +absl::optional ExprInfo::get_common_info_prefix() const { + if 
(!is_from_repeated_common_info()) { + return absl::nullopt; + } + + std::string s = get_bs_enum_str(); + if (s.find("_key_") != std::string::npos) { + std::regex p("(.*)_key_(.*)"); + return absl::optional(std::regex_replace(s, p, "$1")); + } else { + return absl::optional(s); + } +} + +absl::optional ExprInfo::get_common_info_prefix_adlog() const { + if (is_from_repeated_common_info() || is_repeated_common_info_size()) { + std::string s = get_adlog_field_str(); + + if (is_repeated_common_info_size()) { + static std::regex p_size("\\.size$"); + return absl::optional(std::regex_replace(s, p_size, "")); + } else { + if (s.find(".key:") != std::string::npos) { + std::regex p("(.*)\\.key:(.*)"); + return absl::optional(std::regex_replace(s, p, "$1")); + } else { + return absl::optional(s); + } + } + } + + return absl::nullopt; +} + +std::string ExprInfo::get_adlog_expr() const { + return ""; +} + +// 普通 adlog 字段,递归后以 adlog 开头,如 adlog.item(pos).id() +bool ExprInfo::is_from_normal_adlog() const { + if (parent_ == nullptr) { + std::string expr_str = stmt_to_string(expr_); + return expr_str == "adlog" || expr_str == "ad_log"; + } + return parent_->is_from_normal_adlog(); +} + +bool ExprInfo::is_from_adlog_item() const { + if (parent_ == nullptr) { + std::string expr_str = stmt_to_string(expr_); + return expr_str == "item"; + } + + return parent_->is_from_adlog_item(); +} + +// 中间节点,如 PhotoInfo, LiveInfo, 也是来自 adlog +bool ExprInfo::is_from_middle_node() const { + if (parent_ == nullptr || is_parent_this()) { + return tool::is_middle_node_root(get_first_caller_name()); + } + + return parent_->is_from_middle_node(); +} + +bool ExprInfo::is_decl_ref_expr() const { + return origin_expr_ != nullptr; +} + +std::string ExprInfo::get_first_decl_ref() const { + if (is_decl_ref_expr()) { + return stmt_to_string(origin_expr_); + } + + if (parent_ != nullptr) { + return parent_->get_first_decl_ref(); + } + + return ""; +} + +bool ExprInfo::is_iter_second() const { + std::string 
expr_str = stmt_to_string(expr_); + if (is_decl_ref_expr()) { + expr_str = stmt_to_string(origin_expr_); + } + + if (expr_str.find("->second") != std::string::npos) { + return true; + } + + return false; +} + +// 中间节点跟节点 +bool ExprInfo::is_middle_node_root() const { + return tool::is_middle_node_root(stmt_to_string(expr_)); +} + +std::string ExprInfo::get_middle_node_root_name() const { + std::string first_caller_name = get_first_caller_name(); + if (starts_with(first_caller_name, "Get") && ends_with(first_caller_name, "(adlog.item(pos))")) { + static std::string other_str = "Get(adlog.item(pos))"; + return first_caller_name.substr(3, first_caller_name.size() - other_str.size()); + } + + if (starts_with(first_caller_name, "Get") && ends_with(first_caller_name, "(item)")) { + static std::string other_str = "Get(item)"; + return first_caller_name.substr(3, first_caller_name.size() - other_str.size()); + } + + return ""; +} + +// 中间节点有任一节点来自 repeated common_info, 则认为是来自 common_info 节点, 如 +// 1. attr.first +// 2. attr.second +// 3. attr.int_value() +// 4. adlog.item(pos).common_info_attr(i) +bool ExprInfo::is_from_repeated_common_info() const { + if (tool::is_repeated_common_info(expr_->getType())) { + return true; + } + + if (tool::is_common_info_enum(expr_->getType()) && contains_loop_var()) { + return true; + } + + if (parent_ != nullptr) { + return parent_->is_from_repeated_common_info(); + } + + return false; +} + +// 中间节点有任一节点来自 action_detail, 则认为来自 action_detail, 如 +// const auto& ad_dsp_action_detail = adlog.user_info().user_real_time_action().real_time_dsp_action_detail(); +// 1. ad_dsp_action_detail.find(no) +// 2. auto it = ad_dsp_action_detail.find(no); +// auto list = it->second.list; +// 3. 
list.size(); +bool ExprInfo::is_from_action_detail_map() const { + if (is_action_detail_map()) { + return true; + } + + if (parent_ != nullptr) { + return parent_->is_from_action_detail_map(); + } + + return false; +} + +bool ExprInfo::is_action_detail_map() const { + if (clang::CXXMemberCallExpr* cxx_member_call_expr = dyn_cast(expr_)) { + return tool::is_action_detail_map(expr_->getType()); + } + + return false; +} + +// const auto& ad_dsp_action_detail = adlog.user_info().user_real_time_action().real_time_dsp_action_detail(); +// auto iter = ad_dsp_action_detail.find(no); +bool ExprInfo::is_action_detail_find_expr() const { + if (clang::CXXMemberCallExpr* cxx_member_call_expr = dyn_cast(expr_)) { + if (clang::MemberExpr* callee = dyn_cast(cxx_member_call_expr->getCallee())) { + std::string callee_name = callee->getMemberDecl()->getNameAsString(); + if (parent_ != nullptr && + (callee_name == "find" || callee_name == "at") && + parent_->is_action_detail_map()) { + return true; + } + } + } + + return false; +} + +bool ExprInfo::is_action_detail_list_expr() const { + return false; +} + +bool ExprInfo::is_action_detail_map_end() const { + if (ends_with(stmt_to_string(expr_), ".end()")) { + if (parent_ != nullptr && parent_->is_action_detail_map()) { + return true; + } + } + + return false; +} + +bool ExprInfo::is_action_info_type() const { + return tool::is_action_info(expr_->getType()); +} + +bool ExprInfo::is_action_detail_leaf() const { + if (is_cxx_member_call_expr()) { + // Guard parent_ before dereferencing it, as the other parent_ checks in this file do. + if (parent_ != nullptr && parent_->is_action_info_type()) { + if (parent_->is_cxx_operator_call_expr()) { + return true; + } + } + } + + return false; +} + +// A single action detail or multiple action_detail; when there are multiple, take the first one of actioin_vec_. +absl::optional ExprInfo::get_action() const { + if (env_ptr_ == nullptr) { + LOG(INFO) << "env_ptr is nullptr!"; + return absl::nullopt; + } + + if (!is_action_detail_find_expr()) { + LOG(INFO) << "is not action_detail find expr: " << raw_expr_str(); + return absl::nullopt; + } + 
+ LOG(INFO) << "expr: " << origin_expr_str() + << ", get action"; + + if (params_.size() == 1) { + std::string arg = stmt_to_string(params_[0]); + absl::optional int_value = find_int_ref_value(arg); + if (int_value) { + return int_value; + } else { + return find_first_int_in_loop_expr(arg); + } + } else { + LOG(INFO) << "expr has no param, cannot find action: " << stmt_to_string(expr_); + } + + return absl::nullopt; +} + +absl::optional ExprInfo::find_int_ref_value(const std::string& name) const { + clang::Expr* value_expr = env_ptr_->find(name); + if (value_expr == nullptr) { + return absl::nullopt; + } + + std::string value_expr_str = stmt_to_string(value_expr); + if (is_integer(value_expr_str)) { + return absl::optional(std::stoi(value_expr_str)); + } + + return find_int_ref_value(value_expr_str); +} + +absl::optional ExprInfo::find_first_int_in_loop_expr(const std::string& arg) const { + if (env_ptr_->is_loop_var(arg)) { + if (const auto& loop_info = env_ptr_->get_loop_info()) { + if (!loop_info->is_for_stmt() && loop_info->is_int_list_member_loop()) { + const std::vector &int_list_member_values = loop_info->int_list_member_values(); + if (const absl::optional &int_list_index = loop_info->int_list_index()) { + if (int_list_member_values.size() > 0 && + *int_list_index < int_list_member_values.size()) { + return absl::optional(int_list_member_values[*int_list_index]); + } + } + } + + if (loop_info->parent_env_ptr() != nullptr) { + if (const auto& parent_loop_info = loop_info->parent_env_ptr()->get_loop_info()) { + if (!parent_loop_info->is_for_stmt() && parent_loop_info->is_int_list_member_loop()) { + const std::vector &int_list_member_values = parent_loop_info->int_list_member_values(); + if (const absl::optional &int_list_index = parent_loop_info->int_list_index()) { + if (int_list_member_values.size() > 0 && *int_list_index < int_list_member_values.size()) { + return absl::optional(int_list_member_values[*int_list_index]); + } else { + LOG(INFO) << "cannot 
find int_list_index, arg: " << arg; + } + } + } + } + } + } + } + + // A for loop iterating over an int list var, e.g.: + // for (int i = 0; i < urb_type_array.size(); i++) { + // int urb_type = urb_type_array[i]; + // } + if (clang::Expr* init_expr = env_ptr_->find(arg)) { + std::string s = stmt_to_string(init_expr); + std::regex p("([\\w_\\d]+)\\[([\\d\\w_]+)\\]"); + std::smatch m; + if (std::regex_search(s, m, p)) { + if (m.size() > 2) { + std::string var_name = m[1]; + std::string index_name = m[2]; + if (env_ptr_->is_loop_var(index_name)) { + if (clang::Expr* loop_var_init_expr = env_ptr_->find(var_name)) { + std::string expr_str = stmt_to_string(loop_var_init_expr); + std::vector values = tool::get_int_list_values_from_init_str(expr_str); + if (values.size() > 0) { + // Log before returning; placed after the return this statement could never run. + LOG(INFO) << "find first action from int list var: " << values[0] + << ", loop_var_name: " << var_name + << ", values: " << absl::StrJoin(values, ","); + return absl::optional(values[0]); + } + } + } + } + } + } + + return absl::nullopt; +} + +absl::optional ExprInfo::get_action_detail_prefix_adlog() const { + if (is_action_detail_map()) { + std::string s = get_adlog_field_str(); + if (ends_with(s, ".")) { + return s.substr(0, s.size() - 1); + } else if (s.find(".key:") != std::string::npos) { + std::regex p("(.*)\\.key:(.*)"); + return absl::optional(std::regex_replace(s, p, "$1")); + } else { + return absl::optional(s); + } + } + + return absl::nullopt; +} + +bool ExprInfo::is_find_name_value() const { + if (callee_name_ == "find" && params_.size() == 1) { + std::string param = stmt_to_string(params_[0]); + if (ends_with(param, ".name_value()")) { + return true; + } + } + + return false; +} + +std::string ExprInfo::get_common_info_multi_map_name() const { + if (callee_name_ == "find" && params_.size() == 1) { + if (parent_ != nullptr) { + const std::vector& expr_str = parent_->cur_expr_str(); + if (expr_str.size() == 1) { + return expr_str[0]; + } + } + } + + return ""; +} + +std::string 
ExprInfo::get_common_info_multi_attr_name() const { + if (callee_name_ == "find" && params_.size() == 1) { + std::string param = stmt_to_string(params_[0]); + if (ends_with(param, ".name_value()")) { + return param.substr(0, param.find(".")); + } + } + + return ""; +} + +// user_attr_map_.end() +bool ExprInfo::is_common_info_multi_map_end() const { + if (auto& common_info_multi_map = env_ptr_->mutable_common_info_multi_map()) { + const std::string& map_name = common_info_multi_map->map_name(); + if (ends_with(stmt_to_string(expr_), map_name + ".end()")) { + return true; + } + } + + return false; +} + +bool ExprInfo::is_common_info_multi_map_attr() const { + if (auto& common_info_multi_map = env_ptr_->mutable_common_info_multi_map()) { + if (stmt_to_string(origin_expr_) == common_info_multi_map->attr_name()) { + return true; + } + } + + return false; +} + +bool ExprInfo::is_common_info_scalar_method() const { + return CommonAttrInfo::is_common_info_scalar_method(callee_name_); +} + +bool ExprInfo::is_common_info_list_method() const { + return CommonAttrInfo::is_common_info_list_method(callee_name_); +} + +bool ExprInfo::is_common_info_map_method() const { + return CommonAttrInfo::is_common_info_map_method(callee_name_); +} + +bool ExprInfo::is_common_info_map_find_expr() const { + if (parent_ != nullptr) { + if (parent_->is_common_info_map_method() && callee_name_ == "find") { + return true; + } + } + + return false; +} + +bool ExprInfo::is_common_info_method() const { + return CommonAttrInfo::is_common_info_method(callee_name_); +} + +bool ExprInfo::is_common_info_size_method() const { + return CommonAttrInfo::is_common_info_size_method(callee_name_); +} + +bool ExprInfo::is_common_info_empty_method() const { + if (parent_ != nullptr && parent_->is_common_info_method()) { + if (is_cxx_member_call_expr() && callee_name_ == "empty") { + return true; + } + } + + return false; +} + +bool ExprInfo::is_common_info_list_size_method() const { + return 
CommonAttrInfo::is_common_info_list_size_method(callee_name_); +} + +bool ExprInfo::is_common_info_list_size_method_divide_by_int() const { + LOG(INFO) << "expr: " << origin_expr_str(); + if (is_binary_op_expr()) { + LOG(INFO) << "callee_name_: " << callee_name_ + << ", param size: " << call_expr_params_.size(); + if (callee_name_ == "%" && call_expr_params_.size() == 2) { + auto param0 = call_expr_params_[0]; + auto param1 = call_expr_params_[1]; + if (param0 != nullptr && param1 != nullptr) { + LOG(INFO) << "param1 is int: " << param1->is_integral() + << ", para1: " << param1->origin_expr_str(); + if (param0->is_common_info_list_size_method() && param1->is_integral()) { + return true; + } + } + } + } + + return false; +} + +bool ExprInfo::is_common_info_leaf_method() const { + return CommonAttrInfo::is_common_info_leaf_method(callee_name_); +} + +bool ExprInfo::is_common_info_name_value() const { + return is_from_repeated_common_info() && callee_name_ == "name_value"; +} + +bool ExprInfo::is_name_value_method() const { + return callee_name_ == "name_value"; +} + +bool ExprInfo::is_common_info_compare_int_value(Env* env_ptr) const { + if (env_ptr == nullptr) { + return false; + } + + if (is_integral() || + (is_decl_ref_expr() && + (is_int_ref() || is_template_int_ref() || is_common_attr_info_enum()))) { + if (const auto& if_info = env_ptr->cur_if_info()) { + if (if_info->if_stage() == IfStage::COND) { + if (const auto& binary_op_info = env_ptr->cur_binary_op_info()) { + if (if_info->has_cond_var_type(ExprType::ADLOG_COMMON_INFO_NAME_VALUE)) { + return true; + } + } + } + } + } + + return false; +} + +bool ExprInfo::is_repeated_common_info() const { + return tool::is_repeated_common_info(expr_->getType()); +} + +bool ExprInfo::is_repeated_common_info_size() const { + return tool::is_repeated_common_info_size(callee_name_); +} + +bool ExprInfo::is_common_info_struct_type() const { + return tool::is_common_info_struct(expr_->getType()); +} + +bool 
ExprInfo::is_common_info_map_end() const { + if (is_cxx_member_call_expr()) { + if (callee_name_ == "end") { + if (parent_ != nullptr && parent_->is_common_info_map_method()) { + return true; + } + } + } + + return false; +} + +bool ExprInfo::is_common_info_map_iter() const { + LOG(INFO) << "expr: " << origin_expr_str() + << ", is_map_proto_iterator: " << tool::is_map_proto_iterator(expr_->getType()); + if (tool::is_map_proto_iterator(expr_->getType())) { + if (parent_ != nullptr) { + // 必须是非 for 循环遍历的情况。 + if (parent_->parent() != nullptr) { + LOG(INFO) << "parent is_common_info_map_method: " << parent_->is_common_info_map_method() + << ", parent: " << parent_->origin_expr_str() + << ", callee_name: " << parent_->callee_name(); + if (parent_->parent()->is_common_info_map_method()) { + return true; + } + } + } + } + + return false; +} + +bool ExprInfo::is_common_info_map_iter_second() const { + if (is_member_expr()) { + LOG(INFO) << "member expr: " << origin_expr_str() + << ", callee_name: " << callee_name_; + if (parent_ != nullptr) { + LOG(INFO) << "parent is_common_info_map_iter: " << parent_->is_common_info_map_iter(); + } + if (callee_name_ == "second") { + if (parent_ != nullptr && parent_->is_common_info_map_iter()) { + return true; + } + } + } + + return false; +} + +absl::optional ExprInfo::get_common_info_value_type() const { + if (!is_common_info_method()) { + return absl::nullopt; + } + + return CommonAttrInfo::find_value_type(callee_name_); +} + +bool ExprInfo::is_adlog_root() const { + std::string expr_str = stmt_to_string(origin_expr_); + return is_decl_ref_expr() && (expr_str == "adlog" || expr_str == "ad_log"); +} + +bool ExprInfo::is_item_type_enum() const { + if (is_decl_ref_expr()) { + return tool::is_item_type_enum(origin_expr_->getType()); + } + + return false; +} + +bool ExprInfo::is_ad_callback_log_enum() const { + if (is_decl_ref_expr()) { + return tool::is_ad_callback_log_enum(origin_expr_->getType()); + } + + return false; +} + +bool 
ExprInfo::is_ad_action_type_enum() const { + if (is_decl_ref_expr()) { + return tool::is_ad_action_type_enum(origin_expr_->getType()); + } + + return false; +} + +bool ExprInfo::is_ad_enum() const { + if (is_decl_ref_expr()) { + return tool::is_ad_enum(origin_expr_->getType()); + } + + return false; +} + +bool ExprInfo::is_item_field() const { + std::string bs_enum_str = get_bs_enum_str(); + return tool::is_item_field(bs_enum_str); +} + +bool ExprInfo::is_adlog_user_field() const { + std::string bs_enum_str = get_bs_enum_str(); + return tool::is_adlog_user_field(bs_enum_str); +} + +std::string ExprInfo::get_ad_enum_name() const { + if (!is_ad_enum()) { + return ""; + } + + std::vector arr = absl::StrSplit(stmt_to_string(origin_expr_), "::"); + if (arr.size() == 0) { + return ""; + } else { + return arr.back(); + } +} + +bool ExprInfo::is_first_param_adlog_item() const { + if (call_expr_params_.size() > 0) { + std::string s = call_expr_params_[0]->to_string(); + if (s == "adlog.item(pos)" || s == "item") { + return true; + } + } + + return false; +} + +bool ExprInfo::is_unary_expr() const { + if (clang::UnaryOperator* unary_operator = dyn_cast(expr_)) { + return true; + } + + return false; +} + +std::string ExprInfo::get_first_caller_name() const { + if (is_first_param_adlog_item()) { + return to_string(); + } + + if (parent_ == nullptr || is_parent_this()) { + if (callee_name_.size() > 0) { + return tool::trim_this(callee_name_); + } else { + return tool::trim_this(origin_expr_str()); + } + } else { + return parent_->get_first_caller_name(); + } +} + +bool ExprInfo::is_from_adlog() const { + return is_from_normal_adlog() || + is_from_adlog_item() || + is_from_middle_node() || + is_from_repeated_common_info() || + is_from_action_detail_map() || + is_from_seq_list() || + is_from_reco_user_info() || + is_seq_list_reco_proto_type() || + is_from_query_token() || + is_from_photo_text(); +} + +bool ExprInfo::is_from_list() const { + LOG(INFO) << "expr: " << to_string() + 
<< ", tool::is_var_proto_list(expr_->getType()): " << tool::is_var_proto_list(expr_->getType()) + << ", is_item_ref: " << is_item_ref(); + + if (tool::is_var_proto_list(expr_->getType())) { + return true; + } + + if (is_item_ref()) { + // item 不认为是 list。 + return false; + } + + if (contains_loop_var()) { + return true; + } + + if (is_var_proto_message_type() || is_basic()) { + if ((is_cxx_member_call_expr() || is_member_expr()) && call_expr_params_.size() > 0) { + if (call_expr_params_[0]->is_integral()) { + return true; + } + } + } + + if (parent_ != nullptr) { + return parent_->is_from_list(); + } + + return false; +} + +bool ExprInfo::is_from_map() const { + if (tool::is_var_proto_map(expr_->getType())) { + return true; + } + + if (parent_ != nullptr) { + return parent_->is_from_map(); + } + + return false; +} + +bool ExprInfo::is_basic() const { + return is_basic_type(expr_->getType()); +} + +bool ExprInfo::is_nullptr() const { + std::string expr_str = stmt_to_string(expr_); + return expr_str == "nullptr" || expr_str == "NULL" || expr_str == "__null"; +} + +bool ExprInfo::is_cxx_member_call_expr() const { + if (clang::CXXMemberCallExpr* cxx_member_call_expr = dyn_cast(expr_)) { + return true; + } + + return false; +} + +bool ExprInfo::is_call_expr() const { + if (clang::CallExpr* call_expr = dyn_cast(expr_)) { + return true; + } + + return false; +} + +// 需要被替换的单值类型。 +// 中间节点不算。 +// 来自 list 或者 map 的也不算。 +bool ExprInfo::is_basic_scalar() const { + if (is_decl_ref_expr() && tool::is_ad_enum(origin_expr_->getType())) { + return false; + } + + if (callee_name_ == "size") { + return true; + } + + // 有不同的情况, 需要区别对待 + // attr.name_value(): false + if (is_cxx_member_call_expr() && + callee_name_ == "name_value" && + parent_ != nullptr && + parent_->is_from_repeated_common_info()) { + return false; + } + + if (is_from_list()) { + return false; + } + + // item.type(): true + if (is_cxx_member_call_expr() && tool::is_ad_enum(expr_->getType())) { + return true; + } + + if 
(CommonAttrInfo::is_common_info_scalar_method(callee_name_)) { + return true; + } + + if (is_from_middle_node()) { + return false; + } + + if (is_from_action_detail_map()) { + return false; + } + + if (!is_basic()) { + return false; + } + + if (!is_from_adlog()) { + return false; + } + + if (callee_name_ == "item_size") { + return false; + } + + if (params_.size() > 0) { + return false; + } + + if (clang::MemberExpr* member_expr = dyn_cast(expr_)) { + return false; + } + + return true; +} + +absl::optional ExprInfo::get_builtin_type_str() const { + if (is_basic()) { + return absl::optional(tool::get_builtin_type_str(expr_->getType())); + } + + return absl::nullopt; +} + +// ::auto_cpp_rewriter::ContextInfoCommonAttr::MEDIUM_UID +bool ExprInfo::is_common_attr_info_enum() const { + if (clang::DeclRefExpr* decl_ref_expr = dyn_cast(expr_)) { + return tool::is_common_info_enum(decl_ref_expr->getType()); + } + + return false; +} + +absl::optional ExprInfo::get_common_attr_int_value() const { + if (clang::DeclRefExpr* decl_ref_expr = dyn_cast(expr_)) { + return find_common_attr_int_value(decl_ref_expr); + } + + return absl::nullopt; +} + +bool ExprInfo::contains_loop_var() const { + for (size_t i = 0; i < params_.size(); i++) { + if (env_ptr_->is_loop_var(stmt_to_string(params_[i]))) { + return true; + } + } + + for (size_t i = 0; i < call_expr_params_.size(); i++) { + if (call_expr_params_[i] != nullptr) { + if (env_ptr_->is_loop_var(call_expr_params_[i]->origin_expr_str())) { + return true; + } + } + } + + if (parent_ != nullptr) { + return parent_->contains_loop_var(); + } + + return false; +} + +bool ExprInfo::has_decl_ref() const { + if (is_decl_ref_expr()) { + return true; + } + + if (parent_ != nullptr) { + return parent_->has_decl_ref(); + } + + return false; +} + +bool ExprInfo::is_caller_custom_decl_ref() const { + if (clang::CXXMemberCallExpr* cxx_member_call_expr = dyn_cast(expr_)) { + clang::Expr* caller = cxx_member_call_expr->getImplicitObjectArgument(); + 
if (clang::DeclRefExpr* decl_ref_expr = dyn_cast(caller)) { + return stmt_to_string(decl_ref_expr) != "item"; + } + } + + return false; +} + +bool ExprInfo::is_caller_str_ref() const { + if (clang::CXXMemberCallExpr* cxx_member_call_expr = dyn_cast(expr_)) { + if (parent_ != nullptr && parent_->is_decl_ref_expr()) { + return tool::is_string(parent_->origin_expr()->getType()); + } + } + + return false; +} + +bool ExprInfo::is_str_decl_ref() const { + if (origin_expr_ != nullptr) { + return tool::is_string(origin_expr_->getType()); + } + + return false; +} + +bool ExprInfo::is_item_ref() const { + return origin_expr_ != nullptr && stmt_to_string(origin_expr_) == "item"; +} + +bool ExprInfo::is_string() const { + return tool::is_string(expr_->getType()); +} + +bool ExprInfo::is_integral() const { + return expr_->getType().getTypePtr()->isIntegerType(); +} + +absl::optional ExprInfo::get_int_value() const { + if (expr_ == nullptr) { + return absl::nullopt; + } + + std::string s = stmt_to_string(expr_); + if (is_integer(s)) { + return absl::optional(std::stoi(s)); + } + + return absl::nullopt; +} + +absl::optional ExprInfo::get_int_ref_value() const { + if (origin_expr_ == nullptr) { + return absl::nullopt; + } + + std::string s = stmt_to_string(origin_expr_); + clang::Expr* value_expr = env_ptr_->find(s); + if (value_expr == nullptr) { + return absl::nullopt; + } + + return absl::nullopt; +} + +bool ExprInfo::is_int_ref() const { + absl::optional int_ref_value = get_int_ref_value(); + return int_ref_value.has_value(); +} + +absl::optional ExprInfo::get_ref_var_name() const { + if (origin_expr_ == nullptr) { + return absl::nullopt; + } + + return absl::optional(stmt_to_string(origin_expr_)); +} + +bool ExprInfo::is_template_int_ref() const { + if (origin_expr_ == nullptr) { + return false; + } + + absl::optional int_name = env_ptr_->get_template_int_name(origin_expr_); + + return int_name.has_value(); +} + +bool ExprInfo::is_common_info_enum_member_ref() const { + 
clang::Expr* inner_expr = tool::get_inner_expr(expr_); + if (inner_expr == nullptr) { + return false; + } + + if (clang::MemberExpr* member_expr = dyn_cast(inner_expr)) { + if (is_parent_this()) { + if (const auto feature_info = env_ptr_->get_feature_info()) { + std::string expr_str = tool::trim_this(stmt_to_string(inner_expr)); + if (feature_info->is_common_info_enum_member(expr_str, inner_expr->getType())) { + return true; + } + } + } + } + + return false; +} + +std::string ExprInfo::to_string() const { + return stmt_to_string(expr_); +} + +bool ExprInfo::is_get_norm_query() const { + return starts_with(to_string(), "this->GetNormQuery"); +} + +bool ExprInfo::contains_template_parameter() const { + for (size_t i = 0; i < params_.size(); i++) { + if (absl::optional int_name = env_ptr_->get_template_int_name(params_[i])) { + LOG(INFO) << "i: " << i << ", int_name: " << *int_name << ", contains template"; + return true; + } + } + + if (parent_ != nullptr) { + return parent_->contains_template_parameter(); + } + + return false; +} + +absl::optional ExprInfo::get_template_action() const { + if (is_action_detail_find_expr()) { + if (params_.size() > 0) { + return env_ptr_->get_template_int_name(params_[0]); + } + } + + return absl::nullopt; +} + +absl::optional ExprInfo::get_action_detail_field_name() const { + if (is_action_detail_map()) { + return absl::optional(""); + } + + if (parent_ == nullptr) { + return absl::nullopt; + } + + if (is_ignore_callee_name()) { + return parent_->get_action_detail_field_name(); + } + + absl::optional parent_field = parent_->get_action_detail_field_name(); + if (!parent_field) { + return absl::nullopt; + } + + std::ostringstream oss; + if (parent_field->size() > 0) { + oss << *parent_field << "." 
<< callee_name_; + } else { + oss << callee_name_; + } + + return absl::optional(oss.str()); +} + +bool ExprInfo::is_var_proto_list() const { + return tool::is_var_proto_list(expr_->getType()); +} + +bool ExprInfo::is_var_proto_map() const { + return tool::is_var_proto_map(expr_->getType()); +} + +bool ExprInfo::is_parent_str_type() const { + return parent_ != nullptr && parent_->is_str_type(); +} + +bool ExprInfo::is_parent_str_ref() const { + return parent_ != nullptr && parent_->is_str_decl_ref(); +} + +bool ExprInfo::is_caller_loop_var() const { + if (is_cxx_member_call_expr()) { + if (parent_ != nullptr) { + std::string parent_caller = parent_->origin_expr_str(); + if (env_ptr_->get_last_loop_var() == parent_caller) { + return true; + } + } + } + + return false; +} + +std::string ExprInfo::callee_with_params(const StrictRewriter& rewriter) const { + if (callee_name_.size() > 0) { + std::ostringstream oss; + + // string 的 c_str() 固定替换为 data() + if (callee_name_ == "c_str") { + oss << "data("; + } else { + oss << callee_name_ << "("; + } + + std::vector args; + if (params_.size() > 0) { + for (size_t i = 0; i < params_.size(); i++) { + if (tool::is_cxx_default_arg_expr(params_[i])) { + continue; + } + args.push_back(rewriter.getRewrittenText(params_[i])); + } + } + + oss << absl::StrJoin(args, ",") << ")"; + + return oss.str(); + } + + return ""; +} + +void ExprInfo::add_call_expr_param(std::shared_ptr expr_info_ptr) { + call_expr_params_.push_back(expr_info_ptr); +} + +ExprInfo* ExprInfo::call_expr_param(size_t index) const { + if (index >= call_expr_params_.size()) { + return nullptr; + } + + return call_expr_params_[index].get(); +} + +bool ExprInfo::is_cxx_operator_call_expr() const { + if (expr_ != nullptr) { + if (clang::CXXOperatorCallExpr* cxx_operator_call_expr = dyn_cast(expr_)) { + return true; + } + } + + return false; +} + +bool ExprInfo::is_cxx_operator_call_expr_deref() const { + if (expr_ != nullptr) { + if (clang::CXXOperatorCallExpr 
*cxx_operator_call_expr = dyn_cast(expr_)) { + std::string op = stmt_to_string(cxx_operator_call_expr->getCallee()); + if (op == "operator*") { + return true; + } + } + } + + return false; +} + +NewVarType ExprInfo::get_new_var_type() const { + if (is_from_list()) { + return NewVarType::LIST; + } else if (is_from_map()) { + return NewVarType::MAP; + } else { + return NewVarType::SCALAR; + } +} + +bool ExprInfo::has_common_attr_int_value_in_env() const { + if (absl::optional enum_value = env_ptr_->get_common_attr_int_value()) { + return true; + } + + return false; +} + +bool ExprInfo::has_common_attr_int_value_in_expr() const { + if (absl::optional enum_value = get_common_attr_int_value_in_expr()) { + return true; + } + + return false; +} + +// 枚举或者 int +absl::optional ExprInfo::get_common_attr_int_value_in_expr() const { + if (is_common_attr_info_enum()) { + return get_common_attr_int_value(); + } else if (is_integral()) { + if (auto int_value = get_int_value()) { + return absl::optional(*int_value); + } + } + + return absl::nullopt; +} + +absl::optional ExprInfo::get_common_info_enum_name_in_expr() const { + if (clang::DeclRefExpr* decl_ref_expr = dyn_cast(expr_)) { + return find_common_attr_enum_name_from_expr(decl_ref_expr); + } + + return absl::nullopt; +} + +bool ExprInfo::is_int_list_member_ref() const { + if (const auto& feature_info = env_ptr_->get_feature_info()) { + if (feature_info->is_int_list_member(expr_)) { + return true; + } + } + + return false; +} + +bool ExprInfo::is_int_list_var_ref() const { + std::string name = tool::trim_this(origin_expr_str()); + if (clang::Stmt* stmt = env_ptr_->get_decl_stmt(name)) { + if (clang::DeclStmt* decl_stmt = dyn_cast(stmt)) { + if (clang::VarDecl* var_decl = dyn_cast(decl_stmt->getSingleDecl())) { + if (tool::is_int_vector(var_decl->getType())) { + return true; + } + } + } + } else { + LOG(INFO) << "cannot find decl_stmt in env, name: " << name; + } + + return false; +} + +bool ExprInfo::is_action_detail_list() 
const { + if (callee_name_ == "list" && is_from_action_detail_map()) { + return true; + } + + return false; +} + +bool ExprInfo::is_action_detail_list_size_expr() const { + if (callee_name_ == "size") { + if (parent_ != nullptr && parent_->is_action_detail_list()) { + return true; + } + } + + return false; +} + +std::string ExprInfo::get_bs_field_value_action_detail_list_size() const { + std::string bs_enum_str = get_bs_enum_str(); + const absl::optional& var_def = env_ptr_->find_new_def(bs_enum_str); + if (!var_def) { + LOG(INFO) << "cannot find var def in env, return empty str, expr: " << stmt_to_string(expr_) + << ", bs_enum_str: " << bs_enum_str; + return ""; + } + + return var_def->name(); +} + +bool ExprInfo::is_parent_this() const { + if (parent_ != nullptr && parent_->origin_expr_str() == "this") { + return true; + } + + return false; +} + +bool ExprInfo::is_pos_ref() const { + if (origin_expr_str() == "pos") { + return true; + } + + return false; +} + +bool ExprInfo::is_has_reco_user_info_method() const { + if (GlobalConfig::Instance()->rewrite_reco_user_info) { + return false; + } else { + std::string expr_str = to_string(); + return expr_str == "adlog.has_reco_user_info()" || expr_str == "ad_log.has_reco_user_info()"; + } +} + +bool ExprInfo::is_reco_user_info_method() const { + if (GlobalConfig::Instance()->rewrite_reco_user_info) { + return false; + } else { + std::string expr_str = to_string(); + return expr_str == "adlog.reco_user_info()" || + expr_str == "ad_log.reco_user_info()" || + expr_str == "ad_user_history_photo_embedding()"; + } +} + +bool ExprInfo::is_from_reco_user_info() const { + if (GlobalConfig::Instance()->rewrite_reco_user_info) { + return false; + } else { + if (callee_name_ == "reco_user_info" || callee_name_ == "has_reco_user_info" || callee_name_ == + "ad_user_history_photo_embedding") { + if (parent_ != nullptr && parent_->is_adlog_root()) { + return true; + } else { + return false; + } + } + + if (parent_ != nullptr) { + return 
parent_->is_from_reco_user_info(); + } + + return false; + } +} + +bool ExprInfo::is_from_reco_user_info_real() const { + if (callee_name_ == "reco_user_info" || + callee_name_ == "has_reco_user_info" || + callee_name_ == "ad_user_history_photo_embedding") { + if (parent_ != nullptr && parent_->is_adlog_root()) { + return true; + } else { + return false; + } + } + + if (parent_ != nullptr) { + return parent_->is_from_reco_user_info_real(); + } + + return false; +} + +bool ExprInfo::is_action_info() const { + return tool::is_action_info(expr_->getType()); +} + +bool ExprInfo::is_parent_action_info() const { + if (parent_ != nullptr) { + return parent_->is_action_info(); + } + + return false; +} + +bool ExprInfo::is_repeated_action_info() const { + return tool::is_repeated_action_info(expr_->getType()); +} + +bool ExprInfo::is_parent_repeated_action_info() const { + if (parent_ != nullptr) { + return parent_->is_repeated_action_info(); + } + + return false; +} + +ExprInfo* ExprInfo::caller_info() const { + if (caller_info_ != nullptr) { + return caller_info_.get(); + } else { + return nullptr; + } +} + +void ExprInfo::set_caller_info(std::shared_ptr caller_info) { + caller_info_ = caller_info; +} + +bool ExprInfo::is_seq_list_root() const { + if (is_cxx_member_call_expr() || is_call_expr()) { + std::string caller_name = get_first_caller_name(); + if (caller_name == "GetSeqList") { + return true; + } + } + + return false; +} + +bool ExprInfo::is_seq_list_root_ref() const { + if (is_decl_ref_expr()) { + std::string caller_name = get_first_caller_name(); + if (caller_name == "GetSeqList") { + return true; + } + } + + return false; +} + +// const auto& seq_list = *seq_list_ptr; +bool ExprInfo::is_seq_list_root_deref() const { + if (clang::UnaryOperator* unary_operator = dyn_cast(expr_)) { + auto op_str = clang::UnaryOperator::getOpcodeStr(unary_operator->getOpcode()); + std::string op(op_str.data(), op_str.size()); + if (op == "*") { + std::string sub_expr = 
stmt_to_string(unary_operator->getSubExpr()); + clang::Expr* value_expr = env_ptr_->find(sub_expr); + if (value_expr != nullptr) { + std::string value_expr_str = tool::trim_this(stmt_to_string(value_expr)); + if (starts_with(value_expr_str, "GetSeqList")) { + return true; + } + } + } + } + + return false; +} + +bool ExprInfo::is_seq_list_ptr() const { + return is_seq_list_root_ref(); +} + +bool ExprInfo::is_seq_list() const { + return false; +} + +bool ExprInfo::is_seq_list_reco_proto_type() const { + if (is_seq_list_root()) { + std::string caller_name = get_first_caller_name(); + if (const auto& feature_info = env_ptr_->get_feature_info()) { + if (auto method_info = feature_info->find_method_info(caller_name)) { + if (tool::is_reco_proto(method_info->return_type())) { + return true; + } + } + } + } + + return false; +} + +bool ExprInfo::is_address_expr() const { + if (clang::UnaryOperator* unary_operator = dyn_cast(expr_)) { + std::string op = clang::UnaryOperator::getOpcodeStr(unary_operator->getOpcode()).str(); + if (op == "&") { + return true; + } + } + + return false; +} + +bool ExprInfo::is_ptr_deref_expr() const { + if (clang::UnaryOperator *unary_operator = dyn_cast(expr_)) { + std::string op = clang::UnaryOperator::getOpcodeStr(unary_operator->getOpcode()).str(); + if (op == "*") { + return true; + } + } + + return false; +} + +bool ExprInfo::is_from_seq_list() const { + if (is_seq_list() || is_seq_list_root() || is_seq_list_root_ref() || is_seq_list_root_deref()) { + return true; + } + + if (parent_ != nullptr) { + return parent_->is_from_seq_list(); + } + + if (call_expr_params_.size() > 0) { + return call_expr_params_[0]->is_from_seq_list(); + } + + return false; +} + +bool ExprInfo::is_from_seq_list_reco() const { + if (is_seq_list_reco_proto_type()) { + return true; + } + + if (parent_ != nullptr) { + return parent_->is_from_seq_list_reco(); + } + + if (call_expr_params_.size() > 0) { + return call_expr_params_[0]->is_from_seq_list_reco(); + } + + 
return false; +} + +bool ExprInfo::is_general_adlog_var() const { + if (callee_name_ == "size") { + if (parent_ != nullptr && parent_->is_common_info_list_method()) { + return false; + } + } + + if (is_from_adlog() && + is_basic() && + !is_method_is_train() && + !is_from_implicit_loop_var() && + (!is_cxx_operator_call_expr() || is_cxx_operator_call_expr_deref()) && + !is_from_reco_user_info() && + !is_from_seq_list() && + !is_from_middle_node() && + !is_from_query_token() && + !is_from_photo_text() && + !contains_template_parameter()) { + return true; + } + + return false; +} + +bool ExprInfo::is_reco_proto_type() const { + return tool::is_reco_proto(expr_->getType()) || is_seq_list_reco_proto_type(); +} + +bool ExprInfo::is_repeated_proto_type() const { + return tool::is_repeated_proto(expr_->getType()); +} + +bool ExprInfo::is_repeated_proto_iterator_type() const { + return tool::is_repeated_proto_iterator(expr_->getType()); +} + +bool ExprInfo::is_repeated_proto_ptr() const { + return tool::is_repeated_proto_ptr(expr_->getType()); +} + +bool ExprInfo::is_map_repeated_int_list_type() const { + return tool::is_map_repeated_int_list_type(expr_->getType()); +} + +bool ExprInfo::is_from_int_list_member() const { + if (clang::MemberExpr* member_expr = dyn_cast(expr_)) { + std::string member_str = member_expr->getMemberDecl()->getNameAsString(); + if (const auto feature_info = env_ptr_->get_feature_info()) { + if (feature_info->is_int_list_member(member_str)) { + return true; + } + } + } + + if (parent_ != nullptr) { + return parent_->is_from_int_list_member(); + } + + return false; +} + +bool ExprInfo::is_map_int_int_type() const { + return tool::is_map_int_int_type(expr_->getType()); +} + +bool ExprInfo::is_str_type() const { + return tool::is_string(expr_->getType()); +} + +bool ExprInfo::is_var_proto_message_type() const { + return tool::is_var_proto_message(expr_->getType()); +} + +bool ExprInfo::is_repeated_proto_message_type() const { + return 
tool::is_repeated_proto_message(expr_->getType()); +} + +bool ExprInfo::is_member_expr() const { + if (clang::MemberExpr* member_expr = dyn_cast(expr_)) { + return true; + } + + return false; +} + +absl::optional ExprInfo::find_int_list_member_name() const { + if (is_member_expr()) { + if (const auto feature_info = env_ptr_->get_feature_info()) { + if (feature_info->is_int_list_member(callee_name_)) { + return absl::optional(callee_name_); + } + } + } + + if (parent_ != nullptr) { + return parent_->find_int_list_member_name(); + } + + return absl::nullopt; +} + +bool ExprInfo::is_loop_iter_end() const { + std::string s = to_string(); + return starts_with(s, "__") && ends_with(s, ".end()"); +} + +absl::optional ExprInfo::get_adlog_field_str_after_loop_var() const { + if (is_cxx_member_call_expr()) { + std::string adlog_str = get_adlog_field_str(); + if (const auto& loop_info = env_ptr_->get_loop_info()) { + const std::string& prefix = loop_info->prefix_adlog(); + if (prefix.size() > 0 && + (adlog_str.size() > prefix.size() + 1) && + starts_with(adlog_str, prefix)) { + return absl::make_optional(adlog_str.substr(prefix.size() + 1)); + } else { + LOG(INFO) << "cannot find prefix adlog for loop_var, expr: " << origin_expr_str(); + return absl::nullopt; + } + } + } + + LOG(INFO) << "not cxx_member_call_expr, expr: " << origin_expr_str(); + return absl::nullopt; +} + +bool ExprInfo::is_implicit_loop_var() const { + return tool::is_implicit_loop_var(origin_expr_str()); +} + +bool ExprInfo::is_from_implicit_loop_var() const { + return tool::is_from_implicit_loop_var(origin_expr_str()); +} + +bool ExprInfo::is_deref_implicit_loop_begin_expr() const { + return tool::is_deref_implicit_loop_begin(origin_expr_str()); +} + +bool ExprInfo::is_loop_var_ref() const { + if (env_ptr_->is_in_loop()) { + if (env_ptr_->is_loop_var(origin_expr_str())) { + return true; + } + + if (clang::Expr* init_expr = env_ptr_->find(origin_expr_str())) { + if 
(env_ptr_->is_loop_var(stmt_to_string(init_expr))) { + return true; + } + } + } + + return false; +} + +bool ExprInfo::is_loop_var_size_method() const { + if (parent_ != nullptr) { + if (parent_->is_loop_var_ref() && callee_name_ == "size") { + return true; + } + } + + return false; +} + +bool ExprInfo::is_parent_loop_var_ref() const { + if (parent_ != nullptr && parent_->is_loop_var_ref()) { + return true; + } + + return false; +} + +bool ExprInfo::is_binary_op_expr() const { + if (expr_ != nullptr) { + if (clang::BinaryOperator* binary_operator = dyn_cast(expr_)) { + return true; + } + } + + return false; +} + +absl::optional ExprInfo::find_int_param() const { + // adlog.item(pos) 中的 pos 不是循环变量 + if (callee_name_ == "item" && parent_ != nullptr && parent_->is_adlog_root()) { + return absl::nullopt; + } + + if (call_expr_params_.size() == 1) { + auto param = call_expr_params_[0]; + if (param->is_integral()) { + return absl::make_optional(param->origin_expr_str()); + } else if (absl::optional int_value = get_int_ref_value()) { + return absl::make_optional(std::to_string(*int_value)); + } + } + + if (parent_ != nullptr) { + return parent_->find_int_param(); + } + + return absl::nullopt; +} + +bool ExprInfo::is_list_size_method() const { + if (is_from_adlog() && absl::EndsWith(callee_name_, "_size")) { + return true; + } + + return false; +} + +bool ExprInfo::is_general_proto_list_size_method() const { + if (is_repeated_common_info_size()) { + return false; + } + + if (is_basic()) { + std::string bs_enum_str = get_bs_enum_str_trim_size(); + if (starts_with(bs_enum_str, "adlog")) { + if (bs_enum_str.size() > 0) { + if (const auto feature_info = env_ptr_->get_feature_info()) { + if (feature_info->is_in_bs_enum_var_type(bs_enum_str)) { + return true; + } + } + } + } + } + + return false; +} + +bool ExprInfo::is_middle_node_leaf_list_size_method() const { + if (is_repeated_common_info_size()) { + return false; + } + + if (is_from_middle_node()) { + if (is_basic()) { + 
std::string bs_enum_str = get_bs_enum_str_trim_size(); + if (!starts_with(bs_enum_str, "adlog")) { + if (bs_enum_str.size() > 0) { + if (const auto feature_info = env_ptr_->get_feature_info()) { + if (feature_info->is_in_middle_node_bs_enum_var_type(bs_enum_str)) { + return true; + } + } + } + } + } + } + + return false; +} + +std::string ExprInfo::get_bs_enum_str_trim_size() const { + std::string bs_enum_str = get_bs_enum_str(); + if (ends_with(bs_enum_str, "_size")) { + return bs_enum_str.substr(0, bs_enum_str.size() - 5); + } + + return ""; +} + +bool ExprInfo::is_str_concat() const { + if (is_cxx_operator_call_expr()) { + if (callee_name_ == "operator+") { + if (call_expr_params_.size() == 2) { + if (auto param0 = call_expr_params_[0]) { + if (param0->is_string()) { + return true; + } + } + } + } + } + + return false; +} + +bool ExprInfo::is_cxx_functional_cast_expr() const { + if (clang::CXXFunctionalCastExpr* cxx_functional_cast_expr = + dyn_cast(expr_)) { + return true; + } + + return false; +} + +bool ExprInfo::is_enum_proto_call() const { + static std::unordered_set names = { + "auto_cpp_rewriter::AdActionType_descriptor", + "auto_cpp_rewriter::AdCallbackLog::EventType_descriptor", + "auto_cpp_rewriter::class AdCallbackLog::EventType_descriptor", + "auto_cpp_rewriter::AdActionType_Name", + "auto_cpp_rewriter::AdActionType", + "auto_cpp_rewriter::AdCallbackLog::EventType_Parse", + "auto_cpp_rewriter::class AdCallbackLog::EventType_Parse" + }; + + if (is_call_expr() || is_cxx_functional_cast_expr()) { + std::string s = origin_expr_str(); + for (const auto &name : names) { + if (starts_with(s, name)) { + return true; + } + } + } + + return false; +} + +bool ExprInfo::is_query_token_call() const { + if (is_cxx_member_call_expr()) { + std::string s = tool::trim_this(to_string()); + if (starts_with(s, "GetQueryToken")) { + return true; + } + } + + return false; +} + +bool ExprInfo::is_from_query_token() const { + if (is_query_token_call()) { + return true; + } + 
+ if (parent_ != nullptr) { + return parent_->is_from_query_token(); + } + + return false; +} + +bool ExprInfo::is_from_photo_text() const { + if (is_photo_text_call()) { + return true; + } + + if (parent_ != nullptr) { + return parent_->is_from_photo_text(); + } + + return false; +} + +bool ExprInfo::is_parent_from_proto_map_string_float() const { + if (is_cxx_member_call_expr() && parent_ != nullptr) { + if (parent_->is_from_query_token() || parent_->is_from_photo_text()) { + return true; + } + } + + return false; +} + +bool ExprInfo::is_photo_text_call() const { + if (is_cxx_member_call_expr()) { + std::string s = tool::trim_this(to_string()); + if (starts_with(s, "GetPhotoText")) { + return true; + } + } + + return false; +} + +bool ExprInfo::is_proto_map_string_float_ptr_type() const { + return tool::is_proto_map_string_float_ptr(expr_->getType()); +} + +bool ExprInfo::is_proto_map_string_float_ref() const { + if (is_decl_ref_expr()) { + if (is_query_token_call() || is_photo_text_call()) { + return true; + } + } + + return false; +} + +bool ExprInfo::is_proto_map_string_float_iter_type() const { + return tool::is_proto_map_string_float_iter(expr_->getType()); +} + +bool ExprInfo::is_proto_map_string_float_iter_first() const { + if (parent_ != nullptr && parent_->is_proto_map_string_float_iter_type()) { + if (callee_name_ == "first") { + return true; + } + } + + return false; +} + +bool ExprInfo::is_proto_map_string_float_iter_second() const { + if (parent_ != nullptr && parent_->is_proto_map_string_float_iter_type()) { + if (callee_name_ == "second") { + return true; + } + } + + return false; +} + +bool ExprInfo::is_proto_map_string_float_size() const { + if (parent_ != nullptr && parent_->is_proto_map_string_float_ptr_type()) { + if (callee_name_ == "size") { + return true; + } + } + + return false; +} + +bool ExprInfo::is_proto_map_string_float_end() const { + if (is_cxx_member_call_expr()) { + if (callee_name_ == "end") { + if (parent_ != nullptr && 
parent_->is_proto_map_string_float_ptr_type()) { + return true; + } + } + } + + return false; +} + +bool ExprInfo::is_photo_text_find_expr() const { + if (callee_name_ == "find") { + if (is_from_photo_text()) { + return true; + } + } + + return false; +} + +clang::Expr *ExprInfo::get_proto_map_string_float_loop_var_expr() const { + if (parent_ != nullptr && parent_->is_proto_map_string_float_ref()) { + return parent_->origin_expr(); + } + + if (parent_ != nullptr) { + return parent_->get_proto_map_string_float_loop_var_expr(); + } + + return nullptr; +} + +clang::Expr* ExprInfo::get_loop_var_expr() const { + if (tool::is_implicit_loop_var(origin_expr_str())) { + return expr_; + } + + if (parent_ != nullptr) { + return parent_->get_loop_var_expr(); + } + + return nullptr; +} + +bool ExprInfo::is_method_is_train() const { + if (is_cxx_member_call_expr() && callee_name_ == "is_train") { + return true; + } + + return false; +} + +bool ExprInfo::is_parent_common_info_map_method() const { + if (parent_ != nullptr) { + if (CommonAttrInfo::is_common_info_map_method(parent_->callee_name())) { + return true; + } + } + + return false; +} + +bool ExprInfo::is_proto2_has_method(const std::string& method_name) const { + static std::unordered_set names = { + "has_car", + "has_house" + }; + + return names.find(method_name) != names.end(); +} + +bool ExprInfo::is_char_arr_type() const { + return tool::is_char_arr(expr_->getType()); +} + +bool ExprInfo::is_decl_init_expr() const { + if (env_ptr_ != nullptr) { + if (const auto& decl_info = env_ptr_->cur_decl_info()) { + if (origin_expr_str() == stmt_to_string(decl_info->init_expr())) { + return true; + } + } + } + + return false; +} + +bool ExprInfo::is_in_decl_stmt() const { + if (const auto& decl_info = env_ptr_->cur_decl_info()) { + return true; + } + + return false; +} + +bool ExprInfo::is_repeated_proto_list_leaf_type() const { + return tool::is_repeated_proto_list_leaf_type(expr_->getType()); +} + +bool 
ExprInfo::is_bslog_field_enum_decl() const { + if (env_ptr_ != nullptr) { + if (is_decl_ref_expr()) { + std::string var_name = origin_expr_str(); + if (clang::Stmt *decl_stmt = env_ptr_->get_decl_stmt(var_name)) { + std::string decl_stmt_str = stmt_to_string(decl_stmt); + + if (decl_stmt_str.find("BSFieldEnum") != std::string::npos) { + return true; + } + } + } + } + + return false; +} + +bool ExprInfo::is_bslog_field_var_decl() const { + if (env_ptr_ != nullptr) { + if (is_decl_ref_expr()) { + std::string var_name = origin_expr_str(); + return env_ptr_->is_bslog_field_var_decl(var_name); + } + } + + return false; +} + +bool ExprInfo::is_bslog_field_enum_ref() const { + std::string s = origin_expr_str(); + if (absl::StartsWith(s, "BSFieldEnum::")) { + return true; + } + + return is_bslog_field_enum_decl(); +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/ExprInfo.h b/convert/ExprInfo.h new file mode 100644 index 0000000..63cb8cd --- /dev/null +++ b/convert/ExprInfo.h @@ -0,0 +1,432 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "clang/AST/AST.h" +#include "clang/AST/Expr.h" +#include "clang/AST/Type.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; +class StrictRewriter; +enum class CommonInfoValueType; +enum class NewVarType; + +/// 表达式 `clang::Expr` 的各种信息。 +class ExprInfo { + public: + ExprInfo() = default; + explicit ExprInfo(clang::Expr* expr, Env* env_ptr): expr_(expr), env_ptr_(env_ptr) {} + + const std::string& callee_name() const { return callee_name_; } + const std::vector& params() const { return params_; } + + /// 以下几种情况需要替换: + /// 1. 来自 adlog 的字段, 如普通 adlog 字段, 中间节点字段, common info 字段。 + /// 2. 
自定义变量,但是包含 CommonAttrInfo, 如 std::vector + bool need_replace() const; + + /// get bs field value from adlog expr, like adlog.item.id, kv.first for map, or attrs[i] for list + std::string get_bs_field_value() const; + std::string get_bs_field_value_normal() const; + std::string get_bs_field_value_loop_var_size() const; + std::string get_bs_field_value_middle_node() const; + std::string get_bs_field_value_action_detail_leaf(const std::string& param) const; + std::string get_bs_field_value_general_proto_list_size_method() const; + std::string get_bs_field_value_query_token() const; + std::string get_bs_field_value_reco_user_info() const; + std::string get_bs_field_value_middle_node_leaf_list_size_method() const; + + clang::Expr* find_action_detail_index_param() const; + + /// list or map definition + std::string get_bs_scalar_def() const; + std::string get_bs_scalar_def(const std::string& name) const; + + /// 返回单值判断条件是否存在的 bs 表达式。 + /// 返回 xxx_exists, xxx_exists 的定义需要提前放到 env_ptr 中 + std::string get_bs_scalar_exists_def() const; + std::string get_bs_scalar_exists_def(const std::string& name) const; + std::string get_bs_scalar_def_helper(bool is_exists_expr, const std::string& name) const; + + std::string get_bs_list_def() const; + std::pair get_map_kv_type() const; + std::string get_bs_map_def() const; + + bool is_ignore_callee_name() const; + bool is_keep_callee_name() const; + std::string get_bs_enum_str() const; + std::string get_bs_exists_enum_str() const; + std::string get_adlog_expr() const; + std::string get_adlog_field_str() const; + + clang::Expr* expr() const { return expr_; } + clang::DeclRefExpr* origin_expr() const { return origin_expr_; } + void set_origin_expr(clang::DeclRefExpr* origin_expr) { origin_expr_ = origin_expr; } + std::string origin_expr_str() const; + + void set_parent(std::shared_ptr parent) { parent_ = parent; } + void set_env_ptr(Env* env_ptr) { env_ptr_ = env_ptr; } + void set_expr(clang::Expr* expr) { expr_ = expr; } + void 
set_callee_name(const std::string& callee_name) { callee_name_ = callee_name; } + void add_param(clang::Expr* param) { params_.push_back(param); } + + bool is_from_normal_adlog() const; + + /// 来自 item,用于 infer 的 ItemFilter + bool is_from_adlog_item() const; + bool is_from_middle_node() const; + bool is_from_repeated_common_info() const; + bool is_from_action_detail_map() const; + bool is_from_adlog() const; + + bool is_middle_node_root() const; + std::string get_bs_middle_node_leaf() const; + std::string get_middle_node_root_name() const; + std::string get_middle_node_field() const; + + std::string get_bs_middle_node_leaf_trim_size() const; + + /// clang::DeclRefExpr* + bool is_decl_ref_expr() const; + std::string get_first_decl_ref() const; + + bool is_iter_second() const; + + bool is_from_list() const; + bool is_from_map() const; + bool is_basic() const; + bool is_nullptr() const; + bool is_basic_scalar() const; + + absl::optional get_builtin_type_str() const; + + bool is_common_attr_info_enum() const; + bool is_action_detail_list_expr() const; + + bool is_action_detail_map() const; + bool is_action_detail_find_expr() const; + bool is_action_detail_map_end() const; + bool is_action_info_type() const; + bool is_action_detail_leaf() const; + bool is_action_detail_map_size_method() const; + + /// 多个 common info + bool is_find_name_value() const; + std::string get_common_info_multi_map_name() const; + std::string get_common_info_multi_attr_name() const; + bool is_common_info_multi_map_end() const; + bool is_common_info_multi_map_attr() const; + bool is_common_info_scalar_method() const; + bool is_common_info_list_method() const; + bool is_common_info_map_method() const; + bool is_common_info_map_find_expr() const; + bool is_common_info_method() const; + bool is_common_info_size_method() const; + bool is_common_info_empty_method() const; + bool is_common_info_list_size_method() const; + bool is_common_info_list_size_method_divide_by_int() const; + bool 
is_common_info_leaf_method() const; + bool is_common_info_name_value() const; + bool is_common_info_compare_int_value(Env* env_ptr) const; + bool is_name_value_method() const; + + bool is_repeated_common_info() const; + bool is_repeated_common_info_size() const; + + bool is_common_info_struct_type() const; + bool is_common_info_map_end() const; + bool is_common_info_map_iter() const; + bool is_common_info_map_iter_second() const; + + absl::optional get_common_info_value_type() const; + + /// ItemType::AD_DSP + bool is_item_type_enum() const; + + /// using auto_cpp_rewriter::AdCallbackLog; + bool is_ad_callback_log_enum() const; + + /// auto_cpp_rewriter::AD_ITEM_CLICK + bool is_ad_action_type_enum() const; + + bool is_cxx_member_call_expr() const; + bool is_call_expr() const; + bool is_ad_enum() const; + + bool is_item_field() const; + bool is_adlog_user_field() const; + + std::string get_ad_enum_name() const; + + /// adlog + bool is_adlog_root() const; + + bool is_first_param_adlog_item() const; + bool is_unary_expr() const; + std::string get_first_caller_name() const; + absl::optional get_action() const; + absl::optional find_int_ref_value(const std::string& name) const; + + /// action detail prefix, 不包含后面的 key + absl::optional get_action_detail_prefix_adlog() const; + + /// 遍历 action_vec, 逻辑有点复杂。 + absl::optional find_first_int_in_loop_expr(const std::string& arg) const; + + absl::optional get_common_attr_int_value() const; + absl::optional get_common_info_prefix() const; + absl::optional get_common_info_prefix_adlog() const; + + inline void add_cur_expr_str(const std::string& s) { cur_expr_str_.push_back(s); } + const std::vector& cur_expr_str() const { return cur_expr_str_; } + + bool contains_loop_var() const; + + std::shared_ptr parent() const { return parent_; } + + bool has_decl_ref() const; + + /// caller 是自定义的变量, 不是 item, 如 product_name.find("xxx"); + bool is_caller_custom_decl_ref() const; + + /// caller 是自定义的变量, 并且是 str + bool is_caller_str_ref() 
const; + + bool is_str_decl_ref() const; + bool is_item_ref() const; + bool is_string() const; + + bool is_integral() const; + absl::optional get_int_value() const; + absl::optional get_int_ref_value() const; + bool is_int_ref() const; + absl::optional get_ref_var_name() const; + + bool is_template_int_ref() const; + + /// 来自 common info enum 属性的引用 + bool is_common_info_enum_member_ref() const; + + std::string to_string() const; + bool is_get_norm_query() const; + bool contains_template_parameter() const; + absl::optional get_template_action() const; + absl::optional get_action_detail_field_name() const; + + bool is_var_proto_list() const; + bool is_var_proto_map() const; + bool is_repeated_proto_message_type() const; + + bool is_parent_str_type() const; + bool is_parent_str_ref() const; + bool is_caller_loop_var() const; + std::string callee_with_params(const StrictRewriter& rewriter) const; + void add_call_expr_param(std::shared_ptr expr_info_ptr); + ExprInfo* call_expr_param(size_t index) const; + size_t call_expr_params_size() const { return call_expr_params_.size(); } + + bool is_cxx_operator_call_expr() const; + bool is_cxx_operator_call_expr_deref() const; + NewVarType get_new_var_type() const; + bool has_common_attr_int_value_in_env() const; + bool has_common_attr_int_value_in_expr() const; + absl::optional get_common_attr_int_value_in_expr() const; + absl::optional get_common_info_enum_name_in_expr() const; + + bool is_int_list_member_ref() const; + bool is_int_list_var_ref() const; + bool is_action_detail_list() const; + bool is_action_detail_list_size_expr() const; + std::string get_bs_field_value_action_detail_list_size() const; + + const std::string& raw_expr_str() const { return raw_expr_str_; } + void set_raw_expr_str(const std::string& raw_expr_str) { raw_expr_str_ = raw_expr_str; } + + bool is_parent_this() const; + bool is_pos_ref() const; + + bool is_has_reco_user_info_method() const; + bool is_reco_user_info_method() const; + + /// 
表示不需要替换的字段,不是来自 adlog proto,如 reco_user_info, ad_user_history_photo_embedding 等。 + /// 开始只处理了 reco_user_info,后面又添加了其他字段,之后再找时间换个名字。 + bool is_from_reco_user_info() const; + + /// 不考虑是否替换,判断实际是否是来自 reco_user_info。 + bool is_from_reco_user_info_real() const; + + bool is_action_info() const; + bool is_parent_action_info() const; + + bool is_repeated_action_info() const; + bool is_parent_repeated_action_info() const; + + clang::Expr* caller() const { return caller_; } + void set_caller(clang::Expr* caller) { caller_ = caller; } + + ExprInfo* caller_info() const; + void set_caller_info(std::shared_ptr caller_info); + + bool is_seq_list_root() const; + bool is_seq_list_root_ref() const; + bool is_seq_list_root_deref() const; + bool is_seq_list_ptr() const; + bool is_seq_list() const; + bool is_from_seq_list() const; + bool is_seq_list_reco_proto_type() const; + + bool is_from_seq_list_reco() const; + + bool is_address_expr() const; + bool is_ptr_deref_expr() const; + + /// 普通的 adlog 变量 + bool is_general_adlog_var() const; + bool is_reco_proto_type() const; + bool is_repeated_proto_type() const; + bool is_repeated_proto_iterator_type() const; + bool is_repeated_proto_ptr() const; + bool is_map_repeated_int_list_type() const; + bool is_from_int_list_member() const; + bool is_map_int_int_type() const; + bool is_str_type() const; + bool is_var_proto_message_type() const; + + bool is_member_expr() const; + absl::optional find_int_list_member_name() const; + + bool is_loop_iter_end() const; + + absl::optional get_adlog_field_str_after_loop_var() const; + bool is_implicit_loop_var() const; + bool is_from_implicit_loop_var() const; + bool is_deref_implicit_loop_begin_expr() const; + + bool is_loop_var_ref() const; + bool is_loop_var_size_method() const; + bool is_parent_loop_var_ref() const; + bool is_binary_op_expr() const; + + absl::optional find_int_param() const; + + bool is_list_size_method() const; + + /// 特指叶子节点, 不包括 common info 数据对应的节点 + bool 
is_general_proto_list_size_method() const; + + /// 来自中间节点的 list size + bool is_middle_node_leaf_list_size_method() const; + std::string get_bs_enum_str_trim_size() const; + bool is_str_concat() const; + + bool is_cxx_functional_cast_expr() const; + bool is_enum_proto_call() const; + + /// QueryToken + bool is_query_token_call() const; + bool is_from_query_token() const; + + /// PhotoText + bool is_photo_text_call() const; + bool is_from_photo_text() const; + + bool is_parent_from_proto_map_string_float() const; + + bool is_proto_map_string_float_ptr_type() const; + bool is_proto_map_string_float_ref() const; + bool is_proto_map_string_float_iter_type() const; + bool is_proto_map_string_float_iter_first() const; + bool is_proto_map_string_float_iter_second() const; + bool is_proto_map_string_float_size() const; + bool is_proto_map_string_float_end() const; + bool is_photo_text_find_expr() const; + + clang::Expr* get_proto_map_string_float_loop_var_expr() const; + + clang::Expr* get_loop_var_expr() const; + bool is_method_is_train() const; + bool is_parent_common_info_map_method() const; + + /// proto2 比较特殊,基础类型也有对应的 has_xxx 方法。 + bool is_proto2_has_method(const std::string& method_name) const; + bool is_char_arr_type() const; + bool is_decl_init_expr() const; + bool is_in_decl_stmt() const; + bool is_repeated_proto_list_leaf_type() const; + + /// 是否是 bs 字段。 + bool is_bslog_field_enum_decl() const; + bool is_bslog_field_var_decl() const; + + /// 枚举值或者变量 ref。 + bool is_bslog_field_enum_ref() const; + + protected: + Env* env_ptr_ = nullptr; + + int level_ = 0; + + /// 保留完整 expr 方便之后的处理 + clang::Expr* expr_ = nullptr; + + /// DeclRefExpr + clang::DeclRefExpr* origin_expr_ = nullptr; + std::string adlog_expr_; + + /// enum key + std::string bs_expr_; + std::string feature_type_; + + /// 用于转换 adlog 字段,保存当前节点的 str + std::vector cur_expr_str_; + + std::string common_info_name_ = ""; + int common_info_value_ = 0; + std::string loop_var_name_ = ""; + std::string expr_type_ = 
""; + std::string list_var_name_; + std::string callee_name_; + + clang::Expr* caller_ = nullptr; + std::shared_ptr caller_info_; + + std::string value_type_; + + /// for map。 + std::string member_str_; + std::string map_var_name_; + clang::QualType qual_type_; + bool is_basic_type_ = true; + std::string caller_str_; + std::string map_key_type_; + std::string map_value_type_; + std::string map_key_str_; + std::string map_value_str_; + + std::vector params_; + std::shared_ptr parent_; + + /// 原始的表达式。 + std::string raw_expr_str_; + + std::vector> call_expr_params_; + + static std::unordered_set ignore_callee_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/ExprParser.cpp b/convert/ExprParser.cpp new file mode 100644 index 0000000..9823278 --- /dev/null +++ b/convert/ExprParser.cpp @@ -0,0 +1,364 @@ +#include +#include +#include +#include +#include +#include + +#include "clang/AST/AST.h" +#include "clang/AST/ExprCXX.h" + +#include "./Deleter.h" +#include "./ExprParser.h" +#include "./ExprParserDetail.h" +#include "./ExprParserBSField.h" +#include "./expr_parser/ExprParserQueryToken.h" +#include "./Tool.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +// 解析表达式。 +std::shared_ptr parse_expr_simple(clang::Expr* expr, Env* env_ptr) { + if (expr == nullptr) { + LOG(INFO) << "expr is null"; + return nullptr; + } + + if (clang::CXXMemberCallExpr* cxx_member_call_expr = dyn_cast(expr)) { + auto expr_info_ptr = std::make_shared(expr, env_ptr); + ExprInfo& expr_info = *expr_info_ptr; + expr_info_ptr->set_raw_expr_str(stmt_to_string(expr)); + + clang::Expr* caller = cxx_member_call_expr->getImplicitObjectArgument(); + if (clang::MemberExpr* callee = dyn_cast(cxx_member_call_expr->getCallee())) { + std::string callee_name = callee->getMemberDecl()->getNameAsString(); + + expr_info.set_parent(std::move(parse_expr_simple(caller, env_ptr))); + + // for loop, begin is loop var + if (callee_name == "begin") { + 
return expr_info_ptr; + } + + expr_info.set_callee_name(callee_name); + expr_info.add_cur_expr_str(callee_name); + + for (unsigned i = 0; i < cxx_member_call_expr->getNumArgs(); i++) { + expr_info.add_param(cxx_member_call_expr->getArg(i)); + auto param_expr_info_ptr = parse_expr_simple(cxx_member_call_expr->getArg(i), env_ptr); + param_expr_info_ptr->set_caller_info(expr_info_ptr); + expr_info_ptr->add_call_expr_param(std::move(param_expr_info_ptr)); + } + + return expr_info_ptr; + } else { + std::string expr_str = stmt_to_string(expr); + LOG(INFO) << "unsupported cxx member call expr: " << expr_str; + return expr_info_ptr; + } + } else if (clang::MemberExpr* member_expr = dyn_cast(expr)) { + clang::Expr* caller = member_expr->getBase(); + std::string member_str = tool::trim_this(member_expr->getMemberDecl()->getNameAsString()); + auto expr_info_ptr = std::make_shared(expr, env_ptr); + expr_info_ptr->set_parent(std::move(parse_expr_simple(caller, env_ptr))); + expr_info_ptr->set_callee_name(member_str); + expr_info_ptr->set_raw_expr_str(stmt_to_string(expr)); + + if (member_str != "first" && member_str != "second") { + expr_info_ptr->add_cur_expr_str(member_str); + } + + return expr_info_ptr; + } else if (clang::ExprWithCleanups* expr_with_cleanups = dyn_cast(expr)) { + return parse_expr_simple(expr_with_cleanups->getSubExpr(), env_ptr); + } else if (clang::ImplicitCastExpr* cast_expr = dyn_cast(expr)) { + return parse_expr_simple(cast_expr->getSubExpr(), env_ptr); + } else if (clang::MaterializeTemporaryExpr* material_expr = dyn_cast(expr)) { + return parse_expr_simple(material_expr->getSubExpr(), env_ptr); + } else if (clang::CXXBindTemporaryExpr* cxx_bind_temporary_expr = dyn_cast(expr)) { + return parse_expr_simple(cxx_bind_temporary_expr->getSubExpr(), env_ptr); + } else if (clang::ParenExpr* paren_expr = dyn_cast(expr)) { + return parse_expr_simple(paren_expr->getSubExpr(), env_ptr); + } else if (clang::UnaryOperator* unary_operator = dyn_cast(expr)) { + 
auto expr_info_ptr = std::make_shared(expr, env_ptr); + expr_info_ptr->set_raw_expr_str(stmt_to_string(expr)); + auto op_str = clang::UnaryOperator::getOpcodeStr(unary_operator->getOpcode()); + std::string op(op_str.data(), op_str.size()); + expr_info_ptr->set_callee_name(op); + expr_info_ptr->set_parent(std::move(parse_expr_simple(unary_operator->getSubExpr(), env_ptr))); + + auto param_expr_info = parse_expr_simple(unary_operator->getSubExpr(), env_ptr); + expr_info_ptr->add_param(unary_operator->getSubExpr()); + param_expr_info->set_caller_info(expr_info_ptr); + expr_info_ptr->add_call_expr_param(std::move(param_expr_info)); + + return expr_info_ptr; + } else if (clang::CXXOperatorCallExpr* cxx_operator_call_expr = dyn_cast(expr)) { + auto expr_info_ptr = std::make_shared(expr, env_ptr); + expr_info_ptr->set_raw_expr_str(stmt_to_string(expr)); + + std::string op = stmt_to_string(cxx_operator_call_expr->getCallee()); + expr_info_ptr->set_callee_name(op); + expr_info_ptr->set_parent(std::move(parse_expr_simple(cxx_operator_call_expr->getArg(0), env_ptr))); + + for (size_t i = 0; i < cxx_operator_call_expr->getNumArgs(); i++) { + auto param_expr_info_ptr = parse_expr_simple(cxx_operator_call_expr->getArg(i), env_ptr); + param_expr_info_ptr->set_caller_info(expr_info_ptr); + expr_info_ptr->add_call_expr_param(std::move(param_expr_info_ptr)); + } + + return expr_info_ptr; + } else if (clang::BinaryOperator* binary_operator = dyn_cast(expr)) { + auto expr_info_ptr = std::make_shared(expr, env_ptr); + expr_info_ptr->set_raw_expr_str(stmt_to_string(expr)); + + std::string op = binary_operator->getOpcodeStr().str(); + expr_info_ptr->set_callee_name(op); + + auto left = parse_expr_simple(binary_operator->getLHS(), env_ptr); + left->set_caller_info(expr_info_ptr); + expr_info_ptr->add_call_expr_param(std::move(left)); + + auto right = parse_expr_simple(binary_operator->getRHS(), env_ptr); + right->set_caller_info(expr_info_ptr); + 
expr_info_ptr->add_call_expr_param(std::move(right)); + + return expr_info_ptr; + } else if (clang::CallExpr* call_expr = dyn_cast(expr)) { + auto expr_info_ptr = std::make_shared(expr, env_ptr); + expr_info_ptr->set_raw_expr_str(stmt_to_string(expr)); + expr_info_ptr->set_callee_name(tool::trim_this(stmt_to_string(call_expr->getCallee()))); + clang::Expr* caller = call_expr->getCallee(); + expr_info_ptr->set_parent(parse_expr_simple(caller, env_ptr)); + + LOG(INFO) << "call_expr: " << stmt_to_string(call_expr) + << ", caller: " << stmt_to_string(caller) + << ", callee_name: " << expr_info_ptr->callee_name(); + for (size_t i = 0; i < call_expr->getNumArgs(); i++) { + auto param_expr_info_ptr = parse_expr_simple(call_expr->getArg(i), env_ptr); + param_expr_info_ptr->set_caller_info(expr_info_ptr); + expr_info_ptr->add_call_expr_param(std::move(param_expr_info_ptr)); + } + + return expr_info_ptr; + } else if (clang::CXXDependentScopeMemberExpr* cxx_dependent_scope_member_expr = + dyn_cast(expr)) { + auto expr_info_ptr = std::make_shared(expr, env_ptr); + expr_info_ptr->set_callee_name(cxx_dependent_scope_member_expr->getMember().getAsString()); + clang::Expr* base = cxx_dependent_scope_member_expr->getBase(); + expr_info_ptr->set_parent(std::move(parse_expr_simple(base, env_ptr))); + + return expr_info_ptr; + } else if (clang::ArraySubscriptExpr* array_subscript_expr = dyn_cast(expr)) { + auto expr_info_ptr = std::make_shared(expr, env_ptr); + expr_info_ptr->set_callee_name("[]"); + expr_info_ptr->set_parent(std::move(parse_expr_simple(array_subscript_expr->getBase(), env_ptr))); + + return expr_info_ptr; + } else if (clang::CXXConstructExpr* cxx_construct_expr = dyn_cast(expr)) { + // 正常只有一个参数 + if (cxx_construct_expr->getNumArgs() == 1) { + auto expr_info_ptr = std::make_shared(expr, env_ptr); + expr_info_ptr->set_parent(std::move(parse_expr_simple(cxx_construct_expr->getArg(0), env_ptr))); + expr_info_ptr->set_raw_expr_str(stmt_to_string(expr)); + return 
expr_info_ptr; + } else { + auto expr_info_ptr = std::make_shared(expr, env_ptr); + expr_info_ptr->set_raw_expr_str(stmt_to_string(expr)); + return expr_info_ptr; + } + } else if (clang::CXXFunctionalCastExpr* cxx_functional_cast_expr = dyn_cast(expr)) { + auto expr_info_ptr = std::make_shared(expr, env_ptr); + expr_info_ptr->set_raw_expr_str(stmt_to_string(expr)); + + auto param_info_ptr = parse_expr_simple(cxx_functional_cast_expr->getSubExpr(), env_ptr); + param_info_ptr->set_caller_info(expr_info_ptr); + expr_info_ptr->add_call_expr_param(std::move(param_info_ptr)); + + return expr_info_ptr; + } else if (clang::ConstantExpr* constant_expr = dyn_cast(expr)) { + return parse_expr_simple(constant_expr->getSubExpr(), env_ptr); + } else if (clang::DeclRefExpr* decl_ref_expr = dyn_cast(expr)) { + LOG(INFO) << "parse decl_ref_expr: " << stmt_to_string(decl_ref_expr); + auto expr_info_ptr = std::make_shared(expr, env_ptr); + expr_info_ptr->set_origin_expr(decl_ref_expr); + expr_info_ptr->set_raw_expr_str(stmt_to_string(expr)); + + std::string expr_str = tool::trim_this(stmt_to_string(expr)); + + if (expr_str == "pos") { + return expr_info_ptr; + } + + // 示例: + // uint64_t min_time = UINT_MAX; + // min_time = std::min(min_time, dsp_infos[i].action_timestamp()); + if (const auto& assign_info = env_ptr->cur_assign_info()) { + if (expr_str == assign_info->name()) { + return expr_info_ptr; + } + } + + clang::Expr* env_value = env_ptr->find(expr_str); + + // 互相引用的表达式这里判断不准 + if (env_ptr->is_decl_ref_contains_self(decl_ref_expr, env_value)) { + return expr_info_ptr; + } + LOG(INFO) << "decl_ref_expr: " << stmt_to_string(decl_ref_expr) + << ", env_value: " << stmt_to_string(env_value); + + if (env_value != nullptr) { + auto expr_info_ptr = parse_expr_simple(env_value, env_ptr); + if (expr_info_ptr != nullptr) { + expr_info_ptr->set_origin_expr(decl_ref_expr); + expr_info_ptr->set_raw_expr_str(stmt_to_string(expr)); + } else { + LOG(INFO) << "parse decl ref expr failed, expr: " 
<< stmt_to_string(expr); + } + + return expr_info_ptr; + } + + return expr_info_ptr; + } else if (clang::IntegerLiteral* integer_literal = dyn_cast(expr)) { + auto expr_info_ptr = std::make_shared(expr, env_ptr); + expr_info_ptr->set_raw_expr_str(stmt_to_string(expr)); + return expr_info_ptr; + } else if (clang::CXXThisExpr* cxx_this_expr = dyn_cast(expr)) { + auto expr_info_ptr = std::make_shared(expr, env_ptr); + expr_info_ptr->set_raw_expr_str(stmt_to_string(expr)); + return expr_info_ptr; + } else if (clang::CXXNullPtrLiteralExpr* cxx_null_ptr_literal_expr = + dyn_cast(expr)) { + auto expr_info_ptr = std::make_shared(expr, env_ptr); + expr_info_ptr->set_raw_expr_str(stmt_to_string(expr)); + return expr_info_ptr; + } else if (clang::GNUNullExpr* gnu_null_expr = dyn_cast(expr)) { + auto expr_info_ptr = std::make_shared(expr, env_ptr); + expr_info_ptr->set_raw_expr_str(stmt_to_string(expr)); + return expr_info_ptr; + } else { + LOG(INFO) << "unknown type, expr: " << stmt_to_string(expr); + auto expr_info_ptr = std::make_shared(expr, env_ptr); + expr_info_ptr->set_raw_expr_str(stmt_to_string(expr)); + return expr_info_ptr; + } + + LOG(INFO) << "parse error, expr: " << stmt_to_string(expr); + return nullptr; +} + +std::shared_ptr parse_expr(clang::Expr* expr, Env* env_ptr) { + auto expr_info_ptr = parse_expr_simple(expr, env_ptr); + if (expr_info_ptr == nullptr) { + LOG(INFO) << "parse expr error, return nullptr! 
expr: " << stmt_to_string(expr); + return nullptr; + } + + update_env_common_info(expr_info_ptr.get(), env_ptr); + + update_env_action_detail(expr_info_ptr.get(), env_ptr); + update_env_action_detail_fixed(expr_info_ptr.get(), env_ptr); + + update_env_middle_node(expr_info_ptr.get(), env_ptr); + update_env_double_list(expr_info_ptr.get(), env_ptr); + update_env_get_seq_list(expr_info_ptr.get(), env_ptr); + update_env_proto_list(expr_info_ptr.get(), env_ptr); + update_env_query_token(expr_info_ptr.get(), env_ptr); + + update_env_general(expr_info_ptr.get(), env_ptr); + + update_env_bs_field(expr_info_ptr.get(), env_ptr); + + return expr_info_ptr; +} + +void update_env_common_info(ExprInfo* expr_info_ptr, Env* env_ptr) { + update_env_common_info_prepare(expr_info_ptr, env_ptr); + update_env_common_info_normal(expr_info_ptr, env_ptr); + update_env_common_info_fixed_list(expr_info_ptr, env_ptr); + update_env_common_info_multi_map(expr_info_ptr, env_ptr); + update_env_common_info_multi_int_list(expr_info_ptr, env_ptr); +} + +void update_env_common_info_multi_map(ExprInfo* expr_info_ptr, Env* env_ptr) { + // 多个 common info, 确定是 MULTI_MAP + // user_attr_map_.find(user_attr.name_value()) + update_env_common_info_multi_map_touch(expr_info_ptr, env_ptr); + update_env_common_info_multi_map_check_cond(expr_info_ptr, env_ptr); +} + +void update_env_common_info_multi_int_list(ExprInfo *expr_info_ptr, Env *env_ptr) { + update_env_common_info_multi_int_list_add_map(expr_info_ptr, env_ptr); + update_env_common_info_multi_int_list_def(expr_info_ptr, env_ptr); +} + +void update_env_action_detail(ExprInfo* expr_info_ptr, Env* env_ptr) { + // 类似 common_info 的 list 和 map, 需要统一在 Env 中添加其定义。 + // const auto& ad_dsp_action_detail = adlog.user_info().user_real_time_action().real_time_dsp_action_detail(); + // auto iter = ad_dsp_action_detail.find(no); + // + // 对应变量的创建在 general 中进行。 + // 首次出现, 在 env_ptr 中创建 ActionDetailInfo。 + update_env_action_detail_prefix(expr_info_ptr, env_ptr); + 
update_env_action_detail_new_def(expr_info_ptr, env_ptr); + update_env_action_detail_check_cond(expr_info_ptr, env_ptr); + update_env_action_detail_action_param_def(expr_info_ptr, env_ptr); +} + + +void update_env_action_detail_fixed(ExprInfo* expr_info_ptr, Env* env_ptr) { + update_env_action_detail_fixed_touch(expr_info_ptr, env_ptr); + update_env_action_detail_fixed_var_def(expr_info_ptr, env_ptr); +} + +void update_env_middle_node(ExprInfo* expr_info_ptr, Env* env_ptr) { + update_env_middle_node_root(expr_info_ptr, env_ptr); + update_env_middle_node_leaf_def(expr_info_ptr, env_ptr); + update_env_middle_node_check_cond(expr_info_ptr, env_ptr); +} + +void update_env_get_seq_list(ExprInfo* expr_info_ptr, Env* env_ptr) { + update_env_get_seq_list_touch(expr_info_ptr, env_ptr); + update_env_get_seq_list_def(expr_info_ptr, env_ptr); + update_env_get_seq_list_if_cond(expr_info_ptr, env_ptr); + update_env_get_seq_list_loop(expr_info_ptr, env_ptr); +} + +void update_env_proto_list(ExprInfo *expr_info_ptr, Env *env_ptr) { + update_env_proto_list_leaf(expr_info_ptr, env_ptr); + update_env_proto_list_size(expr_info_ptr, env_ptr); +} + +void update_env_query_token(ExprInfo* expr_info_ptr, Env* env_ptr) { + update_env_query_token_field_def(expr_info_ptr, env_ptr); + update_env_query_token_loop(expr_info_ptr, env_ptr); +} + +void update_env_general(ExprInfo* expr_info_ptr, Env* env_ptr) { + update_env_general_iter_second(expr_info_ptr, env_ptr); + update_env_general_basic_scalar_def(expr_info_ptr, env_ptr); + update_env_general_str_call(expr_info_ptr, env_ptr); + update_env_general_loop_var_method(expr_info_ptr, env_ptr); + update_env_general_basic_expr(expr_info_ptr, env_ptr); + update_env_general_binary_op_info(expr_info_ptr, env_ptr); + update_env_general_decl_info(expr_info_ptr, env_ptr); + update_env_general_get_norm_query(expr_info_ptr, env_ptr); + update_env_general_loop_var_expr(expr_info_ptr, env_ptr); + update_env_general_int_list_member_loop(expr_info_ptr, 
env_ptr); + update_env_general_check_item_pos_include(expr_info_ptr, env_ptr); + update_env_general_reco_user_info(expr_info_ptr, env_ptr); + update_env_general_proto_map_loop(expr_info_ptr, env_ptr); + update_env_general_proto_list_size_method(expr_info_ptr, env_ptr); +} + +void update_env_bs_field(ExprInfo* expr_info_ptr, Env* env_ptr) { + update_env_bs_field_decl(expr_info_ptr, env_ptr); +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/ExprParser.h b/convert/ExprParser.h new file mode 100644 index 0000000..4927ce7 --- /dev/null +++ b/convert/ExprParser.h @@ -0,0 +1,37 @@ +#pragma once + +#include +#include + +#include "clang/AST/Type.h" +#include "clang/AST/Expr.h" + +#include "Env.h" +#include "Tool.h" +#include "ExprInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +/// 解析表达式中基本的信息, 如 callee_nem_, parent_ 等, 用于下一步与 env_ptr 更新参数。 +std::shared_ptr parse_expr_simple(clang::Expr* expr, Env* env_ptr); + +/// 更新 expr 中的各种信息到 env_ptr 中, 用于之后的替换。 +std::shared_ptr parse_expr(clang::Expr* expr, Env* env_ptr); + +void update_env_common_info(ExprInfo* expr_info_ptr, Env* env_ptr); +void update_env_action_detail(ExprInfo* expr_info_ptr, Env* env_ptr); +void update_env_action_detail_fixed(ExprInfo* expr_info_ptr, Env* env_ptr); +void update_env_middle_node(ExprInfo* expr_info_ptr, Env* env_ptr); +void update_env_double_list(ExprInfo* expr_info_ptr, Env* env_ptr); +void update_env_get_seq_list(ExprInfo* expr_info_ptr, Env* env_ptr); +void update_env_proto_list(ExprInfo* expr_info_ptr, Env* env_ptr); +void update_env_query_token(ExprInfo* expr_info_ptr, Env* env_ptr); +void update_env_general(ExprInfo* expr_info_ptr, Env* env_ptr); + +void update_env_bs_field(ExprInfo* expr_info_ptr, Env* env_ptr); + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/ExprParserBSField.cpp b/convert/ExprParserBSField.cpp new file mode 100644 index 0000000..b88ed8d --- /dev/null 
+++ b/convert/ExprParserBSField.cpp @@ -0,0 +1,131 @@ +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "clang/AST/AST.h" +#include "./Deleter.h" +#include "./ExprParserBSField.h" +#include "./Tool.h" +#include "./Type.h" +#include "./info/IfInfo.h" +#include "./info/LoopInfo.h" +#include "./info/NewVarDef.h" +#include "./info/BSFieldInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +using tool::strip_suffix_semicolon_newline; + +void update_env_bs_field_decl(ExprInfo* expr_info_ptr, Env* env_ptr) { + // 添加新的定义。 + if (expr_info_ptr->is_in_decl_stmt() && expr_info_ptr->is_bslog_field_enum_ref()) { + if (const auto& decl_info = env_ptr->cur_decl_info()) { + std::vector args; + + if (clang::CallExpr* call_expr = dyn_cast(decl_info->init_expr())) { + for (size_t i = 0; i < call_expr->getNumArgs(); i++) { + clang::Expr* arg = call_expr->getArg(i); + args.emplace_back(stmt_to_string(arg)); + } + } else if (clang::CXXConstructExpr* cxx_constructor_expr = + dyn_cast(decl_info->init_expr())) { + for (size_t i = 0; i < cxx_constructor_expr->getNumArgs(); i++) { + clang::Expr* arg = cxx_constructor_expr->getArg(i); + args.emplace_back(stmt_to_string(arg)); + } + } + + if (args.size() > 0) { + bool is_bs_field = false; + bool is_has_value_in_params = false; + + const std::string &var_name = decl_info->name(); + + NewVarType new_var_type = NewVarType::SCALAR; + std::string decl_stmt_str = stmt_to_string(decl_info->decl_stmt()); + + if (decl_stmt_str.find("GetSingular") != std::string::npos) { + new_var_type = NewVarType::SCALAR; + is_bs_field = true; + } else if (decl_stmt_str.find("BSRepeatedField") != std::string::npos) { + new_var_type = NewVarType::LIST; + is_bs_field = true; + } else if (decl_stmt_str.find("BSMapField") != std::string::npos) { + new_var_type = NewVarType::MAP; + is_bs_field = true; + } + + if (!is_bs_field) { + return; + } + + std::vector enum_names; + std::vector enum_var_decl_stmts; + + 
for (size_t i = 0; i < args.size(); i++) { + if (args[i] == "*bs" || args[i] == "pos") { + continue; + } + + if (absl::StartsWith(args[i], "&")) { + is_has_value_in_params = true; + continue; + } + + if (args[i].find("BSFieldEnum::") != std::string::npos) { + enum_names.emplace_back(args[i]); + } else { + if (clang::Expr *arg_init = env_ptr->find(args[i])) { + std::string arg_init_str = stmt_to_string(arg_init); + if (arg_init_str.find("BSFieldEnum::") != std::string::npos) { + enum_names.emplace_back(args[i]); + + if (clang::DeclStmt *arg_decl_stmt = + env_ptr->get_decl_stmt(args[i])) { + enum_var_decl_stmts.emplace_back( + stmt_to_string(arg_decl_stmt)); + } else { + LOG(ERROR) << "cannot find decl_stmt, i: " << i + << ", arg: " << args[i]; + } + } + } else { + LOG(ERROR) << "cannot find enum decl in env: " << args[i]; + } + } + } + + if (enum_names.size() > 0) { + if (auto &bs_field_info = env_ptr->touch_bs_field_info(var_name)) { + bs_field_info->insert_bs_field_enum_var_names(var_name, + enum_names, + is_has_value_in_params); + LOG(INFO) << "insert_bs_field_enum_var_name, var_name: " + << var_name + << ", enum_names: " << absl::StrJoin(enum_names, ", ") + << ", expr: " << expr_info_ptr->origin_expr_str(); + + std::ostringstream oss; + oss << absl::StrJoin(enum_var_decl_stmts, "\n") + << strip_suffix_semicolon_newline(decl_stmt_str); + + bs_field_info->insert_new_def(var_name, oss.str(), + new_var_type); + } + } + } + } + } +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/ExprParserBSField.h b/convert/ExprParserBSField.h new file mode 100644 index 0000000..4ccbf43 --- /dev/null +++ b/convert/ExprParserBSField.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include + +#include "clang/AST/Type.h" +#include "clang/AST/Expr.h" + +#include "./Env.h" +#include "./Tool.h" +#include "./ExprInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +/// 将新的 bs field 变量定义添加到新的 env 中,老的 decl_stmt 需要删掉。 +void 
update_env_bs_field_decl(ExprInfo* expr_info_ptr, Env* env_ptr); + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/ExprParserDetail.cpp b/convert/ExprParserDetail.cpp new file mode 100644 index 0000000..6beac6e --- /dev/null +++ b/convert/ExprParserDetail.cpp @@ -0,0 +1,2146 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "clang/AST/AST.h" + +#include "Deleter.h" +#include "ExprParserDetail.h" +#include "Tool.h" +#include "Type.h" +#include "info/IfInfo.h" +#include "info/LoopInfo.h" +#include "info/NewActionParam.h" +#include "info/NewVarDef.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +// =============== 详细逻辑 =============== + +// common info 相关逻辑 +// auto attr = adlog.item.common_info_attr(i) +// attr.int_value(), attr.int_list_value(), kv.first +// common_info 字段肯定出现在 enum 条件之后,并且一个 if Env 最多只能有一个 common info enum, 其对应的数据 +// 只能是一种类型, 或者是单值,或者是 list、或者是 map。 +// 1. 如果是单值,则根据 expr_info 则可以确定最重的 bs 表达式,get_bs_field_value 返回 GetSingular 即可。 +// 2. 如果是 list, 则此处需要在 Env 中保存变量 list_var_name,get_bs_field_value 返回 list_var_name.Get(i) +// 3. 
如果是 map, 则此处需要在 Env 中保存变量 map_var_name, get_bs_field_value 返回 map_var_name.Get(i) +// +// 如果是 list 或者 map, 需要在 Env 中添加其定义, 用于之后生成 bs 表达式。不区分 info 类型,统一放在 Env +// 的 map 中。 +// common info 枚举和 repeated common info 的变量肯定不会同时出现。common info 枚举只会在 if 条件中出现, +// 并且只是枚举,不会包含 repeated。而如果 reapted common info 中出现了 common info 的 enum,则肯定是在使用 +// common info 的时候,此时 common info 的数据类型就可以根据 callee_name 确定。 +// +// common info 第一次出现,只能确定 prefix, 并不能确定是 NORMAL 还是 MULTI_MAP, 因此先将 prefix 记下来, +// 一个 Env 里只能有一个 prefix。 +// +// 可能会有两层 loop, 第一层遍历 common info 字段, 第二层是遍历 common info 中的 list 或者 map, 需要找到第一 +// loop, 才是 common info prefix 出现的地方。 +// 目前通过 is_repeated_common_info 来区分, 如果是 repeated_common_info, 那么就是第一次出现 +// prefix 的地方。 +// +// 类型也可能在 int_value 之前出现, 如果是这种情况, 那么一定是先遍历 list 或者 map, 再在循环里判断枚举, +// 因此所有的 common_info_method 就都是一样的, 对应的类型也都是一样的, 可以统一在 CommonInfoNormal 中设置。 +// 如; +// teams/ad/ad_algorithm/feature/fast/impl/extract_live_lsp_segment_info.h +// +// for (const auto & attr : common_info_attrs) { +// if (attr.name_value() == attr_name) { +// for (int64 value : attr.int_list_value()) { +// if (variant == 0) { +// AddFeature(value, 1.0f, result); +// continue; +// } +// if (attr_name == auto_cpp_rewriter::CommonInfoAttr_NameExtendTwo_LSP_LATEST_LIVE_SEGMENT_INFO_LIVE_ID) { +// if (variant == 1) { +// AddFeature(value & MASK48, 1.0f, result); +// } else if (variant == 2) { +// AddFeature(value >> 48, 1.0f, result); +// } +// continue; +// } +// ... 
+// } +// } +// } +void update_env_common_info_prepare(ExprInfo* expr_info_ptr, Env* env_ptr) { + update_env_common_info_prefix(expr_info_ptr, env_ptr); + update_env_common_info_name_value_alias(expr_info_ptr, env_ptr); + update_env_common_info_method_name(expr_info_ptr, env_ptr); + update_env_common_info_int_value(expr_info_ptr, env_ptr); +} + +void update_env_common_info_prefix(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_repeated_common_info() || expr_info_ptr->is_repeated_common_info_size()) { + // common info 第一次出现, 设置 common info prefix, 必须是在 for 循环的 range 里。 + // 此时不能确定是 NORMAL 还是 MULTI_MAP, 只能先将 prefix 记录下来。 + if (auto& loop_info = env_ptr->cur_mutable_loop_info()) { + if (loop_info->loop_stage() == LoopStage::INIT) { + absl::optional prefix_adlog_opt = expr_info_ptr->get_common_info_prefix_adlog(); + if (prefix_adlog_opt) { + loop_info->set_is_repeated_common_info(true); + if (Env* loop_env = env_ptr->mutable_loop_env()) { + const auto& common_info_prefix = loop_env->common_info_prefix(); + if (!common_info_prefix) { + LOG(INFO) << "set common_info_prefx_adlog: " << *prefix_adlog_opt; + loop_env->set_common_info_prefix_adlog(*prefix_adlog_opt); + } + } else { + LOG(INFO) << "something is wrong! cannot find loop env, expr: " + << stmt_to_string(expr_info_ptr->expr()); + } + } else { + LOG(INFO) << "cannot find common_info_prefix_adlog, expr: " + << stmt_to_string(expr_info_ptr->expr()); + } + } + } + } +} + +void update_env_common_info_name_value_alias(ExprInfo* expr_info_ptr, Env* env_ptr) { + // 判断等于, 但是不是用 attr.name_value(), 而是模板参数 attr_name。在 CommonInfoNormal 中记录 attr_name。 + // 见: teams/ad/ad_algorithm/feature/fast/impl/extract_live_lsp_segment_info.h + // for (const auto & attr : common_info_attrs) { + // if (attr.name_value() == attr_name) { + // for (int64 value : attr.int_list_value()) { + // ... 
+ // } + // } + // } + // if (expr_info_ptr->is_common_info_name_value()) { + // env_ptr->touch_common_info_normal(); + // } + if (expr_info_ptr->is_template_int_ref()) { + if (const auto& if_info = env_ptr->cur_if_info()) { + if (if_info->if_stage() == IfStage::COND) { + if (const auto& binary_op_info = env_ptr->cur_binary_op_info()) { + if (binary_op_info->is_equal_op()) { + if (binary_op_info->left_expr_str().find("name_value") != std::string::npos) { + if (auto& common_info_prepare = env_ptr->mutable_common_info_prepare()) { + if (!common_info_prepare->is_confirmed()) { + LOG(INFO) << "set name_value_alias: " << binary_op_info->right_expr_str(); + common_info_prepare->set_name_value_alias(binary_op_info->right_expr_str()); + } + } + } + } + } + } + } + } +} + +void update_env_common_info_method_name(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (auto& common_info_prepare = env_ptr->mutable_common_info_prepare()) { + if (!common_info_prepare->is_confirmed()) { + if (const auto& loop_info = env_ptr->cur_loop_info()) { + if (loop_info->loop_stage() == LoopStage::INIT) { + if (expr_info_ptr->is_common_info_list_method()) { + common_info_prepare->set_method_name(expr_info_ptr->callee_name()); + } + if (expr_info_ptr->is_common_info_size_method()) { + common_info_prepare->set_is_for_stmt(loop_info->is_for_stmt()); + } + } + } + } + } +} + +void update_env_common_info_int_value(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_common_attr_info_enum()) { + if (auto& common_info_prepare = env_ptr->mutable_common_info_prepare()) { + if (absl::optional enum_value_opt = expr_info_ptr->get_common_attr_int_value()) { + common_info_prepare->set_int_value(*enum_value_opt); + } + } + } +} + +void update_env_common_info_normal_detail_with_value(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_from_repeated_common_info()) { + if (absl::optional enum_value = env_ptr->get_common_attr_int_value()) { + if (auto& common_info_normal = 
env_ptr->mutable_common_info_normal()) { + if (Env* parent_env_ptr = common_info_normal->parent_env_ptr()) { + std::string bs_common_enum_str = expr_info_ptr->get_bs_enum_str(); + if (common_info_normal->common_info_details_size() > 0) { + auto& common_info_detail = common_info_normal->last_mutable_common_info_detail(); + // 是 common info 的取值方法, 如 int_value(), int_list_value() + if (expr_info_ptr->is_common_info_method()) { + common_info_detail->update_method_name(expr_info_ptr->callee_name()); + } else if (expr_info_ptr->is_common_info_size_method()) { + common_info_detail->update_size_method_name(expr_info_ptr->callee_name()); + if (const auto& loop_info = env_ptr->cur_loop_info()) { + if (loop_info->loop_stage() == LoopStage::INIT) { + common_info_detail->set_is_size_method_in_loop_init(true); + } + } else { + common_info_detail->set_is_size_method_in_loop_init(false); + } + } + } + } + } + } + } +} + +void update_env_common_info_normal_detail_without_value(ExprInfo* expr_info_ptr, Env* env_ptr) { + // 也可能是 CommonInfoFixed, 也找不到 enum_value, 而是模板参数 + if (expr_info_ptr->is_from_repeated_common_info()) { + if (expr_info_ptr->is_common_info_method()) { + if (!expr_info_ptr->has_common_attr_int_value_in_env()) { + if (!env_ptr->is_loop()) { + if (auto& common_info_fixed_list = env_ptr->touch_common_info_fixed_list()) { + if (auto last_detail = common_info_fixed_list->last_mutable_common_info_detail()) { + last_detail->update_method_name(expr_info_ptr->callee_name()); + } + } + } + + if (env_ptr->is_loop()) { + if (const auto& loop_info = env_ptr->cur_loop_info()) { + if (loop_info->loop_stage() == LoopStage::INIT) { + // Env 中找不到 int_value, 先出现 common_info_method, 一定是遍历 list 或者 map, 因此可以确定 common_info + // 对应的类型, 统一在CommonInfoNormal 中设置 uni_method_name + if (auto& common_info_normal = env_ptr->touch_common_info_normal()) { + common_info_normal->set_uni_method_name(expr_info_ptr->callee_name()); + } + } + } + } + + if (auto& common_info_multi_map = 
env_ptr->mutable_common_info_multi_map()) { + common_info_multi_map->update_method_name(expr_info_ptr->callee_name()); + } + } + } + } +} + +void update_env_common_info_normal_name_value(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_from_repeated_common_info()) { + if (expr_info_ptr->is_common_info_name_value()) { + if (auto& if_info = env_ptr->mutable_if_info()) { + if (if_info->if_stage() == IfStage::COND) { + if_info->add_cond_var_type(ExprType::ADLOG_COMMON_INFO_NAME_VALUE); + } + } + } + } +} + +void update_env_common_info_normal_check_cond_template_int(ExprInfo* expr_info_ptr, Env* env_ptr) { + // ::auto_cpp_rewriter::ContextInfoCommonAttr::MEDIUM_UID + if (expr_info_ptr->is_common_attr_info_enum() || expr_info_ptr->is_common_info_compare_int_value(env_ptr)) { + if (expr_info_ptr->is_template_int_ref()) { + if (auto& if_info = env_ptr->mutable_if_info()) { + if (if_info->if_stage() == IfStage::COND) { + if (if_info->has_cond_var_type(ExprType::ADLOG_COMMON_INFO_NAME_VALUE)) { + if_info->set_is_check_common_info_normal_cond(true); + } + } + } + } + } +} + +void update_env_common_info_normal_int_value_in_if(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_common_attr_info_enum() || expr_info_ptr->is_common_info_compare_int_value(env_ptr)) { + if (!expr_info_ptr->is_template_int_ref()) { + if (absl::optional enum_value = expr_info_ptr->get_common_attr_int_value_in_expr()) { + if (auto& if_info = env_ptr->mutable_if_info()) { + if (if_info->if_stage() == IfStage::COND) { + if_info->set_is_check_common_info_normal_cond(true); + absl::optional index; + if (auto& common_info_normal = env_ptr->touch_common_info_normal()) { + common_info_normal->add_common_info_value(*enum_value); + index.emplace(common_info_normal->common_info_details_size() - 1); + + if (auto last_detail = common_info_normal->last_mutable_common_info_detail()) { + if (absl::optional enum_name = expr_info_ptr->get_common_info_enum_name_in_expr()) { + 
last_detail->set_common_info_enum_name(*enum_name); + } + } + + if (index) { + if_info->set_common_info_index(*index); + if_info->set_common_info_value(*enum_value); + } + + if (const auto& binary_op_info = env_ptr->cur_binary_op_info()) { + // 判断不等于 + // teams/ad/ad_algorithm/feature/fast/impl/extract_combine_user_id_dup_cover_id.h: + // if (!commonAttr.has_name_value()||!commonAttr.has_type() + // || commonAttr.type() !=::auto_cpp_rewriter::CommonTypeEnum::INT_ATTR + // || commonAttr.name_value() != + // ::auto_cpp_rewriter::ItemCommonInfoAttr_Name::ItemCommonInfoAttr_Name_DSP_DUP_COVER_ID) { + // continue; + // } + if (if_info->has_cond_var_type(ExprType::ADLOG_COMMON_INFO_NAME_VALUE)) { + if (binary_op_info->is_not_equal_op()) { + common_info_normal->set_is_check_equal(false); + } + } + } + } + } + } + } + } + } +} + +void update_env_common_info_normal_int_value_in_switch(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_common_attr_info_enum() || expr_info_ptr->is_common_info_compare_int_value(env_ptr)) { + if (!expr_info_ptr->is_template_int_ref()) { + if (auto& switch_case_info = env_ptr->mutable_switch_case_info()) { + if (absl::optional enum_value = expr_info_ptr->get_common_attr_int_value_in_expr()) { + if (auto& common_info_normal = env_ptr->touch_common_info_normal()) { + absl::optional index; + common_info_normal->add_common_info_value(*enum_value); + index.emplace(common_info_normal->common_info_details_size() - 1); + + if (auto last_detail = common_info_normal->last_mutable_common_info_detail()) { + if (absl::optional enum_name = expr_info_ptr->get_common_info_enum_name_in_expr()) { + last_detail->set_common_info_enum_name(*enum_name); + } + } + + LOG(INFO) << "add common info detail, enum_value: " << *enum_value + << ", index: " << *index; + + if (index) { + switch_case_info->set_common_info_index(*index); + switch_case_info->set_common_info_value(*enum_value); + } + } + } else { + LOG(INFO) << "cannot get enum value from expr: " << 
expr_info_ptr->origin_expr_str(); + } + } + } + } +} + +void update_env_common_info_normal_enum(ExprInfo* expr_info_ptr, Env* env_ptr) { + // 先遍历 int_list_value, 再判断 enum + // teams/ad/ad_algorithm/feature/fast/impl/extract_live_lsp_segment_info.h + // for (const auto & attr : common_info_attrs) { + // if (attr.name_value() == attr_name) { + // for (int64 value : attr.int_list_value()) { + // if (variant == 0) { + // AddFeature(value, 1.0f, result); + // continue; + // } + // } + // } + // } + if (expr_info_ptr->is_common_attr_info_enum()) { + // attr 遍历先于枚举 + if (auto& common_info_normal = env_ptr->mutable_common_info_normal()) { + if (const absl::optional& uni_method_name = common_info_normal->uni_method_name()) { + if (CommonAttrInfo::is_common_info_list_method(*uni_method_name)) { + if (!common_info_normal->list_loop_var()) { + if (const Env* loop_env = env_ptr->get_loop_env()) { + if (loop_env->is_common_info_loop()) { + if (loop_env->loop_var_names().size() > 0) { + LOG(INFO) << "set list_loop_var: " << loop_env->get_last_loop_var(); + common_info_normal->set_list_loop_var(loop_env->get_last_loop_var()); + } + } + } + } + } + } + } + } +} + +void update_env_common_info_normal_detail_def(ExprInfo* expr_info_ptr, Env* env_ptr) { + // 必须在添加定义之前执行。 + update_env_common_info_normal_list_loop_var_type(expr_info_ptr, env_ptr); + + // 添加 CommonInfoNormal 定义到 Env + if (expr_info_ptr->is_common_attr_info_enum() || + expr_info_ptr->is_common_info_compare_int_value(env_ptr) || + expr_info_ptr->is_common_info_method() || + expr_info_ptr->is_common_info_size_method()) { + if (auto& common_info_normal = env_ptr->mutable_common_info_normal()) { + if (Env* parent_env_ptr = common_info_normal->parent_env_ptr()) { + if (common_info_normal->common_info_details_size() > 0) { + auto& last_detail = common_info_normal->last_mutable_common_info_detail(); + + if (expr_info_ptr->is_common_info_method()) { + last_detail->update_method_name(expr_info_ptr->callee_name()); + } + + if 
(expr_info_ptr->is_common_info_size_method()) { + last_detail->update_size_method_name(expr_info_ptr->callee_name()); + } + + LOG(INFO) << "add common info detail def, expr: " << expr_info_ptr->origin_expr_str() + << ", is_common_info_size_method: " << expr_info_ptr->is_common_info_size_method() + << ", common_info_value: " << last_detail->common_info_value() + << ", method_name: " << last_detail->method_name() + << ", bs_enum_str: " << last_detail->get_bs_enum_str(); + parent_env_ptr->add_common_info_detail_def(*last_detail); + } + } + } + } + + // 示例: teams/ad/ad_algorithm/feature/fast/impl/extract_creative_support_tag.h + // int max_num = attr.string_list_value().size() > 20 ? 20 : attr.string_list_value().size(); + // 示例: teams/ad/ad_algorithm/feature/fast/impl/extract_ad_product_real_name.h + // attr.string_value().empty() + if (expr_info_ptr->callee_name() == "size" || expr_info_ptr->callee_name() == "empty") { + if (auto parent = expr_info_ptr->parent()) { + if (parent->is_common_info_leaf_method()) { + if (auto& common_info_normal = env_ptr->mutable_common_info_normal()) { + if (Env* parent_env_ptr = common_info_normal->parent_env_ptr()) { + if (auto& common_info_detail = common_info_normal->last_mutable_common_info_detail()) { + common_info_detail->update_method_name(parent->callee_name()); + LOG(INFO) << "add common info list detail def, expr: " << expr_info_ptr->origin_expr_str(); + parent_env_ptr->add_common_info_detail_def(*common_info_detail); + } + } + } + } + } + } +} + +void update_env_common_info_check_list_map(ExprInfo* expr_info_ptr, Env* env_ptr) { + // for (int64_t value : attr.int_list_value()) { ... 
} + bool is_common_info_list = false; + bool is_common_info_map = false; + + if (expr_info_ptr->is_common_info_list_method()) { + is_common_info_list = true; + } + + if (expr_info_ptr->is_common_info_map_method()) { + is_common_info_map = true; + } + + if (expr_info_ptr->callee_name() == "" && expr_info_ptr->is_parent_common_info_map_method()) { + is_common_info_map = true; + } + + if (is_common_info_list || is_common_info_map) { + if (auto& loop_info = env_ptr->cur_info(InfoTraits::v)) { + if (loop_info->loop_stage() == LoopStage::INIT) { + loop_info->set_is_common_info_list_map(true); + loop_info->set_is_common_info_map(is_common_info_map); + } + } + } +} + +void update_env_common_info_normal_size_method(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_common_info_size_method()) { + if (auto& common_info_normal = env_ptr->mutable_common_info_normal()) { + if (const auto& loop_info = env_ptr->cur_loop_info()) { + if (loop_info->loop_stage() == LoopStage::INIT) { + if (common_info_normal->common_info_details_size() > 0) { + auto& last_common_info_detail = common_info_normal->last_mutable_common_info_detail(); + last_common_info_detail->set_is_for_stmt(loop_info->is_for_stmt()); + } + } + } + } + } +} + +void update_env_common_info_normal_list_method(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_common_info_list_method()) { + if (auto& common_info_normal = env_ptr->mutable_common_info_normal()) { + if (const auto& loop_info = env_ptr->cur_loop_info()) { + if (loop_info->loop_stage() == LoopStage::INIT) { + if (common_info_normal->common_info_details_size() > 0) { + auto& last_common_info_detail = common_info_normal->last_mutable_common_info_detail(); + last_common_info_detail->set_is_for_stmt(loop_info->is_for_stmt()); + last_common_info_detail->set_list_loop_var(loop_info->loop_var()); + last_common_info_detail->update_method_name(expr_info_ptr->callee_name()); + } + } + } + } + + if (auto& common_info_prepare = 
env_ptr->mutable_common_info_prepare()) { + if (expr_info_ptr->parent() != nullptr) { + common_info_prepare->set_attr_name(expr_info_ptr->parent()->origin_expr_str()); + } + } + } +} + +void update_env_common_info_normal_list_method_address(ExprInfo *expr_info_ptr, Env *env_ptr) { + if (expr_info_ptr->is_address_expr() && + expr_info_ptr->call_expr_params_size() > 0) { + auto param = expr_info_ptr->call_expr_param(0); + if (param != nullptr && param->is_common_info_list_method()) { + if (auto &common_info_normal = env_ptr->mutable_common_info_normal()) { + if (auto& last_common_info_detail = common_info_normal->last_mutable_common_info_detail()) { + last_common_info_detail->set_has_list_method_address(true); + } + } + } + } +} + +void update_env_common_info_normal_list_size_method_not_equal(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_binary_op_expr() && expr_info_ptr->callee_name() == "!=") { + if (auto& if_info = env_ptr->cur_mutable_if_info()) { + if (if_info->if_stage() == IfStage::COND && if_info->is_body_only_break()) { + if (expr_info_ptr->call_expr_params_size() == 2) { + auto param0 = expr_info_ptr->call_expr_param(0); + auto param1 = expr_info_ptr->call_expr_param(1); + if (param0 != nullptr && param1 != nullptr) { + if (param1->is_integral()) { + if (absl::optional compare_int_value = param1->get_int_value()) { + LOG(INFO) << "find compare_int_value: " << *compare_int_value; + if (param0->is_common_info_list_size_method() || + param0->is_common_info_list_size_method_divide_by_int()) { + LOG(INFO) << "left is common info list: " << param0->origin_expr_str() + << ", set_is_check_common_info_list_size_not_equal: true"; + if_info->set_is_check_common_info_list_size_not_equal(true); + if (auto& common_info_normal = env_ptr->mutable_common_info_normal()) { + if (auto& last_detail = common_info_normal->last_mutable_common_info_detail()) { + last_detail->set_compare_list_size_vlaue(*compare_int_value); + + if 
(param0->is_common_info_list_size_method_divide_by_int()) { + if (param0->call_expr_params_size() == 2) { + if (auto dividend_info = param0->call_expr_param(1)) { + if (absl::optional dividend = dividend_info->get_int_value()) { + LOG(INFO) << "last detail set_list_size_dividend: " << *dividend; + last_detail->set_list_size_dividend(*dividend); + } else { + LOG(INFO) << "cannot find dividend from param0: " << param0->origin_expr_str() + << ", binary_operator: " << expr_info_ptr->origin_expr_str(); + } + } + } + } + } + } + } + } else { + LOG(INFO) << "cannot find int value from param1: " << param1->origin_expr_str() + << ", binary_operator: " << expr_info_ptr->origin_expr_str(); + } + } + } + } + } + } + } +} + +void update_env_common_info_normal_helper_method(ExprInfo* expr_info_ptr, Env* env_ptr) { + // helper 目前只用来处理 common info + if (expr_info_ptr->is_common_info_list_method() || expr_info_ptr->is_common_info_size_method()) { + if (const auto& loop_info = env_ptr->cur_loop_info()) { + if (loop_info->loop_stage() == LoopStage::INIT) { + if (env_ptr->get_method_name() == "helper") { + // 更新 feature info 中的 method_info common_info_prepare + if (auto feature_info = env_ptr->mutable_feature_info()) { + MethodInfo& method_info = feature_info->touch_method_info("helper"); + auto& common_info_prepare = method_info.mutable_common_info_prepare(); + if (!common_info_prepare) { + common_info_prepare.emplace(); + } + + if (common_info_prepare) { + if (expr_info_ptr->is_common_info_list_method()) { + common_info_prepare->set_method_name(expr_info_ptr->callee_name()); + } else { + common_info_prepare->update_size_method_name(expr_info_ptr->callee_name()); + } + if (expr_info_ptr->parent() != nullptr) { + common_info_prepare->set_attr_name(expr_info_ptr->parent()->origin_expr_str()); + } + } + } + + // 更新当前 env_ptr + auto& prepare = env_ptr->cur_mutable_common_info_prepare(); + if (!prepare) { + prepare.emplace(); + } + + if (prepare) { + if 
(expr_info_ptr->is_common_info_list_method()) { + prepare->set_method_name(expr_info_ptr->callee_name()); + } else { + prepare->update_size_method_name(expr_info_ptr->callee_name()); + } + if (expr_info_ptr->parent() != nullptr) { + prepare->set_attr_name(expr_info_ptr->parent()->origin_expr_str()); + } + } + } + } + } + } +} + +void update_env_common_info_normal_helper_def(ExprInfo *expr_info_ptr, Env *env_ptr) { + // case ::auto_cpp_rewriter:: CommonInfoAttr_NameExtendOne_AD_MERCHANT_FOLLOW_PHOTO_ID_LIST: + // helper(FeaturePrefix::USER_AD_MERCHANT_FOLLOW_REAlTIME_NEW_EXTEND_PHOTO, userAttr, result); + // break; + if (expr_info_ptr->is_call_expr() && expr_info_ptr->callee_name() == "helper") { + if (const auto &switch_case_info = env_ptr->cur_switch_case_info()) { + if (auto &common_info_normal = env_ptr->mutable_common_info_normal()) { + if (common_info_normal->common_info_details_size() > 0) { + if (auto &last_detail = common_info_normal->last_mutable_common_info_detail()) { + if (auto feature_info = env_ptr->mutable_feature_info()) { + if (const MethodInfo *method_info = feature_info->find_method_info("helper")) { + if (const auto &common_info_prepare = method_info->common_info_prepare()) { + if (const auto &method_name = common_info_prepare->method_name()) { + last_detail->update_method_name(*method_name); + if (auto parent_env = env_ptr->mutable_common_info_parent_env()) { + LOG(INFO) << "add def, bs_enum_str: " + << last_detail->get_bs_enum_str() << ", list_def: " + << last_detail->get_bs_list_def(env_ptr); + parent_env->add_new_def_meta(last_detail->get_bs_enum_str(), + last_detail->get_bs_list_def(env_ptr), + NewVarType::LIST); + } + } + } + } + } + } + } + } + } + } +} + +void update_env_common_info_normal_map_end_cond(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_common_info_map_end()) { + if (auto& if_info = env_ptr->cur_mutable_if_info()) { + if (if_info->if_stage() == IfStage::COND) { + if (const auto& binary_op_info = 
env_ptr->cur_binary_op_info()) { + if (binary_op_info->is_not_equal_op()) { + if_info->set_is_check_common_info_map_end(true); + if_info->set_is_check_equal(false); + if_info->set_left_expr_str(binary_op_info->left_expr_str()); + } + } + } + } + } +} + +// const int& pos = *__begin5; +void update_env_common_info_normal_list_loop_var_type(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_common_info_list_method()) { + if (const auto& loop_info = env_ptr->cur_loop_info()) { + if (loop_info->loop_stage() == LoopStage::INIT) { + std::string loop_var_type = loop_info->loop_var_type(); + if (auto &common_info_normal = env_ptr->mutable_common_info_normal()) { + if (auto last_detail = common_info_normal->last_mutable_common_info_detail()) { + LOG(INFO) << "set list_loop_var_type: " << loop_var_type; + last_detail->set_list_loop_var_type(loop_var_type); + } + } else if (auto &common_info_fixed_list = env_ptr->mutable_common_info_fixed_list()) { + if (auto last_detail = common_info_fixed_list->last_mutable_common_info_detail()) { + LOG(INFO) << "set list_loop_var_type: " << loop_var_type; + last_detail->set_list_loop_var_type(loop_var_type); + } + } + } + } + } +} + +// 逻辑比较绕, 之后需要重构一下。 +// CommonInfoNormal, CommonInfoMultiMap, CommonInfoFixed 应该是互斥的关系, 需要在 env 中保证。 +void update_env_common_info_normal(ExprInfo* expr_info_ptr, Env* env_ptr) { + // 先出现 common_info_enum, 后出现使用 common info 的变量。 + // 因此出现使用 comon info 的变量时, 必定可以从 Env 中找到对应的 common info enum, 从 method_name + // 可以判断其数据类型, 是否 list 或者 map。 + // 添加其定义到 env_ptr 中, 如果是单值, 则添加对应的 has 方法到 env_ptr 中, 会在之后的 if 判断中用到。 + // 此时一定已经确定是 NORMAL 还是 MULTI_MAP。 + update_env_common_info_normal_detail_with_value(expr_info_ptr, env_ptr); + update_env_common_info_normal_detail_without_value(expr_info_ptr, env_ptr); + update_env_common_info_normal_name_value(expr_info_ptr, env_ptr); + + update_env_common_info_normal_check_cond_template_int(expr_info_ptr, env_ptr); + 
update_env_common_info_normal_int_value_in_if(expr_info_ptr, env_ptr); + update_env_common_info_normal_int_value_in_switch(expr_info_ptr, env_ptr); + + update_env_common_info_normal_enum(expr_info_ptr, env_ptr); + update_env_common_info_normal_detail_def(expr_info_ptr, env_ptr); + update_env_common_info_check_list_map(expr_info_ptr, env_ptr); + update_env_common_info_normal_size_method(expr_info_ptr, env_ptr); + update_env_common_info_normal_list_method(expr_info_ptr, env_ptr); + update_env_common_info_normal_list_method_address(expr_info_ptr, env_ptr); + update_env_common_info_normal_list_size_method_not_equal(expr_info_ptr, env_ptr); + update_env_common_info_normal_helper_method(expr_info_ptr, env_ptr); + update_env_common_info_normal_helper_def(expr_info_ptr, env_ptr); + update_env_common_info_normal_map_end_cond(expr_info_ptr, env_ptr); +} + +void update_env_common_info_fixed_list_touch(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_common_info_leaf_method()) { + if (env_ptr->is_in_common_info_loop_body() || env_ptr->is_in_common_info_if_body()) { + if (const auto& common_info_prepare = env_ptr->get_common_info_prepare()) { + if (!common_info_prepare->is_confirmed()) { + if (common_info_prepare->is_common_info_fixed_list()) { + env_ptr->touch_common_info_fixed_list(); + } + } + } + } + } + + if (expr_info_ptr->is_common_info_enum_member_ref()) { + if (const auto& binary_op_info = env_ptr->get_binary_op_info()) { + if (ends_with(binary_op_info->left_expr_str(), ".name_value()")) { + if (const auto& if_info = env_ptr->cur_if_info()) { + if (if_info->if_stage() == IfStage::COND) { + if (const auto& common_info_prepare = env_ptr->get_common_info_prepare()) { + if (!common_info_prepare->is_confirmed()) { + if (common_info_prepare->is_common_info_fixed_list()) { + if (auto& common_info_fixed_list = env_ptr->touch_common_info_fixed_list()) { + // pass + } + } + } + } + } + } + } + } + } +} + +void 
update_env_common_info_fixed_list_enum_member_ref(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_common_info_enum_member_ref() || expr_info_ptr->is_template_int_ref()) { + if (const auto &binary_op_info = env_ptr->get_binary_op_info()) { + if (ends_with(binary_op_info->left_expr_str(), ".name_value()")) { + if (auto &if_info = env_ptr->cur_mutable_if_info()) { + if (if_info->if_stage() == IfStage::COND) { + if (auto &common_info_fixed_list = env_ptr->touch_common_info_fixed_list()) { + std::string int_name = tool::trim_this(expr_info_ptr->origin_expr_str()); + common_info_fixed_list->add_int_name(int_name); + + if_info->set_is_check_common_info_fixed_cond(true); + if_info->set_common_info_int_name(int_name); + LOG(INFO) << "add_int_name: " << int_name + << ", set_is_check_common_info_fixed_cond: true"; + } + } + } + } + } + } +} + +void update_env_common_info_fixed_list_size_method(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_common_info_size_method()) { + if (auto& common_info_fixed_list = env_ptr->mutable_common_info_fixed_list()) { + if (auto last_detail = common_info_fixed_list->last_mutable_common_info_detail()) { + last_detail->update_size_method_name(expr_info_ptr->callee_name()); + } + } + } +} + +void update_env_common_info_fixed_list_leaf_method(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_common_info_leaf_method()) { + if (env_ptr->is_in_common_info_loop_body() || env_ptr->is_in_common_info_if_body()) { + if (auto& common_info_fixed_list = env_ptr->mutable_common_info_fixed_list()) { + if (auto last_detail = common_info_fixed_list->last_mutable_common_info_detail()) { + if (expr_info_ptr->is_common_info_method()) { + last_detail->update_method_name(expr_info_ptr->callee_name()); + } else { + last_detail->update_size_method_name(expr_info_ptr->callee_name()); + if (const auto &loop_info = env_ptr->cur_loop_info()) { + if (loop_info->loop_stage() == LoopStage::INIT) { + 
last_detail->set_is_size_method_in_loop_init(true); + last_detail->set_is_for_stmt(loop_info->is_for_stmt()); + } + } else { + last_detail->set_is_size_method_in_loop_init(false); + } + } + } + } + } + } +} + +void update_env_common_info_fixed_list_list_method(ExprInfo *expr_info_ptr, Env *env_ptr) { + // if (user_attr.name_value() == user_attr_name_) { + // for (const auto &val : user_attr.int_list_value()) { + // action_list.push_back(val); + // } + // } + if (expr_info_ptr->is_common_info_list_method()) { + if (env_ptr->is_in_common_info_loop_body() || env_ptr->is_in_common_info_if_body()) { + if (auto &common_info_fixed_list = env_ptr->mutable_common_info_fixed_list()) { + if (auto last_detail = common_info_fixed_list->last_mutable_common_info_detail()) { + last_detail->update_method_name(expr_info_ptr->callee_name()); + if (const auto &loop_info = env_ptr->cur_loop_info()) { + if (loop_info->loop_stage() == LoopStage::INIT) { + last_detail->set_is_for_stmt(loop_info->is_for_stmt()); + last_detail->set_list_loop_var(loop_info->loop_var()); + LOG(INFO) << "set fixed list loop_var: " << loop_info->loop_var(); + } + } + } + } + } + } +} + +void update_env_common_info_fixed_list_list_def(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_common_info_leaf_method()) { + if (env_ptr->is_in_common_info_loop_body() || env_ptr->is_in_common_info_if_body()) { + if (auto& common_info_fixed_list = env_ptr->mutable_common_info_fixed_list()) { + if (auto last_detail = common_info_fixed_list->last_mutable_common_info_detail()) { + if (auto feature_info = env_ptr->mutable_feature_info()) { + if (CommonAttrInfo::is_common_info_list_method(expr_info_ptr->callee_name()) || + CommonAttrInfo::is_common_info_list_size_method(expr_info_ptr->callee_name())) { + if (Env *common_info_parent = env_ptr->mutable_common_info_parent_env()) { + std::string bs_enum_str = last_detail->get_bs_enum_str(); + if (common_info_parent->is_new_var_not_exists(bs_enum_str)) { + LOG(INFO) << 
"add list def, bs_enum_str: " << bs_enum_str + << ", def: " << last_detail->get_bs_list_def(env_ptr); + common_info_parent->add_new_def(bs_enum_str, + last_detail->get_bs_list_def(env_ptr), + NewVarType::LIST); + LOG(INFO) << "add list field_def, bs_enum_str: " << bs_enum_str + << ", functor_name: " << last_detail->get_functor_name() + << ", field_def: " << last_detail->get_bs_list_field_def(env_ptr); + feature_info->add_field_def(bs_enum_str, + last_detail->get_functor_name(), + last_detail->get_bs_list_field_def(env_ptr), + NewVarType::LIST, + AdlogVarType::COMMON_INFO_FIXED); + feature_info->set_common_info_prefix_name_value(bs_enum_str, + last_detail->prefix_adlog(), + last_detail->int_name()); + } + } + } + } + } + } + } + } +} + +void update_env_common_info_fixed_list_map_def(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_common_info_leaf_method()) { + if (env_ptr->is_in_common_info_loop_body() || env_ptr->is_in_common_info_if_body()) { + if (auto& common_info_fixed_list = env_ptr->mutable_common_info_fixed_list()) { + if (auto last_detail = common_info_fixed_list->last_mutable_common_info_detail()) { + if (auto feature_info = env_ptr->mutable_feature_info()) { + if (CommonAttrInfo::is_common_info_map_method(expr_info_ptr->callee_name())) { + if (Env *parent = common_info_fixed_list->parent_env_ptr()) { + parent->add_new_def(last_detail->get_bs_enum_str(), + last_detail->get_bs_map_def(env_ptr), + NewVarType::MAP); + LOG(INFO) << "add map field_def, bs_enum_str: " << last_detail->get_bs_enum_str() + << ", functor_name: " + << last_detail->get_functor_name() << ", field_def: " + << last_detail->get_bs_map_field_def(env_ptr); + feature_info->add_field_def(last_detail->get_bs_enum_str(), + last_detail->get_functor_name(), + last_detail->get_bs_map_field_def(env_ptr), + NewVarType::MAP, + AdlogVarType::COMMON_INFO_FIXED); + feature_info->set_common_info_prefix_name_value(last_detail->get_bs_enum_str(), + last_detail->prefix_adlog(), + 
last_detail->int_name()); + } + } + } + } + } + } + } +} + +void update_env_common_info_fixed_list_scalar_def(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_common_info_leaf_method()) { + if (env_ptr->is_in_common_info_loop_body() || env_ptr->is_in_common_info_if_body()) { + if (auto& common_info_fixed_list = env_ptr->mutable_common_info_fixed_list()) { + if (auto last_detail = common_info_fixed_list->last_mutable_common_info_detail()) { + if (auto feature_info = env_ptr->mutable_feature_info()) { + if (CommonAttrInfo::is_common_info_scalar_method(expr_info_ptr->callee_name())) { + if (Env *parent = common_info_fixed_list->parent_env_ptr()) { + parent->add_new_def(last_detail->get_bs_enum_str(), + last_detail->get_bs_scalar_def(env_ptr), + NewVarType::SCALAR); + LOG(INFO) << "add scalar field_def, bs_enum_str: " << last_detail->get_bs_enum_str() + << ", functor_name: " << last_detail->get_functor_name() + << ", field_def: " << last_detail->get_bs_scalar_field_def(env_ptr); + feature_info->add_field_def(last_detail->get_bs_enum_str(), + last_detail->get_functor_name(), + last_detail->get_bs_scalar_field_def(env_ptr), + last_detail->get_exists_functor_name(), + last_detail->get_bs_scalar_exists_field_def(env_ptr), + NewVarType::SCALAR, + AdlogVarType::COMMON_INFO_FIXED); + feature_info->set_common_info_prefix_name_value(last_detail->get_bs_enum_str(), + last_detail->prefix_adlog(), + last_detail->int_name()); + } + } + } + } + } + } + } +} + +void update_env_common_info_fixed_list_list_method_address(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_address_expr() && expr_info_ptr->call_expr_params_size() > 0) { + auto param = expr_info_ptr->call_expr_param(0); + if (param != nullptr && param->is_common_info_list_method()) { + if (auto& common_info_fixed_list = env_ptr->mutable_common_info_fixed_list()) { + if (auto last_detail = common_info_fixed_list->last_mutable_common_info_detail()) { + 
last_detail->set_has_list_method_address(true); + } + + update_env_common_info_fixed_list_leaf_method(param, env_ptr); + update_env_common_info_fixed_list_list_method(param, env_ptr); + update_env_common_info_fixed_list_list_def(param, env_ptr); + } + } + } +} + +void update_env_common_info_fixed_list(ExprInfo* expr_info_ptr, Env* env_ptr) { + // 通过模板参数传递 common info + // teams/ad/ad_algorithm/feature/fast/impl/extract_ad_ali_feature.h + // for (const auto &attr : item.ad_dsp_info().photo_info().common_info_attr()) { + // if (attr.name_value() == no) { + // AddFeature(attr.int_value(), 1.0, result); + // break; + // } + // } + update_env_common_info_fixed_list_touch(expr_info_ptr, env_ptr); + update_env_common_info_fixed_list_enum_member_ref(expr_info_ptr, env_ptr); + update_env_common_info_fixed_list_leaf_method(expr_info_ptr, env_ptr); + update_env_common_info_fixed_list_list_method(expr_info_ptr, env_ptr); + update_env_common_info_fixed_list_list_def(expr_info_ptr, env_ptr); + update_env_common_info_fixed_list_map_def(expr_info_ptr, env_ptr); + update_env_common_info_fixed_list_scalar_def(expr_info_ptr, env_ptr); + update_env_common_info_fixed_list_size_method(expr_info_ptr, env_ptr); + update_env_common_info_fixed_list_list_method_address(expr_info_ptr, env_ptr); +} + +void update_env_common_info_multi_map_touch(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_find_name_value()) { + if (auto& loop_info = env_ptr->mutable_loop_info()) { + env_ptr->add_deleted_var_by_expr(expr_info_ptr->expr()); + env_ptr->touch_common_info_multi_map(expr_info_ptr->get_common_info_multi_map_name(), + expr_info_ptr->get_common_info_multi_attr_name()); + if (auto feature_info = env_ptr->mutable_feature_info()) { + feature_info->set_has_common_info_multi_map(true); + } + } + } +} + +void update_env_common_info_multi_map_check_cond(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_common_info_multi_map_end()) { + if (auto& if_info = 
env_ptr->mutable_if_info()) { + if (if_info->if_stage() == IfStage::COND) { + if_info->set_is_check_common_info_multi_cond(true); + } + } + } +} + +void update_env_common_info_multi_int_list_add_map(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->callee_name() == "operator[]" && expr_info_ptr->call_expr_params_size() == 2) { + ExprInfo *param0_info_ptr = expr_info_ptr->call_expr_param(0); + ExprInfo *param1_info_ptr = expr_info_ptr->call_expr_param(1); + if (param0_info_ptr != nullptr && param1_info_ptr != nullptr) { + if (param1_info_ptr->callee_name() == "name_value") { + if (env_ptr->is_in_common_info_loop_body()) { + if (auto &common_info_multi_int_list = env_ptr->touch_common_info_multi_int_list()) { + // 确定是 CommonInfoMultiIntList + if (param0_info_ptr->is_map_repeated_int_list_type()) { + LOG(INFO) << "add_attr_map_name: " << param0_info_ptr->origin_expr_str(); + common_info_multi_int_list->add_attr_map_name(param0_info_ptr->origin_expr_str()); + } + if (param0_info_ptr->is_map_int_int_type()) { + LOG(INFO) << "add_attr_size_map_name: " << param0_info_ptr->origin_expr_str(); + common_info_multi_int_list->add_attr_size_map_name(param0_info_ptr->origin_expr_str()); + } + + if (auto feature_info = env_ptr->mutable_feature_info()) { + feature_info->set_has_common_info_multi_int_list(true); + } + } + } + } + } + } +} + +void update_env_common_info_multi_int_list_def(ExprInfo* expr_info_ptr, Env* env_ptr) { + // action_name2list[userAttr.name_value()] = &(userAttr.int_list_value()); + if (expr_info_ptr->callee_name() == "operator[]" && expr_info_ptr->call_expr_params_size() == 2) { + ExprInfo *param0_info_ptr = expr_info_ptr->call_expr_param(0); + ExprInfo *param1_info_ptr = expr_info_ptr->call_expr_param(1); + if (param0_info_ptr != nullptr && param1_info_ptr != nullptr) { + if (param1_info_ptr->callee_name() == "name_value") { + if (env_ptr->is_in_common_info_loop_body()) { + // 确定是 CommonInfoMultiIntList + if (auto &common_info_multi_int_list = 
env_ptr->touch_common_info_multi_int_list()) { + if (auto parent_env = common_info_multi_int_list->parent_env_ptr()) { + if (param0_info_ptr != nullptr && param1_info_ptr != nullptr) { + std::string map_name = tool::trim_this(param0_info_ptr->origin_expr_str()); + const std::string& vec_name = common_info_multi_int_list->find_correspond_vec_name(map_name); + if (vec_name.size() > 0) { + if (auto feature_info = env_ptr->mutable_feature_info()) { + const std::vector &int_values = feature_info->get_int_list_member_values(vec_name); + if (param0_info_ptr->is_map_repeated_int_list_type()) { + for (int v : int_values) { + LOG(INFO) << "add field def, bs_enum_str: " + << common_info_multi_int_list->get_bs_enum_str(v) + << ", bs_list_field_def: " + << common_info_multi_int_list->get_bs_list_field_def(v); + feature_info->add_field_def(common_info_multi_int_list->get_bs_enum_str(v), + common_info_multi_int_list->get_functor_name(v), + common_info_multi_int_list->get_bs_list_field_def(v), + NewVarType::LIST, + AdlogVarType::COMMON_INFO_MULTI_INT_LIST); + } + } + } + } else { + LOG(INFO) << "cannot find correspond vec_name: " << map_name + << ", param0: " << param0_info_ptr->origin_expr_str(); + } + } + } + } + } + } + } + } +} + +void update_env_action_detail_prefix(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_action_detail_map()) { + absl::optional action_detail_prefix_adlog = expr_info_ptr->get_action_detail_prefix_adlog(); + if (action_detail_prefix_adlog) { + env_ptr->add_action_detail_prefix_adlog(*action_detail_prefix_adlog); + } else { + LOG(INFO) << "cannot find action_detail_prefix_adlog, expr: " << stmt_to_string(expr_info_ptr->expr()); + } + } +} + +void update_env_action_detail_new_def(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_action_detail_find_expr()) { + if (!expr_info_ptr->contains_template_parameter()) { + if(absl::optional action = expr_info_ptr->get_action()) { + LOG(INFO) << "touch action: " << *action; + if 
(auto& action_detail_info = env_ptr->mutable_action_detail_info()) { + action_detail_info->add_action(*action); + } else { + if (auto& action_detail_info = env_ptr->touch_action_detail_info(*action)) { + // action detail 比较特殊, 以 list_size 是否存在判断是否存在 + // if (absl::optional bs_enum_str = action_detail_info->get_bs_list_size_enum_str()) { + // action_detail_info->env_ptr()->add_new_exists_def_meta(*bs_enum_str, + // action_detail_info->get_action_detail_exists_def(env_ptr)); + // } + } else { + LOG(INFO) << "cannot get action_detail_info, expr: " << stmt_to_string(expr_info_ptr->expr()); + } + } + } else { + LOG(INFO) << "something is wrong! cannot find action from: " << expr_info_ptr->to_string(); + } + } + } +} + +void update_env_action_detail_check_cond(ExprInfo* expr_info_ptr, Env* env_ptr) { + // if (iter == action_detail.end()) { ... } + if (expr_info_ptr->is_action_detail_map_end()) { + if (auto& if_info = env_ptr->mutable_if_info()) { + if (if_info->if_stage() == IfStage::COND) { + if_info->set_is_check_action_detail_cond(true); + if (const auto& action_detail_info = env_ptr->get_action_detail_info()) { + // action detail 比较特殊, 以 list_size 是否存在判断是否存在。 + // 如果是普通 action_detail, 必须添加到 parent env 中,当前 if 会被整体替换,对应的 env 也会被销毁。 + // 如果是遍历 action_dev_, 则添加到当前 for 循环的 env 中。后面会在 lambda 中用到。 + if (absl::optional bs_enum_str = action_detail_info->get_bs_list_size_enum_str()) { + LOG(INFO) << "add_new_exists_def, bs_enum_str: " << *bs_enum_str + << ", def: " << action_detail_info->get_action_detail_exists_def(env_ptr); + if (const auto& loop_info = env_ptr->get_loop_info()) { + if (loop_info->is_int_list_member_loop()) { + env_ptr->add_new_exists_def_helper(*bs_enum_str, + action_detail_info->get_action_detail_exists_def(env_ptr)); + env_ptr->add_attr_meta(*bs_enum_str); + return; + } + } + + env_ptr->add_new_exists_def_meta(*bs_enum_str, + action_detail_info->get_action_detail_exists_def(env_ptr)); + } + } else { + LOG(INFO) << "cannot get action_detail_info, expr: " 
<< stmt_to_string(expr_info_ptr->expr()); + } + } + } + } +} + +void update_env_action_detail_action_param_def(ExprInfo* expr_info_ptr, Env* env_ptr) { + // 函数调用, 添加 action 参数定义 + if (expr_info_ptr->is_cxx_member_call_expr() && + env_ptr->is_feature_other_method(expr_info_ptr->get_first_caller_name())) { + if (const auto feature_info = env_ptr->get_feature_info()) { + if (const MethodInfo* method_info_ptr = + feature_info->find_method_info(expr_info_ptr->get_first_caller_name())) { + for (size_t i = 0; i < expr_info_ptr->call_expr_params_size(); i++) { + const NewActionParam& new_param = method_info_ptr->find_new_action_param(i); + if (new_param.origin_name().size() > 0) { + ExprInfo* call_expr_param = expr_info_ptr->call_expr_param(i); + if (call_expr_param != nullptr) { + env_ptr->add_action_param_new_def(call_expr_param->get_bs_enum_str(), new_param); + } else { + LOG(INFO) << "cannot find call_expr_param, expr: " << expr_info_ptr->origin_expr_str() + << ", index: " << i; + } + } + } + } + } + } +} + +void update_env_action_detail_fixed_touch(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_action_detail_find_expr()) { + if (expr_info_ptr->contains_template_parameter()) { + if (auto feature_info = env_ptr->mutable_feature_info()) { + if (absl::optional action_name = expr_info_ptr->get_template_action()) { + if (auto& action_detail_fixed_info = env_ptr->touch_action_detail_fixed_info(*action_name)) { + LOG(INFO) << "add scalar exists_field_def, bs_enum_str: " + << action_detail_fixed_info->get_bs_enum_str("list.size") + << ", exists_functor_name: " + << action_detail_fixed_info->get_exists_functor_name("list.size") + << ", exists_field_def: " + << action_detail_fixed_info->get_action_detail_exists_field_def(env_ptr); + feature_info->add_field_def(action_detail_fixed_info->get_bs_enum_str("list.size"), + action_detail_fixed_info->get_exists_functor_name("list.size"), + action_detail_fixed_info->get_action_detail_exists_field_def(env_ptr), + 
NewVarType::SCALAR, + ExprType::ACTION_DETAIL_FIXED_HAS, + AdlogVarType::ACTION_DETAIL_FIXED); + feature_info->set_action_var_name(action_detail_fixed_info->get_bs_enum_str("list.size"), *action_name); + + if (auto constructor_info = env_ptr->mutable_constructor_info()) { + std::string leaf = action_detail_fixed_info->get_exists_functor_name("list.size"); + constructor_info->add_middle_node_leaf(leaf); + } + } + if (auto feature_info = env_ptr->mutable_feature_info()) { + // find 的参数可能和模板参数名不一样,需要保存 find 的参数名。 + if (expr_info_ptr->params().size() > 0) { + feature_info->set_action(stmt_to_string(expr_info_ptr->params()[0])); + } + } + } else { + LOG(INFO) << "something is wrong! cannot find template action name from: " + << expr_info_ptr->to_string(); + } + } + } + } +} + +void update_env_action_detail_fixed_var_def(ExprInfo* expr_info_ptr, Env* env_ptr) { + // 来自 action detail 模板参数的变量 + if (expr_info_ptr->is_from_action_detail_map() && + expr_info_ptr->contains_loop_var() && + expr_info_ptr->contains_template_parameter() && + expr_info_ptr->is_basic()) { + if (auto feature_info = env_ptr->mutable_feature_info()) { + if (const auto& action_detail_fixed_info = env_ptr->get_action_detail_fixed_info()) { + if (absl::optional field_name = expr_info_ptr->get_action_detail_field_name()) { + clang::QualType qual_type = expr_info_ptr->expr()->getType(); + std::string bs_enum_str = action_detail_fixed_info->get_bs_enum_str(*field_name); + std::string list_def = action_detail_fixed_info->get_bs_list_def(env_ptr, *field_name, qual_type); + LOG(INFO) << "add list var, bs_enum_str: " << bs_enum_str; + action_detail_fixed_info->env_ptr()->add_new_def(bs_enum_str, + list_def, + NewVarType::LIST); + LOG(INFO) << "add list field_def, bs_enum_str: " << bs_enum_str + << ", functor_name: " << action_detail_fixed_info->get_functor_name(*field_name) + << ", field_def: " + << action_detail_fixed_info->get_bs_list_field_def(env_ptr, *field_name, qual_type); + + 
feature_info->add_field_def(bs_enum_str, + action_detail_fixed_info->get_functor_name(*field_name), + action_detail_fixed_info->get_bs_list_field_def(env_ptr, + *field_name, + qual_type), + NewVarType::LIST, + ExprType::ACTION_DETAIL_FIXED_GET, + AdlogVarType::ACTION_DETAIL_FIXED); + feature_info->set_action_var_name(bs_enum_str, *field_name); + } else { + LOG(INFO) << "error, cannot find field_name from: " << expr_info_ptr->to_string(); + } + } + } + } +} + +// 中间节点,如 GetPhotoInfo, GetLiveInfo +// auto photo_info = GetPhotoInfo(adlog.item(pos)); +// if (photo_info == nullpptr) { } +void update_env_middle_node_root(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_middle_node_root()) { + if (const auto& decl_info = env_ptr->cur_decl_info()) { + env_ptr->add_middle_node_name(expr_info_ptr->get_middle_node_root_name()); + } + + // 注册根节点 + if (const auto &middle_node_info = env_ptr->get_middle_node_info()) { + std::string middle_node_leaf = expr_info_ptr->get_bs_middle_node_leaf(); + if (!expr_info_ptr->is_from_repeated_common_info()) { + if (auto feature_info = env_ptr->mutable_feature_info()) { + std::string leaf = + std::string("BSHas") + expr_info_ptr->get_middle_node_root_name(); + LOG(INFO) << "add leaf: " << leaf; + LOG(INFO) << "add scalar field_def, bs_enum_str: " << leaf + << ", functor_name: " << leaf << ", field_def: " + << middle_node_info->get_root_bs_exists_field_def(env_ptr); + feature_info->add_field_def(leaf, + leaf, + middle_node_info->get_root_bs_exists_field_def(env_ptr), + NewVarType::SCALAR, + AdlogVarType::MIDDLE_NODE_ROOT); + feature_info->set_middle_node_info(leaf, middle_node_info->name(), ""); + } + } + } + + // if 判断 nullptr + if (auto& if_info = env_ptr->cur_mutable_if_info()) { + if (if_info->if_stage() == IfStage::COND) { + if_info->add_cond_var_type(ExprType::ADLOG_MIDDLE_NODE_ROOT); + } + } + } +} + +void update_env_middle_node_leaf_def(ExprInfo* expr_info_ptr, Env* env_ptr) { + LOG(INFO) << "expr: " << 
expr_info_ptr->origin_expr_str() + << ", need_replace: " << expr_info_ptr->need_replace() + << ", is_middle_node_leaf_list_size_method: " << expr_info_ptr->is_middle_node_leaf_list_size_method(); + if (expr_info_ptr->need_replace() && expr_info_ptr->is_from_middle_node()) { + ExprInfo* new_expr_info_ptr = expr_info_ptr; + // str 比较特殊,不能用最后的方法, 比如 x.size(), x.data() + if (expr_info_ptr->parent() != nullptr && + expr_info_ptr->is_parent_str_type()) { + new_expr_info_ptr = expr_info_ptr->parent().get(); + } + + if (auto feature_info = env_ptr->mutable_feature_info()) { + std::string middle_node_leaf = new_expr_info_ptr->get_bs_middle_node_leaf(); + std::string bs_enum_str = new_expr_info_ptr->get_bs_enum_str(); + if (!new_expr_info_ptr->is_middle_node_root() && !new_expr_info_ptr->is_loop_iter_end()) { + // TODO(liuzhishan): 需要更精确的判断, 只能添加叶子节点, 中间节点都不能添加, 单值和 list 通过 + // is_basic 可以判断, 但是 map 不行。不过目前中间节点里的数据还没有 map。 + // common info 的统一由 CommonInfoNormal 添加, 这里只处理普通的 middle_node + if (const auto& middle_node_info = env_ptr->get_middle_node_info()) { + if (new_expr_info_ptr->is_basic() && !new_expr_info_ptr->is_from_repeated_common_info()) { + if (middle_node_leaf.find("BSHas") != std::string::npos) { + std::string exists_field_def = + middle_node_info->get_bs_exists_field_def(env_ptr, + middle_node_leaf, + new_expr_info_ptr->get_middle_node_field()); + LOG(INFO) << "add exists_field_def, bs_enum_str: " << bs_enum_str + << ", functor_name: " << middle_node_leaf + << ", exists_field_def: " << exists_field_def; + + feature_info->add_field_def(bs_enum_str, + middle_node_leaf, + exists_field_def, + NewVarType::SCALAR, + AdlogVarType::MIDDLE_NODE_LEAF); + feature_info->set_middle_node_info(bs_enum_str, middle_node_info->name(), new_expr_info_ptr->get_middle_node_field()); + } else if (expr_info_ptr->is_middle_node_leaf_list_size_method()) { + // leaf list size 方法,需要处理 list。 + // 注意, 需要根据 bs_enum_str 从 feature_info 中获取相应的信息。 + bs_enum_str = 
expr_info_ptr->get_bs_enum_str_trim_size(); + middle_node_leaf = expr_info_ptr->get_bs_middle_node_leaf_trim_size(); + const auto& middle_node_bs_enum_var_type = feature_info->middle_node_bs_enum_var_type(); + auto it = middle_node_bs_enum_var_type.find(bs_enum_str); + if (it != middle_node_bs_enum_var_type.end()) { + if (const auto& list_inner_type = it->second.list_inner_type()) { + std::string list_def = middle_node_info->get_bs_list_def( + env_ptr, bs_enum_str, middle_node_leaf, *list_inner_type); + LOG(INFO) << "add middle node list var def, bs_enum_str: " << bs_enum_str + << ", list def : " << list_def + << ", inner_type: " << *list_inner_type; + env_ptr->add_new_def(bs_enum_str, + list_def, + NewVarType::LIST); + + std::string list_field_def = + middle_node_info->get_bs_list_field_def(env_ptr, + middle_node_leaf, + it->second.adlog_field(), + *list_inner_type); + + LOG(INFO) << "add middle node field def, bs_enum_str: " << bs_enum_str + << ", list field def : " << list_field_def + << ", adlog_field: " << it->second.adlog_field() + << ", inner_type: " << *list_inner_type; + feature_info->add_field_def(bs_enum_str, + middle_node_leaf, + list_field_def, + NewVarType::LIST, + AdlogVarType::MIDDLE_NODE_LEAF); + feature_info->set_middle_node_info(bs_enum_str, middle_node_info->name(), it->second.adlog_field()); + } else { + LOG(INFO) << "cannot find middle node list inner type in feature_info" + << ", bs_enum_str: " << bs_enum_str + << ", expr: " << new_expr_info_ptr->origin_expr_str(); + } + } + } else { + // 目前还只有 list 和 scalar,暂时不考虑 map + LOG(INFO) << "expr: " << new_expr_info_ptr->origin_expr_str() + << ", type_str: " << new_expr_info_ptr->expr()->getType().getAsString() + << ", is_repeated_proto_type: " << new_expr_info_ptr->is_repeated_proto_type(); + if (new_expr_info_ptr->is_repeated_proto_iterator_type() || + new_expr_info_ptr->is_repeated_proto_type() || + new_expr_info_ptr->is_repeated_proto_ptr()) { + if (absl::optional inner_type = + 
tool::get_repeated_proto_inner_type(new_expr_info_ptr->expr()->getType())) { + std::string list_def = middle_node_info->get_bs_list_def(env_ptr, + bs_enum_str, + middle_node_leaf, + *inner_type); + env_ptr->add_new_def(bs_enum_str, + list_def, + NewVarType::LIST); + + std::string list_field_def = + middle_node_info->get_bs_list_field_def(env_ptr, + middle_node_leaf, + new_expr_info_ptr->get_middle_node_field(), + *inner_type); + + LOG(INFO) << "add list field_def, bs_enum_str: " << bs_enum_str + << ", expr: " << new_expr_info_ptr->origin_expr_str() + << ", functor_name: " << middle_node_leaf + << ", list_field_def: " << list_field_def + << ", inner_type: " << *inner_type; + feature_info->add_field_def(bs_enum_str, + middle_node_leaf, + list_field_def, + NewVarType::LIST, + AdlogVarType::MIDDLE_NODE_LEAF); + feature_info->set_middle_node_info(bs_enum_str, + middle_node_info->name(), + new_expr_info_ptr->get_middle_node_field()); + } else { + LOG(INFO) << "cannot find inner_type from type_str: " << new_expr_info_ptr->expr()->getType().getAsString() + << ", expr: " << new_expr_info_ptr->to_string(); + } + + if (auto& loop_info = env_ptr->cur_mutable_loop_info()) { + if (loop_info->loop_stage() == LoopStage::INIT) { + loop_info->set_is_middle_node_proto_list_loop(true); + loop_info->set_loop_var_expr_bs_enum_str(new_expr_info_ptr->get_bs_enum_str()); + } + } + } else { + // scalar + std::string field_def = + middle_node_info->get_bs_scalar_field_def(env_ptr, + middle_node_leaf, + new_expr_info_ptr->get_middle_node_field(), + new_expr_info_ptr->expr()->getType()); + + LOG(INFO) << "add field_def, bs_enum_str: " << bs_enum_str + << ", expr: " << new_expr_info_ptr->origin_expr_str() + << ", functor_name: " << middle_node_leaf + << ", field_def: " << field_def + << ", type: " << new_expr_info_ptr->expr()->getType().getAsString(); + feature_info->add_field_def(bs_enum_str, + middle_node_leaf, + field_def, + NewVarType::SCALAR, + AdlogVarType::MIDDLE_NODE_LEAF); + 
feature_info->set_middle_node_info(bs_enum_str, + middle_node_info->name(), + new_expr_info_ptr->get_middle_node_field()); + + // middle node str 比较特殊,可能会访问其 data() 和 size() 方法,因此需要单独添加变量定义。 + if (new_expr_info_ptr->is_string()) { + std::string str_scalar_def = + middle_node_info->get_bs_str_scalar_def(env_ptr, + bs_enum_str, + middle_node_leaf); + LOG(INFO) << "add middle node str scalar def, bs_enum_str: "<< bs_enum_str + << ", scalar def: " << str_scalar_def; + env_ptr->add_new_def(bs_enum_str, str_scalar_def, NewVarType::SCALAR); + } + } + } + } + } + } + } + } +} + +// if (photo_info == nullpptr) { } +void update_env_middle_node_check_cond(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_nullptr()) { + if (auto& if_info = env_ptr->cur_mutable_if_info()) { + if (if_info->if_stage() == IfStage::COND) { + if (if_info->has_cond_var_type(ExprType::ADLOG_MIDDLE_NODE_ROOT)) { + // 必定是出现在 if (photo_info == nullptr) 的判断中 + LOG(INFO) << "set_is_check_middle_node_root_cond: true, expr: " + << expr_info_ptr->origin_expr_str(); + if_info->set_is_check_middle_node_root_cond(true); + } + } + } + } +} + +void update_env_double_list(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_from_adlog() && !expr_info_ptr->is_from_repeated_common_info()) { + if (expr_info_ptr->is_var_proto_list()) { + if (auto& loop_info = env_ptr->cur_mutable_loop_info()) { + if (loop_info->loop_stage() == LoopStage::INIT) { + loop_info->set_is_proto_list_loop(true); + if (env_ptr->parent() != nullptr) { + if (auto& parent_loop_info = env_ptr->parent()->cur_mutable_loop_info()) { + parent_loop_info->set_is_child_proto_list_loop(true); + } + } + } + } + } + } +} + +void update_env_get_seq_list_touch(ExprInfo *expr_info_ptr, Env *env_ptr) { + if (expr_info_ptr->is_seq_list_root() && !expr_info_ptr->is_seq_list_reco_proto_type()) { + if (const auto& decl_info = env_ptr->cur_decl_info()) { + const auto& seq_list_info = env_ptr->get_seq_list_info(); + if (!seq_list_info) { + 
env_ptr->touch_seq_list_info(decl_info->name()); + } + } + } +} + +void update_env_get_seq_list_def(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_seq_list_root_deref() && !expr_info_ptr->is_seq_list_reco_proto_type()) { + if (const auto& decl_info = env_ptr->cur_decl_info()) { + if (decl_info->name().size() > 0) { + if (expr_info_ptr->call_expr_params_size() > 0) { + auto param_expr_info_ptr = expr_info_ptr->call_expr_param(0); + if (param_expr_info_ptr != nullptr) { + std::string caller_name = param_expr_info_ptr->get_first_caller_name(); + std::string type_str = param_expr_info_ptr->expr()->getType().getAsString(); + if (auto &seq_list_info = env_ptr->mutable_seq_list_info()) { + seq_list_info->update(decl_info->name(), caller_name, type_str); + env_ptr->add_new_def(decl_info->name(), + seq_list_info->get_def(), + seq_list_info->get_var_type()); + } + } + } + } + } + } +} + +void update_env_get_seq_list_if_cond(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_seq_list_root_ref() && !expr_info_ptr->is_seq_list_reco_proto_type()) { + if (auto& if_info = env_ptr->cur_mutable_if_info()) { + if (auto& binary_op_info = env_ptr->cur_mutable_binary_op_info()) { + if (binary_op_info->is_not_equal_op()) { + if (binary_op_info->right_expr_str() == "nullptr") { + if_info->set_is_check_seq_list_cond(true); + } + } + } + } + } +} + +void update_env_get_seq_list_loop(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_from_seq_list()) { + if (auto& loop_info = env_ptr->cur_mutable_loop_info()) { + if (loop_info->loop_stage() == LoopStage::INIT) { + loop_info->set_is_seq_list_loop(true); + } + } + } +} + +void update_env_proto_list_leaf(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_general_adlog_var()) { + if (auto& loop_info = env_ptr->mutable_loop_info()) { + if (loop_info->is_proto_list_loop() && + env_ptr->is_in_loop_body() && + loop_info->prefix_adlog().size() > 0) { + if (const auto parent = 
loop_info->parent_env_ptr()) { + const auto& parent_loop_info = parent->get_loop_info(); + // 确定是单层 proto list loop + if (!parent_loop_info) { + std::string prefix_adlog = loop_info->prefix_adlog(); + if (auto& proto_list_info = loop_info->mutable_env_ptr()->touch_proto_list_info(prefix_adlog)) { + LOG(INFO) << "touch_proto_list_info: " << prefix_adlog + << ", prefix: " << proto_list_info->prefix() + << ", prefix_adlog: " << proto_list_info->prefix_adlog() + << ", expr: " << expr_info_ptr->origin_expr_str(); + + std::string adlog_str = expr_info_ptr->get_adlog_field_str(); + if (prefix_adlog.size() > 0 && adlog_str.size() > prefix_adlog.size()) { + // 不包括叶子节点,field_str 长度必须大于 0 + if (absl::optional field_str = + expr_info_ptr->get_adlog_field_str_after_loop_var()) { + if (*field_str != "size") { + proto_list_info->add_field(*field_str); + LOG(INFO) << "add proto_list field: " << *field_str; + } + } else { + LOG(INFO) << "cannot find field after loop_var: " << expr_info_ptr->origin_expr_str() + << ", loop_var: " << loop_info->loop_var(); + } + } else { + LOG(INFO) << "loop_var prefix_adlog is empty, loop_var: " + << loop_info->loop_var(); + } + } + } else { + LOG(INFO) << "find parent loop of loop!"; + } + } else { + LOG(INFO) << "parent of loop is nullptr!"; + } + } + } + } +} + +void update_env_proto_list_size(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_general_proto_list_size_method()) { + } +} + +// iter->second or it->second +void update_env_general_iter_second(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_iter_second()) { + if (auto& loop_info = env_ptr->mutable_loop_info()) { + loop_info->set_loop_iter(expr_info_ptr->get_first_decl_ref()); + } + } +} + +void update_env_general_basic_scalar_def(ExprInfo* expr_info_ptr, Env* env_ptr) { + // 比较特殊,在 general_loop_var_method 中处理。 + if (expr_info_ptr->is_loop_var_size_method()) { + return; + } + + // 在 general_str_call 中单独处理 + if (expr_info_ptr->is_parent_str_ref() || 
expr_info_ptr->is_parent_str_type()) { + return; + } + + if (expr_info_ptr->is_general_proto_list_size_method()) { + return; + } + + if (expr_info_ptr->is_method_is_train()) { + return; + } + + if (expr_info_ptr->is_from_query_token() || expr_info_ptr->is_from_photo_text()) { + return; + } + + LOG(INFO) << "expr: " << expr_info_ptr->to_string() + << ", is_from_reco_user_info: " << expr_info_ptr->is_from_reco_user_info(); + + // 单值, 不包括 list size + if (expr_info_ptr->is_basic_scalar() && + expr_info_ptr->is_from_adlog() && + !expr_info_ptr->is_list_size_method() && + !expr_info_ptr->is_from_reco_user_info() && + !expr_info_ptr->is_from_seq_list() && + !expr_info_ptr->is_from_repeated_common_info()) { + std::string bs_enum_str = expr_info_ptr->get_bs_enum_str(); + Env* target_env = env_ptr; + // 比较特殊,可以定义在 root env。 + if (bs_enum_str == "adlog_time") { + target_env = env_ptr->get_mutable_root(); + } + + if (target_env == nullptr) { + LOG(INFO) << "get root env failed!"; + return; + } + // 单值, 如 int64_t user_id = adlog.user_info().id() + // + // 一个变量只应该被定义一次, 如果别别的变量引用, 则别的变量不替换, 例 + // int64_t user_id = adlog.user_info().id(); + // int64_t user_id_v1 = user_id; + // 被替换为 + // int64_t user_id = BSFieldHelper::GetSingular(*bs, BSFieldEnum::adlog_user_info_id); + // int64_t user_id_v1 = user_id; + if (!expr_info_ptr->is_decl_ref_expr()) { + if (env_ptr->is_new_var_not_exists(bs_enum_str)) { + if (const auto &decl_info = env_ptr->cur_decl_info()) { + if (!starts_with(decl_info->name(), "__") && + !starts_with(decl_info->name(), "* __")) { + if (expr_info_ptr->to_string() == + stmt_to_string(decl_info->init_expr())) { + LOG(INFO) + << "add basic sclar in decl, bs_enum_str: " << bs_enum_str + << ", var_name: " << decl_info->name() + << ", expr: " << expr_info_ptr->origin_expr_str() + << ", def: " + << expr_info_ptr->get_bs_scalar_def(decl_info->name()); + target_env->add_new_def_meta( + bs_enum_str, decl_info->name(), + expr_info_ptr->get_bs_scalar_def(decl_info->name()), 
+ NewVarType::SCALAR); + target_env->set_normal_adlog_field_info(bs_enum_str, expr_info_ptr->get_adlog_field_str()); + } else { + LOG(INFO) << "add basic sclar, bs_enum_str: " << bs_enum_str + << ", expr: " << expr_info_ptr->origin_expr_str() + << ", def: " << expr_info_ptr->get_bs_scalar_def(); + target_env->add_new_def_meta( + bs_enum_str, expr_info_ptr->get_bs_scalar_def(), + NewVarType::SCALAR); + target_env->set_normal_adlog_field_info(bs_enum_str, expr_info_ptr->get_adlog_field_str()); + } + } + } else { + LOG(INFO) << "add basic sclar, bs_enum_str: " << bs_enum_str + << ", expr: " << expr_info_ptr->origin_expr_str() + << ", def: " << expr_info_ptr->get_bs_scalar_def() + << ", rewrite_reco_user_info: " << GlobalConfig::Instance()->rewrite_reco_user_info; + target_env->add_new_def_meta(bs_enum_str, + expr_info_ptr->get_bs_scalar_def(), + NewVarType::SCALAR); + target_env->set_normal_adlog_field_info(bs_enum_str, expr_info_ptr->get_adlog_field_str()); + } + } + } + } +} + +void update_env_general_str_call(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_from_adlog() && + expr_info_ptr->is_cxx_member_call_expr() && + !expr_info_ptr->is_from_query_token() && + !expr_info_ptr->is_from_photo_text() && + !expr_info_ptr->is_from_middle_node() && + !expr_info_ptr->is_from_repeated_common_info() && + !expr_info_ptr->is_from_implicit_loop_var()) { + // string.data() + if (expr_info_ptr->is_parent_str_ref() || expr_info_ptr->is_parent_str_type()) { + if (auto expr_parent = expr_info_ptr->parent()) { + std::string expr_str = expr_parent->origin_expr_str(); + std::string bs_enum_str = expr_parent->get_bs_enum_str(); + if (tool::is_adlog_field(bs_enum_str)) { + if (env_ptr->is_new_var_not_exists(bs_enum_str)) { + if (env_ptr->is_loop_var(expr_str) || expr_parent->is_from_list()) { + // 如果存在会忽略 + LOG(INFO) << "add loop var str list def, bs_enum_str: " << bs_enum_str + << ", list def: " << expr_parent->get_bs_list_def(); + 
env_ptr->add_new_def_meta(bs_enum_str, expr_parent->get_bs_list_def(), NewVarType::LIST); + env_ptr->set_normal_adlog_field_info(bs_enum_str, expr_parent->get_adlog_field_str()); + } else { + LOG(INFO) << "add scalar var str def, bs_enum_str: " << bs_enum_str + << ", scalar def: " << expr_parent->get_bs_scalar_def(); + env_ptr->add_new_def_meta(bs_enum_str, expr_parent->get_bs_scalar_def(), NewVarType::SCALAR); + env_ptr->set_normal_adlog_field_info(bs_enum_str, expr_parent->get_adlog_field_str()); + } + } + } + } + } + } +} + +void update_env_general_loop_var_method(ExprInfo *expr_info_ptr, Env *env_ptr) { + // xxx.size() or xxx.data() + if (expr_info_ptr->is_from_adlog() && + expr_info_ptr->is_parent_loop_var_ref() && + !expr_info_ptr->is_from_query_token() && + !expr_info_ptr->is_from_photo_text() && + expr_info_ptr->is_cxx_member_call_expr() && + (expr_info_ptr->callee_name() == "size" || expr_info_ptr->callee_name() == "data") && + !expr_info_ptr->is_from_repeated_common_info() && + !expr_info_ptr->is_from_implicit_loop_var()) { + // seg.size() + if (auto expr_parent = expr_info_ptr->parent()) { + std::string expr_str = expr_parent->origin_expr_str(); + std::string bs_enum_str = expr_parent->get_bs_enum_str(); + // 如果存在会忽略 + if (starts_with(bs_enum_str, "adlog") || starts_with(bs_enum_str, "ad_log")) { + if (auto feature_info = env_ptr->get_feature_info()) { + if (feature_info->is_in_bs_enum_var_type(bs_enum_str)) { + if (env_ptr->is_new_var_not_exists(bs_enum_str)) { + LOG(INFO) << "add loop var list def, bs_enum_str: " << bs_enum_str + << ", list def: " << expr_parent->get_bs_list_def(); + env_ptr->add_new_def_meta(bs_enum_str, + expr_parent->get_bs_list_def(), + NewVarType::LIST); + env_ptr->set_normal_adlog_field_info(bs_enum_str, expr_parent->get_adlog_field_str()); + } + } + } + } + } + } +} + +// 自定义的 adlog 变量, 需要在 Env 中添加定义。 +// 注意: 与 basic_scalar 有区别, 可能是来自 list 的变量,最终的取值是普通类型。 +// +// 此处只处理非 common info 的情况, 并且不包含模板参数。 +// +// 1. 
list +// expr 在循环中,并且其中包含循环变量,则必是来自 list。如 +// for(int i = 0; i < follow_num && i < max_follow_num; ++i){ +// const auto & follow_info = action_detail.follow(i); +// if(follow_info.id() == 90041) { +// continue; +// } +// +// 2. 单值 +// 如 adlog.user_info().id() +// +// 和上面的 scalar 逻辑有重复,需要整理下。is_basic 可能来自 list, is_basic_scalar 必须来自单值。 +// LOG(INFO) << "expr: " << expr_info_ptr->origin_expr_str() +// << ", is_from_adlog: " << expr_info_ptr->is_from_adlog() +// << ", is_basic: " << expr_info_ptr->is_basic() +// << ", is_from_middle_node: " << expr_info_ptr->is_from_middle_node() +// << ", is_from_action_detail: " << expr_info_ptr->is_from_action_detail_map() +// << ", is_decl_ref_expr: " << expr_info_ptr->is_decl_ref_expr() +// << ", contains_template_parameter(): " << expr_info_ptr->contains_template_parameter(); +void update_env_general_basic_expr(ExprInfo* expr_info_ptr, Env* env_ptr) { + LOG(INFO) << "expr: " << expr_info_ptr->origin_expr_str() + << ", is general adlog var: " << expr_info_ptr->is_general_adlog_var() + << ", is_from_adlog: " << expr_info_ptr->is_from_adlog() + << ", is_from_reco_user_info: " << expr_info_ptr->is_from_reco_user_info() + << ", is_from_implicit_loop_var: " << expr_info_ptr->is_from_implicit_loop_var() + << ", is_decl_ref_expr: " << expr_info_ptr->is_decl_ref_expr() + << ", contains_loop_var: " << expr_info_ptr->contains_loop_var() + << ", is_basic: " << expr_info_ptr->is_basic() + << ", is_basic_scalar: " << expr_info_ptr->is_basic_scalar() + << ", is_from_list: " << expr_info_ptr->is_from_list() + << ", is_from_map: " << expr_info_ptr->is_from_map() + << ", expr_info_ptr->is_from_repeated_common_info(): " << expr_info_ptr->is_from_repeated_common_info() + << ", is_cxx_operator_call_expr: " << expr_info_ptr->is_cxx_operator_call_expr() + << ", is_cxx_operator_call_expr_deref: " << expr_info_ptr->is_cxx_operator_call_expr_deref() + << ", is_loop_var_size_method: " << expr_info_ptr->is_loop_var_size_method() + << ", 
is_general_proto_list_size_method: " << expr_info_ptr->is_general_proto_list_size_method() + << ", is_from_query_token: " << expr_info_ptr->is_from_query_token() + << ", is_from_photo_text: " << expr_info_ptr->is_from_photo_text(); + if (expr_info_ptr->is_loop_var_size_method()) { + return; + } + + // 在 general_str_call 中单独处理 + if (expr_info_ptr->is_parent_str_ref() || expr_info_ptr->is_parent_str_type()) { + return; + } + + // 叶子节点 size 不需要添加定义。 + if (expr_info_ptr->is_general_proto_list_size_method()) { + return; + } + + // reco user info list size + if (expr_info_ptr->is_list_size_method()) { + return; + } + + // 在 QueryToken 中处理。 + if (expr_info_ptr->is_from_query_token() || expr_info_ptr->is_from_photo_text()) { + return; + } + + if (expr_info_ptr->is_general_adlog_var()) { + std::string bs_enum_str = expr_info_ptr->get_bs_enum_str(); + if (!env_ptr->is_in_for_range_init() && env_ptr->is_new_var_not_exists(bs_enum_str)) { + if (!expr_info_ptr->is_from_repeated_common_info() && + (expr_info_ptr->contains_loop_var() || expr_info_ptr->is_from_list())) { + // list + // 逻辑类似,可以合并 + if (const auto& loop_info = env_ptr->get_loop_info()) { + LOG(INFO) << "add list var def with meta, in loop body, bs_enum_str: " << bs_enum_str + << ", def: " << expr_info_ptr->get_bs_list_def() + << ", expr: " << expr_info_ptr->origin_expr_str(); + env_ptr->add_new_def_meta(bs_enum_str, expr_info_ptr->get_bs_list_def(), NewVarType::LIST); + env_ptr->set_normal_adlog_field_info(bs_enum_str, expr_info_ptr->get_adlog_field_str()); + if (auto& loop_info = env_ptr->mutable_loop_info()) { + std::string var_name = env_ptr->find_new_var_name(bs_enum_str); + if (var_name.size() > 0) { + loop_info->add_leaf_field(var_name); + } else { + LOG(ERROR) << "cannot find var_name, bs_enum_str: " << bs_enum_str + << ", expr: " << expr_info_ptr->to_string(); + } + } + } else if (expr_info_ptr->is_action_detail_leaf()) { + // 来自 action detail, 可能是按固定下标获取,没有 for 循环。 + // 示例: 
teams/ad/ad_algorithm/feature/fast/impl/extract_live_user_impression_avegap_new.h + // const auto &ad_live_action_detail = adlog.user_info().ad_live_action_detail(); + // auto iter = ad_live_action_detail.find(no); + // if (iter == ad_live_action_detail.end()) { + // return; + // } + // const auto &live_infos = iter->second.list(); + // if (live_infos.size() < 2) { + // return; + // } + // if ((live_infos[0].action_time() < live_infos[live_infos.size() - 1].action_time())) { + // return; + // } + + LOG(INFO) << "add list var def with meta, bs_enum_str: " << bs_enum_str + << ", def: " << expr_info_ptr->get_bs_list_def() + << ", expr: " << expr_info_ptr->origin_expr_str(); + env_ptr->add_new_def_meta(bs_enum_str, expr_info_ptr->get_bs_list_def(), NewVarType::LIST); + env_ptr->set_normal_adlog_field_info(bs_enum_str, expr_info_ptr->get_adlog_field_str()); + } else if (absl::optional int_param = expr_info_ptr->find_int_param()) { + LOG(INFO) << "add list var def with meta, has int_param, bs_enum_str: " << bs_enum_str + << ", def: " << expr_info_ptr->get_bs_list_def() + << ", expr: " << expr_info_ptr->origin_expr_str(); + env_ptr->add_new_def_meta(bs_enum_str, expr_info_ptr->get_bs_list_def(), NewVarType::LIST); + env_ptr->set_normal_adlog_field_info(bs_enum_str, expr_info_ptr->get_adlog_field_str()); + } else { + LOG(INFO) << "cannot find loop parent, expr: " << stmt_to_string(expr_info_ptr->expr()); + } + } else if (!expr_info_ptr->is_decl_ref_expr() && expr_info_ptr->is_basic_scalar()) { + // 单值, 如 int64_t user_id = adlog.user_info().id() + // + // 一个变量只应该被定义一次, 如果别别的变量引用, 则别的变量不替换, 例 + // int64_t user_id = adlog.user_info().id(); + // int64_t user_id_v1 = user_id; + // 被替换为 + // int64_t user_id = BSFieldHelper::GetSingular(*bs, BSFieldEnum::adlog_user_info_id); + // int64_t user_id_v1 = user_id; + if (const auto &decl_info = env_ptr->cur_decl_info()) { + if (!starts_with(decl_info->name(), "__") && !starts_with(decl_info->name(), "* __")) { + if 
(expr_info_ptr->to_string() == stmt_to_string(decl_info->init_expr())) { + LOG(INFO) << "add scalar def from decl, bs_enum_str: " << bs_enum_str + << ", name: " << decl_info->name() + << ", scalar def: " << expr_info_ptr->get_bs_scalar_def(decl_info->name()); + env_ptr->add_new_def_meta(bs_enum_str, decl_info->name(), + expr_info_ptr->get_bs_scalar_def(decl_info->name()), + NewVarType::SCALAR); + env_ptr->set_normal_adlog_field_info(bs_enum_str, expr_info_ptr->get_adlog_field_str()); + } else { + LOG(INFO) << "add scalar def, bs_enum_str: " << bs_enum_str + << ", scalar def: " << expr_info_ptr->get_bs_scalar_def(); + env_ptr->add_new_def_meta(bs_enum_str, + expr_info_ptr->get_bs_scalar_def(), + NewVarType::SCALAR); + env_ptr->set_normal_adlog_field_info(bs_enum_str, expr_info_ptr->get_adlog_field_str()); + } + } + } else { + LOG(INFO) << "add scalar def, bs_enum_str: " << bs_enum_str + << ", def: " << expr_info_ptr->get_bs_scalar_def(); + env_ptr->add_new_def_meta(bs_enum_str, + expr_info_ptr->get_bs_scalar_def(), + NewVarType::SCALAR); + env_ptr->set_normal_adlog_field_info(bs_enum_str, expr_info_ptr->get_adlog_field_str()); + } + } + } + } +} + +void update_env_general_binary_op_info(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (auto& binary_op_info = env_ptr->cur_info(InfoTraits::v)) { + if (binary_op_info->is_assign_op()) { + // 当前 stmt 已经赋值,必须查看父级 Env + if (Env* parent = env_ptr->parent()) { + if (parent->is_template_int_ref(expr_info_ptr->origin_expr())) { + if (expr_info_ptr->origin_expr_str() == binary_op_info->left_expr_str()) { + binary_op_info->set_left_expr_type(ExprType::TEMPLATE_INT_REF); + } + } + } + + if (is_integer(expr_info_ptr->origin_expr_str())) { + if (expr_info_ptr->origin_expr_str() == binary_op_info->right_expr_str()) { + binary_op_info->set_right_expr_type(ExprType::INT); + } + } + } + } +} + +void update_env_general_decl_info(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (const auto& decl_info = env_ptr->cur_decl_info()) { + // 来自 adlog 
的变量都需要删除, 在 Env 中重新添加定义。 + LOG(INFO) << "expr: " << stmt_to_string(expr_info_ptr->expr()) + << ", origin_expr: " << expr_info_ptr->raw_expr_str() + << ", is_from_adlog: " << expr_info_ptr->is_from_adlog() + << ", is_from_list: " << expr_info_ptr->is_from_list() + << ", is_reco_proto: " << expr_info_ptr->is_reco_proto_type() + << ", is_from_seq_list: " << expr_info_ptr->is_from_seq_list() + << ", is_from_seq_list_reco: " << expr_info_ptr->is_from_seq_list_reco() + << ", need_delete: " << Deleter::need_delete(expr_info_ptr, env_ptr); + if (Deleter::need_delete(expr_info_ptr, env_ptr)) { + env_ptr->add_deleted_var_by_expr_str(expr_info_ptr->raw_expr_str()); + } + } +} + +void update_env_general_get_norm_query(ExprInfo* expr_info_ptr, Env* env_ptr) { + // 在 ConstructorInfo 中标记是否用到 GetNormQuery + if (expr_info_ptr->is_get_norm_query()) { + if (auto constructor_info = env_ptr->mutable_constructor_info()) { + constructor_info->set_has_get_norm_query(true); + } + } +} + +void update_env_general_loop_var_expr(ExprInfo* expr_info_ptr, Env* env_ptr) { + // 添加 for range loop 变量到 loop_info + if (auto& loop_info = env_ptr->cur_mutable_loop_info()) { + if (const auto& decl_info = env_ptr->cur_decl_info()) { + if (loop_info->loop_stage() == LoopStage::INIT) { + if (tool::is_implicit_loop_var(decl_info->name())) { + loop_info->set_loop_var_expr(expr_info_ptr->get_loop_var_expr()); + loop_info->set_prefix_adlog(expr_info_ptr->get_adlog_field_str()); + LOG(INFO) << "set loop_var_expr: " << stmt_to_string(expr_info_ptr->get_loop_var_expr()) + << ", set prefix_adlog: " << expr_info_ptr->get_adlog_field_str(); + } + } + } + } +} + +// 示例: teams/ad/ad_algorithm/feature/fast/impl/extract_match_dense_num.h, +// action_vec_ 是 std::vector 类型, 构造函数中会进行初始化。 +// +// const auto& ad_action = adlog.user_info().explore_long_term_ad_action(); +// for (auto action_no : action_vec_) { +// auto action_no_iter = ad_action.find(action_no); +// ... 
+// } +// +// 先根据第一个 int 替换, 然后用字符串替换生成多个 lambda 函数。 +void update_env_general_int_list_member_loop(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_cxx_member_call_expr() && expr_info_ptr->callee_name() == "end") { + if (expr_info_ptr->is_from_implicit_loop_var() && expr_info_ptr->parent() != nullptr) { + LOG(INFO) << "expr: " << expr_info_ptr->origin_expr_str() + << ", is_int_list_member_ref: " + << expr_info_ptr->parent()->is_int_list_member_ref(); + if (expr_info_ptr->parent()->is_int_list_member_ref()) { + if (auto &loop_info = env_ptr->cur_mutable_loop_info()) { + if (!loop_info->is_for_stmt() && + loop_info->loop_stage() == LoopStage::INIT) { + loop_info->set_is_int_list_member_loop(true); + if (const auto feature_info = env_ptr->get_feature_info()) { + std::string loop_var_expr_str = tool::trim_this(loop_info->loop_var_expr_str()); + std::vector values = feature_info->get_int_list_member_values(loop_var_expr_str); + loop_info->set_int_list_member_values(values); + loop_info->set_int_list_index(0); + LOG(INFO) << "set_int_list_member_values: " << absl::StrJoin(values, ",") + << ", loop_var_expr_str: " << loop_var_expr_str + << ", int_list_index: " << 0; + } + } + } + } + } + } + + // 来自自定义变量的 int list + // 示例: teams/ad/ad_algorithm/feature/fast/impl/extract_combine_user_click_match_num_long.h + // std::vector urb_type_array = {1, 2, 3, 4, 5, 6, 8, 19}; + // for (int i = 0; i < urb_type_array.size(); i++) { + // int urb_type = urb_type_array[i]; + // const auto& ad_action = adlog.user_info().explore_long_term_ad_action(); + // auto iter = ad_action.find(urb_type); + // if (iter == ad_action.end()) { + // ... + // } else { + // ... 
+ // } + // } + if (expr_info_ptr->is_cxx_member_call_expr() && + expr_info_ptr->callee_name() == "size" && + expr_info_ptr->parent() != nullptr) { + if (expr_info_ptr->parent()->is_int_list_var_ref()) { + if (auto& loop_info = env_ptr->cur_mutable_loop_info()) { + if (loop_info->is_for_stmt() && loop_info->loop_stage() == LoopStage::INIT) { + loop_info->set_is_int_list_member_loop(true); + + std::string loop_var_expr_str = tool::trim_this(expr_info_ptr->parent()->origin_expr_str()); + clang::Expr* init_expr = env_ptr->find(loop_var_expr_str); + if (init_expr != nullptr) { + std::vector values = tool::get_int_list_values_from_init_str(stmt_to_string(init_expr)); + loop_info->set_int_list_member_values(values); + loop_info->set_int_list_index(0); + LOG(INFO) << "set_int_list_member_values: " << absl::StrJoin(values, ",") + << ", loop_var_expr_str: " << loop_var_expr_str << ", int_list_index: " << 0; + } + } + } + } + } +} + +// 检查 item_size, 但是所有逻辑都在 if body 里,并不是提前 return。在 if_info 中标记。 +// if (adlog.item_size() > pos) { +// ... +// } +// 或者 +// if (pos < adlog.item_size()) { +// ... +// } +// 或者 +// if (adlog.item_size() <= pos) { +// return; +// } +// 或者 +// if (pos >= adlog.item_size()) { +// return; +// } +void update_env_general_check_item_pos_include(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_pos_ref()) { + if (auto& if_info = env_ptr->mutable_if_info()) { + if (if_info->if_stage() == IfStage::COND) { + if (const auto& binary_op_info = env_ptr->cur_binary_op_info()) { + if (expr_info_ptr->origin_expr_str() == binary_op_info->right_expr_str() && + (binary_op_info->is_greater_op() || binary_op_info->is_greater_equal_op()) && + binary_op_info->left_expr_str() == "adlog.item_size()") { + // if (adlog.item_size() >= pos) { ... 
} + if_info->set_is_check_item_pos_include_cond(true); + } else if (binary_op_info->left_expr_str() == "adlog.item_size()" && + (binary_op_info->is_less_equal_op() || binary_op_info->is_less_op())) { + // if (adlog.item_size() <= pos) { return; } + if_info->set_is_check_item_pos_include_cond(false); + } else if (binary_op_info->left_expr_str() == "pos" && + (binary_op_info->is_less_op() || binary_op_info->is_less_equal_op()) && + binary_op_info->right_expr_str() == "adlog.item_size()") { + // if (pos < adlog.item_size()) { ... } + if_info->set_is_check_item_pos_include_cond(true); + } else if (binary_op_info->left_expr_str() == "pos" && + (binary_op_info->is_greater_op() || binary_op_info->is_greater_equal_op()) && + binary_op_info->right_expr_str() == "adlog.item_size()") { + // if (pos >= adlog.item_size()) { return; } + if_info->set_is_check_item_pos_include_cond(false); + } + } + } + } + } +} + +void update_env_general_reco_user_info(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr == nullptr || env_ptr == nullptr) { + return; + } + + if (expr_info_ptr->is_from_adlog()) { + std::string bs_enum_str = expr_info_ptr->get_bs_enum_str(); + if (tool::is_str_from_reco_user_info(bs_enum_str)) { + if (env_ptr->is_in_loop_init()) { + if (auto& loop_info = env_ptr->cur_mutable_loop_info()) { + if (GlobalConfig::Instance()->rewrite_reco_user_info) { + loop_info->set_is_reco_user_info_loop(true); + } else { + loop_info->set_is_reco_user_info_loop(false); + } + } + } + } + } +} + +void update_env_general_proto_map_loop(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_var_proto_map() && + !expr_info_ptr->is_from_repeated_common_info() && + !expr_info_ptr->is_from_query_token() && + !expr_info_ptr->is_from_photo_text() && + !expr_info_ptr->is_from_middle_node()) { + if (!starts_with(expr_info_ptr->origin_expr_str(), "__") && + !starts_with(expr_info_ptr->origin_expr_str(), "*")) { + // 正常的变量,不是隐式变量 + if (auto &loop_info = 
env_ptr->cur_mutable_loop_info()) { + if (loop_info->loop_stage() == LoopStage::INIT) { + loop_info->set_is_general_proto_map_loop(true); + std::string bs_enum_str = expr_info_ptr->get_bs_enum_str(); + if (auto parent = env_ptr->parent()) { + if (parent->is_new_var_not_exists(bs_enum_str)) { + LOG(INFO) << "add new var map, bs_enum_str: " << bs_enum_str + << ", expr: " << expr_info_ptr->origin_expr_str() + << ", map_def: " << expr_info_ptr->get_bs_map_def(); + parent->add_new_def(bs_enum_str, expr_info_ptr->get_bs_map_def(), + NewVarType::MAP); + parent->add_attr_meta(bs_enum_str + "_key"); + parent->add_attr_meta(bs_enum_str + "_value"); + } + } + } + } + } + } +} + +void update_env_general_proto_list_size_method(ExprInfo* expr_info_ptr, Env* env_ptr) { + // 叶子节点 proto list + // end 来自循环变量 + if (!expr_info_ptr->is_from_repeated_common_info()) { + if (expr_info_ptr->callee_name() == "size" || expr_info_ptr->is_from_implicit_loop_var()) { + if (auto parent = expr_info_ptr->parent().get()) { + if (parent->is_repeated_proto_list_leaf_type()) { + std::string bs_enum_str = parent->get_bs_enum_str(); + if (bs_enum_str.size() > 0 && tool::is_adlog_field(bs_enum_str)) { + LOG(INFO) << "add proto list leaf def, bs_enum_str: " << bs_enum_str + << ", list def: " << parent->get_bs_list_def(); + env_ptr->add_new_def_meta(bs_enum_str, parent->get_bs_list_def(), NewVarType::LIST); + env_ptr->set_normal_adlog_field_info(bs_enum_str, parent->get_adlog_field_str()); + } + } + } + } + } +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/ExprParserDetail.h b/convert/ExprParserDetail.h new file mode 100644 index 0000000..1093829 --- /dev/null +++ b/convert/ExprParserDetail.h @@ -0,0 +1,139 @@ +#pragma once + +#include +#include + +#include "clang/AST/Type.h" +#include "clang/AST/Expr.h" + +#include "Env.h" +#include "Tool.h" +#include "ExprInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +/// =============== 详细逻辑 
/// =============== Detailed logic ===============
///
/// Every function below inspects one parsed expression (`expr_info_ptr`) and records what it
/// learns into the environment (`env_ptr`).

void update_env_common_info_prepare(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_normal(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_fixed_list(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_multi_map(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_multi_int_list(ExprInfo *expr_info_ptr, Env *env_ptr);

/// Updated while is_confirmed is false; once is_confirmed is true, no further updates happen.
void update_env_common_info_prefix(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_name_value_alias(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_method_name(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_int_value(ExprInfo* expr_info_ptr, Env* env_ptr);

void update_env_common_info_normal_detail_with_value(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_normal_detail_without_value(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_normal_name_value(ExprInfo* expr_info_ptr, Env* env_ptr);

void update_env_common_info_normal_check_cond_template_int(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_normal_int_value_in_if(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_normal_int_value_in_switch(ExprInfo* expr_info_ptr, Env* env_ptr);

void update_env_common_info_normal_enum(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_normal_detail_def(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_normal_size_method(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_normal_list_method(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_normal_list_method_address(ExprInfo* expr_info_ptr, Env* env_ptr);

/// list_size_method used in a "length is not equal to" check, e.g.
///   if (attr.float_list_value_size() != 32) { ... }
///   if (attr.float_list_value_size() % 3 != 0) { ... }
void update_env_common_info_normal_list_size_method_not_equal(ExprInfo* expr_info_ptr, Env* env_ptr);

/// User-defined helper method: replace the parameters inside it.
void update_env_common_info_normal_helper_method(ExprInfo* expr_info_ptr, Env* env_ptr);

/// Add definitions when a helper function is encountered inside the Extract function.
void update_env_common_info_normal_helper_def(ExprInfo* expr_info_ptr, Env* env_ptr);

/// Comparison against the common info map end().
void update_env_common_info_normal_map_end_cond(ExprInfo* expr_info_ptr, Env* env_ptr);

/// There may be int64 -> int conversions.
void update_env_common_info_normal_list_loop_var_type(ExprInfo* expr_info_ptr, Env* env_ptr);

void update_env_common_info_fixed_list_touch(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_fixed_list_enum_member_ref(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_fixed_list_size_method(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_fixed_list_leaf_method(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_fixed_list_list_method(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_fixed_list_list_def(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_fixed_list_map_def(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_fixed_list_scalar_def(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_fixed_list_list_method_address(ExprInfo* expr_info_ptr, Env* env_ptr);

void update_env_common_info_multi_map_touch(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_multi_map_check_cond(ExprInfo* expr_info_ptr, Env* env_ptr);

void update_env_common_info_multi_int_list_add_map(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_common_info_multi_int_list_def(ExprInfo* expr_info_ptr, Env* env_ptr);

/// action_detail related logic.
void update_env_action_detail_prefix(ExprInfo* expr_info_ptr, Env* env_ptr);

void update_env_action_detail_new_def(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_action_detail_check_cond(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_action_detail_action_param_def(ExprInfo* expr_info_ptr, Env* env_ptr);

/// action_detail_fixed related logic.
void update_env_action_detail_fixed_touch(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_action_detail_fixed_var_def(ExprInfo* expr_info_ptr, Env* env_ptr);

/// Middle node related logic.
void update_env_middle_node_root(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_middle_node_leaf_def(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_middle_node_check_cond(ExprInfo* expr_info_ptr, Env* env_ptr);

/// Information about two-level proto lists.
///
/// GetSeqList related logic.
void update_env_get_seq_list_touch(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_get_seq_list_def(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_get_seq_list_if_cond(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_get_seq_list_loop(ExprInfo* expr_info_ptr, Env* env_ptr);

/// ProtoList related logic: the intermediate proto list.
void update_env_proto_list_leaf(ExprInfo* expr_info_ptr, Env* env_ptr);

/// Updates the proto list size; the size may be read outside of a for loop.
/// Example: teams/ad/ad_algorithm/feature/fast/impl/extract_combine_like_author_id.h
///   const auto & action_detail = adlog.user_info().action_detail();
///   int like_num = action_detail.like().size();
///   for (int i = 0; i < like_num && i < max_like_num; ++i) {
///     ...
///   }
void update_env_proto_list_size(ExprInfo* expr_info_ptr, Env* env_ptr);

/// QueryToken related logic.
void update_env_query_token_loop(ExprInfo* expr_info_ptr, Env* env_ptr);

/// General logic.
void update_env_general_iter_second(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_general_basic_scalar_def(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_general_str_call(ExprInfo* expr_info_ptr, Env* env_ptr);
// NOTE(review): the first parameter name below is misspelled ("epxr_info_ptr"); harmless in
// a declaration, but worth aligning with the other declarations.
void update_env_general_loop_var_method(ExprInfo* epxr_info_ptr, Env* env_ptr);
void update_env_general_basic_expr(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_general_binary_op_info(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_general_decl_info(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_general_get_norm_query(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_general_loop_var_expr(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_general_int_list_member_loop(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_general_check_item_pos_include(ExprInfo* expr_info_ptr, Env* env_ptr);
void update_env_general_reco_user_info(ExprInfo* expr_info_ptr, Env* env_ptr);

/// proto map leaf node.
void update_env_general_proto_map_loop(ExprInfo* expr_info_ptr, Env* env_ptr);

/// proto list size method: adds the definition, since this may be the earliest place where
/// the variable appears.
void update_env_general_proto_list_size_method(ExprInfo* expr_info_ptr, Env* env_ptr);
"clang/AST/ASTConsumer.h" + +#include "LogicParser.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +using clang::TK_IgnoreUnlessSpelledInSource; +using clang::ast_matchers::decl; +using clang::ast_matchers::namedDecl; +using clang::ast_matchers::matchesName; +using clang::ast_matchers::typeAliasDecl; +using clang::ast_matchers::traverse; +using clang::ast_matchers::cxxRecordDecl; +using clang::ast_matchers::isDerivedFrom; +using clang::ast_matchers::unless; +using clang::ast_matchers::hasName; +using clang::ast_matchers::isExpandedFromMacro; +using clang::ast_matchers::isDerivedFrom; + +LogicConsumer::LogicConsumer(clang::Rewriter &R): + bs_feature_decl_callback_(R) { + // 目前只能匹配到 typeAliasDecl(), 可能会有更好的匹配。 + auto BSTypeAliasMatcher = decl(typeAliasDecl(), + namedDecl(matchesName("BSExtract.*"))).bind("BSTypeAlias"); + bs_type_alias_finder_.addMatcher(BSTypeAliasMatcher, &bs_type_alias_callback_); + + // TK_IgnoreUnlessSpelledInSource 用来忽略模板 + auto BSFeatureDeclMatcher = traverse(TK_IgnoreUnlessSpelledInSource, + cxxRecordDecl(isDerivedFrom(hasName("BSFastFeature")), + unless(isExpandedFromMacro("DISALLOW_COPY_AND_ASSIGN")), + unless(isExpandedFromMacro("REGISTER_BS_EXTRACTOR")))).bind("BSFeatureDecl"); // NOLINT + + bs_match_finder_.addMatcher(BSFeatureDeclMatcher, &bs_feature_decl_callback_); +} + +void LogicConsumer::HandleTranslationUnit(clang::ASTContext &Context) { + // 解析模板参数, 这一步必须在解析 Extract 之前, 因为这些参数会被用到。 + bs_type_alias_finder_.matchAST(Context); + + // 解析 BSExtract 逻辑。 + bs_match_finder_.matchAST(Context); +} + +void LogicParser::EndSourceFileAction() { + auto config = GlobalConfig::Instance(); + { + std::lock_guard lock(config->mu); + + std::string cmd_format("clang-format " + "--style=\"{BasedOnStyle: Google, ColumnLimit: 110, IndentCaseLabels: true}\" -i "); // NOLINT + + for (auto it = config->feature_info.begin(); it != config->feature_info.end(); it++) { + const std::string& bs_extractor_name = it->first; + const 
FeatureInfo& feature_info = it->second; + + const std::string& origin_file = feature_info.origin_file(); + if (origin_file.size() == 0) { + LOG(INFO) << "origin_file is empty! feature_name: " << bs_extractor_name; + continue; + } + + if (rewriter_.overwriteChangedFiles()) { + LOG(INFO) << "rewrite to file: " << origin_file; + } + + std::system((cmd_format + origin_file).c_str()); + if (const auto& cc_filename = it->second.cc_filename()) { + std::system((cmd_format + cc_filename.value()).c_str()); + } + } + } +} + +std::unique_ptr LogicParser::CreateASTConsumer(clang::CompilerInstance &CI, // NOLINT + llvm::StringRef file) { + rewriter_.setSourceMgr(CI.getSourceManager(), CI.getLangOpts()); + + return std::make_unique(rewriter_); +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/LogicParser.h b/convert/LogicParser.h new file mode 100644 index 0000000..dc82340 --- /dev/null +++ b/convert/LogicParser.h @@ -0,0 +1,51 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "clang/AST/AST.h" +#include "clang/AST/ASTConsumer.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Frontend/FrontendActions.h" +#include "clang/Lex/Lexer.h" +#include "clang/Tooling/Tooling.h" + +#include "matcher_callback/BSTypeAliasCallback.h" +#include "matcher_callback/BSFeatureDeclCallback.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +/// 解析 BS 特征逻辑。 +class LogicConsumer : public clang::ASTConsumer { + public: + explicit LogicConsumer(clang::Rewriter &R); // NOLINT + void HandleTranslationUnit(clang::ASTContext &Context) override; // NOLINT + + public: + clang::ast_matchers::MatchFinder bs_match_finder_; + clang::ast_matchers::MatchFinder bs_type_alias_finder_; + + BSFeatureDeclCallback bs_feature_decl_callback_; + BSTypeAliasCallback bs_type_alias_callback_; +}; + +class LogicParser : public 
clang::ASTFrontendAction { + public: + void EndSourceFileAction() override; + + std::unique_ptr CreateASTConsumer(clang::CompilerInstance &CI, // NOLINT + llvm::StringRef file) override; + private: + clang::Rewriter rewriter_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/README.md b/convert/README.md new file mode 100644 index 0000000..c2a3d65 --- /dev/null +++ b/convert/README.md @@ -0,0 +1,67 @@ +# 从 adlog 特征类自动转 BS 特征类 + +## 转换逻辑 + +转换的时候主要需要考虑以下逻辑怎么转换 + +1. if +2. for +3. common info attr +4. 中间节点调用,如 PhotoInfo +5. 参数中有 proto 字段的自定义函数 + +转换时候只需要考虑 bs 字段相关的代码,转换时候有些信息是从子节点拿到的,因此需要有 env 来保存相关的信息, +只有 if 和 for 有 env。 + +## 表达式 + +每个 adlog 字段的相关调用都是一个表达式,经过解析后包含转换所需要的信息,比较重要的信息包括 bs_expr、 +env 、common info attr 枚举、for 循环变量等。 + +## 实现 + +最终转换的时候,需要根据表达式类型进行处理,对应以下逻辑: + +1. CXXMemberCallExpr: 如果是以 adlog 开头的表达式,则替换成 bs 的表达式,会有一些比较复杂的情况,如 + common_info 的 list、 map, action_detail 的 list ,photo info 等中间节点对应的字段。 +2. IfStmt: 需要区分是否是 common info ,中间定义的临时变量需要删除。 +3. for 循环: 需要记录循环变量,区分是否是 common info。对于 map、list 等类型,需要在循环外面定义新变量。 +4. operator[]: 如果是 adlog 字段,则替换成 key_xxx 对应的 bs 的值。 + +转换时候的全部信息都可以分为两类,一类是环境信息,一类是 expr 包含的信息,分别用 Env 和 ExprInfo 代替。转换时候 +首先从 ExprInfo 获取信息,如果不够则从 Env 获取。需要的信息必须要在转换之前添加到 Env 或者 ExprInfo 中, 通过 +env.update 或者 parse_expr 实现。 + +Env 保存了当前 scope 中能访问到的各种变量,以及额外的信息,如 common info enum、action_detail no 等, 只有 +三种情况会创建新的 Env: + +1. 每个提特征类开始时候需要创建 Env。 +2. 每个 IfStmt 会创建 Env。 +3. 每个 for 循环会创建 Env。 + +访问 Stmt 时候会先添加一些比较简单的变量信息到 Env 中,在解析 Expr 时候会添加其他复杂信息到 Env,如 common +info enum 等。 + +### ast 访问 + +每个 ast 节点访问按如下主要逻辑: + +1. update Env。 +2. 递归访问 visit 子节点。 +3. 
parse expr, handler 处理, 保证每个节点只会被访问一次。 + +## TODO + +详细规则见: `docs/rule.md` 。 +此列表不再更新。 + +- [x] 已存在变量直接用现有变量名, 不创建新变量名。 +- [x] 构造函数中出现的枚举要加到 attr_metas 里, 并且按 if 分支加。 +- [] 去掉不用的头文件。 +- [x] bs_util.HasXXX 添加 attr_metas 。 +- [x] common info 通过 int 来判断, int 是模板参数传进来的。 +- [x] 来自中间节点的 common info, 如 live info common info。 +- [x] common info case switch。 +- [x] action info list, 自定义函数 add_feature, 见 ExtractUserAdItemClickNoInvoke。 +- [x] GetCommonInfoAttrNew。 +- [x] 多个 action, 用数组保存起来。 diff --git a/convert/Tool.cpp b/convert/Tool.cpp new file mode 100644 index 0000000..80eec8e --- /dev/null +++ b/convert/Tool.cpp @@ -0,0 +1,1697 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "clang/AST/ExprCXX.h" + +#include "Env.h" +#include "Tool.h" +#include "info/CommonInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +clang::SourceRange find_source_range(clang::Stmt* stmt) { + if (stmt == nullptr) { + LOG(INFO) << "stmt is nullptr"; + return clang::SourceRange(); + } + + if (clang::ReturnStmt* returnStmt = dyn_cast(stmt)) { + clang::Expr* retValue = returnStmt->getRetValue(); + if (retValue != nullptr) { + return clang::SourceRange(returnStmt->getBeginLoc(), retValue->getEndLoc()); + } else { + return clang::SourceRange(returnStmt->getBeginLoc(), returnStmt->getBeginLoc().getLocWithOffset(6)); + } + } + + return clang::SourceRange(stmt->getBeginLoc(), stmt->getEndLoc()); +} + +std::string stmt_to_string(clang::Stmt* stmt, unsigned suppressTagKeyword) { + if (stmt == nullptr) { + return ""; + } + + if (clang::CXXNullPtrLiteralExpr* cxx_null_ptr_literal_expr = + dyn_cast(stmt)) { + return "nullptr"; + } + + clang::LangOptions lo; + std::string out_str; + llvm::raw_string_ostream outstream(out_str); + clang::PrintingPolicy printPolicy(lo); + printPolicy.SuppressTagKeyword = suppressTagKeyword; + stmt->printPretty(outstream, nullptr, printPolicy); + return out_str; +} + +clang::Expr* 
get_first_caller(clang::Expr* initExpr, const Env* env_ptr) { + if (initExpr == nullptr) { + LOG(INFO) << "initExpr is null"; + return nullptr; + } + + if (clang::ImplicitCastExpr* castExpr = dyn_cast(initExpr)) { + return get_first_caller(castExpr->getSubExpr(), env_ptr); + + } else if (clang::MaterializeTemporaryExpr* materialExpr = + dyn_cast(initExpr)) { + return get_first_caller(materialExpr->getSubExpr(), env_ptr); + + } else if (clang::ParenExpr* parenExpr = dyn_cast(initExpr)) { + return get_first_caller(parenExpr->getSubExpr(), env_ptr); + + } else if (clang::UnaryOperator* unary_operator = dyn_cast(initExpr)) { + return get_first_caller(unary_operator->getSubExpr(), env_ptr); + + } else if (clang::DeclRefExpr* declRef = dyn_cast(initExpr)) { + if (env_ptr == nullptr) { + return declRef; + } else { + clang::Expr* env_value = env_ptr->find(stmt_to_string(declRef)); + if (env_value != nullptr) { + return get_first_caller(env_value, env_ptr); + } + return declRef; + } + + } else if (clang::CXXMemberCallExpr* memberCallExpr = dyn_cast(initExpr)) { + clang::Expr* caller = memberCallExpr->getImplicitObjectArgument(); + return get_first_caller(caller, env_ptr); + + } else if (clang::MemberExpr* memberExpr = dyn_cast(initExpr)) { + clang::Expr* caller = memberExpr->getBase(); + return get_first_caller(caller, env_ptr); + + } else if (clang::CXXOperatorCallExpr* operatorCallExpr = dyn_cast(initExpr)) { + clang::Expr* last_expr = nullptr; + for (auto it = operatorCallExpr->child_begin(); it != operatorCallExpr->child_end(); it++) { + if (clang::ImplicitCastExpr* tmpExpr = dyn_cast(*it)) { + last_expr = tmpExpr; + } + } + return get_first_caller(last_expr, env_ptr); + + } else { + return initExpr; + } +} + +const clang::TemplateArgumentList* get_proto_map_args_list(clang::QualType qualType) { + const clang::Type* type = qualType.getTypePtr(); + if (type == nullptr) { + return nullptr; + } + + const clang::CXXRecordDecl* typeDecl = nullptr; + if (type->isRecordType()) 
{ + typeDecl = type->getAsCXXRecordDecl(); + } else if (type->isReferenceType()) { + typeDecl = type->getPointeeCXXRecordDecl(); + } else { + return nullptr; + } + + if (const clang::ClassTemplateSpecializationDecl* templateDecl = + dyn_cast(typeDecl)) { + const clang::TemplateArgumentList& paramList = templateDecl->getTemplateArgs(); + return ¶mList; + } + + return nullptr; +} + +bool is_map_value_builtin(clang::QualType qualType) { + const clang::Type* type = qualType.getTypePtr(); + if (type == nullptr) { + return false; + } + + const clang::TemplateArgumentList* paramList = get_proto_map_args_list(qualType); + if (paramList != nullptr && paramList->size() > 0) { + const clang::Type* valueType = paramList->get(paramList->size() - 1).getAsType().getTypePtr(); + if (valueType->isBuiltinType()) { + return true; + } else { + const clang::CXXRecordDecl* typeDecl = nullptr; + if (valueType->isRecordType()) { + typeDecl = valueType->getAsCXXRecordDecl(); + } else if (valueType->isReferenceType()) { + typeDecl = valueType->getPointeeCXXRecordDecl(); + } else { + return false; + } + + if (typeDecl->getNameAsString() == "std::string") { + return true; + } else { + return false; + } + } + } + return false; +} + +bool is_check_item_pos(clang::IfStmt* ifStmt) { + clang::Expr* condExpr = ifStmt->getCond(); + condExpr = condExpr->IgnoreImpCasts(); + if (clang::BinaryOperator* binaryOperator = dyn_cast(condExpr)) { + std::string op = binaryOperator->getOpcodeStr().str(); + std::string lhs = stmt_to_string(binaryOperator->getLHS()); + std::string rhs = stmt_to_string(binaryOperator->getRHS()); + if (lhs == "adlog.item_size()" && rhs == "pos") { + return true; + }else if (lhs == "pos" && rhs == "adlog.item_size()") { + return true; + } else { + return false; + } + } else { + return false; + } +} + +bool is_prefix(const std::string& s) { + if (s.substr(0, 1) == "$" && s.find("$FeaturePrefix::") != std::string::npos) { + return true; + } + return false; +} + +std::map> 
collect_prefix(json d) { + std::map> res; + for (auto it = d.begin(); it != d.end(); it++) { + res[it.key()] = std::set(); + collect_prefix_recursive(it.value(), &(res[it.key()])); + } + + return res; +} + +void collect_prefix_recursive(json d, std::set* str_set) { + if (d.is_string()) { + std::string x = d.get(); + if (x.substr(0, 1) == "$" && x.find("$FeaturePrefix::") != std::string::npos) { + str_set->insert(x); + } + } else if (d.is_array() && d.size() > 0) { + if (d[0].is_string()) { + std::string name = d[0].get(); + for (size_t i = 1; i < d.size(); i++) { + collect_prefix_recursive(d[i], str_set); + } + } else { + for (size_t i = 0; i < d.size(); i++) { + collect_prefix_recursive(d[i], str_set); + } + } + } else if (d.is_object()) { + for (auto it = d.begin(); it != d.end(); it++) { + collect_prefix_recursive(it.value(), str_set); + } + } +} + +json split_feature_def(json d, + const std::map>& prefixs, + const std::map& enum_map) { + json res = json::object(); + for (auto it = d.begin(); it != d.end(); it++) { + std::string name = it.key(); + res[name] = json::object(); + res[name][name + ".same"] = replace_enum(it.value(), enum_map); + + const std::set& prefix_set = prefixs.at(name); + + for (const auto& prefix: prefix_set) { + std::string prefix_name = prefix.substr(prefix.rfind(":") + 1); + if (prefix == "$FeaturePrefix::PHOTO_ACCOUNT_ID") { + for (auto it_enum = enum_map.begin(); it_enum != enum_map.end(); it_enum++) { + LOG(INFO) << "name: " << it_enum->first << ", v: " << it_enum->second; + } + } + res[name][name + "." 
+ prefix_name] = + split_feature_def_recursive(it.value(), prefix, enum_map); + } + } + + return res; +} + +json split_feature_def_recursive(const json& d, + std::string prefix, + const std::map& enum_map) { + json res; + if (d.is_array() && d.size() > 0) { + res = json::array(); + for (size_t i = 0; i < d.size(); i++) { + bool is_rm = false; + if (d[i].is_array()) { + for (size_t j = 0; j < d[i].size(); j++) { + if (d[i][j].is_string() && is_prefix(d[i][j].get())) { + if (d[i][j].get() != prefix) { + is_rm = true; + break; + } + } + } + } + + if (!is_rm) { + json x = split_feature_def_recursive(d[i], prefix, enum_map); + if (x.is_array()) { + for (size_t j = 0; j < x.size(); j++) { + if (x[j].is_string()) { + std::string word = x[j].get(); + if (is_prefix(word) && word == prefix) { + if (enum_map.find(prefix) == enum_map.end()) { + LOG(INFO) << "cannot find enum for prefix: " << prefix; + } + x[j] = enum_map.at(prefix); + } else if (enum_map.find(word) != enum_map.end()) { + x[j] = enum_map.at(word); + } + } + } + } + if (!x.is_array() || x.size() > 0) { + res.push_back(x); + } + } + } + } else if (d.is_object()) { + res = json::object(); + for (auto it = d.begin(); it != d.end(); it++) { + json x = split_feature_def_recursive(it.value(), prefix, enum_map); + if (!x.is_array() || x.size() > 0) { + res[it.key()] = x; + } + } + } else { + res = json::parse(d.dump()); + } + + return res; +} + +json replace_enum(const json& d, const std::map& enum_map) { + json res; + if (d.is_array()) { + res = json::array(); + for (size_t i = 0; i < d.size(); i++) { + res.push_back(replace_enum(d[i], enum_map)); + } + } else if (d.is_object()) { + res = json::object(); + for (auto it = d.begin(); it != d.end(); it++) { + res[it.key()] = replace_enum(it.value(), enum_map); + } + } else if (d.is_string()) { + std::string word = d.get(); + if (enum_map.find(word) != enum_map.end()) { + res = enum_map.at(word); + } else { + res = word; + } + } else { + res = json::parse(d.dump()); + } + 
+ return res; +} + +bool is_basic_type(clang::QualType qual_type) { + if (tool::is_ad_enum(qual_type)) { + return true; + } + + std::string type_name = qual_type.getAsString(); + if (type_name.find("algorithm") != std::string::npos) { + return false; + } + + if (type_name.find("auto_cpp_rewriter") != std::string::npos) { + return false; + } + + if (type_name.find("const") != std::string::npos && + type_name.find("*") != std::string::npos) { + return false; + } + + return true; +} + +bool is_integer(const std::string & s){ + static const std::regex p(" ?[\\-\\+]?[0-9]+ ?"); + return std::regex_match(s, p); +} + +bool starts_with(const std::string& s, const std::string& x) { + if (s.size() < x.size()) { + return false; + } + + return s.substr(0, x.size()) == x; +} + +bool ends_with(const std::string& s, const std::string& x) { + if (s.size() < x.size()) { + return false; + } + + return s.substr(s.size() - x.size()) == x; +} + +std::string lower(const std::string& s) { + std::string s1 = s; + std::transform(s1.begin(), s1.end(), s1.begin(), tolower); + return s1; +} + +absl::optional find_common_attr_int_value(clang::Expr* expr) { + if (expr != nullptr && tool::is_pointer(expr->getType())) { + return absl::nullopt; + } + + if (clang::DeclRefExpr* decl_ref_expr = dyn_cast(expr)) { + return find_common_attr_int_value_from_expr(decl_ref_expr); + } + + if (tool::is_enum_decl(expr)) { + return find_common_attr_int_value_from_expr(expr); + } + + return absl::nullopt; +} + +absl::optional find_common_attr_int_value_from_expr(clang::Expr* expr) { + absl::optional enum_name = find_common_attr_enum_name_from_expr(expr); + if (!enum_name) { + LOG(INFO) << "cannot find enum_name from expr: " << stmt_to_string(expr); + return absl::nullopt; + } + + const clang::Type* type = expr->getType().getTypePtr(); + const clang::EnumDecl* enumDecl = type->getAs()->getDecl(); + if (enumDecl != nullptr) { + for (auto it = enumDecl->enumerator_begin(); it != enumDecl->enumerator_end(); it++) { 
+ std::string itName = it->getNameAsString(); + auto pos = itName.find(*enum_name); + if (pos != std::string::npos) { + if (itName.substr(pos) == *enum_name) { + int enum_value = std::stoi(stmt_to_string(it->getInitExpr())); + LOG(INFO) << "find enum, name: " << it->getNameAsString() + << ", value: " << stmt_to_string(it->getInitExpr()); + return absl::optional(enum_value); + } + } + } + } + + return absl::nullopt; +} + +absl::optional find_common_attr_enum_name_from_expr(clang::Expr* expr) { + std::vector arr = absl::StrSplit(stmt_to_string(expr), "::"); + if (arr.size() == 0) { + return absl::nullopt; + } + + static std::regex p("([A-Z0-9][A-Z0-9_]+)", std::regex::extended); + std::smatch match_res; + std::string enum_name; + if (std::regex_search(arr.back(), match_res, p)) { + if (match_res.size() > 0) { + enum_name = match_res[match_res.size() - 1]; + } + } + + if (enum_name.size() == 0) { + LOG(INFO) << "cannot find enum_name from expr: " << stmt_to_string(expr); + return absl::nullopt; + } + + return absl::make_optional(enum_name); +} + +bool ends_with_ignore_space(const std::string& s, char c) { + int n = s.size(); + for (int i = n - 1; i >= 0; i--) { + if (std::isspace(s[i])) { + continue; + } + if (s[i] == c) { + return true; + } else { + return false; + } + } + + return false; +} + +bool is_only_space(const std::string& s) { + for (size_t i = 0; i < s.size(); i++) { + if (!std::isspace(s[i])) { + return false; + } + } + + return true; +} + +bool is_only_semicolon(const std::string& s) { + for (size_t i = 0; i < s.size(); i++) { + if (std::isspace(s[i]) || s[i] == ';') { + continue; + } + return false; + } + + return true; +} + +std::string fix_semicolon(const std::string& s) { + if (is_only_space(s)) { + return ""; + } + + if (ends_with_ignore_space(s, ';')) { + if (is_only_semicolon(s)) { + return ""; + } else { + return s; + } + } else { + if (ends_with_ignore_space(s, '}')) { + return s; + } else { + return s + ";"; + } + } +} + +namespace tool { + 
+bool is_middle_node_expr(clang::Expr* expr, const Env& env) { + clang::Expr* first_caller = get_first_caller(expr, &env); + std::string expr_str = stmt_to_string(first_caller); + return starts_with(expr_str, "GetPhotoInfo") || starts_with(expr_str, "GetAuthorInfo"); +} + +bool is_middle_node_root(const std::string& s) { + static std::unordered_set middle_names = { + "GetPhotoInfo", + "GetAuthorInfo", + "GetLiveInfo", + "GetCommonInfoAttrNew" + }; + + std::string s1 = trim_this(s); + for (auto it = middle_names.begin(); it != middle_names.end(); it++) { + if (starts_with(s1, *it)) { + return true; + } + } + + return false; +} + +// live_info, 或者 GetLiveInfo(adlog.item(pos)) +bool is_middle_node_expr_root(clang::Expr* expr, const Env& env) { + if (clang::DeclRefExpr* decl_ref_expr = dyn_cast(expr)) { + clang::Expr* value_expr = env.find(stmt_to_string(decl_ref_expr)); + if (value_expr != nullptr) { + if (is_middle_node_root(stmt_to_string(value_expr))) { + return true; + } + } + } + + return is_middle_node_root(stmt_to_string(expr)); +} + +bool is_common_info_vector(clang::QualType qual_type) { + bool is_vector = qual_type.getAsString().find("std::vector") != std::string::npos; + return is_common_info_enum(qual_type) && is_vector; +} + +bool is_common_info_enum(clang::QualType qual_type) { + std::string type_str = qual_type.getAsString(); + + if (type_str.find("auto_cpp_rewriter::") != std::string::npos && + type_str.find("CommonInfo") != std::string::npos && + type_str.find("Attr") != std::string::npos) { + return true; + } + + if (type_str.find("auto_cpp_rewriter::") != std::string::npos && + type_str.find("InfoCommon") != std::string::npos && + type_str.find("Attr") != std::string::npos) { + return true; + } + + return false; +} + +bool is_common_info_struct(clang::QualType qual_type) { + std::string type_str = qual_type.getAsString(); + + if (type_str.find("auto_cpp_rewriter::") != std::string::npos && + type_str.find("CommonInfo") != std::string::npos && + 
type_str.find("Attr") != std::string::npos) { + return true; + } + + if (type_str.find("auto_cpp_rewriter::") != std::string::npos && + type_str.find("InfoCommon") != std::string::npos && + type_str.find("Attr") != std::string::npos) { + return true; + } + + return false; +} + +bool is_repeated_common_info(clang::QualType qualType) { + std::string type_str = qualType.getAsString(); + bool is_repeated = (type_str.find("::RepeatedPtr") != std::string::npos); + is_repeated &= (type_str.find("google::protobuf") != std::string::npos); + return is_repeated && is_common_info_enum(qualType); +} + +bool is_repeated_common_info_size(const std::string& method_name) { + return CommonAttrInfo::is_repeated_common_info_size(method_name); +} + +bool is_combine_feature(const std::string& feature_type) { + return lower(feature_type).find("combine") != std::string::npos; +} + +bool is_user_feature(const std::string& feature_type) { + return lower(feature_type).find("user") != std::string::npos; +} + +bool is_item_feature(const std::string& feature_type) { + return lower(feature_type).find("photo") != std::string::npos || + lower(feature_type).find("item") != std::string::npos; +} + +bool is_sparse_feature(const std::string& feature_type) { + return !is_dense_feature(feature_type); +} + +bool is_dense_feature(const std::string& feature_type) { + return lower(feature_type).find("dense") != std::string::npos; +} + +bool is_item_field(const std::string& s) { + return s.size() > 10 && s.substr(0, 10) == "adlog_item"; +} + +bool is_adlog_user_field(const std::string& s) { + return s.size() > 10 && starts_with(s, "adlog_user"); +} + +bool is_adlog_field(const std::string& s) { + if (s.size() > 5 && starts_with(s, "adlog")) { + return true; + } + if (s.size() > 6 && starts_with(s, "ad_log")) { + return true; + } + + return false; +} + +bool is_reco_user_field(const std::string& s) { + return s.size() >= 15 && starts_with(s, "adlog_reco_user"); +} + +bool is_item_type_enum(clang::QualType 
qual_type) { + std::string type_str = qual_type.getAsString(); + LOG(INFO) << "type_str: " << type_str; + if (type_str.find("algorithm::ItemType") != std::string::npos) { + return true; + } + + return false; +} + +bool is_ad_callback_log_enum(clang::QualType qual_type) { + std::string type_str = qual_type.getAsString(); + if (type_str.find("auto_cpp_rewriter::AdCallbackLog") != std::string::npos) { + return true; + } + + return false; +} + +bool is_ad_action_type_enum(clang::QualType qual_type) { + std::string type_str = qual_type.getAsString(); + if (type_str.find("auto_cpp_rewriter::AdActionType") != std::string::npos) { + return true; + } + + return false; +} + +bool is_ad_enum(clang::QualType qual_type) { + const clang::Type* type_ptr = qual_type.getTypePtr(); + + std::string type_str = qual_type.getAsString(); + bool is_from_ad_proto = type_str.find("auto_cpp_rewriter::") != std::string::npos; + + return is_from_ad_proto && type_ptr->isEnumeralType(); +} + +bool is_basic_array(clang::QualType qual_type) { + std::string type_str = qual_type.getAsString(); + + static std::regex p("(int|int64_t|uint64_t|float|double) \\[\\d+\\]"); + std::smatch sm; + return std::regex_match(type_str, sm, p); +} + +bool is_builtin_simple_type(clang::QualType qual_type) { + std::string type_str = qual_type.getAsString(); + + static std::regex p("(int|int64_t|uint32|uint64_t|float|double)"); + std::smatch sm; + return std::regex_match(type_str, sm, p); +} + +std::string get_bs_scalar_exists_expr(Env* env_ptr, + const std::string& common_info_prefix, + const std::string& value_type) { + std::ostringstream oss; + + oss << "BSFieldHelper::HasSingular<" << value_type; + if (env_ptr->is_combine_feature() && !is_item_field(common_info_prefix)) { + oss << ", true"; + } + oss << ">(*bs, BSFieldEnum::" << common_info_prefix << ", pos)"; + + return oss.str(); +} + +bool is_int32_type(const std::string &type_str) { + if (type_str == "int" || + type_str == "const int" || + type_str == "int32_t" 
|| + type_str == "const int32_t") { + return true; + } + + return false; +} + +std::string get_builtin_type_str(clang::QualType qual_type) { + std::string type_str = qual_type.getAsString(); + static std::regex p(" ?\\&$"); + type_str = std::regex_replace(type_str, p, ""); + + if (is_int32_type(type_str)) { + return "int32_t"; + } + + const clang::Type* type_ptr = qual_type.getTypePtr(); + if (type_ptr == nullptr) { + return ""; + } + + if (type_ptr->isBooleanType()) { + return "bool"; + } else if (type_ptr->isIntegerType()) { + if (is_int32_type(type_str)) { + return "int32_t"; + } else if (type_str.find("uint64") != std::string::npos) { + return "uint64_t"; + } else { + return "int64_t"; + } + } else if (type_ptr->isFloatingType()) { + if (type_str.find("double") != std::string::npos) { + return "double"; + } else { + return "float"; + } + } else { + return "absl::string_view"; + } +} + +std::string get_exists_name(const std::string& name) { + return name + "_exists"; +} + +std::string fix_std_string(const std::string& s) { + if (s.find("include<") != std::string::npos || + s.find("include <") != std::string::npos || + s.find("std::string") != std::string::npos) { + return s; + } + + static std::regex p_string("([ <]?)string([ >,])"); + return std::regex_replace(s, p_string, "$1std::string$2"); +} + +std::string fix_string_view(const std::string& s) { + static std::regex p_string("std::string"); + return std::regex_replace(s, p_string, "absl::string_view"); +} + +std::string get_bs_correspond_path(const std::string& filename) { + static std::regex p(".*teams/ad/ad_algorithm/feature/fast/impl/"); + return std::regex_replace(filename, p, "teams/ad/ad_algorithm/bs_feature/fast/impl/bs_"); +} + +std::string read_file_to_string(const std::string& filename) { + std::ifstream infile(filename); + if (!infile.is_open()) { + LOG(INFO) << "cannot open file, filename: " << filename; + return ""; + } + + std::stringstream buffer; + buffer << infile.rdbuf(); + std::string 
s(buffer.str()); + + return s; +} + +bool is_bs_already_rewritten(const std::string& filename) { + std::string s = read_file_to_string(filename); + if (s.size() == 0) { + return false; + } + + bool is_rewritten = (s.find("getBS()") != std::string::npos && + s.find("BSFieldEnum::") != std::string::npos); + if (is_rewritten) { + return true; + } + + if (ends_with(filename, ".h")) { + std::string cc_filename = filename.substr(0, filename.size() - 1) + "cc"; + if (is_file_exists(cc_filename)) { + return is_bs_already_rewritten(cc_filename); + } + } + + return false; +} + +bool is_file_exists(const std::string& name) { + struct stat buffer; + return (stat(name.c_str(), &buffer) == 0); +} + +std::string rm_continue_break(const std::string& s) { + static std::regex p_continue("continue ?;"); + static std::regex p_break("break ?;"); + + std::string res = std::regex_replace(s, p_continue, ""); + res = std::regex_replace(res, p_break, ""); + + return res; +} + +std::string find_last_include(const std::string& content) { + static std::regex p_include("(#include ?[\"<].*[\">])"); + std::smatch m; + + std::string s = content; + std::string res; + + while (std::regex_search (s, m, p_include)) { + if (m.size() > 0) { + res = m[0]; + } + s = m.suffix().str(); + } + + return res; +} + +bool is_string(const std::string& type_str) { + if (type_str == "std::string" || type_str == "string" || type_str == "const std::string" || + type_str == "absl::string_view" || type_str == "char *" || type_str == "const basic_string" || + type_str == "basic_string" || type_str == "std::basic_string" || + type_str == "google::protobuf::string") { + return true; + } + + return false; +} + +// TODO(liuzhishan): fix +bool is_string(clang::QualType qual_type) { + if (is_char_arr(qual_type)) { + return true; + } + + std::string type_str = lower(qual_type.getAsString()); + return is_string(type_str); +} + +bool is_char_arr(clang::QualType qual_type) { + std::string type_str = lower(qual_type.getAsString()); 
+ + std::regex p("(const )?char \\[.+?\\]"); + std::smatch m; + if (std::regex_match(type_str, m, p)) { + return true; + } + + return false; +} + +std::string add_quote(const std::string& s) { + std::ostringstream oss; + oss << "\"" << s << "\""; + return oss.str(); +} + +bool is_from_info_util(const std::string& s) { + return starts_with(s, "BSGet") || starts_with(s, "BSHas") || starts_with(s, "bs_util.BS"); +} + +std::string fix_ad_enum(const std::string& s) { + static std::regex p("ad::class "); + return std::regex_replace(s, p, "ad::"); +} + +std::string adlog_to_bs_enum_str(const std::string& s) { + std::string s1 = std::regex_replace(s, std::regex("\\."), "_"); + return std::regex_replace(s1, std::regex(":"), "_"); +} + +std::string dot_underscore_to_camel(const std::string& s) { + std::ostringstream oss; + + std::vector dot_tokens = absl::StrSplit(s, "."); + for (const auto& dot_token : dot_tokens) { + std::vector underscore_tokens = absl::StrSplit(dot_token, "_"); + for (const auto& underscore_token : underscore_tokens) { + if (starts_with(underscore_token, "Get") || + underscore_token == "exists" || + underscore_token.size() == 0) { + continue; + } + + oss << char(toupper(underscore_token[0])) << underscore_token.substr(1); + } + } + + return oss.str(); +} + +bool is_var_proto_list(clang::QualType qualType) { + const clang::Type* type = qualType.getTypePtr(); + if (type == nullptr) { + return false; + } + if (type->isRecordType() || type->isReferenceType()) { + return qualType.getAsString().find("google::protobuf::Repeated") != std::string::npos; + } + + return false; +} + +bool is_var_proto_map(clang::QualType qualType) { + const clang::Type* type = qualType.getTypePtr(); + + if (type == nullptr) { + return false; + } + if (type->isRecordType() || type->isReferenceType()) { + return qualType.getAsString().find("google::protobuf::Map") != std::string::npos; + } + + return false; +} + +bool is_var_proto_message(clang::QualType qual_type) { + const 
clang::Type *type = qual_type.getTypePtr(); + std::string type_str = qual_type.getAsString(); + + if (type == nullptr) { + return false; + } + + if (type->isRecordType() || type->isReferenceType()) { + if (type_str.find("auto_cpp_rewriter::") != std::string::npos) { + if (type_str.find("Map") == std::string::npos && + type_str.find("Repeated") == std::string::npos) { + return true; + } + } + } + + return false; +} + +bool is_repeated_proto_message(clang::QualType qual_type) { + std::string type_str = qual_type.getAsString(); + if (is_repeated_proto(qual_type)) { + if (type_str.find("auto_cpp_rewriter::") != std::string::npos) { + if (type_str.find("Map") == std::string::npos) { + return true; + } + } + } + + return false; +} + +std::vector find_common_info_values_in_file(const std::string& filename) { + std::vector res; + + std::string s = read_file_to_string(filename); + if (s.size() == 0) { + return res; + } + + static std::regex p("CommonInfoAttr_Name[A-Za-z0-9]+_([A-Z0-9_]+)"); + std::smatch m; + + while (std::regex_search (s, m, p)) { + if (m.size() > 0) { + } + s = m.suffix().str(); + } + + return res; +} + +bool is_skip(const std::string& feature_name) { + static std::unordered_set names = { + "RecoFastFeature", + "BSRecoFastFeature", + "FastFeatureNoPrefix", + "BSFastFeatureNoPrefix", + "CommonInfoAttrFeature", + "BSCommonInfoAttrFeature", + "ExtractMultiAttrFastFeatureNoPrefix", + "BSExtractMultiAttrFastFeatureNoPrefix", + }; + + return names.find(feature_name) != names.end(); +} + +bool is_enum_decl(clang::Expr* expr) { + if (expr == nullptr) { + return false; + } + + const clang::Type* type = expr->getType().getTypePtr(); + const clang::EnumDecl* enumDecl = type->getAs()->getDecl(); + if (enumDecl != nullptr) { + return true; + // for (auto it = enumDecl->enumerator_begin(); it != enumDecl->enumerator_end(); it++) { + // std::string itName = it->getNameAsString(); + // auto pos = itName.find(enum_name); + // if (pos != std::string::npos) { + // if 
(itName.substr(pos) == enum_name) { + // int enum_value = std::stoi(stmt_to_string(it->getInitExpr())); + // LOG(INFO) << "find enum, name: " << it->getNameAsString() + // << ", value: " << stmt_to_string(it->getInitExpr()); + // return absl::optional(enum_value); + // } + // } + // } + } + + return false; +} + +bool is_implicit_loop_var(const std::string& s) { + static const std::regex p("__(begin|range|end)\\d+"); + return std::regex_match(s, p); +} + +bool is_from_implicit_loop_var(const std::string &s) { + if (is_implicit_loop_var(s)) { + return true; + } + + static const std::regex p("__(begin|range|end).*\\.(begin|end)\\(\\)"); + return std::regex_match(s, p); +} + +bool is_deref_implicit_loop_begin(const std::string s) { + static const std::regex p("\\* ?__begin"); + return std::regex_match(s, p); +} + +bool is_cxx_default_arg_expr(clang::Expr* expr) { + if (expr == nullptr) { + return false; + } + + if (clang::CXXDefaultArgExpr* cxx_default_arg_expr = dyn_cast(expr)) { + return true; + } + + return false; +} + +std::string trim_this(const std::string& s) { + static const std::regex p("this\\->"); + return std::regex_replace(s, p, ""); +} + +bool is_int_vector(clang::QualType qual_type) { + const clang::Type* type = qual_type.getTypePtr(); + if (type != nullptr) { + if (qual_type.getAsString().find("vector= 0; i--) { + if (s[i] == '}') { + right_pos = i; + break; + } else if (isspace(s[i])) { + continue; + } else { + break; + } + } + + if (left_pos >= 0 && right_pos > 0) { + return s.substr(left_pos + 1, right_pos - left_pos - 1); + } else { + return s; + } +} + +std::string add_surround_big_parantheses(const std::string& s) { + std::ostringstream oss; + oss << "{" << s << "}"; + + return oss.str(); +} + +bool is_action_info(const std::string& type_str) { + static std::unordered_set action_info_types = { + "AdActionInfoList", + "::auto_cpp_rewriter::AdActionInfoList", + "const class auto_cpp_rewriter::AdActionInfoList", + "AdActionBaseInfo", + 
"::auto_cpp_rewriter::AdActionBaseInfo", + "const class auto_cpp_rewriter::AdActionBaseInfo", + "SimpleAdDspInfos", + "::auto_cpp_rewriter::SimpleAdDspInfos", + "const class auto_cpp_rewriter::SimpleAdDspInfos", + "SimpleLiveInfos", + "::auto_cpp_rewriter::SimpleLiveInfos", + "const class auto_cpp_rewriter::SimpleLiveInfos", + "SimpleLiveInfo", + "::auto_cpp_rewriter::SimpleLiveInfo", + "const class auto_cpp_rewriter::SimpleLiveInfo"}; + + return action_info_types.find(type_str) != action_info_types.end(); +} + +bool is_action_info(clang::QualType qual_type) { + std::string type_str = qual_type.getAsString(); + return is_action_info(type_str); +} + +bool is_repeated_proto(clang::QualType qual_type) { + std::string type_str = qual_type.getAsString(); + + static std::regex p("(const )?::google::protobuf::Repeated(Ptr)?Field< ?.*> ?\\&?"); + std::smatch m; + return std::regex_match(type_str, m, p); +} + +bool is_repeated_proto_iterator(clang::QualType qual_type) { + std::string type_str = qual_type.getAsString(); + + static std::regex p("(const )?(::)?google::protobuf::Repeated(Ptr)?Field< ?.*>::(const_)?iterator"); + std::smatch m; + return std::regex_match(type_str, m, p); +} + +bool is_map_proto(clang::QualType qual_type) { + std::string type_str = qual_type.getAsString(); + + static std::regex p("(const )?::google::protobuf::Map< ?(.*)> ?\&?"); + std::smatch m; + return std::regex_match(type_str, m, p); +} + +bool is_map_proto_iterator(clang::QualType qual_type) { + std::string type_str = qual_type.getAsString(); + + LOG(INFO) << "type_str: " << type_str; + + static std::regex p("(const )?(::)?google::protobuf::Map< ?.*>::(const_)?(iterator|pointer)"); + std::smatch m; + return std::regex_match(type_str, m, p); +} + +bool is_repeated_action_info(clang::QualType qual_type) { + if (is_repeated_proto(qual_type)) { + std::string type_str = qual_type.getAsString(); + static std::regex p("(const )?::google::protobuf::Repeated(Ptr)?Field< ?(.*)::(.*)> ?\\&?"); + 
std::smatch m; + if (std::regex_match(type_str, m, p)) { + if (m.size() > 0) { + if (is_action_info(m[m.size() - 1])) { + return true; + } + } + } + } + + return false; +} + +bool is_action_detail_map(clang::QualType qual_type) { + std::string type_str = qual_type.getAsString(); + + if (type_str.find("google::protobuf::Map") != std::string::npos) { + if (type_str.find("ad::algorithm::AdActionInfoList") != std::string::npos || + type_str.find("ad::algorithm::SimpleAdDspInfos") != std::string::npos || + type_str.find("ad::algorithm::AdActionBaseInfo") != std::string::npos || + type_str.find("ad::algorithm::SimpleLiveInfos") != std::string::npos) { + return true; + } + } + + return false; +} + +std::string get_bs_type_str(clang::QualType qual_type, bool is_combine_user) { + std::string type_str = qual_type.getAsString(); + + if (is_reco_proto(qual_type)) { + return type_str; + } + + if (is_repeated_proto(qual_type) || + is_repeated_proto_ptr(qual_type)) { + if (auto inner_type = get_repeated_proto_inner_type(qual_type)) { + return get_bs_repeated_field_type(*inner_type, is_combine_user); + } + } else if (is_repeated_proto_iterator(qual_type)) { + if (auto inner_type = get_repeated_proto_iterator_inner_type(qual_type)) { + return get_bs_repeated_field_type(*inner_type, is_combine_user); + } + } else if (is_map_proto(qual_type)) { + if (auto inner_type = get_map_proto_inner_type(qual_type)) { + return get_bs_map_field_type(inner_type->first, inner_type->second, is_combine_user); + } + } + + return get_builtin_type_str(qual_type); +} + +absl::optional get_repeated_proto_inner_type(clang::QualType qual_type) { + std::string type_str = qual_type.getAsString(); + static std::regex p("(const )?(::)?google::protobuf::Repeated(Ptr)?Field< ?(.*)> ?[\\&\\*]?"); + std::smatch m; + if (std::regex_match(type_str, m, p)) { + if (m.size() > 0) { + LOG(INFO) << "match inner_type: " << m[m.size() - 1]; + std::string last_type = get_last_type_str(m[m.size() - 1]); + if (last_type == 
"string" || last_type == "std::string") { + return absl::make_optional("absl::string_view"); + } else { + return absl::make_optional(last_type); + } + } + } + + if (absl::optional inner_type = get_repeated_proto_iterator_inner_type(qual_type)) { + return absl::make_optional(*inner_type); + } + + return absl::nullopt; +} + +absl::optional get_repeated_proto_iterator_inner_type(clang::QualType qual_type) { + std::string type_str = qual_type.getAsString(); + static std::regex p("(const )?(::)?google::protobuf::Repeated(Ptr)?Field< ?(.*)>::const_iterator"); + std::smatch m; + if (std::regex_match(type_str, m, p)) { + if (m.size() > 0) { + std::string last_type = get_last_type_str(m[m.size() - 1]); + if (last_type == "string" || last_type == "std::string") { + return absl::make_optional("absl::string_view"); + } else { + return absl::make_optional(last_type); + } + } + } + + return absl::nullopt; +} + +absl::optional> get_map_proto_inner_type(clang::QualType qual_type) { + std::string type_str = qual_type.getAsString(); + static std::regex p("(const )?(class )?(::)?google::protobuf::Map< ?(.*), (.*) ?> ?(\\&\\*)?"); + std::smatch m; + + if (std::regex_match(type_str, m, p)) { + if (m.size() > 3) { + std::string param0 = m[m.size() - 2]; + std::string param1 = m[m.size() - 1]; + + if (param0 == "string" || param0 == "std::string") { + param0 = "absl::string_view"; + } + + if (param1 == "string" || param1 == "std::string") { + param1 = "absl::string_view"; + } + + return absl::make_optional(std::make_pair(param0, param1)); + } + } + + return absl::nullopt; +} + +std::string get_bs_repeated_field_type(const std::string &type_str, + bool is_combine_user) { + std::ostringstream oss; + oss << "BSRepeatedField<" << type_str; + if (is_combine_user) { + oss << ", true"; + } + oss << ">"; + + return oss.str(); +} + +std::string get_bs_map_field_type(const std::string &key_type_str, + const std::string &value_type_str, + bool is_combine_user) { + std::ostringstream oss; + oss << 
"BSMapField<" << key_type_str << ", " << value_type_str; + if(is_combine_user) { + oss << ", true"; + } + oss << ">"; + + return oss.str(); +} + +bool is_repeated_proto_ptr(clang::QualType qual_type) { + std::string type_str = qual_type.getAsString(); + + static std::regex p("(const )?(class )?(::)?google::protobuf::Repeated(Ptr)?Field< ?.*> ?\\*"); + std::smatch m; + if (std::regex_match(type_str, m, p)) { + return true; + } + + static std::regex p_mapped_type("std::(unordered_)?map ?\\*>::mapped_type"); + return std::regex_match(type_str, m, p_mapped_type); +} + +bool is_reco_proto(clang::QualType qual_type) { + std::string type_str = qual_type.getAsString(); + return type_str.find("google::protobuf") != std::string::npos && + type_str.find("ks::reco") != std::string::npos; +} + +bool is_reco_proto_type(clang::QualType qual_type) { + return is_reco_proto(qual_type); +} + +std::string replace_adlog_to_bslog(const std::string &s) { + static std::regex p("(adlog|ad_log)"); + std::string new_str = std::regex_replace(s, p, "bslog"); + return new_str; +} + +bool is_map_repeated_int_list_type(clang::QualType qual_type) { + std::string type_str = qual_type.getAsString(); + + static std::regex p("std::(unordered_)?map ?\\*>"); + std::smatch m; + return std::regex_match(type_str, m, p); +} + +bool is_map_int_int_type(clang::QualType qual_type) { + std::string type_str = qual_type.getAsString(); + + static std::regex p("std::(unordered_)?map"); + std::smatch m; + return std::regex_match(type_str, m, p); +} + +clang::Expr* get_inner_expr(clang::Expr* expr) { + if (expr == nullptr) { + return expr; + } + + if (clang::MaterializeTemporaryExpr* materialize_temporary_expr = + dyn_cast(expr)) { + return get_inner_expr(materialize_temporary_expr->getSubExpr()); + } + + if (clang::ImplicitCastExpr *implicit_cast_expr = dyn_cast(expr)) { + return get_inner_expr(implicit_cast_expr->getSubExpr()); + } + + return expr; +} + +std::string rm_empty_line(const std::string& s) { + static 
std::regex p_empty_line("\n *;"); + return std::regex_replace(s, p_empty_line, ""); +} + +std::string trim_tail_underscore(const std::string& s) { + static std::regex p("_+$"); + return std::regex_replace(s, p, ""); +} + +bool is_common_info_list_or_map_loop_stmt(clang::Stmt* stmt) { + if (stmt == nullptr) { + return false; + } + + std::string stmt_str = stmt_to_string(stmt); + + if (clang::CXXForRangeStmt* cxx_for_range_stmt = dyn_cast(stmt)) { + if (CommonAttrInfo::is_common_info_list_or_map_loop(stmt_str)) { + return true; + } + } + + if (clang::ForStmt* for_stmt = dyn_cast(stmt)) { + if (CommonAttrInfo::is_common_info_list_or_map_loop(stmt_str)) { + return true; + } + } + + return false; +} + +std::string trim_tail_size(const std::string& s) { + if (ends_with(s, "_size")) { + std::string prefix = s.substr(0, s.size() - std::string("_size").size()); + return prefix; + } + + return s; +} + +bool contains_var(const std::string& expr_str, const std::string& var_name) { + std::regex p(std::string("([^a-zA-Z0-9_])?") + var_name + std::string("([^a-zA-Z0-9_])?")); + + std::smatch match_res; + if (std::regex_search(expr_str, match_res, p)) { + if (expr_str.size() != var_name.size()) { + return true; + } + } + + return false; +} + +bool is_map_item_type_int_type(clang::QualType qual_type) { + std::string type_str = qual_type.getAsString(); + + static std::regex p("std::(unordered_)?map"); + std::smatch m; + return std::regex_match(type_str, m, p); +} + +std::string get_last_type_str(const std::string& s) { + size_t pos = s.find("::"); + if (pos != std::string::npos) { + return s.substr(pos + 2); + } else { + return s; + } +} + +bool has_cc_file(const std::string& filename) { + if (ends_with(filename, ".h")) { + std::string cc_file = filename.substr(0, filename.size() - 1) + std::string("cc"); + return is_file_exists(cc_file); + } + + return false; +} + +bool is_pointer(clang::QualType qual_type) { + std::string type_str = qual_type.getAsString(); + return 
ends_with(type_str, "*"); +} + +bool is_proto_map_string_float(clang::QualType qual_type) { + if (auto inner_type = get_map_proto_inner_type(qual_type)) { + if (inner_type->first == "absl::string_view" && inner_type->second == "float") { + return true; + } + } + + return false; +} + +bool is_proto_map_string_float_ptr(clang::QualType qual_type) { + return is_proto_map_string_float(qual_type) && is_pointer(qual_type); +} + +bool is_proto_map_string_float_iter(clang::QualType qual_type) { + std::string type_str = qual_type.getAsString(); + + static std::regex p("(const )?(class )?(::)?google::protobuf::Map< ?(::)?std::string, float ?>::(const_)?iterator"); + std::smatch m; + return std::regex_match(type_str, m, p); +} + +std::string trim_exists(const std::string& s) { + if (ends_with(s, ".exists")) { + return s.substr(0, s.size() - 7); + } + + return s; +} + +bool is_int_type(const std::string& type_str) { + static std::regex p("(::)?google::protobuf::"); + static std::regex p_const("const ?"); + + static std::regex p_int("u?int(32|64)_?t?"); + + std::string s = std::regex_replace(lower(type_str), p, ""); + s = std::regex_replace(s, p_const, ""); + + if (s == "size_t") { + return true; + } + + std::smatch m; + if (std::regex_match(s, m, p_int)) { + return true; + } + + return false; +} + +bool is_bool_type(const std::string& type_str) { + return lower(type_str) == "bool"; +} + +bool is_float_type(const std::string& type_str) { + if (type_str == "float" || type_str == "double") { + return true; + } + + return false; +} + +bool is_repeated_proto_list_leaf_type(clang::QualType qual_type) { + if (is_var_proto_list(qual_type)) { + if (absl::optional inner_type = get_repeated_proto_inner_type(qual_type)) { + if (is_string(*inner_type)) { + return true; + } + if (is_int_type(*inner_type) || is_bool_type(*inner_type) || is_float_type(*inner_type)) { + return true; + } + } + } + + return false; +} + +std::vector get_int_list_values_from_init_str(const std::string& s) { + 
static std::regex p_space(" +"); + static std::regex p("\\{([\\d ,]+)\\}"); + + std::vector res; + + LOG(INFO) << "s: " << s; + std::smatch m; + if (std::regex_match(s, m, p)) { + if (m.size() > 0) { + std::string value_str = m[m.size() - 1]; + LOG(INFO) << "match value: " << value_str; + + value_str = std::regex_replace(value_str, p_space, ""); + if (value_str.size() == 0) { + LOG(INFO) << "cannot find init values from: " << s; + return res; + } + + std::vector arr = absl::StrSplit(value_str, ","); + for (size_t i = 0; i < arr.size(); i++) { + if (is_integer(arr[i])) { + res.push_back(std::stoi(arr[i])); + } + } + + return res; + } + } + + return res; +} + +std::string add_filename_suffix(const std::string& filename, const std::string& suffix) { + std::vector arr = absl::StrSplit(filename, "."); + if (arr.size() == 2) { + return arr[0] + suffix + "." + arr[1]; + } else { + return arr[0] + suffix; + } +} + +bool is_str_from_reco_user_info(const std::string& bs_enum_str) { + return bs_enum_str.find("reco_user_info") != std::string::npos; +} + +std::string replace_simple_text(const std::string &s) { + static std::regex p("_Bool"); + static std::regex p_empty_line("\n *\n"); + static std::regex p_empty_line_semicoln("\n *;"); + + std::string s1 = std::regex_replace(s, p, "bool"); + s1 = std::regex_replace(s1, p_empty_line_semicoln, ""); + + return s1; +} + +std::string insert_str_at_block_begin(const std::string& s, const std::string& new_str) { + std::ostringstream oss; + + if (new_str.size() > 0) { + size_t pos = s.find("{"); + if (pos != std::string::npos) { + oss << s.substr(0, pos + 1) << "\n" + << new_str << "\n" + << s.substr(pos + 1); + } + } + + return oss.str(); +} + +size_t find_bs_equal_end(const std::string &s) { + size_t pos = s.find("bs == nullptr"); + + if (pos == std::string::npos) { + pos = s.find("bs== nullptr"); + } + + if (pos == std::string::npos) { + pos = s.find("bs ==nullptr"); + } + + if (pos == std::string::npos) { + return pos; + } + + for 
(; pos < s.size(); pos++) { + if (s[pos] == '}') { + break; + } + } + + return pos; +} + +std::string insert_str_after_bs_equal_end(const std::string &s, const std::string &new_str) { + std::ostringstream oss; + + if (new_str.size() > 0) { + size_t pos = find_bs_equal_end(s); + if (pos != std::string::npos && pos + 1 < s.size()) { + oss << s.substr(0, pos + 1) << "\n" + << new_str << "\n" + << s.substr(pos + 1); + } + } else { + oss << s; + } + + return oss.str(); +} + +std::string strip_suffix_semicolon_newline(const std::string& s) { + int pos = s.size() - 1; + for (; pos >= 0; pos--) { + if (s[pos] == '\n' || s[pos] == ';') { + continue; + } + + break; + } + + if (pos >= 0 && pos < s.size()) { + return s.substr(0, pos + 1); + } else { + return s; + } +} + +} // tool +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/Tool.h b/convert/Tool.h new file mode 100644 index 0000000..bed402c --- /dev/null +++ b/convert/Tool.h @@ -0,0 +1,318 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Declares clang::SyntaxOnlyAction. +#include "clang/Frontend/FrontendActions.h" +#include "clang/Tooling/CommonOptionsParser.h" +#include "clang/Tooling/Tooling.h" + +// Declares llvm::cl::extrahelp. 
+#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" + +#include "clang/Lex/Preprocessor.h" +#include "clang/Rewrite/Core/Rewriter.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/AST/AST.h" +#include "clang/AST/ASTConsumer.h" +#include "llvm/Support/raw_ostream.h" +#include "clang/Frontend/CompilerInstance.h" +#include "llvm/ADT/StringRef.h" + +#include "Config.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +using nlohmann::json; +using llvm::dyn_cast; + +class Env; + +clang::SourceRange find_source_range(clang::Stmt* stmt); + +std::string stmt_to_string(clang::Stmt* stmt, unsigned suppressTagKeyword = 0); + +clang::Expr* get_first_caller(clang::Expr* initExpr, const Env* env_ptr = nullptr); + +const clang::TemplateArgumentList* get_proto_map_args_list(clang::QualType qualType); + +bool is_map_value_builtin(clang::QualType qualType); + +bool is_check_item_pos(clang::IfStmt* ifStmt); + +bool is_prefix(const std::string& s); + +std::map> collect_prefix(json d); + +void collect_prefix_recursive(json d, std::set* str_set); + +json split_feature_def(json d, + const std::map>& prefixs, + const std::map& enum_map); + +json split_feature_def_recursive(const json& d, + std::string prefix, + const std::map& enum_map); + +json replace_enum(const json& d, + const std::map& enum_map); + +bool is_basic_type(clang::QualType qual_type); + +bool is_action_detail_find_expr(const std::string& adlog_expr); +bool is_action_detail_list_expr(const std::string& adlog_expr); +bool is_action_detail_find_expr(clang::Expr* expr, const Env& env); +bool is_action_detail_list_expr(clang::Expr* expr, const Env& env); + +bool is_special_adlog_expr(const std::string& adlog_expr); +bool is_special_adlog_expr(clang::Expr* expr, const Env& env); + +bool is_integer(const std::string & s); + +bool starts_with(const std::string& s, const std::string& x); +bool ends_with(const std::string& s, const std::string& x); + +std::string 
lower(const std::string& s); + +absl::optional find_common_attr_int_value(clang::Expr* expr); +absl::optional find_common_attr_int_value_from_expr(clang::Expr* expr); +absl::optional find_common_attr_enum_name_from_expr(clang::Expr* expr); + +template +std::string pointer_to_str(T* ptr) { + std::ostringstream oss; + oss << ptr; + return oss.str(); +} + +bool ends_with_ignore_space(const std::string& s, char c); +bool is_only_space(const std::string& s); +bool is_only_semicolon(const std::string& s); +std::string fix_semicolon(const std::string& s); + +namespace tool { + +bool is_middle_node_expr(clang::Expr* expr, const Env& env); +bool is_middle_node_root(const std::string& s); + +/// 中间节点根节点,如 GetLiveInfo(adlog.item(pos)) +bool is_middle_node_expr_root(clang::Expr* expr, const Env& env); + +bool is_common_info_vector(clang::QualType qual_type); + +/// 待修复,需要更精确,目前逻辑和 is_common_info_struct 一样 +bool is_common_info_enum(clang::QualType qual_type); + +/// proto 中的 reptead common info, 目前的实现不是很准确,会将 attr.int_list_value() 也当做 +/// repeated_common_info 节点, 需要再更精确些 +bool is_repeated_common_info(clang::QualType qual_type); + +bool is_repeated_common_info_size(const std::string& method_name); + +bool is_common_info_struct(clang::QualType qual_type); + +bool is_combine_feature(const std::string& feature_type); + +bool is_user_feature(const std::string& feature_type); + +bool is_item_feature(const std::string& feature_type); + +bool is_sparse_feature(const std::string& feature_type); + +bool is_dense_feature(const std::string& feature_type); + +bool is_item_field(const std::string& s); + +bool is_adlog_user_field(const std::string& s); + +bool is_adlog_field(const std::string& s); + +bool is_reco_user_field(const std::string& s); + +bool is_item_type_enum(clang::QualType qual_type); + +bool is_ad_callback_log_enum(clang::QualType qual_type); + +bool is_ad_action_type_enum(clang::QualType qual_type); + +bool is_ad_enum(clang::QualType qual_type); + +bool 
is_basic_array(clang::QualType qual_type); + +bool is_builtin_simple_type(clang::QualType qual_type); + +std::string get_bs_scalar_exists_expr(Env* env_ptr, + const std::string& common_info_prefix, + const std::string& value_type); + +bool is_int32_type(const std::string& type_str); + +std::string get_builtin_type_str(clang::QualType qual_type); + +bool is_action_detail_map(clang::QualType qual_type); + +std::string get_exists_name(const std::string& name); + +std::string fix_std_string(const std::string& s); + +std::string fix_string_view(const std::string& s); + +std::string get_bs_correspond_path(const std::string& filename); + +std::string read_file_to_string(const std::string& filename); + +bool is_bs_already_rewritten(const std::string& filename); + +bool is_file_exists(const std::string& name); + +std::string rm_continue_break(const std::string& s); + +std::string find_last_include(const std::string& content); + +bool is_string(const std::string& type_str); +bool is_string(clang::QualType qual_type); + +bool is_char_arr(clang::QualType qual_type); + +std::string add_quote(const std::string& s); + +bool is_from_info_util(const std::string& s); + +std::string fix_ad_enum(const std::string& s); + +inline std::string bool_to_string(bool v) { return v ? 
"true" : "false"; } + +std::string adlog_to_bs_enum_str(const std::string& s); + +std::string dot_underscore_to_camel(const std::string& s); + +bool is_var_proto_list(clang::QualType qualType); +bool is_var_proto_map(clang::QualType qualType); +bool is_var_proto_message(clang::QualType qual_type); + +bool is_repeated_proto_message(clang::QualType qual_type); + +std::vector find_common_info_values_in_file(const std::string& filename); + +bool is_skip(const std::string& feature_name); + +bool is_enum_decl(clang::Expr* expr); + +bool is_implicit_loop_var(const std::string& name); + +bool is_from_implicit_loop_var(const std::string &s); + +bool is_cxx_default_arg_expr(clang::Expr* expr); + +std::string trim_this(const std::string& s); + +bool is_int_vector(clang::QualType qual_type); + +std::string rm_surround_big_parantheses(const std::string& s); +std::string add_surround_big_parantheses(const std::string& s); + +bool is_action_info(const std::string& type_str); +bool is_action_info(clang::QualType qual_type); + +bool is_repeated_proto(clang::QualType qual_type); +bool is_repeated_proto_iterator(clang::QualType qual_type); + +bool is_map_proto(clang::QualType qual_type); +bool is_map_proto_iterator(clang::QualType qual_type); + +bool is_repeated_action_info(clang::QualType qual_type); + +std::string get_bs_type_str(clang::QualType qual_type, bool is_combine_user); + +absl::optional get_repeated_proto_inner_type(clang::QualType qual_type); +absl::optional get_repeated_proto_iterator_inner_type(clang::QualType qual_type); + +absl::optional> get_map_proto_inner_type(clang::QualType qual_type); + +std::string get_bs_repeated_field_type(const std::string& type_str, + bool is_combine_user); + +std::string get_bs_map_field_type(const std::string& key_type_str, + const std::string& value_type_str, + bool is_combine_user); + +bool is_repeated_proto_ptr(clang::QualType qual_type); + +bool is_reco_proto(clang::QualType qual_type); + +std::string replace_adlog_to_bslog(const 
std::string& s); + +bool is_map_repeated_int_list_type(clang::QualType qual_type); + +bool is_map_int_int_type(clang::QualType qual_type); + +clang::Expr* get_inner_expr(clang::Expr* expr); + +std::string rm_empty_line(const std::string& s); + +std::string trim_tail_underscore(const std::string& s); + +bool is_common_info_list_or_map_loop_stmt(clang::Stmt* stmt); + +std::string trim_tail_size(const std::string& s); + +bool contains_var(const std::string& expr_str, const std::string& var_name); + +bool is_map_item_type_int_type(clang::QualType qual_type); + +std::string get_last_type_str(const std::string& s); + +bool has_cc_file(const std::string& filename); + +bool is_pointer(clang::QualType qual_type); +bool is_proto_map_string_float(clang::QualType qual_type); +bool is_proto_map_string_float_ptr(clang::QualType qual_type); +bool is_proto_map_string_float_iter(clang::QualType qual_type); + +std::string trim_exists(const std::string& s); + +bool is_deref_implicit_loop_begin(const std::string s); + +bool is_int_type(const std::string& type_str); +bool is_bool_type(const std::string& type_str); +bool is_float_type(const std::string& type_str); +bool is_repeated_proto_list_leaf_type(clang::QualType qual_type); + +std::vector get_int_list_values_from_init_str(const std::string& s); +std::string add_filename_suffix(const std::string& filename, const std::string& suffix); + +bool is_str_from_reco_user_info(const std::string& bs_enum_str); + +std::string replace_simple_text(const std::string &s); + +std::string insert_str_at_block_begin(const std::string &s, const std::string &new_str); + +std::string strip_suffix_semicolon_newline(const std::string& s); + +// 找到 bs equal if 结尾。 +size_t find_bs_equal_end(const std::string& s); + +std::string insert_str_after_bs_equal_end(const std::string &s, const std::string &new_str); + +} // namespace tool + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/Traits.h b/convert/Traits.h new 
file mode 100644 index 0000000..168dc4b --- /dev/null +++ b/convert/Traits.h @@ -0,0 +1,14 @@ +#pragma once + +#include + +namespace ks { +namespace ad_algorithm { +namespace convert { + +template +using constant_t = std::integral_constant; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/Type.h b/convert/Type.h new file mode 100644 index 0000000..bce3248 --- /dev/null +++ b/convert/Type.h @@ -0,0 +1,28 @@ +#pragma once + +namespace ks { +namespace ad_algorithm { +namespace convert { + +enum class ExprType { + NONE, + ADLOG_NORMAL, + ADLOG_COMMON_INFO_SCALAR, + ADLOG_COMMON_INFO_LIST, + ADLOG_COMMON_INFO_MAP, + ADLOG_MIDDLE_NODE_ROOT, + + /// attr.name_value() + ADLOG_COMMON_INFO_NAME_VALUE, + + NULLPTR, + TEMPLATE_INT_REF, + INT, + ACTION_DETAIL_FIXED_GET, + ACTION_DETAIL_FIXED_HAS, + REPEATED_PROTO +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/bs_field.py b/convert/bs_field.py new file mode 100644 index 0000000..af0e631 --- /dev/null +++ b/convert/bs_field.py @@ -0,0 +1,139 @@ +import re +import sys +import logging +import fire +import sh +import json +import codecs + +LOG_FORMAT = "%(asctime)s - %(levelname)s [%(filename)s:%(lineno)s - %(funcName)s] - %(message)s" +logging.basicConfig(level=logging.INFO, format=LOG_FORMAT) +logger = logging.getLogger(__name__) + +def find_bs_field(filename: str='data/adlog_fields.json'): + config = json.load(codecs.open(filename, 'r', 'utf-8')) + all_bs_fields = set() + for name in config: + feature = config[name] + bs_fields = set() + logger.info('name: %s', name) + for x in feature['adlog_fields']: + for int_var_name in feature['int_var']: + # p1 = re.compile('(\(\*(.*)find\(%s\)->second.list\(\).begin\(\)\))(.*)' % (int_var_name)) + # if p1.search(x) != None: + # s = re.sub(p1, '\\2.key:%d.\\3' % (feature['int_var'][int_var_name]), x).replace('()', '') + # bs_fields.add(s) + # logger.info('s: %s', s) + # break + + p = 
re.compile('(find\(%s\))->second' % (int_var_name)) + if p.search(x) != None: + s = re.sub(p, 'key:%d' % (feature['int_var'][int_var_name]), x).replace('()', '') + bs_fields.add(s) + logger.info('s: %s', s) + break + + for common_info_var_name in feature['common_info_var']: + p = re.compile('(\(\*(.*)\.begin\(\)\)).(map[a-z0-9_]+)') + if p.search(x) != None: + logger.info('p.findall: %s', p.findall(x)) + s = re.sub(p, '\\2.key:%d' % (feature['common_info_var'][common_info_var_name]), x).replace('()', '') + bs_fields.add(s + '.key') + bs_fields.add(s + '.value') + logger.info('common_info s: %s.key', s) + logger.info('common_info s: %s.value', s) + break + + p1 = re.compile('(\(\*(.*)\.begin\(\)\)).((int|float|string)[a-z0-9_]+)') + if p1.search(x) != None: + logger.info('p1.findall: %s', p1.findall(x)) + s = re.sub(p1, '\\2.key:%d' % (feature['common_info_var'][common_info_var_name]), x).replace('()', '') + bs_fields.add(s) + logger.info('common_info s: %s', s) + break + + p2 = re.compile('((.*)\.common_info_attr\(i\)).(map[a-z0-9_]+)') + if p2.search(x) != None: + logger.info('p.findall: %s', p2.findall(x)) + s = re.sub(p2, '\\2.common_info_attr.key:%d' % (feature['common_info_var'][common_info_var_name]), x).replace('()', '') + bs_fields.add(s + '.key') + bs_fields.add(s + '.value') + logger.info('common_info s: %s.key', s) + logger.info('common_info s: %s.value', s) + break + + p3 = re.compile('((.*)\.common_info_attr\(i\)).((int|float|string)[a-z0-9_]+)') + if p3.search(x) != None: + logger.info('p3.findall: %s', p3.findall(x)) + s = re.sub(p3, '\\2.common_info_attr.key:%d' % (feature['common_info_var'][common_info_var_name]), x).replace('()', '') + bs_fields.add(s) + logger.info('common_info s: %s', s) + break + + bs_fields.add(x.replace('()', '')) + + arr = [] + for x in bs_fields: + if x.startswith('(*'): + continue + if x.endswith('name_value'): + continue + if x.endswith('adlog.item(pos)'): + continue + if x.endswith('.empty'): + continue + if 
x.find('common_info_attr(i).') > 0: + continue + if x.find('has_') > 0: + continue + + s = x.replace('item(pos)', 'item') + p4 = re.compile('(.*).list_value(\(\d+\))') + if p4.search(x) != None: + logger.info('p4.findall: %s', p4.findall(x)) + s = re.sub(p4, '\\1.list_value', s) + + if s.find('(') > 0: + continue + + p = re.compile('^[a-z0-9A-Z_:\.]+$') + if not p.match(s): + continue + + arr.append(s) + all_bs_fields.add(s) + + feature['bs_fields'] = arr + + json.dump(config, codecs.open('data/adlog_fields_bs.json', 'w', 'utf-8'), ensure_ascii=False, indent=4) + + all_bs_fields_arr = list(all_bs_fields) + json.dump(all_bs_fields_arr, codecs.open('data/all_bs_fields.json', 'w', 'utf-8'), ensure_ascii=False, indent=4) + + bs_fields_enum = [x.replace('.', '_').replace(':', '_') for x in all_bs_fields_arr] + logger.info('len: %d, %d', len(all_bs_fields_arr), len(bs_fields_enum)) + + feature_config_map = json.load(codecs.open('data/feature_config_map.json', 'r', 'utf-8'))['mapping'] + + enum_arr = [] + content = "enum BsFieldEnum {\n" + for i in range(len(all_bs_fields_arr)): + if all_bs_fields_arr[i] in feature_config_map: + enum_value = int(feature_config_map[all_bs_fields_arr[i]][1]) + enum_name = all_bs_fields_arr[i].replace('.', '_').replace(':', '_') + enum_arr.append((enum_name, enum_value)) + # content += ' %s = %d,\n' % (enum_name, enum_value) + else: + logger.info("cannot find id for %s", all_bs_fields_arr[i]) + + enum_arr.sort(key=lambda x: x[1]) + + for (enum_name, enum_value) in enum_arr: + content += ' %s = %d,\n' % (enum_name, enum_value) + + content += '}' + + codecs.open('data/bs_fields_enum.h', 'w', 'utf-8').write(content) + +if __name__ == '__main__': + fire.Fire() diff --git a/convert/expr_parser/ExprParserQueryToken.cpp b/convert/expr_parser/ExprParserQueryToken.cpp new file mode 100644 index 0000000..5353e5a --- /dev/null +++ b/convert/expr_parser/ExprParserQueryToken.cpp @@ -0,0 +1,84 @@ +#include +#include +#include +#include +#include +#include 
+#include + +#include "clang/AST/AST.h" + +#include "../Deleter.h" +#include "../Tool.h" +#include "../Type.h" +#include "../Env.h" +#include "../ExprInfo.h" +#include "../info/IfInfo.h" +#include "../info/LoopInfo.h" +#include "../info/NewActionParam.h" +#include "../info/NewVarDef.h" +#include "ExprParserQueryToken.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void update_env_query_token_field_def(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_photo_text_call()) { + if (expr_info_ptr->call_expr_params_size() == 3) { + auto param = expr_info_ptr->call_expr_param(2); + if (param != nullptr && param->is_common_attr_info_enum()) { + if (absl::optional int_value = param->get_common_attr_int_value()) { + if (auto feature_info = env_ptr->mutable_feature_info()) { + // add field def + std::ostringstream oss; + oss << "BSFixedCommonInfo> " + "BSGetPhotoText{" + << tool::add_quote("adlog.item.ad_dsp_info.common_info_attr") + << ", " << *int_value << "};"; + std::string bs_enum_str = "adlog_item_ad_dsp_info_common_info_attr_key_" + + std::to_string(*int_value); + LOG(INFO) << "add photo_text field_def, bs_enum_str: " << bs_enum_str + << ", field_def: " << oss.str(); + feature_info->add_field_def(bs_enum_str, + "GetPhotoText", + oss.str(), + NewVarType::MAP, + AdlogVarType::GET_PHOTO_TEXT); + } + } else { + LOG(INFO) << "cannot find int_value from photo text call: " << expr_info_ptr->origin_expr_str(); + } + } + } + } + + // 添加 feature_info 信息。 + if (expr_info_ptr->is_query_token_call()) { + if (auto feature_info = env_ptr->mutable_feature_info()) { + feature_info->set_has_query_token(true); + } + } +} + +void update_env_query_token_loop(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_proto_map_string_float_iter_type()) { + if (expr_info_ptr->is_from_query_token() || expr_info_ptr->is_from_photo_text()) { + if (auto& loop_info = env_ptr->cur_mutable_loop_info()) { + if (loop_info->loop_stage() == LoopStage::INIT) { 
+ LOG(INFO) << "set_is_query_token: true" + << ", loop_var_expr: " + << stmt_to_string(expr_info_ptr->get_proto_map_string_float_loop_var_expr()); + loop_info->set_is_query_token_loop(true); + if (loop_info->loop_var_expr() == nullptr) { + loop_info->set_loop_var_expr(expr_info_ptr->get_proto_map_string_float_loop_var_expr()); + } + } + } + } + } +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/expr_parser/ExprParserQueryToken.h b/convert/expr_parser/ExprParserQueryToken.h new file mode 100644 index 0000000..6dcf5c1 --- /dev/null +++ b/convert/expr_parser/ExprParserQueryToken.h @@ -0,0 +1,26 @@ +#pragma once + +#include + +#include + +#include "clang/AST/Type.h" +#include "clang/AST/Expr.h" + +#include "../Env.h" +#include "../Tool.h" +#include "../ExprInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +/// QueryToken 相关逻辑。 +/// 添加 PhotoText field_def。 +void update_env_query_token_field_def(ExprInfo* expr_info_ptr, Env* env_ptr); + +void update_env_query_token_loop(ExprInfo* expr_info_ptr, Env* env_ptr); + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/handler/AdlogFieldHandler.cpp b/convert/handler/AdlogFieldHandler.cpp new file mode 100644 index 0000000..a86c054 --- /dev/null +++ b/convert/handler/AdlogFieldHandler.cpp @@ -0,0 +1,9 @@ +#include "./AdlogFieldHandler.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/handler/AdlogFieldHandler.h b/convert/handler/AdlogFieldHandler.h new file mode 100644 index 0000000..715cf13 --- /dev/null +++ b/convert/handler/AdlogFieldHandler.h @@ -0,0 +1,97 @@ +#pragma once + +#include +#include + +#include "clang/Rewrite/Core/Rewriter.h" +#include "clang/Basic/SourceLocation.h" + +#include "../Tool.h" +#include "../Env.h" +#include "../rule/PreRule.h" +#include "../rule/GeneralRule.h" +#include 
"../rule/CommonInfoRule.h" +#include "../rule/MiddleNodeRule.h" +#include "../rule/ActionDetailRule.h" +#include "../rule/SeqListRule.h" +#include "../rule/DoubleListRule.h" +#include "../rule/ProtoListRule.h" +#include "../rule/AddFeatureMethodRule.h" +#include "../rule/HashFnRule.h" +#include "../rule/QueryTokenRule.h" +#include "../rule/StrRule.h" +#include "StrictRewriter.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class ExprInfo; + +class AdlogFieldHandler { + public: + explicit AdlogFieldHandler(clang::Rewriter& rewriter): // NOLINT + rewriter_(rewriter), + pre_rule_(rewriter), + general_rule_(rewriter), + common_info_rule_(rewriter), + action_detail_rule_(rewriter), + middle_node_rule_(rewriter), + seq_list_rule_(rewriter), + double_list_rule_(rewriter), + proto_list_rule_(rewriter), + add_feature_method_rule_(rewriter), + hash_fn_rule_(rewriter), + query_token_rule_(rewriter), + str_rule_(rewriter) {} + + template + void process(T t, Env* env_ptr) { + pre_rule_.process(t, env_ptr); + + common_info_rule_.process(t, env_ptr); + middle_node_rule_.process(t, env_ptr); + action_detail_rule_.process(t, env_ptr); + double_list_rule_.process(t, env_ptr); + seq_list_rule_.process(t, env_ptr); + proto_list_rule_.process(t, env_ptr); + add_feature_method_rule_.process(t, env_ptr); + hash_fn_rule_.process(t, env_ptr); + query_token_rule_.process(t, env_ptr); + str_rule_.process(t, env_ptr); + + /// 由于一些插入变量等逻辑, general_rule_ 必须放到最后一个。 + general_rule_.process(t, env_ptr); + } + + private: + StrictRewriter rewriter_; + + PreRule pre_rule_; + + CommonInfoRule common_info_rule_; + + MiddleNodeRule middle_node_rule_; + + ActionDetailRule action_detail_rule_; + + DoubleListRule double_list_rule_; + + SeqListRule seq_list_rule_; + + ProtoListRule proto_list_rule_; + + AddFeatureMethodRule add_feature_method_rule_; + + HashFnRule hash_fn_rule_; + + QueryTokenRule query_token_rule_; + + StrRule str_rule_; + + GeneralRule general_rule_; +}; + +} // 
namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/handler/BSFieldHandler.cpp b/convert/handler/BSFieldHandler.cpp new file mode 100644 index 0000000..68cd112 --- /dev/null +++ b/convert/handler/BSFieldHandler.cpp @@ -0,0 +1,9 @@ +#include "./BSFieldHandler.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/handler/BSFieldHandler.h b/convert/handler/BSFieldHandler.h new file mode 100644 index 0000000..5148f6c --- /dev/null +++ b/convert/handler/BSFieldHandler.h @@ -0,0 +1,42 @@ +#pragma once + +#include +#include +#include + +#include "clang/Rewrite/Core/Rewriter.h" +#include "clang/Basic/SourceLocation.h" + +#include "../Tool.h" +#include "../Env.h" +#include "./StrictRewriter.h" +#include "../rule/BSFieldOrderRule.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +using nlohmann::json; + +class ExprInfo; + +class BSFieldHandler { + private: + StrictRewriter rewriter_; + BSFieldOrderRule bs_field_order_rule_; + + public: + explicit BSFieldHandler(clang::Rewriter& rewriter): // NOLINT + rewriter_(rewriter), + bs_field_order_rule_(rewriter) {} + + template json process_to_json(T t, Env* env_ptr) { + bs_field_order_rule_.process_to_json(t, env_ptr); + + return json::array(); + } +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/handler/FieldDeclHandler.cpp b/convert/handler/FieldDeclHandler.cpp new file mode 100644 index 0000000..39493f1 --- /dev/null +++ b/convert/handler/FieldDeclHandler.cpp @@ -0,0 +1,32 @@ +#include +#include +#include + +#include "../Env.h" +#include "../Tool.h" +#include "../ExprInfo.h" +#include "../ExprParser.h" +#include "FieldDeclHandler.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void FieldDeclHandler::process(clang::InitListExpr *init_list_expr, Env* env_ptr) { +} + +void 
FieldDeclHandler::process(clang::DeclRefExpr* decl_ref_expr, Env* env_ptr) { + auto expr_info_ptr = parse_expr(decl_ref_expr, env_ptr); + if (expr_info_ptr == nullptr) { + return; + } + + if (expr_info_ptr->is_item_type_enum()) { + std::string new_text = std::string("bs::ItemType::") + expr_info_ptr->get_ad_enum_name(); + rewriter_.ReplaceText(decl_ref_expr, new_text); + } +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/handler/FieldDeclHandler.h b/convert/handler/FieldDeclHandler.h new file mode 100644 index 0000000..a72e00a --- /dev/null +++ b/convert/handler/FieldDeclHandler.h @@ -0,0 +1,36 @@ +#pragma once + +#include +#include + +#include "clang/AST/AST.h" +#include "clang/AST/Expr.h" +#include "clang/AST/Stmt.h" +#include "clang/Basic/SourceLocation.h" + +#include "StrictRewriter.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +/// 处理 FieldDecl +class FieldDeclHandler { + public: + explicit FieldDeclHandler(clang::Rewriter& rewriter): rewriter_(rewriter) {} // NOLINT + + template + void process(T t, Env* env_ptr) {} + + void process(clang::InitListExpr *init_list_expr, Env* env_ptr); + void process(clang::DeclRefExpr* decl_ref_expr, Env* env_ptr); + + private: + StrictRewriter rewriter_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/handler/LazyReplace.h b/convert/handler/LazyReplace.h new file mode 100644 index 0000000..dbcb7c3 --- /dev/null +++ b/convert/handler/LazyReplace.h @@ -0,0 +1,50 @@ +#pragma once + +#include + +#include +#include +#include +#include +#include +#include + +#include "clang/Rewrite/Core/Rewriter.h" +#include "clang/AST/Stmt.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +using StmtReplacement = std::pair; + +using StmtReplace = std::function; + +/// 注意: env_ptr 是局部变量, 使用方应该保证 LazyReplace 初始化与 run 执行的时候 env_ptr 都是合法指针。 +/// 比较安全的方式应该是使用 shared_ptr。 +/// +/// 
不过目前处理的情况比较常见, common info vector 定义的时候初始化 LazyReplace, 遇到调用 int_value() +/// 等 common info 方法时候调用 run, 因此调用时候的声明周期一定是包含在定义时候的声明周期之类的, 直接使用 +/// Env* 也可以。 +class LazyReplace { + public: + explicit LazyReplace(Env* env_ptr, clang::Stmt* stmt, StmtReplace stmt_replace): + env_ptr_(env_ptr), + stmt_(stmt), + stmt_replace_(stmt_replace) {} + + StmtReplacement run() { + return stmt_replace_(env_ptr_, stmt_); + } + + private: + Env* env_ptr_; + clang::Stmt* stmt_; + StmtReplace stmt_replace_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/handler/LogicHandler.cpp b/convert/handler/LogicHandler.cpp new file mode 100644 index 0000000..5eeeae4 --- /dev/null +++ b/convert/handler/LogicHandler.cpp @@ -0,0 +1,113 @@ +#include "LogicHandler.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +json LogicHandler::process_to_json(clang::Stmt* stmt, Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::IfStmt* stmt, Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::Expr* expr, Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::DeclStmt* decl_stmt, Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::CallExpr* call_expr, Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::CXXMemberCallExpr* cxx_member_call_expr, Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::MemberExpr* member_expr, Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::UnaryOperator* unary_operator, Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::BinaryOperator* binary_operator, Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::CXXOperatorCallExpr* cxx_operator_call_expr, Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::ForStmt* for_stmt, 
Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::CXXForRangeStmt* cxx_for_range_stmt, Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::DeclRefExpr* decl_ref_expr, Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::CXXNullPtrLiteralExpr* cxx_null_ptr_literal_expr, Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::GNUNullExpr* gnu_null_expr, Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::CXXThisExpr* cxx_this_expr, Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::ReturnStmt* return_stmt, Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::BreakStmt* break_stmt, Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::ContinueStmt* continue_stmt, Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::IntegerLiteral* integer_literal, Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::SwitchStmt* switch_stmt, Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::CaseStmt* case_stmt, Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::ConstantExpr* constant_expr, Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::CXXDependentScopeMemberExpr* cxx_dependent_scope_member_expr, Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::ArraySubscriptExpr* array_subscript_expr, Env* env_ptr) { + return nullptr; +} + +json LogicHandler::process_to_json(clang::CXXFunctionalCastExpr* cxx_functional_cast_expr, Env* env_ptr) { + return nullptr; +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/handler/LogicHandler.h b/convert/handler/LogicHandler.h new file mode 100644 index 0000000..63ff2a0 --- /dev/null +++ 
b/convert/handler/LogicHandler.h @@ -0,0 +1,53 @@ +#pragma once + +#include +#include +#include + +#include + +#include "../Tool.h" +#include "../Env.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +using nlohmann::json; +class ExprInfo; + +/// 解析特征中的逻辑,直接转换成 Op 节点。 +class LogicHandler { + public: + json process_to_json(clang::Stmt* stmt, Env* env_ptr); + json process_to_json(clang::IfStmt* stmt, Env* env_ptr); + json process_to_json(clang::Expr* expr, Env* env_ptr); + json process_to_json(clang::DeclStmt* decl_stmt, Env* env_ptr); + json process_to_json(clang::CallExpr* call_expr, Env* env_ptr); + json process_to_json(clang::CXXMemberCallExpr* cxx_member_call_expr, Env* env_ptr); + json process_to_json(clang::MemberExpr* member_expr, Env* env_ptr); + json process_to_json(clang::UnaryOperator* unary_operator, Env* env_ptr); + json process_to_json(clang::BinaryOperator* binary_operator, Env* env_ptr); + json process_to_json(clang::CXXOperatorCallExpr* cxx_operator_call_expr, Env* env_ptr); + json process_to_json(clang::ForStmt* for_stmt, Env* env_ptr); + json process_to_json(clang::CXXForRangeStmt* cxx_for_range_stmt, Env* env_ptr); + json process_to_json(clang::DeclRefExpr* decl_ref_expr, Env* env_ptr); + json process_to_json(clang::CXXNullPtrLiteralExpr* cxx_null_ptr_literal_expr, Env* env_ptr); + json process_to_json(clang::GNUNullExpr* gnu_null_expr, Env* env_ptr); + json process_to_json(clang::CXXThisExpr* cxx_this_expr, Env* env_ptr); + json process_to_json(clang::ReturnStmt* return_stmt, Env* env_ptr); + json process_to_json(clang::BreakStmt* break_stmt, Env* env_ptr); + json process_to_json(clang::ContinueStmt* continue_stmt, Env* env_ptr); + json process_to_json(clang::IntegerLiteral* integer_literal, Env* env_ptr); + json process_to_json(clang::SwitchStmt* switch_stmt, Env* env_ptr); + json process_to_json(clang::CaseStmt* case_stmt, Env* env_ptr); + json process_to_json(clang::ConstantExpr* constant_expr, Env* env_ptr); + json 
process_to_json(clang::CXXDependentScopeMemberExpr* cxx_dependent_scope_member_expr, + Env* env_ptr); + json process_to_json(clang::ArraySubscriptExpr* array_subscript_expr, Env* env_ptr); + json process_to_json(clang::CXXFunctionalCastExpr* cxx_functional_cast_expr, Env* env_ptr); +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/handler/OverviewHandler.cpp b/convert/handler/OverviewHandler.cpp new file mode 100644 index 0000000..cfbb7c3 --- /dev/null +++ b/convert/handler/OverviewHandler.cpp @@ -0,0 +1,277 @@ +#include +#include +#include +#include "../Tool.h" +#include "../ExprInfo.h" +#include "../ExprParser.h" +#include "../info/MethodInfo.h" +#include "../info/ActionMethodInfo.h" +#include "../info/CommonInfoMultiIntList.h" +#include "OverviewHandler.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void OverviewHandler::process(clang::CXXMemberCallExpr* cxx_member_call_expr, Env* env_ptr) { + auto expr_info_ptr = parse_expr(cxx_member_call_expr, env_ptr); + if (expr_info_ptr == nullptr) { + return; + } + + if (env_ptr->get_method_name() != "Extract") { + LOG(INFO) << "expr: " << expr_info_ptr->origin_expr_str() + << ", type: " << expr_info_ptr->expr()->getType().getAsString() + << ", is_parent_repeated_action_info: " << expr_info_ptr->is_parent_repeated_action_info() + << ", is_parent_action_info: " << expr_info_ptr->is_parent_action_info(); + if (auto feature_info = env_ptr->mutable_feature_info()) { + MethodInfo& method_info = feature_info->touch_method_info(env_ptr->get_method_name()); + if (expr_info_ptr->is_parent_repeated_action_info()) { + if (expr_info_ptr->callee_name() == "size") { + std::string name = expr_info_ptr->parent()->origin_expr_str() + std::string("_size"); + method_info.add_new_action_field_param(expr_info_ptr->parent()->origin_expr_str(), + "size", + "int", + env_ptr->is_combine_feature(), + name); + LOG(INFO) << "add_new_action_field_param in overview, origin_name: 
" + << expr_info_ptr->parent()->origin_expr_str() + << ", name: size, new_name: " << name; + } + } else if (expr_info_ptr->is_parent_action_info()) { + std::regex p("(.*?)\\[(\\w+)\\]\\.(\\w+)\\(\\)"); + std::string origin_name = std::regex_replace(expr_info_ptr->raw_expr_str(), p, "$1"); + std::string name = std::regex_replace(expr_info_ptr->raw_expr_str(), p, "$1_$3"); + std::string new_text = std::regex_replace(expr_info_ptr->raw_expr_str(), p, "$1_$3.Get($2)"); + std::string type_str = tool::get_builtin_type_str(expr_info_ptr->expr()->getType()); + method_info.add_new_action_field_param(origin_name, + expr_info_ptr->callee_name(), + type_str, + env_ptr->is_combine_feature(), + name); + LOG(INFO) << "add_new_action_field_param in overview, origin_name: " << origin_name + << ", name: " << name; + } + } + } + + if (expr_info_ptr->is_repeated_common_info()) { + if (auto feature_info = env_ptr->mutable_feature_info()) { + if (const absl::optional& prefix = env_ptr->get_common_info_prefix()) { + auto &common_info_prepare = feature_info->mutable_common_info_prepare(); + if (!common_info_prepare) { + common_info_prepare.emplace(*prefix); + } + } + } + } + + // 注册每个叶子节点 bs_enum_str 对应的 NewVarType。 + // common info 比较特殊。 + if (expr_info_ptr->is_from_adlog() && + expr_info_ptr->need_replace() && + expr_info_ptr->is_basic() && + !expr_info_ptr->is_repeated_common_info_size() && + !expr_info_ptr->is_from_repeated_common_info()) { + if (auto feature_info = env_ptr->mutable_feature_info()) { + NewVarType new_var_type = NewVarType::SCALAR; + if (expr_info_ptr->is_from_list() || expr_info_ptr->is_common_info_list_method()) { + new_var_type = NewVarType::LIST; + } else if (expr_info_ptr->is_from_map() || expr_info_ptr->is_common_info_map_method()) { + new_var_type = NewVarType::MAP; + } + + std::string bs_enum_str = expr_info_ptr->get_bs_enum_str(); + if (tool::is_adlog_field(bs_enum_str)) { + LOG(INFO) << "add_bs_enum_var_type: " << bs_enum_str + << ", expr: " << 
expr_info_ptr->origin_expr_str() + << ", new_var_type: " << static_cast(new_var_type); + feature_info->add_bs_enum_var_type(bs_enum_str, new_var_type); + } else if (expr_info_ptr->is_from_middle_node()) { + // 来自中间节点。 + if (!ends_with(bs_enum_str, "_size")) { + std::string adlog_field = expr_info_ptr->get_adlog_field_str(); + if (expr_info_ptr->is_from_list()) { + std::string inner_type = tool::get_builtin_type_str(expr_info_ptr->expr()->getType()); + LOG(INFO) << "add_middle_node_bs_enum_var_type: " << bs_enum_str + << ", list_inner_type: " << inner_type << ", expr: " << expr_info_ptr->origin_expr_str() + << ", new_var_type: " << static_cast(new_var_type) + << ", adlog_field: " << adlog_field; + feature_info->add_middle_node_bs_enum_var_type( + bs_enum_str, new_var_type, adlog_field, inner_type); + } else { + LOG(INFO) << "add_middle_node_bs_enum_var_type: " << bs_enum_str + << ", scalar, expr: " << expr_info_ptr->origin_expr_str() + << ", new_var_type: " << static_cast(new_var_type) + << ", adlog_field: " << adlog_field; + feature_info->add_middle_node_bs_enum_var_type(bs_enum_str, new_var_type, adlog_field); + } + } + } + } + } +} + +void OverviewHandler::process(clang::CXXForRangeStmt* cxx_for_range_stmt, Env* env_ptr) { +} + +void OverviewHandler::process(clang::CXXOperatorCallExpr* cxx_operator_call_expr, Env* env_ptr) { + auto expr_info_ptr = parse_expr(cxx_operator_call_expr, env_ptr); + if (expr_info_ptr == nullptr) { + return; + } + + + if (expr_info_ptr->is_cxx_operator_call_expr()) { + // teams/ad/ad_algorithm/feature/fast/impl/extract_user_rtl_photo_id_deep_action_90d.h + // auto action_name = action_vec[i]; + // auto timestamp_name = timestamp_vec[i]; + // auto action_list = action_name2list[action_name]; + if (expr_info_ptr->callee_name() == "operator[]") { + if (expr_info_ptr->call_expr_params_size() == 2) { + auto param0_info_ptr = expr_info_ptr->call_expr_param(0); + auto param1_info_ptr = expr_info_ptr->call_expr_param(1); + if (param0_info_ptr 
!= nullptr && + param1_info_ptr != nullptr && + param1_info_ptr->is_integral() && + param1_info_ptr->is_from_int_list_member()) { + if (auto feature_info = env_ptr->mutable_feature_info()) { + if (const auto& common_info_prepare = feature_info->common_info_prepare()) { + if (const auto &common_info_prefix = common_info_prepare->prefix()) { + if (auto &common_info_multi_int_list = + feature_info->touch_common_info_multi_int_list(*common_info_prefix)) { + if (param0_info_ptr->is_map_repeated_int_list_type() || + param0_info_ptr->is_map_int_int_type()) { + std::string map_name = param0_info_ptr->origin_expr_str(); + if (absl::optional vec_name = + param1_info_ptr->find_int_list_member_name()) { + LOG(INFO) << "add_map_vec_connection: " << map_name + << ", " << *vec_name; + common_info_multi_int_list->add_map_vec_connection(map_name, *vec_name); + } + } + } + } + } + } + } + } + } + } +} + +void OverviewHandler::process(clang::BinaryOperator *binary_operator, Env *env_ptr) { + // 收集 common info 枚举或者模板参数 + auto expr_info_ptr = parse_expr(binary_operator, env_ptr); + if (expr_info_ptr == nullptr) { + return; + } + + if (binary_operator->isComparisonOp()) { + clang::Expr *left_expr = tool::get_inner_expr(binary_operator->getLHS()); + clang::Expr *right_expr = tool::get_inner_expr(binary_operator->getRHS()); + auto left_info_ptr = parse_expr(left_expr, env_ptr); + auto right_info_ptr = parse_expr(right_expr, env_ptr); + + if (left_info_ptr != nullptr && right_info_ptr != nullptr) { + if (auto feature_info = env_ptr->mutable_feature_info()) { + if (auto& common_info_prepare = feature_info->mutable_common_info_prepare()) { + if (left_info_ptr->is_name_value_method()) { + if (right_info_ptr->is_template_int_ref()) { + common_info_prepare->add_template_int_name(tool::trim_this(right_info_ptr->origin_expr_str())); + } else if (right_info_ptr->is_common_info_enum_member_ref()) { + common_info_prepare->add_template_int_name(tool::trim_this(right_info_ptr->origin_expr_str())); + } 
else if (right_info_ptr->is_common_attr_info_enum()) { + if (absl::optional enum_value = right_info_ptr->get_common_attr_int_value()) { + common_info_prepare->add_common_info_value(*enum_value); + } + } else if (right_info_ptr->is_integral()) { + if (absl::optional int_value = right_info_ptr->get_int_value()) { + common_info_prepare->add_common_info_value(*int_value); + } + } + } + + if (right_info_ptr->is_name_value_method()) { + if (left_info_ptr->is_template_int_ref()) { + common_info_prepare->add_template_int_name(left_info_ptr->origin_expr_str()); + } else if (left_info_ptr->is_common_info_enum_member_ref()) { + common_info_prepare->add_template_int_name(tool::trim_this(left_info_ptr->origin_expr_str())); + } else if (left_info_ptr->is_common_attr_info_enum()) { + if (absl::optional enum_value = left_info_ptr->get_common_attr_int_value()) { + common_info_prepare->add_common_info_value(*enum_value); + } + } else if (left_info_ptr->is_integral()) { + if (absl::optional int_value = left_info_ptr->get_int_value()) { + common_info_prepare->add_common_info_value(*int_value); + } + } + } + } + } + } + } +} + +void OverviewHandler::process(clang::CallExpr* call_expr, Env* env_ptr) { + auto expr_info_ptr = parse_expr(call_expr, env_ptr); + if (expr_info_ptr == nullptr) { + return; + } + + // 见 docs/get_value_from_action.md + // 示例: teams/ad/ad_algorithm/feature/fast/impl/extract_user_ad_click_no_lps.h + // add_feature 中会调用 get_value_from_Action 函数。需要将对应的参数添加到 add_feature 的 method_info 中。 + // get_value_from_Action 有两个重载版本,通过参数个数可以区分。 + // + // inline bool add_feature(...) { + // ... + // ks::ad_algorithm::get_value_from_Action( + // item_played5s_action_list, item_click_action_list, period, process_time, + // &item_click_industry_vec, &item_click_product_vec, &product_result, + // &industry_result); + // ... 
+ // } + if (env_ptr->get_method_name() == "add_feature") { + if (expr_info_ptr->callee_name() == ActionMethodInfo::name()) { + if (auto feature_info = env_ptr->mutable_feature_info()) { + MethodInfo &method_info = feature_info->touch_method_info(env_ptr->get_method_name()); + + size_t param_size = expr_info_ptr->call_expr_params_size(); + const ActionMethodInfo action_method_info(param_size); + + for (size_t i = 0; i < param_size; i++) { + auto param_info_ptr = expr_info_ptr->call_expr_param(i); + if (param_info_ptr != nullptr && param_info_ptr->is_repeated_action_info()) { + const NewActionParam& new_param = action_method_info.find_param(i); + if (new_param.origin_name().size() > 0) { + method_info.add_new_action_param(i, new_param, param_info_ptr->origin_expr_str()); + LOG(INFO) << "add_new_action_param, i: " << i + << ", new_param: " << new_param.get_bs_field_param_str(); + } else { + LOG(INFO) << "cannot find new action param for method: " << action_method_info.name(); + } + } + } + } + } + } +} + +void OverviewHandler::process(clang::DeclRefExpr* decl_ref_expr, Env* env_ptr) { + if (decl_ref_expr == nullptr) { + return; + } + + if (decl_ref_expr->getDecl()) { + if (decl_ref_expr->getDecl()->isTemplateParameter()) { + if (auto feature_info = env_ptr->mutable_feature_info()) { + feature_info->add_template_var_name(stmt_to_string(decl_ref_expr)); + } + } + } +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/handler/OverviewHandler.h b/convert/handler/OverviewHandler.h new file mode 100644 index 0000000..fe618db --- /dev/null +++ b/convert/handler/OverviewHandler.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include + +#include "clang/AST/AST.h" +#include "clang/AST/Expr.h" +#include "clang/AST/Stmt.h" +#include "clang/Basic/SourceLocation.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +// 获取一些摘要信息,在改写之前 +class OverviewHandler { + public: + OverviewHandler() = default; + + 
template + void process(T t, Env* env_ptr) {} + + void process(clang::CXXMemberCallExpr* cxx_member_call_expr, Env* env_ptr); + void process(clang::CXXForRangeStmt* cxx_for_range_stmt, Env* env_ptr); + void process(clang::CXXOperatorCallExpr* cxx_operator_call_expr, Env* env_ptr); + void process(clang::BinaryOperator* binary_operator, Env* env_ptr); + void process(clang::CallExpr* call_expr, Env* env_ptr); + void process(clang::DeclRefExpr* decl_ref_expr, Env* env_ptr); +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/handler/README.md b/convert/handler/README.md new file mode 100644 index 0000000..3bdb2b3 --- /dev/null +++ b/convert/handler/README.md @@ -0,0 +1,3 @@ +# Handler + +处理各种转换逻辑 diff --git a/convert/handler/RewriteAction.h b/convert/handler/RewriteAction.h new file mode 100644 index 0000000..7986c8a --- /dev/null +++ b/convert/handler/RewriteAction.h @@ -0,0 +1,39 @@ +#pragma once + +#include + +#include +#include +#include +#include +#include +#include + +#include "clang/AST/Stmt.h" +#include "clang/AST/Expr.h" +#include "clang/Rewrite/Core/Rewriter.h" + +#include "../Tool.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class RewriteAction { + public: + explicit RewriteAction(clang::Stmt* stmt, const std::string& replace_text, const std::string& origin_text): + stmt_(stmt), replace_text_(replace_text), origin_text_(origin_text) {} + + clang::Stmt* stmt() const { return stmt_; } + const std::string& replace_text() const { return replace_text_; } + const std::string& origin_text() const { return origin_text_; } + + private: + clang::Stmt* stmt_; + std::string replace_text_; + std::string origin_text_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/handler/StrictRewriter.cpp b/convert/handler/StrictRewriter.cpp new file mode 100644 index 0000000..37cb8e2 --- /dev/null +++ b/convert/handler/StrictRewriter.cpp @@ -0,0 +1,94 @@ 
+#include "../ExprInfo.h" +#include "../Tool.h" +#include "StrictRewriter.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +bool StrictRewriter::ReplaceText(clang::Stmt* stmt, const std::string& s) { + if (is_visited(visited_, stmt)) { + return false; + } + + visited_.insert(pointer_to_str(stmt)); + return rewriter_.ReplaceText(find_source_range(stmt), s); +} + +bool StrictRewriter::ReplaceText(clang::SourceRange range, const std::string& s) { + return rewriter_.ReplaceText(range, s); +} + +bool StrictRewriter::InsertTextBefore(clang::Stmt* stmt, const std::string& s) { + if (is_visited(visited_insert_before_, stmt)) { + return false; + } + + visited_insert_before_.insert(pointer_to_str(stmt)); + std::string origin_text = rewriter_.getRewrittenText(find_source_range(stmt)); + return rewriter_.ReplaceText(find_source_range(stmt), s + origin_text); +} + +bool StrictRewriter::InsertTextBefore(clang::SourceLocation loc, const std::string& s) { + return rewriter_.InsertTextBefore(loc, s); +} + +bool StrictRewriter::InsertTextAfter(clang::SourceLocation loc, const std::string& s) { + return rewriter_.InsertTextAfter(loc, s); +} + +bool StrictRewriter::InsertTextAfterToken(clang::SourceLocation loc, const std::string& s) { + return rewriter_.InsertTextAfterToken(loc, s); +} + +bool StrictRewriter::RemoveText(clang::Stmt* stmt) { + if (is_visited(visited_delete_, stmt)) { + return false; + } + + visited_delete_.insert(pointer_to_str(stmt)); + return rewriter_.RemoveText(find_source_range(stmt)); +} + +bool StrictRewriter::RemoveText(clang::SourceRange range) { + return rewriter_.RemoveText(range); +} + +std::string StrictRewriter::getRewrittenText(clang::Stmt* stmt) const { + return rewriter_.getRewrittenText(find_source_range(stmt)); +} + +std::string StrictRewriter::getRewrittenText(clang::Expr* expr) const { + return rewriter_.getRewrittenText(find_source_range(expr)); +} + +std::string StrictRewriter::getRewrittenText(clang::SourceRange range) 
const { + return rewriter_.getRewrittenText(range); +} + +std::string StrictRewriter::getRewrittenText(ExprInfo* expr_info_ptr) const { + if (expr_info_ptr == nullptr) { + return ""; + } + + if (expr_info_ptr->is_decl_ref_expr()) { + return getRewrittenText(expr_info_ptr->origin_expr()); + } else { + return getRewrittenText(expr_info_ptr->expr()); + } +} + +void StrictRewriter::run_lazy_replace() { + for (size_t i = 0; i < lazy_replaces_.size(); i++) { + StmtReplacement res = lazy_replaces_[i].run(); + ReplaceText(res.first, res.second); + } +} + +bool StrictRewriter::is_replace_visited(clang::Stmt* stmt) { + return is_visited(visited_, stmt); +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/handler/StrictRewriter.h b/convert/handler/StrictRewriter.h new file mode 100644 index 0000000..2b0ad60 --- /dev/null +++ b/convert/handler/StrictRewriter.h @@ -0,0 +1,77 @@ +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "clang/AST/Stmt.h" +#include "clang/AST/Expr.h" +#include "clang/Rewrite/Core/Rewriter.h" + +#include "../Tool.h" +#include "LazyReplace.h" +#include "RewriteAction.h" + +class ExprInfo; + +namespace ks { +namespace ad_algorithm { +namespace convert { + +/// 不能被替换两次,需要严格记录被替换的表达式。 +class StrictRewriter { + public: + explicit StrictRewriter(clang::Rewriter &rewriter): rewriter_(rewriter) {} // NOLINT + explicit StrictRewriter(clang::Rewriter &rewriter, const std::string &rule_name) // NOLINT + : rewriter_(rewriter), rule_name_(rule_name) {} + + /// 为了和 clang::Rewriter 接口保持一致,用驼峰命名 + bool ReplaceText(clang::Stmt *stmt, const std::string &s); + bool ReplaceText(clang::SourceRange range, const std::string &s); + bool InsertTextBefore(clang::Stmt *stmt, const std::string &s); + bool InsertTextBefore(clang::SourceLocation loc, const std::string &s); + bool InsertTextAfter(clang::SourceLocation loc, const std::string &s); + bool 
InsertTextAfterToken(clang::SourceLocation loc, const std::string &s); + bool RemoveText(clang::Stmt *stmt); + bool RemoveText(clang::SourceRange range); + std::string getRewrittenText(clang::Stmt *stmt) const; + std::string getRewrittenText(clang::Expr *expr) const; + std::string getRewrittenText(clang::SourceRange range) const; + std::string getRewrittenText(ExprInfo* expr_info_ptr) const; + + /// 例: thread_local static std::vector<::auto_cpp_rewriter::CommonInfoAttr> + /// action_list(user_attr_map_size); 有些声明的替换需要在 common info enum + /// 确定后才能知道具体的类型是 int 还是 float, 因此先将替换逻辑保存起来。 等 + /// common info enum 确定后再执行。 + template void emplace_lazy_replace(Args... args) { + lazy_replaces_.emplace_back(std::forward(args)...); + } + + void run_lazy_replace(); + bool is_replace_visited(clang::Stmt* stmt); + + private: + template + bool is_visited(const T& v, clang::Stmt* stmt) { + return v.find(pointer_to_str(stmt)) != v.end(); + } + + private: + clang::Rewriter& rewriter_; + std::string rule_name_; + std::unordered_set visited_; + std::unordered_set visited_delete_; + std::unordered_set visited_insert_before_; + std::vector lazy_replaces_; + std::vector rewrite_actions_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/ActionDetailFixedInfo.cpp b/convert/info/ActionDetailFixedInfo.cpp new file mode 100644 index 0000000..e9c6ced --- /dev/null +++ b/convert/info/ActionDetailFixedInfo.cpp @@ -0,0 +1,138 @@ +#include +#include +#include +#include +#include "../Env.h" +#include "../Tool.h" +#include "ActionDetailFixedInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +ActionDetailFixedInfo::ActionDetailFixedInfo(const std::string& prefix_adlog, + const std::string& action): + prefix_adlog_(prefix_adlog), action_(action) { + prefix_ = tool::adlog_to_bs_enum_str(prefix_adlog_); +} + +std::string ActionDetailFixedInfo::get_exists_expr(Env* env_ptr, + const std::string& field_name) const { + return 
get_exists_functor_name(field_name) + std::string("(bs, pos)"); +} + +std::string ActionDetailFixedInfo::get_exists_field_def(Env* env_ptr, + const std::string& field_name) const { + std::ostringstream oss; + + oss << "BSHasFixedActionDetailImpl<" << user_template_param(env_ptr, field_name) << "> " + << get_exists_functor_name(field_name) << "{" + << tool::add_quote(prefix_adlog_) << ", " + << action_ << ", " + << tool::add_quote(tool::trim_exists(field_name)) + << "}"; + + return oss.str(); +} + +std::string ActionDetailFixedInfo::get_action_detail_exists_expr(Env* env_ptr) const { + return get_exists_expr(env_ptr, "list.size"); +} + +std::string ActionDetailFixedInfo::get_action_detail_exists_field_def(Env* env_ptr) const { + return get_exists_field_def(env_ptr, "list.size"); +} + +std::string ActionDetailFixedInfo::get_bs_enum_str(const std::string& field_name) const { + std::string s = prefix_ + "_key_" + action_ + "_" + field_name; + return tool::adlog_to_bs_enum_str(s); +} + +std::string ActionDetailFixedInfo::get_bs_var_name(Env* env_ptr, + const std::string& field_name) const { + std::string bs_enum_str = get_bs_enum_str(field_name); + + if (const absl::optional& var = env_ptr->find_new_def(bs_enum_str)) { + // 必定是 for_stmt 遍历 list value + const std::string& loop_var = env_ptr->get_last_loop_var(); + if (loop_var.size() > 0) { + return var->name() + "Get(" + loop_var + ")"; + } else { + LOG(INFO) << "cannot find loop var for for_stmt!"; + return ""; + } + } else { + LOG(INFO) << "cannot find list or map var_name in env, bs_enum_str: " << bs_enum_str; + return ""; + } +} + +std::string ActionDetailFixedInfo::get_bs_list_def(Env* env_ptr, + const std::string& field_name, + clang::QualType qual_type) const { + std::ostringstream oss; + + std::string bs_enum_str = get_bs_enum_str(field_name); + std::string new_name = env_ptr->find_valid_new_name(bs_enum_str); + + oss << "BSRepeatedField<" + << user_template_param(env_ptr, field_name, 
tool::get_builtin_type_str(qual_type)) << "> " + << new_name << " = std::move(" + << get_functor_name(field_name) << "(bs, pos))"; + + return oss.str(); +} + +std::string ActionDetailFixedInfo::get_bs_list_field_def(Env* env_ptr, + const std::string& field_name, + clang::QualType qual_type) const { + std::ostringstream oss; + + oss << "BSFixedActionDetail<" << user_template_param(env_ptr, field_name) << ">" + << get_functor_name(field_name) << "{" + << tool::add_quote(prefix_adlog_) << "," + << action_ << "," + << tool::add_quote(field_name) + << "}"; + + return oss.str(); +} + +std::string ActionDetailFixedInfo::get_functor_name(const std::string& field_name) const { + std::ostringstream oss; + + std::vector arr = absl::StrSplit(prefix_adlog_, "."); + oss << arr.back() << "." << field_name; + + return tool::dot_underscore_to_camel(oss.str()); +} + +std::string ActionDetailFixedInfo::get_exists_functor_name(const std::string& field_name) const { + std::string functor_name = get_functor_name(field_name); + static std::regex p("BSGet"); + return std::regex_replace(functor_name, p, "BSHas"); +} + +std::string ActionDetailFixedInfo::user_template_param(Env* env_ptr, + const std::string& field_name) const { + if (env_ptr->is_combine_feature() && !tool::is_item_field(get_bs_enum_str(field_name))) { + return "true"; + } + return ""; +} + +std::string ActionDetailFixedInfo::user_template_param(Env* env_ptr, + const std::string& field_name, + const std::string& type_str) const { + std::ostringstream oss; + oss << type_str; + if (env_ptr->is_combine_feature() && !tool::is_item_field(get_bs_enum_str(field_name))) { + oss << ", true"; + } + + return oss.str(); +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/ActionDetailFixedInfo.h b/convert/info/ActionDetailFixedInfo.h new file mode 100644 index 0000000..4a947ae --- /dev/null +++ b/convert/info/ActionDetailFixedInfo.h @@ -0,0 +1,71 @@ +#pragma once + +#include + +#include 
+#include +#include + +#include "clang/AST/Expr.h" + +#include "InfoBase.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; +class ExprInfo; + +/// action detail 的 action 通过模板参数传递, Extract 函数中不能确定。 +/// prefix 和 field_name 都是 . 隔开的形式, 如 adlog.user_info.user_real_time_action。 +/// 叶子节点一定是 BSRepeatedField。 +class ActionDetailFixedInfo : public InfoBase { + public: + ActionDetailFixedInfo() = default; + explicit ActionDetailFixedInfo(const std::string& prefix_adlog, const std::string& action); + + const std::string& prefix() const { return prefix_; } + const std::string& prefix_adlog() const { return prefix_adlog_; } + const std::string& action() const { return action_; } + + std::string get_exists_expr(Env* env_ptr, const std::string& field_name) const; + std::string get_exists_field_def(Env* env_ptr, const std::string& field_name) const; + + std::string get_action_detail_exists_expr(Env* env_ptr) const; + std::string get_action_detail_exists_field_def(Env* env_ptr) const; + + std::string get_bs_enum_str(const std::string& field_name) const; + std::string get_bs_var_name(Env* env_ptr, const std::string& field_name) const; + + /// 属性定义, action 来自模板参数 + /// std::string prefix = "adlog.user_info.user_real_time_action.real_time_dsp_action_detail"; + /// BSFixedActionDetail> BSGetRealTimeDspActionDetailPhotoId{prefix, action, "photo_id"}; + std::string get_bs_list_def(Env* env_ptr, + const std::string& field_name, + clang::QualType qual_type) const; + + std::string get_bs_list_field_def(Env* env_ptr, + const std::string& field_name, + clang::QualType qual_type) const; + + std::string get_functor_name(const std::string& field_name) const; + std::string get_exists_functor_name(const std::string& field_name) const; + + std::string user_template_param(Env* env_ptr, + const std::string& field_name) const; + std::string user_template_param(Env* env_ptr, + const std::string& field_name, + const std::string& type_str) const; + + private: + /// 
prefix_ 是 adlog.user_info.user_real_time_action 这种形式。 + std::string prefix_; + + std::string prefix_adlog_; + std::string action_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/ActionDetailInfo.cpp b/convert/info/ActionDetailInfo.cpp new file mode 100644 index 0000000..2cde199 --- /dev/null +++ b/convert/info/ActionDetailInfo.cpp @@ -0,0 +1,95 @@ +#include +#include +#include "ActionDetailInfo.h" +#include "../Tool.h" +#include "../Env.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +absl::optional ActionDetailInfo::first_action() const { + if (actions_.size() == 0) { + return absl::nullopt; + } + + return absl::make_optional(actions_.at(0)); +} + +ActionDetailInfo::ActionDetailInfo(const std::string& prefix_adlog, int action): + prefix_adlog_(prefix_adlog) { + prefix_ = tool::adlog_to_bs_enum_str(prefix_adlog_); + actions_.push_back(action); +} + +// 如果 iter = action_map.find(no) 存在, 则 it->second.list 一定存在, 那么在 java 转 bslog 的时候 +// 一定会添加上 it->second.list.size(), 即 xxx_action_map_list_size 一定存在, 即使其值为 0. 
+// 因此用 iter->second.list.size() 是否存在来代表 iter != action_map.end() +std::string ActionDetailInfo::get_exists_expr(Env* env_ptr) const { + absl::optional bs_enum_str = get_bs_list_size_enum_str(); + if (!bs_enum_str) { + LOG(INFO) << "cannot get action detail exists_enum_str, prefix: " << prefix_; + return ""; + } + + LOG(INFO) << "bs_enum_str: " << *bs_enum_str; + if (const absl::optional& exists_var = env_ptr->find_new_def(*bs_enum_str)) { + if (exists_var->exists_name().size() > 0) { + LOG(INFO) << "bs_enum_str: " << *bs_enum_str << ", find exists_name: " << exists_var->exists_name(); + return exists_var->exists_name(); + } + } + + return ""; +} + +absl::optional ActionDetailInfo::get_bs_enum_str() const { + if (absl::optional action = first_action()) { + return absl::optional(prefix_ + "_key_" + std::to_string(*action)); + } else { + return absl::nullopt; + } +} + +absl::optional ActionDetailInfo::get_bs_list_size_enum_str() const { + absl::optional bs_enum_str = get_bs_enum_str(); + if (!bs_enum_str) { + LOG(INFO) << "cannot get bs_enum_str for action detail, prefix: " << prefix_; + return absl::nullopt; + } + + return absl::optional(*bs_enum_str + "_list_size"); +} + +std::string ActionDetailInfo::get_action_detail_exists_def(Env* env_ptr) const { + if (env_ptr == nullptr) { + LOG(INFO) << "env_ptr is nullptr!"; + return ""; + } + + absl::optional bs_enum_str = get_bs_list_size_enum_str(); + if (!bs_enum_str) { + LOG(INFO) << "cannot get bs_enum_str for action detail exists, prefix: " << prefix_; + return ""; + } + + std::string var_name = env_ptr->find_valid_new_name(*bs_enum_str); + std::string new_name = tool::get_exists_name(var_name); + std::string enum_new_name = tool::get_exists_name(std::string("enum_") + var_name); + + std::ostringstream oss; + oss << " auto " << enum_new_name << " = BSFieldEnum::" << *bs_enum_str << ";\n "; + oss << "bool " << new_name << " = BSFieldHelper::HasSingularis_combine_feature() && !tool::is_item_field(*bs_enum_str)) { + oss 
<< ", true"; + } + oss << ">" + << "(*bs, " << enum_new_name << ", pos)"; + + return oss.str(); +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/ActionDetailInfo.h b/convert/info/ActionDetailInfo.h new file mode 100644 index 0000000..8767705 --- /dev/null +++ b/convert/info/ActionDetailInfo.h @@ -0,0 +1,91 @@ +#pragma once + +#include + +#include +#include +#include +#include + +#include "clang/AST/Expr.h" + +#include "InfoBase.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +/// action detail 分为以下几种情况: +/// 1. 单个 action_detail +/// 保存行为信息的 map, 通常以行为对应的枚举为 key, value 为一个 proto Message, 里面有变量 list, list 中保存了 +/// 行为。因此最终叶子节点的行为是展开的 list。 +/// +/// 如: teams/ad/ad_algorithm/feature/fast/impl/extract_user_dense_ad_item_click_num.h +/// +/// const auto& ad_action = adlog.user_info().explore_long_term_ad_action(); +/// auto iter = ad_action.find(no); +/// if (iter != ad_action.end()) { +/// const auto& action_base_infos = iter->second.list(); +/// int64_t imp_num = action_base_infos.size(); +/// AddFeature(0, imp_num, result); +/// } else { +/// AddFeature(0, 0, result); +/// } +/// +/// 2. 多个 action_detail +/// 多个 action_detail, 但是在构造函数中可以确定具体的 action, 而不是通过模板参数传递,与 +/// ActionDetailFixedInfo 有区别。 +/// +/// 如: teams/ad/ad_algorithm/feature/fast/impl/extract_match_dense_num.h, +/// action_vec_ 是 std::vector 类型, 构造函数中会进行初始化。 +/// +/// const auto& ad_action = adlog.user_info().explore_long_term_ad_action(); +/// for (auto action_no : action_vec_) { +/// auto action_no_iter = ad_action.find(action_no); +/// int photo_id_action_num = 0; +/// int product_name_hash_action_num = 0; +/// int second_industy_id_hash_action_num = 0; +/// if (action_no_iter != ad_action.end()) { +/// const auto& action_no_list = action_no_iter->second.list(); +/// for (int k = 0; k < action_no_list.size() && k < 100; ++k) { +/// ... 
+/// } +/// } +/// +/// 处理逻辑: +/// 由于 action 在构造函数中都可以知道, 因此需要在 LoopInfo 中将这几个值保存起来, 然后在遍历 action_vec_ 时 +/// 即可确定当前类的逻辑是 ActionDetailMultiInfo, 将 action_no 在 Env 中设置为第一个值, 然后在处理 action_vec_ +/// for 循环时候先获得 body 的结果,再将 `_key_x_` 统一替换为其他的 action, 从而得到多个 action 的结果。 +/// +/// 3. action no 通过模板参数传递, 参考 ActionDetailFixedInfo +class ActionDetailInfo : public InfoBase { + public: + ActionDetailInfo() = default; + explicit ActionDetailInfo(const std::string& prefix_adlog, int action); + + const std::string& prefix() const { return prefix_; } + const std::string& prefix_adlog() const { return prefix_adlog_; } + + absl::optional first_action() const; + void add_action(int action) { actions_.push_back(action); } + + const std::vector& leaf_fields() const { return leaf_fields_; } + + std::string get_exists_expr(Env* env_ptr) const; + absl::optional get_bs_enum_str() const; + absl::optional get_bs_list_size_enum_str() const; + + std::string get_action_detail_exists_def(Env* env_ptr) const; + + private: + std::string prefix_; + std::string prefix_adlog_; + std::vector actions_; + std::vector leaf_fields_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/ActionMethodInfo.cpp b/convert/info/ActionMethodInfo.cpp new file mode 100644 index 0000000..92a5ccc --- /dev/null +++ b/convert/info/ActionMethodInfo.cpp @@ -0,0 +1,79 @@ +#include "ActionMethodInfo.h" +#include "NewActionParam.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +const std::string ActionMethodInfo::name_ = "ks::ad_algorithm::get_value_from_Action"; + +ActionMethodInfo::ActionMethodInfo(size_t param_size): param_size_(param_size) { + new_action_params_.emplace_back("first_list"); + new_action_params_.emplace_back("second_list"); + + // 注意,必须用 uint64, 否则 cdiff 不过。 + NewActionFieldParam param0_timestamp("first_list_action_timestamp", + "action_timestamp", + "uint64_t", + false); + new_action_params_[0].add_new_param(param0_timestamp); + + 
NewActionFieldParam param0_product_id_hash("first_list_product_id_hash", + "product_id_hash", + "uint64_t", + false); + new_action_params_[0].add_new_param(param0_product_id_hash); + + NewActionFieldParam param0_second_industry_id_hash("first_list_second_industry_id_hash", + "second_industry_id_hash", + "uint64_t", + false); + new_action_params_[0].add_new_param(param0_second_industry_id_hash); + + if (param_size_ == 10) { + NewActionFieldParam param0_photo_id("first_list_photo_id", + "photo_id", + "uint64_t", + false); + new_action_params_[0].add_new_param(param0_photo_id); + } + + NewActionFieldParam param1_timestamp("second_list_action_timestamp", + "action_timestamp", + "uint64_t", + false); + new_action_params_[1].add_new_param(param1_timestamp); + + NewActionFieldParam param1_product_id_hash("second_list_product_id_hash", + "product_id_hash", + "uint64_t", + false); + new_action_params_[1].add_new_param(param1_product_id_hash); + + NewActionFieldParam param1_second_industry_id_hash("second_list_second_industry_id_hash", + "second_industry_id_hash", + "uint64_t", + false); + new_action_params_[1].add_new_param(param1_second_industry_id_hash); + + if (param_size_ == 10) { + NewActionFieldParam param1_photo_id("second_list_photo_id", + "photo_id", + "uint64_t", + false); + new_action_params_[1].add_new_param(param1_photo_id); + } +} + +const NewActionParam& ActionMethodInfo::find_param(size_t index) const { + if (index >= new_action_params_.size()) { + static NewActionParam empty; + return (empty); + } + + return (new_action_params_[index]); +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/ActionMethodInfo.h b/convert/info/ActionMethodInfo.h new file mode 100644 index 0000000..6f6f63c --- /dev/null +++ b/convert/info/ActionMethodInfo.h @@ -0,0 +1,41 @@ +#pragma once + +#include + +#include +#include +#include + +#include "clang/AST/Type.h" + +#include "NewActionParam.h" + +namespace ks { +namespace 
ad_algorithm { +namespace convert { + +/// get_value_from_Action 对应的信息,提前写好。 +/// 有两个重载版本,通过参数个数可以区分, 一个参数是 8 个,另一个是 10 个,多了两个 photo_id 相关的参数。 +/// 对应的 bs_get_value_from_action 实现见: teams/ad/ad_algorithm/bs_feature/fast/frame/bs_action_util.cc +class ActionMethodInfo { + public: + explicit ActionMethodInfo(size_t param_size); + + static const std::string& name() { return (name_); } + + size_t param_size() const { return param_size_; } + const std::vector& new_action_params() const { return (new_action_params_); } + + const NewActionParam& find_param(size_t index) const; + + private: + static const std::string name_; + size_t param_size_ = 8; + + /// AdActionInfo 对应到多个用到的字段,需要保存多个新参数。 + std::vector new_action_params_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/AdlogFieldInfo.h b/convert/info/AdlogFieldInfo.h new file mode 100644 index 0000000..5e9cff8 --- /dev/null +++ b/convert/info/AdlogFieldInfo.h @@ -0,0 +1,62 @@ +#pragma once + +#include +#include +#include +#include +#include "../Type.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +/// adlog field 类型。 +/// 1. 普通字段,如 adlog.user_info.id +/// 2. CommonInfo, 如 adlog.user_info.common_info_attr.key:239 +/// 3. MiddleNode, 如 photo_info->author_info().id() +enum class AdlogFieldType { + NONE, + NORMAL, + COMMON_INFO, + MIDDLE_NODE +}; + +/// 保存当前 adlog field 相关的信息。包括以下几种: +/// 1. 普通字段,包含 adlog_field, bs_enum_str +/// 2. CommonInfo,包含 adlog_field, bs_enum_str, enum_value, enum_str +/// 3. 
MiddleNode, 包含 adlog_field, bs_enum_str, root +class AdlogFieldInfo { + public: + AdlogFieldInfo() = default; + explicit AdlogFieldInfo(const std::string& adlog_field, + const std::string& bs_enum_str, + AdlogFieldType adlog_field_type): + adlog_field_(adlog_field), bs_enum_str_(bs_enum_str), adlog_field_type_(adlog_field_type) {} + + const std::string& adlog_field() const { return (adlog_field_); } + const std::string& bs_enum_str() const { return (bs_enum_str_); } + AdlogFieldType adlog_field_type() const { return adlog_field_type_; } + const absl::optional& common_info_enum_name() const { return (common_info_enum_name_); } + const absl::optional& common_info_enum_value() const { return (common_info_enum_value_); } + + void set_common_info_enum_name(const std::string& common_info_enum_name) { + common_info_enum_name_.emplace(common_info_enum_name); + } + + void set_common_info_enum_value(int common_info_enum_value) { + common_info_enum_value_.emplace(common_info_enum_value); + } + + private: + std::string adlog_field_; + std::string bs_enum_str_; + AdlogFieldType adlog_field_type_; + absl::optional common_info_enum_name_; + absl::optional common_info_enum_value_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/AssignInfo.h b/convert/info/AssignInfo.h new file mode 100644 index 0000000..4fd13b3 --- /dev/null +++ b/convert/info/AssignInfo.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include +#include +#include +#include "clang/AST/Expr.h" +#include "../Type.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +/// 保存当前 assign op 相关的信息。 +class AssignInfo { + public: + AssignInfo() = default; + explicit AssignInfo(const std::string& name, + clang::Expr* left_expr, + clang::Expr* right_expr): + name_(name), + left_expr_(left_expr), + right_expr_(right_expr) {} + + const std::string& name() const { return name_; } + clang::Expr* left_expr() const { return left_expr_; } + 
clang::Expr* right_expr() const { return right_expr_; } + + private: + std::string name_; + clang::Expr* left_expr_ = nullptr; + clang::Expr* right_expr_ = nullptr; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/BSFieldInfo.cpp b/convert/info/BSFieldInfo.cpp new file mode 100644 index 0000000..25f9471 --- /dev/null +++ b/convert/info/BSFieldInfo.cpp @@ -0,0 +1,38 @@ +#include "../Tool.h" +#include "./BSFieldInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void BSFieldInfo::insert_bs_field_enum_var_names(const std::string &bs_var_name, + const std::vector &enum_var_names, + bool is_has_value_in_params) { + map_bs_field_detail_[bs_var_name].enum_var_names = enum_var_names; + map_bs_field_detail_[bs_var_name].is_has_value_in_params = is_has_value_in_params; +} + +const std::vector & +BSFieldInfo::find_bs_field_enum_var_names(const std::string &bs_var_name) const { + auto it = map_bs_field_detail_.find(bs_var_name); + if (it != map_bs_field_detail_.end()) { + return it->second.enum_var_names; + } + + static std::vector empty; + return empty; +} + +void BSFieldInfo::insert_new_def( + const std::string &var_name, + const std::string &new_def, + NewVarType new_var_type +) { + auto& x = map_bs_field_detail_[var_name]; + x.new_def = new_def; + x.new_var_type = new_var_type; +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/BSFieldInfo.h b/convert/info/BSFieldInfo.h new file mode 100644 index 0000000..1926352 --- /dev/null +++ b/convert/info/BSFieldInfo.h @@ -0,0 +1,58 @@ +#pragma once + +#include +#include +#include +#include +#include +#include "clang/AST/Expr.h" +#include "../Type.h" +#include "./NewVarDef.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +struct BSFieldDetail { + std::vector enum_var_names; + std::string new_def; + NewVarType new_var_type; + bool is_visited = false; + bool 
is_has_value_in_params = false; +}; + +/// 保存 bs field 变量名到 BSFieldDetail 的映射。 +class BSFieldInfo { + private: + /// bs field 变量到 bs_field_enum 变量的映射。 + /// 如 map_bs_field_detail_["user_id"].enum_var_names = { "enum_user_id"}; + std::unordered_map map_bs_field_detail_; + + public: + BSFieldInfo() = default; + + const std::unordered_map& map_bs_field_detail() const { + return map_bs_field_detail_; + } + + std::unordered_map& mutable_map_bs_field_detail() { + return map_bs_field_detail_; + } + + /// 插入 bs_field_enum 与 bs_field var_name。 + void insert_bs_field_enum_var_names(const std::string &bs_var_name, + const std::vector &enum_var_names, + bool is_has_value_in_params); + + const std::vector & find_bs_field_enum_var_names(const std::string &bs_var_name) const; + + void insert_new_def(const std::string& var_name, + const std::string& new_def, + NewVarType new_var_type); +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/BinaryOpInfo.cpp b/convert/info/BinaryOpInfo.cpp new file mode 100644 index 0000000..2e332b2 --- /dev/null +++ b/convert/info/BinaryOpInfo.cpp @@ -0,0 +1,18 @@ +#include "../Tool.h" +#include "BinaryOpInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +std::string BinaryOpInfo::left_expr_str() const { + return stmt_to_string(left_expr_); +} + +std::string BinaryOpInfo::right_expr_str() const { + return stmt_to_string(right_expr_); +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/BinaryOpInfo.h b/convert/info/BinaryOpInfo.h new file mode 100644 index 0000000..fa72d49 --- /dev/null +++ b/convert/info/BinaryOpInfo.h @@ -0,0 +1,62 @@ +#pragma once + +#include + +#include +#include +#include +#include "clang/AST/Expr.h" +#include "../Type.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +/// 保存当前 binary op 相关的信息, 如 ==, !=。 +/// 用于判断 common info 是等于还是不等于。 +class BinaryOpInfo { + public: + BinaryOpInfo() = default; + explicit 
BinaryOpInfo(const std::string& op, + clang::Expr* left_expr, + clang::Expr* right_expr): + op_(op), + left_expr_(left_expr), + right_expr_(right_expr) {} + + const std::string& op() const { return op_; } + clang::Expr* left_expr() const { return left_expr_; } + clang::Expr* right_expr() const { return right_expr_; } + + bool is_equal_op() const { return op_.find("==") != std::string::npos; } + bool is_not_equal_op() const { return op_.find("!=") != std::string::npos; } + bool is_assign_op() const { return op_ == "=" || op_ == "operator="; } + bool is_greater_op() const { return op_ == ">"; } + bool is_less_op() const { return op_ == "<"; } + bool is_less_equal_op() const { return op_ == "<="; } + bool is_greater_equal_op() const { return op_ == ">="; } + bool is_and_op() const { return op_ == "&&"; } + bool is_or_op() const { return op_ == "||"; } + + std::string left_expr_str() const; + std::string right_expr_str() const; + + ExprType left_expr_type() const { return left_expr_type_; } + ExprType right_expr_type() const { return right_expr_type_; } + + void set_left_expr_type(ExprType expr_type) { left_expr_type_ = expr_type; } + void set_right_expr_type(ExprType expr_type) { right_expr_type_ = expr_type; } + + private: + std::string op_; + clang::Expr* left_expr_ = nullptr; + clang::Expr* right_expr_ = nullptr; + ExprType left_expr_type_ = ExprType::NONE; + ExprType right_expr_type_ = ExprType::NONE; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/CommonInfo.cpp b/convert/info/CommonInfo.cpp new file mode 100644 index 0000000..2f10bfc --- /dev/null +++ b/convert/info/CommonInfo.cpp @@ -0,0 +1,289 @@ +#include +#include +#include + +#include "absl/strings/str_split.h" +#include "../Tool.h" +#include "../Env.h" +#include "CommonInfo.h" +#include "../handler/StrictRewriter.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void CommonAttrInfo::set_env_ptr(Env* env_ptr) { + env_ptr_ = 
env_ptr; +} + +Env* CommonAttrInfo::env_ptr() const { + return env_ptr_; +} + +Env* CommonAttrInfo::parent_env_ptr() const { + if (env_ptr_ == nullptr) { + return nullptr; + } + + return env_ptr_->parent(); +} + +const absl::optional& CommonAttrInfo::get_common_info_prepare() const { + return env_ptr_->cur_common_info_prepare(); +} + +const std::string CommonAttrInfo::bool_value = "bool_value"; +const std::string CommonAttrInfo::int_value = "int_value"; +const std::string CommonAttrInfo::float_value = "float_value"; +const std::string CommonAttrInfo::string_value = "string_value"; +const std::string CommonAttrInfo::bool_list_value = "bool_list_value"; +const std::string CommonAttrInfo::int_list_value = "int_list_value"; +const std::string CommonAttrInfo::float_list_value = "float_list_value"; +const std::string CommonAttrInfo::string_list_value = "string_list_value"; +const std::string CommonAttrInfo::map_unit64_bool_value = "map_unit64_bool_value"; +const std::string CommonAttrInfo::map_int64_int64_value = "map_int64_int64_value"; +const std::string CommonAttrInfo::map_int64_float_value = "map_int64_float_value"; +const std::string CommonAttrInfo::map_int64_string_value = "map_int64_string_value"; +const std::string CommonAttrInfo::map_string_int64_value = "map_string_int64_value"; +const std::string CommonAttrInfo::map_int64_multi_value = "map_int64_multi_value"; +const std::string CommonAttrInfo::common_info_attr_size = "common_info_attr_size"; + +const std::unordered_set CommonAttrInfo::scalar_method_names = { + bool_value, + int_value, + float_value, + string_value +}; + +const std::unordered_set CommonAttrInfo::list_method_names = { + bool_list_value, + int_list_value, + float_list_value, + string_list_value +}; + +const std::unordered_set CommonAttrInfo::map_method_names = { + map_unit64_bool_value, + map_int64_int64_value, + map_int64_float_value, + map_int64_string_value, + map_string_int64_value +}; + +const std::unordered_set 
CommonAttrInfo::repeated_size_method_names = { + common_info_attr_size +}; + +bool CommonAttrInfo::is_common_info_scalar_method(const std::string& method_name) { + return scalar_method_names.find(method_name) != scalar_method_names.end(); +} + +bool CommonAttrInfo::is_common_info_list_method(const std::string& method_name) { + return list_method_names.find(method_name) != list_method_names.end(); +} + +bool CommonAttrInfo::is_common_info_map_method(const std::string& method_name) { + return map_method_names.find(method_name) != map_method_names.end(); +} + +bool CommonAttrInfo::is_common_info_method(const std::string& method_name) { + return is_common_info_scalar_method(method_name) || + is_common_info_list_method(method_name) || + is_common_info_map_method(method_name); +} + +bool CommonAttrInfo::is_common_info_size_method(const std::string& method_name) { + if (ends_with(method_name, "_size")) { + std::string prefix = method_name.substr(0, method_name.size() - std::string("_size").size()); + if (is_common_info_method(prefix)) { + return true; + } + } + + return false; +} + +bool CommonAttrInfo::is_common_info_list_size_method(const std::string& method_name) { + if (ends_with(method_name, "_size")) { + std::string prefix = method_name.substr(0, method_name.size() - std::string("_size").size()); + if (is_common_info_list_method(prefix)) { + return true; + } + } + + return false; +} + +bool CommonAttrInfo::is_common_info_map_size_method(const std::string& method_name) { + if (ends_with(method_name, "_size")) { + std::string prefix = method_name.substr(0, method_name.size() - std::string("_size").size()); + if (is_common_info_map_method(prefix)) { + return true; + } + } + + return false; +} + +bool CommonAttrInfo::is_common_info_leaf_method(const std::string& method_name) { + return is_common_info_method(method_name) || is_common_info_size_method(method_name); +} + +bool CommonAttrInfo::is_repeated_common_info_size(const std::string& method_name) { + return 
repeated_size_method_names.find(method_name) != repeated_size_method_names.end(); +} + +absl::optional CommonAttrInfo::find_value_type(const std::string& method_name) { + if (method_name == bool_value) { + return absl::optional(CommonInfoValueType::BOOL); + } else if (method_name == int_value) { + return absl::optional(CommonInfoValueType::INT); + } else if (method_name == float_value) { + return absl::optional(CommonInfoValueType::FLOAT); + } else if (method_name == string_value) { + return absl::optional(CommonInfoValueType::STRING); + } else if (method_name == bool_list_value) { + return absl::optional(CommonInfoValueType::BOOL_LIST); + } else if (method_name == int_list_value) { + return absl::optional(CommonInfoValueType::INT_LIST); + } else if (method_name == float_list_value) { + return absl::optional(CommonInfoValueType::FLOAT_LIST); + } else if (method_name == string_list_value) { + return absl::optional(CommonInfoValueType::STRING_LIST); + } else if (method_name == map_unit64_bool_value) { + return absl::optional(CommonInfoValueType::MAP_INT_BOOL); + } else if (method_name == map_int64_int64_value) { + return absl::optional(CommonInfoValueType::MAP_INT_INT); + } else if (method_name == map_int64_float_value) { + return absl::optional(CommonInfoValueType::MAP_INT_FLOAT); + } else if (method_name == map_int64_string_value) { + return absl::optional(CommonInfoValueType::MAP_INT_STRING); + } else if (method_name == map_string_int64_value) { + return absl::optional(CommonInfoValueType::MAP_STRING_INT); + } else if (method_name == map_int64_multi_value) { + return absl::optional(CommonInfoValueType::MAP_INT_MULTI_INT); + } else { + return absl::nullopt; + } +} + +std::string CommonAttrInfo::get_inner_type_str(CommonInfoValueType value_type) { + switch (value_type) { + case CommonInfoValueType::BOOL: + case CommonInfoValueType::BOOL_LIST: + return "bool"; + case CommonInfoValueType::INT: + case CommonInfoValueType::INT_LIST: + case 
CommonInfoValueType::MAP_INT_INT: + return "int64_t"; + case CommonInfoValueType::FLOAT: + case CommonInfoValueType::FLOAT_LIST: + case CommonInfoValueType::MAP_INT_FLOAT: + return "float"; + case CommonInfoValueType::STRING: + case CommonInfoValueType::STRING_LIST: + case CommonInfoValueType::MAP_INT_STRING: + return "absl::string_view"; + case CommonInfoValueType::MAP_STRING_INT: + return "int64_t"; + default: + return ""; + } +} + +std::pair CommonAttrInfo::get_map_inner_type_str(CommonInfoValueType value_type) { + switch (value_type) { + case CommonInfoValueType::MAP_INT_INT: + return {"int64_t", "int64_t"}; + case CommonInfoValueType::MAP_INT_FLOAT: + return {"int64_t", "float"}; + case CommonInfoValueType::MAP_INT_STRING: + return {"int64_t", "absl::string_view"}; + case CommonInfoValueType::MAP_STRING_INT: + return {"absl::string_view", "int64_t"}; + case CommonInfoValueType::MAP_INT_MULTI_INT: + return {"int64_t", "int64_t"}; + default: + return {"", ""}; + } +} + +std::string CommonAttrInfo::get_bs_type_str(const std::string& method_name, bool is_combine_user) { + absl::optional value_type = find_value_type(method_name); + if (!value_type) { + return ""; + } + + std::ostringstream oss; + + if (is_common_info_scalar_method(method_name)) { + oss << get_inner_type_str(*value_type); + } else if (is_common_info_list_method(method_name)) { + oss << "BSRepeatedField<" << get_inner_type_str(*value_type); + if (is_combine_user) { + oss << ", true"; + } + oss << ">"; + } else { + std::pair type_str = get_map_inner_type_str(*value_type); + oss << "BSMapField<" << type_str.first << ", " << type_str.second; + if (is_combine_user) { + oss << ", true"; + } + oss << ">"; + } + + return oss.str(); +} + +bool CommonAttrInfo::is_common_info_list_or_map_loop(const std::string& s) { + for (const auto& name : list_method_names) { + if (s.find(name + "()") != std::string::npos) { + return true; + } + if (s.find(name + "_size()") != std::string::npos) { + return true; + } + } + + 
for (const auto &name : map_method_names) { + if (s.find(name + "()") != std::string::npos) { + return true; + } + if (s.find(name + "_size()") != std::string::npos) { + return true; + } + } + + return false; +} + +bool CommonAttrInfo::is_common_info_list_or_map_loop(const std::string &s, const std::string& loop_name) { + std::regex p("for ?\\([^\\(\\)]*" + loop_name + "(.begin\\()? ?\\)"); + std::smatch match_res; + if (std::regex_search(s, match_res, p)) { + return true; + } + + return false; +} + +bool CommonAttrInfo::contains_size_method(const std::string& s) { + for (const auto& x : list_method_names) { + if (s.find(x + "_size()") != std::string::npos) { + return true; + } + } + + for (const auto& x : map_method_names) { + if (s.find(x + "_size()") != std::string::npos) { + return true; + } + } + + return false; +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/CommonInfo.h b/convert/info/CommonInfo.h new file mode 100644 index 0000000..8ca7304 --- /dev/null +++ b/convert/info/CommonInfo.h @@ -0,0 +1,222 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "clang/AST/Expr.h" + +#include "../Type.h" +#include "PrefixPair.h" +#include "CommonInfoPrepare.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; +class StrictRewriter; + +/// CommonAttrInfo, 包括 common info 枚举值,以及其他复杂逻辑。 +/// common info 在第一次出现时候创建, prefix 是第一次出现 common info 时候可以确定的, 因此也是创建 +/// common info 必须的参数。其他参数都是 absl::optional, 后面遇到了再更新。 +/// +/// 分为以下几种情况。 +/// +/// 1. 
普通 common info, 用 CommonInfoType::NORMAL 表示。 +/// 主要分为两种情况: 单个 common info enum 和多个 common info enum, 都可以当做多个 common info enum 处理。 +/// common info 中重要的几个信息都是按以下顺序在不同的地方出现的: +/// - prefix: 一定是第一次循环或者取变量时候出现, 如 +/// const auto & common_info_attrs = item.ad_dsp_info().live_info().common_info_attr(); +/// - common_info_value: 一定是 if 判断条件中出现的, 且 if 条件另一个变量一定是 xxx.name_value(), 判断相等 +/// 的值是 clang::DeclRefExpr* 或者 int, clang::DeclRefExpr* 表示枚举, int 表示枚举对应的 int 值,可能是 +/// 模板参数传进来的。如 +/// if (attr.name_value() == +/// auto_cpp_rewriter::CommonInfoAttr_NameExtendTwo_LSP_LATEST_LIVE_SEGMENT_INFO_LIVE_ID) { +/// ... +/// } +/// - method_name: 一定是在 if body 中调用取值时候出现的, 取值类型可以根据 method_name 判断出来, 如 +/// attr.int_value(), attr.int_list_value() +/// +/// 需要注意的是, 枚举可能只出现在模板参数里, 类的处理逻辑里并不会出现, 但是 bs 的 common info enum 必须在 +/// 解析时候就知道, 才能生成对应的 bs 代码, 参考: +/// teams/ad/ad_algorithm/feature/fast/impl/extract_live_lsp_segment_info.h +/// 这种情况需要在解析前将当前文件中所有的枚举都找出来, 然后传给 FeatureInfo, 之后再替换时候再加上这部分。 +/// 因为 common info 枚举格式都比较固定, 因此可以直接用正则提取出来。 +/// +/// +/// a. 单个 common info enum +/// auto attr = adlog.item.common_info_attr(i) +/// attr.int_value(), attr.int_list_value(), kv.first +/// +/// 单个 common info enum 的判断方式又分为两种情况, 一种是等于, 一种是不等于。不等于的情况需要在 +/// CommonInfoNormal 中进行标记, 并且最后再替换的时候只需要将整个 loop 的 body 部分都替换即可。 +/// +/// b. 
多个 common info, 在 for 循环中用 if 判断枚举来区分, 并且需要在 if env 中记录当前下标。 +/// 如 teams/ad/ad_algorithm/feature/fast/impl/extract_live_lsp_segment_info.h +/// +/// const auto & common_info_attrs = item.ad_dsp_info().live_info().common_info_attr(); +/// for (const auto & attr : common_info_attrs) { +/// if (attr.name_value() == attr_name) { +/// for (int64 value : attr.int_list_value()) { +/// if (variant == 0) { +/// AddFeature(value, 1.0f, result); +/// continue; +/// } +/// if (attr_name == auto_cpp_rewriter::CommonInfoAttr_NameExtendTwo_LSP_LATEST_LIVE_SEGMENT_INFO_LIVE_ID) { +/// if (variant == 1) { +/// AddFeature(value & MASK48, 1.0f, result); +/// } else if (variant == 2) { +/// AddFeature(value >> 48, 1.0f, result); +/// } +/// continue; +/// } +/// if (attr_name == auto_cpp_rewriter::CommonInfoAttr_NameExtendTwo_LSP_LATEST_LIVE_SEGMENT_INFO_TIMESTAMP) { +/// if (variant == 1) { +/// AddFeature((adlog.Get().time() / 1000 - value) / 60, 1.0f, result); +/// } +/// continue; +/// } +/// ... +/// } +/// } +/// +/// 需要判断每个枚举, 并将对应的逻辑转换为 bs 的逻辑。 +/// +/// 枚举也可能是模板参数传进来,因此 if 条件比较的也可能是 int。int 的值在解析的时候取不到, 因此只能用变量的 +/// 方式保存。使用的时候用类似中间节点的方式。需要在 CommonInfoDetail 中进行区分。 +/// +/// 2. 
多个 common info, 提前用 map 保存起来, 用 CommonInfoType::MULTI_MAP 表示。 +/// 此种情况比较复杂, 提特征的逻辑见: +/// teams/ad/ad_algorithm/feature/fast/impl/extract_combine_realtime_action_match_cnt_v2.h 。 +/// 构造函数里保存了一个 common info enum 到下标 int 的 map, 然后依次判断枚举对应的 common info 是否存在, +/// 如果存在则将对应的值保存到另一个数组对应的下标中。之后再遍历其中的 int_list value ,作为特征。 +/// 先假设都是 int_list。后面再处理其他类型。 +/// +/// 会用到多个 common info enum, 如下 +/// for (const ::auto_cpp_rewriter::CommonInfoAttr& user_attr : adlog.user_info().common_info_attr()) { +/// auto iter = user_attr_map_.find(user_attr.name_value()); +/// if (iter != user_attr_map_.end()) { +/// int index = iter->second; +/// action_list[index] = user_attr; +/// cnt++; +/// } +/// } +/// 和普通的 common info 用 if (name_value == enum) 判断不同,这个用了一个 user_attr_map_ 提前存的 key +/// 来表示要用到的 common info enum。需要改成按 user_attr_map_ 来遍历。 +/// for (auto it = user_attr_map_.begin(); it != user_attr_map_.end(); it++) { +/// BSRepeatedField user_attr(*bs, it->first, pos); +/// int index = it->second; +/// action_list[index] = user_attr; +/// cnt++; +/// } +/// +/// 当遇到 user_attr_map_.find(user_attr.name_value()), 就可以确定是 MULTI_MAP 类型, 可以确定其 attr_map 以及 +/// user_attr。 +/// +/// 4. 
多个 common info, 提前用 map 保存起来,但是类型都是 int64_list, 用 CommonInfoType::MULTI_INT_LIST 表示。 +/// 示例: teams/ad/ad_algorithm/feature/fast/impl/extract_user_rtl_product_name_shallow_action_7d.h +/// 详细逻辑见 docs/common_info.md。 +/// +/// 34 中间节点的 common info +/// 示例: teams/ad/ad_algorithm/feature/fast/impl/extract_item_goods_id_list_size.h, 所需字段来自中间 +/// 节点的 common info, 如 live info common info。 +/// +/// 确定类型之前的信息都保存在 CommonInfoPrepare 中, 一旦确定类型, 设置 is_confirmed 为 true。 +enum class CommonInfoType { + NORMAL, + MULTI_MAP, + FIXED, + MIDDLE_NODE, + MULTI_INT_LIST +}; + +enum class CommonInfoValueType { + BOOL, + INT, + FLOAT, + STRING, + BOOL_LIST, + INT_LIST, + FLOAT_LIST, + STRING_LIST, + MAP_INT_BOOL, + MAP_INT_INT, + MAP_INT_FLOAT, + MAP_INT_STRING, + MAP_STRING_INT, + MAP_INT_MULTI_INT +}; + +class CommonAttrInfo { + public: + CommonAttrInfo() = default; + explicit CommonAttrInfo(CommonInfoType common_info_type): common_info_type_(common_info_type) {} + + void set_env_ptr(Env* env_ptr); + Env* env_ptr() const; + Env* parent_env_ptr() const; + + bool is_normal() const { return common_info_type_ == CommonInfoType::NORMAL; } + bool is_middle_node() const { return common_info_type_ == CommonInfoType::MIDDLE_NODE; } + + const absl::optional& get_common_info_prepare() const; + + /// common info method 相关 + static const std::string bool_value; + static const std::string int_value; + static const std::string float_value; + static const std::string string_value; + static const std::string bool_list_value; + static const std::string int_list_value; + static const std::string float_list_value; + static const std::string string_list_value; + static const std::string map_unit64_bool_value; + static const std::string map_int64_int64_value; + static const std::string map_int64_float_value; + static const std::string map_int64_string_value; + static const std::string map_string_int64_value; + static const std::string map_int64_multi_value; + static const std::string common_info_attr_size; 
+ + static const std::unordered_set scalar_method_names; + static const std::unordered_set list_method_names; + static const std::unordered_set map_method_names; + static const std::unordered_set repeated_size_method_names; + + static bool is_common_info_scalar_method(const std::string& method_name); + static bool is_common_info_list_method(const std::string& method_name); + static bool is_common_info_map_method(const std::string& method_name); + static bool is_common_info_method(const std::string& method_name); + static bool is_common_info_size_method(const std::string& method_name); + static bool is_common_info_list_size_method(const std::string& method_name); + static bool is_common_info_map_size_method(const std::string& method_name); + static bool is_common_info_leaf_method(const std::string& method_name); + + static bool is_repeated_common_info_size(const std::string& method_name); + + static absl::optional find_value_type(const std::string& method_name); + + /// scalar 或者 list 取的是内部数据的类型, map 是 value 的类型。 + static std::string get_inner_type_str(CommonInfoValueType value_type); + + static std::pair get_map_inner_type_str(CommonInfoValueType value_type); + static std::string get_bs_type_str(const std::string& method_name, bool is_combine_user); + + static bool is_common_info_list_or_map_loop(const std::string& s); + static bool is_common_info_list_or_map_loop(const std::string& s, const std::string& loop_name); + + static bool contains_size_method(const std::string& s); + + protected: + Env* env_ptr_ = nullptr; + CommonInfoType common_info_type_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/CommonInfoBodyText.cpp b/convert/info/CommonInfoBodyText.cpp new file mode 100644 index 0000000..bacfe04 --- /dev/null +++ b/convert/info/CommonInfoBodyText.cpp @@ -0,0 +1,73 @@ +#include +#include +#include + +#include "CommonInfoBodyText.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void 
CommonInfoBodyText::set_bs_rewritten_text(const CommonInfoBodyText& body_text) { + bs_pre_text_ = body_text.get_bs_pre_text(); + + if (body_text.bs_loop_text()) { + bs_loop_text_.emplace(*body_text.bs_loop_text()); + } + + if (body_text.bs_post_text()) { + bs_post_text_.emplace(*body_text.bs_post_text()); + } +} + +void CommonInfoBodyText::set_bs_rewritten_text(const std::string &bs_pre_text, + const std::string &bs_loop_text, + const std::string &bs_post_text) { + bs_pre_text_ = bs_pre_text; + + if (bs_loop_text.size() > 0) { + bs_loop_text_.emplace(bs_loop_text); + } + + if (bs_post_text.size() > 0) { + bs_post_text_.emplace(bs_post_text); + } +} + +const std::string &CommonInfoBodyText::get_bs_loop_text() const { + if (bs_loop_text_) { + return *bs_loop_text_; + } + + static std::string empty; + return (empty); +} + +const std::string &CommonInfoBodyText::get_bs_post_text() const { + if (bs_post_text_) { + return *bs_post_text_; + } + + static std::string empty; + return (empty); +} + +std::string CommonInfoBodyText::bs_body_text() const { + std::ostringstream oss; + + oss << bs_pre_text_; + + if (bs_loop_text_) { + oss << *bs_loop_text_; + } + + if (bs_post_text_) { + oss << *bs_post_text_; + } + + return oss.str(); +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/CommonInfoBodyText.h b/convert/info/CommonInfoBodyText.h new file mode 100644 index 0000000..b3eb8e4 --- /dev/null +++ b/convert/info/CommonInfoBodyText.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class CommonInfoBodyText { + public: + void set_bs_rewritten_text(const std::string &bs_pre_text, + const std::string &bs_loop_text, + const std::string &bs_post_text); + void set_bs_rewritten_text(const CommonInfoBodyText& body_text); + + const absl::optional &bs_loop_text() const {return (bs_loop_text_);} + const absl::optional 
&bs_post_text() const {return (bs_post_text_);} + + const std::string &get_bs_pre_text() const { return (bs_pre_text_); } + const std::string& get_bs_loop_text() const; + const std::string& get_bs_post_text() const; + + std::string bs_body_text() const; + + protected: + std::string bs_pre_text_; + absl::optional bs_loop_text_; + absl::optional bs_post_text_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/CommonInfoCore.cpp b/convert/info/CommonInfoCore.cpp new file mode 100644 index 0000000..24cf1f0 --- /dev/null +++ b/convert/info/CommonInfoCore.cpp @@ -0,0 +1,51 @@ +#include +#include + +#include "absl/strings/str_split.h" +#include "../Tool.h" +#include "../Env.h" +#include "PrefixPair.h" +#include "CommonInfo.h" +#include "CommonInfoCore.h" +#include "../handler/StrictRewriter.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +CommonInfoCore::CommonInfoCore(const std::string& method_name) { + update_method_name(method_name); +} + +bool CommonInfoCore::is_scalar() const { + return CommonAttrInfo::is_common_info_scalar_method(method_name_); +} + +bool CommonInfoCore::is_list() const { + return CommonAttrInfo::is_common_info_list_method(method_name_); +} + +bool CommonInfoCore::is_map() const { + return CommonAttrInfo::is_common_info_map_method(method_name_); +} + +bool CommonInfoCore::is_list_size() const { + return CommonAttrInfo::is_common_info_list_size_method(method_name_); +} + +bool CommonInfoCore::is_map_size() const { + return CommonAttrInfo::is_common_info_map_size_method(method_name_); +} + +std::string CommonInfoCore::get_list_inner_type_str(CommonInfoValueType value_type) const { + std::string type_str = CommonAttrInfo::get_inner_type_str(value_type); + if (list_loop_var_type_ && tool::is_int32_type(*list_loop_var_type_)) { + return "int32_t"; + } else { + return type_str; + } +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git 
a/convert/info/CommonInfoCore.h b/convert/info/CommonInfoCore.h new file mode 100644 index 0000000..4b9ef7c --- /dev/null +++ b/convert/info/CommonInfoCore.h @@ -0,0 +1,112 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "clang/AST/Expr.h" + +#include "../Type.h" +#include "PrefixPair.h" +#include "CommonInfo.h" +#include "CommonInfoPrepare.h" +#include "CommonInfoBodyText.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; +class StrictRewriter; + +class CommonInfoCore : public CommonInfoBodyText { + public: + CommonInfoCore() = default; + explicit CommonInfoCore(const std::string& method_name); + + bool is_ready() const { return is_ready_; } + + /// CommonInfoMultiMap 和 CommonInfoFixed 会用到 + const std::string& method_name() const { return method_name_; } + void set_method_name(const std::string& method_name) { method_name_ = method_name; } + + const absl::optional& size_method_name() const { return size_method_name_; } + + const CommonInfoValueType& value_type() const { return value_type_; } + void set_value_type(CommonInfoValueType value_type) { value_type_ = value_type; } + + virtual void update_method_name(const std::string& method_name) { + set_method_name(method_name); + if (absl::optional value_type = CommonAttrInfo::find_value_type(method_name)) { + set_value_type(*value_type); + } + } + + virtual void update_size_method_name(const std::string& size_method_name) { + std::string method_name = + size_method_name.substr(0, size_method_name.size() - std::string("_size").size()); + set_method_name(method_name); + if (absl::optional value_type = CommonAttrInfo::find_value_type(method_name)) { + set_value_type(*value_type); + } + size_method_name_.emplace(size_method_name); + } + + bool is_for_stmt() const { return is_for_stmt_; } + void set_is_for_stmt(bool v) { is_for_stmt_ = v; } + + bool is_scalar() const; + bool is_list() const; + bool is_map() const; + bool is_list_size() const; + bool 
is_map_size() const; + + /// size_method 是否是出现在 for 循环的初始化中用来遍历, 还是只被用来当做变量。 + bool is_size_method_in_loop_init() const { return is_size_method_in_loop_init_; } + void set_is_size_method_in_loop_init(bool v) { is_size_method_in_loop_init_ = v; } + + bool has_list_method_address() const { return has_list_method_address_; } + void set_has_list_method_address(bool v) { has_list_method_address_ = v; } + + const absl::optional &list_loop_var_type() const {return (list_loop_var_type_);} + void set_list_loop_var_type(const std::string &list_loop_var_type) { + list_loop_var_type_.emplace(list_loop_var_type); + } + + std::string get_list_inner_type_str(CommonInfoValueType value_type) const; + + const absl::optional& compare_list_size_value() const { return (compare_list_size_value_); } + void set_compare_list_size_vlaue(int v) { compare_list_size_value_.emplace(v); } + + const absl::optional& list_size_dividend() const { return (list_size_dividend_); } + void set_list_size_dividend(int v) { list_size_dividend_.emplace(v); } + + protected: + std::string method_name_; + + absl::optional size_method_name_; + + CommonInfoValueType value_type_; + + bool is_ready_ = false; + + /// for list method + bool is_for_stmt_ = false; + + bool is_size_method_in_loop_init_ = false; + + bool has_list_method_address_ = false; + + absl::optional list_loop_var_type_; + + absl::optional compare_list_size_value_; + + absl::optional list_size_dividend_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/CommonInfoDetail.cpp b/convert/info/CommonInfoDetail.cpp new file mode 100644 index 0000000..8e6bea4 --- /dev/null +++ b/convert/info/CommonInfoDetail.cpp @@ -0,0 +1,240 @@ +#include +#include + +#include "absl/strings/str_split.h" +#include "../Tool.h" +#include "../Env.h" +#include "CommonInfo.h" +#include "CommonInfoDetail.h" +#include "../handler/StrictRewriter.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + 
+CommonInfoDetail::CommonInfoDetail(const std::string& prefix_adlog, + int common_info_value): + CommonInfoLeaf(prefix_adlog, common_info_value) { +} + +CommonInfoDetail::CommonInfoDetail(const std::string& prefix_adlog, + int common_info_value, + const std::string& method_name): + CommonInfoLeaf(prefix_adlog, common_info_value, method_name) { + is_ready_ = true; +} + +std::string CommonInfoDetail::get_exists_expr(Env* env_ptr) const { + if (method_name_.size() == 0) { + LOG(INFO) << "cannot find common info method or value, prefix: " << prefix_; + return ""; + } + + std::string inner_type_str = CommonAttrInfo::get_inner_type_str(value_type_); + if (inner_type_str.size() == 0) { + LOG(INFO) << "cannot get inner_type_str, prefix: " << prefix_ + << ", value_type_: " << static_cast(value_type_); + return ""; + } + + std::ostringstream oss; + std::string bs_enum_str = get_bs_enum_str(); + + if (name_value_alias_) { + oss << *name_value_alias_ << " == " << common_info_value_ << " && "; + } + + LOG(INFO) << "method_name_ : "<< method_name_ + << ", common_info_value: " << common_info_value_ + << ", bs_enum_str: " << bs_enum_str; + if (const absl::optional& var = env_ptr->find_new_def(bs_enum_str)) { + if (CommonAttrInfo::is_common_info_scalar_method(method_name_)) { + if (var->exists_name().size() > 0) { + return var->exists_name(); + } else { + LOG(INFO) << "cannot find scalar exists var, bs_enum_str: " << bs_enum_str; + } + } else { + if (var->name().size() == 0) { + LOG(INFO) << "cannot find common info var_name, prefix: " << prefix_; + return ""; + } + + oss << "!" 
<< var->name() << ".is_empty()"; + } + } + + return oss.str(); +} + +std::string CommonInfoDetail::get_bs_enum_str() const { + return prefix_ + "_key_" + std::to_string(common_info_value_); +} + +std::string CommonInfoDetail::get_bs_scalar_def(Env* env_ptr) const { + if (!is_ready_) { + LOG(INFO) << "common info is not ready!"; + return ""; + } + + if (env_ptr == nullptr) { + LOG(INFO) << "env_ptr is nullptr!"; + return ""; + } + + std::string bs_enum_str = get_bs_enum_str(); + std::string new_name = env_ptr->find_valid_new_name(bs_enum_str); + std::string enum_new_name = std::string("enum_") + new_name; + std::string type_str = CommonAttrInfo::get_inner_type_str(value_type_); + + std::ostringstream oss; + + oss << " auto " << enum_new_name << " = BSFieldEnum::" << bs_enum_str << ";\n "; + oss << type_str << " " << new_name << " = BSFieldHelper::"; + oss << "GetSingular<" << type_str; + + if (env_ptr->is_combine_feature() && !tool::is_item_field(bs_enum_str)) { + oss << ", true"; + } + oss << ">" + << "(*bs, " << enum_new_name; + + if (!env_ptr->is_user_feature()) { + oss << ", pos"; + } + oss << ")"; + + return oss.str(); +} + +std::string CommonInfoDetail::get_bs_scalar_exists_def(Env* env_ptr) const { + if (!is_ready_) { + LOG(INFO) << "common info is not ready!"; + return ""; + } + + if (env_ptr == nullptr) { + LOG(INFO) << "env_ptr is nullptr!"; + return ""; + } + + std::string bs_enum_str = get_bs_enum_str(); + std::string new_name = env_ptr->find_valid_new_name(bs_enum_str); + std::string enum_new_name = std::string("enum_") + new_name; + std::string type_str = CommonAttrInfo::get_inner_type_str(value_type_); + + new_name = tool::get_exists_name(new_name); + enum_new_name = tool::get_exists_name(enum_new_name); + + std::ostringstream oss; + + oss << " auto " << enum_new_name << " = BSFieldEnum::" << bs_enum_str << ";\n "; + oss << "bool " << new_name << " = BSFieldHelper::"; + oss << "HasSingular<" << type_str; + + if (env_ptr->is_combine_feature() && 
!tool::is_item_field(bs_enum_str)) { + oss << ", true"; + } + oss << ">" + << "(*bs, " << enum_new_name; + + if (!env_ptr->is_user_feature()) { + oss << ", pos"; + } + oss << ")"; + + return oss.str(); +} + +std::string CommonInfoDetail::get_bs_list_def(Env* env_ptr) const { + if (!is_ready_) { + LOG(INFO) << "common info is not ready!"; + return ""; + } + + if (env_ptr == nullptr) { + LOG(INFO) << "env_ptr is nullptr!"; + return ""; + } + + std::string bs_enum_str = get_bs_enum_str(); + std::string new_name = env_ptr->find_valid_new_name(bs_enum_str); + std::string type_str = get_list_inner_type_str(value_type_); + std::string enum_new_name = std::string("enum_") + new_name; + + std::ostringstream oss; + + oss << " auto " << enum_new_name << " = BSFieldEnum::" << bs_enum_str << ";\n "; + oss << "BSRepeatedField<" << type_str; + if (env_ptr->is_combine_feature() && !tool::is_item_field(bs_enum_str)) { + oss << ", true"; + } + oss << "> " << new_name << "(*bs, " << enum_new_name; + if (!env_ptr->is_user_feature()) { + oss << ", pos"; + } + oss << ")"; + + return oss.str(); +} + +std::string CommonInfoDetail::get_bs_map_def(Env* env_ptr) const { + if (!is_ready_) { + LOG(INFO) << "common info is not ready!"; + return ""; + } + + if (env_ptr == nullptr) { + LOG(INFO) << "env_ptr is nullptr!"; + return ""; + } + + std::string bs_enum_str = get_bs_enum_str(); + std::string new_name = env_ptr->find_valid_new_name(bs_enum_str); + std::pair map_type_str = CommonAttrInfo::get_map_inner_type_str(value_type_); + std::string enum_new_name_key = std::string("enum_") + new_name + "_key"; + std::string enum_new_name_value = std::string("enum_") + new_name + "_value"; + + std::ostringstream oss; + + oss << " auto " << enum_new_name_key << " = BSFieldEnum::" << bs_enum_str << "_key" << ";\n "; + oss << " auto " << enum_new_name_value << " = BSFieldEnum::" << bs_enum_str << "_value" << ";\n "; + + oss << "BSMapField<" << map_type_str.first << ", " << map_type_str.second; + if 
(env_ptr->is_combine_feature() && !tool::is_item_field(bs_enum_str)) { + oss << ", true"; + } + oss << "> " << new_name << "(*bs, " << enum_new_name_key << ", " << enum_new_name_value; + if (!env_ptr->is_user_feature()) { + oss << ", pos"; + } + oss << ")"; + + return oss.str(); +} + +std::string CommonInfoDetail::get_bs_var_name(Env* env_ptr) const { + std::string bs_enum_str = get_bs_enum_str(); + + if (const absl::optional& var = env_ptr->find_new_def(bs_enum_str)) { + if (is_for_stmt()) { + // 必定是遍历 list value + const std::string& loop_var = env_ptr->get_last_loop_var(); + if (loop_var.size() > 0) { + return var->name() + "Get(" + loop_var + ")"; + } else { + LOG(INFO) << "cannot find loop var for for_stmt!"; + return ""; + } + } else { + return var->name() + ".Get(idx)"; + } + } else { + LOG(INFO) << "cannot find list or map var_name in env, bs_enum_str: " << bs_enum_str; + return ""; + } +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/CommonInfoDetail.h b/convert/info/CommonInfoDetail.h new file mode 100644 index 0000000..aa5f383 --- /dev/null +++ b/convert/info/CommonInfoDetail.h @@ -0,0 +1,46 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "clang/AST/Expr.h" + +#include "../Type.h" +#include "CommonInfoLeaf.h" +#include "CommonInfoPrepare.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; +class StrictRewriter; + +class CommonInfoDetail: public CommonInfoLeaf { + public: + explicit CommonInfoDetail(const std::string& prefix_adlog, + int common_info_value); + explicit CommonInfoDetail(const std::string& prefix_adlog, + int common_info_value, + const std::string& method_name); + /// 返回判断条件是否存在的 bs 表达式 + /// 如果是 scalar, 则返回 xxx_exists, xxx_exists 的定义需要提前放到 env_ptr 中 + /// 如果是 list 或者 map, 则返回 !attr.is_empty() + std::string get_exists_expr(Env* env_ptr) const override; + std::string get_bs_enum_str() const override; + + std::string 
get_bs_scalar_def(Env* env_ptr) const override; + std::string get_bs_scalar_exists_def(Env* env_ptr) const override; + std::string get_bs_list_def(Env* env_ptr) const override; + std::string get_bs_map_def(Env* env_ptr) const override; + + std::string get_bs_var_name(Env* env_ptr) const override; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/CommonInfoFixed.cpp b/convert/info/CommonInfoFixed.cpp new file mode 100644 index 0000000..9f2ce9a --- /dev/null +++ b/convert/info/CommonInfoFixed.cpp @@ -0,0 +1,401 @@ +#include +#include +#include + +#include "absl/strings/str_split.h" +#include "../Tool.h" +#include "../Env.h" +#include "CommonInfoFixed.h" +#include "MiddleNodeInfo.h" +#include "../handler/StrictRewriter.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +std::string CommonInfoFixed::get_bs_wrap_text(Env* env_ptr) const { + if (method_name_.size() == 0) { + LOG(INFO) << "missing method_name for common info fixed, prefix: " << prefix_ + << ", int_name: " << int_name_; + return ""; + } + + std::string pre_text = bs_pre_text_; + + std::string loop_text; + if (bs_loop_text_) { + loop_text = *bs_loop_text_; + } + + bool is_list = CommonAttrInfo::is_common_info_list_method(method_name_); + if (is_list && !is_for_stmt()) { + if (list_loop_var_) { + std::string var_name = get_bs_var_name(env_ptr); + // std::regex p_list_loop_var(std::string("([^a-zA-Z0-9_])") + + // *list_loop_var_ + + // std::string("([^a-zA-Z0-9_])")); + // loop_text = std::regex_replace(loop_text, + // p_list_loop_var, + // std::string("$1") + var_name + std::string("$2")); + std::string loop_var_assign = std::string("auto ") + + *list_loop_var_ + + " = " + var_name; + loop_text = loop_var_assign + ";\n" + loop_text; + } + } + + std::ostringstream oss; + oss << "if ("; + + oss << get_exists_expr(env_ptr) << ") {\n "; + + if (is_scalar()) { + oss << pre_text; + } else if (is_for_stmt()) { + oss << pre_text << "\n" << 
loop_text; + } else if (has_list_method_address()) { + oss << pre_text; + } else { + if (size_method_name() && !is_size_method_in_loop_init()) { + oss << pre_text; + } else { + std::string bs_enum_str = get_bs_enum_str(); + if (const absl::optional& var = env_ptr->find_new_def(bs_enum_str)) { + oss << pre_text + << "for (size_t idx = 0; idx < " << var->name() << ".size(); idx++) {\n " + << loop_text + << "\n}\n"; + } else { + LOG(INFO) << "cannot find list or map var_name in env, bs_enum_str: " << bs_enum_str; + } + } + } + + if (bs_post_text_) { + oss << *bs_post_text_; + } + + oss << "}\n\n"; + + return oss.str(); +} + +std::string CommonInfoFixed::get_exists_expr(Env* env_ptr) const { + if (method_name_.size() == 0) { + LOG(INFO) << "cannot find common info method or value, prefix: " << prefix_; + return ""; + } + + std::string inner_type_str = CommonAttrInfo::get_inner_type_str(value_type_); + if (inner_type_str.size() == 0) { + LOG(INFO) << "cannot get inner_type_str, prefix: " << prefix_; + } + + std::ostringstream oss; + std::string bs_enum_str = get_bs_enum_str(); + + if (CommonAttrInfo::is_common_info_scalar_method(method_name_)) { + oss << get_exists_functor_name() << "(bs, pos)"; + } else { + if (const absl::optional& var = env_ptr->find_new_def(bs_enum_str)) { + if (var->name().size() == 0) { + LOG(INFO) << "cannot find common info var_name, prefix: " << prefix_; + return ""; + } + + oss << "!" 
<< var->name() << ".is_empty()"; + } + } + + return oss.str(); +} + +std::string CommonInfoFixed::get_bs_enum_str() const { + return prefix_ + "_" + tool::trim_tail_underscore(int_name_); +} + +std::string CommonInfoFixed::get_bs_scalar_def(Env* env_ptr) const { + if (method_name_.size() == 0) { + LOG(INFO) << "common info fixed missing method_name!"; + return ""; + } + + if (env_ptr == nullptr) { + LOG(INFO) << "env_ptr is nullptr!"; + return ""; + } + + std::string bs_enum_str = get_bs_enum_str(); + std::string new_name = env_ptr->find_valid_new_name(bs_enum_str); + std::string type_str = CommonAttrInfo::get_inner_type_str(value_type_); + + std::ostringstream oss; + oss << type_str << " " << new_name << " = " << get_functor_name() << "(bs, pos)"; + return oss.str(); +} + +std::string CommonInfoFixed::get_bs_list_def(Env* env_ptr) const { + if (method_name_.size() == 0) { + LOG(INFO) << "common info fixed missing method_name!"; + return ""; + } + + if (env_ptr == nullptr) { + LOG(INFO) << "env_ptr is nullptr!"; + return ""; + } + + std::string bs_enum_str = get_bs_enum_str(); + std::string new_name = env_ptr->find_valid_new_name(bs_enum_str); + std::string type_str = CommonAttrInfo::get_inner_type_str(value_type_); + + std::ostringstream oss; + oss << "BSRepeatedField<" << type_str; + if (!middle_node_info_) { + if (env_ptr->is_combine_feature() && !tool::is_item_field(bs_enum_str)) { + oss << ", true"; + } + } + oss << "> " << new_name << " = std::move(" << get_functor_name() << "(bs, pos))"; + + return oss.str(); +} + +std::string CommonInfoFixed::get_bs_map_def(Env* env_ptr) const { + if (method_name_.size() == 0) { + LOG(INFO) << "common info fixed missing method_name!"; + return ""; + } + + if (env_ptr == nullptr) { + LOG(INFO) << "env_ptr is nullptr!"; + return ""; + } + + std::string bs_enum_str = get_bs_enum_str(); + std::string new_name = env_ptr->find_valid_new_name(bs_enum_str); + std::pair map_type_str = 
CommonAttrInfo::get_map_inner_type_str(value_type_); + + std::ostringstream oss; + oss << "BSMapField<" << map_type_str.first << ", " << map_type_str.second; + if (!middle_node_info_) { + if (env_ptr->is_combine_feature() && !tool::is_item_field(bs_enum_str)) { + oss << ", true"; + } + } + oss << "> " << new_name << " = std::move(" << get_functor_name() << "(bs, pos))"; + + return oss.str(); +} + +std::string CommonInfoFixed::get_bs_var_name(Env* env_ptr) const { + std::string bs_enum_str = get_bs_enum_str(); + + if (const absl::optional& var = env_ptr->find_new_def(bs_enum_str)) { + if (is_scalar()) { + return var->name(); + } else if (is_list()) { + if (is_for_stmt()) { + // 必定是遍历 list value + const std::string &loop_var = env_ptr->get_last_loop_var(); + if (loop_var.size() > 0) { + return var->name() + ".Get(" + loop_var + ")"; + } else { + LOG(INFO) << "cannot find loop var for for_stmt!"; + return ""; + } + } else { + return var->name() + ".Get(idx)"; + } + } else { + // 应该用 get_map_bs_var_name + return var->name() + ".GetKey(idx)"; + } + } else { + LOG(INFO) << "cannot find list or map var_name in env, bs_enum_str: " << bs_enum_str; + return ""; + } +} + +std::string CommonInfoFixed::get_bs_var_def_name(Env *env_ptr) const { + std::string bs_enum_str = get_bs_enum_str(); + + if (const absl::optional &var = env_ptr->find_new_def(bs_enum_str)) { + return var->name(); + } + + LOG(INFO) << "cannot find list or map var_name in env, bs_enum_str: " + << bs_enum_str; + return ""; +} + +std::string CommonInfoFixed::get_map_bs_var_name(Env* env_ptr, const std::string& member) const { + std::string bs_enum_str = get_bs_enum_str(); + + if (const absl::optional &var = env_ptr->find_new_def(bs_enum_str)) { + if (is_map()) { + if (member == "first") { + return var->name() + ".GetKey(idx)"; + } else { + return var->name() + ".GetValue(idx)"; + } + } + } else { + LOG(INFO) << "cannot find list or map var_name in env, bs_enum_str: " << bs_enum_str; + return ""; + } + + return 
""; +} + +std::string CommonInfoFixed::get_functor_name() const { + std::ostringstream oss; + + std::string bs_enum_str = get_bs_enum_str(); + std::vector arr = absl::StrSplit(bs_enum_str, "_"); + oss << "BSGet"; + + if (middle_node_info_) { + oss << middle_node_info_->name(); + } + + for (const std::string& s: arr) { + if (starts_with(s, "adlog") || s == "exists" || s.size() == 0) { + continue; + } + + oss << char(toupper(s[0])) << s.substr(1); + } + + return oss.str(); +} + +std::string CommonInfoFixed::get_exists_functor_name() const { + std::string functor_name = get_functor_name(); + static std::regex p("BSGet"); + return std::regex_replace(functor_name, p, "BSHas"); +} + +// BSFixedCommonInfo BSGetItemAdDspInfoCommonInfoAttr(no); +std::string CommonInfoFixed::get_bs_scalar_field_def(Env* env_ptr) const { + std::ostringstream oss; + + std::string type_str = CommonAttrInfo::get_inner_type_str(value_type_); + if (middle_node_info_) { + oss << "BS" << middle_node_info_->name() << "<" << type_str; + } else { + oss <<"BSFixedCommonInfo<" << type_str; + } + + if (!middle_node_info_) { + if (env_ptr->is_combine_feature() && !tool::is_item_field(get_bs_enum_str())) { + oss << ", true"; + } + } + + if (middle_node_info_) { + oss << "> " << get_functor_name() << "{" << int_name_ << "}"; + } else { + oss << "> " << get_functor_name() << "{" << tool::add_quote(prefix_adlog_) << ", " << int_name_ << "}"; + } + + return oss.str(); +} + +// BSHasFixedCommonInfoImpl BSHasItemAdDspInfoCommonInfoAttr(no); +// 中间节点的 CommonInfo 单值比较特殊,需要带上类型 +// BSHasLiveInfoCommonInfoImpl BSHasLiveInfoCommonInfoAttrNo{no}; +std::string CommonInfoFixed::get_bs_scalar_exists_field_def(Env* env_ptr) const { + std::ostringstream oss; + + std::string type_str = CommonAttrInfo::get_inner_type_str(value_type_); + if (middle_node_info_) { + std::string type_str = CommonAttrInfo::get_inner_type_str(value_type_); + oss << "BSHas" << middle_node_info_->name() << common_info_camel_ << "Impl<" << type_str; + 
} else { + oss <<"BSHasFixedCommonInfoImpl<"; + } + + if (!middle_node_info_) { + if (tool::is_adlog_user_field(prefix_)) { + oss << "true"; + } else { + oss << "false"; + } + } + + oss << "> " << get_exists_functor_name(); + + if (middle_node_info_) { + oss << "{" << int_name_ << "}"; + } else { + oss << "{" << tool::add_quote(prefix_adlog_) << ", " << int_name_ << "}"; + } + + return oss.str(); +} + +// BSFixedCommonInfo> BSGetItemAdDspInfoCommonInfoAttr(no); +std::string CommonInfoFixed::get_bs_list_field_def(Env* env_ptr) const { + std::ostringstream oss; + + std::string type_str = CommonAttrInfo::get_inner_type_str(value_type_); + if (middle_node_info_) { + oss << "BS" << middle_node_info_->name() << "<"; + } else { + oss << "BSFixedCommonInfo<"; + } + + oss <<"BSRepeatedField<" << type_str << ">"; + if (!middle_node_info_) { + if (env_ptr->is_combine_feature() && !tool::is_item_field(get_bs_enum_str())) { + oss << ", true"; + } + } + + oss << "> " << get_functor_name(); + if (middle_node_info_) { + oss << "{" << int_name_ << "}"; + } else { + oss << "{" << tool::add_quote(prefix_adlog_) << ", " << int_name_ << "}"; + } + + return oss.str(); +} + +std::string CommonInfoFixed::get_bs_map_field_def(Env* env_ptr) const { + std::ostringstream oss; + + std::pair map_type_str = CommonAttrInfo::get_map_inner_type_str(value_type_); + if (middle_node_info_) { + oss << "BS" << middle_node_info_->name() << "<"; + } else { + oss << "BSFixedCommonInfo<"; + } + + oss <<"BSMapField<" << map_type_str.first << "," << map_type_str.second << ">"; + if (!middle_node_info_) { + if (env_ptr->is_combine_feature() && !tool::is_item_field(get_bs_enum_str())) { + oss << ", true"; + } + } + + oss << "> " << get_functor_name(); + if (middle_node_info_) { + oss << "{" << int_name_ << "}"; + } else { + oss << "{" << tool::add_quote(prefix_adlog_) << ", " << int_name_ << "}"; + } + + return oss.str(); +} + +std::string CommonInfoFixed::get_bs_scalar_field_value_expr(Env* env_ptr) const { + 
std::ostringstream oss; + oss << get_functor_name() << "(bs, pos)"; + return oss.str(); +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/CommonInfoFixed.h b/convert/info/CommonInfoFixed.h new file mode 100644 index 0000000..7aa4d48 --- /dev/null +++ b/convert/info/CommonInfoFixed.h @@ -0,0 +1,87 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "clang/AST/Expr.h" + +#include "../Type.h" +#include "CommonInfo.h" +#include "MiddleNodeInfo.h" +#include "CommonInfoPrepare.h" +#include "PrefixPair.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; +class StrictRewriter; + +/// common info value 是模板参数, 用 BSFixedCommonInfo +class CommonInfoFixed : public CommonInfoCore, public PrefixPair { + public: + explicit CommonInfoFixed(const std::string& prefix_adlog, + const std::string& name): + PrefixPair(prefix_adlog), + int_name_(name) {} + + explicit CommonInfoFixed(const std::string& prefix_adlog, + const std::string& name, + const absl::optional& middle_node_info): + PrefixPair(prefix_adlog), + int_name_(name), + middle_node_info_(middle_node_info) {} + + const std::string& int_name() const { return int_name_; } + void set_int_name(const std::string& int_name) { int_name_ = int_name; } + + void set_list_loop_var(const std::string& list_loop_var) { list_loop_var_ = list_loop_var; } + const absl::optional& list_loop_var() const { return list_loop_var_; } + + std::string get_bs_wrap_text(Env* env_ptr) const; + + std::string get_exists_expr(Env* env_ptr) const; + std::string get_bs_enum_str() const; + + std::string get_bs_scalar_def(Env* env_ptr) const; + std::string get_bs_list_def(Env* env_ptr) const; + std::string get_bs_map_def(Env* env_ptr) const; + + std::string get_bs_var_name(Env* env_ptr) const; + std::string get_bs_var_def_name(Env *env_ptr) const; + std::string get_map_bs_var_name(Env* env_ptr, const std::string& member) const; + + /// 属性定义, 
no 来自模板参数 + /// BSFixedCommonInfo BSGetItemAdDspInfoCommonInfoAttr(no); + /// get_functor_name 返回 BSGetItemAdDspInfoCommonInfoAttr。 + /// get_exists_functor_name 返回 BSHasItemAdDspInfoCommonInfoAttr。 + std::string get_functor_name() const; + + std::string get_exists_functor_name() const; + + std::string get_bs_scalar_field_def(Env* env_ptr) const; + std::string get_bs_scalar_exists_field_def(Env* env_ptr) const; + std::string get_bs_list_field_def(Env* env_ptr) const; + std::string get_bs_map_field_def(Env* env_ptr) const; + + std::string get_bs_scalar_field_value_expr(Env* env_ptr) const; + + private: + /// 模板参数变量名 + std::string int_name_; + + absl::optional list_loop_var_; + + absl::optional middle_node_info_; + + const std::string common_info_camel_ = "CommonInfo"; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/CommonInfoFixedList.cpp b/convert/info/CommonInfoFixedList.cpp new file mode 100644 index 0000000..14095e8 --- /dev/null +++ b/convert/info/CommonInfoFixedList.cpp @@ -0,0 +1,249 @@ +#include +#include +#include +#include +#include +#include + +#include "clang/AST/Expr.h" + +#include "../Env.h" +#include "Type.h" +#include "../handler/StrictRewriter.h" +#include "CommonInfoFixed.h" +#include "CommonInfoFixedList.h" +#include "CommonInfoMiddleNode.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void CommonInfoFixedList::add_int_name(const std::string& int_name) { + if (is_already_exists(int_name)) { + LOG(INFO) << "int_name already exists, skip, int_name: " << int_name; + return; + } + + LOG(INFO) << "add_int_name: " << int_name << ", prefix_adlog_: " << prefix_adlog_; + if (middle_node_info_) { + common_info_details_.emplace_back(new CommonInfoFixed(prefix_adlog_, int_name, middle_node_info_)); + } else { + common_info_details_.emplace_back(new CommonInfoFixed(prefix_adlog_, int_name)); + } + + if (list_loop_var_) { + 
common_info_details_.back()->set_list_loop_var(*list_loop_var_); + } +} + +CommonInfoFixed* CommonInfoFixedList::mutable_common_info_detail_by_int_name(const std::string& int_name) { + for (size_t i = 0; i < common_info_details_.size(); i++) { + if (common_info_details_[i] != nullptr && common_info_details_[i]->int_name() == int_name) { + return common_info_details_[i].get(); + } + } + + return nullptr; +} + +// 目前只能处理 list 类型的 common info, map 类型的还处理不了,不过目前只遇到过 list 的。 +std::string CommonInfoFixedList::get_bs_rewritten(StrictRewriter* rewriter_ptr, size_t index) const { + if (index >= common_info_details_.size()) { + LOG(INFO) << "out of range, index: " << index + << ", common_info_details_.size(): " << common_info_details_.size(); + return ""; + } + + if (common_info_details_[index] == nullptr) { + LOG(INFO) << "common info detail is nullptr, index: " << index + << ", prefix: " << prefix_; + return ""; + } + + const CommonInfoFixed& common_info_detail = *(common_info_details_[index]); + + std::ostringstream oss_body; + + if (const absl::optional& common_info_prepare = get_common_info_prepare()) { + const auto& other_if_stmts = common_info_prepare->other_if_stmt_strs(); + for (size_t i = 0; i < other_if_stmts.size(); i++) { + oss_body << other_if_stmts[i] << "\n"; + } + } + + oss_body << "\n" << common_info_detail.get_bs_pre_text() << "\n"; + std::string pre_text = oss_body.str(); + + std::string loop_text; + if (common_info_detail.bs_loop_text()) { + loop_text = *(common_info_detail.bs_loop_text()); + } + + if (common_info_detail.is_list()) { + if (common_info_detail.list_loop_var()) { + std::string var_name = common_info_detail.get_bs_var_name(env_ptr_); + // std::regex p_list_loop_var(std::string("([^a-zA-Z0-9_])") + + // *(common_info_detail.list_loop_var()) + + // std::string("([^a-zA-Z0-9_])")); + // loop_text = std::regex_replace(loop_text, p_list_loop_var, std::string("$1") + var_name + std::string("$2")); + std::string loop_var_assign = std::string("auto 
") + + *(common_info_detail.list_loop_var()) + + " = " + var_name; + loop_text = loop_var_assign + ";\n" + loop_text; + } + } + + + std::string bs_enum_str = common_info_detail.get_bs_enum_str(); + const absl::optional& var = env_ptr_->find_new_def(bs_enum_str); + if (!var) { + LOG(INFO) << "cannot find list or map var_name in env, bs_enum_str: " << bs_enum_str; + } + + std::ostringstream oss; + oss << "if (" << common_info_detail.get_exists_expr(env_ptr_) << ") {\n "; + + if (common_info_detail.is_list() && var) { + if (const auto& compare_list_size_value = common_info_detail.compare_list_size_value()) { + oss << "if (" << var->name() << ".size()"; + if (const auto& list_size_dividend = common_info_detail.list_size_dividend()) { + oss << " % " << *list_size_dividend; + } + oss << " == " << *compare_list_size_value << ") {\n "; + } + } + + if (common_info_detail.is_scalar()) { + oss << pre_text; + } else if (common_info_detail.is_for_stmt()) { + oss << pre_text << "\n" << loop_text << "\n"; + } else if (common_info_detail.has_list_method_address()) { + oss << pre_text; + } else if (common_info_detail.is_list() || common_info_detail.is_map()) { + if (common_info_detail.size_method_name() && !common_info_detail.is_size_method_in_loop_init()) { + oss << pre_text; + } else { + if (var) { + oss << pre_text + << "for (size_t idx = 0; idx < " << var->name() << ".size(); idx++) {\n " + << loop_text + << "\n}\n"; + } else { + LOG(INFO) << "cannot find list or map var_name in env, bs_enum_str: " << bs_enum_str; + } + } + } + + auto &post_text = common_info_detail.bs_post_text(); + if (post_text) { + oss << *post_text; + } + + oss << "}\n\n"; + + if (common_info_detail.is_list() && var) { + if (common_info_detail.compare_list_size_value()) { + oss << "}\n\n"; + } + } + + return oss.str(); +} + +std::string CommonInfoFixedList::get_bs_wrap_text(const std::string& text) const { + if (common_info_details_.size() == 0) { + LOG(INFO) << "out of range, index: " << 0 + << ", 
common_info_details_.size(): " << common_info_details_.size(); + return ""; + } + + if (common_info_details_[0] == nullptr) { + LOG(INFO) << "common info detail is nullptr, prefix: " << prefix_; + return ""; + } + + const CommonInfoFixed& common_info_detail = *(common_info_details_[0]); + + std::string s = text; + + if (common_info_detail.is_list()) { + if (common_info_detail.list_loop_var()) { + std::string var_name = common_info_detail.get_bs_var_name(env_ptr_); + // std::regex p_list_loop_var(std::string("([^a-zA-Z0-9_])") + + // *list_loop_var_ + + // std::string("([^a-zA-Z0-9_])")); + // s = std::regex_replace(s, p_list_loop_var, std::string("$1") + var_name + std::string("$2")); + std::string loop_var_assign = std::string("auto ") + + *(common_info_detail.list_loop_var()) + + " = " + var_name; + s = loop_var_assign + ";\n" + s; + } + } + + std::string bs_enum_str = common_info_detail.get_bs_enum_str(); + const absl::optional& var = env_ptr_->find_new_def(bs_enum_str); + if (!var) { + LOG(INFO) << "cannot find list or map var_name in env, bs_enum_str: " << bs_enum_str; + } + + std::ostringstream oss; + oss << "if ("; + oss << common_info_detail.get_exists_expr(env_ptr_) << ") {\n "; + + if (common_info_detail.is_list() && var) { + if (const auto& compare_list_size_value = common_info_detail.compare_list_size_value()) { + oss << "if (" << var->name() << ".size()"; + if (const auto& list_size_dividend = common_info_detail.list_size_dividend()) { + oss << " % " << *list_size_dividend; + } + oss << " == " << *compare_list_size_value << ") {\n "; + } + } + + if (common_info_detail.is_scalar() || + common_info_detail.is_list_size() || + common_info_detail.is_map_size()) { + oss << s; + } else if (common_info_detail.is_for_stmt()) { + oss << s; + } else if (common_info_detail.has_list_method_address()) { + oss << s; + } else if (common_info_detail.is_list() || common_info_detail.is_map()) { + if (common_info_detail.size_method_name() && 
!common_info_detail.is_size_method_in_loop_init()) { + oss << s; + } else { + std::string bs_enum_str = common_info_detail.get_bs_enum_str(); + if (const absl::optional& var = env_ptr_->find_new_def(bs_enum_str)) { + oss << "for (size_t idx = 0; idx < " << var->name() << ".size(); idx++) {\n " + << s + << "\n}\n"; + } else { + LOG(INFO) << "cannot find list or map var_name in env, bs_enum_str: " << bs_enum_str; + } + } + } + + oss << "}\n\n"; + + if (common_info_detail.is_list() && var) { + if (common_info_detail.compare_list_size_value()) { + oss << "}\n\n"; + } + } + + return oss.str(); +} + +bool CommonInfoFixedList::is_already_exists(const std::string& int_name) { + for (size_t i = 0; i < common_info_details_.size(); i++) { + if (common_info_details_[i]->int_name() == int_name) { + return true; + } + } + + return false; +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/CommonInfoFixedList.h b/convert/info/CommonInfoFixedList.h new file mode 100644 index 0000000..204e5ab --- /dev/null +++ b/convert/info/CommonInfoFixedList.h @@ -0,0 +1,111 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "clang/AST/Expr.h" + +#include "../Type.h" +#include "CommonInfo.h" +#include "PrefixPair.h" +#include "MiddleNodeInfo.h" +#include "CommonInfoFixed.h" +#include "CommonInfoPrepare.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; +class StrictRewriter; + +/// 多个 common info value 是模板参数, 用 BSFixedCommonInfo +/// 与 CommonInfoNormal 逻辑类似,重复代码比较多,后面有时间再重构下。 +class CommonInfoFixedList : public CommonAttrInfo, public PrefixPair { + public: + explicit CommonInfoFixedList(const std::string& prefix_adlog): + CommonAttrInfo(CommonInfoType::FIXED), + PrefixPair(prefix_adlog) {} + + explicit CommonInfoFixedList(const std::string& prefix_adlog, + const absl::optional& middle_node_info): + CommonAttrInfo(CommonInfoType::FIXED), + 
PrefixPair(prefix_adlog), + middle_node_info_(middle_node_info) {} + + /// 注意,添加标准是遇到 attr.name_value() 判断时候添加,因此必须去重。 + /// 由于在 BinaryOperator 中,因此 attr.name_value() 可能会被访问多次。 + void add_int_name(const std::string& int_name); + + void set_list_loop_var(const std::string& list_loop_var) { list_loop_var_ = list_loop_var; } + const absl::optional& list_loop_var() const { return list_loop_var_; } + + const std::vector> & common_info_details() const { + return (common_info_details_); + } + std::vector> &mutable_common_info_details() { + return (common_info_details_); + } + + const CommonInfoFixed* get_common_info_detail(size_t index) const { + if (index >= common_info_details_.size()) { + return nullptr; + } + + return common_info_details_[index].get(); + } + + CommonInfoFixed* mutable_common_info_detail(size_t index) { + if (index >= common_info_details_.size()) { + return nullptr; + } + + return common_info_details_[index].get(); + } + + CommonInfoFixed* mutable_common_info_detail_by_int_name(const std::string& int_name); + + size_t common_info_details_size() const { + return common_info_details_.size(); + } + + const CommonInfoFixed* last_common_info_detail() const { + if (common_info_details_.size() == 0) { + return nullptr; + } + return common_info_details_.back().get(); + } + CommonInfoFixed* last_mutable_common_info_detail() { + if (common_info_details_.size() == 0) { + return nullptr; + } + return common_info_details_.back().get(); + } + + /// 列表遍历普通 for 循环的分 cxx_for_range_stmt 和 for_stmt 两种情况。 + /// cxx_for_range_stmt 可以直接替换, for_stmt + /// 可能还有其他条件,因此必须基于原来的 for 循环替换。 + std::string get_bs_rewritten(StrictRewriter *rewriter_ptr, size_t index) const; + std::string get_bs_wrap_text(const std::string &text) const; + + bool is_already_exists(const std::string& int_name); + + void add_other_if_stmt(clang::IfStmt* other_if_stmt) { other_if_stmts_.push_back(other_if_stmt); } + + private: + /// 模板参数变量名 + std::vector> common_info_details_; + absl::optional 
list_loop_var_; + absl::optional middle_node_info_; + std::vector other_if_stmts_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/CommonInfoLeaf.cpp b/convert/info/CommonInfoLeaf.cpp new file mode 100644 index 0000000..9f302c2 --- /dev/null +++ b/convert/info/CommonInfoLeaf.cpp @@ -0,0 +1,317 @@ +#include +#include + +#include "absl/strings/str_split.h" + +#include "../Tool.h" +#include "../Env.h" +#include "CommonInfoLeaf.h" +#include "../handler/StrictRewriter.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void CommonInfoLeaf::update_method_name(const std::string& method_name) { + if (!is_ready_) { + CommonInfoCore::update_method_name(method_name); + is_ready_ = true; + } +} + +void CommonInfoLeaf::update_size_method_name(const std::string& size_method_name) { + if (!is_ready_) { + CommonInfoCore::update_size_method_name(size_method_name); + is_ready_ = true; + } +} + +std::string CommonInfoLeaf::get_exists_expr(Env* env_ptr) const { + if (method_name_.size() == 0) { + LOG(INFO) << "cannot find common info method, prefix: " << prefix_ + << ", address: " << this; + return ""; + } + + std::string inner_type_str = CommonAttrInfo::get_inner_type_str(value_type_); + if (inner_type_str.size() == 0) { + LOG(INFO) << "cannot get inner_type_str, prefix: " << prefix_; + } + + std::ostringstream oss; + std::string bs_enum_str = get_bs_enum_str(); + + if (const absl::optional& var = env_ptr->find_new_def(bs_enum_str)) { + if (is_scalar()) { + oss << var->exists_name(); + } else { + if (var->name().size() == 0) { + LOG(INFO) << "cannot find common info var_name, prefix: " << prefix_; + return ""; + } + + oss << "!" 
<< var->name() << ".is_empty()"; + } + } + + return oss.str(); +} + +std::string CommonInfoLeaf::get_bs_scalar_def(Env* env_ptr) const { + if (method_name_.size() == 0) { + LOG(INFO) << "common info fixed missing method_name!"; + return ""; + } + + if (env_ptr == nullptr) { + LOG(INFO) << "env_ptr is nullptr!"; + return ""; + } + + std::string bs_enum_str = get_bs_enum_str(); + std::string new_name = env_ptr->find_valid_new_name(bs_enum_str); + std::string type_str = CommonAttrInfo::get_inner_type_str(value_type_); + + std::ostringstream oss; + oss << type_str << " " << new_name << " = " << get_functor_name() << "(bs, pos)"; + return oss.str(); +} + +std::string CommonInfoLeaf::get_bs_scalar_exists_def(Env* env_ptr) const { + if (method_name_.size() == 0) { + LOG(INFO) << "common info fixed missing method_name!"; + return ""; + } + + if (env_ptr == nullptr) { + LOG(INFO) << "env_ptr is nullptr!"; + return ""; + } + + std::string bs_enum_str = get_bs_enum_str(); + std::string new_name = tool::get_exists_name(env_ptr->find_valid_new_name(bs_enum_str)); + std::string type_str = CommonAttrInfo::get_inner_type_str(value_type_); + + std::ostringstream oss; + oss << "bool " << new_name << " = " << get_exists_functor_name() << "(bs, pos)"; + return oss.str(); +} + +std::string CommonInfoLeaf::get_bs_list_def(Env* env_ptr) const { + if (method_name_.size() == 0) { + LOG(INFO) << "common info fixed missing method_name!" 
<< ", address: " << this; + return ""; + } + + if (env_ptr == nullptr) { + LOG(INFO) << "env_ptr is nullptr!"; + return ""; + } + + std::string bs_enum_str = get_bs_enum_str(); + std::string new_name = env_ptr->find_valid_new_name(bs_enum_str); + std::string type_str = get_list_inner_type_str(value_type_); + LOG(INFO) << "get_bs_list_def, type_str: " << type_str; + + std::ostringstream oss; + oss << "BSRepeatedField<" << type_str; + if (env_ptr->is_combine_feature() && !is_item_field(bs_enum_str)) { + oss << ", true"; + } + oss << "> " << new_name << " = std::move(" << get_functor_name() << "(bs, pos))"; + + return oss.str(); +} + +std::string CommonInfoLeaf::get_bs_map_def(Env* env_ptr) const { + if (method_name_.size() == 0) { + LOG(INFO) << "common info fixed missing method_name!"; + return ""; + } + + if (env_ptr == nullptr) { + LOG(INFO) << "env_ptr is nullptr!"; + return ""; + } + + std::string bs_enum_str = get_bs_enum_str(); + std::string new_name = env_ptr->find_valid_new_name(bs_enum_str); + std::pair map_type_str = CommonAttrInfo::get_map_inner_type_str(value_type_); + + std::ostringstream oss; + oss << "BSMapField<" << map_type_str.first << ", " << map_type_str.second; + if (env_ptr->is_combine_feature() && !is_item_field(bs_enum_str)) { + oss << ", true"; + } + oss << "> " << new_name << " = std::move(" << get_functor_name() << "(bs, pos))"; + + return oss.str(); +} + +std::string CommonInfoLeaf::get_bs_var_name(Env* env_ptr) const { + std::string bs_enum_str = get_bs_enum_str(); + + if (const absl::optional& var = env_ptr->find_new_def(bs_enum_str)) { + if (is_list()) { + if (is_for_stmt()) { + // 必定是遍历 list value + const std::string &loop_var = env_ptr->get_last_loop_var(); + if (loop_var.size() > 0) { + return var->name() + ".Get(" + loop_var + ")"; + } else { + LOG(INFO) << "cannot find loop var for for_stmt!"; + return ""; + } + } else { + return var->name() + ".Get(idx)"; + } + } else if (is_scalar()) { + return var->name(); + } else { + // map 
直接根据 member_expr 替换,不会调用到这个函数。 + return var->name(); + } + } else { + LOG(INFO) << "cannot find list or map var_name in env, bs_enum_str: " << bs_enum_str; + return ""; + } +} + +std::string CommonInfoLeaf::get_bs_var_def_name(Env* env_ptr) const { + std::string bs_enum_str = get_bs_enum_str(); + + if (const absl::optional &var = env_ptr->find_new_def(bs_enum_str)) { + return var->name(); + } + + LOG(INFO) << "cannot find list or map var_name in env, bs_enum_str: " + << bs_enum_str; + return ""; +} + +std::string CommonInfoLeaf::bs_enum_str_to_camel(const std::string bs_enum_str) const { + std::ostringstream oss; + + std::vector arr = absl::StrSplit(bs_enum_str, "_"); + for (const std::string& s: arr) { + if (starts_with(s, "adlog") || s == "exists" || s.size() == 0) { + continue; + } + + oss << char(toupper(s[0])) << s.substr(1); + } + + return oss.str(); +} + +std::string CommonInfoLeaf::get_functor_name() const { + std::ostringstream oss; + + std::string bs_enum_str = get_bs_enum_str(); + oss << "BSGet" << bs_enum_str_to_camel(bs_enum_str); + + return oss.str(); +} + +std::string CommonInfoLeaf::get_exists_functor_name() const { + std::string functor_name = get_functor_name(); + static std::regex p("BSGet"); + return std::regex_replace(functor_name, p, "BSHas"); +} + +// BSFixedCommonInfo BSGetItemAdDspInfoCommonInfoAttr(no); +std::string CommonInfoLeaf::get_bs_scalar_field_def(Env* env_ptr) const { + std::ostringstream oss; + + std::string bs_enum_str = get_bs_enum_str(); + std::string type_str = CommonAttrInfo::get_inner_type_str(value_type_); + oss << get_functor_tmpl() << "<" << type_str; + if (env_ptr->is_combine_feature() && !is_item_field(bs_enum_str)) { + oss << ", true"; + } + oss << "> " << get_functor_name() << "{" << get_field_def_params() << "}"; + + return oss.str(); +} + +// BSHasLiveInfoImpl BSHasLiveInfoCommonInfoAttrKey2332; +std::string CommonInfoLeaf::get_bs_scalar_exists_field_def(Env* env_ptr) const { + std::ostringstream oss; + + oss << 
get_exists_functor_tmpl() << " "; + oss << get_exists_functor_name() + << "{" << get_field_def_params() << "}"; + + return oss.str(); +} + +// BSFixedCommonInfo> BSGetItemAdDspInfoCommonInfoAttr(no); +std::string CommonInfoLeaf::get_bs_list_field_def(Env* env_ptr) const { + std::ostringstream oss; + + std::string bs_enum_str = get_bs_enum_str(); + std::string type_str = get_list_inner_type_str(value_type_); + oss << get_functor_tmpl() << ""; + if (env_ptr->is_combine_feature() && !is_item_field(bs_enum_str)) { + oss << ", true"; + } + oss << "> " << get_functor_name() << "{" << get_field_def_params() << "}"; + + return oss.str(); +} + +std::string CommonInfoLeaf::get_bs_map_field_def(Env* env_ptr) const { + std::ostringstream oss; + + std::string bs_enum_str = get_bs_enum_str(); + std::pair map_type_str = CommonAttrInfo::get_map_inner_type_str(value_type_); + oss << get_functor_tmpl() << ""; + if (env_ptr->is_combine_feature() && !is_item_field(bs_enum_str)) { + oss << ", true"; + } + oss << "> " << get_functor_name() << "{" << get_field_def_params() << "}"; + + return oss.str(); +} + +void CommonInfoLeaf::copy_except_int_value(CommonInfoLeaf* common_info_leaf) { + if (common_info_leaf == nullptr) { + return; + } + + // pre_text 中可能有包含 key_xxx 的参数。需要替换。 + // 如 userAttr 会被替换为 key_xxx。 + // 示例: teams/ad/ad_algorithm/feature/fast/impl/extract_user_ad_impression_realtime_new_extend.h + // case ::auto_cpp_rewriter::CommonInfoAttr_NameExtendOne_ADLOGFULL_EVENT_NEXTDAY_STAY_INDUSTRY_ID_V3_LIST: + // helper(FeaturePrefix::COMBINE_USER_SIM_REALTIME_CONV_THIRD_INDUSTRY_NAME, userAttr, result); + // break; + bs_pre_text_ = replace_value_key(common_info_leaf->get_bs_pre_text(), + common_info_leaf->common_info_value()); + bs_loop_text_.emplace(replace_value_key(common_info_leaf->get_bs_loop_text(), + common_info_leaf->common_info_value())); + bs_post_text_.emplace(replace_value_key(common_info_leaf->get_bs_post_text(), + common_info_leaf->common_info_value())); + + 
list_loop_var_ = common_info_leaf->list_loop_var(); + name_value_alias_ = common_info_leaf->name_value_alias(); + common_info_type_ = common_info_leaf->common_info_type(); + + // core + method_name_ = common_info_leaf->method_name(); + size_method_name_ = common_info_leaf->size_method_name(); + value_type_ = common_info_leaf->value_type(); + is_ready_ = common_info_leaf->is_ready(); + is_for_stmt_ = common_info_leaf->is_for_stmt(); + is_size_method_in_loop_init_ = common_info_leaf->is_size_method_in_loop_init(); + has_list_method_address_ = common_info_leaf->has_list_method_address(); +} + +std::string CommonInfoLeaf::replace_value_key(const std::string& s, int common_info_value) const { + std::regex p("key_" + std::to_string(common_info_value) + "(?![\\d\\w_])"); // negative lookahead: do not rewrite the prefix of a longer key (key_12 inside key_123) + std::string text = std::string("key_") + std::to_string(common_info_value_); // the lookahead consumes nothing, so no "$1" is needed + return std::regex_replace(s, p, text); +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/CommonInfoLeaf.h b/convert/info/CommonInfoLeaf.h new file mode 100644 index 0000000..3537e99 --- /dev/null +++ b/convert/info/CommonInfoLeaf.h @@ -0,0 +1,116 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "clang/AST/Expr.h" + +#include "../Type.h" +#include "PrefixPair.h" +#include "CommonInfo.h" +#include "CommonInfoCore.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; +class StrictRewriter; + +/// Common info created through the template classes in bs_info_util. +class CommonInfoLeaf : public CommonInfoCore, public PrefixPair { + public: + using CommonInfoCore::CommonInfoCore; + explicit CommonInfoLeaf(const std::string& prefix_adlog, + int common_info_value): + PrefixPair(prefix_adlog), + common_info_value_(common_info_value) {} + + explicit CommonInfoLeaf(const std::string& prefix_adlog, + int common_info_value, + const std::string& method_name): + CommonInfoCore(method_name), + PrefixPair(prefix_adlog), +
common_info_value_(common_info_value) {} + + int common_info_value() const { return common_info_value_; } + void set_common_info_value(int v) { common_info_value_ = v; } + + CommonInfoType common_info_type() const { return common_info_type_; } + void set_common_info_type(CommonInfoType common_info_type) { common_info_type_ = common_info_type; } + + void set_common_info_enum_name(const std::string& common_info_enum_name) { + common_info_enum_name_.emplace(common_info_enum_name); + } + + const absl::optional& common_info_enum_name() const { return (common_info_enum_name_); } + + /// 必须判断是否 ready, 一旦 ready 就不允许再 update。 + /// 否则 leaf method 可能在 get_common_info_body_text 中会被再次访问,当 last detail 类型不一样会有问题。 + void update_method_name(const std::string& method_name) override; + + void update_size_method_name(const std::string& size_method_name) override; + + void set_list_loop_var(const std::string& list_loop_var) { list_loop_var_ = list_loop_var; } + + const absl::optional& list_loop_var() const { return list_loop_var_; } + + const absl::optional& name_value_alias() const { return name_value_alias_; } + + void set_name_value_alias(const std::string& name_value_alias) { + name_value_alias_.emplace(name_value_alias); + } + + virtual std::string get_exists_expr(Env* env_ptr) const; + virtual std::string get_bs_scalar_exists_def(Env* env_ptr) const; + + virtual std::string get_bs_scalar_def(Env* env_ptr) const; + virtual std::string get_bs_list_def(Env* env_ptr) const; + virtual std::string get_bs_map_def(Env* env_ptr) const; + + virtual std::string get_bs_var_name(Env* env_ptr) const; + virtual std::string get_bs_var_def_name(Env* env_ptr) const; + + std::string bs_enum_str_to_camel(const std::string bs_enum_str) const; + + /// 属性定义, no 来自模板参数 + /// BSFixedCommonInfo BSGetItemAdDspInfoCommonInfoAttr(no); + /// get_functor_name 返回 BSGetItemAdDspInfoCommonInfoAttr。 + /// get_exists_functor_name 返回 BSHasItemAdDspInfoCommonInfoAttr。 + virtual std::string get_functor_name() const; 
+ virtual std::string get_exists_functor_name() const; + + virtual std::string get_bs_scalar_field_def(Env* env_ptr) const; + virtual std::string get_bs_scalar_exists_field_def(Env* env_ptr) const; + virtual std::string get_bs_list_field_def(Env* env_ptr) const; + virtual std::string get_bs_map_field_def(Env* env_ptr) const; + + virtual std::string get_bs_enum_str() const { return ""; } + virtual std::string get_functor_tmpl() const { return ""; } + virtual std::string get_exists_functor_tmpl() const { return ""; } + virtual std::string get_field_def_params() const { return ""; } + virtual bool is_item_field(const std::string& bs_enum_str) const { return true; } // was `return ""`, which converts to true; NOTE(review): confirm that true (not false) is the intended default + + std::string get_adlog_field_str() const { + return prefix_adlog_ + std::string(".key:") + std::to_string(common_info_value_); + } + + void copy_except_int_value(CommonInfoLeaf* common_info_leaf); + std::string replace_value_key(const std::string& s, int common_info_value) const; + + protected: + int common_info_value_; + + absl::optional list_loop_var_; + absl::optional name_value_alias_; + CommonInfoType common_info_type_; + absl::optional common_info_enum_name_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/CommonInfoMiddleNode.cpp b/convert/info/CommonInfoMiddleNode.cpp new file mode 100644 index 0000000..8b4afb9 --- /dev/null +++ b/convert/info/CommonInfoMiddleNode.cpp @@ -0,0 +1,77 @@ +#include +#include +#include + +#include "absl/strings/str_split.h" +#include "../Tool.h" +#include "../Env.h" +#include "CommonInfo.h" +#include "CommonInfoMiddleNode.h" +#include "MiddleNodeInfo.h" +#include "../handler/StrictRewriter.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +CommonInfoMiddleNodeDetail::CommonInfoMiddleNodeDetail(const std::string& prefix_adlog, + int common_info_value, + const std::string& root): + CommonInfoLeaf(prefix_adlog, common_info_value), + root_(root) { +} +
+CommonInfoMiddleNodeDetail::CommonInfoMiddleNodeDetail(const std::string& prefix_adlog, + int common_info_value, + const std::string& method_name, + const std::string& root): + CommonInfoLeaf(prefix_adlog, common_info_value, method_name), + root_(root) { + is_ready_ = true; +} + +std::string CommonInfoMiddleNodeDetail::get_functor_name() const { + std::ostringstream oss; + + std::string bs_enum_str = get_bs_enum_str(); + oss << "BSGet" << root_ << bs_enum_str_to_camel(bs_enum_str); + + return oss.str(); +} + +std::string CommonInfoMiddleNodeDetail::get_bs_enum_str() const { + std::string s = prefix_ + "_key_" + std::to_string(common_info_value_); + return tool::adlog_to_bs_enum_str(s); +} + +std::string CommonInfoMiddleNodeDetail::get_functor_tmpl() const { + return std::string("BS") + root_; +} + +std::string CommonInfoMiddleNodeDetail::get_bs_scalar_exists_field_def(Env* env_ptr) const { + std::ostringstream oss; + + std::string type_str = CommonAttrInfo::get_inner_type_str(value_type_); + oss << get_exists_functor_tmpl() << "<" << type_str << ">" << " "; + oss << get_exists_functor_name() << "{" << get_field_def_params() << "}"; + + return oss.str(); +} + +std::string CommonInfoMiddleNodeDetail::get_exists_functor_tmpl() const { + std::ostringstream oss; + oss << std::string("BSHas") + root_ + common_info_camel_ + "Impl"; + return oss.str(); +} + +std::string CommonInfoMiddleNodeDetail::get_field_def_params() const { + return std::to_string(common_info_value_); +} + +bool CommonInfoMiddleNodeDetail::is_item_field(const std::string& bs_enum_str) const { + return true; +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/CommonInfoMiddleNode.h b/convert/info/CommonInfoMiddleNode.h new file mode 100644 index 0000000..f2ee475 --- /dev/null +++ b/convert/info/CommonInfoMiddleNode.h @@ -0,0 +1,89 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "clang/AST/Expr.h" + +#include 
"../Type.h" +#include "CommonInfo.h" +#include "CommonInfoLeaf.h" +#include "CommonInfoPrepare.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; +class StrictRewriter; + +/// 来自中间节点的 common info。 +/// 示例: teams/ad/ad_algorithm/feature/fast/impl/extract_item_goods_id_list_size.h, 所需字段来自中间 +/// 节点的 common info, 如 live info common info。 +/// +/// auto live_info = GetLiveInfo(adlog.item(pos)); +/// if (live_info == nullptr) { +/// return; +/// } +/// if (live_info->common_info_attr_size() > 0) { +/// const auto& attr = live_info->common_info_attr(); +/// for (const auto& liveAttr : attr) { +/// if (liveAttr.name_value() == 23071) { +/// int goods_num = liveAttr.int_list_value_size(); +/// AddFeature(0, goods_num, result); +/// break; +/// } +/// } +/// } +/// 在遇到 `common_info_attr` 时候可以确定是否来自中间节点, 如果是则创建 `CommonInfoMiddleNode`, 之后需要 +/// 重新实现 `CommonInfo` 对应的逻辑。 +class CommonInfoMiddleNodeDetail: public CommonInfoLeaf { + public: + explicit CommonInfoMiddleNodeDetail(const std::string& prefix_adlog, + int common_info_value, + const std::string& root); + explicit CommonInfoMiddleNodeDetail(const std::string& prefix_adlog, + int common_info_value, + const std::string& method_name, + const std::string& root); + + void update_method_name(const std::string& method_name) override { + if (!is_ready_) { + CommonInfoCore::update_method_name(method_name); + is_ready_ = true; + } + } + + void update_size_method_name(const std::string& size_method_name) override { + if (!is_ready_) { + CommonInfoCore::update_size_method_name(size_method_name); + LOG(INFO) << "update_size_method: " << size_method_name + << ", method_name: " << method_name_ + << ", address: " << this; + is_ready_ = true; + } + } + + std::string get_functor_name() const override; + + std::string get_bs_scalar_exists_field_def(Env* env_ptr) const override; + + std::string get_bs_enum_str() const override; + std::string get_functor_tmpl() const override; + std::string 
get_exists_functor_tmpl() const override; + std::string get_field_def_params() const override; + bool is_item_field(const std::string& bs_enum_str) const override; + + protected: + std::string root_; + std::string bs_rewritten_text_; + const std::string common_info_camel_ = "CommonInfo"; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/CommonInfoMultiIntList.cpp b/convert/info/CommonInfoMultiIntList.cpp new file mode 100644 index 0000000..c76e39d --- /dev/null +++ b/convert/info/CommonInfoMultiIntList.cpp @@ -0,0 +1,131 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../Env.h" +#include "../Tool.h" +#include "../Type.h" +#include "CommonInfoMultiIntList.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void CommonInfoMultiIntList::add_attr_map_name(const std::string &attr_map_name) { + if (std::find(attr_map_names_.begin(), attr_map_names_.end(), attr_map_name) != attr_map_names_.end()) { + return; + } + + attr_map_names_.push_back(attr_map_name); +} + +void CommonInfoMultiIntList::add_attr_size_map_name(const std::string &attr_size_map_name) { + if (std::find(attr_size_map_names_.begin(), + attr_size_map_names_.end(), + attr_size_map_name) != attr_size_map_names_.end()) { + return; + } + + attr_size_map_names_.push_back(attr_size_map_name); + if (attr_map_names_.size() > 0) { + size_list_connections_[attr_size_map_name] = attr_map_names_.back(); + } else { + LOG(INFO) << "must have attr_map_name before, attr_size_map_name: " << attr_size_map_name; + } +} + +const std::string& CommonInfoMultiIntList::find_correspond_list_map(const std::string& size_map_name) const { + auto it = size_list_connections_.find(size_map_name); + if (it == size_list_connections_.end()) { + static std::string empty; + LOG(INFO) << "cannot find correspond_list_map, size_map_name: " << size_map_name; + return (empty); + } + + return it->second; +} + +const 
std::unordered_map& CommonInfoMultiIntList::map_vec_connections() const { + return (map_vec_connections_); +} + +void CommonInfoMultiIntList::add_map_vec_connection(const std::string& map_name, + const std::string& vec_name) { + map_vec_connections_[map_name] = vec_name; +} + +std::string CommonInfoMultiIntList::get_bs_list_def(int v) const { + std::ostringstream oss; + + std::string bs_enum_str = get_bs_enum_str(v); + std::string new_name = env_ptr_->find_valid_new_name(bs_enum_str); + std::string type_str = "int64_t"; + + oss << "BSRepeatedField<" << type_str; + if (env_ptr_->is_combine_feature() && !tool::is_item_field(bs_enum_str)) { + oss << ", true"; + } + oss << "> " << new_name << " = std::move(" << get_functor_name(v) << "(bs, pos))"; + + return oss.str(); +} + +std::string CommonInfoMultiIntList::get_bs_enum_str(int v) const { + return prefix_ + "_key_" + std::to_string(v); +} + +std::string CommonInfoMultiIntList::get_functor_name(int v) const { + std::ostringstream oss; + + std::string bs_enum_str = get_bs_enum_str(v); + std::vector arr = absl::StrSplit(bs_enum_str, "_"); + + oss << "BSGet"; + + bool start = false; + for (const std::string &s : arr) { + if (s.find("common") != std::string::npos) { + start = true; + } + + if (start && !s.empty()) { // an empty segment (e.g. from "__") would make substr(1) throw std::out_of_range + oss << char(toupper(s[0])) << s.substr(1); + } + } + + return oss.str(); +} + +std::string CommonInfoMultiIntList::get_bs_list_field_def(int v) const { + std::ostringstream oss; + + std::string type_str = "int64_t"; + oss << "BSFixedCommonInfo"; + if (env_ptr_->is_combine_feature() && + !tool::is_item_field(get_bs_enum_str(v))) { + oss << ", true"; + } + oss << "> " << get_functor_name(v) << "{" << tool::add_quote(prefix_adlog_) + << ", " << v << "}"; + + return oss.str(); +} + +const std::string& CommonInfoMultiIntList::find_correspond_vec_name(const std::string& map_name) const { + auto it = map_vec_connections_.find(map_name); + if (it == map_vec_connections_.end()) { + static std::string empty; + return (empty); + } +
+ return it->second; +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/CommonInfoMultiIntList.h b/convert/info/CommonInfoMultiIntList.h new file mode 100644 index 0000000..966c03e --- /dev/null +++ b/convert/info/CommonInfoMultiIntList.h @@ -0,0 +1,61 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "../Type.h" +#include "CommonInfo.h" +#include "CommonInfoCore.h" +#include "CommonInfoPrepare.h" +#include "PrefixPair.h" +#include "clang/AST/Expr.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; +class StrictRewriter; + +/// 所有 common info 类型都一样 +class CommonInfoMultiIntList : public CommonAttrInfo, public CommonInfoCore, public PrefixPair { + public: + explicit CommonInfoMultiIntList(const std::string& prefix_adlog) : + CommonAttrInfo(CommonInfoType::MULTI_MAP), + PrefixPair(prefix_adlog) {} + + void add_attr_map_name(const std::string& attr_map_name); + const std::vector& attr_map_names() const { return attr_map_names_; } + + void add_attr_size_map_name(const std::string& attr_size_map_name); + const std::vector& attr_size_map_names() const { return attr_size_map_names_; } + + const std::string& find_correspond_list_map(const std::string& size_map_name) const; + + const std::unordered_map& map_vec_connections() const; + void add_map_vec_connection(const std::string& map_name, const std::string& vec_name); + + std::string get_bs_list_def(int v) const; + std::string get_bs_enum_str(int v) const; + std::string get_functor_name(int v) const; + std::string get_bs_list_field_def(int v) const; + + const std::string& find_correspond_vec_name(const std::string& map_name) const; + + private: + std::vector attr_map_names_; + std::vector attr_size_map_names_; + std::unordered_map map_vec_connections_; + std::unordered_map size_list_connections_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff 
--git a/convert/info/CommonInfoMultiMap.h b/convert/info/CommonInfoMultiMap.h new file mode 100644 index 0000000..51a97de --- /dev/null +++ b/convert/info/CommonInfoMultiMap.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include +#include +#include + #include +#include + +#include "clang/AST/Expr.h" + +#include "../Type.h" +#include "CommonInfo.h" +#include "CommonInfoCore.h" +#include "CommonInfoPrepare.h" +#include "PrefixPair.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; +class StrictRewriter; + +/// 所有 common info 类型都一样 +class CommonInfoMultiMap : public CommonAttrInfo, public CommonInfoCore, public PrefixPair { + public: + explicit CommonInfoMultiMap(const std::string& prefix_adlog): + CommonAttrInfo(CommonInfoType::MULTI_MAP), + PrefixPair(prefix_adlog) {} + + explicit CommonInfoMultiMap(const std::string& prefix_adlog, + const std::string& map_name, + const std::string& attr_name): + CommonAttrInfo(CommonInfoType::MULTI_MAP), + PrefixPair(prefix_adlog), + map_name_(map_name), + attr_name_(attr_name) {} + + const std::string& map_name() const { return map_name_; } + const std::string& attr_name() const { return attr_name_; } + + private: + std::string map_name_; + std::string attr_name_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/CommonInfoNormal.cpp b/convert/info/CommonInfoNormal.cpp new file mode 100644 index 0000000..c4ccb84 --- /dev/null +++ b/convert/info/CommonInfoNormal.cpp @@ -0,0 +1,286 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "clang/AST/Expr.h" + +#include "../Env.h" +#include "Type.h" +#include "../handler/StrictRewriter.h" +#include "CommonInfoDetail.h" +#include "CommonInfoNormal.h" +#include "CommonInfoMiddleNode.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void CommonInfoNormal::add_common_info_value(int v) { + if (is_already_exists(v)) { + LOG(INFO) << "already exists, skip, 
common_info_value: " << v; + return; + } + + if (middle_node_root_) { + // middle node + if (uni_method_name_) { + auto* detail_ptr = new CommonInfoMiddleNodeDetail(prefix_adlog_, v, *uni_method_name_, *middle_node_root_); + common_info_details_.emplace_back(detail_ptr); + } else { + auto* detail_ptr = new CommonInfoMiddleNodeDetail(prefix_adlog_, v, *middle_node_root_); + LOG(INFO) << "add CommonInfoMiddleNodeDetail, prefix: " << prefix_ + << ", v: " << v + << ", middle_node_root: " << *middle_node_root_; + common_info_details_.emplace_back(detail_ptr); + } + } else { + // normal + if (uni_method_name_) { + common_info_details_.emplace_back(new CommonInfoDetail(prefix_adlog_, v, *uni_method_name_)); + } else { + common_info_details_.emplace_back(new CommonInfoDetail(prefix_adlog_, v)); + } + } + + if (name_value_alias_) { + common_info_details_.back()->set_name_value_alias(*name_value_alias_); + } + if (list_loop_var_) { + common_info_details_.back()->set_list_loop_var(*list_loop_var_); + } + common_info_details_.back()->set_common_info_type(common_info_type_); +} + +std::shared_ptr CommonInfoNormal::mutable_common_info_detail_by_value(int value) { + for (size_t i = 0; i < common_info_details_.size(); i++) { + if (common_info_details_[i] != nullptr && common_info_details_[i]->common_info_value() == value) { + return common_info_details_[i]; + } + } + + return nullptr; +} + +const std::shared_ptr &CommonInfoNormal::last_common_info_detail() const { + if (common_info_details_.size() > 0) { + return (common_info_details_.back()); + } + + static std::shared_ptr empty{}; + return (empty); +} + +std::shared_ptr& CommonInfoNormal::last_mutable_common_info_detail() { + if (common_info_details_.size() > 0) { + return (common_info_details_.back()); + } + + static std::shared_ptr empty{}; + return (empty); +} + +// 目前只能处理 list 类型的 common info, map 类型的还处理不了,不过目前只遇到过 list 的。 +std::string CommonInfoNormal::get_bs_rewritten(StrictRewriter* rewriter_ptr, size_t index) const { + 
if (index >= common_info_details_.size()) { + LOG(INFO) << "out of range, index: " << index + << ", common_info_details_.size(): " << common_info_details_.size(); + return ""; + } + + if (common_info_details_[index] == nullptr) { + LOG(INFO) << "common info detail is nullptr, index: " << index + << ", prefix: " << prefix_; + return ""; + } + + const CommonInfoLeaf& common_info_detail = *(common_info_details_[index]); + + std::ostringstream oss_body; + + if (const absl::optional& common_info_prepare = get_common_info_prepare()) { + const auto &other_decl_stmts = common_info_prepare->other_decl_stmt_strs(); + for (size_t i = 0; i < other_decl_stmts.size(); i++) { + oss_body << fix_semicolon(other_decl_stmts[i]) << "\n"; + } + + const auto& other_if_stmts = common_info_prepare->other_if_stmt_strs(); + for (size_t i = 0; i < other_if_stmts.size(); i++) { + oss_body << other_if_stmts[i] << "\n"; + } + } + + oss_body << "\n" << common_info_detail.get_bs_pre_text() << "\n"; + std::string pre_text = oss_body.str(); + + std::string loop_text; + if (common_info_detail.bs_loop_text()) { + loop_text = *(common_info_detail.bs_loop_text()); + } + + if (common_info_detail.is_list()) { + if (common_info_detail.list_loop_var()) { + std::string var_name = common_info_detail.get_bs_var_name(env_ptr_); + // std::regex p_list_loop_var(std::string("([^a-zA-Z0-9_])") + + // *(common_info_detail.list_loop_var()) + + // std::string("([^a-zA-Z0-9_])")); + // loop_text = std::regex_replace(loop_text, p_list_loop_var, std::string("$1") + var_name + std::string("$2")); + std::string loop_var_assign = std::string("auto ") + + *(common_info_detail.list_loop_var()) + + " = " + var_name; + loop_text = loop_var_assign + ";\n" + loop_text; + } + } + + std::string bs_enum_str = common_info_detail.get_bs_enum_str(); + const absl::optional& var = env_ptr_->find_new_def(bs_enum_str); + if (!var) { + LOG(INFO) << "cannot find list or map var_name in env, bs_enum_str: " << bs_enum_str; + } + + 
std::ostringstream oss; + oss << "if (" << common_info_detail.get_exists_expr(env_ptr_) << ") {\n "; + + if (common_info_detail.is_list() && var) { + if (const auto& compare_list_size_value = common_info_detail.compare_list_size_value()) { + oss << "if (" << var->name() << ".size()"; + if (const auto& list_size_dividend = common_info_detail.list_size_dividend()) { + oss << " % " << *list_size_dividend; + } + oss << " == " << *compare_list_size_value << ") {\n "; + } + } + + if (common_info_detail.is_scalar()) { + oss << pre_text; + } else if (common_info_detail.is_for_stmt()) { + oss << pre_text << "\n" << loop_text << "\n"; + } else if (common_info_detail.has_list_method_address()) { + oss << pre_text; + } else if (common_info_detail.is_list() || common_info_detail.is_map()) { + if (loop_text.size() == 0) { + oss << pre_text; + } else { + if (var) { + oss << pre_text + << "for (size_t idx = 0; idx < " << var->name() << ".size(); idx++) {\n " + << loop_text + << "\n}\n"; + } + } + } + + auto& post_text = common_info_detail.bs_post_text(); + if (post_text) { + oss << *post_text; + } + + oss << "}\n\n"; + + if (common_info_detail.is_list() && var) { + if (common_info_detail.compare_list_size_value()) { + oss << "}\n\n"; + } + } + + return oss.str(); +} + +std::string CommonInfoNormal::get_bs_wrap_text(const std::string& text) const { + if (common_info_details_.size() == 0) { + LOG(INFO) << "out of range, index: " << 0 + << ", common_info_details_.size(): " << common_info_details_.size(); + return ""; + } + + if (common_info_details_[0] == nullptr) { + LOG(INFO) << "common info detail is nullptr, prefix: " << prefix_; + return ""; + } + + const CommonInfoLeaf& common_info_detail = *(common_info_details_[0]); + + std::string s = text; + + if (common_info_detail.is_list()) { + if (const auto& list_loop_var = common_info_detail.list_loop_var()) { + std::string var_name = common_info_detail.get_bs_var_name(env_ptr_); + // std::regex 
p_list_loop_var(std::string("([^a-zA-Z0-9_])") + + // *list_loop_var_ + + // std::string("([^a-zA-Z0-9_])")); + // s = std::regex_replace(s, p_list_loop_var, std::string("$1") + var_name + std::string("$2")); + std::string loop_var_assign = std::string("auto ") + + *list_loop_var + + " = " + var_name; + s = loop_var_assign + ";\n" + s; + } + } + + std::string bs_enum_str = common_info_detail.get_bs_enum_str(); + const absl::optional& var = env_ptr_->find_new_def(bs_enum_str); + if (!var) { + LOG(INFO) << "cannot find list or map var_name in env, bs_enum_str: " << bs_enum_str; + } + + std::ostringstream oss; + oss << "if (" << common_info_detail.get_exists_expr(env_ptr_) << ") {\n "; + + if (common_info_detail.is_list() && var) { + if (const auto& compare_list_size_value = common_info_detail.compare_list_size_value()) { + oss << "if (" << var->name() << ".size()"; + if (const auto& list_size_dividend = common_info_detail.list_size_dividend()) { + oss << " % " << *list_size_dividend; + } + oss << " == " << *compare_list_size_value << ") {\n "; + } + } + + if (common_info_detail.is_scalar() || + common_info_detail.is_list_size() || + common_info_detail.is_map_size()) { + oss << s; + } else if (common_info_detail.is_for_stmt()) { + oss << s; + } else if (common_info_detail.has_list_method_address()) { + oss << s; + } else if (common_info_detail.is_list() || common_info_detail.is_map()) { + if (common_info_detail.size_method_name() && !common_info_detail.is_size_method_in_loop_init()) { + oss << s; + } else { + if (var) { + oss << "for (size_t idx = 0; idx < " << var->name() << ".size(); idx++) {\n " + << s + << "\n}\n"; + } else { + LOG(INFO) << "cannot find list or map var_name in env, bs_enum_str: " << bs_enum_str; + } + } + } + + oss << "}\n\n"; + + if (common_info_detail.is_list() && var) { + if (common_info_detail.compare_list_size_value()) { + oss << "}\n\n"; + } + } + + return oss.str(); +} + +bool CommonInfoNormal::is_already_exists(int v) { + for (size_t i = 
0; i < common_info_details_.size(); i++) { + if (common_info_details_[i]->common_info_value() == v) { + return true; + } + } + + return false; +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/CommonInfoNormal.h b/convert/info/CommonInfoNormal.h new file mode 100644 index 0000000..aa1636c --- /dev/null +++ b/convert/info/CommonInfoNormal.h @@ -0,0 +1,100 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "clang/AST/Expr.h" + +#include "../Type.h" +#include "CommonInfo.h" +#include "CommonInfoLeaf.h" +#include "CommonInfoPrepare.h" +#include "PrefixPair.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; +class StrictRewriter; + +class CommonInfoNormal : public CommonAttrInfo, public PrefixPair { + public: + explicit CommonInfoNormal(const std::string& prefix_adlog): + CommonAttrInfo(CommonInfoType::NORMAL), + PrefixPair(prefix_adlog) { + } + + explicit CommonInfoNormal(const std::string &prefix_adlog, + const std::string &middle_node_root) : + CommonAttrInfo(CommonInfoType::MIDDLE_NODE), + PrefixPair(prefix_adlog), + middle_node_root_(middle_node_root) {} + + /// 注意,添加标准是遇到 attr.name_value() 判断时候添加,因此必须去重。 + /// 由于在 BinaryOperator 中,因此 attr.name_value() 可能会被访问多次。 + void add_common_info_value(int v); + + void set_list_loop_var(const std::string& list_loop_var) { list_loop_var_ = list_loop_var; } + const absl::optional& list_loop_var() const { return list_loop_var_; } + + const std::vector>& common_info_details() const { + return (common_info_details_); + } + std::vector>& mutable_common_info_details() { + return (common_info_details_); + } + + const std::shared_ptr& get_common_info_detail(size_t index) const { + return (common_info_details_[index]); + } + std::shared_ptr& mutable_common_info_detail(size_t index) { + return common_info_details_[index]; + } + + std::shared_ptr mutable_common_info_detail_by_value(int value); + + 
size_t common_info_details_size() const { return common_info_details_.size(); } + + const std::shared_ptr& last_common_info_detail() const; + std::shared_ptr& last_mutable_common_info_detail(); + + void set_uni_method_name(const std::string& uni_method_name) { uni_method_name_ = uni_method_name; } + const absl::optional& uni_method_name() const { return uni_method_name_; } + + void add_other_if_stmt(clang::IfStmt* other_if_stmt) { other_if_stmts_.push_back(other_if_stmt); } + + /// 列表遍历普通 for 循环的分 cxx_for_range_stmt 和 for_stmt 两种情况。 + /// cxx_for_range_stmt 可以直接替换, for_stmt 可能还有其他条件,因此必须基于原来的 for 循环替换。 + std::string get_bs_rewritten(StrictRewriter* rewriter_ptr, size_t index) const; + std::string get_bs_wrap_text(const std::string& text) const; + + bool is_check_equal() const { return is_check_equal_; } + void set_is_check_equal(bool v) { is_check_equal_ = v; } + + const absl::optional& name_value_alias() const { return name_value_alias_; } + void set_name_value_alias(const std::string& name_value_alias) { + name_value_alias_.emplace(name_value_alias); + } + + bool is_already_exists(int v); + + protected: + absl::optional list_loop_var_; + absl::optional uni_method_name_; + std::vector other_if_stmts_; + std::vector> common_info_details_; + bool is_check_equal_ = true; + absl::optional name_value_alias_; + absl::optional middle_node_root_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/CommonInfoPrepare.cpp b/convert/info/CommonInfoPrepare.cpp new file mode 100644 index 0000000..97f9b88 --- /dev/null +++ b/convert/info/CommonInfoPrepare.cpp @@ -0,0 +1,30 @@ +#include "../Tool.h" +#include "CommonInfoPrepare.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +CommonInfoPrepare::CommonInfoPrepare(const std::string& prefix_adlog): + prefix_adlog_(prefix_adlog) { + prefix_.emplace(tool::adlog_to_bs_enum_str(prefix_adlog)); +} + +void CommonInfoPrepare::update_size_method_name(const std::string 
&size_method_name) { + size_method_name_.emplace(size_method_name); + if (!method_name_) { + method_name_.emplace(tool::trim_tail_size(size_method_name)); + } +} + +bool CommonInfoPrepare::is_common_info_normal() const { + return common_info_values_.size() > 0; +} + +bool CommonInfoPrepare::is_common_info_fixed_list() const { + return template_int_names_.size() > 0 && common_info_values_.size() == 0; +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/CommonInfoPrepare.h b/convert/info/CommonInfoPrepare.h new file mode 100644 index 0000000..04c2006 --- /dev/null +++ b/convert/info/CommonInfoPrepare.h @@ -0,0 +1,117 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "clang/AST/Stmt.h" +#include "clang/AST/Expr.h" + +#include "../Type.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +/// CommonInfo 在确定类型之前需要保存的信息。 +/// 几种 CommonInfo 可能都会用到。 +class CommonInfoPrepare { + public: + CommonInfoPrepare() = default; + explicit CommonInfoPrepare(const std::string& prefix_adlog); + + bool is_confirmed() const { return is_confirmed_; } + void set_is_confirmed() { is_confirmed_ = true; } + + const absl::optional& prefix() const { return (prefix_); } + void set_prefix(const std::string& prefix) { prefix_.emplace(prefix); } + + const absl::optional& prefix_adlog() const { return (prefix_adlog_); } + void set_prefix_adlog(const std::string& prefix_adlog) { prefix_adlog_.emplace(prefix_adlog); } + + const absl::optional& name_value_alias() const { return (name_value_alias_); } + void set_name_value_alias(const std::string& name_value_alias) { + name_value_alias_.emplace(name_value_alias); + } + + const absl::optional& method_name() const { return (method_name_); } + void set_method_name(const std::string& method_name) { method_name_.emplace(method_name); } + + const absl::optional &size_method_name() const { + return (size_method_name_); + } + + void 
set_size_method_name(const std::string &size_method_name) { + size_method_name_.emplace(size_method_name); + } + + void update_size_method_name(const std::string &size_method_name); + + const absl::optional& int_value() const { return (int_value_); } + void set_int_value(int v) { int_value_.emplace(v); } + + void add_other_if_stmt(clang::IfStmt* other_if_stmt) { other_if_stmts_.push_back(other_if_stmt); } + const std::vector& other_if_stmts() const { return other_if_stmts_; } + + void add_other_if_stmt_str(const std::string& other_if_stmt_str) { + other_if_stmt_strs_.push_back(other_if_stmt_str); + } + const std::vector& other_if_stmt_strs() const { return (other_if_stmt_strs_); } + + void add_other_decl_stmt_str(const std::string &other_decl_stmt_str) { + other_decl_stmt_strs_.push_back(other_decl_stmt_str); + } + const std::vector &other_decl_stmt_strs() const { + return (other_decl_stmt_strs_); + } + + bool is_for_stmt() const { return is_for_stmt_; } + void set_is_for_stmt(bool v) { is_for_stmt_ = v; } + + + void add_template_int_name(const std::string& template_int_name) { + template_int_names_.push_back(template_int_name); + } + void add_common_info_value(int v) { common_info_values_.push_back(v); } + + const std::vector& template_int_names() const { return (template_int_names_); } + const std::vector& common_info_values() const { return (common_info_values_); } + + void set_template_int_names(const std::vector& int_names) { template_int_names_ = int_names; } + void set_common_info_values(const std::vector& values) { common_info_values_ = values; } + + bool is_common_info_normal() const; + bool is_common_info_fixed_list() const; + + const absl::optional &attr_name() const { return (attr_name_); } + void set_attr_name(const std::string &attr_name) {attr_name_.emplace(attr_name);} + + private: + bool is_confirmed_ = false; + absl::optional prefix_; + absl::optional prefix_adlog_; + absl::optional name_value_alias_; + absl::optional method_name_; + 
absl::optional size_method_name_; + absl::optional int_value_; + std::vector other_if_stmts_; + std::vector other_if_stmt_strs_; + bool is_for_stmt_ = false; + + std::vector other_decl_stmt_strs_; + + /// 用于在 Overrivew 中记录,区分是 CommonInfoNormal 还是 CommonInfoFixedList。 + /// 如果出现模板参数,则肯定是 CommonInfoFixedList。 + std::vector template_int_names_; + std::vector common_info_values_; + + /// 变量名 + absl::optional attr_name_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/ConstructorInfo.cpp b/convert/info/ConstructorInfo.cpp new file mode 100644 index 0000000..50599da --- /dev/null +++ b/convert/info/ConstructorInfo.cpp @@ -0,0 +1,40 @@ +#include +#include "AdlogFieldInfo.h" +#include "ConstructorInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void ConstructorInfo::fix_common_info_enums() { + for (size_t i = 0; i < common_info_enums_.size(); i++) { + std::string bs_enum_str = common_info_enums_[i].bs_enum_str(); + if (bs_enum_str.size() > 0 && bs_field_enums_.find(bs_enum_str) != bs_field_enums_.end()) { + bs_field_enums_.erase(bs_enum_str); + } + } +} + +std::string ConstructorInfo::joined_params() const { + return absl::StrJoin(params_, ","); +} + +void ConstructorInfo::set_normal_adlog_field_info(const std::string& bs_enum_str, + const std::string& adlog_field) { + AdlogFieldInfo adlog_field_info(adlog_field, bs_enum_str, AdlogFieldType::NORMAL); + adlog_field_infos_.insert({bs_enum_str, adlog_field_info}); +} + +void ConstructorInfo::set_common_info_field_info(const std::string& bs_enum_str, + const std::string& adlog_field, + const std::string& common_info_enum_name, + int common_info_value) { + AdlogFieldInfo adlog_field_info(adlog_field, bs_enum_str, AdlogFieldType::COMMON_INFO); + adlog_field_info.set_common_info_enum_name(common_info_enum_name); + adlog_field_info.set_common_info_enum_value(common_info_value); + adlog_field_infos_.insert({bs_enum_str, adlog_field_info}); +} + +} // 
namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/ConstructorInfo.h b/convert/info/ConstructorInfo.h new file mode 100644 index 0000000..d9d8ca3 --- /dev/null +++ b/convert/info/ConstructorInfo.h @@ -0,0 +1,134 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +#include "../Type.h" +#include "VarDeclInfo.h" +#include "clang/AST/AST.h" +#include "clang/AST/ASTConsumer.h" +#include "clang/AST/Expr.h" +#include "clang/AST/StmtCXX.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/Support/CommandLine.h" +#include "AdlogFieldInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +/// 构造函数中用到的 common info enum, 如果是在 if 中出现的需要保存 if 结束的位置, 用于插入 attr_meta。 +/// 必须在解析 Extract 函数时遇到具体的 common info 变量,才知道完整的 bs_field_enum, 因此替换是在 +/// AdlogFieldHandler 中完成的。 +class CommonInfoEnumLoc { + public: + explicit CommonInfoEnumLoc(clang::DeclRefExpr* enum_ref): enum_ref_(enum_ref) {} + explicit CommonInfoEnumLoc(clang::DeclRefExpr* enum_ref, clang::SourceLocation loc): + enum_ref_(enum_ref), + if_stmt_end_(loc) {} + + clang::DeclRefExpr* enum_ref() const { return enum_ref_; } + const absl::optional& if_stmt_end() const { return if_stmt_end_; } + + void set_bs_enum_str(const std::string& bs_enum_str) { bs_enum_str_ = bs_enum_str; } + const std::string& bs_enum_str() const { return bs_enum_str_; } + + private: + std::string bs_enum_str_; + clang::DeclRefExpr* enum_ref_; + absl::optional if_stmt_end_; +}; + +/// 构造函数里的信息 +class ConstructorInfo { + public: + ConstructorInfo() = default; + explicit ConstructorInfo(const std::string& feature_name): feature_name_(feature_name) {} + + const std::string& feature_name() const { return feature_name_; } + const std::string& feature_type() const { return feature_type_; } + + void set_body(clang::Stmt* body) { body_ = body; } + clang::Stmt* body() { return body_; } + + void set_feature_type(const 
std::string& feature_type) { feature_type_ = feature_type; } + void add_common_info_enum(clang::DeclRefExpr* expr) { common_info_enums_.emplace_back(expr); } + void add_common_info_enum(clang::DeclRefExpr* expr, clang::SourceLocation loc) { + common_info_enums_.emplace_back(expr, loc); + } + + const std::vector& common_info_enums() const { return common_info_enums_; } + std::vector& mutable_common_info_enums() { return common_info_enums_; } + + const clang::SourceLocation body_end() const { return body_end_; } + void set_body_end(clang::SourceLocation loc) { body_end_ = loc; } + + void add_bs_field_enum(const std::string& bs_field_enum) { bs_field_enums_.insert(bs_field_enum); } + void add_middle_node_leaf(const std::string& name) { middle_node_leafs_.insert(name); } + + const std::unordered_set& bs_field_enums() const { return bs_field_enums_; } + const std::unordered_set& middle_node_leafs() const { return middle_node_leafs_; } + + void set_init_list(const std::string& init_list) { init_list_ = init_list; } + const std::string& init_list() const { return init_list_; } + + void set_body_content(const std::string& body_content) { body_content_ = body_content; } + const std::string& body_content() const { return body_content_; } + + void set_first_init_stmt(clang::CXXCtorInitializer * stmt) { first_init_stmt_ = stmt; } + clang::CXXCtorInitializer * first_init_stmt() { return first_init_stmt_; } + + /// 去掉 bs_field_enums_ 中出现的 common_info_enums_ + void fix_common_info_enums(); + + bool has_get_norm_query() const { return has_get_norm_query_; } + void set_has_get_norm_query(bool v) { has_get_norm_query_ = v; } + + const VarDeclInfo& var_decl_info() const { return var_decl_info_; } + VarDeclInfo& mutable_var_decl_info() { return var_decl_info_; } + + void add_param(const std::string& param) { params_.push_back(param); } + const std::vector& params() const { return (params_); } + std::string joined_params() const; + + clang::SourceRange source_range() const { return 
source_range_; } + void set_source_range(clang::SourceRange source_range) { source_range_ = source_range; } + + const std::unordered_map& adlog_field_infos() const { + return (adlog_field_infos_); + } + void set_normal_adlog_field_info(const std::string& bs_enum_str, + const std::string& adlog_field); + void set_common_info_field_info(const std::string& bs_enum_str, + const std::string& adlog_field, + const std::string& common_info_enum_name, + int common_info_value); + + private: + clang::Stmt* body_; + std::string feature_name_; + std::string feature_type_; + clang::SourceLocation body_end_; + std::vector common_info_enums_; + std::unordered_set bs_field_enums_; + std::unordered_set middle_node_leafs_; + std::string init_list_; + std::string body_content_; + clang::CXXCtorInitializer * first_init_stmt_; + bool has_get_norm_query_ = false; + VarDeclInfo var_decl_info_; + std::vector params_; + clang::SourceRange source_range_; + std::unordered_map adlog_field_infos_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/DeclInfo.h b/convert/info/DeclInfo.h new file mode 100644 index 0000000..496c4af --- /dev/null +++ b/convert/info/DeclInfo.h @@ -0,0 +1,40 @@ +#pragma once + +#include +#include +#include +#include +#include "clang/AST/Expr.h" +#include "../Type.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +/// 保存当前 decl_stmt 相关的信息。 +class DeclInfo { + public: + DeclInfo() = default; + explicit DeclInfo(const std::string& name): name_(name), init_expr_(nullptr) {} + explicit DeclInfo(const std::string& name, clang::Expr* init_expr): name_(name), init_expr_(init_expr) {} + explicit DeclInfo(const std::string& name, clang::Expr* init_expr, clang::DeclStmt* decl_stmt): + name_(name), init_expr_(init_expr), decl_stmt_(decl_stmt) {} + + const std::string& name() const { return name_; } + + clang::Expr* init_expr() const { return init_expr_; } + void set_init_expr(clang::Expr* 
init_expr) { init_expr_ = init_expr; } + + clang::DeclStmt* decl_stmt() const { return decl_stmt_; } + + private: + std::string name_; + clang::Expr* init_expr_ = nullptr; + clang::DeclStmt* decl_stmt_ = nullptr; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/FeatureInfo.cpp b/convert/info/FeatureInfo.cpp new file mode 100644 index 0000000..71a00f1 --- /dev/null +++ b/convert/info/FeatureInfo.cpp @@ -0,0 +1,443 @@ +#include +#include + +#include "MiddleNodeInfo.h" +#include "NewVarDef.h" +#include "TemplateParamInfo.h" +#include "clang/AST/Decl.h" + +#include +#include "../Tool.h" +#include "FeatureInfo.h" +#include "MethodInfo.h" +#include "AdlogFieldInfo.h" +#include "MiddleNodeInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +absl::optional FeatureInfo::last_field_decl_end_log() const { + if (field_decls_.size() == 0) { + return absl::nullopt; + } + + return absl::make_optional(field_decls_.back()->getEndLoc()); +} + +void FeatureInfo::add_field_def(const std::string& bs_enum_str, + const std::string& name, + const std::string& new_def, + NewVarType new_var_type, + AdlogVarType adlog_var_type) { + NewVarDef new_var_def(bs_enum_str, name, new_def, new_var_type, adlog_var_type); + new_field_defs_.emplace(bs_enum_str, new_var_def); +} + +void FeatureInfo::add_field_def(const std::string& bs_enum_str, + const std::string& name, + const std::string& new_def, + NewVarType new_var_type, + ExprType expr_type, + AdlogVarType adlog_var_type) { + NewVarDef new_var_def(bs_enum_str, name, new_def, new_var_type, adlog_var_type); + new_var_def.set_expr_type(expr_type); + new_field_defs_.emplace(bs_enum_str, new_var_def); +} + +void FeatureInfo::add_field_def(const std::string& bs_enum_str, + const std::string& name, + const std::string& new_def, + const std::string& exists_name, + const std::string& new_exists_def, + NewVarType new_var_type, + AdlogVarType adlog_var_type) { + NewVarDef 
new_var_def(bs_enum_str, name, new_def, new_var_type, adlog_var_type); + new_var_def.set_exists_var_def(exists_name, new_exists_def); + new_field_defs_.emplace(bs_enum_str, new_var_def); +} + +void FeatureInfo::set_middle_node_info(const std::string& bs_enum_str, + const std::string& middle_node_root, + const std::string& middle_node_field) { + auto it = new_field_defs_.find(bs_enum_str); + if (it == new_field_defs_.end()) { + LOG(INFO) << "cannot find new field in new_field_defs, bs_enum_str: " << bs_enum_str + << ", middle_node_root: " << middle_node_root + << ", middle_node_field: " << middle_node_field; + return; + } + + it->second.set_middle_node_root(middle_node_root); + it->second.set_middle_node_field(middle_node_field); +} + +void FeatureInfo::set_common_info_prefix_name_value(const std::string& bs_enum_str, + const std::string& prefix_adlog, + const std::string& name_value_alias) { + auto it = new_field_defs_.find(bs_enum_str); + if (it == new_field_defs_.end()) { + LOG(INFO) << "cannot find new field in new_field_defs, bs_enum_str: " << bs_enum_str + << ", prefix_adlog: " << prefix_adlog + << ", name_value_alias: " << name_value_alias; + return; + } + + it->second.set_common_info_prefix_adlog(prefix_adlog); + it->second.set_common_info_name_value_alias(name_value_alias); +} + +void FeatureInfo::set_action_var_name(const std::string& bs_enum_str, + const std::string& action_var_name) { + auto it = new_field_defs_.find(bs_enum_str); + if (it == new_field_defs_.end()) { + LOG(INFO) << "cannot find new field in new_field_defs, bs_enum_str: " << bs_enum_str + << ", action_var_name: " << action_var_name; + return; + } + + it->second.set_action_var_name(action_var_name); +} + +clang::BinaryOperator* FeatureInfo::find_update_action_stmt() const { + if (!action_) { + return nullptr; + } + + for (size_t i = 0; i < binary_op_stmts_.size(); i++) { + std::vector arr = absl::StrSplit(stmt_to_string(binary_op_stmts_[i]), "="); + if (arr.size() == 2) { + std::string s = 
std::regex_replace(arr[0], std::regex(" "), ""); + if (s == *action_) { + return binary_op_stmts_[i]; + } + } + } + + return nullptr; +} + +bool FeatureInfo::is_member(clang::Expr* expr) const { + std::string expr_str = tool::trim_this(stmt_to_string(expr)); + for (size_t i = 0; i < field_decls_.size(); i++) { + if (expr_str == field_decls_[i]->getNameAsString()) { + return true; + } + } + + return false; +} + +bool FeatureInfo::is_member(const std::string& name) const { + for (size_t i = 0; i < field_decls_.size(); i++) { + if (name == field_decls_[i]->getNameAsString()) { + return true; + } + } + + return false; +} + +bool FeatureInfo::is_int_list_member(const std::string &name) const { + for (size_t i = 0; i < field_decls_.size(); i++) { + if (field_decls_[i]->getNameAsString() == name) { + if (tool::is_int_vector(field_decls_[i]->getType())) { + return true; + } + } + } + + return false; +} + +bool FeatureInfo::is_int_list_member(clang::Expr* expr) const { + return is_member(expr) && tool::is_int_vector(expr->getType()); +} + +bool FeatureInfo::is_int_list_member(const std::string& name, clang::QualType qual_type) const { + return is_member(name) && tool::is_int_vector(qual_type); +} + +bool FeatureInfo::is_common_info_enum_member(const std::string& name, clang::QualType qual_type) const { + return is_member(name) && tool::is_common_info_enum(qual_type); +} + +void FeatureInfo::add_int_list_member_single_value(const std::string& name, int value) { + int_list_member_values_[name].push_back(value); +} + +void FeatureInfo::add_int_list_member_values(const std::string& name, const std::vector& values) { + int_list_member_values_[name] = values; +} + +const std::vector& FeatureInfo::get_int_list_member_values(const std::string& name) const { + auto it = int_list_member_values_.find(name); + if (it != int_list_member_values_.end()) { + return it->second; + } + + static std::vector empty; + return (empty); +} + +void FeatureInfo::add_other_method(const std::string& 
name, + clang::QualType return_type, + const std::string& bs_return_type, + const std::string& decl, + const std::string& body) { + MethodInfo& method_info = touch_method_info(name); + method_info.update(return_type, bs_return_type, decl, body); +} + +MethodInfo& FeatureInfo::touch_method_info(const std::string& method_name) { + auto it = other_methods_.find(method_name); + if (it != other_methods_.end()) { + return it->second; + } else { + return other_methods_.emplace(method_name, MethodInfo(method_name)).first->second; + } +} + +const MethodInfo* FeatureInfo::find_method_info(const std::string& method_name) const { + auto it = other_methods_.find(method_name); + if (it == other_methods_.end()) { + return nullptr; + } + + return &(it->second); +} + +MethodInfo* FeatureInfo::mutable_method_info(const std::string& method_name) { + const MethodInfo* method_info_ptr = find_method_info(method_name); + return const_cast(method_info_ptr); +} + +bool FeatureInfo::is_feature_other_method(const std::string& method_name) const { + return other_methods_.find(method_name) != other_methods_.end(); +} + +bool FeatureInfo::is_combine() const { + return tool::is_combine_feature(feature_type_) || tool::is_item_feature(feature_type_); +} + +absl::optional& FeatureInfo::touch_common_info_multi_int_list(const std::string& prefix) { + if (!common_info_multi_int_list_) { + common_info_multi_int_list_.emplace(prefix); + } + return (common_info_multi_int_list_); +} + +void FeatureInfo::gen_output() { + output_ = { + {"normal_field", json::array()}, + {"middle_node", json::array()}, + {"norm_query", json::array()}, + {"field_def", json::array()}, + {"exists_field_def", json::array()}, + {"is_template", is_template_}, + {"specialization_class_names", json::object()}, + {"all_field", json::array()}}; + + output_["h_file"] = origin_file_; + output_["cc_file"] = ""; + + if (is_template_) { + for (auto it_name = specialization_class_names_.begin(); + it_name != 
specialization_class_names_.end(); + it_name++) { + // [param, param1, ...] + json params = json::array(); + + for (size_t i = 0; i < it_name->second.size(); i++) { + json arg = json::object(); + + if (i < template_param_names_.size()) { + const auto& param_info = it_name->second[i]; + arg["name"] = template_param_names_[i]; + arg["value_str"] = param_info.value_str(); + if (param_info.enum_value()) { + arg["enum_value"] = *(param_info.enum_value()); + } + if (param_info.qual_type()) { + arg["type_str"] = param_info.qual_type()->getAsString(); + } + } else { + LOG(INFO) << "out of range, i: " << i + << ", template_param_names_.size(): " << template_param_names_.size(); + } + + params.push_back(std::move(arg)); + } + + output_["specialization_class_names"][it_name->first] = std::move(params); + } + } + + auto& constructor_info = mutable_constructor_info(); + constructor_info.fix_common_info_enums(); + + const auto& adlog_field_infos = constructor_info.adlog_field_infos(); + + for (const std::string &bs_field_enum : constructor_info.bs_field_enums()) { + output_["normal_field"].emplace_back(json::object({{"name", bs_field_enum}})); + auto it_field = adlog_field_infos.find(bs_field_enum); + if (it_field != adlog_field_infos.end()) { + const auto& field_info = it_field->second; + auto& last = output_["normal_field"].back(); + last["adlog_field"] = field_info.adlog_field(); + last["adlog_field_type"] = "normal"; + + if (field_info.adlog_field_type() == AdlogFieldType::COMMON_INFO) { + static std::regex p("key:\\d+$"); + if (const auto& enum_name = field_info.common_info_enum_name()) { + last["adlog_field_with_enum_name"] = + std::regex_replace(last["adlog_field"].get(), p, *enum_name); + last["common_info_enum_name"] = *enum_name; + } else { + LOG(INFO) << "missing common_info_enum_name, bs_enum_str: " << bs_field_enum; + } + + if (const auto& enum_value = field_info.common_info_enum_value()) { + last["common_info_enum_value"] = *enum_value; + } else { + LOG(INFO) << 
"missing common_info_enum_value, bs_enum_str: " << bs_field_enum; + } + } + } + } + + for (const auto &common_info : constructor_info.common_info_enums()) { + std::string bs_enum_str = common_info.bs_enum_str(); + if (bs_enum_str.size() > 0) { + output_["normal_field"].emplace_back(json::object({{"name", bs_enum_str}})); + } + } + + for (const std::string &leaf : constructor_info.middle_node_leafs()) { + output_["middle_node"].emplace_back(json::object({{"name", leaf}, {"adlog_field_type", "middle_node"}})); + } + + if (constructor_info.has_get_norm_query()) { + output_["norm_query"].emplace_back(json::object({{"name", "NormQuery"}})); + } + + for (auto it = new_field_defs_.begin(); it != new_field_defs_.end(); it++) { + json detail = json::object(); + + detail["adlog_var_type"] = static_cast(it->second.adlog_var_type()); + if (const auto& middle_node_root = it->second.middle_node_root()) { + detail["middle_node_root"] = *middle_node_root; + } + if (const auto& middle_node_field = it->second.middle_node_field()) { + detail["middle_node_field"] = *middle_node_field; + } + if (const auto& common_info_prefix_adlog = it->second.common_info_prefix_adlog()) { + detail["common_info_prefix_adlog"] = *common_info_prefix_adlog; + } + if (const auto& common_info_name_value_alias = it->second.common_info_name_value_alias()) { + detail["common_info_name_value_alias"] = *common_info_name_value_alias; + } + if (const auto& action_var_name = it->second.action_var_name()) { + detail["action_var_name"] = *action_var_name; + } + + if (tool::is_from_info_util(it->second.name())) { + detail["name"] = it->second.name(); + output_["field_def"].push_back(detail); + } + + if (tool::is_from_info_util(it->second.exists_name())) { + detail["name"] = it->second.exists_name(); + output_["exists_field_def"].push_back(detail); + } + } + + // 统一写到 all_field 中,保存 bs_field_enum 和 adlog_field, 中间节点都展开 + for (size_t i = 0; i < output_["normal_field"].size(); i++) { + 
output_["all_field"].push_back(output_["normal_field"][i]); + } + + for (size_t i = 0; i < output_["field_def"].size(); i++) { + if (output_["field_def"][i].contains("middle_node_root")) { + std::string middle_node_root = output_["field_def"][i]["middle_node_root"].get(); + const std::vector& prefixs = MiddleNodeInfo::get_possible_adlog_prefix(middle_node_root); + for (size_t j = 0; j < prefixs.size(); j++) { + std::string middle_node_field = output_["field_def"][i]["middle_node_field"].get(); + std::string leaf = prefixs[j] + std::string(".") + middle_node_field; + std::string bs_field_enum = tool::adlog_to_bs_enum_str(leaf); + json middle_node_leaf = json::object({{"name", leaf}, + {"bs_field_enum", bs_field_enum}, + {"middle_node_field", middle_node_field}, + {"adlog_field_type", "middle_node"}}); + output_["all_field"].emplace_back(middle_node_leaf); + } + } + } +} + +void FeatureInfo::add_middle_node_bs_enum_var_type(const std::string& middle_node_bs_enum_str, + NewVarType new_var_type, + const std::string& adlog_field) { + NewVarDef new_var_def; + new_var_def.set_bs_enum_str(middle_node_bs_enum_str); + new_var_def.set_new_var_type(new_var_type); + new_var_def.set_adlog_field(adlog_field); + + middle_node_bs_enum_var_type_.insert({middle_node_bs_enum_str, new_var_def}); +} + +void FeatureInfo::add_middle_node_bs_enum_var_type(const std::string& middle_node_bs_enum_str, + NewVarType new_var_type, + const std::string& adlog_field, + const std::string& list_inner_type) { + NewVarDef new_var_def; + new_var_def.set_bs_enum_str(middle_node_bs_enum_str); + new_var_def.set_new_var_type(new_var_type); + new_var_def.set_adlog_field(adlog_field); + new_var_def.set_list_inner_type(list_inner_type); + + middle_node_bs_enum_var_type_.insert({middle_node_bs_enum_str, new_var_def}); +} + +void FeatureInfo::add_specialization_class(const std::string& name) { + specialization_class_names_[name] = {}; +} + +void FeatureInfo::add_specialization_param_value(const std::string& 
name, + size_t index, + const std::string& param_value) { + if (index >= specialization_class_names_[name].size()) { + specialization_class_names_[name].resize(index + 1); + } + + specialization_class_names_[name][index].set_value_str(param_value); +} + +void FeatureInfo::add_specialization_param_value(const std::string& name, + size_t index, + const std::string& param_value, + int enum_value) { + if (index >= specialization_class_names_[name].size()) { + specialization_class_names_[name].resize(index + 1); + } + + specialization_class_names_[name][index].set_value(param_value, enum_value); +} + +TemplateParamInfo* FeatureInfo::touch_template_param_ptr(const std::string& name, size_t index) { + auto it = specialization_class_names_.find(name); + if (it == specialization_class_names_.end()) { + specialization_class_names_[name] = {}; + } + + auto& params = specialization_class_names_[name]; + if (index >= params.size()) { + params.resize(index + 1); + } + + return &(params[index]); +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/FeatureInfo.h b/convert/info/FeatureInfo.h new file mode 100644 index 0000000..55a0330 --- /dev/null +++ b/convert/info/FeatureInfo.h @@ -0,0 +1,307 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +#include "../Type.h" +#include "CommonInfoMultiIntList.h" +#include "CommonInfoPrepare.h" +#include "ConstructorInfo.h" +#include "MethodInfo.h" +#include "NewVarDef.h" +#include "clang/AST/AST.h" +#include "clang/AST/ASTConsumer.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/StmtCXX.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/Support/CommandLine.h" + +#include "TemplateParamInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +using nlohmann::json; + +/// 类相关的信息,包括特征或者 item_filter、label_extractor +class 
FeatureInfo { + public: + FeatureInfo() = default; + explicit FeatureInfo(const std::string& feature_name): + feature_name_(feature_name), + constructor_info_(feature_name) {} + + const std::string& feature_name() const { return feature_name_; } + void set_feature_name(const std::string& feature_name) { feature_name_ = feature_name; } + + const std::string& feature_type() const { return feature_type_; } + void set_feature_type(const std::string& feature_type) { feature_type_ = feature_type; } + + void set_origin_file(const std::string& filename) { origin_file_ = filename; } + const std::string& origin_file() const { return origin_file_; } + + void set_is_template(bool filename) { is_template_ = filename; } + bool is_template() const { return is_template_; } + + const ConstructorInfo& constructor_info() const { return (constructor_info_); } + ConstructorInfo& mutable_constructor_info() { return (constructor_info_); } + + void set_extract_method_content(const std::string& extract_method_content) { + extract_method_content_ = extract_method_content; + } + const std::string& extract_method_content() const { return extract_method_content_; } + std::string& mutable_extract_method_content() { return extract_method_content_; } + + void set_header_content(const std::string& header_content) { header_content_ = header_content; } + const std::string& header_content() const { return header_content_; } + + void add_field_decl(const clang::FieldDecl* stmt) { field_decls_.push_back(stmt); } + const std::vector& field_decls() const { return field_decls_; } + + absl::optional last_field_decl_end_log() const; + + void set_file_id(const clang::FileID& file_id) { file_id_ = file_id; } + const clang::FileID& file_id() const { return file_id_; } + + void add_field_def(const std::string& bs_enum_str, + const std::string& name, + const std::string& new_def, + NewVarType new_var_type, + AdlogVarType adlog_var_type); + void add_field_def(const std::string& bs_enum_str, + const std::string& 
name, + const std::string& new_def, + NewVarType new_var_type, + ExprType expr_type, + AdlogVarType adlog_var_type); + void add_field_def(const std::string& bs_enum_str, + const std::string& name, + const std::string& new_def, + const std::string& exists_name, + const std::string& new_exists_def, + NewVarType new_var_type, + AdlogVarType adlog_var_type); + const std::unordered_map& new_field_defs() const { return new_field_defs_; } + + void set_middle_node_info(const std::string& bs_enum_str, + const std::string& middle_node_root, + const std::string& middle_node_field); + + void set_common_info_prefix_name_value(const std::string& bs_enum_str, + const std::string& prefix_adlog, + const std::string& name_value_alias); + + void set_action_var_name(const std::string& bs_enum_str, const std::string& action_var_name); + + /// OverviewHandler 后执行 + void clear_new_field_defs() { new_field_defs_.clear(); } + + void add_binary_op_stmt(clang::BinaryOperator* binary_operator) { + binary_op_stmts_.push_back(binary_operator); + } + + const std::vector& binary_op_stmts() const { return binary_op_stmts_; } + + clang::BinaryOperator* find_update_action_stmt() const; + + void set_action(const std::string& action) { action_.emplace(action); } + + void add_template_common_info_value(int value) { template_common_info_values_.insert(value); } + const std::set& template_common_info_values() const { return template_common_info_values_; } + + bool is_member(clang::Expr* expr) const; + bool is_member(const std::string& name) const; + bool is_int_list_member(const std::string& name) const; + bool is_int_list_member(clang::Expr* expr) const; + bool is_int_list_member(const std::string& name, clang::QualType qual_type) const; + + bool is_common_info_enum_member(const std::string& name, clang::QualType qual_type) const; + + void add_int_list_member_single_value(const std::string& name, int value); + void add_int_list_member_values(const std::string& name, const std::vector& values); + + const 
std::vector& get_int_list_member_values(const std::string& name) const; + + const std::string& origin_buffer() const { return origin_buffer_; } + + void set_origin_buffer(const std::string& origin_buffer) { origin_buffer_ = origin_buffer; } + + void add_other_method(const std::string& name, + clang::QualType return_type, + const std::string& bs_return_type, + const std::string& decl, + const std::string& body); + const std::unordered_map& other_methods() const { return other_methods_; } + + MethodInfo& touch_method_info(const std::string& method_name); + const MethodInfo* find_method_info(const std::string& method_name) const; + MethodInfo* mutable_method_info(const std::string& method_name); + + bool is_feature_other_method(const std::string& method_name) const; + + /// 用于区分 combine 特征里的 user 字段。 + /// 由于 item 特征里也有写错的,会访问 user 字段,因此 item 特征也被认为是 combine。 + bool is_combine() const; + + absl::optional& touch_common_info_multi_int_list(const std::string& prefix); + absl::optional& mutable_common_info_multi_int_list() { + return (common_info_multi_int_list_); + } + + const absl::optional& common_info_multi_int_list() const { + return (common_info_multi_int_list_); + } + + absl::optional& mutable_common_info_prepare() { return (common_info_prepare_); } + const absl::optional& common_info_prepare() const { return (common_info_prepare_); } + + const std::vector& template_var_names() const { return (template_var_names_); } + void add_template_var_name(const std::string& var_name) { template_var_names_.push_back(var_name); } + + const json& output() const { return (output_); } + json& mutable_output() { return (output_); } + + /// 从 constructor_info 以及 field_def 里获取字段信息保存到 output_ 中。 + void gen_output(); + + bool has_hash_fn_str() const { return has_hash_fn_str_; } + void set_has_hash_fn_str(bool v) { has_hash_fn_str_ = v; } + + const std::unordered_map& bs_enum_var_type() const { return (bs_enum_var_type_); } + void add_bs_enum_var_type(const std::string& bs_enum_str, 
NewVarType new_var_type) { + bs_enum_var_type_.insert({bs_enum_str, new_var_type}); + } + + bool is_in_bs_enum_var_type(const std::string& bs_enum_str) const { + return bs_enum_var_type_.find(bs_enum_str) != bs_enum_var_type_.end(); + } + + bool has_cc_file() const { return has_cc_file_; } + void set_has_cc_file(bool v) { has_cc_file_ = v; } + + bool has_query_token() const { return has_query_token_; } + void set_has_query_token(bool v) { has_query_token_ = v; } + + bool has_common_info_multi_int_list() const { return has_common_info_multi_int_list_; } + void set_has_common_info_multi_int_list(bool v) { has_common_info_multi_int_list_ = v; } + + bool has_common_info_multi_map() const {return has_common_info_multi_map_;} + void set_has_common_info_multi_map(bool v) {has_common_info_multi_map_ = v;} + + const std::unordered_map& middle_node_bs_enum_var_type() const { + return (middle_node_bs_enum_var_type_); + } + + /// scalar + void add_middle_node_bs_enum_var_type(const std::string& middle_node_bs_enum_str, + NewVarType new_var_type, + const std::string& adlog_field); + /// list + void add_middle_node_bs_enum_var_type(const std::string& middle_node_bs_enum_str, + NewVarType new_var_type, + const std::string& adlog_field, + const std::string& list_inner_type); + bool is_in_middle_node_bs_enum_var_type(const std::string& middle_node_bs_enum_str) const { + return middle_node_bs_enum_var_type_.find(middle_node_bs_enum_str) != middle_node_bs_enum_var_type_.end(); + } + + void add_specialization_class(const std::string& name); + + void add_specialization_param_value(const std::string& name, + size_t index, + const std::string& param_value); + + void add_specialization_param_value(const std::string& name, + size_t index, + const std::string& param_value, + int enum_value); + + TemplateParamInfo* touch_template_param_ptr(const std::string& name, size_t index); + + const std::unordered_map>& specialization_class_names() const { + return (specialization_class_names_); + } + 
+ void set_template_param_names(const std::vector& param_names) { + template_param_names_ = param_names; + } + + const absl::optional& reco_extract_body() const { return reco_extract_body_; } + void set_reco_extract_body(const std::string& body) { reco_extract_body_.emplace(body); } + + const absl::optional& cc_filename() const { return cc_filename_; } + void set_cc_filename(const std::string& filename) { cc_filename_.emplace(filename); } + + private: + std::string feature_name_; + std::string feature_type_; + std::string origin_file_; + + absl::optional cc_filename_; + + bool is_template_ = false; + + /// specialization_name -> [TemplateParamInfo, ...] + std::unordered_map> specialization_class_names_; + + std::vector template_param_names_; + + clang::FileID file_id_; + + ConstructorInfo constructor_info_; + std::string extract_method_content_; + std::string header_content_; + + /// 模板类对应的字段 + std::unordered_map new_field_defs_; + std::vector binary_op_stmts_; + absl::optional action_; + + /// 模板参数中出现的 common info value + std::set template_common_info_values_; + + /// OverviewHandler 中收集 + std::vector template_var_names_; + + std::vector field_decls_; + std::unordered_map> int_list_member_values_; + + std::string origin_buffer_; + std::unordered_map other_methods_; + + absl::optional common_info_multi_int_list_; + absl::optional common_info_prepare_; + + json output_; + bool has_hash_fn_str_ = false; + + // 普通叶子节点的 list 对应的 bs_enum, 不包括 action detail, common info 等。 + // 用于判断 for 循环中的 size method 是否是叶子节点。 + std::unordered_map bs_enum_var_type_; + // 中间节点的叶子节点 list 或者对应的 bs_enum。不以 adlog 开头。需要保存 adlog_field, type, + // list_inner_type 等信息。 + // 单独存一个 map 和普通节点区分开。 + std::unordered_map middle_node_bs_enum_var_type_; + + bool has_cc_file_ = false; + bool has_query_token_ = false; + bool has_common_info_multi_int_list_ = false; + bool has_common_info_multi_map_ = false; + + /// reco user info + absl::optional reco_extract_body_; +}; + +} // namespace convert +} // 
namespace ad_algorithm +} // namespace ks diff --git a/convert/info/IfInfo.cpp b/convert/info/IfInfo.cpp new file mode 100644 index 0000000..8315fa4 --- /dev/null +++ b/convert/info/IfInfo.cpp @@ -0,0 +1,45 @@ +#include +#include +#include + +#include +#include +#include +#include + +#include "../Tool.h" +#include "IfInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +bool IfInfo::is_check_item_pos_cond() const { + return is_check_item_pos(if_stmt_); +} + +void IfInfo::update_check_equal(const std::string& op) { + if (op.find("==") != std::string::npos) { + is_check_equal_ = true; + } else if (op.find("!=") != std::string::npos) { + is_check_not_equal_ = true; + } +} + +bool IfInfo::is_body_only_break() const { + if (if_stmt_ == nullptr) { + return false; + } + + std::string body_text = tool::rm_surround_big_parantheses(stmt_to_string(if_stmt_->getThen())); + std::regex p("[ \\n]*break;[ \\n]*"); + if (std::regex_match(body_text, p)) { + return true; + } + + return false; +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/IfInfo.h b/convert/info/IfInfo.h new file mode 100644 index 0000000..c0cb792 --- /dev/null +++ b/convert/info/IfInfo.h @@ -0,0 +1,125 @@ +#pragma once + +#include + +#include +#include +#include + +#include "clang/Tooling/Tooling.h" +#include "llvm/Support/CommandLine.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/AST/AST.h" +#include "clang/AST/StmtCXX.h" +#include "clang/AST/ASTConsumer.h" + +#include "../Type.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +enum class IfStage { + COND, + THEN, + ELSE, + END +}; + +/// 保存 if 相关的信息 +class IfInfo { + public: + IfInfo() = default; + explicit IfInfo(clang::IfStmt* if_stmt): if_stmt_(if_stmt), if_stage_(IfStage::COND) {} + + clang::IfStmt* if_stmt() const { return if_stmt_; } + + void set_if_stage(IfStage if_stage) { if_stage_ = if_stage; } + IfStage if_stage() const { return if_stage_; } + + 
bool is_check_item_pos_cond() const; + + bool is_check_middle_node_root_cond() const { return is_check_middle_node_root_; } + void set_is_check_middle_node_root_cond(bool v) { is_check_middle_node_root_ = v; } + + bool is_check_common_info_normal_cond() const { return is_check_common_info_normal_; } + void set_is_check_common_info_normal_cond(bool v) { is_check_common_info_normal_ = v; } + + bool is_check_common_info_multi_cond() const { return is_check_common_info_multi_; } + void set_is_check_common_info_multi_cond(bool v) { is_check_common_info_multi_ = v; } + + bool is_check_common_info_fixed_cond() const { return is_check_common_info_fixed_; } + void set_is_check_common_info_fixed_cond(bool v) { is_check_common_info_fixed_ = v; } + + bool is_check_common_info_cond() const { + return is_check_common_info_ || + is_check_common_info_normal_ || + is_check_common_info_multi_ || + is_check_common_info_fixed_; + } + void set_is_check_common_info_cond(bool v) { is_check_common_info_ = v; } + + bool is_check_action_detail_cond() const { return is_check_action_detail_; } + void set_is_check_action_detail_cond(bool v) { is_check_action_detail_ = v; } + + void add_cond_var_type(ExprType cond_var_type) { cond_var_types_.insert(cond_var_type); } + bool has_cond_var_type(ExprType cond_var_type) const { + return cond_var_types_.find(cond_var_type) != cond_var_types_.end(); + } + + void set_common_info_index(size_t index) { common_info_index_.emplace(index); } + const absl::optional& common_info_index() const { return (common_info_index_); } + + void set_common_info_value(int value) { common_info_value_.emplace(value); } + const absl::optional& common_info_value() const { return (common_info_value_); } + + void set_common_info_int_name(const std::string& int_name) {common_info_int_name_.emplace(int_name);} + const absl::optional& common_info_int_name() const {return (common_info_int_name_);} + + void update_check_equal(const std::string& op); + bool is_check_equal() const { 
return is_check_equal_; } + void set_is_check_equal(bool v) { is_check_equal_ = v; } + bool is_check_not_equal() const { return is_check_not_equal_; } + + bool is_check_item_pos_include() const { return is_check_item_pos_include_; } + void set_is_check_item_pos_include_cond(bool v) { is_check_item_pos_include_ = v; } + + bool is_check_seq_list_cond() const { return is_check_seq_list_; } + void set_is_check_seq_list_cond(bool v) { is_check_seq_list_ = v; } + + bool is_check_common_info_map_end() const { return is_check_common_info_map_end_; } + void set_is_check_common_info_map_end(bool v) { is_check_common_info_map_end_ = v; } + + bool is_check_common_info_list_size_not_equal() const {return is_check_common_info_list_size_not_equal_;} + void set_is_check_common_info_list_size_not_equal(bool v) {is_check_common_info_list_size_not_equal_ = v;} + + const absl::optional& left_expr_str() const { return (left_expr_str_); } + void set_left_expr_str(const std::string& left_expr_str) { left_expr_str_.emplace(left_expr_str); } + + bool is_body_only_break() const; + + private: + clang::IfStmt* if_stmt_ = nullptr; + IfStage if_stage_; + std::set cond_var_types_; + bool is_check_middle_node_root_ = false; + bool is_check_common_info_multi_ = false; + bool is_check_common_info_normal_ = false; + bool is_check_common_info_fixed_ = false; + bool is_check_common_info_ = false; + bool is_check_common_info_map_end_ = false; + bool is_check_common_info_list_size_not_equal_ = false; + bool is_check_action_detail_ = false; + bool is_check_equal_ = false; + bool is_check_not_equal_ = false; + bool is_check_item_pos_include_ = false; + bool is_check_seq_list_ = false; + absl::optional common_info_index_; + absl::optional common_info_value_; + absl::optional common_info_int_name_; + absl::optional left_expr_str_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/Info.cpp b/convert/info/Info.cpp new file mode 100644 index 
0000000..b17abd6 --- /dev/null +++ b/convert/info/Info.cpp @@ -0,0 +1,18 @@ +#include +#include + +#include +#include +#include +#include + +#include "../Tool.h" +#include "Info.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/Info.h b/convert/info/Info.h new file mode 100644 index 0000000..f5a40f9 --- /dev/null +++ b/convert/info/Info.h @@ -0,0 +1,71 @@ +#pragma once + +#include + +#include + +#include "clang/Tooling/Tooling.h" +#include "llvm/Support/CommandLine.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/AST/AST.h" +#include "clang/AST/StmtCXX.h" +#include "clang/AST/ASTConsumer.h" + +#include "./IfInfo.h" +#include "./SwitchCaseInfo.h" +#include "./DeclInfo.h" +#include "./LoopInfo.h" +#include "./CommonInfo.h" +#include "./CommonInfoDetail.h" +#include "./CommonInfoNormal.h" +#include "./CommonInfoFixed.h" +#include "./CommonInfoFixedList.h" +#include "./CommonInfoMultiMap.h" +#include "./CommonInfoMultiIntList.h" +#include "./CommonInfoPrepare.h" +#include "./CommonInfoMiddleNode.h" +#include "./ConstructorInfo.h" +#include "./ActionDetailInfo.h" +#include "./ActionDetailFixedInfo.h" +#include "./MiddleNodeInfo.h" +#include "./FeatureInfo.h" +#include "./VarDeclInfo.h" +#include "./BinaryOpInfo.h" +#include "./SeqListInfo.h" +#include "./ProtoListInfo.h" +#include "./AssignInfo.h" +#include "./BSFieldInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +template struct InfoTraits {}; + +template<> struct InfoTraits { static const IfInfo v; }; +template<> struct InfoTraits { static const DeclInfo v; }; +template<> struct InfoTraits { static const VarDeclInfo v; }; +template<> struct InfoTraits { static const BinaryOpInfo v; }; +template<> struct InfoTraits { static const LoopInfo v; }; +template<> struct InfoTraits { static const SwitchCaseInfo v; }; +template<> struct InfoTraits { static const AssignInfo v; 
}; + +template<> struct InfoTraits { static const ActionDetailInfo v; }; +template<> struct InfoTraits { static const ActionDetailFixedInfo v; }; +template<> struct InfoTraits { static const CommonInfoPrepare v; }; +template<> struct InfoTraits { static const CommonInfoNormal v; }; +template<> struct InfoTraits { static const CommonInfoMultiMap v; }; +template<> struct InfoTraits { static const CommonInfoMultiIntList v; }; +template<> struct InfoTraits { static const CommonInfoFixed v; }; +template<> struct InfoTraits { static const CommonInfoFixedList v; }; +template<> struct InfoTraits { static const MiddleNodeInfo v; }; +template<> struct InfoTraits { static const SeqListInfo v; }; +template<> struct InfoTraits { static const ProtoListInfo v; }; +template<> struct InfoTraits { static const BSFieldInfo v; }; + +template<> struct InfoTraits { static const ConstructorInfo v; }; +template<> struct InfoTraits { static const FeatureInfo v; }; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/InfoBase.cpp b/convert/info/InfoBase.cpp new file mode 100644 index 0000000..85fc697 --- /dev/null +++ b/convert/info/InfoBase.cpp @@ -0,0 +1,38 @@ +#include "../Env.h" +#include "InfoBase.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void InfoBase::set_env_ptr(Env* env_ptr) { + env_ptr_ = env_ptr; +} + +Env* InfoBase::env_ptr() const { + return env_ptr_; +} + +Env* InfoBase::mutable_env_ptr() { + return env_ptr_; +} + +Env* InfoBase::parent_env_ptr() const { + if (env_ptr_ == nullptr) { + return nullptr; + } + + return env_ptr_->parent(); +} + +Env* InfoBase::mutable_parent_env_ptr() { + if (env_ptr_ == nullptr) { + return nullptr; + } + + return env_ptr_->parent(); +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/InfoBase.h b/convert/info/InfoBase.h new file mode 100644 index 0000000..fbc5948 --- /dev/null +++ b/convert/info/InfoBase.h @@ -0,0 +1,30 @@ 
+#pragma once + +#include +#include + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +class InfoBase { + public: + InfoBase() = default; + + void set_env_ptr(Env* env_ptr); + + Env* env_ptr() const; + Env* mutable_env_ptr(); + + Env* parent_env_ptr() const; + Env* mutable_parent_env_ptr(); + + protected: + Env* env_ptr_ = nullptr; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/LoopInfo.cpp b/convert/info/LoopInfo.cpp new file mode 100644 index 0000000..d705924 --- /dev/null +++ b/convert/info/LoopInfo.cpp @@ -0,0 +1,54 @@ +#include "../Env.h" +#include "../Tool.h" +#include "LoopInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +std::string LoopInfo::loop_var_expr_str() const { + if (loop_var_expr_ != nullptr) { + return tool::trim_this(stmt_to_string(loop_var_expr_)); + } + + return ""; +} + +std::string LoopInfo::origin_stmt_str() const { + if (cxx_for_range_stmt_ != nullptr) { + return stmt_to_string(cxx_for_range_stmt_); + } + + return stmt_to_string(for_stmt_); +} + +bool LoopInfo::is_double_list_loop() const { + return is_double_list_inner_loop() || is_double_list_outer_loop(); +} + +bool LoopInfo::is_double_list_inner_loop() const { + if (env_ptr_ != nullptr && + env_ptr_->is_parent_loop() && + !is_repeated_common_info() && + is_proto_list_loop() && + !is_for_stmt()) { + return true; + } + + return false; +} + +bool LoopInfo::is_double_list_outer_loop() const { + if (!is_repeated_common_info() && + is_proto_list_loop() && + is_child_proto_list_loop() && + !is_for_stmt()) { + return true; + } + + return false; +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/LoopInfo.h b/convert/info/LoopInfo.h new file mode 100644 index 0000000..d51a289 --- /dev/null +++ b/convert/info/LoopInfo.h @@ -0,0 +1,155 @@ +#pragma once + +#include + +#include +#include +#include + +#include "clang/Tooling/Tooling.h" 
+#include "llvm/Support/CommandLine.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/AST/AST.h" +#include "clang/AST/StmtCXX.h" +#include "clang/AST/ASTConsumer.h" + +#include "../Tool.h" +#include "../Type.h" +#include "InfoBase.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +enum class LoopStage { + INIT, + BODY, + END +}; + +/// 保存 loop 相关的信息 +class LoopInfo: public InfoBase { + public: + LoopInfo() = default; + explicit LoopInfo(clang::ForStmt* for_stmt): + is_for_stmt_(true), + for_stmt_(for_stmt), + loop_stage_(LoopStage::INIT) {} + explicit LoopInfo(clang::CXXForRangeStmt* cxx_for_range_stmt): + is_for_stmt_(false), + cxx_for_range_stmt_(cxx_for_range_stmt), + loop_stage_(LoopStage::INIT) {} + + bool is_for_stmt() const { return is_for_stmt_; } + + clang::ForStmt* for_stmt() { return for_stmt_; } + clang::CXXForRangeStmt* cxx_for_range_stmt() { return cxx_for_range_stmt_; } + + void set_loop_state(LoopStage loop_stage) { loop_stage_ = loop_stage; } + LoopStage loop_stage() const { return loop_stage_; } + LoopStage loop_stage() { return loop_stage_; } + + void set_loop_iter(const std::string& loop_iter) { loop_iter_ = loop_iter; } + const std::string& loop_iter() const { return loop_iter_; } + + clang::Expr* loop_var_expr() const { return loop_var_expr_; } + void set_loop_var_expr(clang::Expr* loop_var_expr) { loop_var_expr_ = loop_var_expr; } + std::string loop_var_expr_str() const; + + const std::string& loop_var() const { return loop_var_; } + void set_loop_var(const std::string& loop_var) { loop_var_ = loop_var; } + + const std::string &prefix_adlog() const { return prefix_adlog_; } + void set_prefix_adlog(const std::string &prefix_adlog) { prefix_adlog_ = prefix_adlog; } + + bool is_common_info_list_map() const { return is_common_info_list_map_; } + void set_is_common_info_list_map(bool v) { is_common_info_list_map_ = v; } + + bool is_common_info_map() const {return is_common_info_map_;} + void set_is_common_info_map(bool 
v) {is_common_info_map_ = v;} + + bool is_repeated_common_info() const { return is_repeated_common_info_; } + void set_is_repeated_common_info(bool v) { is_repeated_common_info_ = v; } + + bool is_proto_list_loop() const { return is_proto_list_loop_; } + void set_is_proto_list_loop(bool v) { is_proto_list_loop_ = v; } + + bool is_general_proto_map_loop() const { return is_general_proto_map_loop_; } + void set_is_general_proto_map_loop(bool v) { is_general_proto_map_loop_ = v; } + + bool is_child_proto_list_loop() const { return is_child_proto_list_loop_; } + void set_is_child_proto_list_loop(bool v) { is_child_proto_list_loop_ = v; } + + bool is_int_list_member_loop() const { return is_int_list_member_loop_; } + void set_is_int_list_member_loop(bool v) { is_int_list_member_loop_ = v; } + + const std::vector& int_list_member_values() const { return int_list_member_values_; } + void set_int_list_member_values(const std::vector& values) { int_list_member_values_ = values; } + + const absl::optional& int_list_index() const { return int_list_index_; } + void set_int_list_index(size_t x) { int_list_index_.emplace(x); } + + std::string origin_stmt_str() const; + + bool is_seq_list_loop() const { return is_seq_list_loop_; } + void set_is_seq_list_loop(bool v) { is_seq_list_loop_ = v; } + + const std::string& loop_var_expr_bs_enum_str() const { return loop_var_expr_bs_enum_str_; } + void set_loop_var_expr_bs_enum_str(const std::string& s) { loop_var_expr_bs_enum_str_ = s; } + + bool is_middle_node_proto_list_loop() const { return is_middle_node_proto_list_loop_; } + void set_is_middle_node_proto_list_loop(bool v) { is_middle_node_proto_list_loop_ = v; } + + bool is_double_list_loop() const; + bool is_double_list_inner_loop() const; + bool is_double_list_outer_loop() const; + + bool is_query_token_loop() const {return is_query_token_loop_;} + void set_is_query_token_loop(bool v) {is_query_token_loop_ = v;} + + const std::string& loop_var_type() const { return 
(loop_var_type_); } + void set_loop_var_type(const std::string& loop_var_type) { loop_var_type_ = loop_var_type; } + + const std::string& origin_size_var() const { return (origin_size_var_); } + void set_origin_size_var(const std::string& origin_size_var) { origin_size_var_ = origin_size_var; } + + void add_leaf_field(const std::string& field) { leaf_fields_.emplace_back(field); } + const std::vector& leaf_fields() const { return leaf_fields_; } + + bool is_reco_user_info_loop() const { return is_reco_user_info_loop_; } + void set_is_reco_user_info_loop(bool v) { is_reco_user_info_loop_ = v; } + + private: + bool is_for_stmt_; + clang::ForStmt* for_stmt_; + clang::CXXForRangeStmt* cxx_for_range_stmt_; + LoopStage loop_stage_; + std::string loop_iter_; + std::string loop_var_; + std::string loop_var_type_; + clang::Expr* loop_var_expr_ = nullptr; + std::string loop_var_expr_bs_enum_str_; + std::string prefix_adlog_; + + std::string origin_size_var_; + std::vector leaf_fields_; + + std::vector int_list_member_values_; + absl::optional int_list_index_; + + bool is_common_info_map_ = false; + bool is_common_info_list_map_ = false; + bool is_repeated_common_info_ = false; + bool is_proto_list_loop_ = false; + bool is_general_proto_map_loop_ = false; + bool is_child_proto_list_loop_ = false; + bool is_int_list_member_loop_ = false; + bool is_seq_list_loop_ = false; + bool is_middle_node_proto_list_loop_ = false; + bool is_query_token_loop_ = false; + bool is_reco_user_info_loop_ = false; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/MethodInfo.cpp b/convert/info/MethodInfo.cpp new file mode 100644 index 0000000..d78f962 --- /dev/null +++ b/convert/info/MethodInfo.cpp @@ -0,0 +1,80 @@ +#include +#include "MethodInfo.h" +#include "NewActionParam.h" +#include + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void MethodInfo::update(clang::QualType return_type, + const std::string& bs_return_type, + 
const std::string& decl, + const std::string& body) { + return_type_ = return_type; + bs_return_type_ = bs_return_type; + decl_ = decl; + body_ = body; +} + +void MethodInfo::add_new_action_param(size_t index, + const NewActionParam &new_action_param, + const std::string& new_origin_name) { + if (index >= new_action_params_.size()) { + new_action_params_.resize(index + 1); + } + + new_action_params_[index] = new_action_param; + new_action_params_[index].set_origin_name(new_origin_name); +} + +void MethodInfo::add_new_action_param(size_t index, + const std::string& origin_name) { + if (index >= new_action_params_.size()) { + new_action_params_.resize(index + 1); + } + + new_action_params_[index] = std::move(NewActionParam(origin_name)); +} + +void MethodInfo::add_new_action_field_param(const std::string& origin_name, + const std::string& field, + const std::string& inner_type_str, + bool is_combine_feature, + const std::string& new_name) { + for (size_t i = 0; i < new_action_params_.size(); i++) { + if (new_action_params_[i].origin_name() == origin_name) { + if (!new_action_params_[i].has_new_param_name(new_name)) { + NewActionFieldParam new_param(new_name, field, inner_type_str, is_combine_feature); + new_action_params_[i].add_new_param(new_param); + } + } + } +} + +const NewActionParam& MethodInfo::find_new_action_param(size_t index) const { + if (index >= new_action_params_.size()) { + static NewActionParam empty; + return (empty); + } + + return new_action_params_[index]; +} + +absl::optional> MethodInfo::find_new_action_param_name(size_t index) const { + if (index >= new_action_params_.size() || new_action_params_[index].new_params().size() == 0) { + return absl::nullopt; + } + + std::vector new_names; + const auto& new_field_params = new_action_params_[index].new_params(); + for (size_t i = 0; i < new_field_params.size(); i++) { + new_names.push_back(new_field_params[i].name()); + } + + return absl::make_optional(new_names); +} + +} // namespace convert +} // 
namespace ad_algorithm +} // namespace ks diff --git a/convert/info/MethodInfo.h b/convert/info/MethodInfo.h new file mode 100644 index 0000000..e450fee --- /dev/null +++ b/convert/info/MethodInfo.h @@ -0,0 +1,81 @@ +#pragma once + +#include + +#include +#include +#include + +#include "clang/AST/Type.h" + +#include "NewActionParam.h" +#include "CommonInfoPrepare.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class MethodInfo { + public: + MethodInfo() = default; + explicit MethodInfo(const std::string& name): name_(name) {} + explicit MethodInfo(const std::string& name, + clang::QualType return_type, + const std::string& bs_return_type, + const std::string& decl, + const std::string& body): + name_(name), + return_type_(return_type), + bs_return_type_(bs_return_type), + decl_(decl), + body_(body) {} + + const std::string& name() const { return name_; } + clang::QualType return_type() const { return return_type_; } + const std::string& bs_return_type() const { return bs_return_type_; } + const std::string& decl() const { return decl_; } + const std::string& body() const { return body_; } + + void update(clang::QualType return_type, + const std::string& bs_return_type, + const std::string& decl, + const std::string& body); + + void add_new_action_param(size_t index, + const NewActionParam& new_action_param, + const std::string& new_origin_name); + + void add_new_action_param(size_t index, + const std::string& origin_name); + void add_new_action_field_param(const std::string& origin_name, + const std::string& field, + const std::string& inner_type_str, + bool is_combine_feature, + const std::string& new_name); + const NewActionParam& find_new_action_param(size_t index) const; + + absl::optional> find_new_action_param_name(size_t index) const; + + bool is_return_adlog_user_field() const { return is_return_adlog_user_field_; } + void set_is_return_adlog_user_field(bool v) { is_return_adlog_user_field_ = v; } + + absl::optional 
&mutable_common_info_prepare() { return (common_info_prepare_); } + const absl::optional &common_info_prepare() const { return (common_info_prepare_); } + + private: + std::string name_; + clang::QualType return_type_; + std::string bs_return_type_; + std::string decl_; + std::string body_; + + /// AdActionInfo 对应到多个用到的字段,需要保存多个新参数。 + std::vector new_action_params_; + + bool is_return_adlog_user_field_ = false; + absl::optional common_info_prepare_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/MiddleNodeInfo.cpp b/convert/info/MiddleNodeInfo.cpp new file mode 100644 index 0000000..e158238 --- /dev/null +++ b/convert/info/MiddleNodeInfo.cpp @@ -0,0 +1,146 @@ +#include +#include +#include + +#include "../Env.h" +#include "../Tool.h" +#include "MiddleNodeInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +std::string MiddleNodeInfo::get_bs_exists_field_def(Env* env_ptr, + const std::string& leaf, + const std::string& field) const { + std::ostringstream oss; + std::string new_field = tool::trim_exists(field); + oss << "BSHas" << name_ << "Impl " << leaf << "{" << tool::add_quote(new_field) << "}"; + + return oss.str(); +} + +std::string MiddleNodeInfo::get_root_bs_exists_field_def(Env* env_ptr) const { + return get_bs_exists_field_def(env_ptr, std::string("BSHas") + name_, ""); +} + +std::string MiddleNodeInfo::get_bs_scalar_field_def(Env* env_ptr, + const std::string& leaf, + const std::string& field, + clang::QualType qual_type) const { + std::ostringstream oss; + + // 中间节点都是来自 item。 + std::string type_str = tool::get_builtin_type_str(qual_type); + oss << "BS" << name_ << "<" << type_str << "> "; + oss << leaf << "{" << tool::add_quote(field) << "}"; + + return oss.str(); +} + +std::string MiddleNodeInfo::get_bs_list_field_def(Env *env_ptr, + const std::string &leaf, + const std::string &field, + const std::string& inner_type) const { + std::ostringstream oss; + + // 中间节点都是来自 item。 + 
oss << "BS" << name_ << "> "; + oss << leaf << "{" << tool::add_quote(field) << "}"; + + return oss.str(); +} + +std::string MiddleNodeInfo::get_list_loop_bs_wrapped_text(Env *env_ptr, + const std::string &body, + const std::string &bs_enum_str) const { + if (const auto& var = env_ptr->find_new_def(bs_enum_str)) { + if (var->name().size() > 0) { + std::ostringstream oss; + oss << "for (size_t idx = 0; idx < " << var->name() << ".size(); idx++) {\n " + << body + << "\n}\n "; + return oss.str(); + } else { + LOG(INFO) << "new var name is empty! bs_enum_str: " << bs_enum_str + << ", middle_node name: " << name_; + return ""; + } + } + + LOG(INFO) << "cannot find new var def, bs_enum_str: " << bs_enum_str; + return ""; +} + +std::string MiddleNodeInfo::get_bs_list_def(Env *env_ptr, + const std::string& bs_enum_str, + const std::string& middle_node_leaf, + const std::string& type_str) const { + std::ostringstream oss; + + // middle node 都是来自 item + std::string name = env_ptr->find_valid_new_name(bs_enum_str); + oss << "BSRepeatedField<" << type_str << "> " << name << " = std::move(" << middle_node_leaf << "(bs, pos))"; + + return oss.str(); +} + +bool MiddleNodeInfo::is_user_middle_node(const std::string& name) { + static std::unordered_set user_nodes = { + "FixedCommonInfo" + }; + + return user_nodes.find(name) != user_nodes.end(); +} + +std::string MiddleNodeInfo::get_bs_str_scalar_def(Env* env_ptr, + const std::string& bs_enum_str, + const std::string& middle_node_leaf) const { + std::ostringstream oss; + + std::string name = env_ptr->find_valid_new_name(bs_enum_str); + oss << "absl::string_view " << name << " = " << middle_node_leaf << "(bs, pos)"; + + return oss.str(); +} + +std::unordered_map> MiddleNodeInfo::possible_adlog_prefix_ = { + {"PhotoInfo", + {"adlog.item.ad_dsp_info.photo_info", + "adlog.item.fans_top_info.photo_info", + "adlog.item.nature_photo_info.photo_info"}}, + {"PhotoInfoNew", + {"adlog.item.ad_dsp_info.photo_info", + 
"adlog.item.fans_top_info.photo_info", + "adlog.item.nature_photo_info.photo_info"}}, + {"CommonInfoAttr", + {"adlog.item.ad_dsp_info.photo_info.common_info_attr", + "adlog.item.ad_dsp_info.live_info.common_info_attr", + "adlog.item.fans_top_info.photo_info.common_info_attr", + "adlog.item.nature_photo_info.photo_info.common_info_attr", + "adlog.item.fans_top_live_info.live_info.common_info_attr"}}, + {"LiveInfo", + {"adlog.item.fans_top_live_info.live_info", + "adlog.item.ad_dsp_info.live_info"}}, + {"AdDspMmuInfo", + {"adlog.item.ad_dsp_info.ad_dsp_mmu_info"}}, + {"AuthorInfo", + {"adlog.item.fans_top_live_info.live_info.author_info", + "adlog.item.fans_top_info.photo_info.author_info", + "adlog.item.ad_dsp_info.photo_info.author_info", + "adlog.item.ad_dsp_info.live_info.author_info"}} +}; + +const std::vector& MiddleNodeInfo::get_possible_adlog_prefix(const std::string& root) { + static std::vector empty; + auto it = possible_adlog_prefix_.find(root); + if (it == possible_adlog_prefix_.end()) { + return (empty); + } + + return (it->second); +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/MiddleNodeInfo.h b/convert/info/MiddleNodeInfo.h new file mode 100644 index 0000000..b4ad6bd --- /dev/null +++ b/convert/info/MiddleNodeInfo.h @@ -0,0 +1,63 @@ +#pragma once + +#include +#include +#include +#include +#include +#include "clang/AST/Expr.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +/// 中间节点,如 PhotoInfo、LiveInfo 等。 +/// 包含前缀等信息。 +class MiddleNodeInfo { + public: + MiddleNodeInfo() = default; + explicit MiddleNodeInfo(const std::string& name): name_(name) {} + + const std::string& name() const { return name_; } + + std::string get_bs_exists_field_def(Env* env_ptr, + const std::string& leaf, + const std::string& field) const; + std::string get_root_bs_exists_field_def(Env* env_ptr) const; + + std::string get_bs_scalar_field_def(Env* env_ptr, + const std::string& leaf, + 
const std::string& field, + clang::QualType qual_type) const; + + std::string get_bs_list_field_def(Env *env_ptr, + const std::string &leaf, + const std::string &field, + const std::string& inner_type) const; + + std::string get_list_loop_bs_wrapped_text(Env* env_ptr, + const std::string& body, + const std::string& bs_enum_str) const; + + std::string get_bs_list_def(Env* env_ptr, + const std::string& bs_enum_str, + const std::string& middle_node_leaf, + const std::string& type_str) const; + + std::string get_bs_str_scalar_def(Env* env_ptr, + const std::string& bs_enum_str, + const std::string& middle_node_leaf) const; + + static bool is_user_middle_node(const std::string& name); + static const std::vector& get_possible_adlog_prefix(const std::string& root); + + protected: + std::string name_; + static std::unordered_map> possible_adlog_prefix_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/NewActionParam.cpp b/convert/info/NewActionParam.cpp new file mode 100644 index 0000000..0dbdfd4 --- /dev/null +++ b/convert/info/NewActionParam.cpp @@ -0,0 +1,90 @@ +#include +#include +#include + +#include + +#include "../Env.h" +#include "../Tool.h" +#include "NewActionParam.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +// action detail 肯定是 user 特征 +const std::string NewActionFieldParam::const_ref_str() const { + std::ostringstream oss; + oss << "const BSRepeatedField<" << inner_type_str_; + if (is_combine_feature_) { + oss << ", true"; + } + oss << ">& " << name_; + + return oss.str(); +} + +std::string NewActionFieldParam::get_bs_enum_str(const std::string& prefix) const { + return prefix + "_" + field_; +} + +std::string NewActionFieldParam::get_new_def(const std::string& prefix, Env* env_ptr) const { + std::ostringstream oss; + + bool is_user = !tool::is_item_feature(prefix); + std::string bs_enum_str = get_bs_enum_str(prefix); + std::string var_name = 
env_ptr->find_valid_new_name(bs_enum_str); + + oss << "auto enum_" << var_name << " = BSFieldEnum::" << bs_enum_str << ";\n "; + if (field_ == "size") { + oss << inner_type_str_ << " " << var_name << " = BSFieldHelper::GetSingular<" << inner_type_str_; + if (is_combine_feature_ && is_user) { + oss << ", true"; + } + oss << ">" << "(*bs, enum_" << var_name << ", pos);"; + } else { + oss << "BSRepeatedField<" << inner_type_str_; + if (is_combine_feature_ && is_user) { + oss << ", true"; + } + oss << "> " << var_name << "(*bs, enum_" << var_name << ", pos);"; + } + + return oss.str(); +} + +void NewActionParam::add_new_param(const NewActionFieldParam& new_param) { + new_params_.push_back(new_param); +} + +const NewActionFieldParam& NewActionParam::get_new_param(size_t index) const { + if (index >= new_params_.size()) { + static NewActionFieldParam empty; + return (empty); + } + + return (new_params_[index]); +} + +bool NewActionParam::has_new_param_name(const std::string& name) const { + for (size_t i = 0; i < new_params_.size(); i++) { + if (new_params_[i].name() == name) { + return true; + } + } + + return false; +} + +std::string NewActionParam::get_bs_field_param_str() const { + std::vector arr; + for (size_t i = 0; i < new_params_.size(); i++) { + arr.push_back(new_params_[i].name()); + } + + return absl::StrJoin(arr, ","); +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/NewActionParam.h b/convert/info/NewActionParam.h new file mode 100644 index 0000000..c7327e3 --- /dev/null +++ b/convert/info/NewActionParam.h @@ -0,0 +1,66 @@ +#pragma once + +#include + +#include +#include +#include + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +class NewActionFieldParam { + public: + NewActionFieldParam() = default; + explicit NewActionFieldParam(const std::string& name, + const std::string& field, + const std::string& inner_type_str, + bool is_combine_feature): + name_(name), + 
field_(field), + inner_type_str_(inner_type_str), + is_combine_feature_(is_combine_feature) {} + + const std::string& name() const { return name_; } + const std::string& field() const { return field_; } + const std::string& inner_type_str() const { return inner_type_str_; } + bool is_combine_feature() const { return is_combine_feature_; } + + const std::string const_ref_str() const; + + std::string get_bs_enum_str(const std::string& prefix) const; + std::string get_new_def(const std::string& prefix, Env* env_ptr) const; + + private: + std::string name_; + std::string field_; + std::string inner_type_str_; + bool is_combine_feature_ = false; +}; + +class NewActionParam { + public: + NewActionParam() = default; + explicit NewActionParam(const std::string& origin_name): origin_name_(origin_name) {} + + const std::string& origin_name() const { return origin_name_; } + void set_origin_name(const std::string& origin_name) { origin_name_ = origin_name; } + + void add_new_param(const NewActionFieldParam& new_param); + const std::vector& new_params() const { return (new_params_); } + const NewActionFieldParam& get_new_param(size_t index) const; + + bool has_new_param_name(const std::string& name) const; + std::string get_bs_field_param_str() const; + + private: + std::string origin_name_; + std::vector new_params_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/NewVarDef.cpp b/convert/info/NewVarDef.cpp new file mode 100644 index 0000000..d6851df --- /dev/null +++ b/convert/info/NewVarDef.cpp @@ -0,0 +1,49 @@ +#include +#include +#include + +#include +#include +#include +#include + +#include "NewVarDef.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void NewVarDef::set_name(const std::string& name) { + if (name != name_) { + std::regex p_name(std::string(" ") + name_ + " "); + if (var_def_.size() > 0) { + var_def_ = std::regex_replace(var_def_, p_name, std::string( " ") + name + std::string(" 
")); + } + + name_ = name; + } +} + +void NewVarDef::set_exists_var_def(const std::string& exists_name, const std::string& exists_var_def) { + exists_name_ = exists_name; + exists_var_def_ = exists_var_def; +} + +void NewVarDef::set_var_def(const std::string& var_def, NewVarType new_var_type) { + var_def_ = var_def; + new_var_type_ = new_var_type; +} + +bool NewVarDef::is_list() const { + if (new_var_type_) { + if (*new_var_type_ == NewVarType::LIST) { + return true; + } + } + + return false; +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/NewVarDef.h b/convert/info/NewVarDef.h new file mode 100644 index 0000000..61e6b3c --- /dev/null +++ b/convert/info/NewVarDef.h @@ -0,0 +1,171 @@ +#pragma once + +#include + +#include +#include + +#include "clang/Tooling/Tooling.h" +#include "llvm/Support/CommandLine.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/AST/AST.h" +#include "clang/AST/StmtCXX.h" +#include "clang/AST/ASTConsumer.h" + +#include "../Type.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +enum class NewVarType { + SCALAR, + LIST, + MAP +}; + +/// 来自 adlog 的字段的类型 +enum class AdlogVarType { + NONE, + + /// 普通字段,如 adlog.item.type, adlog.user_info.id + NORMAL, + + /// 来自中间节点的叶子节点,如 photo_info->author_info()->id() + MIDDLE_NODE_ROOT, + + MIDDLE_NODE_LEAF, + + /// common info, 但是 enum_value 是通过模板参数或者变量指定的。 + COMMON_INFO_FIXED, + + /// 来自中间节点的 common info, 如 photo_info->common_info_attr() + COMMON_INFO_MIDDLE_NODE, + + COMMON_INFO_MULTI_INT_LIST, + + /// 来自 action_detail 的字段。 + ACTION_DETAIL_FIELD, + + /// 通过模板参数或者变量获取 action_detail + ACTION_DETAIL_FIXED, + + /// GetPhotoText + GET_PHOTO_TEXT +}; + +/// 新增加的变量定义,需要保存定义以及新的变量名, 新的变量名必须保证和已有的 var_decl 以及新增变量 +/// 不重复, 注意必须也要查看 parent_ 范围内的变量。采取从后面按 _ 取字段的方式逐个实验。 +/// 如 adlog_user_info_action_detail_follow_id, 从 follow_id 开始尝试,如果已被使用,则尝试 +/// detail_follow_id, 以此类推,知道找到合法的变量名。 +/// 中间节点以及 GetCommonInfo 类型的变量需要和普通字段进行区分,保存 root 信息以及 
field 等信息。 +/// 中间节点需要保存 root 以及 field, field 格式形如 author_info.id, +/// GetCommonInfo 需要保存 name_value_alias 信息,以便于和模板参数关联起来。 +class NewVarDef { + public: + NewVarDef() = default; + explicit NewVarDef(const std::string& bs_enum_str, + const std::string& name, + AdlogVarType adlog_var_type = AdlogVarType::NONE): + bs_enum_str_(bs_enum_str), + name_(name), + adlog_var_type_(adlog_var_type) {} + + /// 类型确定, common info 使用的变量。 + explicit NewVarDef(const std::string& bs_enum_str, + const std::string& name, + const std::string& var_def, + NewVarType new_var_type, + AdlogVarType adlog_var_type = AdlogVarType::NONE): + bs_enum_str_(bs_enum_str), + name_(name), + var_def_(var_def), + new_var_type_(new_var_type), + adlog_var_type_(adlog_var_type) {} + + const std::string& name() const { return name_; } + void set_name(const std::string& name); + + AdlogVarType adlog_var_type() const { return adlog_var_type_; } + + const std::string& bs_enum_str() const { return (bs_enum_str_); } + void set_bs_enum_str(const std::string& bs_enum_str) { bs_enum_str_ = bs_enum_str; } + const std::string& var_def() const { return var_def_; } + + const absl::optional& new_var_type() const { return (new_var_type_); } + void set_new_var_type(NewVarType new_var_type) { new_var_type_.emplace(new_var_type); } + + const std::string& exists_name() const { return (exists_name_); } + void set_exists_name(const std::string& exists_name) { exists_name_ = exists_name; } + + const std::string& exists_var_def() const { return (exists_var_def_); } + + void set_exists_var_def(const std::string& exists_name, const std::string& exists_var_def); + void set_var_def(const std::string& var_def, NewVarType new_var_type); + + const absl::optional& expr_type() const { return expr_type_; } + void set_expr_type(ExprType expr_type) { expr_type_.emplace(expr_type); } + + bool is_list() const; + + const std::string& adlog_field() const { return (adlog_field_); } + void set_adlog_field(const std::string& adlog_field) { 
adlog_field_ = adlog_field; } + + const absl::optional& list_inner_type() const { return (list_inner_type_); } + absl::optional& mutable_list_inner_type() { return (list_inner_type_); } + void set_list_inner_type(const std::string& inner_type) { list_inner_type_.emplace(inner_type); } + + /// 中间节点需要设置 + void set_middle_node_root(const std::string& middle_node_root) { + middle_node_root_.emplace(middle_node_root); + } + + const absl::optional& middle_node_root() const { return (middle_node_root_); } + + void set_middle_node_field(const std::string& middle_node_field) { + middle_node_field_.emplace(middle_node_field); + } + const absl::optional& middle_node_field() const { return (middle_node_field_); } + + /// GetCommonInfoFixed 需要设置 + void set_common_info_prefix_adlog(const std::string& common_info_prefix_adlog) { + common_info_prefix_adlog_.emplace(common_info_prefix_adlog); + } + const absl::optional& common_info_prefix_adlog() const { + return (common_info_prefix_adlog_); + } + + void set_common_info_name_value_alias(const std::string& common_info_name_value_alias) { + common_info_name_value_alias_.emplace(common_info_name_value_alias); + } + const absl::optional& common_info_name_value_alias() const { + return (common_info_name_value_alias_); + } + + void set_action_var_name(const std::string& action_var_name) {action_var_name_.emplace(action_var_name);} + const absl::optional& action_var_name() const { return (action_var_name_); } + + private: + std::string bs_enum_str_; + std::string name_; + std::string var_def_; + absl::optional new_var_type_; + std::string exists_name_; + std::string exists_var_def_; + absl::optional expr_type_; + std::string adlog_field_; + + /// list inner_type + absl::optional list_inner_type_; + + AdlogVarType adlog_var_type_; + absl::optional middle_node_root_; + absl::optional middle_node_field_; + absl::optional common_info_prefix_adlog_; + absl::optional common_info_name_value_alias_; + absl::optional action_var_name_; +}; + +} // 
namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/PrefixPair.cpp b/convert/info/PrefixPair.cpp new file mode 100644 index 0000000..9a5283f --- /dev/null +++ b/convert/info/PrefixPair.cpp @@ -0,0 +1,17 @@ +#include "../Tool.h" +#include "PrefixPair.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +PrefixPair::PrefixPair(const std::string& prefix_adlog) { + prefix_adlog_ = prefix_adlog; + prefix_ = tool::adlog_to_bs_enum_str(prefix_adlog); + LOG(INFO) << "prefix_: " << prefix_ + << ", prefix_adlog_: " << prefix_adlog; +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/PrefixPair.h b/convert/info/PrefixPair.h new file mode 100644 index 0000000..5609f1a --- /dev/null +++ b/convert/info/PrefixPair.h @@ -0,0 +1,28 @@ +#pragma once + +#include + +#include + +#include "clang/AST/Expr.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class PrefixPair { + public: + PrefixPair() = default; + explicit PrefixPair(const std::string& prefix_adlog); + + const std::string& prefix() const { return prefix_; } + const std::string& prefix_adlog() const { return prefix_adlog_; } + + protected: + std::string prefix_; + std::string prefix_adlog_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/ProtoListInfo.h b/convert/info/ProtoListInfo.h new file mode 100644 index 0000000..2c6b247 --- /dev/null +++ b/convert/info/ProtoListInfo.h @@ -0,0 +1,30 @@ +#pragma once + +#include + +#include +#include +#include +#include "clang/AST/Expr.h" +#include "../Type.h" +#include "NewVarDef.h" +#include "PrefixPair.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class ProtoListInfo : public PrefixPair { + public: + explicit ProtoListInfo(const std::string& prefix_adlog): PrefixPair(prefix_adlog) {} + + const std::vector& fields() const { return (fields_); } + void add_field(const std::string& 
field) { fields_.push_back(field); } + + private: + std::vector fields_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/SeqListInfo.cpp b/convert/info/SeqListInfo.cpp new file mode 100644 index 0000000..e61b0f8 --- /dev/null +++ b/convert/info/SeqListInfo.cpp @@ -0,0 +1,37 @@ +#include +#include + +#include "../Tool.h" +#include "NewVarDef.h" +#include "SeqListInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void SeqListInfo::update(const std::string &var_name, + const std::string &caller_name, + const std::string &type_str) { + var_name_ = var_name; + caller_name_ = caller_name; + type_str_ = type_str; +} + +std::string SeqListInfo::get_def() const { + std::ostringstream oss; + oss << "const auto& " << var_name_ << " = " << root_name_; + return oss.str(); +} + +NewVarType SeqListInfo::get_var_type() const { + // ::google::protobuf::RepeatedField<::google::protobuf::int64>* + if (type_str_.find("Repeated") != std::string::npos) { + return NewVarType::LIST; + } else { + return NewVarType::SCALAR; + } +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/SeqListInfo.h b/convert/info/SeqListInfo.h new file mode 100644 index 0000000..d2b46c2 --- /dev/null +++ b/convert/info/SeqListInfo.h @@ -0,0 +1,47 @@ +#pragma once + +#include + +#include +#include +#include +#include "clang/AST/Expr.h" +#include "../Type.h" +#include "NewVarDef.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class SeqListInfo { + public: + explicit SeqListInfo(const std::string& root_name): root_name_(root_name) {} + explicit SeqListInfo(const std::string& var_name, + const std::string& caller_name, + const std::string& type_str): + var_name_(var_name), + caller_name_(caller_name), + type_str_(type_str) {} + + const std::string& root_name() const { return (root_name_); } + const std::string& var_name() const { return (var_name_); } + const 
std::string& caller_name() const { return (caller_name_); } + const std::string& type_str() const { return (type_str_); } + + void update(const std::string &var_name, + const std::string &caller_name, + const std::string &type_str); + + std::string get_def() const; + NewVarType get_var_type() const; + + private: + std::string root_name_; + std::string var_name_; + std::string caller_name_; + std::string type_str_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/SwitchCaseInfo.h b/convert/info/SwitchCaseInfo.h new file mode 100644 index 0000000..2d05907 --- /dev/null +++ b/convert/info/SwitchCaseInfo.h @@ -0,0 +1,52 @@ +#pragma once + +#include + +#include + +#include "clang/Tooling/Tooling.h" +#include "llvm/Support/CommandLine.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/AST/AST.h" +#include "clang/AST/StmtCXX.h" +#include "clang/AST/ASTConsumer.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +enum class SwitchCaseStage { + COND, + BODY +}; + +/// 保存 if 相关的信息 +class SwitchCaseInfo { + public: + SwitchCaseInfo() = default; + + explicit SwitchCaseInfo(clang::CaseStmt* case_stmt): + case_stmt_(case_stmt), + switch_case_stage_(SwitchCaseStage::COND) {} + + clang::CaseStmt* case_stmt() const { return case_stmt_; } + + void set_switch_case_stage(SwitchCaseStage switch_case_stage) { switch_case_stage_ = switch_case_stage; } + SwitchCaseStage switch_case_stage() const { return switch_case_stage_; } + + void set_common_info_index(size_t index) { common_info_index_.emplace(index); } + const absl::optional& common_info_index() const { return (common_info_index_); } + + void set_common_info_value(int value) { common_info_value_.emplace(value); } + const absl::optional& common_info_value() const { return (common_info_value_); } + + private: + clang::CaseStmt* case_stmt_ = nullptr; + SwitchCaseStage switch_case_stage_; + absl::optional common_info_index_; + absl::optional 
common_info_value_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/TemplateParamInfo.h b/convert/info/TemplateParamInfo.h new file mode 100644 index 0000000..0a295c2 --- /dev/null +++ b/convert/info/TemplateParamInfo.h @@ -0,0 +1,43 @@ +#pragma once + +#include +#include +#include "clang/AST/Type.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class TemplateParamInfo { + public: + TemplateParamInfo() = default; + + const std::string& name() const { return (name_); } + void set_name(const std::string& name) { name_ = name; } + + const std::string& value_str() const { return (value_str_); } + + const absl::optional& enum_value() const { return (enum_value_); } + void set_enum_value(int enum_value) { enum_value_.emplace(enum_value); } + + void set_value_str(const std::string& value_str) { + value_str_ = value_str; + } + void set_value(const std::string& value_str, int enum_value) { + value_str_ = value_str; + enum_value_.emplace(enum_value); + } + + const absl::optional& qual_type() const { return (qual_type_); } + void set_qual_type(clang::QualType qual_type) { qual_type_.emplace(qual_type); } + + private: + std::string name_; + std::string value_str_; + absl::optional enum_value_; + absl::optional qual_type_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/VarDeclInfo.cpp b/convert/info/VarDeclInfo.cpp new file mode 100644 index 0000000..eff66f2 --- /dev/null +++ b/convert/info/VarDeclInfo.cpp @@ -0,0 +1,32 @@ +#include "VarDeclInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void VarDeclInfo::add(const std::string& name, clang::Expr* init_expr) { + var_decls_.emplace(name, DeclInfo(name, init_expr)); +} + +void VarDeclInfo::add(const std::string& name, clang::Expr* init_expr, clang::DeclStmt* decl_stmt) { + var_decls_.emplace(name, DeclInfo(name, init_expr, decl_stmt)); +} + +clang::Expr* 
VarDeclInfo::find(const std::string& name) { + auto it = var_decls_.find(name); + if (it == var_decls_.end()) { + return nullptr; + } + + return it->second.init_expr(); +} + +void VarDeclInfo::update(const std::map& decls) { + for (auto it = decls.begin(); it != decls.end(); it++) { + var_decls_.emplace(it->first, DeclInfo(it->first, it->second)); + } +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/info/VarDeclInfo.h b/convert/info/VarDeclInfo.h new file mode 100644 index 0000000..1f35596 --- /dev/null +++ b/convert/info/VarDeclInfo.h @@ -0,0 +1,37 @@ +#pragma once + +#include + +#include +#include +#include + +#include "clang/AST/Expr.h" +#include "clang/AST/Stmt.h" + +#include "../Type.h" +#include "DeclInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class VarDeclInfo { + public: + VarDeclInfo() = default; + + void add(const std::string& name, clang::Expr* init_expr); + void add(const std::string& name, clang::Expr* init_expr, clang::DeclStmt* decl_stmt); + clang::Expr* find(const std::string& name); + + const std::unordered_map& var_decls() const { return var_decls_; } + + void update(const std::map& decls); + + private: + std::unordered_map var_decls_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/matcher_callback/BSFeatureDeclCallback.cpp b/convert/matcher_callback/BSFeatureDeclCallback.cpp new file mode 100644 index 0000000..aaa07de --- /dev/null +++ b/convert/matcher_callback/BSFeatureDeclCallback.cpp @@ -0,0 +1,109 @@ +#include +#include +#include +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclTemplate.h" + +#include "../Tool.h" +#include "../Config.h" +#include "../info/FeatureInfo.h" +#include "../visitor/BSCtorVisitor.h" +#include "../visitor/BSExtractMethodVisitor.h" +#include "./BSFeatureDeclCallback.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void 
BSFeatureDeclCallback::run(const clang::ast_matchers::MatchFinder::MatchResult &Result) { + auto config = GlobalConfig::Instance(); + + if (const clang::CXXRecordDecl* cxx_record_decl = Result.Nodes.getNodeAs("BSFeatureDecl")) { + std::string feature_name = cxx_record_decl->getNameAsString(); + if (tool::is_skip(feature_name)) { + LOG(INFO) << "feature_name: " << feature_name << ", skip"; + return; + } + + if (config->dump_ast) { + cxx_record_decl->dump(); + } + + FeatureInfo* feature_info_ptr = GlobalConfig::Instance()->feature_info_ptr(feature_name); + if (feature_info_ptr == nullptr) { + LOG(INFO) << "feature_info_ptr is nullptr! feature_name: " << feature_name; + return; + } + + std::string origin_file = Result.SourceManager->getFilename(cxx_record_decl->getBeginLoc()).str(); + + feature_info_ptr->set_feature_name(feature_name); + feature_info_ptr->set_origin_file(origin_file); + feature_info_ptr->set_is_template(cxx_record_decl->isTemplated()); + feature_info_ptr->set_file_id(Result.SourceManager->getFileID(cxx_record_decl->getBeginLoc())); + feature_info_ptr->set_has_cc_file(tool::has_cc_file(origin_file)); + + LOG(INFO) << "find class, start process, feature_name: " << feature_name + << ", is_template: " << cxx_record_decl->isTemplated() + << ", template_common_info_int_values: " + << absl::StrJoin(feature_info_ptr->template_common_info_values(), ",") + << ", origin_file: " << origin_file; + + if (feature_info_ptr->has_cc_file()) { + std::string cc_filename = std::regex_replace(origin_file, std::regex("\\.h"), ".cc"); + feature_info_ptr->set_cc_filename(cc_filename); + } + + for (auto it_field = cxx_record_decl->field_begin(); it_field != cxx_record_decl->field_end(); it_field++) { + process_field(*it_field, feature_info_ptr); + } + + for (auto it_ctor = cxx_record_decl->ctor_begin(); it_ctor != cxx_record_decl->ctor_end(); it_ctor++) { + process_ctor(*it_ctor, feature_info_ptr); + } + + process_all_methods(cxx_record_decl, feature_info_ptr); + } +} + +void 
BSFeatureDeclCallback::process_ctor(clang::CXXConstructorDecl* cxx_constructor_decl, + FeatureInfo* feature_info_ptr) { + BSCtorVisitor bs_ctor_visitor(rewriter_); + bs_ctor_visitor.visit(cxx_constructor_decl, feature_info_ptr); +} + +void BSFeatureDeclCallback::process_field(clang::FieldDecl* field_decl, + FeatureInfo* feature_info_ptr) { +} + +void BSFeatureDeclCallback::process_all_methods(const clang::CXXRecordDecl* cxx_record_decl, + FeatureInfo* feature_info_ptr) { + for (auto it_method = cxx_record_decl->method_begin(); + it_method != cxx_record_decl->method_end(); it_method++) { + if ((*it_method)->getNameAsString() != EXTRACT) { + process_method(*it_method, feature_info_ptr); + } + } + + for (auto it_method = cxx_record_decl->method_begin(); it_method != cxx_record_decl->method_end(); it_method++) { + if ((*it_method)->getNameAsString() == EXTRACT) { + process_method(*it_method, feature_info_ptr); + } + } +} + +void BSFeatureDeclCallback::process_method(clang::CXXMethodDecl* cxx_method_decl, + FeatureInfo* feature_info_ptr) { + if (cxx_method_decl == nullptr || feature_info_ptr == nullptr) { + LOG(INFO) << "cxx_method_decl or feature_info_ptr is nullptr!"; + return; + } + + BSExtractMethodVisitor bs_extract_method_visitor(rewriter_); + auto res = bs_extract_method_visitor.visit(cxx_method_decl, feature_info_ptr); +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/matcher_callback/BSFeatureDeclCallback.h b/convert/matcher_callback/BSFeatureDeclCallback.h new file mode 100644 index 0000000..484816f --- /dev/null +++ b/convert/matcher_callback/BSFeatureDeclCallback.h @@ -0,0 +1,44 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include + +#include "clang/Frontend/FrontendActions.h" +#include "clang/Tooling/Tooling.h" +// Declares llvm::cl::extrahelp. 
+#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/AST/Expr.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class FeatureInfo; + +/// 解析 BS 类的逻辑。 +class BSFeatureDeclCallback : public clang::ast_matchers::MatchFinder::MatchCallback { + public: + BSFeatureDeclCallback() = default; + explicit BSFeatureDeclCallback(clang::Rewriter &rewriter): rewriter_(rewriter) {} // NOLINT + + void run(const clang::ast_matchers::MatchFinder::MatchResult &Result); + + void process_ctor(clang::CXXConstructorDecl* cxx_constructor_decl, FeatureInfo* feature_info_ptr); + void process_field(clang::FieldDecl* field_decl, FeatureInfo* feature_info_ptr); + void process_method(clang::CXXMethodDecl* cxx_method_decl, FeatureInfo* feature_info_ptr); + void process_all_methods(const clang::CXXRecordDecl* cxx_record_decl, FeatureInfo* feature_info_ptr); + + private: + clang::Rewriter& rewriter_; + const std::string EXTRACT = "Extract"; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/matcher_callback/BSTypeAliasCallback.cpp b/convert/matcher_callback/BSTypeAliasCallback.cpp new file mode 100644 index 0000000..bda1a63 --- /dev/null +++ b/convert/matcher_callback/BSTypeAliasCallback.cpp @@ -0,0 +1,82 @@ +#include + +#include + +#include "clang/AST/Decl.h" + +#include "../Env.h" +#include "../Tool.h" +#include "../Config.h" +#include "../ExprInfo.h" +#include "../ExprParser.h" +#include "../info/FeatureInfo.h" +#include "BSTypeAliasCallback.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void BSTypeAliasCallback::run(const clang::ast_matchers::MatchFinder::MatchResult &Result) { + auto config = GlobalConfig::Instance(); + + if (const clang::Decl* decl = Result.Nodes.getNodeAs("BSTypeAlias")) { + if (const clang::TypeAliasDecl* type_alias_decl = dyn_cast(decl)) { + if (const clang::TypedefNameDecl* typedef_name_decl = 
type_alias_decl->getMostRecentDecl()) { + clang::QualType qual_type = typedef_name_decl->getUnderlyingType(); + if (starts_with(qual_type.getAsString(), "BSExtract")) { + if (config->dump_ast) { + decl->dump(); + } + + if (const clang::TemplateSpecializationType* tmpl_type = + dyn_cast(qual_type.getTypePtr())) { + clang::TemplateName template_name = tmpl_type->getTemplateName(); + + if (template_name.getKind() == clang::TemplateName::NameKind::Template) { + if (clang::TemplateDecl* template_decl = template_name.getAsTemplateDecl()) { + std::string feature_name = template_decl->getNameAsString(); + FeatureInfo* feature_info_ptr = GlobalConfig::Instance()->feature_info_ptr(feature_name); + if (feature_info_ptr == nullptr) { + LOG(INFO) << "feature_info_ptr is nullptr!"; + return; + } + + auto template_arguments = tmpl_type->template_arguments(); + + for (size_t i = 0; i < template_arguments.size(); i++) { + const clang::TemplateArgument& arg = template_arguments[i]; + + process_template_param(arg); + + if (arg.getKind() == clang::TemplateArgument::ArgKind::Expression) { + clang::Expr* expr = arg.getAsExpr(); + if (expr != nullptr && tool::is_common_info_enum(expr->getType())) { + if (absl::optional int_value = find_common_attr_int_value(expr)) { + LOG(INFO) << "find common info enum in template param, class: " << feature_name + << ", expr: " << stmt_to_string(expr) + << ", int_value: " << *int_value; + feature_info_ptr->add_template_common_info_value(*int_value); + } + } + } else if (arg.getKind() == clang::TemplateArgument::ArgKind::Type) { + // 暂时忽略 + } + } + + LOG(INFO) << "find template_common_info_int_values: " + << absl::StrJoin(feature_info_ptr->template_common_info_values(), ","); + } + } + } + } + } + } + } +} + +void BSTypeAliasCallback::process_template_param(const clang::TemplateArgument& arg) { +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/matcher_callback/BSTypeAliasCallback.h 
b/convert/matcher_callback/BSTypeAliasCallback.h new file mode 100644 index 0000000..9116fed --- /dev/null +++ b/convert/matcher_callback/BSTypeAliasCallback.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include + +#include "../handler/StrictRewriter.h" +#include "clang/AST/Expr.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Frontend/FrontendActions.h" +#include "clang/Tooling/Tooling.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +/// 通过 TypeAliasDecl 获取模板参数。 +/// 目前主要是获取 common info 模板参数, 保存在 FeatureInfo 中。这一步必须在解析 Extract 之前。 +class BSTypeAliasCallback : public clang::ast_matchers::MatchFinder::MatchCallback { + public: + BSTypeAliasCallback() = default; + + void run(const clang::ast_matchers::MatchFinder::MatchResult &Result); + + void process_template_param(const clang::TemplateArgument& arg); +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/matcher_callback/FeatureDeclCallback.cpp b/convert/matcher_callback/FeatureDeclCallback.cpp new file mode 100644 index 0000000..a428c0d --- /dev/null +++ b/convert/matcher_callback/FeatureDeclCallback.cpp @@ -0,0 +1,136 @@ +#include + +#include +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclTemplate.h" + +#include "../Tool.h" +#include "../Config.h" +#include "../info/FeatureInfo.h" +#include "../visitor/CtorVisitor.h" +#include "../visitor/FieldDeclVisitor.h" +#include "../visitor/ExtractMethodVisitor.h" +#include "./FeatureDeclCallback.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void FeatureDeclCallback::run(const clang::ast_matchers::MatchFinder::MatchResult &Result) { + auto config = GlobalConfig::Instance(); + + if (const clang::CXXRecordDecl* cxx_record_decl = Result.Nodes.getNodeAs("FeatureDecl")) { + std::string feature_name = cxx_record_decl->getNameAsString(); + if 
(tool::is_skip(feature_name)) { + LOG(INFO) << "feature_name: " << feature_name << ", skip"; + return; + } + + if (config->dump_ast) { + cxx_record_decl->dump(); + } + + FeatureInfo* feature_info_ptr = GlobalConfig::Instance()->feature_info_ptr(feature_name); + if (feature_info_ptr == nullptr) { + LOG(INFO) << "feature_info_ptr is nullptr! feature_name: " << feature_name; + return; + } + + LOG(INFO) << "find class, start process, feature_name: " << feature_name + << ", is_template: " << cxx_record_decl->isTemplated() + << ", template_common_info_int_values: " + << absl::StrJoin(feature_info_ptr->template_common_info_values(), ","); + + std::string origin_file = Result.SourceManager->getFilename(cxx_record_decl->getBeginLoc()).str(); + + feature_info_ptr->set_feature_name(feature_name); + feature_info_ptr->set_origin_file(origin_file); + feature_info_ptr->set_is_template(cxx_record_decl->isTemplated()); + feature_info_ptr->set_file_id(Result.SourceManager->getFileID(cxx_record_decl->getBeginLoc())); + feature_info_ptr->set_has_cc_file(tool::has_cc_file(origin_file)); + + if (cxx_record_decl->isTemplated()) { + process_template_params(cxx_record_decl, feature_info_ptr); + } + + std::string origin_buffer;; + llvm::raw_string_ostream raw_string(origin_buffer); + rewriter_.getEditBuffer(feature_info_ptr->file_id()).write(raw_string); + feature_info_ptr->set_origin_buffer(origin_buffer); + + // 必须按照 field, ctor, method 的顺序 + for (auto it_field = cxx_record_decl->field_begin(); it_field != cxx_record_decl->field_end(); it_field++) { + process_field(*it_field, feature_info_ptr); + } + + for (auto it_ctor = cxx_record_decl->ctor_begin(); it_ctor != cxx_record_decl->ctor_end(); it_ctor++) { + process_ctor(*it_ctor, feature_info_ptr); + } + + process_all_methods(cxx_record_decl, feature_info_ptr); + } +} + +void FeatureDeclCallback::process_ctor(clang::CXXConstructorDecl* cxx_constructor_decl, + FeatureInfo* feature_info_ptr) { + CtorVisitor ctor_visitor(rewriter_); + 
ctor_visitor.visit(cxx_constructor_decl, feature_info_ptr); +} + +void FeatureDeclCallback::process_field(clang::FieldDecl* field_decl, + FeatureInfo* feature_info_ptr) { + FieldDeclVisitor field_decl_visitor(rewriter_); + field_decl_visitor.visit(field_decl, feature_info_ptr); +} + +// 先访问其他方法,再访问 `Extract` +void FeatureDeclCallback::process_all_methods(const clang::CXXRecordDecl* cxx_record_decl, + FeatureInfo* feature_info_ptr) { + for (auto it_method = cxx_record_decl->method_begin(); it_method != cxx_record_decl->method_end(); it_method++) { + if ((*it_method)->getNameAsString() != EXTRACT) { + process_method(*it_method, feature_info_ptr); + } + } + + for (auto it_method = cxx_record_decl->method_begin(); it_method != cxx_record_decl->method_end(); it_method++) { + if ((*it_method)->getNameAsString() == EXTRACT) { + process_method(*it_method, feature_info_ptr); + } + } +} + +void FeatureDeclCallback::process_method(clang::CXXMethodDecl* cxx_method_decl, + FeatureInfo* feature_info_ptr) { + if (cxx_method_decl == nullptr || feature_info_ptr == nullptr) { + LOG(INFO) << "cxx_method_decl or feature_info_ptr is nullptr!"; + return; + } + + ExtractMethodVisitor extract_method_visitor(rewriter_); + extract_method_visitor.visit(cxx_method_decl, feature_info_ptr); +} + +void FeatureDeclCallback::process_template_params(const clang::CXXRecordDecl* cxx_record_decl, + FeatureInfo* feature_info_ptr) { + if (cxx_record_decl == nullptr || feature_info_ptr == nullptr) { + return; + } + + if (clang::ClassTemplateDecl* class_template_decl = cxx_record_decl->getDescribedClassTemplate()) { + if (clang::TemplateParameterList* template_parameter_list = class_template_decl->getTemplateParameters()) { + std::vector param_names(template_parameter_list->size()); + for (size_t i = 0; i < template_parameter_list->size(); i++) { + if (clang::NamedDecl* param = template_parameter_list->getParam(i)) { + param_names[i] = param->getNameAsString(); + } + } + + 
feature_info_ptr->set_template_param_names(param_names); + } + } +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/matcher_callback/FeatureDeclCallback.h b/convert/matcher_callback/FeatureDeclCallback.h new file mode 100644 index 0000000..5677617 --- /dev/null +++ b/convert/matcher_callback/FeatureDeclCallback.h @@ -0,0 +1,53 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include + +#include "clang/Frontend/FrontendActions.h" +#include "clang/Tooling/Tooling.h" +// Declares llvm::cl::extrahelp. +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/AST/Expr.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class FeatureInfo; + +/// 处理特征类的属性。 +class FeatureDeclCallback : public clang::ast_matchers::MatchFinder::MatchCallback { + public: + FeatureDeclCallback() = default; + explicit FeatureDeclCallback(clang::Rewriter &rewriter): rewriter_(rewriter) {} // NOLINT + + void run(const clang::ast_matchers::MatchFinder::MatchResult &Result); + + void process_ctor(clang::CXXConstructorDecl* cxx_constructor_decl, + FeatureInfo* feature_info_ptr); + + void process_field(clang::FieldDecl* field_decl, FeatureInfo* feature_info_ptr); + + void process_method(clang::CXXMethodDecl* cxx_method_decl, FeatureInfo* feature_info_ptr); + + /// `Extract` 会调用其他方法,因此必须先访问其他方法,再访问 `Extract`。 + void process_all_methods(const clang::CXXRecordDecl* cxx_record_decl, + FeatureInfo* feature_info_ptr); + + void process_template_params(const clang::CXXRecordDecl* cxx_record_decl, + FeatureInfo* feature_info_ptr); + + private: + clang::Rewriter& rewriter_; + const std::string EXTRACT = "Extract"; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/matcher_callback/InferFilterCallback.cpp b/convert/matcher_callback/InferFilterCallback.cpp new file mode 100644 index 0000000..c1fb56c --- /dev/null +++ 
b/convert/matcher_callback/InferFilterCallback.cpp @@ -0,0 +1,61 @@ +#include + +#include +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclTemplate.h" + +#include "../Tool.h" +#include "../Config.h" +#include "../info/FeatureInfo.h" +#include "../visitor/ExtractMethodVisitor.h" +#include "InferFilterCallback.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void InferFilterCallback::run(const clang::ast_matchers::MatchFinder::MatchResult &Result) { + auto config = GlobalConfig::Instance(); + + if (const clang::CXXRecordDecl* cxx_record_decl = + Result.Nodes.getNodeAs("InferFilter")) { + if (config->dump_ast) { + cxx_record_decl->dump(); + } + + std::string feature_name = "ItemFilter"; + FeatureInfo* feature_info_ptr = GlobalConfig::Instance()->feature_info_ptr(feature_name); + if (feature_info_ptr == nullptr) { + LOG(INFO) << "feature_info_ptr is nullptr! feature_name: " << feature_name; + return; + } + + LOG(INFO) << "find class, start process, feature_name: " << feature_name + << ", is_template: " << cxx_record_decl->isTemplated() + << ", template_common_info_int_values: " + << absl::StrJoin(feature_info_ptr->template_common_info_values(), ","); + + std::string origin_file = + Result.SourceManager->getFilename(cxx_record_decl->getBeginLoc()).str(); + + feature_info_ptr->set_feature_name(feature_name); + feature_info_ptr->set_origin_file(origin_file); + feature_info_ptr->set_file_id(Result.SourceManager->getFileID(cxx_record_decl->getBeginLoc())); + + process_all_methods(cxx_record_decl, feature_info_ptr); + } +} + +void InferFilterCallback::process_all_methods(const clang::CXXRecordDecl* cxx_record_decl, + FeatureInfo* feature_info_ptr) { + for (auto it_method = cxx_record_decl->method_begin(); it_method != cxx_record_decl->method_end(); + it_method++) { + ExtractMethodVisitor extract_method_visitor(rewriter_); + extract_method_visitor.visit(*it_method, feature_info_ptr); + } +} + +} // namespace convert 
+} // namespace ad_algorithm +} // namespace ks diff --git a/convert/matcher_callback/InferFilterCallback.h b/convert/matcher_callback/InferFilterCallback.h new file mode 100644 index 0000000..5301fbf --- /dev/null +++ b/convert/matcher_callback/InferFilterCallback.h @@ -0,0 +1,55 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include + +#include "clang/Frontend/FrontendActions.h" +#include "clang/Tooling/Tooling.h" +// Declares llvm::cl::extrahelp. +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/AST/Expr.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class FeatureInfo; + +/// 处理 ad_n/utils/item_filter.h 中 ItemFilter 的函数。 +/// 注意: 此处的 ItemFilter 不是训练用的 ItemFilter,训练用的 ItemFilter 是指 +/// ad_algorithm/log_preprocess/item_filter 目录下的。 +/// +/// 示例: +/// static inline bool OcpxActionTypeFilter(const ItemAdaptorBase& item, +/// const FilterCondition& filter_condition) { +/// int ocpc_action_type = item.ad_dsp_info().unit().base().ocpc_action_type(); +/// if (filter_condition.ocpx_action_type_set.count(ocpc_action_type) == 0) { +/// return true; +/// } +/// return false; +/// } +/// +/// 函数名不同,但是接口相同,参数都是 item 和 filter_condition。 +/// 需要将来自 item 的字段都认为是来自 adlog。 +class InferFilterCallback : public clang::ast_matchers::MatchFinder::MatchCallback { + public: + InferFilterCallback() = default; + explicit InferFilterCallback(clang::Rewriter &rewriter): rewriter_(rewriter) {} // NOLINT + + void run(const clang::ast_matchers::MatchFinder::MatchResult &Result); + + void process_all_methods(const clang::CXXRecordDecl* cxx_record_decl, FeatureInfo* feature_info_ptr); + + private: + clang::Rewriter& rewriter_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/matcher_callback/TypeAliasCallback.cpp b/convert/matcher_callback/TypeAliasCallback.cpp new file mode 100644 index 0000000..b2be254 --- /dev/null +++ 
b/convert/matcher_callback/TypeAliasCallback.cpp @@ -0,0 +1,118 @@ +#include + +#include + +#include "clang/AST/Decl.h" + +#include "../Env.h" +#include "../Tool.h" +#include "../Config.h" +#include "../ExprInfo.h" +#include "../ExprParser.h" +#include "../info/FeatureInfo.h" +#include "../info/TemplateParamInfo.h" +#include "TypeAliasCallback.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void TypeAliasCallback::run(const clang::ast_matchers::MatchFinder::MatchResult &Result) { + auto config = GlobalConfig::Instance(); + + if (const clang::Decl* decl = Result.Nodes.getNodeAs("TypeAlias")) { + if (const clang::TypeAliasDecl* type_alias_decl = dyn_cast(decl)) { + if (const clang::TypedefNameDecl* typedef_name_decl = type_alias_decl->getMostRecentDecl()) { + clang::QualType qual_type = typedef_name_decl->getUnderlyingType(); + if (starts_with(qual_type.getAsString(), "Extract")) { + if (config->dump_ast) { + decl->dump(); + } + + if (const clang::TemplateSpecializationType* tmpl_type = + dyn_cast(qual_type.getTypePtr())) { + clang::TemplateName template_name = tmpl_type->getTemplateName(); + + if (template_name.getKind() == clang::TemplateName::NameKind::Template) { + if (clang::TemplateDecl* template_decl = template_name.getAsTemplateDecl()) { + std::string feature_name = template_decl->getNameAsString(); + FeatureInfo* feature_info_ptr = GlobalConfig::Instance()->feature_info_ptr(feature_name); + if (feature_info_ptr == nullptr) { + LOG(INFO) << "feature_info_ptr is nullptr!"; + return; + } + + std::string special_name = typedef_name_decl->getNameAsString(); + feature_info_ptr->add_specialization_class(special_name); + + auto template_arguments = tmpl_type->template_arguments(); + + for (size_t i = 0; i < template_arguments.size(); i++) { + const clang::TemplateArgument& arg = template_arguments[i]; + + process_template_param(arg); + + if (arg.getKind() == clang::TemplateArgument::ArgKind::Expression) { + clang::Expr* expr = arg.getAsExpr(); 
+ TemplateParamInfo* param_ptr = + feature_info_ptr->touch_template_param_ptr(special_name, i); + if (param_ptr == nullptr) { + LOG(INFO) << "param_ptr is nullptr! name: " << special_name << ", index: " << i; + continue; + } + + param_ptr->set_value_str(stmt_to_string(expr)); + if (expr != nullptr) { + param_ptr->set_qual_type(expr->getType()); + + if (tool::is_common_info_enum(expr->getType())) { + if (absl::optional int_value = find_common_attr_int_value(expr)) { + LOG(INFO) << "find common info enum in template param, class: " << feature_name + << ", expr: " << stmt_to_string(expr) << ", int_value: " << *int_value; + param_ptr->set_enum_value(*int_value); + } + } + } + } else if (arg.getKind() == clang::TemplateArgument::ArgKind::Type) { + // 暂时忽略 + } + } + + LOG(INFO) << "find template_common_info_int_values: " + << absl::StrJoin(feature_info_ptr->template_common_info_values(), ","); + } + } + } + } + } + } + } +} + +void TypeAliasCallback::process_template_param(const clang::TemplateArgument& arg) { + Env env; + + if (arg.getKind() == clang::TemplateArgument::ArgKind::Expression) { + clang::Expr *expr = arg.getAsExpr(); + if (expr == nullptr) { + return; + } + + auto expr_info_ptr = parse_expr(expr, &env); + if (expr_info_ptr == nullptr) { + return; + } + + if (expr_info_ptr->is_item_type_enum()) { + std::string new_text = std::string("bs::ItemType::") + expr_info_ptr->get_ad_enum_name(); + LOG(INFO) << "find template ad enum param: " << expr_info_ptr->origin_expr_str() + << ", type: " << expr->getType().getAsString() + << ", replace: " << new_text; + rewriter_.ReplaceText(expr, new_text); + } + } +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/matcher_callback/TypeAliasCallback.h b/convert/matcher_callback/TypeAliasCallback.h new file mode 100644 index 0000000..49cf684 --- /dev/null +++ b/convert/matcher_callback/TypeAliasCallback.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include + +#include +#include 
+#include +#include + +#include "../handler/StrictRewriter.h" +#include "clang/AST/Expr.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Frontend/FrontendActions.h" +#include "clang/Tooling/Tooling.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +/// 通过 TypeAliasDecl 获取模板参数。 +/// 目前主要是获取 common info 模板参数, 保存在 FeatureInfo 中。这一步必须在解析 Extract 之前。 +class TypeAliasCallback : public clang::ast_matchers::MatchFinder::MatchCallback { + public: + TypeAliasCallback() = default; + explicit TypeAliasCallback(clang::Rewriter &rewriter): rewriter_(rewriter) {} // NOLINT + + void run(const clang::ast_matchers::MatchFinder::MatchResult &Result); + + void process_template_param(const clang::TemplateArgument& arg); + + private: + StrictRewriter rewriter_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/proto_parser/BUILD b/convert/proto_parser/BUILD new file mode 100644 index 0000000..60455d1 --- /dev/null +++ b/convert/proto_parser/BUILD @@ -0,0 +1,18 @@ +import os + +cc_library( +name = "proto_parser", +srcs = ["*.cc"], +deps = [ + "//third_party/nlohmann_json/BUILD:nlohmann_json", + "//third_party/glog/BUILD:glog", + "//third_party/abseil/BUILD:abseil", +], +cppflags = [ + "-Wno-unused-variable", + "-Wno-unused-parameter", + "-Wno-unknown-pragmas", + "-Wno-unused-local-typedefs", +], +link_all_symbols = True +) diff --git a/convert/proto_parser/proto_node.cc b/convert/proto_parser/proto_node.cc new file mode 100644 index 0000000..ea5437d --- /dev/null +++ b/convert/proto_parser/proto_node.cc @@ -0,0 +1,734 @@ +#include // NOLINT +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "./util.h" +#include "./proto_node.h" +#include "./proto_parser.h" + +namespace ks { +namespace ad_algorithm { +namespace proto_parser { + +void AdlogPathDetail::add_common_info_node(const AdlogNode* parent, 
int64_t int_value) { + enum_node = std::make_shared(std::to_string(int_value), "int64", int_value); + + enum_node->set_parent(parent); + enum_node->set_is_enum(true); + + adlog_path_type = AdlogPathType::COMMON_INFO; + type_str = "int64"; + + adlog_node = enum_node.get(); +} + +AdlogNode::AdlogNode(const std::string& name, const std::string& type_str, int index): + name_(name), type_str_(type_str), index_(index) {} + +const AdlogNode* AdlogNode::parent() const { + return parent_; +} + +void AdlogNode::set_parent(AdlogNode* parent) { + parent_ = parent; +} + +void AdlogNode::set_parent(const AdlogNode* parent) { + parent_ = parent; +} + +bool AdlogNode::has_child(const std::string& child_name) const { + return children_.find(child_name) != children_.end(); +} + +std::string AdlogNode::get_adlog_path() const { + if (parent_ == nullptr) { + return name_; + } + + if (is_common_info_leaf()) { + return parent_->get_adlog_path() + std::string(".key:") + std::to_string(index_); + } else if (is_action_detail_map()) { + LOG(ERROR) << "cur node is action_detail_map, must call with action number: get_adlog_path(action)"; + return ""; + } else if (is_label_infos_leaf()) { + LOG(ERROR) << "cur node is LabelAttr, must call with int key and label_info_key: get_adlog_path(key)"; + return ""; + } else { + return parent_->get_adlog_path() + std::string(".") + name_; + } +} + +std::string AdlogNode::get_adlog_path(int key) const { + if (parent_ == nullptr) { + return name_; + } + + if (is_common_info_leaf()) { + return parent_->get_adlog_path(key) + std::string(".key:") + std::to_string(index_); + } else if (is_action_detail_map()) { + return parent_->get_adlog_path(key) + std::string(".") + + name_ + std::string(".key:") + std::to_string(key); + } else if (is_label_infos_leaf()) { + return parent_->get_adlog_path(key) + std::string(".key:") + std::to_string(key); + } else { + return parent_->get_adlog_path(key) + std::string(".") + name_; + } +} + +std::string 
AdlogNode::get_bslog_path() const { + if (parent_ == nullptr) { + return name_; + } + + if (is_tube_item_info()) { + return "item_tube_item_info"; + } else if (is_tube_user_info()) { + return "user_info_tube_user_info"; + } else if (is_common_info_leaf()) { + return parent_->get_bslog_path() + std::string("_key_") + std::to_string(index_); + } else if (is_action_detail_map()) { + LOG(ERROR) << "cur node is action_detail_map, must call with action number: get_bslog_path(action)"; + return ""; + } else if (is_label_infos_leaf()) { + LOG(ERROR) << "cur node is LabelAttr, must call with int key and label_info_key: get_bslog_path(key)"; + return ""; + } else { + return parent_->get_bslog_path() + std::string("_") + name_; + } +} + +std::string AdlogNode::get_bslog_path(int key) const { + if (parent_ == nullptr) { + return name_; + } + + if (is_tube_item_info()) { + return "item_tube_item_info"; + } else if (is_tube_user_info()) { + return "user_info_tube_user_info"; + } else if (is_common_info_leaf()) { + return parent_->get_bslog_path(key) + std::string("_key_") + std::to_string(index_); + } else if (is_action_detail_map()) { + return parent_->get_bslog_path(key) + std::string("_") + + name_ + std::string("_key_") + std::to_string(key); + } else if (is_label_infos_leaf()) { + return parent_->get_bslog_path(key) + std::string("_key_") + std::to_string(key); + } else { + return parent_->get_bslog_path(key) + std::string("_") + name_; + } +} + +absl::optional AdlogNode::get_int_value_from_path(const std::string& bs_enum_str) const { + // 注意: 只能包含一组 key:int, 如果有多个会有问题。 + // 目前 CommonInfo, ActionDetail, LabelAttr 都是一组 key:int。 + std::regex p("^key[_:](\\d+)([_\\.]key|[_\\.]value)?"); + std::smatch m; + if (std::regex_search(bs_enum_str, m, p)) { + if (m.size() > 1) { + if (is_str_integer(m[1])) { + return absl::make_optional(std::stoi(m[1])); + } else { + return absl::nullopt; + } + } + } + + return absl::nullopt; +} + +std::string AdlogNode::find_field_comment(const 
std::string& bs_enum_str) const { + const AdlogNode* field = find_proto_node(bs_enum_str); + if (field != nullptr) { + if (field->is_enum()) { + return field->name() + "," + field->comment(); + } else { + return field->comment(); + } + } + + return ""; +} + +std::string AdlogNode::find_field_type_str(const std::string& bs_enum_str) const { + const AdlogNode* field = find_proto_node(bs_enum_str); + if (field != nullptr) { + return field->type_str(); + } + + return ""; +} + +void AdlogNode::find_middle_node_root(const std::string& middle_node_root, + std::vector* root_arr) const { + if (root_arr == nullptr) { + return; + } + + for (auto it = children_.begin(); it != children_.end(); it++) { + if (it->second != nullptr) { + it->second->find_middle_node_root(middle_node_root, root_arr); + } + } +} + +bool AdlogNode::is_str_all_uppercase(const std::string& s) const { + if (s.size() == 0) { + return false; + } + + for (size_t i = 0; i < s.size(); i++) { + if (std::isupper(s[i]) || s[i] == '_' || std::isdigit(s[i])) { + continue; + } + + return false; + } + + return true; +} + +// 需要判断是否以类型结尾。 +// 包含 : 的只有两种情况,一种是 CommonInfo, 如 +// adlog.user_info.common_info_attr.APP_LIST:int64_list, adlog.user_info.common_info_attr.key:2:int64_list, +// 另一种是中间的 ActionDetail map, 如 adlog.user_info.ad_dsp_action_detail.key:3.list.photo_id。 +// +// 可以按如下逻辑进行处理: +// 1. 按冒号拆分 adlog_field_str。 +// 2. 判断拆分后的最后一个 str 是否是类型,所有的类型可以提前列举出来。 +// 3. 
如果最后一个 str 是类型,则将之前的 str 作为 find_proto_node_helper 的参数,否则将 adlog_field_str +// 整体作为 find_proto_node_helper 的参数。 +const AdlogNode* AdlogNode::find_proto_node(const std::string& adlog_field_str) const { + std::vector arr = absl::StrSplit(adlog_field_str, ":"); + if (arr.size() == 0) { + return nullptr; + } + + if (is_unified_type_str(arr[arr.size() - 1])) { + return find_proto_node_helper(absl::StrJoin(arr.begin(), arr.end() - 1, ":")); + } else { + return find_proto_node_helper(adlog_field_str); + } +} + +const AdlogNode* AdlogNode::find_proto_node_helper(const std::string& adlog_field_str) const { + auto adlog_path_detail = find_proto_path_detail_helper(adlog_field_str); + if (adlog_path_detail) { + return adlog_path_detail->adlog_node; + } + return nullptr; +} + +absl::optional +AdlogNode::find_proto_path_detail(const std::string& adlog_field_str) const { + std::vector arr = absl::StrSplit(adlog_field_str, ":"); + if (arr.size() == 0) { + return absl::nullopt; + } + + if (is_unified_type_str(arr.back())) { + auto res = find_proto_path_detail_helper(absl::StrJoin(arr.begin(), arr.end() - 1, ":")); + if (res.has_value()) { + res->type_str = arr.back(); + } + return res; + } else { + return find_proto_path_detail_helper(adlog_field_str); + } +} + +absl::optional +AdlogNode::find_proto_path_detail_helper(const std::string& adlog_field_str) const { + if (adlog_field_str.size() == 0) { + return absl::nullopt; + } + + if (adlog_field_str == "adlog.is_train") { + return absl::nullopt; + } + + if (adlog_field_str == name_) { + return absl::make_optional(this); + } + + if (!is_str_starts_with(adlog_field_str, name_)) { + LOG(ERROR) << "adlog_field_str should starts with: " << name_ << ", but is : " << adlog_field_str; + return absl::nullopt; + } + + std::string field_name = ""; + std::string suffix = ""; + + if (name_.size() + 1 >= adlog_field_str.size()) { + return absl::nullopt; + } + + std::string child_field_str = adlog_field_str.substr(name_.size() + 1); + + size_t pos 
= child_field_str.find("."); + if (pos != std::string::npos) { + field_name = child_field_str.substr(0, pos); + if (pos + 1 < child_field_str.size()) { + suffix = child_field_str.substr(pos + 1); + } + } else { + field_name = child_field_str; + suffix = ""; + } + + auto it = children_.find(field_name); + if (it != children_.end() && it->second != nullptr) { + // 可能是 action_detail 或者 common_info + if (it->second->is_common_info_list()) { + if (is_str_starts_with(suffix, "key:")) { + if (absl::optional int_value = get_int_value_from_path(suffix)) { + if (const AdlogNode* node = it->second->find_common_info_leaf_by_enum_value(*int_value)) { + return absl::make_optional(node, AdlogPathType::COMMON_INFO); + } else { + // 如果 use_name_value = false, 则返回 absl::nullopt。 + // 否则新建一个包含 *int_value 的节点。 + bool use_name_value = ProtoParser::instance().use_name_value(); + + if (use_name_value) { + absl::optional res = absl::make_optional(); + res->add_common_info_node(it->second.get(), *int_value); + return res; + } else { + return absl::nullopt; + } + } + } + } else if (suffix.size() == 0) { + // CommonInfo 列表 + if (const AdlogNode* node = it->second.get()) { + return absl::make_optional(node, AdlogPathType::COMMON_INFO); + } else { + return absl::nullopt; + } + } else if (suffix.size() > 0 && is_str_all_uppercase(suffix)) { + // 枚举名 + if (const AdlogNode* node = it->second->find_common_info_leaf_by_enum_str(suffix)) { + return absl::make_optional(node, AdlogPathType::COMMON_INFO); + } else { + return absl::nullopt; + } + } else if (is_nonstd_common_info_enum(suffix)) { + // 少量枚举名不规范,包含小写字母,单独处理。 + if (const AdlogNode* node = it->second->find_common_info_leaf_by_enum_str(suffix)) { + return absl::make_optional(node, AdlogPathType::COMMON_INFO); + } else { + return absl::nullopt; + } + } else { + LOG(ERROR) << "wrong format, should be enum str or enum value, but is: " << suffix; + return absl::nullopt; + } + } else if (it->second->is_action_detail_map()) { + if 
(is_str_starts_with(suffix, "key:")) { + // action_detail + // 需要去掉 .key:xxx, ActionDetail node 不包含 key,只有 value + if (absl::optional int_value = get_int_value_from_path(suffix)) { + std::regex p_action("key:\\d+\\."); + child_field_str = std::regex_replace(child_field_str, p_action, ""); + if (auto res = it->second->find_proto_path_detail_helper(child_field_str)) { + res->action.emplace(int_value.value()); + res->adlog_path_type = AdlogPathType::ACTION_DETAIL_LEAF; + return res; + } else { + return absl::nullopt; + } + } else { + LOG(ERROR) << "cannot find action in child_field_str: " << child_field_str; + return absl::nullopt; + } + } else if (suffix.size() == 0) { + if (const AdlogNode* node = it->second.get()) { + return absl::make_optional(node, AdlogPathType::ACTION_DETAIL_LIST); + } else { + LOG(ERROR) << "cannot find adlog node: " << adlog_field_str; + return absl::nullopt; + } + } else { + LOG(ERROR) << "wrong format for action, should be .key:xxx, but is: " << suffix; + return absl::nullopt; + } + } else if (it->second->is_label_infos_map()) { + if (absl::StartsWith(suffix, "key:")) { + if (absl::optional int_value = get_int_value_from_path(suffix)) { + // 固定是 0: UNKNOW_NAME + if (const AdlogNode* node = it->second->find_common_info_leaf_by_enum_value(0)) { + auto res = absl::make_optional(node, AdlogPathType::LABEL_INFO_LEAF); + res->label_info_value.emplace(*int_value); + return res; + } else { + LOG(ERROR) << "cannot find labe_info, key: 0, adlog_field_str: " << adlog_field_str; + return absl::nullopt; + } + } else { + LOG(ERROR) << "cannot find int_value from suffix: " << suffix + << ", adlog_field_str: " << adlog_field_str; + return absl::nullopt; + } + } else if (suffix.size() == 0) { + if (const AdlogNode* node = it->second.get()) { + return absl::make_optional(node, AdlogPathType::LABEL_INFO_MAP); + } else { + LOG(ERROR) << "cannot find label info node, adlog_field_str: " << adlog_field_str; + return absl::nullopt; + } + } else { + LOG(ERROR) << 
"wrong format for label_infos, should be .key:xxx, but is: " << suffix; + return absl::nullopt; + } + } else if (it->second->is_enum()) { + return absl::make_optional(it->second.get(), AdlogPathType::ENUM); + } else { + return it->second->find_proto_path_detail_helper(child_field_str); + } + } + + if (!is_field_non_proto(adlog_field_str)) { + LOG(ERROR) << "cannot find proto path detail, wrong format of adlog_field_str: " << adlog_field_str + << ", field_name: " << field_name; + } + + return absl::nullopt; +} + +absl::optional AdlogNode::find_common_info_leaf_enum_name( + const std::string& adlog_field_str) const { + const AdlogNode* node = find_proto_node(adlog_field_str); + if (node != nullptr && node->parent() != nullptr) { + return absl::make_optional(node->parent()->type_str() + std::string("::") + node->name()); + } + + LOG(INFO) << "cannot find common info leaf enum name, adlog_field_str: " << adlog_field_str; + return absl::nullopt; +} + +bool AdlogNode::is_common_info_leaf() const { + if (is_enum_) { + if (parent_ != nullptr && parent_->is_common_info_list()) { + if (!is_label_infos_leaf() && !is_label_infos_map()) { + return true; + } + } + } + + return false; +} + +bool AdlogNode::is_tube_item_info() const { + if (name_ == "tube_item_info") { + return true; + } else { + return false; + } +} + +bool AdlogNode::is_tube_user_info() const { + if (name_ == "tube_user_info") { + return true; + } else { + return false; + } +} + +bool AdlogNode::is_repeated_crowd_tag() const { + if (absl::EndsWith(type_str_, "StrategyCrowdTag")) { + return true; + } + + return false; +} + +bool AdlogNode::is_parent_repeated_crowd_tag() const { + if (parent_ != nullptr && parent_->is_repeated_crowd_tag()) { + return true; + } + + return false; +} + +bool AdlogNode::is_device_info() const { + return type_str_.find("DeviceInfo") != std::string::npos; +} + +bool AdlogNode::is_device_info_leaf() const { + if (parent_ != nullptr) { + return parent_->is_device_info(); + } + + return 
false; +} + +bool AdlogNode::is_repeated_device_info() const { + return is_repeated() && is_device_info(); +} + +bool AdlogNode::is_repeated_device_info_leaf() const { + if (parent_ != nullptr) { + return parent_->is_repeated_device_info(); + } + + return false; +} + +bool AdlogNode::is_label_infos_map() const { + if (parent_ != nullptr) { + if (parent_->name() == "label_info") { + return name_ == "label_infos" || name_ == "global_gmv_label_infos"; + } + } + + return false; +} + +bool AdlogNode::is_label_infos_leaf() const { + if (parent_ != nullptr) { + if (parent_->is_label_infos_map()) { + return true; + } + } + + return false; +} + +bool AdlogNode::is_action_detail_map() const { + static std::unordered_set action_types = { + "SimpleAdDspInfos", "SimpleAdDspInfosV2", + "SimpleFansTopInfos", "SimpleLiveInfos", "AdActionInfoList"}; + return action_types.find(type_str_) != action_types.end(); +} + +bool AdlogNode::is_action_detail_leaf() const { + if (parent_ != nullptr && parent_->parent() != nullptr) { + if (parent_->parent()->is_action_detail_map()) { + return true; + } + + if (parent_->parent()->is_user_action_detail()) { + return true; + } + } + + return false; +} + +bool AdlogNode::is_user_action_detail() const { + return type_str_ == "UserActionDetail"; +} + +bool AdlogNode::is_action_detail_list() const { + if (parent_ != nullptr) { + if (parent_->is_action_detail_map()) { + return true; + } + + if (parent_->is_user_action_detail()) { + return true; + } + } + + return false; +} + +bool AdlogNode::is_from_user_action_detail_list() const { + if (is_user_action_detail()) { + return true; + } + + if (parent_ != nullptr) { + return parent_->is_from_user_action_detail_list(); + } + + return false; +} + +// 枚举字段不会有 chidlren_,只会有嵌套定义的枚举 +const AdlogNode* AdlogNode::find_common_info_leaf_by_enum_value(int enum_value) const { + auto it_child = children_.find(std::to_string(enum_value)); + if (it_child != children_.end()) { + return it_child->second.get(); + } + + 
return nullptr; +} + +const AdlogNode* AdlogNode::find_common_info_leaf_by_enum_str(const std::string& enum_str) const { + auto it_child = children_.find(enum_str); + if (it_child != children_.end()) { + return it_child->second.get(); + } + + return nullptr; +} + +void AdlogNode::add_all_action_detail_field_types() { + if (!is_action_detail_map()) { + return; + } + + auto it = children_.find("list"); + if (it == children_.end()) { + return; + } + + const auto& m = it->second->children(); + for (auto it_field = m.begin(); it_field != m.end(); it_field++) { + if (it_field->second->name().size() > 0) { + action_detail_field_types_[it_field->second->name()] = it_field->second->type_str(); + } else { + LOG(INFO) << "field name is empty! type_str: " << it_field->second->type_str(); + } + } +} + +void AdlogNode::insert_action_detail_field_type(const std::string& field_name, const std::string& type_str) { + action_detail_field_types_[field_name] = type_str; +} + +bool AdlogNode::is_from_list() const { + if (parent_ != nullptr) { + return parent_->is_from_list(); + } + + return type_str_.find("repeated") != std::string::npos; +} + +std::string AdlogNode::type_str() const { + if (absl::EndsWith(type_str_, "_t")) { + return type_str_.substr(0, type_str_.size() - 2); + } else { + return type_str_; + } +} + +json AdlogNode::to_json() const { + json res = json::object(); + + // 不用存枚举。 + if (is_enum_) { + return res; + } + + res["name"] = name_; + res["type_str"] = type_str_; + res["children"] = json::object(); + + for (auto it = children_.begin(); it != children_.end(); it++) { + res["children"][it->first] = it->second->to_json(); + } + + return res; +} + +bool AdlogNode::is_field_non_proto(const std::string& adlog_field) const { + // 暂时简单处理,更精确的处理是判断 starts_with + static std::unordered_set method_names = { + "reco_user_info", + "ad_user_history_photo_embedding", + "is_train", + "ad_user_ad_action_map", + "h_maplist_append_req", + "colossus_ad_live_item", + 
"colossus_reco_live_v1_item", + "colossus_reco_live_item", + "colossus_ad_goods_item", + "colossus_ad_goods_item_new", + "colossus_ad_goods_item_new_pdn_v0", + "colossus_reco_photo", + "colossus_reco_photo_v3", + "colossus_reco_photo_v3_pdn_v0", + "colossus_ad_live_spu", + "colossus_ad_live_cid3", + "author_cluster_map", + "live_cluster_map", + "ad_live_global_gsu_result", + "ad_live_ecomm_gsu_result", + "ad_live_author_gsu_result", + "ad_live_spu_gsu_result", + "ad_live_cid3_gsu_result", + "ad_live_author_cluster_gsu_result", + "ad_live_remote_cluster_gsu_result", + "reco_live_global_gsu_result", + "reco_live_author_gsu_result", + "reco_live_author_cluster_gsu_result", + "reco_live_remote_cluster_gsu_result", + "reco_live_v1_remote_cluster_gsu_result", + "ad_live_colossus_idx_filtered_by_playtime", + "ad_live_colossus_idx_filtered_by_label", + "reco_live_colossus_idx_filtered_by_playtime", + "ad_user_all_goods_action", + "ad_user_history_photo_embedding", + "ad_user_history_pdct_embedding", + "ad_user_histactpdct_pdct", + "ad_user_histactpdct_firstid", + "ad_user_histactpdct_secondid", + "ad_user_histactpdct_u2uemb", + "ad_user_histact_weight", + "ad_user_histact_type", + "picasso_ad_goods_item", + "ad_live_delivering_author", + "living_author_set", + "ad_live_offline_sample_set", + }; + + for (const auto& method_name : method_names) { + if (adlog_field.find(method_name) != std::string::npos) { + return true; + } + } + + return false; +} + + +bool AdlogFieldDetail::is_int64() const { + static std::unordered_set types = {"int32_t", "int", "int64", "uint64", "int64_t", "uint64_t"}; + return types.find(type_str_) != types.end(); +} + +bool AdlogFieldDetail::is_int64_list() const { + static std::unordered_set types = {"int_list", "int64_list"}; + return types.find(type_str_) != types.end(); +} + +bool AdlogFieldDetail::is_float() const { + static std::unordered_set types = {"float", "double"}; + return types.find(type_str_) != types.end(); +} + +bool 
AdlogFieldDetail::is_float_list() const { + static std::unordered_set types = {"float_list", "double_list"}; + return types.find(type_str_) != types.end(); +} + +bool AdlogFieldDetail::is_str() const { + static std::unordered_set types = {"str", "string", "std::string", "absl::string_view"}; + return types.find(type_str_) != types.end(); +} + +bool AdlogFieldDetail::is_str_list() const { + static std::unordered_set types = {"str_list", "string_list", "std::vector", + "std::vector"}; + return types.find(type_str_) != types.end(); +} + +bool AdlogFieldDetail::is_user_field() const { return is_str_starts_with(adlog_path_, "adlog.user"); } + +bool AdlogFieldDetail::is_context_field() const { return is_str_starts_with(adlog_path_, "adlog.context"); } + +bool AdlogFieldDetail::is_item_field() const { return is_str_starts_with(adlog_path_, "adlog.item"); } + +} // namespace proto_parser +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/proto_parser/proto_node.h b/convert/proto_parser/proto_node.h new file mode 100644 index 0000000..d458981 --- /dev/null +++ b/convert/proto_parser/proto_node.h @@ -0,0 +1,328 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ks { +namespace ad_algorithm { +namespace proto_parser { + +using nlohmann::json; + +enum class AdlogPathType { + NONE, + /// 普通字段,如 adlog.user_info.id + NORMAL, + + /// CommonInfo,如 adlog.user_info.common_info_attr.APP_LIST:int64_list + COMMON_INFO, + + /// ActionDetailMap, 如 adlog.user_info.ad_dsp_action_detail.key:1.list + /// + /// 或者 UserActionDetail, 如 adlog.user_info.action_detail.follow.id + /// + /// message UserActionDetail { + /// repeated SimpleUserInfo follow = 1; + /// repeated SimplePhotoInfo like = 2; + /// repeated SimplePhotoInfo forward = 3; + /// } + /// + /// message SimpleUserInfo { + /// uint64 id = 1; + /// uint64 action_time = 2; + /// uint64 photo_id = 3; + /// }; + ACTION_DETAIL_LIST, + + /// ActionDetail, 如 
adlog.user_info.ad_dsp_action_detail.key:1.list.photo_id + ACTION_DETAIL_LEAF, + + /// LabelInfos map, 如 adlog.item.label_info.label_infos + LABEL_INFO_MAP, + + /// LabelInfos 叶子节点, 如 adlog.item.label_info.label_infos.key:189 + LABEL_INFO_LEAF, + + /// repeated DeviceInfo leaf, 如 adlog.user_info.ad_user_info.device_info.os_type + REPEATED_DEVICE_INFO_LEAF, + + /// crowd tag leaf + CROWD_TAG_LEAF, + + ENUM +}; + +class AdlogNode; + +struct AdlogPathDetail { + AdlogPathDetail() = default; + + explicit AdlogPathDetail(const AdlogNode* adlog_node_ptr) : + adlog_node(adlog_node_ptr), adlog_path_type(AdlogPathType::NORMAL) {} + explicit AdlogPathDetail(const AdlogNode* adlog_node_ptr, AdlogPathType adlog_path_type): + adlog_node(adlog_node_ptr), adlog_path_type(adlog_path_type) {} + + const AdlogNode* adlog_node = nullptr; + + AdlogPathType adlog_path_type = AdlogPathType::NONE; + + std::string type_str; + + absl::optional action; + absl::optional label_info_value; + + std::shared_ptr enum_node = nullptr; + + void add_common_info_node(const AdlogNode* parent, int64_t int_value); +}; + +/// adlog 对应的 proto 节点,包含其中的字段信息。children_ 表示 message 中的各个字段。 +/// 每个节点都必须有名字和类型。如果是叶子节点, 则 children_ 为空。 +class AdlogNode { + std::string name_; + + std::string type_str_; + + int index_; + + bool is_enum_ = false; + + /// 比较特殊,单独标记。 + bool is_common_info_list_ = false; + bool is_common_info_map_ = false; + + std::string comment_; + + std::unordered_map> children_; + + const AdlogNode* parent_ = nullptr; + + std::unordered_map action_detail_field_types_; + + public: + AdlogNode() = default; + explicit AdlogNode(const std::string& name, const std::string& type_str, int index); + + const std::string& name() const { return name_; } + + std::string type_str() const; + + const std::string& comment() const { return comment_; } + + int index() const { return index_; } + + void set_type_str(const std::string& type_str) { type_str_ = type_str; } + + bool is_repeated() const { return 
type_str_.find("repeated") != std::string::npos; } + + const AdlogNode* parent() const; + void set_parent(AdlogNode* parent); + void set_parent(const AdlogNode* parent); + + void set_is_enum(bool is_enum) { is_enum_ = is_enum; } + bool is_enum() const { return is_enum_; } + + std::string get_adlog_path() const; + + /// 注意 1: ActionDetail 和 label_infos 需要 map 的 key,是其他地方的变量,因此调用放需要传入参数 key。 + /// 注意 2: 可能会有来自 label_infos 的 CommonInfo, 逻辑比较特殊,和普通的 CommonInfo 有区别。 + /// 如 adlog.item.label_info.label_infos.key:232, LabelAttr 中没有枚举,只有保存值的几个字段, + /// 因此解析 proto 时候并不能知道最终数据里的 key, 需要外面保存起来,在获取 adlog_path 时候 + /// 作为参数传进来。 + /// + /// map label_infos = 160; // 新 label 都放在这里,key 是 labelId + /// + /// message LabelAttr { + /// enum Name { UNKNOW_NAME = 0; }; + /// optional LabelCommonTypeEnum.AttrType type = 1; + /// optional int64 name_value = 2; + /// optional int64 int_value = 3; + /// optional float float_value = 4; + /// optional bool bool_value = 5; + /// optional string string_value = 6; + /// map map_int64_int64_value = 7; + /// map map_int64_string_value = 8; + /// optional uint64 first_occur_timestamp = 99; // 该 label 首次出现时间点 + /// } + std::string get_adlog_path(int key) const; + std::string get_bslog_path() const; + std::string get_bslog_path(int key) const; + + void set_is_common_info_list(bool is_common_info_list) { is_common_info_list_ = is_common_info_list; } + + /// label_infos map + void set_is_common_info_map(bool is_common_info_map) { is_common_info_map_ = is_common_info_map; } + + /// 是否是 CommonInfo list 节点,如 adlog.user_info.common_info_attr + bool is_common_info_list() const { return is_common_info_list_; } + + /// 是否是 CommonInfo map 节点,如 adlog.item.label_info.label_infos + bool is_common_info_map() const { return is_common_info_map_; } + + /// 是否是 CommoInfo 叶子节点,如 adlog.user_info.common_info_attr.APP_LIST:int64_list + bool is_common_info_leaf() const; + + /// 是否是 tube_item_info 字段,特殊处理,不以 adlog 开头。tube_item_info 只有 bs 数据,没有 adlog 数据。 + bool 
is_tube_item_info() const; + + /// 是否是 tube_user_info 字段,特殊处理,不以 adlog 开头。tube_user_info 只有 bs 数据,没有 adlog 数据。 + bool is_tube_user_info() const; + + /// 是否是 crowd_tag 字段,特殊处理,不以 adlog 开头。 + bool is_repeated_crowd_tag() const; + + /// 是否是 crowd_tag 字段,特殊处理,不以 adlog 开头。 + bool is_parent_repeated_crowd_tag() const; + + /// 示例: adlog.user_info.ad_user_info.device_info.app_package + /// + /// message DeviceInfo { + /// enum OsType { + /// UNKNOWN_OS_TYPE = 0; // Unknown + /// ANDROID = 1; // Android + /// IOS = 2; // iOS + /// }; + /// ... + /// }; + bool is_device_info() const; + bool is_repeated_device_info() const; + + bool is_device_info_leaf() const; + bool is_repeated_device_info_leaf() const; + + /// 是否是 label_infos map 节点,adlog.item.label_info.label_infos。 + bool is_label_infos_map() const; + + /// 是否是 label_infos 叶子节点,如 adlog.item.label_info.label_infos.key:572。 + bool is_label_infos_leaf() const; + + /// 当前类型是否是 LabelAttr + bool is_type_str_label_attr() const { return type_str_ == "LabelAttr"; } + + void add_child(const std::string& child_name, std::unique_ptr child) { + child->set_parent(this); + children_.insert({child_name, std::move(child)}); + } + + bool has_child(const std::string& child_name) const; + + /// 寻找 path 中的 key int, 如 key:2, key_2, key:2.key + absl::optional get_int_value_from_path(const std::string& bs_enum_str) const; + + /// bs_enum_str 是 adlog field 对应的 bs 枚举字符串 + std::string find_field_comment(const std::string& bs_enum_str) const; + std::string find_field_type_str(const std::string& bs_enum_str) const; + + /// 可能会有多个同名的 proto message + void find_middle_node_root(const std::string& middle_node_root, + std::vector* root_arr) const; + + bool is_str_all_uppercase(const std::string& s) const; + + /// 可能包含类型,需要去掉类型, 如 adlog.user_info.common_info_attr.APP_LIST:int64_list + const AdlogNode* find_proto_node(const std::string& adlog_field_str) const; + + /// 查找 node + const AdlogNode* find_proto_node_helper(const std::string& adlog_field_str) 
/// Describes one adlog field: its path, numeric attribute id and type string.
///
/// TODO(liuzhishan): this duplicates logic found elsewhere; move it into a
/// shared library later.
class AdlogFieldDetail {
 public:
  AdlogFieldDetail() = default;

  /// Builds a detail that only knows its path; the id stays 0 and both the
  /// id string and the type string stay empty.
  explicit AdlogFieldDetail(const std::string& adlog_path) : adlog_path_(adlog_path) {}

  /// Builds a fully populated detail; the id string is derived from `attr_id`.
  explicit AdlogFieldDetail(const std::string& adlog_path, int32_t attr_id, const std::string& type_str)
      : adlog_path_(adlog_path),
        attr_id_(attr_id),
        attr_id_str_(std::to_string(attr_id)),
        type_str_(type_str) {}

  // Accessors.
  const std::string& adlog_path() const { return adlog_path_; }
  int32_t attr_id() const { return attr_id_; }
  const std::string& attr_id_str() const { return attr_id_str_; }
  const std::string& type_str() const { return type_str_; }

  /// Updates the id and keeps its string form in sync.
  void set_attr_id(int32_t attr_id) {
    attr_id_ = attr_id;
    attr_id_str_ = std::to_string(attr_id);
  }

  void set_type_str(const std::string& type_str) { type_str_ = type_str; }

  /// A detail is valid once it has a non-empty path.
  bool is_valid() const { return !adlog_path_.empty(); }

  // Type predicates, decided from the type string.
  bool is_int64() const;
  bool is_int64_list() const;
  bool is_float() const;
  bool is_float_list() const;
  bool is_str() const;
  bool is_str_list() const;

  // Path predicates, decided from the path prefix.
  bool is_user_field() const;
  bool is_context_field() const;
  bool is_item_field() const;

 private:
  std::string adlog_path_;
  int32_t attr_id_ = 0;
  std::string attr_id_str_;
  std::string type_str_;
};
ProtoParser::build_adlog_tree_using_reflection() { + AdJointLabeledLog adlog; + return build_adlog_tree_from_descriptor(adlog.GetDescriptor(), "adlog", 0, 0, "adlog", false); +} + +void ProtoParser::add_single_enum(AdlogNode* root, + const std::string& name, + const EnumValueDescriptor* enum_value, + const std::string& type_str) { + if (root == nullptr || enum_value == nullptr) { + return; + } + + auto node = std::make_unique(enum_value->name(), type_str, enum_value->number()); + node->set_is_enum(true); + root->add_child(name, std::move(node)); +} + +void ProtoParser::add_enum(AdlogNode* root, + const EnumDescriptor* enum_type, + const std::string& type_str, + const std::string& field_name) { + if (root == nullptr || enum_type == nullptr) { + return; + } + + // 分别根据枚举名和 int 值建索引, + std::string name = enum_type->name(); + for (int j = 0; j < enum_type->value_count(); j++) { + const auto enum_value = enum_type->value(j); + add_single_enum(root, enum_value->name(), enum_value, type_str); + add_single_enum(root, std::to_string(enum_value->number()), enum_value, type_str); + } + + if (field_name.size() > 0) { + // 用于查找枚举字段 + auto node = std::make_unique(field_name, "int64", 0); + node->set_is_enum(true); + root->add_child(field_name, std::move(node)); + } +} + +std::unique_ptr ProtoParser::build_adlog_tree_from_descriptor(const Descriptor* descriptor, + const std::string& name, + int index, + int degree, + const std::string& prefix, + bool is_repeated) { + if (is_descriptor_common_info(descriptor)) { + return build_adlog_tree_common_info(descriptor, name, index, degree, prefix); + } + + std::string type_name = descriptor->name(); + if (is_repeated) { + type_name = std::string("repeated ") + type_name; + } + auto res = std::make_unique(name, type_name, index); + + for (int i = 0; i < descriptor->field_count(); i++) { + const auto field = descriptor->field(i); + + if (field->name() == "serialized_reco_user_info") { + // 不需要映射,直接获取原数据使用。 + auto node = 
std::make_unique(field->name(), field->type_name(), field->index()); + res->add_child(field->name(), std::move(node)); + } else if (const EnumDescriptor* enum_type = field->enum_type()) { + add_enum(res.get(), enum_type, field->type_name(), field->name()); + } else if (field->is_map()) { + // 必须在 message 判断之前, map 也是 message + const auto key_field = field->message_type()->FindFieldByName("key"); + const auto value_field = field->message_type()->FindFieldByName("value"); + + // value 是普通字段,直接当做叶子节点。 + if (is_basic_type(value_field->type())) { + std::ostringstream oss_type_name; + oss_type_name << "map<" << key_field->type_name() << ", " << value_field->type_name() << ">"; + auto node = std::make_unique(field->name(), oss_type_name.str(), field->index()); + + res->add_child(field->name(), std::move(node)); + } else { + // 中间的 map 按 key 的值展开,因此不需要关心 key,只需要按 value 继续展开。 + // 如 ActionDetail 类型是 map, field->name 为 key。 + // 但是在获取 adlog_path 时候必须传入 key 的值。 + auto node = std::move(build_adlog_tree_from_descriptor(value_field->message_type(), + field->name(), + field->index(), + degree + 1, + prefix + "." + field->name(), + is_repeated)); + res->add_child(field->name(), std::move(node)); + } + } else if (field->is_repeated()) { + if (is_basic_type(field->type())) { + auto node = std::make_unique(field->name(), + std::string("repeated ") + field->type_name(), + field->index()); + res->add_child(field->name(), std::move(node)); + } else { + auto node = std::move(build_adlog_tree_from_descriptor(field->message_type(), + field->name(), + field->index(), + degree + 1, + prefix + "." + field->name(), + true)); + res->add_child(field->name(), std::move(node)); + } + } else if (field->type() == FieldDescriptor::TYPE_MESSAGE) { + auto node = std::move(build_adlog_tree_from_descriptor(field->message_type(), + field->name(), + i, + degree + 1, + prefix + "." 
+ field->name(), + is_repeated)); + res->add_child(field->name(), std::move(node)); + } else if (is_basic_type(field->type())) { + auto node = std::make_unique(field->name(), field->type_name(), field->index()); + res->add_child(field->name(), std::move(node)); + } else { + LOG(INFO) << "ignore, field->type(): " << field->type_name() + << ", field_name: " << field->name(); + } + } + + if (res->is_action_detail_map()) { + res->add_all_action_detail_field_types(); + } + + return res; +} + +bool ProtoParser::is_descriptor_common_info(const Descriptor* descriptor) { + if (descriptor == nullptr) { + return false; + } + + bool has_name_value = false; + bool has_int_value = false; + + for (size_t i = 0; i < descriptor->field_count(); i++) { + const auto field = descriptor->field(i); + if (field->name() == "name_value") { + has_name_value = true; + } else if (field->name() == "int_value") { + has_int_value = true; + } + } + + return has_name_value && has_int_value; +} + +std::unique_ptr ProtoParser::build_adlog_tree_common_info( + const Descriptor* descriptor, + const std::string& name, + int index, + int degree, + const std::string& prefix) { + if (descriptor == nullptr) { + return nullptr; + } + + auto res = std::make_unique(name, descriptor->name(), index); + if (descriptor->name() == "LabelAttr") { + res->set_is_common_info_map(true); + } else { + res->set_is_common_info_list(true); + } + + for (size_t i = 0; i < descriptor->enum_type_count(); i++) { + const auto t = descriptor->enum_type(i); + if (is_str_starts_with(t->name(), "Name")) { + add_enum(res.get(), t, t->name(), name); + } + } + + return res; +} + +bool ProtoParser::is_basic_type(FieldDescriptor::Type type) { + if (type == FieldDescriptor::TYPE_MESSAGE || type == FieldDescriptor::TYPE_GROUP) { + return false; + } + + return true; +} + +bool ProtoParser::is_common_info(const Descriptor* descriptor) { + if (descriptor->FindEnumTypeByName("Name") != nullptr && + descriptor->FindFieldByName("name_value") != 
nullptr && + descriptor->FindFieldByName("int_list_value") != nullptr) { + return true; + } + + return false; +} + +ProtoParser::ProtoParser() { + LOG(INFO) << "start build adlog tree"; + adlog_root_ = std::move(build_adlog_tree_using_reflection()); + if (adlog_root_ == nullptr) { + LOG(ERROR) << "build adlog tree failed!"; + } + + LOG(INFO) << "build adlog tree success!"; +} + +} // namespace proto_parser +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/proto_parser/proto_parser.h b/convert/proto_parser/proto_parser.h new file mode 100644 index 0000000..9776733 --- /dev/null +++ b/convert/proto_parser/proto_parser.h @@ -0,0 +1,133 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "./proto_node.h" + +namespace ks { +namespace ad_algorithm { +namespace proto_parser { + +using ::google::protobuf::Descriptor; +using ::google::protobuf::EnumDescriptor; +using ::google::protobuf::FieldDescriptor; +using ::google::protobuf::EnumValueDescriptor; +using nlohmann::json; + +/// 根据 proto 反射建立树节点 +class ProtoParser { + ProtoParser(); + std::unique_ptr adlog_root_ = nullptr; + + /// 如果 use_name_value_ = true, 则当通过 common info int value 查找 AdlogNode 时,如果找不到, + /// 则新建一个 adlogNode,包含 int value 为值的枚举。 + bool use_name_value_ = false; + + public: + static ProtoParser& instance() { + static ProtoParser proto_parser; + return (proto_parser); + } + + /// 每个 message 对应一个节点,children_ 的 key 是 message 中 field 的字段名。嵌套类型再进行递归调用。 + /// 如果是 list 或者 map 的简单类型,则不需要递归,将当前字段作为叶子节点。 + /// + /// field 的情况根据类型分以下几种: + /// 1. 简单单值字段,如 int, float 等, name 为 children_ 的 key, ProtoField 为 value。 + /// 2. 简单 list 类型,list 中的数据为简单类型,不需要递归,将当前字段当做叶子节点的 ProtoField。 + /// 2. 嵌套 list 类型,list 中的数据为嵌套类型,需要递归,将当前 field 名作为 children_ 的 key, 嵌套 + /// 类型作为 value。 + /// 3. 简单 map 类型,不需要递归,将当前字段作为叶子节点的 ProtoField。 + /// 4. 
嵌套 map 类型,需要递归,将当前 field 名作为 children_ 的 key, 嵌套的 value 类型作为 value,如 + /// adlog.user_info.ad_dsp_action_detail。 + /// + /// 注意 1: 在 message 中定义的内部 message 不会出现在 AdlogNode 中,只有 message 的字段才会成为 AdlogNode。 + /// 注意 2: LabelAttr 也是 CommonInfo 类型,但是 Name 枚举中只有 UNKNOW_NAME,查找 AdlogNode 时需要用 + /// 0 查询。 + /// 每个叶子节点对应一个 AdlogNode,包括简单类型、枚举、简单 list、简单 map。 + /// 简单类型以其变量名为 name, 枚举以其枚举名为 name。 + /// deprecate + /// 补全类型、注释等信息。 + const AdlogNode* adlog_root() const; + + bool use_name_value() const { return use_name_value_; } + void set_use_name_value(bool v) { use_name_value_ = v; } + + void add_single_enum(AdlogNode* root, + const std::string& name, + const EnumValueDescriptor* enum_value, + const std::string& type_str); + + void add_enum(AdlogNode* root, + const EnumDescriptor* enum_type, + const std::string& type_str, + const std::string& field_name); + + // 通过反射构建 adlog tree。 + std::unique_ptr build_adlog_tree_using_reflection(); + std::unique_ptr build_adlog_tree_from_descriptor(const Descriptor* descriptor, + const std::string& name, + int index, + int degree, + const std::string& prefix, + bool is_repeated); + + /// 根据其 field 是否包含 name_value 判断是否是 CommonInfo。 + bool is_descriptor_common_info(const Descriptor* descriptor); + + /// CommonInfo 逻辑比较特殊,需要单独处理。其格式如下 + /// message CommonInfoAttr { + /// enum Name { + /// UNKNOW_NAME = 0; + /// LIVE_RECO_EMBEDDING_CTR_USER = 1; + /// APP_LIST = 2; + /// }; + /// + /// enum NameExtendOne { + /// ECOM_BATCH_USER_SEARCH_1D_CAT2_LIST = 11000; // 用户 1 天搜索二级类目 ID + /// ECOM_BATCH_USER_SEARCH_1D_CAT3_LIST = 11001; // 用户 1 天搜索三级类目 ID + /// }; + /// + /// enum NameExtendTwo { + /// AKG_INDUS_USER_AD_INTEREST_SEQ_PAY_SUBMIT = 420004; + /// AKG_INDUS_USER_AD_INTEREST_SEQ_ITEM_CLICK = 420005; + /// }; + /// + /// optional CommonTypeEnum.AttrType type = 1; + /// optional int64 name_value = 2; + /// optional int64 int_value = 3; + /// optional float float_value = 4; + /// optional bytes string_value = 5; + /// repeated int64 
/// Returns true when `s` is an optionally signed run of decimal digits,
/// optionally surrounded by a single space on either side.
inline bool is_str_integer(const std::string& s) {
  static const std::regex p(" ?[\\-\\+]?[0-9]+ ?");
  return std::regex_match(s, p);
}

/// Returns true when `s` begins with `x`. An empty `x` always matches.
inline bool is_str_starts_with(const std::string& s, const std::string& x) {
  // compare() checks in place; no temporary substring is allocated.
  return s.size() >= x.size() && s.compare(0, x.size(), x) == 0;
}

/// Returns true when `s` ends with `x`. An empty `x` always matches.
inline bool is_str_ends_with(const std::string& s, const std::string& x) {
  return s.size() >= x.size() && s.compare(s.size() - x.size(), x.size(), x) == 0;
}

/// Returns true when `s` is one of the unified type names that may follow
/// the ':' at the end of an adlog path (for example "int64_list").
///
/// TODO(liuzhishan): this duplicates logic found elsewhere; consolidate it
/// in one place later.
inline bool is_unified_type_str(const std::string& s) {
  // NOTE(review): "map_unit64_bool" looks like a misspelling of
  // "map_uint64_bool" (also listed); kept because inputs may rely on it.
  static const std::unordered_set<std::string> types = {
      "int",          "int64",          "uint",            "uint64",          "float",
      "str",          "string",         "int_list",        "int64_list",      "uint64_list",
      "uint_list",    "float_list",     "str_list",        "string_list",     "map_int64_int64",
      "map_int_int",  "map_int_float",  "map_int64_float", "map_int_string",  "map_int64_string",
      "map_int_bool", "map_int64_bool", "map_uint64_bool", "map_unit64_bool", "bool"};

  return types.find(s) != types.end();
}

/// Returns true for the few CommonInfo enum names that are not all
/// uppercase: anything starting with "LPS_LLM_LANDING_USER", plus the names
/// listed explicitly below.
inline bool is_nonstd_common_info_enum(const std::string& name) {
  static const std::unordered_set<std::string> names = {
      "AUTHOR_HISTORY_REALTIME_PURCHASE_TIMESTAMP_Flag",
      "USER_HISTORY_REALTIME_PURCHASE_TIMESTAMP_Flag",
      "LPS_LLM_LANDING_USER_LPS_1cate_30D",
  };

  // Uses this header's own prefix helper rather than a second library.
  if (is_str_starts_with(name, "LPS_LLM_LANDING_USER")) {
    return true;
  }

  return names.find(name) != names.end();
}
<< exists_expr << ") {\n" + << " return;\n" + << " }\n"; + rewriter_.ReplaceText(if_stmt, oss_action.str()); + } else { + oss_action << exists_expr; + rewriter_.ReplaceText(if_stmt->getCond(), oss_action.str()); + } + } else { + LOG(INFO) << "get action_detail_info error!"; + } + } + } +} + +void ActionDetailRule::process(clang::ForStmt* for_stmt, Env* env_ptr) { + // 多个 action_detail + if (auto& loop_info = env_ptr->cur_mutable_loop_info()) { + if (loop_info->is_int_list_member_loop() && loop_info->is_for_stmt()) { + const std::vector& int_list_member_values = loop_info->int_list_member_values(); + if (int_list_member_values.size() > 0) { + std::string body_str = get_complete_rewritten_text(for_stmt->getBody(), env_ptr); + clang::SourceRange source_range = find_source_range(for_stmt); + process_action_detail_int_list(source_range, int_list_member_values, body_str, env_ptr); + } + } + } +} + +void ActionDetailRule::process(clang::CXXForRangeStmt* cxx_for_range_stmt, Env* env_ptr) { + // 多个 action_detail + if (auto& loop_info = env_ptr->cur_mutable_loop_info()) { + if (!loop_info->is_for_stmt() && loop_info->is_int_list_member_loop()) { + const std::vector& int_list_member_values = loop_info->int_list_member_values(); + + if (int_list_member_values.size() > 0) { + clang::Stmt* body = cxx_for_range_stmt->getBody(); + std::string body_str = get_complete_rewritten_text(body, env_ptr); + + clang::SourceRange source_range = find_source_range(cxx_for_range_stmt); + process_action_detail_int_list(source_range, int_list_member_values, body_str, env_ptr); + } + } + } +} + +void ActionDetailRule::process(clang::CXXMemberCallExpr *cxx_member_call_expr, Env *env_ptr) { + auto expr_info_ptr = parse_expr(cxx_member_call_expr, env_ptr); + if (expr_info_ptr == nullptr) { + return; + } + + if (!expr_info_ptr->is_action_detail_leaf()) { + return; + } + + if (auto parent = expr_info_ptr->parent()) { + std::ostringstream oss; + size_t param_size = parent->call_expr_params_size(); + if 
(param_size > 0) { + auto param0 = parent->call_expr_param(0); + if (param_size == 1) { + if (param0 != nullptr) { + oss << param0->get_bs_field_value(); + } + } else { + if (auto param1 = parent->call_expr_param(1)) { + if (param1->is_binary_op_expr()) { + if (param1->call_expr_params_size() == 2) { + oss << param1->call_expr_param(0)->get_bs_field_value() << " " + << param1->callee_name() << " " + << param1->call_expr_param(1)->get_bs_field_value(); + } else { + LOG(INFO) + << "binary operator param size is not 2, binary operator: " + << param0->origin_expr_str(); + } + } else { + oss << param1->get_bs_field_value(); + } + } + } + } else { + LOG(INFO) << "cannot find params, expr: " + << expr_info_ptr->origin_expr_str(); + } + + std::string bs_text = expr_info_ptr->get_bs_field_value_action_detail_leaf(oss.str()); + LOG(INFO) << "bs_text: " << bs_text; + rewriter_.ReplaceText(cxx_member_call_expr, bs_text); + } +} + +void ActionDetailRule::process_action_detail_int_list(clang::SourceRange source_range, + const std::vector& int_list_member_values, + const std::string& body_str, + Env* env_ptr) { + std::ostringstream oss; + + std::vector new_bs_field_enums; + + std::regex p(std::string("_key_") + std::to_string(int_list_member_values[0]) + std::string("_")); + for (size_t i = 0; i < int_list_member_values.size(); i++) { + std::string new_str = std::string("_key_") + std::to_string(int_list_member_values[i]) + std::string("_"); + oss << "auto process_action_" << int_list_member_values[i] << " = [&]" + << std::regex_replace(body_str, p, new_str) << ";\n\n "; + + if (const auto ctor_info = env_ptr->get_constructor_info()) { + const std::unordered_set& bs_field_enums = ctor_info->bs_field_enums(); + for (const auto& x : bs_field_enums) { + std::string new_bs_field_enum = std::regex_replace(x, p, new_str); + if (new_bs_field_enum != x) { + new_bs_field_enums.push_back(new_bs_field_enum); + } + } + } + } + + if (auto ctor_info = env_ptr->mutable_constructor_info()) { + 
for (const auto& x : new_bs_field_enums) { + ctor_info->add_bs_field_enum(x); + } + } + + for (size_t i = 0; i < int_list_member_values.size(); i++) { + oss << "process_action_" << int_list_member_values[i] << "();\n "; + } + + rewriter_.ReplaceText(source_range, oss.str()); +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/ActionDetailRule.h b/convert/rule/ActionDetailRule.h new file mode 100644 index 0000000..861c0c7 --- /dev/null +++ b/convert/rule/ActionDetailRule.h @@ -0,0 +1,45 @@ +#pragma once + +#include +#include + +#include +#include + +#include "clang/AST/Expr.h" + +#include "../handler/StrictRewriter.h" +#include "RuleBase.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +class ActionDetailRule: public RuleBase { + public: + using RuleBase::process; + explicit ActionDetailRule(clang::Rewriter& rewriter): RuleBase(rewriter, "ActionDetailRule") {} // NOLINT + + void process(clang::IfStmt* if_stmt, + Env* env_ptr) override; + + void process(clang::ForStmt* for_stmt, + Env* env_ptr) override; + + void process(clang::CXXForRangeStmt* cxx_for_range_stmt, + Env* env_ptr) override; + + void process(clang::CXXMemberCallExpr* cxx_member_call_expr, + Env* env_ptr) override; + + void process_action_detail_int_list(clang::SourceRange source_range, + const std::vector& int_list_member_values, + const std::string& body_str, + Env* env_ptr); +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/AddFeatureMethodRule.cpp b/convert/rule/AddFeatureMethodRule.cpp new file mode 100644 index 0000000..97569cb --- /dev/null +++ b/convert/rule/AddFeatureMethodRule.cpp @@ -0,0 +1,91 @@ +#include +#include +#include + +#include "../Env.h" +#include "../Tool.h" +#include "../Deleter.h" +#include "../ExprInfo.h" +#include "../ExprParser.h" +#include "../info/ActionMethodInfo.h" +#include "../info/MethodInfo.h" +#include "AddFeatureMethodRule.h" + 
+namespace ks {
+namespace ad_algorithm {
+namespace convert {
+
+/// Rewrites calls to `ks::ad_algorithm::get_value_from_Action` inside `add_feature`.
+///
+/// See docs/get_value_from_action.md. Every argument that is a repeated action info is replaced
+/// by its bs field parameter text, and the callee is renamed to `bs_get_value_from_action`.
+/// Calls outside `add_feature`, or to any other callee, are left untouched.
+void AddFeatureMethodRule::process(clang::CallExpr *call_expr, Env *env_ptr) {
+  if (call_expr == nullptr || env_ptr == nullptr) {
+    return;
+  }
+
+  auto expr_info_ptr = parse_expr(call_expr, env_ptr);
+  if (expr_info_ptr == nullptr) {
+    // Nothing can be rewritten without the parsed expression; continuing would dereference null.
+    LOG(INFO) << "expr_info_ptr is nullptr!";
+    return;
+  }
+
+  if (env_ptr->get_method_name() != "add_feature") {
+    return;
+  }
+
+  // ks::ad_algorithm::get_value_from_Action
+  if (expr_info_ptr->callee_name() != ActionMethodInfo::name()) {
+    return;
+  }
+
+  // Replace the arguments first, so the rewritten text read below already contains them.
+  if (auto feature_info = env_ptr->mutable_feature_info()) {
+    const size_t param_size = expr_info_ptr->call_expr_params_size();
+    const ActionMethodInfo action_method_info(param_size);
+
+    for (size_t i = 0; i < param_size; i++) {
+      auto param_info_ptr = expr_info_ptr->call_expr_param(i);
+      if (param_info_ptr == nullptr || !param_info_ptr->is_repeated_action_info()) {
+        continue;
+      }
+
+      const NewActionParam &new_param = action_method_info.find_param(i);
+      if (new_param.origin_name().size() > 0) {
+        rewriter_.ReplaceText(param_info_ptr->expr(), new_param.get_bs_field_param_str());
+      } else {
+        LOG(INFO) << "cannot find new action param for method: "
+                  << action_method_info.name();
+      }
+    }
+  }
+
+  const std::string s = rewriter_.getRewrittenText(expr_info_ptr->expr());
+  const std::string new_s = std::regex_replace(
+      s, std::regex("ks::ad_algorithm::get_value_from_Action"),
+      "bs_get_value_from_action");
+  rewriter_.ReplaceText(call_expr, new_s);
+}
+
+/// Rewrites member calls on action infos outside of `Extract`.
+///
+/// - Parent is a repeated action info and the callee is `size`: the call becomes
+///   `<parent expr>_size`.
+/// - Parent is an action info: a call matching `xxx[i].field()` becomes `xxx_field.Get(i)`.
+void AddFeatureMethodRule::process(clang::CXXMemberCallExpr* cxx_member_call_expr,
+                                   Env* env_ptr) {
+  if (cxx_member_call_expr == nullptr || env_ptr == nullptr) {
+    return;
+  }
+
+  auto expr_info_ptr = parse_expr(cxx_member_call_expr, env_ptr);
+  if (expr_info_ptr == nullptr) {
+    // Nothing can be rewritten without the parsed expression; continuing would dereference null.
+    LOG(INFO) << "expr_info_ptr is nullptr!";
+    return;
+  }
+
+  if (expr_info_ptr->is_parent_repeated_action_info()) {
+    if (env_ptr->get_method_name() == "Extract") {
+      return;
+    }
+
+    if (expr_info_ptr->callee_name() == "size") {
+      if (auto parent = expr_info_ptr->parent()) {
+        rewriter_.ReplaceText(cxx_member_call_expr,
+                              parent->origin_expr_str() + std::string("_size"));
+      }
+    }
+
+    return;
+  }
+
+  if (expr_info_ptr->is_parent_action_info() && env_ptr->get_method_name() != "Extract") {
+    // $1 = everything before `[`, $2 = the index inside `[...]`, $3 = the called method.
+    static const std::regex p("(.*?)\\[(\\w+)\\]\\.(\\w+)\\(\\)");
+    const std::string new_text =
+      std::regex_replace(expr_info_ptr->raw_expr_str(), p, "$1_$3.Get($2)");
+    rewriter_.ReplaceText(cxx_member_call_expr, new_text);
+  }
+}
+
+}  // namespace convert
+}  // namespace ad_algorithm
+}  // namespace ks
diff --git a/convert/rule/AddFeatureMethodRule.h b/convert/rule/AddFeatureMethodRule.h
new file mode 100644
index 0000000..441c137
--- /dev/null
+++ b/convert/rule/AddFeatureMethodRule.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include
+#include
+
+#include "clang/AST/Expr.h"
+
+#include "RuleBase.h"
+#include "../Type.h"
+#include "../handler/StrictRewriter.h"
+
+namespace ks {
+namespace ad_algorithm {
+namespace convert {
+
+class Env;
+
+/// Handling for the user-defined function add_feature.
+class AddFeatureMethodRule: public RuleBase {
+ public:
+  using RuleBase::process;
+  explicit AddFeatureMethodRule(clang::Rewriter& rewriter):  // NOLINT
+    RuleBase(rewriter, "AddFeatureMethodRule") {}
+
+  /// Rewrites `get_value_from_Action` calls made inside `add_feature`.
+  void process(clang::CallExpr *call_expr, Env *env_ptr);
+
+  /// Rewrites member calls on (repeated) action infos outside of `Extract`.
+  void process(clang::CXXMemberCallExpr* cxx_member_call_expr, Env* env_ptr) override;
+};
+
+}  // namespace convert
+}  // namespace ad_algorithm
+}  // namespace ks
diff --git a/convert/rule/BSFieldOrderRule.cpp b/convert/rule/BSFieldOrderRule.cpp
new file mode 100644
index 0000000..e909920
--- /dev/null
+++ b/convert/rule/BSFieldOrderRule.cpp
@@ -0,0 +1,343 @@
+#include
+#include
+#include
+#include
+#include
+#include "../Env.h"
+#include "../Tool.h"
+#include "../ExprParser.h"
+#include "../info/NewVarDef.h"
+#include "./BSFieldOrderRule.h"
+
+namespace ks {
+namespace ad_algorithm {
+namespace convert {
+
+using nlohmann::json;
+using
tool::replace_simple_text; +using tool::insert_str_at_block_begin; + +json BSFieldOrderRule::process_to_json(clang::DeclStmt *decl_stmt, Env *env_ptr) { + return json::array(); +} + +json BSFieldOrderRule::process_to_json(clang::DeclRefExpr* decl_ref_expr, Env* env_ptr) { + json res = json::array(); + + auto expr_info_ptr = parse_expr(decl_ref_expr, env_ptr); + + if (const auto &decl_info = env_ptr->cur_decl_info()) { + if (expr_info_ptr->origin_expr_str() == decl_info->name()) { + return res; + } + } + + std::string bs_var_name = expr_info_ptr->origin_expr_str(); + // 先找到 decl_stmt, 再找到 enum 的 decl_stmt, 删掉 decl_stmt。 + // 然后将新的定义写入到正确的 env 中。 + // + // 可能有如下形式 + // if (has_val) { ... } + // + // 这种情况需要判断 if env 的 parent。如果 if cond 表达式在 bs field 定义中出现过,则是这种情况。 + if (expr_info_ptr->is_bslog_field_var_decl()) { + if (env_ptr->is_in_loop_init() || env_ptr->is_in_loop_body() || env_ptr->is_in_if_cond()) { + if (env_ptr->is_parent_loop()) { + // 可能有两层 for 循环,需要定义在最外层 for 循环的 parent 中。 + if (auto* outer_env_ptr = env_ptr->mutable_outer_loop_parent()) { + if (!outer_env_ptr->is_decl_in_cur_env(bs_var_name)) { + fix_bs_field_decl(bs_var_name, env_ptr, outer_env_ptr); + } else { + set_bs_field_visited(bs_var_name, env_ptr); + } + } else { + LOG(ERROR) << "cannot find outer loop parent!"; + } + } else if (env_ptr->is_parent_if()) { + if (env_ptr->is_in_parent_else()) { + if (auto* outer_parent_ptr = env_ptr->mutable_outer_if_parent()) { + if (!outer_parent_ptr->is_decl_in_cur_env(bs_var_name)) { + fix_bs_field_decl(bs_var_name, env_ptr, outer_parent_ptr); + } else { + set_bs_field_visited(bs_var_name, env_ptr); + } + } + } else { + if (!env_ptr->is_decl_in_parent_env(bs_var_name)) { + fix_bs_field_decl(bs_var_name, env_ptr, env_ptr->parent()); + } else { + set_bs_field_visited(bs_var_name, env_ptr); + } + } + } else { + if (!env_ptr->is_decl_in_parent_env(bs_var_name)) { + fix_bs_field_decl(bs_var_name, env_ptr, env_ptr->parent()); + } else { + 
set_bs_field_visited(bs_var_name, env_ptr); + } + } + } else if (env_ptr->is_in_if_body()) { + if (env_ptr->is_parent_if()) { + if (env_ptr->is_in_parent_else()) { + if (auto *outer_parent_ptr = env_ptr->mutable_outer_if_parent()) { + if (!outer_parent_ptr->is_decl_in_cur_env(bs_var_name)) { + fix_bs_field_decl(bs_var_name, env_ptr, outer_parent_ptr); + } else { + set_bs_field_visited(bs_var_name, env_ptr); + } + + return json::array(); + } + } + } + + // 如果 bs field 定义中有 has_value,则必定用在 if cond 中判断是否有值,因此需要判断 parent env。 + if (is_has_value_in_bs_field_params(bs_var_name, env_ptr)) { + if (!env_ptr->is_decl_in_parent_env(bs_var_name)) { + fix_bs_field_decl(bs_var_name, env_ptr, env_ptr->parent()); + } else { + set_bs_field_visited(bs_var_name, env_ptr); + } + } else if (!env_ptr->is_decl_in_cur_env(bs_var_name)) { + fix_bs_field_decl(bs_var_name, env_ptr, env_ptr); + } else { + set_bs_field_visited(bs_var_name, env_ptr); + } + } else { + if (env_ptr->is_parent_if()) { + if (env_ptr->is_in_parent_else()) { + if (auto *outer_parent_ptr = env_ptr->mutable_outer_if_parent()) { + if (!outer_parent_ptr->is_decl_in_cur_env(bs_var_name)) { + fix_bs_field_decl(bs_var_name, env_ptr, outer_parent_ptr); + } else { + set_bs_field_visited(bs_var_name, env_ptr); + } + + return json::array(); + } + } + } else { + set_bs_field_visited(bs_var_name, env_ptr); + } + } + } + + return res; +} + +void BSFieldOrderRule::fix_bs_field_decl(const std::string& bs_var_name, Env* env_ptr, Env* target_env_ptr) { + if (auto* bs_field_detail_ptr = env_ptr->find_bs_field_detail_ptr_by_var_name(bs_var_name)) { + auto it = bs_field_detail_ptr->find(bs_var_name); + if (it != bs_field_detail_ptr->end()) { + if (it->second.is_visited == false) { + const auto &field_enum_names = it->second.enum_var_names; + + if (field_enum_names.size() > 0) { + if (clang::Stmt *decl_stmt = env_ptr->get_decl_stmt(bs_var_name)) { + rewriter_.ReplaceText(decl_stmt, ""); + } + + for (size_t i = 0; i < 
field_enum_names.size(); i++) { + if (!absl::StartsWith(field_enum_names[i], "BSFieldEnum::")) { + // 可能是枚举,也可能是变量,只有变量才需要删除。 + if (clang::Stmt *field_enum_decl_stmt = + env_ptr->get_decl_stmt(field_enum_names[i])) { + rewriter_.ReplaceText(field_enum_decl_stmt, ""); + } + } + } + + if (target_env_ptr != nullptr) { + LOG(INFO) << "add new def bs: " << bs_var_name + << ", is target env if: " << target_env_ptr->is_if(); + target_env_ptr->add_new_def_overwrite( + bs_var_name, it->second.new_def, it->second.new_var_type); + it->second.is_visited = true; + } + } else { + LOG(ERROR) << "field_enum_names.size() is 0, bs_var_name: " << bs_var_name; + } + } + } else { + LOG(ERROR) << "cannot find def in bs_field_info, bs_var_name: " << bs_var_name; + } + } else { + LOG(ERROR) << "cannot find bs_field_info, bs_var_name: " << bs_var_name; + } +} + +json BSFieldOrderRule::process_to_json(clang::IfStmt *if_stmt, Env *env_ptr) { + std::ostringstream oss; + + std::string if_text = rewriter_.getRewrittenText(if_stmt); + + std::string new_defs_str = env_ptr->get_all_new_defs(); + if (new_defs_str.size() > 0) { + std::string s = insert_str_at_block_begin(if_text, new_defs_str); + rewriter_.ReplaceText(if_stmt, replace_simple_text(s)); + } + + return json::array(); +} + +json BSFieldOrderRule::process_to_json(clang::ForStmt *for_stmt, Env *env_ptr) { + return json::array(); +} + +json BSFieldOrderRule::process_to_json(clang::CXXForRangeStmt *cxx_for_range_stmt, Env *env_ptr) { + return json::array(); +} + +json BSFieldOrderRule::process_to_json(clang::CXXMemberCallExpr* cxx_member_call_expr, Env* env_ptr) { + if (cxx_member_call_expr == nullptr || env_ptr == nullptr) { + return json::array(); + } + + auto expr_info_ptr = parse_expr(cxx_member_call_expr, env_ptr); + if (expr_info_ptr == nullptr) { + LOG(ERROR) << "parse expr failed! 
expr: " << stmt_to_string(cxx_member_call_expr); + return json::array(); + } + + clang::Expr* caller = cxx_member_call_expr->getImplicitObjectArgument(); + auto caller_expr_info_ptr = parse_expr(caller, env_ptr); + + if (caller_expr_info_ptr == nullptr) { + LOG(ERROR) << "parse expr failed! expr: " << stmt_to_string(caller); + return json::array(); + } + + std::string bs_var_name = caller_expr_info_ptr->origin_expr_str(); + bool contains_loop_var = expr_info_ptr->contains_loop_var(); + + // 先找到 decl_stmt, 再找到 enum 的 decl_stmt, 删掉 decl_stmt。 + // 然后将新的定义写入到正确的 env 中。 + if (env_ptr->is_bslog_field_var_decl(bs_var_name)) { + if (env_ptr->is_in_loop_init() || env_ptr->is_in_loop_body() || env_ptr->is_in_if_cond()) { + if (contains_loop_var) { + // 如果包含循环变量,则必须找到最外层的 loop + if (auto *outer_env_ptr = env_ptr->mutable_outer_loop_parent()) { + if (!outer_env_ptr->is_decl_in_cur_env(bs_var_name)) { + fix_bs_field_decl(bs_var_name, env_ptr, outer_env_ptr); + } + } else { + LOG(ERROR) << "cannot find outer loop parent! bs_var_name: " << bs_var_name; + } + } else { + if (!env_ptr->is_parent_loop() && !env_ptr->is_decl_in_parent_env(bs_var_name)) { + fix_bs_field_decl(bs_var_name, env_ptr, env_ptr->parent()); + } else if (env_ptr->is_parent_loop()) { + if (auto *outer_env_ptr = env_ptr->mutable_outer_loop_parent()) { + if (!outer_env_ptr->is_decl_in_cur_env(bs_var_name)) { + fix_bs_field_decl(bs_var_name, env_ptr, outer_env_ptr); + } else { + set_bs_field_visited(bs_var_name, env_ptr); + } + } + } else { + set_bs_field_visited(bs_var_name, env_ptr); + } + } + } else if (env_ptr->is_in_if_body()) { + if (contains_loop_var) { + if (auto *outer_env_ptr = env_ptr->mutable_outer_loop_parent()) { + if (!outer_env_ptr->is_decl_in_cur_env(bs_var_name)) { + fix_bs_field_decl(bs_var_name, env_ptr, outer_env_ptr); + } else { + set_bs_field_visited(bs_var_name, env_ptr); + } + } else { + LOG(ERROR) << "cannot find outer loop parent! 
bs_var_name: " + << bs_var_name; + } + } else { + if (env_ptr->is_parent_if()) { + if (env_ptr->is_in_parent_else()) { + if (auto *outer_parent_ptr = env_ptr->mutable_outer_if_parent()) { + if (is_has_value_in_bs_field_params(bs_var_name, env_ptr)) { + // 单值需要判断这种情况,list 和 map 不用。 + if (!outer_parent_ptr->is_decl_in_cur_env(bs_var_name)) { + fix_bs_field_decl(bs_var_name, env_ptr, outer_parent_ptr); + } else { + set_bs_field_visited(bs_var_name, env_ptr); + } + } else if (!outer_parent_ptr->is_decl_in_cur_env(bs_var_name)) { + fix_bs_field_decl(bs_var_name, env_ptr, outer_parent_ptr); + } else { + set_bs_field_visited(bs_var_name, env_ptr); + } + } else { + set_bs_field_visited(bs_var_name, env_ptr); + } + } + } else { + if (is_has_value_in_bs_field_params(bs_var_name, env_ptr)) { + // 单值需要判断这种情况,list 和 map 不用。 + if (!env_ptr->is_decl_in_parent_env(bs_var_name)) { + fix_bs_field_decl(bs_var_name, env_ptr, env_ptr->parent()); + } else { + set_bs_field_visited(bs_var_name, env_ptr); + } + } else if (!env_ptr->is_decl_in_cur_env(bs_var_name)) { + fix_bs_field_decl(bs_var_name, env_ptr, env_ptr); + } else { + set_bs_field_visited(bs_var_name, env_ptr); + } + } + } + } + } + + return json::array(); +} + +json BSFieldOrderRule::process_to_json(clang::UnaryOperator *unary_operator, Env *env_ptr) { + auto expr_info_ptr = parse_expr(unary_operator, env_ptr); + + return json::array(); +} + +void BSFieldOrderRule::set_bs_field_visited(const std::string& bs_var_name, Env* env_ptr) { + if (auto *bs_field_detail_ptr = env_ptr->find_bs_field_detail_ptr_by_var_name(bs_var_name)) { + auto it = bs_field_detail_ptr->find(bs_var_name); + if (it != bs_field_detail_ptr->end()) { + it->second.is_visited = true; + } else { + LOG(ERROR) << "cannot find def in bs_field_info, bs_var_name: " + << bs_var_name; + } + } else { + LOG(ERROR) << "cannot find bs_field_info, bs_var_name: " << bs_var_name; + } +} + +bool BSFieldOrderRule::is_has_value_in_bs_field_params(const std::string& bs_var_name, 
Env* env_ptr) const { + if (env_ptr == nullptr) { + LOG(ERROR) << "env_ptr is nullptr!"; + return false; + } + + if (auto *bs_field_detail_ptr = env_ptr->find_bs_field_detail_ptr_by_var_name(bs_var_name)) { + auto it = bs_field_detail_ptr->find(bs_var_name); + if (it != bs_field_detail_ptr->end()) { + if (it->second.is_has_value_in_params) { + return true; + } else { + return false; + } + } else { + LOG(ERROR) << "cannot find def in bs_field_info, bs_var_name: " + << bs_var_name; + return false; + } + } else { + LOG(ERROR) << "cannot find bs_field_info, bs_var_name: " << bs_var_name; + return false; + } +} + +// ad_algorithm/bs_feature/fast/impl/bs_extract_ad_first_industry_v3_dense_onehot_v1.cc + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/BSFieldOrderRule.h b/convert/rule/BSFieldOrderRule.h new file mode 100644 index 0000000..055cdce --- /dev/null +++ b/convert/rule/BSFieldOrderRule.h @@ -0,0 +1,75 @@ +#pragma once + +#include +#include +#include + +#include +#include "clang/AST/Expr.h" + +#include "./RuleBase.h" +#include "../Type.h" +#include "../handler/StrictRewriter.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; +using nlohmann::json; + +/// 修复 BS 特征类中的变量声明顺序。 +/// +/// 尽量将 bs 字段获取逻辑放到使用前最近的地方,减少不必要的计算。 +/// +/// 示例: if 条件中不用的字段都应该放到 if body 中。 +/// teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_combine_reco_user_realtime_action_and_creative.cc +class BSFieldOrderRule: public RuleBase { + public: + using RuleBase::process; + using RuleBase::process_to_json; + + explicit BSFieldOrderRule(clang::Rewriter& rewriter): RuleBase(rewriter, "BSFieldOrderRule") {} // NOLINT + + /// 兼容接口,返回空 json。 + + /// 删除位置不对的 decl_stmt, 并且通过 decl_stmt 修复 bs_field_enum 的定义位置。 + json process_to_json(clang::DeclStmt* decl_stmt, Env* env_ptr) override; + + /// 修复 bs 变量以及 bs_field_enum 变量的声明位置。 + /// + /// bs 变量: + /// 判断当前 bs 变量的 decl 是否在正确的 env 中,如果不对则挪到正确的 env 中。 + /// 如果是在 if 的 cond 或者 
for 的 init 中,则其定义必须在父 env 中。 + /// 其他情况则必须在当前 env 中。 + /// + /// bs_field_enum: + /// bs_field_enum 的声明与使用肯定都是在 body 的同一 env 中。 + /// 如果当前 decl_ref 是 bs_field_enum, 并且在 decl_stmt 中,且 decl_stmt 包含 + /// GetSingular、BSRepeatedField 或者 BSMapField, 则可以将其声明与 var_name 绑定, 保存在 + /// env 中, 当之后遇到 var_name 时候则一起新增变量,同时将老的变量 decl 删掉。 + json process_to_json(clang::DeclRefExpr* decl_ref_expr, Env* env_ptr) override; + + /// 添加新增的变量定义。 + json process_to_json(clang::IfStmt *if_stmt, Env *env_ptr) override; + + /// 添加新增的变量定义。 + json process_to_json(clang::ForStmt *for_stmt, Env *env_ptr) override; + + /// 添加新增的变量定义。 + json process_to_json(clang::CXXForRangeStmt* cxx_for_range_stmt, Env* env_ptr) override; + + json process_to_json(clang::CXXMemberCallExpr* cxx_member_call_expr, Env* env_ptr) override; + + json process_to_json(clang::UnaryOperator *unary_operator, Env *env_ptr); + + void fix_bs_field_decl(const std::string& bs_var_name, Env* env_ptr, Env* target_env_ptr); + + void set_bs_field_visited(const std::string& bs_var_name, Env* env_ptr); + + bool is_has_value_in_bs_field_params(const std::string& bs_var_name, Env* env_ptr) const; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/CommonInfoRule.cpp b/convert/rule/CommonInfoRule.cpp new file mode 100644 index 0000000..f1e0b79 --- /dev/null +++ b/convert/rule/CommonInfoRule.cpp @@ -0,0 +1,690 @@ +#include +#include +#include "../Env.h" +#include "../Tool.h" +#include "../ExprInfo.h" +#include "../ExprParser.h" +#include "../info/CommonInfoMultiIntList.h" +#include "../handler/StrictRewriter.h" +#include "CommonInfoRule.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void CommonInfoRule::process(clang::IfStmt* if_stmt, Env* env_ptr) { + const auto& if_info = env_ptr->cur_if_info(); + if (!if_info) { + return; + } + + clang::Stmt* then_stmt = if_stmt->getThen(); + CommonInfoBodyText body_text; + if (if_info->is_check_common_info_cond()) { + body_text 
= get_common_info_body_text(if_stmt->getThen(), env_ptr); + } + + // commmon info 中的非 loop break, 需要特殊处理。 + // 示例: teams/ad/ad_algorithm/feature/fast/impl/extract_user_sjh_follow.h + // for (const ::auto_cpp_rewriter::CommonInfoAttr& userAttr : adlog.user_info().common_info_attr()) { + // if (userAttr.name_value() == ::auto_cpp_rewriter::CommonInfoAttr_Name_USER_SJH_FOLLOW_LIST) { + // int times = userAttr.int_list_value_size() / 3; + // if (userAttr.int_list_value_size() % 3 != 0) { + // break; + // } + // ... + // break; + // } + // } + // 但是也有整体逻辑在 size_method 判断里面的,需要准确区分开。 + // 示例: teams/ad/ad_algorithm/feature/fast/impl/extract_combine_eds_comp_item_conv_list_ts.h + // if (attr.name_value() == auto_cpp_rewriter::CommonInfoAttr_Name_COMPUTE_COMP_CONV_PRODUCT_NAME_LIST) { + // if (attr.int_list_value_size() > 0) { + // const auto& val = attr.int_list_value(); + // int tcnt = 5; + // for (int i = 0; i < attr.int_list_value_size() && i < 50; i++) { + // if (attr.int_list_value(i) == prod_id) { + // if (i > ddeadline[tcnt]) { + // tcnt -= 1; + // if (tcnt < 0) + // break; + // } + // prod_cnts[tcnt] += 1; + // } + // } + // } + // } + // 整体是 if_stmt, 也包含 size_method, 但是不上上面的 userAttr.int_list_value_size() % 3 的情况。 + // 可能会有多个 common info detail, 因此也不能直接替换为 return。 + // + // 按如下逻辑处理。 + // 1. 在 if cond 表达式中判断,如果是判断 expr != num 的形式,并且 expr 是 list_size_method 或者 + // list_size_method % x 的形式,则标记 if 语句,并且在 common info detail 中记录 x 和 num 的值。 + // 2. 如果 if 的 body 是 break, 则将 if_stmt 整体删除。 + // 3. 
在 common info detail 中进行替换的时候,将整体逻辑包含在 if (expr == num) 的语句中。 + if (if_info->is_check_common_info_list_size_not_equal() && if_info->is_body_only_break()) { + rewriter_.ReplaceText(if_stmt, ""); + } + + if (if_info->is_check_common_info_map_end()) { + if (if_info->is_check_not_equal()) { + if (const auto& left_expr_str = if_info->left_expr_str()) { + std::string new_text = *left_expr_str + ".second"; + rewriter_.ReplaceText(if_stmt->getCond(), new_text); + } + } + } + + // 简单处理,假设逻辑都一样。 + if (auto& common_info_multi_map = env_ptr->mutable_common_info_multi_map()) { + if (if_info->is_check_common_info_multi_cond()) { + std::ostringstream oss_cond; + oss_cond << "if (!" << common_info_multi_map->attr_name() << ".is_empty()) {\n" + << " " << body_text.get_bs_pre_text() + << " " << body_text.get_bs_loop_text() + << " " << body_text.get_bs_post_text() + << " }\n"; + + rewriter_.ReplaceText(if_stmt, oss_cond.str()); + } + + // 替换构造函数里的 common info enum + if (auto constructor_info = env_ptr->mutable_constructor_info()) { + std::vector& common_info_enums = constructor_info->mutable_common_info_enums(); + for (size_t i = 0; i < common_info_enums.size(); i++) { + absl::optional enum_value_opt = find_common_attr_int_value(common_info_enums[i].enum_ref()); + if (enum_value_opt.has_value()) { + std::string bs_enum_str = common_info_multi_map->prefix() + "_key_" + std::to_string(enum_value_opt.value()); + common_info_enums[i].set_bs_enum_str(bs_enum_str); + std::ostringstream oss_enum_str; + oss_enum_str << "static_cast(BSFieldEnum::" << bs_enum_str + << ")"; + rewriter_.ReplaceText(common_info_enums[i].enum_ref(), oss_enum_str.str()); + } else { + LOG(INFO) << "cannot find enum from expr: " << stmt_to_string(common_info_enums[i].enum_ref()); + } + } + } + } + + // 在遍历 common info list 或者 map 的循环中, 如 + // for (int64 value : attr.int_list_value()) { ... 
} + if (env_ptr->is_parent_common_info_loop()) { + if (if_info->is_check_common_info_normal_cond()) { + // common info if 判断 + // if (attr_name == auto_cpp_rewriter::CommonInfoAttr_NameExtendTwo_LSP_LATEST_LIVE_SEGMENT_INFO_TIMESTAMP) { + // .... + // continue; + // } + if (auto& common_info_normal = env_ptr->mutable_common_info_normal()) { + if (auto& common_info_index = if_info->common_info_index()) { + if (auto& common_info_detail = common_info_normal->last_mutable_common_info_detail()) { + common_info_detail->set_bs_rewritten_text(body_text); + } + } + } + } else { + // 先遍历 common info list 或者 map, 再判断枚举的情况。 + // 非 common info if。 + // for (int64 value : attr.int_list_value()) { + // if (x == 5) { + // ... + // } + // if (attr.name_value() == + // auto_cpp_rewriter::CommonInfoAttr_NameExtendTwo_LSP_XXXXX) { + // ... + // } + // } + // + // 需要将 if (x == 5) { ... } 保存起来。 + // 此种情况必须是遍历 list 或者 map 在外面,判断枚举是在里面,因此 loop 的 parent 必须是 common info + // prefix 所对应的 loop, 而不能是 if。 + if (auto prepare_env = env_ptr->parent()->parent()) { + if (const auto& switch_case_info = prepare_env->cur_switch_case_info()) { + // 忽略 + } else if (prepare_env->is_loop()) { + if (auto &common_info_prepare = prepare_env->cur_mutable_common_info_prepare()) { + common_info_prepare->add_other_if_stmt_str(rewriter_.getRewrittenText(if_stmt)); + } + } + } + } + } else if (env_ptr->is_parent_loop()) { + // 普通 common info 判断 + if (auto& common_info_normal = env_ptr->mutable_common_info_normal()) { + if (if_info->is_check_common_info_normal_cond()) { + if (auto& common_info_value = if_info->common_info_value()) { + auto common_info_detail = + common_info_normal->mutable_common_info_detail_by_value(*common_info_value); + if (common_info_detail != nullptr && common_info_detail->bs_body_text() == "") { + common_info_detail->set_bs_rewritten_text(body_text); + } + } else if (auto& common_info_index = if_info->common_info_index()) { + auto& common_info_detail = 
common_info_normal->last_mutable_common_info_detail(); + common_info_detail->set_bs_rewritten_text(body_text); + } + } + } + + if (auto& common_info_fixed_list = env_ptr->mutable_common_info_fixed_list()) { + if (auto last_detail = common_info_fixed_list->last_mutable_common_info_detail()) { + last_detail->set_bs_rewritten_text(body_text); + } + } + } + + // if else 获取 common info value + // 示例: teams/ad/ad_algorithm/feature/fast/impl/extract_universe_position_status.h + // for (auto &attr : ad_log.context().info_common_attr()) { + // if (attr.name_value() == ::auto_cpp_rewriter::ContextInfoCommonAttr::AD_STYLE) { + // // AD_STYLE = 6, 中台广告位样式, 1: 信息流、2: 激励视频、3:插屏、4:开屏、5:banner + // AddFeature(GetFeature(FeaturePrefix::UNIVERSE_CONTEXT_AD_STYLE, attr.int_value()), 1.0, result); + // } else if (attr.name_value() == ::auto_cpp_rewriter::ContextInfoCommonAttr::COOPERATION_MODE) { + // // OOPERATION_MODE = 7; // 中台对外合作模式,0:未知;2:API;3:H5 + // AddFeature(GetFeature(FeaturePrefix::UNIVERSE_CONTEXT_COOPERATION_MODE, attr.int_value()), + // 1.0, result); + // } else if (attr.name_value() == ::auto_cpp_rewriter::ContextInfoCommonAttr::MEDIUM_ATTRIBUTE) { + // // LOG_MEDIUM = 4 , 2: 站内流量, 4: 站外流量 + // AddFeature(GetFeature(FeaturePrefix::CONTEXT_MEDIUM_ATTRIBUTE, attr.int_value()), 1.0, result); + // } + // } + if (if_info->is_check_common_info_normal_cond()) { + if (auto& common_info_value = if_info->common_info_value()) { + if (auto& common_info_normal = env_ptr->mutable_common_info_normal()) { + auto common_info_detail = + common_info_normal->mutable_common_info_detail_by_value(*common_info_value); + if (common_info_detail != nullptr && common_info_detail->bs_body_text() == "") { + common_info_detail->set_bs_rewritten_text(body_text); + } + } + } + } +} + +void CommonInfoRule::process(clang::ForStmt* for_stmt, Env* env_ptr) { +} + +// CommonInfoMultiIntList +void CommonInfoRule::process(clang::CXXForRangeStmt* cxx_for_range_stmt, Env* env_ptr) { + if (const auto& 
common_info_multi_int_list = env_ptr->get_common_info_multi_int_list()) { + if (const auto feature_info = env_ptr->get_feature_info()) { + std::ostringstream oss; + + const std::unordered_map &map_vec_connections = + common_info_multi_int_list->map_vec_connections(); + + const std::vector &attr_map_names = common_info_multi_int_list->attr_map_names(); + for (size_t i = 0; i < attr_map_names.size(); i++) { + auto it = map_vec_connections.find(attr_map_names[i]); + if (it != map_vec_connections.end()) { + const std::string vec_name = it->second; + const std::vector &int_values = feature_info->get_int_list_member_values(vec_name); + if (int_values.size() > 0) { + for (int v : int_values) { + oss << attr_map_names[i] << "[" << v << "] = std::move(" + << common_info_multi_int_list->get_functor_name(v) << "(bs, pos));\n"; + } + } else { + LOG(INFO) << "cannot find int_values from feature_info, vec_name: " << vec_name; + } + } else { + LOG(INFO) << "cannot find vec_name in map_vec_connections, attr_map_name: " << attr_map_names[i]; + } + } + + const std::vector &attr_size_map_names = common_info_multi_int_list->attr_size_map_names(); + for (size_t i = 0; i < attr_size_map_names.size(); i++) { + auto it = map_vec_connections.find(attr_size_map_names[i]); + if (it != map_vec_connections.end()) { + const std::string vec_name = it->second; + const std::vector &int_values = feature_info->get_int_list_member_values(vec_name); + const std::string list_map_name = + common_info_multi_int_list->find_correspond_list_map(attr_size_map_names[i]); + if (list_map_name.size() > 0) { + if (int_values.size() > 0) { + for (int v : int_values) { + oss << attr_size_map_names[i] << "[" << v << "] = " + << list_map_name << "[" << v << "]" << ".size();\n "; + } + } else { + LOG(INFO) + << "cannot find int_values from feature_info, vec_name: " + << vec_name; + } + } else { + LOG(INFO) << "cannot find list_map_name, attr_size_map_name: " << attr_size_map_names[i]; + } + } else { + LOG(INFO) + << 
"cannot find vec_name in map_vec_connections, attr_size_map_name: " + << attr_size_map_names[i]; + } + } + + rewriter_.ReplaceText(cxx_for_range_stmt, oss.str()); + } + } + + if (env_ptr->get_method_name() == "helper") { + if (const auto& loop_info = env_ptr->cur_loop_info()) { + if (loop_info->is_common_info_list_map()) { + const auto& loop_var = loop_info->loop_var(); + if (loop_var.size() > 0) { + if (const auto &common_info_prepare = env_ptr->get_common_info_prepare()) { + if (const auto &attr_name = common_info_prepare->attr_name()) { + std::ostringstream oss; + oss << "for (size_t idx = 0; idx < " << *attr_name << ".size(); idx++) {\n " + << " auto " << loop_var << " = " << *attr_name << ".Get(idx);\n " + << get_compound_stmt_rewritten_text(cxx_for_range_stmt->getBody()) + << "\n}\n "; + + rewriter_.ReplaceText(cxx_for_range_stmt, oss.str()); + } + } + } else { + LOG(INFO) << "cannot find loop_var!"; + } + } + } + } +} + +void CommonInfoRule::process(clang::SwitchStmt* switch_stmt, Env* env_ptr) { + if (const auto& common_info_normal = env_ptr->get_common_info_normal()) { + env_ptr->update_template_common_info_values(); + std::ostringstream oss_res; + + const std::vector>& common_info_details = + common_info_normal->common_info_details(); + + for (size_t i = 0; i < common_info_details.size(); i++) { + oss_res << common_info_normal->get_bs_rewritten(&rewriter_, i) + << "\n\n"; + } + + rewriter_.ReplaceText(switch_stmt, oss_res.str()); + } +} + +void CommonInfoRule::process(clang::CaseStmt* case_stmt, Env* env_ptr) { + if (const auto& switch_case_info = env_ptr->get_switch_case_info()) { + if (auto& common_info_normal = env_ptr->mutable_common_info_normal()) { + if (auto& common_info_index = switch_case_info->common_info_index()) { + auto& common_info_detail = common_info_normal->last_mutable_common_info_detail(); + + CommonInfoBodyText body_text = get_common_info_body_text(case_stmt->getSubStmt(), env_ptr); + 
common_info_detail->set_bs_rewritten_text(body_text); + + std::string s = body_text.bs_body_text(); + + // 处理 case 共用逻辑的情况 + // teams/ad/ad_algorithm/feature/fast/impl/extract_search_query_combine_match_num.cc + // switch (photoAttr.name_value()) { + // case ::auto_cpp_rewriter::CommonInfoAttr_NameExtendOne_SEARCH_ADS_PARSER_DESCRIPTION: + // case ::auto_cpp_rewriter::CommonInfoAttr_NameExtendOne_SEARCH_ADS_PARSER_PRODUCT_NAME: + // case ::auto_cpp_rewriter::CommonInfoAttr_NameExtendOne_SEARCH_ADS_PARSER_COVER_INFO: + // case ::auto_cpp_rewriter::CommonInfoAttr_NameExtendOne_SEARCH_ADS_PARSER_CATEGORY_TAG: + // case ::auto_cpp_rewriter::CommonInfoAttr_NameExtendOne_SEARCH_ADS_PARSER_PHOTO_OCR: + // case ::auto_cpp_rewriter::CommonInfoAttr_NameExtendOne_SEARCH_ADS_PARSER_TEXT_ALL_V1: + // for (int i = 0; i < photoAttr.int_list_value_size() && i < 5; i++) { + // uint64_t des = static_cast(photoAttr.int_list_value(i)); + // if (rec.find(des) == rec.end()) { + // rec.insert(des); + // } + // } + // break; + if (s.size() > 0) { + for (int i = common_info_normal->common_info_details_size() - 2; i >= 0; i--) { + auto& detail = common_info_normal->mutable_common_info_detail(i); + + if (detail->bs_body_text().size() > 0) { + break; + } + + detail->copy_except_int_value(common_info_detail.get()); + env_ptr->add_common_info_detail_def(*detail); + } + } + } + } + } +} + +void CommonInfoRule::process(clang::CXXOperatorCallExpr* cxx_operator_call_expr, Env* env_ptr) { + auto expr_info_ptr = parse_expr(cxx_operator_call_expr, env_ptr); + if (expr_info_ptr == nullptr) { + return; + } +} + +void CommonInfoRule::process(clang::DeclStmt* decl_stmt, Env* env_ptr) { + if (clang::VarDecl *var_decl = dyn_cast(decl_stmt->getSingleDecl())) { + // teams/ad/ad_algorithm/feature/fast/impl/extract_user_rtl_product_name_shallow_action_7d.h + // std::unordered_map *> + // action_name2list; + // + // 简单处理,都当做 user int list 处理。 + if (tool::is_map_repeated_int_list_type(var_decl->getType())) { + 
std::string var_name = var_decl->getNameAsString(); + + std::ostringstream oss; + oss << "std::unordered_map> " << var_name << ";\n"; + + rewriter_.ReplaceText(decl_stmt, oss.str()); + } + + // teams/ad/ad_algorithm/feature/fast/impl/extract_user_rtl_product_name_shallow_action_7d.h + // auto action_list = action_name2list[action_name]; + if (tool::is_repeated_proto_ptr(var_decl->getType())) { + static std::regex p("auto "); + rewriter_.ReplaceText(decl_stmt, std::regex_replace(stmt_to_string(decl_stmt), p, "auto&")); + } + + if (auto& common_info_prepare = env_ptr->cur_mutable_common_info_prepare()) { + if (const auto& loop_info = env_ptr->cur_loop_info()) { + if (loop_info->loop_stage() == LoopStage::BODY) { + // 去掉 CommonInfoAttr 变量 + // const ::auto_cpp_rewriter::CommonInfoAttr & itemAttr = live_info->common_info_attr(i); + if (!tool::is_common_info_enum(var_decl->getType())) { + common_info_prepare->add_other_decl_stmt_str(stmt_to_string(decl_stmt)); + } + } + } + } + } +} + +void CommonInfoRule::process(clang::CXXMemberCallExpr* cxx_member_call_expr, Env* env_ptr) { + auto expr_info_ptr = parse_expr(cxx_member_call_expr, env_ptr); + if (expr_info_ptr == nullptr) { + return; + } + + if (expr_info_ptr->parent() != nullptr && + expr_info_ptr->parent()->is_repeated_proto_ptr()) { + std::string parent_str = expr_info_ptr->parent()->origin_expr_str(); + std::regex p(parent_str + "\\->"); + std::string s = std::regex_replace(expr_info_ptr->origin_expr_str(), p, parent_str + "."); + rewriter_.ReplaceText(cxx_member_call_expr, s); + } + + if (expr_info_ptr->callee_name() == "helper") { + if (expr_info_ptr->call_expr_params_size() > 2) { + ExprInfo *param1 = expr_info_ptr->call_expr_param(1); + if (param1 != nullptr) { + if (param1->is_common_info_struct_type()) { + if (const auto &common_info_normal = env_ptr->get_common_info_normal()) { + if (auto last_detail = common_info_normal->last_common_info_detail()) { + std::string var_name = 
env_ptr->find_new_var_name(last_detail->get_bs_enum_str()); + if (var_name.size() > 0) { + rewriter_.ReplaceText(param1->origin_expr(), var_name); + } else { + LOG(INFO) << "cannot find var_name in env_ptr for common info " + "helper, bs_enum_str: " + << last_detail->get_bs_enum_str(); + } + } + } + } + } + } + } + + if (expr_info_ptr->is_common_info_scalar_method() || + expr_info_ptr->is_common_info_list_method()) { + if (const auto& common_info_fixed_list = env_ptr->get_common_info_fixed_list()) { + if (auto last_detail = common_info_fixed_list->last_common_info_detail()) { + rewriter_.ReplaceText(cxx_member_call_expr, last_detail->get_bs_var_name(env_ptr)); + } + } else if (const auto &common_info_normal = env_ptr->get_common_info_normal()) { + if (auto last_detail = common_info_normal->last_common_info_detail()) { + rewriter_.ReplaceText(cxx_member_call_expr, last_detail->get_bs_var_name(env_ptr)); + } + } + } + + // 示例: ad/ad_algorithm/feature/fast/impl/extract_combine_realtime_action_match_cnt_v2.h + // auto id = action_list[key_idx].int_list_value(i); + if (expr_info_ptr->is_common_info_list_method()) { + if (auto feature_info = env_ptr->get_feature_info()) { + if (feature_info->has_common_info_multi_map()) { + if (expr_info_ptr->parent() != nullptr && expr_info_ptr->call_expr_params_size() == 1) { + if (auto param = expr_info_ptr->call_expr_param(0)) { + std::string param_text = rewriter_.getRewrittenText(param); + std::string text = expr_info_ptr->parent()->origin_expr_str() + ".Get(" + param_text + ")"; + LOG(INFO) << "replace common info multi map list value, expr: " + << expr_info_ptr->origin_expr_str() + << ", text: " << text; + rewriter_.ReplaceText(cxx_member_call_expr, text); + } + } + } + } + } + + if (expr_info_ptr->is_common_info_map_find_expr()) { + if (const auto& common_info_normal = env_ptr->get_common_info_normal()) { + if (const auto last_detail = common_info_normal->last_common_info_detail()) { + std::string var_name = 
last_detail->get_bs_var_def_name(env_ptr); + if (var_name.size() > 0) { + if (expr_info_ptr->call_expr_params_size() == 1) { + if (auto param = expr_info_ptr->call_expr_param(0)) { + std::ostringstream oss; + oss << var_name << ".Get(" << param->origin_expr_str() << ")"; + LOG(INFO) << "replace common info map find expr: " << expr_info_ptr->origin_expr_str() + << ", text: " << oss.str(); + rewriter_.ReplaceText(cxx_member_call_expr, oss.str()); + } + } + } + } + } + } + + if (expr_info_ptr->is_common_info_list_size_method()) { + if (const auto& common_info_normal = env_ptr->get_common_info_normal()) { + if (auto last_detail = common_info_normal->last_common_info_detail()) { + std::string var_name = last_detail->get_bs_var_def_name(env_ptr); + if (var_name.size() > 0) { + LOG(INFO) << "replace cxx_member_call_expr: " << expr_info_ptr->origin_expr_str() + << ", text: " << var_name + ".size()"; + rewriter_.ReplaceText(cxx_member_call_expr, var_name + ".size()"); + } + } + } else if (const auto& common_info_fixed_list = env_ptr->get_common_info_fixed_list()) { + if (auto last_detail = common_info_fixed_list->last_common_info_detail()) { + std::string var_name = last_detail->get_bs_var_def_name(env_ptr); + if (var_name.size() > 0) { + LOG(INFO) << "replace cxx_member_call_expr: " << expr_info_ptr->origin_expr_str() + << ", text: " << var_name + ".size()"; + rewriter_.ReplaceText(cxx_member_call_expr, var_name + ".size()"); + } + } + } else { + if (const auto feature_info = env_ptr->get_feature_info()) { + if (feature_info->has_common_info_multi_map()) { + if (expr_info_ptr->parent() != nullptr) { + std::string text = expr_info_ptr->parent()->origin_expr_str() + ".size()"; + LOG(INFO) << "replace cxx_member_call_expr: " << expr_info_ptr->origin_expr_str() + << ", text: " << text; + rewriter_.ReplaceText(cxx_member_call_expr, text); + } + } + } + } + } + + // 示例: teams/ad/ad_algorithm/feature/fast/impl/extract_creative_support_tag.h + // int max_num = 
attr.string_list_value().size() > 20 ? 20 : attr.string_list_value().size(); + if (expr_info_ptr->callee_name() == "size") { + if (auto parent = expr_info_ptr->parent()) { + if (parent->is_common_info_list_method()) { + std::string bs_text = parent->get_bs_field_value(); + if (bs_text.size() > 0) { + rewriter_.ReplaceText(cxx_member_call_expr, bs_text + ".size()"); + } + } + } + } +} + +void CommonInfoRule::process(clang::CallExpr* call_expr, Env* env_ptr) { + auto expr_info_ptr = parse_expr(call_expr, env_ptr); + if (expr_info_ptr == nullptr) { + return; + } + + if (expr_info_ptr->callee_name() == "helper") { + if (expr_info_ptr->call_expr_params_size() > 2) { + ExprInfo *param1 = expr_info_ptr->call_expr_param(1); + if (param1 != nullptr) { + if (param1->is_common_info_struct_type()) { + if (const auto &common_info_normal = env_ptr->get_common_info_normal()) { + if (auto last_detail = common_info_normal->last_common_info_detail()) { + std::string var_name = env_ptr->find_new_var_name(last_detail->get_bs_enum_str()); + if (var_name.size() > 0) { + rewriter_.ReplaceText(param1->origin_expr(), var_name); + } else { + LOG(INFO) << "cannot find var_name in env_ptr for common info " + "helper, bs_enum_str: " + << last_detail->get_bs_enum_str(); + } + } + } + } + } + } + } +} + +void CommonInfoRule::process(clang::BinaryOperator* binary_operator, Env* env_ptr) { + auto expr_info_ptr = parse_expr(binary_operator, env_ptr); +} + +CommonInfoBodyText CommonInfoRule::get_common_info_body_text(clang::Stmt* stmt, Env* env_ptr) { + // 区分 common info loop 前后的语句,如果没有 common info + // loop,则都作为之前的语句。 loop 以被替换为 body 语句。 + // + // infer ItemFilter 中以下语句需要删掉,改写的 bs 代码中不需要此逻辑: + // 1. uint64_t name_value = attr.name_value(); + // 2. 
if (attr.int_list_value().empty()) { + // break; + // } + // + // 示例: teams/ad/ad_nn/utils/item_filter.h + // InnerLoopUnionHighCostFilterWeakenRoas + // + // for (const auto& attr : item.ad_dsp_info().common_info_attr()) { + // uint64_t name_value = attr.name_value(); + // if (name_value != ::auto_cpp_rewriter::CommonInfoAttr_Name_ECOM_REALTIME_DIRECT_CREATIVE_RECENT_COST) { + // continue; + // } + // if (attr.int_list_value().empty()) { + // break; + // } + // for (auto val : attr.int_list_value()) { cost_total += val; } + // } + std::ostringstream oss_pre; + std::ostringstream oss_loop_common_info; + std::ostringstream oss_post; + + absl::optional loop_name; + + if (clang::CompoundStmt* compound_stmt = dyn_cast(stmt)) { + for (auto it = compound_stmt->child_begin(); it != compound_stmt->child_end(); + it++) { + if (clang::BreakStmt *break_stmt = dyn_cast(*it)) { + continue; + } + + if (clang::ContinueStmt *continue_stmt = dyn_cast(*it)) { + continue; + } + + // 去掉 + // uint64_t name_value = attr.name_value(); + if (clang::DeclStmt* decl_stmt = dyn_cast(*it)) { + if (clang::VarDecl* var_decl = dyn_cast(decl_stmt->getSingleDecl())) { + if (var_decl->getNameAsString() == "name_value") { + continue; + } + } + } + + // 去掉 + // if (attr.int_list_value().empty()) { + // break; + // } + if (clang::IfStmt* if_stmt = dyn_cast(*it)) { + clang::Expr* cond_expr = if_stmt->getCond(); + auto expr_info_ptr = parse_expr(cond_expr, env_ptr); + if (expr_info_ptr != nullptr && expr_info_ptr->is_common_info_empty_method()) { + continue; + } + } + + if (clang::DeclStmt* decl_stmt = dyn_cast(*it)) { + if (clang::VarDecl *var_decl = dyn_cast(decl_stmt->getSingleDecl())) { + if (var_decl->hasInit()) { + auto expr_info_ptr = parse_expr(var_decl->getInit(), env_ptr); + + // 忽略 int_list_value() 变量 + // const auto& values = itemAttr.int_list_value(); + // + // 忽略 map 变量 + // 示例: teams/ad/ad_algorithm/feature/fast/impl/extract_combine_user_applist_cate_match_num.h + // auto& 
install_app_cate_map = userAttr.map_int64_int64_value(); + // for (auto iter = install_app_cate_map.begin(); iter != install_app_cate_map.end(); iter++) { + // install_app_cate_map_fix[iter->first] = iter->second; + // } + // + // 示例: teams/ad/ad_algorithm/feature/fast/impl/extract_fanstop_ocpx_action_type.h + // const auto& bid_ocpc_type_map = attr.map_int64_int64_value(); + if (expr_info_ptr->is_common_info_list_method() || expr_info_ptr->is_common_info_map_method()) { + loop_name.emplace(var_decl->getNameAsString()); + continue; + } + } + } + } + + std::string new_text = rewriter_.getRewrittenText(*it); + new_text = fix_semicolon(new_text) + "\n"; + + std::string stmt_str = stmt_to_string(*it); + + if (loop_name) { + if (CommonAttrInfo::is_common_info_list_or_map_loop(stmt_str, *loop_name)) { + oss_loop_common_info << new_text; + continue; + } + } + + // 通过字符串判断 + if (tool::is_common_info_list_or_map_loop_stmt(*it)) { + oss_loop_common_info << new_text; + } else if (oss_loop_common_info.str().size() == 0) { + oss_pre << new_text; + } else { + oss_post << new_text; + } + } + } else { + std::string new_text = fix_semicolon(rewriter_.getRewrittenText(stmt)) + "\n"; + + if (tool::is_common_info_list_or_map_loop_stmt(stmt)) { + oss_loop_common_info << new_text; + } else { + oss_pre << new_text; + } + } + + CommonInfoBodyText body_text; + body_text.set_bs_rewritten_text(oss_pre.str(), oss_loop_common_info.str(), oss_post.str()); + + return body_text; +} + +} // convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/CommonInfoRule.h b/convert/rule/CommonInfoRule.h new file mode 100644 index 0000000..a416ffc --- /dev/null +++ b/convert/rule/CommonInfoRule.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include + +#include "clang/AST/Expr.h" + +#include "RuleBase.h" +#include "../handler/StrictRewriter.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +class CommonInfoRule: public RuleBase { + public: + using 
RuleBase::process; + explicit CommonInfoRule(clang::Rewriter& rewriter): RuleBase(rewriter, "CommonInfoRule") {} // NOLINT + + void process(clang::IfStmt* if_stmt, Env* env_ptr) override; + void process(clang::ForStmt* for_stmt, Env* env_ptr) override; + void process(clang::CXXForRangeStmt* cxx_for_range_stmt, Env* env_ptr) override; + void process(clang::SwitchStmt* switch_stmt, Env* env_ptr) override; + void process(clang::CaseStmt* case_stmt, Env* env_ptr) override; + void process(clang::CXXOperatorCallExpr* cxx_operator_call_expr, Env* env_ptr) override; + void process(clang::DeclStmt* decl_stmt, Env* env_ptr) override; + void process(clang::CXXMemberCallExpr* cxx_member_call_expr, Env* env_ptr) override; + void process(clang::CallExpr* call_expr, Env* env_ptr) override; + void process(clang::BinaryOperator* binary_operator, Env* env_ptr) override; + + // common info 里的逻辑 + CommonInfoBodyText get_common_info_body_text(clang::Stmt* stmt, Env* env_ptr); +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/DoubleListRule.cpp b/convert/rule/DoubleListRule.cpp new file mode 100644 index 0000000..14b6396 --- /dev/null +++ b/convert/rule/DoubleListRule.cpp @@ -0,0 +1,59 @@ +#include "../Env.h" +#include "../Tool.h" +#include "../ExprParser.h" +#include "../info/NewVarDef.h" +#include "DoubleListRule.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void DoubleListRule::process(clang::ForStmt* for_stmt, + Env* env_ptr) { +} + +void DoubleListRule::process(clang::CXXForRangeStmt* cxx_for_range_stmt, + Env* env_ptr) { + if (const auto& loop_info = env_ptr->cur_loop_info()) { + if (!loop_info->is_double_list_loop()) { + return; + } + + // 最多两层 proto list, 更多层不支持。 + // 示例: teams/ad/ad_algorithm/feature/fast/impl/extract_combine_user_app_list_ad_app_id.h + if (env_ptr->is_parent_loop()) { + if (!loop_info->is_repeated_common_info() && + loop_info->is_proto_list_loop() && + !loop_info->is_for_stmt()) { + 
auto expr_info_ptr = parse_expr(loop_info->loop_var_expr(), env_ptr); + if (expr_info_ptr != nullptr) { + std::ostringstream oss; + + std::string bs_enum_str = expr_info_ptr->get_bs_enum_str(); + if (const absl::optional& var = env_ptr->find_new_def(bs_enum_str)) { + oss << "for (size_t idx = 0; idx < " << var->name() << ".size(); idx++) {" + << get_loop_body(cxx_for_range_stmt->getBody()) + << "\n}\n"; + rewriter_.ReplaceText(cxx_for_range_stmt, oss.str()); + } else { + LOG(INFO) << "cannot find loop var def, bs_enum_str: " << bs_enum_str + << ", env address: " << env_ptr; + } + } + } + } + + if (!loop_info->is_repeated_common_info() && + loop_info->is_proto_list_loop() && + loop_info->is_child_proto_list_loop() && + !loop_info->is_for_stmt()) { + rewriter_.ReplaceText(cxx_for_range_stmt, + get_loop_body(cxx_for_range_stmt->getBody())); + return; + } + } +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/DoubleListRule.h b/convert/rule/DoubleListRule.h new file mode 100644 index 0000000..5ad379b --- /dev/null +++ b/convert/rule/DoubleListRule.h @@ -0,0 +1,42 @@ +#pragma once + +#include +#include + +#include "clang/AST/Expr.h" + +#include "RuleBase.h" +#include "../Type.h" +#include "../handler/StrictRewriter.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +/// 两层 list。遍历到第二层 for 循环时候才确定是两层 list。 +/// 如 teams/ad/ad_algorithm/feature/fast/impl/extract_combine_user_app_list_ad_app_id.h +/// for ( auto & device_info : device_info_list ) { +/// auto& app_package_list = device_info.app_package(); +/// for ( auto & app_package : app_package_list ) { +/// AddFeature(GetFeature(FeaturePrefix::COMBINE_USER_APP_LIST_AND_AD_APPID, +/// base::CityHash64(app_package.data(), +/// app_package.size()), app_id), 1.0, result); +/// } +/// } +class DoubleListRule: public RuleBase { + public: + using RuleBase::process; + explicit DoubleListRule(clang::Rewriter& rewriter): RuleBase(rewriter, 
"DoubleListRule") {} // NOLINT + + void process(clang::ForStmt* for_stmt, + Env* env_ptr) override; + + void process(clang::CXXForRangeStmt* cxx_for_range_stmt, + Env* env_ptr) override; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/GeneralRule.cpp b/convert/rule/GeneralRule.cpp new file mode 100644 index 0000000..4b8148e --- /dev/null +++ b/convert/rule/GeneralRule.cpp @@ -0,0 +1,562 @@ +#include +#include +#include +#include +#include "../Env.h" +#include "../Tool.h" +#include "../ExprInfo.h" +#include "../ExprParser.h" +#include "GeneralRule.h" +#include "../Deleter.h" +#include "../info/MethodInfo.h" +#include "../info/ActionMethodInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +// 以下几种 if 条件需要直接修改代码: +// 1. pos < adlog.item_size(), 判断 pos 。 +// 2. adlog.item_size() > pos, 所有逻辑都在 if body 里。 +// 3. common info 的枚举判断,需要去掉 if 判断条件,将 if 里的内容作为替换后的代码。 +// 4. action_detail 的 find 判断,需要改成 bs 的 if 判断, 需要从 Env 获取 bs 的 action_detail 变量。 +// +// 然后再删掉多余的变量定义, 并添加新的需要的变量定义。 +void GeneralRule::process(clang::IfStmt* if_stmt, Env* env_ptr) { + process_deleted_var(env_ptr); + + const auto& if_info = env_ptr->cur_if_info(); + if (!if_info) { + return; + } + + if (env_ptr->parent() != nullptr && env_ptr->parent()->is_root() && env_ptr->is_first_if()) { + env_ptr->parent()->set_first_if_stmt(if_stmt); + env_ptr->parent()->set_is_first_if_check_item_pos_cond(if_info->is_check_item_pos_cond()); + + if (if_info->is_check_item_pos_include()) { + env_ptr->parent()->set_is_first_if_check_item_pos_include_cond(true); + } + } + + if (!if_stmt->hasElseStorage() && env_ptr->is_all_if_stmt_deleted()) { + rewriter_.ReplaceText(if_stmt, ""); + return; + } + + if (if_info->is_check_item_pos_include()) { + rewriter_.ReplaceText(if_stmt, get_compound_stmt_rewritten_text(if_stmt->getThen())); + } + + if (env_ptr->get_all_new_defs().size() > 0) { + if (if_stmt->hasElseStorage()) { + if (clang::IfStmt* 
else_if_stmt = dyn_cast(if_stmt->getElse())) { + std::ostringstream oss; + std::string s = rewriter_.getRewrittenText(find_source_range(if_stmt->getElse())); + oss << "{\n " << s << "\n}\n"; + rewriter_.ReplaceText(if_stmt->getElse(), oss.str()); + } + } + } + + // 必须放在最后,否则会 core + rewriter_.InsertTextBefore(if_stmt, env_ptr->get_all_new_defs()); +} + +void GeneralRule::process(clang::ForStmt* for_stmt, Env* env_ptr) { + process_loop(for_stmt, env_ptr); +} + + +void GeneralRule::process(clang::CXXForRangeStmt* cxx_for_range_stmt, Env* env_ptr) { + process_loop(cxx_for_range_stmt, env_ptr); +} + +void GeneralRule::process(clang::DeclStmt* decl_stmt, Env* env_ptr) { + if (clang::VarDecl* var_decl = dyn_cast(decl_stmt->getSingleDecl())) { + if (var_decl->hasInit()) { + // 例: thread_local static std::vector<::auto_cpp_rewriter::CommonInfoAttr> action_list(user_attr_map_size); + clang::Expr* init_expr = var_decl->getInit(); + std::string s = rewriter_.getRewrittenText(decl_stmt); + + // 替换变量声明中的 ad 枚举类型 + if (tool::is_ad_enum(init_expr->getType())) { + static std::regex p_ad_enum("(::)?([^ ]+) ([^ ]+) ="); + s = std::regex_replace(s, p_ad_enum, std::string("::bs::") + "$2 $3 ="); + rewriter_.ReplaceText(decl_stmt, s); + } + + // 替换 std::string 为 absl::string_view + if (s.find("string") != std::string::npos) { + s = tool::fix_std_string(s); + + if (clang::CXXConstructExpr* cxx_construct_expr = dyn_cast(init_expr)) { + if (cxx_construct_expr->getNumArgs() > 0) { + auto expr_info_ptr = parse_expr(cxx_construct_expr->getArg(0), env_ptr); + if (expr_info_ptr->is_from_adlog()) { + if (s.find("std::string") != std::string::npos) { + rewriter_.ReplaceText(decl_stmt, tool::fix_string_view(s)); + } + } + } + } + } + + if (tool::is_common_info_vector(var_decl->getType())) { + // common info 对应的类型确定后才能替换 + auto f_replace = [](Env* env_ptr, clang::Stmt* stmt) { + static std::regex p("std::vector<.*>"); + + if (clang::DeclStmt* decl_stmt = dyn_cast(stmt)) { + if (clang::VarDecl* 
var_decl = dyn_cast(decl_stmt->getSingleDecl())) { + if (var_decl->hasInit()) { + // 例: thread_local static std::vector<::auto_cpp_rewriter::CommonInfoAttr> action_list(user_attr_map_size); + clang::Expr* init_expr = var_decl->getInit(); + auto expr_info_ptr = parse_expr(init_expr, env_ptr); + + std::string new_decl; + if (env_ptr->is_combine_feature() && !expr_info_ptr->is_item_field()) { + new_decl = std::regex_replace(stmt_to_string(decl_stmt), p, "std::vector>"); + } else { + new_decl = std::regex_replace(stmt_to_string(decl_stmt), p, "std::vector>"); + } + + return StmtReplacement{stmt, new_decl}; + } + } + } + + return StmtReplacement{stmt, stmt_to_string(stmt)}; + }; + + rewriter_.emplace_lazy_replace(env_ptr, decl_stmt, f_replace); + } + + std::string stmt_str = rewriter_.getRewrittenText(decl_stmt); + if (starts_with(stmt_str, "string ")) { + static std::regex p_string("string "); + std::string s = std::regex_replace(stmt_str, p_string, "std::string "); + rewriter_.ReplaceText(decl_stmt, s); + } + } + } + + std::string stmt_str = stmt_to_string(decl_stmt); + if (starts_with(stmt_str, "vector")) { + static std::regex p_vector("vector<(.*)>"); + static std::regex p_string("vector"); + std::string s = std::regex_replace(stmt_str, p_vector, "std::vector<$1>"); + s = std::regex_replace(s, p_string, "vector"); + LOG(INFO) << "replace decl_stmt: " << stmt_str << ", text: " << s; + rewriter_.ReplaceText(decl_stmt, s); + } +} + +void GeneralRule::process(clang::CallExpr* call_expr, Env* env_ptr) { + std::shared_ptr expr_info_ptr = parse_expr(call_expr, env_ptr); + if (expr_info_ptr == nullptr) { + LOG(INFO) << "expr_info_ptr is nullptr!"; + } + + // 示例: + // teams/ad/ad_algorithm/feature/fast/impl/extract_callback_event_sparse.h + // const google::protobuf::EnumDescriptor *callback_event_type_descriptor = + // auto_cpp_rewriter::AdCallbackLog::EventType_descriptor(); + // auto &ocpc_action_type_name = auto_cpp_rewriter::AdActionType_Name( + // 
auto_cpp_rewriter::AdActionType(ocpc_action_type)); + // auto_cpp_rewriter::AdCallbackLog::EventType_Parse(ocpc_action_type_name, + // &callback_event_) + if (expr_info_ptr->is_enum_proto_call()) { + rewriter_.ReplaceText(call_expr, std::string("::bs::") + rewriter_.getRewrittenText(call_expr)); + return; + } + + // 在 SeqListRule 中处理。 + if (expr_info_ptr->is_seq_list_reco_proto_type()) { + return; + } + + if (expr_info_ptr->is_from_seq_list()) { + return; + } + + if (!expr_info_ptr->need_replace()) { + return; + } + + // 确定了 common info 的数据类型 + if (CommonAttrInfo::is_common_info_method(expr_info_ptr->callee_name())) { + rewriter_.run_lazy_replace(); + } + + rewriter_.ReplaceText(call_expr, expr_info_ptr->get_bs_field_value()); +} + +void GeneralRule::process(clang::CXXMemberCallExpr* cxx_member_call_expr, Env* env_ptr) { + std::shared_ptr expr_info_ptr = parse_expr(cxx_member_call_expr, env_ptr); + if (expr_info_ptr == nullptr) { + LOG(INFO) << "expr_info_ptr is nullptr!"; + } + + if (expr_info_ptr->callee_name() == "name_value") { + return; + } + + if (expr_info_ptr->callee_name() == "is_train") { + rewriter_.ReplaceText(cxx_member_call_expr, "bslog.is_train()"); + } + + // 统一在 ActionDetailRule 中处理。 + if (expr_info_ptr->is_action_detail_leaf()) { + return; + } + + // 在 CommonInfoRule 中处理。 + if (expr_info_ptr->is_common_info_map_find_expr()) { + return; + } + + // 函数调用, 替换 action 参数 + // teams/ad/ad_algorithm/feature/fast/impl/extract_user_ad_action_cnt.h + // add_feature(item_played5s_action_base_infos, item_click_action_base_infos, + // before_product_map, after_product_map, before_industry_map, after_industry_map, + // process_time); + if (expr_info_ptr->is_cxx_member_call_expr() && + env_ptr->is_feature_other_method(expr_info_ptr->get_first_caller_name())) { + if (const auto feature_info = env_ptr->get_feature_info()) { + if (const MethodInfo* method_info_ptr = + feature_info->find_method_info(expr_info_ptr->get_first_caller_name())) { + for (size_t i = 0; i < 
cxx_member_call_expr->getNumArgs(); i++) { + clang::Expr* arg = cxx_member_call_expr->getArg(i); + std::string arg_str = stmt_to_string(arg); + + // add_feature action_detail param + const NewActionParam& new_param = method_info_ptr->find_new_action_param(i); + if (new_param.origin_name().size() > 0) { + ExprInfo* call_expr_param = expr_info_ptr->call_expr_param(i); + if (call_expr_param != nullptr) { + std::string prefix = call_expr_param->get_bs_enum_str(); + std::vector new_names = env_ptr->find_new_action_param_var_name(prefix, new_param); + if (new_names.size() > 0) { + rewriter_.ReplaceText(arg, absl::StrJoin(new_names, ",")); + } else { + LOG(INFO) << "cannot find new_names for action param, prefix: " << prefix + << ", cxx_member_call_expr: " << expr_info_ptr->origin_expr_str() + << ", i: " << i + << ", arg: " << stmt_to_string(arg); + } + } else { + LOG(INFO) << "something is wrong! call_expr_param is nullptr, i: " << i + << ", expr: " << expr_info_ptr->origin_expr_str(); + } + } + + // GetSeqList + if ( arg_str == "adlog" || arg_str == "ad_log") { + rewriter_.ReplaceText(arg, "bslog"); + } + } + } + } + } + + // GetNormQuery 逻辑固定, 直接字符串替换 + // teams/ad/ad_algorithm/feature/fast/impl/extract_combine_query_search_pos.h + // GetNormQuery(adlog, &query); + if (expr_info_ptr->is_get_norm_query()) { + rewriter_.ReplaceText(cxx_member_call_expr, replace_get_norm_query(cxx_member_call_expr)); + return; + } + + // reco_user_info 字段需要分两次替换。通过 config->rewrite_reco_user_info 字段区分。 + // + // 第一次只替换 adlog。如下所示 + // adlog.has_reco_user_info() => bslog.has_reco_user_info(); + // adlog.reco_user_info() => bslog.reco_user_info(); + // + // 第二次替换为 bs 格式。 + const std::string bs_enum_str = expr_info_ptr->get_bs_enum_str(); + if (tool::is_str_from_reco_user_info(bs_enum_str) && env_ptr->is_in_loop_init()) { + if (auto &loop_info = env_ptr->mutable_loop_info()) { + loop_info->set_origin_size_var(expr_info_ptr->origin_expr_str()); + } else { + LOG(ERROR) << "cannot find 
loop_info, decl ref: " + << expr_info_ptr->to_string(); + } + } + + if (!expr_info_ptr->need_replace()) { + return; + } + + // 循环中 string loop var, 如 app_package.data(), app_package.size() + if (expr_info_ptr->is_caller_str_ref() && + !expr_info_ptr->is_from_repeated_common_info() && + !expr_info_ptr->is_from_query_token()) { + if (expr_info_ptr->is_from_adlog()) { + if (auto expr_parent = expr_info_ptr->parent()) { + std::string bs_enum_str = expr_parent->get_bs_enum_str(); + if (const absl::optional& var = env_ptr->find_new_def(bs_enum_str)) { + std::ostringstream oss; + if (env_ptr->is_loop_var(expr_parent->origin_expr_str())) { + oss << var->name() << ".Get(idx)." << expr_info_ptr->callee_with_params(rewriter_); + } else { + oss << var->name() << "." << expr_info_ptr->callee_with_params(rewriter_); + } + + LOG(INFO) << "cxx_member_call_expr: " << stmt_to_string(cxx_member_call_expr) + << ", replace: " << oss.str(); + rewriter_.ReplaceText(cxx_member_call_expr, oss.str()); + } else { + LOG(INFO) << "cannot find new_var_def, bs_enum_str: " << bs_enum_str + << ", expr: " << expr_info_ptr->origin_expr_str(); + } + } + } + return; + } + + // 确定了 common info 的数据类型 + if (CommonAttrInfo::is_common_info_method(expr_info_ptr->callee_name())) { + rewriter_.run_lazy_replace(); + } + + if (expr_info_ptr->is_from_action_detail_map() && + expr_info_ptr->contains_loop_var() && + expr_info_ptr->contains_template_parameter()) { + if (const auto& action_detail_fixed_info = env_ptr->get_action_detail_fixed_info()) { + if (absl::optional field_name = expr_info_ptr->get_action_detail_field_name()) { + rewriter_.ReplaceText(cxx_member_call_expr, + action_detail_fixed_info->get_bs_var_name(env_ptr, *field_name)); + } + } + return; + } + + if (expr_info_ptr->is_parent_str_ref() || expr_info_ptr->is_parent_str_type()) { + if (expr_info_ptr->parent() != nullptr) { + std::ostringstream oss; + std::string new_name = expr_info_ptr->parent()->get_bs_field_value(); + if 
(expr_info_ptr->callee_name() == "c_str") { + oss << new_name << ".data()"; + } else { + oss << new_name << "." << expr_info_ptr->callee_name() << "()"; + } + LOG(INFO) << "repalce, expr: " << expr_info_ptr->origin_expr_str() + << ", new_expr: " << oss.str(); + rewriter_.ReplaceText(cxx_member_call_expr, oss.str()); + } + + return; + } + + std::string new_name = expr_info_ptr->get_bs_field_value(); + + if (new_name.size() > 0) { + LOG(INFO) << "repalce, expr: " << expr_info_ptr->origin_expr_str() + << ", new_expr: " << new_name; + rewriter_.ReplaceText(cxx_member_call_expr, new_name); + } + + if (const auto& decl_info = env_ptr->cur_decl_info()) { + if (new_name == decl_info->name()) { + rewriter_.ReplaceText(decl_info->decl_stmt(), rm_decl_type(decl_info->decl_stmt())); + } + } +} + +void GeneralRule::process(clang::MemberExpr* member_expr, Env* env_ptr) { + std::shared_ptr expr_info_ptr = parse_expr(member_expr, env_ptr); + if (!expr_info_ptr->is_from_adlog()) { + return; + } + + rewriter_.ReplaceText(member_expr, expr_info_ptr->get_bs_field_value()); +} + +void GeneralRule::process(clang::DeclRefExpr* decl_ref_expr, Env* env_ptr) { + auto expr_info_ptr = parse_expr(decl_ref_expr, env_ptr); + + if (const auto& decl_info = env_ptr->cur_decl_info()) { + if (expr_info_ptr->origin_expr_str() == decl_info->name()) { + return; + } + } + + // 赋值操作左边的值不替换。 + if (const auto& binary_op_info = env_ptr->cur_binary_op_info()) { + if (binary_op_info->is_assign_op()) { + if (expr_info_ptr->origin_expr_str() == binary_op_info->left_expr_str()) { + return; + } + } + } + + if (expr_info_ptr->is_ad_enum()) { + rewriter_.ReplaceText(decl_ref_expr, expr_info_ptr->get_bs_field_value()); + return; + } + + if (expr_info_ptr->is_from_adlog()) { + if (expr_info_ptr->is_basic()) { + const std::string bs_enum_str = expr_info_ptr->get_bs_enum_str(); + if (tool::is_str_from_reco_user_info(bs_enum_str) && env_ptr->is_in_loop_init()) { + if (auto& loop_info = env_ptr->mutable_loop_info()) { + 
loop_info->set_origin_size_var(expr_info_ptr->origin_expr_str()); + } else { + LOG(ERROR) << "cannot find loop_info, decl ref: " << expr_info_ptr->to_string(); + } + } else { + // common info loop var 在 common info 中处理。 + std::string new_str = expr_info_ptr->get_bs_field_value(); + if (new_str.size() > 0) { + LOG(INFO) << "replace decl_ref_expr: " << stmt_to_string(decl_ref_expr) << ", text: " << new_str; + rewriter_.ReplaceText(decl_ref_expr, new_str); + } + } + } + } +} + +void GeneralRule::process(clang::CXXNullPtrLiteralExpr* cxx_null_ptr_literal_expr, Env* env_ptr) { + auto expr_info_ptr = parse_expr(cxx_null_ptr_literal_expr, env_ptr); +} + +void GeneralRule::process(clang::UnaryOperator* unary_operator, Env* env_ptr) { + auto expr_info_ptr = parse_expr(unary_operator, env_ptr); +} + +void GeneralRule::process(clang::BinaryOperator* binary_operator, Env* env_ptr) { + if (const auto& binary_op_info = env_ptr->cur_binary_op_info()) { + if (binary_op_info->is_assign_op()) { + if (binary_op_info->left_expr_type() == ExprType::TEMPLATE_INT_REF && + binary_op_info->right_expr_type() == ExprType::INT) { + if (auto feature_info = env_ptr->mutable_feature_info()) { + feature_info->add_binary_op_stmt(binary_operator); + } + } + } else if (binary_op_info->is_and_op()) { + if (GlobalConfig::Instance()->rewrite_reco_user_info) { + if (binary_op_info->left_expr_str() == "reco_user_info") { + rewriter_.ReplaceText(binary_operator, rewriter_.getRewrittenText(binary_operator->getRHS())); + } else if (binary_op_info->right_expr_str() == "reco_user_info") { + rewriter_.ReplaceText(binary_operator, rewriter_.getRewrittenText(binary_operator->getLHS())); + } + } + } + } +} + +void GeneralRule::process(clang::IntegerLiteral* integer_literal, Env* env_ptr) { + auto expr_info_ptr = parse_expr(integer_literal, env_ptr); + if (expr_info_ptr == nullptr) { + return; + } +} + +void GeneralRule::process(clang::CXXOperatorCallExpr* cxx_operator_call_expr, Env* env_ptr) { + LOG(INFO) << 
"cxx_operator_call_expr: " << stmt_to_string(cxx_operator_call_expr); + auto expr_info_ptr = parse_expr(cxx_operator_call_expr, env_ptr); + if (expr_info_ptr == nullptr) { + return; + } +} + +void GeneralRule::process(clang::ReturnStmt *return_stmt, Env *env_ptr) { + clang::Expr* ret_value = return_stmt->getRetValue(); + auto expr_info_ptr = parse_expr(ret_value, env_ptr); + if (expr_info_ptr != nullptr) { + if (auto feature_info = env_ptr->mutable_feature_info()) { + MethodInfo& method_info = feature_info->touch_method_info(env_ptr->get_method_name()); + method_info.set_is_return_adlog_user_field(expr_info_ptr->is_adlog_user_field()); + } + } +} + +void GeneralRule::process(clang::CXXFunctionalCastExpr* cxx_functional_cast_expr, Env* env_ptr) { + auto expr_info_ptr = parse_expr(cxx_functional_cast_expr, env_ptr); + if (expr_info_ptr == nullptr) { + return; + } + + if (expr_info_ptr->is_enum_proto_call()) { + rewriter_.ReplaceText(cxx_functional_cast_expr, + std::string("::bs::") + rewriter_.getRewrittenText(cxx_functional_cast_expr)); + return; + } +} + +void GeneralRule::process(clang::GNUNullExpr* gnu_null_expr, Env* env_ptr) { + auto expr_info_ptr = parse_expr(gnu_null_expr, env_ptr); + if (expr_info_ptr == nullptr) { + return; + } + + rewriter_.ReplaceText(gnu_null_expr, "nullptr"); +} + +void GeneralRule::process_loop_range(clang::ForStmt* for_stmt, Env* env_ptr, const std::string& body_text) { +} + +void GeneralRule::process_loop_range(clang::CXXForRangeStmt* cxx_for_range_stmt, + Env* env_ptr, + const std::string& body_text) { + if (env_ptr->new_defs().size() > 0) { + std::ostringstream oss; + std::string new_var_name = env_ptr->new_defs().begin()->first; + oss << " for (size_t " << env_ptr->get_last_loop_var() << " = 0; " + << env_ptr->get_last_loop_var() << " < " + << new_var_name << ".size(); " + << env_ptr->get_last_loop_var() << "++) {\n " + << body_text << ";\n" + << "}\n"; + + rewriter_.ReplaceText(cxx_for_range_stmt, oss.str()); + } +} + +// 
std::string product_name = item.ad_dsp_info().advertiser().base().product_name(); +// product_name.find("xxx"); +void GeneralRule::replace_decl_ref_var(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr == nullptr || env_ptr == nullptr) { + return; + } + + if (expr_info_ptr->is_decl_ref_expr()) { + if (expr_info_ptr->is_from_adlog()) { + std::string bs_enum_str = expr_info_ptr->get_bs_enum_str(); + if (const absl::optional& var = env_ptr->find_new_def(bs_enum_str)) { + if (env_ptr->is_loop_var(expr_info_ptr->origin_expr_str())) { + rewriter_.ReplaceText(expr_info_ptr->origin_expr(), var->name() + ".Get(idx)"); + } else { + rewriter_.ReplaceText(expr_info_ptr->origin_expr(), var->name()); + } + } else { + LOG(INFO) << "cannot find new_var_def, bs_enum_str: " << bs_enum_str; + } + } + + return; + } + + if (expr_info_ptr->parent() != nullptr) { + replace_decl_ref_var(expr_info_ptr->parent().get(), env_ptr); + } +} + +std::string GeneralRule::replace_get_norm_query(clang::CXXMemberCallExpr* cxx_member_call_expr) { + std::string s = stmt_to_string(cxx_member_call_expr); + + static std::regex p("this\\->GetNormQuery\\(adlog, \\&(\\w+)\\)"); + return std::regex_replace(s, p, "bs_util.BSGetNormQuery(bs, pos, &$1)"); +} + +std::string GeneralRule::rm_decl_type(clang::DeclStmt* decl_stmt) { + std::string decl_stmt_str = rewriter_.getRewrittenText(decl_stmt); + static std::regex p("([^ ]*) +([^ ]*) ?="); + return std::regex_replace(decl_stmt_str, p, "$2 ="); +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/GeneralRule.h b/convert/rule/GeneralRule.h new file mode 100644 index 0000000..3ec5028 --- /dev/null +++ b/convert/rule/GeneralRule.h @@ -0,0 +1,249 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +#include "clang/AST/Expr.h" + +#include "RuleBase.h" +#include "../Env.h" +#include "../Tool.h" +#include "../Type.h" +#include "../handler/StrictRewriter.h" + +namespace 
ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +class GeneralRule: public RuleBase { + public: + using RuleBase::process; + explicit GeneralRule(clang::Rewriter& rewriter): RuleBase(rewriter, "GeneralRule") {} // NOLINT + + void process(clang::IfStmt* if_stmt, Env* env_ptr) override; + + void process(clang::ForStmt* for_stmt, Env* env_ptr) override; + + void process(clang::CXXForRangeStmt* cxx_for_range_stmt, Env* env_ptr) override; + + void process(clang::DeclStmt* decl_stmt, Env* env_ptr) override; + + void process(clang::CallExpr* call_expr, Env* env_ptr) override; + + void process(clang::CXXMemberCallExpr* cxx_member_call_expr, Env* env_ptr) override; + + void process(clang::MemberExpr* member_expr, Env* env_ptr) override; + + void process(clang::DeclRefExpr* decl_ref_expr, Env* env_ptr) override; + + void process(clang::CXXNullPtrLiteralExpr* cxx_null_ptr_literal_expr, Env* env_ptr) override; + + void process(clang::UnaryOperator* unary_operator, Env* env_ptr) override; + + void process(clang::BinaryOperator* binary_operator, Env* env_ptr) override; + + void process(clang::IntegerLiteral* integer_literal, Env* env_ptr) override; + + void process(clang::CXXOperatorCallExpr* cxx_operator_call_expr, Env* env_ptr) override; + + void process(clang::ReturnStmt* return_stmt, Env* env_ptr) override; + + void process(clang::CXXFunctionalCastExpr* cxx_functional_cast_expr, Env* env_ptr) override; + + void process(clang::GNUNullExpr* gnu_null_expr, Env* env_ptr) override; + + template + void process_loop(T* loop_stmt, + Env* env_ptr); + + void process_loop_range(clang::ForStmt* for_stmt, + Env* env_ptr, + const std::string& body_text); + + void process_loop_range(clang::CXXForRangeStmt* cxx_for_range_stmt, + Env* env_ptr, + const std::string& body_text); + + void replace_decl_ref_var(ExprInfo* expr_info_ptr, Env* env_ptr); + std::string replace_get_norm_query(clang::CXXMemberCallExpr* cxx_member_call_expr); + std::string rm_decl_type(clang::DeclStmt* 
decl_stmt); +}; + +template +void GeneralRule::process_loop(T* loop_stmt, + Env* env_ptr) { + auto& loop_info = env_ptr->mutable_loop_info(); + if (!loop_info) { + LOG(INFO) << "cannot get loop_info from env! loop_stmt: " << stmt_to_string(loop_stmt); + return; + } + + clang::Stmt* body = loop_stmt->getBody(); + + if (env_ptr->is_common_info_loop()) { + if (env_ptr->get_method_name() == "Extract" || env_ptr->feature_name() == "ItemFilter") { + if (const auto &loop_info = env_ptr->cur_loop_info()) { + // cxx_for_range_stmt + if (!loop_info->is_for_stmt() || loop_info->is_common_info_map()) { + rewriter_.ReplaceText(loop_stmt, get_loop_body(body, false)); + return; + } + } + + return; + } else { + return; + } + } + + // reco user info + if (loop_info->is_reco_user_info_loop()) { + std::string s = rewriter_.getRewrittenText(loop_stmt); + const auto &leaf_fields = loop_info->leaf_fields(); + LOG(INFO) << "reco_user_info_loop, leaf_field: " << absl::StrJoin(leaf_fields, ", "); + if (leaf_fields.size() > 0) { + std::string origin_size_var = loop_info->origin_size_var(); + if (origin_size_var.size() > 0) { + std::regex p(origin_size_var); + s = std::regex_replace(s, p, leaf_fields[0] + ".size()"); + + rewriter_.ReplaceText(loop_stmt, s); + } else { + LOG(ERROR) << "origin_size_var is empty! loop_stmt: " + << stmt_to_string(loop_stmt); + return; + } + } else { + LOG(ERROR) << "leaf_fields is empty! 
loop_stmt: " + << stmt_to_string(loop_stmt); + } + + return; + } + + // 普通 proto map + if (loop_info->is_general_proto_map_loop()) { + const std::string& prefix_adlog = loop_info->prefix_adlog(); + if (prefix_adlog.size() > 0) { + std::string prefix = tool::adlog_to_bs_enum_str(prefix_adlog); + if (const auto& var_def = env_ptr->find_new_def(prefix)) { + if (var_def->new_var_type() == NewVarType::MAP) { + std::ostringstream oss_res; + oss_res << "for (size_t idx = 0; idx < " << var_def->name() << ".size(); idx++) {\n " + << get_compound_stmt_rewritten_text(body) + << "\n}\n "; + + std::string bs_text = oss_res.str(); + if (bs_text.size() > 0) { + rewriter_.ReplaceText(loop_stmt, bs_text); + } + } + } else { + LOG(INFO) << "cannot find map def in env, bs_enum_str: " << prefix; + } + } + } + + // common info 循环,if 条件需要在知道 common info 变量类型后才知道, 因此只能在 loop 替换时候处理 + if (const auto& common_info_normal = env_ptr->cur_common_info_normal()) { + env_ptr->update_template_common_info_values(); + LOG(INFO) << "update template common info values, detail size: " + << common_info_normal->common_info_details_size(); + std::ostringstream oss_res; + const std::vector>& common_info_details = + common_info_normal->common_info_details(); + + if (common_info_normal->is_check_equal()) { + for (size_t i = 0; i < common_info_details.size(); i++) { + LOG(INFO) << "i: " << i; + oss_res << common_info_normal->get_bs_rewritten(&rewriter_, i) + << "\n\n"; + } + } else { + oss_res << common_info_normal->get_bs_wrap_text(get_loop_body_without_if(body)); + } + + std::string bs_text = oss_res.str(); + if (bs_text.size() > 0) { + rewriter_.ReplaceText(loop_stmt, bs_text); + } + + // 可能有多个 common_info_normal, 每一个处理完必须清除 env 中的信息。 + if (auto& mutable_common_info_normal = env_ptr->cur_mutable_common_info_normal()) { + mutable_common_info_normal.reset(); + } + } + + // comon info enum 通过模板参数传进来 + if (const auto& common_info_fixed_list = env_ptr->cur_common_info_fixed_list()) { + std::ostringstream 
oss_res; + const std::vector> &common_info_details = + common_info_fixed_list->common_info_details(); + for (size_t i = 0; i < common_info_details.size(); i++) { + oss_res << common_info_fixed_list->get_bs_rewritten(&rewriter_, i) + << "\n\n"; + } + + std::string bs_text = oss_res.str(); + if (bs_text.size() > 0) { + rewriter_.ReplaceText(loop_stmt, bs_text); + } + + if (auto& mutable_common_info_fixed_list = env_ptr->cur_mutable_common_info_fixed_list()) { + mutable_common_info_fixed_list.reset(); + } + } + + if (const auto& common_info_multi_map = env_ptr->cur_common_info_multi_map()) { + std::ostringstream oss_range; + std::string iter_str = loop_info->loop_iter(); + std::string repeated_field = "BSRepeatedField"; + if (env_ptr->is_combine_feature() && !tool::is_item_field(common_info_multi_map->prefix())) { + repeated_field = "BSRepeatedField"; + } + + // get body_text + std::ostringstream oss_body; + for (auto it_body = body->child_begin(); it_body != body->child_end(); it_body++) { + if (clang::BreakStmt* break_stmt = dyn_cast(*it_body)) { + continue; + } + + std::string new_text = rewriter_.getRewrittenText(*it_body); + oss_body << fix_semicolon(new_text) << "\n"; + } + + oss_range << "for (auto " << iter_str << " = " << common_info_multi_map->map_name() << ".begin(); " + << iter_str << " != " << common_info_multi_map->map_name() << ".end(); " + << iter_str << "++) {\n" + << repeated_field << " " << common_info_multi_map->attr_name() + << "(*bs, iter->first, pos);\n" + << oss_body.str() + << " }\n"; + rewriter_.ReplaceText(loop_stmt, oss_range.str()); + } + + // seq list 不替换 + if (const auto &loop_info = env_ptr->get_loop_info()) { + if (!loop_info->is_for_stmt() && loop_info->is_seq_list_loop()) { + rewriter_.ReplaceText(loop_stmt, loop_info->origin_stmt_str()); + } + } + + std::ostringstream oss_new_defs; + oss_new_defs << env_ptr->get_all_new_defs() << "\n"; + + rewriter_.InsertTextBefore(loop_stmt, oss_new_defs.str()); +} + +} // namespace convert +} // 
namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/HashFnRule.cpp b/convert/rule/HashFnRule.cpp new file mode 100644 index 0000000..d148b41 --- /dev/null +++ b/convert/rule/HashFnRule.cpp @@ -0,0 +1,65 @@ +#include "../Env.h" +#include "../Tool.h" +#include "../ExprParser.h" +#include "../info/NewVarDef.h" +#include "HashFnRule.h" +#include + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void HashFnRule::process(clang::DeclStmt *decl_stmt, Env *env_ptr) { + std::string stmt_str = stmt_to_string(decl_stmt); + if (starts_with(stmt_str, "std::hash")) { + rewriter_.RemoveText(decl_stmt); + } +} + +void HashFnRule::process(clang::CXXOperatorCallExpr *cxx_operator_call_expr, Env *env_ptr) { + auto expr_info_ptr = parse_expr(cxx_operator_call_expr, env_ptr); + if (expr_info_ptr == nullptr) { + return; + } + + // 字符串 hash + // hash_fn(product_name) + if (expr_info_ptr->is_cxx_operator_call_expr()) { + if (expr_info_ptr->callee_name() == "operator()") { + if (expr_info_ptr->call_expr_params_size() == 2) { + auto param0 = expr_info_ptr->call_expr_param(0); + auto param1 = expr_info_ptr->call_expr_param(1); + if (param0 != nullptr && param1 != nullptr) { + LOG(INFO) << "cxx_operator_call_expr: " << stmt_to_string(cxx_operator_call_expr) + << ", param0: " << param0->origin_expr_str() + << ", param0 bs_text: " << param0->get_bs_field_value() + << ", param1: " << param1->origin_expr_str() + << ", param1 bs_text: " << param1->get_bs_field_value() + << ", param1 type_str: " << param1->expr()->getType().getAsString() + << ", param1 is_string: " << param1->is_string(); + if (param0->origin_expr_str() == "hash_fn" && param1->is_string()) { + std::ostringstream oss; + // std::string param1_text = param1->get_bs_field_value(); + std::string param1_text; + if (param1->is_decl_ref_expr()) { + param1_text = param1->origin_expr_str(); + } else { + param1_text = rewriter_.getRewrittenText(param1->expr()); + } + + oss << "ad_nn::bs::Hash(" << param1_text 
<< ")"; + rewriter_.ReplaceText(cxx_operator_call_expr, oss.str()); + + if (auto feature_info = env_ptr->mutable_feature_info()) { + feature_info->set_has_hash_fn_str(true); + } + } + } + } + } + } +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/HashFnRule.h b/convert/rule/HashFnRule.h new file mode 100644 index 0000000..87c665b --- /dev/null +++ b/convert/rule/HashFnRule.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include + +#include "clang/AST/Expr.h" + +#include "RuleBase.h" +#include "../Type.h" +#include "../handler/StrictRewriter.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +/// hash_fn 相关逻辑。 +/// std::hash 统一替换为 ad_nn::bs::Hash +class HashFnRule: public RuleBase { + public: + using RuleBase::process; + explicit HashFnRule(clang::Rewriter& rewriter): RuleBase(rewriter, "HashFnRule") {} // NOLINT + + void process(clang::DeclStmt* decl_stmt, Env* env_ptr) override; + void process(clang::CXXOperatorCallExpr* cxx_operator_call_expr, Env* env_ptr) override; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/MiddleNodeRule.cpp b/convert/rule/MiddleNodeRule.cpp new file mode 100644 index 0000000..c0addd5 --- /dev/null +++ b/convert/rule/MiddleNodeRule.cpp @@ -0,0 +1,112 @@ +#include "../Env.h" +#include "../Tool.h" +#include "../ExprInfo.h" +#include "../ExprParser.h" +#include "MiddleNodeRule.h" +#include + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void MiddleNodeRule::process(clang::IfStmt* if_stmt, Env* env_ptr) { + if (const auto& if_info = env_ptr->cur_if_info()) { + if (auto& middle_node_info = env_ptr->mutable_middle_node_info()) { + // if (if_info->is_check_middle_node_root_cond()) { + // std::ostringstream oss; + // if (if_info->is_check_equal()) { + // oss << "!"; + // } + // oss << "BSHas" << middle_node_info->name() << "(bs, pos)"; + + // rewriter_.ReplaceText(if_stmt->getCond(), 
oss.str()); + // } + } + } +} + +void MiddleNodeRule::process(clang::ForStmt* for_stmt, Env* env_ptr) { +} + +void MiddleNodeRule::process(clang::CXXForRangeStmt* cxx_for_range_stmt, Env* env_ptr) { + const auto& loop_info = env_ptr->cur_loop_info(); + if (!loop_info) { + return; + } + + if (!loop_info->is_for_stmt() && loop_info->is_middle_node_proto_list_loop()) { + if (const auto& middle_node_info = env_ptr->get_middle_node_info()) { + std::string bs_enum_str = loop_info->loop_var_expr_bs_enum_str(); + std::string body = get_loop_body(cxx_for_range_stmt->getBody()); + std::string new_str = middle_node_info->get_list_loop_bs_wrapped_text(env_ptr, body, bs_enum_str); + + rewriter_.ReplaceText(cxx_for_range_stmt, new_str); + } + } +} + +void MiddleNodeRule::process(clang::BinaryOperator* binary_operator, Env* env_ptr) { + // 示例: teams/ad/ad_algorithm/feature/fast/impl/extract_photo_enhance_age.h + // 使用 photo_info 判断是否为空。 + // auto photo_info = GetPhotoInfo(adlog.item(pos)); + // if (photo_info && photo_info->has_photo_attribute()) { + // ... 
+ // } + // + // 可能会导致来自 MiddleNode 的 common info name_value() 被访问多次,从而导致 common info value 被添加多次。 + // 需要去重。 + std::string op = binary_operator->getOpcodeStr().str(); + if (const auto &middle_node_info = env_ptr->get_middle_node_info()) { + if (const auto &if_info = env_ptr->cur_if_info()) { + if (if_info->if_stage() == IfStage::COND) { + auto left_expr_info_ptr = parse_expr(binary_operator->getLHS(), env_ptr); + auto right_expr_info_ptr = parse_expr(binary_operator->getRHS(), env_ptr); + + std::ostringstream oss; + oss << "BSHas" << middle_node_info->name() << "(bs, pos)"; + + if (op == "&&" || op == "||") { + if (left_expr_info_ptr != nullptr) { + if (left_expr_info_ptr->is_middle_node_root() && + left_expr_info_ptr->is_decl_ref_expr()) { + LOG(INFO) << "replace left_expr: " << left_expr_info_ptr->origin_expr_str() + << ", text: " << oss.str(); + rewriter_.ReplaceText(left_expr_info_ptr->origin_expr(), oss.str()); + } + } + + if (right_expr_info_ptr != nullptr) { + if (right_expr_info_ptr->is_middle_node_root() && + right_expr_info_ptr->is_decl_ref_expr()) { + LOG(INFO) << "replace right_expr: " << right_expr_info_ptr->origin_expr_str() + << ", text: " << oss.str(); + rewriter_.ReplaceText(right_expr_info_ptr->origin_expr(), oss.str()); + } + } + } + + if (op == "==" || op == "!=") { + if (left_expr_info_ptr != nullptr && + left_expr_info_ptr->is_middle_node_root() && + left_expr_info_ptr->is_decl_ref_expr() && + right_expr_info_ptr != nullptr && + right_expr_info_ptr->is_nullptr()) { + if (op == "==") { + LOG(INFO) << "replace binary_operator: " << stmt_to_string(binary_operator) + << ", text: !" 
<< oss.str(); + rewriter_.ReplaceText(binary_operator, std::string("!") + oss.str()); + } else if (op == "!=") { + LOG(INFO) << "replace binary_operator: " << stmt_to_string(binary_operator) + << ", text: " << oss.str(); + rewriter_.ReplaceText(binary_operator, oss.str()); + } + } + } + } + } + } +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/MiddleNodeRule.h b/convert/rule/MiddleNodeRule.h new file mode 100644 index 0000000..096a9af --- /dev/null +++ b/convert/rule/MiddleNodeRule.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include + +#include "clang/AST/Expr.h" + +#include "../Type.h" +#include "RuleBase.h" +#include "../handler/StrictRewriter.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +class MiddleNodeRule: public RuleBase { + public: + using RuleBase::process; + explicit MiddleNodeRule(clang::Rewriter& rewriter): RuleBase(rewriter, "MiddleNodeRule") {} // NOLINT + + void process(clang::IfStmt* if_stmt, Env* env_ptr) override; + void process(clang::ForStmt* for_stmt, Env* env_ptr) override; + void process(clang::CXXForRangeStmt* cxx_for_range_stmt, Env* env_ptr) override; + void process(clang::BinaryOperator* binary_operator, Env* env_ptr) override; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/PreRule.cpp b/convert/rule/PreRule.cpp new file mode 100644 index 0000000..718af28 --- /dev/null +++ b/convert/rule/PreRule.cpp @@ -0,0 +1,24 @@ +#include +#include +#include +#include "../Env.h" +#include "../Tool.h" +#include "../ExprInfo.h" +#include "../ExprParser.h" +#include "PreRule.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void PreRule::process(clang::ForStmt* for_stmt, Env* env_ptr) { + process_deleted_var(env_ptr); +} + +void PreRule::process(clang::CXXForRangeStmt* cxx_for_range_stmt, Env* env_ptr) { + process_deleted_var(env_ptr); +} + +} // namespace convert +} // namespace 
ad_algorithm +} // namespace ks diff --git a/convert/rule/PreRule.h b/convert/rule/PreRule.h new file mode 100644 index 0000000..771ce82 --- /dev/null +++ b/convert/rule/PreRule.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include + +#include "clang/AST/Expr.h" + +#include "RuleBase.h" +#include "../Type.h" +#include "../handler/StrictRewriter.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +/// 在其他 Rule 之前执行,如 process_deleted_var +class PreRule: public RuleBase { + public: + using RuleBase::process; + explicit PreRule(clang::Rewriter& rewriter): RuleBase(rewriter, "PreRule") {} // NOLINT + + void process(clang::ForStmt* for_stmt, Env* env_ptr) override; + void process(clang::CXXForRangeStmt* cxx_for_range_stmt, Env* env_ptr) override; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/ProtoListRule.cpp b/convert/rule/ProtoListRule.cpp new file mode 100644 index 0000000..cc96f1b --- /dev/null +++ b/convert/rule/ProtoListRule.cpp @@ -0,0 +1,67 @@ +#include "../Env.h" +#include "../Tool.h" +#include "../ExprParser.h" +#include "../info/NewVarDef.h" +#include "ProtoListRule.h" +#include + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void ProtoListRule::process(clang::ForStmt* for_stmt, Env* env_ptr) { +} + +void ProtoListRule::process(clang::CXXForRangeStmt* cxx_for_range_stmt, Env* env_ptr) { + if (const auto& loop_info = env_ptr->cur_loop_info()) { + if (loop_info->is_double_list_loop()) { + return; + } + + if (const auto& proto_list_info = env_ptr->cur_proto_list_info()) { + std::string bs_enum_str = proto_list_info->prefix(); + + const auto& fields = proto_list_info->fields(); + if (fields.size() > 0) { + std::string adlog_str = proto_list_info->prefix_adlog() + "." 
+ fields[0]; + bs_enum_str = tool::adlog_to_bs_enum_str(adlog_str); + } + + if (const auto& var = env_ptr->find_new_def(bs_enum_str)) { + std::ostringstream oss; + + oss << "for (size_t idx = 0; idx < " << var->name() << ".size(); idx++) {\n " + << " auto " << loop_info->loop_var() << " = " << var->name() << ".Get(idx);\n " + << get_compound_stmt_rewritten_text(cxx_for_range_stmt->getBody()) + << "\n}\n "; + + rewriter_.ReplaceText(cxx_for_range_stmt, oss.str()); + } else { + LOG(INFO) << "cannot find var def in env, bs_enum_str: " << bs_enum_str; + } + } + } +} + +void ProtoListRule::process(clang::CXXMemberCallExpr* cxx_member_call_expr, Env* env_ptr) { + auto expr_info_ptr = parse_expr(cxx_member_call_expr, env_ptr); + if (expr_info_ptr == nullptr) { + return; + } + + // 比较特殊。for 循环遍历的时候还不知道具体类型,因此没办法添加定义,只能根据 bs_enum_str 获取 + // 变量名,然后替换为 var_name.size(), 在 for 循环内部才会添加定义。 + if (expr_info_ptr->is_general_proto_list_size_method()) { + if (!env_ptr->is_in_for_range_init()) { + std::string bs_enum_str = expr_info_ptr->get_bs_enum_str_trim_size(); + std::string var_name = env_ptr->find_valid_new_name(bs_enum_str); + if (var_name.size() > 0) { + rewriter_.ReplaceText(cxx_member_call_expr, var_name + ".size()"); + } + } + } +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/ProtoListRule.h b/convert/rule/ProtoListRule.h new file mode 100644 index 0000000..d09e4cd --- /dev/null +++ b/convert/rule/ProtoListRule.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include + +#include "clang/AST/Expr.h" + +#include "RuleBase.h" +#include "../Type.h" +#include "../handler/StrictRewriter.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +/// for 循环遍历的是中间的 proto list。 +/// 详细逻辑见: docs/proto_list.md +class ProtoListRule: public RuleBase { + public: + using RuleBase::process; + explicit ProtoListRule(clang::Rewriter& rewriter): RuleBase(rewriter, "ProtoListRule") {} // NOLINT + + void 
process(clang::ForStmt* for_stmt, Env* env_ptr) override; + void process(clang::CXXForRangeStmt* cxx_for_range_stmt, Env* env_ptr) override; + void process(clang::CXXMemberCallExpr* cxx_member_call_expr, Env* env_ptr) override; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/QueryTokenRule.cpp b/convert/rule/QueryTokenRule.cpp new file mode 100644 index 0000000..6b11418 --- /dev/null +++ b/convert/rule/QueryTokenRule.cpp @@ -0,0 +1,124 @@ +#include + +#include "../Env.h" +#include "../ExprParser.h" +#include "../Tool.h" +#include "../info/NewVarDef.h" +#include "../handler/StrictRewriter.h" +#include "QueryTokenRule.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void QueryTokenRule::process(clang::IfStmt* if_stmt, Env* env_ptr) { +} + +void QueryTokenRule::process(clang::BinaryOperator* binary_operator, Env* env_ptr) { + if (binary_operator == nullptr) { + return; + } + + std::string op = binary_operator->getOpcodeStr().str(); + clang::Expr *left_expr = binary_operator->getLHS(); + clang::Expr *right_expr = binary_operator->getRHS(); + + if (binary_operator->isComparisonOp()) { + auto left_info_ptr = parse_expr(left_expr, env_ptr); + // 示例: + // teams/ad/ad_algorithm/feature/fast/impl/extract_combine_querytoken_ad_campaign_id.cc + // auto query_token = GetQueryToken(adlog); + // if (query_token == nullptr || query_token->empty()) { + // return; + // } + if (left_info_ptr != nullptr && + left_info_ptr->is_decl_ref_expr() && + left_info_ptr->origin_expr() != nullptr && + left_info_ptr->is_proto_map_string_float_ref() && + !left_info_ptr->is_reco_proto_type()) { + if (stmt_to_string(right_expr) == "nullptr") { + if (op == "==") { + rewriter_.ReplaceText(binary_operator, left_info_ptr->origin_expr_str() + ".is_empty()"); + } else if (op == "!=") { + std::string s = std::string("!") + left_info_ptr->origin_expr_str() + ".is_empty()"; + rewriter_.ReplaceText(binary_operator, s); + } + } + } + 
} + + if (op == "||") { + std::string left_str = rewriter_.getRewrittenText(left_expr); + std::string right_str = rewriter_.getRewrittenText(right_expr); + if (left_str == right_str) { + rewriter_.ReplaceText(binary_operator, left_str); + } + } +} + +void QueryTokenRule::process(clang::MemberExpr* member_expr, Env* env_ptr) { +} + +void QueryTokenRule::process(clang::CXXMemberCallExpr *cxx_member_call_expr, Env *env_ptr) { + auto expr_info_ptr = parse_expr(cxx_member_call_expr, env_ptr); + if (expr_info_ptr == nullptr) { + return; + } + + if (!expr_info_ptr->is_from_query_token() || !expr_info_ptr->is_from_photo_text()) { + return; + } +} + +void QueryTokenRule::process(clang::ForStmt *for_stmt, Env *env_ptr) { + if (const auto &loop_info = env_ptr->cur_loop_info()) { + if (loop_info->is_query_token_loop()) { + std::string name = loop_info->loop_var_expr_str(); + std::ostringstream oss; + oss << "for (size_t idx = 0; idx < " << name << ".size(); idx++) {\n " + << "auto query_key = " << name << ".GetKey(idx);\n" + << get_compound_stmt_rewritten_text(for_stmt->getBody()) + << "\n}\n"; + + rewriter_.ReplaceText(for_stmt, oss.str()); + } + } +} + +void QueryTokenRule::process(clang::CXXForRangeStmt *cxx_for_range_stmt, Env *env_ptr) { +} + +void QueryTokenRule::process(clang::CXXOperatorCallExpr* cxx_operator_call_expr, Env* env_ptr) { + // 示例: teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_asr_2.h + // if (photo_asr_token->find(query_iter->first) == photo_asr_token->end()) { + // /// + // } + std::string op = stmt_to_string(cxx_operator_call_expr->getCallee()); + if (op == "operator==" || op == "operator!=") { + if (cxx_operator_call_expr->getNumArgs() == 2) { + auto left_info_ptr = parse_expr(cxx_operator_call_expr->getArg(0), env_ptr); + if (left_info_ptr != nullptr && + left_info_ptr->is_photo_text_find_expr() && + left_info_ptr->parent() != nullptr) { + if (left_info_ptr->call_expr_params_size() == 1) { + if (auto param = 
left_info_ptr->call_expr_param(0)) { + std::string param_str = rewriter_.getRewrittenText(param->expr()); + std::string photo_text_map = left_info_ptr->parent()->origin_expr_str(); + if (photo_text_map.size() > 0 && param_str.size() > 0) { + std::string bs_text = photo_text_map + ".Get(query_key).second"; + if (op == "operator==") { + bs_text = std::string("!") + bs_text; + } + + rewriter_.ReplaceText(cxx_operator_call_expr, bs_text); + } + } + } + } + } + } +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/QueryTokenRule.h b/convert/rule/QueryTokenRule.h new file mode 100644 index 0000000..eadacfa --- /dev/null +++ b/convert/rule/QueryTokenRule.h @@ -0,0 +1,36 @@ +#pragma once + +#include +#include + +#include "clang/AST/Expr.h" + +#include "RuleBase.h" +#include "../Type.h" +#include "../handler/StrictRewriter.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +/// GetQueryToken 相关逻辑。 +/// 详细逻辑见: docs/query_token.md +class QueryTokenRule: public RuleBase { + public: + using RuleBase::process; + explicit QueryTokenRule(clang::Rewriter& rewriter): RuleBase(rewriter, "QueryTokenRule") {} // NOLINT + + void process(clang::IfStmt* if_stmt, Env* env_ptr) override; + void process(clang::BinaryOperator* binary_operator, Env* env_ptr) override; + void process(clang::MemberExpr* member_expr, Env* env_ptr) override; + void process(clang::CXXMemberCallExpr* cxx_member_call_expr, Env* env_ptr) override; + void process(clang::ForStmt* for_stmt, Env* env_ptr) override; + void process(clang::CXXForRangeStmt* cxx_for_range_stmt, Env* env_ptr) override; + void process(clang::CXXOperatorCallExpr* cxx_operator_call_expr, Env* env_ptr) override; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/README.md b/convert/rule/README.md new file mode 100644 index 0000000..af8aac4 --- /dev/null +++ b/convert/rule/README.md @@ -0,0 +1,9 @@ +# 转换规则 + 
+每个规则需要处理的逻辑,包括以下部分: + +1. 更新 Env。 +2. 替换表达式。 +3. 其他一些工具函数。 + +各个规则间应该保证互斥, 不会 rewrite 同一个表达式。 diff --git a/convert/rule/RuleBase.cpp b/convert/rule/RuleBase.cpp new file mode 100644 index 0000000..7ce7e60 --- /dev/null +++ b/convert/rule/RuleBase.cpp @@ -0,0 +1,76 @@ +#include + +#include "../Tool.h" +#include "RuleBase.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +std::string RuleBase::get_loop_body(clang::Stmt* body, bool skip_if) { + if (body == nullptr) { + return ""; + } + + std::ostringstream oss_text; + for (auto it_body = body->child_begin(); it_body != body->child_end(); it_body++) { + if (skip_if) { + if (clang::IfStmt* if_stmt = dyn_cast(*it_body)) { + continue; + } + } + std::string new_text = rewriter_.getRewrittenText(*it_body); + oss_text << fix_semicolon(new_text) << "\n"; + } + + return oss_text.str(); +} + +std::string RuleBase::get_loop_body_without_if(clang::Stmt* body) { + return get_loop_body(body, true); +} + +std::string RuleBase::get_complete_rewritten_text(clang::Stmt* body, Env* env_ptr) { + std::ostringstream oss; + + process_deleted_var(env_ptr); + + oss << env_ptr->get_all_new_defs() << "\n"; + oss << rewriter_.getRewrittenText(find_source_range(body)); + + return oss.str(); +} + +std::string RuleBase::get_compound_stmt_rewritten_text(clang::Stmt* stmt) { + if (stmt == nullptr) { + return ""; + } + + std::ostringstream oss; + if (clang::CompoundStmt* compound_stmt = dyn_cast(stmt)) { + oss << rewriter_.getRewrittenText(compound_stmt); + } + + return tool::rm_surround_big_parantheses(oss.str()); +} + +void RuleBase::process_deleted_var(Env* env_ptr) { + // delete vars + const std::set& deleted_vars = env_ptr->deleted_vars(); + for (const std::string& name : deleted_vars) { + if (starts_with(name, "__") || starts_with(name, "* __")) { + continue; + } + + clang::Stmt* decl_stmt = env_ptr->get_decl_stmt(name); + if (decl_stmt != nullptr) { + LOG(INFO) << "delete var, name: " << name + << ", stmt: " << 
stmt_to_string(decl_stmt); + rewriter_.RemoveText(decl_stmt); + } + } +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/RuleBase.h b/convert/rule/RuleBase.h new file mode 100644 index 0000000..be51f7e --- /dev/null +++ b/convert/rule/RuleBase.h @@ -0,0 +1,114 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../Env.h" +#include "../handler/StrictRewriter.h" +#include "clang/AST/AST.h" +#include "clang/AST/Expr.h" +#include "clang/AST/Stmt.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Rewrite/Core/Rewriter.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class RuleBase { + public: + explicit RuleBase(clang::Rewriter& rewriter, const std::string& rule_name): // NOLINT + rewriter_(rewriter, rule_name), name_(rule_name) {} // name_ is what name() returns + + const std::string& name() const { return name_; } + + virtual void process(clang::Stmt* stmt, Env* env_ptr) {} + virtual void process(clang::IfStmt* stmt, Env* env_ptr) {} + virtual void process(clang::Expr* expr, Env* env_ptr) {} + virtual void process(clang::DeclStmt* decl_stmt, Env* env_ptr) {} + virtual void process(clang::CallExpr* call_expr, Env* env_ptr) {} + virtual void process(clang::CXXMemberCallExpr* cxx_member_call_expr, Env* env_ptr) {} + virtual void process(clang::MemberExpr* member_expr, Env* env_ptr) {} + virtual void process(clang::UnaryOperator* unary_operator, Env* env_ptr) {} + virtual void process(clang::BinaryOperator* binary_operator, Env* env_ptr) {} + virtual void process(clang::CXXOperatorCallExpr* cxx_operator_call_expr, Env* env_ptr) {} + virtual void process(clang::ForStmt* for_stmt, Env* env_ptr) {} + virtual void process(clang::CXXForRangeStmt* cxx_for_range_stmt, Env* env_ptr) {} + virtual void process(clang::DeclRefExpr* decl_ref_expr, Env* env_ptr) {} + virtual void process(clang::CXXNullPtrLiteralExpr* cxx_null_ptr_literal_expr, Env* 
env_ptr) {} + virtual void process(clang::GNUNullExpr* gnu_null_expr, Env* env_ptr) {} + virtual void process(clang::CXXThisExpr* cxx_this_expr, Env* env_ptr) {} + virtual void process(clang::ReturnStmt* return_stmt, Env* env_ptr) {} + virtual void process(clang::BreakStmt* break_stmt, Env* env_ptr) {} + virtual void process(clang::ContinueStmt* continue_stmt, Env* env_ptr) {} + virtual void process(clang::IntegerLiteral* integer_literal, Env* env_ptr) {} + virtual void process(clang::SwitchStmt* switch_stmt, Env* env_ptr) {} + virtual void process(clang::CaseStmt* case_stmt, Env* env_ptr) {} + virtual void process(clang::ConstantExpr* constant_expr, Env* env_ptr) {} + virtual void process(clang::CXXDependentScopeMemberExpr* cxx_dependent_scope_member_expr, Env* env_ptr) {} + virtual void process(clang::ArraySubscriptExpr* array_subscript_expr, Env* env_ptr) {} + virtual void process(clang::CXXFunctionalCastExpr* cxx_functional_cast_expr, Env* env_ptr) {} + + virtual json process_to_json(clang::Stmt *stmt, Env *env_ptr) { return json::array(); } + virtual json process_to_json(clang::IfStmt *stmt, Env *env_ptr) { return json::array(); } + virtual json process_to_json(clang::Expr *expr, Env *env_ptr) { return json::array(); } + virtual json process_to_json(clang::DeclStmt *decl_stmt, Env *env_ptr) { return json::array(); } + virtual json process_to_json(clang::CallExpr *call_expr, Env *env_ptr) { return json::array(); } + virtual json process_to_json(clang::CXXMemberCallExpr *cxx_member_call_expr, + Env *env_ptr) { return json::array(); } + + virtual json process_to_json(clang::MemberExpr *member_expr, Env *env_ptr) { return json::array(); } + virtual json process_to_json(clang::UnaryOperator *unary_operator, Env *env_ptr) { return json::array(); } + virtual json process_to_json(clang::BinaryOperator *binary_operator, Env *env_ptr) { return json::array(); } + virtual json process_to_json(clang::CXXOperatorCallExpr *cxx_operator_call_expr, + Env *env_ptr) { return 
json::array(); } + + virtual json process_to_json(clang::ForStmt *for_stmt, Env *env_ptr) { return json::array(); } + virtual json process_to_json(clang::CXXForRangeStmt *cxx_for_range_stmt, Env *env_ptr) { return json::array(); } + virtual json process_to_json(clang::DeclRefExpr *decl_ref_expr, Env *env_ptr) { return json::array(); } + virtual json process_to_json(clang::CXXNullPtrLiteralExpr *cxx_null_ptr_literal_expr, + Env *env_ptr) { return json::array(); } + virtual json process_to_json(clang::GNUNullExpr *gnu_null_expr, Env *env_ptr) { return json::array(); } + virtual json process_to_json(clang::CXXThisExpr *cxx_this_expr, Env *env_ptr) { return json::array(); } + virtual json process_to_json(clang::ReturnStmt *return_stmt, Env *env_ptr) { return json::array(); } + virtual json process_to_json(clang::BreakStmt *break_stmt, Env *env_ptr) { return json::array(); } + virtual json process_to_json(clang::ContinueStmt *continue_stmt, Env *env_ptr) { return json::array(); } + virtual json process_to_json(clang::IntegerLiteral *integer_literal, Env *env_ptr) { return json::array(); } + virtual json process_to_json(clang::SwitchStmt *switch_stmt, Env *env_ptr) { return json::array(); } + virtual json process_to_json(clang::CaseStmt *case_stmt, Env *env_ptr) { return json::array(); } + virtual json process_to_json(clang::ConstantExpr *constant_expr, Env *env_ptr) { return json::array(); } + virtual json process_to_json(clang::CXXDependentScopeMemberExpr *cxx_dependent_scope_member_expr, + Env *env_ptr) { return json::array(); } + virtual json process_to_json(clang::ArraySubscriptExpr *array_subscript_expr, + Env *env_ptr) { return json::array(); } + virtual json process_to_json(clang::CXXFunctionalCastExpr *cxx_functional_cast_expr, + Env *env_ptr) { return json::array(); } + + std::string get_loop_body(clang::Stmt* body, bool skip_if = false); + std::string get_loop_body_without_if(clang::Stmt* body); + + // 删掉多余变量 + std::string 
get_complete_rewritten_text(clang::Stmt* body, Env* env_ptr); + std::string get_compound_stmt_rewritten_text(clang::Stmt* stmt); + + void process_deleted_var(Env* env_ptr); + + protected: + StrictRewriter rewriter_; + std::string name_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/SeqListRule.cpp b/convert/rule/SeqListRule.cpp new file mode 100644 index 0000000..9779c4c --- /dev/null +++ b/convert/rule/SeqListRule.cpp @@ -0,0 +1,203 @@ +#include +#include +#include +#include +#include + +#include "clang/AST/Decl.h" + +#include "../Env.h" +#include "../Tool.h" +#include "../ExprInfo.h" +#include "../ExprParser.h" +#include "../info/IfInfo.h" +#include "../handler/StrictRewriter.h" +#include "SeqListRule.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void SeqListRule::process(clang::IfStmt* if_stmt, + Env* env_ptr) { + if (env_ptr == nullptr) { + return; + } + + if (const auto& if_info = env_ptr->cur_if_info()) { + if (if_info->is_check_seq_list_cond()) { + if (const auto& seq_list_info = env_ptr->get_seq_list_info()) { + std::ostringstream oss; + if (seq_list_info->var_name().size() > 0) { + oss << "if (!" << seq_list_info->var_name() << ".is_empty()) {\n "; + } else { + oss << "if (!" 
<< seq_list_info->root_name() << "->is_empty()) {\n "; + } + oss << get_compound_stmt_rewritten_text(if_stmt->getThen()); + oss << "\n}\n "; + + rewriter_.ReplaceText(if_stmt, oss.str()); + } + } + } +} + +void SeqListRule::process(clang::DeclStmt *decl_stmt, Env *env_ptr) { + if (clang::VarDecl* var_decl = dyn_cast(decl_stmt->getSingleDecl())) { + clang::QualType qual_type = var_decl->getType(); + if (tool::is_repeated_proto_ptr(qual_type) && !tool::is_reco_proto(qual_type)) { + if (absl::optional inner_type = tool::get_repeated_proto_inner_type(qual_type)) { + // 简单处理,目前的几个类都是 user + bool is_combine_user = env_ptr->is_combine_feature(); + std::string bs_repeated_type = tool::get_bs_repeated_field_type(*inner_type, is_combine_user); + std::ostringstream oss; + oss << bs_repeated_type + << var_decl->getNameAsString() + << ";\n"; + rewriter_.ReplaceText(decl_stmt, oss.str()); + } else { + LOG(INFO) << "cannot find inner_type for repeated proto, decl_stmt: " << stmt_to_string(decl_stmt); + } + } + } +} + +void SeqListRule::process(clang::UnaryOperator* unary_operator, Env *env_ptr) { + auto expr_info_ptr = parse_expr(unary_operator, env_ptr); + if (expr_info_ptr == nullptr) { + return; + } + + // const auto& seq_list = *seq_list_ptr; + if (expr_info_ptr->is_seq_list_root_deref()) { + if (expr_info_ptr->call_expr_params_size() == 1) { + ExprInfo* param = expr_info_ptr->call_expr_param(0); + if (param != nullptr) { + if (param->is_seq_list_root_ref()) { + rewriter_.ReplaceText(unary_operator, param->origin_expr_str() + ";\n"); + } + } + } + } +} + +void SeqListRule::process(clang::BinaryOperator* binary_operator, Env *env_ptr) { + if (env_ptr == nullptr) { + return; + } + + if (binary_operator->isAssignmentOp()) { + // 替换取地址为最后的 BSRepeatedField 对应的变量。 + // ad/ad_algorithm/feature/fast/impl/extract_mmu_top28_seq_sparse.h + // const auto &common_infos = adlog.user_info().common_info_attr(); + // for (const auto &userAttr : common_infos) { + // if (userAttr.name_value() 
== to_use_common_info_name_value) { + // seq_list = &userAttr.int_list_value(); + // break; + // } + // } + clang::Expr* init_expr = binary_operator->getRHS(); + auto expr_info_ptr = parse_expr(init_expr, env_ptr); + if (expr_info_ptr != nullptr && expr_info_ptr->is_address_expr() && // null guard, as at the other parse_expr call sites + expr_info_ptr->call_expr_params_size() == 1) { + ExprInfo *param_info_ptr = expr_info_ptr->call_expr_param(0); + if (param_info_ptr != nullptr && + param_info_ptr->is_common_info_list_method()) { + if (const auto &common_info_fixed_list = env_ptr->get_common_info_fixed_list()) { + if (const auto last_detail = common_info_fixed_list->last_common_info_detail()) { + std::string bs_enum_str = last_detail->get_bs_enum_str(); + if (const auto &bs_var = env_ptr->find_new_def(bs_enum_str)) { + rewriter_.ReplaceText(init_expr, std::string("std::move(") + bs_var->name() + ")"); + } + } + } else if (const auto& common_info_normal = env_ptr->get_common_info_normal()) { + if (const auto& common_info_detail = common_info_normal->last_common_info_detail()) { + std::string bs_enum_str = common_info_detail->get_bs_enum_str(); + if (const auto& bs_var = env_ptr->find_new_def(bs_enum_str)) { + rewriter_.ReplaceText(init_expr, std::string("std::move(") + bs_var->name() + ")"); + } + } + } + } + } + } + + // timestamp_seq_list == nullptr + // timestamp_list != nullptr + if (binary_operator->isComparisonOp()) { + clang::Expr* left_expr = binary_operator->getLHS(); + clang::Expr* right_expr = binary_operator->getRHS(); + auto expr_info_ptr = parse_expr(left_expr, env_ptr); + if (expr_info_ptr != nullptr && + expr_info_ptr->is_decl_ref_expr() && + expr_info_ptr->origin_expr() != nullptr && + tool::is_repeated_proto_ptr(expr_info_ptr->origin_expr()->getType()) && + !expr_info_ptr->is_reco_proto_type()) { + std::string op = binary_operator->getOpcodeStr().str(); + if (stmt_to_string(right_expr) == "nullptr") { + if (op == "==") { + rewriter_.ReplaceText(binary_operator, expr_info_ptr->origin_expr_str() + ".is_empty()"); + } else if (op 
== "!=") { + std::string s = std::string("!") + expr_info_ptr->origin_expr_str() + ".is_empty()"; + rewriter_.ReplaceText(binary_operator, s); + } + } + } + } +} + +void SeqListRule::process(clang::CallExpr *call_expr, Env *env_ptr) { + std::shared_ptr expr_info_ptr = parse_expr(call_expr, env_ptr); + if (expr_info_ptr == nullptr) { + LOG(INFO) << "expr_info_ptr is nullptr!"; + return; // nothing to rewrite; the code below dereferences expr_info_ptr + } + + if (expr_info_ptr->is_seq_list_reco_proto_type() || expr_info_ptr->is_seq_list_root()) { + std::string s = tool::trim_this(expr_info_ptr->to_string()); + rewriter_.ReplaceText(call_expr, tool::replace_adlog_to_bslog(s)); + } +} + +void SeqListRule::process(clang::CXXMemberCallExpr* cxx_member_call_expr, Env* env_ptr) { + // item_id_seq_list->size() => item_id_seq_list.size() + auto expr_info_ptr = parse_expr(cxx_member_call_expr, env_ptr); + if (expr_info_ptr == nullptr) { + return; + } + + if (expr_info_ptr->callee_name() == "size") { + if (auto parent = expr_info_ptr->parent()) { + if (parent->is_decl_ref_expr() && parent->origin_expr() != nullptr) { + if (tool::is_repeated_proto_ptr(parent->origin_expr()->getType())) { + std::string s = parent->origin_expr_str() + ".size()"; + rewriter_.ReplaceText(cxx_member_call_expr, s); + } + } + } + } +} + +void SeqListRule::process(clang::ReturnStmt* return_stmt, Env* env_ptr) { + // Simple handling: a returned nullptr is replaced directly with BSRepeatedField{} + if (env_ptr->get_method_name() == "GetSeqList") { + clang::Expr *ret_value = return_stmt->getRetValue(); + if (stmt_to_string(ret_value) == "nullptr") { + rewriter_.ReplaceText(ret_value, "BSRepeatedField{}"); + } + } +} + +void SeqListRule::process(clang::CXXDependentScopeMemberExpr *cxx_dependent_scope_member_expr, Env *env_ptr) { + auto expr_info_ptr = parse_expr(cxx_dependent_scope_member_expr, env_ptr); +} + +void SeqListRule::process(clang::ArraySubscriptExpr *array_subscript_expr, Env *env_ptr) { + auto expr_info_ptr = parse_expr(array_subscript_expr, env_ptr); +} + +void SeqListRule::process(clang::CXXForRangeStmt* 
cxx_for_range_stmt, Env *env_ptr) { +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/SeqListRule.h b/convert/rule/SeqListRule.h new file mode 100644 index 0000000..c18ac7e --- /dev/null +++ b/convert/rule/SeqListRule.h @@ -0,0 +1,44 @@ +#pragma once + +#include +#include + +#include "clang/AST/Expr.h" + +#include "../Type.h" +#include "RuleBase.h" +#include "../handler/StrictRewriter.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +/// Detailed rules: see docs/get_seq_list.md +class SeqListRule: public RuleBase { + public: + using RuleBase::process; + explicit SeqListRule(clang::Rewriter& rewriter): RuleBase(rewriter, "SeqListRule") {} // NOLINT + + void process(clang::IfStmt* if_stmt, Env* env_ptr) override; + + /// Replace a repeated proto declaration with BSRepeatedField. + /// Example: + /// const ::google::protobuf::RepeatedField<::google::protobuf::int64> *seq_list = nullptr; + /// is replaced with BSRepeatedField + void process(clang::DeclStmt *decl_stmt, Env *env_ptr) override; + + void process(clang::UnaryOperator* unary_operator, Env *env_ptr) override; + void process(clang::BinaryOperator* binary_operator, Env *env_ptr) override; + void process(clang::CallExpr *call_expr, Env *env_ptr) override; + void process(clang::CXXMemberCallExpr* cxx_member_call_expr, Env* env_ptr) override; + void process(clang::ReturnStmt* return_stmt, Env* env_ptr) override; + void process(clang::CXXDependentScopeMemberExpr *cxx_dependent_scope_member_expr, Env *env_ptr) override; + void process(clang::ArraySubscriptExpr *array_subscript_expr, Env *env_ptr) override; + void process(clang::CXXForRangeStmt* cxx_for_range_stmt, Env *env_ptr) override; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/StrRule.cpp b/convert/rule/StrRule.cpp new file mode 100644 index 0000000..6ca2c6e --- /dev/null +++ b/convert/rule/StrRule.cpp @@ -0,0 +1,171 @@ +#include "../Env.h" +#include "../Tool.h" +#include "../ExprParser.h" +#include 
"../info/NewVarDef.h" +#include "StrRule.h" +#include + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void StrRule::process(clang::CallExpr *call_expr, Env *env_ptr) { + auto expr_info_ptr = parse_expr(call_expr, env_ptr); + if (expr_info_ptr == nullptr) { + return; + } + + + // SplitString 参数需要从 absl::string_view 替换成 std::string + // 示例: teams/ad/ad_algorithm/feature/fast/impl/extract_user_offline_retailer_keyword.h + // base::SplitString(userAttr.string_value(), std::string(","), &userKeywordCnt); + process_str_param_call(expr_info_ptr.get(), env_ptr); +} + +void StrRule::process(clang::CXXMemberCallExpr* cxx_member_call_expr, Env* env_ptr) { + auto expr_info_ptr = parse_expr(cxx_member_call_expr, env_ptr); + if (expr_info_ptr == nullptr) { + return; + } + + // 示例: teams/ad/ad_algorithm/feature/fast/impl/extract_combine_region.h + process_str_param_call(expr_info_ptr.get(), env_ptr); +} + +void StrRule::process_str_param_call(ExprInfo* expr_info_ptr, Env* env_ptr) { + if (expr_info_ptr->is_call_expr()) { + if (expr_info_ptr->callee_name() == "base::SplitString" || + expr_info_ptr->callee_name() == "base::StringToUint" || + expr_info_ptr->callee_name() == "GetRegion") { + if (expr_info_ptr->call_expr_params_size() > 0) { + auto param0 = expr_info_ptr->call_expr_param(0); + std::ostringstream oss; + if (param0 != nullptr && param0->is_string()) { + std::string param0_text; + if (param0->is_decl_ref_expr()) { + param0_text = rewriter_.getRewrittenText(param0->origin_expr()); + if (param0_text.size() > 0) { + oss << "std::string(" << param0_text << ".data(), " << param0_text << ".size())"; + rewriter_.ReplaceText(param0->origin_expr(), oss.str()); + } + } else { + param0_text = rewriter_.getRewrittenText(param0->expr()); + if (param0_text.size() > 0) { + oss << "std::string(" << param0_text << ".data(), " << param0_text << ".size())"; + rewriter_.ReplaceText(param0->expr(), oss.str()); + } + } + + LOG(INFO) << "replace str param call param0: " << 
param0->origin_expr_str() + << ", text: " << oss.str(); + } + } + } + } +} + +void StrRule::process(clang::BinaryOperator* binary_operator, Env *env_ptr) { + // str 赋值, 并且右边不能是 str concat 这样的表达式。 + // 示例: teams/ad/ad_algorithm/feature/fast/impl/extract_combine_user_app_list_product.h + // std::string product_name; + // product_name = advertiser_base.product_name(); + if (binary_operator->isAssignmentOp()) { + auto left_expr_info = parse_expr(binary_operator->getLHS(), env_ptr); + auto right_expr_info = parse_expr(binary_operator->getRHS(), env_ptr); + + if (left_expr_info != nullptr && right_expr_info != nullptr) { + if (!right_expr_info->is_cxx_operator_call_expr()) { + clang::SourceRange source_range = find_source_range(binary_operator); + process_str_assign(source_range, left_expr_info.get(), right_expr_info.get()); + } + } + } +} + +void StrRule::process(clang::CXXOperatorCallExpr* cxx_operator_call_expr, Env *env_ptr) { + // 注意,会用到 getRewrittenText,必须判断是否已经替换过,否则会 core。 + if (rewriter_.is_replace_visited(cxx_operator_call_expr)) { + return; + } + + auto expr_info_ptr = parse_expr(cxx_operator_call_expr, env_ptr); + if (expr_info_ptr == nullptr) { + return; + } + + // adlog.context().app_id() + "_" => std::string(app_id) + "_" + if (expr_info_ptr->is_str_concat()) { + // LOG(INFO) << "cxx_operator_call_expr, before : " << rewriter_.getRewrittenText(cxx_operator_call_expr); + if (expr_info_ptr->callee_name() == "operator+" && expr_info_ptr->call_expr_params_size() > 1) { + std::string param0_str; + std::string param1_str; + + if (auto param0 = expr_info_ptr->call_expr_param(0)) { + if (param0->is_string() && param0->is_from_adlog() && param0->is_basic_scalar() && + (param0->is_decl_ref_expr() || param0->is_cxx_member_call_expr())) { + std::string new_name = param0->get_bs_field_value(); + std::ostringstream oss_left; + oss_left << "std::string(" << new_name << ".data(), " << new_name << ".size())"; + param0_str = oss_left.str(); + } else { + param0_str = 
rewriter_.getRewrittenText(param0->expr()); + } + } + + if (auto param1 = expr_info_ptr->call_expr_param(1)) { + if (param1->is_string() && param1->is_from_adlog() && param1->is_basic_scalar() && + (param1->is_decl_ref_expr() || param1->is_cxx_member_call_expr())) { + std::string new_name = param1->get_bs_field_value(); + std::ostringstream oss_right; + oss_right << "std::string(" << new_name << ".data(), " << new_name << ".size())"; + param1_str = oss_right.str(); + } else { + param1_str = rewriter_.getRewrittenText(param1->expr()); + } + } + + std::ostringstream oss_text; + oss_text << param0_str << " + " << param1_str; + LOG(INFO) << "replace_cxx_operator_call_expr: " << stmt_to_string(cxx_operator_call_expr) + << ", new_text: " << oss_text.str(); + rewriter_.ReplaceText(cxx_operator_call_expr, oss_text.str()); + } + return; + } + + // str 赋值, 并且右边不能是 str concat 这样的表达式。 + std::string op = stmt_to_string(cxx_operator_call_expr->getCallee()); + if (op == "operator=") { + if (expr_info_ptr->call_expr_params_size() == 2) { + auto left_expr_info = expr_info_ptr->call_expr_param(0); + auto right_expr_info = expr_info_ptr->call_expr_param(1); + + if (left_expr_info != nullptr && right_expr_info != nullptr) { + if (!right_expr_info->is_cxx_operator_call_expr()) { + clang::SourceRange source_range = find_source_range(cxx_operator_call_expr); + process_str_assign(source_range, left_expr_info, right_expr_info); + } + } + } + } +} + +void StrRule::process_str_assign(clang::SourceRange source_range, + ExprInfo* left_expr_info, + ExprInfo* right_expr_info) { + if (left_expr_info != nullptr && right_expr_info != nullptr) { + if (left_expr_info->is_str_type() && right_expr_info->is_str_type()) { + std::string param = right_expr_info->get_bs_field_value(); + std::ostringstream oss; + oss << left_expr_info->origin_expr_str() << " = "; + oss << "std::string(" << param << ".data(), " << param << ".size())"; + LOG(INFO) << "replace str assign, expr: " << 
rewriter_.getRewrittenText(source_range) + << ", new text: " << oss.str(); + rewriter_.ReplaceText(source_range, oss.str()); + } + } +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/StrRule.h b/convert/rule/StrRule.h new file mode 100644 index 0000000..51e829e --- /dev/null +++ b/convert/rule/StrRule.h @@ -0,0 +1,42 @@ +#pragma once + +#include +#include + +#include "clang/AST/Expr.h" + +#include "RuleBase.h" +#include "../Type.h" +#include "../handler/StrictRewriter.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; +class ExprInfo; + +/// str 相关逻辑。 +class StrRule: public RuleBase { + public: + using RuleBase::process; + explicit StrRule(clang::Rewriter& rewriter): RuleBase(rewriter, "StrRule") {} // NOLINT + + void process(clang::CallExpr* call_expr, Env* env_ptr) override; + void process(clang::CXXMemberCallExpr* cxx_member_call_expr, Env* env_ptr) override; + + /// 替换赋值运算右边的参数。 + void process(clang::BinaryOperator* binary_operator, Env *env_ptr) override; + void process(clang::CXXOperatorCallExpr* cxx_operator_call_expr, Env *env_ptr) override; + + private: + void process_str_param_call(ExprInfo* expr_info_ptr, Env* env_ptr); + + void process_str_assign(clang::SourceRange source_range, + ExprInfo* left_expr_info, + ExprInfo* right_expr_info); +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/proto_list/ProtoListExprInfo.cpp b/convert/rule/proto_list/ProtoListExprInfo.cpp new file mode 100644 index 0000000..0946cb3 --- /dev/null +++ b/convert/rule/proto_list/ProtoListExprInfo.cpp @@ -0,0 +1,14 @@ +#include +#include + +#include "Env.h" +#include "Tool.h" +#include "ProtoListExprInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/proto_list/ProtoListExprInfo.h b/convert/rule/proto_list/ProtoListExprInfo.h 
new file mode 100644 index 0000000..39d26e8 --- /dev/null +++ b/convert/rule/proto_list/ProtoListExprInfo.h @@ -0,0 +1,24 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "../../ExprInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +class ProtoListExprInfo: public ExprInfo { + public: + using ExprInfo::ExprInfo; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/rule/proto_list/ProtoListExprParser.h b/convert/rule/proto_list/ProtoListExprParser.h new file mode 100644 index 0000000..9036d82 --- /dev/null +++ b/convert/rule/proto_list/ProtoListExprParser.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "../../ExprParser.h" +#include "../../ExprParserDetail.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; +class ProtoListExprInfo; + +/// 待定 +class ProtoListExprParser { +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/visitor/BSCtorVisitor.cpp b/convert/visitor/BSCtorVisitor.cpp new file mode 100644 index 0000000..4048428 --- /dev/null +++ b/convert/visitor/BSCtorVisitor.cpp @@ -0,0 +1,275 @@ +#include +#include +#include + +#include "../Env.h" +#include "../Tool.h" +#include "./BSCtorVisitor.h" +#include "../info/ConstructorInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void BSCtorVisitor::visit(clang::CXXConstructorDecl* cxx_constructor_decl, FeatureInfo* feature_info_ptr) { + if (cxx_constructor_decl == nullptr || feature_info_ptr == nullptr) { + return; + } + + std::string feature_type; + const std::string& feature_name = feature_info_ptr->feature_name(); + + ConstructorInfo* ctor_info_ptr = &(feature_info_ptr->mutable_constructor_info()); + if (ctor_info_ptr == nullptr) { + LOG(INFO) << "cannot get constructor_info from feature_info! 
feature_name: " + << feature_info_ptr->feature_name(); + return; + } + + // 处理构造函数参数, 获取 feature_type + for (clang::CXXCtorInitializer *const * it = cxx_constructor_decl->init_begin(); it != cxx_constructor_decl->init_end(); it++) { + if (ctor_info_ptr->init_list().size() == 0) { + ctor_info_ptr->set_init_list(rewriter_.getRewrittenText((*it)->getSourceRange())); + ctor_info_ptr->set_first_init_stmt(const_cast(*it)); + } + + if (clang::CXXConstructExpr* base_init = dyn_cast((*it)->getInit())) { + LOG(INFO) << "find base_init"; + if (base_init->getNumArgs() > 0) { + feature_type = stmt_to_string(base_init->getArg(0)); + } + break; + } else if (clang::ParenListExpr* base_init = dyn_cast((*it)->getInit())) { + LOG(INFO) << "find base_init paren list expr"; + if (base_init->getNumExprs() > 0) { + feature_type = stmt_to_string(base_init->getExpr(0)); + } + break; + } + } + + // 可能是别的构造函数,跳过。 + // 用户指定的构造函数必须有 feature_type。 + // if (feature_type.size() == 0) { + // return; + // } + + ctor_info_ptr->set_feature_type(feature_type); + feature_info_ptr->set_feature_type(feature_type); + + std::string content = rewriter_.getRewrittenText(cxx_constructor_decl->getSourceRange()); + if (content.size() > 0 && ctor_info_ptr->params().size() == 0) { + ctor_info_ptr->set_source_range(cxx_constructor_decl->getSourceRange()); + + // 可能会有多个构造函数,需要区分下,去掉拷贝构造以及移动构造等。 + for (size_t i = 0; i < cxx_constructor_decl->getNumParams(); i++) { + std::string param_str = + rewriter_.getRewrittenText(cxx_constructor_decl->getParamDecl(i)->getSourceRange()); + if (param_str.size() > 0) { + LOG(INFO) << "add ctor params: " << param_str << ", i: " << i; + ctor_info_ptr->add_param(param_str); + } + } + } + + // 处理构造函数逻辑, 获取 enum 等变量 + if (cxx_constructor_decl->hasBody()) { + ctor_info_ptr->set_body(cxx_constructor_decl->getBody()); + ctor_info_ptr->set_body_end(cxx_constructor_decl->getBody()->getEndLoc()); + Env env; + std::vector fields; + if (clang::Stmt* body = cxx_constructor_decl->getBody()) { + 
recursive_visit(body, feature_info_ptr, &env, &fields); + } + + LOG(INFO) << "fields: " << absl::StrJoin(fields, ", "); + + VarDeclInfo& var_decl_info = ctor_info_ptr->mutable_var_decl_info(); + var_decl_info.update(env.var_decls()); + LOG(INFO) << "env.var_decls.size: " << env.var_decls().size() + << ", var_decl_info.var_decls.size: " << var_decl_info.var_decls().size(); + } +} + +void BSCtorVisitor::recursive_visit(clang::Stmt* stmt, + FeatureInfo* info_ptr, + Env* env_ptr, + std::vector* fields_ptr) { + if (stmt == nullptr || info_ptr == nullptr || env_ptr == nullptr || fields_ptr == nullptr) { + return; + } + + if (clang::CompoundStmt* compound_stmt = dyn_cast(stmt)) { + for (clang::CompoundStmt::body_iterator start = compound_stmt->body_begin(); + start != compound_stmt->body_end(); start++) { + recursive_visit(*start, info_ptr, env_ptr, fields_ptr); + } + + } else if (clang::DeclStmt* decl_stmt = dyn_cast(stmt)) { + env_ptr->update(decl_stmt); + + } else if (clang::ExprWithCleanups* expr_with_cleanups = dyn_cast(stmt)) { + recursive_visit(expr_with_cleanups->getSubExpr(), info_ptr, env_ptr, fields_ptr); + + } else if (clang::ImplicitCastExpr* implicit_cast_expr = dyn_cast(stmt)) { + recursive_visit(implicit_cast_expr->getSubExpr(), info_ptr, env_ptr, fields_ptr); + + } else if (clang::CXXMemberCallExpr* cxx_member_call_expr = dyn_cast(stmt)) { + for (unsigned i = 0; i < cxx_member_call_expr->getNumArgs(); i++) { + recursive_visit(cxx_member_call_expr->getArg(i), info_ptr, env_ptr, fields_ptr); + } + process(cxx_member_call_expr, info_ptr, env_ptr, fields_ptr); + + } else if (clang::CXXOperatorCallExpr* cxx_operator_call_expr = dyn_cast(stmt)) { + for (size_t i = 0; i < cxx_operator_call_expr->getNumArgs(); i++) { + recursive_visit(cxx_operator_call_expr->getArg(i), info_ptr, env_ptr, fields_ptr); + } + + } else if (clang::UnaryOperator* unary_operator = dyn_cast(stmt)) { + recursive_visit(unary_operator->getSubExpr(), info_ptr, env_ptr, fields_ptr); + + } 
else if (clang::MemberExpr* member_expr = dyn_cast(stmt)) { + + } else if (clang::BinaryOperator* binary_operator = dyn_cast(stmt)) { + env_ptr->update(binary_operator); + recursive_visit(binary_operator->getLHS(), info_ptr, env_ptr, fields_ptr); + recursive_visit(binary_operator->getRHS(), info_ptr, env_ptr, fields_ptr); + env_ptr->clear_binary_op_info(); + + } else if (clang::MaterializeTemporaryExpr* materialize_temporary_expr = dyn_cast(stmt)) { + recursive_visit(materialize_temporary_expr->getSubExpr(), info_ptr, env_ptr, fields_ptr); + + } else if (clang::CXXConstructExpr* cxx_construct_expr = dyn_cast(stmt)) { + for (size_t i = 0; i < cxx_construct_expr->getNumArgs(); i++) { + recursive_visit(cxx_construct_expr->getArg(i), info_ptr, env_ptr, fields_ptr); + } + + } else if (clang::IfStmt* if_stmt = dyn_cast(stmt)) { + absl::optional tmp_if_info; + tmp_if_info.emplace(if_stmt); + Env new_env(env_ptr); + new_env.update(if_stmt); + + clang::Expr* cond_expr = if_stmt->getCond(); + clang::Stmt* then_expr = if_stmt->getThen(); + clang::Stmt* else_expr = if_stmt->getElse(); + + recursive_visit(cond_expr->IgnoreImpCasts(), info_ptr, &new_env, fields_ptr); + recursive_visit(then_expr, info_ptr, &new_env, fields_ptr); + recursive_visit(else_expr, info_ptr, &new_env, fields_ptr); + + } else if (clang::ForStmt* for_stmt = dyn_cast(stmt)) { + for (auto it = for_stmt->child_begin(); it != for_stmt->child_end(); it++) { + recursive_visit(*it, info_ptr, env_ptr, fields_ptr); + } + + } else if (clang::CXXForRangeStmt* cxx_for_range_stmt = dyn_cast(stmt)) { + for (auto it = cxx_for_range_stmt->child_begin(); it != cxx_for_range_stmt->child_end(); it++) { + recursive_visit(*it, info_ptr, env_ptr, fields_ptr); + } + + } else if (clang::DeclRefExpr* decl_ref_expr = dyn_cast(stmt)) { + if (tool::is_common_info_enum(decl_ref_expr->getType())) { + ConstructorInfo* ctor_info_ptr = &(info_ptr->mutable_constructor_info()); + + if (const auto& if_info = env_ptr->cur_if_info()) { + if 
(if_info->if_stmt() != nullptr && if_info->if_stmt()->getThen() != nullptr) { + ctor_info_ptr->add_common_info_enum(decl_ref_expr, if_info->if_stmt()->getThen()->getEndLoc()); + LOG(INFO) << "ctor add common_info_enum: " << stmt_to_string(decl_ref_expr); + } else { + LOG(INFO) << "if_stmt is nullptr!"; + } + } else { + ctor_info_ptr->add_common_info_enum(decl_ref_expr); + } + } + + } else { + LOG(INFO) << "unsupported stmt, trated as string: " << stmt_to_string(stmt); + } +} + +void BSCtorVisitor::process(clang::CXXMemberCallExpr* cxx_member_call_expr, + FeatureInfo* feature_info_ptr, + Env* env_ptr, + std::vector* fields_ptr) { + if (cxx_member_call_expr == nullptr || feature_info_ptr == nullptr + || env_ptr == nullptr || fields_ptr == nullptr) { + return; + } + + clang::Expr* caller = cxx_member_call_expr->getImplicitObjectArgument(); + if (clang::MemberExpr* callee = dyn_cast(cxx_member_call_expr->getCallee())) { + std::string callee_name = callee->getMemberDecl()->getNameAsString(); + if (callee_name == "push_back" && tool::is_int_vector(caller->getType())) { + std::string caller_name = tool::trim_this(stmt_to_string(caller)); + if (feature_info_ptr->is_int_list_member(caller_name, caller->getType())) { + if (cxx_member_call_expr->getNumArgs() == 1) { + std::string arg_str = stmt_to_string(cxx_member_call_expr->getArg(0)); + if (is_integer(arg_str)) { + LOG(INFO) << "add_int_list_member_single_value, caller_name: " << caller_name + << ", v: " << arg_str; + feature_info_ptr->add_int_list_member_single_value(caller_name, std::stoi(arg_str)); + } else { + if (clang::Expr* inner_expr = tool::get_inner_expr(cxx_member_call_expr->getArg(0))) { + if (tool::is_common_info_enum(inner_expr->getType())) { + if (absl::optional enum_value = find_common_attr_int_value(inner_expr)) { + LOG(INFO) << "add_int_list_member_single_value, caller_name: " + << caller_name << ", v: " << *enum_value; + feature_info_ptr->add_int_list_member_single_value(caller_name, *enum_value); + // 
不需要了 + // rewriter_.RemoveText(cxx_member_call_expr); + } + } + } + } + } + } + } + } + + // bs extractor 所用字段。 + // + // 主要分为普通字段和中间节点字段。 + // 普通字段是通过 attr_metas_.emplace_back 的字段,第一个函数参数即为具体的字段。 + // 中间节点字段指 PhotoInfo、LiveInfo 等嵌套的字段,需要根据名字获取具体的字段。 + // 如 BSGetPhotoInfoId, 即获取的是 photo_info.id 字段。 + std::string expr_str = stmt_to_string(cxx_member_call_expr); + + if (absl::StartsWith(expr_str, "this->")) { + expr_str = expr_str.substr(6); + } + + if (absl::StartsWith(expr_str, "bs_util.")) { + expr_str = expr_str.substr(8); + } + + if (absl::StartsWith(expr_str, "attr_metas_.emplace_back")) { + if (cxx_member_call_expr->getNumArgs() == 1) { + fields_ptr->emplace_back(stmt_to_string(cxx_member_call_expr->getArg(0))); + } else { + LOG(INFO) << "wrong number of args, should be 1, but is: " << cxx_member_call_expr->getNumArgs(); + } + } else if (expr_str.find("fill_attr_metas") != std::string::npos) { + LOG(INFO) << "dump bs functor"; + cxx_member_call_expr->dump(); + if (clang::Expr* caller = cxx_member_call_expr->getImplicitObjectArgument()) { + LOG(INFO) << "caller"; + caller->dump(); + } + + std::string middle_node; + + std::vector arr = absl::StrSplit(expr_str, "."); + if (arr.size() > 0 && arr[0].size() > 5) { + middle_node = arr[0].substr(5); + if (middle_node.size() > 0) { + fields_ptr->emplace_back(middle_node); + } + } + } +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/visitor/BSCtorVisitor.h b/convert/visitor/BSCtorVisitor.h new file mode 100644 index 0000000..94f63ad --- /dev/null +++ b/convert/visitor/BSCtorVisitor.h @@ -0,0 +1,45 @@ +#pragma once + +#include +#include +#include "clang/AST/DeclCXX.h" +#include "clang/Rewrite/Core/Rewriter.h" + +#include "../info/FeatureInfo.h" +#include "../info/ConstructorInfo.h" +#include "../handler/StrictRewriter.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +/// 解析 bs 特征类,获取使用的字段 +class BSCtorVisitor { + public: + explicit 
BSCtorVisitor(clang::Rewriter &rewriter) : // NOLINT + rewriter_(rewriter) { + } + + void visit(clang::CXXConstructorDecl* cxx_constructor_decl, + FeatureInfo* feature_info_ptr); + + void recursive_visit(clang::Stmt* stmt, + FeatureInfo* info_ptr, + Env* env_ptr, + std::vector* fields_ptr); + + private: + void process(clang::CXXMemberCallExpr* cxx_member_call_expr, + FeatureInfo* feature_info_ptr, + Env* env_ptr, + std::vector* fields_ptr); + + protected: + StrictRewriter rewriter_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/visitor/BSExtractMethodVisitor.cpp b/convert/visitor/BSExtractMethodVisitor.cpp new file mode 100644 index 0000000..5d74713 --- /dev/null +++ b/convert/visitor/BSExtractMethodVisitor.cpp @@ -0,0 +1,145 @@ +#include +#include +#include +#include +#include +#include +#include +#include "../Env.h" +#include "../Tool.h" +#include "../handler/LogicHandler.h" +#include "../handler/BSFieldHandler.h" +#include "./BSExtractMethodVisitor.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +using nlohmann::json; +using tool::replace_simple_text; +using tool::insert_str_after_bs_equal_end; + +json BSExtractMethodVisitor::visit(const clang::CXXMethodDecl* cxx_method_decl, + FeatureInfo* feature_info_ptr) { + auto config = GlobalConfig::Instance(); + + std::string method_name = cxx_method_decl->getNameAsString(); + + if (config->dump_ast) { + LOG(INFO) << "dump method: " << method_name; + cxx_method_decl->dump(); + } + + if (starts_with(method_name, "~") || + starts_with(method_name, "operator") || + method_name == feature_info_ptr->feature_name()) { + return nullptr; + } + + Env env; + env.set_feature_info(feature_info_ptr); + env.add_template_var_names(feature_info_ptr->template_var_names()); + + LOG(INFO) << "========================= start process method, name: " << method_name + << " =================================="; + + clang::Stmt *body = cxx_method_decl->getBody(); + + 
BSFieldHandler bs_field_handler(rewriter_); + recursive_visit(body, &bs_field_handler, &env); + + LogicHandler logic_handler; + auto root = recursive_visit (body, &logic_handler, &env); + + insert_new_def_at_begin(body, &env); + + if (root != nullptr) { + } else { + LOG(INFO) << "logic is nullptr!"; + } + + LOG(INFO) << "process method done, feature_name: " << feature_info_ptr->feature_name() + << ", method_name: " << method_name; + + return root; +} + +void BSExtractMethodVisitor::visit_params(const clang::CXXMethodDecl* cxx_method_decl, Env* env_ptr) { +} + +json BSExtractMethodVisitor::visit_loop_init(clang::ForStmt* for_stmt, + Env* env_ptr) { + auto root = json::array(); + clang::Stmt* init_stmt = for_stmt->getInit(); + if (init_stmt == nullptr) { + LOG(INFO) << "for_stmt has no init!"; + return nullptr; + } + + clang::VarDecl* var_decl = nullptr; + if (clang::DeclStmt* decl_stmt = dyn_cast(init_stmt)) { + if (decl_stmt->isSingleDecl()) { + var_decl = dyn_cast(decl_stmt->getSingleDecl()); + } else { + var_decl = dyn_cast(*(decl_stmt->decl_begin())); + } + } + + visit_loop_var_decl(var_decl, env_ptr); + + return root; +} + +json BSExtractMethodVisitor::visit_loop_init(clang::CXXForRangeStmt* cxx_for_range_stmt, + Env* env_ptr) { + auto root = json::array(); + clang::VarDecl* var_decl = cxx_for_range_stmt->getLoopVariable(); + + visit_loop_var_decl(var_decl, env_ptr); + + return root; +} + +json BSExtractMethodVisitor::visit_loop_var_decl(clang::VarDecl* var_decl, + Env* env_ptr) { + if (var_decl != nullptr && var_decl->hasInit()) { + std::string for_var_name = var_decl->getNameAsString(); + + auto root = json::array(); + + env_ptr->add_loop_var(for_var_name); + clang::Expr* init_expr = var_decl->getInit(); + env_ptr->add(for_var_name, init_expr); + LOG(INFO) << "for range, var: " << for_var_name + << ", expr: " << stmt_to_string(init_expr) + << ", type: " << var_decl->getType().getAsString() + << ", loop_var_type; " << 
tool::get_builtin_type_str(var_decl->getType()); + if (auto& loop_info = env_ptr->cur_mutable_loop_info()) { + loop_info->set_loop_var_type(tool::get_builtin_type_str(var_decl->getType())); + } + + return root; + } else { + LOG(INFO) << "loop var_decl is nullptr!"; + + return nullptr; + } +} + +void BSExtractMethodVisitor::insert_new_def_at_begin(clang::Stmt* body, Env* env_ptr) { + if (body == nullptr || env_ptr == nullptr) { + return; + } + + std::string body_str = rewriter_.getRewrittenText(body->getSourceRange()); + + std::string new_defs_str = env_ptr->get_all_new_defs(); + if (new_defs_str.size() > 0) { + std::string s = insert_str_after_bs_equal_end(body_str, new_defs_str); + rewriter_.ReplaceText(body->getSourceRange(), replace_simple_text(s)); + } +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/visitor/BSExtractMethodVisitor.h b/convert/visitor/BSExtractMethodVisitor.h new file mode 100644 index 0000000..a417e1e --- /dev/null +++ b/convert/visitor/BSExtractMethodVisitor.h @@ -0,0 +1,401 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +#include "clang/AST/ExprCXX.h" +#include "clang/AST/AST.h" +#include "clang/Rewrite/Core/Rewriter.h" +#include "clang/Basic/SourceLocation.h" + +#include "../Env.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +using nlohmann::json; + +class BSExtractMethodVisitor { + protected: + clang::Rewriter &rewriter_; + + public: + explicit BSExtractMethodVisitor(clang::Rewriter &rewriter) : rewriter_(rewriter) {} // NOLINT + + void visit_params(const clang::CXXMethodDecl *cxx_method_decl, Env *env_ptr); + json visit(const clang::CXXMethodDecl *cxx_method_decl, FeatureInfo *feature_info_ptr); + + template + json recursive_visit(clang::Stmt *stmt, Handler *handler_ptr, Env *env_ptr); + + template + json visit_loop(T *loop_stmt, Handler *handler_ptr, Env *env_ptr); + + template + json visit_loop_range(T *loop_stmt, Handler 
*handler_ptr, Env *env_ptr); + + json visit_loop_init(clang::ForStmt *for_stmt, Env *env_ptr); + + json visit_loop_init(clang::CXXForRangeStmt *cxx_for_range_stmt, + Env *env_ptr); + + json visit_loop_var_decl(clang::VarDecl *var_decl, Env *env_ptr); + + void insert_new_def_at_begin(clang::Stmt* body, Env* env_ptr); +}; + +template +json BSExtractMethodVisitor::recursive_visit( + clang::Stmt *stmt, + Handler* handler_ptr, + Env* env_ptr +) { + if (!stmt) { + return nullptr; + } + + if (clang::CompoundStmt* compound_stmt = dyn_cast(stmt)) { + auto root = json::array({"do"}); + for (clang::CompoundStmt::body_iterator start = compound_stmt->body_begin(); + start != compound_stmt->body_end(); + start++) { + root.push_back(recursive_visit(*start, handler_ptr, env_ptr)); + } + + return root; + } else if (clang::DeclStmt* decl_stmt = dyn_cast(stmt)) { + auto root = json::array({"assign"}); + + env_ptr->update(decl_stmt); + if (clang::VarDecl* var_decl = dyn_cast(decl_stmt->getSingleDecl())) { + root.push_back(var_decl->getNameAsString()); + + if (var_decl->hasInit()) { + root.push_back(recursive_visit(var_decl->getInit(), handler_ptr, env_ptr)); + } + + if (const clang::Expr* init_expr = var_decl->getAnyInitializer()) { + root.push_back(recursive_visit(const_cast(init_expr), handler_ptr, env_ptr)); + } + } + + handler_ptr->process_to_json(decl_stmt, env_ptr); + + // decl_stmt 比较特殊, decl_info 只维持当前变量, 访问完当前 decl_stmt 后立即销毁。 + env_ptr->clear_decl_info(); + + return root; + } else if (clang::CXXConstructExpr* cxx_construct_expr = dyn_cast(stmt)) { + auto root = json::array(); + for (size_t i = 0; i < cxx_construct_expr->getNumArgs(); i++) { + root.push_back(recursive_visit(cxx_construct_expr->getArg(i), handler_ptr, env_ptr)); + } + + return root; + } else if (clang::ExprWithCleanups* expr_with_cleanups = dyn_cast(stmt)) { + return recursive_visit(expr_with_cleanups->getSubExpr(), handler_ptr, env_ptr); + + } else if (clang::ImplicitCastExpr* implicit_cast_expr = 
dyn_cast(stmt)) { + return recursive_visit(implicit_cast_expr->getSubExpr(), handler_ptr, env_ptr); + + } else if (clang::CXXBindTemporaryExpr* cxx_bind_temporary_expr = + dyn_cast(stmt)) { + return recursive_visit(cxx_bind_temporary_expr->getSubExpr(), handler_ptr, env_ptr); + + } else if (clang::CXXNullPtrLiteralExpr* cxx_null_ptr_literal_expr = + dyn_cast(stmt)) { + auto root = json::array({"value", "nullptr"}); + return root; + } else if (clang::CXXMemberCallExpr* cxx_member_call_expr = dyn_cast(stmt)) { + auto root = json::array({"call"}); + + clang::Expr* caller = cxx_member_call_expr->getImplicitObjectArgument(); + if (clang::MemberExpr* callee = dyn_cast(cxx_member_call_expr->getCallee())) { + std::string callee_name = callee->getMemberDecl()->getNameAsString(); + root.push_back(callee_name); + + for (unsigned i = 0; i < cxx_member_call_expr->getNumArgs(); i++) { + root.push_back(recursive_visit(cxx_member_call_expr->getArg(i), handler_ptr, env_ptr)); + } + } + + handler_ptr->process_to_json(cxx_member_call_expr, env_ptr); + + return root; + } else if (clang::CXXOperatorCallExpr* cxx_operator_call_expr = + dyn_cast(stmt)) { + // 可能是 assign, compare, math + env_ptr->update(cxx_operator_call_expr); + + std::string stmt_str = stmt_to_string(cxx_operator_call_expr); + + std::string op = stmt_to_string(cxx_operator_call_expr->getCallee()); + json root = json::array(); + static std::regex p("operator"); + + if (op == "operator=") { + // root = std::move(std::make_unique()); + } else if (op == "operator==" || op == "operator<" || op == "operator>" + || op == "operator<=" || op == "operator>=") { + root = json::array({std::regex_replace(op, p, "")}); + } else if (op == "operator+" || op == "operator-" || op == "operator*" || op == "operator/") { + root = json::array({std::regex_replace(op, p, "")}); + } else if (op == "operator()") { + // 通过 CXXOperatorCallExpr 无法判断,只能通过字符串判断是否是 bs 中间节点函数。 + if (absl::StartsWith(stmt_str, "this->bs_util") || 
absl::StartsWith(stmt_str, "BS")) { + static std::regex p_bs("(this->bs_util.)?BS(Get|Has)(.*)\\("); + std::smatch m; + + if (std::regex_search(stmt_str, m, p_bs)) { + // 最后一个 match 是模板类名 + if (m.size() > 0) { + root = json::array({"bs_util", m[m.size() - 1]}); + } + } else { + LOG(ERROR) << "cannot find bs_util method, stmt_str: " << stmt_str; + } + } else { + LOG(INFO) << "unsupported operator(): " << stmt_str; + } + } else { + LOG(INFO) << "unsupported cxx_operator_call_expr: " << stmt_to_string(cxx_operator_call_expr) + << ", op: " << op; + return nullptr; + } + + for (size_t i = 0; i < cxx_operator_call_expr->getNumArgs(); i++) { + root.push_back(recursive_visit(cxx_operator_call_expr->getArg(i), handler_ptr, env_ptr)); + } + + env_ptr->clear_binary_op_info(); + return root; + } else if (clang::CallExpr* call_expr = dyn_cast(stmt)) { + // auto root = std::make_unique(); + // 会包含 CXXMemberCallExpr 和 CXXOperatorCallExpr, 此处逻辑需要再仔细考虑下 + auto root = json::array(); + + if (clang::Expr* callee = call_expr->getCallee()) { + root.push_back(tool::trim_this(stmt_to_string(callee))); + } + + for (unsigned i = 0; i < call_expr->getNumArgs(); i++) { + root.push_back(recursive_visit(call_expr->getArg(i), handler_ptr, env_ptr)); + } + + return root; + } else if (clang::CXXDependentScopeMemberExpr *cxx_dependent_scope_member_expr = + dyn_cast(stmt)) { + return handler_ptr->process_to_json(cxx_dependent_scope_member_expr, env_ptr); + + } else if (clang::ArraySubscriptExpr* array_subscript_expr = dyn_cast(stmt)) { + auto root = json::array(); + root.push_back(handler_ptr->process_to_json(array_subscript_expr, env_ptr)); + + return root; + } else if (clang::UnaryOperator *unary_operator = dyn_cast(stmt)) { + auto op_str = clang::UnaryOperator::getOpcodeStr(unary_operator->getOpcode()); + auto root = json::array(); + root.push_back(recursive_visit(unary_operator->getSubExpr(), handler_ptr, env_ptr)); + + return root; + } else if (clang::MemberExpr* member_expr = dyn_cast(stmt)) { 
+ auto root = json::array(); + root.push_back(handler_ptr->process_to_json(member_expr, env_ptr)); + + return root; + } else if (clang::BinaryOperator* binary_operator = dyn_cast(stmt)) { + auto root = json::array(); + + env_ptr->update(binary_operator); + if (binary_operator->isAssignmentOp()) { + env_ptr->update_assign_info(binary_operator); + // root = std::move(std::make_unique()); + } else { + root.push_back({binary_operator->getOpcodeStr().str()}); + } + + root.push_back(recursive_visit(binary_operator->getLHS(), handler_ptr, env_ptr)); + + root.push_back(recursive_visit(binary_operator->getRHS(), handler_ptr, env_ptr)); + + env_ptr->clear_binary_op_info(); + if (binary_operator->isAssignmentOp()) { + env_ptr->clear_assign_info(); + } + + return root; + } else if (clang::ParenExpr* paren_expr = dyn_cast(stmt)) { + return recursive_visit(paren_expr->getSubExpr(), handler_ptr, env_ptr); + + } else if (clang::ImplicitCastExpr* implicit_cast_expr = dyn_cast(stmt)) { + return recursive_visit(implicit_cast_expr->getSubExpr(), handler_ptr, env_ptr); + + } else if (clang::MaterializeTemporaryExpr* materialize_temporary_expr = + dyn_cast(stmt)) { + return recursive_visit(materialize_temporary_expr->getSubExpr(), handler_ptr, env_ptr); + + } else if (clang::CXXThisExpr* cxx_this_expr = dyn_cast(stmt)) { + return handler_ptr->process_to_json(cxx_this_expr, env_ptr); + + } else if (clang::ReturnStmt* return_stmt = dyn_cast(stmt)) { + return handler_ptr->process_to_json(return_stmt, env_ptr); + + } else if (clang::BreakStmt* break_stmt = dyn_cast(stmt)) { + return handler_ptr->process_to_json(break_stmt, env_ptr); + + } else if (clang::ContinueStmt* continue_stmt = dyn_cast(stmt)) { + return handler_ptr->process_to_json(continue_stmt, env_ptr); + + } else if (clang::IfStmt* if_stmt = dyn_cast(stmt)) { + Env new_env(env_ptr); + new_env.update(if_stmt); + + auto root = json::array(); + + clang::Expr* cond_expr = if_stmt->getCond(); + cond_expr = cond_expr->IgnoreImpCasts(); 
+ root.push_back(recursive_visit(cond_expr, handler_ptr, &new_env)); + + clang::Stmt* then_expr = if_stmt->getThen(); + clang::Stmt* else_expr = if_stmt->getElse(); + + auto& if_info = new_env.cur_mutable_if_info(); + if_info->set_if_stage(IfStage::THEN); + root.push_back(recursive_visit(then_expr, handler_ptr, &new_env)); + + if_info->set_if_stage(IfStage::ELSE); + root.push_back(recursive_visit(else_expr, handler_ptr, &new_env)); + + if_info->set_if_stage(IfStage::END); + + return handler_ptr->process_to_json(if_stmt, &new_env); + } else if (clang::ForStmt* for_stmt = dyn_cast(stmt)) { + Env new_env(env_ptr); + new_env.update(for_stmt); + return visit_loop(for_stmt, handler_ptr, &new_env); + + } else if (clang::CXXForRangeStmt* cxx_for_range_stmt = dyn_cast(stmt)) { + Env new_env(env_ptr); + new_env.update(cxx_for_range_stmt); + + return visit_loop(cxx_for_range_stmt, handler_ptr, &new_env); + + } else if (clang::DeclRefExpr* decl_ref_expr = dyn_cast(stmt)) { + return handler_ptr->process_to_json(decl_ref_expr, env_ptr); + } else if (clang::ConstantExpr* constant_expr = dyn_cast(stmt)) { + return recursive_visit(constant_expr->getSubExpr(), handler_ptr, env_ptr); + + } else if (clang::IntegerLiteral* integer_literal = dyn_cast(stmt)) { + auto root = json::array(); + + return root; + } else if (clang::CStyleCastExpr* c_style_cast_expr = dyn_cast(stmt)) { + auto root = json::array(); + root.push_back(recursive_visit(c_style_cast_expr->getSubExpr(), handler_ptr, env_ptr)); + + return root; + } else if (clang::CXXStaticCastExpr* cxx_static_cast_expr = dyn_cast(stmt)) { + auto root = json::array(); + root.push_back(recursive_visit(cxx_static_cast_expr->getSubExpr(), handler_ptr, env_ptr)); + + return root; + } else if (clang::SwitchStmt* switch_stmt = dyn_cast(stmt)) { + auto root = json::array(); + + root.push_back(recursive_visit(switch_stmt->getCond(), handler_ptr, env_ptr)); + root.push_back(recursive_visit(switch_stmt->getBody(), handler_ptr, env_ptr)); + + 
return root; + } else if (clang::CaseStmt* case_stmt = dyn_cast(stmt)) { + auto root = json::array(); + env_ptr->update(case_stmt); + + root.push_back(recursive_visit(case_stmt->getLHS(), handler_ptr, env_ptr)); + root.push_back(recursive_visit(case_stmt->getRHS(), handler_ptr, env_ptr)); + root.push_back(recursive_visit(case_stmt->getSubStmt(), handler_ptr, env_ptr)); + + env_ptr->clear_switch_case_info(); + + return root; + } else if (clang::ConditionalOperator* conditional_operator = dyn_cast(stmt)) { + auto root = json::array(); + + root.push_back(recursive_visit(conditional_operator->getCond(), handler_ptr, env_ptr)); + root.push_back(recursive_visit(conditional_operator->getTrueExpr(), handler_ptr, env_ptr)); + root.push_back(recursive_visit(conditional_operator->getFalseExpr(), handler_ptr, env_ptr)); + + return root; + } else if (clang::CXXFunctionalCastExpr* cxx_functional_cast_expr = + dyn_cast(stmt)) { + auto root = json::array(); + root.push_back(recursive_visit(cxx_functional_cast_expr->getSubExpr(), handler_ptr, env_ptr)); + + return root; + } else if (clang::GNUNullExpr* gnu_null_expr = dyn_cast(stmt)) { + auto root = json::array(); + + return root; + } else { + LOG(INFO) << "unsupported stmt, trated as string: " << stmt_to_string(stmt); + return nullptr; + } +} + +template +json BSExtractMethodVisitor::visit_loop(T* loop_stmt, + Handler* handler_ptr, + Env* env_ptr) { + // 添加 loop_var + auto root = visit_loop_init(loop_stmt, env_ptr); + if (root == nullptr) { + return root; + } + + // loop init + root.push_back(visit_loop_range(loop_stmt, handler_ptr, env_ptr)); + + auto& loop_info = env_ptr->mutable_loop_info(); + if (loop_info) { + loop_info->set_loop_state(LoopStage::BODY); + } + + clang::Stmt* body = loop_stmt->getBody(); + + for (auto body_it = body->child_begin(); body_it != body->child_end(); body_it++) { + root.push_back(recursive_visit(*body_it, handler_ptr, env_ptr)); + } + + handler_ptr->process_to_json(loop_stmt, env_ptr); + + 
env_ptr->pop_loop_var(); + return root; +} + +template +json BSExtractMethodVisitor::visit_loop_range(T* loop_stmt, + Handler* handler_ptr, + Env* env_ptr) { + auto root = json::array(); + for (auto it = loop_stmt->child_begin(); it != loop_stmt->child_end(); it++) { + if (*it != nullptr) { + if (clang::CompoundStmt* compound_stmt = dyn_cast(*it)) { + continue; + } + + root.push_back(recursive_visit(*it, handler_ptr, env_ptr)); + } + } + + return root; +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/visitor/CtorVisitor.cpp b/convert/visitor/CtorVisitor.cpp new file mode 100644 index 0000000..460cae8 --- /dev/null +++ b/convert/visitor/CtorVisitor.cpp @@ -0,0 +1,223 @@ +#include + +#include "../Env.h" +#include "../Tool.h" +#include "./CtorVisitor.h" +#include "../info/ConstructorInfo.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void CtorVisitor::visit(clang::CXXConstructorDecl* cxx_constructor_decl, FeatureInfo* feature_info_ptr) { + if (cxx_constructor_decl == nullptr || feature_info_ptr == nullptr) { + return; + } + + std::string feature_type; + const std::string& feature_name = feature_info_ptr->feature_name(); + + ConstructorInfo* ctor_info_ptr = &(feature_info_ptr->mutable_constructor_info()); + if (ctor_info_ptr == nullptr) { + LOG(INFO) << "cannot get constructor_info from feature_info! 
feature_name: " + << feature_info_ptr->feature_name(); + return; + } + + // 处理构造函数参数, 获取 feature_type + for (clang::CXXCtorInitializer *const * it = cxx_constructor_decl->init_begin(); it != cxx_constructor_decl->init_end(); it++) { + if (ctor_info_ptr->init_list().size() == 0) { + ctor_info_ptr->set_init_list(rewriter_.getRewrittenText((*it)->getSourceRange())); + ctor_info_ptr->set_first_init_stmt(const_cast(*it)); + } + + if (clang::CXXConstructExpr* base_init = dyn_cast((*it)->getInit())) { + if (base_init->getNumArgs() > 0) { + feature_type = stmt_to_string(base_init->getArg(0)); + } + break; + } else if (clang::ParenListExpr* base_init = dyn_cast((*it)->getInit())) { + if (base_init->getNumExprs() > 0) { + feature_type = stmt_to_string(base_init->getExpr(0)); + } + break; + } + } + + // 可能是别的构造函数,跳过。 + // 用户指定的构造函数必须有 feature_type。 + if (feature_type.size() == 0) { + return; + } + + ctor_info_ptr->set_feature_type(feature_type); + feature_info_ptr->set_feature_type(feature_type); + LOG(INFO) << "get feature type: " << feature_type << ", class_name: " << feature_name; + + std::string content = rewriter_.getRewrittenText(cxx_constructor_decl->getSourceRange()); + if (content.size() > 0 && ctor_info_ptr->params().size() == 0) { + ctor_info_ptr->set_source_range(cxx_constructor_decl->getSourceRange()); + + // 可能会有多个构造函数,需要区分下,去掉拷贝构造以及移动构造等。 + for (size_t i = 0; i < cxx_constructor_decl->getNumParams(); i++) { + std::string param_str = + rewriter_.getRewrittenText(cxx_constructor_decl->getParamDecl(i)->getSourceRange()); + if (param_str.size() > 0) { + LOG(INFO) << "add ctor params: " << param_str << ", i: " << i; + ctor_info_ptr->add_param(param_str); + } + } + } + + // 处理构造函数逻辑, 获取 enum 等变量 + if (cxx_constructor_decl->hasBody()) { + ctor_info_ptr->set_body(cxx_constructor_decl->getBody()); + ctor_info_ptr->set_body_end(cxx_constructor_decl->getBody()->getEndLoc()); + Env env; + if (clang::Stmt* body = cxx_constructor_decl->getBody()) { + recursive_visit(body, 
feature_info_ptr, &env); + } + + VarDeclInfo& var_decl_info = ctor_info_ptr->mutable_var_decl_info(); + var_decl_info.update(env.var_decls()); + LOG(INFO) << "env.var_decls.size: " << env.var_decls().size() + << ", var_decl_info.var_decls.size: " << var_decl_info.var_decls().size(); + } +} + +void CtorVisitor::recursive_visit(clang::Stmt* stmt, FeatureInfo* info_ptr, Env* env_ptr) { + if (stmt == nullptr || info_ptr == nullptr || env_ptr == nullptr) { + return; + } + + if (clang::CompoundStmt* compound_stmt = dyn_cast(stmt)) { + for (clang::CompoundStmt::body_iterator start = compound_stmt->body_begin(); + start != compound_stmt->body_end(); start++) { + recursive_visit(*start, info_ptr, env_ptr); + } + + } else if (clang::DeclStmt* decl_stmt = dyn_cast(stmt)) { + env_ptr->update(decl_stmt); + + } else if (clang::ExprWithCleanups* expr_with_cleanups = dyn_cast(stmt)) { + recursive_visit(expr_with_cleanups->getSubExpr(), info_ptr, env_ptr); + + } else if (clang::ImplicitCastExpr* implicit_cast_expr = dyn_cast(stmt)) { + recursive_visit(implicit_cast_expr->getSubExpr(), info_ptr, env_ptr); + + } else if (clang::CXXMemberCallExpr* cxx_member_call_expr = dyn_cast(stmt)) { + for (unsigned i = 0; i < cxx_member_call_expr->getNumArgs(); i++) { + recursive_visit(cxx_member_call_expr->getArg(i), info_ptr, env_ptr); + } + process(cxx_member_call_expr, info_ptr, env_ptr); + + } else if (clang::CXXOperatorCallExpr* cxx_operator_call_expr = dyn_cast(stmt)) { + for (size_t i = 0; i < cxx_operator_call_expr->getNumArgs(); i++) { + recursive_visit(cxx_operator_call_expr->getArg(i), info_ptr, env_ptr); + } + + } else if (clang::UnaryOperator* unary_operator = dyn_cast(stmt)) { + recursive_visit(unary_operator->getSubExpr(), info_ptr, env_ptr); + + } else if (clang::MemberExpr* member_expr = dyn_cast(stmt)) { + + } else if (clang::BinaryOperator* binary_operator = dyn_cast(stmt)) { + env_ptr->update(binary_operator); + recursive_visit(binary_operator->getLHS(), info_ptr, env_ptr); + 
recursive_visit(binary_operator->getRHS(), info_ptr, env_ptr); + env_ptr->clear_binary_op_info(); + + } else if (clang::MaterializeTemporaryExpr* materialize_temporary_expr = dyn_cast(stmt)) { + recursive_visit(materialize_temporary_expr->getSubExpr(), info_ptr, env_ptr); + + } else if (clang::CXXConstructExpr* cxx_construct_expr = dyn_cast(stmt)) { + for (size_t i = 0; i < cxx_construct_expr->getNumArgs(); i++) { + recursive_visit(cxx_construct_expr->getArg(i), info_ptr, env_ptr); + } + + } else if (clang::IfStmt* if_stmt = dyn_cast(stmt)) { + absl::optional tmp_if_info; + tmp_if_info.emplace(if_stmt); + Env new_env(env_ptr); + new_env.update(if_stmt); + + clang::Expr* cond_expr = if_stmt->getCond(); + clang::Stmt* then_expr = if_stmt->getThen(); + clang::Stmt* else_expr = if_stmt->getElse(); + + recursive_visit(cond_expr->IgnoreImpCasts(), info_ptr, &new_env); + recursive_visit(then_expr, info_ptr, &new_env); + recursive_visit(else_expr, info_ptr, &new_env); + + } else if (clang::ForStmt* for_stmt = dyn_cast(stmt)) { + for (auto it = for_stmt->child_begin(); it != for_stmt->child_end(); it++) { + recursive_visit(*it, info_ptr, env_ptr); + } + + } else if (clang::CXXForRangeStmt* cxx_for_range_stmt = dyn_cast(stmt)) { + for (auto it = cxx_for_range_stmt->child_begin(); it != cxx_for_range_stmt->child_end(); it++) { + recursive_visit(*it, info_ptr, env_ptr); + } + + } else if (clang::DeclRefExpr* decl_ref_expr = dyn_cast(stmt)) { + if (tool::is_common_info_enum(decl_ref_expr->getType())) { + ConstructorInfo* ctor_info_ptr = &(info_ptr->mutable_constructor_info()); + + if (const auto& if_info = env_ptr->cur_if_info()) { + if (if_info->if_stmt() != nullptr && if_info->if_stmt()->getThen() != nullptr) { + ctor_info_ptr->add_common_info_enum(decl_ref_expr, if_info->if_stmt()->getThen()->getEndLoc()); + LOG(INFO) << "ctor add common_info_enum: " << stmt_to_string(decl_ref_expr); + } else { + LOG(INFO) << "if_stmt is nullptr!"; + } + } else { + 
ctor_info_ptr->add_common_info_enum(decl_ref_expr); + } + } + + } else { + LOG(INFO) << "unsupported stmt, trated as string: " << stmt_to_string(stmt); + } +} + +void CtorVisitor::process(clang::CXXMemberCallExpr* cxx_member_call_expr, + FeatureInfo* feature_info_ptr, + Env* env_ptr) { + if (cxx_member_call_expr == nullptr || feature_info_ptr == nullptr || env_ptr == nullptr) { + return; + } + + clang::Expr* caller = cxx_member_call_expr->getImplicitObjectArgument(); + if (clang::MemberExpr* callee = dyn_cast(cxx_member_call_expr->getCallee())) { + std::string callee_name = callee->getMemberDecl()->getNameAsString(); + if (callee_name == "push_back" && tool::is_int_vector(caller->getType())) { + std::string caller_name = tool::trim_this(stmt_to_string(caller)); + if (feature_info_ptr->is_int_list_member(caller_name, caller->getType())) { + if (cxx_member_call_expr->getNumArgs() == 1) { + std::string arg_str = stmt_to_string(cxx_member_call_expr->getArg(0)); + if (is_integer(arg_str)) { + LOG(INFO) << "add_int_list_member_single_value, caller_name: " << caller_name + << ", v: " << arg_str; + feature_info_ptr->add_int_list_member_single_value(caller_name, std::stoi(arg_str)); + } else { + if (clang::Expr* inner_expr = tool::get_inner_expr(cxx_member_call_expr->getArg(0))) { + if (tool::is_common_info_enum(inner_expr->getType())) { + if (absl::optional enum_value = find_common_attr_int_value(inner_expr)) { + LOG(INFO) << "add_int_list_member_single_value, caller_name: " + << caller_name << ", v: " << *enum_value; + feature_info_ptr->add_int_list_member_single_value(caller_name, *enum_value); + // 不需要了 + // rewriter_.RemoveText(cxx_member_call_expr); + } + } + } + } + } + } + } + } +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/visitor/CtorVisitor.h b/convert/visitor/CtorVisitor.h new file mode 100644 index 0000000..b846915 --- /dev/null +++ b/convert/visitor/CtorVisitor.h @@ -0,0 +1,34 @@ +#pragma once + +#include 
"clang/AST/DeclCXX.h" +#include "clang/Rewrite/Core/Rewriter.h" + +#include "../info/FeatureInfo.h" +#include "../info/ConstructorInfo.h" +#include "../handler/StrictRewriter.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; + +class CtorVisitor { + public: + explicit CtorVisitor(clang::Rewriter &rewriter) : // NOLINT + rewriter_(rewriter) { + } + + void visit(clang::CXXConstructorDecl* cxx_constructor_decl, FeatureInfo* feature_info_ptr); + void recursive_visit(clang::Stmt* stmt, FeatureInfo* info_ptr, Env* env_ptr); + + private: + void process(clang::CXXMemberCallExpr* cxx_member_call_expr, FeatureInfo* feature_info_ptr, Env* env_ptr); + + protected: + StrictRewriter rewriter_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/visitor/ExtractMethodVisitor.cpp b/convert/visitor/ExtractMethodVisitor.cpp new file mode 100644 index 0000000..04400a2 --- /dev/null +++ b/convert/visitor/ExtractMethodVisitor.cpp @@ -0,0 +1,728 @@ +#include +#include +#include +#include +#include +#include +#include "../Env.h" +#include "../Tool.h" +#include "../handler/OverviewHandler.h" +#include "../handler/AdlogFieldHandler.h" +#include "ExtractMethodVisitor.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void ExtractMethodVisitor::visit(const clang::CXXMethodDecl* cxx_method_decl, FeatureInfo* feature_info_ptr) { + auto config = GlobalConfig::Instance(); + + std::string method_name = cxx_method_decl->getNameAsString(); + + if (config->dump_ast) { + LOG(INFO) << "dump method with body: " << method_name; + cxx_method_decl->dump(); + + if (cxx_method_decl->getBody() != nullptr) { + cxx_method_decl->getBody()->dump(); + } + } + + if (starts_with(method_name, "~") || + starts_with(method_name, "operator") || + method_name == feature_info_ptr->feature_name()) { + return; + } + + StrictRewriter strict_rewriter(rewriter_); + // LazyReplace 里会保存 Env*, 所以 Handler 必须是局部变量,否则会 core + 
AdlogFieldHandler adlog_field_handler(rewriter_); + FeatureInfo& feature_info = *feature_info_ptr; + + Env env; + ConstructorInfo& constructor_info = feature_info.mutable_constructor_info(); + env.set_method_name(method_name); + + get_overview_info(feature_info_ptr, cxx_method_decl, method_name); + + if (const auto& int_list_info = feature_info_ptr->common_info_multi_int_list()) { + const auto& map_vec_connections = int_list_info->map_vec_connections(); + LOG(INFO) << "after overview map_vec_connections size: " << map_vec_connections.size(); + } + + env.set_feature_info(feature_info_ptr); + env.add_template_var_names(feature_info_ptr->template_var_names()); + env.set_constructor_info(&constructor_info); + + LOG(INFO) << "========================= start process method, name: " << method_name + << " =================================="; + clang::Stmt *body = cxx_method_decl->getBody(); + + std::string origin_body_str = strict_rewriter.getRewrittenText(body); + + if (config->use_reco_user_info) { + config->rewrite_reco_user_info = false; + recursive_visit (body, &adlog_field_handler, &env); + + // 将新增变量加到 body 最开始。 + std::string body_str = strict_rewriter.getRewrittenText(body); + std::string new_body_str = insert_top_new_defs_to_body(body_str, + env.get_all_new_def_var_names(), + env.get_all_new_defs()); + + feature_info.set_extract_method_content(new_body_str); + + config->rewrite_reco_user_info = true; + } + + recursive_visit (body, &adlog_field_handler, &env); + + if (method_name == "Extract") { + process_get_bs(&strict_rewriter, &env, find_body_begin_loc(body), body); + process_deleted_var(&strict_rewriter, &env); + + process_constructor(&strict_rewriter, &env); + + process_update_action(&strict_rewriter, &env); + process_new_field_def(&strict_rewriter, &env, cxx_method_decl->getEndLoc()); + + constructor_info.set_body_content(strict_rewriter.getRewrittenText(constructor_info.body())); + + if (config->use_reco_user_info) { + std::string reco_body_str = 
strict_rewriter.getRewrittenText(body); + + std::ostringstream oss_reco; + oss_reco << "if (bs == nullptr) {\n" + << " return;\n" + << " }\n\n" + << env.get_all_new_defs() + << "\n"; + + std::regex p_bs("if (bs == nullptr) \\{[\\s\\S.]*?\\}"); + reco_body_str = std::regex_replace(reco_body_str, p_bs, oss_reco.str()); + + feature_info.set_reco_extract_body(reco_body_str); + + std::string normal_body_str = feature_info.extract_method_content(); + + std::regex p("(adlog\\.|ad_log\\.)"); + normal_body_str = std::regex_replace(normal_body_str, p, "bslog."); + + size_t pos = normal_body_str.find("{"); + if (pos != std::string::npos) { + std::ostringstream oss_normal; + oss_normal << "{\n" + << " if (FLAGS_use_bs_reco_userinfo) {\n" + << " ExtractWithBSRecoUserInfo(bslog, pos, result);\n" + << " return;\n" + << " }\n\n" + << " auto bs = bslog.GetBS();\n" + << " if (bs == nullptr) { return ; }\n \n" + << normal_body_str.substr(pos + 1); + + feature_info.set_extract_method_content(oss_normal.str()); + } else { + LOG(ERROR) << "cannot find { at begin of body"; + } + } else { + feature_info.set_extract_method_content(strict_rewriter.getRewrittenText(body)); + } + + if (!feature_info.is_template()) { + std::ostringstream oss_ctor; + oss_ctor << feature_info.feature_name() << "(" << constructor_info.joined_params() << ");"; + + strict_rewriter.ReplaceText(constructor_info.source_range(), oss_ctor.str()); + + if (config->use_reco_user_info) { + std::ostringstream oss_extract_reco; + oss_extract_reco << ";\n" + << "void ExtractWithBSRecoUserInfo(const BSLog& bslog, size_t pos," + << " std::vector* result);\n"; + strict_rewriter.ReplaceText(body, oss_extract_reco.str()); + } else { + strict_rewriter.ReplaceText(body, ";"); + } + } + } else if (is_infer_filter_method(cxx_method_decl)) { + // 替换参数 item 为 bslog + process_infer_filter_param(cxx_method_decl, &strict_rewriter); + process_infer_filter_root_env(&strict_rewriter, &env, find_body_begin_loc(body)); + + auto& 
infer_filter_funcs = config->infer_filter_funcs; + infer_filter_funcs[method_name] = rewriter_.getRewrittenText(find_source_range(body)); + + strict_rewriter.ReplaceText(find_source_range(body), ";"); + } else { + std::vector params_text; + process_params(cxx_method_decl, &strict_rewriter, &env, ¶ms_text); + std::string method_decl = method_name + "(" + absl::StrJoin(params_text, ",\n") + ")"; + bool is_combine_user = feature_info_ptr->is_combine(); + const MethodInfo* method_info = feature_info.find_method_info(method_name); + if (method_info->is_return_adlog_user_field()) { + is_combine_user &= true; + } + + std::string bs_return_type = strict_rewriter.getRewrittenText(cxx_method_decl->getReturnTypeSourceRange()); + if (cxx_method_decl->getReturnType().getTypePtr()->isVoidType() || + tool::is_reco_proto(cxx_method_decl->getReturnType())) { + } else { + bs_return_type = tool::get_bs_type_str(cxx_method_decl->getReturnType(), is_combine_user); + } + + feature_info.add_other_method(method_name, + cxx_method_decl->getReturnType(), + bs_return_type, + method_decl, + strict_rewriter.getRewrittenText(body)); + if (!feature_info.is_template()) { + strict_rewriter.ReplaceText(cxx_method_decl->getSourceRange(), + bs_return_type + " " + method_decl + ";"); + } else { + strict_rewriter.ReplaceText(cxx_method_decl->getReturnTypeSourceRange(), bs_return_type); + } + } + + LOG(INFO) << "process method done, feature_name: " << feature_info_ptr->feature_name() + << ", method_name: " << method_name; +} + +void ExtractMethodVisitor::visit_params(const clang::CXXMethodDecl* cxx_method_decl, Env* env_ptr) { + if (env_ptr == nullptr) { + return; + } + + const std::string& method_name = env_ptr->get_method_name(); + if (method_name == "Extract") { + return; + } + + if (auto feature_info = env_ptr->mutable_feature_info()) { + MethodInfo& method_info = feature_info->touch_method_info(method_name); + + for (size_t i = 0; i < cxx_method_decl->getNumParams(); i++) { + const 
clang::ParmVarDecl* param_decl = cxx_method_decl->getParamDecl(i); + std::string param_name = param_decl->getNameAsString(); + if (param_name.size() == 0) { + continue; + } + + if (tool::is_repeated_action_info(param_decl->getType())) { + method_info.add_new_action_param(i, param_name); + } + } + } +} + +void ExtractMethodVisitor::get_overview_info(FeatureInfo* feature_info_ptr, + const clang::CXXMethodDecl* cxx_method_decl, + const std::string& method_name) { + Env env; + env.set_method_name(method_name); + env.set_feature_info(feature_info_ptr); + + visit_params(cxx_method_decl, &env); + + OverviewHandler overview_handler; + recursive_visit(cxx_method_decl->getBody(), &overview_handler, &env); + + feature_info_ptr->clear_new_field_defs(); +} + +void ExtractMethodVisitor::visit_loop_init(clang::ForStmt* for_stmt, + Env* env_ptr) { + clang::Stmt* init_stmt = for_stmt->getInit(); + if (init_stmt == nullptr) { + LOG(INFO) << "for_stmt has no init!"; + return; + } + + clang::VarDecl* var_decl = nullptr; + if (clang::DeclStmt* decl_stmt = dyn_cast(init_stmt)) { + if (decl_stmt->isSingleDecl()) { + var_decl = dyn_cast(decl_stmt->getSingleDecl()); + } else { + var_decl = dyn_cast(*(decl_stmt->decl_begin())); + } + } + + visit_loop_var_decl(var_decl, env_ptr); +} + +void ExtractMethodVisitor::visit_loop_init(clang::CXXForRangeStmt* cxx_for_range_stmt, + Env* env_ptr) { + clang::VarDecl* var_decl = cxx_for_range_stmt->getLoopVariable(); + visit_loop_var_decl(var_decl, env_ptr); +} + +void ExtractMethodVisitor::visit_loop_var_decl(clang::VarDecl* var_decl, + Env* env_ptr) { + if (var_decl != nullptr && var_decl->hasInit()) { + std::string for_var_name = var_decl->getNameAsString(); + env_ptr->add_loop_var(for_var_name); + clang::Expr* init_expr = var_decl->getInit(); + env_ptr->add(for_var_name, init_expr); + LOG(INFO) << "for range, var: " << for_var_name + << ", expr: " << stmt_to_string(init_expr) + << ", type: " << var_decl->getType().getAsString() + << ", 
loop_var_type; " << tool::get_builtin_type_str(var_decl->getType()); + if (auto& loop_info = env_ptr->cur_mutable_loop_info()) { + loop_info->set_loop_var_type(tool::get_builtin_type_str(var_decl->getType())); + } + } else { + LOG(INFO) << "loop var_decl is nullptr!"; + } +} + +void ExtractMethodVisitor::process_deleted_var(StrictRewriter* rewriter_ptr, Env* env_ptr) { + if (rewriter_ptr == nullptr || env_ptr == nullptr) { + return; + } + + const std::set& deleted_vars = env_ptr->deleted_vars(); + for (const std::string& name : deleted_vars) { + clang::Stmt* decl_stmt = env_ptr->get_decl_stmt(name); + if (decl_stmt != nullptr) { + rewriter_ptr->RemoveText(decl_stmt); + } + } +} + +void ExtractMethodVisitor::process_get_bs(StrictRewriter* rewriter_ptr, + Env* env_ptr, + absl::optional body_begin_loc, + clang::Stmt* body) { + if (rewriter_ptr == nullptr || env_ptr == nullptr) { + return; + } + + std::ostringstream oss_get_bs; + + oss_get_bs << "auto bs = bslog.GetBS();\n" + << " if (bs == nullptr) { return ; }\n \n"; + // << env_ptr->get_all_new_defs(); + if (env_ptr->first_if_stmt() != nullptr) { + if (env_ptr->is_first_if_check_item_pos_include_cond()) { + LOG(INFO) << "is_first_if_check_item_pos_include_cond: " << env_ptr->is_first_if_check_item_pos_include_cond(); + rewriter_ptr->InsertTextBefore(env_ptr->first_if_stmt()->getBeginLoc(), oss_get_bs.str()); + rewriter_ptr->InsertTextAfterToken(env_ptr->first_if_stmt()->getEndLoc(), env_ptr->get_all_new_defs()); + } else if (env_ptr->is_first_if_check_item_pos_cond()) { + LOG(INFO) << "is_first_if_check_item_pos_cond: " << env_ptr->is_first_if_check_item_pos_cond(); + rewriter_ptr->ReplaceText(env_ptr->first_if_stmt(), oss_get_bs.str()); + rewriter_ptr->InsertTextAfterToken(env_ptr->first_if_stmt()->getEndLoc(), env_ptr->get_all_new_defs()); + } else { + // 逻辑有点复杂,先删掉 if 前面的语句,再整体替换。 + std::string text = + oss_get_bs.str() + env_ptr->get_all_new_defs() + get_rewritten_text_before_first_if(body); + 
remove_text_before_first_if(body); + rewriter_ptr->InsertTextBefore(env_ptr->first_if_stmt()->getBeginLoc(), text); + } + } else { + oss_get_bs << env_ptr->get_all_new_defs() + << get_rewritten_text_before_first_if(body); + + remove_text_before_first_if(body); + + // StrictRewriter 不能替换两次,因此这个地方不能用 rewriter_ptr + rewriter_.ReplaceText(find_source_range(body), tool::add_surround_big_parantheses(oss_get_bs.str())); + } +} + +std::string ExtractMethodVisitor::get_rewritten_text_before_first_if(clang::Stmt* body) { + std::ostringstream oss; + + if (clang::CompoundStmt* compound_stmt = dyn_cast(body)) { + for (auto it = compound_stmt->body_begin(); it != compound_stmt->body_end(); it++) { + if (clang::IfStmt* if_stmt = dyn_cast(*it)) { + break; + } + + oss << fix_semicolon(rewriter_.getRewrittenText(find_source_range(*it))); + } + } + + return oss.str(); +} + +void ExtractMethodVisitor::remove_text_before_first_if(clang::Stmt* body) { + if (clang::CompoundStmt* compound_stmt = dyn_cast(body)) { + for (auto it = compound_stmt->body_begin(); it != compound_stmt->body_end(); it++) { + if (clang::IfStmt* if_stmt = dyn_cast(*it)) { + return; + } + + rewriter_.RemoveText(find_source_range(*it)); + } + } +} + +void ExtractMethodVisitor::process_constructor(StrictRewriter* rewriter_ptr, Env* env_ptr) { + if (rewriter_ptr == nullptr || env_ptr == nullptr) { + return; + } + + const auto feature_info = env_ptr->get_feature_info(); + + LOG(INFO) << "start process constructor"; + + std::ostringstream oss; + oss << "\n "; + if (auto constructor_info = env_ptr->mutable_constructor_info()) { + constructor_info->fix_common_info_enums(); + + std::vector to_add; + + for (const std::string& bs_field_enum : constructor_info->bs_field_enums()) { + to_add.push_back(bs_field_enum); + } + + for (const auto& common_info : constructor_info->common_info_enums()) { + std::string bs_enum_str = common_info.bs_enum_str(); + if (bs_enum_str.size() > 0) { + if (common_info.if_stmt_end()) { + 
std::ostringstream oss_enum; + oss_enum << "attr_metas_.emplace_back(BSFieldEnum::" + << bs_enum_str + << ");\n "; + rewriter_ptr->InsertTextBefore(common_info.if_stmt_end().value(), oss_enum.str()); + } else if (bs_enum_str.size() > 0) { + to_add.push_back(bs_enum_str); + } + } + + if (feature_info != nullptr) { + if (feature_info->has_common_info_multi_int_list()) { + if (absl::optional enum_value = find_common_attr_int_value(common_info.enum_ref())) { + rewriter_ptr->ReplaceText(common_info.enum_ref(), std::to_string(*enum_value)); + } else { + LOG(INFO) << "cannot find common info enum from expr: " + << stmt_to_string(common_info.enum_ref()); + } + } + } + } + + auto compare_str = [](const std::string& a, const std::string& b) { + if (a.size() != b.size()) { + return a.size() < b.size(); + } + return a < b; + }; + + int reco_field_cnt = 0; + std::sort(to_add.begin(), to_add.end(), compare_str); + for (size_t i = 0; i < to_add.size(); i++) { + if (!tool::is_str_from_reco_user_info(to_add[i])) { + oss << "attr_metas_.emplace_back(BSFieldEnum::" << to_add[i] << ");\n "; + } else { + reco_field_cnt += 1; + } + } + + if (reco_field_cnt > 0) { + for (size_t i = 0; i < to_add.size(); i++) { + if (tool::is_str_from_reco_user_info(to_add[i])) { + oss << "attr_metas_.emplace_back(BSFieldEnum::" << to_add[i] + << ");\n "; + } + } + } + + oss << "\n"; + + for (const std::string& leaf : constructor_info->middle_node_leafs()) { + oss << leaf << ".fill_attr_metas(&attr_metas_);\n "; + } + + if (constructor_info->has_get_norm_query()) { + oss << "bs_util.BSFillNormQueryAttrMeta(&attr_metas_);\n "; + } + + if (feature_info != nullptr) { + const auto& new_field_defs = feature_info->new_field_defs(); + for (auto it = new_field_defs.begin(); it != new_field_defs.end(); it++) { + if (tool::is_from_info_util(it->second.name())) { + oss << it->second.name() << ".fill_attr_metas(&attr_metas_);\n "; + } + if (tool::is_from_info_util(it->second.exists_name())) { + oss << 
it->second.exists_name() << ".fill_attr_metas(&attr_metas_);\n "; + } + } + } + + rewriter_ptr->InsertTextBefore(constructor_info->body_end(), oss.str()); + } +} + +void ExtractMethodVisitor::process_update_action(StrictRewriter* rewriter_ptr, Env* env_ptr) { + if (rewriter_ptr == nullptr || env_ptr == nullptr) { + return; + } + + if (const auto feature_info = env_ptr->get_feature_info()) { + clang::BinaryOperator* update_action_stmt = feature_info->find_update_action_stmt(); + if (update_action_stmt != nullptr) { + std::vector arr = absl::StrSplit(stmt_to_string(update_action_stmt), "="); + if (arr.size() == 2) { + std::string x = std::regex_replace(arr[1], std::regex(" "), ""); + if (is_integer(x)) { + int action = std::stoi(x); + LOG(INFO) << "find action: " << action + << ", update_action_stmt: " << stmt_to_string(update_action_stmt); + + std::ostringstream oss; + const auto& new_field_defs = feature_info->new_field_defs(); + + for (auto it = new_field_defs.begin(); it != new_field_defs.end(); it++) { + if (const auto& expr_type = it->second.expr_type()) { + if (*expr_type == ExprType::ACTION_DETAIL_FIXED_GET) { + oss << it->second.name() << ".Update(" << action << ");\n "; + } else if (*expr_type == ExprType::ACTION_DETAIL_FIXED_HAS) { + oss << it->second.name() << ".UpdateHas(" << action << ");\n "; + } + } + } + + if (oss.str().size() > 0) { + rewriter_ptr->ReplaceText(update_action_stmt, oss.str()); + } + } + } + } + } +} + +void ExtractMethodVisitor::process_new_field_def(StrictRewriter* rewriter_ptr, + Env* env_ptr, + clang::SourceLocation extract_method_end) { + if(rewriter_ptr == nullptr || env_ptr == nullptr) { + return; + } + + + if (const auto feature_info = env_ptr->get_feature_info()) { + const std::unordered_map& new_field_defs = feature_info->new_field_defs(); + if (new_field_defs.size() == 0) { + return; + } + + std::ostringstream oss; + oss << "\n\n private:\n"; + + for (auto it = new_field_defs.begin(); it != new_field_defs.end(); it++) { + if 
(it->second.var_def().size() > 0) { + oss << it->second.var_def() << ";\n"; + } + if (it->second.exists_var_def().size() > 0) { + oss << it->second.exists_var_def() << ";\n"; + } + } + + if (oss.str().size() > 0) { + // 注意: 有些模板类参数来自 field, 因此新增 field_def 必须放到 field_end_loc 之后。 + std::string new_text = oss.str(); + + if (absl::optional field_end_loc = feature_info->last_field_decl_end_log()) { + rewriter_ptr->InsertTextAfterToken(*field_end_loc, std::string(";") + new_text); + } else { + if (feature_info->has_cc_file()) { + new_text = std::string(";") + new_text; + } + rewriter_ptr->InsertTextAfterToken(extract_method_end, new_text); + } + } + } +} + +absl::optional ExtractMethodVisitor::find_body_begin_loc(clang::Stmt* stmt) { + if (stmt == nullptr) { + return absl::nullopt; + } + + if (clang::CompoundStmt* compound_stmt = dyn_cast(stmt)) { + for (clang::CompoundStmt::body_iterator start = compound_stmt->body_begin(); + start != compound_stmt->body_end(); start++) { + return absl::optional((*start)->getBeginLoc()); + } + } + + return absl::optional(stmt->getBeginLoc()); +} + +void ExtractMethodVisitor::reset_rewrite_buffer(Env* env_ptr) { + if (const auto feature_info = env_ptr->get_feature_info()) { + clang::FileID file_id = feature_info->file_id(); + clang::RewriteBuffer& rewrite_buffer = rewriter_.getEditBuffer(file_id); + const std::string& origin_buffer = feature_info->origin_buffer(); + rewrite_buffer.Initialize(origin_buffer.data(), origin_buffer.data() + origin_buffer.size()); + } +} + +void ExtractMethodVisitor::process_params(const clang::CXXMethodDecl* cxx_method_decl, + StrictRewriter* rewriter_ptr, + Env* env_ptr, + std::vector* params_text_ptr) { + if (cxx_method_decl == nullptr || + rewriter_ptr == nullptr || + env_ptr == nullptr || + params_text_ptr == nullptr) { + return; + } + + const std::string& method_name = cxx_method_decl->getNameAsString(); + if (method_name == "Extract") { + return; + } + + if (auto feature_info = 
env_ptr->mutable_feature_info()) { + MethodInfo& method_info = feature_info->touch_method_info(method_name); + for (size_t i = 0; i < cxx_method_decl->getNumParams(); i++) { + const clang::ParmVarDecl* param_decl = cxx_method_decl->getParamDecl(i); + std::string param_name = param_decl->getNameAsString(); + if (param_name.size() == 0) { + continue; + } + + if (param_name == "adlog" || param_name == "ad_log") { + params_text_ptr->push_back("const BSLog& bslog"); + } else { + if (tool::is_repeated_action_info(param_decl->getType())) { + const NewActionParam& new_action_param = method_info.find_new_action_param(i); + if (new_action_param.origin_name() == param_name) { + const std::vector& new_params = new_action_param.new_params(); + for (size_t j = 0; j < new_params.size(); j++) { + std::ostringstream oss; + + if (new_params[j].field() == "size") { + oss << new_params[j].inner_type_str() << " " << new_params[j].name(); + } else { + oss << new_params[j].const_ref_str(); + } + + params_text_ptr->push_back(oss.str()); + } + } + } else if (tool::is_common_info_struct(param_decl->getType())) { + // 目前都是 user 特征,不需要区分 combine + // 如: helper 函数 + // void helper(FeaturePrefix prefix, + // const ::bs::auto_cpp_rewriter::CommonInfoAttr& userAttr, + // std::vector* result); + if (const auto& common_info_prepare = method_info.common_info_prepare()) { + if (const auto& common_info_method_name = common_info_prepare->method_name()) { + std::ostringstream oss; + + std::string bs_type_str = CommonAttrInfo::get_bs_type_str(*common_info_method_name, false); + oss << "const " << bs_type_str << "& " << param_name; + + params_text_ptr->push_back(oss.str()); + + if (feature_info->is_template()) { + rewriter_ptr->ReplaceText(param_decl->getSourceRange(), oss.str()); + } + } else { + LOG(INFO) << "cannot find common_inof_method_name from common info prepare, method_name: " + << method_name; + } + } else { + LOG(INFO) << "cannot find common info prepare, method_name: " << method_name; + } + } 
else { + LOG(INFO) << "param, i: " << i << ", type: " << param_decl->getType().getAsString() + << ", param_name: " << param_name; + params_text_ptr->push_back(param_decl->getType().getAsString() + " " + param_name); + } + } + } + } +} + +bool ExtractMethodVisitor::is_infer_filter_method(const clang::CXXMethodDecl* cxx_method_decl) { + if (cxx_method_decl == nullptr || cxx_method_decl->getNumParams() != 2) { + return false; + } + + const clang::ParmVarDecl* param0 = cxx_method_decl->getParamDecl(0); + const clang::ParmVarDecl* param1 = cxx_method_decl->getParamDecl(1); + + // 更准确的判断需要加上类型,先简单处理,只根据参数名称判断。 + if (param0->getNameAsString() == "item" && param1->getNameAsString() == "filter_condition") { + return true; + } + + return false; +} + +void ExtractMethodVisitor::process_infer_filter_param(const clang::CXXMethodDecl* cxx_method_decl, + StrictRewriter* rewriter_ptr) { + if (cxx_method_decl == nullptr || rewriter_ptr == nullptr) { + return; + } + + if (cxx_method_decl->getNumParams() == 2) { + const clang::ParmVarDecl* param0 = cxx_method_decl->getParamDecl(0); + const clang::ParmVarDecl* param1 = cxx_method_decl->getParamDecl(1); + + rewriter_ptr->ReplaceText(param0->getSourceRange(), "const SampleInterface& bs"); + rewriter_ptr->ReplaceText(param1->getSourceRange(), "const FilterCondition& filter_condition, size_t pos"); + } +} + +void ExtractMethodVisitor::process_infer_filter_root_env( + StrictRewriter* rewriter_ptr, + Env* env_ptr, + absl::optional body_begin_loc) { + if (rewriter_ptr == nullptr || env_ptr == nullptr) { + return; + } + + if (!body_begin_loc) { + return; + } + + rewriter_ptr->InsertTextBefore(*body_begin_loc, env_ptr->get_all_new_defs()); +} + +std::string ExtractMethodVisitor::insert_top_new_defs_to_body( + const std::string& body_str, + const std::vector& new_def_var_names, + const std::string& new_defs +) const { + size_t pos = std::string::npos; + + for (size_t i = 0; i < new_def_var_names.size(); i++) { + size_t index = 
body_str.find(new_def_var_names[i]); + if (index < pos) { + pos = index; + } + } + + if (pos != std::string::npos && pos < body_str.size()) { + int i = pos; + for (; i > 0; i--) { + // 最早出现的前一行 + if (body_str[i] == '\n') { + break; + } + } + + if (i > 0 && i < body_str.size()) { + std::ostringstream oss; + oss << body_str.substr(0, i) + << "\n" + << new_defs + << "\n" + << body_str.substr(i); + + return oss.str(); + } + } + + LOG(ERROR) << "cannot find new def var names in body_str, new_def_var_names: " + << absl::StrJoin(new_def_var_names, ", "); + return body_str; +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/visitor/ExtractMethodVisitor.h b/convert/visitor/ExtractMethodVisitor.h new file mode 100644 index 0000000..47bb272 --- /dev/null +++ b/convert/visitor/ExtractMethodVisitor.h @@ -0,0 +1,367 @@ +#pragma once + +#include +#include + +#include "clang/AST/ExprCXX.h" +#include "clang/AST/AST.h" +#include "clang/Rewrite/Core/Rewriter.h" +#include "clang/Basic/SourceLocation.h" + +#include "../Env.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class ExtractMethodVisitor { + public: + explicit ExtractMethodVisitor(clang::Rewriter &rewriter) : // NOLINT + rewriter_(rewriter) { + } + + void visit_params(const clang::CXXMethodDecl* cxx_method_decl, Env* env_ptr); + void visit(const clang::CXXMethodDecl* cxx_method_decl, FeatureInfo* feature_info_ptr); + + void get_overview_info(FeatureInfo* feature_info_ptr, + const clang::CXXMethodDecl* cxx_method_decl, + const std::string& method_name); + + template + void recursive_visit(clang::Stmt *stmt, + Handler* handler_ptr, + Env* env_ptr); + + template + void visit_loop(T* loop_stmt, + Handler* handler_ptr, + Env* env_ptr); + + template + void visit_loop_range(T* loop_stmt, + Handler* handler_ptr, + Env* env_ptr); + + void visit_loop_init(clang::ForStmt* for_stmt, + Env* env_ptr); + + void visit_loop_init(clang::CXXForRangeStmt* cxx_for_range_stmt, + 
Env* env_ptr); + + void visit_loop_var_decl(clang::VarDecl* var_decl, + Env* env_ptr); + + void process_deleted_var(StrictRewriter* rewriter_ptr, Env* env_ptr); + void process_get_bs(StrictRewriter* rewriter_ptr, + Env* env_ptr, + absl::optional body_begin_loc, + clang::Stmt* body); + void process_constructor(StrictRewriter* rewriter_ptr, Env* env_ptr); + void process_update_action(StrictRewriter* rewriter_ptr, Env* env_ptr); + void process_new_field_def(StrictRewriter* rewriter_ptr, + Env* env_ptr, + clang::SourceLocation extract_method_end); + + absl::optional find_body_begin_loc(clang::Stmt* stmt); + + void reset_rewrite_buffer(Env* env_ptr); + + void process_params(const clang::CXXMethodDecl* cxx_method_decl, + StrictRewriter* rewriter_ptr, + Env* env_ptr, + std::vector* params_text_ptr); + + void remove_text_before_first_if(clang::Stmt* body); + std::string get_rewritten_text_before_first_if(clang::Stmt* body); + + /// 用于处理 infer 的 ItemFilter + /// 如果第一个参数是 `const ItemAdaptorBase& item`, 第二个参数是 `const FilterCondition& filter_condition`, + /// 则返回 true,否则返回 false。 + bool is_infer_filter_method(const clang::CXXMethodDecl* cxx_method_decl); + /// 将 `const ItemAdaptorBase& item` 替换为 `const SampleInterface& bs`。 + void process_infer_filter_param(const clang::CXXMethodDecl* cxx_method_decl, + StrictRewriter* rewriter_ptr); + /// 将 root env 的变量添加到最开始。 + void process_infer_filter_root_env(StrictRewriter* rewriter_ptr, + Env* env_ptr, + absl::optional body_begin_loc); + + /// 将最外层的新增变量定义添加到 body 中合适的位置。取包含所有变量的最早的行的前一行为其位置。 + /// + /// 如 + /// if (info_exists) { ... 
} + /// + /// 则将 info_exists 定义添加到 if 前。 + std::string insert_top_new_defs_to_body( + const std::string& body_str, + const std::vector& new_def_var_names, + const std::string& new_defs) const; + + protected: + clang::Rewriter &rewriter_; +}; + +template +void ExtractMethodVisitor::recursive_visit(clang::Stmt *stmt, + Handler* handler_ptr, + Env* env_ptr) { + if (!stmt) { + return; + } + + if (clang::CompoundStmt* compound_stmt = dyn_cast(stmt)) { + for (clang::CompoundStmt::body_iterator start = compound_stmt->body_begin(); + start != compound_stmt->body_end(); + start++) { + recursive_visit(*start, handler_ptr, env_ptr); + handler_ptr->process(*start, env_ptr); + } + + } else if (clang::DeclStmt* decl_stmt = dyn_cast(stmt)) { + env_ptr->update(decl_stmt); + if (clang::VarDecl* var_decl = dyn_cast(decl_stmt->getSingleDecl())) { + if (var_decl->hasInit()) { + recursive_visit(var_decl->getInit(), handler_ptr, env_ptr); + } + if (const clang::Expr* init_expr = var_decl->getAnyInitializer()) { + recursive_visit(const_cast(init_expr), handler_ptr, env_ptr); + } + } + + handler_ptr->process(decl_stmt, env_ptr); + + // decl_stmt 比较特殊, decl_info 只维持当前变量, 访问完当前 decl_stmt 后立即销毁。 + env_ptr->clear_decl_info(); + + } else if (clang::CXXConstructExpr* cxx_construct_expr = dyn_cast(stmt)) { + for (size_t i = 0; i < cxx_construct_expr->getNumArgs(); i++) { + recursive_visit(cxx_construct_expr->getArg(i), handler_ptr, env_ptr); + } + + } else if (clang::ExprWithCleanups* expr_with_cleanups = dyn_cast(stmt)) { + recursive_visit(expr_with_cleanups->getSubExpr(), handler_ptr, env_ptr); + + } else if (clang::ImplicitCastExpr* implicit_cast_expr = dyn_cast(stmt)) { + recursive_visit(implicit_cast_expr->getSubExpr(), handler_ptr, env_ptr); + + } else if (clang::CXXBindTemporaryExpr* cxx_bind_temporary_expr = + dyn_cast(stmt)) { + recursive_visit(cxx_bind_temporary_expr->getSubExpr(), handler_ptr, env_ptr); + + } else if (clang::CXXNullPtrLiteralExpr* cxx_null_ptr_literal_expr = + 
dyn_cast(stmt)) { + handler_ptr->process(cxx_null_ptr_literal_expr, env_ptr); + + } else if (clang::CXXMemberCallExpr* cxx_member_call_expr = dyn_cast(stmt)) { + for (unsigned i = 0; i < cxx_member_call_expr->getNumArgs(); i++) { + recursive_visit(cxx_member_call_expr->getArg(i), handler_ptr, env_ptr); + } + handler_ptr->process(cxx_member_call_expr, env_ptr); + + } else if (clang::CXXOperatorCallExpr* cxx_operator_call_expr = + dyn_cast(stmt)) { + env_ptr->update(cxx_operator_call_expr); + for (size_t i = 0; i < cxx_operator_call_expr->getNumArgs(); i++) { + recursive_visit(cxx_operator_call_expr->getArg(i), handler_ptr, env_ptr); + } + + handler_ptr->process(cxx_operator_call_expr, env_ptr); + env_ptr->clear_binary_op_info(); + + } else if (clang::CallExpr* call_expr = dyn_cast(stmt)) { + // 会包含 CXXMemberCallExpr 和 CXXOperatorCallExpr, 此处逻辑需要再仔细考虑下 + for (unsigned i = 0; i < call_expr->getNumArgs(); i++) { + recursive_visit(call_expr->getArg(i), handler_ptr, env_ptr); + } + + for (unsigned i = 0; i < call_expr->getNumArgs(); i++) { + handler_ptr->process(call_expr->getArg(i), env_ptr); + } + + handler_ptr->process(call_expr, env_ptr); + + } else if (clang::CXXDependentScopeMemberExpr *cxx_dependent_scope_member_expr = + dyn_cast(stmt)) { + handler_ptr->process(cxx_dependent_scope_member_expr, env_ptr); + + } else if (clang::ArraySubscriptExpr* array_subscript_expr = dyn_cast(stmt)) { + handler_ptr->process(array_subscript_expr, env_ptr); + + } else if (clang::UnaryOperator *unary_operator = dyn_cast(stmt)) { + recursive_visit(unary_operator->getSubExpr(), handler_ptr, env_ptr); + handler_ptr->process(unary_operator, env_ptr); + + } else if (clang::MemberExpr* member_expr = dyn_cast(stmt)) { + handler_ptr->process(member_expr, env_ptr); + + } else if (clang::BinaryOperator* binary_operator = dyn_cast(stmt)) { + env_ptr->update(binary_operator); + if (binary_operator->isAssignmentOp()) { + env_ptr->update_assign_info(binary_operator); + } + + 
recursive_visit(binary_operator->getLHS(), handler_ptr, env_ptr); + handler_ptr->process(binary_operator->getLHS(), env_ptr); + + recursive_visit(binary_operator->getRHS(), handler_ptr, env_ptr); + handler_ptr->process(binary_operator->getRHS(), env_ptr); + + handler_ptr->process(binary_operator, env_ptr); + + env_ptr->clear_binary_op_info(); + if (binary_operator->isAssignmentOp()) { + env_ptr->clear_assign_info(); + } + + } else if (clang::ParenExpr* paren_expr = dyn_cast(stmt)) { + recursive_visit(paren_expr->getSubExpr(), handler_ptr, env_ptr); + + } else if (clang::ImplicitCastExpr* implicit_cast_expr = dyn_cast(stmt)) { + recursive_visit(implicit_cast_expr->getSubExpr(), handler_ptr, env_ptr); + + } else if (clang::MaterializeTemporaryExpr* materialize_temporary_expr = + dyn_cast(stmt)) { + recursive_visit(materialize_temporary_expr->getSubExpr(), handler_ptr, env_ptr); + + } else if (clang::CXXThisExpr* cxx_this_expr = dyn_cast(stmt)) { + handler_ptr->process(cxx_this_expr, env_ptr); + + } else if (clang::ReturnStmt* return_stmt = dyn_cast(stmt)) { + recursive_visit(return_stmt->getRetValue(), handler_ptr, env_ptr); + + handler_ptr->process(return_stmt, env_ptr); + + } else if (clang::BreakStmt* break_stmt = dyn_cast(stmt)) { + handler_ptr->process(break_stmt, env_ptr); + + } else if (clang::ContinueStmt* continue_stmt = dyn_cast(stmt)) { + handler_ptr->process(continue_stmt, env_ptr); + + } else if (clang::IfStmt* if_stmt = dyn_cast(stmt)) { + Env new_env(env_ptr); + new_env.update(if_stmt); + + clang::Expr* cond_expr = if_stmt->getCond(); + cond_expr = cond_expr->IgnoreImpCasts(); + recursive_visit(cond_expr, handler_ptr, &new_env); + + clang::Stmt* then_expr = if_stmt->getThen(); + clang::Stmt* else_expr = if_stmt->getElse(); + + auto& if_info = new_env.cur_mutable_if_info(); + if_info->set_if_stage(IfStage::THEN); + recursive_visit(then_expr, handler_ptr, &new_env); + + if_info->set_if_stage(IfStage::ELSE); + recursive_visit(else_expr, handler_ptr, 
&new_env); + + if_info->set_if_stage(IfStage::END); + + handler_ptr->process(if_stmt, &new_env); + + } else if (clang::ForStmt* for_stmt = dyn_cast(stmt)) { + Env new_env(env_ptr); + new_env.update(for_stmt); + visit_loop(for_stmt, handler_ptr, &new_env); + + } else if (clang::CXXForRangeStmt* cxx_for_range_stmt = dyn_cast(stmt)) { + Env new_env(env_ptr); + new_env.update(cxx_for_range_stmt); + + visit_loop(cxx_for_range_stmt, handler_ptr, &new_env); + + } else if (clang::DeclRefExpr* decl_ref_expr = dyn_cast(stmt)) { + handler_ptr->process(decl_ref_expr, env_ptr); + + } else if (clang::ConstantExpr* constant_expr = dyn_cast(stmt)) { + recursive_visit(constant_expr->getSubExpr(), handler_ptr, env_ptr); + + } else if (clang::IntegerLiteral* integer_literal = dyn_cast(stmt)) { + handler_ptr->process(integer_literal, env_ptr); + + } else if (clang::CStyleCastExpr* c_style_cast_expr = dyn_cast(stmt)) { + recursive_visit(c_style_cast_expr->getSubExpr(), handler_ptr, env_ptr); + + } else if (clang::CXXStaticCastExpr* cxx_static_cast_expr = dyn_cast(stmt)) { + recursive_visit(cxx_static_cast_expr->getSubExpr(), handler_ptr, env_ptr); + + } else if (clang::SwitchStmt* switch_stmt = dyn_cast(stmt)) { + recursive_visit(switch_stmt->getCond(), handler_ptr, env_ptr); + recursive_visit(switch_stmt->getBody(), handler_ptr, env_ptr); + + handler_ptr->process(switch_stmt, env_ptr); + + } else if (clang::CaseStmt* case_stmt = dyn_cast(stmt)) { + env_ptr->update(case_stmt); + recursive_visit(case_stmt->getLHS(), handler_ptr, env_ptr); + recursive_visit(case_stmt->getRHS(), handler_ptr, env_ptr); + recursive_visit(case_stmt->getSubStmt(), handler_ptr, env_ptr); + + handler_ptr->process(case_stmt, env_ptr); + env_ptr->clear_switch_case_info(); + + } else if (clang::ConditionalOperator* conditional_operator = dyn_cast(stmt)) { + recursive_visit(conditional_operator->getCond(), handler_ptr, env_ptr); + recursive_visit(conditional_operator->getTrueExpr(), handler_ptr, env_ptr); + 
recursive_visit(conditional_operator->getFalseExpr(), handler_ptr, env_ptr); + + handler_ptr->process(conditional_operator, env_ptr); + + } else if (clang::CXXFunctionalCastExpr* cxx_functional_cast_expr = + dyn_cast(stmt)) { + recursive_visit(cxx_functional_cast_expr->getSubExpr(), handler_ptr, env_ptr); + + handler_ptr->process(cxx_functional_cast_expr, env_ptr); + + } else if (clang::GNUNullExpr* gnu_null_expr = dyn_cast(stmt)) { + handler_ptr->process(gnu_null_expr, env_ptr); + + } else { + LOG(INFO) << "unsupported stmt, trated as string: " << stmt_to_string(stmt); + } +} + +template +void ExtractMethodVisitor::visit_loop(T* loop_stmt, + Handler* handler_ptr, + Env* env_ptr) { + // 添加 loop_var + visit_loop_init(loop_stmt, env_ptr); + + // loop init + visit_loop_range(loop_stmt, handler_ptr, env_ptr); + + auto& loop_info = env_ptr->mutable_loop_info(); + if (loop_info) { + loop_info->set_loop_state(LoopStage::BODY); + } + + clang::Stmt* body = loop_stmt->getBody(); + + for (auto body_it = body->child_begin(); body_it != body->child_end(); body_it++) { + recursive_visit(*body_it, handler_ptr, env_ptr); + } + + handler_ptr->process(loop_stmt, env_ptr); + + env_ptr->pop_loop_var(); +} + +template +void ExtractMethodVisitor::visit_loop_range(T* loop_stmt, + Handler* handler_ptr, + Env* env_ptr) { + for (auto it = loop_stmt->child_begin(); it != loop_stmt->child_end(); it++) { + if (*it != nullptr) { + if (clang::CompoundStmt* compound_stmt = dyn_cast(*it)) { + continue; + } + + recursive_visit(*it, handler_ptr, env_ptr); + } + } +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/visitor/FieldDeclVisitor.cpp b/convert/visitor/FieldDeclVisitor.cpp new file mode 100644 index 0000000..3093e2e --- /dev/null +++ b/convert/visitor/FieldDeclVisitor.cpp @@ -0,0 +1,128 @@ +#include +#include + +#include "clang/Basic/SourceLocation.h" + +#include "../Env.h" +#include "../Tool.h" +#include "FieldDeclVisitor.h" +#include 
"../info/FeatureInfo.h" +#include "../handler/StrictRewriter.h" +#include "../handler/FieldDeclHandler.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +void FieldDeclVisitor::visit(const clang::FieldDecl* field_decl, FeatureInfo* feature_info_ptr) { + if (field_decl == nullptr || feature_info_ptr == nullptr) { + return; + } + + feature_info_ptr->add_field_decl(field_decl); + + StrictRewriter strict_rewriter(rewriter_); + FieldDeclHandler field_decl_handler(rewriter_); + + Env env; + env.set_feature_info(feature_info_ptr); + + // 收集模板参数变量名 + if (field_decl->hasInClassInitializer()) { + if (clang::Expr* init_expr = field_decl->getInClassInitializer()) { + // 添加信息到 feature_info + if (clang::InitListExpr* init_list_expr = dyn_cast(init_expr)) { + for (size_t i = 0; i < init_list_expr->getNumInits(); i++) { + if (clang::DeclRefExpr* decl_ref_expr = dyn_cast(init_list_expr->getInit(i))) { + if (clang::ValueDecl* value_decl = decl_ref_expr->getDecl()) { + if (value_decl->isTemplateParameter()) { + LOG(INFO) << "add_template_var_name: " << stmt_to_string(decl_ref_expr); + feature_info_ptr->add_template_var_name(stmt_to_string(decl_ref_expr)); + } + } + } + } + } + + // 处理改写逻辑 + recursive_visit(init_expr, &field_decl_handler, &env); + + // 替换类型中的 ItemType + // 使用的时候可能会根据 int 值查找, 所以直接改成 int 类型。 + if (tool::is_map_item_type_int_type(field_decl->getType())) { + std::ostringstream oss; + oss << "std::unordered_map " + << field_decl->getNameAsString() + << strict_rewriter.getRewrittenText(init_expr); + + strict_rewriter.ReplaceText(field_decl->getSourceRange(), oss.str()); + } + } + } + + clang::SourceRange source_range = field_decl->getSourceRange(); + std::string s = rewriter_.getRewrittenText(source_range); + if (s.find("USED_FEATURES") != std::string::npos) { + rewriter_.RemoveText(source_range); + } else if (s.find("string") != std::string::npos) { + // string a; + // std::vector arr; + // std::map m; + rewriter_.ReplaceText(source_range, 
tool::fix_std_string(s)); + } +} + +void FieldDeclVisitor::recursive_visit(clang::Stmt *stmt, FieldDeclHandler* handler_ptr, Env *env_ptr) { + if (stmt == nullptr || handler_ptr == nullptr || env_ptr == nullptr) { + return; + } + + LOG(INFO) << "FieldDecl visit: " << stmt_to_string(stmt); + + if (clang::InitListExpr *init_list_expr = dyn_cast(stmt)) { + LOG(INFO) << "FieldDecl visit init_list_expr: " << stmt_to_string(stmt); + for (size_t i = 0; i < init_list_expr->getNumInits(); i++) { + recursive_visit(init_list_expr->getInit(i), handler_ptr, env_ptr); + } + } + + else if (clang::CXXStdInitializerListExpr* cxx_std_initializer_list_expr = + dyn_cast(stmt)) { + recursive_visit(cxx_std_initializer_list_expr->getSubExpr(), handler_ptr, env_ptr); + } + + else if (clang::ExprWithCleanups* expr_with_cleanup = dyn_cast(stmt)) { + recursive_visit(expr_with_cleanup->getSubExpr(), handler_ptr, env_ptr); + } + + else if (clang::MaterializeTemporaryExpr *materialize_temporary_expr = + dyn_cast(stmt)) { + recursive_visit(materialize_temporary_expr->getSubExpr(), handler_ptr, env_ptr); + } + + else if (clang::CXXConstructExpr *cxx_construct_expr = dyn_cast(stmt)) { + for (size_t i = 0; i < cxx_construct_expr->getNumArgs(); i++) { + recursive_visit(cxx_construct_expr->getArg(i), handler_ptr, env_ptr); + } + } + + else if (clang::DeclRefExpr *decl_ref_expr = + dyn_cast(stmt)) { + LOG(INFO) << "FieldDecl visit decl_ref_expr: " << stmt_to_string(stmt); + handler_ptr->process(decl_ref_expr, env_ptr); + } + + else if (clang::ConstantExpr *constant_expr = + dyn_cast(stmt)) { + LOG(INFO) << "FieldDecl visit constant_expr: " << stmt_to_string(stmt); + recursive_visit(constant_expr->getSubExpr(), handler_ptr, env_ptr); + } + + else { + // + } +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/visitor/FieldDeclVisitor.h b/convert/visitor/FieldDeclVisitor.h new file mode 100644 index 0000000..3430e34 --- /dev/null +++ 
b/convert/visitor/FieldDeclVisitor.h @@ -0,0 +1,30 @@ +#pragma once + +#include "clang/AST/Decl.h" +#include "clang/Rewrite/Core/Rewriter.h" +#include "../handler/StrictRewriter.h" +#include "../handler/FieldDeclHandler.h" + +namespace ks { +namespace ad_algorithm { +namespace convert { + +class Env; +class FeatureInfo; + +class FieldDeclVisitor { + public: + explicit FieldDeclVisitor(clang::Rewriter &rewriter) : // NOLINT + rewriter_(rewriter) { + } + + void visit(const clang::FieldDecl* field_decl, FeatureInfo* feature_info_ptr); + void recursive_visit(clang::Stmt *stmt, FieldDeclHandler* handler_ptr, Env *env_ptr); + + protected: + clang::Rewriter& rewriter_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/visitor/MiddleNodeJson.cpp b/convert/visitor/MiddleNodeJson.cpp new file mode 100644 index 0000000..bfd7c4d --- /dev/null +++ b/convert/visitor/MiddleNodeJson.cpp @@ -0,0 +1,98 @@ +#include +#include +#include "../Config.h" +#include "./MiddleNodeJson.h" +#include +#include +#include + +namespace ks { +namespace ad_algorithm { +namespace convert { + +MiddleNodeJson::MiddleNodeJson() { + auto config = GlobalConfig::Instance(); + + auto& filename = config->middle_node_json_file; + std::ifstream f(filename.c_str()); + + if (!f.is_open()) { + LOG(ERROR) << "open file failed! filename: " << filename; + return; + } + + data_ = json::parse(f); + if (data_ == nullptr) { + LOG(ERROR) << "parse json failed! 
json_file: " << filename; + return; + } + + prefix_map_["photo_info"] = {"adlog.item.ad_dsp_info.photo_info"}; + prefix_map_["live_info"] = {"adlog.item.ad_dsp_info.live_info"}; +} + +bool MiddleNodeJson::is_inited() const { return data_ != nullptr; } + +std::vector MiddleNodeJson::find_leaf(const std::string& name) const { + std::string name_underscore = camel_to_underscore(name); + std::vector name_arr = absl::StrSplit(name_underscore, "_"); + + std::vector path; + find_leaf_helper(name_arr, 0, data_, &path); + + std::vector leafs = { absl::StrJoin(path, ".")}; + + return leafs; +} + +void MiddleNodeJson::find_leaf_helper( + const std::vector& name_arr, + size_t pos, + const json& info, + std::vector* path_ptr +) const { + if (path_ptr == nullptr) { + return; + } + + if (pos >= name_arr.size()) { + return; + } + + for (size_t i = pos; i < name_arr.size(); i++) { + std::string s = absl::StrJoin(name_arr.data() + pos, name_arr.data() + i + 1, "_"); + if (info.contains(s)) { + path_ptr->emplace_back(s); + find_leaf_helper(name_arr, i + 1, info[s], path_ptr); + } else if (info.contains("children") && info["children"].contains(s)) { + path_ptr->emplace_back(s); + find_leaf_helper(name_arr, i + 1, info["children"][s], path_ptr); + } + } +} + +std::string MiddleNodeJson::camel_to_underscore(const std::string& s) const { + std::ostringstream oss; + + for (size_t i = 0; i < s.size(); i++) { + if (std::isupper(static_cast<unsigned char>(s[i]))) { // <cctype> calls are UB for negative char values, hence the unsigned char casts + if (i > 0) { + oss << "_"; + } + oss << (char)(std::tolower(static_cast<unsigned char>(s[i]))); + } else if (std::isdigit(static_cast<unsigned char>(s[i]))) { + if (i > 0 && std::isalpha(static_cast<unsigned char>(s[i - 1]))) { + oss << "_"; + } + oss << s[i]; + } else { + oss << (char)(s[i]); + } + } + + return oss.str(); +} + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/convert/visitor/MiddleNodeJson.h b/convert/visitor/MiddleNodeJson.h new file mode 100644 index 0000000..99ea10c --- /dev/null +++ b/convert/visitor/MiddleNodeJson.h @@ -0,0 +1,42 @@ +#pragma once + +#include +#include +#include 
+#include +#include +#include + +namespace ks { +namespace ad_algorithm { +namespace convert { + +using nlohmann::json; + +class MiddleNodeJson { + public: + static MiddleNodeJson& instance() { + static MiddleNodeJson x; + return x; + } + + bool is_inited() const; + + std::vector find_leaf(const std::string& name) const; + void find_leaf_helper(const std::vector& name_arr, + size_t pos, + const json& info, + std::vector* path_ptr) const; + + private: + MiddleNodeJson(); + std::string camel_to_underscore(const std::string& s) const; + + private: + json data_; + std::unordered_map> prefix_map_; +}; + +} // namespace convert +} // namespace ad_algorithm +} // namespace ks diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..e744670 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,73 @@ +# auto-cpp-rewriter + + +本项目是基于 `llvm` 的 `c++` 代码自动转换工具, 用于解决广告模型特征框架迁移中 `c++` 抽取类代码的改写问题。 + +## 背景 + +以下简单介绍一下背景。 + +模型数据流的流程如下,广告产生曝光给用户后,用户会发生点击、激活、付费、次留等信号会随客户回传等方式进入广告系统, +通过 user 、item 的构建流程,变成模型能够使用的 user info、item info。线上模型预估服务基于这些信息组装成 `AdLog` +进行打分,log 再通过 kafka/hive 进入模型训练流程。算法同学日常特征迭代,就是通过对原始数据的加工和变换,将原始的行为 +转换为特征能够识别的 sparse 和 dense 特征,这个过程就是我们一般说的特征抽取过程,也叫 feature generation (简称 FG)。 + +经过多年的发展,商业化模型链路虽然新的特征也使用了 CommonInfoAttr 的 类似 kv 化的方式进行迭代,但是一直使用的是基于 +c++ extractor 的模式,这种方式的弊端很明显: +1. `protobuf` 序列化和反序列化开销较大。 +2. `protobuf` 的存储 + hard code 的 extractor,导致数据的产出、使用、覆盖率、停更等都难以跟踪和管理,导致模型链路治理非常困难。 +3. 数据逻辑与计算逻辑耦合严重,逻辑无法复用。 +4. 大量的手写特征,难以标准化,很多有问题的特征,不断被拷贝扩散到很多模型中,历史包袱很重。工程团队的性能优化也难以开展。 + +为了解决以上这些问题,我们需要对整个特征链路进行彻底的改造。从业界的经验来看,迁移 `kv` 特征是一个比较好的思路。 +因此我们跟模型工程、数据工程团队一起对整体链路发起了特征 kv 化改造项目。 + +项目的目标是通过user info/item info特征数据的kv化改造,实现对特征存储和特征加工逻辑的解耦,建设通用化的特征处理逻辑并全量迁移, +摒弃掉 extractor hard code 的逻辑,实现代码和数据的复用。 + +主要存在的挑战如下: + +1. 模型链路长,特征的构建、模型的 label match 数据流在不同的场景存在着多套,如果我们从头到尾建设一条完整的新数据流, + 需要消耗大量的资源,并且对算法团队的迭代形成很大的干扰。 +2. 历史包袱非常大。商业化场景多,模型多,存量有 3000+ extractor 实现全量迁移,工作量大,涉及到的团队、人力都非常多。 + +为了降低迁移成本,我跟工程团队一起制定了如下的整体迁移计划。 +1. 
第一阶段,user info和item info的 kv 化改造, 将数据格式迁移为 `flatbuffers` 格式。 +2. 第二阶段,进行推理侧的 kv 化改造,实现存量 extractor 逻辑的平迁。 +3. 第三阶段,训练侧支持 kv 特征,实现最终的闭环。 + +以上是大的背景,本项目则是在解决第二阶段的存量 extractor 逻辑的平迁问题。 + +为了保证线上效果,我们需要保证两个版本的数据与处理逻辑完全一致,这样才能保证提取的特征一致。而需要改写 +的特征抽取类非常多,数量达到 3000+ 个,且抽取类逻辑并不都是简单逻辑,有些还比较复杂。除此之外,训练侧还用到过滤样本的 +`item_filter` 和提取 `label` 的 `label_extractor`,在第三阶段也需要进行改写,数量也有 `1500+`。如果全部手动改写, +工作量非常巨大,而且排查逻辑一致需要花费大量精力。 + +因此,我基于 `llvm` 实现了自动改写 `c++` 特征抽取类的工具,自动将抽取类转换为 `kv` 特征抽取类,且保证 +逻辑完全一致。 + + +## 目录 + +本文档将按如下部分介绍此项目(待完善): + +- [问题描述](problem/README.md) + - [原始数据格式与特征提取](problem/origin_format.md) + - [迁移 kv 特征](problem/kv_feature/README.md) + - [数据转换](problem/kv_feature/format_conversion.md) + - [特征改写](problem/kv_feature/feature_rewrite.md) + - [改写规则](problem/kv_feature/rewrite_rule/README.md) +- [挑战](challenge/README.md) +- [解决方案](solution/README.md) + - [思路](solution/README.md) + - [整体架构](solution/overall_architecture.md) + - [子模块](solution/sub_modules.md) + - [proto_parser](solution/sub_modules/proto_parser.md) + - [matcher](solution/sub_modules/matcher.md) + - [visitor](solution/sub_modules/visitor.md) + - [env](solution/sub_modules/env.md) + - [info](solution/sub_modules/info.md) + - [expr_parser](solution/sub_modules/expr_parser.md) + - [rewrite](solution/sub_modules/rewrite.md) +- [编译](compile/README.md) +- [测试](test/README.md) diff --git a/docs/_sidebar.md b/docs/_sidebar.md new file mode 100644 index 0000000..d1ffb4b --- /dev/null +++ b/docs/_sidebar.md @@ -0,0 +1,23 @@ +- [简介](README.md) +- [问题描述](problem/README.md) + - [原始数据格式与特征提取](problem/origin_format.md) + - [迁移 kv 特征](problem/kv_feature/README.md) + - [数据转换](problem/kv_feature/format_conversion.md) + - [特征改写](problem/kv_feature/feature_rewrite.md) + - [改写规则](problem/kv_feature/rewrite_rule/README.md) +- [挑战](challenge/README.md) +- [解决方案](solution/README.md) + - [思路](solution/README.md) + - [示例](solution/README.md?id=示例) + - [整体架构](solution/overall_architecture.md) + - 
[子模块](solution/sub_modules/README.md) + - [proto_parser](solution/sub_modules/proto_parser.md) + - [matcher](solution/sub_modules/matcher.md) + - [ast](solution/sub_modules/ast.md) + - [visitor](solution/sub_modules/visitor.md) + - [env](solution/sub_modules/env.md) + - [info](solution/sub_modules/info.md) + - [expr_parser](solution/sub_modules/expr_parser.md) + - [rewrite](solution/sub_modules/rewrite.md) +- [编译](compile/README.md) +- [测试](test/README.md) diff --git a/docs/a.sh b/docs/a.sh new file mode 100755 index 0000000..94ec52d --- /dev/null +++ b/docs/a.sh @@ -0,0 +1,77 @@ +#!/bin/bash + +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_ad_spu_attr.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_ad_new_industry_v3_dense_outside.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_ad_delay_delivery_time.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_ad_creative_behavior_intention_keyword.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_ad_ali_feature.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_dense_rewarded_pageid.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_combine_user_level_photo_new.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_combine_user_id_dup_cover_id.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_combine_user_app_list_ad_app_id.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_combine_reco_user_realtime_action_and_creative.h 
../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_combine_querytoken_ad_campaign_type.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_combine_longterm_action_cnt_ts_p5s_fix.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_combine_like_author_id.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_combine_follow_author_id_v1.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_callback_event_sparse.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_brand_user_combine_channel.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_product_name_dense.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_photo_enhance_age.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_photo_embedding_feature2.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_match_dense_num.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_live_user_merchant_list_online.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_live_realtime_detail_item.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_live_realtime_audience_count.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_item_goods_list.h 
../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_item_goods_id_list_size.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_user_realtime_action.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_user_live_mmu_class623.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_user_eshop_click_item_cate3_id.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_user_dense_ad_item_click_num.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_user_ad_playend_extend_short.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_user_ad_download_installed.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_universe_ad_account_target_cpm_pos.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_search_photo_asr.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_user_rtl_product_name_shallow_action_7d.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_user_reco_hate_photo.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_photo_dpa_online_time.h ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_photo_video_feature_moco_cluster.h ../teams/ad/ad_algorithm/bs_feature/fast/impl + +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_ad_spu_attr.cc 
../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_ad_new_industry_v3_dense_outside.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_ad_delay_delivery_time.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_ad_creative_behavior_intention_keyword.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_ad_ali_feature.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_dense_rewarded_pageid.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_combine_user_level_photo_new.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_combine_user_id_dup_cover_id.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_combine_user_app_list_ad_app_id.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_combine_reco_user_realtime_action_and_creative.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_combine_querytoken_ad_campaign_type.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_combine_longterm_action_cnt_ts_p5s_fix.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_combine_like_author_id.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_combine_follow_author_id_v1.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp 
../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_callback_event_sparse.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_brand_user_combine_channel.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_product_name_dense.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_photo_enhance_age.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_photo_embedding_feature2.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_match_dense_num.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_live_user_merchant_list_online.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_live_realtime_detail_item.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_live_realtime_audience_count.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_item_goods_list.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_item_goods_id_list_size.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_user_realtime_action.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_user_live_mmu_class623.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_user_eshop_click_item_cate3_id.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp 
../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_user_dense_ad_item_click_num.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_user_ad_playend_extend_short.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_user_ad_download_installed.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_universe_ad_account_target_cpm_pos.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_search_photo_asr.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_user_rtl_product_name_shallow_action_7d.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_user_reco_hate_photo.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_photo_dpa_online_time.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl +cp ../../ast/teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_photo_video_feature_moco_cluster.cc ../teams/ad/ad_algorithm/bs_feature/fast/impl diff --git a/docs/challenge/README.md b/docs/challenge/README.md new file mode 100644 index 0000000..26cb1be --- /dev/null +++ b/docs/challenge/README.md @@ -0,0 +1,51 @@ +# 挑战 + +## 人工改写工作量非常大 + +如前文所提到的,存量 extractor 类有 `3000+`,还有 `1500+` 的 `item_filter` 和 `label_extractor`,数量非常大。 +并且改写规则也非常多,涉及到很多 `c++` 的细节,算法同学需要首先花大量时间理解改写规则,然后才能进行改写。如果改写过程中 +出现问题,还需要排查逻辑是否一致,非常耗费精力。 + +## 能否实现自动改写 + +是否有办法实现自动改写? 
如果需要自动改写,我们首先需要能够用程序来处理 `adlog` 特征类,即需要能够解析 `c++` 代码,然后 +分析代码中哪些部分是数据相关的逻辑,再根据规则进行重写。 + +从这个角度分析,理论上只要能够解析 `c++` 代码,就可以进行自动改写。 + +是否有办法解析 `c++` 代码 ?答案是肯定的。因为我们的特征类需要编译成功,就需要编译器首先解析 `c++` 代码,然后再转换成机器码。 + +因此,理论上这个程序是可以实现的。 + +## 如何解析 `c++` 代码 + +关键问题是如何解析 `c++` 代码。手动实现不太现实,需要借助一些工具。调研了下 `c++ static analysis`, `c++ 代码解析` 等相关工作, +发现几乎所有的结果都指向了同一个项目,那就是 [llvm](https://llvm.org/)。 + +`llvm` 是一个编译器基础项目,`c++` 编译器 `clang` 就是基于 `llvm` 开发的。`llvm` 提供了 `c++` 代码的词法分析、语法分析等 +一系列编译器方面的功能。我们可以利用 `llvm` 提供的 `clang` 库来解析 `c++` 代码。 + +## 如何根据解析的结果区分数据相关逻辑以及计算相关逻辑 + +有了 `llvm` 解析的结果后,我们还需要区分哪些代码是数据相关逻辑,哪些代码是计算相关逻辑。如前文所提,这些逻辑经常是耦合在一起的。 +如何进行区分也是一个重要的问题。 + +## 如何将 `c++` `proto` 字段映射到 `kv` 格式 + +`adlog` 数据的字段按路径展开可以映射到 `kv` 格式的 `key`,这些映射规则如何在代码解析中进行使用?因为特征类所使用的字段非常多, +我们不可能提前枚举所有字段,因此必须在运行时根据字段路径的字符串来找到对应的 `kv` 格式对应的字段和类型,如 `adlog.user_info.id` +对应到 `uint64_t` 类型的 `kv` 数据。 + +## 模板参数 + +特征类中还有很多模板类,参数通常是 `CommonInfoAttr` 枚举或者行为类型对应的 `int` 等。这些模板参数在改写时如何处理? + +## 如何根据规则进行自动改写 + +如何根据规则进行改写也有很多挑战: +- 前文总结了主要的几十种改写规则,如何知道每行代码应该根据哪种规则进行改写? +- 确定了改写规则后,如何应用这些改写规则? +- 同一行代码是否会被应用到多种规则? +- 这些规则是否会冲突 ? +- 如何保证改写后的代码和 `adlog` 特征类逻辑对齐? +- 改写后的代码还需要能够编译通过,如何保证改写后的代码是合法的 `c++` 代码? 
\ No newline at end of file diff --git a/docs/compile/README.md b/docs/compile/README.md new file mode 100644 index 0000000..c1350c8 --- /dev/null +++ b/docs/compile/README.md @@ -0,0 +1,13 @@ +## 编译 + +```bash +# 假设当前目录在 workspace 目录下 +git clone https://github.com/llvm/llvm-project.git +cd llvm-project +mkdir build +cd build + +cmake -S ../llvm -G Ninja -DLLVM_ENABLE_PROJECTS="clang;clang-tools-extra;mlir" -DLLVM_BUILD_EXAMPLES=ON -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON -DCMAKE_C_COMPILER="/home/liuzhishan/gcc-8.3.0/bin/gcc" -DCMAKE_CXX_COMPILER="/home/liuzhishan/gcc-8.3.0/bin/g++" + +ninja +``` \ No newline at end of file diff --git a/docs/images/problem/kv_feature/double_list.png b/docs/images/problem/kv_feature/double_list.png new file mode 100755 index 0000000..d5fa81f Binary files /dev/null and b/docs/images/problem/kv_feature/double_list.png differ diff --git a/docs/images/problem/kv_feature/leaf_map.png b/docs/images/problem/kv_feature/leaf_map.png new file mode 100755 index 0000000..ed799de Binary files /dev/null and b/docs/images/problem/kv_feature/leaf_map.png differ diff --git a/docs/images/problem/kv_feature/list_message.png b/docs/images/problem/kv_feature/list_message.png new file mode 100755 index 0000000..5ed2c5c Binary files /dev/null and b/docs/images/problem/kv_feature/list_message.png differ diff --git a/docs/images/problem/kv_feature/middle_map.png b/docs/images/problem/kv_feature/middle_map.png new file mode 100755 index 0000000..0122f98 Binary files /dev/null and b/docs/images/problem/kv_feature/middle_map.png differ diff --git a/docs/images/problem/kv_feature/pb_to_kv_convert.png b/docs/images/problem/kv_feature/pb_to_kv_convert.png new file mode 100755 index 0000000..c2f7257 Binary files /dev/null and b/docs/images/problem/kv_feature/pb_to_kv_convert.png differ diff --git a/docs/images/problem/kv_feature/proto_drawback.png b/docs/images/problem/kv_feature/proto_drawback.png new file mode 100755 index 0000000..4cab1e8 Binary 
files /dev/null and b/docs/images/problem/kv_feature/proto_drawback.png differ diff --git a/docs/images/problem/origin_format/proto_data_and_feature.png b/docs/images/problem/origin_format/proto_data_and_feature.png new file mode 100755 index 0000000..3634b78 Binary files /dev/null and b/docs/images/problem/origin_format/proto_data_and_feature.png differ diff --git a/docs/images/solution/adlog_photo_info_extract_photo_enhance_age.png b/docs/images/solution/adlog_photo_info_extract_photo_enhance_age.png new file mode 100755 index 0000000..c7f5051 Binary files /dev/null and b/docs/images/solution/adlog_photo_info_extract_photo_enhance_age.png differ diff --git a/docs/images/solution/bslog_photo_info_extract_photo_enhance_age.png b/docs/images/solution/bslog_photo_info_extract_photo_enhance_age.png new file mode 100755 index 0000000..7a5927a Binary files /dev/null and b/docs/images/solution/bslog_photo_info_extract_photo_enhance_age.png differ diff --git a/docs/images/solution/env.png b/docs/images/solution/env.png new file mode 100755 index 0000000..e90bf0f Binary files /dev/null and b/docs/images/solution/env.png differ diff --git a/docs/images/solution/overall_arch.png b/docs/images/solution/overall_arch.png new file mode 100755 index 0000000..4047dc9 Binary files /dev/null and b/docs/images/solution/overall_arch.png differ diff --git a/docs/images/solution/proto_data.png b/docs/images/solution/proto_data.png new file mode 100755 index 0000000..55442ba Binary files /dev/null and b/docs/images/solution/proto_data.png differ diff --git a/docs/images/solution/rewrite_complex_common_info.png b/docs/images/solution/rewrite_complex_common_info.png new file mode 100755 index 0000000..8c7a4b8 Binary files /dev/null and b/docs/images/solution/rewrite_complex_common_info.png differ diff --git a/docs/images/solution/rewrite_normal_field.png b/docs/images/solution/rewrite_normal_field.png new file mode 100755 index 0000000..e020e67 Binary files /dev/null and 
b/docs/images/solution/rewrite_normal_field.png differ diff --git a/docs/images/solution/rewrite_photo_info_upload_time.png b/docs/images/solution/rewrite_photo_info_upload_time.png new file mode 100755 index 0000000..14ae1c7 Binary files /dev/null and b/docs/images/solution/rewrite_photo_info_upload_time.png differ diff --git a/docs/images/solution/rewrite_simple_common_info.png b/docs/images/solution/rewrite_simple_common_info.png new file mode 100755 index 0000000..be21999 Binary files /dev/null and b/docs/images/solution/rewrite_simple_common_info.png differ diff --git a/docs/images/solution/solution_idea.png b/docs/images/solution/solution_idea.png new file mode 100755 index 0000000..0279a15 Binary files /dev/null and b/docs/images/solution/solution_idea.png differ diff --git a/docs/images/solution/visit_extract_method.png b/docs/images/solution/visit_extract_method.png new file mode 100755 index 0000000..47afc2c Binary files /dev/null and b/docs/images/solution/visit_extract_method.png differ diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 0000000..750998b --- /dev/null +++ b/docs/index.html @@ -0,0 +1,27 @@ + + + + + Document + + + + + + +
+ + + + + diff --git a/docs/problem/README.md b/docs/problem/README.md new file mode 100644 index 0000000..0064f81 --- /dev/null +++ b/docs/problem/README.md @@ -0,0 +1,8 @@ +# 问题描述 + +本项目解决的问题可以总结为一句话: 如何将读取 `protobuf` 数据的 `c++` 特征抽取类自动改写为读取 `kv` 数据的 `c++` 特征抽取类 ? + +实际问题中涉及很多的细节,如数据格式的定义与映射、读取数据的逻辑、计算逻辑等。以下将分两部分详细介绍下。 + +- [原始数据格式与特征提取](problem/origin_format.md) +- [迁移 kv 特征](problem/kv_feature/README.md) diff --git a/docs/problem/kv_feature/README.md b/docs/problem/kv_feature/README.md new file mode 100644 index 0000000..8ba2995 --- /dev/null +++ b/docs/problem/kv_feature/README.md @@ -0,0 +1,23 @@ +# 迁移 `kv` 特征 + +如前文所描述,迁移 `kv` 特征主要是为了解决特征链路中存在的逻辑复用、血缘治理等问题。而关键的一个环节就是大量存量 +平迁问题。 + +我们需要制定转换规则,将 `protobuf` 格式的数据转换为 `flatbuffers` 格式的数据,保证数据的一致性。并且还需要 +将特征抽取类改写为读取 `kv` 数据的逻辑,保证逻辑对齐。 + +本章内容分为如下几个部分: + +- [数据转换](problem/kv_feature/format_conversion.md) +- [特征改写](problem/kv_feature/feature_rewrite.md) +- [改写规则](problem/kv_feature/rewrite_rule/README.md) + - [normal_adlog_field](problem/kv_feature/rewrite_rule/normal_adlog_field.md) + - [common_info](problem/kv_feature/rewrite_rule/common_info.md) + - [middle_node](problem/kv_feature/rewrite_rule/middle_node.md) + - [action_detail](problem/kv_feature/rewrite_rule/action_detail.md) + - [multi_action](problem/kv_feature/rewrite_rule/multi_action.md) + - [reco_user_info](problem/kv_feature/rewrite_rule/reco_user_info.md) + - [get_value_from_action](problem/kv_feature/rewrite_rule/get_value_from_action.md) + - [photo_text](problem/kv_feature/rewrite_rule/photo_text.md) + - [query_token](problem/kv_feature/rewrite_rule/query_token.md) + - [other](problem/kv_feature/rewrite_rule/other.md) \ No newline at end of file diff --git a/docs/problem/kv_feature/feature_rewrite.md b/docs/problem/kv_feature/feature_rewrite.md new file mode 100644 index 0000000..5579852 --- /dev/null +++ b/docs/problem/kv_feature/feature_rewrite.md @@ -0,0 +1,142 @@ +# 特征改写 + +我们用 `bslog` 表示 `flatbuffers` 格式的 `kv` 数据。 + + +我们用一个示例来展示如何将 `protobuf` 
格式的特征类改写为 `flatbuffers` 格式的特征类。 + +`adlog` 特征类 + +```c++ +class ExtractPhotoVideoFeatureMocoCluster : public FastFeature { + public: + ExtractPhotoVideoFeatureMocoCluster() + :FastFeature(ITEM) { + } + + virtual void Extract(const AdLog & adlog, size_t pos, std::vector* result) { + if (pos >= adlog.item_size()) { + return; + } + + auto &item = adlog.item(pos); + if ((item.type() == AD_DSP || item.type() == NATIVE_AD) && item.has_ad_dsp_info() + && item.ad_dsp_info().common_info_attr_size() > 0) { + for (const auto &attr : item.ad_dsp_info().common_info_attr()) { + if (attr.name_value() == ::auto_cpp_rewriter::CommonInfoAttr_Name_VIDEO_MOCO_CLUSTER) { + AddFeature(GetFeature(FeaturePrefix::PHOTO_VIDEO_MOCO_CLUSTER, attr.int_value()), 1.0f, result); + break; + } + } + } + } + + private: + const std::string USED_FEATURES[1] = { + "item.ad_dsp_info.common_info_attr.VIDEO_MOCO_CLUSTER" + }; + DISALLOW_COPY_AND_ASSIGN(ExtractPhotoVideoFeatureMocoCluster); +}; + +REGISTER_EXTRACTOR(ExtractPhotoVideoFeatureMocoCluster); +``` + + +`bslog` 特征类 + +```c++ + +// .h +class BSExtractPhotoVideoFeatureMocoCluster : public BSFastFeature { + public: + BSExtractPhotoVideoFeatureMocoCluster(); + + virtual void Extract(const BSLog& bslog, size_t pos, std::vector* result); + + private: + DISALLOW_COPY_AND_ASSIGN(BSExtractPhotoVideoFeatureMocoCluster); +}; + +REGISTER_BS_EXTRACTOR(BSExtractPhotoVideoFeatureMocoCluster); + + +// .cc +BSExtractPhotoVideoFeatureMocoCluster::BSExtractPhotoVideoFeatureMocoCluster() : BSFastFeature(ITEM) { + attr_metas_.emplace_back(BSFieldEnum::adlog_item_type); + attr_metas_.emplace_back(BSFieldEnum::adlog_item_ad_dsp_info_exists); + attr_metas_.emplace_back(BSFieldEnum::adlog_item_ad_dsp_info_common_info_attr_size); + attr_metas_.emplace_back(BSFieldEnum::adlog_item_ad_dsp_info_common_info_attr_key_535); +} + +void BSExtractPhotoVideoFeatureMocoCluster::Extract(const BSLog& bslog, size_t pos, + std::vector* result) { + auto bs = bslog.GetBS(); + if (bs == 
nullptr) { + return; + } + + auto enum_attr_size = BSFieldEnum::adlog_item_ad_dsp_info_common_info_attr_size; + int32_t attr_size = BSFieldHelper::GetSingular(*bs, enum_attr_size, pos); + + auto enum_info_exists = BSFieldEnum::adlog_item_ad_dsp_info_exists; + bool info_exists = BSFieldHelper::GetSingular(*bs, enum_info_exists, pos); + + auto enum_item_type = BSFieldEnum::adlog_item_type; + int64_t item_type = BSFieldHelper::GetSingular(*bs, enum_item_type, pos); + + auto enum_key_535 = BSFieldEnum::adlog_item_ad_dsp_info_common_info_attr_key_535; + int64_t key_535 = BSFieldHelper::GetSingular(*bs, enum_key_535, pos); + + auto enum_key_535_exists = BSFieldEnum::adlog_item_ad_dsp_info_common_info_attr_key_535; + bool key_535_exists = BSFieldHelper::HasSingular(*bs, enum_key_535_exists, pos); + + if ((item_type == bs::ItemType::AD_DSP || item_type == bs::ItemType::NATIVE_AD) && info_exists && + attr_size > 0) { + if (key_535_exists) { + AddFeature(GetFeature(FeaturePrefix::PHOTO_VIDEO_MOCO_CLUSTER, key_535), 1.0f, result); + } + } +} +``` + +## 改写步骤 + +以上示例是简单的 `CommonInfoAttr` 类型。 + +在 `adlog` 特征类中,我们获取的数据来自 `AdLog`, 需要遍历 `common_info_attr` 列表,并根据 `name_value` +来判断是否是我们需要的特征。如果找到了对应的数据,在处理完成后则需要 `break`。这是处理 `CommonInfoAttr` 类型 +特征的固定写法。 + +```c++ +for (const auto &attr : item.ad_dsp_info().common_info_attr()) { + if (attr.name_value() == ::auto_cpp_rewriter::CommonInfoAttr_Name_VIDEO_MOCO_CLUSTER) { + AddFeature(GetFeature(FeaturePrefix::PHOTO_VIDEO_MOCO_CLUSTER, attr.int_value()), 1.0f, result); + break; + } +} +``` + +而在 `bslog` 特征类中,我们为了和 `adlog` 特征类对齐逻辑,获取数据的写法需要修改为按 `key` 的方式获取,并且还需要 +判断 `key` 是否存在,以对齐 `adlog` 中列表元素是否存在的逻辑。可以按如下步骤进行改写: + +1. 
根据字段路径获取 `key` 的值, 以及字段是否存在的结果。 + + ```c++ + auto enum_key_535 = BSFieldEnum::adlog_item_ad_dsp_info_common_info_attr_key_535; + int64_t key_535 = BSFieldHelper::GetSingular(*bs, enum_key_535, pos); + + auto enum_key_535_exists = BSFieldEnum::adlog_item_ad_dsp_info_common_info_attr_key_535; + bool key_535_exists = BSFieldHelper::HasSingular(*bs, enum_key_535_exists, pos); + ``` + +2. 直接用 `key` 获取的结果来实现计算逻辑即可,不需要 `for` 循环遍历。 +3. 构造函数中添加 `bs` 字段的路径。 +4. 修改注册特征的宏。 + +按以上步骤改写后,则可以和之前的逻辑完全对齐。 + +还有其他类型的特征,如获取 `ActionDetail` 中的数据、`PhotoInfo` 等中间节点的数据、模板等等, 由于获取 `protobuf` +格式的数据逻辑和计算逻辑耦合比较严重,因此每种类型的特征都需要专门的处理逻辑。 + +具体示例可见 `teams/ad/ad_algorithm/feature/fast/impl/` 目录下的特征类,对应的改写后的 `bslog` 特征类在 +`teams/ad/ad_algorithm/bs_feature/fast/impl/` 目录下。 \ No newline at end of file diff --git a/docs/problem/kv_feature/format_conversion.md b/docs/problem/kv_feature/format_conversion.md new file mode 100644 index 0000000..0da2e61 --- /dev/null +++ b/docs/problem/kv_feature/format_conversion.md @@ -0,0 +1,75 @@ +# 数据转换 + +## 目标格式 + +由于 `protobuf` 定义的数据格式太复杂,并且序列化与反序列化开销也比较大,因此我们打算将数据格式迁移为 `flatbuffers` +格式的 `kv` 数据。 + +`flatbuffers` 相关知识可参考其官网: https://flatbuffers.dev/。 + +与 `protobuf` 相比,其优点是读取性能比较好,直接采用指针读取数据,几乎不需要反序列化时间。但缺点是数据是只读的, +使用起来不是很方便。 + +## 如何将 `AdjointLabeledLog` 转换为 `flatbuffers` 格式 ? + +我们需要考虑如何将 `AdjointLabeledLog` 转换为 `flatbuffers` 格式, 并保证数据的唯一映射。 + +### `flatbuffers` 数据格式定义 + +```flatbuffers +namespace ks.cofea_fbs; + +table BytesList { + value:[string]; +} + +table FloatList { + value:[float]; +} + +table Int64List { + value:[long]; +} + +table RawFeature { + kind:ks.cofea_fbs.RawFeature_.Anonymous0; + name:string; + is_list:bool; +} + +namespace ks.cofea_fbs.RawFeature_; + +table Anonymous0 { + bytes_list:ks.cofea_fbs.BytesList; + float_list:ks.cofea_fbs.FloatList; + int64_list:ks.cofea_fbs.Int64List; + simple_kscore_list:ks.cofea_fbs.SimpleKeyScoreList; + bytes_msg_list:ks.cofea_fbs.BytesMsgList; +} + +... 
+ +table FeatureSet { + feature_name:string; + raw_features:[ks.cofea_fbs.RawFeature]; +} + +table FeatureSets { + feature_columns:[ks.cofea_fbs.FeatureSet]; +} + +table BatchedSamples { + primary_key:ulong; + timestamp:ulong; + context:ks.cofea_fbs.FeatureSets; + sub_context:[ks.cofea_fbs.FeatureSets]; + samples:ks.cofea_fbs.FeatureSets; +} +``` + +### 格式转换 + +我们可以观察到 `AdjointLabeledLog` 的结构中的路径是唯一的,因此我们可以用此路径作为唯一的 `key`,而 `value` 则可以 +统一对应到几种基础类型。对于叶子节点为基础类型的简单嵌套字段,可以直接将路径作为`key`, 叶子节点的数据作为 `value`。 + +![pb_to_kv_convert.png](../../images/problem/kv_feature/pb_to_kv_convert.png) \ No newline at end of file diff --git a/docs/problem/kv_feature/rewrite_rule/README.md b/docs/problem/kv_feature/rewrite_rule/README.md new file mode 100644 index 0000000..c4b43eb --- /dev/null +++ b/docs/problem/kv_feature/rewrite_rule/README.md @@ -0,0 +1,14 @@ +# 改写规则 + +以下针对主要的特征类型的改写规则进行一下总结。还有很多其他的规则可见代码中的具体实现,此处不再一一展开。 + +- [normal_adlog_field](problem/kv_feature/rewrite_rule/normal_adlog_field.md) +- [common_info](problem/kv_feature/rewrite_rule/common_info.md) +- [middle_node](problem/kv_feature/rewrite_rule/middle_node.md) +- [action_detail](problem/kv_feature/rewrite_rule/action_detail.md) +- [multi_action](problem/kv_feature/rewrite_rule/multi_action.md) +- [reco_user_info](problem/kv_feature/rewrite_rule/reco_user_info.md) +- [get_value_from_action](problem/kv_feature/rewrite_rule/get_value_from_action.md) +- [photo_text](problem/kv_feature/rewrite_rule/photo_text.md) +- [query_token](problem/kv_feature/rewrite_rule/query_token.md) +- [other](problem/kv_feature/rewrite_rule/other.md) \ No newline at end of file diff --git a/docs/problem/kv_feature/rewrite_rule/action_detail.md b/docs/problem/kv_feature/rewrite_rule/action_detail.md new file mode 100644 index 0000000..6a183ce --- /dev/null +++ b/docs/problem/kv_feature/rewrite_rule/action_detail.md @@ -0,0 +1,139 @@ +# action_detail + +`action_detail` 在 `proto` 的 `user_info` 中是以 `map` 形式存储的,`key` 是 `action number`, `value` 
+是嵌套的 `UserActionDetail` 类型。 + +使用时需要先根据 `action number` 找到对应的 `value`, 然后再根据嵌套的字段取得具体的数据。 + +示例: teams/ad/ad_algorithm/feature/fast/impl/extract_combine_longterm_action_cnt_ts_p5s_fix.h + +```cpp + int32_t action = 4; + + const auto& ad_dsp_action_detail = adlog.user_info().explore_long_term_ad_action(); + + auto iter = ad_dsp_action_detail.find(action); + if (iter == ad_dsp_action_detail.end()) { + return; + } + + const auto& dsp_infos = iter->second.list(); + ... + + for (int i = 0; i < dsp_infos.size() && i < 1000; ++i) { + auto& dinfo = dsp_infos.Get(i); + uint64 atime = dinfo.action_timestamp(); + + if (photo_id == dinfo.photo_id()) { + ... + } + + if (prod_id == dinfo.product_id_hash()) { + ... + } + } +``` + +## 改写规则 + +需要先根据后面的代码找到具体的叶子节点字段,然后记录到 `Env` 中,之后对每个字段进行 `bslog` 数据的获取,将遍历 +逻辑替换为直接遍历具体某个字段的逻辑。`map` 查找相关的逻辑也直接替换为根据路径 `key` 获取数据的逻辑。 + +`bslog` 特征类改写示例: teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_combine_longterm_action_cnt_ts_p5s_fix.cc + +```cpp + auto enum_ctime = BSFieldEnum::adlog_time; + uint64_t ctime = BSFieldHelper::GetSingular(*bs, enum_ctime, pos); + + auto enum_info_exists = BSFieldEnum::adlog_user_info_exists; + bool info_exists = BSFieldHelper::GetSingular(*bs, enum_info_exists, pos); + + auto enum_base_exists = BSFieldEnum::adlog_item_ad_dsp_info_advertiser_base_exists; + bool base_exists = BSFieldHelper::GetSingular(*bs, enum_base_exists, pos); + + auto enum_creative_id = BSFieldEnum::adlog_item_ad_dsp_info_creative_base_id; + uint64_t creative_id = BSFieldHelper::GetSingular(*bs, enum_creative_id, pos); + + auto enum_photo_id = BSFieldEnum::adlog_item_ad_dsp_info_creative_base_photo_id; + uint64_t photo_id = BSFieldHelper::GetSingular(*bs, enum_photo_id, pos); + + auto enum_action_timestamp = + BSFieldEnum::adlog_user_info_explore_long_term_ad_action_key_4_list_action_timestamp; + BSRepeatedField action_timestamp(*bs, enum_action_timestamp, pos); + + auto enum_list_photo_id = 
BSFieldEnum::adlog_user_info_explore_long_term_ad_action_key_4_list_photo_id; + BSRepeatedField list_photo_id(*bs, enum_list_photo_id, pos); + + auto enum_id_hash = BSFieldEnum::adlog_user_info_explore_long_term_ad_action_key_4_list_product_id_hash; + BSRepeatedField id_hash(*bs, enum_id_hash, pos); + + auto enum_hash_v3 = + BSFieldEnum::adlog_user_info_explore_long_term_ad_action_key_4_list_second_industry_id_hash_v3; + BSRepeatedField hash_v3(*bs, enum_hash_v3, pos); + + auto enum_list_size = BSFieldEnum::adlog_user_info_explore_long_term_ad_action_key_4_list_size; + int32_t list_size = BSFieldHelper::GetSingular(*bs, enum_list_size, pos); + + auto enum_list_size_exists = BSFieldEnum::adlog_user_info_explore_long_term_ad_action_key_4_list_size; + bool list_size_exists = BSFieldHelper::HasSingular(*bs, enum_list_size_exists, pos); + + if (info_exists) { + // 组合一下 + + uint64 prod_id = 0; + auto enum_pname = BSFieldEnum::adlog_item_ad_dsp_info_advertiser_base_product_name; + absl::string_view pname = BSFieldHelper::GetSingular(*bs, enum_pname, pos); + + if (base_exists) { + prod_id = base::CityHash64(pname.data(), pname.size()); + } + uint64 secind_id = 0; + auto enum_iname = BSFieldEnum::adlog_item_ad_dsp_info_advertiser_base_second_industry_name; + absl::string_view iname = BSFieldHelper::GetSingular(*bs, enum_iname, pos); + + if (base_exists) { + secind_id = base::CityHash64(iname.data(), iname.size()); + } + bool key_exists = list_size_exists; + if (FLAGS_use_exist_fix_version) { + key_exists = BSFieldHelper::GetSingular( + *bs, BSFieldEnum::adlog_user_info_explore_long_term_ad_action_key_4_exists, pos); + } + if (!key_exists) { + return; + } + + const int C = 6; + const int TC = 11; + int ccnts[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + int pcnts[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + int ucnts[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + + for (int i = 0; i < list_size && i < 1000; ++i) { + double days = (ctime - action_timestamp.Get(i)) / 1000 / 3600 / 24; + int 
idx = 0; + if (days < 3) { + idx = 0; + } else if (days >= 3 && days < 7) { + idx = 1; + } else if (days >= 7 && days < 15) { + idx = 2; + } else if (days >= 15 && days < 30) { + idx = 3; + } else if (days >= 30 && days < 90) { + idx = 4; + } else { + idx = 5; + } + if (photo_id == list_photo_id.Get(i)) { + ccnts[idx] += 1; + } + if (prod_id == id_hash.Get(i)) { + pcnts[idx] += 1; + } + if (secind_id == hash_v3.Get(i)) { + ucnts[idx] += 1; + } + } + } +``` diff --git a/docs/problem/kv_feature/rewrite_rule/common_info.md b/docs/problem/kv_feature/rewrite_rule/common_info.md new file mode 100644 index 0000000..7718a57 --- /dev/null +++ b/docs/problem/kv_feature/rewrite_rule/common_info.md @@ -0,0 +1,251 @@ +# common info + +## 普通 common info + +遍历 `common_info_attr` 列表,通过枚举判断是否是需要的数据,并在处理结束后 `break` 循环。 + +示例: teams/ad/ad_algorithm/feature/fast/impl/extract_photo_video_feature_moco_cluster.h + +```cpp + for (const auto &attr : item.ad_dsp_info().common_info_attr()) { + if (attr.name_value() == ::auto_cpp_rewriter::CommonInfoAttr_Name_VIDEO_MOCO_CLUSTER) { + AddFeature(GetFeature(FeaturePrefix::PHOTO_VIDEO_MOCO_CLUSTER, attr.int_value()), 1.0f, result); + break; + } + } +``` + +### 改写规则 + +`bslog` 特征类改写示例: teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_photo_video_feature_moco_cluster.cc + +```cpp + auto enum_key_535 = BSFieldEnum::adlog_item_ad_dsp_info_common_info_attr_key_535; + int64_t key_535 = BSFieldHelper::GetSingular(*bs, enum_key_535, pos); + + auto enum_key_535_exists = BSFieldEnum::adlog_item_ad_dsp_info_common_info_attr_key_535; + bool key_535_exists = BSFieldHelper::HasSingular(*bs, enum_key_535_exists, pos); + + if ((item_type == bs::ItemType::AD_DSP || item_type == bs::ItemType::NATIVE_AD) && info_exists && + attr_size > 0) { + if (key_535_exists) { + AddFeature(GetFeature(FeaturePrefix::PHOTO_VIDEO_MOCO_CLUSTER, key_535), 1.0f, result); + } + } +``` + +## 来自中间节点的 common info + +示例: 
teams/ad/ad_algorithm/feature/fast/impl/extract_item_goods_id_list_size.h, 所需字段来自中间 +节点的 common info, 如 live info common info。 + +```cpp + auto live_info = GetLiveInfo(adlog.item(pos)); + if (live_info == nullptr) { + return; + } + if (live_info->common_info_attr_size() > 0) { + const auto& attr = live_info->common_info_attr(); + for (const auto& liveAttr : attr) { + if (liveAttr.name_value() == 23071) { + int goods_num = liveAttr.int_list_value_size(); + AddFeature(0, goods_num, result); + break; + } + } + } +``` + +在遇到 `common_info_attr` 时候可以确定是否来自中间节点, 如果是则创建 `CommonInfoMiddleNode`, 之后需要 +重新实现 `CommonInfo` 对应的逻辑。 + +注意,有可能同时出现中间节点和 `common info`, 但是 `common info` 不是来自于中间节点。需要在创建 +`common_info_normal` 或者 `common_info_fixed_list` 时候根据 `prefix_adlog` 是否以 `adlog` 开头来区分。 + +示例: + +- teams/ad/ad_algorithm/feature/fast/impl/extract_combine_cross_eds_comp_product_name_clk.h + +## common info case switch + +见: + +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_ad_impression_realtime_new_extend.h + +示例: teams/ad/ad_algorithm/feature/fast/impl/extract_live_realtime_audience_count.h + + for (int i = 0; i < live_info->common_info_attr_size(); i++) { + const ::auto_cpp_rewriter::CommonInfoAttr& attr = + live_info->common_info_attr(i); + if (attr.type() == ::auto_cpp_rewriter::CommonTypeEnum_AttrType_INT_ATTR) { + switch (attr.name_value()) { + case ::auto_cpp_rewriter:: + CommonInfoAttr_Name_LIVE_ACCUMULATIVE_START_PLAY_COUNT: + play_cnt_acc = attr.int_value(); + case ::auto_cpp_rewriter:: + CommonInfoAttr_Name_LIVE_ACCUMULATIVE_STOP_PLAY_COUNT: + stop_play_cnt_acc = attr.int_value(); + } + } + } + +根据 switch 的条件将 enum 的值添加到 `CommonInfoNormal` 中即可。需要处理共用逻辑的情况。 + +## GetCommonInfoAttrNew + +这种写法比较绕, 共有 5 个类是这种写法, 而且其中两个 `switch` 语句有 bug, 忽略, 手动改。 + +- teams/ad/ad_algorithm/feature/fast/impl/extract_photo_author_level_id_new.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_live_author_tag_category_new_fix.cc + +其他三个是 + +- 
teams/ad/ad_algorithm/feature/fast/impl/extract_fanstop_photo_mmu_hetu_tag_new.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_live_author_tag_category_new.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_photo_enhance_htag_new.h + +## common info 模板参数 + +`common_info` 枚举来自模板参数。且逻辑中没有 `break`。 + +以下类是这种写法, 共有 19 个。 + +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_realtime_action.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_realtime_action_time_stamp.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_combine_applist_sdpa_info.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_combine_user_info_sdpa_info.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_dense_sdpa_ecom_pid_action_cnt.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_dense_sdpa_list_cnt.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_sdpa_ecom_user_action_kwds.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_u_realtime_action_len.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_u_realtime_action_list_add.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_u_realtime_action_list.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_u_realtime_action_seq.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_u_realtime_action_sp_list.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_u_realtime_action_ts_seq.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_dsplive_realtime_action_list.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_search_action_list.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_search_action_list_v2.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_search_action_timestamp_list.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_search_action_timestamp_list_v2.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_u_spu_action_list.h + + +示例: teams/ad/ad_algorithm/feature/fast/impl/extract_user_realtime_action.h + + if (adlog.has_user_info() && adlog.user_info().common_info_attr_size() > 0) 
{ + for (const ::auto_cpp_rewriter::CommonInfoAttr& user_attr : adlog.user_info().common_info_attr()) { + if (user_attr.name_value() == user_attr_name_) { + for (const auto& val : user_attr.int_list_value()) { + action_list.push_back(val); + } + } + if (user_attr.name_value() == timestamp_attr_name_) { + for (const auto& val : user_attr.int_list_value()) { + timestamp_list.push_back(val); + } + } + if (action_list.size() > 0 && timestamp_list.size() > 0) { + break; + } + } + } + +其中 `user_attr_name_` 和 `timestamp_attr_name_` 是模板参数。 + +用 `CommonInfoFixed` 处理。 + +## common info 放到 map 里 + +`common info` 用 `map` 存起来,用 `CommonInfoMultiIntList` 表示。 + +以下类是这种写法: + +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_rtl_product_name_shallow_action_7d.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_rtl_photo_id_deep_action_30d.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_rtl_photo_id_shallow_action_3d.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_rtl_photo_id_shallow_action_7d.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_rtl_product_name_deep_action_90d.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_rtl_product_name_shallow_action_30d.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_rtl_product_name_deep_action_30d.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_rtl_photo_id_deep_action_90d.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_rtl_photo_id_shallow_action_3d.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_rtl_product_name_shallow_action_30d.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_rtl_industry_id_shallow_action_30d.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_rtl_photo_id_deep_action_90d.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_rtl_industry_id_deep_action_90d.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_rtl_industry_id_shallow_action_3d.h +- 
teams/ad/ad_algorithm/feature/fast/impl/extract_user_rtl_industry_id_shallow_action_7d.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_rtl_product_name_shallow_action_7d.h + +示例: + + // 定义 map 字段 空间换时间 + std::unordered_map*> action_name2list; + std::unordered_map*> timestamp_name2list; + std::unordered_map action_name2size; + std::unordered_map timestamp_name2size; + + // 遍历用户行为 + for (const ::auto_cpp_rewriter::CommonInfoAttr &userAttr : adlog.user_info().common_info_attr()) { + if (userAttr.name_value() < action_map.size() && action_map[userAttr.name_value()]) { + action_name2list[userAttr.name_value()] = &(userAttr.int_list_value()); + action_name2size[userAttr.name_value()] = userAttr.int_list_value_size(); + } + + if (userAttr.name_value() < timestamp_map.size() && timestamp_map[userAttr.name_value()]) { + timestamp_name2list[userAttr.name_value()] = &(userAttr.int_list_value()); + timestamp_name2size[userAttr.name_value()] = userAttr.int_list_value_size(); + } + } + + // 截断用户行为和行为时间 + for (int i = 0; i < action_vec.size(); i ++) { + auto action_name = action_vec[i]; + auto timestamp_name = timestamp_vec[i]; + ... 
+ } + + +`CommonInfo` 结果保存在 `action_name2list` 和 `timestamp_name2list` 中。`common_info` 枚举通过构造函数添加到 +`action_vec` 和 `timestamp_vec` 中。 + +先通过 `OverviewHandler` 找到 `common_info` 的 `prefix`, 然后统一替换 `map` 相关操作。通过 `if` 条件中的 +`name_value()` 和 `action_map` 来判断。需要用 `CommonInfoFixed`。 + +## 遍历 common info map_int_int + +以下类是这种写法: + +- teams/ad/ad_algorithm/feature/fast/impl/extract_combine_user_applist_cate_match_num.h + +## 遍历 common info int_list + +见: + +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_eshop_click_item_cate3_id.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_photo_click_cart_seller_id_list.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_merchant_click_cart_seller_id_list.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_merchant_buy_item_id_list.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_eshop_click_item_cate2_id.h + +注意有强转成 `int` 的情况。 + +## common info int_list 强转 int + +- teams/ad/ad_algorithm/feature/fast/impl/extract_live_user_merchant_list_online.h + +### 同时出现 common info normal 和 common info fixed + +同时出现 common info normal 和 common info fixed。 + +见: +- teams/ad/ad_algorithm/feature/fast/impl/extract_combine_author_media_app_avg_explore_ratio.h + +之前的逻辑是只能出现一种,在 overview 中会进行标记是哪一种。通过如下逻辑判断是否是 `common info fixed`: + + template_int_names_.size() > 0 && common_info_values_.size() == 0 + +因此需要想办法支持两种都存在的情况。 diff --git a/docs/problem/kv_feature/rewrite_rule/get_value_from_action.md b/docs/problem/kv_feature/rewrite_rule/get_value_from_action.md new file mode 100644 index 0000000..bb5701e --- /dev/null +++ b/docs/problem/kv_feature/rewrite_rule/get_value_from_action.md @@ -0,0 +1,47 @@ +# get_value_from_action + +通用的函数,会在 `add_feature` 中被调用。参数固定。 + +共 11 个类,见: + +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_ad_click_no_lps.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_ad_item_click_no_conv.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_ad_item_click_no_lps.h +- 
teams/ad/ad_algorithm/feature/fast/impl/extract_user_ad_played3s_no_item_click.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_ad_played3s_no_lps.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_ad_played5s_no_item_click.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_ad_played5s_no_lps.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_ad_playedend_no_lps.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_ad_played_fix5s_no_item_click.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_ad_server_no_item_click_flag.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_ad_server_no_item_click.h + +示例: teams/ad/ad_algorithm/feature/fast/impl/extract_user_ad_click_no_lps.h + +```cpp + if (adlog.has_user_info() && adlog.user_info().explore_long_term_ad_action_size() > 0) { + const auto& ad_action = adlog.user_info().explore_long_term_ad_action(); + auto item_played5s_iter = ad_action.find(item_played5s_no); + auto item_click_iter = ad_action.find(item_click_no); + if (item_played5s_iter != ad_action.end() && item_click_iter != ad_action.end()) { + const auto& item_played5s_action_base_infos = item_played5s_iter->second.list(); + const auto& item_click_action_base_infos = item_click_iter->second.list(); + + add_feature(item_played5s_action_base_infos, item_click_action_base_infos, + FeaturePrefix::USER_AD_CLICK_NO_LPS_PRODUCT_1, + FeaturePrefix::USER_AD_CLICK_NO_LPS_INDUSTRY_1, 24, process_time, result); + + add_feature(item_played5s_action_base_infos, item_click_action_base_infos, + FeaturePrefix::USER_AD_CLICK_NO_LPS_PRODUCT_3, + FeaturePrefix::USER_AD_CLICK_NO_LPS_INDUSTRY_3, 72, process_time, result); + + add_feature(item_played5s_action_base_infos, item_click_action_base_infos, + FeaturePrefix::USER_AD_CLICK_NO_LPS_PRODUCT_7, + FeaturePrefix::USER_AD_CLICK_NO_LPS_INDUSTRY_7, 168, process_time, result); + + add_feature(item_played5s_action_base_infos, item_click_action_base_infos, + 
FeaturePrefix::USER_AD_CLICK_NO_LPS_PRODUCT_30, + FeaturePrefix::USER_AD_CLICK_NO_LPS_INDUSTRY_30, 720, process_time, result); + } + } +``` diff --git a/docs/problem/kv_feature/rewrite_rule/middle_node.md b/docs/problem/kv_feature/rewrite_rule/middle_node.md new file mode 100644 index 0000000..3a55362 --- /dev/null +++ b/docs/problem/kv_feature/rewrite_rule/middle_node.md @@ -0,0 +1,54 @@ +# middle_node + +字段来自 `PhotoInfo` 或者 `AuthorInfo` 等中间嵌套节点。 + +示例: teams/ad/ad_algorithm/feature/fast/impl/extract_combine_follow_author_id_v1.h + +```cpp + auto live_info = GetLiveInfo(adlog.item(pos)); + + if(live_info == nullptr){ + return; + } + + if(!live_info->has_author_info() ) { + return; + } + + const auto & author_info = live_info->author_info(); + if(!adlog.has_user_info() + ||!adlog.user_info().has_action_detail()) { + return; + } + + const auto & action_detail = adlog.user_info().action_detail(); + int follow_num = action_detail.follow().size(); + + char author_buffer[256]; + char follow_buffer[256]; + + for(int i = 0; i < follow_num && i < max_follow_num; ++i){ + const auto & follow_info = action_detail.follow(i); + + if(follow_info.id() == 90041) { + continue; + } + + if(follow_info.id() == author_info.id()) { + continue; + } + + uint64_t id = GetFeature(FeaturePrefix::COMBINE_FOLLOW_AUTHOR_NEW, + follow_info.id()); + AddFeature(id, 1.0, result); + } +``` + +叶子节点也可能是 str 或者 list。 + +可能最早遇到 `size` 方法,此时知道去掉 `size` 后 `bs_enum_str` 对应的变量名,然后替换。需要在 +`OverviewHandler` 中记录叶子节点的类型,用来创建变量。 + +## 改写规则 + +根据叶子节点对应到提前实现好的模板类,具体实现逻辑见: teams/ad/ad_algorithm/bs_feature/fast/frame/bs_info_util.h \ No newline at end of file diff --git a/docs/problem/kv_feature/rewrite_rule/multi_action.md b/docs/problem/kv_feature/rewrite_rule/multi_action.md new file mode 100644 index 0000000..f720b24 --- /dev/null +++ b/docs/problem/kv_feature/rewrite_rule/multi_action.md @@ -0,0 +1,18 @@ +## 多个 action 通过数组保存 + +示例: teams/ad/ad_algorithm/feature/fast/impl/extract_match_dense_num.h + + const 
auto& ad_action = adlog.user_info().explore_long_term_ad_action(); + for (auto action_no : action_vec_) { + auto action_no_iter = ad_action.find(action_no); + int photo_id_action_num = 0; + int product_name_hash_action_num = 0; + int second_industy_id_hash_action_num = 0; + if (action_no_iter != ad_action.end()) { + const auto& action_no_list = action_no_iter->second.list(); + for (int k = 0; k < action_no_list.size() && k < 100; ++k) { + ... + } + } + +用第一个进行处理,然后将其他 action 转换为 lambda 函数, 具体处理逻辑见 `ActionDetailRule`。 diff --git a/docs/problem/kv_feature/rewrite_rule/normal_adlog_field.md b/docs/problem/kv_feature/rewrite_rule/normal_adlog_field.md new file mode 100644 index 0000000..0520e37 --- /dev/null +++ b/docs/problem/kv_feature/rewrite_rule/normal_adlog_field.md @@ -0,0 +1,46 @@ +# normal_adlog_field + +普通 `adlog` 字段,指最终叶子节点是简单的类型,中间节点没有复杂类型的情况,如 `adlog.user_info.id` 等。 +直接通过字段路径名对应 `bslog` 中的路径,然后根据 `proto` 字段类型对应 `bslog` 中的类型。 + +示例: teams/ad/ad_algorithm/feature/fast/impl/extract_photo_video_feature_moco_cluster.h + +```cpp + auto &item = adlog.item(pos); + if ((item.type() == AD_DSP || item.type() == NATIVE_AD) && item.has_ad_dsp_info() + && item.ad_dsp_info().common_info_attr_size() > 0) { + ... 
+ } +``` + +`adlog.item.type` 则表示 `item type` 字段,类型是枚举,可以转换成 `int` 类型。 + +## 改写规则 + +这种类型的字段是最简单的类型,只需要在解析时候根据表达式本身就可以确定对应的改写逻辑。 + +`bslog` 特征类改写示例: teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_photo_video_feature_moco_cluster.cc + +```cpp + auto enum_attr_size = BSFieldEnum::adlog_item_ad_dsp_info_common_info_attr_size; + int32_t attr_size = BSFieldHelper::GetSingular(*bs, enum_attr_size, pos); + + auto enum_info_exists = BSFieldEnum::adlog_item_ad_dsp_info_exists; + bool info_exists = BSFieldHelper::GetSingular(*bs, enum_info_exists, pos); + + auto enum_item_type = BSFieldEnum::adlog_item_type; + int64_t item_type = BSFieldHelper::GetSingular(*bs, enum_item_type, pos); + + auto enum_key_535 = BSFieldEnum::adlog_item_ad_dsp_info_common_info_attr_key_535; + int64_t key_535 = BSFieldHelper::GetSingular(*bs, enum_key_535, pos); + + auto enum_key_535_exists = BSFieldEnum::adlog_item_ad_dsp_info_common_info_attr_key_535; + bool key_535_exists = BSFieldHelper::HasSingular(*bs, enum_key_535_exists, pos); + + if ((item_type == bs::ItemType::AD_DSP || item_type == bs::ItemType::NATIVE_AD) && info_exists && + attr_size > 0) { + if (key_535_exists) { + AddFeature(GetFeature(FeaturePrefix::PHOTO_VIDEO_MOCO_CLUSTER, key_535), 1.0f, result); + } + } +``` diff --git a/docs/problem/kv_feature/rewrite_rule/other.md b/docs/problem/kv_feature/rewrite_rule/other.md new file mode 100644 index 0000000..b2fb0f4 --- /dev/null +++ b/docs/problem/kv_feature/rewrite_rule/other.md @@ -0,0 +1,26 @@ +# other + +## proto map + +- [x] teams/ad/ad_algorithm/feature/fast/impl/extract_user_live_mmu_class623.h +- [x] teams/ad/ad_algorithm/feature/fast/impl/extract_user_lda.h + +## add feature + +- [x] teams/ad/ad_algorithm/feature/fast/impl/extract_user_ad_item_click_no_lps.h +- [x] teams/ad/ad_algorithm/feature/fast/impl/extract_user_ad_server_no_item_click.h + +## list + +- [x] teams/ad/ad_algorithm/feature/fast/impl/extract_user_view_like_photo_label.h +- [x] 
teams/ad/ad_algorithm/feature/fast/impl/extract_user_audience.h +- [x] teams/ad/ad_algorithm/feature/fast/impl/extract_photo_caption_segment.h +- [x] teams/ad/ad_algorithm/feature/fast/impl/extract_photo_dsp_embedding.h +- [x] teams/ad/ad_algorithm/feature/fast/impl/extract_user_business_interest.h + +## common info list + +- [x] teams/ad/ad_algorithm/feature/fast/impl/extract_live_user_followed_merchant_author_list.h +- [x] teams/ad/ad_algorithm/feature/fast/impl/extract_user_like_tags.h +- [x] teams/ad/ad_algorithm/feature/fast/impl/extract_ad_akg_user_hweight_rec.h + diff --git a/docs/problem/kv_feature/rewrite_rule/photo_text.md b/docs/problem/kv_feature/rewrite_rule/photo_text.md new file mode 100644 index 0000000..1a1e97a --- /dev/null +++ b/docs/problem/kv_feature/rewrite_rule/photo_text.md @@ -0,0 +1,25 @@ +# photo_text + +## GetPhotoText + +类似 `GetQueryToken` 的逻辑, 不过多了 `pos` 和 `common info enum` 这两个参数。需要用到 +`FixedCommonInfo` 。 + +有 14 个类用了 `GetPhotoText`。 + +见: + +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_asr_2.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_asr.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_cname_2.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_cname.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_description_2.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_description.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_ocr_2.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_ocr.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_ocr_title_2.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_ocr_title.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_pname_2.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_pname.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_slogan_2.h +- 
teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_slogan.h diff --git a/docs/problem/kv_feature/rewrite_rule/proto_list.md b/docs/problem/kv_feature/rewrite_rule/proto_list.md new file mode 100644 index 0000000..9988a21 --- /dev/null +++ b/docs/problem/kv_feature/rewrite_rule/proto_list.md @@ -0,0 +1,10 @@ +# proto_list + +## ProtoList + +for 循环遍历的是中间的 proto list。需要知道最终用到的叶子节点字段,取其中一个用来作为遍历的变量。 +添加 `ProtoListInfo` 和 `ProtoListRule`。 + +示例: + +- teams/ad/ad_algorithm/feature/fast/impl/extract_user_lda.h diff --git a/docs/problem/kv_feature/rewrite_rule/query_token.md b/docs/problem/kv_feature/rewrite_rule/query_token.md new file mode 100644 index 0000000..043a932 --- /dev/null +++ b/docs/problem/kv_feature/rewrite_rule/query_token.md @@ -0,0 +1,31 @@ +# query_token + +## GetQueryToken + +common info map_string_float。 +将函数替换为直接返回 `BSMapField`, 再替换 `for` 循环或者其他引用的部分。 + +有 20 个类用了 `GetQueryToken`。 + +见: + +- teams/ad/ad_algorithm/feature/fast/impl/extract_combine_querytoken_ad_campaign_id.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_combine_querytoken_ad_campaign_type.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_combine_querytoken_ad_industry.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_combine_querytoken_ad_unit_id.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_combine_querytoken_photo.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_query_token.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_asr_2.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_asr.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_cname_2.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_cname.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_description_2.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_description.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_ocr_2.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_ocr.h 
+- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_ocr_title_2.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_ocr_title.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_pname_2.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_pname.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_slogan_2.h +- teams/ad/ad_algorithm/feature/fast/impl/extract_search_photo_slogan.h diff --git a/docs/problem/kv_feature/rewrite_rule/reco_user_info.md b/docs/problem/kv_feature/rewrite_rule/reco_user_info.md new file mode 100644 index 0000000..422edfa --- /dev/null +++ b/docs/problem/kv_feature/rewrite_rule/reco_user_info.md @@ -0,0 +1,11 @@ +## reco_user_info + +示例: teams/ad/ad_algorithm/feature/fast/impl/extract_reco_user_like_list.h + +只需要替换 `adlog.has_reco_user_info()` 和 `adlog.reco_user_info()`, 其他逻辑都不用动。 + + if (!adlog.has_reco_user_info()) { + return; + } + + const auto *ruinfo = adlog.reco_user_info(); \ No newline at end of file diff --git a/docs/problem/origin_format.md b/docs/problem/origin_format.md new file mode 100644 index 0000000..7685fe7 --- /dev/null +++ b/docs/problem/origin_format.md @@ -0,0 +1,126 @@ +# 原始数据格式与特征提取 + +原始数据是以 `protobuf` 格式存储的,通过 `c++` 实现特征提取的逻辑。如下图所示: + +![proto_data_and_feature](../../images/problem/origin_format/proto_data_and_feature.png) + +## 原始数据格式: `AdJointLabeledLog` + +`AdJointLabeledLog` 是 `protobuf` 定义的嵌套结构,用于保存特征提取所需要的原始数据,如下所示: + +```proto +message CommonInfoAttr { + enum Name { + UNKNOW_NAME = 0; + + LIVE_RECO_EMBEDDING_CTR_USER = 1; + APP_LIST = 2; + CLICK_LIST_FOLLOW = 3; + CLICK_LIST_HOT = 4; + CLICK_LIST_NEAR = 5; + FORWARD_LIST = 6; + } + + optional CommonTypeEnum.AttrType type = 1; + optional int64 name_value = 2; + optional int64 int_value = 3; + optional float float_value = 4; + optional bytes string_value = 5; + repeated int64 int_list_value = 6; + repeated float float_list_value = 7; + repeated bytes string_list_value = 8; + map<int64, int64>
map_int64_int64_value = 9; + map<string, int64> map_string_int64_value = 10; + map<int64, float> map_int64_float_value = 11; + map<string, float> map_string_float_value = 12; +} + +message UserInfo { + uint64 id = 1; + UserActionDetail action_detail = 2; + repeated CommonInfoAttr common_info_attr = 3; +} + +/////// Item 表示一个投放单元 /////////////// +message Item { + string reason = 1; + ItemType type = 2; + uint64 id = 3; + repeated CommonInfoAttr common_info_attr = 4; +} +//////////// AdJointLabeledLog ///////////////// +message AdJointLabeledLog { + uint64 llsid = 1; + uint32 deprecated_tab = 2; + uint64 time = 3; + uint32 page = 4; + UserInfo user_info = 5; + repeated Item item = 6; +} +``` + + +其字段主要分为两种类型: +- 基础类型以及普通嵌套字段类型,如 `UserInfo`、`PhotoInfo` 等。 +- `CommonInfoAttr` 类型,为了更灵活地在同一结构体中保存不同类型的数据,采用类似 `json` 的方式,预留不同类型的字段, + 在使用时候根据枚举名区分不同的数据。因此使用时必须遍历整个 `CommonInfoAttr` 并根据枚举来判断是否是需要的数据。 + +注意: 为了简化变量名,以后用 `AdLog` 特指 `AdJointLabeledLog`。 + +## 特征提取 + +特征提取逻辑通过 `c++` 实现。采用继承的方式提供统一的接口。输入统一为 `AdLog` 类型和 `item` 下标。 + +不同的特征提取类继承自基类 `FastFeature`,并实现 `Extract` 方法。 + +如下所示: + +```c++ +/// 为了节省内存,这里使用 union 压缩了内存占用 +/// 用法:需要在访问的地方判断是否是 sparse or dense。 +/// 如果是 sparse feature, 只能访问 sign 字段 +/// 如果是 dense feature,只能访问 value 和 dense_sign 字段 +union ExtractResult { + /// for sparse feature only + uint64_t sign; + + /// for dense feature only + struct { + float value; + uint32_t dense_sign; }; +}; + +/// 基类 +class FastFeature : public FastFeatureInterface { + public: + explicit FastFeature(FeatureType type, size_t left_bits = 52) : + FastFeatureInterface(type, left_bits) {} + + virtual ~FastFeature() {} + + void ExtractFea(const AdLogInterface& adlog, size_t pos, + std::vector<ExtractResult>* result) { + return Extract(reinterpret_cast<const AdLog&>(adlog), pos, result); + } + + protected: + virtual void Extract(const AdLog& adlog, size_t pos, + std::vector<ExtractResult>* result) = 0; +}; + +/// 子类: 用户 id 特征提取类。 +class ExtractUserId : public FastFeature { + public: + ExtractUserId():FastFeature(FeatureType::USER){} + virtual void Extract(const AdLog& adlog, +
size_t pos, + std::vector<ExtractResult>* result) { + if(adlog.has_user_info()){ + AddFeature(GetFeature(FeaturePrefix::USER_ID,adlog.user_info().id()),1.0f,result); + } + } +}; +REGISTER_EXTRACTOR(ExtractUserId); +``` + +注意: `AdLog` 是对 `protobuf` 定义的 `AdJointLabeledLog` 的简单封装。 diff --git a/docs/solution/README.md b/docs/solution/README.md new file mode 100644 index 0000000..1442e5c --- /dev/null +++ b/docs/solution/README.md @@ -0,0 +1,184 @@ +# 思路 + +有了 `llvm` 解析 `c++` 代码为基础,针对之前总结的问题,我们就可以按如下思路来解决自动改写的问题: + +![solution_idea](../../images/solution/solution_idea.png) + +1. 利用 `llvm` 解析 `c++` 代码, 获取代码对应的完整的 `ast` 结构, 即语法树结构。 +2. 理解代码中的上下文信息,如是 `if` 语句还是循环语句,是函数调用还是变量定义,并将这些 `scope` 相关信息保存在专门设计的 `Env` 中。 +3. 利用 `protobuf` 提供的功能,对 `AdJointLabeledLog` 中的节点建立一棵 `ProtoNode` 树,保存所有 `proto` 字段节点的信息, + 用于自动对应 `kv` 数据, 用于理解 `proto` 数据相关的代码。 +4. 遍历 `ast` 结构,解析表达式,并将解析的结果保存在 `Env` 中。结合 `ProtoNode` 中的信息以及上下文信息,就可以判断是数据相关逻辑还是计算相关逻辑。 +5. 结合以上解析代码所得到的信息,根据规则进行改写: + - 对于数据相关逻辑,结合 `ProtoNode` 树中的信息,区分是哪种类型的数据,进一步判断属于哪种改写规则。 + - 结合不同类型的改写规则,进行自动改写,并用改写后的代码替换原有的代码。 + - 将改写后的结果按特征类的逻辑,写入到新的文件中。 + +## 示例 + +以下我们通过一个简单的示例来展示这个思路。 + +`adlog` 特征类: teams/ad/ad_algorithm/feature/fast/impl/extract_ad_delay_label.h + +代码如下: + +```cpp +class ExtractAdDelayLabel : public FastFeature { + public: + ExtractAdDelayLabel() : FastFeature(FeatureType::DENSE_ITEM) {} + + virtual void Extract(const AdLog& adlog, size_t pos, + std::vector<ExtractResult>* result) { + if (adlog.item_size() <= pos) { + return; + } + auto& item = adlog.item(pos); + if ((item.type() == AD_DSP || item.type() == NATIVE_AD) && item.has_label_info()) { + for (auto & attr : item.label_info().label_info_attr()) { + if (attr.name_value() == + ::auto_cpp_rewriter::LabelInfoCommonAttr_Name_BACKEND_LABEL_MATCH_CALIBRATION_TAG) { + AddFeature(0, attr.bool_value(), result); + } + } + } + } + + private: + DISALLOW_COPY_AND_ASSIGN(ExtractAdDelayLabel); +}; +``` + +利用 `llvm` 解析 `c++` 代码可以获取代码对应的完整的 `ast` 结构。 + +我们只截取了部分主要结构, 其他部分以 `...` 代替, 详细结构可以参考
[ast](solution/sub_modules/ast.md): + +```cpp +CXXRecordDecl 0x7f909280fcc8 line:9:7 referenced class ExtractAdDelayLabel definition +... +|-public 'FastFeature':'ks::ad_algorithm::FastFeature' +|-CXXRecordDecl 0x7f909280fe28 col:7 implicit referenced class ExtractAdDelayLabel +|-AccessSpecDecl 0x7f909280feb8 col:2 public +|-CXXConstructorDecl 0x7f909280ff48 col:5 used ExtractAdDelayLabel 'void ()' implicit-inline +| |-CXXCtorInitializer 'FastFeature':'ks::ad_algorithm::FastFeature' +| | `-CXXConstructExpr 0x7f9092810b60 'FastFeature':'ks::ad_algorithm::FastFeature' 'void (FeatureType, size_t)' +... +| `-CompoundStmt 0x7f9092810bc0 +|-CXXMethodDecl 0x7f90928103c0 line:13:16 used Extract 'void (const AdLog &, size_t, std::vector *)' virtual implicit-inline +| |-Overrides: [ 0x7f909534e860 FastFeature::Extract 'void (const AdLog &, size_t, std::vector *)' ] +| |-ParmVarDecl 0x7f9092810010 col:37 used adlog 'const AdLog &' +| |-ParmVarDecl 0x7f9092810088 col:51 used pos 'size_t':'unsigned long' +| |-ParmVarDecl 0x7f90928102d8 col:52 used result 'std::vector *' +| `-CompoundStmt 0x7f9092815d20 +| |-IfStmt 0x7f9092810cf8 +| | |-BinaryOperator 0x7f9092810cb0 'bool' '<=' +| | | |-ImplicitCastExpr 0x7f9092810c98 'size_t':'unsigned long' +| | | | `-CXXMemberCallExpr 0x7f9092810c40 'int' +... +| | | `-ImplicitCastExpr 0x7f9092810c80 'size_t':'unsigned long' +| | | `-DeclRefExpr 0x7f9092810c60 'size_t':'unsigned long' lvalue ParmVar 0x7f9092810088 'pos' 'size_t':'unsigned long' +| | `-CompoundStmt 0x7f9092810ce0 +| | `-ReturnStmt 0x7f9092810cd0 +| |-DeclStmt 0x7f9092811008 +| | `-VarDecl 0x7f9092810d38 col:11 used item 'const ItemAdaptorBase &' cinit +... +| `-IfStmt 0x7f9092815d00 +| |-BinaryOperator 0x7f90928113f0 'bool' '&&' +| | |-ParenExpr 0x7f9092811360 'bool' +| | | `-BinaryOperator 0x7f9092811340 'bool' '||' +... +| `-CompoundStmt 0x7f9092815ce8 +| `-CXXForRangeStmt 0x7f9092815878 +| |-<<>> +| |-DeclStmt 0x7f9092811828 +... +| |-DeclStmt 0x7f9092815170 +... 
+| |-DeclStmt 0x7f9092815188 +... +| |-CXXOperatorCallExpr 0x7f9092815538 'iterator':'google::protobuf::internal::RepeatedPtrIterator' lvalue '++' +... +| | `-DeclRefExpr 0x7f9092815478 'const_iterator':'google::protobuf::internal::RepeatedPtrIterator' lvalue Var 0x7f9092811898 '__begin3' 'const_iterator':'google::protobuf::internal::RepeatedPtrIterator' +| |-DeclStmt 0x7f90928115e8 +| | `-VarDecl 0x7f9092811580 col:19 used attr 'const auto_cpp_rewriter::LabelInfoCommonAttr &' cinit +... +| `-CompoundStmt 0x7f9092815cd0 +| `-IfStmt 0x7f9092815cb0 +| |-BinaryOperator 0x7f9092815a98 'bool' '==' +| | |-CXXMemberCallExpr 0x7f9092815928 'int64_t':'long' +| | | `-MemberExpr 0x7f90928158f8 '' .name_value 0x15afc2c0 +... +| `-CompoundStmt 0x7f9092815c98 +| `-CXXMemberCallExpr 0x7f9092815c18 'void' +| |-MemberExpr 0x7f9092815b10 '' ->AddFeature 0x1f69a1e0 +... +|-AccessSpecDecl 0x7f90928104b0 col:2 private +... + `-CompoundStmt 0x7f909281a720 +``` + +可以看出,这个 `ast` 结构中包含了所有的信息,包含变量定义、函数定义、类定义、宏定义、模板定义等。这个结构还是挺复杂的。 +不同节点的 `ast` 通过一种递归的结构组织起来。 + +我们对其中的节点进行一些简单的解释。 + +比如 `CXXRecordDecl` 表示整个特征类的定义, `CXXMethodDecl` 表示方法的定义, `CXXForRangeStmt` 表示 `for` 循环的定义。 +详细的接口则可以通过查询 `llvm` 的文档了解。如 `CXXRecordDecl` 的详细接口可以参考 [CXXRecordDecl](https://clang.llvm.org/doxygen/classclang_1_1CXXRecordDecl.html)。 + +`adlog` 是通过参数传递到特征类的,因此其 `ast` 节点是固定的 `DeclRefExpr`, 并且 `name` 是 `adlog`。 +我们通过判断一个表达式是否是来自 `adlog` 来判断其是否是数据相关逻辑。如下所示: + +```cpp + if (item.type() == AD_DSP || item.type() == NATIVE_AD) { + ...
+ } +``` + +对于表达式 `item.type()`, 我们通过递归的寻找其父节点来判断其根节点是否是 `adlog`。当发现其是 `adlog` 字段后, +我们根据 `ProtoNode` 中的信息,获取其字段类型是 `int`。并且可以知道 `kv` 数据中其 `key` 是 `adlog.item.type`。 + +如下所示: + +```cpp + auto enum_item_type = BSFieldEnum::adlog_item_type; + int64_t item_type = BSFieldHelper::GetSingular(*bs, enum_item_type, pos); +``` + +而对于 `common info` 中的数据,我们根据其字段路径和枚举名可以确定其 `kv` 数据,之后再将原来的 `for` 循环遍历 +`common_info_attr` 列表的逻辑去掉,直接实现具体的计算逻辑即可。 + +最终的 `bslog` 特征类见: teams/ad/ad_algorithm/bs_feature/fast/impl/bs_extract_ad_delay_label.cc + +如下所示: + +```cpp +BSExtractAdDelayLabel::BSExtractAdDelayLabel() : BSFastFeature(FeatureType::DENSE_ITEM) { + attr_metas_.emplace_back(BSFieldEnum::adlog_item_type); + attr_metas_.emplace_back(BSFieldEnum::adlog_item_label_info_exists); + attr_metas_.emplace_back(BSFieldEnum::adlog_item_label_info_label_info_attr_key_1006); +} + +void BSExtractAdDelayLabel::Extract(const BSLog& bslog, size_t pos, std::vector* result) { + auto bs = bslog.GetBS(); + if (bs == nullptr) { + return; + } + + auto enum_info_exists = BSFieldEnum::adlog_item_label_info_exists; + bool info_exists = BSFieldHelper::GetSingular(*bs, enum_info_exists, pos); + + auto enum_item_type = BSFieldEnum::adlog_item_type; + int64_t item_type = BSFieldHelper::GetSingular(*bs, enum_item_type, pos); + + auto enum_key_1006 = BSFieldEnum::adlog_item_label_info_label_info_attr_key_1006; + bool key_1006 = BSFieldHelper::GetSingular(*bs, enum_key_1006, pos); + + auto enum_key_1006_exists = BSFieldEnum::adlog_item_label_info_label_info_attr_key_1006; + bool key_1006_exists = BSFieldHelper::HasSingular(*bs, enum_key_1006_exists, pos); + + if ((item_type == bs::ItemType::AD_DSP || item_type == bs::ItemType::NATIVE_AD) && info_exists) { + if (key_1006_exists) { + AddFeature(0, key_1006, result); + } + } +} +``` \ No newline at end of file diff --git a/docs/solution/overall_architecture.md b/docs/solution/overall_architecture.md new file mode 100644 index 0000000..a70adab --- /dev/null +++ 
b/docs/solution/overall_architecture.md @@ -0,0 +1,5 @@ +# 整体架构 + +整体架构如下所示: + +![overall_arch](../../images/solution/overall_arch.png) diff --git a/docs/solution/sub_modules/README.md b/docs/solution/sub_modules/README.md new file mode 100644 index 0000000..1cc46d8 --- /dev/null +++ b/docs/solution/sub_modules/README.md @@ -0,0 +1,10 @@ +# 子模块 + +- [proto_parser](solution/sub_modules/proto_parser.md) +- [matcher](solution/sub_modules/matcher.md) +- [ast](solution/sub_modules/ast.md) +- [visitor](solution/sub_modules/visitor.md) +- [env](solution/sub_modules/env.md) +- [info](solution/sub_modules/info.md) +- [expr_parser](solution/sub_modules/expr_parser.md) +- [rewrite](solution/sub_modules/rewrite.md) \ No newline at end of file diff --git a/docs/solution/sub_modules/ast.md b/docs/solution/sub_modules/ast.md new file mode 100644 index 0000000..f15ae54 --- /dev/null +++ b/docs/solution/sub_modules/ast.md @@ -0,0 +1,173 @@ +# ast + +我们再仔细看一下思路中的解析示例: teams/ad/ad_algorithm/feature/fast/impl/extract_ad_delay_label.h + +`adlog` 特征类代码如下: + +```cpp +class ExtractAdDelayLabel : public FastFeature { + public: + ExtractAdDelayLabel() : FastFeature(FeatureType::DENSE_ITEM) {} + + virtual void Extract(const AdLog& adlog, size_t pos, + std::vector* result) { + if (adlog.item_size() <= pos) { + return; + } + auto& item = adlog.item(pos); + if ((item.type() == AD_DSP || item.type() == NATIVE_AD) && item.has_label_info()) { + for (auto & attr : item.label_info().label_info_attr()) { + if (attr.name_value() == + ::auto_cpp_rewriter::LabelInfoCommonAttr_Name_BACKEND_LABEL_MATCH_CALIBRATION_TAG) { + AddFeature(0, attr.bool_value(), result); + } + } + } + } + + private: + DISALLOW_COPY_AND_ASSIGN(ExtractAdDelayLabel); +}; +``` + +`llvm` 解析后的 `ast` 完整结构如下: + +```cpp +CXXRecordDecl 0x7f909280fcc8 line:9:7 referenced class ExtractAdDelayLabel definition +|-DefinitionData polymorphic has_user_declared_ctor can_const_default_init +| |-DefaultConstructor exists non_trivial user_provided 
+| |-CopyConstructor non_trivial user_declared has_const_param needs_overload_resolution implicit_has_const_param +| |-MoveConstructor needs_overload_resolution +| |-CopyAssignment non_trivial has_const_param user_declared needs_overload_resolution implicit_has_const_param +| |-MoveAssignment needs_overload_resolution +| `-Destructor simple non_trivial needs_overload_resolution +|-public 'FastFeature':'ks::ad_algorithm::FastFeature' +|-CXXRecordDecl 0x7f909280fe28 col:7 implicit referenced class ExtractAdDelayLabel +|-AccessSpecDecl 0x7f909280feb8 col:2 public +|-CXXConstructorDecl 0x7f909280ff48 col:5 used ExtractAdDelayLabel 'void ()' implicit-inline +| |-CXXCtorInitializer 'FastFeature':'ks::ad_algorithm::FastFeature' +| | `-CXXConstructExpr 0x7f9092810b60 'FastFeature':'ks::ad_algorithm::FastFeature' 'void (FeatureType, size_t)' +| | |-DeclRefExpr 0x7f9092810ae0 'ks::ad_algorithm::FeatureType' EnumConstant 0x1f693b00 'DENSE_ITEM' 'ks::ad_algorithm::FeatureType' +| | | `-NestedNameSpecifier TypeSpec 'ks::ad_algorithm::FeatureType' +| | `-CXXDefaultArgExpr 0x7f9092810b40 <> 'size_t':'unsigned long' +| `-CompoundStmt 0x7f9092810bc0 +|-CXXMethodDecl 0x7f90928103c0 line:13:16 used Extract 'void (const AdLog &, size_t, std::vector *)' virtual implicit-inline +| |-Overrides: [ 0x7f909534e860 FastFeature::Extract 'void (const AdLog &, size_t, std::vector *)' ] +| |-ParmVarDecl 0x7f9092810010 col:37 used adlog 'const AdLog &' +| |-ParmVarDecl 0x7f9092810088 col:51 used pos 'size_t':'unsigned long' +| |-ParmVarDecl 0x7f90928102d8 col:52 used result 'std::vector *' +| `-CompoundStmt 0x7f9092815d20 +| |-IfStmt 0x7f9092810cf8 +| | |-BinaryOperator 0x7f9092810cb0 'bool' '<=' +| | | |-ImplicitCastExpr 0x7f9092810c98 'size_t':'unsigned long' +| | | | `-CXXMemberCallExpr 0x7f9092810c40 'int' +| | | | `-MemberExpr 0x7f9092810bf0 '' .item_size 0x6095b90 +| | | | `-ImplicitCastExpr 0x7f9092810c20 'const ks::ad_algorithm::AdLogInterface' lvalue +| | | | `-DeclRefExpr 0x7f9092810bd0 
'const AdLog':'const ks::ad_algorithm::AdLog' lvalue ParmVar 0x7f9092810010 'adlog' 'const AdLog &' +| | | `-ImplicitCastExpr 0x7f9092810c80 'size_t':'unsigned long' +| | | `-DeclRefExpr 0x7f9092810c60 'size_t':'unsigned long' lvalue ParmVar 0x7f9092810088 'pos' 'size_t':'unsigned long' +| | `-CompoundStmt 0x7f9092810ce0 +| | `-ReturnStmt 0x7f9092810cd0 +| |-DeclStmt 0x7f9092811008 +| | `-VarDecl 0x7f9092810d38 col:11 used item 'const ItemAdaptorBase &' cinit +| | `-CXXMemberCallExpr 0x7f9092810e10 'const ItemAdaptorBase':'const ks::ad_algorithm::ItemAdaptorBase' lvalue +| | |-MemberExpr 0x7f9092810dc0 '' .item 0xafc3968 +| | | `-DeclRefExpr 0x7f9092810da0 'const AdLog':'const ks::ad_algorithm::AdLog' lvalue ParmVar 0x7f9092810010 'adlog' 'const AdLog &' +| | `-ImplicitCastExpr 0x7f9092810e50 'int' +| | `-ImplicitCastExpr 0x7f9092810e38 'size_t':'unsigned long' +| | `-DeclRefExpr 0x7f9092810df0 'size_t':'unsigned long' lvalue ParmVar 0x7f9092810088 'pos' 'size_t':'unsigned long' +| `-IfStmt 0x7f9092815d00 +| |-BinaryOperator 0x7f90928113f0 'bool' '&&' +| | |-ParenExpr 0x7f9092811360 'bool' +| | | `-BinaryOperator 0x7f9092811340 'bool' '||' +| | | |-BinaryOperator 0x7f90928111a8 'bool' '==' +| | | | |-ImplicitCastExpr 0x7f9092811178 'int' +| | | | | `-CXXMemberCallExpr 0x7f9092811070 'auto_cpp_rewriter::ItemType' +| | | | | `-MemberExpr 0x7f9092811040 '' .type 0x6911c90 +| | | | | `-DeclRefExpr 0x7f9092811020 'const ItemAdaptorBase':'const ks::ad_algorithm::ItemAdaptorBase' lvalue Var 0x7f9092810d38 'item' 'const ItemAdaptorBase &' +| | | | `-ImplicitCastExpr 0x7f9092811190 'int' +| | | | `-DeclRefExpr 0x7f90928110c0 'auto_cpp_rewriter::ItemType' EnumConstant 0x7f90930c7ad0 'AD_DSP' 'auto_cpp_rewriter::ItemType' (UsingShadow 0x7f909280f530 'AD_DSP') +| | | `-BinaryOperator 0x7f9092811320 'bool' '==' +| | | |-ImplicitCastExpr 0x7f90928112f0 'int' +| | | | `-CXXMemberCallExpr 0x7f9092811218 'auto_cpp_rewriter::ItemType' +| | | | `-MemberExpr 0x7f90928111e8 '' .type 
0x6911c90 +| | | | `-DeclRefExpr 0x7f90928111c8 'const ItemAdaptorBase':'const ks::ad_algorithm::ItemAdaptorBase' lvalue Var 0x7f9092810d38 'item' 'const ItemAdaptorBase &' +| | | `-ImplicitCastExpr 0x7f9092811308 'int' +| | | `-DeclRefExpr 0x7f9092811238 'auto_cpp_rewriter::ItemType' EnumConstant 0x7f90930c8450 'NATIVE_AD' 'auto_cpp_rewriter::ItemType' (UsingShadow 0x7f909280f700 'NATIVE_AD') +| | `-CXXMemberCallExpr 0x7f90928113d0 'bool' +| | `-MemberExpr 0x7f90928113a0 '' .has_label_info 0x6912388 +| | `-DeclRefExpr 0x7f9092811380 'const ItemAdaptorBase':'const ks::ad_algorithm::ItemAdaptorBase' lvalue Var 0x7f9092810d38 'item' 'const ItemAdaptorBase &' +| `-CompoundStmt 0x7f9092815ce8 +| `-CXXForRangeStmt 0x7f9092815878 +| |-<<>> +| |-DeclStmt 0x7f9092811828 +| | `-VarDecl 0x7f9092811620 col:26 implicit used __range3 'const ::google::protobuf::RepeatedPtrField< ::auto_cpp_rewriter::LabelInfoCommonAttr> &' cinit +| | `-CXXMemberCallExpr 0x7f9092811510 'const ::google::protobuf::RepeatedPtrField< ::auto_cpp_rewriter::LabelInfoCommonAttr>':'const google::protobuf::RepeatedPtrField' lvalue +| | `-MemberExpr 0x7f90928114e0 '' .label_info_attr 0x7f9092d36bf8 +| | `-CXXMemberCallExpr 0x7f9092811460 'const auto_cpp_rewriter::LabelInfo' lvalue +| | `-MemberExpr 0x7f9092811430 '' .label_info 0x6912598 +| | `-DeclRefExpr 0x7f9092811410 'const ItemAdaptorBase':'const ks::ad_algorithm::ItemAdaptorBase' lvalue Var 0x7f9092810d38 'item' 'const ItemAdaptorBase &' +| |-DeclStmt 0x7f9092815170 +| | `-VarDecl 0x7f9092811898 col:24 implicit used __begin3 'const_iterator':'google::protobuf::internal::RepeatedPtrIterator' cinit +| | `-CXXMemberCallExpr 0x7f9092811a10 'const_iterator':'google::protobuf::internal::RepeatedPtrIterator' +| | `-MemberExpr 0x7f90928119e0 '' .begin 0x7f909316b670 +| | `-DeclRefExpr 0x7f9092811840 'const ::google::protobuf::RepeatedPtrField< ::auto_cpp_rewriter::LabelInfoCommonAttr>':'const google::protobuf::RepeatedPtrField' lvalue Var 0x7f9092811620 
'__range3' 'const ::google::protobuf::RepeatedPtrField< ::auto_cpp_rewriter::LabelInfoCommonAttr> &' +| |-DeclStmt 0x7f9092815188 +| | `-VarDecl 0x7f9092811918 col:24 implicit used __end3 'const_iterator':'google::protobuf::internal::RepeatedPtrIterator' cinit +| | `-CXXMemberCallExpr 0x7f9092815090 'const_iterator':'google::protobuf::internal::RepeatedPtrIterator' +| | `-MemberExpr 0x7f9092815060 '' .end 0x7f909316b908 +| | `-DeclRefExpr 0x7f9092811860 'const ::google::protobuf::RepeatedPtrField< ::auto_cpp_rewriter::LabelInfoCommonAttr>':'const google::protobuf::RepeatedPtrField' lvalue Var 0x7f9092811620 '__range3' 'const ::google::protobuf::RepeatedPtrField< ::auto_cpp_rewriter::LabelInfoCommonAttr> &' +| |-CXXOperatorCallExpr 0x7f9092815338 'bool' '!=' +| | |-ImplicitCastExpr 0x7f9092815320 'bool (*)(const iterator &) const' +| | | `-DeclRefExpr 0x7f90928152a0 'bool (const iterator &) const' lvalue CXXMethod 0x7f90928135e8 'operator!=' 'bool (const iterator &) const' +| | |-ImplicitCastExpr 0x7f9092815270 'const google::protobuf::internal::RepeatedPtrIterator' lvalue +| | | `-DeclRefExpr 0x7f90928151a0 'const_iterator':'google::protobuf::internal::RepeatedPtrIterator' lvalue Var 0x7f9092811898 '__begin3' 'const_iterator':'google::protobuf::internal::RepeatedPtrIterator' +| | `-ImplicitCastExpr 0x7f9092815288 'const iterator':'const google::protobuf::internal::RepeatedPtrIterator' lvalue +| | `-DeclRefExpr 0x7f90928151c0 'const_iterator':'google::protobuf::internal::RepeatedPtrIterator' lvalue Var 0x7f9092811918 '__end3' 'const_iterator':'google::protobuf::internal::RepeatedPtrIterator' +| |-CXXOperatorCallExpr 0x7f9092815538 'iterator':'google::protobuf::internal::RepeatedPtrIterator' lvalue '++' +| | |-ImplicitCastExpr 0x7f9092815520 'iterator &(*)()' +| | | `-DeclRefExpr 0x7f9092815498 'iterator &()' lvalue CXXMethod 0x7f9092812e28 'operator++' 'iterator &()' +| | `-DeclRefExpr 0x7f9092815478 'const_iterator':'google::protobuf::internal::RepeatedPtrIterator' 
lvalue Var 0x7f9092811898 '__begin3' 'const_iterator':'google::protobuf::internal::RepeatedPtrIterator' +| |-DeclStmt 0x7f90928115e8 +| | `-VarDecl 0x7f9092811580 col:19 used attr 'const auto_cpp_rewriter::LabelInfoCommonAttr &' cinit +| | `-CXXOperatorCallExpr 0x7f90928156a8 'const auto_cpp_rewriter::LabelInfoCommonAttr' lvalue '*' +| | |-ImplicitCastExpr 0x7f9092815690 'reference (*)() const' +| | | `-DeclRefExpr 0x7f9092815608 'reference () const' lvalue CXXMethod 0x7f9092812a80 'operator*' 'reference () const' +| | `-ImplicitCastExpr 0x7f90928155f0 'const google::protobuf::internal::RepeatedPtrIterator' lvalue +| | `-DeclRefExpr 0x7f90928155d0 'const_iterator':'google::protobuf::internal::RepeatedPtrIterator' lvalue Var 0x7f9092811898 '__begin3' 'const_iterator':'google::protobuf::internal::RepeatedPtrIterator' +| `-CompoundStmt 0x7f9092815cd0 +| `-IfStmt 0x7f9092815cb0 +| |-BinaryOperator 0x7f9092815a98 'bool' '==' +| | |-CXXMemberCallExpr 0x7f9092815928 'int64_t':'long' +| | | `-MemberExpr 0x7f90928158f8 '' .name_value 0x15afc2c0 +| | | `-DeclRefExpr 0x7f90928158d8 'const auto_cpp_rewriter::LabelInfoCommonAttr' lvalue Var 0x7f9092811580 'attr' 'const auto_cpp_rewriter::LabelInfoCommonAttr &' +| | `-ImplicitCastExpr 0x7f9092815a80 'int64_t':'long' +| `-CompoundStmt 0x7f9092815c98 +| `-CXXMemberCallExpr 0x7f9092815c18 'void' +| |-MemberExpr 0x7f9092815b10 '' ->AddFeature 0x1f69a1e0 +| | `-ImplicitCastExpr 0x7f9092815bf0 'ks::ad_algorithm::FastFeatureInterface *' FastFeatureInterface)> +| | `-CXXThisExpr 0x7f9092815b00 'ks::ad_algorithm::ExtractAdDelayLabel *' implicit this +| |-ImplicitCastExpr 0x7f9092815c50 'uint64_t':'unsigned long' +| | `-IntegerLiteral 0x7f9092815b40 'int' 0 +| |-ImplicitCastExpr 0x7f9092815c68 'float' +| | `-CXXMemberCallExpr 0x7f9092815bb0 'bool' +| | `-MemberExpr 0x7f9092815b80 '' .bool_value 0x15affd68 +| | `-DeclRefExpr 0x7f9092815b60 'const auto_cpp_rewriter::LabelInfoCommonAttr' lvalue Var 0x7f9092811580 'attr' 'const 
auto_cpp_rewriter::LabelInfoCommonAttr &' +| `-ImplicitCastExpr 0x7f9092815c80 'std::vector *' +| `-DeclRefExpr 0x7f9092815bd0 'std::vector *' lvalue ParmVar 0x7f90928102d8 'result' 'std::vector *' +|-AccessSpecDecl 0x7f90928104b0 col:2 private +|-AccessSpecDecl 0x7f90928104d8 <./base/common/basic_types.h:70:3, col:10> col:3 private +|-CXXConstructorDecl 0x7f90928106b0 /home/liuzhishan/ast/teams/ad/ad_algorithm/feature/fast/impl/extract_ad_delay_label.h:30:28 ExtractAdDelayLabel 'void (const ExtractAdDelayLabel &)' +| `-ParmVarDecl 0x7f90928105a8 <./base/common/basic_types.h:71:12, col:26> col:27 'const ExtractAdDelayLabel &' +|-CXXMethodDecl 0x7f9092810820 col:8 operator= 'void (const ExtractAdDelayLabel &)' +| `-ParmVarDecl 0x7f9092810790 col:33 'const ExtractAdDelayLabel &' +`-CXXDestructorDecl 0x7f90928108e8 col:7 implicit used ~ExtractAdDelayLabel 'void () noexcept' inline default + |-Overrides: [ 0x7f909534cb58 FastFeature::~FastFeature 'void () noexcept' ] + `-CompoundStmt 0x7f909281a720 +``` + +可以看出,`ast` 中包含了所有的信息,包含变量定义、函数定义、类定义、宏定义、模板定义等。 + +虽然 `ast` 结构很复杂,但也是按照代码本身递归的结构进行组织的, 因此,结合 `llvm` 提供的 `ast` 接口,以及节点 +上下文的信息,我们可以很方便地遍历代码中的信息。进而我们可以知道每一行代码是否是取数据相关的逻辑,并且结合代码中的 +信息,我们可以知道应该属于哪个改写规则。 diff --git a/docs/solution/sub_modules/env.md b/docs/solution/sub_modules/env.md new file mode 100644 index 0000000..c03cb67 --- /dev/null +++ b/docs/solution/sub_modules/env.md @@ -0,0 +1,7 @@ +# env + +详细实现可参考: `convert/Env.h`。 + +`Env` 用于保存上下文相关信息,设计为一种递归的结构,如下所示: + +![env](../../images/solution/env.png) diff --git a/docs/solution/sub_modules/expr_parser.md b/docs/solution/sub_modules/expr_parser.md new file mode 100644 index 0000000..61ed63b --- /dev/null +++ b/docs/solution/sub_modules/expr_parser.md @@ -0,0 +1,63 @@ +# expr_parser + +详细实现可参考: `convert/ExprParser.cpp`。 + +`expr_parser` 主要用于解析表达式,并将解析得到的信息结合 `ProtoNode` 中的信息,更新到前文所提到的 `Env` 中。 +具体的信息会更新到 `Env` 中不同的 `Info` 属性中,用于之后的改写。 + +实际逻辑也比较多,这里只列出部分关键逻辑。 + +```cpp +std::shared_ptr<ExprInfo> parse_expr_simple(clang::Expr*
expr, Env* env_ptr) { + if (expr == nullptr) { + LOG(INFO) << "expr is null"; + return nullptr; + } + + if (clang::CXXMemberCallExpr* cxx_member_call_expr = dyn_cast(expr)) { + auto expr_info_ptr = std::make_shared(expr, env_ptr); + ExprInfo& expr_info = *expr_info_ptr; + expr_info_ptr->set_raw_expr_str(stmt_to_string(expr)); + + clang::Expr* caller = cxx_member_call_expr->getImplicitObjectArgument(); + if (clang::MemberExpr* callee = dyn_cast(cxx_member_call_expr->getCallee())) { + std::string callee_name = callee->getMemberDecl()->getNameAsString(); + + expr_info.set_parent(std::move(parse_expr_simple(caller, env_ptr))); + + // for loop, begin is loop var + if (callee_name == "begin") { + return expr_info_ptr; + } + + expr_info.set_callee_name(callee_name); + expr_info.add_cur_expr_str(callee_name); + + for (unsigned i = 0; i < cxx_member_call_expr->getNumArgs(); i++) { + expr_info.add_param(cxx_member_call_expr->getArg(i)); + auto param_expr_info_ptr = parse_expr_simple(cxx_member_call_expr->getArg(i), env_ptr); + param_expr_info_ptr->set_caller_info(expr_info_ptr); + expr_info_ptr->add_call_expr_param(std::move(param_expr_info_ptr)); + } + + return expr_info_ptr; + } else { + std::string expr_str = stmt_to_string(expr); + LOG(INFO) << "unsupported cxx member call expr: " << expr_str; + return expr_info_ptr; + } + } else if (clang::MemberExpr* member_expr = dyn_cast(expr)) { + ... + } else if (...) { + ... 
+ } else { + LOG(INFO) << "unknown type, expr: " << stmt_to_string(expr); + auto expr_info_ptr = std::make_shared(expr, env_ptr); + expr_info_ptr->set_raw_expr_str(stmt_to_string(expr)); + return expr_info_ptr; + } + + LOG(INFO) << "parse error, expr: " << stmt_to_string(expr); + return nullptr; +} +``` \ No newline at end of file diff --git a/docs/solution/sub_modules/info.md b/docs/solution/sub_modules/info.md new file mode 100644 index 0000000..1493ac2 --- /dev/null +++ b/docs/solution/sub_modules/info.md @@ -0,0 +1,15 @@ +# info + +详细实现可参考: `convert/info`。 + +为了区分各种不同改写规则需要的信息,我们将每种改写规则对应的信息都定义在一个单独的类中,并都保存为 `Env` 的成员变量。 + +如下是一些常用的 `Info` 类: +- `CommonInfoLeaf`: 用于 `CommonInfo` 数据的改写,具体实现可参考: `convert/info/CommonInfoLeaf.h`。主要包含以下主要信息: + - `common_info_value_`: `CommonInfo` 枚举对应的 `int` 值。 + - `list_loop_var_`: 遍历 `common_info` 列表时候用的变量。 + - `common_info_enum_name_`: `CommonInfo` 枚举名。 +- `ActionDetailInfo`: 用于 `ActionDetail` 数据的改写,具体实现可参考: `convert/info/ActionDetailInfo.h`。 +- `AssignInfo`: `Assign` 语句的信息,具体实现可参考: `convert/info/AssignInfo.h`。 + +其他还有很多 `Info` 类,这里不再一一列举。 \ No newline at end of file diff --git a/docs/solution/sub_modules/matcher.md b/docs/solution/sub_modules/matcher.md new file mode 100644 index 0000000..47cb971 --- /dev/null +++ b/docs/solution/sub_modules/matcher.md @@ -0,0 +1,28 @@ +# matcher + +`ASTMatcher` 相关文档可参考: [ASTMatcher](https://clang.llvm.org/docs/LibASTMatchersReference.html) + +`matcher` 模块使用 `llvm` 提供的 `ASTMatcher` 功能,通过宏定义需要匹配的代码。 + +如下所示: + +```cpp + // 目前只能匹配到 typeAliasDecl(), 可能会有更好的匹配。 + auto TypeAliasMatcher = decl(typeAliasDecl(), + namedDecl(matchesName("Extract.*"))).bind("TypeAlias"); + type_alias_finder_.addMatcher(TypeAliasMatcher, &type_alias_callback_); + + // TK_IgnoreUnlessSpelledInSource 用来忽略模板 + auto FeatureDeclMatcher = traverse(TK_IgnoreUnlessSpelledInSource, + cxxRecordDecl(isDerivedFrom(hasName("FastFeature")), + unless(isExpandedFromMacro("DISALLOW_COPY_AND_ASSIGN")), + 
unless(isExpandedFromMacro("REGISTER_EXTRACTOR")))).bind("FeatureDecl"); + + match_finder_.addMatcher(FeatureDeclMatcher, &feature_decl_callback_); +``` + +`TypeAliasMatcher` 表示匹配模板类 `alias`, 且名字以 `Extract` 开头。 + +`FeatureDeclMatcher` 表示匹配父类是 `FastFeature` 的类定义,并且忽略来自 `DISALLOW_COPY_AND_ASSIGN` 和 `REGISTER_EXTRACTOR` 宏展开的结果。 + +详细实现可参考: `convert/ConvertAction.cpp`。 \ No newline at end of file diff --git a/docs/solution/sub_modules/proto_parser.md b/docs/solution/sub_modules/proto_parser.md new file mode 100644 index 0000000..912dd37 --- /dev/null +++ b/docs/solution/sub_modules/proto_parser.md @@ -0,0 +1,98 @@ +# proto_parser + +如前文所述,原始的 `adlog` 数据是 `protobuf` 定义的嵌套结构数据,如下图所示 + +![proto_dat](../../images/solution/proto_data.png) + +我们可以利用 `protobuf` 提供的反射功能,根据 `adlog` 节点的定义构造一棵 `ProtoNode` 树。 + +详细代码实现可参考: `convert/proto_parser`。 + +如下部分为构建 `ProtoNode` 树的核心逻辑: + +```cpp +std::unique_ptr ProtoParser::build_adlog_tree_from_descriptor(const Descriptor* descriptor, + const std::string& name, + int index, + int degree, + const std::string& prefix, + bool is_repeated) { + if (is_descriptor_common_info(descriptor)) { + return build_adlog_tree_common_info(descriptor, name, index, degree, prefix); + } + + std::string type_name = descriptor->name(); + if (is_repeated) { + type_name = std::string("repeated ") + type_name; + } + auto res = std::make_unique(name, type_name, index); + + for (int i = 0; i < descriptor->field_count(); i++) { + const auto field = descriptor->field(i); + + if (field->name() == "serialized_reco_user_info") { + // 不需要映射,直接获取原数据使用。 + auto node = std::make_unique(field->name(), field->type_name(), field->index()); + res->add_child(field->name(), std::move(node)); + } else if (const EnumDescriptor* enum_type = field->enum_type()) { + add_enum(res.get(), enum_type, field->type_name(), field->name()); + } else if (field->is_map()) { + // 必须在 message 判断之前, map 也是 message + const auto key_field = field->message_type()->FindFieldByName("key"); + const auto 
value_field = field->message_type()->FindFieldByName("value"); + + // value 是普通字段,直接当做叶子节点。 + if (is_basic_type(value_field->type())) { + std::ostringstream oss_type_name; + oss_type_name << "map<" << key_field->type_name() << ", " << value_field->type_name() << ">"; + auto node = std::make_unique(field->name(), oss_type_name.str(), field->index()); + + res->add_child(field->name(), std::move(node)); + } else { + // 中间的 map 按 key 的值展开,因此不需要关心 key,只需要按 value 继续展开。 + // 如 ActionDetail 类型是 map, field->name 为 key。 + // 但是在获取 adlog_path 时候必须传入 key 的值。 + auto node = std::move(build_adlog_tree_from_descriptor(value_field->message_type(), + field->name(), + field->index(), + degree + 1, + prefix + "." + field->name(), + is_repeated)); + res->add_child(field->name(), std::move(node)); + } + } else if (field->is_repeated()) { + if (is_basic_type(field->type())) { + auto node = std::make_unique(field->name(), + std::string("repeated ") + field->type_name(), + field->index()); + res->add_child(field->name(), std::move(node)); + } else { + auto node = std::move(build_adlog_tree_from_descriptor(field->message_type(), + field->name(), + field->index(), + degree + 1, + prefix + "." + field->name(), + true)); + res->add_child(field->name(), std::move(node)); + } + } else if (field->type() == FieldDescriptor::TYPE_MESSAGE) { + auto node = std::move(build_adlog_tree_from_descriptor(field->message_type(), + field->name(), + i, + degree + 1, + prefix + "." 
+ field->name(), + is_repeated)); + res->add_child(field->name(), std::move(node)); + } else if (is_basic_type(field->type())) { + auto node = std::make_unique(field->name(), field->type_name(), field->index()); + res->add_child(field->name(), std::move(node)); + } else { + LOG(INFO) << "ignore, field->type(): " << field->type_name() + << ", field_name: " << field->name(); + } + } + + return res; +} + +``` diff --git a/docs/solution/sub_modules/rewrite.md b/docs/solution/sub_modules/rewrite.md new file mode 100644 index 0000000..80abe0b --- /dev/null +++ b/docs/solution/sub_modules/rewrite.md @@ -0,0 +1,26 @@ +# rewrite + +详细实现可参考: +- `convert/matcher_callback`。 +- `convert/rule`。 + +有了之前步骤得到的各种信息,就可以进行代码改写了。详细改写逻辑在 `convert/rule` 中实现,每种改写规则会对应一个类。 +如 `ActionDetailRule` 用于处理 `action_detail` 的改写。 + +以下结合一些具体示例展示一些改写规则。 + +普通字段替换 + +![rewrite_normal_field](../../images/solution/rewrite_normal_field.png) + +简单 `common info` 替换 + +![rewrite_simple_common_info](../../images/solution/rewrite_simple_common_info.png) + +复杂 `common info` 替换 + +![rewrite_complex_common_info](../../images/solution/rewrite_complex_common_info.png) + +中间节点替换 + +![rewrite_photo_info_upload_time](../../images/solution/rewrite_photo_info_upload_time.png) diff --git a/docs/solution/sub_modules/visitor.md b/docs/solution/sub_modules/visitor.md new file mode 100644 index 0000000..a93c3e2 --- /dev/null +++ b/docs/solution/sub_modules/visitor.md @@ -0,0 +1,57 @@ +# visitor + +详细实现可参考: `convert/visitor`。 + +有了解析得到的 `ast` 后,我们就需要遍历这些节点。基本思路就是递归访问 `ast` 节点,并根据表达式不同的类型 +调用不同的处理逻辑。 + +如下所示: + +![visit_extract_method](../../images/solution/visit_extract_method.png) + +以下是部分 `visitor` 逻辑示例: + +```cpp +template +void ExtractMethodVisitor::recursive_visit(clang::Stmt *stmt, + Handler* handler_ptr, + Env* env_ptr) { + if (!stmt) { + return; + } + + if (clang::CompoundStmt* compound_stmt = dyn_cast(stmt)) { + for (clang::CompoundStmt::body_iterator start = compound_stmt->body_begin(); + start != 
compound_stmt->body_end(); + start++) { + recursive_visit(*start, handler_ptr, env_ptr); + handler_ptr->process(*start, env_ptr); + } + + } else if (clang::DeclStmt* decl_stmt = dyn_cast(stmt)) { + env_ptr->update(decl_stmt); + if (clang::VarDecl* var_decl = dyn_cast(decl_stmt->getSingleDecl())) { + if (var_decl->hasInit()) { + recursive_visit(var_decl->getInit(), handler_ptr, env_ptr); + } + if (const clang::Expr* init_expr = var_decl->getAnyInitializer()) { + recursive_visit(const_cast(init_expr), handler_ptr, env_ptr); + } + } + + handler_ptr->process(decl_stmt, env_ptr); + + // decl_stmt 比较特殊, decl_info 只维持当前变量, 访问完当前 decl_stmt 后立即销毁。 + env_ptr->clear_decl_info(); + + } else if (clang::CXXConstructExpr* cxx_construct_expr = dyn_cast(stmt)) { + ... + } else if (clang::BinaryOperator* binary_operator = dyn_cast(stmt)) { + ... + } else if (...) { + ... + } else { + LOG(INFO) << "unsupported stmt, trated as string: " << stmt_to_string(stmt); + } +} +``` diff --git a/docs/test/README.md b/docs/test/README.md new file mode 100644 index 0000000..dffcbbe --- /dev/null +++ b/docs/test/README.md @@ -0,0 +1,25 @@ +# 测试 + +`feature_list_debug.cc` 中保存需要改写的文件名: + +```cpp +// feature_list_debug.cc + +#include +#include +#include + +#include "teams/ad/ad_algorithm/feature/fast/impl/extract_ad_delay_label.h" + +namespace ks { +namespace ad_algorithm { + +} // namespace +} // namespace +``` + +执行如下命令可以进行改写 + +```bash +convert feature_list_debug.cc --cmd=convert --field-detail-filename=field.json --use_reco_user_info=false --overwrite --dump-ast -- pthread -MMD -march=haswell -march=haswell -Wno-deprecated-builtins -I/usr/local/include/c++/v1 -march=haswell -Iinfra/ -Ipub/src/infra/component_usage_tracker/src/ -Ithird_party/apache-arrow/arrow-8.0.1/cpp/src -fPIC -Wno-inconsistent-missing-override -Werror=return-type -Wtrigraphs -Wuninitialized -Wimplicit-const-int-float-conversion -Wwrite-strings -Wpointer-arith -Wmissing-include-dirs -Wno-unused-function -Wno-unused-parameter 
-Wno-ignored-qualifiers -Wno-implicit-fallthrough -Wno-deprecated-declarations -Wno-missing-field-initializers -Wno-missing-include-dirs -std=c++17 -Wvla -Wnon-virtual-dtor -Woverloaded-virtual -Wno-invalid-offsetof -Werror=non-virtual-dtor -O3 -Wformat=2 -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free -Wframe-larger-than=262143 -ggdb3 -Wno-format-nonliteral -Wno-register -DENABLE_KUIBA -DASIO_STANDALONE -DBRPC_WITH_GLOG=1 -DBTHREAD_USE_FAST_PTHREAD_MUTEX -DGFLAGS_NS=google -DHAVE_PTHREAD -DHAVE_ZLIB=1 -DNO_DUMMY_DECL -DOSATOMIC_USE_INLINED=1 -DPB_FIELD_32BIT -DTHREADED -D_ONLY_GET_SYNC_PAIRS_CONF -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -D__const__= -Wno-implicit-fallthrough -Wno-non-virtual-dtor -Wno-vla -D__STDC_FORMAT_MACROS -DUSE_SYMBOLIZE -DPIC -I.build/pb/c++ -Iprebuilt/include -I./third_party -I. -DNDEBUG -DUSE_TCMALLOC=1 -DENABLE_TCMALLOC=1 -nostdinc++ -nodefaultlibs -Werror -I/usr/java/default/include/ -I/usr/java/default/include/linux -MT .build/opt/objs/teams/ad/ad_algorithm/bs_feature/bs_fea_util/fast/frame/bs_leaf_util.o -o .build/opt/objs/teams/ad/ad_algorithm/bs_feature/bs_fea_util/fast/frame/bs_leaf_util.o +``` diff --git a/proto/ad_joint_labeled_log.proto b/proto/ad_joint_labeled_log.proto new file mode 100644 index 0000000..b9f1d0a --- /dev/null +++ b/proto/ad_joint_labeled_log.proto @@ -0,0 +1,103 @@ +syntax = "proto3"; + +package auto_cpp_rewriter; + +option cc_enable_arenas = true; + +/////// 打标信息 ///////////// +message LabelInfo { + bool click = 1; + bool like = 2; + bool follow = 3; + bool forward = 4; + map label_infos = 5; // 新label都放在这里,key是labelId +}; + +//////////////////////// 投放单元类型 /////////////////// +enum ItemType { + UNKNOWN = 0; + AD_DSP = 1; + FANS_TOP = 2; + NATURE_PHOTO = 3; +} + +message CommonInfoAttr { + enum Name { + UNKNOW_NAME = 0; + + LIVE_RECO_EMBEDDING_CTR_USER = 1; + APP_LIST = 2; + CLICK_LIST_FOLLOW = 3; + CLICK_LIST_HOT = 4; + CLICK_LIST_NEAR = 5; + 
FORWARD_LIST = 6; + } + + optional CommonTypeEnum.AttrType type = 1; + optional int64 name_value = 2; + optional int64 int_value = 3; + optional float float_value = 4; + optional bytes string_value = 5; + repeated int64 int_list_value = 6; + repeated float float_list_value = 7; + repeated bytes string_list_value = 8; + map map_int64_int64_value = 9; + map map_string_int64_value = 10; + map map_int64_float_value = 11; + map map_string_float_value = 12; +} + +///// Item1: AdDspInfo ///////////// +message AdDspInfo { + Creative creative = 1; + PhotoInfo photo_info = 2; + AdDspMmuInfo ad_dsp_mmu_info = 3; + LiveInfo live_info = 4; +} + +message Context { + string app_id = 1; + int64 page_id = 2; + int64 sub_page_id = 3; + repeated ContextInfoCommonAttr info_common_attr = 9; // 上下文通用信息类特征 +} + +message UserActionDetail { + repeated SimpleUserInfo follow = 1; + repeated SimplePhotoInfo like = 2; +} + +message DeviceInfo { + string id = 1; + string app_ver = 2; + uint32 platform = 3; // 对应client_id : ios or android + string visit_net = 4; //WIFI, 或者 上网制式, 如 EDGE,UMTS 等 + string visit_mod = 5; //设备型号,MI 3, Coolpad 2, IOS:iPhone8,1 + repeated string app_installed = 6; // 安装的app列表 + string os_version = 7; + string os_type = 8; +} + +message UserInfo { + uint64 id = 1; + UserActionDetail action_detail = 2; + repeated CommonInfoAttr common_info_attr = 3; +} + +/////// Item 表示一个投放单元 /////////////// +message Item { + string reason = 1; + ItemType type = 2; + uint64 id = 3; + repeated CommonInfoAttr common_info_attr = 4; +} + +//////////// AdJointLabeledLog ///////////////// +message AdJointLabeledLog { + uint64 llsid = 1; + uint32 deprecated_tab = 2; + uint64 time = 3; + uint32 page = 4; + UserInfo user_info = 5; + repeated Item item = 6; +}