diff --git a/lib/models/include/models/dlrm/dlrm.h b/lib/models/include/models/dlrm/dlrm.h index 9f941176ee..f89603ec47 100644 --- a/lib/models/include/models/dlrm/dlrm.h +++ b/lib/models/include/models/dlrm/dlrm.h @@ -4,9 +4,9 @@ * @brief DLRM model * * @details The DLRM implementation refers to the examples at - * https://github.com/flexflow/FlexFlow/blob/inference/examples/cpp/DLRM/dlrm.cc + * https://github.com/flexflow/FlexFlow/blob/78307b0e8beb5d41ee003be8b5db168c2b3ef4e2/examples/cpp/DLRM/dlrm.cc * and - * https://github.com/pytorch/torchrec/blob/main/torchrec/models/dlrm.py#L440. + * https://github.com/pytorch/torchrec/blob/7e7819e284398d7dc420e3bf149107ad310fa861/torchrec/models/dlrm.py#L440. */ #ifndef _FLEXFLOW_LIB_MODELS_INCLUDE_MODELS_DLRM_H @@ -19,6 +19,12 @@ namespace FlexFlow { // Helper functions to construct the DLRM model +/** + * @brief Get the default DLRM config. + * + * @details The configs here refer to the example at + * https://github.com/flexflow/FlexFlow/blob/78307b0e8beb5d41ee003be8b5db168c2b3ef4e2/examples/cpp/DLRM/dlrm.cc. + */ DLRMConfig get_default_dlrm_config(); tensor_guid_t create_dlrm_mlp(ComputationGraphBuilder &cgb, diff --git a/lib/models/include/models/dlrm/dlrm_config.struct.toml b/lib/models/include/models/dlrm/dlrm_config.struct.toml index 08fb6837ed..52488b3229 100644 --- a/lib/models/include/models/dlrm/dlrm_config.struct.toml +++ b/lib/models/include/models/dlrm/dlrm_config.struct.toml @@ -26,7 +26,7 @@ type = "int" [[fields]] name = "embedding_bag_size" -type = "size_t" +type = "int" [[fields]] name = "embedding_size" @@ -34,11 +34,11 @@ type = "std::vector" [[fields]] name = "dense_arch_layer_sizes" -type = "std::vector" +type = "std::vector" [[fields]] name = "over_arch_layer_sizes" -type = "std::vector" +type = "std::vector" [[fields]] name = "arch_interaction_op" @@ -46,7 +46,7 @@ type = "std::string" [[fields]] name = "batch_size" -type = "size_t" +type = "int" [[fields]] name = "seed" diff --git a/lib/models/src/models/dlrm/dlrm.cc b/lib/models/src/models/dlrm/dlrm.cc index e98407fe5a..74cf486e2a 100644 --- a/lib/models/src/models/dlrm/dlrm.cc +++ b/lib/models/src/models/dlrm/dlrm.cc @@ -6,12 +6,6 @@ namespace FlexFlow { -/** - * @brief Get the default DLRM config. - * - * @details The configs here refer to the example at - * https://github.com/flexflow/FlexFlow/blob/inference/examples/cpp/DLRM/dlrm.cc. - */ DLRMConfig get_default_dlrm_config() { return DLRMConfig{ /*embedding_dim=*/64, @@ -24,13 +18,13 @@ DLRMConfig get_default_dlrm_config() { 1000000, }, /*dense_arch_layer_sizes=*/ - std::vector{ + std::vector{ 4, 64, 64, }, /*over_arch_layer_sizes=*/ - std::vector{ + std::vector{ 64, 64, 2, @@ -44,10 +38,14 @@ DLRMConfig get_default_dlrm_config() { tensor_guid_t create_dlrm_mlp(ComputationGraphBuilder &cgb, DLRMConfig const &config, tensor_guid_t const &input, - std::vector const &mlp_layers) { + std::vector const &mlp_layers) { tensor_guid_t t = input; + + // Refer to + // https://github.com/facebookresearch/dlrm/blob/64063a359596c72a29c670b4fcc9450bb342e764/dlrm_s_pytorch.py#L218-L228 + // for example initializer. for (size_t i = 0; i < mlp_layers.size() - 1; i++) { - float std_dev = sqrt(2.0f / (mlp_layers[i + 1] + mlp_layers[i])); + float std_dev = sqrt(2.0f / (mlp_layers.at(i + 1) + mlp_layers.at(i))); InitializerAttrs projection_initializer = InitializerAttrs{NormInitializerAttrs{ /*seed=*/config.seed, @@ -55,7 +53,7 @@ tensor_guid_t create_dlrm_mlp(ComputationGraphBuilder &cgb, /*stddev=*/std_dev, }}; - std_dev = sqrt(2.0f / mlp_layers[i + 1]); + std_dev = sqrt(2.0f / mlp_layers.at(i + 1)); InitializerAttrs bias_initializer = InitializerAttrs{NormInitializerAttrs{ /*seed=*/config.seed, /*mean=*/0, @@ -63,7 +61,7 @@ tensor_guid_t create_dlrm_mlp(ComputationGraphBuilder &cgb, }}; t = cgb.dense(/*input=*/t, - /*outDim=*/mlp_layers[i + 1], + /*outDim=*/mlp_layers.at(i + 1), /*activation=*/Activation::RELU, /*use_bias=*/true, /*data_type=*/DataType::FLOAT, @@ -127,11 +125,13 @@ ComputationGraph get_dlrm_computation_graph(DLRMConfig const &config) { // Create input tensors std::vector sparse_inputs( config.embedding_size.size(), - create_input_tensor({config.batch_size, config.embedding_bag_size}, + create_input_tensor({static_cast(config.batch_size), + static_cast(config.embedding_bag_size)}, DataType::INT64)); tensor_guid_t dense_input = create_input_tensor( - {config.batch_size, config.dense_arch_layer_sizes.front()}, + {static_cast(config.batch_size), + static_cast(config.dense_arch_layer_sizes.front())}, DataType::FLOAT); // Construct the model