fix layer names for save_inference_tensors_to_file
goliaro committed Nov 8, 2023
1 parent b11c5e9 commit fca16cc
Showing 23 changed files with 135 additions and 13 deletions.
12 changes: 11 additions & 1 deletion include/flexflow/operator.h
@@ -258,11 +258,21 @@ class Op {
mkdir(folder_path, 0700);
}
// output base filepath, shared by all tensors from the same operator
std::string op_name_without_uid = std::string(m->op_name);
size_t last_underscore = op_name_without_uid.length() - 1;
for (int i = op_name_without_uid.length() - 1; i > 0; i--) {
if (!(std::isdigit(m->op_name[i]) || m->op_name[i] == '_')) {
break;
} else if (m->op_name[i] == '_') {
last_underscore = i;
}
}
op_name_without_uid.erase(last_underscore);
std::string base_filepath =
"./inference_tensors/model_" + std::to_string(m->layer_guid.model_id) +
"_decoding-step_" + std::to_string(m->decoding_step) + "_layer-num_" +
std::to_string(m->layer_guid.transformer_layer_id) + "_layer-name_" +
m->op_name + "_shard-id_" + std::to_string(shard_id);
op_name_without_uid + "_shard-id_" + std::to_string(shard_id);
// save batch config, if passed
if (bc != nullptr) {
bc->save_to_file(base_filepath + "_batch-config");
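For context, the loop added above strips the trailing numeric uid that FlexFlow appends to operator names, so the "_layer-name_" part of the dump path now uses the stable layer name (hypothetically "..._layer-name_layers_11_attention_shard-id_0" instead of "..._layer-name_layers_11_attention_42_shard-id_0"). A minimal standalone sketch of the same stripping logic; the example name is made up, not taken from the commit:

#include <cctype>
#include <iostream>
#include <string>

// Mirrors the loop added to save_inference_tensors_to_file: walk backwards
// over the trailing run of digits and underscores, remember the earliest
// underscore in that run, and erase from there to the end of the string.
std::string strip_uid(std::string op_name) {
  size_t last_underscore = op_name.length() - 1;
  for (int i = op_name.length() - 1; i > 0; i--) {
    if (!(std::isdigit(op_name[i]) || op_name[i] == '_')) {
      break;
    } else if (op_name[i] == '_') {
      last_underscore = i;
    }
  }
  op_name.erase(last_underscore);
  return op_name;
}

int main() {
  // Prints "layers_11_attention": inner digits survive because they are
  // followed by non-digit characters; only the trailing "_42" uid is cut.
  std::cout << strip_uid("layers_11_attention_42") << std::endl;
}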
1 change: 1 addition & 0 deletions include/flexflow/ops/add_bias_residual_layer_norm_params.h
@@ -12,6 +12,7 @@ struct AddBiasResidualLayerNormParams {
bool elementwise_affine;
float eps;
bool use_bias;
char name[MAX_OPNAME];
bool is_valid(
std::pair<ParallelTensorShape, ParallelTensorShape> const &) const;
};
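The same char name[MAX_OPNAME] field is added to each of the Params structs below, so an operator re-created from its params (for example during graph deserialization, as in the *.cc changes further down) keeps its human-readable layer name. A minimal sketch of the intent, using a hypothetical ExampleParams struct and a made-up MAX_OPNAME value rather than FlexFlow's own definitions:

#include <cstdio>
#include <cstring>

constexpr int MAX_OPNAME = 128; // assumption: the real constant lives in the FlexFlow headers

struct ExampleParams {
  float eps;
  char name[MAX_OPNAME]; // carried alongside the numeric fields
};

// Analogue of get_params(): copy the operator's current name into the params.
ExampleParams make_params(char const *op_name, float eps) {
  ExampleParams p = {}; // zero-init keeps name NUL-terminated
  p.eps = eps;
  if (op_name != nullptr) {
    std::strcpy(p.name, op_name); // mirrors the strcpy(params.name, this->name) calls below
  }
  return p;
}

int main() {
  ExampleParams p = make_params("layers_0_feed_forward", 1e-5f); // example name, made up
  // A node rebuilt from p can now be constructed with p.name instead of a
  // caller-supplied name, which is what the changed constructors below do.
  std::printf("%s (eps=%g)\n", p.name, p.eps);
}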
1 change: 1 addition & 0 deletions include/flexflow/ops/embedding_params.h
@@ -12,6 +12,7 @@ struct EmbeddingParams {
LayerID layer_guid;
AggrMode aggr;
DataType data_type;
char name[MAX_OPNAME];

bool is_valid(ParallelTensorShape const &) const;
};
1 change: 1 addition & 0 deletions include/flexflow/ops/inc_multihead_self_attention_params.h
@@ -16,6 +16,7 @@ struct IncMultiHeadSelfAttentionParams {
scaling_query, qk_prod_scaling, position_bias;
DataType quantization_type;
bool offload;
char name[MAX_OPNAME];
bool is_valid(ParallelTensorShape const &) const;
};

1 change: 1 addition & 0 deletions include/flexflow/ops/linear_params.h
@@ -20,6 +20,7 @@ class LinearParams {
float kernel_reg_lambda;
DataType quantization_type;
bool offload;
char name[MAX_OPNAME];

bool is_valid(ParallelTensorShape const &input_shape) const;
void solve_dims(const ParallelTensor input,
1 change: 1 addition & 0 deletions include/flexflow/ops/residual_layer_norm_params.h
@@ -13,6 +13,7 @@ struct ResidualLayerNormParams {
float eps;
bool use_bias;
bool use_two_residuals;
char name[MAX_OPNAME];
bool is_valid(std::tuple<ParallelTensorShape,
ParallelTensorShape,
ParallelTensorShape> const &) const;
1 change: 1 addition & 0 deletions include/flexflow/ops/residual_rms_norm_params.h
@@ -11,6 +11,7 @@ struct ResidualRMSNormParams {
LayerID layer_guid;
float eps;
int dim;
char name[MAX_OPNAME];
bool is_valid(
std::pair<ParallelTensorShape, ParallelTensorShape> const &input) const;
};
1 change: 1 addition & 0 deletions include/flexflow/ops/rms_norm_params.h
@@ -11,6 +11,7 @@ struct RMSNormParams {
LayerID layer_guid;
float eps;
int dim;
char name[MAX_OPNAME];
bool is_valid(ParallelTensorShape const &) const;
};

1 change: 1 addition & 0 deletions include/flexflow/ops/sigmoid_silu_multi_params.h
@@ -8,6 +8,7 @@ namespace FlexFlow {

struct SigmoidSiluMultiParams {
LayerID layer_guid;
char name[MAX_OPNAME];
bool is_valid(
std::pair<ParallelTensorShape, ParallelTensorShape> const &) const;
};
1 change: 1 addition & 0 deletions include/flexflow/ops/spec_inc_multihead_self_attention_params.h
@@ -13,6 +13,7 @@ struct SpecIncMultiHeadSelfAttentionParams {
float dropout, scaling_factor;
bool qkv_bias, final_bias, add_zero_attn, apply_rotary_embedding,
scaling_query, qk_prod_scaling, position_bias;
char name[MAX_OPNAME];

bool is_valid(ParallelTensorShape const &) const;
};
1 change: 1 addition & 0 deletions include/flexflow/ops/tree_inc_multihead_self_attention_params.h
@@ -16,6 +16,7 @@ struct TreeIncMultiHeadSelfAttentionParams {
scaling_query, qk_prod_scaling, position_bias;
DataType quantization_type;
bool offload;
char name[MAX_OPNAME];
bool is_valid(ParallelTensorShape const &) const;
};

8 changes: 6 additions & 2 deletions inference/incr_decoding/incr_decoding.cc
@@ -263,8 +263,12 @@ void FlexFlow::top_level_task(Task const *task,
peft_model_name.empty()
? LoraLinearConfig::DefaultConfig
: LoraLinearConfig(file_paths.cache_folder_path, peft_model_name);
PEFTModelID peft_model_id = model.register_peft_model(
LoraLinearConfig::DefaultConfig /*mlp_first*/, mlp_second /*mlp_second*/);
PEFTModelID peft_model_id =
peft_model_name.empty()
? PEFTModelID::NO_ID
: model.register_peft_model(
LoraLinearConfig::DefaultConfig /*mlp_first*/,
mlp_second /*mlp_second*/);

int total_num_requests = 0;
{
12 changes: 11 additions & 1 deletion src/ops/add_bias_residual_layer_norm.cc
@@ -58,6 +58,9 @@ AddBiasResidualLayerNormParams AddBiasResidualLayerNorm::get_params() const {
params.elementwise_affine = this->elementwise_affine;
params.eps = this->eps;
params.use_bias = this->use_bias;
if (this->name != nullptr) {
strcpy(params.name, this->name);
}
return params;
}

@@ -213,7 +216,7 @@ AddBiasResidualLayerNorm::AddBiasResidualLayerNorm(
params.use_bias,
params.eps,
allocate_weights,
name) {}
params.name) {}

AddBiasResidualLayerNorm::AddBiasResidualLayerNorm(
FFModel &model,
@@ -1027,6 +1030,8 @@ void AddBiasResidualLayerNorm::serialize(Legion::Serializer &sez) const {
sez.serialize(this->elementwise_affine);
sez.serialize(this->eps);
sez.serialize(this->use_bias);
sez.serialize(strlen(this->name));
sez.serialize(this->name, strlen(this->name));
}

using PCG::Node;
@@ -1055,13 +1060,18 @@ Node AddBiasResidualLayerNorm::deserialize(FFModel &ff,
dez.deserialize(elementwise_affine);
dez.deserialize(eps);
dez.deserialize(use_bias);
size_t name_len;
char name[MAX_OPNAME] = {0};
dez.deserialize(name_len);
dez.deserialize(name, name_len);

AddBiasResidualLayerNormParams params;
params.layer_guid = layer_guid;
params.axes = axes;
params.elementwise_affine = elementwise_affine;
params.eps = eps;
params.use_bias = use_bias;
strcpy(params.name, name);
return ff.get_or_create_node<AddBiasResidualLayerNorm>({inputs[0], inputs[1]},
params);
}
5 changes: 4 additions & 1 deletion src/ops/inc_multihead_self_attention.cc
@@ -567,7 +567,7 @@ IncMultiHeadSelfAttention::IncMultiHeadSelfAttention(
params.quantization_type,
params.offload,
params.tensor_parallelism_degree,
name) {}
params.name) {}

void IncMultiHeadSelfAttention::init_inference(
FFModel const &ff,
@@ -1055,6 +1055,9 @@ IncMultiHeadSelfAttentionParams IncMultiHeadSelfAttention::get_params() const {
params.quantization_type = this->quantization_type;
params.offload = this->offload;
params.num_kv_heads = this->num_kv_heads;
if (this->name != nullptr) {
strcpy(params.name, this->name);
}

return params;
}
12 changes: 11 additions & 1 deletion src/ops/linear.cc
@@ -190,7 +190,7 @@ Linear::Linear(FFModel &model,
params.quantization_type,
params.offload,
allocate_weights,
name) {}
params.name) {}

Linear::Linear(FFModel &model,
LayerID const &_layer_guid,
@@ -1354,6 +1354,8 @@ void Linear::serialize(Legion::Serializer &sez) const {
sez.serialize(this->data_type);
sez.serialize(this->quantization_type);
sez.serialize(this->offload);
sez.serialize(strlen(this->name));
sez.serialize(this->name, strlen(this->name));
}

/* static */
@@ -1384,6 +1386,10 @@ Node Linear::deserialize(FFModel &ff,
dez.deserialize(data_type);
dez.deserialize(quantization_type);
dez.deserialize(offload);
size_t name_len;
char name[MAX_OPNAME] = {0};
dez.deserialize(name_len);
dez.deserialize(name, name_len);

LinearParams params;
params.activation = activation;
@@ -1395,6 +1401,7 @@ Node Linear::deserialize(FFModel &ff,
params.layer_guid = layer_guid;
params.quantization_type = quantization_type;
params.offload = offload;
strcpy(params.name, name);
return ff.get_or_create_node<Linear>(inputs[0], params);
}

@@ -1409,6 +1416,9 @@ LinearParams Linear::get_params() const {
params.kernel_reg_lambda = this->kernel_reg_lambda;
params.quantization_type = this->quantization_type;
params.offload = this->offload;
if (this->name != nullptr) {
strcpy(params.name, this->name);
}

return params;
}
2 changes: 1 addition & 1 deletion src/ops/lora_linear.cc
@@ -674,7 +674,7 @@ Node LoraLinear::deserialize(FFModel &ff,
size_t id, transformer_layer_id, deserialized_model_id;
OperatorType op_type;
size_t name_len;
char name[MAX_OPNAME];
char name[MAX_OPNAME] = {0};
dez.deserialize(id);
dez.deserialize(transformer_layer_id);
dez.deserialize(deserialized_model_id);
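The "= {0}" initialization above (also used by the other deserialize functions in this commit) matters because the name is serialized as strlen(name) bytes with no terminating NUL, and the deserializer copies back exactly name_len bytes; the zero-initialized buffer is what makes the later strcpy into params.name read a proper C string. A standalone illustration, with plain memcpy standing in for the Legion serializer/deserializer:

#include <cstdio>
#include <cstring>

int main() {
  // Bytes as written by sez.serialize(name, strlen(name)): no trailing '\0'.
  char const serialized[] = {'a', 't', 't', 'e', 'n', 't', 'i', 'o', 'n'};
  size_t name_len = sizeof(serialized);

  char name[64] = {0};                     // zero-init: bytes past the copy stay '\0'
  std::memcpy(name, serialized, name_len); // plays the role of dez.deserialize(name, name_len)

  std::printf("%s\n", name); // prints "attention"; the terminator comes from the {0} init
}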
12 changes: 11 additions & 1 deletion src/ops/residual_layer_norm.cc
@@ -63,6 +63,9 @@ ResidualLayerNormParams ResidualLayerNorm::get_params() const {
params.eps = this->eps;
params.use_bias = this->use_bias;
params.use_two_residuals = this->use_two_residuals;
if (this->name != nullptr) {
strcpy(params.name, this->name);
}
return params;
}

@@ -228,7 +231,7 @@ ResidualLayerNorm::ResidualLayerNorm(
params.use_bias,
params.eps,
allocate_weights,
name) {}
params.name) {}

ResidualLayerNorm::ResidualLayerNorm(FFModel &model,
LayerID const &_layer_guid,
@@ -1069,6 +1072,8 @@ void ResidualLayerNorm::serialize(Legion::Serializer &sez) const {
sez.serialize(this->eps);
sez.serialize(this->use_bias);
sez.serialize(this->use_two_residuals);
sez.serialize(strlen(this->name));
sez.serialize(this->name, strlen(this->name));
}

using PCG::Node;
@@ -1098,6 +1103,10 @@ Node ResidualLayerNorm::deserialize(FFModel &ff,
dez.deserialize(eps);
dez.deserialize(use_bias);
dez.deserialize(use_two_residuals);
size_t name_len;
char name[MAX_OPNAME] = {0};
dez.deserialize(name_len);
dez.deserialize(name, name_len);
if (use_two_residuals) {
assert(num_inputs == 3);
} else {
@@ -1111,6 +1120,7 @@ Node ResidualLayerNorm::deserialize(FFModel &ff,
params.eps = eps;
params.use_bias = use_bias;
params.use_two_residuals = use_two_residuals;
strcpy(params.name, name);
if (use_two_residuals) {
return ff.get_or_create_node<ResidualLayerNorm>(
{inputs[0], inputs[1], inputs[2]}, params);
12 changes: 11 additions & 1 deletion src/ops/residual_rms_norm.cc
@@ -55,6 +55,9 @@ ResidualRMSNormParams ResidualRMSNorm::get_params() const {
params.layer_guid = this->layer_guid;
params.eps = this->eps;
params.dim = this->dim;
if (this->name != nullptr) {
strcpy(params.name, this->name);
}
return params;
}

@@ -141,7 +144,7 @@ ResidualRMSNorm::ResidualRMSNorm(
params.eps,
params.dim,
allocate_weights,
name) {}
params.name) {}

ResidualRMSNorm::ResidualRMSNorm(
FFModel &model,
@@ -460,6 +463,8 @@ void ResidualRMSNorm::serialize(Legion::Serializer &sez) const {
sez.serialize(this->layer_guid.model_id);
sez.serialize(this->eps);
sez.serialize(this->dim);
sez.serialize(strlen(this->name));
sez.serialize(this->name, strlen(this->name));
}

using PCG::Node;
@@ -478,10 +483,15 @@ Node ResidualRMSNorm::deserialize(FFModel &ff,
LayerID layer_guid(id, transformer_layer_id, deserialized_model_id);
dez.deserialize(eps);
dez.deserialize(dim);
size_t name_len;
char name[MAX_OPNAME] = {0};
dez.deserialize(name_len);
dez.deserialize(name, name_len);
ResidualRMSNormParams params;
params.layer_guid = layer_guid;
params.eps = eps;
params.dim = dim;
strcpy(params.name, name);
return ff.get_or_create_node<ResidualRMSNorm>({inputs[0], inputs[1]}, params);
}

13 changes: 12 additions & 1 deletion src/ops/rms_norm.cc
@@ -53,6 +53,9 @@ RMSNormParams RMSNorm::get_params() const {
params.layer_guid = this->layer_guid;
params.eps = this->eps;
params.dim = this->dim;
if (this->name != nullptr) {
strcpy(params.name, this->name);
}
return params;
}

@@ -583,6 +586,8 @@ void RMSNorm::serialize(Legion::Serializer &sez) const {
sez.serialize(this->layer_guid.model_id);
sez.serialize(this->eps);
sez.serialize(this->dim);
sez.serialize(strlen(this->name));
sez.serialize(this->name, strlen(this->name));
}

using PCG::Node;
@@ -602,18 +607,24 @@ Node RMSNorm::deserialize(FFModel &ff,
LayerID layer_guid(id, transformer_layer_id, deserialized_model_id);
dez.deserialize(eps);
dez.deserialize(dim);
size_t name_len;
char name[MAX_OPNAME] = {0};
dez.deserialize(name_len);
dez.deserialize(name, name_len);
RMSNormParams params;
params.layer_guid = layer_guid;
params.eps = eps;
params.dim = dim;
strcpy(params.name, name);

return ff.get_or_create_node<RMSNorm>(inputs[0], params);
}

Op *RMSNorm::materialize(FFModel &ff,
ParallelTensor inputs[],
int num_inputs) const {
RMSNormParams params = get_params();
return new RMSNorm(ff, params, inputs[0], true, this->name);
return new RMSNorm(ff, params, inputs[0], true, params.name);
}

bool RMSNorm::measure_operator_cost(Simulator *sim,