diff --git a/src/ops/group_by.cc b/src/ops/group_by.cc index fafacc8460..8c0bbcbbf3 100644 --- a/src/ops/group_by.cc +++ b/src/ops/group_by.cc @@ -42,8 +42,6 @@ using Legion::TaskLauncher; using PCG::Node; -// TODO replace "n" by "num_local_experts" everywhere in the script (only did it partially) - void FFModel::group_by(const Tensor input, const Tensor assign, Tensor *outputs, @@ -70,15 +68,13 @@ void FFModel::group_by(const Tensor input, dims[i] = input->dims[i]; } -// // Define max expert capacity + // Define max expert capacity if (alpha != 0.0f) { -// int seq_len = input->dims[num_dims - 1]; dims[num_dims - 1] = (int)ceil(alpha * k_experts_per_tok / num_local_experts * input->dims[num_dims - 1]); } else { // MK: added this for dummy groupby dims[num_dims - 1] = 128; // TODO remove magic number } - // printf("ff.groupby output: %d %d %d %d\n", dims[0], dims[1], dims[2], dims[3]); for (int i = 0; i < num_local_experts; i++) { // Creating one tensor per expert, each with size (DATA_DIMS, @@ -163,22 +159,11 @@ Group_by::Group_by(FFModel &model, dims[i] = inputs[0]->dims[i]; } // set max expert size - // TODO: this is a dirty fix while we don't use aggregate - // dims[num_dims - 2].size = (int)ceil(alpha * k_experts_per_tok / n * inputs[0]->dims[1].size); - // MK why is this - 2 instead of - 1? Also, why no alpha? - if (alpha != 0.0f) { -// int seq_len = input->dims[num_dims - 1]; dims[num_dims - 2].size = (int)ceil(alpha * k_experts_per_tok / n * inputs[0]->dims[2].size); // was inputs[0]->dims[1].size - } else { // MK: added this for dummy groupby - dims[num_dims - 2].size = 128; // TODO remove magic number - //dims[num_dims - 2] = 128; } - // printf("groupby op output: %d %d %d %d\n", dims[0].size, dims[1].size, dims[2].size, dims[3].size); - // dims[num_dims - 2].size = 128; -// printf("max num tokens dim in output used to be %d\n", (int)ceil(alpha * k_experts_per_tok / n * inputs[0]->dims[1].size)); for (int i = 0; i < n; i++) { outputs[i] = model.create_parallel_tensor_legion_ordering(