Fix the bug in regularizer matching (pytorch#23485)
Summary:
Pull Request resolved: pytorch#23485

In the previous diff D16326492, the "regularizer" in the dot processor is defined according to the input regularizer options through the function "get_emb_weighting_reg" in processor_utils.py. The option matching only works in local tests but fails in workflows. This bug prevents the regularizer from being added to actual models and has made the previous trimmed lasso implementation useless.
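For reference (not part of this diff), the trimmed lasso penalty that the regularizer.py code below constructs for a weight vector w is reg_lambda * (sum_i |w_i| - sum of the k largest |w_i|), i.e. the L1 norm restricted to the n - k smallest-magnitude entries.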

As evidence, before D16326492, flow f126010621 has the elastic regularizer added:
https://our.intern.facebook.com/intern/chronos/jobinstance/?jobinstanceid=5375243255&smc=chronos_gp_admin_client

{F171862755}

while after D16326492, the regularizer is gone in flow f127262007:
https://our.intern.facebook.com/intern/chronos/jobinstance/?jobinstanceid=5428982684&smc=chronos_gp_admin_client

{F171862770}

Differential Revision: D16535466

fbshipit-source-id: 6b0b5e95b2b14a0d6c6d65f96bab89529f4e79c5
Le Fang authored and facebook-github-bot committed Aug 2, 2019
1 parent 29881c7 commit a1b1027
Showing 2 changed files with 5 additions and 5 deletions.
caffe2/python/layer_model_helper.py (2 changes: 1 addition & 1 deletion)
@@ -636,7 +636,7 @@ def apply_regularizers_after_optimizer(
         grad_map,
         blob_to_device=None,
     ):
-        logger.info("apply regulizer after optimizer")
+        logger.info("apply regularizer after optimizer")
         CPU = muji.OnCPU()
         # if given, blob_to_device is a map from blob to device_option
         blob_to_device = blob_to_device or {}
caffe2/python/regularizer.py (8 changes: 4 additions & 4 deletions)
@@ -100,9 +100,9 @@ def __init__(self, reg_lambda, k):
 
     def _run_on_loss(self, net, param_init_net, param, grad=None):
         output_blob = net.NextScopedBlob(param + "_l1_trimmed_regularization")
-        abs = net.Abs(param, [net.NextScopedBlob("abs")])
+        abs = net.Abs([param], [net.NextScopedBlob("abs")])
         sum_abs = net.SumElements([abs], [net.NextScopedBlob("sum_abs")], average=False)
-        topk, _, _ = net.TopK(abs, [net.NextScopedBlob("topk"), 'id', 'flat_id'], k=self.k)
+        topk, _, _ = net.TopK([abs], [net.NextScopedBlob("topk"), net.NextScopedBlob("id"), net.NextScopedBlob("flat_id")], k=self.k)
         topk_sum = net.SumElements([topk], [net.NextScopedBlob("topk_sum")], average=False)
         net.Sub([sum_abs, topk_sum], [output_blob])
         net.Scale([output_blob], [output_blob], scale=self.reg_lambda)
@@ -155,9 +155,9 @@ def _run_on_loss(self, net, param_init_net, param, grad=None):
         net.Scale([l2_blob], [l2_blob], scale=self.l2)
 
         l1_blob = net.NextScopedBlob(param + "_l1_blob")
-        abs = net.Abs(param, [net.NextScopedBlob("abs")])
+        abs = net.Abs([param], [net.NextScopedBlob("abs")])
         sum_abs = net.SumElements([abs], [net.NextScopedBlob("sum_abs")], average=False)
-        topk, _, _ = net.TopK(abs, [net.NextScopedBlob("topk"), 'id', 'flat_id'], k=self.k)
+        topk, _, _ = net.TopK([abs], [net.NextScopedBlob("topk"), net.NextScopedBlob("id"), net.NextScopedBlob("flat_id")], k=self.k)
         topk_sum = net.SumElements([topk], [net.NextScopedBlob("topk_sum")], average=False)
         net.Sub([sum_abs, topk_sum], [l1_blob])
         net.Scale([l1_blob], [l1_blob], scale=self.l1)
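As a sanity check of what the fixed hunks build (Abs -> SumElements -> TopK -> SumElements -> Sub -> Scale), here is a minimal NumPy sketch of the same penalty; it is not part of this commit, and the helper name and sample values are illustrative. The switch from the bare 'id'/'flat_id' output names to net.NextScopedBlob presumably also avoids blob-name collisions when the regularizer is attached to more than one parameter.

import numpy as np

def trimmed_l1_penalty(w, k, reg_lambda):
    # |w| for every element, as net.Abs produces
    abs_w = np.abs(np.asarray(w)).ravel()
    # total L1 mass, as the first net.SumElements(..., average=False)
    sum_abs = abs_w.sum()
    # sum of the k largest magnitudes, as net.TopK followed by net.SumElements
    topk_sum = np.sort(abs_w)[-k:].sum()
    # keep only the n - k smallest magnitudes, then scale, as net.Sub + net.Scale
    return reg_lambda * (sum_abs - topk_sum)

# 0.01 * ((0.5 + 2.0 + 0.1 + 3.0 + 0.2) - (3.0 + 2.0)) = 0.008
print(trimmed_l1_penalty([0.5, -2.0, 0.1, 3.0, -0.2], k=2, reg_lambda=0.01))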
