Merge pull request #17 from jalammar/v0.0.11
V0.0.11
jalammar authored Jan 4, 2021
2 parents 40ff4cd + 8f96065 commit c3b1528
Showing 6 changed files with 123 additions and 67 deletions.
2 changes: 1 addition & 1 deletion setup.py
@@ -25,7 +25,7 @@ def read(*names, **kwargs):

setup(
name='ecco',
- version='0.0.10',
+ version='0.0.11',
license='BSD-3-Clause',
description='Visualization tools for NLP machine learning models.',
long_description='%s\n%s' % (
23 changes: 14 additions & 9 deletions src/ecco/__init__.py
@@ -1,19 +1,24 @@
- __version__ = '0.0.10'
+ __version__ = '0.0.11'
from ecco.lm import LM, MockGPT, MockGPTTokenizer
from transformers import AutoTokenizer, AutoModelForCausalLM

- def from_pretrained(hf_model_id, activations=False, attention=False):
+ def from_pretrained(hf_model_id,
+                     activations=False,
+                     attention=False,
+                     hidden_states=True,
+                     activations_layer_nums=None,
+                     ):
if hf_model_id == "mockGPT":
tokenizer = MockGPTTokenizer()
model = MockGPT()
else:
tokenizer = AutoTokenizer.from_pretrained(hf_model_id)
model = AutoModelForCausalLM.from_pretrained(hf_model_id,
- output_hidden_states=True,
+ output_hidden_states=hidden_states,
output_attentions=attention)
- if activations:
-     lm = LM(model, tokenizer, collect_activations_flag=True)
-     return lm
- else:
-     lm = LM(model, tokenizer)
-     return lm

+ lm_kwargs = {
+     'collect_activations_flag': activations,
+     'collect_activations_layer_nums': activations_layer_nums}
+ lm = LM(model, tokenizer, **lm_kwargs)
+ return lm
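
Taken together, the new signature lets callers opt in or out of hidden states and restrict activation capture to specific layers. A hypothetical call, roughly (the model id and layer numbers below are illustrative, not part of the commit):

```python
import ecco

# Illustrative only: a small GPT-2-style checkpoint and an arbitrary layer selection.
lm = ecco.from_pretrained('distilgpt2',
                          activations=True,               # capture FFN neuron activations
                          attention=False,                # skip attention weights
                          hidden_states=True,             # request per-layer hidden states
                          activations_layer_nums=[0, 5])  # only collect layers 0 and 5
```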
32 changes: 26 additions & 6 deletions src/ecco/attribution.py
@@ -2,9 +2,9 @@
import numpy as np


- def saliency(prediction_logit, token_ids_tensor_one_hot, norm=True):
+ def saliency(prediction_logit, token_ids_tensor_one_hot, norm=True, retain_graph=True):
# Back-propagate the gradient from the selected output logit
- prediction_logit.backward(retain_graph=True)
+ prediction_logit.backward(retain_graph=retain_graph)

# token_ids_tensor_one_hot.grad is the gradient propagated to every embedding dimension of
# the input tokens.
@@ -22,11 +22,11 @@ def saliency(prediction_logit, token_ids_tensor_one_hot, norm=True):
return token_importance


- def saliency_on_d_embeddings(prediction_logit, inputs_embeds, aggregation="L2"):
+ def saliency_on_d_embeddings(prediction_logit, inputs_embeds, aggregation="L2", retain_graph=True):
inputs_embeds.retain_grad()

# Back-propagate the gradient from the selected output logit
- prediction_logit.backward(retain_graph=True)
+ prediction_logit.backward(retain_graph=retain_graph)

# inputs_embeds.grad
# token_ids_tensor_one_hot.grad is the gradient propagated to every embedding dimension of
@@ -49,11 +49,11 @@ def saliency_on_d_embeddings(prediction_logit, inputs_embeds, aggregation="L2"):
return token_importance


- def gradient_x_inputs_attribution(prediction_logit, inputs_embeds):
+ def gradient_x_inputs_attribution(prediction_logit, inputs_embeds, retain_graph=True):

inputs_embeds.retain_grad()
# back-prop gradient
- prediction_logit.backward(retain_graph=True)
+ prediction_logit.backward(retain_graph=retain_graph)
grad = inputs_embeds.grad
# This should be equivalent to
# grad = torch.autograd.grad(prediction_logit, inputs_embeds)[0]
@@ -71,3 +71,23 @@ def gradient_x_inputs_attribution(prediction_logit, inputs_embeds):
# gradients accumulating
inputs_embeds.grad.data.zero_()
return token_importance_normalized

+ def compute_saliency_scores(prediction_logit,
+                             token_ids_tensor_one_hot,
+                             inputs_embeds,
+                             gradient_kwargs={},
+                             gradient_x_input_kwargs={},
+                             ):
+     results = {}
+
+     results['grad_x_input'] = gradient_x_inputs_attribution(prediction_logit,
+                                                             inputs_embeds,
+                                                             retain_graph=True,
+                                                             **gradient_x_input_kwargs)
+
+     results['gradient'] = saliency(prediction_logit,
+                                    token_ids_tensor_one_hot,
+                                    retain_graph=False,
+                                    **gradient_kwargs)
+
+     return results
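
For orientation, the new compute_saliency_scores helper runs both attribution methods off a single backward graph: it retains the graph for the gradient-x-input pass and releases it on the plain gradient pass. A self-contained toy sketch of calling it (the embedding matrix, linear head, and token ids below are illustrative, not part of the commit):

```python
import torch
from ecco.attribution import compute_saliency_scores

# Toy setup (illustrative only): vocab of 10, embedding dim 4, 3 input tokens.
vocab_size, emb_dim = 10, 4
embedding_matrix = torch.randn(vocab_size, emb_dim)

token_ids = torch.tensor([1, 4, 7])
token_ids_tensor_one_hot = torch.nn.functional.one_hot(
    token_ids, vocab_size).float().requires_grad_(True)

# Obtain embeddings via the one-hot matmul so gradients flow back to the one-hot input.
inputs_embeds = token_ids_tensor_one_hot @ embedding_matrix

# Stand-in for a language model head: logits per position.
lm_head = torch.nn.Linear(emb_dim, vocab_size)
logits = lm_head(inputs_embeds)
prediction_logit = logits[-1, 3]  # logit of an arbitrary token id at the last position

scores = compute_saliency_scores(prediction_logit,
                                 token_ids_tensor_one_hot,
                                 inputs_embeds)
print(scores['gradient'])      # per-token saliency
print(scores['grad_x_input'])  # per-token gradient-x-input attribution
```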
89 changes: 51 additions & 38 deletions src/ecco/lm.py
@@ -30,6 +30,7 @@ def sample_output_token(scores, do_sample, temperature, top_k, top_p):
else:
# Greedy decoding
prediction_id = torch.argmax(scores, dim=-1)
+ prediction_id = prediction_id.squeeze()
return prediction_id


@@ -40,10 +41,10 @@ def _one_hot(token_ids, vocab_size):
def activations_dict_to_array(activations_dict):
# print(activations_dict[0].shape)
activations = []
- for i in range(len(activations_dict)):
+ for i in sorted(activations_dict.keys()):
activations.append(activations_dict[i])

- activations = np.squeeze(np.array(activations))
+ activations = np.concatenate(activations, axis=0)
return np.swapaxes(activations, 1, 2)


Expand All @@ -54,7 +55,9 @@ class LM(object):

def __init__(self, model, tokenizer,
collect_activations_flag=False,
- collect_gen_activations_flag=False):
+ collect_gen_activations_flag=False,
+ collect_activations_layer_nums=None,  # None --> collect for all layers
+ ):
self.model = model
if torch.cuda.is_available():
self.model = model.to('cuda')
@@ -68,6 +71,7 @@ def __init__(self, model, tokenizer,
# Neuron Activation
self.collect_activations_flag = collect_activations_flag
self.collect_gen_activations_flag = collect_gen_activations_flag
+ self.collect_activations_layer_nums = collect_activations_layer_nums
self._hooks = {}
self._reset()
self._attach_hooks(self.model)
@@ -97,12 +101,10 @@ def _generate_token(self, input_ids, past, do_sample: bool, temperature: float,
"""
inputs_embeds, token_ids_tensor_one_hot = self._get_embeddings(input_ids)

- output = self.model(inputs_embeds=inputs_embeds, return_dict=True)
- predict = output[0]
- past = output[1] # We're not using past because by presenting all the past tokens at every
- # step, we can get feature importance attribution. Let me know if it can be done with past
+ output = self.model(inputs_embeds=inputs_embeds, return_dict=True, use_cache=False)
+ predict = output.logits

- scores = predict[-1, :]
+ scores = predict[-1:, :]

prediction_id = sample_output_token(scores, do_sample, temperature, top_k, top_p)
# Print the sampled token
@@ -114,18 +116,24 @@ def _generate_token(self, input_ids, past, do_sample: bool, temperature: float,
prediction_logit = predict[inputs_embeds.shape[0] - 1][prediction_id]

if attribution_flag:
- saliency_scores = saliency(prediction_logit, token_ids_tensor_one_hot)
+ saliency_results = compute_saliency_scores(prediction_logit, token_ids_tensor_one_hot, inputs_embeds)

if 'gradient' not in self.attributions:
self.attributions['gradient'] = []
- self.attributions['gradient'].append(saliency_scores.cpu().detach().numpy())
+ self.attributions['gradient'].append(saliency_results['gradient'].cpu().detach().numpy())

- grad_x_input = gradient_x_inputs_attribution(prediction_logit,
-                                               inputs_embeds)
if 'grad_x_input' not in self.attributions:
self.attributions['grad_x_input'] = []
- self.attributions['grad_x_input'].append(grad_x_input.cpu().detach().numpy())
+ self.attributions['grad_x_input'].append(saliency_results['grad_x_input'].cpu().detach().numpy())

+ output['logits'] = None # free tensor memory we won't use again
+
+ # detach(): don't need grads here
+ # cpu(): not used by GPU during generation; may lead to GPU OOM if left on GPU during long generations
+ if getattr(output, "hidden_states", None) is not None:
+     output.hidden_states = tuple([h.cpu().detach() for h in output.hidden_states])

- return prediction_id, output, past
+ return prediction_id, output

def generate(self, input_str: str, max_length: Optional[int] = 128,
temperature: Optional[float] = None,
@@ -163,13 +171,13 @@ def generate(self, input_str: str, max_length: Optional[int] = 128,
viz_id = self.display_input_sequence(input_ids)

while cur_len < max_length:
- output_token_id, output, past = self._generate_token(input_ids,
-                                                       past,
-                                                       # Note, this is not currently used
-                                                       temperature=temperature,
-                                                       top_k=top_k, top_p=top_p,
-                                                       do_sample=do_sample,
-                                                       attribution_flag=attribution)
+ output_token_id, output = self._generate_token(input_ids,
+                                                past,
+                                                # Note, this is not currently used
+                                                temperature=temperature,
+                                                top_k=top_k, top_p=top_p,
+                                                do_sample=do_sample,
+                                                attribution_flag=attribution)

if (get_model_output):
outputs.append(output)
@@ -189,16 +197,14 @@ def generate(self, input_str: str, max_length: Optional[int] = 128,
if activations_dict != {}:
self.activations = activations_dict_to_array(activations_dict)

- hidden_states = output[2]
+ hidden_states = getattr(output, "hidden_states", None)
tokens = []
for i in input_ids:
token = self.tokenizer.decode([i])
tokens.append(token)

attributions = self.attributions
- attn = None
- if len(output) == 4:
-     attn = output[-1]
+ attn = getattr(output, "attentions", None)
return OutputSeq(**{'tokenizer': self.tokenizer,
'token_ids': input_ids,
'n_input_tokens': n_input_tokens,
@@ -209,6 +215,7 @@ def generate(self, input_str: str, max_length: Optional[int] = 128,
'model_outputs': outputs,
'attribution': attributions,
'activations': self.activations,
+ 'collect_activations_layer_nums': self.collect_activations_layer_nums,
'lm_head': self.model.lm_head,
'device': self.device})

@@ -256,13 +263,16 @@ def _get_activations_hook(self, name: str, input_):
# Extract the number of the layer from the name
layer_number = int(name.split('.')[2])

- if layer_number not in self._all_activations_dict:
-     self._all_activations_dict[layer_number] = [0]
+ collecting_this_layer = (self.collect_activations_layer_nums is None) or (layer_number in self.collect_activations_layer_nums)

- # Overwrite the previous step activations. This collects all activations in the last step
- # Assuming all input tokens are presented as input, no "past"
- # The inputs to c_proj already pass through the gelu activation function
- self._all_activations_dict[layer_number][0] = input_[0][0].detach().cpu().numpy()
+ if collecting_this_layer:
+     if layer_number not in self._all_activations_dict:
+         self._all_activations_dict[layer_number] = [0]
+
+     # Overwrite the previous step activations. This collects all activations in the last step
+     # Assuming all input tokens are presented as input, no "past"
+     # The inputs to c_proj already pass through the gelu activation function
+     self._all_activations_dict[layer_number][0] = input_[0][0].detach().cpu().numpy()

def _get_generation_activations_hook(self, name: str, input_):
"""
@@ -273,12 +283,15 @@ def _get_generation_activations_hook(self, name: str, input_):
# Extract the number of the layer from the name
layer_number = int(name.split('.')[2])

- if layer_number not in self._generation_activations_dict:
-     self._generation_activations_dict[layer_number] = []
+ collecting_this_layer = (self.collect_activations_layer_nums is None) or (layer_number in self.collect_activations_layer_nums)
+
+ if collecting_this_layer:
+     if layer_number not in self._generation_activations_dict:
+         self._generation_activations_dict[layer_number] = []

- # Accumulate in dict
- # The inputs to c_proj already pass through the gelu activation function
- self._generation_activations_dict[layer_number].append(input_[0][0][-1].detach().cpu().numpy())
+     # Accumulate in dict
+     # The inputs to c_proj already pass through the gelu activation function
+     self._generation_activations_dict[layer_number].append(input_[0][0][-1].detach().cpu().numpy())

def _inhibit_neurons_hook(self, name: str, input_tensor):
"""
@@ -345,9 +358,9 @@ def display_token(self, viz_id, token_id, position):
'type': 'output'
}
js = f"""
- // We don't really need these require scripts. But this is to avert
+ // We don't really need these require scripts. But this is to avert
//this code from running before display_input_sequence which DOES require external files
- requirejs(['basic', 'ecco'], function(basic, ecco){{
+ requirejs(['basic', 'ecco'], function(basic, ecco){{
console.log('addToken viz_id', '{viz_id}');
window.ecco['{viz_id}'].addToken({json.dumps(token)})
window.ecco['{viz_id}'].redraw()
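
The two hook changes above apply the same filtering rule: a layer's activations are stored only when collect_activations_layer_nums is None (collect everything) or the layer's number is in the list. A standalone sketch of that predicate (the function name below is illustrative, not ecco's API):

```python
# Sketch of the layer-filtering rule used by both activation hooks.
# None --> collect activations for every layer.
def should_collect(layer_number, collect_activations_layer_nums=None):
    return (collect_activations_layer_nums is None) or \
           (layer_number in collect_activations_layer_nums)

assert should_collect(3)               # None: every layer is collected
assert not should_collect(3, [0, 5])   # layer 3 was not requested
assert should_collect(5, [0, 5])       # layer 5 was requested
```

Defaulting to None keeps the previous collect-all behavior, so existing callers are unaffected while memory-conscious users can narrow collection to the layers they actually inspect.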