-
Notifications
You must be signed in to change notification settings - Fork 0
/
param_magnitude.pl
executable file
·37 lines (26 loc) · 1.76 KB
/
param_magnitude.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/env perl
# example of use:
# ~/bin/param_importance.pl pruned_transducer_stateless7/scaled_adam_exp204_1job_md600/diagnostics_epoch15.txt | sort -gr -k2 | head
# ~/bin/param_importance.pl pruned_transducer_stateless7/scaled_adam_exp204_1job_md600/diagnostics_epoch15.txt | sort -gr -k2 | tail
#
# ~/bin/param_importance.pl <p*7/*381*/diag*15.txt | sort -k2 -gr | grep -E '^..(self_attn.|feed_forward.|attention_squeeze.|self_attn_weights|conv_module|nonlin_attention_module) \S+$' | head -n 10
# ~/bin/param_importance.pl <p*7/*386*/diag*15.txt | sort -k2 -gr | grep -E 'self_attn1|feed_forward.|attention_squeeze.|self_attn_weights|conv_module' | head -n 20
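# Takes as stdin a diagnostics file from icefall, produced by running train.py with --print-diagnostics True.
# Intended for conformers.
# This is for analyzing the relative importance of different parameters.
# NOTE(review): the examples here invoke param_importance.pl, but this file appears to be param_magnitude.pl -- confirm the intended script name.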
# Sub-module relative importance:
# ~/bin/param_importance.pl <p*7/*291*/diag*15.txt | sort -k2 -gr | grep -E '^..(self_attn.|feed_forward.|squeeze_excite.|self_attn|attention_squeeze.|self_attn_weights|conv_module) \S+$' | head -n 9
#
# Relative importance of stacks:
# ~/bin/param_importance.pl <p*7/*291*/diag*15.txt | sort -k2 -gr | grep -E '^encoder.encoders..(|.encoder).layers '
# submodules of attention_squeeze
# ~/bin/param_importance.pl pruned_transducer_stateless7/scaled_adam_exp389_1job_md600/diagnostics_epoch15.txt | sort -gr -k2 | awk '{print NR, $0;}' | grep squeeze | grep -v layers | grep -v -E '^[0-9]+ ..[0-9]'
use strict;
use warnings;

# Parse one line of icefall diagnostics output.
#
# Returns the two-element list (name, mean) when the line is the dim=0 "abs"
# statistics line of a "<name>.param_value" entry, and the empty list for any
# other line.  name is the text between "module=" and ".param_value"; mean is
# the text of the "mean=" field, returned as a string exactly as it appears
# in the input.
sub _parse_param_mean {
    my ($line) = @_;

    # Original author's note: could make this rms instead of abs.  I was
    # trying to get more "typical" values, to ignore situations where the
    # largest value actually has a small grad.
    #
    # The dot before "param_value" is escaped so that it only matches a
    # literal ".", and the mean is captured with [^,]+ so that it stops at the
    # first comma instead of swallowing any later ", key=value" fields.
    if ($line =~ m/^module=(.*)\.param_value, dim=0, size=\d+, abs .+mean=([^,]+),/) {
        return ($1, $2);
    }
    return;
}

# Filter: read diagnostics from the files named in @ARGV (or from stdin when
# there are none) and print one "<name> <mean>" line per matching entry.
while (my $line = <>) {
    my ($name, $mean) = _parse_param_mean($line);
    next unless defined $name;
    print "$name $mean\n";
}