loop_function
- At each decoding step, we choose the top-k best symbols (k = beam_size)
- Questions
- Who eventually chooses the best hypothesis?
- Is this necessary during training?
- emb_prev naturally comes back with shape [beam_size, embedding_size]
- (currently here) The non-beam-search loop_function returns only a single embedding, so what abstracts away this shape difference? See the sketch after this list. TODO: try https://github.com/AvaisP/Neural_Conversation_Models
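To make that shape question concrete, here is a minimal NumPy sketch (not code from this repo; the toy sizes are assumptions) contrasting what the two loop_functions feed back into the decoder: the greedy version returns one embedding row, the beam-search version returns beam_size rows.

import numpy as np

num_symbols, embedding_size, beam_size = 5, 3, 2
embedding = np.random.randn(num_symbols, embedding_size)
logits = np.random.randn(1, num_symbols)        # decoder output at one step

# Greedy loop_function: argmax -> one embedding row, shape (1, embedding_size)
greedy_symbol = logits.argmax(axis=1)
greedy_emb = embedding[greedy_symbol]

# Beam-search loop_function: top-k -> beam_size rows, shape (beam_size, embedding_size)
topk_symbols = np.argsort(logits[0])[::-1][:beam_size]
beam_emb = embedding[topk_symbols]

print(greedy_emb.shape, beam_emb.shape)         # (1, 3) (2, 3)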
# Say beam_size = 2 (the numbers below are illustrative; real values from
# tf.log(tf.nn.softmax(...)) would be <= 0)
# i = 1
# probs = [0.4, 0.2, 0.3, ..., 0.11]   shape=(1, num_symbols)
# best_probs = [0.4, 0.3]              shape=(1, beam_size)
# indices = [0, 2]                     shape=(1, beam_size)
# after reshape:
# indices = [0; 2]                     shape=(2, 1)
# best_probs = [0.4; 0.3]              shape=(2, 1)
# log_beam_probs = [[0.4; 0.3]]        list with one entry of shape (2, 1)
#
# i = 2
# probs = [0.01, 0.9, 0.4, ..., 0.11]  shape=(1, num_symbols)
# (in the real code prev holds one row per hypothesis, i.e. shape
#  (beam_size, num_symbols); a single row keeps the example small)
# probs = probs + log_beam_probs[-1]   broadcast add with the (2, 1) column
# probs = [[0.01 + 0.4, 0.9 + 0.4, 0.4 + 0.4, ..., 0.11 + 0.4];
#          [0.01 + 0.3, 0.9 + 0.3, 0.4 + 0.3, ..., 0.11 + 0.3]]
# reshape
# probs = [0.01 + 0.4, 0.9 + 0.4, 0.4 + 0.4, ..., 0.11 + 0.4, 0.01 + 0.3, 0.9 + 0.3, 0.4 + 0.3, ..., 0.11 + 0.3]
#       = [0.41, 1.3, 0.8, ..., 0.51, 0.31, 1.2, 0.7, ..., 0.41]   shape=(1, num_symbols * beam_size)
# best_probs = [1.3, 1.2]  => reshape => [1.3; 1.2]
# indices = [1, num_symbols + 1]       (e.g. [1, 102] when num_symbols = 101)
# symbols     = indices %  num_symbols = [1, 1]   which word in the vocabulary
# beam_parent = indices // num_symbols = [0, 1]   which hypothesis it came from
#
def loop_function(prev, i, log_beam_probs, beam_path, beam_symbols):
    # Closure variables from the enclosing decoder: output_projection,
    # beam_size, num_symbols, embedding, embedding_size, update_embedding.
    if output_projection is not None:
        prev = nn_ops.xw_plus_b(
            prev, output_projection[0], output_projection[1])
    probs = tf.log(tf.nn.softmax(prev))

    if i > 1:
        # Add the running log-probability of each hypothesis, then flatten
        # so top_k searches over all beam_size * num_symbols candidates.
        probs = tf.reshape(probs + log_beam_probs[-1],
                           [-1, beam_size * num_symbols])

    best_probs, indices = tf.nn.top_k(probs, beam_size)
    indices = tf.stop_gradient(tf.squeeze(tf.reshape(indices, [-1, 1])))
    best_probs = tf.stop_gradient(tf.reshape(best_probs, [-1, 1]))

    symbols = indices % num_symbols       # Which word in vocabulary.
    beam_parent = indices // num_symbols  # Which hypothesis it came from.

    beam_symbols.append(symbols)
    beam_path.append(beam_parent)
    log_beam_probs.append(best_probs)

    # Note that gradients will not propagate through the second parameter of
    # embedding_lookup.
    emb_prev = embedding_ops.embedding_lookup(embedding, symbols)
    emb_prev = tf.reshape(emb_prev, [beam_size, embedding_size])
    if not update_embedding:
        emb_prev = array_ops.stop_gradient(emb_prev)
    return emb_prev
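The index arithmetic above (top_k over the flattened beam_size * num_symbols scores, then % and // to recover the word and the parent hypothesis) can be checked with a small NumPy sketch; the sizes and scores here are made up for illustration, not taken from the repo.

import numpy as np

beam_size, num_symbols = 2, 5
# One row of cumulative log-scores per current hypothesis.
scores = np.array([[-1.0, -0.2, -3.0, -2.5, -4.0],   # hypothesis 0
                   [-0.7, -2.0, -0.4, -5.0, -3.0]])  # hypothesis 1

flat = scores.reshape(1, beam_size * num_symbols)     # shape (1, 10)
indices = np.argsort(flat[0])[::-1][:beam_size]       # top-k over all candidates
symbols = indices % num_symbols                       # which word in the vocabulary
beam_parent = indices // num_symbols                  # which hypothesis it came from

print(indices)      # [1 7]  -> best scores -0.2 and -0.4
print(symbols)      # [1 2]
print(beam_parent)  # [0 1]

Below is the diff from the original greedy loop_function to the beam-search version: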
--- /Users/higepon/Desktop/a.py 2017-07-09 15:05:16.000000000 +0900
+++ /Users/higepon/Desktop/b.py 2017-07-09 15:05:30.000000000 +0900
@@ -1,11 +1,53 @@
- def loop_function(prev, _):
+ def loop_function(prev, i, log_beam_probs, beam_path, beam_symbols):
if output_projection is not None:
prev = nn_ops.xw_plus_b(
prev, output_projection[0], output_projection[1])
- prev_symbol = math_ops.argmax(prev, 1)
+ # prev= prev.get_shape().with_rank(2)[1]
+
+ # Say k = 2, beam_size=3
+ # convert prev output to probability(?)
+ # i = 1
+ # probs = [0.3, 0.2, 0.4, ..., 0.12] len = num_symbols
+ # best_probs = [0.3, 0.4] =>reshape=> [0.3, 0.4]
+ # indices = [0, 2] =>reshape=> [0, 2]
+ # log_beam_probs = [] => [[0.3, 0.4]]
+ # i = 2
+ # probs = [0.99, 0.89, 0.4, ..., 0.001] len = num_symbols
+ # probs = [[0.99, 0.89, 0.4, ..., 0.001] + [0.3, 0.4]]
+ # best_probs = [0.99, 0.89]
+ # indices = [0, 1]
+ # probs = [[current probs],
+ # [top k of prev]]
+ # top_k returns top k for each row
+
+ probs = tf.log(tf.nn.softmax(prev))
+
+ if i > 1:
+
+ # reshape current_probs + previous_probs
+ # we can add them, because these are log probabilities (sum of logs = product of probs)!!!
+ probs = tf.reshape(probs + log_beam_probs[-1],
+ [-1, beam_size * num_symbols])
+
+ # Pick top-k probs and indices, but why are we searching over the past probs too?
+ best_probs, indices = tf.nn.top_k(probs, beam_size)
+ indices = tf.stop_gradient(tf.squeeze(tf.reshape(indices, [-1, 1])))
+ best_probs = tf.stop_gradient(tf.reshape(best_probs, [-1, 1]))
+
+ symbols = indices % num_symbols # Which word in vocabulary.
+ beam_parent = indices // num_symbols # Which hypothesis it came from.
+
+
+ beam_symbols.append(symbols)
+ beam_path.append(beam_parent)
+ log_beam_probs.append(best_probs)
+
# Note that gradients will not propagate through the second parameter of
# embedding_lookup.
- emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
+
+ emb_prev = embedding_ops.embedding_lookup(embedding, symbols)
+ emb_prev = tf.reshape(emb_prev ,[beam_size ,embedding_size])
+ # emb_prev = embedding_ops.embedding_lookup(embedding, symbols)
if not update_embedding:
emb_prev = array_ops.stop_gradient(emb_prev)
return emb_prev
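On the open question above ("who eventually chooses the best?"): loop_function itself only records candidates into beam_symbols, beam_path and log_beam_probs; the winning hypothesis still has to be reconstructed after decoding by walking beam_path backwards from the best final score. A hypothetical backtracking sketch (not code from this repo; it assumes the three lists have been evaluated into plain NumPy arrays, e.g. via sess.run):

import numpy as np

def backtrack_best(beam_symbols, beam_path, log_beam_probs):
    # beam_symbols / beam_path: lists of int arrays of shape (beam_size,)
    # log_beam_probs: list of float arrays of shape (beam_size, 1)
    beam = int(np.argmax(log_beam_probs[-1][:, 0]))   # best final hypothesis
    tokens = []
    # Walk backwards: take the symbol chosen on this beam at each step,
    # then jump to the parent hypothesis it extended.
    for step in range(len(beam_symbols) - 1, -1, -1):
        tokens.append(int(beam_symbols[step][beam]))
        beam = int(beam_path[step][beam])
    return tokens[::-1]   # best-scoring token sequence, in decoding order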