-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathutils.py
82 lines (66 loc) · 2.91 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import numpy as np
import tensorflow as tf
import scipy.signal
def add_histogram(writer, tag, values, step, bins=1000):
"""
Logs the histogram of a list/vector of values.
From: https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514
"""
# Create histogram using numpy
counts, bin_edges = np.histogram(values, bins=bins)
# Fill fields of histogram proto
hist = tf.HistogramProto()
hist.min = float(np.min(values))
hist.max = float(np.max(values))
hist.num = int(np.prod(values.shape))
hist.sum = float(np.sum(values))
hist.sum_squares = float(np.sum(values ** 2))
# Requires equal number as bins, where the first goes from -DBL_MAX to bin_edges[1]
# See https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/summary.proto#L30
# Therefore we drop the start of the first bin
bin_edges = bin_edges[1:]
# Add bin edges and counts
for edge in bin_edges:
hist.bucket_limit.append(edge)
for c in counts:
hist.bucket.append(c)
# Create and write Summary
summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
writer.add_summary(summary, step)
def discount(x, gamma, terminal_array=None):
if terminal_array is None:
return scipy.signal.lfilter([1], [1, -gamma], x[::-1], axis=0)[::-1]
else:
y, adv = 0, []
terminals_reversed = terminal_array[1:][::-1]
for step, dt in enumerate(reversed(x)):
y = dt + gamma * y * (1 - terminals_reversed[step])
adv.append(y)
return np.array(adv)[::-1]
class RunningStats(object):
# https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
# https://github.com/openai/baselines/blob/master/baselines/common/running_mean_std.py
def __init__(self, epsilon=1e-4, shape=()):
self.mean = np.zeros(shape, 'float64')
self.var = np.ones(shape, 'float64')
self.std = np.ones(shape, 'float64')
self.count = epsilon
def update(self, x):
batch_mean = np.mean(x, axis=0)
batch_var = np.var(x, axis=0)
batch_count = x.shape[0]
self.update_from_moments(batch_mean, batch_var, batch_count)
def update_from_moments(self, batch_mean, batch_var, batch_count):
delta = batch_mean - self.mean
new_mean = self.mean + delta * batch_count / (self.count + batch_count)
m_a = self.var * self.count
m_b = batch_var * batch_count
M2 = m_a + m_b + np.square(delta) * self.count * batch_count / (self.count + batch_count)
new_var = M2 / (self.count + batch_count)
self.mean = new_mean
self.var = new_var
self.std = np.maximum(np.sqrt(self.var), 1e-6)
self.count = batch_count + self.count
def lstm_state_combine(state):
return np.reshape([s[0] for s in state], (len(state), -1)), \
np.reshape([s[1] for s in state], (len(state), -1))