# utils.py: Utility file for implementing helpful utility functions used by the ML algorithms.
#
# Submitted by: [enter your full name here] -- [enter your IU username here]
#
# Based on skeleton code by CSCI-B 551 Fall 2021 Course Staff
import numpy as np


# https://stackoverflow.com/questions/1401712/how-can-the-euclidean-distance-be-calculated-with-numpy
def euclidean_distance(x1, x2):
    """
    Computes and returns the Euclidean distance between two vectors.

    Args:
        x1: A numpy array of shape (n_features,).
        x2: A numpy array of shape (n_features,).
    """
    # ord=2 is the L2 (Euclidean) norm; axis=-1 handles a single (n_features,)
    # vector as well as a batch of shape (n_samples, n_features).
    return np.linalg.norm(x1 - x2, ord=2, axis=-1)


# https://stackoverflow.com/questions/1401712/how-can-the-euclidean-distance-be-calculated-with-numpy
def manhattan_distance(x1, x2):
    """
    Computes and returns the Manhattan distance between two vectors.

    Args:
        x1: A numpy array of shape (n_features,).
        x2: A numpy array of shape (n_features,).
    """
    # ord=1 is the L1 (Manhattan) norm; axis=-1 handles a single (n_features,)
    # vector as well as a batch of shape (n_samples, n_features).
    return np.linalg.norm(x1 - x2, ord=1, axis=-1)
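

# Illustrative sketch (not part of the course skeleton): a tiny sanity check for the
# two distance functions above. The sample points are made up for demonstration only.
def _demo_distances():
    a = np.array([0.0, 0.0])
    b = np.array([3.0, 4.0])
    # The 3-4-5 right triangle: Euclidean distance is 5, Manhattan distance is 3 + 4 = 7.
    print("euclidean:", euclidean_distance(a, b))   # expected 5.0
    print("manhattan:", manhattan_distance(a, b))   # expected 7.0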


def identity(x, derivative=False):
    """
    Computes and returns the identity activation function of the given input data x. If derivative = True,
    the derivative of the activation function is returned instead.

    Args:
        x: A numpy array of shape (n_samples, n_hidden).
        derivative: A boolean representing whether or not the derivative of the function should be returned instead.
    """
    if not derivative:
        return x
    # The derivative of the identity function is 1 everywhere; return an array of
    # ones with the same shape as x so it broadcasts like the other activations.
    return np.ones_like(x)


def d_sigmoid(x):
    # Derivative of the sigmoid with respect to its input x: s(x) * (1 - s(x)).
    s = 1 / (1 + np.exp(-x))
    return s * (1 - s)


def sigmoid(x, derivative=False):
    """
    Computes and returns the sigmoid (logistic) activation function of the given input data x. If derivative = True,
    the derivative of the activation function is returned instead.

    Args:
        x: A numpy array of shape (n_samples, n_hidden).
        derivative: A boolean representing whether or not the derivative of the function should be returned instead.
    """
    if not derivative:
        return 1 / (1 + np.exp(-x))
    return d_sigmoid(x)
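

# Illustrative sketch (not required by the skeleton): compare d_sigmoid against a central
# finite-difference approximation. The step size h and the sample points are arbitrary.
def _demo_sigmoid_gradient_check(h=1e-5):
    x = np.array([[-2.0, -0.5, 0.0, 0.5, 2.0]])
    numeric = (sigmoid(x + h) - sigmoid(x - h)) / (2 * h)
    analytic = sigmoid(x, derivative=True)
    # The two estimates should agree to within roughly h ** 2.
    print("max |analytic - numeric|:", np.max(np.abs(analytic - numeric)))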


def d_tanh(x):
    # Derivative of tanh with respect to its input x: 1 - tanh(x)^2.
    return 1 - np.tanh(x) ** 2


def tanh(x, derivative=False):
    """
    Computes and returns the hyperbolic tangent activation function of the given input data x. If derivative = True,
    the derivative of the activation function is returned instead.

    Args:
        x: A numpy array of shape (n_samples, n_hidden).
        derivative: A boolean representing whether or not the derivative of the function should be returned instead.
    """
    if not derivative:
        return np.tanh(x)
    return d_tanh(x)


def d_relu(x):
    # Subgradient of ReLU: 1 where x > 0, 0 elsewhere (0 is used at x == 0).
    return (x > 0).astype(float)


def relu(x, derivative=False):
    """
    Computes and returns the rectified linear unit activation function of the given input data x. If derivative = True,
    the derivative of the activation function is returned instead.

    Args:
        x: A numpy array of shape (n_samples, n_hidden).
        derivative: A boolean representing whether or not the derivative of the function should be returned instead.
    """
    if not derivative:
        return x * (x > 0)
    return d_relu(x)
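

# Illustrative sketch: apply the hidden-layer activations and their derivatives to one
# small batch so their shapes and ranges can be eyeballed. The input values are arbitrary.
def _demo_activations():
    x = np.array([[-1.5, 0.0, 2.0]])
    for name, fn in (("identity", identity), ("sigmoid", sigmoid), ("tanh", tanh), ("relu", relu)):
        print(name, fn(x), fn(x, derivative=True))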


def softmax(x, derivative=False):
    """
    Computes and returns the softmax activation function of the given input data x. If derivative = True,
    the derivative of the activation function is returned instead.

    Args:
        x: A numpy array of shape (n_samples, n_outputs).
        derivative: A boolean representing whether or not the derivative of the function should be returned instead.
    """
    # Clip to guard against infinities before exponentiating.
    x = np.clip(x, -1e100, 1e100)
    if not derivative:
        # Subtract the per-row maximum before exponentiating for numerical stability.
        c = np.max(x, axis=1, keepdims=True)
        return np.exp(x - c - np.log(np.sum(np.exp(x - c), axis=1, keepdims=True)))
    else:
        # Element-wise (diagonal-Jacobian) derivative; compute the softmax once and reuse it.
        s = softmax(x)
        return s * (1 - s)
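

# Illustrative sketch: each row of the softmax output is a probability distribution, so
# the rows should sum to 1 even for very large inputs thanks to the max-subtraction trick.
def _demo_softmax():
    x = np.array([[1.0, 2.0, 3.0], [1000.0, 1001.0, 1002.0]])
    p = softmax(x)
    print("row sums:", p.sum(axis=1))   # expected [1. 1.]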


def cross_entropy(y, p):
    """
    Computes and returns the cross-entropy loss, defined as the negative log-likelihood of a logistic model that returns
    p probabilities for its true class labels y.

    Args:
        y:
            A numpy array of shape (n_samples, n_outputs) representing the one-hot encoded target class values for the
            input data used when fitting the model.

        p:
            A numpy array of shape (n_samples, n_outputs) representing the predicted probabilities from the softmax
            output activation function.
    """
    # Clip the probabilities away from 0 and 1 so np.log never sees an exact 0.
    eps = 1e-11
    p = np.clip(p, eps, 1 - eps)
    # Average the negative log-likelihood of the true classes over the samples.
    n_samples = y.shape[0]
    cost = -np.sum(y * np.log(p)) / n_samples
    return float(cost)
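

# Illustrative sketch: the cross-entropy of a confident, correct prediction should be close
# to 0, while a confident, wrong prediction is heavily penalised. The values are made up.
def _demo_cross_entropy():
    y = np.array([[1.0, 0.0], [0.0, 1.0]])
    good = np.array([[0.9, 0.1], [0.1, 0.9]])
    bad = np.array([[0.1, 0.9], [0.9, 0.1]])
    print("good predictions:", cross_entropy(y, good))   # roughly 0.105
    print("bad predictions: ", cross_entropy(y, bad))    # roughly 2.303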


def one_hot_encoding(y):
    """
    Converts a vector y of categorical target class values into a one-hot numeric array using one-hot encoding: one-hot
    encoding creates new binary-valued columns, each of which indicate the presence of each possible value from the
    original data.

    Args:
        y: A numpy array of shape (n_samples,) representing the target class values for each sample in the input data.

    Returns:
        A numpy array of shape (n_samples, n_outputs) representing the one-hot encoded target class values for the input
        data. n_outputs is equal to the number of unique categorical class values in the numpy array y.
    """
    # Map each class value to an index 0..n_outputs-1, then index into an identity
    # matrix; this also handles labels that are not already 0-based integers.
    classes, indices = np.unique(np.array(y), return_inverse=True)
    return np.eye(len(classes))[indices]
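

# Illustrative sketch: one_hot_encoding maps each label to a binary indicator row. The
# __main__ guard below is only a convenience for running the demos in this file by hand;
# it is not part of the assignment interface and can be removed.
def _demo_one_hot_encoding():
    y = np.array([0, 2, 1, 2])
    print(one_hot_encoding(y))   # expected rows: [1 0 0], [0 0 1], [0 1 0], [0 0 1]


if __name__ == "__main__":
    _demo_distances()
    _demo_sigmoid_gradient_check()
    _demo_activations()
    _demo_softmax()
    _demo_cross_entropy()
    _demo_one_hot_encoding()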